st/nine: Begin programmable shader fog support
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
36
/* Debug channel selected for every debug/dump macro used in this file. */
#define DBG_CHANNEL DBG_SHADER

/* printf-style output on DBG_CHANNEL. NULL is passed as the second argument
 * of _nine_debug_printf (presumably the function-name prefix; confirm in
 * nine_debug.h). */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
40
41
/* Forward declaration; the full definition follows further down. */
struct shader_translator;

/* Signature of a special-case instruction handler (see sm1_op_info.handler). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Returns a printable name for a D3DSIO opcode; used by the dump code. */
static inline const char *d3dsio_to_string(unsigned opcode);
47
48
/* Shader-type tags (NOTE(review): presumably the high word of the SM1
 * version token; confirm against the parser). */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the nesting bookkeeping arrays in struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

/* NOTE(review): presumably the end-of-shader token value; confirm. */
#define NINED3DSP_END 0x0000ffff

/* Values stored in the `type` member of parsed parameters; they select the
 * active member of sm1_src_param.imm. */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Pseudo register file, one past the last D3D file, marking an immediate. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

/* Mask and shift locating the write mask inside a parameter token. */
#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

/* Instruction-token bit flagging a predicated instruction. */
#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison codes (greater-than, equal, ...). */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Location of the per-opcode flag byte inside an instruction token. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values for D3DSIO_TEX; the dump code prints them as "p" / "b". */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Write-mask bits after shifting down: component 0..3 is x, y, z, w. */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle (.xyzw): two bits per destination component. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Source operand referencing constant-file slot `index`. */
#define NINE_CONSTANT_SRC(index) \
    ureg_src_register(TGSI_FILE_CONSTANT, index)

/* Replicates component `s` (X, Y, Z or W) of `src` to all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Constant-file slot `index` with component `s` replicated. */
#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
    NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)

/* Destination modifier bits, shifted down so they fit sm1_dst_param.mod. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
104
105 /*
106 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
107 * BIAS <= PS 1.4 (x-0.5)
108 * BIASNEG <= PS 1.4 (-(x-0.5))
109 * SIGN <= PS 1.4 (2(x-0.5))
110 * SIGNNEG <= PS 1.4 (-2(x-0.5))
111 * COMP <= PS 1.4 (1-x)
112 * X2 = PS 1.4 (2x)
113 * X2NEG = PS 1.4 (-2x)
114 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
115 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
116 * ABS >= SM 3.0 (abs(x))
117 * ABSNEG >= SM 3.0 (-abs(x))
118 * NOT >= SM 2.0 predication only
119 */
/* Source modifier codes shifted down by D3DSP_SRCMOD_SHIFT so they can be
 * stored in sm1_src_param.mod and used to index sm1_mod_str[]. */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
134
/* Printable prefix for each source modifier, indexed by NINED3DSPSM_*.
 * sm1_dump_src_param() prints it as "<prefix>(<register>)". */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
152
153 static void
154 sm1_dump_writemask(BYTE mask)
155 {
156 if (mask & 1) DUMP("x"); else DUMP("_");
157 if (mask & 2) DUMP("y"); else DUMP("_");
158 if (mask & 4) DUMP("z"); else DUMP("_");
159 if (mask & 8) DUMP("w"); else DUMP("_");
160 }
161
162 static void
163 sm1_dump_swizzle(BYTE s)
164 {
165 char c[4] = { 'x', 'y', 'z', 'w' };
166 DUMP("%c%c%c%c",
167 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
168 }
169
/* One-letter tag per D3DSPR_* register file, used by the dump helpers.
 * The table ends at D3DSPR_PREDICATE, so larger file values (including
 * NINED3DSPR_IMMEDIATE) have no entry. Note that ATTROUT and DEPTHOUT
 * share 'D' and CONST/CONST2/CONST3/CONST4 share 'c'. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
193
194 static void
195 sm1_dump_reg(BYTE file, INT index)
196 {
197 switch (file) {
198 case D3DSPR_LOOP:
199 DUMP("aL");
200 break;
201 case D3DSPR_COLOROUT:
202 DUMP("oC%i", index);
203 break;
204 case D3DSPR_DEPTHOUT:
205 DUMP("oDepth");
206 break;
207 case D3DSPR_RASTOUT:
208 DUMP("oRast%i", index);
209 break;
210 case D3DSPR_CONSTINT:
211 DUMP("iconst[%i]", index);
212 break;
213 case D3DSPR_CONSTBOOL:
214 DUMP("bconst[%i]", index);
215 break;
216 default:
217 DUMP("%c%i", sm1_file_char[file], index);
218 break;
219 }
220 }
221
/* Parsed form of one SM1 source parameter. */
struct sm1_src_param
{
    INT idx;                   /* register index within `file` */
    struct sm1_src_param *rel; /* relative-addressing parameter, or NULL */
    BYTE file;                 /* D3DSPR_* or NINED3DSPR_IMMEDIATE */
    BYTE swizzle;              /* 2 bits per component; NINED3DSP_NOSWIZZLE = .xyzw */
    BYTE mod;                  /* NINED3DSPSM_* source modifier */
    BYTE type;                 /* NINED3DSPTYPE_*; selects the imm member */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm;                     /* immediate value (file == NINED3DSPR_IMMEDIATE) */
};
/* Fills in an immediate source parameter; defined further down. */
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
239
/* Parsed form of one SM1 destination parameter. */
struct sm1_dst_param
{
    INT idx;                   /* register index within `file` */
    struct sm1_src_param *rel; /* relative-addressing parameter, or NULL */
    BYTE file;                 /* D3DSPR_* register file */
    BYTE mask;                 /* write mask, NINED3DSP_WRITEMASK_* bits */
    BYTE mod;                  /* NINED3DSPDM_* destination modifier bits */
    int8_t shift; /* sint4 */  /* result scale: <0 divides, >0 multiplies by 2^|shift| */
    BYTE type;
};
250
251 static inline void
252 assert_replicate_swizzle(const struct ureg_src *reg)
253 {
254 assert(reg->SwizzleY == reg->SwizzleX &&
255 reg->SwizzleZ == reg->SwizzleX &&
256 reg->SwizzleW == reg->SwizzleX);
257 }
258
259 static void
260 sm1_dump_immediate(const struct sm1_src_param *param)
261 {
262 switch (param->type) {
263 case NINED3DSPTYPE_FLOAT4:
264 DUMP("{ %f %f %f %f }",
265 param->imm.f[0], param->imm.f[1],
266 param->imm.f[2], param->imm.f[3]);
267 break;
268 case NINED3DSPTYPE_INT4:
269 DUMP("{ %i %i %i %i }",
270 param->imm.i[0], param->imm.i[1],
271 param->imm.i[2], param->imm.i[3]);
272 break;
273 case NINED3DSPTYPE_BOOL:
274 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
275 break;
276 default:
277 assert(0);
278 break;
279 }
280 }
281
282 static void
283 sm1_dump_src_param(const struct sm1_src_param *param)
284 {
285 if (param->file == NINED3DSPR_IMMEDIATE) {
286 assert(!param->mod &&
287 !param->rel &&
288 param->swizzle == NINED3DSP_NOSWIZZLE);
289 sm1_dump_immediate(param);
290 return;
291 }
292
293 if (param->mod)
294 DUMP("%s(", sm1_mod_str[param->mod]);
295 if (param->rel) {
296 DUMP("%c[", sm1_file_char[param->file]);
297 sm1_dump_src_param(param->rel);
298 DUMP("+%i]", param->idx);
299 } else {
300 sm1_dump_reg(param->file, param->idx);
301 }
302 if (param->mod)
303 DUMP(")");
304 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
305 DUMP(".");
306 sm1_dump_swizzle(param->swizzle);
307 }
308 }
309
310 static void
311 sm1_dump_dst_param(const struct sm1_dst_param *param)
312 {
313 if (param->mod & NINED3DSPDM_SATURATE)
314 DUMP("sat ");
315 if (param->mod & NINED3DSPDM_PARTIALP)
316 DUMP("pp ");
317 if (param->mod & NINED3DSPDM_CENTROID)
318 DUMP("centroid ");
319 if (param->shift < 0)
320 DUMP("/%u ", 1 << -param->shift);
321 if (param->shift > 0)
322 DUMP("*%u ", 1 << param->shift);
323
324 if (param->rel) {
325 DUMP("%c[", sm1_file_char[param->file]);
326 sm1_dump_src_param(param->rel);
327 DUMP("+%i]", param->idx);
328 } else {
329 sm1_dump_reg(param->file, param->idx);
330 }
331 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
332 DUMP(".");
333 sm1_dump_writemask(param->mask);
334 }
335 }
336
/* A declaration as read by sm1_read_semantic(): the declared register plus
 * its usage or sampler type. */
struct sm1_semantic
{
    struct sm1_dst_param reg; /* register being declared */
    BYTE sampler_type;
    D3DDECLUSAGE usage;
    BYTE usage_idx;
};
344
/* Static description of one SM1 opcode and how it is translated. */
struct sm1_op_info
{
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
     * should be ignored completely */
    unsigned sio;
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even if a handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
365
/* One parsed SM1 instruction. */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags;      /* opcode-specific flags, e.g. NINED3DSI_TEXLD_* for TEX */
    BOOL coissue;    /* dumped as the "_co" suffix */
    BOOL predicated; /* when set, `pred` holds the predicate operand */
    BYTE ndst;       /* number of valid entries in dst[] */
    BYTE nsrc;       /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    struct sm1_src_param src_rel[4]; /* presumably backing storage for src[i].rel; confirm in parser */
    struct sm1_src_param pred;
    struct sm1_src_param dst_rel[1]; /* presumably backing storage for dst[0].rel; confirm in parser */
    struct sm1_dst_param dst[1];

    struct sm1_op_info *info;
};
382
383 static void
384 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
385 {
386 unsigned i;
387
388 /* no info stored for these: */
389 if (insn->opcode == D3DSIO_DCL)
390 return;
391 for (i = 0; i < indent; ++i)
392 DUMP(" ");
393
394 if (insn->predicated) {
395 DUMP("@");
396 sm1_dump_src_param(&insn->pred);
397 DUMP(" ");
398 }
399 DUMP("%s", d3dsio_to_string(insn->opcode));
400 if (insn->flags) {
401 switch (insn->opcode) {
402 case D3DSIO_TEX:
403 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
404 break;
405 default:
406 DUMP("_%x", insn->flags);
407 break;
408 }
409 }
410 if (insn->coissue)
411 DUMP("_co");
412 DUMP(" ");
413
414 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
415 sm1_dump_dst_param(&insn->dst[i]);
416 DUMP(" ");
417 }
418
419 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
420 sm1_dump_src_param(&insn->src[i]);
421 DUMP(" ");
422 }
423 if (insn->opcode == D3DSIO_DEF ||
424 insn->opcode == D3DSIO_DEFI ||
425 insn->opcode == D3DSIO_DEFB)
426 sm1_dump_immediate(&insn->src[0]);
427
428 DUMP("\n");
429 }
430
/* A constant defined inside the shader (see tx_set_lconstf/i/b). */
struct sm1_local_const
{
    INT idx;             /* constant register index this entry overrides */
    struct ureg_src reg; /* ureg immediate holding the value */
    union {
        boolean b;
        float f[4];
        int32_t i[4];
    } imm;               /* raw value; only f[] is written by the setters in view */
};
441
/* All state carried through the translation of one SM1 shader to TGSI. */
struct shader_translator
{
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg; /* TGSI program under construction */

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAGMENT */

    boolean native_integers; /* use integer immediates/opcodes instead of float emulation */
    boolean inline_subroutines;
    boolean lower_preds;
    boolean want_texcoord;
    boolean shift_wpos;      /* subtract (0.5, 0.5) from the fragment position */
    unsigned texcoord_sn;    /* semantic name used for texcoord inputs/outputs */

    struct sm1_instruction insn; /* current instruction */

    struct {
        struct ureg_dst *r; /* temporaries, grown by tx_temp_alloc() */
        struct ureg_dst oPos;
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p;       /* predicate register */
        struct ureg_dst address; /* TGSI address register */
        struct ureg_dst a0;      /* temporary shadowing the D3D address register */
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[5]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* Elements(regs.r) */
    unsigned num_scratch; /* number of regs.t[] entries handed out by tx_scratch() */
    unsigned loop_depth;
    unsigned loop_depth_max;
    unsigned cond_depth;
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    struct sm1_local_const *lconstf; /* grown in steps of 8 by tx_set_lconstf() */
    unsigned num_lconstf;
    struct sm1_local_const lconsti[NINE_MAX_CONST_I]; /* indexed by constant index */
    struct sm1_local_const lconstb[NINE_MAX_CONST_B]; /* indexed by constant index */

    boolean indirect_const_access; /* set when a float constant is relatively addressed */
    boolean failure;               /* set when translation hit an unsupported/invalid case */

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
514
/* Shader-stage tests; both require a `tx` translator pointer in scope. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
/* Float-constant limit for the stage currently being translated. */
#define NINE_MAX_CONST_F_SHADER (tx->processor == TGSI_PROCESSOR_VERTEX ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3)

/* Mark the translation as failed and return from a void function when
 * `cond` holds. Expands to a bare if-statement and is used WITHOUT a
 * trailing semicolon, so never place it directly before an `else`. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
520
/* Reads a declaration into a struct sm1_semantic; defined further down. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
523
524 static void
525 sm1_instruction_check(const struct sm1_instruction *insn)
526 {
527 if (insn->opcode == D3DSIO_CRS)
528 {
529 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
530 {
531 DBG("CRS.mask.w\n");
532 }
533 }
534 }
535
536 static boolean
537 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
538 {
539 INT i;
540 if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) {
541 tx->failure = TRUE;
542 return FALSE;
543 }
544 for (i = 0; i < tx->num_lconstf; ++i) {
545 if (tx->lconstf[i].idx == index) {
546 *src = tx->lconstf[i].reg;
547 return TRUE;
548 }
549 }
550 return FALSE;
551 }
552 static boolean
553 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
554 {
555 if (index < 0 || index >= NINE_MAX_CONST_I) {
556 tx->failure = TRUE;
557 return FALSE;
558 }
559 if (tx->lconsti[index].idx == index)
560 *src = tx->lconsti[index].reg;
561 return tx->lconsti[index].idx == index;
562 }
563 static boolean
564 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
565 {
566 if (index < 0 || index >= NINE_MAX_CONST_B) {
567 tx->failure = TRUE;
568 return FALSE;
569 }
570 if (tx->lconstb[index].idx == index)
571 *src = tx->lconstb[index].reg;
572 return tx->lconstb[index].idx == index;
573 }
574
575 static void
576 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
577 {
578 unsigned n;
579
580 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER)
581 if (IS_VS && index >= NINE_MAX_CONST_F_SHADER)
582 WARN("lconstf index %i too high, indirect access won't work\n", index);
583
584 for (n = 0; n < tx->num_lconstf; ++n)
585 if (tx->lconstf[n].idx == index)
586 break;
587 if (n == tx->num_lconstf) {
588 if ((n % 8) == 0) {
589 tx->lconstf = REALLOC(tx->lconstf,
590 (n + 0) * sizeof(tx->lconstf[0]),
591 (n + 8) * sizeof(tx->lconstf[0]));
592 assert(tx->lconstf);
593 }
594 tx->num_lconstf++;
595 }
596 tx->lconstf[n].idx = index;
597 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
598
599 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
600 }
601 static void
602 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
603 {
604 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I)
605 tx->lconsti[index].idx = index;
606 tx->lconsti[index].reg = tx->native_integers ?
607 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
608 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
609 }
610 static void
611 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
612 {
613 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B)
614 tx->lconstb[index].idx = index;
615 tx->lconstb[index].reg = tx->native_integers ?
616 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
617 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
618 }
619
620 static inline struct ureg_dst
621 tx_scratch(struct shader_translator *tx)
622 {
623 if (tx->num_scratch >= Elements(tx->regs.t)) {
624 tx->failure = TRUE;
625 return tx->regs.t[0];
626 }
627 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
628 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
629 return tx->regs.t[tx->num_scratch++];
630 }
631
632 static inline struct ureg_dst
633 tx_scratch_scalar(struct shader_translator *tx)
634 {
635 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
636 }
637
638 static inline struct ureg_src
639 tx_src_scalar(struct ureg_dst dst)
640 {
641 struct ureg_src src = ureg_src(dst);
642 int c = ffs(dst.WriteMask) - 1;
643 if (dst.WriteMask == (1 << c))
644 src = ureg_scalar(src, c);
645 return src;
646 }
647
648 static inline void
649 tx_temp_alloc(struct shader_translator *tx, INT idx)
650 {
651 assert(idx >= 0);
652 if (idx >= tx->num_temp) {
653 unsigned k = tx->num_temp;
654 unsigned n = idx + 1;
655 tx->regs.r = REALLOC(tx->regs.r,
656 k * sizeof(tx->regs.r[0]),
657 n * sizeof(tx->regs.r[0]));
658 for (; k < n; ++k)
659 tx->regs.r[k] = ureg_dst_undef();
660 tx->num_temp = n;
661 }
662 if (ureg_dst_is_undef(tx->regs.r[idx]))
663 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
664 }
665
666 static inline void
667 tx_addr_alloc(struct shader_translator *tx, INT idx)
668 {
669 assert(idx == 0);
670 if (ureg_dst_is_undef(tx->regs.address))
671 tx->regs.address = ureg_DECL_address(tx->ureg);
672 if (ureg_dst_is_undef(tx->regs.a0))
673 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
674 }
675
676 static inline void
677 tx_pred_alloc(struct shader_translator *tx, INT idx)
678 {
679 assert(idx == 0);
680 if (ureg_dst_is_undef(tx->regs.p))
681 tx->regs.p = ureg_DECL_predicate(tx->ureg);
682 }
683
684 static inline void
685 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
686 {
687 assert(IS_PS);
688 assert(idx >= 0 && idx < Elements(tx->regs.vT));
689 if (ureg_src_is_undef(tx->regs.vT[idx]))
690 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
691 TGSI_INTERPOLATE_PERSPECTIVE);
692 }
693
694 static inline unsigned *
695 tx_bgnloop(struct shader_translator *tx)
696 {
697 tx->loop_depth++;
698 if (tx->loop_depth_max < tx->loop_depth)
699 tx->loop_depth_max = tx->loop_depth;
700 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
701 return &tx->loop_labels[tx->loop_depth - 1];
702 }
703
704 static inline unsigned *
705 tx_endloop(struct shader_translator *tx)
706 {
707 assert(tx->loop_depth);
708 tx->loop_depth--;
709 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
710 ureg_get_instruction_number(tx->ureg));
711 return &tx->loop_labels[tx->loop_depth];
712 }
713
714 static struct ureg_dst
715 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
716 {
717 const unsigned l = tx->loop_depth - 1;
718
719 if (!tx->loop_depth)
720 {
721 DBG("loop counter requested outside of loop\n");
722 return ureg_dst_undef();
723 }
724
725 if (ureg_dst_is_undef(tx->regs.rL[l])) {
726 /* loop or rep ctr creation */
727 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
728 tx->loop_or_rep[l] = loop_or_rep;
729 }
730 /* loop - rep - endloop - endrep not allowed */
731 assert(tx->loop_or_rep[l] == loop_or_rep);
732
733 return tx->regs.rL[l];
734 }
735
736 static struct ureg_src
737 tx_get_loopal(struct shader_translator *tx)
738 {
739 int loop_level = tx->loop_depth - 1;
740
741 while (loop_level >= 0) {
742 /* handle loop - rep - endrep - endloop case */
743 if (tx->loop_or_rep[loop_level])
744 /* the value is in the loop counter y component (nine implementation) */
745 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
746 loop_level--;
747 }
748
749 DBG("aL counter requested outside of loop\n");
750 return ureg_src_undef();
751 }
752
753 static inline unsigned *
754 tx_cond(struct shader_translator *tx)
755 {
756 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
757 tx->cond_depth++;
758 return &tx->cond_labels[tx->cond_depth - 1];
759 }
760
761 static inline unsigned *
762 tx_elsecond(struct shader_translator *tx)
763 {
764 assert(tx->cond_depth);
765 return &tx->cond_labels[tx->cond_depth - 1];
766 }
767
768 static inline void
769 tx_endcond(struct shader_translator *tx)
770 {
771 assert(tx->cond_depth);
772 tx->cond_depth--;
773 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
774 ureg_get_instruction_number(tx->ureg));
775 }
776
777 static inline struct ureg_dst
778 nine_ureg_dst_register(unsigned file, int index)
779 {
780 return ureg_dst(ureg_src_register(file, index));
781 }
782
783 static struct ureg_src
784 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
785 {
786 struct ureg_program *ureg = tx->ureg;
787 struct ureg_src src;
788 struct ureg_dst tmp;
789
790 switch (param->file)
791 {
792 case D3DSPR_TEMP:
793 assert(!param->rel);
794 tx_temp_alloc(tx, param->idx);
795 src = ureg_src(tx->regs.r[param->idx]);
796 break;
797 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
798 case D3DSPR_ADDR:
799 assert(!param->rel);
800 if (IS_VS) {
801 assert(param->idx == 0);
802 /* the address register (vs only) must be
803 * assigned before use */
804 assert(!ureg_dst_is_undef(tx->regs.a0));
805 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
806 src = ureg_src(tx->regs.address);
807 } else {
808 if (tx->version.major < 2 && tx->version.minor < 4) {
809 /* no subroutines, so should be defined */
810 src = ureg_src(tx->regs.tS[param->idx]);
811 } else {
812 tx_texcoord_alloc(tx, param->idx);
813 src = tx->regs.vT[param->idx];
814 }
815 }
816 break;
817 case D3DSPR_INPUT:
818 if (IS_VS) {
819 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
820 } else {
821 if (tx->version.major < 3) {
822 assert(!param->rel);
823 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
824 param->idx,
825 TGSI_INTERPOLATE_PERSPECTIVE);
826 } else {
827 assert(!param->rel); /* TODO */
828 assert(param->idx < Elements(tx->regs.v));
829 src = tx->regs.v[param->idx];
830 }
831 }
832 break;
833 case D3DSPR_PREDICATE:
834 assert(!param->rel);
835 tx_pred_alloc(tx, param->idx);
836 src = ureg_src(tx->regs.p);
837 break;
838 case D3DSPR_SAMPLER:
839 assert(param->mod == NINED3DSPSM_NONE);
840 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
841 assert(!param->rel);
842 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
843 break;
844 case D3DSPR_CONST:
845 assert(!param->rel || IS_VS);
846 if (param->rel)
847 tx->indirect_const_access = TRUE;
848 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
849 if (!param->rel)
850 nine_info_mark_const_f_used(tx->info, param->idx);
851 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
852 }
853 if (!IS_VS && tx->version.major < 2) {
854 /* ps 1.X clamps constants */
855 tmp = tx_scratch(tx);
856 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
857 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
858 src = ureg_src(tmp);
859 }
860 break;
861 case D3DSPR_CONST2:
862 case D3DSPR_CONST3:
863 case D3DSPR_CONST4:
864 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
865 assert(!"CONST2/3/4");
866 src = ureg_imm1f(ureg, 0.0f);
867 break;
868 case D3DSPR_CONSTINT:
869 /* relative adressing only possible for float constants in vs */
870 assert(!param->rel);
871 if (!tx_lconsti(tx, &src, param->idx)) {
872 nine_info_mark_const_i_used(tx->info, param->idx);
873 src = ureg_src_register(TGSI_FILE_CONSTANT,
874 tx->info->const_i_base + param->idx);
875 }
876 break;
877 case D3DSPR_CONSTBOOL:
878 assert(!param->rel);
879 if (!tx_lconstb(tx, &src, param->idx)) {
880 char r = param->idx / 4;
881 char s = param->idx & 3;
882 nine_info_mark_const_b_used(tx->info, param->idx);
883 src = ureg_src_register(TGSI_FILE_CONSTANT,
884 tx->info->const_b_base + r);
885 src = ureg_swizzle(src, s, s, s, s);
886 }
887 break;
888 case D3DSPR_LOOP:
889 if (ureg_dst_is_undef(tx->regs.address))
890 tx->regs.address = ureg_DECL_address(ureg);
891 if (!tx->native_integers)
892 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
893 else
894 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
895 src = ureg_src(tx->regs.address);
896 break;
897 case D3DSPR_MISCTYPE:
898 switch (param->idx) {
899 case D3DSMO_POSITION:
900 if (ureg_src_is_undef(tx->regs.vPos))
901 tx->regs.vPos = ureg_DECL_fs_input(ureg,
902 TGSI_SEMANTIC_POSITION, 0,
903 TGSI_INTERPOLATE_LINEAR);
904 if (tx->shift_wpos) {
905 /* TODO: do this only once */
906 struct ureg_dst wpos = tx_scratch(tx);
907 ureg_SUB(ureg, wpos, tx->regs.vPos,
908 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
909 src = ureg_src(wpos);
910 } else {
911 src = tx->regs.vPos;
912 }
913 break;
914 case D3DSMO_FACE:
915 if (ureg_src_is_undef(tx->regs.vFace)) {
916 tx->regs.vFace = ureg_DECL_fs_input(ureg,
917 TGSI_SEMANTIC_FACE, 0,
918 TGSI_INTERPOLATE_CONSTANT);
919 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
920 }
921 src = tx->regs.vFace;
922 break;
923 default:
924 assert(!"invalid src D3DSMO");
925 break;
926 }
927 assert(!param->rel);
928 break;
929 case D3DSPR_TEMPFLOAT16:
930 break;
931 default:
932 assert(!"invalid src D3DSPR");
933 }
934 if (param->rel)
935 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
936
937 switch (param->mod) {
938 case NINED3DSPSM_DW:
939 tmp = tx_scratch(tx);
940 /* NOTE: app is not allowed to read w with this modifier */
941 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
942 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
943 src = ureg_src(tmp);
944 break;
945 case NINED3DSPSM_DZ:
946 tmp = tx_scratch(tx);
947 /* NOTE: app is not allowed to read z with this modifier */
948 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
949 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
950 src = ureg_src(tmp);
951 break;
952 default:
953 break;
954 }
955
956 if (param->swizzle != NINED3DSP_NOSWIZZLE)
957 src = ureg_swizzle(src,
958 (param->swizzle >> 0) & 0x3,
959 (param->swizzle >> 2) & 0x3,
960 (param->swizzle >> 4) & 0x3,
961 (param->swizzle >> 6) & 0x3);
962
963 switch (param->mod) {
964 case NINED3DSPSM_ABS:
965 src = ureg_abs(src);
966 break;
967 case NINED3DSPSM_ABSNEG:
968 src = ureg_negate(ureg_abs(src));
969 break;
970 case NINED3DSPSM_NEG:
971 src = ureg_negate(src);
972 break;
973 case NINED3DSPSM_BIAS:
974 tmp = tx_scratch(tx);
975 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
976 src = ureg_src(tmp);
977 break;
978 case NINED3DSPSM_BIASNEG:
979 tmp = tx_scratch(tx);
980 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
981 src = ureg_src(tmp);
982 break;
983 case NINED3DSPSM_NOT:
984 if (tx->native_integers) {
985 tmp = tx_scratch(tx);
986 ureg_NOT(ureg, tmp, src);
987 src = ureg_src(tmp);
988 break;
989 }
990 /* fall through */
991 case NINED3DSPSM_COMP:
992 tmp = tx_scratch(tx);
993 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
994 src = ureg_src(tmp);
995 break;
996 case NINED3DSPSM_DZ:
997 case NINED3DSPSM_DW:
998 /* Already handled*/
999 break;
1000 case NINED3DSPSM_SIGN:
1001 tmp = tx_scratch(tx);
1002 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1003 src = ureg_src(tmp);
1004 break;
1005 case NINED3DSPSM_SIGNNEG:
1006 tmp = tx_scratch(tx);
1007 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1008 src = ureg_src(tmp);
1009 break;
1010 case NINED3DSPSM_X2:
1011 tmp = tx_scratch(tx);
1012 ureg_ADD(ureg, tmp, src, src);
1013 src = ureg_src(tmp);
1014 break;
1015 case NINED3DSPSM_X2NEG:
1016 tmp = tx_scratch(tx);
1017 ureg_ADD(ureg, tmp, src, src);
1018 src = ureg_negate(ureg_src(tmp));
1019 break;
1020 default:
1021 assert(param->mod == NINED3DSPSM_NONE);
1022 break;
1023 }
1024
1025 return src;
1026 }
1027
1028 static struct ureg_dst
1029 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1030 {
1031 struct ureg_dst dst;
1032
1033 switch (param->file)
1034 {
1035 case D3DSPR_TEMP:
1036 assert(!param->rel);
1037 tx_temp_alloc(tx, param->idx);
1038 dst = tx->regs.r[param->idx];
1039 break;
1040 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1041 case D3DSPR_ADDR:
1042 assert(!param->rel);
1043 if (tx->version.major < 2 && !IS_VS) {
1044 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1045 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1046 dst = tx->regs.tS[param->idx];
1047 } else
1048 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1049 tx_texcoord_alloc(tx, param->idx);
1050 dst = ureg_dst(tx->regs.vT[param->idx]);
1051 } else {
1052 tx_addr_alloc(tx, param->idx);
1053 dst = tx->regs.a0;
1054 }
1055 break;
1056 case D3DSPR_RASTOUT:
1057 assert(!param->rel);
1058 switch (param->idx) {
1059 case 0:
1060 if (ureg_dst_is_undef(tx->regs.oPos))
1061 tx->regs.oPos =
1062 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1063 dst = tx->regs.oPos;
1064 break;
1065 case 1:
1066 if (ureg_dst_is_undef(tx->regs.oFog))
1067 tx->regs.oFog =
1068 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1069 dst = tx->regs.oFog;
1070 break;
1071 case 2:
1072 if (ureg_dst_is_undef(tx->regs.oPts))
1073 tx->regs.oPts =
1074 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1075 dst = tx->regs.oPts;
1076 break;
1077 default:
1078 assert(0);
1079 break;
1080 }
1081 break;
1082 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1083 case D3DSPR_OUTPUT:
1084 if (tx->version.major < 3) {
1085 assert(!param->rel);
1086 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1087 } else {
1088 assert(!param->rel); /* TODO */
1089 assert(param->idx < Elements(tx->regs.o));
1090 dst = tx->regs.o[param->idx];
1091 }
1092 break;
1093 case D3DSPR_ATTROUT: /* VS */
1094 case D3DSPR_COLOROUT: /* PS */
1095 assert(param->idx >= 0 && param->idx < 4);
1096 assert(!param->rel);
1097 tx->info->rt_mask |= 1 << param->idx;
1098 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1099 /* ps < 3: oCol[0] will have fog blending afterward
1100 * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
1101 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1102 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1103 } else if (IS_VS && tx->version.major < 3 && param->idx == 1) {
1104 tx->regs.oCol[1] = ureg_DECL_temporary(tx->ureg);
1105 } else {
1106 tx->regs.oCol[param->idx] =
1107 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1108 }
1109 }
1110 dst = tx->regs.oCol[param->idx];
1111 if (IS_VS && tx->version.major < 3)
1112 dst = ureg_saturate(dst);
1113 break;
1114 case D3DSPR_DEPTHOUT:
1115 assert(!param->rel);
1116 if (ureg_dst_is_undef(tx->regs.oDepth))
1117 tx->regs.oDepth =
1118 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1119 TGSI_WRITEMASK_Z, 0, 1);
1120 dst = tx->regs.oDepth; /* XXX: must write .z component */
1121 break;
1122 case D3DSPR_PREDICATE:
1123 assert(!param->rel);
1124 tx_pred_alloc(tx, param->idx);
1125 dst = tx->regs.p;
1126 break;
1127 case D3DSPR_TEMPFLOAT16:
1128 DBG("unhandled D3DSPR: %u\n", param->file);
1129 break;
1130 default:
1131 assert(!"invalid dst D3DSPR");
1132 break;
1133 }
1134 if (param->rel)
1135 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1136
1137 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1138 dst = ureg_writemask(dst, param->mask);
1139 if (param->mod & NINED3DSPDM_SATURATE)
1140 dst = ureg_saturate(dst);
1141
1142 return dst;
1143 }
1144
1145 static struct ureg_dst
1146 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1147 {
1148 if (param->shift) {
1149 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1150 return tx->regs.tdst;
1151 }
1152 return _tx_dst_param(tx, param);
1153 }
1154
1155 static void
1156 tx_apply_dst0_modifiers(struct shader_translator *tx)
1157 {
1158 struct ureg_dst rdst;
1159 float f;
1160
1161 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1162 return;
1163 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1164
1165 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1166
1167 if (tx->insn.dst[0].shift < 0)
1168 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1169 else
1170 f = 1 << tx->insn.dst[0].shift;
1171
1172 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1173 }
1174
1175 static struct ureg_src
1176 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1177 {
1178 struct ureg_src src;
1179
1180 assert(!param->shift);
1181 assert(!(param->mod & NINED3DSPDM_SATURATE));
1182
1183 switch (param->file) {
1184 case D3DSPR_INPUT:
1185 if (IS_VS) {
1186 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1187 } else {
1188 assert(!param->rel);
1189 assert(param->idx < Elements(tx->regs.v));
1190 src = tx->regs.v[param->idx];
1191 }
1192 break;
1193 default:
1194 src = ureg_src(tx_dst_param(tx, param));
1195 break;
1196 }
1197 if (param->rel)
1198 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1199
1200 if (!param->mask)
1201 WARN("mask is 0, using identity swizzle\n");
1202
1203 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1204 char s[4];
1205 int n;
1206 int c;
1207 for (n = 0, c = 0; c < 4; ++c)
1208 if (param->mask & (1 << c))
1209 s[n++] = c;
1210 assert(n);
1211 for (c = n; c < 4; ++c)
1212 s[c] = s[n - 1];
1213 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1214 }
1215 return src;
1216 }
1217
1218 static HRESULT
1219 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1220 {
1221 struct ureg_program *ureg = tx->ureg;
1222 struct ureg_dst dst;
1223 struct ureg_src src[2];
1224 struct sm1_src_param *src_mat = &tx->insn.src[1];
1225 unsigned i;
1226
1227 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1228 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1229
1230 for (i = 0; i < n; i++)
1231 {
1232 const unsigned m = (1 << i);
1233
1234 src[1] = tx_src_param(tx, src_mat);
1235 src_mat->idx++;
1236
1237 if (!(dst.WriteMask & m))
1238 continue;
1239
1240 /* XXX: src == dst case ? */
1241
1242 switch (k) {
1243 case 3:
1244 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1245 break;
1246 case 4:
1247 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1248 break;
1249 default:
1250 DBG("invalid operation: M%ux%u\n", m, n);
1251 break;
1252 }
1253 }
1254
1255 return D3D_OK;
1256 }
1257
/* Expands to the two values "0, 0"; presumably the min/max version pair of
 * an opcode-table entry that is unavailable for a shader type (the table
 * itself is outside this part of the file -- confirm). */
#define VNOTSUPPORTED 0, 0
/* Pack a shader version as (major << 8) | minor. */
#define V(maj, min) (((maj) << 8) | (min))
1260
1261 static inline const char *
1262 d3dsio_to_string( unsigned opcode )
1263 {
1264 static const char *names[] = {
1265 "NOP",
1266 "MOV",
1267 "ADD",
1268 "SUB",
1269 "MAD",
1270 "MUL",
1271 "RCP",
1272 "RSQ",
1273 "DP3",
1274 "DP4",
1275 "MIN",
1276 "MAX",
1277 "SLT",
1278 "SGE",
1279 "EXP",
1280 "LOG",
1281 "LIT",
1282 "DST",
1283 "LRP",
1284 "FRC",
1285 "M4x4",
1286 "M4x3",
1287 "M3x4",
1288 "M3x3",
1289 "M3x2",
1290 "CALL",
1291 "CALLNZ",
1292 "LOOP",
1293 "RET",
1294 "ENDLOOP",
1295 "LABEL",
1296 "DCL",
1297 "POW",
1298 "CRS",
1299 "SGN",
1300 "ABS",
1301 "NRM",
1302 "SINCOS",
1303 "REP",
1304 "ENDREP",
1305 "IF",
1306 "IFC",
1307 "ELSE",
1308 "ENDIF",
1309 "BREAK",
1310 "BREAKC",
1311 "MOVA",
1312 "DEFB",
1313 "DEFI",
1314 NULL,
1315 NULL,
1316 NULL,
1317 NULL,
1318 NULL,
1319 NULL,
1320 NULL,
1321 NULL,
1322 NULL,
1323 NULL,
1324 NULL,
1325 NULL,
1326 NULL,
1327 NULL,
1328 NULL,
1329 "TEXCOORD",
1330 "TEXKILL",
1331 "TEX",
1332 "TEXBEM",
1333 "TEXBEML",
1334 "TEXREG2AR",
1335 "TEXREG2GB",
1336 "TEXM3x2PAD",
1337 "TEXM3x2TEX",
1338 "TEXM3x3PAD",
1339 "TEXM3x3TEX",
1340 NULL,
1341 "TEXM3x3SPEC",
1342 "TEXM3x3VSPEC",
1343 "EXPP",
1344 "LOGP",
1345 "CND",
1346 "DEF",
1347 "TEXREG2RGB",
1348 "TEXDP3TEX",
1349 "TEXM3x2DEPTH",
1350 "TEXDP3",
1351 "TEXM3x3",
1352 "TEXDEPTH",
1353 "CMP",
1354 "BEM",
1355 "DP2ADD",
1356 "DSX",
1357 "DSY",
1358 "TEXLDD",
1359 "SETP",
1360 "TEXLDL",
1361 "BREAKP"
1362 };
1363
1364 if (opcode < Elements(names)) return names[opcode];
1365
1366 switch (opcode) {
1367 case D3DSIO_PHASE: return "PHASE";
1368 case D3DSIO_COMMENT: return "COMMENT";
1369 case D3DSIO_END: return "END";
1370 default:
1371 return NULL;
1372 }
1373 }
1374
/* All-zero/NULL initializer for an instruction-table entry (the entry type
 * is declared outside this part of the file). */
#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when any of the four version bounds of the entry is non-zero. */
#define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
(inst).vert_version.max | \
(inst).frag_version.min | \
(inst).frag_version.max)

/* Name of the translation handler for opcode `name`. */
#define SPECIAL(name) \
NineTranslateInstruction_##name

/* Opens the definition of the handler for opcode `name`; the handler
 * receives the translator as `tx` and returns an HRESULT. */
#define DECL_SPECIAL(name) \
static HRESULT \
NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration: handlers below fall back to the generic translator,
 * which is defined later in the file. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1390
1391 DECL_SPECIAL(M4x4)
1392 {
1393 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1394 }
1395
1396 DECL_SPECIAL(M4x3)
1397 {
1398 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1399 }
1400
1401 DECL_SPECIAL(M3x4)
1402 {
1403 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1404 }
1405
1406 DECL_SPECIAL(M3x3)
1407 {
1408 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1409 }
1410
1411 DECL_SPECIAL(M3x2)
1412 {
1413 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1414 }
1415
1416 DECL_SPECIAL(CMP)
1417 {
1418 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1419 tx_src_param(tx, &tx->insn.src[0]),
1420 tx_src_param(tx, &tx->insn.src[2]),
1421 tx_src_param(tx, &tx->insn.src[1]));
1422 return D3D_OK;
1423 }
1424
1425 DECL_SPECIAL(CND)
1426 {
1427 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1428 struct ureg_dst cgt;
1429 struct ureg_src cnd;
1430
1431 /* the coissue flag was a tip for compilers to advise to
1432 * execute two operations at the same time, in cases
1433 * the two executions had same dst with different channels.
1434 * It has no effect on current hw. However it seems CND
1435 * is affected. The handling of this very specific case
1436 * handled below mimick wine behaviour */
1437 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1438 ureg_MOV(tx->ureg,
1439 dst, tx_src_param(tx, &tx->insn.src[1]));
1440 return D3D_OK;
1441 }
1442
1443 cnd = tx_src_param(tx, &tx->insn.src[0]);
1444 cgt = tx_scratch(tx);
1445
1446 if (tx->version.major == 1 && tx->version.minor < 4)
1447 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1448
1449 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1450
1451 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1452 tx_src_param(tx, &tx->insn.src[1]),
1453 tx_src_param(tx, &tx->insn.src[2]));
1454 return D3D_OK;
1455 }
1456
1457 DECL_SPECIAL(CALL)
1458 {
1459 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1460 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1461 return D3D_OK;
1462 }
1463
1464 DECL_SPECIAL(CALLNZ)
1465 {
1466 struct ureg_program *ureg = tx->ureg;
1467 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1468
1469 if (!tx->native_integers)
1470 ureg_IF(ureg, src, tx_cond(tx));
1471 else
1472 ureg_UIF(ureg, src, tx_cond(tx));
1473 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1474 tx_endcond(tx);
1475 ureg_ENDIF(ureg);
1476 return D3D_OK;
1477 }
1478
1479 DECL_SPECIAL(MOV_vs1x)
1480 {
1481 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1482 /* Implementation note: We don't write directly
1483 * to the addr register, but to an intermediate
1484 * float register.
1485 * Contrary to the doc, when writing to ADDR here,
1486 * the rounding is not to nearest, but to lowest
1487 * (wine test).
1488 * Since we use ARR next, substract 0.5. */
1489 ureg_SUB(tx->ureg,
1490 tx_dst_param(tx, &tx->insn.dst[0]),
1491 tx_src_param(tx, &tx->insn.src[0]),
1492 ureg_imm1f(tx->ureg, 0.5f));
1493 return D3D_OK;
1494 }
1495 return NineTranslateInstruction_Generic(tx);
1496 }
1497
1498 DECL_SPECIAL(LOOP)
1499 {
1500 struct ureg_program *ureg = tx->ureg;
1501 unsigned *label;
1502 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1503 struct ureg_dst ctr;
1504 struct ureg_dst tmp;
1505 struct ureg_src ctrx;
1506
1507 label = tx_bgnloop(tx);
1508 ctr = tx_get_loopctr(tx, TRUE);
1509 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1510
1511 /* src: num_iterations - start_value of al - step for al - 0 */
1512 ureg_MOV(ureg, ctr, src);
1513 ureg_BGNLOOP(tx->ureg, label);
1514 tmp = tx_scratch_scalar(tx);
1515 /* Initially ctr.x contains the number of iterations.
1516 * ctr.y will contain the updated value of al.
1517 * We decrease ctr.x at the end of every iteration,
1518 * and stop when it reaches 0. */
1519
1520 if (!tx->native_integers) {
1521 /* case src and ctr contain floats */
1522 /* to avoid precision issue, we stop when ctr <= 0.5 */
1523 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1524 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1525 } else {
1526 /* case src and ctr contain integers */
1527 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1528 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1529 }
1530 ureg_BRK(ureg);
1531 tx_endcond(tx);
1532 ureg_ENDIF(ureg);
1533 return D3D_OK;
1534 }
1535
1536 DECL_SPECIAL(RET)
1537 {
1538 ureg_RET(tx->ureg);
1539 return D3D_OK;
1540 }
1541
1542 DECL_SPECIAL(ENDLOOP)
1543 {
1544 struct ureg_program *ureg = tx->ureg;
1545 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1546 struct ureg_dst dst_ctrx, dst_al;
1547 struct ureg_src src_ctr, al_counter;
1548
1549 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1550 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1551 src_ctr = ureg_src(ctr);
1552 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1553
1554 /* ctr.x -= 1
1555 * ctr.y (aL) += step */
1556 if (!tx->native_integers) {
1557 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1558 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1559 } else {
1560 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1561 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1562 }
1563 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1564 return D3D_OK;
1565 }
1566
1567 DECL_SPECIAL(LABEL)
1568 {
1569 unsigned k = tx->num_inst_labels;
1570 unsigned n = tx->insn.src[0].idx;
1571 assert(n < 2048);
1572 if (n >= k)
1573 tx->inst_labels = REALLOC(tx->inst_labels,
1574 k * sizeof(tx->inst_labels[0]),
1575 n * sizeof(tx->inst_labels[0]));
1576
1577 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1578 return D3D_OK;
1579 }
1580
1581 DECL_SPECIAL(SINCOS)
1582 {
1583 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1584 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1585
1586 assert(!(dst.WriteMask & 0xc));
1587
1588 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1589 ureg_SCS(tx->ureg, dst, src);
1590 return D3D_OK;
1591 }
1592
1593 DECL_SPECIAL(SGN)
1594 {
1595 ureg_SSG(tx->ureg,
1596 tx_dst_param(tx, &tx->insn.dst[0]),
1597 tx_src_param(tx, &tx->insn.src[0]));
1598 return D3D_OK;
1599 }
1600
1601 DECL_SPECIAL(REP)
1602 {
1603 struct ureg_program *ureg = tx->ureg;
1604 unsigned *label;
1605 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1606 struct ureg_dst ctr;
1607 struct ureg_dst tmp;
1608 struct ureg_src ctrx;
1609
1610 label = tx_bgnloop(tx);
1611 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1612 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1613
1614 /* NOTE: rep must be constant, so we don't have to save the count */
1615 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1616
1617 /* rep: num_iterations - 0 - 0 - 0 */
1618 ureg_MOV(ureg, ctr, rep);
1619 ureg_BGNLOOP(ureg, label);
1620 tmp = tx_scratch_scalar(tx);
1621 /* Initially ctr.x contains the number of iterations.
1622 * We decrease ctr.x at the end of every iteration,
1623 * and stop when it reaches 0. */
1624
1625 if (!tx->native_integers) {
1626 /* case src and ctr contain floats */
1627 /* to avoid precision issue, we stop when ctr <= 0.5 */
1628 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1629 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1630 } else {
1631 /* case src and ctr contain integers */
1632 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1633 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1634 }
1635 ureg_BRK(ureg);
1636 tx_endcond(tx);
1637 ureg_ENDIF(ureg);
1638
1639 return D3D_OK;
1640 }
1641
1642 DECL_SPECIAL(ENDREP)
1643 {
1644 struct ureg_program *ureg = tx->ureg;
1645 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1646 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1647 struct ureg_src src_ctr = ureg_src(ctr);
1648
1649 /* ctr.x -= 1 */
1650 if (!tx->native_integers)
1651 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1652 else
1653 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1654
1655 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1656 return D3D_OK;
1657 }
1658
1659 DECL_SPECIAL(ENDIF)
1660 {
1661 tx_endcond(tx);
1662 ureg_ENDIF(tx->ureg);
1663 return D3D_OK;
1664 }
1665
1666 DECL_SPECIAL(IF)
1667 {
1668 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1669
1670 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1671 ureg_UIF(tx->ureg, src, tx_cond(tx));
1672 else
1673 ureg_IF(tx->ureg, src, tx_cond(tx));
1674
1675 return D3D_OK;
1676 }
1677
1678 static inline unsigned
1679 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1680 {
1681 switch (flags) {
1682 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1683 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1684 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1685 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1686 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1687 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1688 default:
1689 assert(!"invalid comparison flags");
1690 return TGSI_OPCODE_SGT;
1691 }
1692 }
1693
1694 DECL_SPECIAL(IFC)
1695 {
1696 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1697 struct ureg_src src[2];
1698 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1699 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1700 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1701 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1702 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1703 return D3D_OK;
1704 }
1705
1706 DECL_SPECIAL(ELSE)
1707 {
1708 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1709 return D3D_OK;
1710 }
1711
1712 DECL_SPECIAL(BREAKC)
1713 {
1714 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1715 struct ureg_src src[2];
1716 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1717 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1718 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1719 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1720 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1721 ureg_BRK(tx->ureg);
1722 tx_endcond(tx);
1723 ureg_ENDIF(tx->ureg);
1724 return D3D_OK;
1725 }
1726
/* Printable names of the D3DDECLUSAGE_* values, indexed by the usage
 * value itself. Used by the DCL debug dump. The array only extends up to
 * D3DDECLUSAGE_SAMPLE, so larger indices are out of range. */
static const char *sm1_declusage_names[] =
{
[D3DDECLUSAGE_POSITION] = "POSITION",
[D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
[D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
[D3DDECLUSAGE_NORMAL] = "NORMAL",
[D3DDECLUSAGE_PSIZE] = "PSIZE",
[D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
[D3DDECLUSAGE_TANGENT] = "TANGENT",
[D3DDECLUSAGE_BINORMAL] = "BINORMAL",
[D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
[D3DDECLUSAGE_POSITIONT] = "POSITIONT",
[D3DDECLUSAGE_COLOR] = "COLOR",
[D3DDECLUSAGE_FOG] = "FOG",
[D3DDECLUSAGE_DEPTH] = "DEPTH",
[D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
1744
1745 static inline unsigned
1746 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1747 {
1748 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1749 }
1750
1751 static void
1752 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1753 boolean tc,
1754 struct sm1_semantic *dcl)
1755 {
1756 BYTE index = dcl->usage_idx;
1757
1758 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1759 * we match to a TGSI_SEMANTIC_GENERIC with index.
1760 *
1761 * The index can be anything UINT16 and usage_idx is BYTE,
1762 * so we can fit everything. It doesn't matter if indices
1763 * are close together or low.
1764 *
1765 *
1766 * POSITION >= 1: 10 * index + 6
1767 * COLOR >= 2: 10 * (index-1) + 7
1768 * TEXCOORD[0..15]: index
1769 * BLENDWEIGHT: 10 * index + 18
1770 * BLENDINDICES: 10 * index + 19
1771 * NORMAL: 10 * index + 20
1772 * TANGENT: 10 * index + 21
1773 * BINORMAL: 10 * index + 22
1774 * TESSFACTOR: 10 * index + 23
1775 */
1776
1777 switch (dcl->usage) {
1778 case D3DDECLUSAGE_POSITION:
1779 case D3DDECLUSAGE_POSITIONT:
1780 case D3DDECLUSAGE_DEPTH:
1781 if (index == 0) {
1782 sem->Name = TGSI_SEMANTIC_POSITION;
1783 sem->Index = 0;
1784 } else {
1785 sem->Name = TGSI_SEMANTIC_GENERIC;
1786 sem->Index = 10 * index + 6;
1787 }
1788 break;
1789 case D3DDECLUSAGE_COLOR:
1790 if (index < 2) {
1791 sem->Name = TGSI_SEMANTIC_COLOR;
1792 sem->Index = index;
1793 } else {
1794 sem->Name = TGSI_SEMANTIC_GENERIC;
1795 sem->Index = 10 * (index-1) + 7;
1796 }
1797 break;
1798 case D3DDECLUSAGE_FOG:
1799 assert(index == 0);
1800 sem->Name = TGSI_SEMANTIC_FOG;
1801 sem->Index = 0;
1802 break;
1803 case D3DDECLUSAGE_PSIZE:
1804 assert(index == 0);
1805 sem->Name = TGSI_SEMANTIC_PSIZE;
1806 sem->Index = 0;
1807 break;
1808 case D3DDECLUSAGE_TEXCOORD:
1809 assert(index < 16);
1810 if (index < 8 && tc)
1811 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1812 else
1813 sem->Name = TGSI_SEMANTIC_GENERIC;
1814 sem->Index = index;
1815 break;
1816 case D3DDECLUSAGE_BLENDWEIGHT:
1817 sem->Name = TGSI_SEMANTIC_GENERIC;
1818 sem->Index = 10 * index + 18;
1819 break;
1820 case D3DDECLUSAGE_BLENDINDICES:
1821 sem->Name = TGSI_SEMANTIC_GENERIC;
1822 sem->Index = 10 * index + 19;
1823 break;
1824 case D3DDECLUSAGE_NORMAL:
1825 sem->Name = TGSI_SEMANTIC_GENERIC;
1826 sem->Index = 10 * index + 20;
1827 break;
1828 case D3DDECLUSAGE_TANGENT:
1829 sem->Name = TGSI_SEMANTIC_GENERIC;
1830 sem->Index = 10 * index + 21;
1831 break;
1832 case D3DDECLUSAGE_BINORMAL:
1833 sem->Name = TGSI_SEMANTIC_GENERIC;
1834 sem->Index = 10 * index + 22;
1835 break;
1836 case D3DDECLUSAGE_TESSFACTOR:
1837 sem->Name = TGSI_SEMANTIC_GENERIC;
1838 sem->Index = 10 * index + 23;
1839 break;
1840 case D3DDECLUSAGE_SAMPLE:
1841 sem->Name = TGSI_SEMANTIC_COUNT;
1842 sem->Index = 0;
1843 break;
1844 default:
1845 assert(!"Invalid DECLUSAGE.");
1846 break;
1847 }
1848 }
1849
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT,
 * i.e. the values compared against a sampler_type field in the helpers
 * below. */
#define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1854 static inline unsigned
1855 d3dstt_to_tgsi_tex(BYTE sampler_type)
1856 {
1857 switch (sampler_type) {
1858 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1859 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1860 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1861 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1862 default:
1863 assert(0);
1864 return TGSI_TEXTURE_UNKNOWN;
1865 }
1866 }
1867 static inline unsigned
1868 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1869 {
1870 switch (sampler_type) {
1871 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1872 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1873 case NINED3DSTT_VOLUME:
1874 case NINED3DSTT_CUBE:
1875 default:
1876 assert(0);
1877 return TGSI_TEXTURE_UNKNOWN;
1878 }
1879 }
1880 static inline unsigned
1881 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1882 {
1883 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1884 case 1: return TGSI_TEXTURE_1D;
1885 case 0: return TGSI_TEXTURE_2D;
1886 case 3: return TGSI_TEXTURE_3D;
1887 default:
1888 return TGSI_TEXTURE_CUBE;
1889 }
1890 }
1891
1892 static const char *
1893 sm1_sampler_type_name(BYTE sampler_type)
1894 {
1895 switch (sampler_type) {
1896 case NINED3DSTT_1D: return "1D";
1897 case NINED3DSTT_2D: return "2D";
1898 case NINED3DSTT_VOLUME: return "VOLUME";
1899 case NINED3DSTT_CUBE: return "CUBE";
1900 default:
1901 return "(D3DSTT_?)";
1902 }
1903 }
1904
1905 static inline unsigned
1906 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1907 {
1908 switch (sem->Name) {
1909 case TGSI_SEMANTIC_POSITION:
1910 case TGSI_SEMANTIC_NORMAL:
1911 return TGSI_INTERPOLATE_LINEAR;
1912 case TGSI_SEMANTIC_BCOLOR:
1913 case TGSI_SEMANTIC_COLOR:
1914 case TGSI_SEMANTIC_FOG:
1915 case TGSI_SEMANTIC_GENERIC:
1916 case TGSI_SEMANTIC_TEXCOORD:
1917 case TGSI_SEMANTIC_CLIPDIST:
1918 case TGSI_SEMANTIC_CLIPVERTEX:
1919 return TGSI_INTERPOLATE_PERSPECTIVE;
1920 case TGSI_SEMANTIC_EDGEFLAG:
1921 case TGSI_SEMANTIC_FACE:
1922 case TGSI_SEMANTIC_INSTANCEID:
1923 case TGSI_SEMANTIC_PCOORD:
1924 case TGSI_SEMANTIC_PRIMID:
1925 case TGSI_SEMANTIC_PSIZE:
1926 case TGSI_SEMANTIC_VERTEXID:
1927 return TGSI_INTERPOLATE_CONSTANT;
1928 default:
1929 assert(0);
1930 return TGSI_INTERPOLATE_CONSTANT;
1931 }
1932 }
1933
1934 DECL_SPECIAL(DCL)
1935 {
1936 struct ureg_program *ureg = tx->ureg;
1937 boolean is_input;
1938 boolean is_sampler;
1939 struct tgsi_declaration_semantic tgsi;
1940 struct sm1_semantic sem;
1941 sm1_read_semantic(tx, &sem);
1942
1943 is_input = sem.reg.file == D3DSPR_INPUT;
1944 is_sampler =
1945 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1946
1947 DUMP("DCL ");
1948 sm1_dump_dst_param(&sem.reg);
1949 if (is_sampler)
1950 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1951 else
1952 if (tx->version.major >= 3)
1953 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1954 else
1955 if (sem.usage | sem.usage_idx)
1956 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1957 else
1958 DUMP("\n");
1959
1960 if (is_sampler) {
1961 const unsigned m = 1 << sem.reg.idx;
1962 ureg_DECL_sampler(ureg, sem.reg.idx);
1963 tx->info->sampler_mask |= m;
1964 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1965 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1966 d3dstt_to_tgsi_tex(sem.sampler_type);
1967 return D3D_OK;
1968 }
1969
1970 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1971 if (IS_VS) {
1972 if (is_input) {
1973 /* linkage outside of shader with vertex declaration */
1974 ureg_DECL_vs_input(ureg, sem.reg.idx);
1975 assert(sem.reg.idx < Elements(tx->info->input_map));
1976 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1977 tx->info->num_inputs = sem.reg.idx + 1;
1978 /* NOTE: preserving order in case of indirect access */
1979 } else
1980 if (tx->version.major >= 3) {
1981 /* SM2 output semantic determined by file */
1982 assert(sem.reg.mask != 0);
1983 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1984 tx->info->position_t = TRUE;
1985 assert(sem.reg.idx < Elements(tx->regs.o));
1986 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1987 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
1988
1989 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1990 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1991 }
1992 } else {
1993 if (is_input && tx->version.major >= 3) {
1994 /* SM3 only, SM2 input semantic determined by file */
1995 assert(sem.reg.idx < Elements(tx->regs.v));
1996 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1997 ureg, tgsi.Name, tgsi.Index,
1998 nine_tgsi_to_interp_mode(&tgsi),
1999 0, /* cylwrap */
2000 sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
2001 } else
2002 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2003 /* FragColor or FragDepth */
2004 assert(sem.reg.mask != 0);
2005 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2006 0, 1);
2007 }
2008 }
2009 return D3D_OK;
2010 }
2011
2012 DECL_SPECIAL(DEF)
2013 {
2014 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2015 return D3D_OK;
2016 }
2017
2018 DECL_SPECIAL(DEFB)
2019 {
2020 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2021 return D3D_OK;
2022 }
2023
2024 DECL_SPECIAL(DEFI)
2025 {
2026 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2027 return D3D_OK;
2028 }
2029
2030 DECL_SPECIAL(POW)
2031 {
2032 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2033 struct ureg_src src[2] = {
2034 tx_src_param(tx, &tx->insn.src[0]),
2035 tx_src_param(tx, &tx->insn.src[1])
2036 };
2037 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2038 return D3D_OK;
2039 }
2040
2041 DECL_SPECIAL(RSQ)
2042 {
2043 struct ureg_program *ureg = tx->ureg;
2044 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2045 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2046 struct ureg_dst tmp = tx_scratch(tx);
2047 ureg_RSQ(ureg, tmp, ureg_abs(src));
2048 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2049 return D3D_OK;
2050 }
2051
2052 DECL_SPECIAL(LOG)
2053 {
2054 struct ureg_program *ureg = tx->ureg;
2055 struct ureg_dst tmp = tx_scratch_scalar(tx);
2056 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2057 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2058 ureg_LG2(ureg, tmp, ureg_abs(src));
2059 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2060 return D3D_OK;
2061 }
2062
2063 DECL_SPECIAL(LIT)
2064 {
2065 struct ureg_program *ureg = tx->ureg;
2066 struct ureg_dst tmp = tx_scratch(tx);
2067 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2068 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2069 ureg_LIT(ureg, tmp, src);
2070 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2071 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2072 * it 0^0 if src.w=0, which value is driver dependent. */
2073 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2074 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2075 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2076 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2077 return D3D_OK;
2078 }
2079
2080 DECL_SPECIAL(NRM)
2081 {
2082 struct ureg_program *ureg = tx->ureg;
2083 struct ureg_dst tmp = tx_scratch_scalar(tx);
2084 struct ureg_src nrm = tx_src_scalar(tmp);
2085 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2086 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2087 ureg_DP3(ureg, tmp, src, src);
2088 ureg_RSQ(ureg, tmp, nrm);
2089 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2090 ureg_MUL(ureg, dst, src, nrm);
2091 return D3D_OK;
2092 }
2093
2094 DECL_SPECIAL(DP2ADD)
2095 {
2096 struct ureg_dst tmp = tx_scratch_scalar(tx);
2097 struct ureg_src dp2 = tx_src_scalar(tmp);
2098 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2099 struct ureg_src src[3];
2100 int i;
2101 for (i = 0; i < 3; ++i)
2102 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2103 assert_replicate_swizzle(&src[2]);
2104
2105 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2106 ureg_ADD(tx->ureg, dst, src[2], dp2);
2107
2108 return D3D_OK;
2109 }
2110
2111 DECL_SPECIAL(TEXCOORD)
2112 {
2113 struct ureg_program *ureg = tx->ureg;
2114 const unsigned s = tx->insn.dst[0].idx;
2115 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2116
2117 tx_texcoord_alloc(tx, s);
2118 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2119 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2120
2121 return D3D_OK;
2122 }
2123
2124 DECL_SPECIAL(TEXCOORD_ps14)
2125 {
2126 struct ureg_program *ureg = tx->ureg;
2127 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2128 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2129
2130 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2131
2132 ureg_MOV(ureg, dst, src);
2133
2134 return D3D_OK;
2135 }
2136
2137 DECL_SPECIAL(TEXKILL)
2138 {
2139 struct ureg_src reg;
2140
2141 if (tx->version.major > 1 || tx->version.minor > 3) {
2142 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2143 } else {
2144 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2145 reg = tx->regs.vT[tx->insn.dst[0].idx];
2146 }
2147 if (tx->version.major < 2)
2148 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2149 ureg_KILL_IF(tx->ureg, reg);
2150
2151 return D3D_OK;
2152 }
2153
2154 DECL_SPECIAL(TEXBEM)
2155 {
2156 struct ureg_program *ureg = tx->ureg;
2157 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2158 struct ureg_dst tmp, tmp2;
2159 struct ureg_src sample, m00, m01, m10, m11;
2160 struct ureg_src bumpenvlscale, bumpenvloffset;
2161 const int m = tx->insn.dst[0].idx;
2162 const int n = tx->insn.src[0].idx;
2163
2164 assert(tx->version.major == 1);
2165
2166 sample = ureg_DECL_sampler(ureg, m);
2167 tx->info->sampler_mask |= 1 << m;
2168
2169 tx_texcoord_alloc(tx, m);
2170
2171 tmp = tx_scratch(tx);
2172 tmp2 = tx_scratch(tx);
2173 /*
2174 * Bump-env-matrix:
2175 * 00 is X
2176 * 01 is Y
2177 * 10 is Z
2178 * 11 is W
2179 */
2180 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2181 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2182 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2183 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2184 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2185
2186 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2187 if (m % 2 == 0) {
2188 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2189 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2190 } else {
2191 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2192 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2193 }
2194
2195 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2196 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2197 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), tx->regs.vT[m]);
2198 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2199 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2200 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2201 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2202
2203 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2204 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2205 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), tx->regs.vT[m]);
2206 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2207 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2208 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2209 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2210
2211 /* Now the texture coordinates are in tmp.xy */
2212
2213 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2214 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2215 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2216 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2217 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2218 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2219 bumpenvlscale, bumpenvloffset);
2220 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2221 }
2222
2223 tx->info->bumpenvmat_needed = 1;
2224
2225 return D3D_OK;
2226 }
2227
2228 DECL_SPECIAL(TEXREG2AR)
2229 {
2230 struct ureg_program *ureg = tx->ureg;
2231 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2232 struct ureg_src sample;
2233 const int m = tx->insn.dst[0].idx;
2234 const int n = tx->insn.src[0].idx;
2235 assert(m >= 0 && m > n);
2236
2237 sample = ureg_DECL_sampler(ureg, m);
2238 tx->info->sampler_mask |= 1 << m;
2239 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2240
2241 return D3D_OK;
2242 }
2243
2244 DECL_SPECIAL(TEXREG2GB)
2245 {
2246 struct ureg_program *ureg = tx->ureg;
2247 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2248 struct ureg_src sample;
2249 const int m = tx->insn.dst[0].idx;
2250 const int n = tx->insn.src[0].idx;
2251 assert(m >= 0 && m > n);
2252
2253 sample = ureg_DECL_sampler(ureg, m);
2254 tx->info->sampler_mask |= 1 << m;
2255 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2256
2257 return D3D_OK;
2258 }
2259
2260 DECL_SPECIAL(TEXM3x2PAD)
2261 {
2262 return D3D_OK; /* this is just padding */
2263 }
2264
2265 DECL_SPECIAL(TEXM3x2TEX)
2266 {
2267 struct ureg_program *ureg = tx->ureg;
2268 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2269 struct ureg_src sample;
2270 const int m = tx->insn.dst[0].idx - 1;
2271 const int n = tx->insn.src[0].idx;
2272 assert(m >= 0 && m > n);
2273
2274 tx_texcoord_alloc(tx, m);
2275 tx_texcoord_alloc(tx, m+1);
2276
2277 /* performs the matrix multiplication */
2278 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2279 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2280
2281 sample = ureg_DECL_sampler(ureg, m + 1);
2282 tx->info->sampler_mask |= 1 << (m + 1);
2283 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2284
2285 return D3D_OK;
2286 }
2287
2288 DECL_SPECIAL(TEXM3x3PAD)
2289 {
2290 return D3D_OK; /* this is just padding */
2291 }
2292
2293 DECL_SPECIAL(TEXM3x3SPEC)
2294 {
2295 struct ureg_program *ureg = tx->ureg;
2296 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2297 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2298 struct ureg_src sample;
2299 struct ureg_dst tmp;
2300 const int m = tx->insn.dst[0].idx - 2;
2301 const int n = tx->insn.src[0].idx;
2302 assert(m >= 0 && m > n);
2303
2304 tx_texcoord_alloc(tx, m);
2305 tx_texcoord_alloc(tx, m+1);
2306 tx_texcoord_alloc(tx, m+2);
2307
2308 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2309 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2310 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2311
2312 sample = ureg_DECL_sampler(ureg, m + 2);
2313 tx->info->sampler_mask |= 1 << (m + 2);
2314 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2315
2316 /* At this step, dst = N = (u', w', z').
2317 * We want dst to be the texture sampled at (u'', w'', z''), with
2318 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2319 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2320 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2321 /* at this step tmp.x = 1/N.N */
2322 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2323 /* at this step tmp.y = N.E */
2324 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2325 /* at this step tmp.x = N.E/N.N */
2326 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2327 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2328 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2329 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2330 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2331
2332 return D3D_OK;
2333 }
2334
2335 DECL_SPECIAL(TEXREG2RGB)
2336 {
2337 struct ureg_program *ureg = tx->ureg;
2338 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2339 struct ureg_src sample;
2340 const int m = tx->insn.dst[0].idx;
2341 const int n = tx->insn.src[0].idx;
2342 assert(m >= 0 && m > n);
2343
2344 sample = ureg_DECL_sampler(ureg, m);
2345 tx->info->sampler_mask |= 1 << m;
2346 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2347
2348 return D3D_OK;
2349 }
2350
2351 DECL_SPECIAL(TEXDP3TEX)
2352 {
2353 struct ureg_program *ureg = tx->ureg;
2354 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2355 struct ureg_dst tmp;
2356 struct ureg_src sample;
2357 const int m = tx->insn.dst[0].idx;
2358 const int n = tx->insn.src[0].idx;
2359 assert(m >= 0 && m > n);
2360
2361 tx_texcoord_alloc(tx, m);
2362
2363 tmp = tx_scratch(tx);
2364 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2365 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2366
2367 sample = ureg_DECL_sampler(ureg, m);
2368 tx->info->sampler_mask |= 1 << m;
2369 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2370
2371 return D3D_OK;
2372 }
2373
2374 DECL_SPECIAL(TEXM3x2DEPTH)
2375 {
2376 struct ureg_program *ureg = tx->ureg;
2377 struct ureg_dst tmp;
2378 const int m = tx->insn.dst[0].idx - 1;
2379 const int n = tx->insn.src[0].idx;
2380 assert(m >= 0 && m > n);
2381
2382 tx_texcoord_alloc(tx, m);
2383 tx_texcoord_alloc(tx, m+1);
2384
2385 tmp = tx_scratch(tx);
2386
2387 /* performs the matrix multiplication */
2388 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2389 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2390
2391 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2392 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2393 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2394 /* res = 'w' == 0 ? 1.0 : z/w */
2395 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2396 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2397 /* replace the depth for depth testing with the result */
2398 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2399 TGSI_WRITEMASK_Z, 0, 1);
2400 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2401 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2402 return D3D_OK;
2403 }
2404
2405 DECL_SPECIAL(TEXDP3)
2406 {
2407 struct ureg_program *ureg = tx->ureg;
2408 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2409 const int m = tx->insn.dst[0].idx;
2410 const int n = tx->insn.src[0].idx;
2411 assert(m >= 0 && m > n);
2412
2413 tx_texcoord_alloc(tx, m);
2414
2415 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2416
2417 return D3D_OK;
2418 }
2419
2420 DECL_SPECIAL(TEXM3x3)
2421 {
2422 struct ureg_program *ureg = tx->ureg;
2423 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2424 struct ureg_src sample;
2425 struct ureg_dst E, tmp;
2426 const int m = tx->insn.dst[0].idx - 2;
2427 const int n = tx->insn.src[0].idx;
2428 assert(m >= 0 && m > n);
2429
2430 tx_texcoord_alloc(tx, m);
2431 tx_texcoord_alloc(tx, m+1);
2432 tx_texcoord_alloc(tx, m+2);
2433
2434 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2435 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2436 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2437
2438 switch (tx->insn.opcode) {
2439 case D3DSIO_TEXM3x3:
2440 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2441 break;
2442 case D3DSIO_TEXM3x3TEX:
2443 sample = ureg_DECL_sampler(ureg, m + 2);
2444 tx->info->sampler_mask |= 1 << (m + 2);
2445 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2446 break;
2447 case D3DSIO_TEXM3x3VSPEC:
2448 sample = ureg_DECL_sampler(ureg, m + 2);
2449 tx->info->sampler_mask |= 1 << (m + 2);
2450 E = tx_scratch(tx);
2451 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2452 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2453 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2454 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2455 /* At this step, dst = N = (u', w', z').
2456 * We want dst to be the texture sampled at (u'', w'', z''), with
2457 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2458 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2459 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2460 /* at this step tmp.x = 1/N.N */
2461 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2462 /* at this step tmp.y = N.E */
2463 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2464 /* at this step tmp.x = N.E/N.N */
2465 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2466 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2467 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2468 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2469 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2470 break;
2471 default:
2472 return D3DERR_INVALIDCALL;
2473 }
2474 return D3D_OK;
2475 }
2476
2477 DECL_SPECIAL(TEXDEPTH)
2478 {
2479 struct ureg_program *ureg = tx->ureg;
2480 struct ureg_dst r5;
2481 struct ureg_src r5r, r5g;
2482
2483 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2484
2485 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2486 * r5 won't be used afterward, thus we can use r5.ba */
2487 r5 = tx->regs.r[5];
2488 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2489 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2490
2491 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2492 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2493 /* r5.r = r/g */
2494 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2495 r5r, ureg_imm1f(ureg, 1.0f));
2496 /* replace the depth for depth testing with the result */
2497 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2498 TGSI_WRITEMASK_Z, 0, 1);
2499 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2500
2501 return D3D_OK;
2502 }
2503
2504 DECL_SPECIAL(BEM)
2505 {
2506 struct ureg_program *ureg = tx->ureg;
2507 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2508 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2509 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2510 struct ureg_src m00, m01, m10, m11;
2511 const int m = tx->insn.dst[0].idx;
2512 struct ureg_dst tmp;
2513 /*
2514 * Bump-env-matrix:
2515 * 00 is X
2516 * 01 is Y
2517 * 10 is Z
2518 * 11 is W
2519 */
2520 nine_info_mark_const_f_used(tx->info, 8 + m);
2521 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2522 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2523 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2524 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2525 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2526 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2527 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2528 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2529 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2530 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2531
2532 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2533 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2534 NINE_APPLY_SWIZZLE(src1, X), src0);
2535 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2536 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2537 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2538 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2539
2540 tx->info->bumpenvmat_needed = 1;
2541
2542 return D3D_OK;
2543 }
2544
2545 DECL_SPECIAL(TEXLD)
2546 {
2547 struct ureg_program *ureg = tx->ureg;
2548 unsigned target;
2549 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2550 struct ureg_src src[2] = {
2551 tx_src_param(tx, &tx->insn.src[0]),
2552 tx_src_param(tx, &tx->insn.src[1])
2553 };
2554 assert(tx->insn.src[1].idx >= 0 &&
2555 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2556 target = tx->sampler_targets[tx->insn.src[1].idx];
2557
2558 switch (tx->insn.flags) {
2559 case 0:
2560 ureg_TEX(ureg, dst, target, src[0], src[1]);
2561 break;
2562 case NINED3DSI_TEXLD_PROJECT:
2563 ureg_TXP(ureg, dst, target, src[0], src[1]);
2564 break;
2565 case NINED3DSI_TEXLD_BIAS:
2566 ureg_TXB(ureg, dst, target, src[0], src[1]);
2567 break;
2568 default:
2569 assert(0);
2570 return D3DERR_INVALIDCALL;
2571 }
2572 return D3D_OK;
2573 }
2574
2575 DECL_SPECIAL(TEXLD_14)
2576 {
2577 struct ureg_program *ureg = tx->ureg;
2578 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2579 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2580 const unsigned s = tx->insn.dst[0].idx;
2581 const unsigned t = ps1x_sampler_type(tx->info, s);
2582
2583 tx->info->sampler_mask |= 1 << s;
2584 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2585
2586 return D3D_OK;
2587 }
2588
2589 DECL_SPECIAL(TEX)
2590 {
2591 struct ureg_program *ureg = tx->ureg;
2592 const unsigned s = tx->insn.dst[0].idx;
2593 const unsigned t = ps1x_sampler_type(tx->info, s);
2594 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2595 struct ureg_src src[2];
2596
2597 tx_texcoord_alloc(tx, s);
2598
2599 src[0] = tx->regs.vT[s];
2600 src[1] = ureg_DECL_sampler(ureg, s);
2601 tx->info->sampler_mask |= 1 << s;
2602
2603 ureg_TEX(ureg, dst, t, src[0], src[1]);
2604
2605 return D3D_OK;
2606 }
2607
2608 DECL_SPECIAL(TEXLDD)
2609 {
2610 unsigned target;
2611 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2612 struct ureg_src src[4] = {
2613 tx_src_param(tx, &tx->insn.src[0]),
2614 tx_src_param(tx, &tx->insn.src[1]),
2615 tx_src_param(tx, &tx->insn.src[2]),
2616 tx_src_param(tx, &tx->insn.src[3])
2617 };
2618 assert(tx->insn.src[1].idx >= 0 &&
2619 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2620 target = tx->sampler_targets[tx->insn.src[1].idx];
2621
2622 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2623 return D3D_OK;
2624 }
2625
2626 DECL_SPECIAL(TEXLDL)
2627 {
2628 unsigned target;
2629 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2630 struct ureg_src src[2] = {
2631 tx_src_param(tx, &tx->insn.src[0]),
2632 tx_src_param(tx, &tx->insn.src[1])
2633 };
2634 assert(tx->insn.src[1].idx >= 0 &&
2635 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2636 target = tx->sampler_targets[tx->insn.src[1].idx];
2637
2638 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2639 return D3D_OK;
2640 }
2641
2642 DECL_SPECIAL(SETP)
2643 {
2644 STUB(D3DERR_INVALIDCALL);
2645 }
2646
2647 DECL_SPECIAL(BREAKP)
2648 {
2649 STUB(D3DERR_INVALIDCALL);
2650 }
2651
2652 DECL_SPECIAL(PHASE)
2653 {
2654 return D3D_OK; /* we don't care about phase */
2655 }
2656
2657 DECL_SPECIAL(COMMENT)
2658 {
2659 return D3D_OK; /* nothing to do */
2660 }
2661
2662
/* Builds one struct sm1_op_info initializer.
 *   sio      - D3DSIO_ opcode suffix
 *   tgsi     - TGSI_OPCODE_ suffix
 *   vv1, vv2 - first version pair (presumably the vertex shader min/max)
 *   pv1, pv2 - second version pair (presumably the pixel shader min/max)
 *   nd, ns   - destination and source parameter counts
 *   fn       - special translation handler, or NULL */
#define _OPI(sio, tgsi, vv1, vv2, pv1, pv2, nd, ns, fn) \
    { D3DSIO_##sio, TGSI_OPCODE_##tgsi, { vv1, vv2 }, { pv1, pv2, }, nd, ns, fn }
2665
2666 struct sm1_op_info inst_table[] =
2667 {
2668 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2669 _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
2670 _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2671 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2672 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2673 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2674 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2675 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2676 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2677 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2678 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2679 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2680 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2681 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2682 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2683 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2684 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2685 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2686 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2687 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2688 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2689
2690 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2691 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2692 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2693 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2694 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2695
2696 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2697 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2698 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2699 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2700 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2701 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2702
2703 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2704
2705 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2706 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2707 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2708 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2709 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2710
2711 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2712 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2713
2714 /* More flow control */
2715 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2716 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2717 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2718 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2719 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2720 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2721 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2722 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2723 /* we don't write to the address register, but a normal register (copied
2724 * when needed to the address register), thus we don't use ARR */
2725 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2726
2727 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2728 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2729
2730 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2731 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2732 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2733 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2734 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2735 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2736 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2737 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2738 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2739 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2740 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2741 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2742 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2743 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2744 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2745 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2746
2747 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2748 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2749 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2750 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2751
2752 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2753
2754 /* More tex stuff */
2755 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2756 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2757 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2758 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2759 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2760 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2761
2762 /* Misc */
2763 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2764 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2765 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2766 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2767 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2768 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2769 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2770 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2771 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2772 };
2773
2774 struct sm1_op_info inst_phase =
2775 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2776
2777 struct sm1_op_info inst_comment =
2778 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2779
2780 static void
2781 create_op_info_map(struct shader_translator *tx)
2782 {
2783 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2784 unsigned i;
2785
2786 for (i = 0; i < Elements(tx->op_info_map); ++i)
2787 tx->op_info_map[i] = -1;
2788
2789 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2790 for (i = 0; i < Elements(inst_table); ++i) {
2791 assert(inst_table[i].sio < Elements(tx->op_info_map));
2792 if (inst_table[i].vert_version.min <= version &&
2793 inst_table[i].vert_version.max >= version)
2794 tx->op_info_map[inst_table[i].sio] = i;
2795 }
2796 } else {
2797 for (i = 0; i < Elements(inst_table); ++i) {
2798 assert(inst_table[i].sio < Elements(tx->op_info_map));
2799 if (inst_table[i].frag_version.min <= version &&
2800 inst_table[i].frag_version.max >= version)
2801 tx->op_info_map[inst_table[i].sio] = i;
2802 }
2803 }
2804 }
2805
2806 static inline HRESULT
2807 NineTranslateInstruction_Generic(struct shader_translator *tx)
2808 {
2809 struct ureg_dst dst[1];
2810 struct ureg_src src[4];
2811 unsigned i;
2812
2813 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2814 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2815 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2816 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2817
2818 ureg_insn(tx->ureg, tx->insn.info->opcode,
2819 dst, tx->insn.ndst,
2820 src, tx->insn.nsrc);
2821 return D3D_OK;
2822 }
2823
2824 static inline DWORD
2825 TOKEN_PEEK(struct shader_translator *tx)
2826 {
2827 return *(tx->parse);
2828 }
2829
2830 static inline DWORD
2831 TOKEN_NEXT(struct shader_translator *tx)
2832 {
2833 return *(tx->parse)++;
2834 }
2835
2836 static inline void
2837 TOKEN_JUMP(struct shader_translator *tx)
2838 {
2839 if (tx->parse_next && tx->parse != tx->parse_next) {
2840 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2841 tx->parse = tx->parse_next;
2842 }
2843 }
2844
2845 static inline boolean
2846 sm1_parse_eof(struct shader_translator *tx)
2847 {
2848 return TOKEN_PEEK(tx) == NINED3DSP_END;
2849 }
2850
2851 static void
2852 sm1_read_version(struct shader_translator *tx)
2853 {
2854 const DWORD tok = TOKEN_NEXT(tx);
2855
2856 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2857 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2858
2859 switch (tok >> 16) {
2860 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2861 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2862 default:
2863 DBG("Invalid shader type: %x\n", tok);
2864 tx->processor = ~0;
2865 break;
2866 }
2867 }
2868
2869 /* This is just to check if we parsed the instruction properly. */
2870 static void
2871 sm1_parse_get_skip(struct shader_translator *tx)
2872 {
2873 const DWORD tok = TOKEN_PEEK(tx);
2874
2875 if (tx->version.major >= 2) {
2876 tx->parse_next = tx->parse + 1 /* this */ +
2877 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2878 } else {
2879 tx->parse_next = NULL; /* TODO: determine from param count */
2880 }
2881 }
2882
2883 static void
2884 sm1_print_comment(const char *comment, UINT size)
2885 {
2886 if (!size)
2887 return;
2888 /* TODO */
2889 }
2890
2891 static void
2892 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2893 {
2894 DWORD tok = TOKEN_PEEK(tx);
2895
2896 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2897 {
2898 const char *comment = "";
2899 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2900 tx->parse += size + 1;
2901
2902 if (print)
2903 sm1_print_comment(comment, size);
2904
2905 tok = TOKEN_PEEK(tx);
2906 }
2907 }
2908
2909 static void
2910 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2911 {
2912 *reg = TOKEN_NEXT(tx);
2913
2914 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2915 {
2916 if (tx->version.major < 2)
2917 *rel = (1 << 31) |
2918 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2919 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2920 D3DSP_NOSWIZZLE;
2921 else
2922 *rel = TOKEN_NEXT(tx);
2923 }
2924 }
2925
2926 static void
2927 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2928 {
2929 uint8_t shift;
2930 dst->file =
2931 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2932 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2933 dst->type = TGSI_RETURN_TYPE_FLOAT;
2934 dst->idx = tok & D3DSP_REGNUM_MASK;
2935 dst->rel = NULL;
2936 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2937 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2938 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2939 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2940 }
2941
2942 static void
2943 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2944 {
2945 src->file =
2946 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2947 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2948 src->type = TGSI_RETURN_TYPE_FLOAT;
2949 src->idx = tok & D3DSP_REGNUM_MASK;
2950 src->rel = NULL;
2951 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2952 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2953
2954 switch (src->file) {
2955 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2956 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2957 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2958 default:
2959 break;
2960 }
2961 }
2962
2963 static void
2964 sm1_parse_immediate(struct shader_translator *tx,
2965 struct sm1_src_param *imm)
2966 {
2967 imm->file = NINED3DSPR_IMMEDIATE;
2968 imm->idx = INT_MIN;
2969 imm->rel = NULL;
2970 imm->swizzle = NINED3DSP_NOSWIZZLE;
2971 imm->mod = 0;
2972 switch (tx->insn.opcode) {
2973 case D3DSIO_DEF:
2974 imm->type = NINED3DSPTYPE_FLOAT4;
2975 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2976 tx->parse += 4;
2977 break;
2978 case D3DSIO_DEFI:
2979 imm->type = NINED3DSPTYPE_INT4;
2980 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2981 tx->parse += 4;
2982 break;
2983 case D3DSIO_DEFB:
2984 imm->type = NINED3DSPTYPE_BOOL;
2985 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2986 tx->parse += 1;
2987 break;
2988 default:
2989 assert(0);
2990 break;
2991 }
2992 }
2993
2994 static void
2995 sm1_read_dst_param(struct shader_translator *tx,
2996 struct sm1_dst_param *dst,
2997 struct sm1_src_param *rel)
2998 {
2999 DWORD tok_dst, tok_rel = 0;
3000
3001 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3002 sm1_parse_dst_param(dst, tok_dst);
3003 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3004 sm1_parse_src_param(rel, tok_rel);
3005 dst->rel = rel;
3006 }
3007 }
3008
3009 static void
3010 sm1_read_src_param(struct shader_translator *tx,
3011 struct sm1_src_param *src,
3012 struct sm1_src_param *rel)
3013 {
3014 DWORD tok_src, tok_rel = 0;
3015
3016 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3017 sm1_parse_src_param(src, tok_src);
3018 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3019 assert(rel);
3020 sm1_parse_src_param(rel, tok_rel);
3021 src->rel = rel;
3022 }
3023 }
3024
3025 static void
3026 sm1_read_semantic(struct shader_translator *tx,
3027 struct sm1_semantic *sem)
3028 {
3029 const DWORD tok_usg = TOKEN_NEXT(tx);
3030 const DWORD tok_dst = TOKEN_NEXT(tx);
3031
3032 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3033 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3034 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3035
3036 sm1_parse_dst_param(&sem->reg, tok_dst);
3037 }
3038
3039 static void
3040 sm1_parse_instruction(struct shader_translator *tx)
3041 {
3042 struct sm1_instruction *insn = &tx->insn;
3043 DWORD tok;
3044 struct sm1_op_info *info = NULL;
3045 unsigned i;
3046
3047 sm1_parse_comments(tx, TRUE);
3048 sm1_parse_get_skip(tx);
3049
3050 tok = TOKEN_NEXT(tx);
3051
3052 insn->opcode = tok & D3DSI_OPCODE_MASK;
3053 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3054 insn->coissue = !!(tok & D3DSI_COISSUE);
3055 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3056
3057 if (insn->opcode < Elements(tx->op_info_map)) {
3058 int k = tx->op_info_map[insn->opcode];
3059 if (k >= 0) {
3060 assert(k < Elements(inst_table));
3061 info = &inst_table[k];
3062 }
3063 } else {
3064 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3065 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3066 }
3067 if (!info) {
3068 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3069 TOKEN_JUMP(tx);
3070 return;
3071 }
3072 insn->info = info;
3073 insn->ndst = info->ndst;
3074 insn->nsrc = info->nsrc;
3075
3076 assert(!insn->predicated && "TODO: predicated instructions");
3077
3078 /* check version */
3079 {
3080 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3081 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3082 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3083 if (ver < min || ver > max) {
3084 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3085 min, ver, max);
3086 return;
3087 }
3088 }
3089
3090 for (i = 0; i < insn->ndst; ++i)
3091 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3092 if (insn->predicated)
3093 sm1_read_src_param(tx, &insn->pred, NULL);
3094 for (i = 0; i < insn->nsrc; ++i)
3095 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3096
3097 /* parse here so we can dump them before processing */
3098 if (insn->opcode == D3DSIO_DEF ||
3099 insn->opcode == D3DSIO_DEFI ||
3100 insn->opcode == D3DSIO_DEFB)
3101 sm1_parse_immediate(tx, &tx->insn.src[0]);
3102
3103 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3104 sm1_instruction_check(insn);
3105
3106 if (info->handler)
3107 info->handler(tx);
3108 else
3109 NineTranslateInstruction_Generic(tx);
3110 tx_apply_dst0_modifiers(tx);
3111
3112 tx->num_scratch = 0; /* reset */
3113
3114 TOKEN_JUMP(tx);
3115 }
3116
3117 static void
3118 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3119 {
3120 unsigned i;
3121
3122 tx->info = info;
3123
3124 tx->byte_code = info->byte_code;
3125 tx->parse = info->byte_code;
3126
3127 for (i = 0; i < Elements(info->input_map); ++i)
3128 info->input_map[i] = NINE_DECLUSAGE_NONE;
3129 info->num_inputs = 0;
3130
3131 info->position_t = FALSE;
3132 info->point_size = FALSE;
3133
3134 tx->info->const_float_slots = 0;
3135 tx->info->const_int_slots = 0;
3136 tx->info->const_bool_slots = 0;
3137
3138 info->sampler_mask = 0x0;
3139 info->rt_mask = 0x0;
3140
3141 info->lconstf.data = NULL;
3142 info->lconstf.ranges = NULL;
3143
3144 info->bumpenvmat_needed = 0;
3145
3146 for (i = 0; i < Elements(tx->regs.rL); ++i) {
3147 tx->regs.rL[i] = ureg_dst_undef();
3148 }
3149 tx->regs.address = ureg_dst_undef();
3150 tx->regs.a0 = ureg_dst_undef();
3151 tx->regs.p = ureg_dst_undef();
3152 tx->regs.oDepth = ureg_dst_undef();
3153 tx->regs.vPos = ureg_src_undef();
3154 tx->regs.vFace = ureg_src_undef();
3155 for (i = 0; i < Elements(tx->regs.o); ++i)
3156 tx->regs.o[i] = ureg_dst_undef();
3157 for (i = 0; i < Elements(tx->regs.oCol); ++i)
3158 tx->regs.oCol[i] = ureg_dst_undef();
3159 for (i = 0; i < Elements(tx->regs.vC); ++i)
3160 tx->regs.vC[i] = ureg_src_undef();
3161 for (i = 0; i < Elements(tx->regs.vT); ++i)
3162 tx->regs.vT[i] = ureg_src_undef();
3163
3164 for (i = 0; i < Elements(tx->lconsti); ++i)
3165 tx->lconsti[i].idx = -1;
3166 for (i = 0; i < Elements(tx->lconstb); ++i)
3167 tx->lconstb[i].idx = -1;
3168
3169 sm1_read_version(tx);
3170
3171 info->version = (tx->version.major << 4) | tx->version.minor;
3172
3173 create_op_info_map(tx);
3174 }
3175
3176 static void
3177 tx_dtor(struct shader_translator *tx)
3178 {
3179 if (tx->num_inst_labels)
3180 FREE(tx->inst_labels);
3181 FREE(tx->lconstf);
3182 FREE(tx->regs.r);
3183 FREE(tx);
3184 }
3185
3186 static inline unsigned
3187 tgsi_processor_from_type(unsigned shader_type)
3188 {
3189 switch (shader_type) {
3190 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
3191 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
3192 default:
3193 return ~0;
3194 }
3195 }
3196
3197 static void
3198 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3199 {
3200 struct ureg_program *ureg = tx->ureg;
3201 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3202
3203 /* TODO: fog computation */
3204 ureg_MOV(ureg, oCol0, src_col);
3205 }
3206
3207 #define GET_CAP(n) device->screen->get_param( \
3208 device->screen, PIPE_CAP_##n)
3209 #define GET_SHADER_CAP(n) device->screen->get_shader_param( \
3210 device->screen, info->type, PIPE_SHADER_CAP_##n)
3211
3212 HRESULT
3213 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3214 {
3215 struct shader_translator *tx;
3216 HRESULT hr = D3D_OK;
3217 const unsigned processor = tgsi_processor_from_type(info->type);
3218 unsigned s, slot_max;
3219 unsigned max_const_f;
3220
3221 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3222
3223 tx = CALLOC_STRUCT(shader_translator);
3224 if (!tx)
3225 return E_OUTOFMEMORY;
3226 tx_ctor(tx, info);
3227
3228 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3229 hr = D3DERR_INVALIDCALL;
3230 DBG("Unsupported shader version: %u.%u !\n",
3231 tx->version.major, tx->version.minor);
3232 goto out;
3233 }
3234 if (tx->processor != processor) {
3235 hr = D3DERR_INVALIDCALL;
3236 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3237 goto out;
3238 }
3239 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
3240 tx->version.major, tx->version.minor);
3241
3242 tx->ureg = ureg_create(processor);
3243 if (!tx->ureg) {
3244 hr = E_OUTOFMEMORY;
3245 goto out;
3246 }
3247
3248 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3249 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3250 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3251 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3252 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3253 tx->texcoord_sn = tx->want_texcoord ?
3254 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3255
3256 /* VS must always write position. Declare it here to make it the 1st output.
3257 * (Some drivers like nv50 are buggy and rely on that.)
3258 */
3259 if (IS_VS) {
3260 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3261 } else {
3262 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3263 if (!tx->shift_wpos)
3264 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3265 }
3266
3267 while (!sm1_parse_eof(tx) && !tx->failure)
3268 sm1_parse_instruction(tx);
3269 tx->parse++; /* for byte_size */
3270
3271 if (tx->failure) {
3272 ERR("Encountered buggy shader\n");
3273 ureg_destroy(tx->ureg);
3274 hr = D3DERR_INVALIDCALL;
3275 goto out;
3276 }
3277
3278 if (IS_PS && tx->version.major < 3) {
3279 if (tx->version.major < 2) {
3280 assert(tx->num_temp); /* there must be color output */
3281 info->rt_mask |= 0x1;
3282 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3283 } else {
3284 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3285 }
3286 }
3287
3288 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog)) {
3289 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3290 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3291 }
3292
3293 /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
3294 if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) {
3295 struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1);
3296 ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1]));
3297 ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f));
3298 }
3299
3300 if (info->position_t)
3301 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3302
3303 ureg_END(tx->ureg);
3304
3305 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
3306 info->point_size = TRUE;
3307
3308 /* record local constants */
3309 if (tx->num_lconstf && tx->indirect_const_access) {
3310 struct nine_range *ranges;
3311 float *data;
3312 int *indices;
3313 unsigned i, k, n;
3314
3315 hr = E_OUTOFMEMORY;
3316
3317 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3318 if (!data)
3319 goto out;
3320 info->lconstf.data = data;
3321
3322 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3323 if (!indices)
3324 goto out;
3325
3326 /* lazy sort, num_lconstf should be small */
3327 for (n = 0; n < tx->num_lconstf; ++n) {
3328 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3329 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3330 k = i;
3331 }
3332 indices[n] = tx->lconstf[k].idx;
3333 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3334 tx->lconstf[k].idx = INT_MAX;
3335 }
3336
3337 /* count ranges */
3338 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3339 if (indices[i] != indices[i - 1] + 1)
3340 ++n;
3341 ranges = MALLOC(n * sizeof(ranges[0]));
3342 if (!ranges) {
3343 FREE(indices);
3344 goto out;
3345 }
3346 info->lconstf.ranges = ranges;
3347
3348 k = 0;
3349 ranges[k].bgn = indices[0];
3350 for (i = 1; i < tx->num_lconstf; ++i) {
3351 if (indices[i] != indices[i - 1] + 1) {
3352 ranges[k].next = &ranges[k + 1];
3353 ranges[k].end = indices[i - 1] + 1;
3354 ++k;
3355 ranges[k].bgn = indices[i];
3356 }
3357 }
3358 ranges[k].end = indices[i - 1] + 1;
3359 ranges[k].next = NULL;
3360 assert(n == (k + 1));
3361
3362 FREE(indices);
3363 hr = D3D_OK;
3364 }
3365
3366 /* r500 */
3367 if (info->const_float_slots > device->max_vs_const_f &&
3368 (info->const_int_slots || info->const_bool_slots))
3369 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3370
3371
3372 if (tx->indirect_const_access) /* vs only */
3373 info->const_float_slots = device->max_vs_const_f;
3374
3375 max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3376 slot_max = info->const_bool_slots > 0 ?
3377 max_const_f + NINE_MAX_CONST_I
3378 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3379 info->const_int_slots > 0 ?
3380 max_const_f + info->const_int_slots :
3381 info->const_float_slots;
3382
3383 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3384
3385 for (s = 0; s < slot_max; s++)
3386 ureg_DECL_constant(tx->ureg, s);
3387
3388 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3389 unsigned count;
3390 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3391 tgsi_dump(toks, 0);
3392 ureg_free_tokens(toks);
3393 }
3394
3395 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3396 if (!info->cso) {
3397 hr = D3DERR_DRIVERINTERNALERROR;
3398 FREE(info->lconstf.data);
3399 FREE(info->lconstf.ranges);
3400 goto out;
3401 }
3402
3403 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3404 out:
3405 tx_dtor(tx);
3406 return hr;
3407 }