nine: Don't use the otherwise-dead SFL opcode in an unreachable path.
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_ureg.h"
34 #include "tgsi/tgsi_dump.h"
35
/* Debug channel handed to _nine_debug_printf by DUMP below. */
#define DBG_CHANNEL DBG_SHADER

#if 1
#define NINE_TGSI_LAZY_DEVS /* don't use TGSI_OPCODE_BREAKC */
#endif
#define NINE_TGSI_LAZY_R600 /* don't use TGSI_OPCODE_DP2A */

/* printf-style output on DBG_CHANNEL without a function-name argument.
 * Uses the standard C99 variadic form instead of the GNU "args..." extension;
 * every use passes at least a format string. */
#define DUMP(...) _nine_debug_printf(DBG_CHANNEL, NULL, __VA_ARGS__)
44
45
46 struct shader_translator;
47
48 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
49
50 static INLINE const char *d3dsio_to_string(unsigned opcode);
51
52
/* Shader type identifiers. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the label stacks kept in struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Immediate / constant value types. */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Extra register file value, placed right after the last D3DSPR_x one. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operators. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Per-opcode flag bits. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Write mask bits, one per component. */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, x y z w in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_x values. */
#define NINE_SWIZZLE4(x,y,z,w) \
   TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Destination modifiers, shifted down so that they fit in a BYTE. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
99
100 /*
101 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
102 * BIAS <= PS 1.4 (x-0.5)
103 * BIASNEG <= PS 1.4 (-(x-0.5))
104 * SIGN <= PS 1.4 (2(x-0.5))
105 * SIGNNEG <= PS 1.4 (-2(x-0.5))
106 * COMP <= PS 1.4 (1-x)
107 * X2 = PS 1.4 (2x)
108 * X2NEG = PS 1.4 (-2x)
109 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
110 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
111 * ABS >= SM 3.0 (abs(x))
112 * ABSNEG >= SM 3.0 (-abs(x))
113 * NOT >= SM 2.0 predication only
114 */
/* Source modifiers, shifted down by D3DSP_SRCMOD_SHIFT so that they can be
 * stored in a BYTE and used as indices into sm1_mod_str. */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
129
130 static const char *sm1_mod_str[] =
131 {
132 [NINED3DSPSM_NONE] = "",
133 [NINED3DSPSM_NEG] = "-",
134 [NINED3DSPSM_BIAS] = "bias",
135 [NINED3DSPSM_BIASNEG] = "biasneg",
136 [NINED3DSPSM_SIGN] = "sign",
137 [NINED3DSPSM_SIGNNEG] = "signneg",
138 [NINED3DSPSM_COMP] = "comp",
139 [NINED3DSPSM_X2] = "x2",
140 [NINED3DSPSM_X2NEG] = "x2neg",
141 [NINED3DSPSM_DZ] = "dz",
142 [NINED3DSPSM_DW] = "dw",
143 [NINED3DSPSM_ABS] = "abs",
144 [NINED3DSPSM_ABSNEG] = "-abs",
145 [NINED3DSPSM_NOT] = "not"
146 };
147
148 static void
149 sm1_dump_writemask(BYTE mask)
150 {
151 if (mask & 1) DUMP("x"); else DUMP("_");
152 if (mask & 2) DUMP("y"); else DUMP("_");
153 if (mask & 4) DUMP("z"); else DUMP("_");
154 if (mask & 8) DUMP("w"); else DUMP("_");
155 }
156
157 static void
158 sm1_dump_swizzle(BYTE s)
159 {
160 char c[4] = { 'x', 'y', 'z', 'w' };
161 DUMP("%c%c%c%c",
162 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
163 }
164
165 static const char sm1_file_char[] =
166 {
167 [D3DSPR_TEMP] = 'r',
168 [D3DSPR_INPUT] = 'v',
169 [D3DSPR_CONST] = 'c',
170 [D3DSPR_ADDR] = 'A',
171 [D3DSPR_RASTOUT] = 'R',
172 [D3DSPR_ATTROUT] = 'D',
173 [D3DSPR_OUTPUT] = 'o',
174 [D3DSPR_CONSTINT] = 'I',
175 [D3DSPR_COLOROUT] = 'C',
176 [D3DSPR_DEPTHOUT] = 'D',
177 [D3DSPR_SAMPLER] = 's',
178 [D3DSPR_CONST2] = 'c',
179 [D3DSPR_CONST3] = 'c',
180 [D3DSPR_CONST4] = 'c',
181 [D3DSPR_CONSTBOOL] = 'B',
182 [D3DSPR_LOOP] = 'L',
183 [D3DSPR_TEMPFLOAT16] = 'h',
184 [D3DSPR_MISCTYPE] = 'M',
185 [D3DSPR_LABEL] = 'X',
186 [D3DSPR_PREDICATE] = 'p'
187 };
188
189 static void
190 sm1_dump_reg(BYTE file, INT index)
191 {
192 switch (file) {
193 case D3DSPR_LOOP:
194 DUMP("aL");
195 break;
196 case D3DSPR_COLOROUT:
197 DUMP("oC%i", index);
198 break;
199 case D3DSPR_DEPTHOUT:
200 DUMP("oDepth");
201 break;
202 case D3DSPR_RASTOUT:
203 DUMP("oRast%i", index);
204 break;
205 case D3DSPR_CONSTINT:
206 DUMP("iconst[%i]", index);
207 break;
208 case D3DSPR_CONSTBOOL:
209 DUMP("bconst[%i]", index);
210 break;
211 default:
212 DUMP("%c%i", sm1_file_char[file], index);
213 break;
214 }
215 }
216
217 struct sm1_src_param
218 {
219 INT idx;
220 struct sm1_src_param *rel;
221 BYTE file;
222 BYTE swizzle;
223 BYTE mod;
224 BYTE type;
225 union {
226 DWORD d[4];
227 float f[4];
228 int i[4];
229 BOOL b;
230 } imm;
231 };
232 static void
233 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
234
235 struct sm1_dst_param
236 {
237 INT idx;
238 struct sm1_src_param *rel;
239 BYTE file;
240 BYTE mask;
241 BYTE mod;
242 BYTE shift; /* sint4 */
243 BYTE type;
244 };
245
246 static INLINE void
247 assert_replicate_swizzle(const struct ureg_src *reg)
248 {
249 assert(reg->SwizzleY == reg->SwizzleX &&
250 reg->SwizzleZ == reg->SwizzleX &&
251 reg->SwizzleW == reg->SwizzleX);
252 }
253
254 static void
255 sm1_dump_immediate(const struct sm1_src_param *param)
256 {
257 switch (param->type) {
258 case NINED3DSPTYPE_FLOAT4:
259 DUMP("{ %f %f %f %f }",
260 param->imm.f[0], param->imm.f[1],
261 param->imm.f[2], param->imm.f[3]);
262 break;
263 case NINED3DSPTYPE_INT4:
264 DUMP("{ %i %i %i %i }",
265 param->imm.i[0], param->imm.i[1],
266 param->imm.i[2], param->imm.i[3]);
267 break;
268 case NINED3DSPTYPE_BOOL:
269 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
270 break;
271 default:
272 assert(0);
273 break;
274 }
275 }
276
277 static void
278 sm1_dump_src_param(const struct sm1_src_param *param)
279 {
280 if (param->file == NINED3DSPR_IMMEDIATE) {
281 assert(!param->mod &&
282 !param->rel &&
283 param->swizzle == NINED3DSP_NOSWIZZLE);
284 sm1_dump_immediate(param);
285 return;
286 }
287
288 if (param->mod)
289 DUMP("%s(", sm1_mod_str[param->mod]);
290 if (param->rel) {
291 DUMP("%c[", sm1_file_char[param->file]);
292 sm1_dump_src_param(param->rel);
293 DUMP("+%i]", param->idx);
294 } else {
295 sm1_dump_reg(param->file, param->idx);
296 }
297 if (param->mod)
298 DUMP(")");
299 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
300 DUMP(".");
301 sm1_dump_swizzle(param->swizzle);
302 }
303 }
304
305 static void
306 sm1_dump_dst_param(const struct sm1_dst_param *param)
307 {
308 if (param->mod & NINED3DSPDM_SATURATE)
309 DUMP("sat ");
310 if (param->mod & NINED3DSPDM_PARTIALP)
311 DUMP("pp ");
312 if (param->mod & NINED3DSPDM_CENTROID)
313 DUMP("centroid ");
314 if (param->shift < 0)
315 DUMP("/%u ", 1 << -param->shift);
316 if (param->shift > 0)
317 DUMP("*%u ", 1 << param->shift);
318
319 if (param->rel) {
320 DUMP("%c[", sm1_file_char[param->file]);
321 sm1_dump_src_param(param->rel);
322 DUMP("+%i]", param->idx);
323 } else {
324 sm1_dump_reg(param->file, param->idx);
325 }
326 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
327 DUMP(".");
328 sm1_dump_writemask(param->mask);
329 }
330 }
331
332 struct sm1_semantic
333 {
334 struct sm1_dst_param reg;
335 BYTE sampler_type;
336 D3DDECLUSAGE usage;
337 BYTE usage_idx;
338 };
339
340 struct sm1_op_info
341 {
342 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
343 * should be ignored completely */
344 unsigned sio;
345 unsigned opcode; /* TGSI_OPCODE_x */
346
347 /* versions are still set even handler is set */
348 struct {
349 unsigned min;
350 unsigned max;
351 } vert_version, frag_version;
352
353 /* number of regs parsed outside of special handler */
354 unsigned ndst;
355 unsigned nsrc;
356
357 /* some instructions don't map perfectly, so use a special handler */
358 translate_instruction_func handler;
359 };
360
361 struct sm1_instruction
362 {
363 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
364 BYTE flags;
365 BOOL coissue;
366 BOOL predicated;
367 BYTE ndst;
368 BYTE nsrc;
369 struct sm1_src_param src[4];
370 struct sm1_src_param src_rel[4];
371 struct sm1_src_param pred;
372 struct sm1_src_param dst_rel[1];
373 struct sm1_dst_param dst[1];
374
375 struct sm1_op_info *info;
376 };
377
378 static void
379 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
380 {
381 unsigned i;
382
383 /* no info stored for these: */
384 if (insn->opcode == D3DSIO_DCL)
385 return;
386 for (i = 0; i < indent; ++i)
387 DUMP(" ");
388
389 if (insn->predicated) {
390 DUMP("@");
391 sm1_dump_src_param(&insn->pred);
392 DUMP(" ");
393 }
394 DUMP("%s", d3dsio_to_string(insn->opcode));
395 if (insn->flags) {
396 switch (insn->opcode) {
397 case D3DSIO_TEX:
398 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
399 break;
400 default:
401 DUMP("_%x", insn->flags);
402 break;
403 }
404 }
405 if (insn->coissue)
406 DUMP("_co");
407 DUMP(" ");
408
409 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
410 sm1_dump_dst_param(&insn->dst[i]);
411 DUMP(" ");
412 }
413
414 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
415 sm1_dump_src_param(&insn->src[i]);
416 DUMP(" ");
417 }
418 if (insn->opcode == D3DSIO_DEF ||
419 insn->opcode == D3DSIO_DEFI ||
420 insn->opcode == D3DSIO_DEFB)
421 sm1_dump_immediate(&insn->src[0]);
422
423 DUMP("\n");
424 }
425
426 struct sm1_local_const
427 {
428 INT idx;
429 struct ureg_src reg;
430 union {
431 boolean b;
432 float f[4];
433 int32_t i[4];
434 } imm;
435 };
436
437 struct shader_translator
438 {
439 const DWORD *byte_code;
440 const DWORD *parse;
441 const DWORD *parse_next;
442
443 struct ureg_program *ureg;
444
445 /* shader version */
446 struct {
447 BYTE major;
448 BYTE minor;
449 } version;
450 unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
451
452 boolean native_integers;
453 boolean inline_subroutines;
454 boolean lower_preds;
455 boolean want_texcoord;
456 boolean shift_wpos;
457 unsigned texcoord_sn;
458
459 struct sm1_instruction insn; /* current instruction */
460
461 struct {
462 struct ureg_dst *r;
463 struct ureg_dst oPos;
464 struct ureg_dst oFog;
465 struct ureg_dst oPts;
466 struct ureg_dst oCol[4];
467 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
468 struct ureg_dst oDepth;
469 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
470 struct ureg_src vPos;
471 struct ureg_src vFace;
472 struct ureg_src s;
473 struct ureg_dst p;
474 struct ureg_dst a;
475 struct ureg_dst tS[8]; /* texture stage registers */
476 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
477 struct ureg_dst t[5]; /* scratch TEMPs */
478 struct ureg_src vC[2]; /* PS color in */
479 struct ureg_src vT[8]; /* PS texcoord in */
480 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
481 struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* loop ctr ADDR register */
482 } regs;
483 unsigned num_temp; /* Elements(regs.r) */
484 unsigned num_scratch;
485 unsigned loop_depth;
486 unsigned loop_depth_max;
487 unsigned cond_depth;
488 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
489 unsigned cond_labels[NINE_MAX_COND_DEPTH];
490
491 unsigned *inst_labels; /* LABEL op */
492 unsigned num_inst_labels;
493
494 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
495
496 struct sm1_local_const *lconstf;
497 unsigned num_lconstf;
498 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
499 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
500
501 boolean indirect_const_access;
502
503 struct nine_shader_info *info;
504
505 int16_t op_info_map[D3DSIO_BREAKP + 1];
506 };
507
/* Shader stage tests; both expect a "tx" translator pointer in scope. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)

/* Forward declaration; defined later in this file. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
513
514 static void
515 sm1_instruction_check(const struct sm1_instruction *insn)
516 {
517 if (insn->opcode == D3DSIO_CRS)
518 {
519 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
520 {
521 DBG("CRS.mask.w\n");
522 }
523 }
524 }
525
526 static boolean
527 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
528 {
529 INT i;
530 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
531 for (i = 0; i < tx->num_lconstf; ++i) {
532 if (tx->lconstf[i].idx == index) {
533 *src = tx->lconstf[i].reg;
534 return TRUE;
535 }
536 }
537 return FALSE;
538 }
539 static boolean
540 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
541 {
542 assert(index >= 0 && index < NINE_MAX_CONST_I);
543 if (tx->lconsti[index].idx == index)
544 *src = tx->lconsti[index].reg;
545 return tx->lconsti[index].idx == index;
546 }
547 static boolean
548 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
549 {
550 assert(index >= 0 && index < NINE_MAX_CONST_B);
551 if (tx->lconstb[index].idx == index)
552 *src = tx->lconstb[index].reg;
553 return tx->lconstb[index].idx == index;
554 }
555
556 static void
557 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
558 {
559 unsigned n;
560
561 /* Anno1404 sets out of range constants. */
562 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
563 if (index >= NINE_MAX_CONST_F)
564 WARN("lconstf index %i too high, indirect access won't work\n", index);
565
566 for (n = 0; n < tx->num_lconstf; ++n)
567 if (tx->lconstf[n].idx == index)
568 break;
569 if (n == tx->num_lconstf) {
570 if ((n % 8) == 0) {
571 tx->lconstf = REALLOC(tx->lconstf,
572 (n + 0) * sizeof(tx->lconstf[0]),
573 (n + 8) * sizeof(tx->lconstf[0]));
574 assert(tx->lconstf);
575 }
576 tx->num_lconstf++;
577 }
578 tx->lconstf[n].idx = index;
579 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
580
581 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
582 }
583 static void
584 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
585 {
586 assert(index >= 0 && index < NINE_MAX_CONST_I);
587 tx->lconsti[index].idx = index;
588 tx->lconsti[index].reg = tx->native_integers ?
589 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
590 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
591 }
592 static void
593 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
594 {
595 assert(index >= 0 && index < NINE_MAX_CONST_B);
596 tx->lconstb[index].idx = index;
597 tx->lconstb[index].reg = tx->native_integers ?
598 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
599 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
600 }
601
602 static INLINE struct ureg_dst
603 tx_scratch(struct shader_translator *tx)
604 {
605 assert(tx->num_scratch < Elements(tx->regs.t));
606 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
607 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
608 return tx->regs.t[tx->num_scratch++];
609 }
610
611 static INLINE struct ureg_dst
612 tx_scratch_scalar(struct shader_translator *tx)
613 {
614 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
615 }
616
617 static INLINE struct ureg_src
618 tx_src_scalar(struct ureg_dst dst)
619 {
620 struct ureg_src src = ureg_src(dst);
621 int c = ffs(dst.WriteMask) - 1;
622 if (dst.WriteMask == (1 << c))
623 src = ureg_scalar(src, c);
624 return src;
625 }
626
627 /* Need to declare all constants if indirect addressing is used,
628 * otherwise we could scan the shader to determine the maximum.
629 * TODO: It doesn't really matter for nv50 so I won't do the scan,
630 * but radeon drivers might care, if they don't infer it from TGSI.
631 */
632 static void
633 tx_decl_constants(struct shader_translator *tx)
634 {
635 unsigned i, n = 0;
636
637 for (i = 0; i < NINE_MAX_CONST_F; ++i)
638 ureg_DECL_constant(tx->ureg, n++);
639 for (i = 0; i < NINE_MAX_CONST_I; ++i)
640 ureg_DECL_constant(tx->ureg, n++);
641 for (i = 0; i < (NINE_MAX_CONST_B / 4); ++i)
642 ureg_DECL_constant(tx->ureg, n++);
643 }
644
645 static INLINE void
646 tx_temp_alloc(struct shader_translator *tx, INT idx)
647 {
648 assert(idx >= 0);
649 if (idx >= tx->num_temp) {
650 unsigned k = tx->num_temp;
651 unsigned n = idx + 1;
652 tx->regs.r = REALLOC(tx->regs.r,
653 k * sizeof(tx->regs.r[0]),
654 n * sizeof(tx->regs.r[0]));
655 for (; k < n; ++k)
656 tx->regs.r[k] = ureg_dst_undef();
657 tx->num_temp = n;
658 }
659 if (ureg_dst_is_undef(tx->regs.r[idx]))
660 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
661 }
662
663 static INLINE void
664 tx_addr_alloc(struct shader_translator *tx, INT idx)
665 {
666 assert(idx == 0);
667 if (ureg_dst_is_undef(tx->regs.a))
668 tx->regs.a = ureg_DECL_address(tx->ureg);
669 }
670
671 static INLINE void
672 tx_pred_alloc(struct shader_translator *tx, INT idx)
673 {
674 assert(idx == 0);
675 if (ureg_dst_is_undef(tx->regs.p))
676 tx->regs.p = ureg_DECL_predicate(tx->ureg);
677 }
678
679 static INLINE void
680 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
681 {
682 assert(IS_PS);
683 assert(idx >= 0 && idx < Elements(tx->regs.vT));
684 if (ureg_src_is_undef(tx->regs.vT[idx]))
685 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
686 TGSI_INTERPOLATE_PERSPECTIVE);
687 }
688
689 static INLINE unsigned *
690 tx_bgnloop(struct shader_translator *tx)
691 {
692 tx->loop_depth++;
693 if (tx->loop_depth_max < tx->loop_depth)
694 tx->loop_depth_max = tx->loop_depth;
695 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
696 return &tx->loop_labels[tx->loop_depth - 1];
697 }
698
699 static INLINE unsigned *
700 tx_endloop(struct shader_translator *tx)
701 {
702 assert(tx->loop_depth);
703 tx->loop_depth--;
704 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
705 ureg_get_instruction_number(tx->ureg));
706 return &tx->loop_labels[tx->loop_depth];
707 }
708
709 static struct ureg_dst
710 tx_get_loopctr(struct shader_translator *tx)
711 {
712 const unsigned l = tx->loop_depth - 1;
713
714 if (!tx->loop_depth)
715 {
716 DBG("loop counter requested outside of loop\n");
717 return ureg_dst_undef();
718 }
719
720 if (ureg_dst_is_undef(tx->regs.aL[l]))
721 {
722 struct ureg_dst rreg = ureg_DECL_local_temporary(tx->ureg);
723 struct ureg_dst areg = ureg_DECL_address(tx->ureg);
724 unsigned c;
725
726 assert(l % 4 == 0);
727 for (c = l; c < (l + 4) && c < Elements(tx->regs.aL); ++c) {
728 tx->regs.rL[c] = ureg_writemask(rreg, 1 << (c & 3));
729 tx->regs.aL[c] = ureg_writemask(areg, 1 << (c & 3));
730 }
731 }
732 return tx->regs.rL[l];
733 }
734 static struct ureg_dst
735 tx_get_aL(struct shader_translator *tx)
736 {
737 if (!ureg_dst_is_undef(tx_get_loopctr(tx)))
738 return tx->regs.aL[tx->loop_depth - 1];
739 return ureg_dst_undef();
740 }
741
742 static INLINE unsigned *
743 tx_cond(struct shader_translator *tx)
744 {
745 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
746 tx->cond_depth++;
747 return &tx->cond_labels[tx->cond_depth - 1];
748 }
749
750 static INLINE unsigned *
751 tx_elsecond(struct shader_translator *tx)
752 {
753 assert(tx->cond_depth);
754 return &tx->cond_labels[tx->cond_depth - 1];
755 }
756
757 static INLINE void
758 tx_endcond(struct shader_translator *tx)
759 {
760 assert(tx->cond_depth);
761 tx->cond_depth--;
762 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
763 ureg_get_instruction_number(tx->ureg));
764 }
765
766 static INLINE struct ureg_dst
767 nine_ureg_dst_register(unsigned file, int index)
768 {
769 return ureg_dst(ureg_src_register(file, index));
770 }
771
772 static struct ureg_src
773 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
774 {
775 struct ureg_program *ureg = tx->ureg;
776 struct ureg_src src;
777 struct ureg_dst tmp;
778
779 switch (param->file)
780 {
781 case D3DSPR_TEMP:
782 assert(!param->rel);
783 tx_temp_alloc(tx, param->idx);
784 src = ureg_src(tx->regs.r[param->idx]);
785 break;
786 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
787 case D3DSPR_ADDR:
788 assert(!param->rel);
789 if (IS_VS) {
790 tx_addr_alloc(tx, param->idx);
791 src = ureg_src(tx->regs.a);
792 } else {
793 if (tx->version.major < 2 && tx->version.minor < 4) {
794 /* no subroutines, so should be defined */
795 src = ureg_src(tx->regs.tS[param->idx]);
796 } else {
797 tx_texcoord_alloc(tx, param->idx);
798 src = tx->regs.vT[param->idx];
799 }
800 }
801 break;
802 case D3DSPR_INPUT:
803 if (IS_VS) {
804 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
805 } else {
806 if (tx->version.major < 3) {
807 assert(!param->rel);
808 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
809 param->idx,
810 TGSI_INTERPOLATE_PERSPECTIVE);
811 } else {
812 assert(!param->rel); /* TODO */
813 assert(param->idx < Elements(tx->regs.v));
814 src = tx->regs.v[param->idx];
815 }
816 }
817 break;
818 case D3DSPR_PREDICATE:
819 assert(!param->rel);
820 tx_pred_alloc(tx, param->idx);
821 src = ureg_src(tx->regs.p);
822 break;
823 case D3DSPR_SAMPLER:
824 assert(param->mod == NINED3DSPSM_NONE);
825 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
826 assert(!param->rel);
827 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
828 break;
829 case D3DSPR_CONST:
830 if (param->rel)
831 tx->indirect_const_access = TRUE;
832 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
833 if (!param->rel)
834 nine_info_mark_const_f_used(tx->info, param->idx);
835 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
836 }
837 break;
838 case D3DSPR_CONST2:
839 case D3DSPR_CONST3:
840 case D3DSPR_CONST4:
841 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
842 assert(!"CONST2/3/4");
843 src = ureg_imm1f(ureg, 0.0f);
844 break;
845 case D3DSPR_CONSTINT:
846 if (param->rel || !tx_lconsti(tx, &src, param->idx)) {
847 if (!param->rel)
848 nine_info_mark_const_i_used(tx->info, param->idx);
849 src = ureg_src_register(TGSI_FILE_CONSTANT,
850 tx->info->const_i_base + param->idx);
851 }
852 break;
853 case D3DSPR_CONSTBOOL:
854 if (param->rel || !tx_lconstb(tx, &src, param->idx)) {
855 char r = param->idx / 4;
856 char s = param->idx & 3;
857 if (!param->rel)
858 nine_info_mark_const_b_used(tx->info, param->idx);
859 src = ureg_src_register(TGSI_FILE_CONSTANT,
860 tx->info->const_b_base + r);
861 src = ureg_swizzle(src, s, s, s, s);
862 }
863 break;
864 case D3DSPR_LOOP:
865 src = tx_src_scalar(tx_get_aL(tx));
866 break;
867 case D3DSPR_MISCTYPE:
868 switch (param->idx) {
869 case D3DSMO_POSITION:
870 if (ureg_src_is_undef(tx->regs.vPos))
871 tx->regs.vPos = ureg_DECL_fs_input(ureg,
872 TGSI_SEMANTIC_POSITION, 0,
873 TGSI_INTERPOLATE_LINEAR);
874 if (tx->shift_wpos) {
875 /* TODO: do this only once */
876 struct ureg_dst wpos = tx_scratch(tx);
877 ureg_SUB(ureg, wpos, tx->regs.vPos,
878 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
879 src = ureg_src(wpos);
880 } else {
881 src = tx->regs.vPos;
882 }
883 break;
884 case D3DSMO_FACE:
885 if (ureg_src_is_undef(tx->regs.vFace)) {
886 tx->regs.vFace = ureg_DECL_fs_input(ureg,
887 TGSI_SEMANTIC_FACE, 0,
888 TGSI_INTERPOLATE_CONSTANT);
889 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
890 }
891 src = tx->regs.vFace;
892 break;
893 default:
894 assert(!"invalid src D3DSMO");
895 break;
896 }
897 assert(!param->rel);
898 break;
899 case D3DSPR_TEMPFLOAT16:
900 break;
901 default:
902 assert(!"invalid src D3DSPR");
903 }
904 if (param->rel)
905 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
906
907 if (param->swizzle != NINED3DSP_NOSWIZZLE)
908 src = ureg_swizzle(src,
909 (param->swizzle >> 0) & 0x3,
910 (param->swizzle >> 2) & 0x3,
911 (param->swizzle >> 4) & 0x3,
912 (param->swizzle >> 6) & 0x3);
913
914 switch (param->mod) {
915 case NINED3DSPSM_ABS:
916 src = ureg_abs(src);
917 break;
918 case NINED3DSPSM_ABSNEG:
919 src = ureg_negate(ureg_abs(src));
920 break;
921 case NINED3DSPSM_NEG:
922 src = ureg_negate(src);
923 break;
924 case NINED3DSPSM_BIAS:
925 tmp = tx_scratch(tx);
926 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
927 src = ureg_src(tmp);
928 break;
929 case NINED3DSPSM_BIASNEG:
930 tmp = tx_scratch(tx);
931 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
932 src = ureg_src(tmp);
933 break;
934 case NINED3DSPSM_NOT:
935 if (tx->native_integers) {
936 tmp = tx_scratch(tx);
937 ureg_NOT(ureg, tmp, src);
938 src = ureg_src(tmp);
939 break;
940 }
941 /* fall through */
942 case NINED3DSPSM_COMP:
943 tmp = tx_scratch(tx);
944 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
945 src = ureg_src(tmp);
946 break;
947 case NINED3DSPSM_DZ:
948 case NINED3DSPSM_DW:
949 /* handled in instruction */
950 break;
951 case NINED3DSPSM_SIGN:
952 tmp = tx_scratch(tx);
953 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
954 src = ureg_src(tmp);
955 break;
956 case NINED3DSPSM_SIGNNEG:
957 tmp = tx_scratch(tx);
958 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
959 src = ureg_src(tmp);
960 break;
961 case NINED3DSPSM_X2:
962 tmp = tx_scratch(tx);
963 ureg_ADD(ureg, tmp, src, src);
964 src = ureg_src(tmp);
965 break;
966 case NINED3DSPSM_X2NEG:
967 tmp = tx_scratch(tx);
968 ureg_ADD(ureg, tmp, src, src);
969 src = ureg_negate(ureg_src(tmp));
970 break;
971 default:
972 assert(param->mod == NINED3DSPSM_NONE);
973 break;
974 }
975
976 return src;
977 }
978
979 static struct ureg_dst
980 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
981 {
982 struct ureg_dst dst;
983
984 switch (param->file)
985 {
986 case D3DSPR_TEMP:
987 assert(!param->rel);
988 tx_temp_alloc(tx, param->idx);
989 dst = tx->regs.r[param->idx];
990 break;
991 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
992 case D3DSPR_ADDR:
993 assert(!param->rel);
994 if (tx->version.major < 2 && !IS_VS) {
995 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
996 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
997 dst = tx->regs.tS[param->idx];
998 } else
999 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1000 tx_texcoord_alloc(tx, param->idx);
1001 dst = ureg_dst(tx->regs.vT[param->idx]);
1002 } else {
1003 tx_addr_alloc(tx, param->idx);
1004 dst = tx->regs.a;
1005 }
1006 break;
1007 case D3DSPR_RASTOUT:
1008 assert(!param->rel);
1009 switch (param->idx) {
1010 case 0:
1011 if (ureg_dst_is_undef(tx->regs.oPos))
1012 tx->regs.oPos =
1013 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1014 dst = tx->regs.oPos;
1015 break;
1016 case 1:
1017 if (ureg_dst_is_undef(tx->regs.oFog))
1018 tx->regs.oFog =
1019 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
1020 dst = tx->regs.oFog;
1021 break;
1022 case 2:
1023 if (ureg_dst_is_undef(tx->regs.oPts))
1024 tx->regs.oPts =
1025 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
1026 dst = tx->regs.oPts;
1027 break;
1028 default:
1029 assert(0);
1030 break;
1031 }
1032 break;
1033 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1034 case D3DSPR_OUTPUT:
1035 if (tx->version.major < 3) {
1036 assert(!param->rel);
1037 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1038 } else {
1039 assert(!param->rel); /* TODO */
1040 assert(param->idx < Elements(tx->regs.o));
1041 dst = tx->regs.o[param->idx];
1042 }
1043 break;
1044 case D3DSPR_ATTROUT: /* VS */
1045 case D3DSPR_COLOROUT: /* PS */
1046 assert(param->idx >= 0 && param->idx < 4);
1047 assert(!param->rel);
1048 tx->info->rt_mask |= 1 << param->idx;
1049 if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
1050 tx->regs.oCol[param->idx] =
1051 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1052 dst = tx->regs.oCol[param->idx];
1053 if (IS_VS && tx->version.major < 3)
1054 dst = ureg_saturate(dst);
1055 break;
1056 case D3DSPR_DEPTHOUT:
1057 assert(!param->rel);
1058 if (ureg_dst_is_undef(tx->regs.oDepth))
1059 tx->regs.oDepth =
1060 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1061 TGSI_WRITEMASK_Z);
1062 dst = tx->regs.oDepth; /* XXX: must write .z component */
1063 break;
1064 case D3DSPR_PREDICATE:
1065 assert(!param->rel);
1066 tx_pred_alloc(tx, param->idx);
1067 dst = tx->regs.p;
1068 break;
1069 case D3DSPR_TEMPFLOAT16:
1070 DBG("unhandled D3DSPR: %u\n", param->file);
1071 break;
1072 default:
1073 assert(!"invalid dst D3DSPR");
1074 break;
1075 }
1076 if (param->rel)
1077 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1078
1079 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1080 dst = ureg_writemask(dst, param->mask);
1081 if (param->mod & NINED3DSPDM_SATURATE)
1082 dst = ureg_saturate(dst);
1083
1084 return dst;
1085 }
1086
1087 static struct ureg_dst
1088 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1089 {
1090 if (param->shift) {
1091 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1092 return tx->regs.tdst;
1093 }
1094 return _tx_dst_param(tx, param);
1095 }
1096
1097 static void
1098 tx_apply_dst0_modifiers(struct shader_translator *tx)
1099 {
1100 struct ureg_dst rdst;
1101 float f;
1102
1103 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1104 return;
1105 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1106
1107 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1108
1109 if (tx->insn.dst[0].shift < 0)
1110 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1111 else
1112 f = 1 << tx->insn.dst[0].shift;
1113
1114 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1115 }
1116
1117 static struct ureg_src
1118 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1119 {
1120 struct ureg_src src;
1121
1122 assert(!param->shift);
1123 assert(!(param->mod & NINED3DSPDM_SATURATE));
1124
1125 switch (param->file) {
1126 case D3DSPR_INPUT:
1127 if (IS_VS) {
1128 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1129 } else {
1130 assert(!param->rel);
1131 assert(param->idx < Elements(tx->regs.v));
1132 src = tx->regs.v[param->idx];
1133 }
1134 break;
1135 default:
1136 src = ureg_src(tx_dst_param(tx, param));
1137 break;
1138 }
1139 if (param->rel)
1140 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1141
1142 if (!param->mask)
1143 WARN("mask is 0, using identity swizzle\n");
1144
1145 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1146 char s[4];
1147 int n;
1148 int c;
1149 for (n = 0, c = 0; c < 4; ++c)
1150 if (param->mask & (1 << c))
1151 s[n++] = c;
1152 assert(n);
1153 for (c = n; c < 4; ++c)
1154 s[c] = s[n - 1];
1155 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1156 }
1157 return src;
1158 }
1159
1160 static HRESULT
1161 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1162 {
1163 struct ureg_program *ureg = tx->ureg;
1164 struct ureg_dst dst;
1165 struct ureg_src src[2];
1166 unsigned i;
1167
1168 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1169 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1170 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1171
1172 for (i = 0; i < n; i++, src[1].Index++)
1173 {
1174 const unsigned m = (1 << i);
1175
1176 if (!(dst.WriteMask & m))
1177 continue;
1178
1179 /* XXX: src == dst case ? */
1180
1181 switch (k) {
1182 case 3:
1183 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1184 break;
1185 case 4:
1186 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1187 break;
1188 default:
1189 DBG("invalid operation: M%ux%u\n", m, n);
1190 break;
1191 }
1192 }
1193
1194 return D3D_OK;
1195 }
1196
1197 #define VNOTSUPPORTED 0, 0
1198 #define V(maj, min) (((maj) << 8) | (min))
1199
1200 static INLINE const char *
1201 d3dsio_to_string( unsigned opcode )
1202 {
1203 static const char *names[] = {
1204 "NOP",
1205 "MOV",
1206 "ADD",
1207 "SUB",
1208 "MAD",
1209 "MUL",
1210 "RCP",
1211 "RSQ",
1212 "DP3",
1213 "DP4",
1214 "MIN",
1215 "MAX",
1216 "SLT",
1217 "SGE",
1218 "EXP",
1219 "LOG",
1220 "LIT",
1221 "DST",
1222 "LRP",
1223 "FRC",
1224 "M4x4",
1225 "M4x3",
1226 "M3x4",
1227 "M3x3",
1228 "M3x2",
1229 "CALL",
1230 "CALLNZ",
1231 "LOOP",
1232 "RET",
1233 "ENDLOOP",
1234 "LABEL",
1235 "DCL",
1236 "POW",
1237 "CRS",
1238 "SGN",
1239 "ABS",
1240 "NRM",
1241 "SINCOS",
1242 "REP",
1243 "ENDREP",
1244 "IF",
1245 "IFC",
1246 "ELSE",
1247 "ENDIF",
1248 "BREAK",
1249 "BREAKC",
1250 "MOVA",
1251 "DEFB",
1252 "DEFI",
1253 NULL,
1254 NULL,
1255 NULL,
1256 NULL,
1257 NULL,
1258 NULL,
1259 NULL,
1260 NULL,
1261 NULL,
1262 NULL,
1263 NULL,
1264 NULL,
1265 NULL,
1266 NULL,
1267 NULL,
1268 "TEXCOORD",
1269 "TEXKILL",
1270 "TEX",
1271 "TEXBEM",
1272 "TEXBEML",
1273 "TEXREG2AR",
1274 "TEXREG2GB",
1275 "TEXM3x2PAD",
1276 "TEXM3x2TEX",
1277 "TEXM3x3PAD",
1278 "TEXM3x3TEX",
1279 NULL,
1280 "TEXM3x3SPEC",
1281 "TEXM3x3VSPEC",
1282 "EXPP",
1283 "LOGP",
1284 "CND",
1285 "DEF",
1286 "TEXREG2RGB",
1287 "TEXDP3TEX",
1288 "TEXM3x2DEPTH",
1289 "TEXDP3",
1290 "TEXM3x3",
1291 "TEXDEPTH",
1292 "CMP",
1293 "BEM",
1294 "DP2ADD",
1295 "DSX",
1296 "DSY",
1297 "TEXLDD",
1298 "SETP",
1299 "TEXLDL",
1300 "BREAKP"
1301 };
1302
1303 if (opcode < Elements(names)) return names[opcode];
1304
1305 switch (opcode) {
1306 case D3DSIO_PHASE: return "PHASE";
1307 case D3DSIO_COMMENT: return "COMMENT";
1308 case D3DSIO_END: return "END";
1309 default:
1310 return NULL;
1311 }
1312 }
1313
1314 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1315 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1316 (inst).vert_version.max | \
1317 (inst).frag_version.min | \
1318 (inst).frag_version.max)
1319
1320 #define SPECIAL(name) \
1321 NineTranslateInstruction_##name
1322
1323 #define DECL_SPECIAL(name) \
1324 static HRESULT \
1325 NineTranslateInstruction_##name( struct shader_translator *tx )
1326
1327 static HRESULT
1328 NineTranslateInstruction_Generic(struct shader_translator *);
1329
1330 DECL_SPECIAL(M4x4)
1331 {
1332 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1333 }
1334
1335 DECL_SPECIAL(M4x3)
1336 {
1337 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1338 }
1339
1340 DECL_SPECIAL(M3x4)
1341 {
1342 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1343 }
1344
1345 DECL_SPECIAL(M3x3)
1346 {
1347 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1348 }
1349
1350 DECL_SPECIAL(M3x2)
1351 {
1352 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1353 }
1354
1355 DECL_SPECIAL(CMP)
1356 {
1357 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1358 tx_src_param(tx, &tx->insn.src[0]),
1359 tx_src_param(tx, &tx->insn.src[2]),
1360 tx_src_param(tx, &tx->insn.src[1]));
1361 return D3D_OK;
1362 }
1363
1364 DECL_SPECIAL(CND)
1365 {
1366 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1367 struct ureg_dst cgt;
1368 struct ureg_src cnd;
1369
1370 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4) {
1371 ureg_MOV(tx->ureg,
1372 dst, tx_src_param(tx, &tx->insn.src[1]));
1373 return D3D_OK;
1374 }
1375
1376 cnd = tx_src_param(tx, &tx->insn.src[0]);
1377 #ifdef NINE_TGSI_LAZY_R600
1378 cgt = tx_scratch(tx);
1379
1380 if (tx->version.major == 1 && tx->version.minor < 4) {
1381 cgt.WriteMask = TGSI_WRITEMASK_W;
1382 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1383 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1384 } else {
1385 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1386 }
1387 ureg_CMP(tx->ureg, dst,
1388 tx_src_param(tx, &tx->insn.src[1]),
1389 tx_src_param(tx, &tx->insn.src[2]), ureg_negate(cnd));
1390 #else
1391 if (tx->version.major == 1 && tx->version.minor < 4)
1392 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1393 ureg_CND(tx->ureg, dst,
1394 tx_src_param(tx, &tx->insn.src[1]),
1395 tx_src_param(tx, &tx->insn.src[2]), cnd);
1396 #endif
1397 return D3D_OK;
1398 }
1399
1400 DECL_SPECIAL(CALL)
1401 {
1402 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1403 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1404 return D3D_OK;
1405 }
1406
1407 DECL_SPECIAL(CALLNZ)
1408 {
1409 struct ureg_program *ureg = tx->ureg;
1410 struct ureg_dst tmp = tx_scratch_scalar(tx);
1411 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1412
1413 /* NOTE: source should be const bool, so we can use NOT/SUB instead of [U]SNE 0 */
1414 if (!tx->insn.flags) {
1415 if (tx->native_integers)
1416 ureg_NOT(ureg, tmp, src);
1417 else
1418 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1419 }
1420 ureg_IF(ureg, tx->insn.flags ? src : tx_src_scalar(tmp), tx_cond(tx));
1421 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1422 tx_endcond(tx);
1423 ureg_ENDIF(ureg);
1424 return D3D_OK;
1425 }
1426
1427 DECL_SPECIAL(MOV_vs1x)
1428 {
1429 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1430 ureg_ARL(tx->ureg,
1431 tx_dst_param(tx, &tx->insn.dst[0]),
1432 tx_src_param(tx, &tx->insn.src[0]));
1433 return D3D_OK;
1434 }
1435 return NineTranslateInstruction_Generic(tx);
1436 }
1437
1438 DECL_SPECIAL(LOOP)
1439 {
1440 struct ureg_program *ureg = tx->ureg;
1441 unsigned *label;
1442 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1443 struct ureg_src iter = ureg_scalar(src, TGSI_SWIZZLE_X);
1444 struct ureg_src init = ureg_scalar(src, TGSI_SWIZZLE_Y);
1445 struct ureg_src step = ureg_scalar(src, TGSI_SWIZZLE_Z);
1446 struct ureg_dst ctr;
1447 struct ureg_dst tmp = tx_scratch_scalar(tx);
1448
1449 label = tx_bgnloop(tx);
1450 ctr = tx_get_loopctr(tx);
1451
1452 ureg_MOV(tx->ureg, ctr, init);
1453 ureg_BGNLOOP(tx->ureg, label);
1454 if (tx->native_integers) {
1455 /* we'll let the backend pull up that MAD ... */
1456 ureg_UMAD(ureg, tmp, iter, step, init);
1457 ureg_USEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
1458 #ifdef NINE_TGSI_LAZY_DEVS
1459 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1460 #endif
1461 } else {
1462 /* can't simply use SGE for precision because step might be negative */
1463 ureg_MAD(ureg, tmp, iter, step, init);
1464 ureg_SEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
1465 #ifdef NINE_TGSI_LAZY_DEVS
1466 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1467 #endif
1468 }
1469 #ifdef NINE_TGSI_LAZY_DEVS
1470 ureg_BRK(ureg);
1471 tx_endcond(tx);
1472 ureg_ENDIF(ureg);
1473 #else
1474 ureg_BREAKC(ureg, tx_src_scalar(tmp));
1475 #endif
1476 if (tx->native_integers) {
1477 ureg_UARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
1478 ureg_UADD(ureg, ctr, tx_src_scalar(ctr), step);
1479 } else {
1480 ureg_ARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
1481 ureg_ADD(ureg, ctr, tx_src_scalar(ctr), step);
1482 }
1483 return D3D_OK;
1484 }
1485
1486 DECL_SPECIAL(RET)
1487 {
1488 ureg_RET(tx->ureg);
1489 return D3D_OK;
1490 }
1491
1492 DECL_SPECIAL(ENDLOOP)
1493 {
1494 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1495 return D3D_OK;
1496 }
1497
1498 DECL_SPECIAL(LABEL)
1499 {
1500 unsigned k = tx->num_inst_labels;
1501 unsigned n = tx->insn.src[0].idx;
1502 assert(n < 2048);
1503 if (n >= k)
1504 tx->inst_labels = REALLOC(tx->inst_labels,
1505 k * sizeof(tx->inst_labels[0]),
1506 n * sizeof(tx->inst_labels[0]));
1507
1508 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1509 return D3D_OK;
1510 }
1511
1512 DECL_SPECIAL(SINCOS)
1513 {
1514 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1515 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1516
1517 assert(!(dst.WriteMask & 0xc));
1518
1519 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1520 ureg_SCS(tx->ureg, dst, src);
1521 return D3D_OK;
1522 }
1523
1524 DECL_SPECIAL(SGN)
1525 {
1526 ureg_SSG(tx->ureg,
1527 tx_dst_param(tx, &tx->insn.dst[0]),
1528 tx_src_param(tx, &tx->insn.src[0]));
1529 return D3D_OK;
1530 }
1531
1532 DECL_SPECIAL(REP)
1533 {
1534 struct ureg_program *ureg = tx->ureg;
1535 unsigned *label;
1536 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1537 struct ureg_dst ctr;
1538 struct ureg_dst tmp = tx_scratch_scalar(tx);
1539 struct ureg_src imm =
1540 tx->native_integers ? ureg_imm1u(ureg, 0) : ureg_imm1f(ureg, 0.0f);
1541
1542 label = tx_bgnloop(tx);
1543 ctr = tx_get_loopctr(tx);
1544
1545 /* NOTE: rep must be constant, so we don't have to save the count */
1546 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1547
1548 ureg_MOV(ureg, ctr, imm);
1549 ureg_BGNLOOP(ureg, label);
1550 if (tx->native_integers)
1551 {
1552 ureg_USGE(ureg, tmp, tx_src_scalar(ctr), rep);
1553 #ifdef NINE_TGSI_LAZY_DEVS
1554 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1555 #endif
1556 }
1557 else
1558 {
1559 ureg_SGE(ureg, tmp, tx_src_scalar(ctr), rep);
1560 #ifdef NINE_TGSI_LAZY_DEVS
1561 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1562 #endif
1563 }
1564 #ifdef NINE_TGSI_LAZY_DEVS
1565 ureg_BRK(ureg);
1566 tx_endcond(tx);
1567 ureg_ENDIF(ureg);
1568 #else
1569 ureg_BREAKC(ureg, tx_src_scalar(tmp));
1570 #endif
1571
1572 if (tx->native_integers) {
1573 ureg_UADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1u(ureg, 1));
1574 } else {
1575 ureg_ADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1f(ureg, 1.0f));
1576 }
1577
1578 return D3D_OK;
1579 }
1580
1581 DECL_SPECIAL(ENDREP)
1582 {
1583 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1584 return D3D_OK;
1585 }
1586
1587 DECL_SPECIAL(ENDIF)
1588 {
1589 tx_endcond(tx);
1590 ureg_ENDIF(tx->ureg);
1591 return D3D_OK;
1592 }
1593
1594 DECL_SPECIAL(IF)
1595 {
1596 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1597
1598 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1599 ureg_UIF(tx->ureg, src, tx_cond(tx));
1600 else
1601 ureg_IF(tx->ureg, src, tx_cond(tx));
1602
1603 return D3D_OK;
1604 }
1605
1606 static INLINE unsigned
1607 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1608 {
1609 switch (flags) {
1610 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1611 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1612 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1613 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1614 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1615 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1616 default:
1617 assert(!"invalid comparison flags");
1618 return TGSI_OPCODE_SGT;
1619 }
1620 }
1621
1622 DECL_SPECIAL(IFC)
1623 {
1624 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1625 struct ureg_src src[2];
1626 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1627 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1628 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1629 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1630 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1631 return D3D_OK;
1632 }
1633
1634 DECL_SPECIAL(ELSE)
1635 {
1636 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1637 return D3D_OK;
1638 }
1639
1640 DECL_SPECIAL(BREAKC)
1641 {
1642 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1643 struct ureg_src src[2];
1644 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1645 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1646 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1647 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1648 #ifdef NINE_TGSI_LAZY_DEVS
1649 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1650 ureg_BRK(tx->ureg);
1651 tx_endcond(tx);
1652 ureg_ENDIF(tx->ureg);
1653 #else
1654 ureg_BREAKC(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
1655 #endif
1656 return D3D_OK;
1657 }
1658
1659 static const char *sm1_declusage_names[] =
1660 {
1661 [D3DDECLUSAGE_POSITION] = "POSITION",
1662 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1663 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1664 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1665 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1666 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1667 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1668 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1669 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1670 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1671 [D3DDECLUSAGE_COLOR] = "COLOR",
1672 [D3DDECLUSAGE_FOG] = "FOG",
1673 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1674 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1675 };
1676
1677 static INLINE unsigned
1678 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1679 {
1680 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1681 }
1682
1683 static void
1684 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1685 boolean tc,
1686 struct sm1_semantic *dcl)
1687 {
1688 const unsigned generic_base = tc ? 0 : 8; /* TEXCOORD[0..7] */
1689
1690 sem->Name = TGSI_SEMANTIC_GENERIC;
1691 sem->Index = 0;
1692
1693 /* TGSI_SEMANTIC_GENERIC assignments (+8 if !PIPE_CAP_TGSI_TEXCOORD):
1694 * Try to put frequently used semantics at low GENERIC indices.
1695 *
1696 * POSITION[1..4]: 17, 27, 28, 29
1697 * COLOR[2..4]: 14, 15, 26
1698 * TEXCOORD[8..15]: 10, 11, 18, 19, 20, 21, 22, 23
1699 * BLENDWEIGHT[0..3]: 0, 4, 8, 12
1700 * BLENDINDICES[0..3]: 1, 5, 9, 13
1701 * NORMAL[0..1]: 2, 6
1702 * TANGENT[0]: 3, 24
1703 * BINORMAL[0]: 7, 25
1704 * TESSFACTOR[0]: 16
1705 */
1706
1707 switch (dcl->usage) {
1708 case D3DDECLUSAGE_POSITION:
1709 case D3DDECLUSAGE_POSITIONT:
1710 case D3DDECLUSAGE_DEPTH:
1711 sem->Name = TGSI_SEMANTIC_POSITION;
1712 assert(dcl->usage_idx <= 4);
1713 if (dcl->usage_idx == 1) {
1714 sem->Name = TGSI_SEMANTIC_GENERIC;
1715 sem->Index = generic_base + 17;
1716 } else
1717 if (dcl->usage_idx >= 2) {
1718 sem->Name = TGSI_SEMANTIC_GENERIC;
1719 sem->Index = generic_base + 27 + (dcl->usage_idx - 2);
1720 }
1721 break;
1722 case D3DDECLUSAGE_COLOR:
1723 assert(dcl->usage_idx <= 4);
1724 if (dcl->usage_idx < 2) {
1725 sem->Name = TGSI_SEMANTIC_COLOR;
1726 sem->Index = dcl->usage_idx;
1727 } else
1728 if (dcl->usage_idx < 4) {
1729 sem->Index = generic_base + 14 + (dcl->usage_idx - 2);
1730 } else {
1731 sem->Index = generic_base + 26;
1732 }
1733 break;
1734 case D3DDECLUSAGE_FOG:
1735 sem->Name = TGSI_SEMANTIC_FOG;
1736 assert(dcl->usage_idx == 0);
1737 break;
1738 case D3DDECLUSAGE_PSIZE:
1739 sem->Name = TGSI_SEMANTIC_PSIZE;
1740 assert(dcl->usage_idx == 0);
1741 break;
1742 case D3DDECLUSAGE_TEXCOORD:
1743 assert(dcl->usage_idx < 16);
1744 if (dcl->usage_idx < 8) {
1745 if (tc)
1746 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1747 sem->Index = dcl->usage_idx;
1748 } else
1749 if (dcl->usage_idx < 10) {
1750 sem->Index = generic_base + 10 + (dcl->usage_idx - 8);
1751 } else {
1752 sem->Index = generic_base + 18 + (dcl->usage_idx - 10);
1753 }
1754 break;
1755 case D3DDECLUSAGE_BLENDWEIGHT: /* 0, 4, 8, 12 */
1756 assert(dcl->usage_idx < 4);
1757 sem->Index = generic_base + dcl->usage_idx * 4;
1758 break;
1759 case D3DDECLUSAGE_BLENDINDICES: /* 1, 5, 9, 13 */
1760 assert(dcl->usage_idx < 4);
1761 sem->Index = generic_base + dcl->usage_idx * 4 + 1;
1762 break;
1763 case D3DDECLUSAGE_NORMAL: /* 2, 3 */
1764 assert(dcl->usage_idx < 2);
1765 sem->Index = generic_base + 2 + dcl->usage_idx * 4;
1766 break;
1767 case D3DDECLUSAGE_TANGENT:
1768 /* Yes these are weird, but we try to fit the more frequently used
1769 * into lower slots. */
1770 assert(dcl->usage_idx <= 1);
1771 sem->Index = generic_base + (dcl->usage_idx ? 24 : 3);
1772 break;
1773 case D3DDECLUSAGE_BINORMAL:
1774 assert(dcl->usage_idx <= 1);
1775 sem->Index = generic_base + (dcl->usage_idx ? 25 : 7);
1776 break;
1777 case D3DDECLUSAGE_TESSFACTOR:
1778 assert(dcl->usage_idx == 0);
1779 sem->Index = generic_base + 16;
1780 break;
1781 case D3DDECLUSAGE_SAMPLE:
1782 sem->Name = TGSI_SEMANTIC_COUNT;
1783 break;
1784 default:
1785 assert(!"Invalid DECLUSAGE.");
1786 break;
1787 }
1788 }
1789
1790 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
1791 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
1792 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
1793 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1794 static INLINE unsigned
1795 d3dstt_to_tgsi_tex(BYTE sampler_type)
1796 {
1797 switch (sampler_type) {
1798 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1799 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1800 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1801 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1802 default:
1803 assert(0);
1804 return TGSI_TEXTURE_UNKNOWN;
1805 }
1806 }
1807 static INLINE unsigned
1808 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1809 {
1810 switch (sampler_type) {
1811 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1812 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1813 case NINED3DSTT_VOLUME:
1814 case NINED3DSTT_CUBE:
1815 default:
1816 assert(0);
1817 return TGSI_TEXTURE_UNKNOWN;
1818 }
1819 }
1820 static INLINE unsigned
1821 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1822 {
1823 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1824 case 1: return TGSI_TEXTURE_1D;
1825 case 0: return TGSI_TEXTURE_2D;
1826 case 3: return TGSI_TEXTURE_3D;
1827 default:
1828 return TGSI_TEXTURE_CUBE;
1829 }
1830 }
1831
1832 static const char *
1833 sm1_sampler_type_name(BYTE sampler_type)
1834 {
1835 switch (sampler_type) {
1836 case NINED3DSTT_1D: return "1D";
1837 case NINED3DSTT_2D: return "2D";
1838 case NINED3DSTT_VOLUME: return "VOLUME";
1839 case NINED3DSTT_CUBE: return "CUBE";
1840 default:
1841 return "(D3DSTT_?)";
1842 }
1843 }
1844
1845 static INLINE unsigned
1846 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1847 {
1848 switch (sem->Name) {
1849 case TGSI_SEMANTIC_POSITION:
1850 case TGSI_SEMANTIC_NORMAL:
1851 return TGSI_INTERPOLATE_LINEAR;
1852 case TGSI_SEMANTIC_BCOLOR:
1853 case TGSI_SEMANTIC_COLOR:
1854 case TGSI_SEMANTIC_FOG:
1855 case TGSI_SEMANTIC_GENERIC:
1856 case TGSI_SEMANTIC_TEXCOORD:
1857 case TGSI_SEMANTIC_CLIPDIST:
1858 case TGSI_SEMANTIC_CLIPVERTEX:
1859 return TGSI_INTERPOLATE_PERSPECTIVE;
1860 case TGSI_SEMANTIC_EDGEFLAG:
1861 case TGSI_SEMANTIC_FACE:
1862 case TGSI_SEMANTIC_INSTANCEID:
1863 case TGSI_SEMANTIC_PCOORD:
1864 case TGSI_SEMANTIC_PRIMID:
1865 case TGSI_SEMANTIC_PSIZE:
1866 case TGSI_SEMANTIC_VERTEXID:
1867 return TGSI_INTERPOLATE_CONSTANT;
1868 default:
1869 assert(0);
1870 return TGSI_INTERPOLATE_CONSTANT;
1871 }
1872 }
1873
1874 DECL_SPECIAL(DCL)
1875 {
1876 struct ureg_program *ureg = tx->ureg;
1877 boolean is_input;
1878 boolean is_sampler;
1879 struct tgsi_declaration_semantic tgsi;
1880 struct sm1_semantic sem;
1881 sm1_read_semantic(tx, &sem);
1882
1883 is_input = sem.reg.file == D3DSPR_INPUT;
1884 is_sampler =
1885 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1886
1887 DUMP("DCL ");
1888 sm1_dump_dst_param(&sem.reg);
1889 if (is_sampler)
1890 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1891 else
1892 if (tx->version.major >= 3)
1893 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1894 else
1895 if (sem.usage | sem.usage_idx)
1896 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1897 else
1898 DUMP("\n");
1899
1900 if (is_sampler) {
1901 const unsigned m = 1 << sem.reg.idx;
1902 ureg_DECL_sampler(ureg, sem.reg.idx);
1903 tx->info->sampler_mask |= m;
1904 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1905 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1906 d3dstt_to_tgsi_tex(sem.sampler_type);
1907 return D3D_OK;
1908 }
1909
1910 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1911 if (IS_VS) {
1912 if (is_input) {
1913 /* linkage outside of shader with vertex declaration */
1914 ureg_DECL_vs_input(ureg, sem.reg.idx);
1915 assert(sem.reg.idx < Elements(tx->info->input_map));
1916 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1917 tx->info->num_inputs = sem.reg.idx + 1;
1918 /* NOTE: preserving order in case of indirect access */
1919 } else
1920 if (tx->version.major >= 3) {
1921 /* SM2 output semantic determined by file */
1922 assert(sem.reg.mask != 0);
1923 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1924 tx->info->position_t = TRUE;
1925 assert(sem.reg.idx < Elements(tx->regs.o));
1926 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1927 ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1928
1929 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1930 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1931 }
1932 } else {
1933 if (is_input && tx->version.major >= 3) {
1934 /* SM3 only, SM2 input semantic determined by file */
1935 assert(sem.reg.idx < Elements(tx->regs.v));
1936 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1937 ureg, tgsi.Name, tgsi.Index,
1938 nine_tgsi_to_interp_mode(&tgsi),
1939 0, /* cylwrap */
1940 sem.reg.mod & NINED3DSPDM_CENTROID);
1941 } else
1942 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
1943 /* FragColor or FragDepth */
1944 assert(sem.reg.mask != 0);
1945 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1946 }
1947 }
1948 return D3D_OK;
1949 }
1950
1951 DECL_SPECIAL(DEF)
1952 {
1953 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
1954 return D3D_OK;
1955 }
1956
1957 DECL_SPECIAL(DEFB)
1958 {
1959 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
1960 return D3D_OK;
1961 }
1962
1963 DECL_SPECIAL(DEFI)
1964 {
1965 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
1966 return D3D_OK;
1967 }
1968
1969 DECL_SPECIAL(NRM)
1970 {
1971 struct ureg_program *ureg = tx->ureg;
1972 struct ureg_dst tmp = tx_scratch_scalar(tx);
1973 struct ureg_src nrm = tx_src_scalar(tmp);
1974 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1975 ureg_DP3(ureg, tmp, src, src);
1976 ureg_RSQ(ureg, tmp, nrm);
1977 ureg_MUL(ureg, tx_dst_param(tx, &tx->insn.dst[0]), src, nrm);
1978 return D3D_OK;
1979 }
1980
1981 DECL_SPECIAL(DP2ADD)
1982 {
1983 #ifdef NINE_TGSI_LAZY_R600
1984 struct ureg_dst tmp = tx_scratch_scalar(tx);
1985 struct ureg_src dp2 = tx_src_scalar(tmp);
1986 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1987 struct ureg_src src[3];
1988 int i;
1989 for (i = 0; i < 3; ++i)
1990 src[i] = tx_src_param(tx, &tx->insn.src[i]);
1991 assert_replicate_swizzle(&src[2]);
1992
1993 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
1994 ureg_ADD(tx->ureg, dst, src[2], dp2);
1995
1996 return D3D_OK;
1997 #else
1998 return NineTranslateInstruction_Generic(tx);
1999 #endif
2000 }
2001
2002 DECL_SPECIAL(TEXCOORD)
2003 {
2004 struct ureg_program *ureg = tx->ureg;
2005 const unsigned s = tx->insn.dst[0].idx;
2006 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2007
2008 if (ureg_src_is_undef(tx->regs.vT[s]))
2009 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2010 ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
2011
2012 return D3D_OK;
2013 }
2014
2015 DECL_SPECIAL(TEXCOORD_ps14)
2016 {
2017 struct ureg_program *ureg = tx->ureg;
2018 const unsigned s = tx->insn.src[0].idx;
2019 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2020
2021 if (ureg_src_is_undef(tx->regs.vT[s]))
2022 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2023 ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
2024
2025 return D3D_OK;
2026 }
2027
2028 DECL_SPECIAL(TEXKILL)
2029 {
2030 struct ureg_src reg;
2031
2032 if (tx->version.major > 1 || tx->version.minor > 3) {
2033 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2034 } else {
2035 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2036 reg = tx->regs.vT[tx->insn.dst[0].idx];
2037 }
2038 if (tx->version.major < 2)
2039 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2040 ureg_KILL_IF(tx->ureg, reg);
2041
2042 return D3D_OK;
2043 }
2044
2045 DECL_SPECIAL(TEXBEM)
2046 {
2047 STUB(D3DERR_INVALIDCALL);
2048 }
2049
2050 DECL_SPECIAL(TEXBEML)
2051 {
2052 STUB(D3DERR_INVALIDCALL);
2053 }
2054
2055 DECL_SPECIAL(TEXREG2AR)
2056 {
2057 STUB(D3DERR_INVALIDCALL);
2058 }
2059
2060 DECL_SPECIAL(TEXREG2GB)
2061 {
2062 STUB(D3DERR_INVALIDCALL);
2063 }
2064
2065 DECL_SPECIAL(TEXM3x2PAD)
2066 {
2067 STUB(D3DERR_INVALIDCALL);
2068 }
2069
2070 DECL_SPECIAL(TEXM3x2TEX)
2071 {
2072 STUB(D3DERR_INVALIDCALL);
2073 }
2074
2075 DECL_SPECIAL(TEXM3x3PAD)
2076 {
2077 return D3D_OK; /* this is just padding */
2078 }
2079
2080 DECL_SPECIAL(TEXM3x3SPEC)
2081 {
2082 STUB(D3DERR_INVALIDCALL);
2083 }
2084
2085 DECL_SPECIAL(TEXM3x3VSPEC)
2086 {
2087 STUB(D3DERR_INVALIDCALL);
2088 }
2089
2090 DECL_SPECIAL(TEXREG2RGB)
2091 {
2092 STUB(D3DERR_INVALIDCALL);
2093 }
2094
2095 DECL_SPECIAL(TEXDP3TEX)
2096 {
2097 STUB(D3DERR_INVALIDCALL);
2098 }
2099
2100 DECL_SPECIAL(TEXM3x2DEPTH)
2101 {
2102 STUB(D3DERR_INVALIDCALL);
2103 }
2104
2105 DECL_SPECIAL(TEXDP3)
2106 {
2107 STUB(D3DERR_INVALIDCALL);
2108 }
2109
2110 DECL_SPECIAL(TEXM3x3)
2111 {
2112 struct ureg_program *ureg = tx->ureg;
2113 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2114 struct ureg_src src[4];
2115 int s;
2116 const int m = tx->insn.dst[0].idx - 2;
2117 const int n = tx->insn.src[0].idx;
2118 assert(m >= 0 && m > n);
2119
2120 for (s = m; s <= (m + 2); ++s) {
2121 if (ureg_src_is_undef(tx->regs.vT[s]))
2122 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2123 src[s] = tx->regs.vT[s];
2124 }
2125 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), src[0], ureg_src(tx->regs.tS[n]));
2126 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), src[1], ureg_src(tx->regs.tS[n]));
2127 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), src[2], ureg_src(tx->regs.tS[n]));
2128
2129 switch (tx->insn.opcode) {
2130 case D3DSIO_TEXM3x3:
2131 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2132 break;
2133 case D3DSIO_TEXM3x3TEX:
2134 src[3] = ureg_DECL_sampler(ureg, m + 2);
2135 tx->info->sampler_mask |= 1 << (m + 2);
2136 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), src[3]);
2137 break;
2138 default:
2139 return D3DERR_INVALIDCALL;
2140 }
2141 return D3D_OK;
2142 }
2143
2144 DECL_SPECIAL(TEXDEPTH)
2145 {
2146 STUB(D3DERR_INVALIDCALL);
2147 }
2148
2149 DECL_SPECIAL(BEM)
2150 {
2151 STUB(D3DERR_INVALIDCALL);
2152 }
2153
2154 DECL_SPECIAL(TEXLD)
2155 {
2156 struct ureg_program *ureg = tx->ureg;
2157 unsigned target;
2158 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2159 struct ureg_src src[2] = {
2160 tx_src_param(tx, &tx->insn.src[0]),
2161 tx_src_param(tx, &tx->insn.src[1])
2162 };
2163 assert(tx->insn.src[1].idx >= 0 &&
2164 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2165 target = tx->sampler_targets[tx->insn.src[1].idx];
2166
2167 switch (tx->insn.flags) {
2168 case 0:
2169 ureg_TEX(ureg, dst, target, src[0], src[1]);
2170 break;
2171 case NINED3DSI_TEXLD_PROJECT:
2172 ureg_TXP(ureg, dst, target, src[0], src[1]);
2173 break;
2174 case NINED3DSI_TEXLD_BIAS:
2175 ureg_TXB(ureg, dst, target, src[0], src[1]);
2176 break;
2177 default:
2178 assert(0);
2179 return D3DERR_INVALIDCALL;
2180 }
2181 return D3D_OK;
2182 }
2183
2184 DECL_SPECIAL(TEXLD_14)
2185 {
2186 struct ureg_program *ureg = tx->ureg;
2187 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2188 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2189 const unsigned s = tx->insn.dst[0].idx;
2190 const unsigned t = ps1x_sampler_type(tx->info, s);
2191
2192 tx->info->sampler_mask |= 1 << s;
2193 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2194
2195 return D3D_OK;
2196 }
2197
2198 DECL_SPECIAL(TEX)
2199 {
2200 struct ureg_program *ureg = tx->ureg;
2201 const unsigned s = tx->insn.dst[0].idx;
2202 const unsigned t = ps1x_sampler_type(tx->info, s);
2203 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2204 struct ureg_src src[2];
2205
2206 if (ureg_src_is_undef(tx->regs.vT[s]))
2207 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2208
2209 src[0] = tx->regs.vT[s];
2210 src[1] = ureg_DECL_sampler(ureg, s);
2211 tx->info->sampler_mask |= 1 << s;
2212
2213 ureg_TEX(ureg, dst, t, src[0], src[1]);
2214
2215 return D3D_OK;
2216 }
2217
2218 DECL_SPECIAL(TEXLDD)
2219 {
2220 unsigned target;
2221 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2222 struct ureg_src src[4] = {
2223 tx_src_param(tx, &tx->insn.src[0]),
2224 tx_src_param(tx, &tx->insn.src[1]),
2225 tx_src_param(tx, &tx->insn.src[2]),
2226 tx_src_param(tx, &tx->insn.src[3])
2227 };
2228 assert(tx->insn.src[3].idx >= 0 &&
2229 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2230 target = tx->sampler_targets[tx->insn.src[1].idx];
2231
2232 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2233 return D3D_OK;
2234 }
2235
2236 DECL_SPECIAL(TEXLDL)
2237 {
2238 unsigned target;
2239 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2240 struct ureg_src src[2] = {
2241 tx_src_param(tx, &tx->insn.src[0]),
2242 tx_src_param(tx, &tx->insn.src[1])
2243 };
2244 assert(tx->insn.src[3].idx >= 0 &&
2245 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2246 target = tx->sampler_targets[tx->insn.src[1].idx];
2247
2248 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2249 return D3D_OK;
2250 }
2251
2252 DECL_SPECIAL(SETP)
2253 {
2254 STUB(D3DERR_INVALIDCALL);
2255 }
2256
2257 DECL_SPECIAL(BREAKP)
2258 {
2259 STUB(D3DERR_INVALIDCALL);
2260 }
2261
2262 DECL_SPECIAL(PHASE)
2263 {
2264 return D3D_OK; /* we don't care about phase */
2265 }
2266
2267 DECL_SPECIAL(COMMENT)
2268 {
2269 return D3D_OK; /* nothing to do */
2270 }
2271
2272
2273 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
2274 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2275
/*
 * Table of the shader-model opcodes known to the translator.
 *
 * create_op_info_map() scans this table and, for each entry whose version
 * range for the current processor contains the shader's version, stores the
 * entry's position under its D3DSIO_ value.  An opcode may therefore appear
 * several times with different version ranges (MOV, SINCOS, TEXCOORD, TEX,
 * EXPP); when ranges overlap the later entry wins.
 *
 * Entries with a NULL handler are emitted by
 * NineTranslateInstruction_Generic() using the listed TGSI opcode.
 */
2276 struct sm1_op_info inst_table[] =
2277 {
2278 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2279 _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
2280 _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2281 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2282 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2283 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2284 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2285 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2286 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 7 */
2287 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2288 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2289 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2290 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2291 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2292 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2293 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2294 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 15 */
2295 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
2296 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2297 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2298 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2299
2300 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2301 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2302 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2303 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2304 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2305
2306 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALL)),
2307 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALLNZ)),
2308 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2309 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2310 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2311 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(LABEL)),
2312
2313 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2314
2315 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL),
2316 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2317 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2318 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2319 _OPI(NRM, NRM, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2320
2321 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2322 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2323
2324 /* More flow control */
2325 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2326 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2327 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2328 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2329 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2330 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2331 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2332 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2333
2334 _OPI(MOVA, ARR, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2335
2336 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2337 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2338
2339 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2340 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2341 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2342 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2343 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2344 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2345 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEM)),
2346 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEML)),
2347 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2AR)),
2348 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2GB)),
2349 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2PAD)),
2350 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2TEX)),
2351 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3PAD)),
/* NOTE(review): TEXM3x3TEX is the only entry in this group whose handler name
 * differs from its opcode name (it shares SPECIAL(TEXM3x3) with the TEXM3x3
 * entry further down) -- confirm this pairing is intentional. */
2352 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
2353 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3SPEC)),
2354 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3VSPEC)),
2355
2356 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2357 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2358 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2359 _OPI(CND, CND, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2360
2361 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2362
2363 /* More tex stuff */
2364 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXREG2RGB)),
2365 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3TEX)),
2366 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 0, 0, SPECIAL(TEXM3x2DEPTH)),
2367 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3)),
2368 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
2369 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(TEXDEPTH)),
2370
2371 /* Misc */
2372 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2373 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(BEM)),
2374 _OPI(DP2ADD, DP2A, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)), /* for radeons */
2375 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2376 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2377 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2378 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(SETP)),
2379 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2380 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(BREAKP))
2381 };
2382
/* Stand-alone descriptor for D3DSIO_PHASE.  sm1_parse_instruction() selects
 * it directly when the opcode value does not fit inside op_info_map. */
2383 struct sm1_op_info inst_phase =
2384 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2385
/* Stand-alone descriptor for D3DSIO_COMMENT.  sm1_parse_instruction() selects
 * it directly when the opcode value does not fit inside op_info_map. */
2386 struct sm1_op_info inst_comment =
2387 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2388
2389 static void
2390 create_op_info_map(struct shader_translator *tx)
2391 {
2392 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2393 unsigned i;
2394
2395 for (i = 0; i < Elements(tx->op_info_map); ++i)
2396 tx->op_info_map[i] = -1;
2397
2398 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2399 for (i = 0; i < Elements(inst_table); ++i) {
2400 assert(inst_table[i].sio < Elements(tx->op_info_map));
2401 if (inst_table[i].vert_version.min <= version &&
2402 inst_table[i].vert_version.max >= version)
2403 tx->op_info_map[inst_table[i].sio] = i;
2404 }
2405 } else {
2406 for (i = 0; i < Elements(inst_table); ++i) {
2407 assert(inst_table[i].sio < Elements(tx->op_info_map));
2408 if (inst_table[i].frag_version.min <= version &&
2409 inst_table[i].frag_version.max >= version)
2410 tx->op_info_map[inst_table[i].sio] = i;
2411 }
2412 }
2413 }
2414
2415 static INLINE HRESULT
2416 NineTranslateInstruction_Generic(struct shader_translator *tx)
2417 {
2418 struct ureg_dst dst[1];
2419 struct ureg_src src[4];
2420 unsigned i;
2421
2422 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2423 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2424 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2425 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2426
2427 ureg_insn(tx->ureg, tx->insn.info->opcode,
2428 dst, tx->insn.ndst,
2429 src, tx->insn.nsrc);
2430 return D3D_OK;
2431 }
2432
2433 static INLINE DWORD
2434 TOKEN_PEEK(struct shader_translator *tx)
2435 {
2436 return *(tx->parse);
2437 }
2438
2439 static INLINE DWORD
2440 TOKEN_NEXT(struct shader_translator *tx)
2441 {
2442 return *(tx->parse)++;
2443 }
2444
2445 static INLINE void
2446 TOKEN_JUMP(struct shader_translator *tx)
2447 {
2448 if (tx->parse_next && tx->parse != tx->parse_next) {
2449 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2450 tx->parse = tx->parse_next;
2451 }
2452 }
2453
2454 static INLINE boolean
2455 sm1_parse_eof(struct shader_translator *tx)
2456 {
2457 return TOKEN_PEEK(tx) == NINED3DSP_END;
2458 }
2459
2460 static void
2461 sm1_read_version(struct shader_translator *tx)
2462 {
2463 const DWORD tok = TOKEN_NEXT(tx);
2464
2465 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2466 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2467
2468 switch (tok >> 16) {
2469 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2470 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2471 default:
2472 DBG("Invalid shader type: %x\n", tok);
2473 tx->processor = ~0;
2474 break;
2475 }
2476 }
2477
2478 /* This is just to check if we parsed the instruction properly. */
2479 static void
2480 sm1_parse_get_skip(struct shader_translator *tx)
2481 {
2482 const DWORD tok = TOKEN_PEEK(tx);
2483
2484 if (tx->version.major >= 2) {
2485 tx->parse_next = tx->parse + 1 /* this */ +
2486 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2487 } else {
2488 tx->parse_next = NULL; /* TODO: determine from param count */
2489 }
2490 }
2491
2492 static void
2493 sm1_print_comment(const char *comment, UINT size)
2494 {
2495 if (!size)
2496 return;
2497 /* TODO */
2498 }
2499
2500 static void
2501 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2502 {
2503 DWORD tok = TOKEN_PEEK(tx);
2504
2505 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2506 {
2507 const char *comment = "";
2508 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2509 tx->parse += size + 1;
2510
2511 if (print)
2512 sm1_print_comment(comment, size);
2513
2514 tok = TOKEN_PEEK(tx);
2515 }
2516 }
2517
2518 static void
2519 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2520 {
2521 *reg = TOKEN_NEXT(tx);
2522
2523 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2524 {
2525 if (tx->version.major < 2)
2526 *rel = (1 << 31) |
2527 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2528 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2529 (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT);
2530 else
2531 *rel = TOKEN_NEXT(tx);
2532 }
2533 }
2534
2535 static void
2536 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2537 {
2538 dst->file =
2539 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2540 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2541 dst->type = TGSI_RETURN_TYPE_FLOAT;
2542 dst->idx = tok & D3DSP_REGNUM_MASK;
2543 dst->rel = NULL;
2544 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2545 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2546 dst->shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2547 }
2548
2549 static void
2550 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2551 {
2552 src->file =
2553 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2554 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2555 src->type = TGSI_RETURN_TYPE_FLOAT;
2556 src->idx = tok & D3DSP_REGNUM_MASK;
2557 src->rel = NULL;
2558 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2559 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2560
2561 switch (src->file) {
2562 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2563 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2564 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2565 default:
2566 break;
2567 }
2568 }
2569
2570 static void
2571 sm1_parse_immediate(struct shader_translator *tx,
2572 struct sm1_src_param *imm)
2573 {
2574 imm->file = NINED3DSPR_IMMEDIATE;
2575 imm->idx = INT_MIN;
2576 imm->rel = NULL;
2577 imm->swizzle = NINED3DSP_NOSWIZZLE;
2578 imm->mod = 0;
2579 switch (tx->insn.opcode) {
2580 case D3DSIO_DEF:
2581 imm->type = NINED3DSPTYPE_FLOAT4;
2582 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2583 tx->parse += 4;
2584 break;
2585 case D3DSIO_DEFI:
2586 imm->type = NINED3DSPTYPE_INT4;
2587 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2588 tx->parse += 4;
2589 break;
2590 case D3DSIO_DEFB:
2591 imm->type = NINED3DSPTYPE_BOOL;
2592 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2593 tx->parse += 1;
2594 break;
2595 default:
2596 assert(0);
2597 break;
2598 }
2599 }
2600
2601 static void
2602 sm1_read_dst_param(struct shader_translator *tx,
2603 struct sm1_dst_param *dst,
2604 struct sm1_src_param *rel)
2605 {
2606 DWORD tok_dst, tok_rel = 0;
2607
2608 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
2609 sm1_parse_dst_param(dst, tok_dst);
2610 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
2611 sm1_parse_src_param(rel, tok_rel);
2612 dst->rel = rel;
2613 }
2614 }
2615
2616 static void
2617 sm1_read_src_param(struct shader_translator *tx,
2618 struct sm1_src_param *src,
2619 struct sm1_src_param *rel)
2620 {
2621 DWORD tok_src, tok_rel = 0;
2622
2623 sm1_parse_get_param(tx, &tok_src, &tok_rel);
2624 sm1_parse_src_param(src, tok_src);
2625 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
2626 assert(rel);
2627 sm1_parse_src_param(rel, tok_rel);
2628 src->rel = rel;
2629 }
2630 }
2631
2632 static void
2633 sm1_read_semantic(struct shader_translator *tx,
2634 struct sm1_semantic *sem)
2635 {
2636 const DWORD tok_usg = TOKEN_NEXT(tx);
2637 const DWORD tok_dst = TOKEN_NEXT(tx);
2638
2639 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
2640 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
2641 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
2642
2643 sm1_parse_dst_param(&sem->reg, tok_dst);
2644 }
2645
2646 static void
2647 sm1_parse_instruction(struct shader_translator *tx)
2648 {
2649 struct sm1_instruction *insn = &tx->insn;
2650 DWORD tok;
2651 struct sm1_op_info *info = NULL;
2652 unsigned i;
2653
2654 sm1_parse_comments(tx, TRUE);
2655 sm1_parse_get_skip(tx);
2656
2657 tok = TOKEN_NEXT(tx);
2658
2659 insn->opcode = tok & D3DSI_OPCODE_MASK;
2660 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
2661 insn->coissue = !!(tok & D3DSI_COISSUE);
2662 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
2663
2664 if (insn->opcode < Elements(tx->op_info_map)) {
2665 int k = tx->op_info_map[insn->opcode];
2666 if (k >= 0) {
2667 assert(k < Elements(inst_table));
2668 info = &inst_table[k];
2669 }
2670 } else {
2671 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
2672 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
2673 }
2674 if (!info) {
2675 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
2676 TOKEN_JUMP(tx);
2677 return;
2678 }
2679 insn->info = info;
2680 insn->ndst = info->ndst;
2681 insn->nsrc = info->nsrc;
2682
2683 assert(!insn->predicated && "TODO: predicated instructions");
2684
2685 /* check version */
2686 {
2687 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
2688 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
2689 unsigned ver = (tx->version.major << 8) | tx->version.minor;
2690 if (ver < min || ver > max) {
2691 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
2692 min, ver, max);
2693 return;
2694 }
2695 }
2696
2697 for (i = 0; i < insn->ndst; ++i)
2698 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
2699 if (insn->predicated)
2700 sm1_read_src_param(tx, &insn->pred, NULL);
2701 for (i = 0; i < insn->nsrc; ++i)
2702 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
2703
2704 /* parse here so we can dump them before processing */
2705 if (insn->opcode == D3DSIO_DEF ||
2706 insn->opcode == D3DSIO_DEFI ||
2707 insn->opcode == D3DSIO_DEFB)
2708 sm1_parse_immediate(tx, &tx->insn.src[0]);
2709
2710 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
2711 sm1_instruction_check(insn);
2712
2713 if (info->handler)
2714 info->handler(tx);
2715 else
2716 NineTranslateInstruction_Generic(tx);
2717 tx_apply_dst0_modifiers(tx);
2718
2719 tx->num_scratch = 0; /* reset */
2720
2721 TOKEN_JUMP(tx);
2722 }
2723
2724 static void
2725 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
2726 {
2727 unsigned i;
2728
2729 tx->info = info;
2730
2731 tx->byte_code = info->byte_code;
2732 tx->parse = info->byte_code;
2733
2734 for (i = 0; i < Elements(info->input_map); ++i)
2735 info->input_map[i] = NINE_DECLUSAGE_NONE;
2736 info->num_inputs = 0;
2737
2738 info->position_t = FALSE;
2739 info->point_size = FALSE;
2740
2741 tx->info->const_used_size = 0;
2742
2743 info->sampler_mask = 0x0;
2744 info->rt_mask = 0x0;
2745
2746 info->lconstf.data = NULL;
2747 info->lconstf.ranges = NULL;
2748
2749 for (i = 0; i < Elements(tx->regs.aL); ++i) {
2750 tx->regs.aL[i] = ureg_dst_undef();
2751 tx->regs.rL[i] = ureg_dst_undef();
2752 }
2753 tx->regs.a = ureg_dst_undef();
2754 tx->regs.p = ureg_dst_undef();
2755 tx->regs.oDepth = ureg_dst_undef();
2756 tx->regs.vPos = ureg_src_undef();
2757 tx->regs.vFace = ureg_src_undef();
2758 for (i = 0; i < Elements(tx->regs.o); ++i)
2759 tx->regs.o[i] = ureg_dst_undef();
2760 for (i = 0; i < Elements(tx->regs.oCol); ++i)
2761 tx->regs.oCol[i] = ureg_dst_undef();
2762 for (i = 0; i < Elements(tx->regs.vC); ++i)
2763 tx->regs.vC[i] = ureg_src_undef();
2764 for (i = 0; i < Elements(tx->regs.vT); ++i)
2765 tx->regs.vT[i] = ureg_src_undef();
2766
2767 for (i = 0; i < Elements(tx->lconsti); ++i)
2768 tx->lconsti[i].idx = -1;
2769 for (i = 0; i < Elements(tx->lconstb); ++i)
2770 tx->lconstb[i].idx = -1;
2771
2772 sm1_read_version(tx);
2773
2774 info->version = (tx->version.major << 4) | tx->version.minor;
2775
2776 create_op_info_map(tx);
2777 }
2778
2779 static void
2780 tx_dtor(struct shader_translator *tx)
2781 {
2782 if (tx->num_inst_labels)
2783 FREE(tx->inst_labels);
2784 if (tx->lconstf)
2785 FREE(tx->lconstf);
2786 if (tx->regs.r)
2787 FREE(tx->regs.r);
2788 FREE(tx);
2789 }
2790
2791 static INLINE unsigned
2792 tgsi_processor_from_type(unsigned shader_type)
2793 {
2794 switch (shader_type) {
2795 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
2796 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
2797 default:
2798 return ~0;
2799 }
2800 }
2801
/* Query the screen capability PIPE_CAP_<n>.  Expects a variable named
 * `device` to be in scope at the point of use. */
2802 #define GET_CAP(n) device->screen->get_param( \
2803 device->screen, PIPE_CAP_##n)
/* Query the per-shader-stage capability PIPE_SHADER_CAP_<n> for the stage
 * given by info->type.  Expects `device` and `info` to be in scope. */
2804 #define GET_SHADER_CAP(n) device->screen->get_shader_param( \
2805 device->screen, info->type, PIPE_SHADER_CAP_##n)
2806
2807 HRESULT
2808 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
2809 {
2810 struct shader_translator *tx;
2811 HRESULT hr = D3D_OK;
2812 const unsigned processor = tgsi_processor_from_type(info->type);
2813
2814 user_assert(processor != ~0, D3DERR_INVALIDCALL);
2815
2816 tx = CALLOC_STRUCT(shader_translator);
2817 if (!tx)
2818 return E_OUTOFMEMORY;
2819 tx_ctor(tx, info);
2820
2821 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
2822 hr = D3DERR_INVALIDCALL;
2823 DBG("Unsupported shader version: %u.%u !\n",
2824 tx->version.major, tx->version.minor);
2825 goto out;
2826 }
2827 if (tx->processor != processor) {
2828 hr = D3DERR_INVALIDCALL;
2829 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
2830 goto out;
2831 }
2832 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
2833 tx->version.major, tx->version.minor);
2834
2835 tx->ureg = ureg_create(processor);
2836 if (!tx->ureg) {
2837 hr = E_OUTOFMEMORY;
2838 goto out;
2839 }
2840 tx_decl_constants(tx);
2841
2842 tx->native_integers = GET_SHADER_CAP(INTEGERS);
2843 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
2844 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
2845 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
2846 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2847 tx->texcoord_sn = tx->want_texcoord ?
2848 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
2849
2850 /* VS must always write position. Declare it here to make it the 1st output.
2851 * (Some drivers like nv50 are buggy and rely on that.)
2852 */
2853 if (IS_VS) {
2854 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
2855 } else {
2856 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
2857 if (!tx->shift_wpos)
2858 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2859 }
2860
2861 if (!ureg_dst_is_undef(tx->regs.oPts))
2862 info->point_size = TRUE;
2863
2864 while (!sm1_parse_eof(tx))
2865 sm1_parse_instruction(tx);
2866 tx->parse++; /* for byte_size */
2867
2868 if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
2869 ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
2870 ureg_src(tx->regs.r[0]));
2871 info->rt_mask |= 0x1;
2872 }
2873
2874 if (info->position_t)
2875 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
2876
2877 ureg_END(tx->ureg);
2878
2879 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
2880 unsigned count;
2881 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
2882 tgsi_dump(toks, 0);
2883 ureg_free_tokens(toks);
2884 }
2885
2886 /* record local constants */
2887 if (tx->num_lconstf && tx->indirect_const_access) {
2888 struct nine_range *ranges;
2889 float *data;
2890 int *indices;
2891 unsigned i, k, n;
2892
2893 hr = E_OUTOFMEMORY;
2894
2895 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
2896 if (!data)
2897 goto out;
2898 info->lconstf.data = data;
2899
2900 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
2901 if (!indices)
2902 goto out;
2903
2904 /* lazy sort, num_lconstf should be small */
2905 for (n = 0; n < tx->num_lconstf; ++n) {
2906 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
2907 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
2908 k = i;
2909 }
2910 indices[n] = tx->lconstf[k].idx;
2911 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
2912 tx->lconstf[k].idx = INT_MAX;
2913 }
2914
2915 /* count ranges */
2916 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
2917 if (indices[i] != indices[i - 1] + 1)
2918 ++n;
2919 ranges = MALLOC(n * sizeof(ranges[0]));
2920 if (!ranges) {
2921 FREE(indices);
2922 goto out;
2923 }
2924 info->lconstf.ranges = ranges;
2925
2926 k = 0;
2927 ranges[k].bgn = indices[0];
2928 for (i = 1; i < tx->num_lconstf; ++i) {
2929 if (indices[i] != indices[i - 1] + 1) {
2930 ranges[k].next = &ranges[k + 1];
2931 ranges[k].end = indices[i - 1] + 1;
2932 ++k;
2933 ranges[k].bgn = indices[i];
2934 }
2935 }
2936 ranges[k].end = indices[i - 1] + 1;
2937 ranges[k].next = NULL;
2938 assert(n == (k + 1));
2939
2940 FREE(indices);
2941 hr = D3D_OK;
2942 }
2943
2944 if (tx->indirect_const_access)
2945 info->const_used_size = ~0;
2946
2947 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
2948 if (!info->cso) {
2949 hr = D3DERR_DRIVERINTERNALERROR;
2950 FREE(info->lconstf.data);
2951 FREE(info->lconstf.ranges);
2952 goto out;
2953 }
2954
2955 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
2956 out:
2957 tx_dtor(tx);
2958 return hr;
2959 }