Remove useless checks for NULL before freeing
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_ureg.h"
34 #include "tgsi/tgsi_dump.h"
35
/* Debug channel this translation unit reports on. */
#define DBG_CHANNEL DBG_SHADER

/* Opcode-avoidance switches; see the per-line notes. */
#if 1
#define NINE_TGSI_LAZY_DEVS /* don't use TGSI_OPCODE_BREAKC */
#endif
#define NINE_TGSI_LAZY_R600 /* don't use TGSI_OPCODE_DP2A */

/* Raw printf-style dump on DBG_CHANNEL; NULL is passed as the second
 * argument of _nine_debug_printf. */
#define DUMP(...) _nine_debug_printf(DBG_CHANNEL, NULL, __VA_ARGS__)
44
45
46 struct shader_translator;
47
48 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
49
50 static INLINE const char *d3dsio_to_string(unsigned opcode);
51
52
/* SM1 shader type identifiers. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Nesting limits; they size the label and loop-counter arrays
 * in struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Immediate / constant value types. */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Extra register file value, one past the last D3DSPR_x. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operators. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Per-opcode flag bits inside the instruction token. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* TEXLD flag values. */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Write mask bits, one per component. */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, x in the low bits. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_x constants. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Destination modifiers, shifted down to bit 0. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
99
100 /*
101 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
102 * BIAS <= PS 1.4 (x-0.5)
103 * BIASNEG <= PS 1.4 (-(x-0.5))
104 * SIGN <= PS 1.4 (2(x-0.5))
105 * SIGNNEG <= PS 1.4 (-2(x-0.5))
106 * COMP <= PS 1.4 (1-x)
107 * X2 = PS 1.4 (2x)
108 * X2NEG = PS 1.4 (-2x)
109 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
110 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
111 * ABS >= SM 3.0 (abs(x))
112 * ABSNEG >= SM 3.0 (-abs(x))
113 * NOT >= SM 2.0 predication only
114 */
/* Source modifiers, shifted down so they can be used as small indices
 * (e.g. into sm1_mod_str). */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
129
130 static const char *sm1_mod_str[] =
131 {
132 [NINED3DSPSM_NONE] = "",
133 [NINED3DSPSM_NEG] = "-",
134 [NINED3DSPSM_BIAS] = "bias",
135 [NINED3DSPSM_BIASNEG] = "biasneg",
136 [NINED3DSPSM_SIGN] = "sign",
137 [NINED3DSPSM_SIGNNEG] = "signneg",
138 [NINED3DSPSM_COMP] = "comp",
139 [NINED3DSPSM_X2] = "x2",
140 [NINED3DSPSM_X2NEG] = "x2neg",
141 [NINED3DSPSM_DZ] = "dz",
142 [NINED3DSPSM_DW] = "dw",
143 [NINED3DSPSM_ABS] = "abs",
144 [NINED3DSPSM_ABSNEG] = "-abs",
145 [NINED3DSPSM_NOT] = "not"
146 };
147
148 static void
149 sm1_dump_writemask(BYTE mask)
150 {
151 if (mask & 1) DUMP("x"); else DUMP("_");
152 if (mask & 2) DUMP("y"); else DUMP("_");
153 if (mask & 4) DUMP("z"); else DUMP("_");
154 if (mask & 8) DUMP("w"); else DUMP("_");
155 }
156
157 static void
158 sm1_dump_swizzle(BYTE s)
159 {
160 char c[4] = { 'x', 'y', 'z', 'w' };
161 DUMP("%c%c%c%c",
162 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
163 }
164
165 static const char sm1_file_char[] =
166 {
167 [D3DSPR_TEMP] = 'r',
168 [D3DSPR_INPUT] = 'v',
169 [D3DSPR_CONST] = 'c',
170 [D3DSPR_ADDR] = 'A',
171 [D3DSPR_RASTOUT] = 'R',
172 [D3DSPR_ATTROUT] = 'D',
173 [D3DSPR_OUTPUT] = 'o',
174 [D3DSPR_CONSTINT] = 'I',
175 [D3DSPR_COLOROUT] = 'C',
176 [D3DSPR_DEPTHOUT] = 'D',
177 [D3DSPR_SAMPLER] = 's',
178 [D3DSPR_CONST2] = 'c',
179 [D3DSPR_CONST3] = 'c',
180 [D3DSPR_CONST4] = 'c',
181 [D3DSPR_CONSTBOOL] = 'B',
182 [D3DSPR_LOOP] = 'L',
183 [D3DSPR_TEMPFLOAT16] = 'h',
184 [D3DSPR_MISCTYPE] = 'M',
185 [D3DSPR_LABEL] = 'X',
186 [D3DSPR_PREDICATE] = 'p'
187 };
188
189 static void
190 sm1_dump_reg(BYTE file, INT index)
191 {
192 switch (file) {
193 case D3DSPR_LOOP:
194 DUMP("aL");
195 break;
196 case D3DSPR_COLOROUT:
197 DUMP("oC%i", index);
198 break;
199 case D3DSPR_DEPTHOUT:
200 DUMP("oDepth");
201 break;
202 case D3DSPR_RASTOUT:
203 DUMP("oRast%i", index);
204 break;
205 case D3DSPR_CONSTINT:
206 DUMP("iconst[%i]", index);
207 break;
208 case D3DSPR_CONSTBOOL:
209 DUMP("bconst[%i]", index);
210 break;
211 default:
212 DUMP("%c%i", sm1_file_char[file], index);
213 break;
214 }
215 }
216
217 struct sm1_src_param
218 {
219 INT idx;
220 struct sm1_src_param *rel;
221 BYTE file;
222 BYTE swizzle;
223 BYTE mod;
224 BYTE type;
225 union {
226 DWORD d[4];
227 float f[4];
228 int i[4];
229 BOOL b;
230 } imm;
231 };
232 static void
233 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
234
235 struct sm1_dst_param
236 {
237 INT idx;
238 struct sm1_src_param *rel;
239 BYTE file;
240 BYTE mask;
241 BYTE mod;
242 int8_t shift; /* sint4 */
243 BYTE type;
244 };
245
246 static INLINE void
247 assert_replicate_swizzle(const struct ureg_src *reg)
248 {
249 assert(reg->SwizzleY == reg->SwizzleX &&
250 reg->SwizzleZ == reg->SwizzleX &&
251 reg->SwizzleW == reg->SwizzleX);
252 }
253
254 static void
255 sm1_dump_immediate(const struct sm1_src_param *param)
256 {
257 switch (param->type) {
258 case NINED3DSPTYPE_FLOAT4:
259 DUMP("{ %f %f %f %f }",
260 param->imm.f[0], param->imm.f[1],
261 param->imm.f[2], param->imm.f[3]);
262 break;
263 case NINED3DSPTYPE_INT4:
264 DUMP("{ %i %i %i %i }",
265 param->imm.i[0], param->imm.i[1],
266 param->imm.i[2], param->imm.i[3]);
267 break;
268 case NINED3DSPTYPE_BOOL:
269 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
270 break;
271 default:
272 assert(0);
273 break;
274 }
275 }
276
277 static void
278 sm1_dump_src_param(const struct sm1_src_param *param)
279 {
280 if (param->file == NINED3DSPR_IMMEDIATE) {
281 assert(!param->mod &&
282 !param->rel &&
283 param->swizzle == NINED3DSP_NOSWIZZLE);
284 sm1_dump_immediate(param);
285 return;
286 }
287
288 if (param->mod)
289 DUMP("%s(", sm1_mod_str[param->mod]);
290 if (param->rel) {
291 DUMP("%c[", sm1_file_char[param->file]);
292 sm1_dump_src_param(param->rel);
293 DUMP("+%i]", param->idx);
294 } else {
295 sm1_dump_reg(param->file, param->idx);
296 }
297 if (param->mod)
298 DUMP(")");
299 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
300 DUMP(".");
301 sm1_dump_swizzle(param->swizzle);
302 }
303 }
304
305 static void
306 sm1_dump_dst_param(const struct sm1_dst_param *param)
307 {
308 if (param->mod & NINED3DSPDM_SATURATE)
309 DUMP("sat ");
310 if (param->mod & NINED3DSPDM_PARTIALP)
311 DUMP("pp ");
312 if (param->mod & NINED3DSPDM_CENTROID)
313 DUMP("centroid ");
314 if (param->shift < 0)
315 DUMP("/%u ", 1 << -param->shift);
316 if (param->shift > 0)
317 DUMP("*%u ", 1 << param->shift);
318
319 if (param->rel) {
320 DUMP("%c[", sm1_file_char[param->file]);
321 sm1_dump_src_param(param->rel);
322 DUMP("+%i]", param->idx);
323 } else {
324 sm1_dump_reg(param->file, param->idx);
325 }
326 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
327 DUMP(".");
328 sm1_dump_writemask(param->mask);
329 }
330 }
331
332 struct sm1_semantic
333 {
334 struct sm1_dst_param reg;
335 BYTE sampler_type;
336 D3DDECLUSAGE usage;
337 BYTE usage_idx;
338 };
339
340 struct sm1_op_info
341 {
342 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
343 * should be ignored completely */
344 unsigned sio;
345 unsigned opcode; /* TGSI_OPCODE_x */
346
347 /* versions are still set even handler is set */
348 struct {
349 unsigned min;
350 unsigned max;
351 } vert_version, frag_version;
352
353 /* number of regs parsed outside of special handler */
354 unsigned ndst;
355 unsigned nsrc;
356
357 /* some instructions don't map perfectly, so use a special handler */
358 translate_instruction_func handler;
359 };
360
361 struct sm1_instruction
362 {
363 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
364 BYTE flags;
365 BOOL coissue;
366 BOOL predicated;
367 BYTE ndst;
368 BYTE nsrc;
369 struct sm1_src_param src[4];
370 struct sm1_src_param src_rel[4];
371 struct sm1_src_param pred;
372 struct sm1_src_param dst_rel[1];
373 struct sm1_dst_param dst[1];
374
375 struct sm1_op_info *info;
376 };
377
378 static void
379 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
380 {
381 unsigned i;
382
383 /* no info stored for these: */
384 if (insn->opcode == D3DSIO_DCL)
385 return;
386 for (i = 0; i < indent; ++i)
387 DUMP(" ");
388
389 if (insn->predicated) {
390 DUMP("@");
391 sm1_dump_src_param(&insn->pred);
392 DUMP(" ");
393 }
394 DUMP("%s", d3dsio_to_string(insn->opcode));
395 if (insn->flags) {
396 switch (insn->opcode) {
397 case D3DSIO_TEX:
398 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
399 break;
400 default:
401 DUMP("_%x", insn->flags);
402 break;
403 }
404 }
405 if (insn->coissue)
406 DUMP("_co");
407 DUMP(" ");
408
409 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
410 sm1_dump_dst_param(&insn->dst[i]);
411 DUMP(" ");
412 }
413
414 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
415 sm1_dump_src_param(&insn->src[i]);
416 DUMP(" ");
417 }
418 if (insn->opcode == D3DSIO_DEF ||
419 insn->opcode == D3DSIO_DEFI ||
420 insn->opcode == D3DSIO_DEFB)
421 sm1_dump_immediate(&insn->src[0]);
422
423 DUMP("\n");
424 }
425
426 struct sm1_local_const
427 {
428 INT idx;
429 struct ureg_src reg;
430 union {
431 boolean b;
432 float f[4];
433 int32_t i[4];
434 } imm;
435 };
436
437 struct shader_translator
438 {
439 const DWORD *byte_code;
440 const DWORD *parse;
441 const DWORD *parse_next;
442
443 struct ureg_program *ureg;
444
445 /* shader version */
446 struct {
447 BYTE major;
448 BYTE minor;
449 } version;
450 unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
451
452 boolean native_integers;
453 boolean inline_subroutines;
454 boolean lower_preds;
455 boolean want_texcoord;
456 boolean shift_wpos;
457 unsigned texcoord_sn;
458
459 struct sm1_instruction insn; /* current instruction */
460
461 struct {
462 struct ureg_dst *r;
463 struct ureg_dst oPos;
464 struct ureg_dst oFog;
465 struct ureg_dst oPts;
466 struct ureg_dst oCol[4];
467 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
468 struct ureg_dst oDepth;
469 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
470 struct ureg_src vPos;
471 struct ureg_src vFace;
472 struct ureg_src s;
473 struct ureg_dst p;
474 struct ureg_dst a;
475 struct ureg_dst tS[8]; /* texture stage registers */
476 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
477 struct ureg_dst t[5]; /* scratch TEMPs */
478 struct ureg_src vC[2]; /* PS color in */
479 struct ureg_src vT[8]; /* PS texcoord in */
480 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
481 struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* loop ctr ADDR register */
482 } regs;
483 unsigned num_temp; /* Elements(regs.r) */
484 unsigned num_scratch;
485 unsigned loop_depth;
486 unsigned loop_depth_max;
487 unsigned cond_depth;
488 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
489 unsigned cond_labels[NINE_MAX_COND_DEPTH];
490
491 unsigned *inst_labels; /* LABEL op */
492 unsigned num_inst_labels;
493
494 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
495
496 struct sm1_local_const *lconstf;
497 unsigned num_lconstf;
498 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
499 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
500
501 boolean indirect_const_access;
502
503 struct nine_shader_info *info;
504
505 int16_t op_info_map[D3DSIO_BREAKP + 1];
506 };
507
/* Shader-stage tests; both expect a `tx` translator pointer in scope. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
513
514 static void
515 sm1_instruction_check(const struct sm1_instruction *insn)
516 {
517 if (insn->opcode == D3DSIO_CRS)
518 {
519 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
520 {
521 DBG("CRS.mask.w\n");
522 }
523 }
524 }
525
526 static boolean
527 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
528 {
529 INT i;
530 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
531 for (i = 0; i < tx->num_lconstf; ++i) {
532 if (tx->lconstf[i].idx == index) {
533 *src = tx->lconstf[i].reg;
534 return TRUE;
535 }
536 }
537 return FALSE;
538 }
539 static boolean
540 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
541 {
542 assert(index >= 0 && index < NINE_MAX_CONST_I);
543 if (tx->lconsti[index].idx == index)
544 *src = tx->lconsti[index].reg;
545 return tx->lconsti[index].idx == index;
546 }
547 static boolean
548 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
549 {
550 assert(index >= 0 && index < NINE_MAX_CONST_B);
551 if (tx->lconstb[index].idx == index)
552 *src = tx->lconstb[index].reg;
553 return tx->lconstb[index].idx == index;
554 }
555
556 static void
557 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
558 {
559 unsigned n;
560
561 /* Anno1404 sets out of range constants. */
562 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
563 if (index >= NINE_MAX_CONST_F)
564 WARN("lconstf index %i too high, indirect access won't work\n", index);
565
566 for (n = 0; n < tx->num_lconstf; ++n)
567 if (tx->lconstf[n].idx == index)
568 break;
569 if (n == tx->num_lconstf) {
570 if ((n % 8) == 0) {
571 tx->lconstf = REALLOC(tx->lconstf,
572 (n + 0) * sizeof(tx->lconstf[0]),
573 (n + 8) * sizeof(tx->lconstf[0]));
574 assert(tx->lconstf);
575 }
576 tx->num_lconstf++;
577 }
578 tx->lconstf[n].idx = index;
579 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
580
581 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
582 }
583 static void
584 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
585 {
586 assert(index >= 0 && index < NINE_MAX_CONST_I);
587 tx->lconsti[index].idx = index;
588 tx->lconsti[index].reg = tx->native_integers ?
589 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
590 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
591 }
592 static void
593 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
594 {
595 assert(index >= 0 && index < NINE_MAX_CONST_B);
596 tx->lconstb[index].idx = index;
597 tx->lconstb[index].reg = tx->native_integers ?
598 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
599 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
600 }
601
602 static INLINE struct ureg_dst
603 tx_scratch(struct shader_translator *tx)
604 {
605 assert(tx->num_scratch < Elements(tx->regs.t));
606 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
607 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
608 return tx->regs.t[tx->num_scratch++];
609 }
610
611 static INLINE struct ureg_dst
612 tx_scratch_scalar(struct shader_translator *tx)
613 {
614 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
615 }
616
617 static INLINE struct ureg_src
618 tx_src_scalar(struct ureg_dst dst)
619 {
620 struct ureg_src src = ureg_src(dst);
621 int c = ffs(dst.WriteMask) - 1;
622 if (dst.WriteMask == (1 << c))
623 src = ureg_scalar(src, c);
624 return src;
625 }
626
627 /* Need to declare all constants if indirect addressing is used,
628 * otherwise we could scan the shader to determine the maximum.
629 * TODO: It doesn't really matter for nv50 so I won't do the scan,
630 * but radeon drivers might care, if they don't infer it from TGSI.
631 */
632 static void
633 tx_decl_constants(struct shader_translator *tx)
634 {
635 unsigned i, n = 0;
636
637 for (i = 0; i < NINE_MAX_CONST_F; ++i)
638 ureg_DECL_constant(tx->ureg, n++);
639 for (i = 0; i < NINE_MAX_CONST_I; ++i)
640 ureg_DECL_constant(tx->ureg, n++);
641 for (i = 0; i < (NINE_MAX_CONST_B / 4); ++i)
642 ureg_DECL_constant(tx->ureg, n++);
643 }
644
645 static INLINE void
646 tx_temp_alloc(struct shader_translator *tx, INT idx)
647 {
648 assert(idx >= 0);
649 if (idx >= tx->num_temp) {
650 unsigned k = tx->num_temp;
651 unsigned n = idx + 1;
652 tx->regs.r = REALLOC(tx->regs.r,
653 k * sizeof(tx->regs.r[0]),
654 n * sizeof(tx->regs.r[0]));
655 for (; k < n; ++k)
656 tx->regs.r[k] = ureg_dst_undef();
657 tx->num_temp = n;
658 }
659 if (ureg_dst_is_undef(tx->regs.r[idx]))
660 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
661 }
662
663 static INLINE void
664 tx_addr_alloc(struct shader_translator *tx, INT idx)
665 {
666 assert(idx == 0);
667 if (ureg_dst_is_undef(tx->regs.a))
668 tx->regs.a = ureg_DECL_address(tx->ureg);
669 }
670
671 static INLINE void
672 tx_pred_alloc(struct shader_translator *tx, INT idx)
673 {
674 assert(idx == 0);
675 if (ureg_dst_is_undef(tx->regs.p))
676 tx->regs.p = ureg_DECL_predicate(tx->ureg);
677 }
678
679 static INLINE void
680 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
681 {
682 assert(IS_PS);
683 assert(idx >= 0 && idx < Elements(tx->regs.vT));
684 if (ureg_src_is_undef(tx->regs.vT[idx]))
685 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
686 TGSI_INTERPOLATE_PERSPECTIVE);
687 }
688
689 static INLINE unsigned *
690 tx_bgnloop(struct shader_translator *tx)
691 {
692 tx->loop_depth++;
693 if (tx->loop_depth_max < tx->loop_depth)
694 tx->loop_depth_max = tx->loop_depth;
695 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
696 return &tx->loop_labels[tx->loop_depth - 1];
697 }
698
699 static INLINE unsigned *
700 tx_endloop(struct shader_translator *tx)
701 {
702 assert(tx->loop_depth);
703 tx->loop_depth--;
704 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
705 ureg_get_instruction_number(tx->ureg));
706 return &tx->loop_labels[tx->loop_depth];
707 }
708
709 static struct ureg_dst
710 tx_get_loopctr(struct shader_translator *tx)
711 {
712 const unsigned l = tx->loop_depth - 1;
713
714 if (!tx->loop_depth)
715 {
716 DBG("loop counter requested outside of loop\n");
717 return ureg_dst_undef();
718 }
719
720 if (ureg_dst_is_undef(tx->regs.aL[l]))
721 {
722 struct ureg_dst rreg = ureg_DECL_local_temporary(tx->ureg);
723 struct ureg_dst areg = ureg_DECL_address(tx->ureg);
724 unsigned c;
725
726 assert(l % 4 == 0);
727 for (c = l; c < (l + 4) && c < Elements(tx->regs.aL); ++c) {
728 tx->regs.rL[c] = ureg_writemask(rreg, 1 << (c & 3));
729 tx->regs.aL[c] = ureg_writemask(areg, 1 << (c & 3));
730 }
731 }
732 return tx->regs.rL[l];
733 }
734 static struct ureg_dst
735 tx_get_aL(struct shader_translator *tx)
736 {
737 if (!ureg_dst_is_undef(tx_get_loopctr(tx)))
738 return tx->regs.aL[tx->loop_depth - 1];
739 return ureg_dst_undef();
740 }
741
742 static INLINE unsigned *
743 tx_cond(struct shader_translator *tx)
744 {
745 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
746 tx->cond_depth++;
747 return &tx->cond_labels[tx->cond_depth - 1];
748 }
749
750 static INLINE unsigned *
751 tx_elsecond(struct shader_translator *tx)
752 {
753 assert(tx->cond_depth);
754 return &tx->cond_labels[tx->cond_depth - 1];
755 }
756
757 static INLINE void
758 tx_endcond(struct shader_translator *tx)
759 {
760 assert(tx->cond_depth);
761 tx->cond_depth--;
762 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
763 ureg_get_instruction_number(tx->ureg));
764 }
765
766 static INLINE struct ureg_dst
767 nine_ureg_dst_register(unsigned file, int index)
768 {
769 return ureg_dst(ureg_src_register(file, index));
770 }
771
772 static struct ureg_src
773 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
774 {
775 struct ureg_program *ureg = tx->ureg;
776 struct ureg_src src;
777 struct ureg_dst tmp;
778
779 switch (param->file)
780 {
781 case D3DSPR_TEMP:
782 assert(!param->rel);
783 tx_temp_alloc(tx, param->idx);
784 src = ureg_src(tx->regs.r[param->idx]);
785 break;
786 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
787 case D3DSPR_ADDR:
788 assert(!param->rel);
789 if (IS_VS) {
790 tx_addr_alloc(tx, param->idx);
791 src = ureg_src(tx->regs.a);
792 } else {
793 if (tx->version.major < 2 && tx->version.minor < 4) {
794 /* no subroutines, so should be defined */
795 src = ureg_src(tx->regs.tS[param->idx]);
796 } else {
797 tx_texcoord_alloc(tx, param->idx);
798 src = tx->regs.vT[param->idx];
799 }
800 }
801 break;
802 case D3DSPR_INPUT:
803 if (IS_VS) {
804 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
805 } else {
806 if (tx->version.major < 3) {
807 assert(!param->rel);
808 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
809 param->idx,
810 TGSI_INTERPOLATE_PERSPECTIVE);
811 } else {
812 assert(!param->rel); /* TODO */
813 assert(param->idx < Elements(tx->regs.v));
814 src = tx->regs.v[param->idx];
815 }
816 }
817 break;
818 case D3DSPR_PREDICATE:
819 assert(!param->rel);
820 tx_pred_alloc(tx, param->idx);
821 src = ureg_src(tx->regs.p);
822 break;
823 case D3DSPR_SAMPLER:
824 assert(param->mod == NINED3DSPSM_NONE);
825 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
826 assert(!param->rel);
827 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
828 break;
829 case D3DSPR_CONST:
830 if (param->rel)
831 tx->indirect_const_access = TRUE;
832 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
833 if (!param->rel)
834 nine_info_mark_const_f_used(tx->info, param->idx);
835 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
836 }
837 break;
838 case D3DSPR_CONST2:
839 case D3DSPR_CONST3:
840 case D3DSPR_CONST4:
841 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
842 assert(!"CONST2/3/4");
843 src = ureg_imm1f(ureg, 0.0f);
844 break;
845 case D3DSPR_CONSTINT:
846 if (param->rel || !tx_lconsti(tx, &src, param->idx)) {
847 if (!param->rel)
848 nine_info_mark_const_i_used(tx->info, param->idx);
849 src = ureg_src_register(TGSI_FILE_CONSTANT,
850 tx->info->const_i_base + param->idx);
851 }
852 break;
853 case D3DSPR_CONSTBOOL:
854 if (param->rel || !tx_lconstb(tx, &src, param->idx)) {
855 char r = param->idx / 4;
856 char s = param->idx & 3;
857 if (!param->rel)
858 nine_info_mark_const_b_used(tx->info, param->idx);
859 src = ureg_src_register(TGSI_FILE_CONSTANT,
860 tx->info->const_b_base + r);
861 src = ureg_swizzle(src, s, s, s, s);
862 }
863 break;
864 case D3DSPR_LOOP:
865 src = tx_src_scalar(tx_get_aL(tx));
866 break;
867 case D3DSPR_MISCTYPE:
868 switch (param->idx) {
869 case D3DSMO_POSITION:
870 if (ureg_src_is_undef(tx->regs.vPos))
871 tx->regs.vPos = ureg_DECL_fs_input(ureg,
872 TGSI_SEMANTIC_POSITION, 0,
873 TGSI_INTERPOLATE_LINEAR);
874 if (tx->shift_wpos) {
875 /* TODO: do this only once */
876 struct ureg_dst wpos = tx_scratch(tx);
877 ureg_SUB(ureg, wpos, tx->regs.vPos,
878 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
879 src = ureg_src(wpos);
880 } else {
881 src = tx->regs.vPos;
882 }
883 break;
884 case D3DSMO_FACE:
885 if (ureg_src_is_undef(tx->regs.vFace)) {
886 tx->regs.vFace = ureg_DECL_fs_input(ureg,
887 TGSI_SEMANTIC_FACE, 0,
888 TGSI_INTERPOLATE_CONSTANT);
889 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
890 }
891 src = tx->regs.vFace;
892 break;
893 default:
894 assert(!"invalid src D3DSMO");
895 break;
896 }
897 assert(!param->rel);
898 break;
899 case D3DSPR_TEMPFLOAT16:
900 break;
901 default:
902 assert(!"invalid src D3DSPR");
903 }
904 if (param->rel)
905 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
906
907 if (param->swizzle != NINED3DSP_NOSWIZZLE)
908 src = ureg_swizzle(src,
909 (param->swizzle >> 0) & 0x3,
910 (param->swizzle >> 2) & 0x3,
911 (param->swizzle >> 4) & 0x3,
912 (param->swizzle >> 6) & 0x3);
913
914 switch (param->mod) {
915 case NINED3DSPSM_ABS:
916 src = ureg_abs(src);
917 break;
918 case NINED3DSPSM_ABSNEG:
919 src = ureg_negate(ureg_abs(src));
920 break;
921 case NINED3DSPSM_NEG:
922 src = ureg_negate(src);
923 break;
924 case NINED3DSPSM_BIAS:
925 tmp = tx_scratch(tx);
926 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
927 src = ureg_src(tmp);
928 break;
929 case NINED3DSPSM_BIASNEG:
930 tmp = tx_scratch(tx);
931 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
932 src = ureg_src(tmp);
933 break;
934 case NINED3DSPSM_NOT:
935 if (tx->native_integers) {
936 tmp = tx_scratch(tx);
937 ureg_NOT(ureg, tmp, src);
938 src = ureg_src(tmp);
939 break;
940 }
941 /* fall through */
942 case NINED3DSPSM_COMP:
943 tmp = tx_scratch(tx);
944 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
945 src = ureg_src(tmp);
946 break;
947 case NINED3DSPSM_DZ:
948 case NINED3DSPSM_DW:
949 /* handled in instruction */
950 break;
951 case NINED3DSPSM_SIGN:
952 tmp = tx_scratch(tx);
953 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
954 src = ureg_src(tmp);
955 break;
956 case NINED3DSPSM_SIGNNEG:
957 tmp = tx_scratch(tx);
958 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
959 src = ureg_src(tmp);
960 break;
961 case NINED3DSPSM_X2:
962 tmp = tx_scratch(tx);
963 ureg_ADD(ureg, tmp, src, src);
964 src = ureg_src(tmp);
965 break;
966 case NINED3DSPSM_X2NEG:
967 tmp = tx_scratch(tx);
968 ureg_ADD(ureg, tmp, src, src);
969 src = ureg_negate(ureg_src(tmp));
970 break;
971 default:
972 assert(param->mod == NINED3DSPSM_NONE);
973 break;
974 }
975
976 return src;
977 }
978
979 static struct ureg_dst
980 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
981 {
982 struct ureg_dst dst;
983
984 switch (param->file)
985 {
986 case D3DSPR_TEMP:
987 assert(!param->rel);
988 tx_temp_alloc(tx, param->idx);
989 dst = tx->regs.r[param->idx];
990 break;
991 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
992 case D3DSPR_ADDR:
993 assert(!param->rel);
994 if (tx->version.major < 2 && !IS_VS) {
995 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
996 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
997 dst = tx->regs.tS[param->idx];
998 } else
999 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1000 tx_texcoord_alloc(tx, param->idx);
1001 dst = ureg_dst(tx->regs.vT[param->idx]);
1002 } else {
1003 tx_addr_alloc(tx, param->idx);
1004 dst = tx->regs.a;
1005 }
1006 break;
1007 case D3DSPR_RASTOUT:
1008 assert(!param->rel);
1009 switch (param->idx) {
1010 case 0:
1011 if (ureg_dst_is_undef(tx->regs.oPos))
1012 tx->regs.oPos =
1013 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1014 dst = tx->regs.oPos;
1015 break;
1016 case 1:
1017 if (ureg_dst_is_undef(tx->regs.oFog))
1018 tx->regs.oFog =
1019 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
1020 dst = tx->regs.oFog;
1021 break;
1022 case 2:
1023 if (ureg_dst_is_undef(tx->regs.oPts))
1024 tx->regs.oPts =
1025 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
1026 dst = tx->regs.oPts;
1027 break;
1028 default:
1029 assert(0);
1030 break;
1031 }
1032 break;
1033 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1034 case D3DSPR_OUTPUT:
1035 if (tx->version.major < 3) {
1036 assert(!param->rel);
1037 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1038 } else {
1039 assert(!param->rel); /* TODO */
1040 assert(param->idx < Elements(tx->regs.o));
1041 dst = tx->regs.o[param->idx];
1042 }
1043 break;
1044 case D3DSPR_ATTROUT: /* VS */
1045 case D3DSPR_COLOROUT: /* PS */
1046 assert(param->idx >= 0 && param->idx < 4);
1047 assert(!param->rel);
1048 tx->info->rt_mask |= 1 << param->idx;
1049 if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
1050 tx->regs.oCol[param->idx] =
1051 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1052 dst = tx->regs.oCol[param->idx];
1053 if (IS_VS && tx->version.major < 3)
1054 dst = ureg_saturate(dst);
1055 break;
1056 case D3DSPR_DEPTHOUT:
1057 assert(!param->rel);
1058 if (ureg_dst_is_undef(tx->regs.oDepth))
1059 tx->regs.oDepth =
1060 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1061 TGSI_WRITEMASK_Z);
1062 dst = tx->regs.oDepth; /* XXX: must write .z component */
1063 break;
1064 case D3DSPR_PREDICATE:
1065 assert(!param->rel);
1066 tx_pred_alloc(tx, param->idx);
1067 dst = tx->regs.p;
1068 break;
1069 case D3DSPR_TEMPFLOAT16:
1070 DBG("unhandled D3DSPR: %u\n", param->file);
1071 break;
1072 default:
1073 assert(!"invalid dst D3DSPR");
1074 break;
1075 }
1076 if (param->rel)
1077 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1078
1079 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1080 dst = ureg_writemask(dst, param->mask);
1081 if (param->mod & NINED3DSPDM_SATURATE)
1082 dst = ureg_saturate(dst);
1083
1084 return dst;
1085 }
1086
1087 static struct ureg_dst
1088 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1089 {
1090 if (param->shift) {
1091 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1092 return tx->regs.tdst;
1093 }
1094 return _tx_dst_param(tx, param);
1095 }
1096
1097 static void
1098 tx_apply_dst0_modifiers(struct shader_translator *tx)
1099 {
1100 struct ureg_dst rdst;
1101 float f;
1102
1103 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1104 return;
1105 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1106
1107 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1108
1109 if (tx->insn.dst[0].shift < 0)
1110 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1111 else
1112 f = 1 << tx->insn.dst[0].shift;
1113
1114 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1115 }
1116
1117 static struct ureg_src
1118 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1119 {
1120 struct ureg_src src;
1121
1122 assert(!param->shift);
1123 assert(!(param->mod & NINED3DSPDM_SATURATE));
1124
1125 switch (param->file) {
1126 case D3DSPR_INPUT:
1127 if (IS_VS) {
1128 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1129 } else {
1130 assert(!param->rel);
1131 assert(param->idx < Elements(tx->regs.v));
1132 src = tx->regs.v[param->idx];
1133 }
1134 break;
1135 default:
1136 src = ureg_src(tx_dst_param(tx, param));
1137 break;
1138 }
1139 if (param->rel)
1140 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1141
1142 if (!param->mask)
1143 WARN("mask is 0, using identity swizzle\n");
1144
1145 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1146 char s[4];
1147 int n;
1148 int c;
1149 for (n = 0, c = 0; c < 4; ++c)
1150 if (param->mask & (1 << c))
1151 s[n++] = c;
1152 assert(n);
1153 for (c = n; c < 4; ++c)
1154 s[c] = s[n - 1];
1155 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1156 }
1157 return src;
1158 }
1159
1160 static HRESULT
1161 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1162 {
1163 struct ureg_program *ureg = tx->ureg;
1164 struct ureg_dst dst;
1165 struct ureg_src src[2];
1166 unsigned i;
1167
1168 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1169 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1170 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1171
1172 for (i = 0; i < n; i++, src[1].Index++)
1173 {
1174 const unsigned m = (1 << i);
1175
1176 if (!(dst.WriteMask & m))
1177 continue;
1178
1179 /* XXX: src == dst case ? */
1180
1181 switch (k) {
1182 case 3:
1183 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1184 break;
1185 case 4:
1186 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1187 break;
1188 default:
1189 DBG("invalid operation: M%ux%u\n", m, n);
1190 break;
1191 }
1192 }
1193
1194 return D3D_OK;
1195 }
1196
/* Expands to the two-element list `0, 0`; presumably a min/max version pair
 * for opcode table entries that are not supported -- TODO confirm usage. */
#define VNOTSUPPORTED 0, 0
/* Pack a (major, minor) version into one integer: major shifted left by 8,
 * OR-ed with minor. */
#define V(maj, min) (((maj) << 8) | (min))
1199
1200 static INLINE const char *
1201 d3dsio_to_string( unsigned opcode )
1202 {
1203 static const char *names[] = {
1204 "NOP",
1205 "MOV",
1206 "ADD",
1207 "SUB",
1208 "MAD",
1209 "MUL",
1210 "RCP",
1211 "RSQ",
1212 "DP3",
1213 "DP4",
1214 "MIN",
1215 "MAX",
1216 "SLT",
1217 "SGE",
1218 "EXP",
1219 "LOG",
1220 "LIT",
1221 "DST",
1222 "LRP",
1223 "FRC",
1224 "M4x4",
1225 "M4x3",
1226 "M3x4",
1227 "M3x3",
1228 "M3x2",
1229 "CALL",
1230 "CALLNZ",
1231 "LOOP",
1232 "RET",
1233 "ENDLOOP",
1234 "LABEL",
1235 "DCL",
1236 "POW",
1237 "CRS",
1238 "SGN",
1239 "ABS",
1240 "NRM",
1241 "SINCOS",
1242 "REP",
1243 "ENDREP",
1244 "IF",
1245 "IFC",
1246 "ELSE",
1247 "ENDIF",
1248 "BREAK",
1249 "BREAKC",
1250 "MOVA",
1251 "DEFB",
1252 "DEFI",
1253 NULL,
1254 NULL,
1255 NULL,
1256 NULL,
1257 NULL,
1258 NULL,
1259 NULL,
1260 NULL,
1261 NULL,
1262 NULL,
1263 NULL,
1264 NULL,
1265 NULL,
1266 NULL,
1267 NULL,
1268 "TEXCOORD",
1269 "TEXKILL",
1270 "TEX",
1271 "TEXBEM",
1272 "TEXBEML",
1273 "TEXREG2AR",
1274 "TEXREG2GB",
1275 "TEXM3x2PAD",
1276 "TEXM3x2TEX",
1277 "TEXM3x3PAD",
1278 "TEXM3x3TEX",
1279 NULL,
1280 "TEXM3x3SPEC",
1281 "TEXM3x3VSPEC",
1282 "EXPP",
1283 "LOGP",
1284 "CND",
1285 "DEF",
1286 "TEXREG2RGB",
1287 "TEXDP3TEX",
1288 "TEXM3x2DEPTH",
1289 "TEXDP3",
1290 "TEXM3x3",
1291 "TEXDEPTH",
1292 "CMP",
1293 "BEM",
1294 "DP2ADD",
1295 "DSX",
1296 "DSY",
1297 "TEXLDD",
1298 "SETP",
1299 "TEXLDL",
1300 "BREAKP"
1301 };
1302
1303 if (opcode < Elements(names)) return names[opcode];
1304
1305 switch (opcode) {
1306 case D3DSIO_PHASE: return "PHASE";
1307 case D3DSIO_COMMENT: return "COMMENT";
1308 case D3DSIO_END: return "END";
1309 default:
1310 return NULL;
1311 }
1312 }
1313
/* Initializer for an all-zero opcode entry with a NULL handler. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when at least one of the four version bounds of inst is non-zero. */
#define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
                                     (inst).vert_version.max | \
                                     (inst).frag_version.min | \
                                     (inst).frag_version.max)

/* Name of the handler function for opcode `name`. */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the handler for opcode `name`; the single
 * parameter is the translator state, named `tx`. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration: some handlers below fall back to the generic path. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1329
1330 DECL_SPECIAL(M4x4)
1331 {
1332 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1333 }
1334
/* M4x3: three 4-component dot products (k = 4, n = 3). */
DECL_SPECIAL(M4x3)
{
    return NineTranslateInstruction_Mkxn(tx, 4, 3);
}

/* M3x4: four 3-component dot products (k = 3, n = 4). */
DECL_SPECIAL(M3x4)
{
    return NineTranslateInstruction_Mkxn(tx, 3, 4);
}

/* M3x3: three 3-component dot products (k = 3, n = 3). */
DECL_SPECIAL(M3x3)
{
    return NineTranslateInstruction_Mkxn(tx, 3, 3);
}

/* M3x2: two 3-component dot products (k = 3, n = 2). */
DECL_SPECIAL(M3x2)
{
    return NineTranslateInstruction_Mkxn(tx, 3, 2);
}
1354
1355 DECL_SPECIAL(CMP)
1356 {
1357 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1358 tx_src_param(tx, &tx->insn.src[0]),
1359 tx_src_param(tx, &tx->insn.src[2]),
1360 tx_src_param(tx, &tx->insn.src[1]));
1361 return D3D_OK;
1362 }
1363
1364 DECL_SPECIAL(CND)
1365 {
1366 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1367 struct ureg_dst cgt;
1368 struct ureg_src cnd;
1369
1370 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4) {
1371 ureg_MOV(tx->ureg,
1372 dst, tx_src_param(tx, &tx->insn.src[1]));
1373 return D3D_OK;
1374 }
1375
1376 cnd = tx_src_param(tx, &tx->insn.src[0]);
1377 cgt = tx_scratch(tx);
1378
1379 if (tx->version.major == 1 && tx->version.minor < 4) {
1380 cgt.WriteMask = TGSI_WRITEMASK_W;
1381 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1382 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1383 } else {
1384 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1385 }
1386 ureg_CMP(tx->ureg, dst,
1387 tx_src_param(tx, &tx->insn.src[1]),
1388 tx_src_param(tx, &tx->insn.src[2]), ureg_negate(cnd));
1389 return D3D_OK;
1390 }
1391
1392 DECL_SPECIAL(CALL)
1393 {
1394 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1395 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1396 return D3D_OK;
1397 }
1398
1399 DECL_SPECIAL(CALLNZ)
1400 {
1401 struct ureg_program *ureg = tx->ureg;
1402 struct ureg_dst tmp = tx_scratch_scalar(tx);
1403 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1404
1405 /* NOTE: source should be const bool, so we can use NOT/SUB instead of [U]SNE 0 */
1406 if (!tx->insn.flags) {
1407 if (tx->native_integers)
1408 ureg_NOT(ureg, tmp, src);
1409 else
1410 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1411 }
1412 ureg_IF(ureg, tx->insn.flags ? src : tx_src_scalar(tmp), tx_cond(tx));
1413 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1414 tx_endcond(tx);
1415 ureg_ENDIF(ureg);
1416 return D3D_OK;
1417 }
1418
1419 DECL_SPECIAL(MOV_vs1x)
1420 {
1421 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1422 ureg_ARL(tx->ureg,
1423 tx_dst_param(tx, &tx->insn.dst[0]),
1424 tx_src_param(tx, &tx->insn.src[0]));
1425 return D3D_OK;
1426 }
1427 return NineTranslateInstruction_Generic(tx);
1428 }
1429
1430 DECL_SPECIAL(LOOP)
1431 {
1432 struct ureg_program *ureg = tx->ureg;
1433 unsigned *label;
1434 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1435 struct ureg_src iter = ureg_scalar(src, TGSI_SWIZZLE_X);
1436 struct ureg_src init = ureg_scalar(src, TGSI_SWIZZLE_Y);
1437 struct ureg_src step = ureg_scalar(src, TGSI_SWIZZLE_Z);
1438 struct ureg_dst ctr;
1439 struct ureg_dst tmp = tx_scratch_scalar(tx);
1440
1441 label = tx_bgnloop(tx);
1442 ctr = tx_get_loopctr(tx);
1443
1444 ureg_MOV(tx->ureg, ctr, init);
1445 ureg_BGNLOOP(tx->ureg, label);
1446 if (tx->native_integers) {
1447 /* we'll let the backend pull up that MAD ... */
1448 ureg_UMAD(ureg, tmp, iter, step, init);
1449 ureg_USEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
1450 #ifdef NINE_TGSI_LAZY_DEVS
1451 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1452 #endif
1453 } else {
1454 /* can't simply use SGE for precision because step might be negative */
1455 ureg_MAD(ureg, tmp, iter, step, init);
1456 ureg_SEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
1457 #ifdef NINE_TGSI_LAZY_DEVS
1458 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1459 #endif
1460 }
1461 #ifdef NINE_TGSI_LAZY_DEVS
1462 ureg_BRK(ureg);
1463 tx_endcond(tx);
1464 ureg_ENDIF(ureg);
1465 #else
1466 ureg_BREAKC(ureg, tx_src_scalar(tmp));
1467 #endif
1468 if (tx->native_integers) {
1469 ureg_UARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
1470 ureg_UADD(ureg, ctr, tx_src_scalar(ctr), step);
1471 } else {
1472 ureg_ARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
1473 ureg_ADD(ureg, ctr, tx_src_scalar(ctr), step);
1474 }
1475 return D3D_OK;
1476 }
1477
1478 DECL_SPECIAL(RET)
1479 {
1480 ureg_RET(tx->ureg);
1481 return D3D_OK;
1482 }
1483
1484 DECL_SPECIAL(ENDLOOP)
1485 {
1486 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1487 return D3D_OK;
1488 }
1489
1490 DECL_SPECIAL(LABEL)
1491 {
1492 unsigned k = tx->num_inst_labels;
1493 unsigned n = tx->insn.src[0].idx;
1494 assert(n < 2048);
1495 if (n >= k)
1496 tx->inst_labels = REALLOC(tx->inst_labels,
1497 k * sizeof(tx->inst_labels[0]),
1498 n * sizeof(tx->inst_labels[0]));
1499
1500 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1501 return D3D_OK;
1502 }
1503
1504 DECL_SPECIAL(SINCOS)
1505 {
1506 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1507 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1508
1509 assert(!(dst.WriteMask & 0xc));
1510
1511 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1512 ureg_SCS(tx->ureg, dst, src);
1513 return D3D_OK;
1514 }
1515
1516 DECL_SPECIAL(SGN)
1517 {
1518 ureg_SSG(tx->ureg,
1519 tx_dst_param(tx, &tx->insn.dst[0]),
1520 tx_src_param(tx, &tx->insn.src[0]));
1521 return D3D_OK;
1522 }
1523
1524 DECL_SPECIAL(REP)
1525 {
1526 struct ureg_program *ureg = tx->ureg;
1527 unsigned *label;
1528 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1529 struct ureg_dst ctr;
1530 struct ureg_dst tmp = tx_scratch_scalar(tx);
1531 struct ureg_src imm =
1532 tx->native_integers ? ureg_imm1u(ureg, 0) : ureg_imm1f(ureg, 0.0f);
1533
1534 label = tx_bgnloop(tx);
1535 ctr = tx_get_loopctr(tx);
1536
1537 /* NOTE: rep must be constant, so we don't have to save the count */
1538 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1539
1540 ureg_MOV(ureg, ctr, imm);
1541 ureg_BGNLOOP(ureg, label);
1542 if (tx->native_integers)
1543 {
1544 ureg_USGE(ureg, tmp, tx_src_scalar(ctr), rep);
1545 #ifdef NINE_TGSI_LAZY_DEVS
1546 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1547 #endif
1548 }
1549 else
1550 {
1551 ureg_SGE(ureg, tmp, tx_src_scalar(ctr), rep);
1552 #ifdef NINE_TGSI_LAZY_DEVS
1553 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1554 #endif
1555 }
1556 #ifdef NINE_TGSI_LAZY_DEVS
1557 ureg_BRK(ureg);
1558 tx_endcond(tx);
1559 ureg_ENDIF(ureg);
1560 #else
1561 ureg_BREAKC(ureg, tx_src_scalar(tmp));
1562 #endif
1563
1564 if (tx->native_integers) {
1565 ureg_UADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1u(ureg, 1));
1566 } else {
1567 ureg_ADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1f(ureg, 1.0f));
1568 }
1569
1570 return D3D_OK;
1571 }
1572
1573 DECL_SPECIAL(ENDREP)
1574 {
1575 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1576 return D3D_OK;
1577 }
1578
1579 DECL_SPECIAL(ENDIF)
1580 {
1581 tx_endcond(tx);
1582 ureg_ENDIF(tx->ureg);
1583 return D3D_OK;
1584 }
1585
1586 DECL_SPECIAL(IF)
1587 {
1588 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1589
1590 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1591 ureg_UIF(tx->ureg, src, tx_cond(tx));
1592 else
1593 ureg_IF(tx->ureg, src, tx_cond(tx));
1594
1595 return D3D_OK;
1596 }
1597
1598 static INLINE unsigned
1599 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1600 {
1601 switch (flags) {
1602 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1603 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1604 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1605 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1606 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1607 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1608 default:
1609 assert(!"invalid comparison flags");
1610 return TGSI_OPCODE_SGT;
1611 }
1612 }
1613
1614 DECL_SPECIAL(IFC)
1615 {
1616 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1617 struct ureg_src src[2];
1618 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1619 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1620 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1621 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1622 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1623 return D3D_OK;
1624 }
1625
1626 DECL_SPECIAL(ELSE)
1627 {
1628 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1629 return D3D_OK;
1630 }
1631
1632 DECL_SPECIAL(BREAKC)
1633 {
1634 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1635 struct ureg_src src[2];
1636 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1637 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1638 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1639 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1640 #ifdef NINE_TGSI_LAZY_DEVS
1641 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1642 ureg_BRK(tx->ureg);
1643 tx_endcond(tx);
1644 ureg_ENDIF(tx->ureg);
1645 #else
1646 ureg_BREAKC(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
1647 #endif
1648 return D3D_OK;
1649 }
1650
/*
 * Printable names of the D3DDECLUSAGE_* values, indexed by the usage value
 * itself. The array ends at the largest usage listed here, so an index must
 * be range-checked before it is used.
 */
static const char *sm1_declusage_names[] =
{
    [D3DDECLUSAGE_POSITION] = "POSITION",
    [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
    [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
    [D3DDECLUSAGE_NORMAL] = "NORMAL",
    [D3DDECLUSAGE_PSIZE] = "PSIZE",
    [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
    [D3DDECLUSAGE_TANGENT] = "TANGENT",
    [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
    [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
    [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
    [D3DDECLUSAGE_COLOR] = "COLOR",
    [D3DDECLUSAGE_FOG] = "FOG",
    [D3DDECLUSAGE_DEPTH] = "DEPTH",
    [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
1668
/*
 * Convert a parsed DCL semantic to the nine-internal declaration usage by
 * forwarding its usage and usage index to nine_d3d9_to_nine_declusage().
 */
static INLINE unsigned
sm1_to_nine_declusage(struct sm1_semantic *dcl)
{
    return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
}
1674
1675 static void
1676 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1677 boolean tc,
1678 struct sm1_semantic *dcl)
1679 {
1680 BYTE index = dcl->usage_idx;
1681
1682 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1683 * we match to a TGSI_SEMANTIC_GENERIC with index.
1684 *
1685 * The index can be anything UINT16 and usage_idx is BYTE,
1686 * so we can fit everything. It doesn't matter if indices
1687 * are close together or low.
1688 *
1689 *
1690 * POSITION >= 1: 10 * index + 6
1691 * COLOR >= 2: 10 * (index-1) + 7
1692 * TEXCOORD[0..15]: index
1693 * BLENDWEIGHT: 10 * index + 18
1694 * BLENDINDICES: 10 * index + 19
1695 * NORMAL: 10 * index + 20
1696 * TANGENT: 10 * index + 21
1697 * BINORMAL: 10 * index + 22
1698 * TESSFACTOR: 10 * index + 23
1699 */
1700
1701 switch (dcl->usage) {
1702 case D3DDECLUSAGE_POSITION:
1703 case D3DDECLUSAGE_POSITIONT:
1704 case D3DDECLUSAGE_DEPTH:
1705 if (index == 0) {
1706 sem->Name = TGSI_SEMANTIC_POSITION;
1707 sem->Index = 0;
1708 } else {
1709 sem->Name = TGSI_SEMANTIC_GENERIC;
1710 sem->Index = 10 * index + 6;
1711 }
1712 break;
1713 case D3DDECLUSAGE_COLOR:
1714 if (index < 2) {
1715 sem->Name = TGSI_SEMANTIC_COLOR;
1716 sem->Index = index;
1717 } else {
1718 sem->Name = TGSI_SEMANTIC_GENERIC;
1719 sem->Index = 10 * (index-1) + 7;
1720 }
1721 break;
1722 case D3DDECLUSAGE_FOG:
1723 assert(index == 0);
1724 sem->Name = TGSI_SEMANTIC_FOG;
1725 sem->Index = 0;
1726 break;
1727 case D3DDECLUSAGE_PSIZE:
1728 assert(index == 0);
1729 sem->Name = TGSI_SEMANTIC_PSIZE;
1730 sem->Index = 0;
1731 break;
1732 case D3DDECLUSAGE_TEXCOORD:
1733 assert(index < 16);
1734 if (index < 8 && tc)
1735 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1736 else
1737 sem->Name = TGSI_SEMANTIC_GENERIC;
1738 sem->Index = index;
1739 break;
1740 case D3DDECLUSAGE_BLENDWEIGHT:
1741 sem->Name = TGSI_SEMANTIC_GENERIC;
1742 sem->Index = 10 * index + 18;
1743 break;
1744 case D3DDECLUSAGE_BLENDINDICES:
1745 sem->Name = TGSI_SEMANTIC_GENERIC;
1746 sem->Index = 10 * index + 19;
1747 break;
1748 case D3DDECLUSAGE_NORMAL:
1749 sem->Name = TGSI_SEMANTIC_GENERIC;
1750 sem->Index = 10 * index + 20;
1751 break;
1752 case D3DDECLUSAGE_TANGENT:
1753 sem->Name = TGSI_SEMANTIC_GENERIC;
1754 sem->Index = 10 * index + 21;
1755 break;
1756 case D3DDECLUSAGE_BINORMAL:
1757 sem->Name = TGSI_SEMANTIC_GENERIC;
1758 sem->Index = 10 * index + 22;
1759 break;
1760 case D3DDECLUSAGE_TESSFACTOR:
1761 sem->Name = TGSI_SEMANTIC_GENERIC;
1762 sem->Index = 10 * index + 23;
1763 break;
1764 case D3DDECLUSAGE_SAMPLE:
1765 sem->Name = TGSI_SEMANTIC_COUNT;
1766 sem->Index = 0;
1767 break;
1768 default:
1769 assert(!"Invalid DECLUSAGE.");
1770 break;
1771 }
1772 }
1773
/* D3DSTT_* sampler texture types shifted right by D3DSP_TEXTURETYPE_SHIFT;
 * these are the values the sampler_type switches below compare against. */
#define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1778 static INLINE unsigned
1779 d3dstt_to_tgsi_tex(BYTE sampler_type)
1780 {
1781 switch (sampler_type) {
1782 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1783 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1784 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1785 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1786 default:
1787 assert(0);
1788 return TGSI_TEXTURE_UNKNOWN;
1789 }
1790 }
1791 static INLINE unsigned
1792 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1793 {
1794 switch (sampler_type) {
1795 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1796 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1797 case NINED3DSTT_VOLUME:
1798 case NINED3DSTT_CUBE:
1799 default:
1800 assert(0);
1801 return TGSI_TEXTURE_UNKNOWN;
1802 }
1803 }
1804 static INLINE unsigned
1805 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1806 {
1807 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1808 case 1: return TGSI_TEXTURE_1D;
1809 case 0: return TGSI_TEXTURE_2D;
1810 case 3: return TGSI_TEXTURE_3D;
1811 default:
1812 return TGSI_TEXTURE_CUBE;
1813 }
1814 }
1815
1816 static const char *
1817 sm1_sampler_type_name(BYTE sampler_type)
1818 {
1819 switch (sampler_type) {
1820 case NINED3DSTT_1D: return "1D";
1821 case NINED3DSTT_2D: return "2D";
1822 case NINED3DSTT_VOLUME: return "VOLUME";
1823 case NINED3DSTT_CUBE: return "CUBE";
1824 default:
1825 return "(D3DSTT_?)";
1826 }
1827 }
1828
1829 static INLINE unsigned
1830 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1831 {
1832 switch (sem->Name) {
1833 case TGSI_SEMANTIC_POSITION:
1834 case TGSI_SEMANTIC_NORMAL:
1835 return TGSI_INTERPOLATE_LINEAR;
1836 case TGSI_SEMANTIC_BCOLOR:
1837 case TGSI_SEMANTIC_COLOR:
1838 case TGSI_SEMANTIC_FOG:
1839 case TGSI_SEMANTIC_GENERIC:
1840 case TGSI_SEMANTIC_TEXCOORD:
1841 case TGSI_SEMANTIC_CLIPDIST:
1842 case TGSI_SEMANTIC_CLIPVERTEX:
1843 return TGSI_INTERPOLATE_PERSPECTIVE;
1844 case TGSI_SEMANTIC_EDGEFLAG:
1845 case TGSI_SEMANTIC_FACE:
1846 case TGSI_SEMANTIC_INSTANCEID:
1847 case TGSI_SEMANTIC_PCOORD:
1848 case TGSI_SEMANTIC_PRIMID:
1849 case TGSI_SEMANTIC_PSIZE:
1850 case TGSI_SEMANTIC_VERTEXID:
1851 return TGSI_INTERPOLATE_CONSTANT;
1852 default:
1853 assert(0);
1854 return TGSI_INTERPOLATE_CONSTANT;
1855 }
1856 }
1857
1858 DECL_SPECIAL(DCL)
1859 {
1860 struct ureg_program *ureg = tx->ureg;
1861 boolean is_input;
1862 boolean is_sampler;
1863 struct tgsi_declaration_semantic tgsi;
1864 struct sm1_semantic sem;
1865 sm1_read_semantic(tx, &sem);
1866
1867 is_input = sem.reg.file == D3DSPR_INPUT;
1868 is_sampler =
1869 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1870
1871 DUMP("DCL ");
1872 sm1_dump_dst_param(&sem.reg);
1873 if (is_sampler)
1874 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1875 else
1876 if (tx->version.major >= 3)
1877 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1878 else
1879 if (sem.usage | sem.usage_idx)
1880 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1881 else
1882 DUMP("\n");
1883
1884 if (is_sampler) {
1885 const unsigned m = 1 << sem.reg.idx;
1886 ureg_DECL_sampler(ureg, sem.reg.idx);
1887 tx->info->sampler_mask |= m;
1888 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1889 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1890 d3dstt_to_tgsi_tex(sem.sampler_type);
1891 return D3D_OK;
1892 }
1893
1894 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1895 if (IS_VS) {
1896 if (is_input) {
1897 /* linkage outside of shader with vertex declaration */
1898 ureg_DECL_vs_input(ureg, sem.reg.idx);
1899 assert(sem.reg.idx < Elements(tx->info->input_map));
1900 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1901 tx->info->num_inputs = sem.reg.idx + 1;
1902 /* NOTE: preserving order in case of indirect access */
1903 } else
1904 if (tx->version.major >= 3) {
1905 /* SM2 output semantic determined by file */
1906 assert(sem.reg.mask != 0);
1907 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1908 tx->info->position_t = TRUE;
1909 assert(sem.reg.idx < Elements(tx->regs.o));
1910 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1911 ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1912
1913 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1914 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1915 }
1916 } else {
1917 if (is_input && tx->version.major >= 3) {
1918 /* SM3 only, SM2 input semantic determined by file */
1919 assert(sem.reg.idx < Elements(tx->regs.v));
1920 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1921 ureg, tgsi.Name, tgsi.Index,
1922 nine_tgsi_to_interp_mode(&tgsi),
1923 0, /* cylwrap */
1924 sem.reg.mod & NINED3DSPDM_CENTROID);
1925 } else
1926 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
1927 /* FragColor or FragDepth */
1928 assert(sem.reg.mask != 0);
1929 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1930 }
1931 }
1932 return D3D_OK;
1933 }
1934
/* DEF: register the float immediate of source 0 as local constant
 * dst[0].idx via tx_set_lconstf(). */
DECL_SPECIAL(DEF)
{
    tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
    return D3D_OK;
}

/* DEFB: register the boolean immediate of source 0 as local constant
 * dst[0].idx via tx_set_lconstb(). */
DECL_SPECIAL(DEFB)
{
    tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
    return D3D_OK;
}

/* DEFI: register the integer immediate of source 0 as local constant
 * dst[0].idx via tx_set_lconsti(). */
DECL_SPECIAL(DEFI)
{
    tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
    return D3D_OK;
}
1952
1953 DECL_SPECIAL(NRM)
1954 {
1955 struct ureg_program *ureg = tx->ureg;
1956 struct ureg_dst tmp = tx_scratch_scalar(tx);
1957 struct ureg_src nrm = tx_src_scalar(tmp);
1958 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1959 ureg_DP3(ureg, tmp, src, src);
1960 ureg_RSQ(ureg, tmp, nrm);
1961 ureg_MUL(ureg, tx_dst_param(tx, &tx->insn.dst[0]), src, nrm);
1962 return D3D_OK;
1963 }
1964
1965 DECL_SPECIAL(DP2ADD)
1966 {
1967 #ifdef NINE_TGSI_LAZY_R600
1968 struct ureg_dst tmp = tx_scratch_scalar(tx);
1969 struct ureg_src dp2 = tx_src_scalar(tmp);
1970 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1971 struct ureg_src src[3];
1972 int i;
1973 for (i = 0; i < 3; ++i)
1974 src[i] = tx_src_param(tx, &tx->insn.src[i]);
1975 assert_replicate_swizzle(&src[2]);
1976
1977 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
1978 ureg_ADD(tx->ureg, dst, src[2], dp2);
1979
1980 return D3D_OK;
1981 #else
1982 return NineTranslateInstruction_Generic(tx);
1983 #endif
1984 }
1985
1986 DECL_SPECIAL(TEXCOORD)
1987 {
1988 struct ureg_program *ureg = tx->ureg;
1989 const unsigned s = tx->insn.dst[0].idx;
1990 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1991
1992 if (ureg_src_is_undef(tx->regs.vT[s]))
1993 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1994 ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
1995
1996 return D3D_OK;
1997 }
1998
1999 DECL_SPECIAL(TEXCOORD_ps14)
2000 {
2001 struct ureg_program *ureg = tx->ureg;
2002 const unsigned s = tx->insn.src[0].idx;
2003 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2004
2005 if (ureg_src_is_undef(tx->regs.vT[s]))
2006 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2007 ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
2008
2009 return D3D_OK;
2010 }
2011
2012 DECL_SPECIAL(TEXKILL)
2013 {
2014 struct ureg_src reg;
2015
2016 if (tx->version.major > 1 || tx->version.minor > 3) {
2017 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2018 } else {
2019 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2020 reg = tx->regs.vT[tx->insn.dst[0].idx];
2021 }
2022 if (tx->version.major < 2)
2023 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2024 ureg_KILL_IF(tx->ureg, reg);
2025
2026 return D3D_OK;
2027 }
2028
/*
 * The handlers below, except TEXM3x3PAD, are not implemented: each body is
 * just STUB(D3DERR_INVALIDCALL).
 * NOTE(review): STUB is defined outside this chunk; presumably it reports
 * the missing implementation and returns its argument -- confirm.
 */

/* TEXBEM: not implemented. */
DECL_SPECIAL(TEXBEM)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXBEML: not implemented. */
DECL_SPECIAL(TEXBEML)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXREG2AR: not implemented. */
DECL_SPECIAL(TEXREG2AR)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXREG2GB: not implemented. */
DECL_SPECIAL(TEXREG2GB)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXM3x2PAD: not implemented. */
DECL_SPECIAL(TEXM3x2PAD)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXM3x2TEX: not implemented. */
DECL_SPECIAL(TEXM3x2TEX)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXM3x3PAD: emits nothing and reports success. */
DECL_SPECIAL(TEXM3x3PAD)
{
    return D3D_OK; /* this is just padding */
}

/* TEXM3x3SPEC: not implemented. */
DECL_SPECIAL(TEXM3x3SPEC)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXM3x3VSPEC: not implemented. */
DECL_SPECIAL(TEXM3x3VSPEC)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXREG2RGB: not implemented. */
DECL_SPECIAL(TEXREG2RGB)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXDP3TEX: not implemented. */
DECL_SPECIAL(TEXDP3TEX)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXM3x2DEPTH: not implemented. */
DECL_SPECIAL(TEXM3x2DEPTH)
{
    STUB(D3DERR_INVALIDCALL);
}

/* TEXDP3: not implemented. */
DECL_SPECIAL(TEXDP3)
{
    STUB(D3DERR_INVALIDCALL);
}
2093
2094 DECL_SPECIAL(TEXM3x3)
2095 {
2096 struct ureg_program *ureg = tx->ureg;
2097 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2098 struct ureg_src src[4];
2099 int s;
2100 const int m = tx->insn.dst[0].idx - 2;
2101 const int n = tx->insn.src[0].idx;
2102 assert(m >= 0 && m > n);
2103
2104 for (s = m; s <= (m + 2); ++s) {
2105 if (ureg_src_is_undef(tx->regs.vT[s]))
2106 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2107 src[s] = tx->regs.vT[s];
2108 }
2109 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), src[0], ureg_src(tx->regs.tS[n]));
2110 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), src[1], ureg_src(tx->regs.tS[n]));
2111 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), src[2], ureg_src(tx->regs.tS[n]));
2112
2113 switch (tx->insn.opcode) {
2114 case D3DSIO_TEXM3x3:
2115 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2116 break;
2117 case D3DSIO_TEXM3x3TEX:
2118 src[3] = ureg_DECL_sampler(ureg, m + 2);
2119 tx->info->sampler_mask |= 1 << (m + 2);
2120 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), src[3]);
2121 break;
2122 default:
2123 return D3DERR_INVALIDCALL;
2124 }
2125 return D3D_OK;
2126 }
2127
/* TEXDEPTH: not implemented; the body is just STUB(D3DERR_INVALIDCALL).
 * NOTE(review): STUB is defined outside this chunk -- confirm it returns. */
DECL_SPECIAL(TEXDEPTH)
{
    STUB(D3DERR_INVALIDCALL);
}

/* BEM: not implemented; the body is just STUB(D3DERR_INVALIDCALL). */
DECL_SPECIAL(BEM)
{
    STUB(D3DERR_INVALIDCALL);
}
2137
2138 DECL_SPECIAL(TEXLD)
2139 {
2140 struct ureg_program *ureg = tx->ureg;
2141 unsigned target;
2142 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2143 struct ureg_src src[2] = {
2144 tx_src_param(tx, &tx->insn.src[0]),
2145 tx_src_param(tx, &tx->insn.src[1])
2146 };
2147 assert(tx->insn.src[1].idx >= 0 &&
2148 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2149 target = tx->sampler_targets[tx->insn.src[1].idx];
2150
2151 switch (tx->insn.flags) {
2152 case 0:
2153 ureg_TEX(ureg, dst, target, src[0], src[1]);
2154 break;
2155 case NINED3DSI_TEXLD_PROJECT:
2156 ureg_TXP(ureg, dst, target, src[0], src[1]);
2157 break;
2158 case NINED3DSI_TEXLD_BIAS:
2159 ureg_TXB(ureg, dst, target, src[0], src[1]);
2160 break;
2161 default:
2162 assert(0);
2163 return D3DERR_INVALIDCALL;
2164 }
2165 return D3D_OK;
2166 }
2167
2168 DECL_SPECIAL(TEXLD_14)
2169 {
2170 struct ureg_program *ureg = tx->ureg;
2171 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2172 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2173 const unsigned s = tx->insn.dst[0].idx;
2174 const unsigned t = ps1x_sampler_type(tx->info, s);
2175
2176 tx->info->sampler_mask |= 1 << s;
2177 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2178
2179 return D3D_OK;
2180 }
2181
2182 DECL_SPECIAL(TEX)
2183 {
2184 struct ureg_program *ureg = tx->ureg;
2185 const unsigned s = tx->insn.dst[0].idx;
2186 const unsigned t = ps1x_sampler_type(tx->info, s);
2187 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2188 struct ureg_src src[2];
2189
2190 if (ureg_src_is_undef(tx->regs.vT[s]))
2191 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2192
2193 src[0] = tx->regs.vT[s];
2194 src[1] = ureg_DECL_sampler(ureg, s);
2195 tx->info->sampler_mask |= 1 << s;
2196
2197 ureg_TEX(ureg, dst, t, src[0], src[1]);
2198
2199 return D3D_OK;
2200 }
2201
2202 DECL_SPECIAL(TEXLDD)
2203 {
2204 unsigned target;
2205 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2206 struct ureg_src src[4] = {
2207 tx_src_param(tx, &tx->insn.src[0]),
2208 tx_src_param(tx, &tx->insn.src[1]),
2209 tx_src_param(tx, &tx->insn.src[2]),
2210 tx_src_param(tx, &tx->insn.src[3])
2211 };
2212 assert(tx->insn.src[3].idx >= 0 &&
2213 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2214 target = tx->sampler_targets[tx->insn.src[1].idx];
2215
2216 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2217 return D3D_OK;
2218 }
2219
2220 DECL_SPECIAL(TEXLDL)
2221 {
2222 unsigned target;
2223 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2224 struct ureg_src src[2] = {
2225 tx_src_param(tx, &tx->insn.src[0]),
2226 tx_src_param(tx, &tx->insn.src[1])
2227 };
2228 assert(tx->insn.src[3].idx >= 0 &&
2229 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2230 target = tx->sampler_targets[tx->insn.src[1].idx];
2231
2232 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2233 return D3D_OK;
2234 }
2235
/* SETP: not implemented; the body is just STUB(D3DERR_INVALIDCALL).
 * NOTE(review): STUB is defined outside this chunk -- confirm it returns. */
DECL_SPECIAL(SETP)
{
    STUB(D3DERR_INVALIDCALL);
}

/* BREAKP: not implemented; the body is just STUB(D3DERR_INVALIDCALL). */
DECL_SPECIAL(BREAKP)
{
    STUB(D3DERR_INVALIDCALL);
}

/* PHASE: emits nothing and reports success. */
DECL_SPECIAL(PHASE)
{
    return D3D_OK; /* we don't care about phase */
}

/* COMMENT: emits nothing and reports success. */
DECL_SPECIAL(COMMENT)
{
    return D3D_OK; /* nothing to do */
}
2255
2256
/*
 * Build one opcode table entry.
 *   o        D3DSIO opcode suffix      t        TGSI opcode suffix
 *   vv1,vv2  first version pair        pv1,pv2  second version pair
 *   d, s     two counts, then h: handler function or NULL
 * NOTE(review): judging by IS_VALID_INSTRUCTION and the table entries, the
 * pairs are vertex/fragment min..max versions and d/s are destination and
 * source operand counts -- confirm against struct sm1_op_info.
 */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2259
2260 struct sm1_op_info inst_table[] =
2261 {
2262 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2263 _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
2264 _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2265 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2266 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2267 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2268 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2269 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2270 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 7 */
2271 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2272 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2273 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2274 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2275 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2276 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2277 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2278 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 15 */
2279 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
2280 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2281 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2282 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2283
2284 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2285 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2286 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2287 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2288 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2289
2290 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALL)),
2291 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALLNZ)),
2292 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2293 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2294 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2295 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(LABEL)),
2296
2297 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2298
2299 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL),
2300 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2301 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2302 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2303 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2304
2305 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2306 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2307
2308 /* More flow control */
2309 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2310 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2311 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2312 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2313 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2314 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2315 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2316 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2317
2318 _OPI(MOVA, ARR, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2319
2320 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2321 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2322
2323 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2324 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2325 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2326 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2327 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2328 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2329 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEM)),
2330 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEML)),
2331 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2AR)),
2332 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2GB)),
2333 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2PAD)),
2334 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2TEX)),
2335 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3PAD)),
2336 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
2337 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3SPEC)),
2338 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3VSPEC)),
2339
2340 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2341 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2342 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2343 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2344
2345 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2346
2347 /* More tex stuff */
2348 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXREG2RGB)),
2349 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3TEX)),
2350 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 0, 0, SPECIAL(TEXM3x2DEPTH)),
2351 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3)),
2352 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
2353 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(TEXDEPTH)),
2354
2355 /* Misc */
2356 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2357 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(BEM)),
2358 _OPI(DP2ADD, DP2A, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)), /* for radeons */
2359 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2360 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2361 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2362 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(SETP)),
2363 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2364 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(BREAKP))
2365 };
2366
2367 struct sm1_op_info inst_phase =
2368 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2369
2370 struct sm1_op_info inst_comment =
2371 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2372
2373 static void
2374 create_op_info_map(struct shader_translator *tx)
2375 {
2376 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2377 unsigned i;
2378
2379 for (i = 0; i < Elements(tx->op_info_map); ++i)
2380 tx->op_info_map[i] = -1;
2381
2382 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2383 for (i = 0; i < Elements(inst_table); ++i) {
2384 assert(inst_table[i].sio < Elements(tx->op_info_map));
2385 if (inst_table[i].vert_version.min <= version &&
2386 inst_table[i].vert_version.max >= version)
2387 tx->op_info_map[inst_table[i].sio] = i;
2388 }
2389 } else {
2390 for (i = 0; i < Elements(inst_table); ++i) {
2391 assert(inst_table[i].sio < Elements(tx->op_info_map));
2392 if (inst_table[i].frag_version.min <= version &&
2393 inst_table[i].frag_version.max >= version)
2394 tx->op_info_map[inst_table[i].sio] = i;
2395 }
2396 }
2397 }
2398
2399 static INLINE HRESULT
2400 NineTranslateInstruction_Generic(struct shader_translator *tx)
2401 {
2402 struct ureg_dst dst[1];
2403 struct ureg_src src[4];
2404 unsigned i;
2405
2406 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2407 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2408 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2409 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2410
2411 ureg_insn(tx->ureg, tx->insn.info->opcode,
2412 dst, tx->insn.ndst,
2413 src, tx->insn.nsrc);
2414 return D3D_OK;
2415 }
2416
2417 static INLINE DWORD
2418 TOKEN_PEEK(struct shader_translator *tx)
2419 {
2420 return *(tx->parse);
2421 }
2422
2423 static INLINE DWORD
2424 TOKEN_NEXT(struct shader_translator *tx)
2425 {
2426 return *(tx->parse)++;
2427 }
2428
2429 static INLINE void
2430 TOKEN_JUMP(struct shader_translator *tx)
2431 {
2432 if (tx->parse_next && tx->parse != tx->parse_next) {
2433 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2434 tx->parse = tx->parse_next;
2435 }
2436 }
2437
2438 static INLINE boolean
2439 sm1_parse_eof(struct shader_translator *tx)
2440 {
2441 return TOKEN_PEEK(tx) == NINED3DSP_END;
2442 }
2443
2444 static void
2445 sm1_read_version(struct shader_translator *tx)
2446 {
2447 const DWORD tok = TOKEN_NEXT(tx);
2448
2449 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2450 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2451
2452 switch (tok >> 16) {
2453 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2454 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2455 default:
2456 DBG("Invalid shader type: %x\n", tok);
2457 tx->processor = ~0;
2458 break;
2459 }
2460 }
2461
2462 /* This is just to check if we parsed the instruction properly. */
2463 static void
2464 sm1_parse_get_skip(struct shader_translator *tx)
2465 {
2466 const DWORD tok = TOKEN_PEEK(tx);
2467
2468 if (tx->version.major >= 2) {
2469 tx->parse_next = tx->parse + 1 /* this */ +
2470 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2471 } else {
2472 tx->parse_next = NULL; /* TODO: determine from param count */
2473 }
2474 }
2475
2476 static void
2477 sm1_print_comment(const char *comment, UINT size)
2478 {
2479 if (!size)
2480 return;
2481 /* TODO */
2482 }
2483
2484 static void
2485 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2486 {
2487 DWORD tok = TOKEN_PEEK(tx);
2488
2489 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2490 {
2491 const char *comment = "";
2492 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2493 tx->parse += size + 1;
2494
2495 if (print)
2496 sm1_print_comment(comment, size);
2497
2498 tok = TOKEN_PEEK(tx);
2499 }
2500 }
2501
2502 static void
2503 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2504 {
2505 *reg = TOKEN_NEXT(tx);
2506
2507 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2508 {
2509 if (tx->version.major < 2)
2510 *rel = (1 << 31) |
2511 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2512 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2513 (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT);
2514 else
2515 *rel = TOKEN_NEXT(tx);
2516 }
2517 }
2518
2519 static void
2520 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2521 {
2522 uint8_t shift;
2523 dst->file =
2524 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2525 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2526 dst->type = TGSI_RETURN_TYPE_FLOAT;
2527 dst->idx = tok & D3DSP_REGNUM_MASK;
2528 dst->rel = NULL;
2529 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2530 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2531 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2532 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2533 }
2534
2535 static void
2536 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2537 {
2538 src->file =
2539 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2540 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2541 src->type = TGSI_RETURN_TYPE_FLOAT;
2542 src->idx = tok & D3DSP_REGNUM_MASK;
2543 src->rel = NULL;
2544 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2545 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2546
2547 switch (src->file) {
2548 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2549 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2550 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2551 default:
2552 break;
2553 }
2554 }
2555
2556 static void
2557 sm1_parse_immediate(struct shader_translator *tx,
2558 struct sm1_src_param *imm)
2559 {
2560 imm->file = NINED3DSPR_IMMEDIATE;
2561 imm->idx = INT_MIN;
2562 imm->rel = NULL;
2563 imm->swizzle = NINED3DSP_NOSWIZZLE;
2564 imm->mod = 0;
2565 switch (tx->insn.opcode) {
2566 case D3DSIO_DEF:
2567 imm->type = NINED3DSPTYPE_FLOAT4;
2568 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2569 tx->parse += 4;
2570 break;
2571 case D3DSIO_DEFI:
2572 imm->type = NINED3DSPTYPE_INT4;
2573 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2574 tx->parse += 4;
2575 break;
2576 case D3DSIO_DEFB:
2577 imm->type = NINED3DSPTYPE_BOOL;
2578 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2579 tx->parse += 1;
2580 break;
2581 default:
2582 assert(0);
2583 break;
2584 }
2585 }
2586
2587 static void
2588 sm1_read_dst_param(struct shader_translator *tx,
2589 struct sm1_dst_param *dst,
2590 struct sm1_src_param *rel)
2591 {
2592 DWORD tok_dst, tok_rel = 0;
2593
2594 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
2595 sm1_parse_dst_param(dst, tok_dst);
2596 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
2597 sm1_parse_src_param(rel, tok_rel);
2598 dst->rel = rel;
2599 }
2600 }
2601
2602 static void
2603 sm1_read_src_param(struct shader_translator *tx,
2604 struct sm1_src_param *src,
2605 struct sm1_src_param *rel)
2606 {
2607 DWORD tok_src, tok_rel = 0;
2608
2609 sm1_parse_get_param(tx, &tok_src, &tok_rel);
2610 sm1_parse_src_param(src, tok_src);
2611 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
2612 assert(rel);
2613 sm1_parse_src_param(rel, tok_rel);
2614 src->rel = rel;
2615 }
2616 }
2617
2618 static void
2619 sm1_read_semantic(struct shader_translator *tx,
2620 struct sm1_semantic *sem)
2621 {
2622 const DWORD tok_usg = TOKEN_NEXT(tx);
2623 const DWORD tok_dst = TOKEN_NEXT(tx);
2624
2625 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
2626 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
2627 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
2628
2629 sm1_parse_dst_param(&sem->reg, tok_dst);
2630 }
2631
2632 static void
2633 sm1_parse_instruction(struct shader_translator *tx)
2634 {
2635 struct sm1_instruction *insn = &tx->insn;
2636 DWORD tok;
2637 struct sm1_op_info *info = NULL;
2638 unsigned i;
2639
2640 sm1_parse_comments(tx, TRUE);
2641 sm1_parse_get_skip(tx);
2642
2643 tok = TOKEN_NEXT(tx);
2644
2645 insn->opcode = tok & D3DSI_OPCODE_MASK;
2646 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
2647 insn->coissue = !!(tok & D3DSI_COISSUE);
2648 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
2649
2650 if (insn->opcode < Elements(tx->op_info_map)) {
2651 int k = tx->op_info_map[insn->opcode];
2652 if (k >= 0) {
2653 assert(k < Elements(inst_table));
2654 info = &inst_table[k];
2655 }
2656 } else {
2657 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
2658 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
2659 }
2660 if (!info) {
2661 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
2662 TOKEN_JUMP(tx);
2663 return;
2664 }
2665 insn->info = info;
2666 insn->ndst = info->ndst;
2667 insn->nsrc = info->nsrc;
2668
2669 assert(!insn->predicated && "TODO: predicated instructions");
2670
2671 /* check version */
2672 {
2673 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
2674 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
2675 unsigned ver = (tx->version.major << 8) | tx->version.minor;
2676 if (ver < min || ver > max) {
2677 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
2678 min, ver, max);
2679 return;
2680 }
2681 }
2682
2683 for (i = 0; i < insn->ndst; ++i)
2684 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
2685 if (insn->predicated)
2686 sm1_read_src_param(tx, &insn->pred, NULL);
2687 for (i = 0; i < insn->nsrc; ++i)
2688 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
2689
2690 /* parse here so we can dump them before processing */
2691 if (insn->opcode == D3DSIO_DEF ||
2692 insn->opcode == D3DSIO_DEFI ||
2693 insn->opcode == D3DSIO_DEFB)
2694 sm1_parse_immediate(tx, &tx->insn.src[0]);
2695
2696 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
2697 sm1_instruction_check(insn);
2698
2699 if (info->handler)
2700 info->handler(tx);
2701 else
2702 NineTranslateInstruction_Generic(tx);
2703 tx_apply_dst0_modifiers(tx);
2704
2705 tx->num_scratch = 0; /* reset */
2706
2707 TOKEN_JUMP(tx);
2708 }
2709
2710 static void
2711 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
2712 {
2713 unsigned i;
2714
2715 tx->info = info;
2716
2717 tx->byte_code = info->byte_code;
2718 tx->parse = info->byte_code;
2719
2720 for (i = 0; i < Elements(info->input_map); ++i)
2721 info->input_map[i] = NINE_DECLUSAGE_NONE;
2722 info->num_inputs = 0;
2723
2724 info->position_t = FALSE;
2725 info->point_size = FALSE;
2726
2727 tx->info->const_used_size = 0;
2728
2729 info->sampler_mask = 0x0;
2730 info->rt_mask = 0x0;
2731
2732 info->lconstf.data = NULL;
2733 info->lconstf.ranges = NULL;
2734
2735 for (i = 0; i < Elements(tx->regs.aL); ++i) {
2736 tx->regs.aL[i] = ureg_dst_undef();
2737 tx->regs.rL[i] = ureg_dst_undef();
2738 }
2739 tx->regs.a = ureg_dst_undef();
2740 tx->regs.p = ureg_dst_undef();
2741 tx->regs.oDepth = ureg_dst_undef();
2742 tx->regs.vPos = ureg_src_undef();
2743 tx->regs.vFace = ureg_src_undef();
2744 for (i = 0; i < Elements(tx->regs.o); ++i)
2745 tx->regs.o[i] = ureg_dst_undef();
2746 for (i = 0; i < Elements(tx->regs.oCol); ++i)
2747 tx->regs.oCol[i] = ureg_dst_undef();
2748 for (i = 0; i < Elements(tx->regs.vC); ++i)
2749 tx->regs.vC[i] = ureg_src_undef();
2750 for (i = 0; i < Elements(tx->regs.vT); ++i)
2751 tx->regs.vT[i] = ureg_src_undef();
2752
2753 for (i = 0; i < Elements(tx->lconsti); ++i)
2754 tx->lconsti[i].idx = -1;
2755 for (i = 0; i < Elements(tx->lconstb); ++i)
2756 tx->lconstb[i].idx = -1;
2757
2758 sm1_read_version(tx);
2759
2760 info->version = (tx->version.major << 4) | tx->version.minor;
2761
2762 create_op_info_map(tx);
2763 }
2764
2765 static void
2766 tx_dtor(struct shader_translator *tx)
2767 {
2768 if (tx->num_inst_labels)
2769 FREE(tx->inst_labels);
2770 FREE(tx->lconstf);
2771 FREE(tx->regs.r);
2772 FREE(tx);
2773 }
2774
2775 static INLINE unsigned
2776 tgsi_processor_from_type(unsigned shader_type)
2777 {
2778 switch (shader_type) {
2779 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
2780 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
2781 default:
2782 return ~0;
2783 }
2784 }
2785
/*
 * Shorthands for querying the pipe screen. Both expect a variable named
 * `device` in scope; GET_SHADER_CAP additionally expects `info` and queries
 * the capability for info->type.
 */
#define GET_CAP(n) \
    device->screen->get_param(device->screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) \
    device->screen->get_shader_param(device->screen, info->type, \
                                     PIPE_SHADER_CAP_##n)
2790
2791 HRESULT
2792 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
2793 {
2794 struct shader_translator *tx;
2795 HRESULT hr = D3D_OK;
2796 const unsigned processor = tgsi_processor_from_type(info->type);
2797
2798 user_assert(processor != ~0, D3DERR_INVALIDCALL);
2799
2800 tx = CALLOC_STRUCT(shader_translator);
2801 if (!tx)
2802 return E_OUTOFMEMORY;
2803 tx_ctor(tx, info);
2804
2805 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
2806 hr = D3DERR_INVALIDCALL;
2807 DBG("Unsupported shader version: %u.%u !\n",
2808 tx->version.major, tx->version.minor);
2809 goto out;
2810 }
2811 if (tx->processor != processor) {
2812 hr = D3DERR_INVALIDCALL;
2813 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
2814 goto out;
2815 }
2816 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
2817 tx->version.major, tx->version.minor);
2818
2819 tx->ureg = ureg_create(processor);
2820 if (!tx->ureg) {
2821 hr = E_OUTOFMEMORY;
2822 goto out;
2823 }
2824 tx_decl_constants(tx);
2825
2826 tx->native_integers = GET_SHADER_CAP(INTEGERS);
2827 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
2828 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
2829 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
2830 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2831 tx->texcoord_sn = tx->want_texcoord ?
2832 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
2833
2834 /* VS must always write position. Declare it here to make it the 1st output.
2835 * (Some drivers like nv50 are buggy and rely on that.)
2836 */
2837 if (IS_VS) {
2838 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
2839 } else {
2840 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
2841 if (!tx->shift_wpos)
2842 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2843 }
2844
2845 if (!ureg_dst_is_undef(tx->regs.oPts))
2846 info->point_size = TRUE;
2847
2848 while (!sm1_parse_eof(tx))
2849 sm1_parse_instruction(tx);
2850 tx->parse++; /* for byte_size */
2851
2852 if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
2853 ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
2854 ureg_src(tx->regs.r[0]));
2855 info->rt_mask |= 0x1;
2856 }
2857
2858 if (info->position_t)
2859 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
2860
2861 ureg_END(tx->ureg);
2862
2863 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
2864 unsigned count;
2865 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
2866 tgsi_dump(toks, 0);
2867 ureg_free_tokens(toks);
2868 }
2869
2870 /* record local constants */
2871 if (tx->num_lconstf && tx->indirect_const_access) {
2872 struct nine_range *ranges;
2873 float *data;
2874 int *indices;
2875 unsigned i, k, n;
2876
2877 hr = E_OUTOFMEMORY;
2878
2879 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
2880 if (!data)
2881 goto out;
2882 info->lconstf.data = data;
2883
2884 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
2885 if (!indices)
2886 goto out;
2887
2888 /* lazy sort, num_lconstf should be small */
2889 for (n = 0; n < tx->num_lconstf; ++n) {
2890 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
2891 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
2892 k = i;
2893 }
2894 indices[n] = tx->lconstf[k].idx;
2895 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
2896 tx->lconstf[k].idx = INT_MAX;
2897 }
2898
2899 /* count ranges */
2900 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
2901 if (indices[i] != indices[i - 1] + 1)
2902 ++n;
2903 ranges = MALLOC(n * sizeof(ranges[0]));
2904 if (!ranges) {
2905 FREE(indices);
2906 goto out;
2907 }
2908 info->lconstf.ranges = ranges;
2909
2910 k = 0;
2911 ranges[k].bgn = indices[0];
2912 for (i = 1; i < tx->num_lconstf; ++i) {
2913 if (indices[i] != indices[i - 1] + 1) {
2914 ranges[k].next = &ranges[k + 1];
2915 ranges[k].end = indices[i - 1] + 1;
2916 ++k;
2917 ranges[k].bgn = indices[i];
2918 }
2919 }
2920 ranges[k].end = indices[i - 1] + 1;
2921 ranges[k].next = NULL;
2922 assert(n == (k + 1));
2923
2924 FREE(indices);
2925 hr = D3D_OK;
2926 }
2927
2928 if (tx->indirect_const_access)
2929 info->const_used_size = ~0;
2930
2931 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
2932 if (!info->cso) {
2933 hr = D3DERR_DRIVERINTERNALERROR;
2934 FREE(info->lconstf.data);
2935 FREE(info->lconstf.ranges);
2936 goto out;
2937 }
2938
2939 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
2940 out:
2941 tx_dtor(tx);
2942 return hr;
2943 }