st/nine: Fix POW implementation
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_ureg.h"
34 #include "tgsi/tgsi_dump.h"
35
36 #define DBG_CHANNEL DBG_SHADER
37
38 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
39
40
41 struct shader_translator;
42
43 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
44
45 static INLINE const char *d3dsio_to_string(unsigned opcode);
46
47
/* Shader type identifiers. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the conditional / loop label stacks kept by the translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Value types of an immediate / local constant. */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Pseudo register file for immediates, placed right after the last D3DSPR. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison codes. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Position of the per-opcode flag bits. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* TEXLD flag values (dumped as the "p" / "b" suffix). */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Write mask bits, one per component (x, y, z, w). */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, x/y/z/w select 0/1/2/3. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Destination modifiers, shifted down so they fit the BYTE-sized mod field. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
94
95 /*
96 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
97 * BIAS <= PS 1.4 (x-0.5)
98 * BIASNEG <= PS 1.4 (-(x-0.5))
99 * SIGN <= PS 1.4 (2(x-0.5))
100 * SIGNNEG <= PS 1.4 (-2(x-0.5))
101 * COMP <= PS 1.4 (1-x)
102 * X2 = PS 1.4 (2x)
103 * X2NEG = PS 1.4 (-2x)
104 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
105 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
106 * ABS >= SM 3.0 (abs(x))
107 * ABSNEG >= SM 3.0 (-abs(x))
108 * NOT >= SM 2.0 predication only
109 */
/* Source modifiers, shifted down so they can index sm1_mod_str[]. */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
124
125 static const char *sm1_mod_str[] =
126 {
127 [NINED3DSPSM_NONE] = "",
128 [NINED3DSPSM_NEG] = "-",
129 [NINED3DSPSM_BIAS] = "bias",
130 [NINED3DSPSM_BIASNEG] = "biasneg",
131 [NINED3DSPSM_SIGN] = "sign",
132 [NINED3DSPSM_SIGNNEG] = "signneg",
133 [NINED3DSPSM_COMP] = "comp",
134 [NINED3DSPSM_X2] = "x2",
135 [NINED3DSPSM_X2NEG] = "x2neg",
136 [NINED3DSPSM_DZ] = "dz",
137 [NINED3DSPSM_DW] = "dw",
138 [NINED3DSPSM_ABS] = "abs",
139 [NINED3DSPSM_ABSNEG] = "-abs",
140 [NINED3DSPSM_NOT] = "not"
141 };
142
143 static void
144 sm1_dump_writemask(BYTE mask)
145 {
146 if (mask & 1) DUMP("x"); else DUMP("_");
147 if (mask & 2) DUMP("y"); else DUMP("_");
148 if (mask & 4) DUMP("z"); else DUMP("_");
149 if (mask & 8) DUMP("w"); else DUMP("_");
150 }
151
152 static void
153 sm1_dump_swizzle(BYTE s)
154 {
155 char c[4] = { 'x', 'y', 'z', 'w' };
156 DUMP("%c%c%c%c",
157 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
158 }
159
160 static const char sm1_file_char[] =
161 {
162 [D3DSPR_TEMP] = 'r',
163 [D3DSPR_INPUT] = 'v',
164 [D3DSPR_CONST] = 'c',
165 [D3DSPR_ADDR] = 'A',
166 [D3DSPR_RASTOUT] = 'R',
167 [D3DSPR_ATTROUT] = 'D',
168 [D3DSPR_OUTPUT] = 'o',
169 [D3DSPR_CONSTINT] = 'I',
170 [D3DSPR_COLOROUT] = 'C',
171 [D3DSPR_DEPTHOUT] = 'D',
172 [D3DSPR_SAMPLER] = 's',
173 [D3DSPR_CONST2] = 'c',
174 [D3DSPR_CONST3] = 'c',
175 [D3DSPR_CONST4] = 'c',
176 [D3DSPR_CONSTBOOL] = 'B',
177 [D3DSPR_LOOP] = 'L',
178 [D3DSPR_TEMPFLOAT16] = 'h',
179 [D3DSPR_MISCTYPE] = 'M',
180 [D3DSPR_LABEL] = 'X',
181 [D3DSPR_PREDICATE] = 'p'
182 };
183
184 static void
185 sm1_dump_reg(BYTE file, INT index)
186 {
187 switch (file) {
188 case D3DSPR_LOOP:
189 DUMP("aL");
190 break;
191 case D3DSPR_COLOROUT:
192 DUMP("oC%i", index);
193 break;
194 case D3DSPR_DEPTHOUT:
195 DUMP("oDepth");
196 break;
197 case D3DSPR_RASTOUT:
198 DUMP("oRast%i", index);
199 break;
200 case D3DSPR_CONSTINT:
201 DUMP("iconst[%i]", index);
202 break;
203 case D3DSPR_CONSTBOOL:
204 DUMP("bconst[%i]", index);
205 break;
206 default:
207 DUMP("%c%i", sm1_file_char[file], index);
208 break;
209 }
210 }
211
212 struct sm1_src_param
213 {
214 INT idx;
215 struct sm1_src_param *rel;
216 BYTE file;
217 BYTE swizzle;
218 BYTE mod;
219 BYTE type;
220 union {
221 DWORD d[4];
222 float f[4];
223 int i[4];
224 BOOL b;
225 } imm;
226 };
227 static void
228 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
229
230 struct sm1_dst_param
231 {
232 INT idx;
233 struct sm1_src_param *rel;
234 BYTE file;
235 BYTE mask;
236 BYTE mod;
237 int8_t shift; /* sint4 */
238 BYTE type;
239 };
240
241 static INLINE void
242 assert_replicate_swizzle(const struct ureg_src *reg)
243 {
244 assert(reg->SwizzleY == reg->SwizzleX &&
245 reg->SwizzleZ == reg->SwizzleX &&
246 reg->SwizzleW == reg->SwizzleX);
247 }
248
249 static void
250 sm1_dump_immediate(const struct sm1_src_param *param)
251 {
252 switch (param->type) {
253 case NINED3DSPTYPE_FLOAT4:
254 DUMP("{ %f %f %f %f }",
255 param->imm.f[0], param->imm.f[1],
256 param->imm.f[2], param->imm.f[3]);
257 break;
258 case NINED3DSPTYPE_INT4:
259 DUMP("{ %i %i %i %i }",
260 param->imm.i[0], param->imm.i[1],
261 param->imm.i[2], param->imm.i[3]);
262 break;
263 case NINED3DSPTYPE_BOOL:
264 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
265 break;
266 default:
267 assert(0);
268 break;
269 }
270 }
271
272 static void
273 sm1_dump_src_param(const struct sm1_src_param *param)
274 {
275 if (param->file == NINED3DSPR_IMMEDIATE) {
276 assert(!param->mod &&
277 !param->rel &&
278 param->swizzle == NINED3DSP_NOSWIZZLE);
279 sm1_dump_immediate(param);
280 return;
281 }
282
283 if (param->mod)
284 DUMP("%s(", sm1_mod_str[param->mod]);
285 if (param->rel) {
286 DUMP("%c[", sm1_file_char[param->file]);
287 sm1_dump_src_param(param->rel);
288 DUMP("+%i]", param->idx);
289 } else {
290 sm1_dump_reg(param->file, param->idx);
291 }
292 if (param->mod)
293 DUMP(")");
294 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
295 DUMP(".");
296 sm1_dump_swizzle(param->swizzle);
297 }
298 }
299
300 static void
301 sm1_dump_dst_param(const struct sm1_dst_param *param)
302 {
303 if (param->mod & NINED3DSPDM_SATURATE)
304 DUMP("sat ");
305 if (param->mod & NINED3DSPDM_PARTIALP)
306 DUMP("pp ");
307 if (param->mod & NINED3DSPDM_CENTROID)
308 DUMP("centroid ");
309 if (param->shift < 0)
310 DUMP("/%u ", 1 << -param->shift);
311 if (param->shift > 0)
312 DUMP("*%u ", 1 << param->shift);
313
314 if (param->rel) {
315 DUMP("%c[", sm1_file_char[param->file]);
316 sm1_dump_src_param(param->rel);
317 DUMP("+%i]", param->idx);
318 } else {
319 sm1_dump_reg(param->file, param->idx);
320 }
321 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
322 DUMP(".");
323 sm1_dump_writemask(param->mask);
324 }
325 }
326
327 struct sm1_semantic
328 {
329 struct sm1_dst_param reg;
330 BYTE sampler_type;
331 D3DDECLUSAGE usage;
332 BYTE usage_idx;
333 };
334
335 struct sm1_op_info
336 {
337 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
338 * should be ignored completely */
339 unsigned sio;
340 unsigned opcode; /* TGSI_OPCODE_x */
341
342 /* versions are still set even handler is set */
343 struct {
344 unsigned min;
345 unsigned max;
346 } vert_version, frag_version;
347
348 /* number of regs parsed outside of special handler */
349 unsigned ndst;
350 unsigned nsrc;
351
352 /* some instructions don't map perfectly, so use a special handler */
353 translate_instruction_func handler;
354 };
355
356 struct sm1_instruction
357 {
358 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
359 BYTE flags;
360 BOOL coissue;
361 BOOL predicated;
362 BYTE ndst;
363 BYTE nsrc;
364 struct sm1_src_param src[4];
365 struct sm1_src_param src_rel[4];
366 struct sm1_src_param pred;
367 struct sm1_src_param dst_rel[1];
368 struct sm1_dst_param dst[1];
369
370 struct sm1_op_info *info;
371 };
372
373 static void
374 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
375 {
376 unsigned i;
377
378 /* no info stored for these: */
379 if (insn->opcode == D3DSIO_DCL)
380 return;
381 for (i = 0; i < indent; ++i)
382 DUMP(" ");
383
384 if (insn->predicated) {
385 DUMP("@");
386 sm1_dump_src_param(&insn->pred);
387 DUMP(" ");
388 }
389 DUMP("%s", d3dsio_to_string(insn->opcode));
390 if (insn->flags) {
391 switch (insn->opcode) {
392 case D3DSIO_TEX:
393 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
394 break;
395 default:
396 DUMP("_%x", insn->flags);
397 break;
398 }
399 }
400 if (insn->coissue)
401 DUMP("_co");
402 DUMP(" ");
403
404 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
405 sm1_dump_dst_param(&insn->dst[i]);
406 DUMP(" ");
407 }
408
409 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
410 sm1_dump_src_param(&insn->src[i]);
411 DUMP(" ");
412 }
413 if (insn->opcode == D3DSIO_DEF ||
414 insn->opcode == D3DSIO_DEFI ||
415 insn->opcode == D3DSIO_DEFB)
416 sm1_dump_immediate(&insn->src[0]);
417
418 DUMP("\n");
419 }
420
421 struct sm1_local_const
422 {
423 INT idx;
424 struct ureg_src reg;
425 union {
426 boolean b;
427 float f[4];
428 int32_t i[4];
429 } imm;
430 };
431
432 struct shader_translator
433 {
434 const DWORD *byte_code;
435 const DWORD *parse;
436 const DWORD *parse_next;
437
438 struct ureg_program *ureg;
439
440 /* shader version */
441 struct {
442 BYTE major;
443 BYTE minor;
444 } version;
445 unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
446
447 boolean native_integers;
448 boolean inline_subroutines;
449 boolean lower_preds;
450 boolean want_texcoord;
451 boolean shift_wpos;
452 unsigned texcoord_sn;
453
454 struct sm1_instruction insn; /* current instruction */
455
456 struct {
457 struct ureg_dst *r;
458 struct ureg_dst oPos;
459 struct ureg_dst oFog;
460 struct ureg_dst oPts;
461 struct ureg_dst oCol[4];
462 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
463 struct ureg_dst oDepth;
464 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
465 struct ureg_src vPos;
466 struct ureg_src vFace;
467 struct ureg_src s;
468 struct ureg_dst p;
469 struct ureg_dst a;
470 struct ureg_dst tS[8]; /* texture stage registers */
471 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
472 struct ureg_dst t[5]; /* scratch TEMPs */
473 struct ureg_src vC[2]; /* PS color in */
474 struct ureg_src vT[8]; /* PS texcoord in */
475 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
476 struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* loop ctr ADDR register */
477 } regs;
478 unsigned num_temp; /* Elements(regs.r) */
479 unsigned num_scratch;
480 unsigned loop_depth;
481 unsigned loop_depth_max;
482 unsigned cond_depth;
483 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
484 unsigned cond_labels[NINE_MAX_COND_DEPTH];
485
486 unsigned *inst_labels; /* LABEL op */
487 unsigned num_inst_labels;
488
489 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
490
491 struct sm1_local_const *lconstf;
492 unsigned num_lconstf;
493 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
494 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
495
496 boolean indirect_const_access;
497
498 struct nine_shader_info *info;
499
500 int16_t op_info_map[D3DSIO_BREAKP + 1];
501 };
502
/* Shader stage tests; both expect a `tx` translator pointer in scope. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
508
509 static void
510 sm1_instruction_check(const struct sm1_instruction *insn)
511 {
512 if (insn->opcode == D3DSIO_CRS)
513 {
514 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
515 {
516 DBG("CRS.mask.w\n");
517 }
518 }
519 }
520
521 static boolean
522 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
523 {
524 INT i;
525 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
526 for (i = 0; i < tx->num_lconstf; ++i) {
527 if (tx->lconstf[i].idx == index) {
528 *src = tx->lconstf[i].reg;
529 return TRUE;
530 }
531 }
532 return FALSE;
533 }
534 static boolean
535 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
536 {
537 assert(index >= 0 && index < NINE_MAX_CONST_I);
538 if (tx->lconsti[index].idx == index)
539 *src = tx->lconsti[index].reg;
540 return tx->lconsti[index].idx == index;
541 }
542 static boolean
543 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
544 {
545 assert(index >= 0 && index < NINE_MAX_CONST_B);
546 if (tx->lconstb[index].idx == index)
547 *src = tx->lconstb[index].reg;
548 return tx->lconstb[index].idx == index;
549 }
550
551 static void
552 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
553 {
554 unsigned n;
555
556 /* Anno1404 sets out of range constants. */
557 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
558 if (index >= NINE_MAX_CONST_F)
559 WARN("lconstf index %i too high, indirect access won't work\n", index);
560
561 for (n = 0; n < tx->num_lconstf; ++n)
562 if (tx->lconstf[n].idx == index)
563 break;
564 if (n == tx->num_lconstf) {
565 if ((n % 8) == 0) {
566 tx->lconstf = REALLOC(tx->lconstf,
567 (n + 0) * sizeof(tx->lconstf[0]),
568 (n + 8) * sizeof(tx->lconstf[0]));
569 assert(tx->lconstf);
570 }
571 tx->num_lconstf++;
572 }
573 tx->lconstf[n].idx = index;
574 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
575
576 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
577 }
578 static void
579 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
580 {
581 assert(index >= 0 && index < NINE_MAX_CONST_I);
582 tx->lconsti[index].idx = index;
583 tx->lconsti[index].reg = tx->native_integers ?
584 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
585 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
586 }
587 static void
588 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
589 {
590 assert(index >= 0 && index < NINE_MAX_CONST_B);
591 tx->lconstb[index].idx = index;
592 tx->lconstb[index].reg = tx->native_integers ?
593 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
594 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
595 }
596
597 static INLINE struct ureg_dst
598 tx_scratch(struct shader_translator *tx)
599 {
600 assert(tx->num_scratch < Elements(tx->regs.t));
601 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
602 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
603 return tx->regs.t[tx->num_scratch++];
604 }
605
606 static INLINE struct ureg_dst
607 tx_scratch_scalar(struct shader_translator *tx)
608 {
609 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
610 }
611
612 static INLINE struct ureg_src
613 tx_src_scalar(struct ureg_dst dst)
614 {
615 struct ureg_src src = ureg_src(dst);
616 int c = ffs(dst.WriteMask) - 1;
617 if (dst.WriteMask == (1 << c))
618 src = ureg_scalar(src, c);
619 return src;
620 }
621
622 /* Need to declare all constants if indirect addressing is used,
623 * otherwise we could scan the shader to determine the maximum.
624 * TODO: It doesn't really matter for nv50 so I won't do the scan,
625 * but radeon drivers might care, if they don't infer it from TGSI.
626 */
627 static void
628 tx_decl_constants(struct shader_translator *tx)
629 {
630 unsigned i, n = 0;
631
632 for (i = 0; i < NINE_MAX_CONST_F; ++i)
633 ureg_DECL_constant(tx->ureg, n++);
634 for (i = 0; i < NINE_MAX_CONST_I; ++i)
635 ureg_DECL_constant(tx->ureg, n++);
636 for (i = 0; i < (NINE_MAX_CONST_B / 4); ++i)
637 ureg_DECL_constant(tx->ureg, n++);
638 }
639
640 static INLINE void
641 tx_temp_alloc(struct shader_translator *tx, INT idx)
642 {
643 assert(idx >= 0);
644 if (idx >= tx->num_temp) {
645 unsigned k = tx->num_temp;
646 unsigned n = idx + 1;
647 tx->regs.r = REALLOC(tx->regs.r,
648 k * sizeof(tx->regs.r[0]),
649 n * sizeof(tx->regs.r[0]));
650 for (; k < n; ++k)
651 tx->regs.r[k] = ureg_dst_undef();
652 tx->num_temp = n;
653 }
654 if (ureg_dst_is_undef(tx->regs.r[idx]))
655 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
656 }
657
658 static INLINE void
659 tx_addr_alloc(struct shader_translator *tx, INT idx)
660 {
661 assert(idx == 0);
662 if (ureg_dst_is_undef(tx->regs.a))
663 tx->regs.a = ureg_DECL_address(tx->ureg);
664 }
665
666 static INLINE void
667 tx_pred_alloc(struct shader_translator *tx, INT idx)
668 {
669 assert(idx == 0);
670 if (ureg_dst_is_undef(tx->regs.p))
671 tx->regs.p = ureg_DECL_predicate(tx->ureg);
672 }
673
674 static INLINE void
675 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
676 {
677 assert(IS_PS);
678 assert(idx >= 0 && idx < Elements(tx->regs.vT));
679 if (ureg_src_is_undef(tx->regs.vT[idx]))
680 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
681 TGSI_INTERPOLATE_PERSPECTIVE);
682 }
683
684 static INLINE unsigned *
685 tx_bgnloop(struct shader_translator *tx)
686 {
687 tx->loop_depth++;
688 if (tx->loop_depth_max < tx->loop_depth)
689 tx->loop_depth_max = tx->loop_depth;
690 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
691 return &tx->loop_labels[tx->loop_depth - 1];
692 }
693
694 static INLINE unsigned *
695 tx_endloop(struct shader_translator *tx)
696 {
697 assert(tx->loop_depth);
698 tx->loop_depth--;
699 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
700 ureg_get_instruction_number(tx->ureg));
701 return &tx->loop_labels[tx->loop_depth];
702 }
703
704 static struct ureg_dst
705 tx_get_loopctr(struct shader_translator *tx)
706 {
707 const unsigned l = tx->loop_depth - 1;
708
709 if (!tx->loop_depth)
710 {
711 DBG("loop counter requested outside of loop\n");
712 return ureg_dst_undef();
713 }
714
715 if (ureg_dst_is_undef(tx->regs.aL[l]))
716 {
717 struct ureg_dst rreg = ureg_DECL_local_temporary(tx->ureg);
718 struct ureg_dst areg = ureg_DECL_address(tx->ureg);
719 unsigned c;
720
721 assert(l % 4 == 0);
722 for (c = l; c < (l + 4) && c < Elements(tx->regs.aL); ++c) {
723 tx->regs.rL[c] = ureg_writemask(rreg, 1 << (c & 3));
724 tx->regs.aL[c] = ureg_writemask(areg, 1 << (c & 3));
725 }
726 }
727 return tx->regs.rL[l];
728 }
729 static struct ureg_dst
730 tx_get_aL(struct shader_translator *tx)
731 {
732 if (!ureg_dst_is_undef(tx_get_loopctr(tx)))
733 return tx->regs.aL[tx->loop_depth - 1];
734 return ureg_dst_undef();
735 }
736
737 static INLINE unsigned *
738 tx_cond(struct shader_translator *tx)
739 {
740 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
741 tx->cond_depth++;
742 return &tx->cond_labels[tx->cond_depth - 1];
743 }
744
745 static INLINE unsigned *
746 tx_elsecond(struct shader_translator *tx)
747 {
748 assert(tx->cond_depth);
749 return &tx->cond_labels[tx->cond_depth - 1];
750 }
751
752 static INLINE void
753 tx_endcond(struct shader_translator *tx)
754 {
755 assert(tx->cond_depth);
756 tx->cond_depth--;
757 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
758 ureg_get_instruction_number(tx->ureg));
759 }
760
761 static INLINE struct ureg_dst
762 nine_ureg_dst_register(unsigned file, int index)
763 {
764 return ureg_dst(ureg_src_register(file, index));
765 }
766
767 static struct ureg_src
768 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
769 {
770 struct ureg_program *ureg = tx->ureg;
771 struct ureg_src src;
772 struct ureg_dst tmp;
773
774 switch (param->file)
775 {
776 case D3DSPR_TEMP:
777 assert(!param->rel);
778 tx_temp_alloc(tx, param->idx);
779 src = ureg_src(tx->regs.r[param->idx]);
780 break;
781 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
782 case D3DSPR_ADDR:
783 assert(!param->rel);
784 if (IS_VS) {
785 tx_addr_alloc(tx, param->idx);
786 src = ureg_src(tx->regs.a);
787 } else {
788 if (tx->version.major < 2 && tx->version.minor < 4) {
789 /* no subroutines, so should be defined */
790 src = ureg_src(tx->regs.tS[param->idx]);
791 } else {
792 tx_texcoord_alloc(tx, param->idx);
793 src = tx->regs.vT[param->idx];
794 }
795 }
796 break;
797 case D3DSPR_INPUT:
798 if (IS_VS) {
799 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
800 } else {
801 if (tx->version.major < 3) {
802 assert(!param->rel);
803 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
804 param->idx,
805 TGSI_INTERPOLATE_PERSPECTIVE);
806 } else {
807 assert(!param->rel); /* TODO */
808 assert(param->idx < Elements(tx->regs.v));
809 src = tx->regs.v[param->idx];
810 }
811 }
812 break;
813 case D3DSPR_PREDICATE:
814 assert(!param->rel);
815 tx_pred_alloc(tx, param->idx);
816 src = ureg_src(tx->regs.p);
817 break;
818 case D3DSPR_SAMPLER:
819 assert(param->mod == NINED3DSPSM_NONE);
820 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
821 assert(!param->rel);
822 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
823 break;
824 case D3DSPR_CONST:
825 if (param->rel)
826 tx->indirect_const_access = TRUE;
827 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
828 if (!param->rel)
829 nine_info_mark_const_f_used(tx->info, param->idx);
830 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
831 }
832 break;
833 case D3DSPR_CONST2:
834 case D3DSPR_CONST3:
835 case D3DSPR_CONST4:
836 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
837 assert(!"CONST2/3/4");
838 src = ureg_imm1f(ureg, 0.0f);
839 break;
840 case D3DSPR_CONSTINT:
841 if (param->rel || !tx_lconsti(tx, &src, param->idx)) {
842 if (!param->rel)
843 nine_info_mark_const_i_used(tx->info, param->idx);
844 src = ureg_src_register(TGSI_FILE_CONSTANT,
845 tx->info->const_i_base + param->idx);
846 }
847 break;
848 case D3DSPR_CONSTBOOL:
849 if (param->rel || !tx_lconstb(tx, &src, param->idx)) {
850 char r = param->idx / 4;
851 char s = param->idx & 3;
852 if (!param->rel)
853 nine_info_mark_const_b_used(tx->info, param->idx);
854 src = ureg_src_register(TGSI_FILE_CONSTANT,
855 tx->info->const_b_base + r);
856 src = ureg_swizzle(src, s, s, s, s);
857 }
858 break;
859 case D3DSPR_LOOP:
860 src = tx_src_scalar(tx_get_aL(tx));
861 break;
862 case D3DSPR_MISCTYPE:
863 switch (param->idx) {
864 case D3DSMO_POSITION:
865 if (ureg_src_is_undef(tx->regs.vPos))
866 tx->regs.vPos = ureg_DECL_fs_input(ureg,
867 TGSI_SEMANTIC_POSITION, 0,
868 TGSI_INTERPOLATE_LINEAR);
869 if (tx->shift_wpos) {
870 /* TODO: do this only once */
871 struct ureg_dst wpos = tx_scratch(tx);
872 ureg_SUB(ureg, wpos, tx->regs.vPos,
873 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
874 src = ureg_src(wpos);
875 } else {
876 src = tx->regs.vPos;
877 }
878 break;
879 case D3DSMO_FACE:
880 if (ureg_src_is_undef(tx->regs.vFace)) {
881 tx->regs.vFace = ureg_DECL_fs_input(ureg,
882 TGSI_SEMANTIC_FACE, 0,
883 TGSI_INTERPOLATE_CONSTANT);
884 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
885 }
886 src = tx->regs.vFace;
887 break;
888 default:
889 assert(!"invalid src D3DSMO");
890 break;
891 }
892 assert(!param->rel);
893 break;
894 case D3DSPR_TEMPFLOAT16:
895 break;
896 default:
897 assert(!"invalid src D3DSPR");
898 }
899 if (param->rel)
900 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
901
902 if (param->swizzle != NINED3DSP_NOSWIZZLE)
903 src = ureg_swizzle(src,
904 (param->swizzle >> 0) & 0x3,
905 (param->swizzle >> 2) & 0x3,
906 (param->swizzle >> 4) & 0x3,
907 (param->swizzle >> 6) & 0x3);
908
909 switch (param->mod) {
910 case NINED3DSPSM_ABS:
911 src = ureg_abs(src);
912 break;
913 case NINED3DSPSM_ABSNEG:
914 src = ureg_negate(ureg_abs(src));
915 break;
916 case NINED3DSPSM_NEG:
917 src = ureg_negate(src);
918 break;
919 case NINED3DSPSM_BIAS:
920 tmp = tx_scratch(tx);
921 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
922 src = ureg_src(tmp);
923 break;
924 case NINED3DSPSM_BIASNEG:
925 tmp = tx_scratch(tx);
926 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
927 src = ureg_src(tmp);
928 break;
929 case NINED3DSPSM_NOT:
930 if (tx->native_integers) {
931 tmp = tx_scratch(tx);
932 ureg_NOT(ureg, tmp, src);
933 src = ureg_src(tmp);
934 break;
935 }
936 /* fall through */
937 case NINED3DSPSM_COMP:
938 tmp = tx_scratch(tx);
939 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
940 src = ureg_src(tmp);
941 break;
942 case NINED3DSPSM_DZ:
943 case NINED3DSPSM_DW:
944 /* handled in instruction */
945 break;
946 case NINED3DSPSM_SIGN:
947 tmp = tx_scratch(tx);
948 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
949 src = ureg_src(tmp);
950 break;
951 case NINED3DSPSM_SIGNNEG:
952 tmp = tx_scratch(tx);
953 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
954 src = ureg_src(tmp);
955 break;
956 case NINED3DSPSM_X2:
957 tmp = tx_scratch(tx);
958 ureg_ADD(ureg, tmp, src, src);
959 src = ureg_src(tmp);
960 break;
961 case NINED3DSPSM_X2NEG:
962 tmp = tx_scratch(tx);
963 ureg_ADD(ureg, tmp, src, src);
964 src = ureg_negate(ureg_src(tmp));
965 break;
966 default:
967 assert(param->mod == NINED3DSPSM_NONE);
968 break;
969 }
970
971 return src;
972 }
973
974 static struct ureg_dst
975 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
976 {
977 struct ureg_dst dst;
978
979 switch (param->file)
980 {
981 case D3DSPR_TEMP:
982 assert(!param->rel);
983 tx_temp_alloc(tx, param->idx);
984 dst = tx->regs.r[param->idx];
985 break;
986 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
987 case D3DSPR_ADDR:
988 assert(!param->rel);
989 if (tx->version.major < 2 && !IS_VS) {
990 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
991 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
992 dst = tx->regs.tS[param->idx];
993 } else
994 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
995 tx_texcoord_alloc(tx, param->idx);
996 dst = ureg_dst(tx->regs.vT[param->idx]);
997 } else {
998 tx_addr_alloc(tx, param->idx);
999 dst = tx->regs.a;
1000 }
1001 break;
1002 case D3DSPR_RASTOUT:
1003 assert(!param->rel);
1004 switch (param->idx) {
1005 case 0:
1006 if (ureg_dst_is_undef(tx->regs.oPos))
1007 tx->regs.oPos =
1008 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1009 dst = tx->regs.oPos;
1010 break;
1011 case 1:
1012 if (ureg_dst_is_undef(tx->regs.oFog))
1013 tx->regs.oFog =
1014 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1015 dst = tx->regs.oFog;
1016 break;
1017 case 2:
1018 if (ureg_dst_is_undef(tx->regs.oPts))
1019 tx->regs.oPts =
1020 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1021 dst = tx->regs.oPts;
1022 break;
1023 default:
1024 assert(0);
1025 break;
1026 }
1027 break;
1028 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1029 case D3DSPR_OUTPUT:
1030 if (tx->version.major < 3) {
1031 assert(!param->rel);
1032 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1033 } else {
1034 assert(!param->rel); /* TODO */
1035 assert(param->idx < Elements(tx->regs.o));
1036 dst = tx->regs.o[param->idx];
1037 }
1038 break;
1039 case D3DSPR_ATTROUT: /* VS */
1040 case D3DSPR_COLOROUT: /* PS */
1041 assert(param->idx >= 0 && param->idx < 4);
1042 assert(!param->rel);
1043 tx->info->rt_mask |= 1 << param->idx;
1044 if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
1045 tx->regs.oCol[param->idx] =
1046 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1047 dst = tx->regs.oCol[param->idx];
1048 if (IS_VS && tx->version.major < 3)
1049 dst = ureg_saturate(dst);
1050 break;
1051 case D3DSPR_DEPTHOUT:
1052 assert(!param->rel);
1053 if (ureg_dst_is_undef(tx->regs.oDepth))
1054 tx->regs.oDepth =
1055 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1056 TGSI_WRITEMASK_Z);
1057 dst = tx->regs.oDepth; /* XXX: must write .z component */
1058 break;
1059 case D3DSPR_PREDICATE:
1060 assert(!param->rel);
1061 tx_pred_alloc(tx, param->idx);
1062 dst = tx->regs.p;
1063 break;
1064 case D3DSPR_TEMPFLOAT16:
1065 DBG("unhandled D3DSPR: %u\n", param->file);
1066 break;
1067 default:
1068 assert(!"invalid dst D3DSPR");
1069 break;
1070 }
1071 if (param->rel)
1072 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1073
1074 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1075 dst = ureg_writemask(dst, param->mask);
1076 if (param->mod & NINED3DSPDM_SATURATE)
1077 dst = ureg_saturate(dst);
1078
1079 return dst;
1080 }
1081
1082 static struct ureg_dst
1083 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1084 {
1085 if (param->shift) {
1086 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1087 return tx->regs.tdst;
1088 }
1089 return _tx_dst_param(tx, param);
1090 }
1091
1092 static void
1093 tx_apply_dst0_modifiers(struct shader_translator *tx)
1094 {
1095 struct ureg_dst rdst;
1096 float f;
1097
1098 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1099 return;
1100 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1101
1102 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1103
1104 if (tx->insn.dst[0].shift < 0)
1105 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1106 else
1107 f = 1 << tx->insn.dst[0].shift;
1108
1109 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1110 }
1111
1112 static struct ureg_src
1113 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1114 {
1115 struct ureg_src src;
1116
1117 assert(!param->shift);
1118 assert(!(param->mod & NINED3DSPDM_SATURATE));
1119
1120 switch (param->file) {
1121 case D3DSPR_INPUT:
1122 if (IS_VS) {
1123 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1124 } else {
1125 assert(!param->rel);
1126 assert(param->idx < Elements(tx->regs.v));
1127 src = tx->regs.v[param->idx];
1128 }
1129 break;
1130 default:
1131 src = ureg_src(tx_dst_param(tx, param));
1132 break;
1133 }
1134 if (param->rel)
1135 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1136
1137 if (!param->mask)
1138 WARN("mask is 0, using identity swizzle\n");
1139
1140 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1141 char s[4];
1142 int n;
1143 int c;
1144 for (n = 0, c = 0; c < 4; ++c)
1145 if (param->mask & (1 << c))
1146 s[n++] = c;
1147 assert(n);
1148 for (c = n; c < 4; ++c)
1149 s[c] = s[n - 1];
1150 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1151 }
1152 return src;
1153 }
1154
1155 static HRESULT
1156 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1157 {
1158 struct ureg_program *ureg = tx->ureg;
1159 struct ureg_dst dst;
1160 struct ureg_src src[2];
1161 struct sm1_src_param *src_mat = &tx->insn.src[1];
1162 unsigned i;
1163
1164 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1165 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1166
1167 for (i = 0; i < n; i++)
1168 {
1169 const unsigned m = (1 << i);
1170
1171 src[1] = tx_src_param(tx, src_mat);
1172 src_mat->idx++;
1173
1174 if (!(dst.WriteMask & m))
1175 continue;
1176
1177 /* XXX: src == dst case ? */
1178
1179 switch (k) {
1180 case 3:
1181 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1182 break;
1183 case 4:
1184 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1185 break;
1186 default:
1187 DBG("invalid operation: M%ux%u\n", m, n);
1188 break;
1189 }
1190 }
1191
1192 return D3D_OK;
1193 }
1194
/* Shader version helpers used when filling the instruction table:
 * V(maj, min) packs a version as (major << 8) | minor, and VNOTSUPPORTED
 * expands to a "0, 0" pair. */
#define VNOTSUPPORTED   0, 0
#define V(maj, min)     (((maj) << 8) | (min))
1197
1198 static INLINE const char *
1199 d3dsio_to_string( unsigned opcode )
1200 {
1201 static const char *names[] = {
1202 "NOP",
1203 "MOV",
1204 "ADD",
1205 "SUB",
1206 "MAD",
1207 "MUL",
1208 "RCP",
1209 "RSQ",
1210 "DP3",
1211 "DP4",
1212 "MIN",
1213 "MAX",
1214 "SLT",
1215 "SGE",
1216 "EXP",
1217 "LOG",
1218 "LIT",
1219 "DST",
1220 "LRP",
1221 "FRC",
1222 "M4x4",
1223 "M4x3",
1224 "M3x4",
1225 "M3x3",
1226 "M3x2",
1227 "CALL",
1228 "CALLNZ",
1229 "LOOP",
1230 "RET",
1231 "ENDLOOP",
1232 "LABEL",
1233 "DCL",
1234 "POW",
1235 "CRS",
1236 "SGN",
1237 "ABS",
1238 "NRM",
1239 "SINCOS",
1240 "REP",
1241 "ENDREP",
1242 "IF",
1243 "IFC",
1244 "ELSE",
1245 "ENDIF",
1246 "BREAK",
1247 "BREAKC",
1248 "MOVA",
1249 "DEFB",
1250 "DEFI",
1251 NULL,
1252 NULL,
1253 NULL,
1254 NULL,
1255 NULL,
1256 NULL,
1257 NULL,
1258 NULL,
1259 NULL,
1260 NULL,
1261 NULL,
1262 NULL,
1263 NULL,
1264 NULL,
1265 NULL,
1266 "TEXCOORD",
1267 "TEXKILL",
1268 "TEX",
1269 "TEXBEM",
1270 "TEXBEML",
1271 "TEXREG2AR",
1272 "TEXREG2GB",
1273 "TEXM3x2PAD",
1274 "TEXM3x2TEX",
1275 "TEXM3x3PAD",
1276 "TEXM3x3TEX",
1277 NULL,
1278 "TEXM3x3SPEC",
1279 "TEXM3x3VSPEC",
1280 "EXPP",
1281 "LOGP",
1282 "CND",
1283 "DEF",
1284 "TEXREG2RGB",
1285 "TEXDP3TEX",
1286 "TEXM3x2DEPTH",
1287 "TEXDP3",
1288 "TEXM3x3",
1289 "TEXDEPTH",
1290 "CMP",
1291 "BEM",
1292 "DP2ADD",
1293 "DSX",
1294 "DSY",
1295 "TEXLDD",
1296 "SETP",
1297 "TEXLDL",
1298 "BREAKP"
1299 };
1300
1301 if (opcode < Elements(names)) return names[opcode];
1302
1303 switch (opcode) {
1304 case D3DSIO_PHASE: return "PHASE";
1305 case D3DSIO_COMMENT: return "COMMENT";
1306 case D3DSIO_END: return "END";
1307 default:
1308 return NULL;
1309 }
1310 }
1311
/* NULL_INSTRUCTION is an all-zero instruction table entry;
 * IS_VALID_INSTRUCTION() is non-zero when any of an entry's version bounds
 * is non-zero. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
#define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
                                     (inst).vert_version.max | \
                                     (inst).frag_version.min | \
                                     (inst).frag_version.max)
1317
/* SPECIAL(name) names the hand-written translation routine for an opcode;
 * DECL_SPECIAL(name) opens its definition, taking the translator as "tx". */
#define SPECIAL(name) NineTranslateInstruction_##name

#define DECL_SPECIAL(name)                                          \
    static HRESULT                                                  \
    NineTranslateInstruction_##name( struct shader_translator *tx )
1324
1325 static HRESULT
1326 NineTranslateInstruction_Generic(struct shader_translator *);
1327
1328 DECL_SPECIAL(M4x4)
1329 {
1330 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1331 }
1332
1333 DECL_SPECIAL(M4x3)
1334 {
1335 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1336 }
1337
1338 DECL_SPECIAL(M3x4)
1339 {
1340 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1341 }
1342
1343 DECL_SPECIAL(M3x3)
1344 {
1345 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1346 }
1347
1348 DECL_SPECIAL(M3x2)
1349 {
1350 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1351 }
1352
1353 DECL_SPECIAL(CMP)
1354 {
1355 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1356 tx_src_param(tx, &tx->insn.src[0]),
1357 tx_src_param(tx, &tx->insn.src[2]),
1358 tx_src_param(tx, &tx->insn.src[1]));
1359 return D3D_OK;
1360 }
1361
1362 DECL_SPECIAL(CND)
1363 {
1364 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1365 struct ureg_dst cgt;
1366 struct ureg_src cnd;
1367
1368 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4) {
1369 ureg_MOV(tx->ureg,
1370 dst, tx_src_param(tx, &tx->insn.src[1]));
1371 return D3D_OK;
1372 }
1373
1374 cnd = tx_src_param(tx, &tx->insn.src[0]);
1375 cgt = tx_scratch(tx);
1376
1377 if (tx->version.major == 1 && tx->version.minor < 4) {
1378 cgt.WriteMask = TGSI_WRITEMASK_W;
1379 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1380 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1381 } else {
1382 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1383 }
1384 ureg_CMP(tx->ureg, dst,
1385 tx_src_param(tx, &tx->insn.src[1]),
1386 tx_src_param(tx, &tx->insn.src[2]), ureg_negate(cnd));
1387 return D3D_OK;
1388 }
1389
1390 DECL_SPECIAL(CALL)
1391 {
1392 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1393 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1394 return D3D_OK;
1395 }
1396
1397 DECL_SPECIAL(CALLNZ)
1398 {
1399 struct ureg_program *ureg = tx->ureg;
1400 struct ureg_dst tmp = tx_scratch_scalar(tx);
1401 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1402
1403 /* NOTE: source should be const bool, so we can use NOT/SUB instead of [U]SNE 0 */
1404 if (!tx->insn.flags) {
1405 if (tx->native_integers)
1406 ureg_NOT(ureg, tmp, src);
1407 else
1408 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1409 }
1410 ureg_IF(ureg, tx->insn.flags ? src : tx_src_scalar(tmp), tx_cond(tx));
1411 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1412 tx_endcond(tx);
1413 ureg_ENDIF(ureg);
1414 return D3D_OK;
1415 }
1416
1417 DECL_SPECIAL(MOV_vs1x)
1418 {
1419 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1420 ureg_ARL(tx->ureg,
1421 tx_dst_param(tx, &tx->insn.dst[0]),
1422 tx_src_param(tx, &tx->insn.src[0]));
1423 return D3D_OK;
1424 }
1425 return NineTranslateInstruction_Generic(tx);
1426 }
1427
1428 DECL_SPECIAL(LOOP)
1429 {
1430 struct ureg_program *ureg = tx->ureg;
1431 unsigned *label;
1432 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1433 struct ureg_src iter = ureg_scalar(src, TGSI_SWIZZLE_X);
1434 struct ureg_src init = ureg_scalar(src, TGSI_SWIZZLE_Y);
1435 struct ureg_src step = ureg_scalar(src, TGSI_SWIZZLE_Z);
1436 struct ureg_dst ctr;
1437 struct ureg_dst tmp = tx_scratch_scalar(tx);
1438
1439 label = tx_bgnloop(tx);
1440 ctr = tx_get_loopctr(tx);
1441
1442 ureg_MOV(tx->ureg, ctr, init);
1443 ureg_BGNLOOP(tx->ureg, label);
1444 if (tx->native_integers) {
1445 /* we'll let the backend pull up that MAD ... */
1446 ureg_UMAD(ureg, tmp, iter, step, init);
1447 ureg_USEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
1448 #ifdef NINE_TGSI_LAZY_DEVS
1449 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1450 #endif
1451 } else {
1452 /* can't simply use SGE for precision because step might be negative */
1453 ureg_MAD(ureg, tmp, iter, step, init);
1454 ureg_SEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
1455 #ifdef NINE_TGSI_LAZY_DEVS
1456 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1457 #endif
1458 }
1459 #ifdef NINE_TGSI_LAZY_DEVS
1460 ureg_BRK(ureg);
1461 tx_endcond(tx);
1462 ureg_ENDIF(ureg);
1463 #else
1464 ureg_BREAKC(ureg, tx_src_scalar(tmp));
1465 #endif
1466 if (tx->native_integers) {
1467 ureg_UARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
1468 ureg_UADD(ureg, ctr, tx_src_scalar(ctr), step);
1469 } else {
1470 ureg_ARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
1471 ureg_ADD(ureg, ctr, tx_src_scalar(ctr), step);
1472 }
1473 return D3D_OK;
1474 }
1475
1476 DECL_SPECIAL(RET)
1477 {
1478 ureg_RET(tx->ureg);
1479 return D3D_OK;
1480 }
1481
1482 DECL_SPECIAL(ENDLOOP)
1483 {
1484 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1485 return D3D_OK;
1486 }
1487
1488 DECL_SPECIAL(LABEL)
1489 {
1490 unsigned k = tx->num_inst_labels;
1491 unsigned n = tx->insn.src[0].idx;
1492 assert(n < 2048);
1493 if (n >= k)
1494 tx->inst_labels = REALLOC(tx->inst_labels,
1495 k * sizeof(tx->inst_labels[0]),
1496 n * sizeof(tx->inst_labels[0]));
1497
1498 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1499 return D3D_OK;
1500 }
1501
1502 DECL_SPECIAL(SINCOS)
1503 {
1504 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1505 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1506
1507 assert(!(dst.WriteMask & 0xc));
1508
1509 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1510 ureg_SCS(tx->ureg, dst, src);
1511 return D3D_OK;
1512 }
1513
1514 DECL_SPECIAL(SGN)
1515 {
1516 ureg_SSG(tx->ureg,
1517 tx_dst_param(tx, &tx->insn.dst[0]),
1518 tx_src_param(tx, &tx->insn.src[0]));
1519 return D3D_OK;
1520 }
1521
1522 DECL_SPECIAL(REP)
1523 {
1524 struct ureg_program *ureg = tx->ureg;
1525 unsigned *label;
1526 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1527 struct ureg_dst ctr;
1528 struct ureg_dst tmp = tx_scratch_scalar(tx);
1529 struct ureg_src imm =
1530 tx->native_integers ? ureg_imm1u(ureg, 0) : ureg_imm1f(ureg, 0.0f);
1531
1532 label = tx_bgnloop(tx);
1533 ctr = tx_get_loopctr(tx);
1534
1535 /* NOTE: rep must be constant, so we don't have to save the count */
1536 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1537
1538 ureg_MOV(ureg, ctr, imm);
1539 ureg_BGNLOOP(ureg, label);
1540 if (tx->native_integers)
1541 {
1542 ureg_USGE(ureg, tmp, tx_src_scalar(ctr), rep);
1543 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1544 }
1545 else
1546 {
1547 ureg_SGE(ureg, tmp, tx_src_scalar(ctr), rep);
1548 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1549 }
1550 ureg_BRK(ureg);
1551 tx_endcond(tx);
1552 ureg_ENDIF(ureg);
1553
1554 if (tx->native_integers) {
1555 ureg_UADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1u(ureg, 1));
1556 } else {
1557 ureg_ADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1f(ureg, 1.0f));
1558 }
1559
1560 return D3D_OK;
1561 }
1562
1563 DECL_SPECIAL(ENDREP)
1564 {
1565 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1566 return D3D_OK;
1567 }
1568
1569 DECL_SPECIAL(ENDIF)
1570 {
1571 tx_endcond(tx);
1572 ureg_ENDIF(tx->ureg);
1573 return D3D_OK;
1574 }
1575
1576 DECL_SPECIAL(IF)
1577 {
1578 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1579
1580 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1581 ureg_UIF(tx->ureg, src, tx_cond(tx));
1582 else
1583 ureg_IF(tx->ureg, src, tx_cond(tx));
1584
1585 return D3D_OK;
1586 }
1587
1588 static INLINE unsigned
1589 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1590 {
1591 switch (flags) {
1592 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1593 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1594 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1595 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1596 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1597 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1598 default:
1599 assert(!"invalid comparison flags");
1600 return TGSI_OPCODE_SGT;
1601 }
1602 }
1603
1604 DECL_SPECIAL(IFC)
1605 {
1606 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1607 struct ureg_src src[2];
1608 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1609 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1610 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1611 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1612 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1613 return D3D_OK;
1614 }
1615
1616 DECL_SPECIAL(ELSE)
1617 {
1618 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1619 return D3D_OK;
1620 }
1621
1622 DECL_SPECIAL(BREAKC)
1623 {
1624 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1625 struct ureg_src src[2];
1626 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1627 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1628 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1629 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1630 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1631 ureg_BRK(tx->ureg);
1632 tx_endcond(tx);
1633 ureg_ENDIF(tx->ureg);
1634 return D3D_OK;
1635 }
1636
1637 static const char *sm1_declusage_names[] =
1638 {
1639 [D3DDECLUSAGE_POSITION] = "POSITION",
1640 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1641 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1642 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1643 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1644 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1645 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1646 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1647 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1648 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1649 [D3DDECLUSAGE_COLOR] = "COLOR",
1650 [D3DDECLUSAGE_FOG] = "FOG",
1651 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1652 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1653 };
1654
1655 static INLINE unsigned
1656 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1657 {
1658 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1659 }
1660
1661 static void
1662 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1663 boolean tc,
1664 struct sm1_semantic *dcl)
1665 {
1666 BYTE index = dcl->usage_idx;
1667
1668 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1669 * we match to a TGSI_SEMANTIC_GENERIC with index.
1670 *
1671 * The index can be anything UINT16 and usage_idx is BYTE,
1672 * so we can fit everything. It doesn't matter if indices
1673 * are close together or low.
1674 *
1675 *
1676 * POSITION >= 1: 10 * index + 6
1677 * COLOR >= 2: 10 * (index-1) + 7
1678 * TEXCOORD[0..15]: index
1679 * BLENDWEIGHT: 10 * index + 18
1680 * BLENDINDICES: 10 * index + 19
1681 * NORMAL: 10 * index + 20
1682 * TANGENT: 10 * index + 21
1683 * BINORMAL: 10 * index + 22
1684 * TESSFACTOR: 10 * index + 23
1685 */
1686
1687 switch (dcl->usage) {
1688 case D3DDECLUSAGE_POSITION:
1689 case D3DDECLUSAGE_POSITIONT:
1690 case D3DDECLUSAGE_DEPTH:
1691 if (index == 0) {
1692 sem->Name = TGSI_SEMANTIC_POSITION;
1693 sem->Index = 0;
1694 } else {
1695 sem->Name = TGSI_SEMANTIC_GENERIC;
1696 sem->Index = 10 * index + 6;
1697 }
1698 break;
1699 case D3DDECLUSAGE_COLOR:
1700 if (index < 2) {
1701 sem->Name = TGSI_SEMANTIC_COLOR;
1702 sem->Index = index;
1703 } else {
1704 sem->Name = TGSI_SEMANTIC_GENERIC;
1705 sem->Index = 10 * (index-1) + 7;
1706 }
1707 break;
1708 case D3DDECLUSAGE_FOG:
1709 assert(index == 0);
1710 sem->Name = TGSI_SEMANTIC_FOG;
1711 sem->Index = 0;
1712 break;
1713 case D3DDECLUSAGE_PSIZE:
1714 assert(index == 0);
1715 sem->Name = TGSI_SEMANTIC_PSIZE;
1716 sem->Index = 0;
1717 break;
1718 case D3DDECLUSAGE_TEXCOORD:
1719 assert(index < 16);
1720 if (index < 8 && tc)
1721 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1722 else
1723 sem->Name = TGSI_SEMANTIC_GENERIC;
1724 sem->Index = index;
1725 break;
1726 case D3DDECLUSAGE_BLENDWEIGHT:
1727 sem->Name = TGSI_SEMANTIC_GENERIC;
1728 sem->Index = 10 * index + 18;
1729 break;
1730 case D3DDECLUSAGE_BLENDINDICES:
1731 sem->Name = TGSI_SEMANTIC_GENERIC;
1732 sem->Index = 10 * index + 19;
1733 break;
1734 case D3DDECLUSAGE_NORMAL:
1735 sem->Name = TGSI_SEMANTIC_GENERIC;
1736 sem->Index = 10 * index + 20;
1737 break;
1738 case D3DDECLUSAGE_TANGENT:
1739 sem->Name = TGSI_SEMANTIC_GENERIC;
1740 sem->Index = 10 * index + 21;
1741 break;
1742 case D3DDECLUSAGE_BINORMAL:
1743 sem->Name = TGSI_SEMANTIC_GENERIC;
1744 sem->Index = 10 * index + 22;
1745 break;
1746 case D3DDECLUSAGE_TESSFACTOR:
1747 sem->Name = TGSI_SEMANTIC_GENERIC;
1748 sem->Index = 10 * index + 23;
1749 break;
1750 case D3DDECLUSAGE_SAMPLE:
1751 sem->Name = TGSI_SEMANTIC_COUNT;
1752 sem->Index = 0;
1753 break;
1754 default:
1755 assert(!"Invalid DECLUSAGE.");
1756 break;
1757 }
1758 }
1759
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT,
 * i.e. in the form compared against sampler_type below. */
#define NINED3DSTT_1D       (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D       (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME   (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE     (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1764 static INLINE unsigned
1765 d3dstt_to_tgsi_tex(BYTE sampler_type)
1766 {
1767 switch (sampler_type) {
1768 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1769 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1770 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1771 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1772 default:
1773 assert(0);
1774 return TGSI_TEXTURE_UNKNOWN;
1775 }
1776 }
1777 static INLINE unsigned
1778 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1779 {
1780 switch (sampler_type) {
1781 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1782 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1783 case NINED3DSTT_VOLUME:
1784 case NINED3DSTT_CUBE:
1785 default:
1786 assert(0);
1787 return TGSI_TEXTURE_UNKNOWN;
1788 }
1789 }
1790 static INLINE unsigned
1791 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1792 {
1793 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1794 case 1: return TGSI_TEXTURE_1D;
1795 case 0: return TGSI_TEXTURE_2D;
1796 case 3: return TGSI_TEXTURE_3D;
1797 default:
1798 return TGSI_TEXTURE_CUBE;
1799 }
1800 }
1801
1802 static const char *
1803 sm1_sampler_type_name(BYTE sampler_type)
1804 {
1805 switch (sampler_type) {
1806 case NINED3DSTT_1D: return "1D";
1807 case NINED3DSTT_2D: return "2D";
1808 case NINED3DSTT_VOLUME: return "VOLUME";
1809 case NINED3DSTT_CUBE: return "CUBE";
1810 default:
1811 return "(D3DSTT_?)";
1812 }
1813 }
1814
1815 static INLINE unsigned
1816 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1817 {
1818 switch (sem->Name) {
1819 case TGSI_SEMANTIC_POSITION:
1820 case TGSI_SEMANTIC_NORMAL:
1821 return TGSI_INTERPOLATE_LINEAR;
1822 case TGSI_SEMANTIC_BCOLOR:
1823 case TGSI_SEMANTIC_COLOR:
1824 case TGSI_SEMANTIC_FOG:
1825 case TGSI_SEMANTIC_GENERIC:
1826 case TGSI_SEMANTIC_TEXCOORD:
1827 case TGSI_SEMANTIC_CLIPDIST:
1828 case TGSI_SEMANTIC_CLIPVERTEX:
1829 return TGSI_INTERPOLATE_PERSPECTIVE;
1830 case TGSI_SEMANTIC_EDGEFLAG:
1831 case TGSI_SEMANTIC_FACE:
1832 case TGSI_SEMANTIC_INSTANCEID:
1833 case TGSI_SEMANTIC_PCOORD:
1834 case TGSI_SEMANTIC_PRIMID:
1835 case TGSI_SEMANTIC_PSIZE:
1836 case TGSI_SEMANTIC_VERTEXID:
1837 return TGSI_INTERPOLATE_CONSTANT;
1838 default:
1839 assert(0);
1840 return TGSI_INTERPOLATE_CONSTANT;
1841 }
1842 }
1843
1844 DECL_SPECIAL(DCL)
1845 {
1846 struct ureg_program *ureg = tx->ureg;
1847 boolean is_input;
1848 boolean is_sampler;
1849 struct tgsi_declaration_semantic tgsi;
1850 struct sm1_semantic sem;
1851 sm1_read_semantic(tx, &sem);
1852
1853 is_input = sem.reg.file == D3DSPR_INPUT;
1854 is_sampler =
1855 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1856
1857 DUMP("DCL ");
1858 sm1_dump_dst_param(&sem.reg);
1859 if (is_sampler)
1860 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1861 else
1862 if (tx->version.major >= 3)
1863 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1864 else
1865 if (sem.usage | sem.usage_idx)
1866 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1867 else
1868 DUMP("\n");
1869
1870 if (is_sampler) {
1871 const unsigned m = 1 << sem.reg.idx;
1872 ureg_DECL_sampler(ureg, sem.reg.idx);
1873 tx->info->sampler_mask |= m;
1874 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1875 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1876 d3dstt_to_tgsi_tex(sem.sampler_type);
1877 return D3D_OK;
1878 }
1879
1880 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1881 if (IS_VS) {
1882 if (is_input) {
1883 /* linkage outside of shader with vertex declaration */
1884 ureg_DECL_vs_input(ureg, sem.reg.idx);
1885 assert(sem.reg.idx < Elements(tx->info->input_map));
1886 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1887 tx->info->num_inputs = sem.reg.idx + 1;
1888 /* NOTE: preserving order in case of indirect access */
1889 } else
1890 if (tx->version.major >= 3) {
1891 /* SM2 output semantic determined by file */
1892 assert(sem.reg.mask != 0);
1893 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1894 tx->info->position_t = TRUE;
1895 assert(sem.reg.idx < Elements(tx->regs.o));
1896 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1897 ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1898
1899 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1900 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1901 }
1902 } else {
1903 if (is_input && tx->version.major >= 3) {
1904 /* SM3 only, SM2 input semantic determined by file */
1905 assert(sem.reg.idx < Elements(tx->regs.v));
1906 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1907 ureg, tgsi.Name, tgsi.Index,
1908 nine_tgsi_to_interp_mode(&tgsi),
1909 0, /* cylwrap */
1910 sem.reg.mod & NINED3DSPDM_CENTROID);
1911 } else
1912 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
1913 /* FragColor or FragDepth */
1914 assert(sem.reg.mask != 0);
1915 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1916 }
1917 }
1918 return D3D_OK;
1919 }
1920
1921 DECL_SPECIAL(DEF)
1922 {
1923 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
1924 return D3D_OK;
1925 }
1926
1927 DECL_SPECIAL(DEFB)
1928 {
1929 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
1930 return D3D_OK;
1931 }
1932
1933 DECL_SPECIAL(DEFI)
1934 {
1935 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
1936 return D3D_OK;
1937 }
1938
1939 DECL_SPECIAL(POW)
1940 {
1941 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1942 struct ureg_src src[2] = {
1943 tx_src_param(tx, &tx->insn.src[0]),
1944 tx_src_param(tx, &tx->insn.src[1])
1945 };
1946 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
1947 return D3D_OK;
1948 }
1949
1950 DECL_SPECIAL(NRM)
1951 {
1952 struct ureg_program *ureg = tx->ureg;
1953 struct ureg_dst tmp = tx_scratch_scalar(tx);
1954 struct ureg_src nrm = tx_src_scalar(tmp);
1955 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1956 ureg_DP3(ureg, tmp, src, src);
1957 ureg_RSQ(ureg, tmp, nrm);
1958 ureg_MUL(ureg, tx_dst_param(tx, &tx->insn.dst[0]), src, nrm);
1959 return D3D_OK;
1960 }
1961
1962 DECL_SPECIAL(DP2ADD)
1963 {
1964 struct ureg_dst tmp = tx_scratch_scalar(tx);
1965 struct ureg_src dp2 = tx_src_scalar(tmp);
1966 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1967 struct ureg_src src[3];
1968 int i;
1969 for (i = 0; i < 3; ++i)
1970 src[i] = tx_src_param(tx, &tx->insn.src[i]);
1971 assert_replicate_swizzle(&src[2]);
1972
1973 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
1974 ureg_ADD(tx->ureg, dst, src[2], dp2);
1975
1976 return D3D_OK;
1977 }
1978
1979 DECL_SPECIAL(TEXCOORD)
1980 {
1981 struct ureg_program *ureg = tx->ureg;
1982 const unsigned s = tx->insn.dst[0].idx;
1983 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1984
1985 if (ureg_src_is_undef(tx->regs.vT[s]))
1986 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1987 ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
1988
1989 return D3D_OK;
1990 }
1991
1992 DECL_SPECIAL(TEXCOORD_ps14)
1993 {
1994 struct ureg_program *ureg = tx->ureg;
1995 const unsigned s = tx->insn.src[0].idx;
1996 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1997
1998 if (ureg_src_is_undef(tx->regs.vT[s]))
1999 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2000 ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
2001
2002 return D3D_OK;
2003 }
2004
2005 DECL_SPECIAL(TEXKILL)
2006 {
2007 struct ureg_src reg;
2008
2009 if (tx->version.major > 1 || tx->version.minor > 3) {
2010 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2011 } else {
2012 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2013 reg = tx->regs.vT[tx->insn.dst[0].idx];
2014 }
2015 if (tx->version.major < 2)
2016 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2017 ureg_KILL_IF(tx->ureg, reg);
2018
2019 return D3D_OK;
2020 }
2021
2022 DECL_SPECIAL(TEXBEM)
2023 {
2024 STUB(D3DERR_INVALIDCALL);
2025 }
2026
2027 DECL_SPECIAL(TEXBEML)
2028 {
2029 STUB(D3DERR_INVALIDCALL);
2030 }
2031
2032 DECL_SPECIAL(TEXREG2AR)
2033 {
2034 STUB(D3DERR_INVALIDCALL);
2035 }
2036
2037 DECL_SPECIAL(TEXREG2GB)
2038 {
2039 STUB(D3DERR_INVALIDCALL);
2040 }
2041
2042 DECL_SPECIAL(TEXM3x2PAD)
2043 {
2044 STUB(D3DERR_INVALIDCALL);
2045 }
2046
2047 DECL_SPECIAL(TEXM3x2TEX)
2048 {
2049 STUB(D3DERR_INVALIDCALL);
2050 }
2051
2052 DECL_SPECIAL(TEXM3x3PAD)
2053 {
2054 return D3D_OK; /* this is just padding */
2055 }
2056
2057 DECL_SPECIAL(TEXM3x3SPEC)
2058 {
2059 STUB(D3DERR_INVALIDCALL);
2060 }
2061
2062 DECL_SPECIAL(TEXM3x3VSPEC)
2063 {
2064 STUB(D3DERR_INVALIDCALL);
2065 }
2066
2067 DECL_SPECIAL(TEXREG2RGB)
2068 {
2069 STUB(D3DERR_INVALIDCALL);
2070 }
2071
2072 DECL_SPECIAL(TEXDP3TEX)
2073 {
2074 STUB(D3DERR_INVALIDCALL);
2075 }
2076
2077 DECL_SPECIAL(TEXM3x2DEPTH)
2078 {
2079 STUB(D3DERR_INVALIDCALL);
2080 }
2081
2082 DECL_SPECIAL(TEXDP3)
2083 {
2084 STUB(D3DERR_INVALIDCALL);
2085 }
2086
2087 DECL_SPECIAL(TEXM3x3)
2088 {
2089 struct ureg_program *ureg = tx->ureg;
2090 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2091 struct ureg_src src[4];
2092 int s;
2093 const int m = tx->insn.dst[0].idx - 2;
2094 const int n = tx->insn.src[0].idx;
2095 assert(m >= 0 && m > n);
2096
2097 for (s = m; s <= (m + 2); ++s) {
2098 if (ureg_src_is_undef(tx->regs.vT[s]))
2099 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2100 src[s] = tx->regs.vT[s];
2101 }
2102 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), src[0], ureg_src(tx->regs.tS[n]));
2103 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), src[1], ureg_src(tx->regs.tS[n]));
2104 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), src[2], ureg_src(tx->regs.tS[n]));
2105
2106 switch (tx->insn.opcode) {
2107 case D3DSIO_TEXM3x3:
2108 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2109 break;
2110 case D3DSIO_TEXM3x3TEX:
2111 src[3] = ureg_DECL_sampler(ureg, m + 2);
2112 tx->info->sampler_mask |= 1 << (m + 2);
2113 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), src[3]);
2114 break;
2115 default:
2116 return D3DERR_INVALIDCALL;
2117 }
2118 return D3D_OK;
2119 }
2120
2121 DECL_SPECIAL(TEXDEPTH)
2122 {
2123 STUB(D3DERR_INVALIDCALL);
2124 }
2125
2126 DECL_SPECIAL(BEM)
2127 {
2128 STUB(D3DERR_INVALIDCALL);
2129 }
2130
2131 DECL_SPECIAL(TEXLD)
2132 {
2133 struct ureg_program *ureg = tx->ureg;
2134 unsigned target;
2135 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2136 struct ureg_src src[2] = {
2137 tx_src_param(tx, &tx->insn.src[0]),
2138 tx_src_param(tx, &tx->insn.src[1])
2139 };
2140 assert(tx->insn.src[1].idx >= 0 &&
2141 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2142 target = tx->sampler_targets[tx->insn.src[1].idx];
2143
2144 switch (tx->insn.flags) {
2145 case 0:
2146 ureg_TEX(ureg, dst, target, src[0], src[1]);
2147 break;
2148 case NINED3DSI_TEXLD_PROJECT:
2149 ureg_TXP(ureg, dst, target, src[0], src[1]);
2150 break;
2151 case NINED3DSI_TEXLD_BIAS:
2152 ureg_TXB(ureg, dst, target, src[0], src[1]);
2153 break;
2154 default:
2155 assert(0);
2156 return D3DERR_INVALIDCALL;
2157 }
2158 return D3D_OK;
2159 }
2160
2161 DECL_SPECIAL(TEXLD_14)
2162 {
2163 struct ureg_program *ureg = tx->ureg;
2164 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2165 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2166 const unsigned s = tx->insn.dst[0].idx;
2167 const unsigned t = ps1x_sampler_type(tx->info, s);
2168
2169 tx->info->sampler_mask |= 1 << s;
2170 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2171
2172 return D3D_OK;
2173 }
2174
2175 DECL_SPECIAL(TEX)
2176 {
2177 struct ureg_program *ureg = tx->ureg;
2178 const unsigned s = tx->insn.dst[0].idx;
2179 const unsigned t = ps1x_sampler_type(tx->info, s);
2180 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2181 struct ureg_src src[2];
2182
2183 if (ureg_src_is_undef(tx->regs.vT[s]))
2184 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2185
2186 src[0] = tx->regs.vT[s];
2187 src[1] = ureg_DECL_sampler(ureg, s);
2188 tx->info->sampler_mask |= 1 << s;
2189
2190 ureg_TEX(ureg, dst, t, src[0], src[1]);
2191
2192 return D3D_OK;
2193 }
2194
2195 DECL_SPECIAL(TEXLDD)
2196 {
2197 unsigned target;
2198 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2199 struct ureg_src src[4] = {
2200 tx_src_param(tx, &tx->insn.src[0]),
2201 tx_src_param(tx, &tx->insn.src[1]),
2202 tx_src_param(tx, &tx->insn.src[2]),
2203 tx_src_param(tx, &tx->insn.src[3])
2204 };
2205 assert(tx->insn.src[3].idx >= 0 &&
2206 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2207 target = tx->sampler_targets[tx->insn.src[1].idx];
2208
2209 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2210 return D3D_OK;
2211 }
2212
2213 DECL_SPECIAL(TEXLDL)
2214 {
2215 unsigned target;
2216 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2217 struct ureg_src src[2] = {
2218 tx_src_param(tx, &tx->insn.src[0]),
2219 tx_src_param(tx, &tx->insn.src[1])
2220 };
2221 assert(tx->insn.src[3].idx >= 0 &&
2222 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2223 target = tx->sampler_targets[tx->insn.src[1].idx];
2224
2225 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2226 return D3D_OK;
2227 }
2228
2229 DECL_SPECIAL(SETP)
2230 {
2231 STUB(D3DERR_INVALIDCALL);
2232 }
2233
2234 DECL_SPECIAL(BREAKP)
2235 {
2236 STUB(D3DERR_INVALIDCALL);
2237 }
2238
2239 DECL_SPECIAL(PHASE)
2240 {
2241 return D3D_OK; /* we don't care about phase */
2242 }
2243
2244 DECL_SPECIAL(COMMENT)
2245 {
2246 return D3D_OK; /* nothing to do */
2247 }
2248
2249
/* Build one instruction table entry: D3D opcode o, TGSI opcode t, the
 * vertex shader version pair (vv1, vv2), the pixel shader version pair
 * (pv1, pv2), then d, s and the handler h (NULL for none).  In the table
 * below d and s appear to be the destination and source operand counts. */
#define _OPI(o, t, vv1, vv2, pv1, pv2, d, s, h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2 }, d, s, h }
2252
/* Table of all SM1-SM3 instructions known to the translator.
 *
 * create_op_info_map() scans this table and, for the shader's type and
 * version, stores the index of each matching entry in tx->op_info_map[sio].
 * An opcode may appear several times with different version ranges (MOV,
 * SINCOS, TEXCOORD, TEX, EXPP); when more than one entry matches, the one
 * that comes last in the table is the one recorded.
 *
 * Entries with a NULL handler are emitted by
 * NineTranslateInstruction_Generic() using the TGSI opcode given here.
 * The trailing numeric comments appear to be the D3DSIO opcode values.
 */
struct sm1_op_info inst_table[] =
{
    _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
    _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
    _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
    _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
    _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
    _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
    _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 7 */
    _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
    _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
    _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
    _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
    _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
    _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
    _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
    _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 15 */
    _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
    _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
    _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
    _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */

    /* Matrix multiplies: no single TGSI opcode, always handled specially. */
    _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
    _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
    _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
    _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
    _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),

    /* Subroutines and loops */
    _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALL)),
    _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALLNZ)),
    _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
    _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
    _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
    _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(LABEL)),

    _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),

    _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
    _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
    _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
    _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */

    /* SINCOS takes 3 sources in its first version range and 1 in the second. */
    _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
    _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

    /* More flow control */
    _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
    _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
    _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
    _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
    _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
    _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
    _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
    _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),

    _OPI(MOVA, ARR, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),

    /* Local bool/int constant definitions */
    _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
    _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),

    /* Texture instructions; several opcodes change operand count by version. */
    _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
    _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
    _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
    _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEM)),
    _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEML)),
    _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2AR)),
    _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2GB)),
    _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2PAD)),
    _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2TEX)),
    _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3PAD)),
    _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
    _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3SPEC)),
    _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3VSPEC)),

    _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),

    _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),

    /* More tex stuff */
    _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXREG2RGB)),
    _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3TEX)),
    _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 0, 0, SPECIAL(TEXM3x2DEPTH)),
    _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3)),
    _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
    _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(TEXDEPTH)),

    /* Misc */
    _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
    _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(BEM)),
    _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
    _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
    _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(SETP)),
    _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
    _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(BREAKP))
};
2359
/* Stand-alone entry for PHASE. sm1_parse_instruction() selects it by opcode
 * when the opcode value does not fit in tx->op_info_map. */
struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2362
/* Stand-alone entry for COMMENT. sm1_parse_instruction() selects it by opcode
 * when the opcode value does not fit in tx->op_info_map. */
struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2365
2366 static void
2367 create_op_info_map(struct shader_translator *tx)
2368 {
2369 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2370 unsigned i;
2371
2372 for (i = 0; i < Elements(tx->op_info_map); ++i)
2373 tx->op_info_map[i] = -1;
2374
2375 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2376 for (i = 0; i < Elements(inst_table); ++i) {
2377 assert(inst_table[i].sio < Elements(tx->op_info_map));
2378 if (inst_table[i].vert_version.min <= version &&
2379 inst_table[i].vert_version.max >= version)
2380 tx->op_info_map[inst_table[i].sio] = i;
2381 }
2382 } else {
2383 for (i = 0; i < Elements(inst_table); ++i) {
2384 assert(inst_table[i].sio < Elements(tx->op_info_map));
2385 if (inst_table[i].frag_version.min <= version &&
2386 inst_table[i].frag_version.max >= version)
2387 tx->op_info_map[inst_table[i].sio] = i;
2388 }
2389 }
2390 }
2391
2392 static INLINE HRESULT
2393 NineTranslateInstruction_Generic(struct shader_translator *tx)
2394 {
2395 struct ureg_dst dst[1];
2396 struct ureg_src src[4];
2397 unsigned i;
2398
2399 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2400 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2401 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2402 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2403
2404 ureg_insn(tx->ureg, tx->insn.info->opcode,
2405 dst, tx->insn.ndst,
2406 src, tx->insn.nsrc);
2407 return D3D_OK;
2408 }
2409
2410 static INLINE DWORD
2411 TOKEN_PEEK(struct shader_translator *tx)
2412 {
2413 return *(tx->parse);
2414 }
2415
2416 static INLINE DWORD
2417 TOKEN_NEXT(struct shader_translator *tx)
2418 {
2419 return *(tx->parse)++;
2420 }
2421
2422 static INLINE void
2423 TOKEN_JUMP(struct shader_translator *tx)
2424 {
2425 if (tx->parse_next && tx->parse != tx->parse_next) {
2426 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2427 tx->parse = tx->parse_next;
2428 }
2429 }
2430
2431 static INLINE boolean
2432 sm1_parse_eof(struct shader_translator *tx)
2433 {
2434 return TOKEN_PEEK(tx) == NINED3DSP_END;
2435 }
2436
2437 static void
2438 sm1_read_version(struct shader_translator *tx)
2439 {
2440 const DWORD tok = TOKEN_NEXT(tx);
2441
2442 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2443 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2444
2445 switch (tok >> 16) {
2446 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2447 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2448 default:
2449 DBG("Invalid shader type: %x\n", tok);
2450 tx->processor = ~0;
2451 break;
2452 }
2453 }
2454
2455 /* This is just to check if we parsed the instruction properly. */
2456 static void
2457 sm1_parse_get_skip(struct shader_translator *tx)
2458 {
2459 const DWORD tok = TOKEN_PEEK(tx);
2460
2461 if (tx->version.major >= 2) {
2462 tx->parse_next = tx->parse + 1 /* this */ +
2463 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2464 } else {
2465 tx->parse_next = NULL; /* TODO: determine from param count */
2466 }
2467 }
2468
2469 static void
2470 sm1_print_comment(const char *comment, UINT size)
2471 {
2472 if (!size)
2473 return;
2474 /* TODO */
2475 }
2476
2477 static void
2478 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2479 {
2480 DWORD tok = TOKEN_PEEK(tx);
2481
2482 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2483 {
2484 const char *comment = "";
2485 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2486 tx->parse += size + 1;
2487
2488 if (print)
2489 sm1_print_comment(comment, size);
2490
2491 tok = TOKEN_PEEK(tx);
2492 }
2493 }
2494
2495 static void
2496 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2497 {
2498 *reg = TOKEN_NEXT(tx);
2499
2500 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2501 {
2502 if (tx->version.major < 2)
2503 *rel = (1 << 31) |
2504 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2505 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2506 (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT);
2507 else
2508 *rel = TOKEN_NEXT(tx);
2509 }
2510 }
2511
2512 static void
2513 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2514 {
2515 uint8_t shift;
2516 dst->file =
2517 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2518 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2519 dst->type = TGSI_RETURN_TYPE_FLOAT;
2520 dst->idx = tok & D3DSP_REGNUM_MASK;
2521 dst->rel = NULL;
2522 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2523 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2524 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2525 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2526 }
2527
2528 static void
2529 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2530 {
2531 src->file =
2532 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2533 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2534 src->type = TGSI_RETURN_TYPE_FLOAT;
2535 src->idx = tok & D3DSP_REGNUM_MASK;
2536 src->rel = NULL;
2537 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2538 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2539
2540 switch (src->file) {
2541 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2542 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2543 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2544 default:
2545 break;
2546 }
2547 }
2548
2549 static void
2550 sm1_parse_immediate(struct shader_translator *tx,
2551 struct sm1_src_param *imm)
2552 {
2553 imm->file = NINED3DSPR_IMMEDIATE;
2554 imm->idx = INT_MIN;
2555 imm->rel = NULL;
2556 imm->swizzle = NINED3DSP_NOSWIZZLE;
2557 imm->mod = 0;
2558 switch (tx->insn.opcode) {
2559 case D3DSIO_DEF:
2560 imm->type = NINED3DSPTYPE_FLOAT4;
2561 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2562 tx->parse += 4;
2563 break;
2564 case D3DSIO_DEFI:
2565 imm->type = NINED3DSPTYPE_INT4;
2566 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2567 tx->parse += 4;
2568 break;
2569 case D3DSIO_DEFB:
2570 imm->type = NINED3DSPTYPE_BOOL;
2571 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2572 tx->parse += 1;
2573 break;
2574 default:
2575 assert(0);
2576 break;
2577 }
2578 }
2579
2580 static void
2581 sm1_read_dst_param(struct shader_translator *tx,
2582 struct sm1_dst_param *dst,
2583 struct sm1_src_param *rel)
2584 {
2585 DWORD tok_dst, tok_rel = 0;
2586
2587 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
2588 sm1_parse_dst_param(dst, tok_dst);
2589 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
2590 sm1_parse_src_param(rel, tok_rel);
2591 dst->rel = rel;
2592 }
2593 }
2594
2595 static void
2596 sm1_read_src_param(struct shader_translator *tx,
2597 struct sm1_src_param *src,
2598 struct sm1_src_param *rel)
2599 {
2600 DWORD tok_src, tok_rel = 0;
2601
2602 sm1_parse_get_param(tx, &tok_src, &tok_rel);
2603 sm1_parse_src_param(src, tok_src);
2604 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
2605 assert(rel);
2606 sm1_parse_src_param(rel, tok_rel);
2607 src->rel = rel;
2608 }
2609 }
2610
2611 static void
2612 sm1_read_semantic(struct shader_translator *tx,
2613 struct sm1_semantic *sem)
2614 {
2615 const DWORD tok_usg = TOKEN_NEXT(tx);
2616 const DWORD tok_dst = TOKEN_NEXT(tx);
2617
2618 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
2619 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
2620 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
2621
2622 sm1_parse_dst_param(&sem->reg, tok_dst);
2623 }
2624
2625 static void
2626 sm1_parse_instruction(struct shader_translator *tx)
2627 {
2628 struct sm1_instruction *insn = &tx->insn;
2629 DWORD tok;
2630 struct sm1_op_info *info = NULL;
2631 unsigned i;
2632
2633 sm1_parse_comments(tx, TRUE);
2634 sm1_parse_get_skip(tx);
2635
2636 tok = TOKEN_NEXT(tx);
2637
2638 insn->opcode = tok & D3DSI_OPCODE_MASK;
2639 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
2640 insn->coissue = !!(tok & D3DSI_COISSUE);
2641 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
2642
2643 if (insn->opcode < Elements(tx->op_info_map)) {
2644 int k = tx->op_info_map[insn->opcode];
2645 if (k >= 0) {
2646 assert(k < Elements(inst_table));
2647 info = &inst_table[k];
2648 }
2649 } else {
2650 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
2651 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
2652 }
2653 if (!info) {
2654 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
2655 TOKEN_JUMP(tx);
2656 return;
2657 }
2658 insn->info = info;
2659 insn->ndst = info->ndst;
2660 insn->nsrc = info->nsrc;
2661
2662 assert(!insn->predicated && "TODO: predicated instructions");
2663
2664 /* check version */
2665 {
2666 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
2667 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
2668 unsigned ver = (tx->version.major << 8) | tx->version.minor;
2669 if (ver < min || ver > max) {
2670 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
2671 min, ver, max);
2672 return;
2673 }
2674 }
2675
2676 for (i = 0; i < insn->ndst; ++i)
2677 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
2678 if (insn->predicated)
2679 sm1_read_src_param(tx, &insn->pred, NULL);
2680 for (i = 0; i < insn->nsrc; ++i)
2681 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
2682
2683 /* parse here so we can dump them before processing */
2684 if (insn->opcode == D3DSIO_DEF ||
2685 insn->opcode == D3DSIO_DEFI ||
2686 insn->opcode == D3DSIO_DEFB)
2687 sm1_parse_immediate(tx, &tx->insn.src[0]);
2688
2689 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
2690 sm1_instruction_check(insn);
2691
2692 if (info->handler)
2693 info->handler(tx);
2694 else
2695 NineTranslateInstruction_Generic(tx);
2696 tx_apply_dst0_modifiers(tx);
2697
2698 tx->num_scratch = 0; /* reset */
2699
2700 TOKEN_JUMP(tx);
2701 }
2702
2703 static void
2704 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
2705 {
2706 unsigned i;
2707
2708 tx->info = info;
2709
2710 tx->byte_code = info->byte_code;
2711 tx->parse = info->byte_code;
2712
2713 for (i = 0; i < Elements(info->input_map); ++i)
2714 info->input_map[i] = NINE_DECLUSAGE_NONE;
2715 info->num_inputs = 0;
2716
2717 info->position_t = FALSE;
2718 info->point_size = FALSE;
2719
2720 tx->info->const_used_size = 0;
2721
2722 info->sampler_mask = 0x0;
2723 info->rt_mask = 0x0;
2724
2725 info->lconstf.data = NULL;
2726 info->lconstf.ranges = NULL;
2727
2728 for (i = 0; i < Elements(tx->regs.aL); ++i) {
2729 tx->regs.aL[i] = ureg_dst_undef();
2730 tx->regs.rL[i] = ureg_dst_undef();
2731 }
2732 tx->regs.a = ureg_dst_undef();
2733 tx->regs.p = ureg_dst_undef();
2734 tx->regs.oDepth = ureg_dst_undef();
2735 tx->regs.vPos = ureg_src_undef();
2736 tx->regs.vFace = ureg_src_undef();
2737 for (i = 0; i < Elements(tx->regs.o); ++i)
2738 tx->regs.o[i] = ureg_dst_undef();
2739 for (i = 0; i < Elements(tx->regs.oCol); ++i)
2740 tx->regs.oCol[i] = ureg_dst_undef();
2741 for (i = 0; i < Elements(tx->regs.vC); ++i)
2742 tx->regs.vC[i] = ureg_src_undef();
2743 for (i = 0; i < Elements(tx->regs.vT); ++i)
2744 tx->regs.vT[i] = ureg_src_undef();
2745
2746 for (i = 0; i < Elements(tx->lconsti); ++i)
2747 tx->lconsti[i].idx = -1;
2748 for (i = 0; i < Elements(tx->lconstb); ++i)
2749 tx->lconstb[i].idx = -1;
2750
2751 sm1_read_version(tx);
2752
2753 info->version = (tx->version.major << 4) | tx->version.minor;
2754
2755 create_op_info_map(tx);
2756 }
2757
2758 static void
2759 tx_dtor(struct shader_translator *tx)
2760 {
2761 if (tx->num_inst_labels)
2762 FREE(tx->inst_labels);
2763 FREE(tx->lconstf);
2764 FREE(tx->regs.r);
2765 FREE(tx);
2766 }
2767
2768 static INLINE unsigned
2769 tgsi_processor_from_type(unsigned shader_type)
2770 {
2771 switch (shader_type) {
2772 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
2773 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
2774 default:
2775 return ~0;
2776 }
2777 }
2778
/* Capability queries on the device's screen. Both macros expect a variable
 * named `device` in scope; GET_SHADER_CAP additionally expects `info` and
 * queries the capability for the shader type in info->type. */
#define GET_CAP(n) device->screen->get_param( \
    device->screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) device->screen->get_shader_param( \
    device->screen, info->type, PIPE_SHADER_CAP_##n)
2783
2784 HRESULT
2785 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
2786 {
2787 struct shader_translator *tx;
2788 HRESULT hr = D3D_OK;
2789 const unsigned processor = tgsi_processor_from_type(info->type);
2790
2791 user_assert(processor != ~0, D3DERR_INVALIDCALL);
2792
2793 tx = CALLOC_STRUCT(shader_translator);
2794 if (!tx)
2795 return E_OUTOFMEMORY;
2796 tx_ctor(tx, info);
2797
2798 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
2799 hr = D3DERR_INVALIDCALL;
2800 DBG("Unsupported shader version: %u.%u !\n",
2801 tx->version.major, tx->version.minor);
2802 goto out;
2803 }
2804 if (tx->processor != processor) {
2805 hr = D3DERR_INVALIDCALL;
2806 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
2807 goto out;
2808 }
2809 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
2810 tx->version.major, tx->version.minor);
2811
2812 tx->ureg = ureg_create(processor);
2813 if (!tx->ureg) {
2814 hr = E_OUTOFMEMORY;
2815 goto out;
2816 }
2817 tx_decl_constants(tx);
2818
2819 tx->native_integers = GET_SHADER_CAP(INTEGERS);
2820 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
2821 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
2822 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
2823 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2824 tx->texcoord_sn = tx->want_texcoord ?
2825 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
2826
2827 /* VS must always write position. Declare it here to make it the 1st output.
2828 * (Some drivers like nv50 are buggy and rely on that.)
2829 */
2830 if (IS_VS) {
2831 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
2832 } else {
2833 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
2834 if (!tx->shift_wpos)
2835 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2836 }
2837
2838 while (!sm1_parse_eof(tx))
2839 sm1_parse_instruction(tx);
2840 tx->parse++; /* for byte_size */
2841
2842 if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
2843 ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
2844 ureg_src(tx->regs.r[0]));
2845 info->rt_mask |= 0x1;
2846 }
2847
2848 if (info->position_t)
2849 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
2850
2851 ureg_END(tx->ureg);
2852
2853 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
2854 info->point_size = TRUE;
2855
2856 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
2857 unsigned count;
2858 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
2859 tgsi_dump(toks, 0);
2860 ureg_free_tokens(toks);
2861 }
2862
2863 /* record local constants */
2864 if (tx->num_lconstf && tx->indirect_const_access) {
2865 struct nine_range *ranges;
2866 float *data;
2867 int *indices;
2868 unsigned i, k, n;
2869
2870 hr = E_OUTOFMEMORY;
2871
2872 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
2873 if (!data)
2874 goto out;
2875 info->lconstf.data = data;
2876
2877 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
2878 if (!indices)
2879 goto out;
2880
2881 /* lazy sort, num_lconstf should be small */
2882 for (n = 0; n < tx->num_lconstf; ++n) {
2883 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
2884 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
2885 k = i;
2886 }
2887 indices[n] = tx->lconstf[k].idx;
2888 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
2889 tx->lconstf[k].idx = INT_MAX;
2890 }
2891
2892 /* count ranges */
2893 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
2894 if (indices[i] != indices[i - 1] + 1)
2895 ++n;
2896 ranges = MALLOC(n * sizeof(ranges[0]));
2897 if (!ranges) {
2898 FREE(indices);
2899 goto out;
2900 }
2901 info->lconstf.ranges = ranges;
2902
2903 k = 0;
2904 ranges[k].bgn = indices[0];
2905 for (i = 1; i < tx->num_lconstf; ++i) {
2906 if (indices[i] != indices[i - 1] + 1) {
2907 ranges[k].next = &ranges[k + 1];
2908 ranges[k].end = indices[i - 1] + 1;
2909 ++k;
2910 ranges[k].bgn = indices[i];
2911 }
2912 }
2913 ranges[k].end = indices[i - 1] + 1;
2914 ranges[k].next = NULL;
2915 assert(n == (k + 1));
2916
2917 FREE(indices);
2918 hr = D3D_OK;
2919 }
2920
2921 if (tx->indirect_const_access)
2922 info->const_used_size = ~0;
2923
2924 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
2925 if (!info->cso) {
2926 hr = D3DERR_DRIVERINTERNALERROR;
2927 FREE(info->lconstf.data);
2928 FREE(info->lconstf.ranges);
2929 goto out;
2930 }
2931
2932 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
2933 out:
2934 tx_dtor(tx);
2935 return hr;
2936 }