st/nine: Introduce failure handling for shader parsing.
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_ureg.h"
34 #include "tgsi/tgsi_dump.h"
35
36 #define DBG_CHANNEL DBG_SHADER
37
38 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
39
40
41 struct shader_translator;
42
43 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
44
45 static INLINE const char *d3dsio_to_string(unsigned opcode);
46
47
48 #define NINED3D_SM1_VS 0xfffe
49 #define NINED3D_SM1_PS 0xffff
50
51 #define NINE_MAX_COND_DEPTH 64
52 #define NINE_MAX_LOOP_DEPTH 64
53
54 #define NINED3DSP_END 0x0000ffff
55
56 #define NINED3DSPTYPE_FLOAT4 0
57 #define NINED3DSPTYPE_INT4 1
58 #define NINED3DSPTYPE_BOOL 2
59
60 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
61
62 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
63 #define NINED3DSP_WRITEMASK_SHIFT 16
64
65 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
66
67 #define NINED3DSHADER_REL_OP_GT 1
68 #define NINED3DSHADER_REL_OP_EQ 2
69 #define NINED3DSHADER_REL_OP_GE 3
70 #define NINED3DSHADER_REL_OP_LT 4
71 #define NINED3DSHADER_REL_OP_NE 5
72 #define NINED3DSHADER_REL_OP_LE 6
73
74 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
75 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
76
77 #define NINED3DSI_TEXLD_PROJECT 0x1
78 #define NINED3DSI_TEXLD_BIAS 0x2
79
80 #define NINED3DSP_WRITEMASK_0 0x1
81 #define NINED3DSP_WRITEMASK_1 0x2
82 #define NINED3DSP_WRITEMASK_2 0x4
83 #define NINED3DSP_WRITEMASK_3 0x8
84 #define NINED3DSP_WRITEMASK_ALL 0xf
85
86 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
87
88 #define NINE_SWIZZLE4(x,y,z,w) \
89 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
90
91 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
92 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
93 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
94
95 /*
96 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
97 * BIAS <= PS 1.4 (x-0.5)
98 * BIASNEG <= PS 1.4 (-(x-0.5))
99 * SIGN <= PS 1.4 (2(x-0.5))
100 * SIGNNEG <= PS 1.4 (-2(x-0.5))
101 * COMP <= PS 1.4 (1-x)
102 * X2 = PS 1.4 (2x)
103 * X2NEG = PS 1.4 (-2x)
104 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
105 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
106 * ABS >= SM 3.0 (abs(x))
107 * ABSNEG >= SM 3.0 (-abs(x))
108 * NOT >= SM 2.0 predication only
109 */
110 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
111 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
112 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
113 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
114 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
115 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
116 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
117 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
118 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
119 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
120 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
121 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
124
125 static const char *sm1_mod_str[] =
126 {
127 [NINED3DSPSM_NONE] = "",
128 [NINED3DSPSM_NEG] = "-",
129 [NINED3DSPSM_BIAS] = "bias",
130 [NINED3DSPSM_BIASNEG] = "biasneg",
131 [NINED3DSPSM_SIGN] = "sign",
132 [NINED3DSPSM_SIGNNEG] = "signneg",
133 [NINED3DSPSM_COMP] = "comp",
134 [NINED3DSPSM_X2] = "x2",
135 [NINED3DSPSM_X2NEG] = "x2neg",
136 [NINED3DSPSM_DZ] = "dz",
137 [NINED3DSPSM_DW] = "dw",
138 [NINED3DSPSM_ABS] = "abs",
139 [NINED3DSPSM_ABSNEG] = "-abs",
140 [NINED3DSPSM_NOT] = "not"
141 };
142
143 static void
144 sm1_dump_writemask(BYTE mask)
145 {
146 if (mask & 1) DUMP("x"); else DUMP("_");
147 if (mask & 2) DUMP("y"); else DUMP("_");
148 if (mask & 4) DUMP("z"); else DUMP("_");
149 if (mask & 8) DUMP("w"); else DUMP("_");
150 }
151
152 static void
153 sm1_dump_swizzle(BYTE s)
154 {
155 char c[4] = { 'x', 'y', 'z', 'w' };
156 DUMP("%c%c%c%c",
157 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
158 }
159
/* One-letter prefix used when dumping a register, indexed by the D3DSPR_x
 * register file.  The last entry is D3DSPR_PREDICATE; NINED3DSPR_IMMEDIATE
 * (D3DSPR_PREDICATE + 1) has no entry.  D3DSPR_ATTROUT and D3DSPR_DEPTHOUT
 * share 'D', and all D3DSPR_CONSTn files share 'c'. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
183
184 static void
185 sm1_dump_reg(BYTE file, INT index)
186 {
187 switch (file) {
188 case D3DSPR_LOOP:
189 DUMP("aL");
190 break;
191 case D3DSPR_COLOROUT:
192 DUMP("oC%i", index);
193 break;
194 case D3DSPR_DEPTHOUT:
195 DUMP("oDepth");
196 break;
197 case D3DSPR_RASTOUT:
198 DUMP("oRast%i", index);
199 break;
200 case D3DSPR_CONSTINT:
201 DUMP("iconst[%i]", index);
202 break;
203 case D3DSPR_CONSTBOOL:
204 DUMP("bconst[%i]", index);
205 break;
206 default:
207 DUMP("%c%i", sm1_file_char[file], index);
208 break;
209 }
210 }
211
212 struct sm1_src_param
213 {
214 INT idx;
215 struct sm1_src_param *rel;
216 BYTE file;
217 BYTE swizzle;
218 BYTE mod;
219 BYTE type;
220 union {
221 DWORD d[4];
222 float f[4];
223 int i[4];
224 BOOL b;
225 } imm;
226 };
227 static void
228 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
229
230 struct sm1_dst_param
231 {
232 INT idx;
233 struct sm1_src_param *rel;
234 BYTE file;
235 BYTE mask;
236 BYTE mod;
237 int8_t shift; /* sint4 */
238 BYTE type;
239 };
240
241 static INLINE void
242 assert_replicate_swizzle(const struct ureg_src *reg)
243 {
244 assert(reg->SwizzleY == reg->SwizzleX &&
245 reg->SwizzleZ == reg->SwizzleX &&
246 reg->SwizzleW == reg->SwizzleX);
247 }
248
249 static void
250 sm1_dump_immediate(const struct sm1_src_param *param)
251 {
252 switch (param->type) {
253 case NINED3DSPTYPE_FLOAT4:
254 DUMP("{ %f %f %f %f }",
255 param->imm.f[0], param->imm.f[1],
256 param->imm.f[2], param->imm.f[3]);
257 break;
258 case NINED3DSPTYPE_INT4:
259 DUMP("{ %i %i %i %i }",
260 param->imm.i[0], param->imm.i[1],
261 param->imm.i[2], param->imm.i[3]);
262 break;
263 case NINED3DSPTYPE_BOOL:
264 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
265 break;
266 default:
267 assert(0);
268 break;
269 }
270 }
271
272 static void
273 sm1_dump_src_param(const struct sm1_src_param *param)
274 {
275 if (param->file == NINED3DSPR_IMMEDIATE) {
276 assert(!param->mod &&
277 !param->rel &&
278 param->swizzle == NINED3DSP_NOSWIZZLE);
279 sm1_dump_immediate(param);
280 return;
281 }
282
283 if (param->mod)
284 DUMP("%s(", sm1_mod_str[param->mod]);
285 if (param->rel) {
286 DUMP("%c[", sm1_file_char[param->file]);
287 sm1_dump_src_param(param->rel);
288 DUMP("+%i]", param->idx);
289 } else {
290 sm1_dump_reg(param->file, param->idx);
291 }
292 if (param->mod)
293 DUMP(")");
294 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
295 DUMP(".");
296 sm1_dump_swizzle(param->swizzle);
297 }
298 }
299
300 static void
301 sm1_dump_dst_param(const struct sm1_dst_param *param)
302 {
303 if (param->mod & NINED3DSPDM_SATURATE)
304 DUMP("sat ");
305 if (param->mod & NINED3DSPDM_PARTIALP)
306 DUMP("pp ");
307 if (param->mod & NINED3DSPDM_CENTROID)
308 DUMP("centroid ");
309 if (param->shift < 0)
310 DUMP("/%u ", 1 << -param->shift);
311 if (param->shift > 0)
312 DUMP("*%u ", 1 << param->shift);
313
314 if (param->rel) {
315 DUMP("%c[", sm1_file_char[param->file]);
316 sm1_dump_src_param(param->rel);
317 DUMP("+%i]", param->idx);
318 } else {
319 sm1_dump_reg(param->file, param->idx);
320 }
321 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
322 DUMP(".");
323 sm1_dump_writemask(param->mask);
324 }
325 }
326
/* A register together with its declared usage.
 * NOTE(review): presumably filled in by sm1_read_semantic() when a DCL is
 * parsed -- that function is not visible here, confirm. */
struct sm1_semantic
{
    struct sm1_dst_param reg; /* the register this semantic refers to */
    BYTE sampler_type;
    D3DDECLUSAGE usage;
    BYTE usage_idx;
};
334
335 struct sm1_op_info
336 {
337 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
338 * should be ignored completely */
339 unsigned sio;
340 unsigned opcode; /* TGSI_OPCODE_x */
341
342 /* versions are still set even handler is set */
343 struct {
344 unsigned min;
345 unsigned max;
346 } vert_version, frag_version;
347
348 /* number of regs parsed outside of special handler */
349 unsigned ndst;
350 unsigned nsrc;
351
352 /* some instructions don't map perfectly, so use a special handler */
353 translate_instruction_func handler;
354 };
355
356 struct sm1_instruction
357 {
358 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
359 BYTE flags;
360 BOOL coissue;
361 BOOL predicated;
362 BYTE ndst;
363 BYTE nsrc;
364 struct sm1_src_param src[4];
365 struct sm1_src_param src_rel[4];
366 struct sm1_src_param pred;
367 struct sm1_src_param dst_rel[1];
368 struct sm1_dst_param dst[1];
369
370 struct sm1_op_info *info;
371 };
372
373 static void
374 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
375 {
376 unsigned i;
377
378 /* no info stored for these: */
379 if (insn->opcode == D3DSIO_DCL)
380 return;
381 for (i = 0; i < indent; ++i)
382 DUMP(" ");
383
384 if (insn->predicated) {
385 DUMP("@");
386 sm1_dump_src_param(&insn->pred);
387 DUMP(" ");
388 }
389 DUMP("%s", d3dsio_to_string(insn->opcode));
390 if (insn->flags) {
391 switch (insn->opcode) {
392 case D3DSIO_TEX:
393 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
394 break;
395 default:
396 DUMP("_%x", insn->flags);
397 break;
398 }
399 }
400 if (insn->coissue)
401 DUMP("_co");
402 DUMP(" ");
403
404 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
405 sm1_dump_dst_param(&insn->dst[i]);
406 DUMP(" ");
407 }
408
409 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
410 sm1_dump_src_param(&insn->src[i]);
411 DUMP(" ");
412 }
413 if (insn->opcode == D3DSIO_DEF ||
414 insn->opcode == D3DSIO_DEFI ||
415 insn->opcode == D3DSIO_DEFB)
416 sm1_dump_immediate(&insn->src[0]);
417
418 DUMP("\n");
419 }
420
/* A constant defined inside the shader itself. */
struct sm1_local_const
{
    INT idx;             /* constant register index this entry stands for */
    struct ureg_src reg; /* TGSI immediate holding the value */
    /* raw value; of the setters visible here only tx_set_lconstf() fills it */
    union {
        boolean b;
        float f[4];
        int32_t i[4];
    } imm;
};
431
432 struct shader_translator
433 {
434 const DWORD *byte_code;
435 const DWORD *parse;
436 const DWORD *parse_next;
437
438 struct ureg_program *ureg;
439
440 /* shader version */
441 struct {
442 BYTE major;
443 BYTE minor;
444 } version;
445 unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
446
447 boolean native_integers;
448 boolean inline_subroutines;
449 boolean lower_preds;
450 boolean want_texcoord;
451 boolean shift_wpos;
452 unsigned texcoord_sn;
453
454 struct sm1_instruction insn; /* current instruction */
455
456 struct {
457 struct ureg_dst *r;
458 struct ureg_dst oPos;
459 struct ureg_dst oFog;
460 struct ureg_dst oPts;
461 struct ureg_dst oCol[4];
462 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
463 struct ureg_dst oDepth;
464 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
465 struct ureg_src vPos;
466 struct ureg_src vFace;
467 struct ureg_src s;
468 struct ureg_dst p;
469 struct ureg_dst address;
470 struct ureg_dst a0;
471 struct ureg_dst tS[8]; /* texture stage registers */
472 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
473 struct ureg_dst t[5]; /* scratch TEMPs */
474 struct ureg_src vC[2]; /* PS color in */
475 struct ureg_src vT[8]; /* PS texcoord in */
476 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
477 } regs;
478 unsigned num_temp; /* Elements(regs.r) */
479 unsigned num_scratch;
480 unsigned loop_depth;
481 unsigned loop_depth_max;
482 unsigned cond_depth;
483 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
484 unsigned cond_labels[NINE_MAX_COND_DEPTH];
485 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
486
487 unsigned *inst_labels; /* LABEL op */
488 unsigned num_inst_labels;
489
490 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
491
492 struct sm1_local_const *lconstf;
493 unsigned num_lconstf;
494 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
495 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
496
497 boolean indirect_const_access;
498 boolean failure;
499
500 struct nine_shader_info *info;
501
502 int16_t op_info_map[D3DSIO_BREAKP + 1];
503 };
504
505 #define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
506 #define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
507
508 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
509
510 static void
511 sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
512
513 static void
514 sm1_instruction_check(const struct sm1_instruction *insn)
515 {
516 if (insn->opcode == D3DSIO_CRS)
517 {
518 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
519 {
520 DBG("CRS.mask.w\n");
521 }
522 }
523 }
524
525 static boolean
526 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
527 {
528 INT i;
529 if (index < 0 || index >= (NINE_MAX_CONST_F * 2)) {
530 tx->failure = TRUE;
531 return FALSE;
532 }
533 for (i = 0; i < tx->num_lconstf; ++i) {
534 if (tx->lconstf[i].idx == index) {
535 *src = tx->lconstf[i].reg;
536 return TRUE;
537 }
538 }
539 return FALSE;
540 }
541 static boolean
542 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
543 {
544 if (index < 0 || index >= NINE_MAX_CONST_I) {
545 tx->failure = TRUE;
546 return FALSE;
547 }
548 if (tx->lconsti[index].idx == index)
549 *src = tx->lconsti[index].reg;
550 return tx->lconsti[index].idx == index;
551 }
552 static boolean
553 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
554 {
555 if (index < 0 || index >= NINE_MAX_CONST_B) {
556 tx->failure = TRUE;
557 return FALSE;
558 }
559 if (tx->lconstb[index].idx == index)
560 *src = tx->lconstb[index].reg;
561 return tx->lconstb[index].idx == index;
562 }
563
564 static void
565 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
566 {
567 unsigned n;
568
569 /* Anno1404 sets out of range constants. */
570 FAILURE_VOID(index < 0 || index >= (NINE_MAX_CONST_F * 2))
571 if (index >= NINE_MAX_CONST_F)
572 WARN("lconstf index %i too high, indirect access won't work\n", index);
573
574 for (n = 0; n < tx->num_lconstf; ++n)
575 if (tx->lconstf[n].idx == index)
576 break;
577 if (n == tx->num_lconstf) {
578 if ((n % 8) == 0) {
579 tx->lconstf = REALLOC(tx->lconstf,
580 (n + 0) * sizeof(tx->lconstf[0]),
581 (n + 8) * sizeof(tx->lconstf[0]));
582 assert(tx->lconstf);
583 }
584 tx->num_lconstf++;
585 }
586 tx->lconstf[n].idx = index;
587 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
588
589 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
590 }
591 static void
592 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
593 {
594 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I)
595 tx->lconsti[index].idx = index;
596 tx->lconsti[index].reg = tx->native_integers ?
597 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
598 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
599 }
600 static void
601 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
602 {
603 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B)
604 tx->lconstb[index].idx = index;
605 tx->lconstb[index].reg = tx->native_integers ?
606 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
607 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
608 }
609
610 static INLINE struct ureg_dst
611 tx_scratch(struct shader_translator *tx)
612 {
613 if (tx->num_scratch >= Elements(tx->regs.t)) {
614 tx->failure = TRUE;
615 return tx->regs.t[0];
616 }
617 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
618 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
619 return tx->regs.t[tx->num_scratch++];
620 }
621
622 static INLINE struct ureg_dst
623 tx_scratch_scalar(struct shader_translator *tx)
624 {
625 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
626 }
627
628 static INLINE struct ureg_src
629 tx_src_scalar(struct ureg_dst dst)
630 {
631 struct ureg_src src = ureg_src(dst);
632 int c = ffs(dst.WriteMask) - 1;
633 if (dst.WriteMask == (1 << c))
634 src = ureg_scalar(src, c);
635 return src;
636 }
637
638 static INLINE void
639 tx_temp_alloc(struct shader_translator *tx, INT idx)
640 {
641 assert(idx >= 0);
642 if (idx >= tx->num_temp) {
643 unsigned k = tx->num_temp;
644 unsigned n = idx + 1;
645 tx->regs.r = REALLOC(tx->regs.r,
646 k * sizeof(tx->regs.r[0]),
647 n * sizeof(tx->regs.r[0]));
648 for (; k < n; ++k)
649 tx->regs.r[k] = ureg_dst_undef();
650 tx->num_temp = n;
651 }
652 if (ureg_dst_is_undef(tx->regs.r[idx]))
653 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
654 }
655
656 static INLINE void
657 tx_addr_alloc(struct shader_translator *tx, INT idx)
658 {
659 assert(idx == 0);
660 if (ureg_dst_is_undef(tx->regs.address))
661 tx->regs.address = ureg_DECL_address(tx->ureg);
662 if (ureg_dst_is_undef(tx->regs.a0))
663 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
664 }
665
666 static INLINE void
667 tx_pred_alloc(struct shader_translator *tx, INT idx)
668 {
669 assert(idx == 0);
670 if (ureg_dst_is_undef(tx->regs.p))
671 tx->regs.p = ureg_DECL_predicate(tx->ureg);
672 }
673
674 static INLINE void
675 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
676 {
677 assert(IS_PS);
678 assert(idx >= 0 && idx < Elements(tx->regs.vT));
679 if (ureg_src_is_undef(tx->regs.vT[idx]))
680 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
681 TGSI_INTERPOLATE_PERSPECTIVE);
682 }
683
684 static INLINE unsigned *
685 tx_bgnloop(struct shader_translator *tx)
686 {
687 tx->loop_depth++;
688 if (tx->loop_depth_max < tx->loop_depth)
689 tx->loop_depth_max = tx->loop_depth;
690 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
691 return &tx->loop_labels[tx->loop_depth - 1];
692 }
693
694 static INLINE unsigned *
695 tx_endloop(struct shader_translator *tx)
696 {
697 assert(tx->loop_depth);
698 tx->loop_depth--;
699 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
700 ureg_get_instruction_number(tx->ureg));
701 return &tx->loop_labels[tx->loop_depth];
702 }
703
704 static struct ureg_dst
705 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
706 {
707 const unsigned l = tx->loop_depth - 1;
708
709 if (!tx->loop_depth)
710 {
711 DBG("loop counter requested outside of loop\n");
712 return ureg_dst_undef();
713 }
714
715 if (ureg_dst_is_undef(tx->regs.rL[l])) {
716 /* loop or rep ctr creation */
717 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
718 tx->loop_or_rep[l] = loop_or_rep;
719 }
720 /* loop - rep - endloop - endrep not allowed */
721 assert(tx->loop_or_rep[l] == loop_or_rep);
722
723 return tx->regs.rL[l];
724 }
725
726 static struct ureg_src
727 tx_get_loopal(struct shader_translator *tx)
728 {
729 int loop_level = tx->loop_depth - 1;
730
731 while (loop_level >= 0) {
732 /* handle loop - rep - endrep - endloop case */
733 if (tx->loop_or_rep[loop_level])
734 /* the value is in the loop counter y component (nine implementation) */
735 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
736 loop_level--;
737 }
738
739 DBG("aL counter requested outside of loop\n");
740 return ureg_src_undef();
741 }
742
743 static INLINE unsigned *
744 tx_cond(struct shader_translator *tx)
745 {
746 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
747 tx->cond_depth++;
748 return &tx->cond_labels[tx->cond_depth - 1];
749 }
750
751 static INLINE unsigned *
752 tx_elsecond(struct shader_translator *tx)
753 {
754 assert(tx->cond_depth);
755 return &tx->cond_labels[tx->cond_depth - 1];
756 }
757
758 static INLINE void
759 tx_endcond(struct shader_translator *tx)
760 {
761 assert(tx->cond_depth);
762 tx->cond_depth--;
763 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
764 ureg_get_instruction_number(tx->ureg));
765 }
766
767 static INLINE struct ureg_dst
768 nine_ureg_dst_register(unsigned file, int index)
769 {
770 return ureg_dst(ureg_src_register(file, index));
771 }
772
773 static struct ureg_src
774 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
775 {
776 struct ureg_program *ureg = tx->ureg;
777 struct ureg_src src;
778 struct ureg_dst tmp;
779
780 switch (param->file)
781 {
782 case D3DSPR_TEMP:
783 assert(!param->rel);
784 tx_temp_alloc(tx, param->idx);
785 src = ureg_src(tx->regs.r[param->idx]);
786 break;
787 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
788 case D3DSPR_ADDR:
789 assert(!param->rel);
790 if (IS_VS) {
791 assert(param->idx == 0);
792 /* the address register (vs only) must be
793 * assigned before use */
794 assert(!ureg_dst_is_undef(tx->regs.a0));
795 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
796 src = ureg_src(tx->regs.address);
797 } else {
798 if (tx->version.major < 2 && tx->version.minor < 4) {
799 /* no subroutines, so should be defined */
800 src = ureg_src(tx->regs.tS[param->idx]);
801 } else {
802 tx_texcoord_alloc(tx, param->idx);
803 src = tx->regs.vT[param->idx];
804 }
805 }
806 break;
807 case D3DSPR_INPUT:
808 if (IS_VS) {
809 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
810 } else {
811 if (tx->version.major < 3) {
812 assert(!param->rel);
813 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
814 param->idx,
815 TGSI_INTERPOLATE_PERSPECTIVE);
816 } else {
817 assert(!param->rel); /* TODO */
818 assert(param->idx < Elements(tx->regs.v));
819 src = tx->regs.v[param->idx];
820 }
821 }
822 break;
823 case D3DSPR_PREDICATE:
824 assert(!param->rel);
825 tx_pred_alloc(tx, param->idx);
826 src = ureg_src(tx->regs.p);
827 break;
828 case D3DSPR_SAMPLER:
829 assert(param->mod == NINED3DSPSM_NONE);
830 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
831 assert(!param->rel);
832 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
833 break;
834 case D3DSPR_CONST:
835 assert(!param->rel || IS_VS);
836 if (param->rel)
837 tx->indirect_const_access = TRUE;
838 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
839 if (!param->rel)
840 nine_info_mark_const_f_used(tx->info, param->idx);
841 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
842 }
843 if (!IS_VS && tx->version.major < 2) {
844 /* ps 1.X clamps constants */
845 tmp = tx_scratch(tx);
846 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
847 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
848 src = ureg_src(tmp);
849 }
850 break;
851 case D3DSPR_CONST2:
852 case D3DSPR_CONST3:
853 case D3DSPR_CONST4:
854 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
855 assert(!"CONST2/3/4");
856 src = ureg_imm1f(ureg, 0.0f);
857 break;
858 case D3DSPR_CONSTINT:
859 /* relative adressing only possible for float constants in vs */
860 assert(!param->rel);
861 if (!tx_lconsti(tx, &src, param->idx)) {
862 nine_info_mark_const_i_used(tx->info, param->idx);
863 src = ureg_src_register(TGSI_FILE_CONSTANT,
864 tx->info->const_i_base + param->idx);
865 }
866 break;
867 case D3DSPR_CONSTBOOL:
868 assert(!param->rel);
869 if (!tx_lconstb(tx, &src, param->idx)) {
870 char r = param->idx / 4;
871 char s = param->idx & 3;
872 nine_info_mark_const_b_used(tx->info, param->idx);
873 src = ureg_src_register(TGSI_FILE_CONSTANT,
874 tx->info->const_b_base + r);
875 src = ureg_swizzle(src, s, s, s, s);
876 }
877 break;
878 case D3DSPR_LOOP:
879 if (ureg_dst_is_undef(tx->regs.address))
880 tx->regs.address = ureg_DECL_address(ureg);
881 if (!tx->native_integers)
882 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
883 else
884 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
885 src = ureg_src(tx->regs.address);
886 break;
887 case D3DSPR_MISCTYPE:
888 switch (param->idx) {
889 case D3DSMO_POSITION:
890 if (ureg_src_is_undef(tx->regs.vPos))
891 tx->regs.vPos = ureg_DECL_fs_input(ureg,
892 TGSI_SEMANTIC_POSITION, 0,
893 TGSI_INTERPOLATE_LINEAR);
894 if (tx->shift_wpos) {
895 /* TODO: do this only once */
896 struct ureg_dst wpos = tx_scratch(tx);
897 ureg_SUB(ureg, wpos, tx->regs.vPos,
898 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
899 src = ureg_src(wpos);
900 } else {
901 src = tx->regs.vPos;
902 }
903 break;
904 case D3DSMO_FACE:
905 if (ureg_src_is_undef(tx->regs.vFace)) {
906 tx->regs.vFace = ureg_DECL_fs_input(ureg,
907 TGSI_SEMANTIC_FACE, 0,
908 TGSI_INTERPOLATE_CONSTANT);
909 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
910 }
911 src = tx->regs.vFace;
912 break;
913 default:
914 assert(!"invalid src D3DSMO");
915 break;
916 }
917 assert(!param->rel);
918 break;
919 case D3DSPR_TEMPFLOAT16:
920 break;
921 default:
922 assert(!"invalid src D3DSPR");
923 }
924 if (param->rel)
925 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
926
927 switch (param->mod) {
928 case NINED3DSPSM_DW:
929 tmp = tx_scratch(tx);
930 /* NOTE: app is not allowed to read w with this modifier */
931 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
932 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
933 src = ureg_src(tmp);
934 break;
935 case NINED3DSPSM_DZ:
936 tmp = tx_scratch(tx);
937 /* NOTE: app is not allowed to read z with this modifier */
938 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
939 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
940 src = ureg_src(tmp);
941 break;
942 default:
943 break;
944 }
945
946 if (param->swizzle != NINED3DSP_NOSWIZZLE)
947 src = ureg_swizzle(src,
948 (param->swizzle >> 0) & 0x3,
949 (param->swizzle >> 2) & 0x3,
950 (param->swizzle >> 4) & 0x3,
951 (param->swizzle >> 6) & 0x3);
952
953 switch (param->mod) {
954 case NINED3DSPSM_ABS:
955 src = ureg_abs(src);
956 break;
957 case NINED3DSPSM_ABSNEG:
958 src = ureg_negate(ureg_abs(src));
959 break;
960 case NINED3DSPSM_NEG:
961 src = ureg_negate(src);
962 break;
963 case NINED3DSPSM_BIAS:
964 tmp = tx_scratch(tx);
965 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
966 src = ureg_src(tmp);
967 break;
968 case NINED3DSPSM_BIASNEG:
969 tmp = tx_scratch(tx);
970 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
971 src = ureg_src(tmp);
972 break;
973 case NINED3DSPSM_NOT:
974 if (tx->native_integers) {
975 tmp = tx_scratch(tx);
976 ureg_NOT(ureg, tmp, src);
977 src = ureg_src(tmp);
978 break;
979 }
980 /* fall through */
981 case NINED3DSPSM_COMP:
982 tmp = tx_scratch(tx);
983 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
984 src = ureg_src(tmp);
985 break;
986 case NINED3DSPSM_DZ:
987 case NINED3DSPSM_DW:
988 /* Already handled*/
989 break;
990 case NINED3DSPSM_SIGN:
991 tmp = tx_scratch(tx);
992 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
993 src = ureg_src(tmp);
994 break;
995 case NINED3DSPSM_SIGNNEG:
996 tmp = tx_scratch(tx);
997 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
998 src = ureg_src(tmp);
999 break;
1000 case NINED3DSPSM_X2:
1001 tmp = tx_scratch(tx);
1002 ureg_ADD(ureg, tmp, src, src);
1003 src = ureg_src(tmp);
1004 break;
1005 case NINED3DSPSM_X2NEG:
1006 tmp = tx_scratch(tx);
1007 ureg_ADD(ureg, tmp, src, src);
1008 src = ureg_negate(ureg_src(tmp));
1009 break;
1010 default:
1011 assert(param->mod == NINED3DSPSM_NONE);
1012 break;
1013 }
1014
1015 return src;
1016 }
1017
1018 static struct ureg_dst
1019 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1020 {
1021 struct ureg_dst dst;
1022
1023 switch (param->file)
1024 {
1025 case D3DSPR_TEMP:
1026 assert(!param->rel);
1027 tx_temp_alloc(tx, param->idx);
1028 dst = tx->regs.r[param->idx];
1029 break;
1030 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1031 case D3DSPR_ADDR:
1032 assert(!param->rel);
1033 if (tx->version.major < 2 && !IS_VS) {
1034 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1035 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1036 dst = tx->regs.tS[param->idx];
1037 } else
1038 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1039 tx_texcoord_alloc(tx, param->idx);
1040 dst = ureg_dst(tx->regs.vT[param->idx]);
1041 } else {
1042 tx_addr_alloc(tx, param->idx);
1043 dst = tx->regs.a0;
1044 }
1045 break;
1046 case D3DSPR_RASTOUT:
1047 assert(!param->rel);
1048 switch (param->idx) {
1049 case 0:
1050 if (ureg_dst_is_undef(tx->regs.oPos))
1051 tx->regs.oPos =
1052 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1053 dst = tx->regs.oPos;
1054 break;
1055 case 1:
1056 if (ureg_dst_is_undef(tx->regs.oFog))
1057 tx->regs.oFog =
1058 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1059 dst = tx->regs.oFog;
1060 break;
1061 case 2:
1062 if (ureg_dst_is_undef(tx->regs.oPts))
1063 tx->regs.oPts =
1064 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1065 dst = tx->regs.oPts;
1066 break;
1067 default:
1068 assert(0);
1069 break;
1070 }
1071 break;
1072 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1073 case D3DSPR_OUTPUT:
1074 if (tx->version.major < 3) {
1075 assert(!param->rel);
1076 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1077 } else {
1078 assert(!param->rel); /* TODO */
1079 assert(param->idx < Elements(tx->regs.o));
1080 dst = tx->regs.o[param->idx];
1081 }
1082 break;
1083 case D3DSPR_ATTROUT: /* VS */
1084 case D3DSPR_COLOROUT: /* PS */
1085 assert(param->idx >= 0 && param->idx < 4);
1086 assert(!param->rel);
1087 tx->info->rt_mask |= 1 << param->idx;
1088 if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
1089 tx->regs.oCol[param->idx] =
1090 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1091 dst = tx->regs.oCol[param->idx];
1092 if (IS_VS && tx->version.major < 3)
1093 dst = ureg_saturate(dst);
1094 break;
1095 case D3DSPR_DEPTHOUT:
1096 assert(!param->rel);
1097 if (ureg_dst_is_undef(tx->regs.oDepth))
1098 tx->regs.oDepth =
1099 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1100 TGSI_WRITEMASK_Z);
1101 dst = tx->regs.oDepth; /* XXX: must write .z component */
1102 break;
1103 case D3DSPR_PREDICATE:
1104 assert(!param->rel);
1105 tx_pred_alloc(tx, param->idx);
1106 dst = tx->regs.p;
1107 break;
1108 case D3DSPR_TEMPFLOAT16:
1109 DBG("unhandled D3DSPR: %u\n", param->file);
1110 break;
1111 default:
1112 assert(!"invalid dst D3DSPR");
1113 break;
1114 }
1115 if (param->rel)
1116 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1117
1118 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1119 dst = ureg_writemask(dst, param->mask);
1120 if (param->mod & NINED3DSPDM_SATURATE)
1121 dst = ureg_saturate(dst);
1122
1123 return dst;
1124 }
1125
1126 static struct ureg_dst
1127 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1128 {
1129 if (param->shift) {
1130 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1131 return tx->regs.tdst;
1132 }
1133 return _tx_dst_param(tx, param);
1134 }
1135
1136 static void
1137 tx_apply_dst0_modifiers(struct shader_translator *tx)
1138 {
1139 struct ureg_dst rdst;
1140 float f;
1141
1142 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1143 return;
1144 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1145
1146 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1147
1148 if (tx->insn.dst[0].shift < 0)
1149 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1150 else
1151 f = 1 << tx->insn.dst[0].shift;
1152
1153 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1154 }
1155
1156 static struct ureg_src
1157 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1158 {
1159 struct ureg_src src;
1160
1161 assert(!param->shift);
1162 assert(!(param->mod & NINED3DSPDM_SATURATE));
1163
1164 switch (param->file) {
1165 case D3DSPR_INPUT:
1166 if (IS_VS) {
1167 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1168 } else {
1169 assert(!param->rel);
1170 assert(param->idx < Elements(tx->regs.v));
1171 src = tx->regs.v[param->idx];
1172 }
1173 break;
1174 default:
1175 src = ureg_src(tx_dst_param(tx, param));
1176 break;
1177 }
1178 if (param->rel)
1179 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1180
1181 if (!param->mask)
1182 WARN("mask is 0, using identity swizzle\n");
1183
1184 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1185 char s[4];
1186 int n;
1187 int c;
1188 for (n = 0, c = 0; c < 4; ++c)
1189 if (param->mask & (1 << c))
1190 s[n++] = c;
1191 assert(n);
1192 for (c = n; c < 4; ++c)
1193 s[c] = s[n - 1];
1194 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1195 }
1196 return src;
1197 }
1198
1199 static HRESULT
1200 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1201 {
1202 struct ureg_program *ureg = tx->ureg;
1203 struct ureg_dst dst;
1204 struct ureg_src src[2];
1205 struct sm1_src_param *src_mat = &tx->insn.src[1];
1206 unsigned i;
1207
1208 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1209 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1210
1211 for (i = 0; i < n; i++)
1212 {
1213 const unsigned m = (1 << i);
1214
1215 src[1] = tx_src_param(tx, src_mat);
1216 src_mat->idx++;
1217
1218 if (!(dst.WriteMask & m))
1219 continue;
1220
1221 /* XXX: src == dst case ? */
1222
1223 switch (k) {
1224 case 3:
1225 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1226 break;
1227 case 4:
1228 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1229 break;
1230 default:
1231 DBG("invalid operation: M%ux%u\n", m, n);
1232 break;
1233 }
1234 }
1235
1236 return D3D_OK;
1237 }
1238
/* Expands to two zero values (presumably the min/max of a version range for
 * an unsupported shader stage -- confirm against the instruction table). */
#define VNOTSUPPORTED   0, 0
/* Pack a shader version: major number shifted left by 8, OR'ed with minor. */
#define V(maj, min)     (((maj) << 8) | (min))
1241
1242 static INLINE const char *
1243 d3dsio_to_string( unsigned opcode )
1244 {
1245 static const char *names[] = {
1246 "NOP",
1247 "MOV",
1248 "ADD",
1249 "SUB",
1250 "MAD",
1251 "MUL",
1252 "RCP",
1253 "RSQ",
1254 "DP3",
1255 "DP4",
1256 "MIN",
1257 "MAX",
1258 "SLT",
1259 "SGE",
1260 "EXP",
1261 "LOG",
1262 "LIT",
1263 "DST",
1264 "LRP",
1265 "FRC",
1266 "M4x4",
1267 "M4x3",
1268 "M3x4",
1269 "M3x3",
1270 "M3x2",
1271 "CALL",
1272 "CALLNZ",
1273 "LOOP",
1274 "RET",
1275 "ENDLOOP",
1276 "LABEL",
1277 "DCL",
1278 "POW",
1279 "CRS",
1280 "SGN",
1281 "ABS",
1282 "NRM",
1283 "SINCOS",
1284 "REP",
1285 "ENDREP",
1286 "IF",
1287 "IFC",
1288 "ELSE",
1289 "ENDIF",
1290 "BREAK",
1291 "BREAKC",
1292 "MOVA",
1293 "DEFB",
1294 "DEFI",
1295 NULL,
1296 NULL,
1297 NULL,
1298 NULL,
1299 NULL,
1300 NULL,
1301 NULL,
1302 NULL,
1303 NULL,
1304 NULL,
1305 NULL,
1306 NULL,
1307 NULL,
1308 NULL,
1309 NULL,
1310 "TEXCOORD",
1311 "TEXKILL",
1312 "TEX",
1313 "TEXBEM",
1314 "TEXBEML",
1315 "TEXREG2AR",
1316 "TEXREG2GB",
1317 "TEXM3x2PAD",
1318 "TEXM3x2TEX",
1319 "TEXM3x3PAD",
1320 "TEXM3x3TEX",
1321 NULL,
1322 "TEXM3x3SPEC",
1323 "TEXM3x3VSPEC",
1324 "EXPP",
1325 "LOGP",
1326 "CND",
1327 "DEF",
1328 "TEXREG2RGB",
1329 "TEXDP3TEX",
1330 "TEXM3x2DEPTH",
1331 "TEXDP3",
1332 "TEXM3x3",
1333 "TEXDEPTH",
1334 "CMP",
1335 "BEM",
1336 "DP2ADD",
1337 "DSX",
1338 "DSY",
1339 "TEXLDD",
1340 "SETP",
1341 "TEXLDL",
1342 "BREAKP"
1343 };
1344
1345 if (opcode < Elements(names)) return names[opcode];
1346
1347 switch (opcode) {
1348 case D3DSIO_PHASE: return "PHASE";
1349 case D3DSIO_COMMENT: return "COMMENT";
1350 case D3DSIO_END: return "END";
1351 default:
1352 return NULL;
1353 }
1354 }
1355
1356 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1357 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1358 (inst).vert_version.max | \
1359 (inst).frag_version.min | \
1360 (inst).frag_version.max)
1361
1362 #define SPECIAL(name) \
1363 NineTranslateInstruction_##name
1364
1365 #define DECL_SPECIAL(name) \
1366 static HRESULT \
1367 NineTranslateInstruction_##name( struct shader_translator *tx )
1368
1369 static HRESULT
1370 NineTranslateInstruction_Generic(struct shader_translator *);
1371
1372 DECL_SPECIAL(M4x4)
1373 {
1374 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1375 }
1376
1377 DECL_SPECIAL(M4x3)
1378 {
1379 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1380 }
1381
1382 DECL_SPECIAL(M3x4)
1383 {
1384 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1385 }
1386
1387 DECL_SPECIAL(M3x3)
1388 {
1389 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1390 }
1391
1392 DECL_SPECIAL(M3x2)
1393 {
1394 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1395 }
1396
1397 DECL_SPECIAL(CMP)
1398 {
1399 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1400 tx_src_param(tx, &tx->insn.src[0]),
1401 tx_src_param(tx, &tx->insn.src[2]),
1402 tx_src_param(tx, &tx->insn.src[1]));
1403 return D3D_OK;
1404 }
1405
1406 DECL_SPECIAL(CND)
1407 {
1408 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1409 struct ureg_dst cgt;
1410 struct ureg_src cnd;
1411
1412 /* the coissue flag was a tip for compilers to advise to
1413 * execute two operations at the same time, in cases
1414 * the two executions had same dst with different channels.
1415 * It has no effect on current hw. However it seems CND
1416 * is affected. The handling of this very specific case
1417 * handled below mimick wine behaviour */
1418 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1419 ureg_MOV(tx->ureg,
1420 dst, tx_src_param(tx, &tx->insn.src[1]));
1421 return D3D_OK;
1422 }
1423
1424 cnd = tx_src_param(tx, &tx->insn.src[0]);
1425 cgt = tx_scratch(tx);
1426
1427 if (tx->version.major == 1 && tx->version.minor < 4)
1428 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1429
1430 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1431
1432 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1433 tx_src_param(tx, &tx->insn.src[1]),
1434 tx_src_param(tx, &tx->insn.src[2]));
1435 return D3D_OK;
1436 }
1437
1438 DECL_SPECIAL(CALL)
1439 {
1440 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1441 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1442 return D3D_OK;
1443 }
1444
1445 DECL_SPECIAL(CALLNZ)
1446 {
1447 struct ureg_program *ureg = tx->ureg;
1448 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1449
1450 if (!tx->native_integers)
1451 ureg_IF(ureg, src, tx_cond(tx));
1452 else
1453 ureg_UIF(ureg, src, tx_cond(tx));
1454 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1455 tx_endcond(tx);
1456 ureg_ENDIF(ureg);
1457 return D3D_OK;
1458 }
1459
1460 DECL_SPECIAL(MOV_vs1x)
1461 {
1462 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1463 /* Implementation note: We don't write directly
1464 * to the addr register, but to an intermediate
1465 * float register.
1466 * Contrary to the doc, when writing to ADDR here,
1467 * the rounding is not to nearest, but to lowest
1468 * (wine test).
1469 * Since we use ARR next, substract 0.5. */
1470 ureg_SUB(tx->ureg,
1471 tx_dst_param(tx, &tx->insn.dst[0]),
1472 tx_src_param(tx, &tx->insn.src[0]),
1473 ureg_imm1f(tx->ureg, 0.5f));
1474 return D3D_OK;
1475 }
1476 return NineTranslateInstruction_Generic(tx);
1477 }
1478
1479 DECL_SPECIAL(LOOP)
1480 {
1481 struct ureg_program *ureg = tx->ureg;
1482 unsigned *label;
1483 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1484 struct ureg_dst ctr;
1485 struct ureg_dst tmp;
1486 struct ureg_src ctrx;
1487
1488 label = tx_bgnloop(tx);
1489 ctr = tx_get_loopctr(tx, TRUE);
1490 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1491
1492 /* src: num_iterations - start_value of al - step for al - 0 */
1493 ureg_MOV(ureg, ctr, src);
1494 ureg_BGNLOOP(tx->ureg, label);
1495 tmp = tx_scratch_scalar(tx);
1496 /* Initially ctr.x contains the number of iterations.
1497 * ctr.y will contain the updated value of al.
1498 * We decrease ctr.x at the end of every iteration,
1499 * and stop when it reaches 0. */
1500
1501 if (!tx->native_integers) {
1502 /* case src and ctr contain floats */
1503 /* to avoid precision issue, we stop when ctr <= 0.5 */
1504 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1505 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1506 } else {
1507 /* case src and ctr contain integers */
1508 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1509 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1510 }
1511 ureg_BRK(ureg);
1512 tx_endcond(tx);
1513 ureg_ENDIF(ureg);
1514 return D3D_OK;
1515 }
1516
1517 DECL_SPECIAL(RET)
1518 {
1519 ureg_RET(tx->ureg);
1520 return D3D_OK;
1521 }
1522
1523 DECL_SPECIAL(ENDLOOP)
1524 {
1525 struct ureg_program *ureg = tx->ureg;
1526 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1527 struct ureg_dst dst_ctrx, dst_al;
1528 struct ureg_src src_ctr, al_counter;
1529
1530 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1531 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1532 src_ctr = ureg_src(ctr);
1533 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1534
1535 /* ctr.x -= 1
1536 * ctr.y (aL) += step */
1537 if (!tx->native_integers) {
1538 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1539 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1540 } else {
1541 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1542 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1543 }
1544 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1545 return D3D_OK;
1546 }
1547
1548 DECL_SPECIAL(LABEL)
1549 {
1550 unsigned k = tx->num_inst_labels;
1551 unsigned n = tx->insn.src[0].idx;
1552 assert(n < 2048);
1553 if (n >= k)
1554 tx->inst_labels = REALLOC(tx->inst_labels,
1555 k * sizeof(tx->inst_labels[0]),
1556 n * sizeof(tx->inst_labels[0]));
1557
1558 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1559 return D3D_OK;
1560 }
1561
1562 DECL_SPECIAL(SINCOS)
1563 {
1564 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1565 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1566
1567 assert(!(dst.WriteMask & 0xc));
1568
1569 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1570 ureg_SCS(tx->ureg, dst, src);
1571 return D3D_OK;
1572 }
1573
1574 DECL_SPECIAL(SGN)
1575 {
1576 ureg_SSG(tx->ureg,
1577 tx_dst_param(tx, &tx->insn.dst[0]),
1578 tx_src_param(tx, &tx->insn.src[0]));
1579 return D3D_OK;
1580 }
1581
1582 DECL_SPECIAL(REP)
1583 {
1584 struct ureg_program *ureg = tx->ureg;
1585 unsigned *label;
1586 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1587 struct ureg_dst ctr;
1588 struct ureg_dst tmp;
1589 struct ureg_src ctrx;
1590
1591 label = tx_bgnloop(tx);
1592 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1593 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1594
1595 /* NOTE: rep must be constant, so we don't have to save the count */
1596 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1597
1598 /* rep: num_iterations - 0 - 0 - 0 */
1599 ureg_MOV(ureg, ctr, rep);
1600 ureg_BGNLOOP(ureg, label);
1601 tmp = tx_scratch_scalar(tx);
1602 /* Initially ctr.x contains the number of iterations.
1603 * We decrease ctr.x at the end of every iteration,
1604 * and stop when it reaches 0. */
1605
1606 if (!tx->native_integers) {
1607 /* case src and ctr contain floats */
1608 /* to avoid precision issue, we stop when ctr <= 0.5 */
1609 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1610 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1611 } else {
1612 /* case src and ctr contain integers */
1613 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1614 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1615 }
1616 ureg_BRK(ureg);
1617 tx_endcond(tx);
1618 ureg_ENDIF(ureg);
1619
1620 return D3D_OK;
1621 }
1622
1623 DECL_SPECIAL(ENDREP)
1624 {
1625 struct ureg_program *ureg = tx->ureg;
1626 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1627 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1628 struct ureg_src src_ctr = ureg_src(ctr);
1629
1630 /* ctr.x -= 1 */
1631 if (!tx->native_integers)
1632 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1633 else
1634 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1635
1636 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1637 return D3D_OK;
1638 }
1639
1640 DECL_SPECIAL(ENDIF)
1641 {
1642 tx_endcond(tx);
1643 ureg_ENDIF(tx->ureg);
1644 return D3D_OK;
1645 }
1646
1647 DECL_SPECIAL(IF)
1648 {
1649 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1650
1651 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1652 ureg_UIF(tx->ureg, src, tx_cond(tx));
1653 else
1654 ureg_IF(tx->ureg, src, tx_cond(tx));
1655
1656 return D3D_OK;
1657 }
1658
1659 static INLINE unsigned
1660 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1661 {
1662 switch (flags) {
1663 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1664 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1665 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1666 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1667 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1668 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1669 default:
1670 assert(!"invalid comparison flags");
1671 return TGSI_OPCODE_SGT;
1672 }
1673 }
1674
1675 DECL_SPECIAL(IFC)
1676 {
1677 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1678 struct ureg_src src[2];
1679 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1680 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1681 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1682 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1683 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1684 return D3D_OK;
1685 }
1686
1687 DECL_SPECIAL(ELSE)
1688 {
1689 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1690 return D3D_OK;
1691 }
1692
1693 DECL_SPECIAL(BREAKC)
1694 {
1695 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1696 struct ureg_src src[2];
1697 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1698 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1699 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1700 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1701 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1702 ureg_BRK(tx->ureg);
1703 tx_endcond(tx);
1704 ureg_ENDIF(tx->ureg);
1705 return D3D_OK;
1706 }
1707
1708 static const char *sm1_declusage_names[] =
1709 {
1710 [D3DDECLUSAGE_POSITION] = "POSITION",
1711 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1712 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1713 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1714 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1715 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1716 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1717 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1718 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1719 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1720 [D3DDECLUSAGE_COLOR] = "COLOR",
1721 [D3DDECLUSAGE_FOG] = "FOG",
1722 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1723 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1724 };
1725
1726 static INLINE unsigned
1727 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1728 {
1729 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1730 }
1731
1732 static void
1733 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1734 boolean tc,
1735 struct sm1_semantic *dcl)
1736 {
1737 BYTE index = dcl->usage_idx;
1738
1739 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1740 * we match to a TGSI_SEMANTIC_GENERIC with index.
1741 *
1742 * The index can be anything UINT16 and usage_idx is BYTE,
1743 * so we can fit everything. It doesn't matter if indices
1744 * are close together or low.
1745 *
1746 *
1747 * POSITION >= 1: 10 * index + 6
1748 * COLOR >= 2: 10 * (index-1) + 7
1749 * TEXCOORD[0..15]: index
1750 * BLENDWEIGHT: 10 * index + 18
1751 * BLENDINDICES: 10 * index + 19
1752 * NORMAL: 10 * index + 20
1753 * TANGENT: 10 * index + 21
1754 * BINORMAL: 10 * index + 22
1755 * TESSFACTOR: 10 * index + 23
1756 */
1757
1758 switch (dcl->usage) {
1759 case D3DDECLUSAGE_POSITION:
1760 case D3DDECLUSAGE_POSITIONT:
1761 case D3DDECLUSAGE_DEPTH:
1762 if (index == 0) {
1763 sem->Name = TGSI_SEMANTIC_POSITION;
1764 sem->Index = 0;
1765 } else {
1766 sem->Name = TGSI_SEMANTIC_GENERIC;
1767 sem->Index = 10 * index + 6;
1768 }
1769 break;
1770 case D3DDECLUSAGE_COLOR:
1771 if (index < 2) {
1772 sem->Name = TGSI_SEMANTIC_COLOR;
1773 sem->Index = index;
1774 } else {
1775 sem->Name = TGSI_SEMANTIC_GENERIC;
1776 sem->Index = 10 * (index-1) + 7;
1777 }
1778 break;
1779 case D3DDECLUSAGE_FOG:
1780 assert(index == 0);
1781 sem->Name = TGSI_SEMANTIC_FOG;
1782 sem->Index = 0;
1783 break;
1784 case D3DDECLUSAGE_PSIZE:
1785 assert(index == 0);
1786 sem->Name = TGSI_SEMANTIC_PSIZE;
1787 sem->Index = 0;
1788 break;
1789 case D3DDECLUSAGE_TEXCOORD:
1790 assert(index < 16);
1791 if (index < 8 && tc)
1792 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1793 else
1794 sem->Name = TGSI_SEMANTIC_GENERIC;
1795 sem->Index = index;
1796 break;
1797 case D3DDECLUSAGE_BLENDWEIGHT:
1798 sem->Name = TGSI_SEMANTIC_GENERIC;
1799 sem->Index = 10 * index + 18;
1800 break;
1801 case D3DDECLUSAGE_BLENDINDICES:
1802 sem->Name = TGSI_SEMANTIC_GENERIC;
1803 sem->Index = 10 * index + 19;
1804 break;
1805 case D3DDECLUSAGE_NORMAL:
1806 sem->Name = TGSI_SEMANTIC_GENERIC;
1807 sem->Index = 10 * index + 20;
1808 break;
1809 case D3DDECLUSAGE_TANGENT:
1810 sem->Name = TGSI_SEMANTIC_GENERIC;
1811 sem->Index = 10 * index + 21;
1812 break;
1813 case D3DDECLUSAGE_BINORMAL:
1814 sem->Name = TGSI_SEMANTIC_GENERIC;
1815 sem->Index = 10 * index + 22;
1816 break;
1817 case D3DDECLUSAGE_TESSFACTOR:
1818 sem->Name = TGSI_SEMANTIC_GENERIC;
1819 sem->Index = 10 * index + 23;
1820 break;
1821 case D3DDECLUSAGE_SAMPLE:
1822 sem->Name = TGSI_SEMANTIC_COUNT;
1823 sem->Index = 0;
1824 break;
1825 default:
1826 assert(!"Invalid DECLUSAGE.");
1827 break;
1828 }
1829 }
1830
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT,
 * for comparison against the BYTE-sized sampler_type values used below. */
#define NINED3DSTT_1D       (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D       (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME   (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE     (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1835 static INLINE unsigned
1836 d3dstt_to_tgsi_tex(BYTE sampler_type)
1837 {
1838 switch (sampler_type) {
1839 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1840 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1841 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1842 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1843 default:
1844 assert(0);
1845 return TGSI_TEXTURE_UNKNOWN;
1846 }
1847 }
1848 static INLINE unsigned
1849 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1850 {
1851 switch (sampler_type) {
1852 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1853 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1854 case NINED3DSTT_VOLUME:
1855 case NINED3DSTT_CUBE:
1856 default:
1857 assert(0);
1858 return TGSI_TEXTURE_UNKNOWN;
1859 }
1860 }
1861 static INLINE unsigned
1862 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1863 {
1864 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1865 case 1: return TGSI_TEXTURE_1D;
1866 case 0: return TGSI_TEXTURE_2D;
1867 case 3: return TGSI_TEXTURE_3D;
1868 default:
1869 return TGSI_TEXTURE_CUBE;
1870 }
1871 }
1872
1873 static const char *
1874 sm1_sampler_type_name(BYTE sampler_type)
1875 {
1876 switch (sampler_type) {
1877 case NINED3DSTT_1D: return "1D";
1878 case NINED3DSTT_2D: return "2D";
1879 case NINED3DSTT_VOLUME: return "VOLUME";
1880 case NINED3DSTT_CUBE: return "CUBE";
1881 default:
1882 return "(D3DSTT_?)";
1883 }
1884 }
1885
1886 static INLINE unsigned
1887 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1888 {
1889 switch (sem->Name) {
1890 case TGSI_SEMANTIC_POSITION:
1891 case TGSI_SEMANTIC_NORMAL:
1892 return TGSI_INTERPOLATE_LINEAR;
1893 case TGSI_SEMANTIC_BCOLOR:
1894 case TGSI_SEMANTIC_COLOR:
1895 case TGSI_SEMANTIC_FOG:
1896 case TGSI_SEMANTIC_GENERIC:
1897 case TGSI_SEMANTIC_TEXCOORD:
1898 case TGSI_SEMANTIC_CLIPDIST:
1899 case TGSI_SEMANTIC_CLIPVERTEX:
1900 return TGSI_INTERPOLATE_PERSPECTIVE;
1901 case TGSI_SEMANTIC_EDGEFLAG:
1902 case TGSI_SEMANTIC_FACE:
1903 case TGSI_SEMANTIC_INSTANCEID:
1904 case TGSI_SEMANTIC_PCOORD:
1905 case TGSI_SEMANTIC_PRIMID:
1906 case TGSI_SEMANTIC_PSIZE:
1907 case TGSI_SEMANTIC_VERTEXID:
1908 return TGSI_INTERPOLATE_CONSTANT;
1909 default:
1910 assert(0);
1911 return TGSI_INTERPOLATE_CONSTANT;
1912 }
1913 }
1914
1915 DECL_SPECIAL(DCL)
1916 {
1917 struct ureg_program *ureg = tx->ureg;
1918 boolean is_input;
1919 boolean is_sampler;
1920 struct tgsi_declaration_semantic tgsi;
1921 struct sm1_semantic sem;
1922 sm1_read_semantic(tx, &sem);
1923
1924 is_input = sem.reg.file == D3DSPR_INPUT;
1925 is_sampler =
1926 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1927
1928 DUMP("DCL ");
1929 sm1_dump_dst_param(&sem.reg);
1930 if (is_sampler)
1931 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1932 else
1933 if (tx->version.major >= 3)
1934 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1935 else
1936 if (sem.usage | sem.usage_idx)
1937 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1938 else
1939 DUMP("\n");
1940
1941 if (is_sampler) {
1942 const unsigned m = 1 << sem.reg.idx;
1943 ureg_DECL_sampler(ureg, sem.reg.idx);
1944 tx->info->sampler_mask |= m;
1945 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1946 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1947 d3dstt_to_tgsi_tex(sem.sampler_type);
1948 return D3D_OK;
1949 }
1950
1951 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1952 if (IS_VS) {
1953 if (is_input) {
1954 /* linkage outside of shader with vertex declaration */
1955 ureg_DECL_vs_input(ureg, sem.reg.idx);
1956 assert(sem.reg.idx < Elements(tx->info->input_map));
1957 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1958 tx->info->num_inputs = sem.reg.idx + 1;
1959 /* NOTE: preserving order in case of indirect access */
1960 } else
1961 if (tx->version.major >= 3) {
1962 /* SM2 output semantic determined by file */
1963 assert(sem.reg.mask != 0);
1964 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1965 tx->info->position_t = TRUE;
1966 assert(sem.reg.idx < Elements(tx->regs.o));
1967 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1968 ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1969
1970 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1971 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1972 }
1973 } else {
1974 if (is_input && tx->version.major >= 3) {
1975 /* SM3 only, SM2 input semantic determined by file */
1976 assert(sem.reg.idx < Elements(tx->regs.v));
1977 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1978 ureg, tgsi.Name, tgsi.Index,
1979 nine_tgsi_to_interp_mode(&tgsi),
1980 0, /* cylwrap */
1981 sem.reg.mod & NINED3DSPDM_CENTROID);
1982 } else
1983 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
1984 /* FragColor or FragDepth */
1985 assert(sem.reg.mask != 0);
1986 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1987 }
1988 }
1989 return D3D_OK;
1990 }
1991
1992 DECL_SPECIAL(DEF)
1993 {
1994 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
1995 return D3D_OK;
1996 }
1997
1998 DECL_SPECIAL(DEFB)
1999 {
2000 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2001 return D3D_OK;
2002 }
2003
2004 DECL_SPECIAL(DEFI)
2005 {
2006 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2007 return D3D_OK;
2008 }
2009
2010 DECL_SPECIAL(POW)
2011 {
2012 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2013 struct ureg_src src[2] = {
2014 tx_src_param(tx, &tx->insn.src[0]),
2015 tx_src_param(tx, &tx->insn.src[1])
2016 };
2017 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2018 return D3D_OK;
2019 }
2020
2021 DECL_SPECIAL(RSQ)
2022 {
2023 struct ureg_program *ureg = tx->ureg;
2024 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2025 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2026 struct ureg_dst tmp = tx_scratch(tx);
2027 ureg_RSQ(ureg, tmp, ureg_abs(src));
2028 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2029 return D3D_OK;
2030 }
2031
2032 DECL_SPECIAL(LOG)
2033 {
2034 struct ureg_program *ureg = tx->ureg;
2035 struct ureg_dst tmp = tx_scratch_scalar(tx);
2036 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2037 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2038 ureg_LG2(ureg, tmp, ureg_abs(src));
2039 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2040 return D3D_OK;
2041 }
2042
2043 DECL_SPECIAL(NRM)
2044 {
2045 struct ureg_program *ureg = tx->ureg;
2046 struct ureg_dst tmp = tx_scratch_scalar(tx);
2047 struct ureg_src nrm = tx_src_scalar(tmp);
2048 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2049 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2050 ureg_DP3(ureg, tmp, src, src);
2051 ureg_RSQ(ureg, tmp, nrm);
2052 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2053 ureg_MUL(ureg, dst, src, nrm);
2054 return D3D_OK;
2055 }
2056
2057 DECL_SPECIAL(DP2ADD)
2058 {
2059 struct ureg_dst tmp = tx_scratch_scalar(tx);
2060 struct ureg_src dp2 = tx_src_scalar(tmp);
2061 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2062 struct ureg_src src[3];
2063 int i;
2064 for (i = 0; i < 3; ++i)
2065 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2066 assert_replicate_swizzle(&src[2]);
2067
2068 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2069 ureg_ADD(tx->ureg, dst, src[2], dp2);
2070
2071 return D3D_OK;
2072 }
2073
2074 DECL_SPECIAL(TEXCOORD)
2075 {
2076 struct ureg_program *ureg = tx->ureg;
2077 const unsigned s = tx->insn.dst[0].idx;
2078 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2079
2080 tx_texcoord_alloc(tx, s);
2081 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2082 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2083
2084 return D3D_OK;
2085 }
2086
2087 DECL_SPECIAL(TEXCOORD_ps14)
2088 {
2089 struct ureg_program *ureg = tx->ureg;
2090 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2091 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2092
2093 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2094
2095 ureg_MOV(ureg, dst, src);
2096
2097 return D3D_OK;
2098 }
2099
2100 DECL_SPECIAL(TEXKILL)
2101 {
2102 struct ureg_src reg;
2103
2104 if (tx->version.major > 1 || tx->version.minor > 3) {
2105 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2106 } else {
2107 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2108 reg = tx->regs.vT[tx->insn.dst[0].idx];
2109 }
2110 if (tx->version.major < 2)
2111 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2112 ureg_KILL_IF(tx->ureg, reg);
2113
2114 return D3D_OK;
2115 }
2116
2117 DECL_SPECIAL(TEXBEM)
2118 {
2119 STUB(D3DERR_INVALIDCALL);
2120 }
2121
2122 DECL_SPECIAL(TEXBEML)
2123 {
2124 STUB(D3DERR_INVALIDCALL);
2125 }
2126
2127 DECL_SPECIAL(TEXREG2AR)
2128 {
2129 struct ureg_program *ureg = tx->ureg;
2130 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2131 struct ureg_src sample;
2132 const int m = tx->insn.dst[0].idx;
2133 const int n = tx->insn.src[0].idx;
2134 assert(m >= 0 && m > n);
2135
2136 sample = ureg_DECL_sampler(ureg, m);
2137 tx->info->sampler_mask |= 1 << m;
2138 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2139
2140 return D3D_OK;
2141 }
2142
2143 DECL_SPECIAL(TEXREG2GB)
2144 {
2145 struct ureg_program *ureg = tx->ureg;
2146 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2147 struct ureg_src sample;
2148 const int m = tx->insn.dst[0].idx;
2149 const int n = tx->insn.src[0].idx;
2150 assert(m >= 0 && m > n);
2151
2152 sample = ureg_DECL_sampler(ureg, m);
2153 tx->info->sampler_mask |= 1 << m;
2154 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2155
2156 return D3D_OK;
2157 }
2158
2159 DECL_SPECIAL(TEXM3x2PAD)
2160 {
2161 return D3D_OK; /* this is just padding */
2162 }
2163
2164 DECL_SPECIAL(TEXM3x2TEX)
2165 {
2166 struct ureg_program *ureg = tx->ureg;
2167 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2168 struct ureg_src sample;
2169 const int m = tx->insn.dst[0].idx - 1;
2170 const int n = tx->insn.src[0].idx;
2171 assert(m >= 0 && m > n);
2172
2173 tx_texcoord_alloc(tx, m);
2174 tx_texcoord_alloc(tx, m+1);
2175
2176 /* performs the matrix multiplication */
2177 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2178 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2179
2180 sample = ureg_DECL_sampler(ureg, m + 1);
2181 tx->info->sampler_mask |= 1 << (m + 1);
2182 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2183
2184 return D3D_OK;
2185 }
2186
2187 DECL_SPECIAL(TEXM3x3PAD)
2188 {
2189 return D3D_OK; /* this is just padding */
2190 }
2191
2192 DECL_SPECIAL(TEXM3x3SPEC)
2193 {
2194 struct ureg_program *ureg = tx->ureg;
2195 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2196 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2197 struct ureg_src sample;
2198 struct ureg_dst tmp;
2199 const int m = tx->insn.dst[0].idx - 2;
2200 const int n = tx->insn.src[0].idx;
2201 assert(m >= 0 && m > n);
2202
2203 tx_texcoord_alloc(tx, m);
2204 tx_texcoord_alloc(tx, m+1);
2205 tx_texcoord_alloc(tx, m+2);
2206
2207 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2208 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2209 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2210
2211 sample = ureg_DECL_sampler(ureg, m + 2);
2212 tx->info->sampler_mask |= 1 << (m + 2);
2213 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2214
2215 /* At this step, dst = N = (u', w', z').
2216 * We want dst to be the texture sampled at (u'', w'', z''), with
2217 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2218 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2219 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2220 /* at this step tmp.x = 1/N.N */
2221 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2222 /* at this step tmp.y = N.E */
2223 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2224 /* at this step tmp.x = N.E/N.N */
2225 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2226 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2227 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2228 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2229 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2230
2231 return D3D_OK;
2232 }
2233
2234 DECL_SPECIAL(TEXREG2RGB)
2235 {
2236 struct ureg_program *ureg = tx->ureg;
2237 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2238 struct ureg_src sample;
2239 const int m = tx->insn.dst[0].idx;
2240 const int n = tx->insn.src[0].idx;
2241 assert(m >= 0 && m > n);
2242
2243 sample = ureg_DECL_sampler(ureg, m);
2244 tx->info->sampler_mask |= 1 << m;
2245 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2246
2247 return D3D_OK;
2248 }
2249
2250 DECL_SPECIAL(TEXDP3TEX)
2251 {
2252 struct ureg_program *ureg = tx->ureg;
2253 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2254 struct ureg_dst tmp;
2255 struct ureg_src sample;
2256 const int m = tx->insn.dst[0].idx;
2257 const int n = tx->insn.src[0].idx;
2258 assert(m >= 0 && m > n);
2259
2260 tx_texcoord_alloc(tx, m);
2261
2262 tmp = tx_scratch(tx);
2263 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2264 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2265
2266 sample = ureg_DECL_sampler(ureg, m);
2267 tx->info->sampler_mask |= 1 << m;
2268 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2269
2270 return D3D_OK;
2271 }
2272
2273 DECL_SPECIAL(TEXM3x2DEPTH)
2274 {
2275 struct ureg_program *ureg = tx->ureg;
2276 struct ureg_dst tmp;
2277 const int m = tx->insn.dst[0].idx - 1;
2278 const int n = tx->insn.src[0].idx;
2279 assert(m >= 0 && m > n);
2280
2281 tx_texcoord_alloc(tx, m);
2282 tx_texcoord_alloc(tx, m+1);
2283
2284 tmp = tx_scratch(tx);
2285
2286 /* performs the matrix multiplication */
2287 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2288 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2289
2290 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2291 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2292 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2293 /* res = 'w' == 0 ? 1.0 : z/w */
2294 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2295 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2296 /* replace the depth for depth testing with the result */
2297 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z);
2298 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2299 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2300 return D3D_OK;
2301 }
2302
2303 DECL_SPECIAL(TEXDP3)
2304 {
2305 struct ureg_program *ureg = tx->ureg;
2306 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2307 const int m = tx->insn.dst[0].idx;
2308 const int n = tx->insn.src[0].idx;
2309 assert(m >= 0 && m > n);
2310
2311 tx_texcoord_alloc(tx, m);
2312
2313 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2314
2315 return D3D_OK;
2316 }
2317
2318 DECL_SPECIAL(TEXM3x3)
2319 {
2320 struct ureg_program *ureg = tx->ureg;
2321 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2322 struct ureg_src sample;
2323 struct ureg_dst E, tmp;
2324 const int m = tx->insn.dst[0].idx - 2;
2325 const int n = tx->insn.src[0].idx;
2326 assert(m >= 0 && m > n);
2327
2328 tx_texcoord_alloc(tx, m);
2329 tx_texcoord_alloc(tx, m+1);
2330 tx_texcoord_alloc(tx, m+2);
2331
2332 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2333 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2334 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2335
2336 switch (tx->insn.opcode) {
2337 case D3DSIO_TEXM3x3:
2338 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2339 break;
2340 case D3DSIO_TEXM3x3TEX:
2341 sample = ureg_DECL_sampler(ureg, m + 2);
2342 tx->info->sampler_mask |= 1 << (m + 2);
2343 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2344 break;
2345 case D3DSIO_TEXM3x3VSPEC:
2346 sample = ureg_DECL_sampler(ureg, m + 2);
2347 tx->info->sampler_mask |= 1 << (m + 2);
2348 E = tx_scratch(tx);
2349 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2350 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2351 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2352 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2353 /* At this step, dst = N = (u', w', z').
2354 * We want dst to be the texture sampled at (u'', w'', z''), with
2355 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2356 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2357 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2358 /* at this step tmp.x = 1/N.N */
2359 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2360 /* at this step tmp.y = N.E */
2361 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2362 /* at this step tmp.x = N.E/N.N */
2363 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2364 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2365 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2366 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2367 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2368 break;
2369 default:
2370 return D3DERR_INVALIDCALL;
2371 }
2372 return D3D_OK;
2373 }
2374
2375 DECL_SPECIAL(TEXDEPTH)
2376 {
2377 struct ureg_program *ureg = tx->ureg;
2378 struct ureg_dst r5;
2379 struct ureg_src r5r, r5g;
2380
2381 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2382
2383 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2384 * r5 won't be used afterward, thus we can use r5.ba */
2385 r5 = tx->regs.r[5];
2386 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2387 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2388
2389 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2390 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2391 /* r5.r = r/g */
2392 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2393 r5r, ureg_imm1f(ureg, 1.0f));
2394 /* replace the depth for depth testing with the result */
2395 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z);
2396 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2397
2398 return D3D_OK;
2399 }
2400
2401 DECL_SPECIAL(BEM)
2402 {
2403 STUB(D3DERR_INVALIDCALL);
2404 }
2405
2406 DECL_SPECIAL(TEXLD)
2407 {
2408 struct ureg_program *ureg = tx->ureg;
2409 unsigned target;
2410 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2411 struct ureg_src src[2] = {
2412 tx_src_param(tx, &tx->insn.src[0]),
2413 tx_src_param(tx, &tx->insn.src[1])
2414 };
2415 assert(tx->insn.src[1].idx >= 0 &&
2416 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2417 target = tx->sampler_targets[tx->insn.src[1].idx];
2418
2419 switch (tx->insn.flags) {
2420 case 0:
2421 ureg_TEX(ureg, dst, target, src[0], src[1]);
2422 break;
2423 case NINED3DSI_TEXLD_PROJECT:
2424 ureg_TXP(ureg, dst, target, src[0], src[1]);
2425 break;
2426 case NINED3DSI_TEXLD_BIAS:
2427 ureg_TXB(ureg, dst, target, src[0], src[1]);
2428 break;
2429 default:
2430 assert(0);
2431 return D3DERR_INVALIDCALL;
2432 }
2433 return D3D_OK;
2434 }
2435
2436 DECL_SPECIAL(TEXLD_14)
2437 {
2438 struct ureg_program *ureg = tx->ureg;
2439 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2440 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2441 const unsigned s = tx->insn.dst[0].idx;
2442 const unsigned t = ps1x_sampler_type(tx->info, s);
2443
2444 tx->info->sampler_mask |= 1 << s;
2445 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2446
2447 return D3D_OK;
2448 }
2449
2450 DECL_SPECIAL(TEX)
2451 {
2452 struct ureg_program *ureg = tx->ureg;
2453 const unsigned s = tx->insn.dst[0].idx;
2454 const unsigned t = ps1x_sampler_type(tx->info, s);
2455 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2456 struct ureg_src src[2];
2457
2458 tx_texcoord_alloc(tx, s);
2459
2460 src[0] = tx->regs.vT[s];
2461 src[1] = ureg_DECL_sampler(ureg, s);
2462 tx->info->sampler_mask |= 1 << s;
2463
2464 ureg_TEX(ureg, dst, t, src[0], src[1]);
2465
2466 return D3D_OK;
2467 }
2468
2469 DECL_SPECIAL(TEXLDD)
2470 {
2471 unsigned target;
2472 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2473 struct ureg_src src[4] = {
2474 tx_src_param(tx, &tx->insn.src[0]),
2475 tx_src_param(tx, &tx->insn.src[1]),
2476 tx_src_param(tx, &tx->insn.src[2]),
2477 tx_src_param(tx, &tx->insn.src[3])
2478 };
2479 assert(tx->insn.src[3].idx >= 0 &&
2480 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2481 target = tx->sampler_targets[tx->insn.src[1].idx];
2482
2483 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2484 return D3D_OK;
2485 }
2486
2487 DECL_SPECIAL(TEXLDL)
2488 {
2489 unsigned target;
2490 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2491 struct ureg_src src[2] = {
2492 tx_src_param(tx, &tx->insn.src[0]),
2493 tx_src_param(tx, &tx->insn.src[1])
2494 };
2495 assert(tx->insn.src[3].idx >= 0 &&
2496 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2497 target = tx->sampler_targets[tx->insn.src[1].idx];
2498
2499 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2500 return D3D_OK;
2501 }
2502
2503 DECL_SPECIAL(SETP)
2504 {
2505 STUB(D3DERR_INVALIDCALL);
2506 }
2507
2508 DECL_SPECIAL(BREAKP)
2509 {
2510 STUB(D3DERR_INVALIDCALL);
2511 }
2512
2513 DECL_SPECIAL(PHASE)
2514 {
2515 return D3D_OK; /* we don't care about phase */
2516 }
2517
2518 DECL_SPECIAL(COMMENT)
2519 {
2520 return D3D_OK; /* nothing to do */
2521 }
2522
2523
/* Builds one positional struct sm1_op_info initializer:
 *   d3d_op           suffix of the D3DSIO_ opcode
 *   tgsi_op          suffix of the TGSI_OPCODE_ opcode
 *   vs_first/vs_last first pair of version bounds
 *   ps_first/ps_last second pair of version bounds
 *   num_dst, num_src, special   remaining three members, passed through
 * NOTE(review): presumably these land in sio, opcode, vert_version,
 * frag_version, ndst, nsrc and handler - confirm against the struct
 * declaration, which is outside this chunk. */
#define _OPI(d3d_op, tgsi_op, vs_first, vs_last, ps_first, ps_last, num_dst, num_src, special) \
    { D3DSIO_##d3d_op, TGSI_OPCODE_##tgsi_op, { vs_first, vs_last }, { ps_first, ps_last, }, num_dst, num_src, special }
2526
/* Table of known shader instructions, one _OPI row per (opcode, version
 * range) combination.
 *
 * Row arguments: D3DSIO opcode suffix, TGSI opcode suffix, a first and a
 * second V(major,minor) pair of version bounds, two counts, and either a
 * SPECIAL() handler or NULL. Rows with a NULL handler are translated by
 * NineTranslateInstruction_Generic using the TGSI opcode.
 * NOTE(review): the first V() pair presumably bounds vertex shader versions
 * and the second fragment shader versions, and the counts are ndst / nsrc -
 * confirm against struct sm1_op_info.
 *
 * The same opcode may appear several times with different version ranges.
 * create_op_info_map() walks the table front to back and keeps the LAST row
 * whose range contains the shader version, so row order is significant.
 */
2527 struct sm1_op_info inst_table[] =
2528 {
2529 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2530 _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
2531 _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2532 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2533 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2534 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2535 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2536 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2537 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2538 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2539 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2540 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2541 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2542 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2543 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2544 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2545 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2546 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
2547 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2548 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2549 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2550
2551 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2552 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2553 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2554 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2555 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2556
2557 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2558 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2559 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2560 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2561 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2562 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2563
2564 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2565
2566 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2567 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2568 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2569 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2570 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2571
2572 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2573 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2574
2575 /* More flow control */
2576 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2577 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2578 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2579 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2580 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2581 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2582 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2583 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2584 /* we don't write to the address register, but a normal register (copied
2585 * when needed to the address register), thus we don't use ARR */
2586 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2587
2588 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2589 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2590
2591 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2592 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2593 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2594 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2595 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2596 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2597 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2598 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
2599 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2600 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2601 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2602 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2603 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2604 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2605 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2606 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2607
2608 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2609 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2610 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2611 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2612
2613 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2614
2615 /* More tex stuff */
2616 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2617 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2618 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2619 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2620 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2621 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2622
2623 /* Misc */
2624 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2625 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2626 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2627 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2628 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2629 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2630 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2631 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2632 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2633 };
2634
/* Op info for D3DSIO_PHASE. sm1_parse_instruction() selects it directly for
 * opcodes that fall outside tx->op_info_map instead of going through
 * inst_table. */
2635 struct sm1_op_info inst_phase =
2636 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2637
/* Op info for D3DSIO_COMMENT. sm1_parse_instruction() selects it directly for
 * opcodes that fall outside tx->op_info_map instead of going through
 * inst_table. */
2638 struct sm1_op_info inst_comment =
2639 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2640
2641 static void
2642 create_op_info_map(struct shader_translator *tx)
2643 {
2644 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2645 unsigned i;
2646
2647 for (i = 0; i < Elements(tx->op_info_map); ++i)
2648 tx->op_info_map[i] = -1;
2649
2650 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2651 for (i = 0; i < Elements(inst_table); ++i) {
2652 assert(inst_table[i].sio < Elements(tx->op_info_map));
2653 if (inst_table[i].vert_version.min <= version &&
2654 inst_table[i].vert_version.max >= version)
2655 tx->op_info_map[inst_table[i].sio] = i;
2656 }
2657 } else {
2658 for (i = 0; i < Elements(inst_table); ++i) {
2659 assert(inst_table[i].sio < Elements(tx->op_info_map));
2660 if (inst_table[i].frag_version.min <= version &&
2661 inst_table[i].frag_version.max >= version)
2662 tx->op_info_map[inst_table[i].sio] = i;
2663 }
2664 }
2665 }
2666
2667 static INLINE HRESULT
2668 NineTranslateInstruction_Generic(struct shader_translator *tx)
2669 {
2670 struct ureg_dst dst[1];
2671 struct ureg_src src[4];
2672 unsigned i;
2673
2674 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2675 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2676 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2677 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2678
2679 ureg_insn(tx->ureg, tx->insn.info->opcode,
2680 dst, tx->insn.ndst,
2681 src, tx->insn.nsrc);
2682 return D3D_OK;
2683 }
2684
2685 static INLINE DWORD
2686 TOKEN_PEEK(struct shader_translator *tx)
2687 {
2688 return *(tx->parse);
2689 }
2690
2691 static INLINE DWORD
2692 TOKEN_NEXT(struct shader_translator *tx)
2693 {
2694 return *(tx->parse)++;
2695 }
2696
2697 static INLINE void
2698 TOKEN_JUMP(struct shader_translator *tx)
2699 {
2700 if (tx->parse_next && tx->parse != tx->parse_next) {
2701 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2702 tx->parse = tx->parse_next;
2703 }
2704 }
2705
2706 static INLINE boolean
2707 sm1_parse_eof(struct shader_translator *tx)
2708 {
2709 return TOKEN_PEEK(tx) == NINED3DSP_END;
2710 }
2711
2712 static void
2713 sm1_read_version(struct shader_translator *tx)
2714 {
2715 const DWORD tok = TOKEN_NEXT(tx);
2716
2717 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2718 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2719
2720 switch (tok >> 16) {
2721 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2722 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2723 default:
2724 DBG("Invalid shader type: %x\n", tok);
2725 tx->processor = ~0;
2726 break;
2727 }
2728 }
2729
2730 /* This is just to check if we parsed the instruction properly. */
2731 static void
2732 sm1_parse_get_skip(struct shader_translator *tx)
2733 {
2734 const DWORD tok = TOKEN_PEEK(tx);
2735
2736 if (tx->version.major >= 2) {
2737 tx->parse_next = tx->parse + 1 /* this */ +
2738 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2739 } else {
2740 tx->parse_next = NULL; /* TODO: determine from param count */
2741 }
2742 }
2743
2744 static void
2745 sm1_print_comment(const char *comment, UINT size)
2746 {
2747 if (!size)
2748 return;
2749 /* TODO */
2750 }
2751
2752 static void
2753 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2754 {
2755 DWORD tok = TOKEN_PEEK(tx);
2756
2757 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2758 {
2759 const char *comment = "";
2760 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2761 tx->parse += size + 1;
2762
2763 if (print)
2764 sm1_print_comment(comment, size);
2765
2766 tok = TOKEN_PEEK(tx);
2767 }
2768 }
2769
2770 static void
2771 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2772 {
2773 *reg = TOKEN_NEXT(tx);
2774
2775 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2776 {
2777 if (tx->version.major < 2)
2778 *rel = (1 << 31) |
2779 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2780 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2781 (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT);
2782 else
2783 *rel = TOKEN_NEXT(tx);
2784 }
2785 }
2786
2787 static void
2788 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2789 {
2790 uint8_t shift;
2791 dst->file =
2792 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2793 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2794 dst->type = TGSI_RETURN_TYPE_FLOAT;
2795 dst->idx = tok & D3DSP_REGNUM_MASK;
2796 dst->rel = NULL;
2797 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2798 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2799 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2800 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2801 }
2802
2803 static void
2804 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2805 {
2806 src->file =
2807 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2808 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2809 src->type = TGSI_RETURN_TYPE_FLOAT;
2810 src->idx = tok & D3DSP_REGNUM_MASK;
2811 src->rel = NULL;
2812 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2813 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2814
2815 switch (src->file) {
2816 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2817 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2818 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2819 default:
2820 break;
2821 }
2822 }
2823
2824 static void
2825 sm1_parse_immediate(struct shader_translator *tx,
2826 struct sm1_src_param *imm)
2827 {
2828 imm->file = NINED3DSPR_IMMEDIATE;
2829 imm->idx = INT_MIN;
2830 imm->rel = NULL;
2831 imm->swizzle = NINED3DSP_NOSWIZZLE;
2832 imm->mod = 0;
2833 switch (tx->insn.opcode) {
2834 case D3DSIO_DEF:
2835 imm->type = NINED3DSPTYPE_FLOAT4;
2836 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2837 tx->parse += 4;
2838 break;
2839 case D3DSIO_DEFI:
2840 imm->type = NINED3DSPTYPE_INT4;
2841 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2842 tx->parse += 4;
2843 break;
2844 case D3DSIO_DEFB:
2845 imm->type = NINED3DSPTYPE_BOOL;
2846 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2847 tx->parse += 1;
2848 break;
2849 default:
2850 assert(0);
2851 break;
2852 }
2853 }
2854
2855 static void
2856 sm1_read_dst_param(struct shader_translator *tx,
2857 struct sm1_dst_param *dst,
2858 struct sm1_src_param *rel)
2859 {
2860 DWORD tok_dst, tok_rel = 0;
2861
2862 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
2863 sm1_parse_dst_param(dst, tok_dst);
2864 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
2865 sm1_parse_src_param(rel, tok_rel);
2866 dst->rel = rel;
2867 }
2868 }
2869
2870 static void
2871 sm1_read_src_param(struct shader_translator *tx,
2872 struct sm1_src_param *src,
2873 struct sm1_src_param *rel)
2874 {
2875 DWORD tok_src, tok_rel = 0;
2876
2877 sm1_parse_get_param(tx, &tok_src, &tok_rel);
2878 sm1_parse_src_param(src, tok_src);
2879 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
2880 assert(rel);
2881 sm1_parse_src_param(rel, tok_rel);
2882 src->rel = rel;
2883 }
2884 }
2885
2886 static void
2887 sm1_read_semantic(struct shader_translator *tx,
2888 struct sm1_semantic *sem)
2889 {
2890 const DWORD tok_usg = TOKEN_NEXT(tx);
2891 const DWORD tok_dst = TOKEN_NEXT(tx);
2892
2893 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
2894 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
2895 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
2896
2897 sm1_parse_dst_param(&sem->reg, tok_dst);
2898 }
2899
2900 static void
2901 sm1_parse_instruction(struct shader_translator *tx)
2902 {
2903 struct sm1_instruction *insn = &tx->insn;
2904 DWORD tok;
2905 struct sm1_op_info *info = NULL;
2906 unsigned i;
2907
2908 sm1_parse_comments(tx, TRUE);
2909 sm1_parse_get_skip(tx);
2910
2911 tok = TOKEN_NEXT(tx);
2912
2913 insn->opcode = tok & D3DSI_OPCODE_MASK;
2914 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
2915 insn->coissue = !!(tok & D3DSI_COISSUE);
2916 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
2917
2918 if (insn->opcode < Elements(tx->op_info_map)) {
2919 int k = tx->op_info_map[insn->opcode];
2920 if (k >= 0) {
2921 assert(k < Elements(inst_table));
2922 info = &inst_table[k];
2923 }
2924 } else {
2925 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
2926 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
2927 }
2928 if (!info) {
2929 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
2930 TOKEN_JUMP(tx);
2931 return;
2932 }
2933 insn->info = info;
2934 insn->ndst = info->ndst;
2935 insn->nsrc = info->nsrc;
2936
2937 assert(!insn->predicated && "TODO: predicated instructions");
2938
2939 /* check version */
2940 {
2941 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
2942 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
2943 unsigned ver = (tx->version.major << 8) | tx->version.minor;
2944 if (ver < min || ver > max) {
2945 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
2946 min, ver, max);
2947 return;
2948 }
2949 }
2950
2951 for (i = 0; i < insn->ndst; ++i)
2952 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
2953 if (insn->predicated)
2954 sm1_read_src_param(tx, &insn->pred, NULL);
2955 for (i = 0; i < insn->nsrc; ++i)
2956 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
2957
2958 /* parse here so we can dump them before processing */
2959 if (insn->opcode == D3DSIO_DEF ||
2960 insn->opcode == D3DSIO_DEFI ||
2961 insn->opcode == D3DSIO_DEFB)
2962 sm1_parse_immediate(tx, &tx->insn.src[0]);
2963
2964 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
2965 sm1_instruction_check(insn);
2966
2967 if (info->handler)
2968 info->handler(tx);
2969 else
2970 NineTranslateInstruction_Generic(tx);
2971 tx_apply_dst0_modifiers(tx);
2972
2973 tx->num_scratch = 0; /* reset */
2974
2975 TOKEN_JUMP(tx);
2976 }
2977
2978 static void
2979 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
2980 {
2981 unsigned i;
2982
2983 tx->info = info;
2984
2985 tx->byte_code = info->byte_code;
2986 tx->parse = info->byte_code;
2987
2988 for (i = 0; i < Elements(info->input_map); ++i)
2989 info->input_map[i] = NINE_DECLUSAGE_NONE;
2990 info->num_inputs = 0;
2991
2992 info->position_t = FALSE;
2993 info->point_size = FALSE;
2994
2995 tx->info->const_float_slots = 0;
2996 tx->info->const_int_slots = 0;
2997 tx->info->const_bool_slots = 0;
2998
2999 info->sampler_mask = 0x0;
3000 info->rt_mask = 0x0;
3001
3002 info->lconstf.data = NULL;
3003 info->lconstf.ranges = NULL;
3004
3005 for (i = 0; i < Elements(tx->regs.rL); ++i) {
3006 tx->regs.rL[i] = ureg_dst_undef();
3007 }
3008 tx->regs.address = ureg_dst_undef();
3009 tx->regs.a0 = ureg_dst_undef();
3010 tx->regs.p = ureg_dst_undef();
3011 tx->regs.oDepth = ureg_dst_undef();
3012 tx->regs.vPos = ureg_src_undef();
3013 tx->regs.vFace = ureg_src_undef();
3014 for (i = 0; i < Elements(tx->regs.o); ++i)
3015 tx->regs.o[i] = ureg_dst_undef();
3016 for (i = 0; i < Elements(tx->regs.oCol); ++i)
3017 tx->regs.oCol[i] = ureg_dst_undef();
3018 for (i = 0; i < Elements(tx->regs.vC); ++i)
3019 tx->regs.vC[i] = ureg_src_undef();
3020 for (i = 0; i < Elements(tx->regs.vT); ++i)
3021 tx->regs.vT[i] = ureg_src_undef();
3022
3023 for (i = 0; i < Elements(tx->lconsti); ++i)
3024 tx->lconsti[i].idx = -1;
3025 for (i = 0; i < Elements(tx->lconstb); ++i)
3026 tx->lconstb[i].idx = -1;
3027
3028 sm1_read_version(tx);
3029
3030 info->version = (tx->version.major << 4) | tx->version.minor;
3031
3032 create_op_info_map(tx);
3033 }
3034
3035 static void
3036 tx_dtor(struct shader_translator *tx)
3037 {
3038 if (tx->num_inst_labels)
3039 FREE(tx->inst_labels);
3040 FREE(tx->lconstf);
3041 FREE(tx->regs.r);
3042 FREE(tx);
3043 }
3044
3045 static INLINE unsigned
3046 tgsi_processor_from_type(unsigned shader_type)
3047 {
3048 switch (shader_type) {
3049 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
3050 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
3051 default:
3052 return ~0;
3053 }
3054 }
3055
/* Capability queries against the device's screen. Both macros expect a
 * variable named `device` in scope; GET_SHADER_CAP additionally expects
 * `info` and queries the capability for info->type. */
#define GET_CAP(cap) \
    device->screen->get_param(device->screen, PIPE_CAP_##cap)
#define GET_SHADER_CAP(cap) \
    device->screen->get_shader_param(device->screen, info->type, PIPE_SHADER_CAP_##cap)
3060
3061 HRESULT
3062 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3063 {
3064 struct shader_translator *tx;
3065 HRESULT hr = D3D_OK;
3066 const unsigned processor = tgsi_processor_from_type(info->type);
3067 unsigned s, slot_max;
3068
3069 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3070
3071 tx = CALLOC_STRUCT(shader_translator);
3072 if (!tx)
3073 return E_OUTOFMEMORY;
3074 tx_ctor(tx, info);
3075
3076 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3077 hr = D3DERR_INVALIDCALL;
3078 DBG("Unsupported shader version: %u.%u !\n",
3079 tx->version.major, tx->version.minor);
3080 goto out;
3081 }
3082 if (tx->processor != processor) {
3083 hr = D3DERR_INVALIDCALL;
3084 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3085 goto out;
3086 }
3087 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
3088 tx->version.major, tx->version.minor);
3089
3090 tx->ureg = ureg_create(processor);
3091 if (!tx->ureg) {
3092 hr = E_OUTOFMEMORY;
3093 goto out;
3094 }
3095
3096 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3097 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3098 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3099 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3100 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3101 tx->texcoord_sn = tx->want_texcoord ?
3102 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3103
3104 /* VS must always write position. Declare it here to make it the 1st output.
3105 * (Some drivers like nv50 are buggy and rely on that.)
3106 */
3107 if (IS_VS) {
3108 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3109 } else {
3110 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3111 if (!tx->shift_wpos)
3112 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3113 }
3114
3115 while (!sm1_parse_eof(tx) && !tx->failure)
3116 sm1_parse_instruction(tx);
3117 tx->parse++; /* for byte_size */
3118
3119 if (tx->failure) {
3120 ERR("Encountered buggy shader\n");
3121 ureg_destroy(tx->ureg);
3122 hr = D3DERR_INVALIDCALL;
3123 goto out;
3124 }
3125
3126 if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
3127 ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
3128 ureg_src(tx->regs.r[0]));
3129 info->rt_mask |= 0x1;
3130 }
3131
3132 if (info->position_t)
3133 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3134
3135 ureg_END(tx->ureg);
3136
3137 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
3138 info->point_size = TRUE;
3139
3140 /* record local constants */
3141 if (tx->num_lconstf && tx->indirect_const_access) {
3142 struct nine_range *ranges;
3143 float *data;
3144 int *indices;
3145 unsigned i, k, n;
3146
3147 hr = E_OUTOFMEMORY;
3148
3149 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3150 if (!data)
3151 goto out;
3152 info->lconstf.data = data;
3153
3154 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3155 if (!indices)
3156 goto out;
3157
3158 /* lazy sort, num_lconstf should be small */
3159 for (n = 0; n < tx->num_lconstf; ++n) {
3160 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3161 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3162 k = i;
3163 }
3164 indices[n] = tx->lconstf[k].idx;
3165 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3166 tx->lconstf[k].idx = INT_MAX;
3167 }
3168
3169 /* count ranges */
3170 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3171 if (indices[i] != indices[i - 1] + 1)
3172 ++n;
3173 ranges = MALLOC(n * sizeof(ranges[0]));
3174 if (!ranges) {
3175 FREE(indices);
3176 goto out;
3177 }
3178 info->lconstf.ranges = ranges;
3179
3180 k = 0;
3181 ranges[k].bgn = indices[0];
3182 for (i = 1; i < tx->num_lconstf; ++i) {
3183 if (indices[i] != indices[i - 1] + 1) {
3184 ranges[k].next = &ranges[k + 1];
3185 ranges[k].end = indices[i - 1] + 1;
3186 ++k;
3187 ranges[k].bgn = indices[i];
3188 }
3189 }
3190 ranges[k].end = indices[i - 1] + 1;
3191 ranges[k].next = NULL;
3192 assert(n == (k + 1));
3193
3194 FREE(indices);
3195 hr = D3D_OK;
3196 }
3197
3198 /* r500 */
3199 if (info->const_float_slots > device->max_vs_const_f &&
3200 (info->const_int_slots || info->const_bool_slots))
3201 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3202
3203
3204 if (tx->indirect_const_access) /* vs only */
3205 info->const_float_slots = device->max_vs_const_f;
3206
3207 slot_max = info->const_bool_slots > 0 ?
3208 device->max_vs_const_f + NINE_MAX_CONST_I
3209 + info->const_bool_slots :
3210 info->const_int_slots > 0 ?
3211 device->max_vs_const_f + info->const_int_slots :
3212 info->const_float_slots;
3213 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3214
3215 for (s = 0; s < slot_max; s++)
3216 ureg_DECL_constant(tx->ureg, s);
3217
3218 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3219 unsigned count;
3220 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3221 tgsi_dump(toks, 0);
3222 ureg_free_tokens(toks);
3223 }
3224
3225 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3226 if (!info->cso) {
3227 hr = D3DERR_DRIVERINTERNALERROR;
3228 FREE(info->lconstf.data);
3229 FREE(info->lconstf.ranges);
3230 goto out;
3231 }
3232
3233 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3234 out:
3235 tx_dtor(tx);
3236 return hr;
3237 }