79b0804c754c65b90d8f16082f3ade22fb7670d3
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_ureg.h"
34 #include "tgsi/tgsi_dump.h"
35
#define DBG_CHANNEL DBG_SHADER

/* Shader disassembly output: forwards a printf-style format string and its
 * arguments to _nine_debug_printf on DBG_CHANNEL, passing NULL as the second
 * argument. Written with the standard C99 variadic form (__VA_ARGS__) rather
 * than the GNU named-variadic extension so it does not depend on a compiler
 * extension. */
#define DUMP(...) _nine_debug_printf(DBG_CHANNEL, NULL, __VA_ARGS__)
39
40
/* Translation state; the full definition appears later in this file. */
struct shader_translator;

/* Signature of a per-opcode translation handler (see sm1_op_info.handler). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Opcode-to-name lookup used by the instruction dumper; defined further down. */
static INLINE const char *d3dsio_to_string(unsigned opcode);
46
47
/* NOTE(review): presumably the shader-type tag found in the high word of the
 * SM1 version token -- confirm against the parser. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Nesting limits; they size the label and loop-counter arrays in
 * struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Type tags stored in sm1_src_param.type; they select the immediate format
 * printed by sm1_dump_immediate. */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Pseudo register file, one past D3DSPR_PREDICATE, used to mark immediates. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* NOTE(review): by their names these are the comparison codes of compare-style
 * instructions -- confirm against the users further down the file. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Values of sm1_instruction.flags for D3DSIO_TEX (dumped as "p" / "b"). */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Write mask with one bit per component, bit 0 = x ... bit 3 = w
 * (see sm1_dump_writemask). */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component selecting x, y, z, w in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* constants. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Destination modifier bits shifted down to bit 0, as stored in
 * sm1_dst_param.mod. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
94
95 /*
96 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
97 * BIAS <= PS 1.4 (x-0.5)
98 * BIASNEG <= PS 1.4 (-(x-0.5))
99 * SIGN <= PS 1.4 (2(x-0.5))
100 * SIGNNEG <= PS 1.4 (-2(x-0.5))
101 * COMP <= PS 1.4 (1-x)
102 * X2 = PS 1.4 (2x)
103 * X2NEG = PS 1.4 (-2x)
104 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
105 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
106 * ABS >= SM 3.0 (abs(x))
107 * ABSNEG >= SM 3.0 (-abs(x))
 * NOT >= SM 2.0 predication only
109 */
/* Source modifiers shifted down to bit 0, as stored in sm1_src_param.mod.
 * These values are also used as indices into sm1_mod_str. */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
124
125 static const char *sm1_mod_str[] =
126 {
127 [NINED3DSPSM_NONE] = "",
128 [NINED3DSPSM_NEG] = "-",
129 [NINED3DSPSM_BIAS] = "bias",
130 [NINED3DSPSM_BIASNEG] = "biasneg",
131 [NINED3DSPSM_SIGN] = "sign",
132 [NINED3DSPSM_SIGNNEG] = "signneg",
133 [NINED3DSPSM_COMP] = "comp",
134 [NINED3DSPSM_X2] = "x2",
135 [NINED3DSPSM_X2NEG] = "x2neg",
136 [NINED3DSPSM_DZ] = "dz",
137 [NINED3DSPSM_DW] = "dw",
138 [NINED3DSPSM_ABS] = "abs",
139 [NINED3DSPSM_ABSNEG] = "-abs",
140 [NINED3DSPSM_NOT] = "not"
141 };
142
143 static void
144 sm1_dump_writemask(BYTE mask)
145 {
146 if (mask & 1) DUMP("x"); else DUMP("_");
147 if (mask & 2) DUMP("y"); else DUMP("_");
148 if (mask & 4) DUMP("z"); else DUMP("_");
149 if (mask & 8) DUMP("w"); else DUMP("_");
150 }
151
152 static void
153 sm1_dump_swizzle(BYTE s)
154 {
155 char c[4] = { 'x', 'y', 'z', 'w' };
156 DUMP("%c%c%c%c",
157 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
158 }
159
160 static const char sm1_file_char[] =
161 {
162 [D3DSPR_TEMP] = 'r',
163 [D3DSPR_INPUT] = 'v',
164 [D3DSPR_CONST] = 'c',
165 [D3DSPR_ADDR] = 'A',
166 [D3DSPR_RASTOUT] = 'R',
167 [D3DSPR_ATTROUT] = 'D',
168 [D3DSPR_OUTPUT] = 'o',
169 [D3DSPR_CONSTINT] = 'I',
170 [D3DSPR_COLOROUT] = 'C',
171 [D3DSPR_DEPTHOUT] = 'D',
172 [D3DSPR_SAMPLER] = 's',
173 [D3DSPR_CONST2] = 'c',
174 [D3DSPR_CONST3] = 'c',
175 [D3DSPR_CONST4] = 'c',
176 [D3DSPR_CONSTBOOL] = 'B',
177 [D3DSPR_LOOP] = 'L',
178 [D3DSPR_TEMPFLOAT16] = 'h',
179 [D3DSPR_MISCTYPE] = 'M',
180 [D3DSPR_LABEL] = 'X',
181 [D3DSPR_PREDICATE] = 'p'
182 };
183
184 static void
185 sm1_dump_reg(BYTE file, INT index)
186 {
187 switch (file) {
188 case D3DSPR_LOOP:
189 DUMP("aL");
190 break;
191 case D3DSPR_COLOROUT:
192 DUMP("oC%i", index);
193 break;
194 case D3DSPR_DEPTHOUT:
195 DUMP("oDepth");
196 break;
197 case D3DSPR_RASTOUT:
198 DUMP("oRast%i", index);
199 break;
200 case D3DSPR_CONSTINT:
201 DUMP("iconst[%i]", index);
202 break;
203 case D3DSPR_CONSTBOOL:
204 DUMP("bconst[%i]", index);
205 break;
206 default:
207 DUMP("%c%i", sm1_file_char[file], index);
208 break;
209 }
210 }
211
212 struct sm1_src_param
213 {
214 INT idx;
215 struct sm1_src_param *rel;
216 BYTE file;
217 BYTE swizzle;
218 BYTE mod;
219 BYTE type;
220 union {
221 DWORD d[4];
222 float f[4];
223 int i[4];
224 BOOL b;
225 } imm;
226 };
227 static void
228 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
229
230 struct sm1_dst_param
231 {
232 INT idx;
233 struct sm1_src_param *rel;
234 BYTE file;
235 BYTE mask;
236 BYTE mod;
237 int8_t shift; /* sint4 */
238 BYTE type;
239 };
240
241 static INLINE void
242 assert_replicate_swizzle(const struct ureg_src *reg)
243 {
244 assert(reg->SwizzleY == reg->SwizzleX &&
245 reg->SwizzleZ == reg->SwizzleX &&
246 reg->SwizzleW == reg->SwizzleX);
247 }
248
249 static void
250 sm1_dump_immediate(const struct sm1_src_param *param)
251 {
252 switch (param->type) {
253 case NINED3DSPTYPE_FLOAT4:
254 DUMP("{ %f %f %f %f }",
255 param->imm.f[0], param->imm.f[1],
256 param->imm.f[2], param->imm.f[3]);
257 break;
258 case NINED3DSPTYPE_INT4:
259 DUMP("{ %i %i %i %i }",
260 param->imm.i[0], param->imm.i[1],
261 param->imm.i[2], param->imm.i[3]);
262 break;
263 case NINED3DSPTYPE_BOOL:
264 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
265 break;
266 default:
267 assert(0);
268 break;
269 }
270 }
271
272 static void
273 sm1_dump_src_param(const struct sm1_src_param *param)
274 {
275 if (param->file == NINED3DSPR_IMMEDIATE) {
276 assert(!param->mod &&
277 !param->rel &&
278 param->swizzle == NINED3DSP_NOSWIZZLE);
279 sm1_dump_immediate(param);
280 return;
281 }
282
283 if (param->mod)
284 DUMP("%s(", sm1_mod_str[param->mod]);
285 if (param->rel) {
286 DUMP("%c[", sm1_file_char[param->file]);
287 sm1_dump_src_param(param->rel);
288 DUMP("+%i]", param->idx);
289 } else {
290 sm1_dump_reg(param->file, param->idx);
291 }
292 if (param->mod)
293 DUMP(")");
294 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
295 DUMP(".");
296 sm1_dump_swizzle(param->swizzle);
297 }
298 }
299
300 static void
301 sm1_dump_dst_param(const struct sm1_dst_param *param)
302 {
303 if (param->mod & NINED3DSPDM_SATURATE)
304 DUMP("sat ");
305 if (param->mod & NINED3DSPDM_PARTIALP)
306 DUMP("pp ");
307 if (param->mod & NINED3DSPDM_CENTROID)
308 DUMP("centroid ");
309 if (param->shift < 0)
310 DUMP("/%u ", 1 << -param->shift);
311 if (param->shift > 0)
312 DUMP("*%u ", 1 << param->shift);
313
314 if (param->rel) {
315 DUMP("%c[", sm1_file_char[param->file]);
316 sm1_dump_src_param(param->rel);
317 DUMP("+%i]", param->idx);
318 } else {
319 sm1_dump_reg(param->file, param->idx);
320 }
321 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
322 DUMP(".");
323 sm1_dump_writemask(param->mask);
324 }
325 }
326
327 struct sm1_semantic
328 {
329 struct sm1_dst_param reg;
330 BYTE sampler_type;
331 D3DDECLUSAGE usage;
332 BYTE usage_idx;
333 };
334
335 struct sm1_op_info
336 {
337 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
338 * should be ignored completely */
339 unsigned sio;
340 unsigned opcode; /* TGSI_OPCODE_x */
341
342 /* versions are still set even handler is set */
343 struct {
344 unsigned min;
345 unsigned max;
346 } vert_version, frag_version;
347
348 /* number of regs parsed outside of special handler */
349 unsigned ndst;
350 unsigned nsrc;
351
352 /* some instructions don't map perfectly, so use a special handler */
353 translate_instruction_func handler;
354 };
355
356 struct sm1_instruction
357 {
358 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
359 BYTE flags;
360 BOOL coissue;
361 BOOL predicated;
362 BYTE ndst;
363 BYTE nsrc;
364 struct sm1_src_param src[4];
365 struct sm1_src_param src_rel[4];
366 struct sm1_src_param pred;
367 struct sm1_src_param dst_rel[1];
368 struct sm1_dst_param dst[1];
369
370 struct sm1_op_info *info;
371 };
372
373 static void
374 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
375 {
376 unsigned i;
377
378 /* no info stored for these: */
379 if (insn->opcode == D3DSIO_DCL)
380 return;
381 for (i = 0; i < indent; ++i)
382 DUMP(" ");
383
384 if (insn->predicated) {
385 DUMP("@");
386 sm1_dump_src_param(&insn->pred);
387 DUMP(" ");
388 }
389 DUMP("%s", d3dsio_to_string(insn->opcode));
390 if (insn->flags) {
391 switch (insn->opcode) {
392 case D3DSIO_TEX:
393 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
394 break;
395 default:
396 DUMP("_%x", insn->flags);
397 break;
398 }
399 }
400 if (insn->coissue)
401 DUMP("_co");
402 DUMP(" ");
403
404 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
405 sm1_dump_dst_param(&insn->dst[i]);
406 DUMP(" ");
407 }
408
409 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
410 sm1_dump_src_param(&insn->src[i]);
411 DUMP(" ");
412 }
413 if (insn->opcode == D3DSIO_DEF ||
414 insn->opcode == D3DSIO_DEFI ||
415 insn->opcode == D3DSIO_DEFB)
416 sm1_dump_immediate(&insn->src[0]);
417
418 DUMP("\n");
419 }
420
421 struct sm1_local_const
422 {
423 INT idx;
424 struct ureg_src reg;
425 union {
426 boolean b;
427 float f[4];
428 int32_t i[4];
429 } imm;
430 };
431
432 struct shader_translator
433 {
434 const DWORD *byte_code;
435 const DWORD *parse;
436 const DWORD *parse_next;
437
438 struct ureg_program *ureg;
439
440 /* shader version */
441 struct {
442 BYTE major;
443 BYTE minor;
444 } version;
445 unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
446
447 boolean native_integers;
448 boolean inline_subroutines;
449 boolean lower_preds;
450 boolean want_texcoord;
451 boolean shift_wpos;
452 unsigned texcoord_sn;
453
454 struct sm1_instruction insn; /* current instruction */
455
456 struct {
457 struct ureg_dst *r;
458 struct ureg_dst oPos;
459 struct ureg_dst oFog;
460 struct ureg_dst oPts;
461 struct ureg_dst oCol[4];
462 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
463 struct ureg_dst oDepth;
464 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
465 struct ureg_src vPos;
466 struct ureg_src vFace;
467 struct ureg_src s;
468 struct ureg_dst p;
469 struct ureg_dst a;
470 struct ureg_dst tS[8]; /* texture stage registers */
471 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
472 struct ureg_dst t[5]; /* scratch TEMPs */
473 struct ureg_src vC[2]; /* PS color in */
474 struct ureg_src vT[8]; /* PS texcoord in */
475 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
476 struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* loop ctr ADDR register */
477 } regs;
478 unsigned num_temp; /* Elements(regs.r) */
479 unsigned num_scratch;
480 unsigned loop_depth;
481 unsigned loop_depth_max;
482 unsigned cond_depth;
483 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
484 unsigned cond_labels[NINE_MAX_COND_DEPTH];
485
486 unsigned *inst_labels; /* LABEL op */
487 unsigned num_inst_labels;
488
489 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
490
491 struct sm1_local_const *lconstf;
492 unsigned num_lconstf;
493 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
494 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
495
496 boolean indirect_const_access;
497
498 struct nine_shader_info *info;
499
500 int16_t op_info_map[D3DSIO_BREAKP + 1];
501 };
502
/* Shader stage tests; both expect a 'struct shader_translator *tx' in scope. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)

/* Defined later in this file. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
508
509 static void
510 sm1_instruction_check(const struct sm1_instruction *insn)
511 {
512 if (insn->opcode == D3DSIO_CRS)
513 {
514 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
515 {
516 DBG("CRS.mask.w\n");
517 }
518 }
519 }
520
521 static boolean
522 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
523 {
524 INT i;
525 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
526 for (i = 0; i < tx->num_lconstf; ++i) {
527 if (tx->lconstf[i].idx == index) {
528 *src = tx->lconstf[i].reg;
529 return TRUE;
530 }
531 }
532 return FALSE;
533 }
534 static boolean
535 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
536 {
537 assert(index >= 0 && index < NINE_MAX_CONST_I);
538 if (tx->lconsti[index].idx == index)
539 *src = tx->lconsti[index].reg;
540 return tx->lconsti[index].idx == index;
541 }
542 static boolean
543 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
544 {
545 assert(index >= 0 && index < NINE_MAX_CONST_B);
546 if (tx->lconstb[index].idx == index)
547 *src = tx->lconstb[index].reg;
548 return tx->lconstb[index].idx == index;
549 }
550
551 static void
552 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
553 {
554 unsigned n;
555
556 /* Anno1404 sets out of range constants. */
557 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
558 if (index >= NINE_MAX_CONST_F)
559 WARN("lconstf index %i too high, indirect access won't work\n", index);
560
561 for (n = 0; n < tx->num_lconstf; ++n)
562 if (tx->lconstf[n].idx == index)
563 break;
564 if (n == tx->num_lconstf) {
565 if ((n % 8) == 0) {
566 tx->lconstf = REALLOC(tx->lconstf,
567 (n + 0) * sizeof(tx->lconstf[0]),
568 (n + 8) * sizeof(tx->lconstf[0]));
569 assert(tx->lconstf);
570 }
571 tx->num_lconstf++;
572 }
573 tx->lconstf[n].idx = index;
574 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
575
576 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
577 }
578 static void
579 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
580 {
581 assert(index >= 0 && index < NINE_MAX_CONST_I);
582 tx->lconsti[index].idx = index;
583 tx->lconsti[index].reg = tx->native_integers ?
584 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
585 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
586 }
587 static void
588 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
589 {
590 assert(index >= 0 && index < NINE_MAX_CONST_B);
591 tx->lconstb[index].idx = index;
592 tx->lconstb[index].reg = tx->native_integers ?
593 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
594 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
595 }
596
597 static INLINE struct ureg_dst
598 tx_scratch(struct shader_translator *tx)
599 {
600 assert(tx->num_scratch < Elements(tx->regs.t));
601 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
602 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
603 return tx->regs.t[tx->num_scratch++];
604 }
605
606 static INLINE struct ureg_dst
607 tx_scratch_scalar(struct shader_translator *tx)
608 {
609 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
610 }
611
612 static INLINE struct ureg_src
613 tx_src_scalar(struct ureg_dst dst)
614 {
615 struct ureg_src src = ureg_src(dst);
616 int c = ffs(dst.WriteMask) - 1;
617 if (dst.WriteMask == (1 << c))
618 src = ureg_scalar(src, c);
619 return src;
620 }
621
622 /* Need to declare all constants if indirect addressing is used,
623 * otherwise we could scan the shader to determine the maximum.
624 * TODO: It doesn't really matter for nv50 so I won't do the scan,
625 * but radeon drivers might care, if they don't infer it from TGSI.
626 */
627 static void
628 tx_decl_constants(struct shader_translator *tx)
629 {
630 unsigned i, n = 0;
631
632 for (i = 0; i < NINE_MAX_CONST_F; ++i)
633 ureg_DECL_constant(tx->ureg, n++);
634 for (i = 0; i < NINE_MAX_CONST_I; ++i)
635 ureg_DECL_constant(tx->ureg, n++);
636 for (i = 0; i < (NINE_MAX_CONST_B / 4); ++i)
637 ureg_DECL_constant(tx->ureg, n++);
638 }
639
640 static INLINE void
641 tx_temp_alloc(struct shader_translator *tx, INT idx)
642 {
643 assert(idx >= 0);
644 if (idx >= tx->num_temp) {
645 unsigned k = tx->num_temp;
646 unsigned n = idx + 1;
647 tx->regs.r = REALLOC(tx->regs.r,
648 k * sizeof(tx->regs.r[0]),
649 n * sizeof(tx->regs.r[0]));
650 for (; k < n; ++k)
651 tx->regs.r[k] = ureg_dst_undef();
652 tx->num_temp = n;
653 }
654 if (ureg_dst_is_undef(tx->regs.r[idx]))
655 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
656 }
657
658 static INLINE void
659 tx_addr_alloc(struct shader_translator *tx, INT idx)
660 {
661 assert(idx == 0);
662 if (ureg_dst_is_undef(tx->regs.a))
663 tx->regs.a = ureg_DECL_address(tx->ureg);
664 }
665
666 static INLINE void
667 tx_pred_alloc(struct shader_translator *tx, INT idx)
668 {
669 assert(idx == 0);
670 if (ureg_dst_is_undef(tx->regs.p))
671 tx->regs.p = ureg_DECL_predicate(tx->ureg);
672 }
673
674 static INLINE void
675 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
676 {
677 assert(IS_PS);
678 assert(idx >= 0 && idx < Elements(tx->regs.vT));
679 if (ureg_src_is_undef(tx->regs.vT[idx]))
680 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
681 TGSI_INTERPOLATE_PERSPECTIVE);
682 }
683
684 static INLINE unsigned *
685 tx_bgnloop(struct shader_translator *tx)
686 {
687 tx->loop_depth++;
688 if (tx->loop_depth_max < tx->loop_depth)
689 tx->loop_depth_max = tx->loop_depth;
690 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
691 return &tx->loop_labels[tx->loop_depth - 1];
692 }
693
694 static INLINE unsigned *
695 tx_endloop(struct shader_translator *tx)
696 {
697 assert(tx->loop_depth);
698 tx->loop_depth--;
699 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
700 ureg_get_instruction_number(tx->ureg));
701 return &tx->loop_labels[tx->loop_depth];
702 }
703
704 static struct ureg_dst
705 tx_get_loopctr(struct shader_translator *tx)
706 {
707 const unsigned l = tx->loop_depth - 1;
708
709 if (!tx->loop_depth)
710 {
711 DBG("loop counter requested outside of loop\n");
712 return ureg_dst_undef();
713 }
714
715 if (ureg_dst_is_undef(tx->regs.aL[l]))
716 {
717 struct ureg_dst rreg = ureg_DECL_local_temporary(tx->ureg);
718 struct ureg_dst areg = ureg_DECL_address(tx->ureg);
719 unsigned c;
720
721 assert(l % 4 == 0);
722 for (c = l; c < (l + 4) && c < Elements(tx->regs.aL); ++c) {
723 tx->regs.rL[c] = ureg_writemask(rreg, 1 << (c & 3));
724 tx->regs.aL[c] = ureg_writemask(areg, 1 << (c & 3));
725 }
726 }
727 return tx->regs.rL[l];
728 }
729 static struct ureg_dst
730 tx_get_aL(struct shader_translator *tx)
731 {
732 if (!ureg_dst_is_undef(tx_get_loopctr(tx)))
733 return tx->regs.aL[tx->loop_depth - 1];
734 return ureg_dst_undef();
735 }
736
737 static INLINE unsigned *
738 tx_cond(struct shader_translator *tx)
739 {
740 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
741 tx->cond_depth++;
742 return &tx->cond_labels[tx->cond_depth - 1];
743 }
744
745 static INLINE unsigned *
746 tx_elsecond(struct shader_translator *tx)
747 {
748 assert(tx->cond_depth);
749 return &tx->cond_labels[tx->cond_depth - 1];
750 }
751
752 static INLINE void
753 tx_endcond(struct shader_translator *tx)
754 {
755 assert(tx->cond_depth);
756 tx->cond_depth--;
757 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
758 ureg_get_instruction_number(tx->ureg));
759 }
760
761 static INLINE struct ureg_dst
762 nine_ureg_dst_register(unsigned file, int index)
763 {
764 return ureg_dst(ureg_src_register(file, index));
765 }
766
767 static struct ureg_src
768 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
769 {
770 struct ureg_program *ureg = tx->ureg;
771 struct ureg_src src;
772 struct ureg_dst tmp;
773
774 switch (param->file)
775 {
776 case D3DSPR_TEMP:
777 assert(!param->rel);
778 tx_temp_alloc(tx, param->idx);
779 src = ureg_src(tx->regs.r[param->idx]);
780 break;
781 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
782 case D3DSPR_ADDR:
783 assert(!param->rel);
784 if (IS_VS) {
785 tx_addr_alloc(tx, param->idx);
786 src = ureg_src(tx->regs.a);
787 } else {
788 if (tx->version.major < 2 && tx->version.minor < 4) {
789 /* no subroutines, so should be defined */
790 src = ureg_src(tx->regs.tS[param->idx]);
791 } else {
792 tx_texcoord_alloc(tx, param->idx);
793 src = tx->regs.vT[param->idx];
794 }
795 }
796 break;
797 case D3DSPR_INPUT:
798 if (IS_VS) {
799 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
800 } else {
801 if (tx->version.major < 3) {
802 assert(!param->rel);
803 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
804 param->idx,
805 TGSI_INTERPOLATE_PERSPECTIVE);
806 } else {
807 assert(!param->rel); /* TODO */
808 assert(param->idx < Elements(tx->regs.v));
809 src = tx->regs.v[param->idx];
810 }
811 }
812 break;
813 case D3DSPR_PREDICATE:
814 assert(!param->rel);
815 tx_pred_alloc(tx, param->idx);
816 src = ureg_src(tx->regs.p);
817 break;
818 case D3DSPR_SAMPLER:
819 assert(param->mod == NINED3DSPSM_NONE);
820 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
821 assert(!param->rel);
822 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
823 break;
824 case D3DSPR_CONST:
825 if (param->rel)
826 tx->indirect_const_access = TRUE;
827 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
828 if (!param->rel)
829 nine_info_mark_const_f_used(tx->info, param->idx);
830 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
831 }
832 break;
833 case D3DSPR_CONST2:
834 case D3DSPR_CONST3:
835 case D3DSPR_CONST4:
836 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
837 assert(!"CONST2/3/4");
838 src = ureg_imm1f(ureg, 0.0f);
839 break;
840 case D3DSPR_CONSTINT:
841 if (param->rel || !tx_lconsti(tx, &src, param->idx)) {
842 if (!param->rel)
843 nine_info_mark_const_i_used(tx->info, param->idx);
844 src = ureg_src_register(TGSI_FILE_CONSTANT,
845 tx->info->const_i_base + param->idx);
846 }
847 break;
848 case D3DSPR_CONSTBOOL:
849 if (param->rel || !tx_lconstb(tx, &src, param->idx)) {
850 char r = param->idx / 4;
851 char s = param->idx & 3;
852 if (!param->rel)
853 nine_info_mark_const_b_used(tx->info, param->idx);
854 src = ureg_src_register(TGSI_FILE_CONSTANT,
855 tx->info->const_b_base + r);
856 src = ureg_swizzle(src, s, s, s, s);
857 }
858 break;
859 case D3DSPR_LOOP:
860 src = tx_src_scalar(tx_get_aL(tx));
861 break;
862 case D3DSPR_MISCTYPE:
863 switch (param->idx) {
864 case D3DSMO_POSITION:
865 if (ureg_src_is_undef(tx->regs.vPos))
866 tx->regs.vPos = ureg_DECL_fs_input(ureg,
867 TGSI_SEMANTIC_POSITION, 0,
868 TGSI_INTERPOLATE_LINEAR);
869 if (tx->shift_wpos) {
870 /* TODO: do this only once */
871 struct ureg_dst wpos = tx_scratch(tx);
872 ureg_SUB(ureg, wpos, tx->regs.vPos,
873 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
874 src = ureg_src(wpos);
875 } else {
876 src = tx->regs.vPos;
877 }
878 break;
879 case D3DSMO_FACE:
880 if (ureg_src_is_undef(tx->regs.vFace)) {
881 tx->regs.vFace = ureg_DECL_fs_input(ureg,
882 TGSI_SEMANTIC_FACE, 0,
883 TGSI_INTERPOLATE_CONSTANT);
884 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
885 }
886 src = tx->regs.vFace;
887 break;
888 default:
889 assert(!"invalid src D3DSMO");
890 break;
891 }
892 assert(!param->rel);
893 break;
894 case D3DSPR_TEMPFLOAT16:
895 break;
896 default:
897 assert(!"invalid src D3DSPR");
898 }
899 if (param->rel)
900 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
901
902 if (param->swizzle != NINED3DSP_NOSWIZZLE)
903 src = ureg_swizzle(src,
904 (param->swizzle >> 0) & 0x3,
905 (param->swizzle >> 2) & 0x3,
906 (param->swizzle >> 4) & 0x3,
907 (param->swizzle >> 6) & 0x3);
908
909 switch (param->mod) {
910 case NINED3DSPSM_ABS:
911 src = ureg_abs(src);
912 break;
913 case NINED3DSPSM_ABSNEG:
914 src = ureg_negate(ureg_abs(src));
915 break;
916 case NINED3DSPSM_NEG:
917 src = ureg_negate(src);
918 break;
919 case NINED3DSPSM_BIAS:
920 tmp = tx_scratch(tx);
921 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
922 src = ureg_src(tmp);
923 break;
924 case NINED3DSPSM_BIASNEG:
925 tmp = tx_scratch(tx);
926 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
927 src = ureg_src(tmp);
928 break;
929 case NINED3DSPSM_NOT:
930 if (tx->native_integers) {
931 tmp = tx_scratch(tx);
932 ureg_NOT(ureg, tmp, src);
933 src = ureg_src(tmp);
934 break;
935 }
936 /* fall through */
937 case NINED3DSPSM_COMP:
938 tmp = tx_scratch(tx);
939 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
940 src = ureg_src(tmp);
941 break;
942 case NINED3DSPSM_DZ:
943 case NINED3DSPSM_DW:
944 /* handled in instruction */
945 break;
946 case NINED3DSPSM_SIGN:
947 tmp = tx_scratch(tx);
948 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
949 src = ureg_src(tmp);
950 break;
951 case NINED3DSPSM_SIGNNEG:
952 tmp = tx_scratch(tx);
953 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
954 src = ureg_src(tmp);
955 break;
956 case NINED3DSPSM_X2:
957 tmp = tx_scratch(tx);
958 ureg_ADD(ureg, tmp, src, src);
959 src = ureg_src(tmp);
960 break;
961 case NINED3DSPSM_X2NEG:
962 tmp = tx_scratch(tx);
963 ureg_ADD(ureg, tmp, src, src);
964 src = ureg_negate(ureg_src(tmp));
965 break;
966 default:
967 assert(param->mod == NINED3DSPSM_NONE);
968 break;
969 }
970
971 return src;
972 }
973
974 static struct ureg_dst
975 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
976 {
977 struct ureg_dst dst;
978
979 switch (param->file)
980 {
981 case D3DSPR_TEMP:
982 assert(!param->rel);
983 tx_temp_alloc(tx, param->idx);
984 dst = tx->regs.r[param->idx];
985 break;
986 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
987 case D3DSPR_ADDR:
988 assert(!param->rel);
989 if (tx->version.major < 2 && !IS_VS) {
990 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
991 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
992 dst = tx->regs.tS[param->idx];
993 } else
994 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
995 tx_texcoord_alloc(tx, param->idx);
996 dst = ureg_dst(tx->regs.vT[param->idx]);
997 } else {
998 tx_addr_alloc(tx, param->idx);
999 dst = tx->regs.a;
1000 }
1001 break;
1002 case D3DSPR_RASTOUT:
1003 assert(!param->rel);
1004 switch (param->idx) {
1005 case 0:
1006 if (ureg_dst_is_undef(tx->regs.oPos))
1007 tx->regs.oPos =
1008 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1009 dst = tx->regs.oPos;
1010 break;
1011 case 1:
1012 if (ureg_dst_is_undef(tx->regs.oFog))
1013 tx->regs.oFog =
1014 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
1015 dst = tx->regs.oFog;
1016 break;
1017 case 2:
1018 if (ureg_dst_is_undef(tx->regs.oPts))
1019 tx->regs.oPts =
1020 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
1021 dst = tx->regs.oPts;
1022 break;
1023 default:
1024 assert(0);
1025 break;
1026 }
1027 break;
1028 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1029 case D3DSPR_OUTPUT:
1030 if (tx->version.major < 3) {
1031 assert(!param->rel);
1032 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1033 } else {
1034 assert(!param->rel); /* TODO */
1035 assert(param->idx < Elements(tx->regs.o));
1036 dst = tx->regs.o[param->idx];
1037 }
1038 break;
1039 case D3DSPR_ATTROUT: /* VS */
1040 case D3DSPR_COLOROUT: /* PS */
1041 assert(param->idx >= 0 && param->idx < 4);
1042 assert(!param->rel);
1043 tx->info->rt_mask |= 1 << param->idx;
1044 if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
1045 tx->regs.oCol[param->idx] =
1046 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1047 dst = tx->regs.oCol[param->idx];
1048 if (IS_VS && tx->version.major < 3)
1049 dst = ureg_saturate(dst);
1050 break;
1051 case D3DSPR_DEPTHOUT:
1052 assert(!param->rel);
1053 if (ureg_dst_is_undef(tx->regs.oDepth))
1054 tx->regs.oDepth =
1055 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1056 TGSI_WRITEMASK_Z);
1057 dst = tx->regs.oDepth; /* XXX: must write .z component */
1058 break;
1059 case D3DSPR_PREDICATE:
1060 assert(!param->rel);
1061 tx_pred_alloc(tx, param->idx);
1062 dst = tx->regs.p;
1063 break;
1064 case D3DSPR_TEMPFLOAT16:
1065 DBG("unhandled D3DSPR: %u\n", param->file);
1066 break;
1067 default:
1068 assert(!"invalid dst D3DSPR");
1069 break;
1070 }
1071 if (param->rel)
1072 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1073
1074 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1075 dst = ureg_writemask(dst, param->mask);
1076 if (param->mod & NINED3DSPDM_SATURATE)
1077 dst = ureg_saturate(dst);
1078
1079 return dst;
1080 }
1081
1082 static struct ureg_dst
1083 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1084 {
1085 if (param->shift) {
1086 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1087 return tx->regs.tdst;
1088 }
1089 return _tx_dst_param(tx, param);
1090 }
1091
1092 static void
1093 tx_apply_dst0_modifiers(struct shader_translator *tx)
1094 {
1095 struct ureg_dst rdst;
1096 float f;
1097
1098 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1099 return;
1100 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1101
1102 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1103
1104 if (tx->insn.dst[0].shift < 0)
1105 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1106 else
1107 f = 1 << tx->insn.dst[0].shift;
1108
1109 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1110 }
1111
1112 static struct ureg_src
1113 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1114 {
1115 struct ureg_src src;
1116
1117 assert(!param->shift);
1118 assert(!(param->mod & NINED3DSPDM_SATURATE));
1119
1120 switch (param->file) {
1121 case D3DSPR_INPUT:
1122 if (IS_VS) {
1123 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1124 } else {
1125 assert(!param->rel);
1126 assert(param->idx < Elements(tx->regs.v));
1127 src = tx->regs.v[param->idx];
1128 }
1129 break;
1130 default:
1131 src = ureg_src(tx_dst_param(tx, param));
1132 break;
1133 }
1134 if (param->rel)
1135 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1136
1137 if (!param->mask)
1138 WARN("mask is 0, using identity swizzle\n");
1139
1140 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1141 char s[4];
1142 int n;
1143 int c;
1144 for (n = 0, c = 0; c < 4; ++c)
1145 if (param->mask & (1 << c))
1146 s[n++] = c;
1147 assert(n);
1148 for (c = n; c < 4; ++c)
1149 s[c] = s[n - 1];
1150 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1151 }
1152 return src;
1153 }
1154
1155 static HRESULT
1156 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1157 {
1158 struct ureg_program *ureg = tx->ureg;
1159 struct ureg_dst dst;
1160 struct ureg_src src[2];
1161 unsigned i;
1162
1163 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1164 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1165 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1166
1167 for (i = 0; i < n; i++, src[1].Index++)
1168 {
1169 const unsigned m = (1 << i);
1170
1171 if (!(dst.WriteMask & m))
1172 continue;
1173
1174 /* XXX: src == dst case ? */
1175
1176 switch (k) {
1177 case 3:
1178 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1179 break;
1180 case 4:
1181 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1182 break;
1183 default:
1184 DBG("invalid operation: M%ux%u\n", m, n);
1185 break;
1186 }
1187 }
1188
1189 return D3D_OK;
1190 }
1191
/* Expands to two zero arguments ("0, 0"). */
#define VNOTSUPPORTED 0, 0
/* Pack a shader version as (major << 8) | minor. */
#define V(maj, min) (((maj) << 8) | (min))
1194
1195 static INLINE const char *
1196 d3dsio_to_string( unsigned opcode )
1197 {
1198 static const char *names[] = {
1199 "NOP",
1200 "MOV",
1201 "ADD",
1202 "SUB",
1203 "MAD",
1204 "MUL",
1205 "RCP",
1206 "RSQ",
1207 "DP3",
1208 "DP4",
1209 "MIN",
1210 "MAX",
1211 "SLT",
1212 "SGE",
1213 "EXP",
1214 "LOG",
1215 "LIT",
1216 "DST",
1217 "LRP",
1218 "FRC",
1219 "M4x4",
1220 "M4x3",
1221 "M3x4",
1222 "M3x3",
1223 "M3x2",
1224 "CALL",
1225 "CALLNZ",
1226 "LOOP",
1227 "RET",
1228 "ENDLOOP",
1229 "LABEL",
1230 "DCL",
1231 "POW",
1232 "CRS",
1233 "SGN",
1234 "ABS",
1235 "NRM",
1236 "SINCOS",
1237 "REP",
1238 "ENDREP",
1239 "IF",
1240 "IFC",
1241 "ELSE",
1242 "ENDIF",
1243 "BREAK",
1244 "BREAKC",
1245 "MOVA",
1246 "DEFB",
1247 "DEFI",
1248 NULL,
1249 NULL,
1250 NULL,
1251 NULL,
1252 NULL,
1253 NULL,
1254 NULL,
1255 NULL,
1256 NULL,
1257 NULL,
1258 NULL,
1259 NULL,
1260 NULL,
1261 NULL,
1262 NULL,
1263 "TEXCOORD",
1264 "TEXKILL",
1265 "TEX",
1266 "TEXBEM",
1267 "TEXBEML",
1268 "TEXREG2AR",
1269 "TEXREG2GB",
1270 "TEXM3x2PAD",
1271 "TEXM3x2TEX",
1272 "TEXM3x3PAD",
1273 "TEXM3x3TEX",
1274 NULL,
1275 "TEXM3x3SPEC",
1276 "TEXM3x3VSPEC",
1277 "EXPP",
1278 "LOGP",
1279 "CND",
1280 "DEF",
1281 "TEXREG2RGB",
1282 "TEXDP3TEX",
1283 "TEXM3x2DEPTH",
1284 "TEXDP3",
1285 "TEXM3x3",
1286 "TEXDEPTH",
1287 "CMP",
1288 "BEM",
1289 "DP2ADD",
1290 "DSX",
1291 "DSY",
1292 "TEXLDD",
1293 "SETP",
1294 "TEXLDL",
1295 "BREAKP"
1296 };
1297
1298 if (opcode < Elements(names)) return names[opcode];
1299
1300 switch (opcode) {
1301 case D3DSIO_PHASE: return "PHASE";
1302 case D3DSIO_COMMENT: return "COMMENT";
1303 case D3DSIO_END: return "END";
1304 default:
1305 return NULL;
1306 }
1307 }
1308
/* Initializer for an instruction-table entry with all fields zero and no
 * handler.
 */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when at least one of the four version bounds of a table entry
 * (vertex/fragment min/max) is non-zero.
 */
#define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
                                     (inst).vert_version.max | \
                                     (inst).frag_version.min | \
                                     (inst).frag_version.max)

/* Name of the dedicated translation function for instruction `name`. */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the dedicated translation function for `name`;
 * the function receives the translator as `tx` and returns an HRESULT.
 */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration; used by handlers in this section that fall back to
 * the generic path (see MOV_vs1x).
 */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1324
1325 DECL_SPECIAL(M4x4)
1326 {
1327 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1328 }
1329
1330 DECL_SPECIAL(M4x3)
1331 {
1332 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1333 }
1334
1335 DECL_SPECIAL(M3x4)
1336 {
1337 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1338 }
1339
1340 DECL_SPECIAL(M3x3)
1341 {
1342 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1343 }
1344
1345 DECL_SPECIAL(M3x2)
1346 {
1347 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1348 }
1349
1350 DECL_SPECIAL(CMP)
1351 {
1352 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1353 tx_src_param(tx, &tx->insn.src[0]),
1354 tx_src_param(tx, &tx->insn.src[2]),
1355 tx_src_param(tx, &tx->insn.src[1]));
1356 return D3D_OK;
1357 }
1358
1359 DECL_SPECIAL(CND)
1360 {
1361 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1362 struct ureg_dst cgt;
1363 struct ureg_src cnd;
1364
1365 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4) {
1366 ureg_MOV(tx->ureg,
1367 dst, tx_src_param(tx, &tx->insn.src[1]));
1368 return D3D_OK;
1369 }
1370
1371 cnd = tx_src_param(tx, &tx->insn.src[0]);
1372 cgt = tx_scratch(tx);
1373
1374 if (tx->version.major == 1 && tx->version.minor < 4) {
1375 cgt.WriteMask = TGSI_WRITEMASK_W;
1376 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1377 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1378 } else {
1379 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1380 }
1381 ureg_CMP(tx->ureg, dst,
1382 tx_src_param(tx, &tx->insn.src[1]),
1383 tx_src_param(tx, &tx->insn.src[2]), ureg_negate(cnd));
1384 return D3D_OK;
1385 }
1386
1387 DECL_SPECIAL(CALL)
1388 {
1389 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1390 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1391 return D3D_OK;
1392 }
1393
1394 DECL_SPECIAL(CALLNZ)
1395 {
1396 struct ureg_program *ureg = tx->ureg;
1397 struct ureg_dst tmp = tx_scratch_scalar(tx);
1398 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1399
1400 /* NOTE: source should be const bool, so we can use NOT/SUB instead of [U]SNE 0 */
1401 if (!tx->insn.flags) {
1402 if (tx->native_integers)
1403 ureg_NOT(ureg, tmp, src);
1404 else
1405 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1406 }
1407 ureg_IF(ureg, tx->insn.flags ? src : tx_src_scalar(tmp), tx_cond(tx));
1408 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1409 tx_endcond(tx);
1410 ureg_ENDIF(ureg);
1411 return D3D_OK;
1412 }
1413
1414 DECL_SPECIAL(MOV_vs1x)
1415 {
1416 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1417 ureg_ARL(tx->ureg,
1418 tx_dst_param(tx, &tx->insn.dst[0]),
1419 tx_src_param(tx, &tx->insn.src[0]));
1420 return D3D_OK;
1421 }
1422 return NineTranslateInstruction_Generic(tx);
1423 }
1424
/* loop: open a counted loop driven by source 1.
 *
 * Source 1 is read as (x = iteration count, y = initial value, z = step).
 * The loop counter is initialised to the initial value before BGNLOOP.  At
 * the top of every iteration the counter is compared for equality against
 * count * step + init and the loop is left when they match; otherwise the
 * aL register is loaded from the counter and the counter is advanced by
 * step.  Integer or float opcodes are chosen according to
 * tx->native_integers.
 */
DECL_SPECIAL(LOOP)
{
    struct ureg_program *ureg = tx->ureg;
    unsigned *label;
    struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
    struct ureg_src iter = ureg_scalar(src, TGSI_SWIZZLE_X);
    struct ureg_src init = ureg_scalar(src, TGSI_SWIZZLE_Y);
    struct ureg_src step = ureg_scalar(src, TGSI_SWIZZLE_Z);
    struct ureg_dst ctr;
    struct ureg_dst tmp = tx_scratch_scalar(tx);

    label = tx_bgnloop(tx);
    ctr = tx_get_loopctr(tx);

    ureg_MOV(tx->ureg, ctr, init);
    ureg_BGNLOOP(tx->ureg, label);
    if (tx->native_integers) {
        /* we'll let the backend pull up that MAD ... */
        ureg_UMAD(ureg, tmp, iter, step, init);
        ureg_USEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
#ifdef NINE_TGSI_LAZY_DEVS
        ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
#endif
    } else {
        /* can't simply use SGE for precision because step might be negative */
        ureg_MAD(ureg, tmp, iter, step, init);
        ureg_SEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
#ifdef NINE_TGSI_LAZY_DEVS
        ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
#endif
    }
    /* Leave the loop once the end value is reached: either an explicit
     * IF/BRK/ENDIF sequence or a single conditional break. */
#ifdef NINE_TGSI_LAZY_DEVS
    ureg_BRK(ureg);
    tx_endcond(tx);
    ureg_ENDIF(ureg);
#else
    ureg_BREAKC(ureg, tx_src_scalar(tmp));
#endif
    /* Expose the current counter through aL, then step the counter. */
    if (tx->native_integers) {
        ureg_UARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
        ureg_UADD(ureg, ctr, tx_src_scalar(ctr), step);
    } else {
        ureg_ARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
        ureg_ADD(ureg, ctr, tx_src_scalar(ctr), step);
    }
    return D3D_OK;
}
1472
1473 DECL_SPECIAL(RET)
1474 {
1475 ureg_RET(tx->ureg);
1476 return D3D_OK;
1477 }
1478
1479 DECL_SPECIAL(ENDLOOP)
1480 {
1481 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1482 return D3D_OK;
1483 }
1484
1485 DECL_SPECIAL(LABEL)
1486 {
1487 unsigned k = tx->num_inst_labels;
1488 unsigned n = tx->insn.src[0].idx;
1489 assert(n < 2048);
1490 if (n >= k)
1491 tx->inst_labels = REALLOC(tx->inst_labels,
1492 k * sizeof(tx->inst_labels[0]),
1493 n * sizeof(tx->inst_labels[0]));
1494
1495 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1496 return D3D_OK;
1497 }
1498
1499 DECL_SPECIAL(SINCOS)
1500 {
1501 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1502 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1503
1504 assert(!(dst.WriteMask & 0xc));
1505
1506 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1507 ureg_SCS(tx->ureg, dst, src);
1508 return D3D_OK;
1509 }
1510
1511 DECL_SPECIAL(SGN)
1512 {
1513 ureg_SSG(tx->ureg,
1514 tx_dst_param(tx, &tx->insn.dst[0]),
1515 tx_src_param(tx, &tx->insn.src[0]));
1516 return D3D_OK;
1517 }
1518
1519 DECL_SPECIAL(REP)
1520 {
1521 struct ureg_program *ureg = tx->ureg;
1522 unsigned *label;
1523 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1524 struct ureg_dst ctr;
1525 struct ureg_dst tmp = tx_scratch_scalar(tx);
1526 struct ureg_src imm =
1527 tx->native_integers ? ureg_imm1u(ureg, 0) : ureg_imm1f(ureg, 0.0f);
1528
1529 label = tx_bgnloop(tx);
1530 ctr = tx_get_loopctr(tx);
1531
1532 /* NOTE: rep must be constant, so we don't have to save the count */
1533 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1534
1535 ureg_MOV(ureg, ctr, imm);
1536 ureg_BGNLOOP(ureg, label);
1537 if (tx->native_integers)
1538 {
1539 ureg_USGE(ureg, tmp, tx_src_scalar(ctr), rep);
1540 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1541 }
1542 else
1543 {
1544 ureg_SGE(ureg, tmp, tx_src_scalar(ctr), rep);
1545 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1546 }
1547 ureg_BRK(ureg);
1548 tx_endcond(tx);
1549 ureg_ENDIF(ureg);
1550
1551 if (tx->native_integers) {
1552 ureg_UADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1u(ureg, 1));
1553 } else {
1554 ureg_ADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1f(ureg, 1.0f));
1555 }
1556
1557 return D3D_OK;
1558 }
1559
1560 DECL_SPECIAL(ENDREP)
1561 {
1562 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1563 return D3D_OK;
1564 }
1565
1566 DECL_SPECIAL(ENDIF)
1567 {
1568 tx_endcond(tx);
1569 ureg_ENDIF(tx->ureg);
1570 return D3D_OK;
1571 }
1572
1573 DECL_SPECIAL(IF)
1574 {
1575 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1576
1577 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1578 ureg_UIF(tx->ureg, src, tx_cond(tx));
1579 else
1580 ureg_IF(tx->ureg, src, tx_cond(tx));
1581
1582 return D3D_OK;
1583 }
1584
1585 static INLINE unsigned
1586 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1587 {
1588 switch (flags) {
1589 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1590 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1591 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1592 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1593 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1594 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1595 default:
1596 assert(!"invalid comparison flags");
1597 return TGSI_OPCODE_SGT;
1598 }
1599 }
1600
1601 DECL_SPECIAL(IFC)
1602 {
1603 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1604 struct ureg_src src[2];
1605 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1606 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1607 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1608 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1609 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1610 return D3D_OK;
1611 }
1612
1613 DECL_SPECIAL(ELSE)
1614 {
1615 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1616 return D3D_OK;
1617 }
1618
1619 DECL_SPECIAL(BREAKC)
1620 {
1621 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1622 struct ureg_src src[2];
1623 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1624 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1625 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1626 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1627 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1628 ureg_BRK(tx->ureg);
1629 tx_endcond(tx);
1630 ureg_ENDIF(tx->ureg);
1631 return D3D_OK;
1632 }
1633
/* Printable names of the D3DDECLUSAGE values, indexed by the usage value
 * itself.  Used by the DCL handler when dumping declarations; indices beyond
 * D3DDECLUSAGE_SAMPLE are outside the table.
 */
static const char *sm1_declusage_names[] =
{
    [D3DDECLUSAGE_POSITION] = "POSITION",
    [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
    [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
    [D3DDECLUSAGE_NORMAL] = "NORMAL",
    [D3DDECLUSAGE_PSIZE] = "PSIZE",
    [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
    [D3DDECLUSAGE_TANGENT] = "TANGENT",
    [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
    [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
    [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
    [D3DDECLUSAGE_COLOR] = "COLOR",
    [D3DDECLUSAGE_FOG] = "FOG",
    [D3DDECLUSAGE_DEPTH] = "DEPTH",
    [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
1651
1652 static INLINE unsigned
1653 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1654 {
1655 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1656 }
1657
1658 static void
1659 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1660 boolean tc,
1661 struct sm1_semantic *dcl)
1662 {
1663 BYTE index = dcl->usage_idx;
1664
1665 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1666 * we match to a TGSI_SEMANTIC_GENERIC with index.
1667 *
1668 * The index can be anything UINT16 and usage_idx is BYTE,
1669 * so we can fit everything. It doesn't matter if indices
1670 * are close together or low.
1671 *
1672 *
1673 * POSITION >= 1: 10 * index + 6
1674 * COLOR >= 2: 10 * (index-1) + 7
1675 * TEXCOORD[0..15]: index
1676 * BLENDWEIGHT: 10 * index + 18
1677 * BLENDINDICES: 10 * index + 19
1678 * NORMAL: 10 * index + 20
1679 * TANGENT: 10 * index + 21
1680 * BINORMAL: 10 * index + 22
1681 * TESSFACTOR: 10 * index + 23
1682 */
1683
1684 switch (dcl->usage) {
1685 case D3DDECLUSAGE_POSITION:
1686 case D3DDECLUSAGE_POSITIONT:
1687 case D3DDECLUSAGE_DEPTH:
1688 if (index == 0) {
1689 sem->Name = TGSI_SEMANTIC_POSITION;
1690 sem->Index = 0;
1691 } else {
1692 sem->Name = TGSI_SEMANTIC_GENERIC;
1693 sem->Index = 10 * index + 6;
1694 }
1695 break;
1696 case D3DDECLUSAGE_COLOR:
1697 if (index < 2) {
1698 sem->Name = TGSI_SEMANTIC_COLOR;
1699 sem->Index = index;
1700 } else {
1701 sem->Name = TGSI_SEMANTIC_GENERIC;
1702 sem->Index = 10 * (index-1) + 7;
1703 }
1704 break;
1705 case D3DDECLUSAGE_FOG:
1706 assert(index == 0);
1707 sem->Name = TGSI_SEMANTIC_FOG;
1708 sem->Index = 0;
1709 break;
1710 case D3DDECLUSAGE_PSIZE:
1711 assert(index == 0);
1712 sem->Name = TGSI_SEMANTIC_PSIZE;
1713 sem->Index = 0;
1714 break;
1715 case D3DDECLUSAGE_TEXCOORD:
1716 assert(index < 16);
1717 if (index < 8 && tc)
1718 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1719 else
1720 sem->Name = TGSI_SEMANTIC_GENERIC;
1721 sem->Index = index;
1722 break;
1723 case D3DDECLUSAGE_BLENDWEIGHT:
1724 sem->Name = TGSI_SEMANTIC_GENERIC;
1725 sem->Index = 10 * index + 18;
1726 break;
1727 case D3DDECLUSAGE_BLENDINDICES:
1728 sem->Name = TGSI_SEMANTIC_GENERIC;
1729 sem->Index = 10 * index + 19;
1730 break;
1731 case D3DDECLUSAGE_NORMAL:
1732 sem->Name = TGSI_SEMANTIC_GENERIC;
1733 sem->Index = 10 * index + 20;
1734 break;
1735 case D3DDECLUSAGE_TANGENT:
1736 sem->Name = TGSI_SEMANTIC_GENERIC;
1737 sem->Index = 10 * index + 21;
1738 break;
1739 case D3DDECLUSAGE_BINORMAL:
1740 sem->Name = TGSI_SEMANTIC_GENERIC;
1741 sem->Index = 10 * index + 22;
1742 break;
1743 case D3DDECLUSAGE_TESSFACTOR:
1744 sem->Name = TGSI_SEMANTIC_GENERIC;
1745 sem->Index = 10 * index + 23;
1746 break;
1747 case D3DDECLUSAGE_SAMPLE:
1748 sem->Name = TGSI_SEMANTIC_COUNT;
1749 sem->Index = 0;
1750 break;
1751 default:
1752 assert(!"Invalid DECLUSAGE.");
1753 break;
1754 }
1755 }
1756
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT;
 * these are the values the sampler-type helpers below switch on.
 */
#define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1761 static INLINE unsigned
1762 d3dstt_to_tgsi_tex(BYTE sampler_type)
1763 {
1764 switch (sampler_type) {
1765 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1766 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1767 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1768 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1769 default:
1770 assert(0);
1771 return TGSI_TEXTURE_UNKNOWN;
1772 }
1773 }
1774 static INLINE unsigned
1775 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1776 {
1777 switch (sampler_type) {
1778 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1779 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1780 case NINED3DSTT_VOLUME:
1781 case NINED3DSTT_CUBE:
1782 default:
1783 assert(0);
1784 return TGSI_TEXTURE_UNKNOWN;
1785 }
1786 }
1787 static INLINE unsigned
1788 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1789 {
1790 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1791 case 1: return TGSI_TEXTURE_1D;
1792 case 0: return TGSI_TEXTURE_2D;
1793 case 3: return TGSI_TEXTURE_3D;
1794 default:
1795 return TGSI_TEXTURE_CUBE;
1796 }
1797 }
1798
1799 static const char *
1800 sm1_sampler_type_name(BYTE sampler_type)
1801 {
1802 switch (sampler_type) {
1803 case NINED3DSTT_1D: return "1D";
1804 case NINED3DSTT_2D: return "2D";
1805 case NINED3DSTT_VOLUME: return "VOLUME";
1806 case NINED3DSTT_CUBE: return "CUBE";
1807 default:
1808 return "(D3DSTT_?)";
1809 }
1810 }
1811
1812 static INLINE unsigned
1813 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1814 {
1815 switch (sem->Name) {
1816 case TGSI_SEMANTIC_POSITION:
1817 case TGSI_SEMANTIC_NORMAL:
1818 return TGSI_INTERPOLATE_LINEAR;
1819 case TGSI_SEMANTIC_BCOLOR:
1820 case TGSI_SEMANTIC_COLOR:
1821 case TGSI_SEMANTIC_FOG:
1822 case TGSI_SEMANTIC_GENERIC:
1823 case TGSI_SEMANTIC_TEXCOORD:
1824 case TGSI_SEMANTIC_CLIPDIST:
1825 case TGSI_SEMANTIC_CLIPVERTEX:
1826 return TGSI_INTERPOLATE_PERSPECTIVE;
1827 case TGSI_SEMANTIC_EDGEFLAG:
1828 case TGSI_SEMANTIC_FACE:
1829 case TGSI_SEMANTIC_INSTANCEID:
1830 case TGSI_SEMANTIC_PCOORD:
1831 case TGSI_SEMANTIC_PRIMID:
1832 case TGSI_SEMANTIC_PSIZE:
1833 case TGSI_SEMANTIC_VERTEXID:
1834 return TGSI_INTERPOLATE_CONSTANT;
1835 default:
1836 assert(0);
1837 return TGSI_INTERPOLATE_CONSTANT;
1838 }
1839 }
1840
1841 DECL_SPECIAL(DCL)
1842 {
1843 struct ureg_program *ureg = tx->ureg;
1844 boolean is_input;
1845 boolean is_sampler;
1846 struct tgsi_declaration_semantic tgsi;
1847 struct sm1_semantic sem;
1848 sm1_read_semantic(tx, &sem);
1849
1850 is_input = sem.reg.file == D3DSPR_INPUT;
1851 is_sampler =
1852 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1853
1854 DUMP("DCL ");
1855 sm1_dump_dst_param(&sem.reg);
1856 if (is_sampler)
1857 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1858 else
1859 if (tx->version.major >= 3)
1860 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1861 else
1862 if (sem.usage | sem.usage_idx)
1863 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1864 else
1865 DUMP("\n");
1866
1867 if (is_sampler) {
1868 const unsigned m = 1 << sem.reg.idx;
1869 ureg_DECL_sampler(ureg, sem.reg.idx);
1870 tx->info->sampler_mask |= m;
1871 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1872 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1873 d3dstt_to_tgsi_tex(sem.sampler_type);
1874 return D3D_OK;
1875 }
1876
1877 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1878 if (IS_VS) {
1879 if (is_input) {
1880 /* linkage outside of shader with vertex declaration */
1881 ureg_DECL_vs_input(ureg, sem.reg.idx);
1882 assert(sem.reg.idx < Elements(tx->info->input_map));
1883 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1884 tx->info->num_inputs = sem.reg.idx + 1;
1885 /* NOTE: preserving order in case of indirect access */
1886 } else
1887 if (tx->version.major >= 3) {
1888 /* SM2 output semantic determined by file */
1889 assert(sem.reg.mask != 0);
1890 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1891 tx->info->position_t = TRUE;
1892 assert(sem.reg.idx < Elements(tx->regs.o));
1893 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1894 ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1895
1896 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1897 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1898 }
1899 } else {
1900 if (is_input && tx->version.major >= 3) {
1901 /* SM3 only, SM2 input semantic determined by file */
1902 assert(sem.reg.idx < Elements(tx->regs.v));
1903 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1904 ureg, tgsi.Name, tgsi.Index,
1905 nine_tgsi_to_interp_mode(&tgsi),
1906 0, /* cylwrap */
1907 sem.reg.mod & NINED3DSPDM_CENTROID);
1908 } else
1909 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
1910 /* FragColor or FragDepth */
1911 assert(sem.reg.mask != 0);
1912 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1913 }
1914 }
1915 return D3D_OK;
1916 }
1917
1918 DECL_SPECIAL(DEF)
1919 {
1920 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
1921 return D3D_OK;
1922 }
1923
1924 DECL_SPECIAL(DEFB)
1925 {
1926 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
1927 return D3D_OK;
1928 }
1929
1930 DECL_SPECIAL(DEFI)
1931 {
1932 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
1933 return D3D_OK;
1934 }
1935
1936 DECL_SPECIAL(NRM)
1937 {
1938 struct ureg_program *ureg = tx->ureg;
1939 struct ureg_dst tmp = tx_scratch_scalar(tx);
1940 struct ureg_src nrm = tx_src_scalar(tmp);
1941 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1942 ureg_DP3(ureg, tmp, src, src);
1943 ureg_RSQ(ureg, tmp, nrm);
1944 ureg_MUL(ureg, tx_dst_param(tx, &tx->insn.dst[0]), src, nrm);
1945 return D3D_OK;
1946 }
1947
1948 DECL_SPECIAL(DP2ADD)
1949 {
1950 struct ureg_dst tmp = tx_scratch_scalar(tx);
1951 struct ureg_src dp2 = tx_src_scalar(tmp);
1952 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1953 struct ureg_src src[3];
1954 int i;
1955 for (i = 0; i < 3; ++i)
1956 src[i] = tx_src_param(tx, &tx->insn.src[i]);
1957 assert_replicate_swizzle(&src[2]);
1958
1959 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
1960 ureg_ADD(tx->ureg, dst, src[2], dp2);
1961
1962 return D3D_OK;
1963 }
1964
1965 DECL_SPECIAL(TEXCOORD)
1966 {
1967 struct ureg_program *ureg = tx->ureg;
1968 const unsigned s = tx->insn.dst[0].idx;
1969 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1970
1971 if (ureg_src_is_undef(tx->regs.vT[s]))
1972 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1973 ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
1974
1975 return D3D_OK;
1976 }
1977
1978 DECL_SPECIAL(TEXCOORD_ps14)
1979 {
1980 struct ureg_program *ureg = tx->ureg;
1981 const unsigned s = tx->insn.src[0].idx;
1982 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1983
1984 if (ureg_src_is_undef(tx->regs.vT[s]))
1985 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1986 ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
1987
1988 return D3D_OK;
1989 }
1990
1991 DECL_SPECIAL(TEXKILL)
1992 {
1993 struct ureg_src reg;
1994
1995 if (tx->version.major > 1 || tx->version.minor > 3) {
1996 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
1997 } else {
1998 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
1999 reg = tx->regs.vT[tx->insn.dst[0].idx];
2000 }
2001 if (tx->version.major < 2)
2002 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2003 ureg_KILL_IF(tx->ureg, reg);
2004
2005 return D3D_OK;
2006 }
2007
/* The handlers below are not implemented.  Each one only invokes
 * STUB(D3DERR_INVALIDCALL); presumably that reports the stub and returns the
 * given error code - confirm against the STUB macro definition.
 */

/* texbem: not implemented. */
DECL_SPECIAL(TEXBEM)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texbeml: not implemented. */
DECL_SPECIAL(TEXBEML)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texreg2ar: not implemented. */
DECL_SPECIAL(TEXREG2AR)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texreg2gb: not implemented. */
DECL_SPECIAL(TEXREG2GB)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texm3x2pad: not implemented. */
DECL_SPECIAL(TEXM3x2PAD)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texm3x2tex: not implemented. */
DECL_SPECIAL(TEXM3x2TEX)
{
    STUB(D3DERR_INVALIDCALL);
}
2037
/* texm3x3pad: emits nothing and reports success. */
DECL_SPECIAL(TEXM3x3PAD)
{
    return D3D_OK; /* this is just padding */
}
2042
/* The handlers below are not implemented.  Each one only invokes
 * STUB(D3DERR_INVALIDCALL); presumably that reports the stub and returns the
 * given error code - confirm against the STUB macro definition.
 */

/* texm3x3spec: not implemented. */
DECL_SPECIAL(TEXM3x3SPEC)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texm3x3vspec: not implemented. */
DECL_SPECIAL(TEXM3x3VSPEC)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texreg2rgb: not implemented. */
DECL_SPECIAL(TEXREG2RGB)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texdp3tex: not implemented. */
DECL_SPECIAL(TEXDP3TEX)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texm3x2depth: not implemented. */
DECL_SPECIAL(TEXM3x2DEPTH)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texdp3: not implemented. */
DECL_SPECIAL(TEXDP3)
{
    STUB(D3DERR_INVALIDCALL);
}
2072
2073 DECL_SPECIAL(TEXM3x3)
2074 {
2075 struct ureg_program *ureg = tx->ureg;
2076 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2077 struct ureg_src src[4];
2078 int s;
2079 const int m = tx->insn.dst[0].idx - 2;
2080 const int n = tx->insn.src[0].idx;
2081 assert(m >= 0 && m > n);
2082
2083 for (s = m; s <= (m + 2); ++s) {
2084 if (ureg_src_is_undef(tx->regs.vT[s]))
2085 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2086 src[s] = tx->regs.vT[s];
2087 }
2088 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), src[0], ureg_src(tx->regs.tS[n]));
2089 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), src[1], ureg_src(tx->regs.tS[n]));
2090 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), src[2], ureg_src(tx->regs.tS[n]));
2091
2092 switch (tx->insn.opcode) {
2093 case D3DSIO_TEXM3x3:
2094 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2095 break;
2096 case D3DSIO_TEXM3x3TEX:
2097 src[3] = ureg_DECL_sampler(ureg, m + 2);
2098 tx->info->sampler_mask |= 1 << (m + 2);
2099 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), src[3]);
2100 break;
2101 default:
2102 return D3DERR_INVALIDCALL;
2103 }
2104 return D3D_OK;
2105 }
2106
/* texdepth: not implemented.  STUB(D3DERR_INVALIDCALL) presumably reports
 * the stub and returns the given error code - confirm against the STUB macro
 * definition.
 */
DECL_SPECIAL(TEXDEPTH)
{
    STUB(D3DERR_INVALIDCALL);
}

/* bem: not implemented (same STUB behaviour as above). */
DECL_SPECIAL(BEM)
{
    STUB(D3DERR_INVALIDCALL);
}
2116
2117 DECL_SPECIAL(TEXLD)
2118 {
2119 struct ureg_program *ureg = tx->ureg;
2120 unsigned target;
2121 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2122 struct ureg_src src[2] = {
2123 tx_src_param(tx, &tx->insn.src[0]),
2124 tx_src_param(tx, &tx->insn.src[1])
2125 };
2126 assert(tx->insn.src[1].idx >= 0 &&
2127 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2128 target = tx->sampler_targets[tx->insn.src[1].idx];
2129
2130 switch (tx->insn.flags) {
2131 case 0:
2132 ureg_TEX(ureg, dst, target, src[0], src[1]);
2133 break;
2134 case NINED3DSI_TEXLD_PROJECT:
2135 ureg_TXP(ureg, dst, target, src[0], src[1]);
2136 break;
2137 case NINED3DSI_TEXLD_BIAS:
2138 ureg_TXB(ureg, dst, target, src[0], src[1]);
2139 break;
2140 default:
2141 assert(0);
2142 return D3DERR_INVALIDCALL;
2143 }
2144 return D3D_OK;
2145 }
2146
2147 DECL_SPECIAL(TEXLD_14)
2148 {
2149 struct ureg_program *ureg = tx->ureg;
2150 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2151 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2152 const unsigned s = tx->insn.dst[0].idx;
2153 const unsigned t = ps1x_sampler_type(tx->info, s);
2154
2155 tx->info->sampler_mask |= 1 << s;
2156 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2157
2158 return D3D_OK;
2159 }
2160
2161 DECL_SPECIAL(TEX)
2162 {
2163 struct ureg_program *ureg = tx->ureg;
2164 const unsigned s = tx->insn.dst[0].idx;
2165 const unsigned t = ps1x_sampler_type(tx->info, s);
2166 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2167 struct ureg_src src[2];
2168
2169 if (ureg_src_is_undef(tx->regs.vT[s]))
2170 tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
2171
2172 src[0] = tx->regs.vT[s];
2173 src[1] = ureg_DECL_sampler(ureg, s);
2174 tx->info->sampler_mask |= 1 << s;
2175
2176 ureg_TEX(ureg, dst, t, src[0], src[1]);
2177
2178 return D3D_OK;
2179 }
2180
2181 DECL_SPECIAL(TEXLDD)
2182 {
2183 unsigned target;
2184 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2185 struct ureg_src src[4] = {
2186 tx_src_param(tx, &tx->insn.src[0]),
2187 tx_src_param(tx, &tx->insn.src[1]),
2188 tx_src_param(tx, &tx->insn.src[2]),
2189 tx_src_param(tx, &tx->insn.src[3])
2190 };
2191 assert(tx->insn.src[3].idx >= 0 &&
2192 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2193 target = tx->sampler_targets[tx->insn.src[1].idx];
2194
2195 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2196 return D3D_OK;
2197 }
2198
2199 DECL_SPECIAL(TEXLDL)
2200 {
2201 unsigned target;
2202 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2203 struct ureg_src src[2] = {
2204 tx_src_param(tx, &tx->insn.src[0]),
2205 tx_src_param(tx, &tx->insn.src[1])
2206 };
2207 assert(tx->insn.src[3].idx >= 0 &&
2208 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2209 target = tx->sampler_targets[tx->insn.src[1].idx];
2210
2211 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2212 return D3D_OK;
2213 }
2214
/* setp: not implemented.  STUB(D3DERR_INVALIDCALL) presumably reports the
 * stub and returns the given error code - confirm against the STUB macro
 * definition.
 */
DECL_SPECIAL(SETP)
{
    STUB(D3DERR_INVALIDCALL);
}

/* breakp: not implemented (same STUB behaviour as above). */
DECL_SPECIAL(BREAKP)
{
    STUB(D3DERR_INVALIDCALL);
}

/* phase: emits nothing and reports success. */
DECL_SPECIAL(PHASE)
{
    return D3D_OK; /* we don't care about phase */
}

/* comment: emits nothing and reports success. */
DECL_SPECIAL(COMMENT)
{
    return D3D_OK; /* nothing to do */
}
2234
2235
/* Build one instruction-table entry:
 *   o        D3DSIO_* opcode suffix
 *   t        TGSI_OPCODE_* suffix
 *   vv1/vv2  first brace pair of version bounds (vertex shader, min/max)
 *   pv1/pv2  second brace pair of version bounds (fragment shader, min/max)
 *   d, s     two counts; judging by the table rows these are the number of
 *            destination and source operands - confirm against
 *            struct sm1_op_info
 *   h        dedicated handler function, or NULL
 */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2238
2239 struct sm1_op_info inst_table[] =
2240 {
2241 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2242 _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
2243 _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2244 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2245 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2246 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2247 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2248 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2249 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 7 */
2250 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2251 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2252 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2253 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2254 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2255 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2256 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2257 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 15 */
2258 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
2259 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2260 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2261 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2262
2263 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2264 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2265 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2266 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2267 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2268
2269 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALL)),
2270 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALLNZ)),
2271 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2272 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2273 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2274 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(LABEL)),
2275
2276 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2277
2278 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL),
2279 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2280 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2281 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2282 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2283
2284 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2285 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2286
2287 /* More flow control */
2288 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2289 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2290 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2291 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2292 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2293 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2294 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2295 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2296
2297 _OPI(MOVA, ARR, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2298
2299 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2300 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2301
2302 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2303 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2304 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2305 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2306 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2307 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2308 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEM)),
2309 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEML)),
2310 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2AR)),
2311 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2GB)),
2312 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2PAD)),
2313 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2TEX)),
2314 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3PAD)),
2315 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
2316 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3SPEC)),
2317 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3VSPEC)),
2318
2319 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2320 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2321 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2322 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2323
2324 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2325
2326 /* More tex stuff */
2327 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXREG2RGB)),
2328 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3TEX)),
2329 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 0, 0, SPECIAL(TEXM3x2DEPTH)),
2330 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3)),
2331 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
2332 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(TEXDEPTH)),
2333
2334 /* Misc */
2335 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2336 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(BEM)),
2337 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2338 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2339 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2340 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2341 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(SETP)),
2342 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2343 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(BREAKP))
2344 };
2345
/* Descriptor for the PHASE opcode. It is kept outside inst_table;
 * sm1_parse_instruction() selects it explicitly when the opcode is
 * D3DSIO_PHASE and does not fit in tx->op_info_map.
 * NOTE(review): the V() pairs presumably are vertex min/max followed by
 * fragment min/max shader versions, as in inst_table - confirm against _OPI. */
struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2348
/* Descriptor for the COMMENT opcode. Like inst_phase it is not part of
 * inst_table; sm1_parse_instruction() picks it when the opcode is
 * D3DSIO_COMMENT and lies outside tx->op_info_map. It declares no
 * destination and no source parameters. */
struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2351
2352 static void
2353 create_op_info_map(struct shader_translator *tx)
2354 {
2355 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2356 unsigned i;
2357
2358 for (i = 0; i < Elements(tx->op_info_map); ++i)
2359 tx->op_info_map[i] = -1;
2360
2361 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2362 for (i = 0; i < Elements(inst_table); ++i) {
2363 assert(inst_table[i].sio < Elements(tx->op_info_map));
2364 if (inst_table[i].vert_version.min <= version &&
2365 inst_table[i].vert_version.max >= version)
2366 tx->op_info_map[inst_table[i].sio] = i;
2367 }
2368 } else {
2369 for (i = 0; i < Elements(inst_table); ++i) {
2370 assert(inst_table[i].sio < Elements(tx->op_info_map));
2371 if (inst_table[i].frag_version.min <= version &&
2372 inst_table[i].frag_version.max >= version)
2373 tx->op_info_map[inst_table[i].sio] = i;
2374 }
2375 }
2376 }
2377
2378 static INLINE HRESULT
2379 NineTranslateInstruction_Generic(struct shader_translator *tx)
2380 {
2381 struct ureg_dst dst[1];
2382 struct ureg_src src[4];
2383 unsigned i;
2384
2385 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2386 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2387 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2388 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2389
2390 ureg_insn(tx->ureg, tx->insn.info->opcode,
2391 dst, tx->insn.ndst,
2392 src, tx->insn.nsrc);
2393 return D3D_OK;
2394 }
2395
2396 static INLINE DWORD
2397 TOKEN_PEEK(struct shader_translator *tx)
2398 {
2399 return *(tx->parse);
2400 }
2401
2402 static INLINE DWORD
2403 TOKEN_NEXT(struct shader_translator *tx)
2404 {
2405 return *(tx->parse)++;
2406 }
2407
2408 static INLINE void
2409 TOKEN_JUMP(struct shader_translator *tx)
2410 {
2411 if (tx->parse_next && tx->parse != tx->parse_next) {
2412 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2413 tx->parse = tx->parse_next;
2414 }
2415 }
2416
2417 static INLINE boolean
2418 sm1_parse_eof(struct shader_translator *tx)
2419 {
2420 return TOKEN_PEEK(tx) == NINED3DSP_END;
2421 }
2422
2423 static void
2424 sm1_read_version(struct shader_translator *tx)
2425 {
2426 const DWORD tok = TOKEN_NEXT(tx);
2427
2428 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2429 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2430
2431 switch (tok >> 16) {
2432 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2433 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2434 default:
2435 DBG("Invalid shader type: %x\n", tok);
2436 tx->processor = ~0;
2437 break;
2438 }
2439 }
2440
2441 /* This is just to check if we parsed the instruction properly. */
2442 static void
2443 sm1_parse_get_skip(struct shader_translator *tx)
2444 {
2445 const DWORD tok = TOKEN_PEEK(tx);
2446
2447 if (tx->version.major >= 2) {
2448 tx->parse_next = tx->parse + 1 /* this */ +
2449 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2450 } else {
2451 tx->parse_next = NULL; /* TODO: determine from param count */
2452 }
2453 }
2454
2455 static void
2456 sm1_print_comment(const char *comment, UINT size)
2457 {
2458 if (!size)
2459 return;
2460 /* TODO */
2461 }
2462
2463 static void
2464 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2465 {
2466 DWORD tok = TOKEN_PEEK(tx);
2467
2468 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2469 {
2470 const char *comment = "";
2471 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2472 tx->parse += size + 1;
2473
2474 if (print)
2475 sm1_print_comment(comment, size);
2476
2477 tok = TOKEN_PEEK(tx);
2478 }
2479 }
2480
2481 static void
2482 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2483 {
2484 *reg = TOKEN_NEXT(tx);
2485
2486 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2487 {
2488 if (tx->version.major < 2)
2489 *rel = (1 << 31) |
2490 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2491 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2492 (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT);
2493 else
2494 *rel = TOKEN_NEXT(tx);
2495 }
2496 }
2497
2498 static void
2499 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2500 {
2501 uint8_t shift;
2502 dst->file =
2503 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2504 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2505 dst->type = TGSI_RETURN_TYPE_FLOAT;
2506 dst->idx = tok & D3DSP_REGNUM_MASK;
2507 dst->rel = NULL;
2508 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2509 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2510 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2511 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2512 }
2513
2514 static void
2515 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2516 {
2517 src->file =
2518 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2519 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2520 src->type = TGSI_RETURN_TYPE_FLOAT;
2521 src->idx = tok & D3DSP_REGNUM_MASK;
2522 src->rel = NULL;
2523 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2524 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2525
2526 switch (src->file) {
2527 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2528 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2529 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2530 default:
2531 break;
2532 }
2533 }
2534
2535 static void
2536 sm1_parse_immediate(struct shader_translator *tx,
2537 struct sm1_src_param *imm)
2538 {
2539 imm->file = NINED3DSPR_IMMEDIATE;
2540 imm->idx = INT_MIN;
2541 imm->rel = NULL;
2542 imm->swizzle = NINED3DSP_NOSWIZZLE;
2543 imm->mod = 0;
2544 switch (tx->insn.opcode) {
2545 case D3DSIO_DEF:
2546 imm->type = NINED3DSPTYPE_FLOAT4;
2547 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2548 tx->parse += 4;
2549 break;
2550 case D3DSIO_DEFI:
2551 imm->type = NINED3DSPTYPE_INT4;
2552 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2553 tx->parse += 4;
2554 break;
2555 case D3DSIO_DEFB:
2556 imm->type = NINED3DSPTYPE_BOOL;
2557 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2558 tx->parse += 1;
2559 break;
2560 default:
2561 assert(0);
2562 break;
2563 }
2564 }
2565
2566 static void
2567 sm1_read_dst_param(struct shader_translator *tx,
2568 struct sm1_dst_param *dst,
2569 struct sm1_src_param *rel)
2570 {
2571 DWORD tok_dst, tok_rel = 0;
2572
2573 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
2574 sm1_parse_dst_param(dst, tok_dst);
2575 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
2576 sm1_parse_src_param(rel, tok_rel);
2577 dst->rel = rel;
2578 }
2579 }
2580
2581 static void
2582 sm1_read_src_param(struct shader_translator *tx,
2583 struct sm1_src_param *src,
2584 struct sm1_src_param *rel)
2585 {
2586 DWORD tok_src, tok_rel = 0;
2587
2588 sm1_parse_get_param(tx, &tok_src, &tok_rel);
2589 sm1_parse_src_param(src, tok_src);
2590 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
2591 assert(rel);
2592 sm1_parse_src_param(rel, tok_rel);
2593 src->rel = rel;
2594 }
2595 }
2596
2597 static void
2598 sm1_read_semantic(struct shader_translator *tx,
2599 struct sm1_semantic *sem)
2600 {
2601 const DWORD tok_usg = TOKEN_NEXT(tx);
2602 const DWORD tok_dst = TOKEN_NEXT(tx);
2603
2604 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
2605 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
2606 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
2607
2608 sm1_parse_dst_param(&sem->reg, tok_dst);
2609 }
2610
2611 static void
2612 sm1_parse_instruction(struct shader_translator *tx)
2613 {
2614 struct sm1_instruction *insn = &tx->insn;
2615 DWORD tok;
2616 struct sm1_op_info *info = NULL;
2617 unsigned i;
2618
2619 sm1_parse_comments(tx, TRUE);
2620 sm1_parse_get_skip(tx);
2621
2622 tok = TOKEN_NEXT(tx);
2623
2624 insn->opcode = tok & D3DSI_OPCODE_MASK;
2625 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
2626 insn->coissue = !!(tok & D3DSI_COISSUE);
2627 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
2628
2629 if (insn->opcode < Elements(tx->op_info_map)) {
2630 int k = tx->op_info_map[insn->opcode];
2631 if (k >= 0) {
2632 assert(k < Elements(inst_table));
2633 info = &inst_table[k];
2634 }
2635 } else {
2636 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
2637 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
2638 }
2639 if (!info) {
2640 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
2641 TOKEN_JUMP(tx);
2642 return;
2643 }
2644 insn->info = info;
2645 insn->ndst = info->ndst;
2646 insn->nsrc = info->nsrc;
2647
2648 assert(!insn->predicated && "TODO: predicated instructions");
2649
2650 /* check version */
2651 {
2652 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
2653 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
2654 unsigned ver = (tx->version.major << 8) | tx->version.minor;
2655 if (ver < min || ver > max) {
2656 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
2657 min, ver, max);
2658 return;
2659 }
2660 }
2661
2662 for (i = 0; i < insn->ndst; ++i)
2663 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
2664 if (insn->predicated)
2665 sm1_read_src_param(tx, &insn->pred, NULL);
2666 for (i = 0; i < insn->nsrc; ++i)
2667 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
2668
2669 /* parse here so we can dump them before processing */
2670 if (insn->opcode == D3DSIO_DEF ||
2671 insn->opcode == D3DSIO_DEFI ||
2672 insn->opcode == D3DSIO_DEFB)
2673 sm1_parse_immediate(tx, &tx->insn.src[0]);
2674
2675 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
2676 sm1_instruction_check(insn);
2677
2678 if (info->handler)
2679 info->handler(tx);
2680 else
2681 NineTranslateInstruction_Generic(tx);
2682 tx_apply_dst0_modifiers(tx);
2683
2684 tx->num_scratch = 0; /* reset */
2685
2686 TOKEN_JUMP(tx);
2687 }
2688
2689 static void
2690 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
2691 {
2692 unsigned i;
2693
2694 tx->info = info;
2695
2696 tx->byte_code = info->byte_code;
2697 tx->parse = info->byte_code;
2698
2699 for (i = 0; i < Elements(info->input_map); ++i)
2700 info->input_map[i] = NINE_DECLUSAGE_NONE;
2701 info->num_inputs = 0;
2702
2703 info->position_t = FALSE;
2704 info->point_size = FALSE;
2705
2706 tx->info->const_used_size = 0;
2707
2708 info->sampler_mask = 0x0;
2709 info->rt_mask = 0x0;
2710
2711 info->lconstf.data = NULL;
2712 info->lconstf.ranges = NULL;
2713
2714 for (i = 0; i < Elements(tx->regs.aL); ++i) {
2715 tx->regs.aL[i] = ureg_dst_undef();
2716 tx->regs.rL[i] = ureg_dst_undef();
2717 }
2718 tx->regs.a = ureg_dst_undef();
2719 tx->regs.p = ureg_dst_undef();
2720 tx->regs.oDepth = ureg_dst_undef();
2721 tx->regs.vPos = ureg_src_undef();
2722 tx->regs.vFace = ureg_src_undef();
2723 for (i = 0; i < Elements(tx->regs.o); ++i)
2724 tx->regs.o[i] = ureg_dst_undef();
2725 for (i = 0; i < Elements(tx->regs.oCol); ++i)
2726 tx->regs.oCol[i] = ureg_dst_undef();
2727 for (i = 0; i < Elements(tx->regs.vC); ++i)
2728 tx->regs.vC[i] = ureg_src_undef();
2729 for (i = 0; i < Elements(tx->regs.vT); ++i)
2730 tx->regs.vT[i] = ureg_src_undef();
2731
2732 for (i = 0; i < Elements(tx->lconsti); ++i)
2733 tx->lconsti[i].idx = -1;
2734 for (i = 0; i < Elements(tx->lconstb); ++i)
2735 tx->lconstb[i].idx = -1;
2736
2737 sm1_read_version(tx);
2738
2739 info->version = (tx->version.major << 4) | tx->version.minor;
2740
2741 create_op_info_map(tx);
2742 }
2743
2744 static void
2745 tx_dtor(struct shader_translator *tx)
2746 {
2747 if (tx->num_inst_labels)
2748 FREE(tx->inst_labels);
2749 FREE(tx->lconstf);
2750 FREE(tx->regs.r);
2751 FREE(tx);
2752 }
2753
2754 static INLINE unsigned
2755 tgsi_processor_from_type(unsigned shader_type)
2756 {
2757 switch (shader_type) {
2758 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
2759 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
2760 default:
2761 return ~0;
2762 }
2763 }
2764
/* Shorthands for querying the pipe screen.
 * Both expand to expressions that use a variable named `device` from the
 * enclosing scope; GET_SHADER_CAP additionally uses `info`, passing
 * info->type as the shader stage argument. */
#define GET_CAP(n) device->screen->get_param( \
      device->screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) device->screen->get_shader_param( \
      device->screen, info->type, PIPE_SHADER_CAP_##n)
2769
2770 HRESULT
2771 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
2772 {
2773 struct shader_translator *tx;
2774 HRESULT hr = D3D_OK;
2775 const unsigned processor = tgsi_processor_from_type(info->type);
2776
2777 user_assert(processor != ~0, D3DERR_INVALIDCALL);
2778
2779 tx = CALLOC_STRUCT(shader_translator);
2780 if (!tx)
2781 return E_OUTOFMEMORY;
2782 tx_ctor(tx, info);
2783
2784 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
2785 hr = D3DERR_INVALIDCALL;
2786 DBG("Unsupported shader version: %u.%u !\n",
2787 tx->version.major, tx->version.minor);
2788 goto out;
2789 }
2790 if (tx->processor != processor) {
2791 hr = D3DERR_INVALIDCALL;
2792 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
2793 goto out;
2794 }
2795 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
2796 tx->version.major, tx->version.minor);
2797
2798 tx->ureg = ureg_create(processor);
2799 if (!tx->ureg) {
2800 hr = E_OUTOFMEMORY;
2801 goto out;
2802 }
2803 tx_decl_constants(tx);
2804
2805 tx->native_integers = GET_SHADER_CAP(INTEGERS);
2806 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
2807 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
2808 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
2809 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2810 tx->texcoord_sn = tx->want_texcoord ?
2811 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
2812
2813 /* VS must always write position. Declare it here to make it the 1st output.
2814 * (Some drivers like nv50 are buggy and rely on that.)
2815 */
2816 if (IS_VS) {
2817 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
2818 } else {
2819 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
2820 if (!tx->shift_wpos)
2821 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2822 }
2823
2824 while (!sm1_parse_eof(tx))
2825 sm1_parse_instruction(tx);
2826 tx->parse++; /* for byte_size */
2827
2828 if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
2829 ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
2830 ureg_src(tx->regs.r[0]));
2831 info->rt_mask |= 0x1;
2832 }
2833
2834 if (info->position_t)
2835 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
2836
2837 ureg_END(tx->ureg);
2838
2839 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
2840 info->point_size = TRUE;
2841
2842 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
2843 unsigned count;
2844 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
2845 tgsi_dump(toks, 0);
2846 ureg_free_tokens(toks);
2847 }
2848
2849 /* record local constants */
2850 if (tx->num_lconstf && tx->indirect_const_access) {
2851 struct nine_range *ranges;
2852 float *data;
2853 int *indices;
2854 unsigned i, k, n;
2855
2856 hr = E_OUTOFMEMORY;
2857
2858 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
2859 if (!data)
2860 goto out;
2861 info->lconstf.data = data;
2862
2863 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
2864 if (!indices)
2865 goto out;
2866
2867 /* lazy sort, num_lconstf should be small */
2868 for (n = 0; n < tx->num_lconstf; ++n) {
2869 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
2870 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
2871 k = i;
2872 }
2873 indices[n] = tx->lconstf[k].idx;
2874 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
2875 tx->lconstf[k].idx = INT_MAX;
2876 }
2877
2878 /* count ranges */
2879 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
2880 if (indices[i] != indices[i - 1] + 1)
2881 ++n;
2882 ranges = MALLOC(n * sizeof(ranges[0]));
2883 if (!ranges) {
2884 FREE(indices);
2885 goto out;
2886 }
2887 info->lconstf.ranges = ranges;
2888
2889 k = 0;
2890 ranges[k].bgn = indices[0];
2891 for (i = 1; i < tx->num_lconstf; ++i) {
2892 if (indices[i] != indices[i - 1] + 1) {
2893 ranges[k].next = &ranges[k + 1];
2894 ranges[k].end = indices[i - 1] + 1;
2895 ++k;
2896 ranges[k].bgn = indices[i];
2897 }
2898 }
2899 ranges[k].end = indices[i - 1] + 1;
2900 ranges[k].next = NULL;
2901 assert(n == (k + 1));
2902
2903 FREE(indices);
2904 hr = D3D_OK;
2905 }
2906
2907 if (tx->indirect_const_access)
2908 info->const_used_size = ~0;
2909
2910 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
2911 if (!info->cso) {
2912 hr = D3DERR_DRIVERINTERNALERROR;
2913 FREE(info->lconstf.data);
2914 FREE(info->lconstf.ranges);
2915 goto out;
2916 }
2917
2918 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
2919 out:
2920 tx_dtor(tx);
2921 return hr;
2922 }