st/nine: Clamp ps 1.X constants
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_ureg.h"
34 #include "tgsi/tgsi_dump.h"
35
/* Debug channel selected for this file; DUMP below passes it on. */
#define DBG_CHANNEL DBG_SHADER

/* printf-style output on DBG_CHANNEL: forwards its arguments to
 * _nine_debug_printf with NULL as the second argument. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
39
40
/* Translator state; the full definition appears further down. */
struct shader_translator;

/* Signature of a per-opcode translation handler (stored in the `handler`
 * field of struct sm1_op_info). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Opcode -> printable name; forward declaration, used by
 * sm1_dump_instruction. */
static INLINE const char *d3dsio_to_string(unsigned opcode);
46
47
/* Shader kind tags (vertex / pixel).
 * NOTE(review): presumably compared against the high word of the version
 * token - confirm against the parser. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Capacities of the cond_labels[] and loop_labels[] / rL[] / loop_or_rep[]
 * arrays in struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

/* NOTE(review): looks like the end-of-shader token value - confirm. */
#define NINED3DSP_END 0x0000ffff

/* Value kinds for sm1_src_param.type; sm1_dump_immediate switches on these
 * to pick the matching member of the `imm` union. */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Extra register-file value, one past D3DSPR_PREDICATE, marking a source
 * parameter that carries an immediate (see sm1_dump_src_param). */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operator codes. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Position of the 8-bit per-opcode flags field. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values for D3DSIO_TEX; sm1_dump_instruction prints them as the
 * "p" / "b" opcode suffix. */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Write mask bits, one per channel (0..3 = x..w as printed by
 * sm1_dump_writemask). */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: 2 bits per channel selecting x, y, z, w in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* tokens. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Destination modifier bits shifted down to start at bit 0; tested against
 * sm1_dst_param.mod. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
94
/*
 * Source modifiers and where they are available:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2      = PS 1.4 (2x)
 * X2NEG   = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 *
 * The values below are the D3DSPSM_* codes shifted down by
 * D3DSP_SRCMOD_SHIFT; they are stored in sm1_src_param.mod and used as
 * indices into sm1_mod_str.
 */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
124
125 static const char *sm1_mod_str[] =
126 {
127 [NINED3DSPSM_NONE] = "",
128 [NINED3DSPSM_NEG] = "-",
129 [NINED3DSPSM_BIAS] = "bias",
130 [NINED3DSPSM_BIASNEG] = "biasneg",
131 [NINED3DSPSM_SIGN] = "sign",
132 [NINED3DSPSM_SIGNNEG] = "signneg",
133 [NINED3DSPSM_COMP] = "comp",
134 [NINED3DSPSM_X2] = "x2",
135 [NINED3DSPSM_X2NEG] = "x2neg",
136 [NINED3DSPSM_DZ] = "dz",
137 [NINED3DSPSM_DW] = "dw",
138 [NINED3DSPSM_ABS] = "abs",
139 [NINED3DSPSM_ABSNEG] = "-abs",
140 [NINED3DSPSM_NOT] = "not"
141 };
142
143 static void
144 sm1_dump_writemask(BYTE mask)
145 {
146 if (mask & 1) DUMP("x"); else DUMP("_");
147 if (mask & 2) DUMP("y"); else DUMP("_");
148 if (mask & 4) DUMP("z"); else DUMP("_");
149 if (mask & 8) DUMP("w"); else DUMP("_");
150 }
151
152 static void
153 sm1_dump_swizzle(BYTE s)
154 {
155 char c[4] = { 'x', 'y', 'z', 'w' };
156 DUMP("%c%c%c%c",
157 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
158 }
159
160 static const char sm1_file_char[] =
161 {
162 [D3DSPR_TEMP] = 'r',
163 [D3DSPR_INPUT] = 'v',
164 [D3DSPR_CONST] = 'c',
165 [D3DSPR_ADDR] = 'A',
166 [D3DSPR_RASTOUT] = 'R',
167 [D3DSPR_ATTROUT] = 'D',
168 [D3DSPR_OUTPUT] = 'o',
169 [D3DSPR_CONSTINT] = 'I',
170 [D3DSPR_COLOROUT] = 'C',
171 [D3DSPR_DEPTHOUT] = 'D',
172 [D3DSPR_SAMPLER] = 's',
173 [D3DSPR_CONST2] = 'c',
174 [D3DSPR_CONST3] = 'c',
175 [D3DSPR_CONST4] = 'c',
176 [D3DSPR_CONSTBOOL] = 'B',
177 [D3DSPR_LOOP] = 'L',
178 [D3DSPR_TEMPFLOAT16] = 'h',
179 [D3DSPR_MISCTYPE] = 'M',
180 [D3DSPR_LABEL] = 'X',
181 [D3DSPR_PREDICATE] = 'p'
182 };
183
184 static void
185 sm1_dump_reg(BYTE file, INT index)
186 {
187 switch (file) {
188 case D3DSPR_LOOP:
189 DUMP("aL");
190 break;
191 case D3DSPR_COLOROUT:
192 DUMP("oC%i", index);
193 break;
194 case D3DSPR_DEPTHOUT:
195 DUMP("oDepth");
196 break;
197 case D3DSPR_RASTOUT:
198 DUMP("oRast%i", index);
199 break;
200 case D3DSPR_CONSTINT:
201 DUMP("iconst[%i]", index);
202 break;
203 case D3DSPR_CONSTBOOL:
204 DUMP("bconst[%i]", index);
205 break;
206 default:
207 DUMP("%c%i", sm1_file_char[file], index);
208 break;
209 }
210 }
211
212 struct sm1_src_param
213 {
214 INT idx;
215 struct sm1_src_param *rel;
216 BYTE file;
217 BYTE swizzle;
218 BYTE mod;
219 BYTE type;
220 union {
221 DWORD d[4];
222 float f[4];
223 int i[4];
224 BOOL b;
225 } imm;
226 };
227 static void
228 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
229
230 struct sm1_dst_param
231 {
232 INT idx;
233 struct sm1_src_param *rel;
234 BYTE file;
235 BYTE mask;
236 BYTE mod;
237 int8_t shift; /* sint4 */
238 BYTE type;
239 };
240
241 static INLINE void
242 assert_replicate_swizzle(const struct ureg_src *reg)
243 {
244 assert(reg->SwizzleY == reg->SwizzleX &&
245 reg->SwizzleZ == reg->SwizzleX &&
246 reg->SwizzleW == reg->SwizzleX);
247 }
248
249 static void
250 sm1_dump_immediate(const struct sm1_src_param *param)
251 {
252 switch (param->type) {
253 case NINED3DSPTYPE_FLOAT4:
254 DUMP("{ %f %f %f %f }",
255 param->imm.f[0], param->imm.f[1],
256 param->imm.f[2], param->imm.f[3]);
257 break;
258 case NINED3DSPTYPE_INT4:
259 DUMP("{ %i %i %i %i }",
260 param->imm.i[0], param->imm.i[1],
261 param->imm.i[2], param->imm.i[3]);
262 break;
263 case NINED3DSPTYPE_BOOL:
264 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
265 break;
266 default:
267 assert(0);
268 break;
269 }
270 }
271
272 static void
273 sm1_dump_src_param(const struct sm1_src_param *param)
274 {
275 if (param->file == NINED3DSPR_IMMEDIATE) {
276 assert(!param->mod &&
277 !param->rel &&
278 param->swizzle == NINED3DSP_NOSWIZZLE);
279 sm1_dump_immediate(param);
280 return;
281 }
282
283 if (param->mod)
284 DUMP("%s(", sm1_mod_str[param->mod]);
285 if (param->rel) {
286 DUMP("%c[", sm1_file_char[param->file]);
287 sm1_dump_src_param(param->rel);
288 DUMP("+%i]", param->idx);
289 } else {
290 sm1_dump_reg(param->file, param->idx);
291 }
292 if (param->mod)
293 DUMP(")");
294 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
295 DUMP(".");
296 sm1_dump_swizzle(param->swizzle);
297 }
298 }
299
300 static void
301 sm1_dump_dst_param(const struct sm1_dst_param *param)
302 {
303 if (param->mod & NINED3DSPDM_SATURATE)
304 DUMP("sat ");
305 if (param->mod & NINED3DSPDM_PARTIALP)
306 DUMP("pp ");
307 if (param->mod & NINED3DSPDM_CENTROID)
308 DUMP("centroid ");
309 if (param->shift < 0)
310 DUMP("/%u ", 1 << -param->shift);
311 if (param->shift > 0)
312 DUMP("*%u ", 1 << param->shift);
313
314 if (param->rel) {
315 DUMP("%c[", sm1_file_char[param->file]);
316 sm1_dump_src_param(param->rel);
317 DUMP("+%i]", param->idx);
318 } else {
319 sm1_dump_reg(param->file, param->idx);
320 }
321 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
322 DUMP(".");
323 sm1_dump_writemask(param->mask);
324 }
325 }
326
327 struct sm1_semantic
328 {
329 struct sm1_dst_param reg;
330 BYTE sampler_type;
331 D3DDECLUSAGE usage;
332 BYTE usage_idx;
333 };
334
335 struct sm1_op_info
336 {
337 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
338 * should be ignored completely */
339 unsigned sio;
340 unsigned opcode; /* TGSI_OPCODE_x */
341
342 /* versions are still set even handler is set */
343 struct {
344 unsigned min;
345 unsigned max;
346 } vert_version, frag_version;
347
348 /* number of regs parsed outside of special handler */
349 unsigned ndst;
350 unsigned nsrc;
351
352 /* some instructions don't map perfectly, so use a special handler */
353 translate_instruction_func handler;
354 };
355
356 struct sm1_instruction
357 {
358 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
359 BYTE flags;
360 BOOL coissue;
361 BOOL predicated;
362 BYTE ndst;
363 BYTE nsrc;
364 struct sm1_src_param src[4];
365 struct sm1_src_param src_rel[4];
366 struct sm1_src_param pred;
367 struct sm1_src_param dst_rel[1];
368 struct sm1_dst_param dst[1];
369
370 struct sm1_op_info *info;
371 };
372
373 static void
374 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
375 {
376 unsigned i;
377
378 /* no info stored for these: */
379 if (insn->opcode == D3DSIO_DCL)
380 return;
381 for (i = 0; i < indent; ++i)
382 DUMP(" ");
383
384 if (insn->predicated) {
385 DUMP("@");
386 sm1_dump_src_param(&insn->pred);
387 DUMP(" ");
388 }
389 DUMP("%s", d3dsio_to_string(insn->opcode));
390 if (insn->flags) {
391 switch (insn->opcode) {
392 case D3DSIO_TEX:
393 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
394 break;
395 default:
396 DUMP("_%x", insn->flags);
397 break;
398 }
399 }
400 if (insn->coissue)
401 DUMP("_co");
402 DUMP(" ");
403
404 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
405 sm1_dump_dst_param(&insn->dst[i]);
406 DUMP(" ");
407 }
408
409 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
410 sm1_dump_src_param(&insn->src[i]);
411 DUMP(" ");
412 }
413 if (insn->opcode == D3DSIO_DEF ||
414 insn->opcode == D3DSIO_DEFI ||
415 insn->opcode == D3DSIO_DEFB)
416 sm1_dump_immediate(&insn->src[0]);
417
418 DUMP("\n");
419 }
420
421 struct sm1_local_const
422 {
423 INT idx;
424 struct ureg_src reg;
425 union {
426 boolean b;
427 float f[4];
428 int32_t i[4];
429 } imm;
430 };
431
432 struct shader_translator
433 {
434 const DWORD *byte_code;
435 const DWORD *parse;
436 const DWORD *parse_next;
437
438 struct ureg_program *ureg;
439
440 /* shader version */
441 struct {
442 BYTE major;
443 BYTE minor;
444 } version;
445 unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
446
447 boolean native_integers;
448 boolean inline_subroutines;
449 boolean lower_preds;
450 boolean want_texcoord;
451 boolean shift_wpos;
452 unsigned texcoord_sn;
453
454 struct sm1_instruction insn; /* current instruction */
455
456 struct {
457 struct ureg_dst *r;
458 struct ureg_dst oPos;
459 struct ureg_dst oFog;
460 struct ureg_dst oPts;
461 struct ureg_dst oCol[4];
462 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
463 struct ureg_dst oDepth;
464 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
465 struct ureg_src vPos;
466 struct ureg_src vFace;
467 struct ureg_src s;
468 struct ureg_dst p;
469 struct ureg_dst address;
470 struct ureg_dst a0;
471 struct ureg_dst tS[8]; /* texture stage registers */
472 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
473 struct ureg_dst t[5]; /* scratch TEMPs */
474 struct ureg_src vC[2]; /* PS color in */
475 struct ureg_src vT[8]; /* PS texcoord in */
476 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
477 } regs;
478 unsigned num_temp; /* Elements(regs.r) */
479 unsigned num_scratch;
480 unsigned loop_depth;
481 unsigned loop_depth_max;
482 unsigned cond_depth;
483 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
484 unsigned cond_labels[NINE_MAX_COND_DEPTH];
485 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
486
487 unsigned *inst_labels; /* LABEL op */
488 unsigned num_inst_labels;
489
490 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
491
492 struct sm1_local_const *lconstf;
493 unsigned num_lconstf;
494 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
495 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
496
497 boolean indirect_const_access;
498
499 struct nine_shader_info *info;
500
501 int16_t op_info_map[D3DSIO_BREAKP + 1];
502 };
503
/* Shader-kind tests; both expect a `struct shader_translator *tx` to be in
 * scope at the point of use. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)

/* Forward declaration; the definition is outside this part of the file. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
509
510 static void
511 sm1_instruction_check(const struct sm1_instruction *insn)
512 {
513 if (insn->opcode == D3DSIO_CRS)
514 {
515 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
516 {
517 DBG("CRS.mask.w\n");
518 }
519 }
520 }
521
522 static boolean
523 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
524 {
525 INT i;
526 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
527 for (i = 0; i < tx->num_lconstf; ++i) {
528 if (tx->lconstf[i].idx == index) {
529 *src = tx->lconstf[i].reg;
530 return TRUE;
531 }
532 }
533 return FALSE;
534 }
535 static boolean
536 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
537 {
538 assert(index >= 0 && index < NINE_MAX_CONST_I);
539 if (tx->lconsti[index].idx == index)
540 *src = tx->lconsti[index].reg;
541 return tx->lconsti[index].idx == index;
542 }
543 static boolean
544 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
545 {
546 assert(index >= 0 && index < NINE_MAX_CONST_B);
547 if (tx->lconstb[index].idx == index)
548 *src = tx->lconstb[index].reg;
549 return tx->lconstb[index].idx == index;
550 }
551
552 static void
553 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
554 {
555 unsigned n;
556
557 /* Anno1404 sets out of range constants. */
558 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
559 if (index >= NINE_MAX_CONST_F)
560 WARN("lconstf index %i too high, indirect access won't work\n", index);
561
562 for (n = 0; n < tx->num_lconstf; ++n)
563 if (tx->lconstf[n].idx == index)
564 break;
565 if (n == tx->num_lconstf) {
566 if ((n % 8) == 0) {
567 tx->lconstf = REALLOC(tx->lconstf,
568 (n + 0) * sizeof(tx->lconstf[0]),
569 (n + 8) * sizeof(tx->lconstf[0]));
570 assert(tx->lconstf);
571 }
572 tx->num_lconstf++;
573 }
574 tx->lconstf[n].idx = index;
575 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
576
577 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
578 }
579 static void
580 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
581 {
582 assert(index >= 0 && index < NINE_MAX_CONST_I);
583 tx->lconsti[index].idx = index;
584 tx->lconsti[index].reg = tx->native_integers ?
585 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
586 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
587 }
588 static void
589 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
590 {
591 assert(index >= 0 && index < NINE_MAX_CONST_B);
592 tx->lconstb[index].idx = index;
593 tx->lconstb[index].reg = tx->native_integers ?
594 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
595 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
596 }
597
598 static INLINE struct ureg_dst
599 tx_scratch(struct shader_translator *tx)
600 {
601 assert(tx->num_scratch < Elements(tx->regs.t));
602 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
603 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
604 return tx->regs.t[tx->num_scratch++];
605 }
606
607 static INLINE struct ureg_dst
608 tx_scratch_scalar(struct shader_translator *tx)
609 {
610 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
611 }
612
613 static INLINE struct ureg_src
614 tx_src_scalar(struct ureg_dst dst)
615 {
616 struct ureg_src src = ureg_src(dst);
617 int c = ffs(dst.WriteMask) - 1;
618 if (dst.WriteMask == (1 << c))
619 src = ureg_scalar(src, c);
620 return src;
621 }
622
623 /* Need to declare all constants if indirect addressing is used,
624 * otherwise we could scan the shader to determine the maximum.
625 * TODO: It doesn't really matter for nv50 so I won't do the scan,
626 * but radeon drivers might care, if they don't infer it from TGSI.
627 */
628 static void
629 tx_decl_constants(struct shader_translator *tx)
630 {
631 unsigned i, n = 0;
632
633 for (i = 0; i < NINE_MAX_CONST_F; ++i)
634 ureg_DECL_constant(tx->ureg, n++);
635 for (i = 0; i < NINE_MAX_CONST_I; ++i)
636 ureg_DECL_constant(tx->ureg, n++);
637 for (i = 0; i < (NINE_MAX_CONST_B / 4); ++i)
638 ureg_DECL_constant(tx->ureg, n++);
639 }
640
641 static INLINE void
642 tx_temp_alloc(struct shader_translator *tx, INT idx)
643 {
644 assert(idx >= 0);
645 if (idx >= tx->num_temp) {
646 unsigned k = tx->num_temp;
647 unsigned n = idx + 1;
648 tx->regs.r = REALLOC(tx->regs.r,
649 k * sizeof(tx->regs.r[0]),
650 n * sizeof(tx->regs.r[0]));
651 for (; k < n; ++k)
652 tx->regs.r[k] = ureg_dst_undef();
653 tx->num_temp = n;
654 }
655 if (ureg_dst_is_undef(tx->regs.r[idx]))
656 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
657 }
658
659 static INLINE void
660 tx_addr_alloc(struct shader_translator *tx, INT idx)
661 {
662 assert(idx == 0);
663 if (ureg_dst_is_undef(tx->regs.address))
664 tx->regs.address = ureg_DECL_address(tx->ureg);
665 if (ureg_dst_is_undef(tx->regs.a0))
666 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
667 }
668
669 static INLINE void
670 tx_pred_alloc(struct shader_translator *tx, INT idx)
671 {
672 assert(idx == 0);
673 if (ureg_dst_is_undef(tx->regs.p))
674 tx->regs.p = ureg_DECL_predicate(tx->ureg);
675 }
676
677 static INLINE void
678 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
679 {
680 assert(IS_PS);
681 assert(idx >= 0 && idx < Elements(tx->regs.vT));
682 if (ureg_src_is_undef(tx->regs.vT[idx]))
683 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
684 TGSI_INTERPOLATE_PERSPECTIVE);
685 }
686
687 static INLINE unsigned *
688 tx_bgnloop(struct shader_translator *tx)
689 {
690 tx->loop_depth++;
691 if (tx->loop_depth_max < tx->loop_depth)
692 tx->loop_depth_max = tx->loop_depth;
693 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
694 return &tx->loop_labels[tx->loop_depth - 1];
695 }
696
697 static INLINE unsigned *
698 tx_endloop(struct shader_translator *tx)
699 {
700 assert(tx->loop_depth);
701 tx->loop_depth--;
702 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
703 ureg_get_instruction_number(tx->ureg));
704 return &tx->loop_labels[tx->loop_depth];
705 }
706
707 static struct ureg_dst
708 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
709 {
710 const unsigned l = tx->loop_depth - 1;
711
712 if (!tx->loop_depth)
713 {
714 DBG("loop counter requested outside of loop\n");
715 return ureg_dst_undef();
716 }
717
718 if (ureg_dst_is_undef(tx->regs.rL[l])) {
719 /* loop or rep ctr creation */
720 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
721 tx->loop_or_rep[l] = loop_or_rep;
722 }
723 /* loop - rep - endloop - endrep not allowed */
724 assert(tx->loop_or_rep[l] == loop_or_rep);
725
726 return tx->regs.rL[l];
727 }
728
729 static struct ureg_src
730 tx_get_loopal(struct shader_translator *tx)
731 {
732 int loop_level = tx->loop_depth - 1;
733
734 while (loop_level >= 0) {
735 /* handle loop - rep - endrep - endloop case */
736 if (tx->loop_or_rep[loop_level])
737 /* the value is in the loop counter y component (nine implementation) */
738 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
739 loop_level--;
740 }
741
742 DBG("aL counter requested outside of loop\n");
743 return ureg_src_undef();
744 }
745
746 static INLINE unsigned *
747 tx_cond(struct shader_translator *tx)
748 {
749 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
750 tx->cond_depth++;
751 return &tx->cond_labels[tx->cond_depth - 1];
752 }
753
754 static INLINE unsigned *
755 tx_elsecond(struct shader_translator *tx)
756 {
757 assert(tx->cond_depth);
758 return &tx->cond_labels[tx->cond_depth - 1];
759 }
760
761 static INLINE void
762 tx_endcond(struct shader_translator *tx)
763 {
764 assert(tx->cond_depth);
765 tx->cond_depth--;
766 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
767 ureg_get_instruction_number(tx->ureg));
768 }
769
770 static INLINE struct ureg_dst
771 nine_ureg_dst_register(unsigned file, int index)
772 {
773 return ureg_dst(ureg_src_register(file, index));
774 }
775
776 static struct ureg_src
777 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
778 {
779 struct ureg_program *ureg = tx->ureg;
780 struct ureg_src src;
781 struct ureg_dst tmp;
782
783 switch (param->file)
784 {
785 case D3DSPR_TEMP:
786 assert(!param->rel);
787 tx_temp_alloc(tx, param->idx);
788 src = ureg_src(tx->regs.r[param->idx]);
789 break;
790 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
791 case D3DSPR_ADDR:
792 assert(!param->rel);
793 if (IS_VS) {
794 assert(param->idx == 0);
795 /* the address register (vs only) must be
796 * assigned before use */
797 assert(!ureg_dst_is_undef(tx->regs.a0));
798 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
799 src = ureg_src(tx->regs.address);
800 } else {
801 if (tx->version.major < 2 && tx->version.minor < 4) {
802 /* no subroutines, so should be defined */
803 src = ureg_src(tx->regs.tS[param->idx]);
804 } else {
805 tx_texcoord_alloc(tx, param->idx);
806 src = tx->regs.vT[param->idx];
807 }
808 }
809 break;
810 case D3DSPR_INPUT:
811 if (IS_VS) {
812 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
813 } else {
814 if (tx->version.major < 3) {
815 assert(!param->rel);
816 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
817 param->idx,
818 TGSI_INTERPOLATE_PERSPECTIVE);
819 } else {
820 assert(!param->rel); /* TODO */
821 assert(param->idx < Elements(tx->regs.v));
822 src = tx->regs.v[param->idx];
823 }
824 }
825 break;
826 case D3DSPR_PREDICATE:
827 assert(!param->rel);
828 tx_pred_alloc(tx, param->idx);
829 src = ureg_src(tx->regs.p);
830 break;
831 case D3DSPR_SAMPLER:
832 assert(param->mod == NINED3DSPSM_NONE);
833 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
834 assert(!param->rel);
835 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
836 break;
837 case D3DSPR_CONST:
838 if (param->rel)
839 tx->indirect_const_access = TRUE;
840 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
841 if (!param->rel)
842 nine_info_mark_const_f_used(tx->info, param->idx);
843 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
844 }
845 if (!IS_VS && tx->version.major < 2) {
846 /* ps 1.X clamps constants */
847 tmp = tx_scratch(tx);
848 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
849 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
850 src = ureg_src(tmp);
851 }
852 break;
853 case D3DSPR_CONST2:
854 case D3DSPR_CONST3:
855 case D3DSPR_CONST4:
856 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
857 assert(!"CONST2/3/4");
858 src = ureg_imm1f(ureg, 0.0f);
859 break;
860 case D3DSPR_CONSTINT:
861 if (param->rel || !tx_lconsti(tx, &src, param->idx)) {
862 if (!param->rel)
863 nine_info_mark_const_i_used(tx->info, param->idx);
864 src = ureg_src_register(TGSI_FILE_CONSTANT,
865 tx->info->const_i_base + param->idx);
866 }
867 break;
868 case D3DSPR_CONSTBOOL:
869 if (param->rel || !tx_lconstb(tx, &src, param->idx)) {
870 char r = param->idx / 4;
871 char s = param->idx & 3;
872 if (!param->rel)
873 nine_info_mark_const_b_used(tx->info, param->idx);
874 src = ureg_src_register(TGSI_FILE_CONSTANT,
875 tx->info->const_b_base + r);
876 src = ureg_swizzle(src, s, s, s, s);
877 }
878 break;
879 case D3DSPR_LOOP:
880 if (ureg_dst_is_undef(tx->regs.address))
881 tx->regs.address = ureg_DECL_address(ureg);
882 if (!tx->native_integers)
883 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
884 else
885 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
886 src = ureg_src(tx->regs.address);
887 break;
888 case D3DSPR_MISCTYPE:
889 switch (param->idx) {
890 case D3DSMO_POSITION:
891 if (ureg_src_is_undef(tx->regs.vPos))
892 tx->regs.vPos = ureg_DECL_fs_input(ureg,
893 TGSI_SEMANTIC_POSITION, 0,
894 TGSI_INTERPOLATE_LINEAR);
895 if (tx->shift_wpos) {
896 /* TODO: do this only once */
897 struct ureg_dst wpos = tx_scratch(tx);
898 ureg_SUB(ureg, wpos, tx->regs.vPos,
899 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
900 src = ureg_src(wpos);
901 } else {
902 src = tx->regs.vPos;
903 }
904 break;
905 case D3DSMO_FACE:
906 if (ureg_src_is_undef(tx->regs.vFace)) {
907 tx->regs.vFace = ureg_DECL_fs_input(ureg,
908 TGSI_SEMANTIC_FACE, 0,
909 TGSI_INTERPOLATE_CONSTANT);
910 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
911 }
912 src = tx->regs.vFace;
913 break;
914 default:
915 assert(!"invalid src D3DSMO");
916 break;
917 }
918 assert(!param->rel);
919 break;
920 case D3DSPR_TEMPFLOAT16:
921 break;
922 default:
923 assert(!"invalid src D3DSPR");
924 }
925 if (param->rel)
926 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
927
928 if (param->swizzle != NINED3DSP_NOSWIZZLE)
929 src = ureg_swizzle(src,
930 (param->swizzle >> 0) & 0x3,
931 (param->swizzle >> 2) & 0x3,
932 (param->swizzle >> 4) & 0x3,
933 (param->swizzle >> 6) & 0x3);
934
935 switch (param->mod) {
936 case NINED3DSPSM_ABS:
937 src = ureg_abs(src);
938 break;
939 case NINED3DSPSM_ABSNEG:
940 src = ureg_negate(ureg_abs(src));
941 break;
942 case NINED3DSPSM_NEG:
943 src = ureg_negate(src);
944 break;
945 case NINED3DSPSM_BIAS:
946 tmp = tx_scratch(tx);
947 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
948 src = ureg_src(tmp);
949 break;
950 case NINED3DSPSM_BIASNEG:
951 tmp = tx_scratch(tx);
952 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
953 src = ureg_src(tmp);
954 break;
955 case NINED3DSPSM_NOT:
956 if (tx->native_integers) {
957 tmp = tx_scratch(tx);
958 ureg_NOT(ureg, tmp, src);
959 src = ureg_src(tmp);
960 break;
961 }
962 /* fall through */
963 case NINED3DSPSM_COMP:
964 tmp = tx_scratch(tx);
965 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
966 src = ureg_src(tmp);
967 break;
968 case NINED3DSPSM_DZ:
969 case NINED3DSPSM_DW:
970 /* handled in instruction */
971 break;
972 case NINED3DSPSM_SIGN:
973 tmp = tx_scratch(tx);
974 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
975 src = ureg_src(tmp);
976 break;
977 case NINED3DSPSM_SIGNNEG:
978 tmp = tx_scratch(tx);
979 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
980 src = ureg_src(tmp);
981 break;
982 case NINED3DSPSM_X2:
983 tmp = tx_scratch(tx);
984 ureg_ADD(ureg, tmp, src, src);
985 src = ureg_src(tmp);
986 break;
987 case NINED3DSPSM_X2NEG:
988 tmp = tx_scratch(tx);
989 ureg_ADD(ureg, tmp, src, src);
990 src = ureg_negate(ureg_src(tmp));
991 break;
992 default:
993 assert(param->mod == NINED3DSPSM_NONE);
994 break;
995 }
996
997 return src;
998 }
999
1000 static struct ureg_dst
1001 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1002 {
1003 struct ureg_dst dst;
1004
1005 switch (param->file)
1006 {
1007 case D3DSPR_TEMP:
1008 assert(!param->rel);
1009 tx_temp_alloc(tx, param->idx);
1010 dst = tx->regs.r[param->idx];
1011 break;
1012 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1013 case D3DSPR_ADDR:
1014 assert(!param->rel);
1015 if (tx->version.major < 2 && !IS_VS) {
1016 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1017 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1018 dst = tx->regs.tS[param->idx];
1019 } else
1020 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1021 tx_texcoord_alloc(tx, param->idx);
1022 dst = ureg_dst(tx->regs.vT[param->idx]);
1023 } else {
1024 tx_addr_alloc(tx, param->idx);
1025 dst = tx->regs.a0;
1026 }
1027 break;
1028 case D3DSPR_RASTOUT:
1029 assert(!param->rel);
1030 switch (param->idx) {
1031 case 0:
1032 if (ureg_dst_is_undef(tx->regs.oPos))
1033 tx->regs.oPos =
1034 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1035 dst = tx->regs.oPos;
1036 break;
1037 case 1:
1038 if (ureg_dst_is_undef(tx->regs.oFog))
1039 tx->regs.oFog =
1040 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1041 dst = tx->regs.oFog;
1042 break;
1043 case 2:
1044 if (ureg_dst_is_undef(tx->regs.oPts))
1045 tx->regs.oPts =
1046 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1047 dst = tx->regs.oPts;
1048 break;
1049 default:
1050 assert(0);
1051 break;
1052 }
1053 break;
1054 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1055 case D3DSPR_OUTPUT:
1056 if (tx->version.major < 3) {
1057 assert(!param->rel);
1058 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1059 } else {
1060 assert(!param->rel); /* TODO */
1061 assert(param->idx < Elements(tx->regs.o));
1062 dst = tx->regs.o[param->idx];
1063 }
1064 break;
1065 case D3DSPR_ATTROUT: /* VS */
1066 case D3DSPR_COLOROUT: /* PS */
1067 assert(param->idx >= 0 && param->idx < 4);
1068 assert(!param->rel);
1069 tx->info->rt_mask |= 1 << param->idx;
1070 if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
1071 tx->regs.oCol[param->idx] =
1072 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1073 dst = tx->regs.oCol[param->idx];
1074 if (IS_VS && tx->version.major < 3)
1075 dst = ureg_saturate(dst);
1076 break;
1077 case D3DSPR_DEPTHOUT:
1078 assert(!param->rel);
1079 if (ureg_dst_is_undef(tx->regs.oDepth))
1080 tx->regs.oDepth =
1081 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1082 TGSI_WRITEMASK_Z);
1083 dst = tx->regs.oDepth; /* XXX: must write .z component */
1084 break;
1085 case D3DSPR_PREDICATE:
1086 assert(!param->rel);
1087 tx_pred_alloc(tx, param->idx);
1088 dst = tx->regs.p;
1089 break;
1090 case D3DSPR_TEMPFLOAT16:
1091 DBG("unhandled D3DSPR: %u\n", param->file);
1092 break;
1093 default:
1094 assert(!"invalid dst D3DSPR");
1095 break;
1096 }
1097 if (param->rel)
1098 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1099
1100 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1101 dst = ureg_writemask(dst, param->mask);
1102 if (param->mod & NINED3DSPDM_SATURATE)
1103 dst = ureg_saturate(dst);
1104
1105 return dst;
1106 }
1107
1108 static struct ureg_dst
1109 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1110 {
1111 if (param->shift) {
1112 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1113 return tx->regs.tdst;
1114 }
1115 return _tx_dst_param(tx, param);
1116 }
1117
1118 static void
1119 tx_apply_dst0_modifiers(struct shader_translator *tx)
1120 {
1121 struct ureg_dst rdst;
1122 float f;
1123
1124 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1125 return;
1126 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1127
1128 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1129
1130 if (tx->insn.dst[0].shift < 0)
1131 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1132 else
1133 f = 1 << tx->insn.dst[0].shift;
1134
1135 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1136 }
1137
1138 static struct ureg_src
1139 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1140 {
1141 struct ureg_src src;
1142
1143 assert(!param->shift);
1144 assert(!(param->mod & NINED3DSPDM_SATURATE));
1145
1146 switch (param->file) {
1147 case D3DSPR_INPUT:
1148 if (IS_VS) {
1149 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1150 } else {
1151 assert(!param->rel);
1152 assert(param->idx < Elements(tx->regs.v));
1153 src = tx->regs.v[param->idx];
1154 }
1155 break;
1156 default:
1157 src = ureg_src(tx_dst_param(tx, param));
1158 break;
1159 }
1160 if (param->rel)
1161 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1162
1163 if (!param->mask)
1164 WARN("mask is 0, using identity swizzle\n");
1165
1166 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1167 char s[4];
1168 int n;
1169 int c;
1170 for (n = 0, c = 0; c < 4; ++c)
1171 if (param->mask & (1 << c))
1172 s[n++] = c;
1173 assert(n);
1174 for (c = n; c < 4; ++c)
1175 s[c] = s[n - 1];
1176 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1177 }
1178 return src;
1179 }
1180
1181 static HRESULT
1182 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1183 {
1184 struct ureg_program *ureg = tx->ureg;
1185 struct ureg_dst dst;
1186 struct ureg_src src[2];
1187 struct sm1_src_param *src_mat = &tx->insn.src[1];
1188 unsigned i;
1189
1190 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1191 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1192
1193 for (i = 0; i < n; i++)
1194 {
1195 const unsigned m = (1 << i);
1196
1197 src[1] = tx_src_param(tx, src_mat);
1198 src_mat->idx++;
1199
1200 if (!(dst.WriteMask & m))
1201 continue;
1202
1203 /* XXX: src == dst case ? */
1204
1205 switch (k) {
1206 case 3:
1207 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1208 break;
1209 case 4:
1210 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1211 break;
1212 default:
1213 DBG("invalid operation: M%ux%u\n", m, n);
1214 break;
1215 }
1216 }
1217
1218 return D3D_OK;
1219 }
1220
/* Expands to two zero arguments; presumably used in place of a (min, max)
 * version pair for opcodes a shader stage does not support -- confirm at the
 * use sites. */
#define VNOTSUPPORTED   0, 0
/* Pack a shader version as (major << 8) | minor. */
#define V(maj, min)     (((maj) << 8) | (min))
1223
1224 static INLINE const char *
1225 d3dsio_to_string( unsigned opcode )
1226 {
1227 static const char *names[] = {
1228 "NOP",
1229 "MOV",
1230 "ADD",
1231 "SUB",
1232 "MAD",
1233 "MUL",
1234 "RCP",
1235 "RSQ",
1236 "DP3",
1237 "DP4",
1238 "MIN",
1239 "MAX",
1240 "SLT",
1241 "SGE",
1242 "EXP",
1243 "LOG",
1244 "LIT",
1245 "DST",
1246 "LRP",
1247 "FRC",
1248 "M4x4",
1249 "M4x3",
1250 "M3x4",
1251 "M3x3",
1252 "M3x2",
1253 "CALL",
1254 "CALLNZ",
1255 "LOOP",
1256 "RET",
1257 "ENDLOOP",
1258 "LABEL",
1259 "DCL",
1260 "POW",
1261 "CRS",
1262 "SGN",
1263 "ABS",
1264 "NRM",
1265 "SINCOS",
1266 "REP",
1267 "ENDREP",
1268 "IF",
1269 "IFC",
1270 "ELSE",
1271 "ENDIF",
1272 "BREAK",
1273 "BREAKC",
1274 "MOVA",
1275 "DEFB",
1276 "DEFI",
1277 NULL,
1278 NULL,
1279 NULL,
1280 NULL,
1281 NULL,
1282 NULL,
1283 NULL,
1284 NULL,
1285 NULL,
1286 NULL,
1287 NULL,
1288 NULL,
1289 NULL,
1290 NULL,
1291 NULL,
1292 "TEXCOORD",
1293 "TEXKILL",
1294 "TEX",
1295 "TEXBEM",
1296 "TEXBEML",
1297 "TEXREG2AR",
1298 "TEXREG2GB",
1299 "TEXM3x2PAD",
1300 "TEXM3x2TEX",
1301 "TEXM3x3PAD",
1302 "TEXM3x3TEX",
1303 NULL,
1304 "TEXM3x3SPEC",
1305 "TEXM3x3VSPEC",
1306 "EXPP",
1307 "LOGP",
1308 "CND",
1309 "DEF",
1310 "TEXREG2RGB",
1311 "TEXDP3TEX",
1312 "TEXM3x2DEPTH",
1313 "TEXDP3",
1314 "TEXM3x3",
1315 "TEXDEPTH",
1316 "CMP",
1317 "BEM",
1318 "DP2ADD",
1319 "DSX",
1320 "DSY",
1321 "TEXLDD",
1322 "SETP",
1323 "TEXLDL",
1324 "BREAKP"
1325 };
1326
1327 if (opcode < Elements(names)) return names[opcode];
1328
1329 switch (opcode) {
1330 case D3DSIO_PHASE: return "PHASE";
1331 case D3DSIO_COMMENT: return "COMMENT";
1332 case D3DSIO_END: return "END";
1333 default:
1334 return NULL;
1335 }
1336 }
1337
1338 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1339 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1340 (inst).vert_version.max | \
1341 (inst).frag_version.min | \
1342 (inst).frag_version.max)
1343
1344 #define SPECIAL(name) \
1345 NineTranslateInstruction_##name
1346
1347 #define DECL_SPECIAL(name) \
1348 static HRESULT \
1349 NineTranslateInstruction_##name( struct shader_translator *tx )
1350
1351 static HRESULT
1352 NineTranslateInstruction_Generic(struct shader_translator *);
1353
1354 DECL_SPECIAL(M4x4)
1355 {
1356 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1357 }
1358
1359 DECL_SPECIAL(M4x3)
1360 {
1361 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1362 }
1363
1364 DECL_SPECIAL(M3x4)
1365 {
1366 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1367 }
1368
1369 DECL_SPECIAL(M3x3)
1370 {
1371 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1372 }
1373
1374 DECL_SPECIAL(M3x2)
1375 {
1376 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1377 }
1378
1379 DECL_SPECIAL(CMP)
1380 {
1381 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1382 tx_src_param(tx, &tx->insn.src[0]),
1383 tx_src_param(tx, &tx->insn.src[2]),
1384 tx_src_param(tx, &tx->insn.src[1]));
1385 return D3D_OK;
1386 }
1387
1388 DECL_SPECIAL(CND)
1389 {
1390 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1391 struct ureg_dst cgt;
1392 struct ureg_src cnd;
1393
1394 /* the coissue flag was a tip for compilers to advise to
1395 * execute two operations at the same time, in cases
1396 * the two executions had same dst with different channels.
1397 * It has no effect on current hw. However it seems CND
1398 * is affected. The handling of this very specific case
1399 * handled below mimick wine behaviour */
1400 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1401 ureg_MOV(tx->ureg,
1402 dst, tx_src_param(tx, &tx->insn.src[1]));
1403 return D3D_OK;
1404 }
1405
1406 cnd = tx_src_param(tx, &tx->insn.src[0]);
1407 cgt = tx_scratch(tx);
1408
1409 if (tx->version.major == 1 && tx->version.minor < 4)
1410 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1411
1412 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1413
1414 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1415 tx_src_param(tx, &tx->insn.src[1]),
1416 tx_src_param(tx, &tx->insn.src[2]));
1417 return D3D_OK;
1418 }
1419
1420 DECL_SPECIAL(CALL)
1421 {
1422 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1423 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1424 return D3D_OK;
1425 }
1426
1427 DECL_SPECIAL(CALLNZ)
1428 {
1429 struct ureg_program *ureg = tx->ureg;
1430 struct ureg_dst tmp = tx_scratch_scalar(tx);
1431 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1432
1433 /* NOTE: source should be const bool, so we can use NOT/SUB instead of [U]SNE 0 */
1434 if (!tx->insn.flags) {
1435 if (tx->native_integers)
1436 ureg_NOT(ureg, tmp, src);
1437 else
1438 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1439 }
1440 ureg_IF(ureg, tx->insn.flags ? src : tx_src_scalar(tmp), tx_cond(tx));
1441 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1442 tx_endcond(tx);
1443 ureg_ENDIF(ureg);
1444 return D3D_OK;
1445 }
1446
1447 DECL_SPECIAL(MOV_vs1x)
1448 {
1449 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1450 /* Implementation note: We don't write directly
1451 * to the addr register, but to an intermediate
1452 * float register.
1453 * Contrary to the doc, when writing to ADDR here,
1454 * the rounding is not to nearest, but to lowest
1455 * (wine test).
1456 * Since we use ARR next, substract 0.5. */
1457 ureg_SUB(tx->ureg,
1458 tx_dst_param(tx, &tx->insn.dst[0]),
1459 tx_src_param(tx, &tx->insn.src[0]),
1460 ureg_imm1f(tx->ureg, 0.5f));
1461 return D3D_OK;
1462 }
1463 return NineTranslateInstruction_Generic(tx);
1464 }
1465
1466 DECL_SPECIAL(LOOP)
1467 {
1468 struct ureg_program *ureg = tx->ureg;
1469 unsigned *label;
1470 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1471 struct ureg_dst ctr;
1472 struct ureg_dst tmp;
1473 struct ureg_src ctrx;
1474
1475 label = tx_bgnloop(tx);
1476 ctr = tx_get_loopctr(tx, TRUE);
1477 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1478
1479 /* src: num_iterations - start_value of al - step for al - 0 */
1480 ureg_MOV(ureg, ctr, src);
1481 ureg_BGNLOOP(tx->ureg, label);
1482 tmp = tx_scratch_scalar(tx);
1483 /* Initially ctr.x contains the number of iterations.
1484 * ctr.y will contain the updated value of al.
1485 * We decrease ctr.x at the end of every iteration,
1486 * and stop when it reaches 0. */
1487
1488 if (!tx->native_integers) {
1489 /* case src and ctr contain floats */
1490 /* to avoid precision issue, we stop when ctr <= 0.5 */
1491 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1492 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1493 } else {
1494 /* case src and ctr contain integers */
1495 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1496 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1497 }
1498 ureg_BRK(ureg);
1499 tx_endcond(tx);
1500 ureg_ENDIF(ureg);
1501 return D3D_OK;
1502 }
1503
1504 DECL_SPECIAL(RET)
1505 {
1506 ureg_RET(tx->ureg);
1507 return D3D_OK;
1508 }
1509
1510 DECL_SPECIAL(ENDLOOP)
1511 {
1512 struct ureg_program *ureg = tx->ureg;
1513 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1514 struct ureg_dst dst_ctrx, dst_al;
1515 struct ureg_src src_ctr, al_counter;
1516
1517 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1518 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1519 src_ctr = ureg_src(ctr);
1520 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1521
1522 /* ctr.x -= 1
1523 * ctr.y (aL) += step */
1524 if (!tx->native_integers) {
1525 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1526 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1527 } else {
1528 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1529 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1530 }
1531 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1532 return D3D_OK;
1533 }
1534
1535 DECL_SPECIAL(LABEL)
1536 {
1537 unsigned k = tx->num_inst_labels;
1538 unsigned n = tx->insn.src[0].idx;
1539 assert(n < 2048);
1540 if (n >= k)
1541 tx->inst_labels = REALLOC(tx->inst_labels,
1542 k * sizeof(tx->inst_labels[0]),
1543 n * sizeof(tx->inst_labels[0]));
1544
1545 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1546 return D3D_OK;
1547 }
1548
1549 DECL_SPECIAL(SINCOS)
1550 {
1551 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1552 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1553
1554 assert(!(dst.WriteMask & 0xc));
1555
1556 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1557 ureg_SCS(tx->ureg, dst, src);
1558 return D3D_OK;
1559 }
1560
1561 DECL_SPECIAL(SGN)
1562 {
1563 ureg_SSG(tx->ureg,
1564 tx_dst_param(tx, &tx->insn.dst[0]),
1565 tx_src_param(tx, &tx->insn.src[0]));
1566 return D3D_OK;
1567 }
1568
1569 DECL_SPECIAL(REP)
1570 {
1571 struct ureg_program *ureg = tx->ureg;
1572 unsigned *label;
1573 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1574 struct ureg_dst ctr;
1575 struct ureg_dst tmp;
1576 struct ureg_src ctrx;
1577
1578 label = tx_bgnloop(tx);
1579 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1580 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1581
1582 /* NOTE: rep must be constant, so we don't have to save the count */
1583 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1584
1585 /* rep: num_iterations - 0 - 0 - 0 */
1586 ureg_MOV(ureg, ctr, rep);
1587 ureg_BGNLOOP(ureg, label);
1588 tmp = tx_scratch_scalar(tx);
1589 /* Initially ctr.x contains the number of iterations.
1590 * We decrease ctr.x at the end of every iteration,
1591 * and stop when it reaches 0. */
1592
1593 if (!tx->native_integers) {
1594 /* case src and ctr contain floats */
1595 /* to avoid precision issue, we stop when ctr <= 0.5 */
1596 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1597 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1598 } else {
1599 /* case src and ctr contain integers */
1600 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1601 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1602 }
1603 ureg_BRK(ureg);
1604 tx_endcond(tx);
1605 ureg_ENDIF(ureg);
1606
1607 return D3D_OK;
1608 }
1609
1610 DECL_SPECIAL(ENDREP)
1611 {
1612 struct ureg_program *ureg = tx->ureg;
1613 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1614 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1615 struct ureg_src src_ctr = ureg_src(ctr);
1616
1617 /* ctr.x -= 1 */
1618 if (!tx->native_integers)
1619 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1620 else
1621 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1622
1623 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1624 return D3D_OK;
1625 }
1626
1627 DECL_SPECIAL(ENDIF)
1628 {
1629 tx_endcond(tx);
1630 ureg_ENDIF(tx->ureg);
1631 return D3D_OK;
1632 }
1633
1634 DECL_SPECIAL(IF)
1635 {
1636 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1637
1638 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1639 ureg_UIF(tx->ureg, src, tx_cond(tx));
1640 else
1641 ureg_IF(tx->ureg, src, tx_cond(tx));
1642
1643 return D3D_OK;
1644 }
1645
1646 static INLINE unsigned
1647 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1648 {
1649 switch (flags) {
1650 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1651 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1652 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1653 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1654 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1655 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1656 default:
1657 assert(!"invalid comparison flags");
1658 return TGSI_OPCODE_SGT;
1659 }
1660 }
1661
1662 DECL_SPECIAL(IFC)
1663 {
1664 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1665 struct ureg_src src[2];
1666 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1667 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1668 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1669 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1670 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1671 return D3D_OK;
1672 }
1673
1674 DECL_SPECIAL(ELSE)
1675 {
1676 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1677 return D3D_OK;
1678 }
1679
1680 DECL_SPECIAL(BREAKC)
1681 {
1682 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1683 struct ureg_src src[2];
1684 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1685 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1686 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1687 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1688 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1689 ureg_BRK(tx->ureg);
1690 tx_endcond(tx);
1691 ureg_ENDIF(tx->ureg);
1692 return D3D_OK;
1693 }
1694
1695 static const char *sm1_declusage_names[] =
1696 {
1697 [D3DDECLUSAGE_POSITION] = "POSITION",
1698 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1699 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1700 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1701 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1702 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1703 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1704 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1705 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1706 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1707 [D3DDECLUSAGE_COLOR] = "COLOR",
1708 [D3DDECLUSAGE_FOG] = "FOG",
1709 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1710 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1711 };
1712
1713 static INLINE unsigned
1714 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1715 {
1716 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1717 }
1718
1719 static void
1720 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1721 boolean tc,
1722 struct sm1_semantic *dcl)
1723 {
1724 BYTE index = dcl->usage_idx;
1725
1726 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1727 * we match to a TGSI_SEMANTIC_GENERIC with index.
1728 *
1729 * The index can be anything UINT16 and usage_idx is BYTE,
1730 * so we can fit everything. It doesn't matter if indices
1731 * are close together or low.
1732 *
1733 *
1734 * POSITION >= 1: 10 * index + 6
1735 * COLOR >= 2: 10 * (index-1) + 7
1736 * TEXCOORD[0..15]: index
1737 * BLENDWEIGHT: 10 * index + 18
1738 * BLENDINDICES: 10 * index + 19
1739 * NORMAL: 10 * index + 20
1740 * TANGENT: 10 * index + 21
1741 * BINORMAL: 10 * index + 22
1742 * TESSFACTOR: 10 * index + 23
1743 */
1744
1745 switch (dcl->usage) {
1746 case D3DDECLUSAGE_POSITION:
1747 case D3DDECLUSAGE_POSITIONT:
1748 case D3DDECLUSAGE_DEPTH:
1749 if (index == 0) {
1750 sem->Name = TGSI_SEMANTIC_POSITION;
1751 sem->Index = 0;
1752 } else {
1753 sem->Name = TGSI_SEMANTIC_GENERIC;
1754 sem->Index = 10 * index + 6;
1755 }
1756 break;
1757 case D3DDECLUSAGE_COLOR:
1758 if (index < 2) {
1759 sem->Name = TGSI_SEMANTIC_COLOR;
1760 sem->Index = index;
1761 } else {
1762 sem->Name = TGSI_SEMANTIC_GENERIC;
1763 sem->Index = 10 * (index-1) + 7;
1764 }
1765 break;
1766 case D3DDECLUSAGE_FOG:
1767 assert(index == 0);
1768 sem->Name = TGSI_SEMANTIC_FOG;
1769 sem->Index = 0;
1770 break;
1771 case D3DDECLUSAGE_PSIZE:
1772 assert(index == 0);
1773 sem->Name = TGSI_SEMANTIC_PSIZE;
1774 sem->Index = 0;
1775 break;
1776 case D3DDECLUSAGE_TEXCOORD:
1777 assert(index < 16);
1778 if (index < 8 && tc)
1779 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1780 else
1781 sem->Name = TGSI_SEMANTIC_GENERIC;
1782 sem->Index = index;
1783 break;
1784 case D3DDECLUSAGE_BLENDWEIGHT:
1785 sem->Name = TGSI_SEMANTIC_GENERIC;
1786 sem->Index = 10 * index + 18;
1787 break;
1788 case D3DDECLUSAGE_BLENDINDICES:
1789 sem->Name = TGSI_SEMANTIC_GENERIC;
1790 sem->Index = 10 * index + 19;
1791 break;
1792 case D3DDECLUSAGE_NORMAL:
1793 sem->Name = TGSI_SEMANTIC_GENERIC;
1794 sem->Index = 10 * index + 20;
1795 break;
1796 case D3DDECLUSAGE_TANGENT:
1797 sem->Name = TGSI_SEMANTIC_GENERIC;
1798 sem->Index = 10 * index + 21;
1799 break;
1800 case D3DDECLUSAGE_BINORMAL:
1801 sem->Name = TGSI_SEMANTIC_GENERIC;
1802 sem->Index = 10 * index + 22;
1803 break;
1804 case D3DDECLUSAGE_TESSFACTOR:
1805 sem->Name = TGSI_SEMANTIC_GENERIC;
1806 sem->Index = 10 * index + 23;
1807 break;
1808 case D3DDECLUSAGE_SAMPLE:
1809 sem->Name = TGSI_SEMANTIC_COUNT;
1810 sem->Index = 0;
1811 break;
1812 default:
1813 assert(!"Invalid DECLUSAGE.");
1814 break;
1815 }
1816 }
1817
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT. */
#define NINED3DSTT_1D       (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D       (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME   (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE     (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1822 static INLINE unsigned
1823 d3dstt_to_tgsi_tex(BYTE sampler_type)
1824 {
1825 switch (sampler_type) {
1826 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1827 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1828 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1829 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1830 default:
1831 assert(0);
1832 return TGSI_TEXTURE_UNKNOWN;
1833 }
1834 }
1835 static INLINE unsigned
1836 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1837 {
1838 switch (sampler_type) {
1839 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1840 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1841 case NINED3DSTT_VOLUME:
1842 case NINED3DSTT_CUBE:
1843 default:
1844 assert(0);
1845 return TGSI_TEXTURE_UNKNOWN;
1846 }
1847 }
1848 static INLINE unsigned
1849 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1850 {
1851 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1852 case 1: return TGSI_TEXTURE_1D;
1853 case 0: return TGSI_TEXTURE_2D;
1854 case 3: return TGSI_TEXTURE_3D;
1855 default:
1856 return TGSI_TEXTURE_CUBE;
1857 }
1858 }
1859
1860 static const char *
1861 sm1_sampler_type_name(BYTE sampler_type)
1862 {
1863 switch (sampler_type) {
1864 case NINED3DSTT_1D: return "1D";
1865 case NINED3DSTT_2D: return "2D";
1866 case NINED3DSTT_VOLUME: return "VOLUME";
1867 case NINED3DSTT_CUBE: return "CUBE";
1868 default:
1869 return "(D3DSTT_?)";
1870 }
1871 }
1872
1873 static INLINE unsigned
1874 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1875 {
1876 switch (sem->Name) {
1877 case TGSI_SEMANTIC_POSITION:
1878 case TGSI_SEMANTIC_NORMAL:
1879 return TGSI_INTERPOLATE_LINEAR;
1880 case TGSI_SEMANTIC_BCOLOR:
1881 case TGSI_SEMANTIC_COLOR:
1882 case TGSI_SEMANTIC_FOG:
1883 case TGSI_SEMANTIC_GENERIC:
1884 case TGSI_SEMANTIC_TEXCOORD:
1885 case TGSI_SEMANTIC_CLIPDIST:
1886 case TGSI_SEMANTIC_CLIPVERTEX:
1887 return TGSI_INTERPOLATE_PERSPECTIVE;
1888 case TGSI_SEMANTIC_EDGEFLAG:
1889 case TGSI_SEMANTIC_FACE:
1890 case TGSI_SEMANTIC_INSTANCEID:
1891 case TGSI_SEMANTIC_PCOORD:
1892 case TGSI_SEMANTIC_PRIMID:
1893 case TGSI_SEMANTIC_PSIZE:
1894 case TGSI_SEMANTIC_VERTEXID:
1895 return TGSI_INTERPOLATE_CONSTANT;
1896 default:
1897 assert(0);
1898 return TGSI_INTERPOLATE_CONSTANT;
1899 }
1900 }
1901
1902 DECL_SPECIAL(DCL)
1903 {
1904 struct ureg_program *ureg = tx->ureg;
1905 boolean is_input;
1906 boolean is_sampler;
1907 struct tgsi_declaration_semantic tgsi;
1908 struct sm1_semantic sem;
1909 sm1_read_semantic(tx, &sem);
1910
1911 is_input = sem.reg.file == D3DSPR_INPUT;
1912 is_sampler =
1913 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1914
1915 DUMP("DCL ");
1916 sm1_dump_dst_param(&sem.reg);
1917 if (is_sampler)
1918 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1919 else
1920 if (tx->version.major >= 3)
1921 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1922 else
1923 if (sem.usage | sem.usage_idx)
1924 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1925 else
1926 DUMP("\n");
1927
1928 if (is_sampler) {
1929 const unsigned m = 1 << sem.reg.idx;
1930 ureg_DECL_sampler(ureg, sem.reg.idx);
1931 tx->info->sampler_mask |= m;
1932 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1933 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1934 d3dstt_to_tgsi_tex(sem.sampler_type);
1935 return D3D_OK;
1936 }
1937
1938 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1939 if (IS_VS) {
1940 if (is_input) {
1941 /* linkage outside of shader with vertex declaration */
1942 ureg_DECL_vs_input(ureg, sem.reg.idx);
1943 assert(sem.reg.idx < Elements(tx->info->input_map));
1944 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1945 tx->info->num_inputs = sem.reg.idx + 1;
1946 /* NOTE: preserving order in case of indirect access */
1947 } else
1948 if (tx->version.major >= 3) {
1949 /* SM2 output semantic determined by file */
1950 assert(sem.reg.mask != 0);
1951 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1952 tx->info->position_t = TRUE;
1953 assert(sem.reg.idx < Elements(tx->regs.o));
1954 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1955 ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1956
1957 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1958 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1959 }
1960 } else {
1961 if (is_input && tx->version.major >= 3) {
1962 /* SM3 only, SM2 input semantic determined by file */
1963 assert(sem.reg.idx < Elements(tx->regs.v));
1964 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1965 ureg, tgsi.Name, tgsi.Index,
1966 nine_tgsi_to_interp_mode(&tgsi),
1967 0, /* cylwrap */
1968 sem.reg.mod & NINED3DSPDM_CENTROID);
1969 } else
1970 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
1971 /* FragColor or FragDepth */
1972 assert(sem.reg.mask != 0);
1973 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1974 }
1975 }
1976 return D3D_OK;
1977 }
1978
1979 DECL_SPECIAL(DEF)
1980 {
1981 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
1982 return D3D_OK;
1983 }
1984
1985 DECL_SPECIAL(DEFB)
1986 {
1987 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
1988 return D3D_OK;
1989 }
1990
1991 DECL_SPECIAL(DEFI)
1992 {
1993 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
1994 return D3D_OK;
1995 }
1996
1997 DECL_SPECIAL(POW)
1998 {
1999 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2000 struct ureg_src src[2] = {
2001 tx_src_param(tx, &tx->insn.src[0]),
2002 tx_src_param(tx, &tx->insn.src[1])
2003 };
2004 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2005 return D3D_OK;
2006 }
2007
2008 DECL_SPECIAL(RSQ)
2009 {
2010 struct ureg_program *ureg = tx->ureg;
2011 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2012 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2013 struct ureg_dst tmp = tx_scratch(tx);
2014 ureg_RSQ(ureg, tmp, ureg_abs(src));
2015 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2016 return D3D_OK;
2017 }
2018
2019 DECL_SPECIAL(LOG)
2020 {
2021 struct ureg_program *ureg = tx->ureg;
2022 struct ureg_dst tmp = tx_scratch_scalar(tx);
2023 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2024 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2025 ureg_LG2(ureg, tmp, ureg_abs(src));
2026 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2027 return D3D_OK;
2028 }
2029
2030 DECL_SPECIAL(NRM)
2031 {
2032 struct ureg_program *ureg = tx->ureg;
2033 struct ureg_dst tmp = tx_scratch_scalar(tx);
2034 struct ureg_src nrm = tx_src_scalar(tmp);
2035 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2036 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2037 ureg_DP3(ureg, tmp, src, src);
2038 ureg_RSQ(ureg, tmp, nrm);
2039 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2040 ureg_MUL(ureg, dst, src, nrm);
2041 return D3D_OK;
2042 }
2043
2044 DECL_SPECIAL(DP2ADD)
2045 {
2046 struct ureg_dst tmp = tx_scratch_scalar(tx);
2047 struct ureg_src dp2 = tx_src_scalar(tmp);
2048 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2049 struct ureg_src src[3];
2050 int i;
2051 for (i = 0; i < 3; ++i)
2052 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2053 assert_replicate_swizzle(&src[2]);
2054
2055 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2056 ureg_ADD(tx->ureg, dst, src[2], dp2);
2057
2058 return D3D_OK;
2059 }
2060
2061 DECL_SPECIAL(TEXCOORD)
2062 {
2063 struct ureg_program *ureg = tx->ureg;
2064 const unsigned s = tx->insn.dst[0].idx;
2065 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2066
2067 tx_texcoord_alloc(tx, s);
2068 ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
2069
2070 return D3D_OK;
2071 }
2072
2073 DECL_SPECIAL(TEXCOORD_ps14)
2074 {
2075 struct ureg_program *ureg = tx->ureg;
2076 const unsigned s = tx->insn.src[0].idx;
2077 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2078
2079 tx_texcoord_alloc(tx, s);
2080 ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
2081
2082 return D3D_OK;
2083 }
2084
2085 DECL_SPECIAL(TEXKILL)
2086 {
2087 struct ureg_src reg;
2088
2089 if (tx->version.major > 1 || tx->version.minor > 3) {
2090 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2091 } else {
2092 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2093 reg = tx->regs.vT[tx->insn.dst[0].idx];
2094 }
2095 if (tx->version.major < 2)
2096 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2097 ureg_KILL_IF(tx->ureg, reg);
2098
2099 return D3D_OK;
2100 }
2101
2102 DECL_SPECIAL(TEXBEM)
2103 {
2104 STUB(D3DERR_INVALIDCALL);
2105 }
2106
2107 DECL_SPECIAL(TEXBEML)
2108 {
2109 STUB(D3DERR_INVALIDCALL);
2110 }
2111
2112 DECL_SPECIAL(TEXREG2AR)
2113 {
2114 STUB(D3DERR_INVALIDCALL);
2115 }
2116
2117 DECL_SPECIAL(TEXREG2GB)
2118 {
2119 STUB(D3DERR_INVALIDCALL);
2120 }
2121
2122 DECL_SPECIAL(TEXM3x2PAD)
2123 {
2124 STUB(D3DERR_INVALIDCALL);
2125 }
2126
2127 DECL_SPECIAL(TEXM3x2TEX)
2128 {
2129 STUB(D3DERR_INVALIDCALL);
2130 }
2131
2132 DECL_SPECIAL(TEXM3x3PAD)
2133 {
2134 return D3D_OK; /* this is just padding */
2135 }
2136
2137 DECL_SPECIAL(TEXM3x3SPEC)
2138 {
2139 STUB(D3DERR_INVALIDCALL);
2140 }
2141
2142 DECL_SPECIAL(TEXM3x3VSPEC)
2143 {
2144 STUB(D3DERR_INVALIDCALL);
2145 }
2146
2147 DECL_SPECIAL(TEXREG2RGB)
2148 {
2149 STUB(D3DERR_INVALIDCALL);
2150 }
2151
2152 DECL_SPECIAL(TEXDP3TEX)
2153 {
2154 STUB(D3DERR_INVALIDCALL);
2155 }
2156
2157 DECL_SPECIAL(TEXM3x2DEPTH)
2158 {
2159 STUB(D3DERR_INVALIDCALL);
2160 }
2161
2162 DECL_SPECIAL(TEXDP3)
2163 {
2164 STUB(D3DERR_INVALIDCALL);
2165 }
2166
2167 DECL_SPECIAL(TEXM3x3)
2168 {
2169 struct ureg_program *ureg = tx->ureg;
2170 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2171 struct ureg_src src[4];
2172 int s;
2173 const int m = tx->insn.dst[0].idx - 2;
2174 const int n = tx->insn.src[0].idx;
2175 assert(m >= 0 && m > n);
2176
2177 for (s = m; s <= (m + 2); ++s) {
2178 tx_texcoord_alloc(tx, s);
2179 src[s] = tx->regs.vT[s];
2180 }
2181 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), src[0], ureg_src(tx->regs.tS[n]));
2182 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), src[1], ureg_src(tx->regs.tS[n]));
2183 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), src[2], ureg_src(tx->regs.tS[n]));
2184
2185 switch (tx->insn.opcode) {
2186 case D3DSIO_TEXM3x3:
2187 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2188 break;
2189 case D3DSIO_TEXM3x3TEX:
2190 src[3] = ureg_DECL_sampler(ureg, m + 2);
2191 tx->info->sampler_mask |= 1 << (m + 2);
2192 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), src[3]);
2193 break;
2194 default:
2195 return D3DERR_INVALIDCALL;
2196 }
2197 return D3D_OK;
2198 }
2199
2200 DECL_SPECIAL(TEXDEPTH)
2201 {
2202 STUB(D3DERR_INVALIDCALL);
2203 }
2204
2205 DECL_SPECIAL(BEM)
2206 {
2207 STUB(D3DERR_INVALIDCALL);
2208 }
2209
2210 DECL_SPECIAL(TEXLD)
2211 {
2212 struct ureg_program *ureg = tx->ureg;
2213 unsigned target;
2214 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2215 struct ureg_src src[2] = {
2216 tx_src_param(tx, &tx->insn.src[0]),
2217 tx_src_param(tx, &tx->insn.src[1])
2218 };
2219 assert(tx->insn.src[1].idx >= 0 &&
2220 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2221 target = tx->sampler_targets[tx->insn.src[1].idx];
2222
2223 switch (tx->insn.flags) {
2224 case 0:
2225 ureg_TEX(ureg, dst, target, src[0], src[1]);
2226 break;
2227 case NINED3DSI_TEXLD_PROJECT:
2228 ureg_TXP(ureg, dst, target, src[0], src[1]);
2229 break;
2230 case NINED3DSI_TEXLD_BIAS:
2231 ureg_TXB(ureg, dst, target, src[0], src[1]);
2232 break;
2233 default:
2234 assert(0);
2235 return D3DERR_INVALIDCALL;
2236 }
2237 return D3D_OK;
2238 }
2239
2240 DECL_SPECIAL(TEXLD_14)
2241 {
2242 struct ureg_program *ureg = tx->ureg;
2243 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2244 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2245 const unsigned s = tx->insn.dst[0].idx;
2246 const unsigned t = ps1x_sampler_type(tx->info, s);
2247
2248 tx->info->sampler_mask |= 1 << s;
2249 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2250
2251 return D3D_OK;
2252 }
2253
2254 DECL_SPECIAL(TEX)
2255 {
2256 struct ureg_program *ureg = tx->ureg;
2257 const unsigned s = tx->insn.dst[0].idx;
2258 const unsigned t = ps1x_sampler_type(tx->info, s);
2259 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2260 struct ureg_src src[2];
2261
2262 tx_texcoord_alloc(tx, s);
2263
2264 src[0] = tx->regs.vT[s];
2265 src[1] = ureg_DECL_sampler(ureg, s);
2266 tx->info->sampler_mask |= 1 << s;
2267
2268 ureg_TEX(ureg, dst, t, src[0], src[1]);
2269
2270 return D3D_OK;
2271 }
2272
2273 DECL_SPECIAL(TEXLDD)
2274 {
2275 unsigned target;
2276 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2277 struct ureg_src src[4] = {
2278 tx_src_param(tx, &tx->insn.src[0]),
2279 tx_src_param(tx, &tx->insn.src[1]),
2280 tx_src_param(tx, &tx->insn.src[2]),
2281 tx_src_param(tx, &tx->insn.src[3])
2282 };
2283 assert(tx->insn.src[3].idx >= 0 &&
2284 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2285 target = tx->sampler_targets[tx->insn.src[1].idx];
2286
2287 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2288 return D3D_OK;
2289 }
2290
2291 DECL_SPECIAL(TEXLDL)
2292 {
2293 unsigned target;
2294 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2295 struct ureg_src src[2] = {
2296 tx_src_param(tx, &tx->insn.src[0]),
2297 tx_src_param(tx, &tx->insn.src[1])
2298 };
2299 assert(tx->insn.src[3].idx >= 0 &&
2300 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2301 target = tx->sampler_targets[tx->insn.src[1].idx];
2302
2303 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2304 return D3D_OK;
2305 }
2306
/* SETP is not translated yet; the handler only invokes STUB with
 * D3DERR_INVALIDCALL (presumably logging and returning that code -- TODO
 * confirm against the STUB macro). */
2307 DECL_SPECIAL(SETP)
2308 {
2309 STUB(D3DERR_INVALIDCALL);
2310 }
2311
/* BREAKP is not translated yet; the handler only invokes STUB with
 * D3DERR_INVALIDCALL (presumably logging and returning that code -- TODO
 * confirm against the STUB macro). */
2312 DECL_SPECIAL(BREAKP)
2313 {
2314 STUB(D3DERR_INVALIDCALL);
2315 }
2316
/* PHASE handler: emits nothing and always reports success. */
2317 DECL_SPECIAL(PHASE)
2318 {
2319 return D3D_OK; /* we don't care about phase */
2320 }
2321
/* COMMENT handler: emits nothing and always reports success. */
2322 DECL_SPECIAL(COMMENT)
2323 {
2324 return D3D_OK; /* nothing to do */
2325 }
2326
2327
/*
 * _OPI builds one sm1_op_info initializer. Its arguments are, in order: the
 * D3DSIO_ opcode suffix (o), the TGSI_OPCODE_ suffix (t), a vertex shader
 * version pair (vv1, vv2), a pixel shader version pair (pv1, pv2), and then
 * d, s and h.
 * NOTE(review): the version pairs presumably fill the {min, max} ranges
 * tested in create_op_info_map(), and d/s/h the ndst, nsrc and handler
 * members read by sm1_parse_instruction() -- confirm against the
 * struct sm1_op_info definition.
 */
2328 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
2329 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2330
/*
 * Table of known instructions. create_op_info_map() walks it and records,
 * per D3DSIO opcode, the index of the entry whose version range contains the
 * shader version; an opcode may therefore be listed several times with
 * different version ranges (e.g. MOV, TEX, SINCOS, EXPP). Entries with a
 * NULL last argument are emitted by NineTranslateInstruction_Generic().
 */
2331 struct sm1_op_info inst_table[] =
2332 {
2333 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2334 _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
2335 _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2336 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2337 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2338 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2339 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2340 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2341 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2342 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2343 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2344 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2345 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2346 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2347 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2348 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2349 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2350 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
2351 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2352 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2353 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2354
2355 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2356 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2357 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2358 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2359 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2360
2361 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALL)),
2362 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALLNZ)),
2363 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2364 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2365 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2366 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(LABEL)),
2367
2368 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2369
2370 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2371 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2372 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2373 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2374 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2375
2376 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2377 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2378
2379 /* More flow control */
2380 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2381 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2382 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2383 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2384 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2385 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2386 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2387 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2388 /* we don't write to the address register, but a normal register (copied
2389 * when needed to the address register), thus we don't use ARR */
2390 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2391
2392 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2393 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2394
2395 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2396 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2397 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2398 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2399 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2400 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2401 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEM)),
2402 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEML)),
2403 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2AR)),
2404 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2GB)),
2405 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2PAD)),
2406 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2TEX)),
2407 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3PAD)),
2408 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
2409 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3SPEC)),
2410 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3VSPEC)),
2411
2412 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2413 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2414 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2415 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2416
2417 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2418
2419 /* More tex stuff */
2420 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXREG2RGB)),
2421 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3TEX)),
2422 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 0, 0, SPECIAL(TEXM3x2DEPTH)),
2423 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3)),
2424 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
2425 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(TEXDEPTH)),
2426
2427 /* Misc */
2428 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2429 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(BEM)),
2430 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2431 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2432 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2433 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2434 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(SETP)),
2435 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2436 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(BREAKP))
2437 };
2438
/* Op info for D3DSIO_PHASE. It is kept outside inst_table and selected
 * directly by sm1_parse_instruction() when the opcode does not fit in
 * op_info_map. */
2439 struct sm1_op_info inst_phase =
2440 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2441
/* Op info for D3DSIO_COMMENT. It is kept outside inst_table and selected
 * directly by sm1_parse_instruction() when the opcode does not fit in
 * op_info_map. */
2442 struct sm1_op_info inst_comment =
2443 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2444
2445 static void
2446 create_op_info_map(struct shader_translator *tx)
2447 {
2448 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2449 unsigned i;
2450
2451 for (i = 0; i < Elements(tx->op_info_map); ++i)
2452 tx->op_info_map[i] = -1;
2453
2454 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2455 for (i = 0; i < Elements(inst_table); ++i) {
2456 assert(inst_table[i].sio < Elements(tx->op_info_map));
2457 if (inst_table[i].vert_version.min <= version &&
2458 inst_table[i].vert_version.max >= version)
2459 tx->op_info_map[inst_table[i].sio] = i;
2460 }
2461 } else {
2462 for (i = 0; i < Elements(inst_table); ++i) {
2463 assert(inst_table[i].sio < Elements(tx->op_info_map));
2464 if (inst_table[i].frag_version.min <= version &&
2465 inst_table[i].frag_version.max >= version)
2466 tx->op_info_map[inst_table[i].sio] = i;
2467 }
2468 }
2469 }
2470
2471 static INLINE HRESULT
2472 NineTranslateInstruction_Generic(struct shader_translator *tx)
2473 {
2474 struct ureg_dst dst[1];
2475 struct ureg_src src[4];
2476 unsigned i;
2477
2478 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2479 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2480 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2481 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2482
2483 ureg_insn(tx->ureg, tx->insn.info->opcode,
2484 dst, tx->insn.ndst,
2485 src, tx->insn.nsrc);
2486 return D3D_OK;
2487 }
2488
2489 static INLINE DWORD
2490 TOKEN_PEEK(struct shader_translator *tx)
2491 {
2492 return *(tx->parse);
2493 }
2494
2495 static INLINE DWORD
2496 TOKEN_NEXT(struct shader_translator *tx)
2497 {
2498 return *(tx->parse)++;
2499 }
2500
2501 static INLINE void
2502 TOKEN_JUMP(struct shader_translator *tx)
2503 {
2504 if (tx->parse_next && tx->parse != tx->parse_next) {
2505 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2506 tx->parse = tx->parse_next;
2507 }
2508 }
2509
2510 static INLINE boolean
2511 sm1_parse_eof(struct shader_translator *tx)
2512 {
2513 return TOKEN_PEEK(tx) == NINED3DSP_END;
2514 }
2515
2516 static void
2517 sm1_read_version(struct shader_translator *tx)
2518 {
2519 const DWORD tok = TOKEN_NEXT(tx);
2520
2521 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2522 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2523
2524 switch (tok >> 16) {
2525 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2526 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2527 default:
2528 DBG("Invalid shader type: %x\n", tok);
2529 tx->processor = ~0;
2530 break;
2531 }
2532 }
2533
2534 /* This is just to check if we parsed the instruction properly. */
2535 static void
2536 sm1_parse_get_skip(struct shader_translator *tx)
2537 {
2538 const DWORD tok = TOKEN_PEEK(tx);
2539
2540 if (tx->version.major >= 2) {
2541 tx->parse_next = tx->parse + 1 /* this */ +
2542 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2543 } else {
2544 tx->parse_next = NULL; /* TODO: determine from param count */
2545 }
2546 }
2547
2548 static void
2549 sm1_print_comment(const char *comment, UINT size)
2550 {
2551 if (!size)
2552 return;
2553 /* TODO */
2554 }
2555
2556 static void
2557 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2558 {
2559 DWORD tok = TOKEN_PEEK(tx);
2560
2561 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2562 {
2563 const char *comment = "";
2564 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2565 tx->parse += size + 1;
2566
2567 if (print)
2568 sm1_print_comment(comment, size);
2569
2570 tok = TOKEN_PEEK(tx);
2571 }
2572 }
2573
2574 static void
2575 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2576 {
2577 *reg = TOKEN_NEXT(tx);
2578
2579 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2580 {
2581 if (tx->version.major < 2)
2582 *rel = (1 << 31) |
2583 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2584 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2585 (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT);
2586 else
2587 *rel = TOKEN_NEXT(tx);
2588 }
2589 }
2590
2591 static void
2592 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2593 {
2594 uint8_t shift;
2595 dst->file =
2596 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2597 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2598 dst->type = TGSI_RETURN_TYPE_FLOAT;
2599 dst->idx = tok & D3DSP_REGNUM_MASK;
2600 dst->rel = NULL;
2601 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2602 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2603 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2604 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2605 }
2606
2607 static void
2608 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2609 {
2610 src->file =
2611 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2612 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2613 src->type = TGSI_RETURN_TYPE_FLOAT;
2614 src->idx = tok & D3DSP_REGNUM_MASK;
2615 src->rel = NULL;
2616 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2617 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2618
2619 switch (src->file) {
2620 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2621 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2622 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2623 default:
2624 break;
2625 }
2626 }
2627
2628 static void
2629 sm1_parse_immediate(struct shader_translator *tx,
2630 struct sm1_src_param *imm)
2631 {
2632 imm->file = NINED3DSPR_IMMEDIATE;
2633 imm->idx = INT_MIN;
2634 imm->rel = NULL;
2635 imm->swizzle = NINED3DSP_NOSWIZZLE;
2636 imm->mod = 0;
2637 switch (tx->insn.opcode) {
2638 case D3DSIO_DEF:
2639 imm->type = NINED3DSPTYPE_FLOAT4;
2640 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2641 tx->parse += 4;
2642 break;
2643 case D3DSIO_DEFI:
2644 imm->type = NINED3DSPTYPE_INT4;
2645 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2646 tx->parse += 4;
2647 break;
2648 case D3DSIO_DEFB:
2649 imm->type = NINED3DSPTYPE_BOOL;
2650 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2651 tx->parse += 1;
2652 break;
2653 default:
2654 assert(0);
2655 break;
2656 }
2657 }
2658
2659 static void
2660 sm1_read_dst_param(struct shader_translator *tx,
2661 struct sm1_dst_param *dst,
2662 struct sm1_src_param *rel)
2663 {
2664 DWORD tok_dst, tok_rel = 0;
2665
2666 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
2667 sm1_parse_dst_param(dst, tok_dst);
2668 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
2669 sm1_parse_src_param(rel, tok_rel);
2670 dst->rel = rel;
2671 }
2672 }
2673
2674 static void
2675 sm1_read_src_param(struct shader_translator *tx,
2676 struct sm1_src_param *src,
2677 struct sm1_src_param *rel)
2678 {
2679 DWORD tok_src, tok_rel = 0;
2680
2681 sm1_parse_get_param(tx, &tok_src, &tok_rel);
2682 sm1_parse_src_param(src, tok_src);
2683 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
2684 assert(rel);
2685 sm1_parse_src_param(rel, tok_rel);
2686 src->rel = rel;
2687 }
2688 }
2689
2690 static void
2691 sm1_read_semantic(struct shader_translator *tx,
2692 struct sm1_semantic *sem)
2693 {
2694 const DWORD tok_usg = TOKEN_NEXT(tx);
2695 const DWORD tok_dst = TOKEN_NEXT(tx);
2696
2697 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
2698 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
2699 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
2700
2701 sm1_parse_dst_param(&sem->reg, tok_dst);
2702 }
2703
2704 static void
2705 sm1_parse_instruction(struct shader_translator *tx)
2706 {
2707 struct sm1_instruction *insn = &tx->insn;
2708 DWORD tok;
2709 struct sm1_op_info *info = NULL;
2710 unsigned i;
2711
2712 sm1_parse_comments(tx, TRUE);
2713 sm1_parse_get_skip(tx);
2714
2715 tok = TOKEN_NEXT(tx);
2716
2717 insn->opcode = tok & D3DSI_OPCODE_MASK;
2718 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
2719 insn->coissue = !!(tok & D3DSI_COISSUE);
2720 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
2721
2722 if (insn->opcode < Elements(tx->op_info_map)) {
2723 int k = tx->op_info_map[insn->opcode];
2724 if (k >= 0) {
2725 assert(k < Elements(inst_table));
2726 info = &inst_table[k];
2727 }
2728 } else {
2729 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
2730 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
2731 }
2732 if (!info) {
2733 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
2734 TOKEN_JUMP(tx);
2735 return;
2736 }
2737 insn->info = info;
2738 insn->ndst = info->ndst;
2739 insn->nsrc = info->nsrc;
2740
2741 assert(!insn->predicated && "TODO: predicated instructions");
2742
2743 /* check version */
2744 {
2745 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
2746 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
2747 unsigned ver = (tx->version.major << 8) | tx->version.minor;
2748 if (ver < min || ver > max) {
2749 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
2750 min, ver, max);
2751 return;
2752 }
2753 }
2754
2755 for (i = 0; i < insn->ndst; ++i)
2756 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
2757 if (insn->predicated)
2758 sm1_read_src_param(tx, &insn->pred, NULL);
2759 for (i = 0; i < insn->nsrc; ++i)
2760 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
2761
2762 /* parse here so we can dump them before processing */
2763 if (insn->opcode == D3DSIO_DEF ||
2764 insn->opcode == D3DSIO_DEFI ||
2765 insn->opcode == D3DSIO_DEFB)
2766 sm1_parse_immediate(tx, &tx->insn.src[0]);
2767
2768 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
2769 sm1_instruction_check(insn);
2770
2771 if (info->handler)
2772 info->handler(tx);
2773 else
2774 NineTranslateInstruction_Generic(tx);
2775 tx_apply_dst0_modifiers(tx);
2776
2777 tx->num_scratch = 0; /* reset */
2778
2779 TOKEN_JUMP(tx);
2780 }
2781
2782 static void
2783 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
2784 {
2785 unsigned i;
2786
2787 tx->info = info;
2788
2789 tx->byte_code = info->byte_code;
2790 tx->parse = info->byte_code;
2791
2792 for (i = 0; i < Elements(info->input_map); ++i)
2793 info->input_map[i] = NINE_DECLUSAGE_NONE;
2794 info->num_inputs = 0;
2795
2796 info->position_t = FALSE;
2797 info->point_size = FALSE;
2798
2799 tx->info->const_used_size = 0;
2800
2801 info->sampler_mask = 0x0;
2802 info->rt_mask = 0x0;
2803
2804 info->lconstf.data = NULL;
2805 info->lconstf.ranges = NULL;
2806
2807 for (i = 0; i < Elements(tx->regs.rL); ++i) {
2808 tx->regs.rL[i] = ureg_dst_undef();
2809 }
2810 tx->regs.address = ureg_dst_undef();
2811 tx->regs.a0 = ureg_dst_undef();
2812 tx->regs.p = ureg_dst_undef();
2813 tx->regs.oDepth = ureg_dst_undef();
2814 tx->regs.vPos = ureg_src_undef();
2815 tx->regs.vFace = ureg_src_undef();
2816 for (i = 0; i < Elements(tx->regs.o); ++i)
2817 tx->regs.o[i] = ureg_dst_undef();
2818 for (i = 0; i < Elements(tx->regs.oCol); ++i)
2819 tx->regs.oCol[i] = ureg_dst_undef();
2820 for (i = 0; i < Elements(tx->regs.vC); ++i)
2821 tx->regs.vC[i] = ureg_src_undef();
2822 for (i = 0; i < Elements(tx->regs.vT); ++i)
2823 tx->regs.vT[i] = ureg_src_undef();
2824
2825 for (i = 0; i < Elements(tx->lconsti); ++i)
2826 tx->lconsti[i].idx = -1;
2827 for (i = 0; i < Elements(tx->lconstb); ++i)
2828 tx->lconstb[i].idx = -1;
2829
2830 sm1_read_version(tx);
2831
2832 info->version = (tx->version.major << 4) | tx->version.minor;
2833
2834 create_op_info_map(tx);
2835 }
2836
2837 static void
2838 tx_dtor(struct shader_translator *tx)
2839 {
2840 if (tx->num_inst_labels)
2841 FREE(tx->inst_labels);
2842 FREE(tx->lconstf);
2843 FREE(tx->regs.r);
2844 FREE(tx);
2845 }
2846
2847 static INLINE unsigned
2848 tgsi_processor_from_type(unsigned shader_type)
2849 {
2850 switch (shader_type) {
2851 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
2852 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
2853 default:
2854 return ~0;
2855 }
2856 }
2857
2858 #define GET_CAP(n) device->screen->get_param( \
2859 device->screen, PIPE_CAP_##n)
2860 #define GET_SHADER_CAP(n) device->screen->get_shader_param( \
2861 device->screen, info->type, PIPE_SHADER_CAP_##n)
2862
2863 HRESULT
2864 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
2865 {
2866 struct shader_translator *tx;
2867 HRESULT hr = D3D_OK;
2868 const unsigned processor = tgsi_processor_from_type(info->type);
2869
2870 user_assert(processor != ~0, D3DERR_INVALIDCALL);
2871
2872 tx = CALLOC_STRUCT(shader_translator);
2873 if (!tx)
2874 return E_OUTOFMEMORY;
2875 tx_ctor(tx, info);
2876
2877 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
2878 hr = D3DERR_INVALIDCALL;
2879 DBG("Unsupported shader version: %u.%u !\n",
2880 tx->version.major, tx->version.minor);
2881 goto out;
2882 }
2883 if (tx->processor != processor) {
2884 hr = D3DERR_INVALIDCALL;
2885 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
2886 goto out;
2887 }
2888 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
2889 tx->version.major, tx->version.minor);
2890
2891 tx->ureg = ureg_create(processor);
2892 if (!tx->ureg) {
2893 hr = E_OUTOFMEMORY;
2894 goto out;
2895 }
2896 tx_decl_constants(tx);
2897
2898 tx->native_integers = GET_SHADER_CAP(INTEGERS);
2899 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
2900 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
2901 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
2902 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2903 tx->texcoord_sn = tx->want_texcoord ?
2904 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
2905
2906 /* VS must always write position. Declare it here to make it the 1st output.
2907 * (Some drivers like nv50 are buggy and rely on that.)
2908 */
2909 if (IS_VS) {
2910 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
2911 } else {
2912 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
2913 if (!tx->shift_wpos)
2914 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2915 }
2916
2917 while (!sm1_parse_eof(tx))
2918 sm1_parse_instruction(tx);
2919 tx->parse++; /* for byte_size */
2920
2921 if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
2922 ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
2923 ureg_src(tx->regs.r[0]));
2924 info->rt_mask |= 0x1;
2925 }
2926
2927 if (info->position_t)
2928 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
2929
2930 ureg_END(tx->ureg);
2931
2932 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
2933 info->point_size = TRUE;
2934
2935 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
2936 unsigned count;
2937 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
2938 tgsi_dump(toks, 0);
2939 ureg_free_tokens(toks);
2940 }
2941
2942 /* record local constants */
2943 if (tx->num_lconstf && tx->indirect_const_access) {
2944 struct nine_range *ranges;
2945 float *data;
2946 int *indices;
2947 unsigned i, k, n;
2948
2949 hr = E_OUTOFMEMORY;
2950
2951 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
2952 if (!data)
2953 goto out;
2954 info->lconstf.data = data;
2955
2956 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
2957 if (!indices)
2958 goto out;
2959
2960 /* lazy sort, num_lconstf should be small */
2961 for (n = 0; n < tx->num_lconstf; ++n) {
2962 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
2963 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
2964 k = i;
2965 }
2966 indices[n] = tx->lconstf[k].idx;
2967 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
2968 tx->lconstf[k].idx = INT_MAX;
2969 }
2970
2971 /* count ranges */
2972 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
2973 if (indices[i] != indices[i - 1] + 1)
2974 ++n;
2975 ranges = MALLOC(n * sizeof(ranges[0]));
2976 if (!ranges) {
2977 FREE(indices);
2978 goto out;
2979 }
2980 info->lconstf.ranges = ranges;
2981
2982 k = 0;
2983 ranges[k].bgn = indices[0];
2984 for (i = 1; i < tx->num_lconstf; ++i) {
2985 if (indices[i] != indices[i - 1] + 1) {
2986 ranges[k].next = &ranges[k + 1];
2987 ranges[k].end = indices[i - 1] + 1;
2988 ++k;
2989 ranges[k].bgn = indices[i];
2990 }
2991 }
2992 ranges[k].end = indices[i - 1] + 1;
2993 ranges[k].next = NULL;
2994 assert(n == (k + 1));
2995
2996 FREE(indices);
2997 hr = D3D_OK;
2998 }
2999
3000 if (tx->indirect_const_access)
3001 info->const_used_size = ~0;
3002
3003 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3004 if (!info->cso) {
3005 hr = D3DERR_DRIVERINTERNALERROR;
3006 FREE(info->lconstf.data);
3007 FREE(info->lconstf.ranges);
3008 goto out;
3009 }
3010
3011 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3012 out:
3013 tx_dtor(tx);
3014 return hr;
3015 }