cefe4e432486b82331f10976337d7e6188e923fb
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_ureg.h"
34 #include "tgsi/tgsi_dump.h"
35
36 #define DBG_CHANNEL DBG_SHADER
37
38 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
39
40
41 struct shader_translator;
42
43 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
44
45 static INLINE const char *d3dsio_to_string(unsigned opcode);
46
47
48 #define NINED3D_SM1_VS 0xfffe
49 #define NINED3D_SM1_PS 0xffff
50
51 #define NINE_MAX_COND_DEPTH 64
52 #define NINE_MAX_LOOP_DEPTH 64
53
54 #define NINED3DSP_END 0x0000ffff
55
56 #define NINED3DSPTYPE_FLOAT4 0
57 #define NINED3DSPTYPE_INT4 1
58 #define NINED3DSPTYPE_BOOL 2
59
60 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
61
62 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
63 #define NINED3DSP_WRITEMASK_SHIFT 16
64
65 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
66
67 #define NINED3DSHADER_REL_OP_GT 1
68 #define NINED3DSHADER_REL_OP_EQ 2
69 #define NINED3DSHADER_REL_OP_GE 3
70 #define NINED3DSHADER_REL_OP_LT 4
71 #define NINED3DSHADER_REL_OP_NE 5
72 #define NINED3DSHADER_REL_OP_LE 6
73
74 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
75 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
76
77 #define NINED3DSI_TEXLD_PROJECT 0x1
78 #define NINED3DSI_TEXLD_BIAS 0x2
79
80 #define NINED3DSP_WRITEMASK_0 0x1
81 #define NINED3DSP_WRITEMASK_1 0x2
82 #define NINED3DSP_WRITEMASK_2 0x4
83 #define NINED3DSP_WRITEMASK_3 0x8
84 #define NINED3DSP_WRITEMASK_ALL 0xf
85
86 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
87
88 #define NINE_SWIZZLE4(x,y,z,w) \
89 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
90
91 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
92 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
93 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
94
95 /*
96 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
97 * BIAS <= PS 1.4 (x-0.5)
98 * BIASNEG <= PS 1.4 (-(x-0.5))
99 * SIGN <= PS 1.4 (2(x-0.5))
100 * SIGNNEG <= PS 1.4 (-2(x-0.5))
101 * COMP <= PS 1.4 (1-x)
102 * X2 = PS 1.4 (2x)
103 * X2NEG = PS 1.4 (-2x)
104 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
105 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
106 * ABS >= SM 3.0 (abs(x))
107 * ABSNEG >= SM 3.0 (-abs(x))
108 * NOT >= SM 2.0 predication only
109 */
110 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
111 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
112 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
113 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
114 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
115 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
116 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
117 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
118 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
119 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
120 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
121 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
124
125 static const char *sm1_mod_str[] =
126 {
127 [NINED3DSPSM_NONE] = "",
128 [NINED3DSPSM_NEG] = "-",
129 [NINED3DSPSM_BIAS] = "bias",
130 [NINED3DSPSM_BIASNEG] = "biasneg",
131 [NINED3DSPSM_SIGN] = "sign",
132 [NINED3DSPSM_SIGNNEG] = "signneg",
133 [NINED3DSPSM_COMP] = "comp",
134 [NINED3DSPSM_X2] = "x2",
135 [NINED3DSPSM_X2NEG] = "x2neg",
136 [NINED3DSPSM_DZ] = "dz",
137 [NINED3DSPSM_DW] = "dw",
138 [NINED3DSPSM_ABS] = "abs",
139 [NINED3DSPSM_ABSNEG] = "-abs",
140 [NINED3DSPSM_NOT] = "not"
141 };
142
143 static void
144 sm1_dump_writemask(BYTE mask)
145 {
146 if (mask & 1) DUMP("x"); else DUMP("_");
147 if (mask & 2) DUMP("y"); else DUMP("_");
148 if (mask & 4) DUMP("z"); else DUMP("_");
149 if (mask & 8) DUMP("w"); else DUMP("_");
150 }
151
152 static void
153 sm1_dump_swizzle(BYTE s)
154 {
155 char c[4] = { 'x', 'y', 'z', 'w' };
156 DUMP("%c%c%c%c",
157 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
158 }
159
160 static const char sm1_file_char[] =
161 {
162 [D3DSPR_TEMP] = 'r',
163 [D3DSPR_INPUT] = 'v',
164 [D3DSPR_CONST] = 'c',
165 [D3DSPR_ADDR] = 'A',
166 [D3DSPR_RASTOUT] = 'R',
167 [D3DSPR_ATTROUT] = 'D',
168 [D3DSPR_OUTPUT] = 'o',
169 [D3DSPR_CONSTINT] = 'I',
170 [D3DSPR_COLOROUT] = 'C',
171 [D3DSPR_DEPTHOUT] = 'D',
172 [D3DSPR_SAMPLER] = 's',
173 [D3DSPR_CONST2] = 'c',
174 [D3DSPR_CONST3] = 'c',
175 [D3DSPR_CONST4] = 'c',
176 [D3DSPR_CONSTBOOL] = 'B',
177 [D3DSPR_LOOP] = 'L',
178 [D3DSPR_TEMPFLOAT16] = 'h',
179 [D3DSPR_MISCTYPE] = 'M',
180 [D3DSPR_LABEL] = 'X',
181 [D3DSPR_PREDICATE] = 'p'
182 };
183
184 static void
185 sm1_dump_reg(BYTE file, INT index)
186 {
187 switch (file) {
188 case D3DSPR_LOOP:
189 DUMP("aL");
190 break;
191 case D3DSPR_COLOROUT:
192 DUMP("oC%i", index);
193 break;
194 case D3DSPR_DEPTHOUT:
195 DUMP("oDepth");
196 break;
197 case D3DSPR_RASTOUT:
198 DUMP("oRast%i", index);
199 break;
200 case D3DSPR_CONSTINT:
201 DUMP("iconst[%i]", index);
202 break;
203 case D3DSPR_CONSTBOOL:
204 DUMP("bconst[%i]", index);
205 break;
206 default:
207 DUMP("%c%i", sm1_file_char[file], index);
208 break;
209 }
210 }
211
212 struct sm1_src_param
213 {
214 INT idx;
215 struct sm1_src_param *rel;
216 BYTE file;
217 BYTE swizzle;
218 BYTE mod;
219 BYTE type;
220 union {
221 DWORD d[4];
222 float f[4];
223 int i[4];
224 BOOL b;
225 } imm;
226 };
227 static void
228 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
229
230 struct sm1_dst_param
231 {
232 INT idx;
233 struct sm1_src_param *rel;
234 BYTE file;
235 BYTE mask;
236 BYTE mod;
237 int8_t shift; /* sint4 */
238 BYTE type;
239 };
240
241 static INLINE void
242 assert_replicate_swizzle(const struct ureg_src *reg)
243 {
244 assert(reg->SwizzleY == reg->SwizzleX &&
245 reg->SwizzleZ == reg->SwizzleX &&
246 reg->SwizzleW == reg->SwizzleX);
247 }
248
249 static void
250 sm1_dump_immediate(const struct sm1_src_param *param)
251 {
252 switch (param->type) {
253 case NINED3DSPTYPE_FLOAT4:
254 DUMP("{ %f %f %f %f }",
255 param->imm.f[0], param->imm.f[1],
256 param->imm.f[2], param->imm.f[3]);
257 break;
258 case NINED3DSPTYPE_INT4:
259 DUMP("{ %i %i %i %i }",
260 param->imm.i[0], param->imm.i[1],
261 param->imm.i[2], param->imm.i[3]);
262 break;
263 case NINED3DSPTYPE_BOOL:
264 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
265 break;
266 default:
267 assert(0);
268 break;
269 }
270 }
271
272 static void
273 sm1_dump_src_param(const struct sm1_src_param *param)
274 {
275 if (param->file == NINED3DSPR_IMMEDIATE) {
276 assert(!param->mod &&
277 !param->rel &&
278 param->swizzle == NINED3DSP_NOSWIZZLE);
279 sm1_dump_immediate(param);
280 return;
281 }
282
283 if (param->mod)
284 DUMP("%s(", sm1_mod_str[param->mod]);
285 if (param->rel) {
286 DUMP("%c[", sm1_file_char[param->file]);
287 sm1_dump_src_param(param->rel);
288 DUMP("+%i]", param->idx);
289 } else {
290 sm1_dump_reg(param->file, param->idx);
291 }
292 if (param->mod)
293 DUMP(")");
294 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
295 DUMP(".");
296 sm1_dump_swizzle(param->swizzle);
297 }
298 }
299
300 static void
301 sm1_dump_dst_param(const struct sm1_dst_param *param)
302 {
303 if (param->mod & NINED3DSPDM_SATURATE)
304 DUMP("sat ");
305 if (param->mod & NINED3DSPDM_PARTIALP)
306 DUMP("pp ");
307 if (param->mod & NINED3DSPDM_CENTROID)
308 DUMP("centroid ");
309 if (param->shift < 0)
310 DUMP("/%u ", 1 << -param->shift);
311 if (param->shift > 0)
312 DUMP("*%u ", 1 << param->shift);
313
314 if (param->rel) {
315 DUMP("%c[", sm1_file_char[param->file]);
316 sm1_dump_src_param(param->rel);
317 DUMP("+%i]", param->idx);
318 } else {
319 sm1_dump_reg(param->file, param->idx);
320 }
321 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
322 DUMP(".");
323 sm1_dump_writemask(param->mask);
324 }
325 }
326
327 struct sm1_semantic
328 {
329 struct sm1_dst_param reg;
330 BYTE sampler_type;
331 D3DDECLUSAGE usage;
332 BYTE usage_idx;
333 };
334
335 struct sm1_op_info
336 {
337 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
338 * should be ignored completely */
339 unsigned sio;
340 unsigned opcode; /* TGSI_OPCODE_x */
341
342 /* versions are still set even handler is set */
343 struct {
344 unsigned min;
345 unsigned max;
346 } vert_version, frag_version;
347
348 /* number of regs parsed outside of special handler */
349 unsigned ndst;
350 unsigned nsrc;
351
352 /* some instructions don't map perfectly, so use a special handler */
353 translate_instruction_func handler;
354 };
355
356 struct sm1_instruction
357 {
358 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
359 BYTE flags;
360 BOOL coissue;
361 BOOL predicated;
362 BYTE ndst;
363 BYTE nsrc;
364 struct sm1_src_param src[4];
365 struct sm1_src_param src_rel[4];
366 struct sm1_src_param pred;
367 struct sm1_src_param dst_rel[1];
368 struct sm1_dst_param dst[1];
369
370 struct sm1_op_info *info;
371 };
372
373 static void
374 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
375 {
376 unsigned i;
377
378 /* no info stored for these: */
379 if (insn->opcode == D3DSIO_DCL)
380 return;
381 for (i = 0; i < indent; ++i)
382 DUMP(" ");
383
384 if (insn->predicated) {
385 DUMP("@");
386 sm1_dump_src_param(&insn->pred);
387 DUMP(" ");
388 }
389 DUMP("%s", d3dsio_to_string(insn->opcode));
390 if (insn->flags) {
391 switch (insn->opcode) {
392 case D3DSIO_TEX:
393 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
394 break;
395 default:
396 DUMP("_%x", insn->flags);
397 break;
398 }
399 }
400 if (insn->coissue)
401 DUMP("_co");
402 DUMP(" ");
403
404 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
405 sm1_dump_dst_param(&insn->dst[i]);
406 DUMP(" ");
407 }
408
409 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
410 sm1_dump_src_param(&insn->src[i]);
411 DUMP(" ");
412 }
413 if (insn->opcode == D3DSIO_DEF ||
414 insn->opcode == D3DSIO_DEFI ||
415 insn->opcode == D3DSIO_DEFB)
416 sm1_dump_immediate(&insn->src[0]);
417
418 DUMP("\n");
419 }
420
421 struct sm1_local_const
422 {
423 INT idx;
424 struct ureg_src reg;
425 union {
426 boolean b;
427 float f[4];
428 int32_t i[4];
429 } imm;
430 };
431
432 struct shader_translator
433 {
434 const DWORD *byte_code;
435 const DWORD *parse;
436 const DWORD *parse_next;
437
438 struct ureg_program *ureg;
439
440 /* shader version */
441 struct {
442 BYTE major;
443 BYTE minor;
444 } version;
445 unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
446
447 boolean native_integers;
448 boolean inline_subroutines;
449 boolean lower_preds;
450 boolean want_texcoord;
451 boolean shift_wpos;
452 unsigned texcoord_sn;
453
454 struct sm1_instruction insn; /* current instruction */
455
456 struct {
457 struct ureg_dst *r;
458 struct ureg_dst oPos;
459 struct ureg_dst oFog;
460 struct ureg_dst oPts;
461 struct ureg_dst oCol[4];
462 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
463 struct ureg_dst oDepth;
464 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
465 struct ureg_src vPos;
466 struct ureg_src vFace;
467 struct ureg_src s;
468 struct ureg_dst p;
469 struct ureg_dst address;
470 struct ureg_dst a0;
471 struct ureg_dst tS[8]; /* texture stage registers */
472 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
473 struct ureg_dst t[5]; /* scratch TEMPs */
474 struct ureg_src vC[2]; /* PS color in */
475 struct ureg_src vT[8]; /* PS texcoord in */
476 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
477 } regs;
478 unsigned num_temp; /* Elements(regs.r) */
479 unsigned num_scratch;
480 unsigned loop_depth;
481 unsigned loop_depth_max;
482 unsigned cond_depth;
483 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
484 unsigned cond_labels[NINE_MAX_COND_DEPTH];
485 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
486
487 unsigned *inst_labels; /* LABEL op */
488 unsigned num_inst_labels;
489
490 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
491
492 struct sm1_local_const *lconstf;
493 unsigned num_lconstf;
494 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
495 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
496
497 boolean indirect_const_access;
498
499 struct nine_shader_info *info;
500
501 int16_t op_info_map[D3DSIO_BREAKP + 1];
502 };
503
/* Shader-stage tests; both expect a translator named `tx` in scope. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
509
510 static void
511 sm1_instruction_check(const struct sm1_instruction *insn)
512 {
513 if (insn->opcode == D3DSIO_CRS)
514 {
515 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
516 {
517 DBG("CRS.mask.w\n");
518 }
519 }
520 }
521
522 static boolean
523 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
524 {
525 INT i;
526 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
527 for (i = 0; i < tx->num_lconstf; ++i) {
528 if (tx->lconstf[i].idx == index) {
529 *src = tx->lconstf[i].reg;
530 return TRUE;
531 }
532 }
533 return FALSE;
534 }
535 static boolean
536 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
537 {
538 assert(index >= 0 && index < NINE_MAX_CONST_I);
539 if (tx->lconsti[index].idx == index)
540 *src = tx->lconsti[index].reg;
541 return tx->lconsti[index].idx == index;
542 }
543 static boolean
544 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
545 {
546 assert(index >= 0 && index < NINE_MAX_CONST_B);
547 if (tx->lconstb[index].idx == index)
548 *src = tx->lconstb[index].reg;
549 return tx->lconstb[index].idx == index;
550 }
551
552 static void
553 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
554 {
555 unsigned n;
556
557 /* Anno1404 sets out of range constants. */
558 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
559 if (index >= NINE_MAX_CONST_F)
560 WARN("lconstf index %i too high, indirect access won't work\n", index);
561
562 for (n = 0; n < tx->num_lconstf; ++n)
563 if (tx->lconstf[n].idx == index)
564 break;
565 if (n == tx->num_lconstf) {
566 if ((n % 8) == 0) {
567 tx->lconstf = REALLOC(tx->lconstf,
568 (n + 0) * sizeof(tx->lconstf[0]),
569 (n + 8) * sizeof(tx->lconstf[0]));
570 assert(tx->lconstf);
571 }
572 tx->num_lconstf++;
573 }
574 tx->lconstf[n].idx = index;
575 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
576
577 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
578 }
579 static void
580 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
581 {
582 assert(index >= 0 && index < NINE_MAX_CONST_I);
583 tx->lconsti[index].idx = index;
584 tx->lconsti[index].reg = tx->native_integers ?
585 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
586 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
587 }
588 static void
589 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
590 {
591 assert(index >= 0 && index < NINE_MAX_CONST_B);
592 tx->lconstb[index].idx = index;
593 tx->lconstb[index].reg = tx->native_integers ?
594 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
595 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
596 }
597
598 static INLINE struct ureg_dst
599 tx_scratch(struct shader_translator *tx)
600 {
601 assert(tx->num_scratch < Elements(tx->regs.t));
602 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
603 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
604 return tx->regs.t[tx->num_scratch++];
605 }
606
607 static INLINE struct ureg_dst
608 tx_scratch_scalar(struct shader_translator *tx)
609 {
610 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
611 }
612
613 static INLINE struct ureg_src
614 tx_src_scalar(struct ureg_dst dst)
615 {
616 struct ureg_src src = ureg_src(dst);
617 int c = ffs(dst.WriteMask) - 1;
618 if (dst.WriteMask == (1 << c))
619 src = ureg_scalar(src, c);
620 return src;
621 }
622
623 /* Need to declare all constants if indirect addressing is used,
624 * otherwise we could scan the shader to determine the maximum.
625 * TODO: It doesn't really matter for nv50 so I won't do the scan,
626 * but radeon drivers might care, if they don't infer it from TGSI.
627 */
628 static void
629 tx_decl_constants(struct shader_translator *tx)
630 {
631 unsigned i, n = 0;
632
633 for (i = 0; i < NINE_MAX_CONST_F; ++i)
634 ureg_DECL_constant(tx->ureg, n++);
635 for (i = 0; i < NINE_MAX_CONST_I; ++i)
636 ureg_DECL_constant(tx->ureg, n++);
637 for (i = 0; i < (NINE_MAX_CONST_B / 4); ++i)
638 ureg_DECL_constant(tx->ureg, n++);
639 }
640
641 static INLINE void
642 tx_temp_alloc(struct shader_translator *tx, INT idx)
643 {
644 assert(idx >= 0);
645 if (idx >= tx->num_temp) {
646 unsigned k = tx->num_temp;
647 unsigned n = idx + 1;
648 tx->regs.r = REALLOC(tx->regs.r,
649 k * sizeof(tx->regs.r[0]),
650 n * sizeof(tx->regs.r[0]));
651 for (; k < n; ++k)
652 tx->regs.r[k] = ureg_dst_undef();
653 tx->num_temp = n;
654 }
655 if (ureg_dst_is_undef(tx->regs.r[idx]))
656 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
657 }
658
659 static INLINE void
660 tx_addr_alloc(struct shader_translator *tx, INT idx)
661 {
662 assert(idx == 0);
663 if (ureg_dst_is_undef(tx->regs.address))
664 tx->regs.address = ureg_DECL_address(tx->ureg);
665 if (ureg_dst_is_undef(tx->regs.a0))
666 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
667 }
668
669 static INLINE void
670 tx_pred_alloc(struct shader_translator *tx, INT idx)
671 {
672 assert(idx == 0);
673 if (ureg_dst_is_undef(tx->regs.p))
674 tx->regs.p = ureg_DECL_predicate(tx->ureg);
675 }
676
677 static INLINE void
678 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
679 {
680 assert(IS_PS);
681 assert(idx >= 0 && idx < Elements(tx->regs.vT));
682 if (ureg_src_is_undef(tx->regs.vT[idx]))
683 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
684 TGSI_INTERPOLATE_PERSPECTIVE);
685 }
686
687 static INLINE unsigned *
688 tx_bgnloop(struct shader_translator *tx)
689 {
690 tx->loop_depth++;
691 if (tx->loop_depth_max < tx->loop_depth)
692 tx->loop_depth_max = tx->loop_depth;
693 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
694 return &tx->loop_labels[tx->loop_depth - 1];
695 }
696
697 static INLINE unsigned *
698 tx_endloop(struct shader_translator *tx)
699 {
700 assert(tx->loop_depth);
701 tx->loop_depth--;
702 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
703 ureg_get_instruction_number(tx->ureg));
704 return &tx->loop_labels[tx->loop_depth];
705 }
706
707 static struct ureg_dst
708 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
709 {
710 const unsigned l = tx->loop_depth - 1;
711
712 if (!tx->loop_depth)
713 {
714 DBG("loop counter requested outside of loop\n");
715 return ureg_dst_undef();
716 }
717
718 if (ureg_dst_is_undef(tx->regs.rL[l])) {
719 /* loop or rep ctr creation */
720 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
721 tx->loop_or_rep[l] = loop_or_rep;
722 }
723 /* loop - rep - endloop - endrep not allowed */
724 assert(tx->loop_or_rep[l] == loop_or_rep);
725
726 return tx->regs.rL[l];
727 }
728
729 static struct ureg_src
730 tx_get_loopal(struct shader_translator *tx)
731 {
732 int loop_level = tx->loop_depth - 1;
733
734 while (loop_level >= 0) {
735 /* handle loop - rep - endrep - endloop case */
736 if (tx->loop_or_rep[loop_level])
737 /* the value is in the loop counter y component (nine implementation) */
738 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
739 loop_level--;
740 }
741
742 DBG("aL counter requested outside of loop\n");
743 return ureg_src_undef();
744 }
745
746 static INLINE unsigned *
747 tx_cond(struct shader_translator *tx)
748 {
749 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
750 tx->cond_depth++;
751 return &tx->cond_labels[tx->cond_depth - 1];
752 }
753
754 static INLINE unsigned *
755 tx_elsecond(struct shader_translator *tx)
756 {
757 assert(tx->cond_depth);
758 return &tx->cond_labels[tx->cond_depth - 1];
759 }
760
761 static INLINE void
762 tx_endcond(struct shader_translator *tx)
763 {
764 assert(tx->cond_depth);
765 tx->cond_depth--;
766 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
767 ureg_get_instruction_number(tx->ureg));
768 }
769
770 static INLINE struct ureg_dst
771 nine_ureg_dst_register(unsigned file, int index)
772 {
773 return ureg_dst(ureg_src_register(file, index));
774 }
775
776 static struct ureg_src
777 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
778 {
779 struct ureg_program *ureg = tx->ureg;
780 struct ureg_src src;
781 struct ureg_dst tmp;
782
783 switch (param->file)
784 {
785 case D3DSPR_TEMP:
786 assert(!param->rel);
787 tx_temp_alloc(tx, param->idx);
788 src = ureg_src(tx->regs.r[param->idx]);
789 break;
790 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
791 case D3DSPR_ADDR:
792 assert(!param->rel);
793 if (IS_VS) {
794 assert(param->idx == 0);
795 /* the address register (vs only) must be
796 * assigned before use */
797 assert(!ureg_dst_is_undef(tx->regs.a0));
798 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
799 src = ureg_src(tx->regs.address);
800 } else {
801 if (tx->version.major < 2 && tx->version.minor < 4) {
802 /* no subroutines, so should be defined */
803 src = ureg_src(tx->regs.tS[param->idx]);
804 } else {
805 tx_texcoord_alloc(tx, param->idx);
806 src = tx->regs.vT[param->idx];
807 }
808 }
809 break;
810 case D3DSPR_INPUT:
811 if (IS_VS) {
812 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
813 } else {
814 if (tx->version.major < 3) {
815 assert(!param->rel);
816 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
817 param->idx,
818 TGSI_INTERPOLATE_PERSPECTIVE);
819 } else {
820 assert(!param->rel); /* TODO */
821 assert(param->idx < Elements(tx->regs.v));
822 src = tx->regs.v[param->idx];
823 }
824 }
825 break;
826 case D3DSPR_PREDICATE:
827 assert(!param->rel);
828 tx_pred_alloc(tx, param->idx);
829 src = ureg_src(tx->regs.p);
830 break;
831 case D3DSPR_SAMPLER:
832 assert(param->mod == NINED3DSPSM_NONE);
833 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
834 assert(!param->rel);
835 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
836 break;
837 case D3DSPR_CONST:
838 if (param->rel)
839 tx->indirect_const_access = TRUE;
840 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
841 if (!param->rel)
842 nine_info_mark_const_f_used(tx->info, param->idx);
843 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
844 }
845 if (!IS_VS && tx->version.major < 2) {
846 /* ps 1.X clamps constants */
847 tmp = tx_scratch(tx);
848 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
849 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
850 src = ureg_src(tmp);
851 }
852 break;
853 case D3DSPR_CONST2:
854 case D3DSPR_CONST3:
855 case D3DSPR_CONST4:
856 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
857 assert(!"CONST2/3/4");
858 src = ureg_imm1f(ureg, 0.0f);
859 break;
860 case D3DSPR_CONSTINT:
861 if (param->rel || !tx_lconsti(tx, &src, param->idx)) {
862 if (!param->rel)
863 nine_info_mark_const_i_used(tx->info, param->idx);
864 src = ureg_src_register(TGSI_FILE_CONSTANT,
865 tx->info->const_i_base + param->idx);
866 }
867 break;
868 case D3DSPR_CONSTBOOL:
869 if (param->rel || !tx_lconstb(tx, &src, param->idx)) {
870 char r = param->idx / 4;
871 char s = param->idx & 3;
872 if (!param->rel)
873 nine_info_mark_const_b_used(tx->info, param->idx);
874 src = ureg_src_register(TGSI_FILE_CONSTANT,
875 tx->info->const_b_base + r);
876 src = ureg_swizzle(src, s, s, s, s);
877 }
878 break;
879 case D3DSPR_LOOP:
880 if (ureg_dst_is_undef(tx->regs.address))
881 tx->regs.address = ureg_DECL_address(ureg);
882 if (!tx->native_integers)
883 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
884 else
885 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
886 src = ureg_src(tx->regs.address);
887 break;
888 case D3DSPR_MISCTYPE:
889 switch (param->idx) {
890 case D3DSMO_POSITION:
891 if (ureg_src_is_undef(tx->regs.vPos))
892 tx->regs.vPos = ureg_DECL_fs_input(ureg,
893 TGSI_SEMANTIC_POSITION, 0,
894 TGSI_INTERPOLATE_LINEAR);
895 if (tx->shift_wpos) {
896 /* TODO: do this only once */
897 struct ureg_dst wpos = tx_scratch(tx);
898 ureg_SUB(ureg, wpos, tx->regs.vPos,
899 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
900 src = ureg_src(wpos);
901 } else {
902 src = tx->regs.vPos;
903 }
904 break;
905 case D3DSMO_FACE:
906 if (ureg_src_is_undef(tx->regs.vFace)) {
907 tx->regs.vFace = ureg_DECL_fs_input(ureg,
908 TGSI_SEMANTIC_FACE, 0,
909 TGSI_INTERPOLATE_CONSTANT);
910 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
911 }
912 src = tx->regs.vFace;
913 break;
914 default:
915 assert(!"invalid src D3DSMO");
916 break;
917 }
918 assert(!param->rel);
919 break;
920 case D3DSPR_TEMPFLOAT16:
921 break;
922 default:
923 assert(!"invalid src D3DSPR");
924 }
925 if (param->rel)
926 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
927
928 switch (param->mod) {
929 case NINED3DSPSM_DW:
930 tmp = tx_scratch(tx);
931 /* NOTE: app is not allowed to read w with this modifier */
932 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
933 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
934 src = ureg_src(tmp);
935 break;
936 case NINED3DSPSM_DZ:
937 tmp = tx_scratch(tx);
938 /* NOTE: app is not allowed to read z with this modifier */
939 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
940 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
941 src = ureg_src(tmp);
942 break;
943 default:
944 break;
945 }
946
947 if (param->swizzle != NINED3DSP_NOSWIZZLE)
948 src = ureg_swizzle(src,
949 (param->swizzle >> 0) & 0x3,
950 (param->swizzle >> 2) & 0x3,
951 (param->swizzle >> 4) & 0x3,
952 (param->swizzle >> 6) & 0x3);
953
954 switch (param->mod) {
955 case NINED3DSPSM_ABS:
956 src = ureg_abs(src);
957 break;
958 case NINED3DSPSM_ABSNEG:
959 src = ureg_negate(ureg_abs(src));
960 break;
961 case NINED3DSPSM_NEG:
962 src = ureg_negate(src);
963 break;
964 case NINED3DSPSM_BIAS:
965 tmp = tx_scratch(tx);
966 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
967 src = ureg_src(tmp);
968 break;
969 case NINED3DSPSM_BIASNEG:
970 tmp = tx_scratch(tx);
971 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
972 src = ureg_src(tmp);
973 break;
974 case NINED3DSPSM_NOT:
975 if (tx->native_integers) {
976 tmp = tx_scratch(tx);
977 ureg_NOT(ureg, tmp, src);
978 src = ureg_src(tmp);
979 break;
980 }
981 /* fall through */
982 case NINED3DSPSM_COMP:
983 tmp = tx_scratch(tx);
984 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
985 src = ureg_src(tmp);
986 break;
987 case NINED3DSPSM_DZ:
988 case NINED3DSPSM_DW:
989 /* Already handled*/
990 break;
991 case NINED3DSPSM_SIGN:
992 tmp = tx_scratch(tx);
993 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
994 src = ureg_src(tmp);
995 break;
996 case NINED3DSPSM_SIGNNEG:
997 tmp = tx_scratch(tx);
998 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
999 src = ureg_src(tmp);
1000 break;
1001 case NINED3DSPSM_X2:
1002 tmp = tx_scratch(tx);
1003 ureg_ADD(ureg, tmp, src, src);
1004 src = ureg_src(tmp);
1005 break;
1006 case NINED3DSPSM_X2NEG:
1007 tmp = tx_scratch(tx);
1008 ureg_ADD(ureg, tmp, src, src);
1009 src = ureg_negate(ureg_src(tmp));
1010 break;
1011 default:
1012 assert(param->mod == NINED3DSPSM_NONE);
1013 break;
1014 }
1015
1016 return src;
1017 }
1018
1019 static struct ureg_dst
1020 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1021 {
1022 struct ureg_dst dst;
1023
1024 switch (param->file)
1025 {
1026 case D3DSPR_TEMP:
1027 assert(!param->rel);
1028 tx_temp_alloc(tx, param->idx);
1029 dst = tx->regs.r[param->idx];
1030 break;
1031 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1032 case D3DSPR_ADDR:
1033 assert(!param->rel);
1034 if (tx->version.major < 2 && !IS_VS) {
1035 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1036 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1037 dst = tx->regs.tS[param->idx];
1038 } else
1039 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1040 tx_texcoord_alloc(tx, param->idx);
1041 dst = ureg_dst(tx->regs.vT[param->idx]);
1042 } else {
1043 tx_addr_alloc(tx, param->idx);
1044 dst = tx->regs.a0;
1045 }
1046 break;
1047 case D3DSPR_RASTOUT:
1048 assert(!param->rel);
1049 switch (param->idx) {
1050 case 0:
1051 if (ureg_dst_is_undef(tx->regs.oPos))
1052 tx->regs.oPos =
1053 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1054 dst = tx->regs.oPos;
1055 break;
1056 case 1:
1057 if (ureg_dst_is_undef(tx->regs.oFog))
1058 tx->regs.oFog =
1059 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1060 dst = tx->regs.oFog;
1061 break;
1062 case 2:
1063 if (ureg_dst_is_undef(tx->regs.oPts))
1064 tx->regs.oPts =
1065 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1066 dst = tx->regs.oPts;
1067 break;
1068 default:
1069 assert(0);
1070 break;
1071 }
1072 break;
1073 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1074 case D3DSPR_OUTPUT:
1075 if (tx->version.major < 3) {
1076 assert(!param->rel);
1077 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1078 } else {
1079 assert(!param->rel); /* TODO */
1080 assert(param->idx < Elements(tx->regs.o));
1081 dst = tx->regs.o[param->idx];
1082 }
1083 break;
1084 case D3DSPR_ATTROUT: /* VS */
1085 case D3DSPR_COLOROUT: /* PS */
1086 assert(param->idx >= 0 && param->idx < 4);
1087 assert(!param->rel);
1088 tx->info->rt_mask |= 1 << param->idx;
1089 if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
1090 tx->regs.oCol[param->idx] =
1091 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1092 dst = tx->regs.oCol[param->idx];
1093 if (IS_VS && tx->version.major < 3)
1094 dst = ureg_saturate(dst);
1095 break;
1096 case D3DSPR_DEPTHOUT:
1097 assert(!param->rel);
1098 if (ureg_dst_is_undef(tx->regs.oDepth))
1099 tx->regs.oDepth =
1100 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1101 TGSI_WRITEMASK_Z);
1102 dst = tx->regs.oDepth; /* XXX: must write .z component */
1103 break;
1104 case D3DSPR_PREDICATE:
1105 assert(!param->rel);
1106 tx_pred_alloc(tx, param->idx);
1107 dst = tx->regs.p;
1108 break;
1109 case D3DSPR_TEMPFLOAT16:
1110 DBG("unhandled D3DSPR: %u\n", param->file);
1111 break;
1112 default:
1113 assert(!"invalid dst D3DSPR");
1114 break;
1115 }
1116 if (param->rel)
1117 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1118
1119 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1120 dst = ureg_writemask(dst, param->mask);
1121 if (param->mod & NINED3DSPDM_SATURATE)
1122 dst = ureg_saturate(dst);
1123
1124 return dst;
1125 }
1126
1127 static struct ureg_dst
1128 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1129 {
1130 if (param->shift) {
1131 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1132 return tx->regs.tdst;
1133 }
1134 return _tx_dst_param(tx, param);
1135 }
1136
1137 static void
1138 tx_apply_dst0_modifiers(struct shader_translator *tx)
1139 {
1140 struct ureg_dst rdst;
1141 float f;
1142
1143 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1144 return;
1145 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1146
1147 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1148
1149 if (tx->insn.dst[0].shift < 0)
1150 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1151 else
1152 f = 1 << tx->insn.dst[0].shift;
1153
1154 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1155 }
1156
1157 static struct ureg_src
1158 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1159 {
1160 struct ureg_src src;
1161
1162 assert(!param->shift);
1163 assert(!(param->mod & NINED3DSPDM_SATURATE));
1164
1165 switch (param->file) {
1166 case D3DSPR_INPUT:
1167 if (IS_VS) {
1168 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1169 } else {
1170 assert(!param->rel);
1171 assert(param->idx < Elements(tx->regs.v));
1172 src = tx->regs.v[param->idx];
1173 }
1174 break;
1175 default:
1176 src = ureg_src(tx_dst_param(tx, param));
1177 break;
1178 }
1179 if (param->rel)
1180 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1181
1182 if (!param->mask)
1183 WARN("mask is 0, using identity swizzle\n");
1184
1185 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1186 char s[4];
1187 int n;
1188 int c;
1189 for (n = 0, c = 0; c < 4; ++c)
1190 if (param->mask & (1 << c))
1191 s[n++] = c;
1192 assert(n);
1193 for (c = n; c < 4; ++c)
1194 s[c] = s[n - 1];
1195 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1196 }
1197 return src;
1198 }
1199
1200 static HRESULT
1201 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1202 {
1203 struct ureg_program *ureg = tx->ureg;
1204 struct ureg_dst dst;
1205 struct ureg_src src[2];
1206 struct sm1_src_param *src_mat = &tx->insn.src[1];
1207 unsigned i;
1208
1209 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1210 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1211
1212 for (i = 0; i < n; i++)
1213 {
1214 const unsigned m = (1 << i);
1215
1216 src[1] = tx_src_param(tx, src_mat);
1217 src_mat->idx++;
1218
1219 if (!(dst.WriteMask & m))
1220 continue;
1221
1222 /* XXX: src == dst case ? */
1223
1224 switch (k) {
1225 case 3:
1226 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1227 break;
1228 case 4:
1229 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1230 break;
1231 default:
1232 DBG("invalid operation: M%ux%u\n", m, n);
1233 break;
1234 }
1235 }
1236
1237 return D3D_OK;
1238 }
1239
/* Expands to two zero arguments, filling a (min, max) version pair.
 * NOTE(review): presumably marks an opcode as unavailable for a shader stage
 * in the instruction table -- confirm against its uses. */
#define VNOTSUPPORTED 0, 0
/* Pack a shader version number as (major << 8) | minor. */
#define V(maj, min) (((maj) << 8) | (min))
1242
1243 static INLINE const char *
1244 d3dsio_to_string( unsigned opcode )
1245 {
1246 static const char *names[] = {
1247 "NOP",
1248 "MOV",
1249 "ADD",
1250 "SUB",
1251 "MAD",
1252 "MUL",
1253 "RCP",
1254 "RSQ",
1255 "DP3",
1256 "DP4",
1257 "MIN",
1258 "MAX",
1259 "SLT",
1260 "SGE",
1261 "EXP",
1262 "LOG",
1263 "LIT",
1264 "DST",
1265 "LRP",
1266 "FRC",
1267 "M4x4",
1268 "M4x3",
1269 "M3x4",
1270 "M3x3",
1271 "M3x2",
1272 "CALL",
1273 "CALLNZ",
1274 "LOOP",
1275 "RET",
1276 "ENDLOOP",
1277 "LABEL",
1278 "DCL",
1279 "POW",
1280 "CRS",
1281 "SGN",
1282 "ABS",
1283 "NRM",
1284 "SINCOS",
1285 "REP",
1286 "ENDREP",
1287 "IF",
1288 "IFC",
1289 "ELSE",
1290 "ENDIF",
1291 "BREAK",
1292 "BREAKC",
1293 "MOVA",
1294 "DEFB",
1295 "DEFI",
1296 NULL,
1297 NULL,
1298 NULL,
1299 NULL,
1300 NULL,
1301 NULL,
1302 NULL,
1303 NULL,
1304 NULL,
1305 NULL,
1306 NULL,
1307 NULL,
1308 NULL,
1309 NULL,
1310 NULL,
1311 "TEXCOORD",
1312 "TEXKILL",
1313 "TEX",
1314 "TEXBEM",
1315 "TEXBEML",
1316 "TEXREG2AR",
1317 "TEXREG2GB",
1318 "TEXM3x2PAD",
1319 "TEXM3x2TEX",
1320 "TEXM3x3PAD",
1321 "TEXM3x3TEX",
1322 NULL,
1323 "TEXM3x3SPEC",
1324 "TEXM3x3VSPEC",
1325 "EXPP",
1326 "LOGP",
1327 "CND",
1328 "DEF",
1329 "TEXREG2RGB",
1330 "TEXDP3TEX",
1331 "TEXM3x2DEPTH",
1332 "TEXDP3",
1333 "TEXM3x3",
1334 "TEXDEPTH",
1335 "CMP",
1336 "BEM",
1337 "DP2ADD",
1338 "DSX",
1339 "DSY",
1340 "TEXLDD",
1341 "SETP",
1342 "TEXLDL",
1343 "BREAKP"
1344 };
1345
1346 if (opcode < Elements(names)) return names[opcode];
1347
1348 switch (opcode) {
1349 case D3DSIO_PHASE: return "PHASE";
1350 case D3DSIO_COMMENT: return "COMMENT";
1351 case D3DSIO_END: return "END";
1352 default:
1353 return NULL;
1354 }
1355 }
1356
/* All-zero initializer for an instruction-table entry. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when any of the entry's vertex/fragment version bounds is
 * non-zero. */
#define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
                                     (inst).vert_version.max | \
                                     (inst).frag_version.min | \
                                     (inst).frag_version.max)

/* Name of the translation handler for opcode 'name'. */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the handler for 'name'; the body that follows
 * receives the translator as 'tx' and returns an HRESULT. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration; some handlers below fall back to this one. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1372
1373 DECL_SPECIAL(M4x4)
1374 {
1375 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1376 }
1377
1378 DECL_SPECIAL(M4x3)
1379 {
1380 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1381 }
1382
1383 DECL_SPECIAL(M3x4)
1384 {
1385 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1386 }
1387
1388 DECL_SPECIAL(M3x3)
1389 {
1390 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1391 }
1392
1393 DECL_SPECIAL(M3x2)
1394 {
1395 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1396 }
1397
1398 DECL_SPECIAL(CMP)
1399 {
1400 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1401 tx_src_param(tx, &tx->insn.src[0]),
1402 tx_src_param(tx, &tx->insn.src[2]),
1403 tx_src_param(tx, &tx->insn.src[1]));
1404 return D3D_OK;
1405 }
1406
1407 DECL_SPECIAL(CND)
1408 {
1409 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1410 struct ureg_dst cgt;
1411 struct ureg_src cnd;
1412
1413 /* the coissue flag was a tip for compilers to advise to
1414 * execute two operations at the same time, in cases
1415 * the two executions had same dst with different channels.
1416 * It has no effect on current hw. However it seems CND
1417 * is affected. The handling of this very specific case
1418 * handled below mimick wine behaviour */
1419 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1420 ureg_MOV(tx->ureg,
1421 dst, tx_src_param(tx, &tx->insn.src[1]));
1422 return D3D_OK;
1423 }
1424
1425 cnd = tx_src_param(tx, &tx->insn.src[0]);
1426 cgt = tx_scratch(tx);
1427
1428 if (tx->version.major == 1 && tx->version.minor < 4)
1429 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1430
1431 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1432
1433 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1434 tx_src_param(tx, &tx->insn.src[1]),
1435 tx_src_param(tx, &tx->insn.src[2]));
1436 return D3D_OK;
1437 }
1438
1439 DECL_SPECIAL(CALL)
1440 {
1441 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1442 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1443 return D3D_OK;
1444 }
1445
1446 DECL_SPECIAL(CALLNZ)
1447 {
1448 struct ureg_program *ureg = tx->ureg;
1449 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1450
1451 if (!tx->native_integers)
1452 ureg_IF(ureg, src, tx_cond(tx));
1453 else
1454 ureg_UIF(ureg, src, tx_cond(tx));
1455 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1456 tx_endcond(tx);
1457 ureg_ENDIF(ureg);
1458 return D3D_OK;
1459 }
1460
1461 DECL_SPECIAL(MOV_vs1x)
1462 {
1463 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1464 /* Implementation note: We don't write directly
1465 * to the addr register, but to an intermediate
1466 * float register.
1467 * Contrary to the doc, when writing to ADDR here,
1468 * the rounding is not to nearest, but to lowest
1469 * (wine test).
1470 * Since we use ARR next, substract 0.5. */
1471 ureg_SUB(tx->ureg,
1472 tx_dst_param(tx, &tx->insn.dst[0]),
1473 tx_src_param(tx, &tx->insn.src[0]),
1474 ureg_imm1f(tx->ureg, 0.5f));
1475 return D3D_OK;
1476 }
1477 return NineTranslateInstruction_Generic(tx);
1478 }
1479
1480 DECL_SPECIAL(LOOP)
1481 {
1482 struct ureg_program *ureg = tx->ureg;
1483 unsigned *label;
1484 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1485 struct ureg_dst ctr;
1486 struct ureg_dst tmp;
1487 struct ureg_src ctrx;
1488
1489 label = tx_bgnloop(tx);
1490 ctr = tx_get_loopctr(tx, TRUE);
1491 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1492
1493 /* src: num_iterations - start_value of al - step for al - 0 */
1494 ureg_MOV(ureg, ctr, src);
1495 ureg_BGNLOOP(tx->ureg, label);
1496 tmp = tx_scratch_scalar(tx);
1497 /* Initially ctr.x contains the number of iterations.
1498 * ctr.y will contain the updated value of al.
1499 * We decrease ctr.x at the end of every iteration,
1500 * and stop when it reaches 0. */
1501
1502 if (!tx->native_integers) {
1503 /* case src and ctr contain floats */
1504 /* to avoid precision issue, we stop when ctr <= 0.5 */
1505 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1506 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1507 } else {
1508 /* case src and ctr contain integers */
1509 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1510 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1511 }
1512 ureg_BRK(ureg);
1513 tx_endcond(tx);
1514 ureg_ENDIF(ureg);
1515 return D3D_OK;
1516 }
1517
1518 DECL_SPECIAL(RET)
1519 {
1520 ureg_RET(tx->ureg);
1521 return D3D_OK;
1522 }
1523
1524 DECL_SPECIAL(ENDLOOP)
1525 {
1526 struct ureg_program *ureg = tx->ureg;
1527 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1528 struct ureg_dst dst_ctrx, dst_al;
1529 struct ureg_src src_ctr, al_counter;
1530
1531 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1532 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1533 src_ctr = ureg_src(ctr);
1534 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1535
1536 /* ctr.x -= 1
1537 * ctr.y (aL) += step */
1538 if (!tx->native_integers) {
1539 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1540 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1541 } else {
1542 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1543 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1544 }
1545 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1546 return D3D_OK;
1547 }
1548
1549 DECL_SPECIAL(LABEL)
1550 {
1551 unsigned k = tx->num_inst_labels;
1552 unsigned n = tx->insn.src[0].idx;
1553 assert(n < 2048);
1554 if (n >= k)
1555 tx->inst_labels = REALLOC(tx->inst_labels,
1556 k * sizeof(tx->inst_labels[0]),
1557 n * sizeof(tx->inst_labels[0]));
1558
1559 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1560 return D3D_OK;
1561 }
1562
1563 DECL_SPECIAL(SINCOS)
1564 {
1565 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1566 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1567
1568 assert(!(dst.WriteMask & 0xc));
1569
1570 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1571 ureg_SCS(tx->ureg, dst, src);
1572 return D3D_OK;
1573 }
1574
1575 DECL_SPECIAL(SGN)
1576 {
1577 ureg_SSG(tx->ureg,
1578 tx_dst_param(tx, &tx->insn.dst[0]),
1579 tx_src_param(tx, &tx->insn.src[0]));
1580 return D3D_OK;
1581 }
1582
1583 DECL_SPECIAL(REP)
1584 {
1585 struct ureg_program *ureg = tx->ureg;
1586 unsigned *label;
1587 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1588 struct ureg_dst ctr;
1589 struct ureg_dst tmp;
1590 struct ureg_src ctrx;
1591
1592 label = tx_bgnloop(tx);
1593 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1594 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1595
1596 /* NOTE: rep must be constant, so we don't have to save the count */
1597 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1598
1599 /* rep: num_iterations - 0 - 0 - 0 */
1600 ureg_MOV(ureg, ctr, rep);
1601 ureg_BGNLOOP(ureg, label);
1602 tmp = tx_scratch_scalar(tx);
1603 /* Initially ctr.x contains the number of iterations.
1604 * We decrease ctr.x at the end of every iteration,
1605 * and stop when it reaches 0. */
1606
1607 if (!tx->native_integers) {
1608 /* case src and ctr contain floats */
1609 /* to avoid precision issue, we stop when ctr <= 0.5 */
1610 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1611 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1612 } else {
1613 /* case src and ctr contain integers */
1614 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1615 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1616 }
1617 ureg_BRK(ureg);
1618 tx_endcond(tx);
1619 ureg_ENDIF(ureg);
1620
1621 return D3D_OK;
1622 }
1623
1624 DECL_SPECIAL(ENDREP)
1625 {
1626 struct ureg_program *ureg = tx->ureg;
1627 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1628 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1629 struct ureg_src src_ctr = ureg_src(ctr);
1630
1631 /* ctr.x -= 1 */
1632 if (!tx->native_integers)
1633 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1634 else
1635 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1636
1637 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1638 return D3D_OK;
1639 }
1640
1641 DECL_SPECIAL(ENDIF)
1642 {
1643 tx_endcond(tx);
1644 ureg_ENDIF(tx->ureg);
1645 return D3D_OK;
1646 }
1647
1648 DECL_SPECIAL(IF)
1649 {
1650 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1651
1652 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1653 ureg_UIF(tx->ureg, src, tx_cond(tx));
1654 else
1655 ureg_IF(tx->ureg, src, tx_cond(tx));
1656
1657 return D3D_OK;
1658 }
1659
1660 static INLINE unsigned
1661 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1662 {
1663 switch (flags) {
1664 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1665 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1666 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1667 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1668 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1669 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1670 default:
1671 assert(!"invalid comparison flags");
1672 return TGSI_OPCODE_SGT;
1673 }
1674 }
1675
1676 DECL_SPECIAL(IFC)
1677 {
1678 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1679 struct ureg_src src[2];
1680 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1681 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1682 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1683 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1684 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1685 return D3D_OK;
1686 }
1687
1688 DECL_SPECIAL(ELSE)
1689 {
1690 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1691 return D3D_OK;
1692 }
1693
1694 DECL_SPECIAL(BREAKC)
1695 {
1696 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1697 struct ureg_src src[2];
1698 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1699 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1700 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1701 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1702 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1703 ureg_BRK(tx->ureg);
1704 tx_endcond(tx);
1705 ureg_ENDIF(tx->ureg);
1706 return D3D_OK;
1707 }
1708
/* Printable names of the D3DDECLUSAGE_* values, indexed by usage value.
 * Only the usages listed here have an entry; callers must not index the
 * table with a larger value. */
static const char *sm1_declusage_names[] =
{
    [D3DDECLUSAGE_POSITION] = "POSITION",
    [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
    [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
    [D3DDECLUSAGE_NORMAL] = "NORMAL",
    [D3DDECLUSAGE_PSIZE] = "PSIZE",
    [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
    [D3DDECLUSAGE_TANGENT] = "TANGENT",
    [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
    [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
    [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
    [D3DDECLUSAGE_COLOR] = "COLOR",
    [D3DDECLUSAGE_FOG] = "FOG",
    [D3DDECLUSAGE_DEPTH] = "DEPTH",
    [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
1726
1727 static INLINE unsigned
1728 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1729 {
1730 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1731 }
1732
1733 static void
1734 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1735 boolean tc,
1736 struct sm1_semantic *dcl)
1737 {
1738 BYTE index = dcl->usage_idx;
1739
1740 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1741 * we match to a TGSI_SEMANTIC_GENERIC with index.
1742 *
1743 * The index can be anything UINT16 and usage_idx is BYTE,
1744 * so we can fit everything. It doesn't matter if indices
1745 * are close together or low.
1746 *
1747 *
1748 * POSITION >= 1: 10 * index + 6
1749 * COLOR >= 2: 10 * (index-1) + 7
1750 * TEXCOORD[0..15]: index
1751 * BLENDWEIGHT: 10 * index + 18
1752 * BLENDINDICES: 10 * index + 19
1753 * NORMAL: 10 * index + 20
1754 * TANGENT: 10 * index + 21
1755 * BINORMAL: 10 * index + 22
1756 * TESSFACTOR: 10 * index + 23
1757 */
1758
1759 switch (dcl->usage) {
1760 case D3DDECLUSAGE_POSITION:
1761 case D3DDECLUSAGE_POSITIONT:
1762 case D3DDECLUSAGE_DEPTH:
1763 if (index == 0) {
1764 sem->Name = TGSI_SEMANTIC_POSITION;
1765 sem->Index = 0;
1766 } else {
1767 sem->Name = TGSI_SEMANTIC_GENERIC;
1768 sem->Index = 10 * index + 6;
1769 }
1770 break;
1771 case D3DDECLUSAGE_COLOR:
1772 if (index < 2) {
1773 sem->Name = TGSI_SEMANTIC_COLOR;
1774 sem->Index = index;
1775 } else {
1776 sem->Name = TGSI_SEMANTIC_GENERIC;
1777 sem->Index = 10 * (index-1) + 7;
1778 }
1779 break;
1780 case D3DDECLUSAGE_FOG:
1781 assert(index == 0);
1782 sem->Name = TGSI_SEMANTIC_FOG;
1783 sem->Index = 0;
1784 break;
1785 case D3DDECLUSAGE_PSIZE:
1786 assert(index == 0);
1787 sem->Name = TGSI_SEMANTIC_PSIZE;
1788 sem->Index = 0;
1789 break;
1790 case D3DDECLUSAGE_TEXCOORD:
1791 assert(index < 16);
1792 if (index < 8 && tc)
1793 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1794 else
1795 sem->Name = TGSI_SEMANTIC_GENERIC;
1796 sem->Index = index;
1797 break;
1798 case D3DDECLUSAGE_BLENDWEIGHT:
1799 sem->Name = TGSI_SEMANTIC_GENERIC;
1800 sem->Index = 10 * index + 18;
1801 break;
1802 case D3DDECLUSAGE_BLENDINDICES:
1803 sem->Name = TGSI_SEMANTIC_GENERIC;
1804 sem->Index = 10 * index + 19;
1805 break;
1806 case D3DDECLUSAGE_NORMAL:
1807 sem->Name = TGSI_SEMANTIC_GENERIC;
1808 sem->Index = 10 * index + 20;
1809 break;
1810 case D3DDECLUSAGE_TANGENT:
1811 sem->Name = TGSI_SEMANTIC_GENERIC;
1812 sem->Index = 10 * index + 21;
1813 break;
1814 case D3DDECLUSAGE_BINORMAL:
1815 sem->Name = TGSI_SEMANTIC_GENERIC;
1816 sem->Index = 10 * index + 22;
1817 break;
1818 case D3DDECLUSAGE_TESSFACTOR:
1819 sem->Name = TGSI_SEMANTIC_GENERIC;
1820 sem->Index = 10 * index + 23;
1821 break;
1822 case D3DDECLUSAGE_SAMPLE:
1823 sem->Name = TGSI_SEMANTIC_COUNT;
1824 sem->Index = 0;
1825 break;
1826 default:
1827 assert(!"Invalid DECLUSAGE.");
1828 break;
1829 }
1830 }
1831
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT,
 * i.e. in the form the sampler_type helpers below compare against. */
#define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1836 static INLINE unsigned
1837 d3dstt_to_tgsi_tex(BYTE sampler_type)
1838 {
1839 switch (sampler_type) {
1840 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1841 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1842 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1843 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1844 default:
1845 assert(0);
1846 return TGSI_TEXTURE_UNKNOWN;
1847 }
1848 }
1849 static INLINE unsigned
1850 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1851 {
1852 switch (sampler_type) {
1853 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1854 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1855 case NINED3DSTT_VOLUME:
1856 case NINED3DSTT_CUBE:
1857 default:
1858 assert(0);
1859 return TGSI_TEXTURE_UNKNOWN;
1860 }
1861 }
1862 static INLINE unsigned
1863 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1864 {
1865 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1866 case 1: return TGSI_TEXTURE_1D;
1867 case 0: return TGSI_TEXTURE_2D;
1868 case 3: return TGSI_TEXTURE_3D;
1869 default:
1870 return TGSI_TEXTURE_CUBE;
1871 }
1872 }
1873
1874 static const char *
1875 sm1_sampler_type_name(BYTE sampler_type)
1876 {
1877 switch (sampler_type) {
1878 case NINED3DSTT_1D: return "1D";
1879 case NINED3DSTT_2D: return "2D";
1880 case NINED3DSTT_VOLUME: return "VOLUME";
1881 case NINED3DSTT_CUBE: return "CUBE";
1882 default:
1883 return "(D3DSTT_?)";
1884 }
1885 }
1886
1887 static INLINE unsigned
1888 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1889 {
1890 switch (sem->Name) {
1891 case TGSI_SEMANTIC_POSITION:
1892 case TGSI_SEMANTIC_NORMAL:
1893 return TGSI_INTERPOLATE_LINEAR;
1894 case TGSI_SEMANTIC_BCOLOR:
1895 case TGSI_SEMANTIC_COLOR:
1896 case TGSI_SEMANTIC_FOG:
1897 case TGSI_SEMANTIC_GENERIC:
1898 case TGSI_SEMANTIC_TEXCOORD:
1899 case TGSI_SEMANTIC_CLIPDIST:
1900 case TGSI_SEMANTIC_CLIPVERTEX:
1901 return TGSI_INTERPOLATE_PERSPECTIVE;
1902 case TGSI_SEMANTIC_EDGEFLAG:
1903 case TGSI_SEMANTIC_FACE:
1904 case TGSI_SEMANTIC_INSTANCEID:
1905 case TGSI_SEMANTIC_PCOORD:
1906 case TGSI_SEMANTIC_PRIMID:
1907 case TGSI_SEMANTIC_PSIZE:
1908 case TGSI_SEMANTIC_VERTEXID:
1909 return TGSI_INTERPOLATE_CONSTANT;
1910 default:
1911 assert(0);
1912 return TGSI_INTERPOLATE_CONSTANT;
1913 }
1914 }
1915
1916 DECL_SPECIAL(DCL)
1917 {
1918 struct ureg_program *ureg = tx->ureg;
1919 boolean is_input;
1920 boolean is_sampler;
1921 struct tgsi_declaration_semantic tgsi;
1922 struct sm1_semantic sem;
1923 sm1_read_semantic(tx, &sem);
1924
1925 is_input = sem.reg.file == D3DSPR_INPUT;
1926 is_sampler =
1927 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1928
1929 DUMP("DCL ");
1930 sm1_dump_dst_param(&sem.reg);
1931 if (is_sampler)
1932 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1933 else
1934 if (tx->version.major >= 3)
1935 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1936 else
1937 if (sem.usage | sem.usage_idx)
1938 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1939 else
1940 DUMP("\n");
1941
1942 if (is_sampler) {
1943 const unsigned m = 1 << sem.reg.idx;
1944 ureg_DECL_sampler(ureg, sem.reg.idx);
1945 tx->info->sampler_mask |= m;
1946 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1947 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1948 d3dstt_to_tgsi_tex(sem.sampler_type);
1949 return D3D_OK;
1950 }
1951
1952 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1953 if (IS_VS) {
1954 if (is_input) {
1955 /* linkage outside of shader with vertex declaration */
1956 ureg_DECL_vs_input(ureg, sem.reg.idx);
1957 assert(sem.reg.idx < Elements(tx->info->input_map));
1958 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1959 tx->info->num_inputs = sem.reg.idx + 1;
1960 /* NOTE: preserving order in case of indirect access */
1961 } else
1962 if (tx->version.major >= 3) {
1963 /* SM2 output semantic determined by file */
1964 assert(sem.reg.mask != 0);
1965 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1966 tx->info->position_t = TRUE;
1967 assert(sem.reg.idx < Elements(tx->regs.o));
1968 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1969 ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1970
1971 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1972 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1973 }
1974 } else {
1975 if (is_input && tx->version.major >= 3) {
1976 /* SM3 only, SM2 input semantic determined by file */
1977 assert(sem.reg.idx < Elements(tx->regs.v));
1978 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1979 ureg, tgsi.Name, tgsi.Index,
1980 nine_tgsi_to_interp_mode(&tgsi),
1981 0, /* cylwrap */
1982 sem.reg.mod & NINED3DSPDM_CENTROID);
1983 } else
1984 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
1985 /* FragColor or FragDepth */
1986 assert(sem.reg.mask != 0);
1987 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1988 }
1989 }
1990 return D3D_OK;
1991 }
1992
1993 DECL_SPECIAL(DEF)
1994 {
1995 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
1996 return D3D_OK;
1997 }
1998
1999 DECL_SPECIAL(DEFB)
2000 {
2001 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2002 return D3D_OK;
2003 }
2004
2005 DECL_SPECIAL(DEFI)
2006 {
2007 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2008 return D3D_OK;
2009 }
2010
2011 DECL_SPECIAL(POW)
2012 {
2013 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2014 struct ureg_src src[2] = {
2015 tx_src_param(tx, &tx->insn.src[0]),
2016 tx_src_param(tx, &tx->insn.src[1])
2017 };
2018 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2019 return D3D_OK;
2020 }
2021
2022 DECL_SPECIAL(RSQ)
2023 {
2024 struct ureg_program *ureg = tx->ureg;
2025 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2026 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2027 struct ureg_dst tmp = tx_scratch(tx);
2028 ureg_RSQ(ureg, tmp, ureg_abs(src));
2029 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2030 return D3D_OK;
2031 }
2032
2033 DECL_SPECIAL(LOG)
2034 {
2035 struct ureg_program *ureg = tx->ureg;
2036 struct ureg_dst tmp = tx_scratch_scalar(tx);
2037 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2038 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2039 ureg_LG2(ureg, tmp, ureg_abs(src));
2040 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2041 return D3D_OK;
2042 }
2043
2044 DECL_SPECIAL(NRM)
2045 {
2046 struct ureg_program *ureg = tx->ureg;
2047 struct ureg_dst tmp = tx_scratch_scalar(tx);
2048 struct ureg_src nrm = tx_src_scalar(tmp);
2049 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2050 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2051 ureg_DP3(ureg, tmp, src, src);
2052 ureg_RSQ(ureg, tmp, nrm);
2053 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2054 ureg_MUL(ureg, dst, src, nrm);
2055 return D3D_OK;
2056 }
2057
2058 DECL_SPECIAL(DP2ADD)
2059 {
2060 struct ureg_dst tmp = tx_scratch_scalar(tx);
2061 struct ureg_src dp2 = tx_src_scalar(tmp);
2062 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2063 struct ureg_src src[3];
2064 int i;
2065 for (i = 0; i < 3; ++i)
2066 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2067 assert_replicate_swizzle(&src[2]);
2068
2069 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2070 ureg_ADD(tx->ureg, dst, src[2], dp2);
2071
2072 return D3D_OK;
2073 }
2074
2075 DECL_SPECIAL(TEXCOORD)
2076 {
2077 struct ureg_program *ureg = tx->ureg;
2078 const unsigned s = tx->insn.dst[0].idx;
2079 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2080
2081 tx_texcoord_alloc(tx, s);
2082 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2083 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2084
2085 return D3D_OK;
2086 }
2087
2088 DECL_SPECIAL(TEXCOORD_ps14)
2089 {
2090 struct ureg_program *ureg = tx->ureg;
2091 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2092 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2093
2094 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2095
2096 ureg_MOV(ureg, dst, src);
2097
2098 return D3D_OK;
2099 }
2100
2101 DECL_SPECIAL(TEXKILL)
2102 {
2103 struct ureg_src reg;
2104
2105 if (tx->version.major > 1 || tx->version.minor > 3) {
2106 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2107 } else {
2108 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2109 reg = tx->regs.vT[tx->insn.dst[0].idx];
2110 }
2111 if (tx->version.major < 2)
2112 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2113 ureg_KILL_IF(tx->ureg, reg);
2114
2115 return D3D_OK;
2116 }
2117
/* Handlers for ps_1_x texture opcodes that have no translation yet.
 * NOTE(review): STUB is defined outside this chunk; presumably it reports
 * the missing implementation and returns the given HRESULT -- confirm. */

/* texbem: not implemented */
DECL_SPECIAL(TEXBEM)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texbeml: not implemented */
DECL_SPECIAL(TEXBEML)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texreg2ar: not implemented */
DECL_SPECIAL(TEXREG2AR)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texreg2gb: not implemented */
DECL_SPECIAL(TEXREG2GB)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texm3x2pad: emits nothing */
DECL_SPECIAL(TEXM3x2PAD)
{
    return D3D_OK; /* this is just padding */
}

/* texm3x2tex: not implemented */
DECL_SPECIAL(TEXM3x2TEX)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texm3x3pad: emits nothing */
DECL_SPECIAL(TEXM3x3PAD)
{
    return D3D_OK; /* this is just padding */
}

/* texm3x3spec: not implemented */
DECL_SPECIAL(TEXM3x3SPEC)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texm3x3vspec: not implemented */
DECL_SPECIAL(TEXM3x3VSPEC)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texreg2rgb: not implemented */
DECL_SPECIAL(TEXREG2RGB)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texdp3tex: not implemented */
DECL_SPECIAL(TEXDP3TEX)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texm3x2depth: not implemented */
DECL_SPECIAL(TEXM3x2DEPTH)
{
    STUB(D3DERR_INVALIDCALL);
}

/* texdp3: not implemented */
DECL_SPECIAL(TEXDP3)
{
    STUB(D3DERR_INVALIDCALL);
}
2182
2183 DECL_SPECIAL(TEXM3x3)
2184 {
2185 struct ureg_program *ureg = tx->ureg;
2186 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2187 struct ureg_src src[4];
2188 int s;
2189 const int m = tx->insn.dst[0].idx - 2;
2190 const int n = tx->insn.src[0].idx;
2191 assert(m >= 0 && m > n);
2192
2193 for (s = m; s <= (m + 2); ++s) {
2194 tx_texcoord_alloc(tx, s);
2195 src[s] = tx->regs.vT[s];
2196 }
2197 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), src[0], ureg_src(tx->regs.tS[n]));
2198 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), src[1], ureg_src(tx->regs.tS[n]));
2199 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), src[2], ureg_src(tx->regs.tS[n]));
2200
2201 switch (tx->insn.opcode) {
2202 case D3DSIO_TEXM3x3:
2203 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2204 break;
2205 case D3DSIO_TEXM3x3TEX:
2206 src[3] = ureg_DECL_sampler(ureg, m + 2);
2207 tx->info->sampler_mask |= 1 << (m + 2);
2208 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), src[3]);
2209 break;
2210 default:
2211 return D3DERR_INVALIDCALL;
2212 }
2213 return D3D_OK;
2214 }
2215
/* NOTE(review): STUB is defined outside this chunk; presumably it reports
 * the missing implementation and returns the given HRESULT -- confirm. */

/* texdepth: not implemented */
DECL_SPECIAL(TEXDEPTH)
{
    STUB(D3DERR_INVALIDCALL);
}

/* bem: not implemented */
DECL_SPECIAL(BEM)
{
    STUB(D3DERR_INVALIDCALL);
}
2225
2226 DECL_SPECIAL(TEXLD)
2227 {
2228 struct ureg_program *ureg = tx->ureg;
2229 unsigned target;
2230 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2231 struct ureg_src src[2] = {
2232 tx_src_param(tx, &tx->insn.src[0]),
2233 tx_src_param(tx, &tx->insn.src[1])
2234 };
2235 assert(tx->insn.src[1].idx >= 0 &&
2236 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2237 target = tx->sampler_targets[tx->insn.src[1].idx];
2238
2239 switch (tx->insn.flags) {
2240 case 0:
2241 ureg_TEX(ureg, dst, target, src[0], src[1]);
2242 break;
2243 case NINED3DSI_TEXLD_PROJECT:
2244 ureg_TXP(ureg, dst, target, src[0], src[1]);
2245 break;
2246 case NINED3DSI_TEXLD_BIAS:
2247 ureg_TXB(ureg, dst, target, src[0], src[1]);
2248 break;
2249 default:
2250 assert(0);
2251 return D3DERR_INVALIDCALL;
2252 }
2253 return D3D_OK;
2254 }
2255
2256 DECL_SPECIAL(TEXLD_14)
2257 {
2258 struct ureg_program *ureg = tx->ureg;
2259 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2260 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2261 const unsigned s = tx->insn.dst[0].idx;
2262 const unsigned t = ps1x_sampler_type(tx->info, s);
2263
2264 tx->info->sampler_mask |= 1 << s;
2265 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2266
2267 return D3D_OK;
2268 }
2269
2270 DECL_SPECIAL(TEX)
2271 {
2272 struct ureg_program *ureg = tx->ureg;
2273 const unsigned s = tx->insn.dst[0].idx;
2274 const unsigned t = ps1x_sampler_type(tx->info, s);
2275 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2276 struct ureg_src src[2];
2277
2278 tx_texcoord_alloc(tx, s);
2279
2280 src[0] = tx->regs.vT[s];
2281 src[1] = ureg_DECL_sampler(ureg, s);
2282 tx->info->sampler_mask |= 1 << s;
2283
2284 ureg_TEX(ureg, dst, t, src[0], src[1]);
2285
2286 return D3D_OK;
2287 }
2288
2289 DECL_SPECIAL(TEXLDD)
2290 {
2291 unsigned target;
2292 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2293 struct ureg_src src[4] = {
2294 tx_src_param(tx, &tx->insn.src[0]),
2295 tx_src_param(tx, &tx->insn.src[1]),
2296 tx_src_param(tx, &tx->insn.src[2]),
2297 tx_src_param(tx, &tx->insn.src[3])
2298 };
2299 assert(tx->insn.src[3].idx >= 0 &&
2300 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2301 target = tx->sampler_targets[tx->insn.src[1].idx];
2302
2303 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2304 return D3D_OK;
2305 }
2306
2307 DECL_SPECIAL(TEXLDL)
2308 {
2309 unsigned target;
2310 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2311 struct ureg_src src[2] = {
2312 tx_src_param(tx, &tx->insn.src[0]),
2313 tx_src_param(tx, &tx->insn.src[1])
2314 };
2315 assert(tx->insn.src[3].idx >= 0 &&
2316 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2317 target = tx->sampler_targets[tx->insn.src[1].idx];
2318
2319 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2320 return D3D_OK;
2321 }
2322
/* Handler for D3DSIO_SETP.
 * Not implemented: defers to STUB() with D3DERR_INVALIDCALL. */
DECL_SPECIAL(SETP)
{
    STUB(D3DERR_INVALIDCALL);
}
2327
/* Handler for D3DSIO_BREAKP.
 * Not implemented: defers to STUB() with D3DERR_INVALIDCALL. */
DECL_SPECIAL(BREAKP)
{
    STUB(D3DERR_INVALIDCALL);
}
2332
/* Handler for D3DSIO_PHASE: emits nothing and always succeeds. */
DECL_SPECIAL(PHASE)
{
    return D3D_OK; /* we don't care about phase */
}
2337
/* Handler for D3DSIO_COMMENT: emits nothing and always succeeds. */
DECL_SPECIAL(COMMENT)
{
    return D3D_OK; /* nothing to do */
}
2342
2343
/* Builds one struct sm1_op_info initializer:
 *   o        D3DSIO_ opcode suffix
 *   t        TGSI_OPCODE_ suffix
 *   vv1,vv2  first brace pair  (vertex shader version range -- assumed from
 *            the vert_version.min/.max uses in this file; confirm against the
 *            struct declaration)
 *   pv1,pv2  second brace pair (fragment shader version range -- same caveat)
 *   d, s     number of destination / source parameters
 *   h        handler function, or NULL
 */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2346
/* Instruction description table.
 *
 * Each row is _OPI(D3D opcode, TGSI opcode, VS min, VS max, PS min, PS max,
 * ndst, nsrc, handler). A NULL handler means the instruction is emitted by
 * NineTranslateInstruction_Generic() using the TGSI opcode directly.
 *
 * The same D3D opcode may appear in several rows with different version
 * ranges (e.g. MOV, TEX, TEXCOORD, SINCOS, EXPP); create_op_info_map()
 * selects the row whose range contains the version of the shader being
 * translated.
 */
struct sm1_op_info inst_table[] =
{
    _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
    _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
    _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
    _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
    _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
    _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
    _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
    _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
    _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
    _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
    _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
    _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
    _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
    _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
    _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
    _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
    _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
    _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
    _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */

    _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
    _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
    _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
    _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
    _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),

    _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
    _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
    _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
    _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
    _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
    _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),

    _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),

    _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
    _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
    _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
    _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */

    _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
    _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

    /* More flow control */
    _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
    _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
    _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
    _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
    _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
    _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
    _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
    _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
    /* we don't write to the address register, but a normal register (copied
     * when needed to the address register), thus we don't use ARR */
    _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),

    _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
    _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),

    _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
    _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
    _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
    _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
    _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
    _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
    _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
    _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
    _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
    _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
    _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3VSPEC)),

    _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
    _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),

    _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),

    /* More tex stuff */
    _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
    _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
    _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
    _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
    _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),

    /* Misc */
    _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
    _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
    _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
    _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
    _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
    _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
    _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
};
2454
/* Description of D3DSIO_PHASE. Kept outside inst_table: the parser selects it
 * explicitly when the opcode value is beyond the op_info_map range. */
struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2457
/* Description of D3DSIO_COMMENT. Kept outside inst_table: the parser selects
 * it explicitly when the opcode value is beyond the op_info_map range. */
struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2460
2461 static void
2462 create_op_info_map(struct shader_translator *tx)
2463 {
2464 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2465 unsigned i;
2466
2467 for (i = 0; i < Elements(tx->op_info_map); ++i)
2468 tx->op_info_map[i] = -1;
2469
2470 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2471 for (i = 0; i < Elements(inst_table); ++i) {
2472 assert(inst_table[i].sio < Elements(tx->op_info_map));
2473 if (inst_table[i].vert_version.min <= version &&
2474 inst_table[i].vert_version.max >= version)
2475 tx->op_info_map[inst_table[i].sio] = i;
2476 }
2477 } else {
2478 for (i = 0; i < Elements(inst_table); ++i) {
2479 assert(inst_table[i].sio < Elements(tx->op_info_map));
2480 if (inst_table[i].frag_version.min <= version &&
2481 inst_table[i].frag_version.max >= version)
2482 tx->op_info_map[inst_table[i].sio] = i;
2483 }
2484 }
2485 }
2486
2487 static INLINE HRESULT
2488 NineTranslateInstruction_Generic(struct shader_translator *tx)
2489 {
2490 struct ureg_dst dst[1];
2491 struct ureg_src src[4];
2492 unsigned i;
2493
2494 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2495 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2496 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2497 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2498
2499 ureg_insn(tx->ureg, tx->insn.info->opcode,
2500 dst, tx->insn.ndst,
2501 src, tx->insn.nsrc);
2502 return D3D_OK;
2503 }
2504
2505 static INLINE DWORD
2506 TOKEN_PEEK(struct shader_translator *tx)
2507 {
2508 return *(tx->parse);
2509 }
2510
2511 static INLINE DWORD
2512 TOKEN_NEXT(struct shader_translator *tx)
2513 {
2514 return *(tx->parse)++;
2515 }
2516
2517 static INLINE void
2518 TOKEN_JUMP(struct shader_translator *tx)
2519 {
2520 if (tx->parse_next && tx->parse != tx->parse_next) {
2521 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2522 tx->parse = tx->parse_next;
2523 }
2524 }
2525
2526 static INLINE boolean
2527 sm1_parse_eof(struct shader_translator *tx)
2528 {
2529 return TOKEN_PEEK(tx) == NINED3DSP_END;
2530 }
2531
2532 static void
2533 sm1_read_version(struct shader_translator *tx)
2534 {
2535 const DWORD tok = TOKEN_NEXT(tx);
2536
2537 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2538 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2539
2540 switch (tok >> 16) {
2541 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2542 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2543 default:
2544 DBG("Invalid shader type: %x\n", tok);
2545 tx->processor = ~0;
2546 break;
2547 }
2548 }
2549
2550 /* This is just to check if we parsed the instruction properly. */
2551 static void
2552 sm1_parse_get_skip(struct shader_translator *tx)
2553 {
2554 const DWORD tok = TOKEN_PEEK(tx);
2555
2556 if (tx->version.major >= 2) {
2557 tx->parse_next = tx->parse + 1 /* this */ +
2558 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2559 } else {
2560 tx->parse_next = NULL; /* TODO: determine from param count */
2561 }
2562 }
2563
2564 static void
2565 sm1_print_comment(const char *comment, UINT size)
2566 {
2567 if (!size)
2568 return;
2569 /* TODO */
2570 }
2571
2572 static void
2573 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2574 {
2575 DWORD tok = TOKEN_PEEK(tx);
2576
2577 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2578 {
2579 const char *comment = "";
2580 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2581 tx->parse += size + 1;
2582
2583 if (print)
2584 sm1_print_comment(comment, size);
2585
2586 tok = TOKEN_PEEK(tx);
2587 }
2588 }
2589
2590 static void
2591 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2592 {
2593 *reg = TOKEN_NEXT(tx);
2594
2595 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2596 {
2597 if (tx->version.major < 2)
2598 *rel = (1 << 31) |
2599 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2600 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2601 (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT);
2602 else
2603 *rel = TOKEN_NEXT(tx);
2604 }
2605 }
2606
2607 static void
2608 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2609 {
2610 uint8_t shift;
2611 dst->file =
2612 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2613 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2614 dst->type = TGSI_RETURN_TYPE_FLOAT;
2615 dst->idx = tok & D3DSP_REGNUM_MASK;
2616 dst->rel = NULL;
2617 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2618 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2619 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2620 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2621 }
2622
2623 static void
2624 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2625 {
2626 src->file =
2627 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2628 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2629 src->type = TGSI_RETURN_TYPE_FLOAT;
2630 src->idx = tok & D3DSP_REGNUM_MASK;
2631 src->rel = NULL;
2632 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2633 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2634
2635 switch (src->file) {
2636 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2637 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2638 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2639 default:
2640 break;
2641 }
2642 }
2643
2644 static void
2645 sm1_parse_immediate(struct shader_translator *tx,
2646 struct sm1_src_param *imm)
2647 {
2648 imm->file = NINED3DSPR_IMMEDIATE;
2649 imm->idx = INT_MIN;
2650 imm->rel = NULL;
2651 imm->swizzle = NINED3DSP_NOSWIZZLE;
2652 imm->mod = 0;
2653 switch (tx->insn.opcode) {
2654 case D3DSIO_DEF:
2655 imm->type = NINED3DSPTYPE_FLOAT4;
2656 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2657 tx->parse += 4;
2658 break;
2659 case D3DSIO_DEFI:
2660 imm->type = NINED3DSPTYPE_INT4;
2661 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2662 tx->parse += 4;
2663 break;
2664 case D3DSIO_DEFB:
2665 imm->type = NINED3DSPTYPE_BOOL;
2666 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2667 tx->parse += 1;
2668 break;
2669 default:
2670 assert(0);
2671 break;
2672 }
2673 }
2674
2675 static void
2676 sm1_read_dst_param(struct shader_translator *tx,
2677 struct sm1_dst_param *dst,
2678 struct sm1_src_param *rel)
2679 {
2680 DWORD tok_dst, tok_rel = 0;
2681
2682 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
2683 sm1_parse_dst_param(dst, tok_dst);
2684 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
2685 sm1_parse_src_param(rel, tok_rel);
2686 dst->rel = rel;
2687 }
2688 }
2689
2690 static void
2691 sm1_read_src_param(struct shader_translator *tx,
2692 struct sm1_src_param *src,
2693 struct sm1_src_param *rel)
2694 {
2695 DWORD tok_src, tok_rel = 0;
2696
2697 sm1_parse_get_param(tx, &tok_src, &tok_rel);
2698 sm1_parse_src_param(src, tok_src);
2699 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
2700 assert(rel);
2701 sm1_parse_src_param(rel, tok_rel);
2702 src->rel = rel;
2703 }
2704 }
2705
2706 static void
2707 sm1_read_semantic(struct shader_translator *tx,
2708 struct sm1_semantic *sem)
2709 {
2710 const DWORD tok_usg = TOKEN_NEXT(tx);
2711 const DWORD tok_dst = TOKEN_NEXT(tx);
2712
2713 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
2714 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
2715 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
2716
2717 sm1_parse_dst_param(&sem->reg, tok_dst);
2718 }
2719
2720 static void
2721 sm1_parse_instruction(struct shader_translator *tx)
2722 {
2723 struct sm1_instruction *insn = &tx->insn;
2724 DWORD tok;
2725 struct sm1_op_info *info = NULL;
2726 unsigned i;
2727
2728 sm1_parse_comments(tx, TRUE);
2729 sm1_parse_get_skip(tx);
2730
2731 tok = TOKEN_NEXT(tx);
2732
2733 insn->opcode = tok & D3DSI_OPCODE_MASK;
2734 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
2735 insn->coissue = !!(tok & D3DSI_COISSUE);
2736 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
2737
2738 if (insn->opcode < Elements(tx->op_info_map)) {
2739 int k = tx->op_info_map[insn->opcode];
2740 if (k >= 0) {
2741 assert(k < Elements(inst_table));
2742 info = &inst_table[k];
2743 }
2744 } else {
2745 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
2746 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
2747 }
2748 if (!info) {
2749 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
2750 TOKEN_JUMP(tx);
2751 return;
2752 }
2753 insn->info = info;
2754 insn->ndst = info->ndst;
2755 insn->nsrc = info->nsrc;
2756
2757 assert(!insn->predicated && "TODO: predicated instructions");
2758
2759 /* check version */
2760 {
2761 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
2762 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
2763 unsigned ver = (tx->version.major << 8) | tx->version.minor;
2764 if (ver < min || ver > max) {
2765 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
2766 min, ver, max);
2767 return;
2768 }
2769 }
2770
2771 for (i = 0; i < insn->ndst; ++i)
2772 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
2773 if (insn->predicated)
2774 sm1_read_src_param(tx, &insn->pred, NULL);
2775 for (i = 0; i < insn->nsrc; ++i)
2776 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
2777
2778 /* parse here so we can dump them before processing */
2779 if (insn->opcode == D3DSIO_DEF ||
2780 insn->opcode == D3DSIO_DEFI ||
2781 insn->opcode == D3DSIO_DEFB)
2782 sm1_parse_immediate(tx, &tx->insn.src[0]);
2783
2784 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
2785 sm1_instruction_check(insn);
2786
2787 if (info->handler)
2788 info->handler(tx);
2789 else
2790 NineTranslateInstruction_Generic(tx);
2791 tx_apply_dst0_modifiers(tx);
2792
2793 tx->num_scratch = 0; /* reset */
2794
2795 TOKEN_JUMP(tx);
2796 }
2797
2798 static void
2799 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
2800 {
2801 unsigned i;
2802
2803 tx->info = info;
2804
2805 tx->byte_code = info->byte_code;
2806 tx->parse = info->byte_code;
2807
2808 for (i = 0; i < Elements(info->input_map); ++i)
2809 info->input_map[i] = NINE_DECLUSAGE_NONE;
2810 info->num_inputs = 0;
2811
2812 info->position_t = FALSE;
2813 info->point_size = FALSE;
2814
2815 tx->info->const_used_size = 0;
2816
2817 info->sampler_mask = 0x0;
2818 info->rt_mask = 0x0;
2819
2820 info->lconstf.data = NULL;
2821 info->lconstf.ranges = NULL;
2822
2823 for (i = 0; i < Elements(tx->regs.rL); ++i) {
2824 tx->regs.rL[i] = ureg_dst_undef();
2825 }
2826 tx->regs.address = ureg_dst_undef();
2827 tx->regs.a0 = ureg_dst_undef();
2828 tx->regs.p = ureg_dst_undef();
2829 tx->regs.oDepth = ureg_dst_undef();
2830 tx->regs.vPos = ureg_src_undef();
2831 tx->regs.vFace = ureg_src_undef();
2832 for (i = 0; i < Elements(tx->regs.o); ++i)
2833 tx->regs.o[i] = ureg_dst_undef();
2834 for (i = 0; i < Elements(tx->regs.oCol); ++i)
2835 tx->regs.oCol[i] = ureg_dst_undef();
2836 for (i = 0; i < Elements(tx->regs.vC); ++i)
2837 tx->regs.vC[i] = ureg_src_undef();
2838 for (i = 0; i < Elements(tx->regs.vT); ++i)
2839 tx->regs.vT[i] = ureg_src_undef();
2840
2841 for (i = 0; i < Elements(tx->lconsti); ++i)
2842 tx->lconsti[i].idx = -1;
2843 for (i = 0; i < Elements(tx->lconstb); ++i)
2844 tx->lconstb[i].idx = -1;
2845
2846 sm1_read_version(tx);
2847
2848 info->version = (tx->version.major << 4) | tx->version.minor;
2849
2850 create_op_info_map(tx);
2851 }
2852
2853 static void
2854 tx_dtor(struct shader_translator *tx)
2855 {
2856 if (tx->num_inst_labels)
2857 FREE(tx->inst_labels);
2858 FREE(tx->lconstf);
2859 FREE(tx->regs.r);
2860 FREE(tx);
2861 }
2862
2863 static INLINE unsigned
2864 tgsi_processor_from_type(unsigned shader_type)
2865 {
2866 switch (shader_type) {
2867 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
2868 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
2869 default:
2870 return ~0;
2871 }
2872 }
2873
/* Query a screen-wide capability (PIPE_CAP_<n>).
 * Expects a variable named `device` in the enclosing scope. */
#define GET_CAP(n) device->screen->get_param( \
    device->screen, PIPE_CAP_##n)
/* Query a per-shader-type capability (PIPE_SHADER_CAP_<n>) for info->type.
 * Expects variables named `device` and `info` in the enclosing scope. */
#define GET_SHADER_CAP(n) device->screen->get_shader_param( \
    device->screen, info->type, PIPE_SHADER_CAP_##n)
2878
2879 HRESULT
2880 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
2881 {
2882 struct shader_translator *tx;
2883 HRESULT hr = D3D_OK;
2884 const unsigned processor = tgsi_processor_from_type(info->type);
2885
2886 user_assert(processor != ~0, D3DERR_INVALIDCALL);
2887
2888 tx = CALLOC_STRUCT(shader_translator);
2889 if (!tx)
2890 return E_OUTOFMEMORY;
2891 tx_ctor(tx, info);
2892
2893 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
2894 hr = D3DERR_INVALIDCALL;
2895 DBG("Unsupported shader version: %u.%u !\n",
2896 tx->version.major, tx->version.minor);
2897 goto out;
2898 }
2899 if (tx->processor != processor) {
2900 hr = D3DERR_INVALIDCALL;
2901 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
2902 goto out;
2903 }
2904 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
2905 tx->version.major, tx->version.minor);
2906
2907 tx->ureg = ureg_create(processor);
2908 if (!tx->ureg) {
2909 hr = E_OUTOFMEMORY;
2910 goto out;
2911 }
2912 tx_decl_constants(tx);
2913
2914 tx->native_integers = GET_SHADER_CAP(INTEGERS);
2915 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
2916 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
2917 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
2918 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2919 tx->texcoord_sn = tx->want_texcoord ?
2920 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
2921
2922 /* VS must always write position. Declare it here to make it the 1st output.
2923 * (Some drivers like nv50 are buggy and rely on that.)
2924 */
2925 if (IS_VS) {
2926 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
2927 } else {
2928 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
2929 if (!tx->shift_wpos)
2930 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
2931 }
2932
2933 while (!sm1_parse_eof(tx))
2934 sm1_parse_instruction(tx);
2935 tx->parse++; /* for byte_size */
2936
2937 if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
2938 ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
2939 ureg_src(tx->regs.r[0]));
2940 info->rt_mask |= 0x1;
2941 }
2942
2943 if (info->position_t)
2944 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
2945
2946 ureg_END(tx->ureg);
2947
2948 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
2949 info->point_size = TRUE;
2950
2951 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
2952 unsigned count;
2953 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
2954 tgsi_dump(toks, 0);
2955 ureg_free_tokens(toks);
2956 }
2957
2958 /* record local constants */
2959 if (tx->num_lconstf && tx->indirect_const_access) {
2960 struct nine_range *ranges;
2961 float *data;
2962 int *indices;
2963 unsigned i, k, n;
2964
2965 hr = E_OUTOFMEMORY;
2966
2967 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
2968 if (!data)
2969 goto out;
2970 info->lconstf.data = data;
2971
2972 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
2973 if (!indices)
2974 goto out;
2975
2976 /* lazy sort, num_lconstf should be small */
2977 for (n = 0; n < tx->num_lconstf; ++n) {
2978 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
2979 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
2980 k = i;
2981 }
2982 indices[n] = tx->lconstf[k].idx;
2983 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
2984 tx->lconstf[k].idx = INT_MAX;
2985 }
2986
2987 /* count ranges */
2988 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
2989 if (indices[i] != indices[i - 1] + 1)
2990 ++n;
2991 ranges = MALLOC(n * sizeof(ranges[0]));
2992 if (!ranges) {
2993 FREE(indices);
2994 goto out;
2995 }
2996 info->lconstf.ranges = ranges;
2997
2998 k = 0;
2999 ranges[k].bgn = indices[0];
3000 for (i = 1; i < tx->num_lconstf; ++i) {
3001 if (indices[i] != indices[i - 1] + 1) {
3002 ranges[k].next = &ranges[k + 1];
3003 ranges[k].end = indices[i - 1] + 1;
3004 ++k;
3005 ranges[k].bgn = indices[i];
3006 }
3007 }
3008 ranges[k].end = indices[i - 1] + 1;
3009 ranges[k].next = NULL;
3010 assert(n == (k + 1));
3011
3012 FREE(indices);
3013 hr = D3D_OK;
3014 }
3015
3016 if (tx->indirect_const_access)
3017 info->const_used_size = ~0;
3018
3019 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3020 if (!info->cso) {
3021 hr = D3DERR_DRIVERINTERNALERROR;
3022 FREE(info->lconstf.data);
3023 FREE(info->lconstf.ranges);
3024 goto out;
3025 }
3026
3027 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3028 out:
3029 tx_dtor(tx);
3030 return hr;
3031 }