st/nine: PositionT and Tessfactor are forbidden as PS input
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
36
/* Debug channel that DUMP() below hands to _nine_debug_printf(). */
#define DBG_CHANNEL DBG_SHADER

/* printf-style output on DBG_CHANNEL, passing NULL as the second argument
 * of _nine_debug_printf(); used by the sm1_dump_* helpers in this file. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)


/* Forward declaration so the handler typedef below can name the translator. */
struct shader_translator;

/* Signature of a per-instruction translation handler
 * (stored in sm1_op_info::handler). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Returns the string printed for an opcode by sm1_dump_instruction(). */
static inline const char *d3dsio_to_string(unsigned opcode);
47
48
/* Shader type tags.
 * NOTE(review): presumably the upper 16 bits of the version token - confirm
 * against the version parser. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Capacities of the cond_labels[] / loop_labels[] stacks in
 * struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

/* NOTE(review): looks like the end-of-shader token value - confirm. */
#define NINED3DSP_END 0x0000ffff

/* Values of sm1_src_param::type; they select which member of the immediate
 * union is valid (see sm1_dump_immediate). */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Pseudo register file used for immediates: one past D3DSPR_PREDICATE. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

/* Position of the write mask inside a destination parameter token. */
#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

/* Bit 28 of an instruction token: the instruction is predicated. */
#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operator codes (greater, equal, greater-or-equal, ...). */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* 8-bit opcode specific flags field, bits 16..23 of an instruction token. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values for D3DSIO_TEX (dumped as a "p" / "b" suffix). */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Write mask bits after shifting down: bit 0 = x ... bit 3 = w. */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle (.xyzw): two bits per component, x in the low bits. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma separated TGSI_SWIZZLE_* arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Source operand referring to slot 'index' of TGSI_FILE_CONSTANT. */
#define NINE_CONSTANT_SRC(index) \
    ureg_src_register(TGSI_FILE_CONSTANT, index)

/* Replicates component 's' of 'src' into all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Component 's' of constant 'index', replicated into all four channels. */
#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
    NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)

/* Destination modifier flags, shifted down so they fit sm1_dst_param::mod. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
104
/*
 * Source modifiers, shifted down so they fit sm1_src_param::mod and can be
 * used as indices into sm1_mod_str[]. Availability per shader version:
 *
 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP <= PS 1.4 (1-x)
 * X2 = PS 1.4 (2x)
 * X2NEG = PS 1.4 (-2x)
 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS >= SM 3.0 (abs(x))
 * ABSNEG >= SM 3.0 (-abs(x))
 * NOT >= SM 2.0 predication only
 */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
134
/* Printable name of each source modifier, indexed by NINED3DSPSM_*.
 * sm1_dump_src_param() prints the entry followed by "(" in front of the
 * register when the modifier is non-zero. */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
152
153 static void
154 sm1_dump_writemask(BYTE mask)
155 {
156 if (mask & 1) DUMP("x"); else DUMP("_");
157 if (mask & 2) DUMP("y"); else DUMP("_");
158 if (mask & 4) DUMP("z"); else DUMP("_");
159 if (mask & 8) DUMP("w"); else DUMP("_");
160 }
161
162 static void
163 sm1_dump_swizzle(BYTE s)
164 {
165 char c[4] = { 'x', 'y', 'z', 'w' };
166 DUMP("%c%c%c%c",
167 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
168 }
169
/* One-character prefix printed for each register file, indexed by D3DSPR_*.
 * The table ends at D3DSPR_PREDICATE, so NINED3DSPR_IMMEDIATE (and anything
 * larger) has no entry. Note that ATTROUT and DEPTHOUT share 'D'. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
193
194 static void
195 sm1_dump_reg(BYTE file, INT index)
196 {
197 switch (file) {
198 case D3DSPR_LOOP:
199 DUMP("aL");
200 break;
201 case D3DSPR_COLOROUT:
202 DUMP("oC%i", index);
203 break;
204 case D3DSPR_DEPTHOUT:
205 DUMP("oDepth");
206 break;
207 case D3DSPR_RASTOUT:
208 DUMP("oRast%i", index);
209 break;
210 case D3DSPR_CONSTINT:
211 DUMP("iconst[%i]", index);
212 break;
213 case D3DSPR_CONSTBOOL:
214 DUMP("bconst[%i]", index);
215 break;
216 default:
217 DUMP("%c%i", sm1_file_char[file], index);
218 break;
219 }
220 }
221
/* Decoded source operand of an SM1 instruction. */
struct sm1_src_param
{
    INT idx; /* register index inside 'file' */
    struct sm1_src_param *rel; /* relative addressing operand, or NULL */
    BYTE file; /* D3DSPR_* or NINED3DSPR_IMMEDIATE */
    BYTE swizzle; /* 2 bits per component, see sm1_dump_swizzle() */
    BYTE mod; /* NINED3DSPSM_* source modifier */
    BYTE type; /* NINED3DSPTYPE_*: selects the valid 'imm' member */
    /* Immediate payload; dumped by sm1_dump_immediate() according to 'type'. */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm;
};
/* Prototype only; the definition is not in this part of the file. */
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
239
/* Decoded destination operand of an SM1 instruction. */
struct sm1_dst_param
{
    INT idx; /* register index inside 'file' */
    struct sm1_src_param *rel; /* relative addressing operand, or NULL */
    BYTE file; /* D3DSPR_* register file */
    BYTE mask; /* write mask, bit 0 = x ... bit 3 = w */
    BYTE mod; /* NINED3DSPDM_* flags (saturate, partial precision, centroid) */
    int8_t shift; /* sint4 */
    BYTE type;
};
250
251 static inline void
252 assert_replicate_swizzle(const struct ureg_src *reg)
253 {
254 assert(reg->SwizzleY == reg->SwizzleX &&
255 reg->SwizzleZ == reg->SwizzleX &&
256 reg->SwizzleW == reg->SwizzleX);
257 }
258
259 static void
260 sm1_dump_immediate(const struct sm1_src_param *param)
261 {
262 switch (param->type) {
263 case NINED3DSPTYPE_FLOAT4:
264 DUMP("{ %f %f %f %f }",
265 param->imm.f[0], param->imm.f[1],
266 param->imm.f[2], param->imm.f[3]);
267 break;
268 case NINED3DSPTYPE_INT4:
269 DUMP("{ %i %i %i %i }",
270 param->imm.i[0], param->imm.i[1],
271 param->imm.i[2], param->imm.i[3]);
272 break;
273 case NINED3DSPTYPE_BOOL:
274 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
275 break;
276 default:
277 assert(0);
278 break;
279 }
280 }
281
282 static void
283 sm1_dump_src_param(const struct sm1_src_param *param)
284 {
285 if (param->file == NINED3DSPR_IMMEDIATE) {
286 assert(!param->mod &&
287 !param->rel &&
288 param->swizzle == NINED3DSP_NOSWIZZLE);
289 sm1_dump_immediate(param);
290 return;
291 }
292
293 if (param->mod)
294 DUMP("%s(", sm1_mod_str[param->mod]);
295 if (param->rel) {
296 DUMP("%c[", sm1_file_char[param->file]);
297 sm1_dump_src_param(param->rel);
298 DUMP("+%i]", param->idx);
299 } else {
300 sm1_dump_reg(param->file, param->idx);
301 }
302 if (param->mod)
303 DUMP(")");
304 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
305 DUMP(".");
306 sm1_dump_swizzle(param->swizzle);
307 }
308 }
309
310 static void
311 sm1_dump_dst_param(const struct sm1_dst_param *param)
312 {
313 if (param->mod & NINED3DSPDM_SATURATE)
314 DUMP("sat ");
315 if (param->mod & NINED3DSPDM_PARTIALP)
316 DUMP("pp ");
317 if (param->mod & NINED3DSPDM_CENTROID)
318 DUMP("centroid ");
319 if (param->shift < 0)
320 DUMP("/%u ", 1 << -param->shift);
321 if (param->shift > 0)
322 DUMP("*%u ", 1 << param->shift);
323
324 if (param->rel) {
325 DUMP("%c[", sm1_file_char[param->file]);
326 sm1_dump_src_param(param->rel);
327 DUMP("+%i]", param->idx);
328 } else {
329 sm1_dump_reg(param->file, param->idx);
330 }
331 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
332 DUMP(".");
333 sm1_dump_writemask(param->mask);
334 }
335 }
336
/* Declaration data filled in by sm1_read_semantic(): the declared register
 * together with its usage / usage index and, for samplers, the sampler
 * type. */
struct sm1_semantic
{
    struct sm1_dst_param reg;
    BYTE sampler_type;
    D3DDECLUSAGE usage;
    BYTE usage_idx;
};
344
/* Static description of one SM1 opcode: the TGSI opcode it maps to, the
 * shader versions it is valid for, its operand counts and an optional
 * special-case handler. */
struct sm1_op_info
{
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
     * should be ignored completely */
    unsigned sio;
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
365
/* One decoded SM1 instruction with storage for all of its operands. */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags; /* opcode specific flags, e.g. NINED3DSI_TEXLD_* for TEX */
    BOOL coissue;
    BOOL predicated; /* when set, 'pred' holds the predicate operand */
    BYTE ndst; /* number of valid entries in dst[] */
    BYTE nsrc; /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    /* NOTE(review): presumably backing storage for src[i].rel - confirm
     * against the parser. */
    struct sm1_src_param src_rel[4];
    struct sm1_src_param pred;
    /* NOTE(review): presumably backing storage for dst[0].rel - confirm. */
    struct sm1_src_param dst_rel[1];
    struct sm1_dst_param dst[1];

    struct sm1_op_info *info;
};
382
383 static void
384 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
385 {
386 unsigned i;
387
388 /* no info stored for these: */
389 if (insn->opcode == D3DSIO_DCL)
390 return;
391 for (i = 0; i < indent; ++i)
392 DUMP(" ");
393
394 if (insn->predicated) {
395 DUMP("@");
396 sm1_dump_src_param(&insn->pred);
397 DUMP(" ");
398 }
399 DUMP("%s", d3dsio_to_string(insn->opcode));
400 if (insn->flags) {
401 switch (insn->opcode) {
402 case D3DSIO_TEX:
403 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
404 break;
405 default:
406 DUMP("_%x", insn->flags);
407 break;
408 }
409 }
410 if (insn->coissue)
411 DUMP("_co");
412 DUMP(" ");
413
414 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
415 sm1_dump_dst_param(&insn->dst[i]);
416 DUMP(" ");
417 }
418
419 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
420 sm1_dump_src_param(&insn->src[i]);
421 DUMP(" ");
422 }
423 if (insn->opcode == D3DSIO_DEF ||
424 insn->opcode == D3DSIO_DEFI ||
425 insn->opcode == D3DSIO_DEFB)
426 sm1_dump_immediate(&insn->src[0]);
427
428 DUMP("\n");
429 }
430
/* A constant defined inside the shader itself (see tx_set_lconst*): the
 * constant index it overrides, the immediate register created for it and,
 * where the setter stores it, the raw value. */
struct sm1_local_const
{
    INT idx; /* constant register index this entry stands for */
    struct ureg_src reg; /* immediate returned by ureg_imm*() */
    union {
        boolean b;
        float f[4];
        int32_t i[4];
    } imm;
};
441
/* Complete state of one SM1 -> TGSI translation. */
struct shader_translator
{
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg; /* TGSI program being built */

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
    /* upper bounds checked by tx_lconst*() / tx_set_lconst*() */
    unsigned num_constf_allowed;
    unsigned num_consti_allowed;
    unsigned num_constb_allowed;

    boolean native_integers;
    boolean inline_subroutines;
    boolean lower_preds;
    boolean want_texcoord;
    boolean shift_wpos; /* subtract (0.5, 0.5) from the position input */
    boolean wpos_is_sysval; /* position is a system value, not an fs input */
    boolean face_is_sysval_integer;
    unsigned texcoord_sn; /* semantic name used for texcoord inputs/outputs */

    struct sm1_instruction insn; /* current instruction */

    /* TGSI registers, declared lazily on first use */
    struct {
        struct ureg_dst *r;
        struct ureg_dst oPos;
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p;
        struct ureg_dst address;
        struct ureg_dst a0;
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[5]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* ARRAY_SIZE(regs.r) */
    unsigned num_scratch; /* number of regs.t[] entries handed out so far */
    unsigned loop_depth;
    unsigned loop_depth_max;
    unsigned cond_depth;
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    /* shader-local constants; lconstf grows in steps of 8 entries */
    struct sm1_local_const *lconstf;
    unsigned num_lconstf;
    struct sm1_local_const lconsti[NINE_MAX_CONST_I];
    struct sm1_local_const lconstb[NINE_MAX_CONST_B];

    boolean indirect_const_access;
    boolean failure; /* set when translation hit an error */

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
519
/* Shader stage tests; both expect a variable named 'tx' in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* Marks the translation as failed and returns from a void function when
 * 'cond' holds. Expects 'tx' in scope. It expands to a complete if-block and
 * is invoked without a trailing semicolon in this file, so keep it the last
 * thing on its line and never place it directly in front of an 'else'. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

/* Prototype only; the definition is not in this part of the file. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
527
528 static void
529 sm1_instruction_check(const struct sm1_instruction *insn)
530 {
531 if (insn->opcode == D3DSIO_CRS)
532 {
533 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
534 {
535 DBG("CRS.mask.w\n");
536 }
537 }
538 }
539
540 static boolean
541 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
542 {
543 INT i;
544 if (index < 0 || index >= tx->num_constf_allowed) {
545 tx->failure = TRUE;
546 return FALSE;
547 }
548 for (i = 0; i < tx->num_lconstf; ++i) {
549 if (tx->lconstf[i].idx == index) {
550 *src = tx->lconstf[i].reg;
551 return TRUE;
552 }
553 }
554 return FALSE;
555 }
556 static boolean
557 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
558 {
559 if (index < 0 || index >= tx->num_consti_allowed) {
560 tx->failure = TRUE;
561 return FALSE;
562 }
563 if (tx->lconsti[index].idx == index)
564 *src = tx->lconsti[index].reg;
565 return tx->lconsti[index].idx == index;
566 }
567 static boolean
568 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
569 {
570 if (index < 0 || index >= tx->num_constb_allowed) {
571 tx->failure = TRUE;
572 return FALSE;
573 }
574 if (tx->lconstb[index].idx == index)
575 *src = tx->lconstb[index].reg;
576 return tx->lconstb[index].idx == index;
577 }
578
579 static void
580 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
581 {
582 unsigned n;
583
584 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
585
586 for (n = 0; n < tx->num_lconstf; ++n)
587 if (tx->lconstf[n].idx == index)
588 break;
589 if (n == tx->num_lconstf) {
590 if ((n % 8) == 0) {
591 tx->lconstf = REALLOC(tx->lconstf,
592 (n + 0) * sizeof(tx->lconstf[0]),
593 (n + 8) * sizeof(tx->lconstf[0]));
594 assert(tx->lconstf);
595 }
596 tx->num_lconstf++;
597 }
598 tx->lconstf[n].idx = index;
599 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
600
601 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
602 }
603 static void
604 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
605 {
606 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
607 tx->lconsti[index].idx = index;
608 tx->lconsti[index].reg = tx->native_integers ?
609 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
610 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
611 }
612 static void
613 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
614 {
615 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
616 tx->lconstb[index].idx = index;
617 tx->lconstb[index].reg = tx->native_integers ?
618 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
619 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
620 }
621
622 static inline struct ureg_dst
623 tx_scratch(struct shader_translator *tx)
624 {
625 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
626 tx->failure = TRUE;
627 return tx->regs.t[0];
628 }
629 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
630 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
631 return tx->regs.t[tx->num_scratch++];
632 }
633
634 static inline struct ureg_dst
635 tx_scratch_scalar(struct shader_translator *tx)
636 {
637 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
638 }
639
640 static inline struct ureg_src
641 tx_src_scalar(struct ureg_dst dst)
642 {
643 struct ureg_src src = ureg_src(dst);
644 int c = ffs(dst.WriteMask) - 1;
645 if (dst.WriteMask == (1 << c))
646 src = ureg_scalar(src, c);
647 return src;
648 }
649
650 static inline void
651 tx_temp_alloc(struct shader_translator *tx, INT idx)
652 {
653 assert(idx >= 0);
654 if (idx >= tx->num_temp) {
655 unsigned k = tx->num_temp;
656 unsigned n = idx + 1;
657 tx->regs.r = REALLOC(tx->regs.r,
658 k * sizeof(tx->regs.r[0]),
659 n * sizeof(tx->regs.r[0]));
660 for (; k < n; ++k)
661 tx->regs.r[k] = ureg_dst_undef();
662 tx->num_temp = n;
663 }
664 if (ureg_dst_is_undef(tx->regs.r[idx]))
665 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
666 }
667
668 static inline void
669 tx_addr_alloc(struct shader_translator *tx, INT idx)
670 {
671 assert(idx == 0);
672 if (ureg_dst_is_undef(tx->regs.address))
673 tx->regs.address = ureg_DECL_address(tx->ureg);
674 if (ureg_dst_is_undef(tx->regs.a0))
675 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
676 }
677
678 static inline void
679 tx_pred_alloc(struct shader_translator *tx, INT idx)
680 {
681 assert(idx == 0);
682 if (ureg_dst_is_undef(tx->regs.p))
683 tx->regs.p = ureg_DECL_predicate(tx->ureg);
684 }
685
686 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
687 * the projection should be applied on the texture. It doesn't
688 * apply on texkill.
689 * The doc is very imprecise here (it says the projection is done
690 * before rasterization, thus in vs, which seems wrong since ps instructions
691 * are affected differently)
692 * For now we only apply to the ps TEX instruction and TEXBEM.
693 * Perhaps some other instructions would need it */
694 static inline void
695 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
696 struct ureg_src src, INT idx)
697 {
698 struct ureg_dst tmp;
699 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
700
701 /* no projection */
702 if (dim == 1) {
703 ureg_MOV(tx->ureg, dst, src);
704 } else {
705 tmp = tx_scratch_scalar(tx);
706 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
707 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
708 }
709 }
710
711 static inline void
712 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
713 unsigned target, struct ureg_src src0,
714 struct ureg_src src1, INT idx)
715 {
716 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
717 struct ureg_dst tmp;
718
719 /* dim == 1: no projection
720 * Looks like must be disabled when it makes no
721 * sense according the texture dimensions
722 */
723 if (dim == 1 || dim <= target) {
724 ureg_TEX(tx->ureg, dst, target, src0, src1);
725 } else if (dim == 4) {
726 ureg_TXP(tx->ureg, dst, target, src0, src1);
727 } else {
728 tmp = tx_scratch(tx);
729 apply_ps1x_projection(tx, tmp, src0, idx);
730 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
731 }
732 }
733
734 static inline void
735 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
736 {
737 assert(IS_PS);
738 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
739 if (ureg_src_is_undef(tx->regs.vT[idx]))
740 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
741 TGSI_INTERPOLATE_PERSPECTIVE);
742 }
743
744 static inline unsigned *
745 tx_bgnloop(struct shader_translator *tx)
746 {
747 tx->loop_depth++;
748 if (tx->loop_depth_max < tx->loop_depth)
749 tx->loop_depth_max = tx->loop_depth;
750 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
751 return &tx->loop_labels[tx->loop_depth - 1];
752 }
753
754 static inline unsigned *
755 tx_endloop(struct shader_translator *tx)
756 {
757 assert(tx->loop_depth);
758 tx->loop_depth--;
759 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
760 ureg_get_instruction_number(tx->ureg));
761 return &tx->loop_labels[tx->loop_depth];
762 }
763
764 static struct ureg_dst
765 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
766 {
767 const unsigned l = tx->loop_depth - 1;
768
769 if (!tx->loop_depth)
770 {
771 DBG("loop counter requested outside of loop\n");
772 return ureg_dst_undef();
773 }
774
775 if (ureg_dst_is_undef(tx->regs.rL[l])) {
776 /* loop or rep ctr creation */
777 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
778 tx->loop_or_rep[l] = loop_or_rep;
779 }
780 /* loop - rep - endloop - endrep not allowed */
781 assert(tx->loop_or_rep[l] == loop_or_rep);
782
783 return tx->regs.rL[l];
784 }
785
786 static struct ureg_src
787 tx_get_loopal(struct shader_translator *tx)
788 {
789 int loop_level = tx->loop_depth - 1;
790
791 while (loop_level >= 0) {
792 /* handle loop - rep - endrep - endloop case */
793 if (tx->loop_or_rep[loop_level])
794 /* the value is in the loop counter y component (nine implementation) */
795 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
796 loop_level--;
797 }
798
799 DBG("aL counter requested outside of loop\n");
800 return ureg_src_undef();
801 }
802
803 static inline unsigned *
804 tx_cond(struct shader_translator *tx)
805 {
806 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
807 tx->cond_depth++;
808 return &tx->cond_labels[tx->cond_depth - 1];
809 }
810
811 static inline unsigned *
812 tx_elsecond(struct shader_translator *tx)
813 {
814 assert(tx->cond_depth);
815 return &tx->cond_labels[tx->cond_depth - 1];
816 }
817
818 static inline void
819 tx_endcond(struct shader_translator *tx)
820 {
821 assert(tx->cond_depth);
822 tx->cond_depth--;
823 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
824 ureg_get_instruction_number(tx->ureg));
825 }
826
827 static inline struct ureg_dst
828 nine_ureg_dst_register(unsigned file, int index)
829 {
830 return ureg_dst(ureg_src_register(file, index));
831 }
832
833 static inline struct ureg_src
834 nine_get_position_input(struct shader_translator *tx)
835 {
836 struct ureg_program *ureg = tx->ureg;
837
838 if (tx->wpos_is_sysval)
839 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
840 else
841 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
842 0, TGSI_INTERPOLATE_LINEAR);
843 }
844
845 static struct ureg_src
846 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
847 {
848 struct ureg_program *ureg = tx->ureg;
849 struct ureg_src src;
850 struct ureg_dst tmp;
851
852 switch (param->file)
853 {
854 case D3DSPR_TEMP:
855 assert(!param->rel);
856 tx_temp_alloc(tx, param->idx);
857 src = ureg_src(tx->regs.r[param->idx]);
858 break;
859 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
860 case D3DSPR_ADDR:
861 assert(!param->rel);
862 if (IS_VS) {
863 assert(param->idx == 0);
864 /* the address register (vs only) must be
865 * assigned before use */
866 assert(!ureg_dst_is_undef(tx->regs.a0));
867 /* Round to lowest for vs1.1 (contrary to the doc), else
868 * round to nearest */
869 if (tx->version.major < 2 && tx->version.minor < 2)
870 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
871 else
872 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
873 src = ureg_src(tx->regs.address);
874 } else {
875 if (tx->version.major < 2 && tx->version.minor < 4) {
876 /* no subroutines, so should be defined */
877 src = ureg_src(tx->regs.tS[param->idx]);
878 } else {
879 tx_texcoord_alloc(tx, param->idx);
880 src = tx->regs.vT[param->idx];
881 }
882 }
883 break;
884 case D3DSPR_INPUT:
885 if (IS_VS) {
886 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
887 } else {
888 if (tx->version.major < 3) {
889 assert(!param->rel);
890 src = ureg_DECL_fs_input_cyl_centroid(
891 ureg, TGSI_SEMANTIC_COLOR, param->idx,
892 TGSI_INTERPOLATE_COLOR, 0,
893 tx->info->force_color_in_centroid ?
894 TGSI_INTERPOLATE_LOC_CENTROID : 0,
895 0, 1);
896 } else {
897 assert(!param->rel); /* TODO */
898 assert(param->idx < ARRAY_SIZE(tx->regs.v));
899 src = tx->regs.v[param->idx];
900 }
901 }
902 break;
903 case D3DSPR_PREDICATE:
904 assert(!param->rel);
905 tx_pred_alloc(tx, param->idx);
906 src = ureg_src(tx->regs.p);
907 break;
908 case D3DSPR_SAMPLER:
909 assert(param->mod == NINED3DSPSM_NONE);
910 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
911 assert(!param->rel);
912 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
913 break;
914 case D3DSPR_CONST:
915 assert(!param->rel || IS_VS);
916 if (param->rel)
917 tx->indirect_const_access = TRUE;
918 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
919 if (!param->rel)
920 nine_info_mark_const_f_used(tx->info, param->idx);
921 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
922 }
923 if (!IS_VS && tx->version.major < 2) {
924 /* ps 1.X clamps constants */
925 tmp = tx_scratch(tx);
926 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
927 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
928 src = ureg_src(tmp);
929 }
930 break;
931 case D3DSPR_CONST2:
932 case D3DSPR_CONST3:
933 case D3DSPR_CONST4:
934 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
935 assert(!"CONST2/3/4");
936 src = ureg_imm1f(ureg, 0.0f);
937 break;
938 case D3DSPR_CONSTINT:
939 /* relative adressing only possible for float constants in vs */
940 assert(!param->rel);
941 if (!tx_lconsti(tx, &src, param->idx)) {
942 nine_info_mark_const_i_used(tx->info, param->idx);
943 src = ureg_src_register(TGSI_FILE_CONSTANT,
944 tx->info->const_i_base + param->idx);
945 }
946 break;
947 case D3DSPR_CONSTBOOL:
948 assert(!param->rel);
949 if (!tx_lconstb(tx, &src, param->idx)) {
950 char r = param->idx / 4;
951 char s = param->idx & 3;
952 nine_info_mark_const_b_used(tx->info, param->idx);
953 src = ureg_src_register(TGSI_FILE_CONSTANT,
954 tx->info->const_b_base + r);
955 src = ureg_swizzle(src, s, s, s, s);
956 }
957 break;
958 case D3DSPR_LOOP:
959 if (ureg_dst_is_undef(tx->regs.address))
960 tx->regs.address = ureg_DECL_address(ureg);
961 if (!tx->native_integers)
962 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
963 else
964 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
965 src = ureg_src(tx->regs.address);
966 break;
967 case D3DSPR_MISCTYPE:
968 switch (param->idx) {
969 case D3DSMO_POSITION:
970 if (ureg_src_is_undef(tx->regs.vPos))
971 tx->regs.vPos = nine_get_position_input(tx);
972 if (tx->shift_wpos) {
973 /* TODO: do this only once */
974 struct ureg_dst wpos = tx_scratch(tx);
975 ureg_SUB(ureg, wpos, tx->regs.vPos,
976 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
977 src = ureg_src(wpos);
978 } else {
979 src = tx->regs.vPos;
980 }
981 break;
982 case D3DSMO_FACE:
983 if (ureg_src_is_undef(tx->regs.vFace)) {
984 if (tx->face_is_sysval_integer) {
985 tmp = tx_scratch(tx);
986 tx->regs.vFace =
987 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
988
989 /* convert bool to float */
990 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
991 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
992 tx->regs.vFace = ureg_src(tmp);
993 } else {
994 tx->regs.vFace = ureg_DECL_fs_input(ureg,
995 TGSI_SEMANTIC_FACE, 0,
996 TGSI_INTERPOLATE_CONSTANT);
997 }
998 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
999 }
1000 src = tx->regs.vFace;
1001 break;
1002 default:
1003 assert(!"invalid src D3DSMO");
1004 break;
1005 }
1006 assert(!param->rel);
1007 break;
1008 case D3DSPR_TEMPFLOAT16:
1009 break;
1010 default:
1011 assert(!"invalid src D3DSPR");
1012 }
1013 if (param->rel)
1014 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1015
1016 switch (param->mod) {
1017 case NINED3DSPSM_DW:
1018 tmp = tx_scratch(tx);
1019 /* NOTE: app is not allowed to read w with this modifier */
1020 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
1021 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1022 src = ureg_src(tmp);
1023 break;
1024 case NINED3DSPSM_DZ:
1025 tmp = tx_scratch(tx);
1026 /* NOTE: app is not allowed to read z with this modifier */
1027 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
1028 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1029 src = ureg_src(tmp);
1030 break;
1031 default:
1032 break;
1033 }
1034
1035 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1036 src = ureg_swizzle(src,
1037 (param->swizzle >> 0) & 0x3,
1038 (param->swizzle >> 2) & 0x3,
1039 (param->swizzle >> 4) & 0x3,
1040 (param->swizzle >> 6) & 0x3);
1041
1042 switch (param->mod) {
1043 case NINED3DSPSM_ABS:
1044 src = ureg_abs(src);
1045 break;
1046 case NINED3DSPSM_ABSNEG:
1047 src = ureg_negate(ureg_abs(src));
1048 break;
1049 case NINED3DSPSM_NEG:
1050 src = ureg_negate(src);
1051 break;
1052 case NINED3DSPSM_BIAS:
1053 tmp = tx_scratch(tx);
1054 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
1055 src = ureg_src(tmp);
1056 break;
1057 case NINED3DSPSM_BIASNEG:
1058 tmp = tx_scratch(tx);
1059 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
1060 src = ureg_src(tmp);
1061 break;
1062 case NINED3DSPSM_NOT:
1063 if (tx->native_integers) {
1064 tmp = tx_scratch(tx);
1065 ureg_NOT(ureg, tmp, src);
1066 src = ureg_src(tmp);
1067 break;
1068 }
1069 /* fall through */
1070 case NINED3DSPSM_COMP:
1071 tmp = tx_scratch(tx);
1072 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1073 src = ureg_src(tmp);
1074 break;
1075 case NINED3DSPSM_DZ:
1076 case NINED3DSPSM_DW:
1077 /* Already handled*/
1078 break;
1079 case NINED3DSPSM_SIGN:
1080 tmp = tx_scratch(tx);
1081 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1082 src = ureg_src(tmp);
1083 break;
1084 case NINED3DSPSM_SIGNNEG:
1085 tmp = tx_scratch(tx);
1086 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1087 src = ureg_src(tmp);
1088 break;
1089 case NINED3DSPSM_X2:
1090 tmp = tx_scratch(tx);
1091 ureg_ADD(ureg, tmp, src, src);
1092 src = ureg_src(tmp);
1093 break;
1094 case NINED3DSPSM_X2NEG:
1095 tmp = tx_scratch(tx);
1096 ureg_ADD(ureg, tmp, src, src);
1097 src = ureg_negate(ureg_src(tmp));
1098 break;
1099 default:
1100 assert(param->mod == NINED3DSPSM_NONE);
1101 break;
1102 }
1103
1104 return src;
1105 }
1106
1107 static struct ureg_dst
1108 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1109 {
1110 struct ureg_dst dst;
1111
1112 switch (param->file)
1113 {
1114 case D3DSPR_TEMP:
1115 assert(!param->rel);
1116 tx_temp_alloc(tx, param->idx);
1117 dst = tx->regs.r[param->idx];
1118 break;
1119 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1120 case D3DSPR_ADDR:
1121 assert(!param->rel);
1122 if (tx->version.major < 2 && !IS_VS) {
1123 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1124 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1125 dst = tx->regs.tS[param->idx];
1126 } else
1127 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1128 tx_texcoord_alloc(tx, param->idx);
1129 dst = ureg_dst(tx->regs.vT[param->idx]);
1130 } else {
1131 tx_addr_alloc(tx, param->idx);
1132 dst = tx->regs.a0;
1133 }
1134 break;
1135 case D3DSPR_RASTOUT:
1136 assert(!param->rel);
1137 switch (param->idx) {
1138 case 0:
1139 if (ureg_dst_is_undef(tx->regs.oPos))
1140 tx->regs.oPos =
1141 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1142 dst = tx->regs.oPos;
1143 break;
1144 case 1:
1145 if (ureg_dst_is_undef(tx->regs.oFog))
1146 tx->regs.oFog =
1147 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1148 dst = tx->regs.oFog;
1149 break;
1150 case 2:
1151 if (ureg_dst_is_undef(tx->regs.oPts))
1152 tx->regs.oPts =
1153 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1154 dst = tx->regs.oPts;
1155 break;
1156 default:
1157 assert(0);
1158 break;
1159 }
1160 break;
1161 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1162 case D3DSPR_OUTPUT:
1163 if (tx->version.major < 3) {
1164 assert(!param->rel);
1165 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1166 } else {
1167 assert(!param->rel); /* TODO */
1168 assert(param->idx < ARRAY_SIZE(tx->regs.o));
1169 dst = tx->regs.o[param->idx];
1170 }
1171 break;
1172 case D3DSPR_ATTROUT: /* VS */
1173 case D3DSPR_COLOROUT: /* PS */
1174 assert(param->idx >= 0 && param->idx < 4);
1175 assert(!param->rel);
1176 tx->info->rt_mask |= 1 << param->idx;
1177 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1178 /* ps < 3: oCol[0] will have fog blending afterward */
1179 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1180 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1181 } else {
1182 tx->regs.oCol[param->idx] =
1183 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1184 }
1185 }
1186 dst = tx->regs.oCol[param->idx];
1187 if (IS_VS && tx->version.major < 3)
1188 dst = ureg_saturate(dst);
1189 break;
1190 case D3DSPR_DEPTHOUT:
1191 assert(!param->rel);
1192 if (ureg_dst_is_undef(tx->regs.oDepth))
1193 tx->regs.oDepth =
1194 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1195 TGSI_WRITEMASK_Z, 0, 1);
1196 dst = tx->regs.oDepth; /* XXX: must write .z component */
1197 break;
1198 case D3DSPR_PREDICATE:
1199 assert(!param->rel);
1200 tx_pred_alloc(tx, param->idx);
1201 dst = tx->regs.p;
1202 break;
1203 case D3DSPR_TEMPFLOAT16:
1204 DBG("unhandled D3DSPR: %u\n", param->file);
1205 break;
1206 default:
1207 assert(!"invalid dst D3DSPR");
1208 break;
1209 }
1210 if (param->rel)
1211 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1212
1213 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1214 dst = ureg_writemask(dst, param->mask);
1215 if (param->mod & NINED3DSPDM_SATURATE)
1216 dst = ureg_saturate(dst);
1217
1218 return dst;
1219 }
1220
1221 static struct ureg_dst
1222 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1223 {
1224 if (param->shift) {
1225 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1226 return tx->regs.tdst;
1227 }
1228 return _tx_dst_param(tx, param);
1229 }
1230
1231 static void
1232 tx_apply_dst0_modifiers(struct shader_translator *tx)
1233 {
1234 struct ureg_dst rdst;
1235 float f;
1236
1237 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1238 return;
1239 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1240
1241 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1242
1243 if (tx->insn.dst[0].shift < 0)
1244 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1245 else
1246 f = 1 << tx->insn.dst[0].shift;
1247
1248 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1249 }
1250
1251 static struct ureg_src
1252 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1253 {
1254 struct ureg_src src;
1255
1256 assert(!param->shift);
1257 assert(!(param->mod & NINED3DSPDM_SATURATE));
1258
1259 switch (param->file) {
1260 case D3DSPR_INPUT:
1261 if (IS_VS) {
1262 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1263 } else {
1264 assert(!param->rel);
1265 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1266 src = tx->regs.v[param->idx];
1267 }
1268 break;
1269 default:
1270 src = ureg_src(tx_dst_param(tx, param));
1271 break;
1272 }
1273 if (param->rel)
1274 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1275
1276 if (!param->mask)
1277 WARN("mask is 0, using identity swizzle\n");
1278
1279 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1280 char s[4];
1281 int n;
1282 int c;
1283 for (n = 0, c = 0; c < 4; ++c)
1284 if (param->mask & (1 << c))
1285 s[n++] = c;
1286 assert(n);
1287 for (c = n; c < 4; ++c)
1288 s[c] = s[n - 1];
1289 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1290 }
1291 return src;
1292 }
1293
1294 static HRESULT
1295 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1296 {
1297 struct ureg_program *ureg = tx->ureg;
1298 struct ureg_dst dst;
1299 struct ureg_src src[2];
1300 struct sm1_src_param *src_mat = &tx->insn.src[1];
1301 unsigned i;
1302
1303 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1304 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1305
1306 for (i = 0; i < n; i++)
1307 {
1308 const unsigned m = (1 << i);
1309
1310 src[1] = tx_src_param(tx, src_mat);
1311 src_mat->idx++;
1312
1313 if (!(dst.WriteMask & m))
1314 continue;
1315
1316 /* XXX: src == dst case ? */
1317
1318 switch (k) {
1319 case 3:
1320 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1321 break;
1322 case 4:
1323 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1324 break;
1325 default:
1326 DBG("invalid operation: M%ux%u\n", m, n);
1327 break;
1328 }
1329 }
1330
1331 return D3D_OK;
1332 }
1333
/* Expands to a zero pair of values, used where an opcode is not supported. */
#define VNOTSUPPORTED 0, 0
/* Pack a shader version as (major << 8) | minor. */
#define V(major, minor) (((major) << 8) | (minor))
1336
1337 static inline const char *
1338 d3dsio_to_string( unsigned opcode )
1339 {
1340 static const char *names[] = {
1341 "NOP",
1342 "MOV",
1343 "ADD",
1344 "SUB",
1345 "MAD",
1346 "MUL",
1347 "RCP",
1348 "RSQ",
1349 "DP3",
1350 "DP4",
1351 "MIN",
1352 "MAX",
1353 "SLT",
1354 "SGE",
1355 "EXP",
1356 "LOG",
1357 "LIT",
1358 "DST",
1359 "LRP",
1360 "FRC",
1361 "M4x4",
1362 "M4x3",
1363 "M3x4",
1364 "M3x3",
1365 "M3x2",
1366 "CALL",
1367 "CALLNZ",
1368 "LOOP",
1369 "RET",
1370 "ENDLOOP",
1371 "LABEL",
1372 "DCL",
1373 "POW",
1374 "CRS",
1375 "SGN",
1376 "ABS",
1377 "NRM",
1378 "SINCOS",
1379 "REP",
1380 "ENDREP",
1381 "IF",
1382 "IFC",
1383 "ELSE",
1384 "ENDIF",
1385 "BREAK",
1386 "BREAKC",
1387 "MOVA",
1388 "DEFB",
1389 "DEFI",
1390 NULL,
1391 NULL,
1392 NULL,
1393 NULL,
1394 NULL,
1395 NULL,
1396 NULL,
1397 NULL,
1398 NULL,
1399 NULL,
1400 NULL,
1401 NULL,
1402 NULL,
1403 NULL,
1404 NULL,
1405 "TEXCOORD",
1406 "TEXKILL",
1407 "TEX",
1408 "TEXBEM",
1409 "TEXBEML",
1410 "TEXREG2AR",
1411 "TEXREG2GB",
1412 "TEXM3x2PAD",
1413 "TEXM3x2TEX",
1414 "TEXM3x3PAD",
1415 "TEXM3x3TEX",
1416 NULL,
1417 "TEXM3x3SPEC",
1418 "TEXM3x3VSPEC",
1419 "EXPP",
1420 "LOGP",
1421 "CND",
1422 "DEF",
1423 "TEXREG2RGB",
1424 "TEXDP3TEX",
1425 "TEXM3x2DEPTH",
1426 "TEXDP3",
1427 "TEXM3x3",
1428 "TEXDEPTH",
1429 "CMP",
1430 "BEM",
1431 "DP2ADD",
1432 "DSX",
1433 "DSY",
1434 "TEXLDD",
1435 "SETP",
1436 "TEXLDL",
1437 "BREAKP"
1438 };
1439
1440 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1441
1442 switch (opcode) {
1443 case D3DSIO_PHASE: return "PHASE";
1444 case D3DSIO_COMMENT: return "COMMENT";
1445 case D3DSIO_END: return "END";
1446 default:
1447 return NULL;
1448 }
1449 }
1450
1451 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1452 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1453 (inst).vert_version.max | \
1454 (inst).frag_version.min | \
1455 (inst).frag_version.max)
1456
1457 #define SPECIAL(name) \
1458 NineTranslateInstruction_##name
1459
1460 #define DECL_SPECIAL(name) \
1461 static HRESULT \
1462 NineTranslateInstruction_##name( struct shader_translator *tx )
1463
1464 static HRESULT
1465 NineTranslateInstruction_Generic(struct shader_translator *);
1466
1467 DECL_SPECIAL(M4x4)
1468 {
1469 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1470 }
1471
1472 DECL_SPECIAL(M4x3)
1473 {
1474 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1475 }
1476
1477 DECL_SPECIAL(M3x4)
1478 {
1479 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1480 }
1481
1482 DECL_SPECIAL(M3x3)
1483 {
1484 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1485 }
1486
1487 DECL_SPECIAL(M3x2)
1488 {
1489 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1490 }
1491
1492 DECL_SPECIAL(CMP)
1493 {
1494 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1495 tx_src_param(tx, &tx->insn.src[0]),
1496 tx_src_param(tx, &tx->insn.src[2]),
1497 tx_src_param(tx, &tx->insn.src[1]));
1498 return D3D_OK;
1499 }
1500
1501 DECL_SPECIAL(CND)
1502 {
1503 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1504 struct ureg_dst cgt;
1505 struct ureg_src cnd;
1506
1507 /* the coissue flag was a tip for compilers to advise to
1508 * execute two operations at the same time, in cases
1509 * the two executions had same dst with different channels.
1510 * It has no effect on current hw. However it seems CND
1511 * is affected. The handling of this very specific case
1512 * handled below mimick wine behaviour */
1513 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1514 ureg_MOV(tx->ureg,
1515 dst, tx_src_param(tx, &tx->insn.src[1]));
1516 return D3D_OK;
1517 }
1518
1519 cnd = tx_src_param(tx, &tx->insn.src[0]);
1520 cgt = tx_scratch(tx);
1521
1522 if (tx->version.major == 1 && tx->version.minor < 4)
1523 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1524
1525 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1526
1527 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1528 tx_src_param(tx, &tx->insn.src[1]),
1529 tx_src_param(tx, &tx->insn.src[2]));
1530 return D3D_OK;
1531 }
1532
1533 DECL_SPECIAL(CALL)
1534 {
1535 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1536 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1537 return D3D_OK;
1538 }
1539
1540 DECL_SPECIAL(CALLNZ)
1541 {
1542 struct ureg_program *ureg = tx->ureg;
1543 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1544
1545 if (!tx->native_integers)
1546 ureg_IF(ureg, src, tx_cond(tx));
1547 else
1548 ureg_UIF(ureg, src, tx_cond(tx));
1549 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1550 tx_endcond(tx);
1551 ureg_ENDIF(ureg);
1552 return D3D_OK;
1553 }
1554
1555 DECL_SPECIAL(LOOP)
1556 {
1557 struct ureg_program *ureg = tx->ureg;
1558 unsigned *label;
1559 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1560 struct ureg_dst ctr;
1561 struct ureg_dst tmp;
1562 struct ureg_src ctrx;
1563
1564 label = tx_bgnloop(tx);
1565 ctr = tx_get_loopctr(tx, TRUE);
1566 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1567
1568 /* src: num_iterations - start_value of al - step for al - 0 */
1569 ureg_MOV(ureg, ctr, src);
1570 ureg_BGNLOOP(tx->ureg, label);
1571 tmp = tx_scratch_scalar(tx);
1572 /* Initially ctr.x contains the number of iterations.
1573 * ctr.y will contain the updated value of al.
1574 * We decrease ctr.x at the end of every iteration,
1575 * and stop when it reaches 0. */
1576
1577 if (!tx->native_integers) {
1578 /* case src and ctr contain floats */
1579 /* to avoid precision issue, we stop when ctr <= 0.5 */
1580 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1581 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1582 } else {
1583 /* case src and ctr contain integers */
1584 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1585 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1586 }
1587 ureg_BRK(ureg);
1588 tx_endcond(tx);
1589 ureg_ENDIF(ureg);
1590 return D3D_OK;
1591 }
1592
1593 DECL_SPECIAL(RET)
1594 {
1595 ureg_RET(tx->ureg);
1596 return D3D_OK;
1597 }
1598
1599 DECL_SPECIAL(ENDLOOP)
1600 {
1601 struct ureg_program *ureg = tx->ureg;
1602 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1603 struct ureg_dst dst_ctrx, dst_al;
1604 struct ureg_src src_ctr, al_counter;
1605
1606 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1607 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1608 src_ctr = ureg_src(ctr);
1609 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1610
1611 /* ctr.x -= 1
1612 * ctr.y (aL) += step */
1613 if (!tx->native_integers) {
1614 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1615 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1616 } else {
1617 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1618 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1619 }
1620 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1621 return D3D_OK;
1622 }
1623
1624 DECL_SPECIAL(LABEL)
1625 {
1626 unsigned k = tx->num_inst_labels;
1627 unsigned n = tx->insn.src[0].idx;
1628 assert(n < 2048);
1629 if (n >= k)
1630 tx->inst_labels = REALLOC(tx->inst_labels,
1631 k * sizeof(tx->inst_labels[0]),
1632 n * sizeof(tx->inst_labels[0]));
1633
1634 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1635 return D3D_OK;
1636 }
1637
1638 DECL_SPECIAL(SINCOS)
1639 {
1640 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1641 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1642
1643 assert(!(dst.WriteMask & 0xc));
1644
1645 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1646 ureg_SCS(tx->ureg, dst, src);
1647 return D3D_OK;
1648 }
1649
1650 DECL_SPECIAL(SGN)
1651 {
1652 ureg_SSG(tx->ureg,
1653 tx_dst_param(tx, &tx->insn.dst[0]),
1654 tx_src_param(tx, &tx->insn.src[0]));
1655 return D3D_OK;
1656 }
1657
1658 DECL_SPECIAL(REP)
1659 {
1660 struct ureg_program *ureg = tx->ureg;
1661 unsigned *label;
1662 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1663 struct ureg_dst ctr;
1664 struct ureg_dst tmp;
1665 struct ureg_src ctrx;
1666
1667 label = tx_bgnloop(tx);
1668 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1669 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1670
1671 /* NOTE: rep must be constant, so we don't have to save the count */
1672 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1673
1674 /* rep: num_iterations - 0 - 0 - 0 */
1675 ureg_MOV(ureg, ctr, rep);
1676 ureg_BGNLOOP(ureg, label);
1677 tmp = tx_scratch_scalar(tx);
1678 /* Initially ctr.x contains the number of iterations.
1679 * We decrease ctr.x at the end of every iteration,
1680 * and stop when it reaches 0. */
1681
1682 if (!tx->native_integers) {
1683 /* case src and ctr contain floats */
1684 /* to avoid precision issue, we stop when ctr <= 0.5 */
1685 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1686 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1687 } else {
1688 /* case src and ctr contain integers */
1689 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1690 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1691 }
1692 ureg_BRK(ureg);
1693 tx_endcond(tx);
1694 ureg_ENDIF(ureg);
1695
1696 return D3D_OK;
1697 }
1698
1699 DECL_SPECIAL(ENDREP)
1700 {
1701 struct ureg_program *ureg = tx->ureg;
1702 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1703 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1704 struct ureg_src src_ctr = ureg_src(ctr);
1705
1706 /* ctr.x -= 1 */
1707 if (!tx->native_integers)
1708 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1709 else
1710 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1711
1712 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1713 return D3D_OK;
1714 }
1715
1716 DECL_SPECIAL(ENDIF)
1717 {
1718 tx_endcond(tx);
1719 ureg_ENDIF(tx->ureg);
1720 return D3D_OK;
1721 }
1722
1723 DECL_SPECIAL(IF)
1724 {
1725 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1726
1727 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1728 ureg_UIF(tx->ureg, src, tx_cond(tx));
1729 else
1730 ureg_IF(tx->ureg, src, tx_cond(tx));
1731
1732 return D3D_OK;
1733 }
1734
1735 static inline unsigned
1736 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1737 {
1738 switch (flags) {
1739 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1740 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1741 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1742 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1743 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1744 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1745 default:
1746 assert(!"invalid comparison flags");
1747 return TGSI_OPCODE_SGT;
1748 }
1749 }
1750
1751 DECL_SPECIAL(IFC)
1752 {
1753 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1754 struct ureg_src src[2];
1755 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1756 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1757 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1758 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1759 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1760 return D3D_OK;
1761 }
1762
1763 DECL_SPECIAL(ELSE)
1764 {
1765 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1766 return D3D_OK;
1767 }
1768
1769 DECL_SPECIAL(BREAKC)
1770 {
1771 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1772 struct ureg_src src[2];
1773 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1774 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1775 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1776 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1777 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1778 ureg_BRK(tx->ureg);
1779 tx_endcond(tx);
1780 ureg_ENDIF(tx->ureg);
1781 return D3D_OK;
1782 }
1783
1784 static const char *sm1_declusage_names[] =
1785 {
1786 [D3DDECLUSAGE_POSITION] = "POSITION",
1787 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1788 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1789 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1790 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1791 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1792 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1793 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1794 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1795 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1796 [D3DDECLUSAGE_COLOR] = "COLOR",
1797 [D3DDECLUSAGE_FOG] = "FOG",
1798 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1799 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1800 };
1801
1802 static inline unsigned
1803 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1804 {
1805 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1806 }
1807
1808 static void
1809 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1810 boolean tc,
1811 struct sm1_semantic *dcl)
1812 {
1813 BYTE index = dcl->usage_idx;
1814
1815 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1816 * we match to a TGSI_SEMANTIC_GENERIC with index.
1817 *
1818 * The index can be anything UINT16 and usage_idx is BYTE,
1819 * so we can fit everything. It doesn't matter if indices
1820 * are close together or low.
1821 *
1822 *
1823 * POSITION >= 1: 10 * index + 6
1824 * COLOR >= 2: 10 * (index-1) + 7
1825 * TEXCOORD[0..15]: index
1826 * BLENDWEIGHT: 10 * index + 18
1827 * BLENDINDICES: 10 * index + 19
1828 * NORMAL: 10 * index + 20
1829 * TANGENT: 10 * index + 21
1830 * BINORMAL: 10 * index + 22
1831 * TESSFACTOR: 10 * index + 23
1832 */
1833
1834 switch (dcl->usage) {
1835 case D3DDECLUSAGE_POSITION:
1836 case D3DDECLUSAGE_POSITIONT:
1837 case D3DDECLUSAGE_DEPTH:
1838 if (index == 0) {
1839 sem->Name = TGSI_SEMANTIC_POSITION;
1840 sem->Index = 0;
1841 } else {
1842 sem->Name = TGSI_SEMANTIC_GENERIC;
1843 sem->Index = 10 * index + 6;
1844 }
1845 break;
1846 case D3DDECLUSAGE_COLOR:
1847 if (index < 2) {
1848 sem->Name = TGSI_SEMANTIC_COLOR;
1849 sem->Index = index;
1850 } else {
1851 sem->Name = TGSI_SEMANTIC_GENERIC;
1852 sem->Index = 10 * (index-1) + 7;
1853 }
1854 break;
1855 case D3DDECLUSAGE_FOG:
1856 assert(index == 0);
1857 sem->Name = TGSI_SEMANTIC_FOG;
1858 sem->Index = 0;
1859 break;
1860 case D3DDECLUSAGE_PSIZE:
1861 assert(index == 0);
1862 sem->Name = TGSI_SEMANTIC_PSIZE;
1863 sem->Index = 0;
1864 break;
1865 case D3DDECLUSAGE_TEXCOORD:
1866 assert(index < 16);
1867 if (index < 8 && tc)
1868 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1869 else
1870 sem->Name = TGSI_SEMANTIC_GENERIC;
1871 sem->Index = index;
1872 break;
1873 case D3DDECLUSAGE_BLENDWEIGHT:
1874 sem->Name = TGSI_SEMANTIC_GENERIC;
1875 sem->Index = 10 * index + 18;
1876 break;
1877 case D3DDECLUSAGE_BLENDINDICES:
1878 sem->Name = TGSI_SEMANTIC_GENERIC;
1879 sem->Index = 10 * index + 19;
1880 break;
1881 case D3DDECLUSAGE_NORMAL:
1882 sem->Name = TGSI_SEMANTIC_GENERIC;
1883 sem->Index = 10 * index + 20;
1884 break;
1885 case D3DDECLUSAGE_TANGENT:
1886 sem->Name = TGSI_SEMANTIC_GENERIC;
1887 sem->Index = 10 * index + 21;
1888 break;
1889 case D3DDECLUSAGE_BINORMAL:
1890 sem->Name = TGSI_SEMANTIC_GENERIC;
1891 sem->Index = 10 * index + 22;
1892 break;
1893 case D3DDECLUSAGE_TESSFACTOR:
1894 sem->Name = TGSI_SEMANTIC_GENERIC;
1895 sem->Index = 10 * index + 23;
1896 break;
1897 case D3DDECLUSAGE_SAMPLE:
1898 sem->Name = TGSI_SEMANTIC_COUNT;
1899 sem->Index = 0;
1900 break;
1901 default:
1902 unreachable(!"Invalid DECLUSAGE.");
1903 break;
1904 }
1905 }
1906
1907 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
1908 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
1909 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
1910 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1911 static inline unsigned
1912 d3dstt_to_tgsi_tex(BYTE sampler_type)
1913 {
1914 switch (sampler_type) {
1915 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1916 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1917 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1918 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1919 default:
1920 assert(0);
1921 return TGSI_TEXTURE_UNKNOWN;
1922 }
1923 }
1924 static inline unsigned
1925 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1926 {
1927 switch (sampler_type) {
1928 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1929 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1930 case NINED3DSTT_VOLUME:
1931 case NINED3DSTT_CUBE:
1932 default:
1933 assert(0);
1934 return TGSI_TEXTURE_UNKNOWN;
1935 }
1936 }
1937 static inline unsigned
1938 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1939 {
1940 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1941 case 1: return TGSI_TEXTURE_1D;
1942 case 0: return TGSI_TEXTURE_2D;
1943 case 3: return TGSI_TEXTURE_3D;
1944 default:
1945 return TGSI_TEXTURE_CUBE;
1946 }
1947 }
1948
1949 static const char *
1950 sm1_sampler_type_name(BYTE sampler_type)
1951 {
1952 switch (sampler_type) {
1953 case NINED3DSTT_1D: return "1D";
1954 case NINED3DSTT_2D: return "2D";
1955 case NINED3DSTT_VOLUME: return "VOLUME";
1956 case NINED3DSTT_CUBE: return "CUBE";
1957 default:
1958 return "(D3DSTT_?)";
1959 }
1960 }
1961
1962 static inline unsigned
1963 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1964 {
1965 switch (sem->Name) {
1966 case TGSI_SEMANTIC_POSITION:
1967 case TGSI_SEMANTIC_NORMAL:
1968 return TGSI_INTERPOLATE_LINEAR;
1969 case TGSI_SEMANTIC_BCOLOR:
1970 case TGSI_SEMANTIC_COLOR:
1971 return TGSI_INTERPOLATE_COLOR;
1972 case TGSI_SEMANTIC_FOG:
1973 case TGSI_SEMANTIC_GENERIC:
1974 case TGSI_SEMANTIC_TEXCOORD:
1975 case TGSI_SEMANTIC_CLIPDIST:
1976 case TGSI_SEMANTIC_CLIPVERTEX:
1977 return TGSI_INTERPOLATE_PERSPECTIVE;
1978 case TGSI_SEMANTIC_EDGEFLAG:
1979 case TGSI_SEMANTIC_FACE:
1980 case TGSI_SEMANTIC_INSTANCEID:
1981 case TGSI_SEMANTIC_PCOORD:
1982 case TGSI_SEMANTIC_PRIMID:
1983 case TGSI_SEMANTIC_PSIZE:
1984 case TGSI_SEMANTIC_VERTEXID:
1985 return TGSI_INTERPOLATE_CONSTANT;
1986 default:
1987 assert(0);
1988 return TGSI_INTERPOLATE_CONSTANT;
1989 }
1990 }
1991
1992 DECL_SPECIAL(DCL)
1993 {
1994 struct ureg_program *ureg = tx->ureg;
1995 boolean is_input;
1996 boolean is_sampler;
1997 struct tgsi_declaration_semantic tgsi;
1998 struct sm1_semantic sem;
1999 sm1_read_semantic(tx, &sem);
2000
2001 is_input = sem.reg.file == D3DSPR_INPUT;
2002 is_sampler =
2003 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2004
2005 DUMP("DCL ");
2006 sm1_dump_dst_param(&sem.reg);
2007 if (is_sampler)
2008 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2009 else
2010 if (tx->version.major >= 3)
2011 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2012 else
2013 if (sem.usage | sem.usage_idx)
2014 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2015 else
2016 DUMP("\n");
2017
2018 if (is_sampler) {
2019 const unsigned m = 1 << sem.reg.idx;
2020 ureg_DECL_sampler(ureg, sem.reg.idx);
2021 tx->info->sampler_mask |= m;
2022 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2023 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2024 d3dstt_to_tgsi_tex(sem.sampler_type);
2025 return D3D_OK;
2026 }
2027
2028 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2029 if (IS_VS) {
2030 if (is_input) {
2031 /* linkage outside of shader with vertex declaration */
2032 ureg_DECL_vs_input(ureg, sem.reg.idx);
2033 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2034 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2035 tx->info->num_inputs = sem.reg.idx + 1;
2036 /* NOTE: preserving order in case of indirect access */
2037 } else
2038 if (tx->version.major >= 3) {
2039 /* SM2 output semantic determined by file */
2040 assert(sem.reg.mask != 0);
2041 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2042 tx->info->position_t = TRUE;
2043 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2044 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2045 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2046
2047 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
2048 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2049 }
2050 } else {
2051 if (is_input && tx->version.major >= 3) {
2052 unsigned interp_location = 0;
2053 /* SM3 only, SM2 input semantic determined by file */
2054 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2055 /* PositionT and tessfactor forbidden */
2056 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2057 return D3DERR_INVALIDCALL;
2058
2059 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2060 /* Position0 is forbidden (likely because vPos already does that) */
2061 if (sem.usage == D3DDECLUSAGE_POSITION)
2062 return D3DERR_INVALIDCALL;
2063 /* Following code is for depth */
2064 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2065 return D3D_OK;
2066 }
2067
2068 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2069 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2070 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2071
2072 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2073 ureg, tgsi.Name, tgsi.Index,
2074 nine_tgsi_to_interp_mode(&tgsi),
2075 0, /* cylwrap */
2076 interp_location, 0, 1);
2077 } else
2078 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2079 /* FragColor or FragDepth */
2080 assert(sem.reg.mask != 0);
2081 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2082 0, 1);
2083 }
2084 }
2085 return D3D_OK;
2086 }
2087
2088 DECL_SPECIAL(DEF)
2089 {
2090 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2091 return D3D_OK;
2092 }
2093
2094 DECL_SPECIAL(DEFB)
2095 {
2096 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2097 return D3D_OK;
2098 }
2099
2100 DECL_SPECIAL(DEFI)
2101 {
2102 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2103 return D3D_OK;
2104 }
2105
2106 DECL_SPECIAL(POW)
2107 {
2108 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2109 struct ureg_src src[2] = {
2110 tx_src_param(tx, &tx->insn.src[0]),
2111 tx_src_param(tx, &tx->insn.src[1])
2112 };
2113 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2114 return D3D_OK;
2115 }
2116
2117 DECL_SPECIAL(RSQ)
2118 {
2119 struct ureg_program *ureg = tx->ureg;
2120 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2121 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2122 struct ureg_dst tmp = tx_scratch(tx);
2123 ureg_RSQ(ureg, tmp, ureg_abs(src));
2124 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2125 return D3D_OK;
2126 }
2127
2128 DECL_SPECIAL(LOG)
2129 {
2130 struct ureg_program *ureg = tx->ureg;
2131 struct ureg_dst tmp = tx_scratch_scalar(tx);
2132 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2133 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2134 ureg_LG2(ureg, tmp, ureg_abs(src));
2135 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2136 return D3D_OK;
2137 }
2138
2139 DECL_SPECIAL(LIT)
2140 {
2141 struct ureg_program *ureg = tx->ureg;
2142 struct ureg_dst tmp = tx_scratch(tx);
2143 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2144 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2145 ureg_LIT(ureg, tmp, src);
2146 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2147 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2148 * it 0^0 if src.w=0, which value is driver dependent. */
2149 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2150 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2151 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2152 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2153 return D3D_OK;
2154 }
2155
2156 DECL_SPECIAL(NRM)
2157 {
2158 struct ureg_program *ureg = tx->ureg;
2159 struct ureg_dst tmp = tx_scratch_scalar(tx);
2160 struct ureg_src nrm = tx_src_scalar(tmp);
2161 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2162 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2163 ureg_DP3(ureg, tmp, src, src);
2164 ureg_RSQ(ureg, tmp, nrm);
2165 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2166 ureg_MUL(ureg, dst, src, nrm);
2167 return D3D_OK;
2168 }
2169
2170 DECL_SPECIAL(DP2ADD)
2171 {
2172 struct ureg_dst tmp = tx_scratch_scalar(tx);
2173 struct ureg_src dp2 = tx_src_scalar(tmp);
2174 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2175 struct ureg_src src[3];
2176 int i;
2177 for (i = 0; i < 3; ++i)
2178 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2179 assert_replicate_swizzle(&src[2]);
2180
2181 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2182 ureg_ADD(tx->ureg, dst, src[2], dp2);
2183
2184 return D3D_OK;
2185 }
2186
2187 DECL_SPECIAL(TEXCOORD)
2188 {
2189 struct ureg_program *ureg = tx->ureg;
2190 const unsigned s = tx->insn.dst[0].idx;
2191 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2192
2193 tx_texcoord_alloc(tx, s);
2194 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2195 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2196
2197 return D3D_OK;
2198 }
2199
2200 DECL_SPECIAL(TEXCOORD_ps14)
2201 {
2202 struct ureg_program *ureg = tx->ureg;
2203 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2204 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2205
2206 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2207
2208 ureg_MOV(ureg, dst, src);
2209
2210 return D3D_OK;
2211 }
2212
2213 DECL_SPECIAL(TEXKILL)
2214 {
2215 struct ureg_src reg;
2216
2217 if (tx->version.major > 1 || tx->version.minor > 3) {
2218 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2219 } else {
2220 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2221 reg = tx->regs.vT[tx->insn.dst[0].idx];
2222 }
2223 if (tx->version.major < 2)
2224 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2225 ureg_KILL_IF(tx->ureg, reg);
2226
2227 return D3D_OK;
2228 }
2229
2230 DECL_SPECIAL(TEXBEM)
2231 {
2232 struct ureg_program *ureg = tx->ureg;
2233 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2234 struct ureg_dst tmp, tmp2, texcoord;
2235 struct ureg_src sample, m00, m01, m10, m11;
2236 struct ureg_src bumpenvlscale, bumpenvloffset;
2237 const int m = tx->insn.dst[0].idx;
2238 const int n = tx->insn.src[0].idx;
2239
2240 assert(tx->version.major == 1);
2241
2242 sample = ureg_DECL_sampler(ureg, m);
2243 tx->info->sampler_mask |= 1 << m;
2244
2245 tx_texcoord_alloc(tx, m);
2246
2247 tmp = tx_scratch(tx);
2248 tmp2 = tx_scratch(tx);
2249 texcoord = tx_scratch(tx);
2250 /*
2251 * Bump-env-matrix:
2252 * 00 is X
2253 * 01 is Y
2254 * 10 is Z
2255 * 11 is W
2256 */
2257 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2258 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2259 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2260 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2261 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2262
2263 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2264 if (m % 2 == 0) {
2265 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2266 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2267 } else {
2268 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2269 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2270 }
2271
2272 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2273
2274 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2275 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2276 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2277 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2278 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2279 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2280 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2281
2282 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2283 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2284 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2285 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2286 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2287 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2288 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2289
2290 /* Now the texture coordinates are in tmp.xy */
2291
2292 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2293 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2294 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2295 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2296 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2297 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2298 bumpenvlscale, bumpenvloffset);
2299 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2300 }
2301
2302 tx->info->bumpenvmat_needed = 1;
2303
2304 return D3D_OK;
2305 }
2306
2307 DECL_SPECIAL(TEXREG2AR)
2308 {
2309 struct ureg_program *ureg = tx->ureg;
2310 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2311 struct ureg_src sample;
2312 const int m = tx->insn.dst[0].idx;
2313 const int n = tx->insn.src[0].idx;
2314 assert(m >= 0 && m > n);
2315
2316 sample = ureg_DECL_sampler(ureg, m);
2317 tx->info->sampler_mask |= 1 << m;
2318 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2319
2320 return D3D_OK;
2321 }
2322
2323 DECL_SPECIAL(TEXREG2GB)
2324 {
2325 struct ureg_program *ureg = tx->ureg;
2326 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2327 struct ureg_src sample;
2328 const int m = tx->insn.dst[0].idx;
2329 const int n = tx->insn.src[0].idx;
2330 assert(m >= 0 && m > n);
2331
2332 sample = ureg_DECL_sampler(ureg, m);
2333 tx->info->sampler_mask |= 1 << m;
2334 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2335
2336 return D3D_OK;
2337 }
2338
2339 DECL_SPECIAL(TEXM3x2PAD)
2340 {
2341 return D3D_OK; /* this is just padding */
2342 }
2343
2344 DECL_SPECIAL(TEXM3x2TEX)
2345 {
2346 struct ureg_program *ureg = tx->ureg;
2347 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2348 struct ureg_src sample;
2349 const int m = tx->insn.dst[0].idx - 1;
2350 const int n = tx->insn.src[0].idx;
2351 assert(m >= 0 && m > n);
2352
2353 tx_texcoord_alloc(tx, m);
2354 tx_texcoord_alloc(tx, m+1);
2355
2356 /* performs the matrix multiplication */
2357 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2358 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2359
2360 sample = ureg_DECL_sampler(ureg, m + 1);
2361 tx->info->sampler_mask |= 1 << (m + 1);
2362 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2363
2364 return D3D_OK;
2365 }
2366
2367 DECL_SPECIAL(TEXM3x3PAD)
2368 {
2369 return D3D_OK; /* this is just padding */
2370 }
2371
2372 DECL_SPECIAL(TEXM3x3SPEC)
2373 {
2374 struct ureg_program *ureg = tx->ureg;
2375 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2376 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2377 struct ureg_src sample;
2378 struct ureg_dst tmp;
2379 const int m = tx->insn.dst[0].idx - 2;
2380 const int n = tx->insn.src[0].idx;
2381 assert(m >= 0 && m > n);
2382
2383 tx_texcoord_alloc(tx, m);
2384 tx_texcoord_alloc(tx, m+1);
2385 tx_texcoord_alloc(tx, m+2);
2386
2387 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2388 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2389 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2390
2391 sample = ureg_DECL_sampler(ureg, m + 2);
2392 tx->info->sampler_mask |= 1 << (m + 2);
2393 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2394
2395 /* At this step, dst = N = (u', w', z').
2396 * We want dst to be the texture sampled at (u'', w'', z''), with
2397 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2398 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2399 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2400 /* at this step tmp.x = 1/N.N */
2401 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2402 /* at this step tmp.y = N.E */
2403 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2404 /* at this step tmp.x = N.E/N.N */
2405 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2406 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2407 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2408 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2409 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2410
2411 return D3D_OK;
2412 }
2413
2414 DECL_SPECIAL(TEXREG2RGB)
2415 {
2416 struct ureg_program *ureg = tx->ureg;
2417 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2418 struct ureg_src sample;
2419 const int m = tx->insn.dst[0].idx;
2420 const int n = tx->insn.src[0].idx;
2421 assert(m >= 0 && m > n);
2422
2423 sample = ureg_DECL_sampler(ureg, m);
2424 tx->info->sampler_mask |= 1 << m;
2425 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2426
2427 return D3D_OK;
2428 }
2429
2430 DECL_SPECIAL(TEXDP3TEX)
2431 {
2432 struct ureg_program *ureg = tx->ureg;
2433 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2434 struct ureg_dst tmp;
2435 struct ureg_src sample;
2436 const int m = tx->insn.dst[0].idx;
2437 const int n = tx->insn.src[0].idx;
2438 assert(m >= 0 && m > n);
2439
2440 tx_texcoord_alloc(tx, m);
2441
2442 tmp = tx_scratch(tx);
2443 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2444 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2445
2446 sample = ureg_DECL_sampler(ureg, m);
2447 tx->info->sampler_mask |= 1 << m;
2448 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2449
2450 return D3D_OK;
2451 }
2452
2453 DECL_SPECIAL(TEXM3x2DEPTH)
2454 {
2455 struct ureg_program *ureg = tx->ureg;
2456 struct ureg_dst tmp;
2457 const int m = tx->insn.dst[0].idx - 1;
2458 const int n = tx->insn.src[0].idx;
2459 assert(m >= 0 && m > n);
2460
2461 tx_texcoord_alloc(tx, m);
2462 tx_texcoord_alloc(tx, m+1);
2463
2464 tmp = tx_scratch(tx);
2465
2466 /* performs the matrix multiplication */
2467 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2468 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2469
2470 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2471 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2472 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2473 /* res = 'w' == 0 ? 1.0 : z/w */
2474 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2475 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2476 /* replace the depth for depth testing with the result */
2477 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2478 TGSI_WRITEMASK_Z, 0, 1);
2479 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2480 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2481 return D3D_OK;
2482 }
2483
2484 DECL_SPECIAL(TEXDP3)
2485 {
2486 struct ureg_program *ureg = tx->ureg;
2487 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2488 const int m = tx->insn.dst[0].idx;
2489 const int n = tx->insn.src[0].idx;
2490 assert(m >= 0 && m > n);
2491
2492 tx_texcoord_alloc(tx, m);
2493
2494 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2495
2496 return D3D_OK;
2497 }
2498
2499 DECL_SPECIAL(TEXM3x3)
2500 {
2501 struct ureg_program *ureg = tx->ureg;
2502 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2503 struct ureg_src sample;
2504 struct ureg_dst E, tmp;
2505 const int m = tx->insn.dst[0].idx - 2;
2506 const int n = tx->insn.src[0].idx;
2507 assert(m >= 0 && m > n);
2508
2509 tx_texcoord_alloc(tx, m);
2510 tx_texcoord_alloc(tx, m+1);
2511 tx_texcoord_alloc(tx, m+2);
2512
2513 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2514 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2515 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2516
2517 switch (tx->insn.opcode) {
2518 case D3DSIO_TEXM3x3:
2519 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2520 break;
2521 case D3DSIO_TEXM3x3TEX:
2522 sample = ureg_DECL_sampler(ureg, m + 2);
2523 tx->info->sampler_mask |= 1 << (m + 2);
2524 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2525 break;
2526 case D3DSIO_TEXM3x3VSPEC:
2527 sample = ureg_DECL_sampler(ureg, m + 2);
2528 tx->info->sampler_mask |= 1 << (m + 2);
2529 E = tx_scratch(tx);
2530 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2531 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2532 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2533 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2534 /* At this step, dst = N = (u', w', z').
2535 * We want dst to be the texture sampled at (u'', w'', z''), with
2536 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2537 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2538 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2539 /* at this step tmp.x = 1/N.N */
2540 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2541 /* at this step tmp.y = N.E */
2542 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2543 /* at this step tmp.x = N.E/N.N */
2544 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2545 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2546 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2547 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2548 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2549 break;
2550 default:
2551 return D3DERR_INVALIDCALL;
2552 }
2553 return D3D_OK;
2554 }
2555
2556 DECL_SPECIAL(TEXDEPTH)
2557 {
2558 struct ureg_program *ureg = tx->ureg;
2559 struct ureg_dst r5;
2560 struct ureg_src r5r, r5g;
2561
2562 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2563
2564 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2565 * r5 won't be used afterward, thus we can use r5.ba */
2566 r5 = tx->regs.r[5];
2567 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2568 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2569
2570 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2571 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2572 /* r5.r = r/g */
2573 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2574 r5r, ureg_imm1f(ureg, 1.0f));
2575 /* replace the depth for depth testing with the result */
2576 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2577 TGSI_WRITEMASK_Z, 0, 1);
2578 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2579
2580 return D3D_OK;
2581 }
2582
2583 DECL_SPECIAL(BEM)
2584 {
2585 struct ureg_program *ureg = tx->ureg;
2586 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2587 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2588 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2589 struct ureg_src m00, m01, m10, m11;
2590 const int m = tx->insn.dst[0].idx;
2591 struct ureg_dst tmp;
2592 /*
2593 * Bump-env-matrix:
2594 * 00 is X
2595 * 01 is Y
2596 * 10 is Z
2597 * 11 is W
2598 */
2599 nine_info_mark_const_f_used(tx->info, 8 + m);
2600 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2601 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2602 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2603 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2604 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2605 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2606 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2607 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2608 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2609 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2610
2611 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2612 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2613 NINE_APPLY_SWIZZLE(src1, X), src0);
2614 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2615 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2616 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2617 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2618
2619 tx->info->bumpenvmat_needed = 1;
2620
2621 return D3D_OK;
2622 }
2623
2624 DECL_SPECIAL(TEXLD)
2625 {
2626 struct ureg_program *ureg = tx->ureg;
2627 unsigned target;
2628 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2629 struct ureg_src src[2] = {
2630 tx_src_param(tx, &tx->insn.src[0]),
2631 tx_src_param(tx, &tx->insn.src[1])
2632 };
2633 assert(tx->insn.src[1].idx >= 0 &&
2634 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2635 target = tx->sampler_targets[tx->insn.src[1].idx];
2636
2637 switch (tx->insn.flags) {
2638 case 0:
2639 ureg_TEX(ureg, dst, target, src[0], src[1]);
2640 break;
2641 case NINED3DSI_TEXLD_PROJECT:
2642 ureg_TXP(ureg, dst, target, src[0], src[1]);
2643 break;
2644 case NINED3DSI_TEXLD_BIAS:
2645 ureg_TXB(ureg, dst, target, src[0], src[1]);
2646 break;
2647 default:
2648 assert(0);
2649 return D3DERR_INVALIDCALL;
2650 }
2651 return D3D_OK;
2652 }
2653
2654 DECL_SPECIAL(TEXLD_14)
2655 {
2656 struct ureg_program *ureg = tx->ureg;
2657 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2658 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2659 const unsigned s = tx->insn.dst[0].idx;
2660 const unsigned t = ps1x_sampler_type(tx->info, s);
2661
2662 tx->info->sampler_mask |= 1 << s;
2663 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2664
2665 return D3D_OK;
2666 }
2667
2668 DECL_SPECIAL(TEX)
2669 {
2670 struct ureg_program *ureg = tx->ureg;
2671 const unsigned s = tx->insn.dst[0].idx;
2672 const unsigned t = ps1x_sampler_type(tx->info, s);
2673 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2674 struct ureg_src src[2];
2675
2676 tx_texcoord_alloc(tx, s);
2677
2678 src[0] = tx->regs.vT[s];
2679 src[1] = ureg_DECL_sampler(ureg, s);
2680 tx->info->sampler_mask |= 1 << s;
2681
2682 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2683
2684 return D3D_OK;
2685 }
2686
2687 DECL_SPECIAL(TEXLDD)
2688 {
2689 unsigned target;
2690 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2691 struct ureg_src src[4] = {
2692 tx_src_param(tx, &tx->insn.src[0]),
2693 tx_src_param(tx, &tx->insn.src[1]),
2694 tx_src_param(tx, &tx->insn.src[2]),
2695 tx_src_param(tx, &tx->insn.src[3])
2696 };
2697 assert(tx->insn.src[1].idx >= 0 &&
2698 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2699 target = tx->sampler_targets[tx->insn.src[1].idx];
2700
2701 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2702 return D3D_OK;
2703 }
2704
2705 DECL_SPECIAL(TEXLDL)
2706 {
2707 unsigned target;
2708 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2709 struct ureg_src src[2] = {
2710 tx_src_param(tx, &tx->insn.src[0]),
2711 tx_src_param(tx, &tx->insn.src[1])
2712 };
2713 assert(tx->insn.src[1].idx >= 0 &&
2714 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2715 target = tx->sampler_targets[tx->insn.src[1].idx];
2716
2717 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2718 return D3D_OK;
2719 }
2720
2721 DECL_SPECIAL(SETP)
2722 {
2723 STUB(D3DERR_INVALIDCALL);
2724 }
2725
2726 DECL_SPECIAL(BREAKP)
2727 {
2728 STUB(D3DERR_INVALIDCALL);
2729 }
2730
2731 DECL_SPECIAL(PHASE)
2732 {
2733 return D3D_OK; /* we don't care about phase */
2734 }
2735
2736 DECL_SPECIAL(COMMENT)
2737 {
2738 return D3D_OK; /* nothing to do */
2739 }
2740
2741
2742 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
2743 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2744
2745 struct sm1_op_info inst_table[] =
2746 {
2747 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2748 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2749 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2750 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2751 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2752 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2753 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2754 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2755 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2756 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2757 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2758 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2759 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2760 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2761 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2762 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2763 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2764 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2765 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2766 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2767
2768 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2769 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2770 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2771 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2772 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2773
2774 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2775 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2776 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2777 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2778 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2779 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2780
2781 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2782
2783 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2784 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2785 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2786 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2787 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2788
2789 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2790 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2791
2792 /* More flow control */
2793 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2794 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2795 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2796 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2797 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2798 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2799 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2800 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2801 /* we don't write to the address register, but a normal register (copied
2802 * when needed to the address register), thus we don't use ARR */
2803 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2804
2805 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2806 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2807
2808 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2809 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2810 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2811 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2812 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2813 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2814 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2815 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2816 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2817 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2818 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2819 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2820 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2821 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2822 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2823 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2824
2825 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2826 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2827 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2828 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2829
2830 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2831
2832 /* More tex stuff */
2833 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2834 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2835 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2836 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2837 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2838 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2839
2840 /* Misc */
2841 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2842 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2843 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2844 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2845 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2846 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2847 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2848 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2849 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2850 };
2851
2852 struct sm1_op_info inst_phase =
2853 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2854
2855 struct sm1_op_info inst_comment =
2856 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2857
2858 static void
2859 create_op_info_map(struct shader_translator *tx)
2860 {
2861 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2862 unsigned i;
2863
2864 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
2865 tx->op_info_map[i] = -1;
2866
2867 if (tx->processor == PIPE_SHADER_VERTEX) {
2868 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
2869 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
2870 if (inst_table[i].vert_version.min <= version &&
2871 inst_table[i].vert_version.max >= version)
2872 tx->op_info_map[inst_table[i].sio] = i;
2873 }
2874 } else {
2875 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
2876 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
2877 if (inst_table[i].frag_version.min <= version &&
2878 inst_table[i].frag_version.max >= version)
2879 tx->op_info_map[inst_table[i].sio] = i;
2880 }
2881 }
2882 }
2883
2884 static inline HRESULT
2885 NineTranslateInstruction_Generic(struct shader_translator *tx)
2886 {
2887 struct ureg_dst dst[1];
2888 struct ureg_src src[4];
2889 unsigned i;
2890
2891 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
2892 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2893 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
2894 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2895
2896 ureg_insn(tx->ureg, tx->insn.info->opcode,
2897 dst, tx->insn.ndst,
2898 src, tx->insn.nsrc);
2899 return D3D_OK;
2900 }
2901
2902 static inline DWORD
2903 TOKEN_PEEK(struct shader_translator *tx)
2904 {
2905 return *(tx->parse);
2906 }
2907
2908 static inline DWORD
2909 TOKEN_NEXT(struct shader_translator *tx)
2910 {
2911 return *(tx->parse)++;
2912 }
2913
2914 static inline void
2915 TOKEN_JUMP(struct shader_translator *tx)
2916 {
2917 if (tx->parse_next && tx->parse != tx->parse_next) {
2918 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2919 tx->parse = tx->parse_next;
2920 }
2921 }
2922
2923 static inline boolean
2924 sm1_parse_eof(struct shader_translator *tx)
2925 {
2926 return TOKEN_PEEK(tx) == NINED3DSP_END;
2927 }
2928
2929 static void
2930 sm1_read_version(struct shader_translator *tx)
2931 {
2932 const DWORD tok = TOKEN_NEXT(tx);
2933
2934 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2935 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2936
2937 switch (tok >> 16) {
2938 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
2939 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
2940 default:
2941 DBG("Invalid shader type: %x\n", tok);
2942 tx->processor = ~0;
2943 break;
2944 }
2945 }
2946
2947 /* This is just to check if we parsed the instruction properly. */
2948 static void
2949 sm1_parse_get_skip(struct shader_translator *tx)
2950 {
2951 const DWORD tok = TOKEN_PEEK(tx);
2952
2953 if (tx->version.major >= 2) {
2954 tx->parse_next = tx->parse + 1 /* this */ +
2955 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2956 } else {
2957 tx->parse_next = NULL; /* TODO: determine from param count */
2958 }
2959 }
2960
2961 static void
2962 sm1_print_comment(const char *comment, UINT size)
2963 {
2964 if (!size)
2965 return;
2966 /* TODO */
2967 }
2968
2969 static void
2970 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2971 {
2972 DWORD tok = TOKEN_PEEK(tx);
2973
2974 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2975 {
2976 const char *comment = "";
2977 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2978 tx->parse += size + 1;
2979
2980 if (print)
2981 sm1_print_comment(comment, size);
2982
2983 tok = TOKEN_PEEK(tx);
2984 }
2985 }
2986
2987 static void
2988 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2989 {
2990 *reg = TOKEN_NEXT(tx);
2991
2992 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2993 {
2994 if (tx->version.major < 2)
2995 *rel = (1 << 31) |
2996 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2997 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2998 D3DSP_NOSWIZZLE;
2999 else
3000 *rel = TOKEN_NEXT(tx);
3001 }
3002 }
3003
3004 static void
3005 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3006 {
3007 uint8_t shift;
3008 dst->file =
3009 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3010 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3011 dst->type = TGSI_RETURN_TYPE_FLOAT;
3012 dst->idx = tok & D3DSP_REGNUM_MASK;
3013 dst->rel = NULL;
3014 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3015 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3016 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3017 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
3018 }
3019
3020 static void
3021 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3022 {
3023 src->file =
3024 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3025 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3026 src->type = TGSI_RETURN_TYPE_FLOAT;
3027 src->idx = tok & D3DSP_REGNUM_MASK;
3028 src->rel = NULL;
3029 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3030 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3031
3032 switch (src->file) {
3033 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3034 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3035 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3036 default:
3037 break;
3038 }
3039 }
3040
3041 static void
3042 sm1_parse_immediate(struct shader_translator *tx,
3043 struct sm1_src_param *imm)
3044 {
3045 imm->file = NINED3DSPR_IMMEDIATE;
3046 imm->idx = INT_MIN;
3047 imm->rel = NULL;
3048 imm->swizzle = NINED3DSP_NOSWIZZLE;
3049 imm->mod = 0;
3050 switch (tx->insn.opcode) {
3051 case D3DSIO_DEF:
3052 imm->type = NINED3DSPTYPE_FLOAT4;
3053 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3054 tx->parse += 4;
3055 break;
3056 case D3DSIO_DEFI:
3057 imm->type = NINED3DSPTYPE_INT4;
3058 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3059 tx->parse += 4;
3060 break;
3061 case D3DSIO_DEFB:
3062 imm->type = NINED3DSPTYPE_BOOL;
3063 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3064 tx->parse += 1;
3065 break;
3066 default:
3067 assert(0);
3068 break;
3069 }
3070 }
3071
3072 static void
3073 sm1_read_dst_param(struct shader_translator *tx,
3074 struct sm1_dst_param *dst,
3075 struct sm1_src_param *rel)
3076 {
3077 DWORD tok_dst, tok_rel = 0;
3078
3079 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3080 sm1_parse_dst_param(dst, tok_dst);
3081 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3082 sm1_parse_src_param(rel, tok_rel);
3083 dst->rel = rel;
3084 }
3085 }
3086
3087 static void
3088 sm1_read_src_param(struct shader_translator *tx,
3089 struct sm1_src_param *src,
3090 struct sm1_src_param *rel)
3091 {
3092 DWORD tok_src, tok_rel = 0;
3093
3094 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3095 sm1_parse_src_param(src, tok_src);
3096 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3097 assert(rel);
3098 sm1_parse_src_param(rel, tok_rel);
3099 src->rel = rel;
3100 }
3101 }
3102
3103 static void
3104 sm1_read_semantic(struct shader_translator *tx,
3105 struct sm1_semantic *sem)
3106 {
3107 const DWORD tok_usg = TOKEN_NEXT(tx);
3108 const DWORD tok_dst = TOKEN_NEXT(tx);
3109
3110 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3111 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3112 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3113
3114 sm1_parse_dst_param(&sem->reg, tok_dst);
3115 }
3116
3117 static void
3118 sm1_parse_instruction(struct shader_translator *tx)
3119 {
3120 struct sm1_instruction *insn = &tx->insn;
3121 HRESULT hr;
3122 DWORD tok;
3123 struct sm1_op_info *info = NULL;
3124 unsigned i;
3125
3126 sm1_parse_comments(tx, TRUE);
3127 sm1_parse_get_skip(tx);
3128
3129 tok = TOKEN_NEXT(tx);
3130
3131 insn->opcode = tok & D3DSI_OPCODE_MASK;
3132 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3133 insn->coissue = !!(tok & D3DSI_COISSUE);
3134 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3135
3136 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3137 int k = tx->op_info_map[insn->opcode];
3138 if (k >= 0) {
3139 assert(k < ARRAY_SIZE(inst_table));
3140 info = &inst_table[k];
3141 }
3142 } else {
3143 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3144 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3145 }
3146 if (!info) {
3147 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3148 TOKEN_JUMP(tx);
3149 return;
3150 }
3151 insn->info = info;
3152 insn->ndst = info->ndst;
3153 insn->nsrc = info->nsrc;
3154
3155 assert(!insn->predicated && "TODO: predicated instructions");
3156
3157 /* check version */
3158 {
3159 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3160 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3161 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3162 if (ver < min || ver > max) {
3163 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3164 min, ver, max);
3165 return;
3166 }
3167 }
3168
3169 for (i = 0; i < insn->ndst; ++i)
3170 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3171 if (insn->predicated)
3172 sm1_read_src_param(tx, &insn->pred, NULL);
3173 for (i = 0; i < insn->nsrc; ++i)
3174 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3175
3176 /* parse here so we can dump them before processing */
3177 if (insn->opcode == D3DSIO_DEF ||
3178 insn->opcode == D3DSIO_DEFI ||
3179 insn->opcode == D3DSIO_DEFB)
3180 sm1_parse_immediate(tx, &tx->insn.src[0]);
3181
3182 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3183 sm1_instruction_check(insn);
3184
3185 if (info->handler)
3186 hr = info->handler(tx);
3187 else
3188 hr = NineTranslateInstruction_Generic(tx);
3189 tx_apply_dst0_modifiers(tx);
3190
3191 if (hr != D3D_OK)
3192 tx->failure = TRUE;
3193 tx->num_scratch = 0; /* reset */
3194
3195 TOKEN_JUMP(tx);
3196 }
3197
3198 static void
3199 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3200 {
3201 unsigned i;
3202
3203 tx->info = info;
3204
3205 tx->byte_code = info->byte_code;
3206 tx->parse = info->byte_code;
3207
3208 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3209 info->input_map[i] = NINE_DECLUSAGE_NONE;
3210 info->num_inputs = 0;
3211
3212 info->position_t = FALSE;
3213 info->point_size = FALSE;
3214
3215 tx->info->const_float_slots = 0;
3216 tx->info->const_int_slots = 0;
3217 tx->info->const_bool_slots = 0;
3218
3219 info->sampler_mask = 0x0;
3220 info->rt_mask = 0x0;
3221
3222 info->lconstf.data = NULL;
3223 info->lconstf.ranges = NULL;
3224
3225 info->bumpenvmat_needed = 0;
3226
3227 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3228 tx->regs.rL[i] = ureg_dst_undef();
3229 }
3230 tx->regs.address = ureg_dst_undef();
3231 tx->regs.a0 = ureg_dst_undef();
3232 tx->regs.p = ureg_dst_undef();
3233 tx->regs.oDepth = ureg_dst_undef();
3234 tx->regs.vPos = ureg_src_undef();
3235 tx->regs.vFace = ureg_src_undef();
3236 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3237 tx->regs.o[i] = ureg_dst_undef();
3238 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3239 tx->regs.oCol[i] = ureg_dst_undef();
3240 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3241 tx->regs.vC[i] = ureg_src_undef();
3242 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3243 tx->regs.vT[i] = ureg_src_undef();
3244
3245 for (i = 0; i < ARRAY_SIZE(tx->lconsti); ++i)
3246 tx->lconsti[i].idx = -1;
3247 for (i = 0; i < ARRAY_SIZE(tx->lconstb); ++i)
3248 tx->lconstb[i].idx = -1;
3249
3250 sm1_read_version(tx);
3251
3252 info->version = (tx->version.major << 4) | tx->version.minor;
3253
3254 create_op_info_map(tx);
3255 }
3256
3257 static void
3258 tx_dtor(struct shader_translator *tx)
3259 {
3260 if (tx->num_inst_labels)
3261 FREE(tx->inst_labels);
3262 FREE(tx->lconstf);
3263 FREE(tx->regs.r);
3264 FREE(tx);
3265 }
3266
3267 static void
3268 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3269 {
3270 struct ureg_program *ureg = tx->ureg;
3271 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3272 struct ureg_src fog_end, fog_coeff, fog_density;
3273 struct ureg_src fog_vs, depth, fog_color;
3274 struct ureg_dst fog_factor;
3275
3276 if (!tx->info->fog_enable) {
3277 ureg_MOV(ureg, oCol0, src_col);
3278 return;
3279 }
3280
3281 if (tx->info->fog_mode != D3DFOG_NONE) {
3282 depth = nine_get_position_input(tx);
3283 depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3284 }
3285
3286 nine_info_mark_const_f_used(tx->info, 33);
3287 fog_color = NINE_CONSTANT_SRC(32);
3288 fog_factor = tx_scratch_scalar(tx);
3289
3290 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3291 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3292 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3293 ureg_SUB(ureg, fog_factor, fog_end, depth);
3294 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3295 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3296 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3297 ureg_MUL(ureg, fog_factor, depth, fog_density);
3298 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3299 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3300 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3301 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3302 ureg_MUL(ureg, fog_factor, depth, fog_density);
3303 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3304 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3305 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3306 } else {
3307 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3308 TGSI_INTERPOLATE_PERSPECTIVE),
3309 TGSI_SWIZZLE_X);
3310 ureg_MOV(ureg, fog_factor, fog_vs);
3311 }
3312
3313 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3314 tx_src_scalar(fog_factor), src_col, fog_color);
3315 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3316 }
3317
/* Query screen capability PIPE_CAP_<n>; expects `device` in scope. */
#define GET_CAP(n) \
    device->screen->get_param(device->screen, PIPE_CAP_##n)
/* Query shader capability PIPE_SHADER_CAP_<n> for the shader stage
 * info->type; expects `device` and `info` in scope. */
#define GET_SHADER_CAP(n) \
    device->screen->get_shader_param(device->screen, info->type, \
                                     PIPE_SHADER_CAP_##n)
3322
3323 HRESULT
3324 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3325 {
3326 struct shader_translator *tx;
3327 HRESULT hr = D3D_OK;
3328 const unsigned processor = info->type;
3329 unsigned s, slot_max;
3330 unsigned max_const_f;
3331
3332 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3333
3334 tx = CALLOC_STRUCT(shader_translator);
3335 if (!tx)
3336 return E_OUTOFMEMORY;
3337 tx_ctor(tx, info);
3338
3339 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3340 hr = D3DERR_INVALIDCALL;
3341 DBG("Unsupported shader version: %u.%u !\n",
3342 tx->version.major, tx->version.minor);
3343 goto out;
3344 }
3345 if (tx->processor != processor) {
3346 hr = D3DERR_INVALIDCALL;
3347 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3348 goto out;
3349 }
3350 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3351 tx->version.major, tx->version.minor);
3352
3353 tx->ureg = ureg_create(processor);
3354 if (!tx->ureg) {
3355 hr = E_OUTOFMEMORY;
3356 goto out;
3357 }
3358
3359 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3360 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3361 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3362 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3363 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3364 tx->texcoord_sn = tx->want_texcoord ?
3365 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3366 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3367 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3368
3369 if (IS_VS) {
3370 tx->num_constf_allowed = NINE_MAX_CONST_F;
3371 } else if (tx->version.major < 2) {/* IS_PS v1 */
3372 tx->num_constf_allowed = 8;
3373 } else if (tx->version.major == 2) {/* IS_PS v2 */
3374 tx->num_constf_allowed = 32;
3375 } else {/* IS_PS v3 */
3376 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3377 }
3378
3379 if (tx->version.major < 2) {
3380 tx->num_consti_allowed = 0;
3381 tx->num_constb_allowed = 0;
3382 } else {
3383 tx->num_consti_allowed = NINE_MAX_CONST_I;
3384 tx->num_constb_allowed = NINE_MAX_CONST_B;
3385 }
3386
3387 /* VS must always write position. Declare it here to make it the 1st output.
3388 * (Some drivers like nv50 are buggy and rely on that.)
3389 */
3390 if (IS_VS) {
3391 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3392 } else {
3393 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3394 if (!tx->shift_wpos)
3395 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3396 }
3397
3398 while (!sm1_parse_eof(tx) && !tx->failure)
3399 sm1_parse_instruction(tx);
3400 tx->parse++; /* for byte_size */
3401
3402 if (tx->failure) {
3403 ERR("Encountered buggy shader\n");
3404 ureg_destroy(tx->ureg);
3405 hr = D3DERR_INVALIDCALL;
3406 goto out;
3407 }
3408
3409 if (IS_PS && tx->version.major < 3) {
3410 if (tx->version.major < 2) {
3411 assert(tx->num_temp); /* there must be color output */
3412 info->rt_mask |= 0x1;
3413 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3414 } else {
3415 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3416 }
3417 }
3418
3419 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3420 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3421 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3422 }
3423
3424 if (info->position_t)
3425 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3426
3427 ureg_END(tx->ureg);
3428
3429 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
3430 info->point_size = TRUE;
3431
3432 /* record local constants */
3433 if (tx->num_lconstf && tx->indirect_const_access) {
3434 struct nine_range *ranges;
3435 float *data;
3436 int *indices;
3437 unsigned i, k, n;
3438
3439 hr = E_OUTOFMEMORY;
3440
3441 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3442 if (!data)
3443 goto out;
3444 info->lconstf.data = data;
3445
3446 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3447 if (!indices)
3448 goto out;
3449
3450 /* lazy sort, num_lconstf should be small */
3451 for (n = 0; n < tx->num_lconstf; ++n) {
3452 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3453 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3454 k = i;
3455 }
3456 indices[n] = tx->lconstf[k].idx;
3457 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3458 tx->lconstf[k].idx = INT_MAX;
3459 }
3460
3461 /* count ranges */
3462 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3463 if (indices[i] != indices[i - 1] + 1)
3464 ++n;
3465 ranges = MALLOC(n * sizeof(ranges[0]));
3466 if (!ranges) {
3467 FREE(indices);
3468 goto out;
3469 }
3470 info->lconstf.ranges = ranges;
3471
3472 k = 0;
3473 ranges[k].bgn = indices[0];
3474 for (i = 1; i < tx->num_lconstf; ++i) {
3475 if (indices[i] != indices[i - 1] + 1) {
3476 ranges[k].next = &ranges[k + 1];
3477 ranges[k].end = indices[i - 1] + 1;
3478 ++k;
3479 ranges[k].bgn = indices[i];
3480 }
3481 }
3482 ranges[k].end = indices[i - 1] + 1;
3483 ranges[k].next = NULL;
3484 assert(n == (k + 1));
3485
3486 FREE(indices);
3487 hr = D3D_OK;
3488 }
3489
3490 /* r500 */
3491 if (info->const_float_slots > device->max_vs_const_f &&
3492 (info->const_int_slots || info->const_bool_slots))
3493 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3494
3495
3496 if (tx->indirect_const_access) /* vs only */
3497 info->const_float_slots = device->max_vs_const_f;
3498
3499 max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3500 slot_max = info->const_bool_slots > 0 ?
3501 max_const_f + NINE_MAX_CONST_I
3502 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3503 info->const_int_slots > 0 ?
3504 max_const_f + info->const_int_slots :
3505 info->const_float_slots;
3506
3507 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3508
3509 for (s = 0; s < slot_max; s++)
3510 ureg_DECL_constant(tx->ureg, s);
3511
3512 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3513 unsigned count;
3514 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3515 tgsi_dump(toks, 0);
3516 ureg_free_tokens(toks);
3517 }
3518
3519 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3520 if (!info->cso) {
3521 hr = D3DERR_DRIVERINTERNALERROR;
3522 FREE(info->lconstf.data);
3523 FREE(info->lconstf.ranges);
3524 goto out;
3525 }
3526
3527 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3528 out:
3529 tx_dtor(tx);
3530 return hr;
3531 }