Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
36
/* Debug channel selected for the nine debug macros used in this file. */
#define DBG_CHANNEL DBG_SHADER

/* printf-style output on DBG_CHANNEL; NULL is passed as the second argument
 * of _nine_debug_printf. Used by all sm1_dump_* helpers below. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)


struct shader_translator;

/* Signature of a special-case instruction handler (see sm1_op_info::handler):
 * takes the translator state and returns an HRESULT. */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Forward declaration; sm1_dump_instruction() uses it to print opcode names. */
static inline const char *d3dsio_to_string(unsigned opcode);
47
48
/* Shader-kind tags (VS / PS) — presumably matched against the version token;
 * TODO confirm against the parser. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Capacities of the label / counter stacks in struct shader_translator
 * (cond_labels[], loop_labels[], loop_or_rep[], regs.rL[]). */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Immediate kinds; sm1_dump_immediate() switches on sm1_src_param::type
 * using these values. */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Extra register-file value for immediates: one past D3DSPR_PREDICATE. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison selectors (presumably carried in the instruction flags of
 * compare-controlled opcodes — TODO confirm against the users). */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Position and width (8 bits) of the per-opcode flags field. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values for D3DSIO_TEX; sm1_dump_instruction() prints them as the
 * "p" / "b" opcode suffix. */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Write-mask bits, one per component (sm1_dump_writemask() prints them as
 * x, y, z, w in this order). */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, selecting 0, 1, 2, 3 in turn. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* constants. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Source operand referring to slot `index` of the TGSI constant file. */
#define NINE_CONSTANT_SRC(index) \
    ureg_src_register(TGSI_FILE_CONSTANT, index)

/* Replicate component `s` of `src` into all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Constant slot `index` with component `s` replicated. */
#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
    NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)

/* Destination-modifier bits shifted down by D3DSP_DSTMOD_SHIFT; tested
 * against sm1_dst_param::mod in sm1_dump_dst_param(). */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
104
/*
 * Source modifiers, shifted down by D3DSP_SRCMOD_SHIFT so that they can be
 * stored in sm1_src_param::mod and used to index sm1_mod_str[].
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
134
/* Printable prefix for each source modifier, indexed by NINED3DSPSM_* value.
 * sm1_dump_src_param() prints the entry followed by "(" when the modifier is
 * non-zero. */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
152
153 static void
154 sm1_dump_writemask(BYTE mask)
155 {
156 if (mask & 1) DUMP("x"); else DUMP("_");
157 if (mask & 2) DUMP("y"); else DUMP("_");
158 if (mask & 4) DUMP("z"); else DUMP("_");
159 if (mask & 8) DUMP("w"); else DUMP("_");
160 }
161
162 static void
163 sm1_dump_swizzle(BYTE s)
164 {
165 char c[4] = { 'x', 'y', 'z', 'w' };
166 DUMP("%c%c%c%c",
167 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
168 }
169
/* One-letter tag per D3DSPR_* register file, used by the dump helpers when a
 * register has no dedicated spelling in sm1_dump_reg(). The table ends at
 * D3DSPR_PREDICATE, so NINED3DSPR_IMMEDIATE has no entry. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
193
194 static void
195 sm1_dump_reg(BYTE file, INT index)
196 {
197 switch (file) {
198 case D3DSPR_LOOP:
199 DUMP("aL");
200 break;
201 case D3DSPR_COLOROUT:
202 DUMP("oC%i", index);
203 break;
204 case D3DSPR_DEPTHOUT:
205 DUMP("oDepth");
206 break;
207 case D3DSPR_RASTOUT:
208 DUMP("oRast%i", index);
209 break;
210 case D3DSPR_CONSTINT:
211 DUMP("iconst[%i]", index);
212 break;
213 case D3DSPR_CONSTBOOL:
214 DUMP("bconst[%i]", index);
215 break;
216 default:
217 DUMP("%c%i", sm1_file_char[file], index);
218 break;
219 }
220 }
221
/* Decoded source operand of an SM1 instruction. */
struct sm1_src_param
{
    INT idx;                    /* register index within `file` */
    struct sm1_src_param *rel;  /* relative-addressing operand, or NULL */
    BYTE file;                  /* D3DSPR_* or NINED3DSPR_IMMEDIATE */
    BYTE swizzle;               /* two bits per component, see NINED3DSP_NOSWIZZLE */
    BYTE mod;                   /* NINED3DSPSM_* source modifier */
    BYTE type;                  /* NINED3DSPTYPE_*: selects the imm member to read */
    /* immediate payload, dumped by sm1_dump_immediate() */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm;
};
/* Forward declaration. */
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
239
/* Decoded destination operand of an SM1 instruction. */
struct sm1_dst_param
{
    INT idx;                    /* register index within `file` */
    struct sm1_src_param *rel;  /* relative-addressing operand, or NULL */
    BYTE file;                  /* D3DSPR_* register file */
    BYTE mask;                  /* NINED3DSP_WRITEMASK_* bits */
    BYTE mod;                   /* NINED3DSPDM_* modifier bits */
    int8_t shift; /* sint4 */   /* dumped as "*2^n" (positive) or "/2^n" (negative) */
    BYTE type;
};
250
251 static inline void
252 assert_replicate_swizzle(const struct ureg_src *reg)
253 {
254 assert(reg->SwizzleY == reg->SwizzleX &&
255 reg->SwizzleZ == reg->SwizzleX &&
256 reg->SwizzleW == reg->SwizzleX);
257 }
258
259 static void
260 sm1_dump_immediate(const struct sm1_src_param *param)
261 {
262 switch (param->type) {
263 case NINED3DSPTYPE_FLOAT4:
264 DUMP("{ %f %f %f %f }",
265 param->imm.f[0], param->imm.f[1],
266 param->imm.f[2], param->imm.f[3]);
267 break;
268 case NINED3DSPTYPE_INT4:
269 DUMP("{ %i %i %i %i }",
270 param->imm.i[0], param->imm.i[1],
271 param->imm.i[2], param->imm.i[3]);
272 break;
273 case NINED3DSPTYPE_BOOL:
274 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
275 break;
276 default:
277 assert(0);
278 break;
279 }
280 }
281
282 static void
283 sm1_dump_src_param(const struct sm1_src_param *param)
284 {
285 if (param->file == NINED3DSPR_IMMEDIATE) {
286 assert(!param->mod &&
287 !param->rel &&
288 param->swizzle == NINED3DSP_NOSWIZZLE);
289 sm1_dump_immediate(param);
290 return;
291 }
292
293 if (param->mod)
294 DUMP("%s(", sm1_mod_str[param->mod]);
295 if (param->rel) {
296 DUMP("%c[", sm1_file_char[param->file]);
297 sm1_dump_src_param(param->rel);
298 DUMP("+%i]", param->idx);
299 } else {
300 sm1_dump_reg(param->file, param->idx);
301 }
302 if (param->mod)
303 DUMP(")");
304 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
305 DUMP(".");
306 sm1_dump_swizzle(param->swizzle);
307 }
308 }
309
310 static void
311 sm1_dump_dst_param(const struct sm1_dst_param *param)
312 {
313 if (param->mod & NINED3DSPDM_SATURATE)
314 DUMP("sat ");
315 if (param->mod & NINED3DSPDM_PARTIALP)
316 DUMP("pp ");
317 if (param->mod & NINED3DSPDM_CENTROID)
318 DUMP("centroid ");
319 if (param->shift < 0)
320 DUMP("/%u ", 1 << -param->shift);
321 if (param->shift > 0)
322 DUMP("*%u ", 1 << param->shift);
323
324 if (param->rel) {
325 DUMP("%c[", sm1_file_char[param->file]);
326 sm1_dump_src_param(param->rel);
327 DUMP("+%i]", param->idx);
328 } else {
329 sm1_dump_reg(param->file, param->idx);
330 }
331 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
332 DUMP(".");
333 sm1_dump_writemask(param->mask);
334 }
335 }
336
/* Declaration data filled in by sm1_read_semantic(): the declared register
 * together with its usage / sampler information. */
struct sm1_semantic
{
    struct sm1_dst_param reg;   /* register being declared */
    BYTE sampler_type;
    D3DDECLUSAGE usage;
    BYTE usage_idx;
};
344
/* Static description of one SM1 opcode: how it maps to TGSI, which shader
 * versions accept it, and how many operands the generic parser reads. */
struct sm1_op_info
{
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, the `opcode`
     * member should be ignored completely */
    unsigned sio;
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even if handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
365
/* One decoded SM1 instruction (the translator keeps the current one in
 * shader_translator::insn). */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags;                 /* opcode-specific flags, e.g. NINED3DSI_TEXLD_* */
    BOOL coissue;
    BOOL predicated;            /* when set, `pred` holds the predicate operand */
    BYTE ndst;                  /* number of valid entries in dst[] */
    BYTE nsrc;                  /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    /* presumably backing storage for src[i].rel / dst[i].rel — TODO confirm
     * against the parser */
    struct sm1_src_param src_rel[4];
    struct sm1_src_param pred;
    struct sm1_src_param dst_rel[1];
    struct sm1_dst_param dst[1];

    struct sm1_op_info *info;
};
382
383 static void
384 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
385 {
386 unsigned i;
387
388 /* no info stored for these: */
389 if (insn->opcode == D3DSIO_DCL)
390 return;
391 for (i = 0; i < indent; ++i)
392 DUMP(" ");
393
394 if (insn->predicated) {
395 DUMP("@");
396 sm1_dump_src_param(&insn->pred);
397 DUMP(" ");
398 }
399 DUMP("%s", d3dsio_to_string(insn->opcode));
400 if (insn->flags) {
401 switch (insn->opcode) {
402 case D3DSIO_TEX:
403 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
404 break;
405 default:
406 DUMP("_%x", insn->flags);
407 break;
408 }
409 }
410 if (insn->coissue)
411 DUMP("_co");
412 DUMP(" ");
413
414 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
415 sm1_dump_dst_param(&insn->dst[i]);
416 DUMP(" ");
417 }
418
419 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
420 sm1_dump_src_param(&insn->src[i]);
421 DUMP(" ");
422 }
423 if (insn->opcode == D3DSIO_DEF ||
424 insn->opcode == D3DSIO_DEFI ||
425 insn->opcode == D3DSIO_DEFB)
426 sm1_dump_immediate(&insn->src[0]);
427
428 DUMP("\n");
429 }
430
/* A constant defined inside the shader (see tx_set_lconstf/i/b): the constant
 * index it overrides, the TGSI immediate holding its value, and a copy of
 * the value itself. */
struct sm1_local_const
{
    INT idx;              /* constant register index this entry stands for */
    struct ureg_src reg;  /* TGSI immediate created for the value */
    union {
        boolean b;
        float f[4];
        int32_t i[4];
    } imm;
};
441
/* Complete state of one SM1 -> TGSI translation. Almost every helper in this
 * file takes a pointer to it as `tx`. */
struct shader_translator
{
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg;  /* TGSI program being built */

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAGMENT */
    /* upper bounds enforced by the tx_lconst* / tx_set_lconst* helpers */
    unsigned num_constf_allowed;
    unsigned num_consti_allowed;
    unsigned num_constb_allowed;

    boolean native_integers;
    boolean inline_subroutines;
    boolean lower_preds;
    boolean want_texcoord;
    boolean shift_wpos;     /* subtract 0.5 from vPos.xy when it is read */
    unsigned texcoord_sn;   /* TGSI semantic name used for texcoords */

    struct sm1_instruction insn; /* current instruction */

    /* TGSI registers allocated so far; most are created on first use and
     * are undefined until then */
    struct {
        struct ureg_dst *r;
        struct ureg_dst oPos;
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p;
        struct ureg_dst address;
        struct ureg_dst a0;
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[5]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* Elements(regs.r) */
    unsigned num_scratch;   /* number of regs.t[] entries handed out by tx_scratch() */
    unsigned loop_depth;
    unsigned loop_depth_max;
    unsigned cond_depth;
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    /* shader-local constants: floats in a growable array searched by idx,
     * ints and bools indexed directly by constant number */
    struct sm1_local_const *lconstf;
    unsigned num_lconstf;
    struct sm1_local_const lconsti[NINE_MAX_CONST_I];
    struct sm1_local_const lconstb[NINE_MAX_CONST_B];

    boolean indirect_const_access;
    boolean failure;        /* set by helpers when translation cannot continue */

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
517
/* Shader-stage tests; both expect a `struct shader_translator *tx` in scope. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)

/* When `cond` holds, mark the translation as failed and return from the
 * enclosing void function. Expands to a bare `if` with its own braces and
 * expects `tx` in scope; call sites in this file use it without a trailing
 * semicolon. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

/* Forward declaration. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
525
526 static void
527 sm1_instruction_check(const struct sm1_instruction *insn)
528 {
529 if (insn->opcode == D3DSIO_CRS)
530 {
531 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
532 {
533 DBG("CRS.mask.w\n");
534 }
535 }
536 }
537
538 static boolean
539 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
540 {
541 INT i;
542 if (index < 0 || index >= tx->num_constf_allowed) {
543 tx->failure = TRUE;
544 return FALSE;
545 }
546 for (i = 0; i < tx->num_lconstf; ++i) {
547 if (tx->lconstf[i].idx == index) {
548 *src = tx->lconstf[i].reg;
549 return TRUE;
550 }
551 }
552 return FALSE;
553 }
554 static boolean
555 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
556 {
557 if (index < 0 || index >= tx->num_consti_allowed) {
558 tx->failure = TRUE;
559 return FALSE;
560 }
561 if (tx->lconsti[index].idx == index)
562 *src = tx->lconsti[index].reg;
563 return tx->lconsti[index].idx == index;
564 }
565 static boolean
566 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
567 {
568 if (index < 0 || index >= tx->num_constb_allowed) {
569 tx->failure = TRUE;
570 return FALSE;
571 }
572 if (tx->lconstb[index].idx == index)
573 *src = tx->lconstb[index].reg;
574 return tx->lconstb[index].idx == index;
575 }
576
577 static void
578 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
579 {
580 unsigned n;
581
582 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
583
584 for (n = 0; n < tx->num_lconstf; ++n)
585 if (tx->lconstf[n].idx == index)
586 break;
587 if (n == tx->num_lconstf) {
588 if ((n % 8) == 0) {
589 tx->lconstf = REALLOC(tx->lconstf,
590 (n + 0) * sizeof(tx->lconstf[0]),
591 (n + 8) * sizeof(tx->lconstf[0]));
592 assert(tx->lconstf);
593 }
594 tx->num_lconstf++;
595 }
596 tx->lconstf[n].idx = index;
597 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
598
599 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
600 }
601 static void
602 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
603 {
604 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
605 tx->lconsti[index].idx = index;
606 tx->lconsti[index].reg = tx->native_integers ?
607 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
608 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
609 }
610 static void
611 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
612 {
613 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
614 tx->lconstb[index].idx = index;
615 tx->lconstb[index].reg = tx->native_integers ?
616 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
617 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
618 }
619
620 static inline struct ureg_dst
621 tx_scratch(struct shader_translator *tx)
622 {
623 if (tx->num_scratch >= Elements(tx->regs.t)) {
624 tx->failure = TRUE;
625 return tx->regs.t[0];
626 }
627 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
628 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
629 return tx->regs.t[tx->num_scratch++];
630 }
631
632 static inline struct ureg_dst
633 tx_scratch_scalar(struct shader_translator *tx)
634 {
635 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
636 }
637
638 static inline struct ureg_src
639 tx_src_scalar(struct ureg_dst dst)
640 {
641 struct ureg_src src = ureg_src(dst);
642 int c = ffs(dst.WriteMask) - 1;
643 if (dst.WriteMask == (1 << c))
644 src = ureg_scalar(src, c);
645 return src;
646 }
647
648 static inline void
649 tx_temp_alloc(struct shader_translator *tx, INT idx)
650 {
651 assert(idx >= 0);
652 if (idx >= tx->num_temp) {
653 unsigned k = tx->num_temp;
654 unsigned n = idx + 1;
655 tx->regs.r = REALLOC(tx->regs.r,
656 k * sizeof(tx->regs.r[0]),
657 n * sizeof(tx->regs.r[0]));
658 for (; k < n; ++k)
659 tx->regs.r[k] = ureg_dst_undef();
660 tx->num_temp = n;
661 }
662 if (ureg_dst_is_undef(tx->regs.r[idx]))
663 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
664 }
665
666 static inline void
667 tx_addr_alloc(struct shader_translator *tx, INT idx)
668 {
669 assert(idx == 0);
670 if (ureg_dst_is_undef(tx->regs.address))
671 tx->regs.address = ureg_DECL_address(tx->ureg);
672 if (ureg_dst_is_undef(tx->regs.a0))
673 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
674 }
675
676 static inline void
677 tx_pred_alloc(struct shader_translator *tx, INT idx)
678 {
679 assert(idx == 0);
680 if (ureg_dst_is_undef(tx->regs.p))
681 tx->regs.p = ureg_DECL_predicate(tx->ureg);
682 }
683
684 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
685 * the projection should be applied on the texture. It doesn't
686 * apply on texkill.
687 * The doc is very imprecise here (it says the projection is done
688 * before rasterization, thus in vs, which seems wrong since ps instructions
689 * are affected differently)
690 * For now we only apply to the ps TEX instruction and TEXBEM.
691 * Perhaps some other instructions would need it */
692 static inline void
693 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
694 struct ureg_src src, INT idx)
695 {
696 struct ureg_dst tmp;
697 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
698
699 /* no projection */
700 if (dim == 1) {
701 ureg_MOV(tx->ureg, dst, src);
702 } else {
703 tmp = tx_scratch_scalar(tx);
704 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
705 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
706 }
707 }
708
709 static inline void
710 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
711 unsigned target, struct ureg_src src0,
712 struct ureg_src src1, INT idx)
713 {
714 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
715 struct ureg_dst tmp;
716
717 /* dim == 1: no projection
718 * Looks like must be disabled when it makes no
719 * sense according the texture dimensions
720 */
721 if (dim == 1 || dim <= target) {
722 ureg_TEX(tx->ureg, dst, target, src0, src1);
723 } else if (dim == 4) {
724 ureg_TXP(tx->ureg, dst, target, src0, src1);
725 } else {
726 tmp = tx_scratch(tx);
727 apply_ps1x_projection(tx, tmp, src0, idx);
728 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
729 }
730 }
731
732 static inline void
733 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
734 {
735 assert(IS_PS);
736 assert(idx >= 0 && idx < Elements(tx->regs.vT));
737 if (ureg_src_is_undef(tx->regs.vT[idx]))
738 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
739 TGSI_INTERPOLATE_PERSPECTIVE);
740 }
741
742 static inline unsigned *
743 tx_bgnloop(struct shader_translator *tx)
744 {
745 tx->loop_depth++;
746 if (tx->loop_depth_max < tx->loop_depth)
747 tx->loop_depth_max = tx->loop_depth;
748 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
749 return &tx->loop_labels[tx->loop_depth - 1];
750 }
751
752 static inline unsigned *
753 tx_endloop(struct shader_translator *tx)
754 {
755 assert(tx->loop_depth);
756 tx->loop_depth--;
757 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
758 ureg_get_instruction_number(tx->ureg));
759 return &tx->loop_labels[tx->loop_depth];
760 }
761
762 static struct ureg_dst
763 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
764 {
765 const unsigned l = tx->loop_depth - 1;
766
767 if (!tx->loop_depth)
768 {
769 DBG("loop counter requested outside of loop\n");
770 return ureg_dst_undef();
771 }
772
773 if (ureg_dst_is_undef(tx->regs.rL[l])) {
774 /* loop or rep ctr creation */
775 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
776 tx->loop_or_rep[l] = loop_or_rep;
777 }
778 /* loop - rep - endloop - endrep not allowed */
779 assert(tx->loop_or_rep[l] == loop_or_rep);
780
781 return tx->regs.rL[l];
782 }
783
784 static struct ureg_src
785 tx_get_loopal(struct shader_translator *tx)
786 {
787 int loop_level = tx->loop_depth - 1;
788
789 while (loop_level >= 0) {
790 /* handle loop - rep - endrep - endloop case */
791 if (tx->loop_or_rep[loop_level])
792 /* the value is in the loop counter y component (nine implementation) */
793 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
794 loop_level--;
795 }
796
797 DBG("aL counter requested outside of loop\n");
798 return ureg_src_undef();
799 }
800
801 static inline unsigned *
802 tx_cond(struct shader_translator *tx)
803 {
804 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
805 tx->cond_depth++;
806 return &tx->cond_labels[tx->cond_depth - 1];
807 }
808
809 static inline unsigned *
810 tx_elsecond(struct shader_translator *tx)
811 {
812 assert(tx->cond_depth);
813 return &tx->cond_labels[tx->cond_depth - 1];
814 }
815
816 static inline void
817 tx_endcond(struct shader_translator *tx)
818 {
819 assert(tx->cond_depth);
820 tx->cond_depth--;
821 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
822 ureg_get_instruction_number(tx->ureg));
823 }
824
825 static inline struct ureg_dst
826 nine_ureg_dst_register(unsigned file, int index)
827 {
828 return ureg_dst(ureg_src_register(file, index));
829 }
830
831 static struct ureg_src
832 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
833 {
834 struct ureg_program *ureg = tx->ureg;
835 struct ureg_src src;
836 struct ureg_dst tmp;
837
838 switch (param->file)
839 {
840 case D3DSPR_TEMP:
841 assert(!param->rel);
842 tx_temp_alloc(tx, param->idx);
843 src = ureg_src(tx->regs.r[param->idx]);
844 break;
845 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
846 case D3DSPR_ADDR:
847 assert(!param->rel);
848 if (IS_VS) {
849 assert(param->idx == 0);
850 /* the address register (vs only) must be
851 * assigned before use */
852 assert(!ureg_dst_is_undef(tx->regs.a0));
853 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
854 src = ureg_src(tx->regs.address);
855 } else {
856 if (tx->version.major < 2 && tx->version.minor < 4) {
857 /* no subroutines, so should be defined */
858 src = ureg_src(tx->regs.tS[param->idx]);
859 } else {
860 tx_texcoord_alloc(tx, param->idx);
861 src = tx->regs.vT[param->idx];
862 }
863 }
864 break;
865 case D3DSPR_INPUT:
866 if (IS_VS) {
867 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
868 } else {
869 if (tx->version.major < 3) {
870 assert(!param->rel);
871 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
872 param->idx,
873 TGSI_INTERPOLATE_PERSPECTIVE);
874 } else {
875 assert(!param->rel); /* TODO */
876 assert(param->idx < Elements(tx->regs.v));
877 src = tx->regs.v[param->idx];
878 }
879 }
880 break;
881 case D3DSPR_PREDICATE:
882 assert(!param->rel);
883 tx_pred_alloc(tx, param->idx);
884 src = ureg_src(tx->regs.p);
885 break;
886 case D3DSPR_SAMPLER:
887 assert(param->mod == NINED3DSPSM_NONE);
888 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
889 assert(!param->rel);
890 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
891 break;
892 case D3DSPR_CONST:
893 assert(!param->rel || IS_VS);
894 if (param->rel)
895 tx->indirect_const_access = TRUE;
896 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
897 if (!param->rel)
898 nine_info_mark_const_f_used(tx->info, param->idx);
899 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
900 }
901 if (!IS_VS && tx->version.major < 2) {
902 /* ps 1.X clamps constants */
903 tmp = tx_scratch(tx);
904 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
905 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
906 src = ureg_src(tmp);
907 }
908 break;
909 case D3DSPR_CONST2:
910 case D3DSPR_CONST3:
911 case D3DSPR_CONST4:
912 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
913 assert(!"CONST2/3/4");
914 src = ureg_imm1f(ureg, 0.0f);
915 break;
916 case D3DSPR_CONSTINT:
917 /* relative adressing only possible for float constants in vs */
918 assert(!param->rel);
919 if (!tx_lconsti(tx, &src, param->idx)) {
920 nine_info_mark_const_i_used(tx->info, param->idx);
921 src = ureg_src_register(TGSI_FILE_CONSTANT,
922 tx->info->const_i_base + param->idx);
923 }
924 break;
925 case D3DSPR_CONSTBOOL:
926 assert(!param->rel);
927 if (!tx_lconstb(tx, &src, param->idx)) {
928 char r = param->idx / 4;
929 char s = param->idx & 3;
930 nine_info_mark_const_b_used(tx->info, param->idx);
931 src = ureg_src_register(TGSI_FILE_CONSTANT,
932 tx->info->const_b_base + r);
933 src = ureg_swizzle(src, s, s, s, s);
934 }
935 break;
936 case D3DSPR_LOOP:
937 if (ureg_dst_is_undef(tx->regs.address))
938 tx->regs.address = ureg_DECL_address(ureg);
939 if (!tx->native_integers)
940 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
941 else
942 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
943 src = ureg_src(tx->regs.address);
944 break;
945 case D3DSPR_MISCTYPE:
946 switch (param->idx) {
947 case D3DSMO_POSITION:
948 if (ureg_src_is_undef(tx->regs.vPos))
949 tx->regs.vPos = ureg_DECL_fs_input(ureg,
950 TGSI_SEMANTIC_POSITION, 0,
951 TGSI_INTERPOLATE_LINEAR);
952 if (tx->shift_wpos) {
953 /* TODO: do this only once */
954 struct ureg_dst wpos = tx_scratch(tx);
955 ureg_SUB(ureg, wpos, tx->regs.vPos,
956 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
957 src = ureg_src(wpos);
958 } else {
959 src = tx->regs.vPos;
960 }
961 break;
962 case D3DSMO_FACE:
963 if (ureg_src_is_undef(tx->regs.vFace)) {
964 tx->regs.vFace = ureg_DECL_fs_input(ureg,
965 TGSI_SEMANTIC_FACE, 0,
966 TGSI_INTERPOLATE_CONSTANT);
967 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
968 }
969 src = tx->regs.vFace;
970 break;
971 default:
972 assert(!"invalid src D3DSMO");
973 break;
974 }
975 assert(!param->rel);
976 break;
977 case D3DSPR_TEMPFLOAT16:
978 break;
979 default:
980 assert(!"invalid src D3DSPR");
981 }
982 if (param->rel)
983 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
984
985 switch (param->mod) {
986 case NINED3DSPSM_DW:
987 tmp = tx_scratch(tx);
988 /* NOTE: app is not allowed to read w with this modifier */
989 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
990 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
991 src = ureg_src(tmp);
992 break;
993 case NINED3DSPSM_DZ:
994 tmp = tx_scratch(tx);
995 /* NOTE: app is not allowed to read z with this modifier */
996 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
997 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
998 src = ureg_src(tmp);
999 break;
1000 default:
1001 break;
1002 }
1003
1004 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1005 src = ureg_swizzle(src,
1006 (param->swizzle >> 0) & 0x3,
1007 (param->swizzle >> 2) & 0x3,
1008 (param->swizzle >> 4) & 0x3,
1009 (param->swizzle >> 6) & 0x3);
1010
1011 switch (param->mod) {
1012 case NINED3DSPSM_ABS:
1013 src = ureg_abs(src);
1014 break;
1015 case NINED3DSPSM_ABSNEG:
1016 src = ureg_negate(ureg_abs(src));
1017 break;
1018 case NINED3DSPSM_NEG:
1019 src = ureg_negate(src);
1020 break;
1021 case NINED3DSPSM_BIAS:
1022 tmp = tx_scratch(tx);
1023 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
1024 src = ureg_src(tmp);
1025 break;
1026 case NINED3DSPSM_BIASNEG:
1027 tmp = tx_scratch(tx);
1028 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
1029 src = ureg_src(tmp);
1030 break;
1031 case NINED3DSPSM_NOT:
1032 if (tx->native_integers) {
1033 tmp = tx_scratch(tx);
1034 ureg_NOT(ureg, tmp, src);
1035 src = ureg_src(tmp);
1036 break;
1037 }
1038 /* fall through */
1039 case NINED3DSPSM_COMP:
1040 tmp = tx_scratch(tx);
1041 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1042 src = ureg_src(tmp);
1043 break;
1044 case NINED3DSPSM_DZ:
1045 case NINED3DSPSM_DW:
1046 /* Already handled*/
1047 break;
1048 case NINED3DSPSM_SIGN:
1049 tmp = tx_scratch(tx);
1050 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1051 src = ureg_src(tmp);
1052 break;
1053 case NINED3DSPSM_SIGNNEG:
1054 tmp = tx_scratch(tx);
1055 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1056 src = ureg_src(tmp);
1057 break;
1058 case NINED3DSPSM_X2:
1059 tmp = tx_scratch(tx);
1060 ureg_ADD(ureg, tmp, src, src);
1061 src = ureg_src(tmp);
1062 break;
1063 case NINED3DSPSM_X2NEG:
1064 tmp = tx_scratch(tx);
1065 ureg_ADD(ureg, tmp, src, src);
1066 src = ureg_negate(ureg_src(tmp));
1067 break;
1068 default:
1069 assert(param->mod == NINED3DSPSM_NONE);
1070 break;
1071 }
1072
1073 return src;
1074 }
1075
1076 static struct ureg_dst
1077 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1078 {
1079 struct ureg_dst dst;
1080
1081 switch (param->file)
1082 {
1083 case D3DSPR_TEMP:
1084 assert(!param->rel);
1085 tx_temp_alloc(tx, param->idx);
1086 dst = tx->regs.r[param->idx];
1087 break;
1088 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1089 case D3DSPR_ADDR:
1090 assert(!param->rel);
1091 if (tx->version.major < 2 && !IS_VS) {
1092 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1093 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1094 dst = tx->regs.tS[param->idx];
1095 } else
1096 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1097 tx_texcoord_alloc(tx, param->idx);
1098 dst = ureg_dst(tx->regs.vT[param->idx]);
1099 } else {
1100 tx_addr_alloc(tx, param->idx);
1101 dst = tx->regs.a0;
1102 }
1103 break;
1104 case D3DSPR_RASTOUT:
1105 assert(!param->rel);
1106 switch (param->idx) {
1107 case 0:
1108 if (ureg_dst_is_undef(tx->regs.oPos))
1109 tx->regs.oPos =
1110 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1111 dst = tx->regs.oPos;
1112 break;
1113 case 1:
1114 if (ureg_dst_is_undef(tx->regs.oFog))
1115 tx->regs.oFog =
1116 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1117 dst = tx->regs.oFog;
1118 break;
1119 case 2:
1120 if (ureg_dst_is_undef(tx->regs.oPts))
1121 tx->regs.oPts =
1122 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1123 dst = tx->regs.oPts;
1124 break;
1125 default:
1126 assert(0);
1127 break;
1128 }
1129 break;
1130 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1131 case D3DSPR_OUTPUT:
1132 if (tx->version.major < 3) {
1133 assert(!param->rel);
1134 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1135 } else {
1136 assert(!param->rel); /* TODO */
1137 assert(param->idx < Elements(tx->regs.o));
1138 dst = tx->regs.o[param->idx];
1139 }
1140 break;
1141 case D3DSPR_ATTROUT: /* VS */
1142 case D3DSPR_COLOROUT: /* PS */
1143 assert(param->idx >= 0 && param->idx < 4);
1144 assert(!param->rel);
1145 tx->info->rt_mask |= 1 << param->idx;
1146 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1147 /* ps < 3: oCol[0] will have fog blending afterward
1148 * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
1149 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1150 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1151 } else if (IS_VS && tx->version.major < 3 && param->idx == 1) {
1152 tx->regs.oCol[1] = ureg_DECL_temporary(tx->ureg);
1153 } else {
1154 tx->regs.oCol[param->idx] =
1155 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1156 }
1157 }
1158 dst = tx->regs.oCol[param->idx];
1159 if (IS_VS && tx->version.major < 3)
1160 dst = ureg_saturate(dst);
1161 break;
1162 case D3DSPR_DEPTHOUT:
1163 assert(!param->rel);
1164 if (ureg_dst_is_undef(tx->regs.oDepth))
1165 tx->regs.oDepth =
1166 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1167 TGSI_WRITEMASK_Z, 0, 1);
1168 dst = tx->regs.oDepth; /* XXX: must write .z component */
1169 break;
1170 case D3DSPR_PREDICATE:
1171 assert(!param->rel);
1172 tx_pred_alloc(tx, param->idx);
1173 dst = tx->regs.p;
1174 break;
1175 case D3DSPR_TEMPFLOAT16:
1176 DBG("unhandled D3DSPR: %u\n", param->file);
1177 break;
1178 default:
1179 assert(!"invalid dst D3DSPR");
1180 break;
1181 }
1182 if (param->rel)
1183 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1184
1185 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1186 dst = ureg_writemask(dst, param->mask);
1187 if (param->mod & NINED3DSPDM_SATURATE)
1188 dst = ureg_saturate(dst);
1189
1190 return dst;
1191 }
1192
1193 static struct ureg_dst
1194 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1195 {
1196 if (param->shift) {
1197 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1198 return tx->regs.tdst;
1199 }
1200 return _tx_dst_param(tx, param);
1201 }
1202
1203 static void
1204 tx_apply_dst0_modifiers(struct shader_translator *tx)
1205 {
1206 struct ureg_dst rdst;
1207 float f;
1208
1209 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1210 return;
1211 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1212
1213 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1214
1215 if (tx->insn.dst[0].shift < 0)
1216 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1217 else
1218 f = 1 << tx->insn.dst[0].shift;
1219
1220 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1221 }
1222
1223 static struct ureg_src
1224 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1225 {
1226 struct ureg_src src;
1227
1228 assert(!param->shift);
1229 assert(!(param->mod & NINED3DSPDM_SATURATE));
1230
1231 switch (param->file) {
1232 case D3DSPR_INPUT:
1233 if (IS_VS) {
1234 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1235 } else {
1236 assert(!param->rel);
1237 assert(param->idx < Elements(tx->regs.v));
1238 src = tx->regs.v[param->idx];
1239 }
1240 break;
1241 default:
1242 src = ureg_src(tx_dst_param(tx, param));
1243 break;
1244 }
1245 if (param->rel)
1246 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1247
1248 if (!param->mask)
1249 WARN("mask is 0, using identity swizzle\n");
1250
1251 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1252 char s[4];
1253 int n;
1254 int c;
1255 for (n = 0, c = 0; c < 4; ++c)
1256 if (param->mask & (1 << c))
1257 s[n++] = c;
1258 assert(n);
1259 for (c = n; c < 4; ++c)
1260 s[c] = s[n - 1];
1261 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1262 }
1263 return src;
1264 }
1265
1266 static HRESULT
1267 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1268 {
1269 struct ureg_program *ureg = tx->ureg;
1270 struct ureg_dst dst;
1271 struct ureg_src src[2];
1272 struct sm1_src_param *src_mat = &tx->insn.src[1];
1273 unsigned i;
1274
1275 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1276 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1277
1278 for (i = 0; i < n; i++)
1279 {
1280 const unsigned m = (1 << i);
1281
1282 src[1] = tx_src_param(tx, src_mat);
1283 src_mat->idx++;
1284
1285 if (!(dst.WriteMask & m))
1286 continue;
1287
1288 /* XXX: src == dst case ? */
1289
1290 switch (k) {
1291 case 3:
1292 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1293 break;
1294 case 4:
1295 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1296 break;
1297 default:
1298 DBG("invalid operation: M%ux%u\n", m, n);
1299 break;
1300 }
1301 }
1302
1303 return D3D_OK;
1304 }
1305
/* VNOTSUPPORTED expands to the two arguments "0, 0".
 * V(maj, min) packs a version as (maj << 8) | min.
 * NOTE(review): presumably both are used as min/max version bounds in the
 * instruction table further down the file - confirm against that table. */
#define VNOTSUPPORTED 0, 0
#define V(maj, min) (((maj) << 8) | (min))
1308
1309 static inline const char *
1310 d3dsio_to_string( unsigned opcode )
1311 {
1312 static const char *names[] = {
1313 "NOP",
1314 "MOV",
1315 "ADD",
1316 "SUB",
1317 "MAD",
1318 "MUL",
1319 "RCP",
1320 "RSQ",
1321 "DP3",
1322 "DP4",
1323 "MIN",
1324 "MAX",
1325 "SLT",
1326 "SGE",
1327 "EXP",
1328 "LOG",
1329 "LIT",
1330 "DST",
1331 "LRP",
1332 "FRC",
1333 "M4x4",
1334 "M4x3",
1335 "M3x4",
1336 "M3x3",
1337 "M3x2",
1338 "CALL",
1339 "CALLNZ",
1340 "LOOP",
1341 "RET",
1342 "ENDLOOP",
1343 "LABEL",
1344 "DCL",
1345 "POW",
1346 "CRS",
1347 "SGN",
1348 "ABS",
1349 "NRM",
1350 "SINCOS",
1351 "REP",
1352 "ENDREP",
1353 "IF",
1354 "IFC",
1355 "ELSE",
1356 "ENDIF",
1357 "BREAK",
1358 "BREAKC",
1359 "MOVA",
1360 "DEFB",
1361 "DEFI",
1362 NULL,
1363 NULL,
1364 NULL,
1365 NULL,
1366 NULL,
1367 NULL,
1368 NULL,
1369 NULL,
1370 NULL,
1371 NULL,
1372 NULL,
1373 NULL,
1374 NULL,
1375 NULL,
1376 NULL,
1377 "TEXCOORD",
1378 "TEXKILL",
1379 "TEX",
1380 "TEXBEM",
1381 "TEXBEML",
1382 "TEXREG2AR",
1383 "TEXREG2GB",
1384 "TEXM3x2PAD",
1385 "TEXM3x2TEX",
1386 "TEXM3x3PAD",
1387 "TEXM3x3TEX",
1388 NULL,
1389 "TEXM3x3SPEC",
1390 "TEXM3x3VSPEC",
1391 "EXPP",
1392 "LOGP",
1393 "CND",
1394 "DEF",
1395 "TEXREG2RGB",
1396 "TEXDP3TEX",
1397 "TEXM3x2DEPTH",
1398 "TEXDP3",
1399 "TEXM3x3",
1400 "TEXDEPTH",
1401 "CMP",
1402 "BEM",
1403 "DP2ADD",
1404 "DSX",
1405 "DSY",
1406 "TEXLDD",
1407 "SETP",
1408 "TEXLDL",
1409 "BREAKP"
1410 };
1411
1412 if (opcode < Elements(names)) return names[opcode];
1413
1414 switch (opcode) {
1415 case D3DSIO_PHASE: return "PHASE";
1416 case D3DSIO_COMMENT: return "COMMENT";
1417 case D3DSIO_END: return "END";
1418 default:
1419 return NULL;
1420 }
1421 }
1422
/* Brace initializer with every member zero / NULL.
 * NOTE(review): presumably fills unused slots of the instruction table
 * defined later in the file - confirm against that table. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when at least one of the four version bounds of inst is non-zero. */
#define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
                                     (inst).vert_version.max | \
                                     (inst).frag_version.min | \
                                     (inst).frag_version.max)

/* Name of the dedicated translation handler for instruction "name". */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the handler SPECIAL(name); the body that follows
 * receives the translator as "tx" and returns an HRESULT. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration: handlers below may call the generic translation
 * before it is defined. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1438
1439 DECL_SPECIAL(M4x4)
1440 {
1441 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1442 }
1443
1444 DECL_SPECIAL(M4x3)
1445 {
1446 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1447 }
1448
1449 DECL_SPECIAL(M3x4)
1450 {
1451 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1452 }
1453
1454 DECL_SPECIAL(M3x3)
1455 {
1456 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1457 }
1458
1459 DECL_SPECIAL(M3x2)
1460 {
1461 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1462 }
1463
1464 DECL_SPECIAL(CMP)
1465 {
1466 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1467 tx_src_param(tx, &tx->insn.src[0]),
1468 tx_src_param(tx, &tx->insn.src[2]),
1469 tx_src_param(tx, &tx->insn.src[1]));
1470 return D3D_OK;
1471 }
1472
1473 DECL_SPECIAL(CND)
1474 {
1475 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1476 struct ureg_dst cgt;
1477 struct ureg_src cnd;
1478
1479 /* the coissue flag was a tip for compilers to advise to
1480 * execute two operations at the same time, in cases
1481 * the two executions had same dst with different channels.
1482 * It has no effect on current hw. However it seems CND
1483 * is affected. The handling of this very specific case
1484 * handled below mimick wine behaviour */
1485 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1486 ureg_MOV(tx->ureg,
1487 dst, tx_src_param(tx, &tx->insn.src[1]));
1488 return D3D_OK;
1489 }
1490
1491 cnd = tx_src_param(tx, &tx->insn.src[0]);
1492 cgt = tx_scratch(tx);
1493
1494 if (tx->version.major == 1 && tx->version.minor < 4)
1495 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1496
1497 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1498
1499 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1500 tx_src_param(tx, &tx->insn.src[1]),
1501 tx_src_param(tx, &tx->insn.src[2]));
1502 return D3D_OK;
1503 }
1504
1505 DECL_SPECIAL(CALL)
1506 {
1507 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1508 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1509 return D3D_OK;
1510 }
1511
1512 DECL_SPECIAL(CALLNZ)
1513 {
1514 struct ureg_program *ureg = tx->ureg;
1515 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1516
1517 if (!tx->native_integers)
1518 ureg_IF(ureg, src, tx_cond(tx));
1519 else
1520 ureg_UIF(ureg, src, tx_cond(tx));
1521 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1522 tx_endcond(tx);
1523 ureg_ENDIF(ureg);
1524 return D3D_OK;
1525 }
1526
1527 DECL_SPECIAL(MOV_vs1x)
1528 {
1529 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1530 /* Implementation note: We don't write directly
1531 * to the addr register, but to an intermediate
1532 * float register.
1533 * Contrary to the doc, when writing to ADDR here,
1534 * the rounding is not to nearest, but to lowest
1535 * (wine test).
1536 * Since we use ARR next, substract 0.5. */
1537 ureg_SUB(tx->ureg,
1538 tx_dst_param(tx, &tx->insn.dst[0]),
1539 tx_src_param(tx, &tx->insn.src[0]),
1540 ureg_imm1f(tx->ureg, 0.5f));
1541 return D3D_OK;
1542 }
1543 return NineTranslateInstruction_Generic(tx);
1544 }
1545
1546 DECL_SPECIAL(LOOP)
1547 {
1548 struct ureg_program *ureg = tx->ureg;
1549 unsigned *label;
1550 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1551 struct ureg_dst ctr;
1552 struct ureg_dst tmp;
1553 struct ureg_src ctrx;
1554
1555 label = tx_bgnloop(tx);
1556 ctr = tx_get_loopctr(tx, TRUE);
1557 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1558
1559 /* src: num_iterations - start_value of al - step for al - 0 */
1560 ureg_MOV(ureg, ctr, src);
1561 ureg_BGNLOOP(tx->ureg, label);
1562 tmp = tx_scratch_scalar(tx);
1563 /* Initially ctr.x contains the number of iterations.
1564 * ctr.y will contain the updated value of al.
1565 * We decrease ctr.x at the end of every iteration,
1566 * and stop when it reaches 0. */
1567
1568 if (!tx->native_integers) {
1569 /* case src and ctr contain floats */
1570 /* to avoid precision issue, we stop when ctr <= 0.5 */
1571 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1572 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1573 } else {
1574 /* case src and ctr contain integers */
1575 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1576 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1577 }
1578 ureg_BRK(ureg);
1579 tx_endcond(tx);
1580 ureg_ENDIF(ureg);
1581 return D3D_OK;
1582 }
1583
1584 DECL_SPECIAL(RET)
1585 {
1586 ureg_RET(tx->ureg);
1587 return D3D_OK;
1588 }
1589
1590 DECL_SPECIAL(ENDLOOP)
1591 {
1592 struct ureg_program *ureg = tx->ureg;
1593 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1594 struct ureg_dst dst_ctrx, dst_al;
1595 struct ureg_src src_ctr, al_counter;
1596
1597 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1598 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1599 src_ctr = ureg_src(ctr);
1600 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1601
1602 /* ctr.x -= 1
1603 * ctr.y (aL) += step */
1604 if (!tx->native_integers) {
1605 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1606 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1607 } else {
1608 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1609 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1610 }
1611 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1612 return D3D_OK;
1613 }
1614
1615 DECL_SPECIAL(LABEL)
1616 {
1617 unsigned k = tx->num_inst_labels;
1618 unsigned n = tx->insn.src[0].idx;
1619 assert(n < 2048);
1620 if (n >= k)
1621 tx->inst_labels = REALLOC(tx->inst_labels,
1622 k * sizeof(tx->inst_labels[0]),
1623 n * sizeof(tx->inst_labels[0]));
1624
1625 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1626 return D3D_OK;
1627 }
1628
1629 DECL_SPECIAL(SINCOS)
1630 {
1631 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1632 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1633
1634 assert(!(dst.WriteMask & 0xc));
1635
1636 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1637 ureg_SCS(tx->ureg, dst, src);
1638 return D3D_OK;
1639 }
1640
1641 DECL_SPECIAL(SGN)
1642 {
1643 ureg_SSG(tx->ureg,
1644 tx_dst_param(tx, &tx->insn.dst[0]),
1645 tx_src_param(tx, &tx->insn.src[0]));
1646 return D3D_OK;
1647 }
1648
1649 DECL_SPECIAL(REP)
1650 {
1651 struct ureg_program *ureg = tx->ureg;
1652 unsigned *label;
1653 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1654 struct ureg_dst ctr;
1655 struct ureg_dst tmp;
1656 struct ureg_src ctrx;
1657
1658 label = tx_bgnloop(tx);
1659 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1660 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1661
1662 /* NOTE: rep must be constant, so we don't have to save the count */
1663 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1664
1665 /* rep: num_iterations - 0 - 0 - 0 */
1666 ureg_MOV(ureg, ctr, rep);
1667 ureg_BGNLOOP(ureg, label);
1668 tmp = tx_scratch_scalar(tx);
1669 /* Initially ctr.x contains the number of iterations.
1670 * We decrease ctr.x at the end of every iteration,
1671 * and stop when it reaches 0. */
1672
1673 if (!tx->native_integers) {
1674 /* case src and ctr contain floats */
1675 /* to avoid precision issue, we stop when ctr <= 0.5 */
1676 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1677 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1678 } else {
1679 /* case src and ctr contain integers */
1680 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1681 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1682 }
1683 ureg_BRK(ureg);
1684 tx_endcond(tx);
1685 ureg_ENDIF(ureg);
1686
1687 return D3D_OK;
1688 }
1689
1690 DECL_SPECIAL(ENDREP)
1691 {
1692 struct ureg_program *ureg = tx->ureg;
1693 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1694 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1695 struct ureg_src src_ctr = ureg_src(ctr);
1696
1697 /* ctr.x -= 1 */
1698 if (!tx->native_integers)
1699 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1700 else
1701 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1702
1703 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1704 return D3D_OK;
1705 }
1706
1707 DECL_SPECIAL(ENDIF)
1708 {
1709 tx_endcond(tx);
1710 ureg_ENDIF(tx->ureg);
1711 return D3D_OK;
1712 }
1713
1714 DECL_SPECIAL(IF)
1715 {
1716 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1717
1718 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1719 ureg_UIF(tx->ureg, src, tx_cond(tx));
1720 else
1721 ureg_IF(tx->ureg, src, tx_cond(tx));
1722
1723 return D3D_OK;
1724 }
1725
1726 static inline unsigned
1727 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1728 {
1729 switch (flags) {
1730 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1731 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1732 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1733 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1734 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1735 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1736 default:
1737 assert(!"invalid comparison flags");
1738 return TGSI_OPCODE_SGT;
1739 }
1740 }
1741
1742 DECL_SPECIAL(IFC)
1743 {
1744 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1745 struct ureg_src src[2];
1746 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1747 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1748 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1749 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1750 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1751 return D3D_OK;
1752 }
1753
1754 DECL_SPECIAL(ELSE)
1755 {
1756 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1757 return D3D_OK;
1758 }
1759
1760 DECL_SPECIAL(BREAKC)
1761 {
1762 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1763 struct ureg_src src[2];
1764 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1765 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1766 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1767 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1768 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1769 ureg_BRK(tx->ureg);
1770 tx_endcond(tx);
1771 ureg_ENDIF(tx->ureg);
1772 return D3D_OK;
1773 }
1774
1775 static const char *sm1_declusage_names[] =
1776 {
1777 [D3DDECLUSAGE_POSITION] = "POSITION",
1778 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1779 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1780 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1781 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1782 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1783 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1784 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1785 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1786 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1787 [D3DDECLUSAGE_COLOR] = "COLOR",
1788 [D3DDECLUSAGE_FOG] = "FOG",
1789 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1790 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1791 };
1792
1793 static inline unsigned
1794 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1795 {
1796 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1797 }
1798
1799 static void
1800 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1801 boolean tc,
1802 struct sm1_semantic *dcl)
1803 {
1804 BYTE index = dcl->usage_idx;
1805
1806 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1807 * we match to a TGSI_SEMANTIC_GENERIC with index.
1808 *
1809 * The index can be anything UINT16 and usage_idx is BYTE,
1810 * so we can fit everything. It doesn't matter if indices
1811 * are close together or low.
1812 *
1813 *
1814 * POSITION >= 1: 10 * index + 6
1815 * COLOR >= 2: 10 * (index-1) + 7
1816 * TEXCOORD[0..15]: index
1817 * BLENDWEIGHT: 10 * index + 18
1818 * BLENDINDICES: 10 * index + 19
1819 * NORMAL: 10 * index + 20
1820 * TANGENT: 10 * index + 21
1821 * BINORMAL: 10 * index + 22
1822 * TESSFACTOR: 10 * index + 23
1823 */
1824
1825 switch (dcl->usage) {
1826 case D3DDECLUSAGE_POSITION:
1827 case D3DDECLUSAGE_POSITIONT:
1828 case D3DDECLUSAGE_DEPTH:
1829 if (index == 0) {
1830 sem->Name = TGSI_SEMANTIC_POSITION;
1831 sem->Index = 0;
1832 } else {
1833 sem->Name = TGSI_SEMANTIC_GENERIC;
1834 sem->Index = 10 * index + 6;
1835 }
1836 break;
1837 case D3DDECLUSAGE_COLOR:
1838 if (index < 2) {
1839 sem->Name = TGSI_SEMANTIC_COLOR;
1840 sem->Index = index;
1841 } else {
1842 sem->Name = TGSI_SEMANTIC_GENERIC;
1843 sem->Index = 10 * (index-1) + 7;
1844 }
1845 break;
1846 case D3DDECLUSAGE_FOG:
1847 assert(index == 0);
1848 sem->Name = TGSI_SEMANTIC_FOG;
1849 sem->Index = 0;
1850 break;
1851 case D3DDECLUSAGE_PSIZE:
1852 assert(index == 0);
1853 sem->Name = TGSI_SEMANTIC_PSIZE;
1854 sem->Index = 0;
1855 break;
1856 case D3DDECLUSAGE_TEXCOORD:
1857 assert(index < 16);
1858 if (index < 8 && tc)
1859 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1860 else
1861 sem->Name = TGSI_SEMANTIC_GENERIC;
1862 sem->Index = index;
1863 break;
1864 case D3DDECLUSAGE_BLENDWEIGHT:
1865 sem->Name = TGSI_SEMANTIC_GENERIC;
1866 sem->Index = 10 * index + 18;
1867 break;
1868 case D3DDECLUSAGE_BLENDINDICES:
1869 sem->Name = TGSI_SEMANTIC_GENERIC;
1870 sem->Index = 10 * index + 19;
1871 break;
1872 case D3DDECLUSAGE_NORMAL:
1873 sem->Name = TGSI_SEMANTIC_GENERIC;
1874 sem->Index = 10 * index + 20;
1875 break;
1876 case D3DDECLUSAGE_TANGENT:
1877 sem->Name = TGSI_SEMANTIC_GENERIC;
1878 sem->Index = 10 * index + 21;
1879 break;
1880 case D3DDECLUSAGE_BINORMAL:
1881 sem->Name = TGSI_SEMANTIC_GENERIC;
1882 sem->Index = 10 * index + 22;
1883 break;
1884 case D3DDECLUSAGE_TESSFACTOR:
1885 sem->Name = TGSI_SEMANTIC_GENERIC;
1886 sem->Index = 10 * index + 23;
1887 break;
1888 case D3DDECLUSAGE_SAMPLE:
1889 sem->Name = TGSI_SEMANTIC_COUNT;
1890 sem->Index = 0;
1891 break;
1892 default:
1893 unreachable(!"Invalid DECLUSAGE.");
1894 break;
1895 }
1896 }
1897
1898 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
1899 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
1900 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
1901 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1902 static inline unsigned
1903 d3dstt_to_tgsi_tex(BYTE sampler_type)
1904 {
1905 switch (sampler_type) {
1906 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1907 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1908 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1909 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1910 default:
1911 assert(0);
1912 return TGSI_TEXTURE_UNKNOWN;
1913 }
1914 }
1915 static inline unsigned
1916 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1917 {
1918 switch (sampler_type) {
1919 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1920 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1921 case NINED3DSTT_VOLUME:
1922 case NINED3DSTT_CUBE:
1923 default:
1924 assert(0);
1925 return TGSI_TEXTURE_UNKNOWN;
1926 }
1927 }
1928 static inline unsigned
1929 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1930 {
1931 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1932 case 1: return TGSI_TEXTURE_1D;
1933 case 0: return TGSI_TEXTURE_2D;
1934 case 3: return TGSI_TEXTURE_3D;
1935 default:
1936 return TGSI_TEXTURE_CUBE;
1937 }
1938 }
1939
1940 static const char *
1941 sm1_sampler_type_name(BYTE sampler_type)
1942 {
1943 switch (sampler_type) {
1944 case NINED3DSTT_1D: return "1D";
1945 case NINED3DSTT_2D: return "2D";
1946 case NINED3DSTT_VOLUME: return "VOLUME";
1947 case NINED3DSTT_CUBE: return "CUBE";
1948 default:
1949 return "(D3DSTT_?)";
1950 }
1951 }
1952
1953 static inline unsigned
1954 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1955 {
1956 switch (sem->Name) {
1957 case TGSI_SEMANTIC_POSITION:
1958 case TGSI_SEMANTIC_NORMAL:
1959 return TGSI_INTERPOLATE_LINEAR;
1960 case TGSI_SEMANTIC_BCOLOR:
1961 case TGSI_SEMANTIC_COLOR:
1962 case TGSI_SEMANTIC_FOG:
1963 case TGSI_SEMANTIC_GENERIC:
1964 case TGSI_SEMANTIC_TEXCOORD:
1965 case TGSI_SEMANTIC_CLIPDIST:
1966 case TGSI_SEMANTIC_CLIPVERTEX:
1967 return TGSI_INTERPOLATE_PERSPECTIVE;
1968 case TGSI_SEMANTIC_EDGEFLAG:
1969 case TGSI_SEMANTIC_FACE:
1970 case TGSI_SEMANTIC_INSTANCEID:
1971 case TGSI_SEMANTIC_PCOORD:
1972 case TGSI_SEMANTIC_PRIMID:
1973 case TGSI_SEMANTIC_PSIZE:
1974 case TGSI_SEMANTIC_VERTEXID:
1975 return TGSI_INTERPOLATE_CONSTANT;
1976 default:
1977 assert(0);
1978 return TGSI_INTERPOLATE_CONSTANT;
1979 }
1980 }
1981
1982 DECL_SPECIAL(DCL)
1983 {
1984 struct ureg_program *ureg = tx->ureg;
1985 boolean is_input;
1986 boolean is_sampler;
1987 struct tgsi_declaration_semantic tgsi;
1988 struct sm1_semantic sem;
1989 sm1_read_semantic(tx, &sem);
1990
1991 is_input = sem.reg.file == D3DSPR_INPUT;
1992 is_sampler =
1993 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1994
1995 DUMP("DCL ");
1996 sm1_dump_dst_param(&sem.reg);
1997 if (is_sampler)
1998 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1999 else
2000 if (tx->version.major >= 3)
2001 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2002 else
2003 if (sem.usage | sem.usage_idx)
2004 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2005 else
2006 DUMP("\n");
2007
2008 if (is_sampler) {
2009 const unsigned m = 1 << sem.reg.idx;
2010 ureg_DECL_sampler(ureg, sem.reg.idx);
2011 tx->info->sampler_mask |= m;
2012 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2013 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2014 d3dstt_to_tgsi_tex(sem.sampler_type);
2015 return D3D_OK;
2016 }
2017
2018 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2019 if (IS_VS) {
2020 if (is_input) {
2021 /* linkage outside of shader with vertex declaration */
2022 ureg_DECL_vs_input(ureg, sem.reg.idx);
2023 assert(sem.reg.idx < Elements(tx->info->input_map));
2024 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2025 tx->info->num_inputs = sem.reg.idx + 1;
2026 /* NOTE: preserving order in case of indirect access */
2027 } else
2028 if (tx->version.major >= 3) {
2029 /* SM2 output semantic determined by file */
2030 assert(sem.reg.mask != 0);
2031 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2032 tx->info->position_t = TRUE;
2033 assert(sem.reg.idx < Elements(tx->regs.o));
2034 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2035 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2036
2037 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
2038 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2039 }
2040 } else {
2041 if (is_input && tx->version.major >= 3) {
2042 /* SM3 only, SM2 input semantic determined by file */
2043 assert(sem.reg.idx < Elements(tx->regs.v));
2044 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2045 ureg, tgsi.Name, tgsi.Index,
2046 nine_tgsi_to_interp_mode(&tgsi),
2047 0, /* cylwrap */
2048 sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
2049 } else
2050 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2051 /* FragColor or FragDepth */
2052 assert(sem.reg.mask != 0);
2053 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2054 0, 1);
2055 }
2056 }
2057 return D3D_OK;
2058 }
2059
2060 DECL_SPECIAL(DEF)
2061 {
2062 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2063 return D3D_OK;
2064 }
2065
2066 DECL_SPECIAL(DEFB)
2067 {
2068 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2069 return D3D_OK;
2070 }
2071
2072 DECL_SPECIAL(DEFI)
2073 {
2074 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2075 return D3D_OK;
2076 }
2077
2078 DECL_SPECIAL(POW)
2079 {
2080 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2081 struct ureg_src src[2] = {
2082 tx_src_param(tx, &tx->insn.src[0]),
2083 tx_src_param(tx, &tx->insn.src[1])
2084 };
2085 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2086 return D3D_OK;
2087 }
2088
2089 DECL_SPECIAL(RSQ)
2090 {
2091 struct ureg_program *ureg = tx->ureg;
2092 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2093 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2094 struct ureg_dst tmp = tx_scratch(tx);
2095 ureg_RSQ(ureg, tmp, ureg_abs(src));
2096 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2097 return D3D_OK;
2098 }
2099
2100 DECL_SPECIAL(LOG)
2101 {
2102 struct ureg_program *ureg = tx->ureg;
2103 struct ureg_dst tmp = tx_scratch_scalar(tx);
2104 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2105 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2106 ureg_LG2(ureg, tmp, ureg_abs(src));
2107 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2108 return D3D_OK;
2109 }
2110
2111 DECL_SPECIAL(LIT)
2112 {
2113 struct ureg_program *ureg = tx->ureg;
2114 struct ureg_dst tmp = tx_scratch(tx);
2115 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2116 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2117 ureg_LIT(ureg, tmp, src);
2118 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2119 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2120 * it 0^0 if src.w=0, which value is driver dependent. */
2121 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2122 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2123 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2124 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2125 return D3D_OK;
2126 }
2127
2128 DECL_SPECIAL(NRM)
2129 {
2130 struct ureg_program *ureg = tx->ureg;
2131 struct ureg_dst tmp = tx_scratch_scalar(tx);
2132 struct ureg_src nrm = tx_src_scalar(tmp);
2133 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2134 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2135 ureg_DP3(ureg, tmp, src, src);
2136 ureg_RSQ(ureg, tmp, nrm);
2137 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2138 ureg_MUL(ureg, dst, src, nrm);
2139 return D3D_OK;
2140 }
2141
2142 DECL_SPECIAL(DP2ADD)
2143 {
2144 struct ureg_dst tmp = tx_scratch_scalar(tx);
2145 struct ureg_src dp2 = tx_src_scalar(tmp);
2146 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2147 struct ureg_src src[3];
2148 int i;
2149 for (i = 0; i < 3; ++i)
2150 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2151 assert_replicate_swizzle(&src[2]);
2152
2153 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2154 ureg_ADD(tx->ureg, dst, src[2], dp2);
2155
2156 return D3D_OK;
2157 }
2158
2159 DECL_SPECIAL(TEXCOORD)
2160 {
2161 struct ureg_program *ureg = tx->ureg;
2162 const unsigned s = tx->insn.dst[0].idx;
2163 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2164
2165 tx_texcoord_alloc(tx, s);
2166 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2167 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2168
2169 return D3D_OK;
2170 }
2171
2172 DECL_SPECIAL(TEXCOORD_ps14)
2173 {
2174 struct ureg_program *ureg = tx->ureg;
2175 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2176 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2177
2178 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2179
2180 ureg_MOV(ureg, dst, src);
2181
2182 return D3D_OK;
2183 }
2184
2185 DECL_SPECIAL(TEXKILL)
2186 {
2187 struct ureg_src reg;
2188
2189 if (tx->version.major > 1 || tx->version.minor > 3) {
2190 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2191 } else {
2192 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2193 reg = tx->regs.vT[tx->insn.dst[0].idx];
2194 }
2195 if (tx->version.major < 2)
2196 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2197 ureg_KILL_IF(tx->ureg, reg);
2198
2199 return D3D_OK;
2200 }
2201
2202 DECL_SPECIAL(TEXBEM)
2203 {
2204 struct ureg_program *ureg = tx->ureg;
2205 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2206 struct ureg_dst tmp, tmp2, texcoord;
2207 struct ureg_src sample, m00, m01, m10, m11;
2208 struct ureg_src bumpenvlscale, bumpenvloffset;
2209 const int m = tx->insn.dst[0].idx;
2210 const int n = tx->insn.src[0].idx;
2211
2212 assert(tx->version.major == 1);
2213
2214 sample = ureg_DECL_sampler(ureg, m);
2215 tx->info->sampler_mask |= 1 << m;
2216
2217 tx_texcoord_alloc(tx, m);
2218
2219 tmp = tx_scratch(tx);
2220 tmp2 = tx_scratch(tx);
2221 texcoord = tx_scratch(tx);
2222 /*
2223 * Bump-env-matrix:
2224 * 00 is X
2225 * 01 is Y
2226 * 10 is Z
2227 * 11 is W
2228 */
2229 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2230 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2231 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2232 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2233 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2234
2235 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2236 if (m % 2 == 0) {
2237 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2238 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2239 } else {
2240 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2241 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2242 }
2243
2244 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2245
2246 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2247 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2248 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2249 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2250 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2251 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2252 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2253
2254 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2255 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2256 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2257 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2258 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2259 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2260 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2261
2262 /* Now the texture coordinates are in tmp.xy */
2263
2264 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2265 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2266 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2267 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2268 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2269 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2270 bumpenvlscale, bumpenvloffset);
2271 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2272 }
2273
2274 tx->info->bumpenvmat_needed = 1;
2275
2276 return D3D_OK;
2277 }
2278
2279 DECL_SPECIAL(TEXREG2AR)
2280 {
2281 struct ureg_program *ureg = tx->ureg;
2282 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2283 struct ureg_src sample;
2284 const int m = tx->insn.dst[0].idx;
2285 const int n = tx->insn.src[0].idx;
2286 assert(m >= 0 && m > n);
2287
2288 sample = ureg_DECL_sampler(ureg, m);
2289 tx->info->sampler_mask |= 1 << m;
2290 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2291
2292 return D3D_OK;
2293 }
2294
2295 DECL_SPECIAL(TEXREG2GB)
2296 {
2297 struct ureg_program *ureg = tx->ureg;
2298 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2299 struct ureg_src sample;
2300 const int m = tx->insn.dst[0].idx;
2301 const int n = tx->insn.src[0].idx;
2302 assert(m >= 0 && m > n);
2303
2304 sample = ureg_DECL_sampler(ureg, m);
2305 tx->info->sampler_mask |= 1 << m;
2306 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2307
2308 return D3D_OK;
2309 }
2310
2311 DECL_SPECIAL(TEXM3x2PAD)
2312 {
2313 return D3D_OK; /* this is just padding */
2314 }
2315
2316 DECL_SPECIAL(TEXM3x2TEX)
2317 {
2318 struct ureg_program *ureg = tx->ureg;
2319 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2320 struct ureg_src sample;
2321 const int m = tx->insn.dst[0].idx - 1;
2322 const int n = tx->insn.src[0].idx;
2323 assert(m >= 0 && m > n);
2324
2325 tx_texcoord_alloc(tx, m);
2326 tx_texcoord_alloc(tx, m+1);
2327
2328 /* performs the matrix multiplication */
2329 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2330 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2331
2332 sample = ureg_DECL_sampler(ureg, m + 1);
2333 tx->info->sampler_mask |= 1 << (m + 1);
2334 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2335
2336 return D3D_OK;
2337 }
2338
2339 DECL_SPECIAL(TEXM3x3PAD)
2340 {
2341 return D3D_OK; /* this is just padding */
2342 }
2343
2344 DECL_SPECIAL(TEXM3x3SPEC)
2345 {
2346 struct ureg_program *ureg = tx->ureg;
2347 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2348 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2349 struct ureg_src sample;
2350 struct ureg_dst tmp;
2351 const int m = tx->insn.dst[0].idx - 2;
2352 const int n = tx->insn.src[0].idx;
2353 assert(m >= 0 && m > n);
2354
2355 tx_texcoord_alloc(tx, m);
2356 tx_texcoord_alloc(tx, m+1);
2357 tx_texcoord_alloc(tx, m+2);
2358
2359 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2360 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2361 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2362
2363 sample = ureg_DECL_sampler(ureg, m + 2);
2364 tx->info->sampler_mask |= 1 << (m + 2);
2365 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2366
2367 /* At this step, dst = N = (u', w', z').
2368 * We want dst to be the texture sampled at (u'', w'', z''), with
2369 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2370 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2371 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2372 /* at this step tmp.x = 1/N.N */
2373 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2374 /* at this step tmp.y = N.E */
2375 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2376 /* at this step tmp.x = N.E/N.N */
2377 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2378 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2379 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2380 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2381 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2382
2383 return D3D_OK;
2384 }
2385
2386 DECL_SPECIAL(TEXREG2RGB)
2387 {
2388 struct ureg_program *ureg = tx->ureg;
2389 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2390 struct ureg_src sample;
2391 const int m = tx->insn.dst[0].idx;
2392 const int n = tx->insn.src[0].idx;
2393 assert(m >= 0 && m > n);
2394
2395 sample = ureg_DECL_sampler(ureg, m);
2396 tx->info->sampler_mask |= 1 << m;
2397 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2398
2399 return D3D_OK;
2400 }
2401
2402 DECL_SPECIAL(TEXDP3TEX)
2403 {
2404 struct ureg_program *ureg = tx->ureg;
2405 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2406 struct ureg_dst tmp;
2407 struct ureg_src sample;
2408 const int m = tx->insn.dst[0].idx;
2409 const int n = tx->insn.src[0].idx;
2410 assert(m >= 0 && m > n);
2411
2412 tx_texcoord_alloc(tx, m);
2413
2414 tmp = tx_scratch(tx);
2415 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2416 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2417
2418 sample = ureg_DECL_sampler(ureg, m);
2419 tx->info->sampler_mask |= 1 << m;
2420 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2421
2422 return D3D_OK;
2423 }
2424
2425 DECL_SPECIAL(TEXM3x2DEPTH)
2426 {
2427 struct ureg_program *ureg = tx->ureg;
2428 struct ureg_dst tmp;
2429 const int m = tx->insn.dst[0].idx - 1;
2430 const int n = tx->insn.src[0].idx;
2431 assert(m >= 0 && m > n);
2432
2433 tx_texcoord_alloc(tx, m);
2434 tx_texcoord_alloc(tx, m+1);
2435
2436 tmp = tx_scratch(tx);
2437
2438 /* performs the matrix multiplication */
2439 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2440 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2441
2442 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2443 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2444 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2445 /* res = 'w' == 0 ? 1.0 : z/w */
2446 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2447 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2448 /* replace the depth for depth testing with the result */
2449 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2450 TGSI_WRITEMASK_Z, 0, 1);
2451 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2452 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2453 return D3D_OK;
2454 }
2455
2456 DECL_SPECIAL(TEXDP3)
2457 {
2458 struct ureg_program *ureg = tx->ureg;
2459 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2460 const int m = tx->insn.dst[0].idx;
2461 const int n = tx->insn.src[0].idx;
2462 assert(m >= 0 && m > n);
2463
2464 tx_texcoord_alloc(tx, m);
2465
2466 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2467
2468 return D3D_OK;
2469 }
2470
2471 DECL_SPECIAL(TEXM3x3)
2472 {
2473 struct ureg_program *ureg = tx->ureg;
2474 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2475 struct ureg_src sample;
2476 struct ureg_dst E, tmp;
2477 const int m = tx->insn.dst[0].idx - 2;
2478 const int n = tx->insn.src[0].idx;
2479 assert(m >= 0 && m > n);
2480
2481 tx_texcoord_alloc(tx, m);
2482 tx_texcoord_alloc(tx, m+1);
2483 tx_texcoord_alloc(tx, m+2);
2484
2485 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2486 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2487 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2488
2489 switch (tx->insn.opcode) {
2490 case D3DSIO_TEXM3x3:
2491 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2492 break;
2493 case D3DSIO_TEXM3x3TEX:
2494 sample = ureg_DECL_sampler(ureg, m + 2);
2495 tx->info->sampler_mask |= 1 << (m + 2);
2496 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2497 break;
2498 case D3DSIO_TEXM3x3VSPEC:
2499 sample = ureg_DECL_sampler(ureg, m + 2);
2500 tx->info->sampler_mask |= 1 << (m + 2);
2501 E = tx_scratch(tx);
2502 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2503 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2504 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2505 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2506 /* At this step, dst = N = (u', w', z').
2507 * We want dst to be the texture sampled at (u'', w'', z''), with
2508 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2509 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2510 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2511 /* at this step tmp.x = 1/N.N */
2512 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2513 /* at this step tmp.y = N.E */
2514 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2515 /* at this step tmp.x = N.E/N.N */
2516 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2517 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2518 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2519 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2520 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2521 break;
2522 default:
2523 return D3DERR_INVALIDCALL;
2524 }
2525 return D3D_OK;
2526 }
2527
2528 DECL_SPECIAL(TEXDEPTH)
2529 {
2530 struct ureg_program *ureg = tx->ureg;
2531 struct ureg_dst r5;
2532 struct ureg_src r5r, r5g;
2533
2534 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2535
2536 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2537 * r5 won't be used afterward, thus we can use r5.ba */
2538 r5 = tx->regs.r[5];
2539 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2540 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2541
2542 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2543 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2544 /* r5.r = r/g */
2545 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2546 r5r, ureg_imm1f(ureg, 1.0f));
2547 /* replace the depth for depth testing with the result */
2548 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2549 TGSI_WRITEMASK_Z, 0, 1);
2550 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2551
2552 return D3D_OK;
2553 }
2554
2555 DECL_SPECIAL(BEM)
2556 {
2557 struct ureg_program *ureg = tx->ureg;
2558 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2559 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2560 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2561 struct ureg_src m00, m01, m10, m11;
2562 const int m = tx->insn.dst[0].idx;
2563 struct ureg_dst tmp;
2564 /*
2565 * Bump-env-matrix:
2566 * 00 is X
2567 * 01 is Y
2568 * 10 is Z
2569 * 11 is W
2570 */
2571 nine_info_mark_const_f_used(tx->info, 8 + m);
2572 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2573 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2574 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2575 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2576 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2577 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2578 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2579 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2580 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2581 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2582
2583 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2584 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2585 NINE_APPLY_SWIZZLE(src1, X), src0);
2586 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2587 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2588 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2589 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2590
2591 tx->info->bumpenvmat_needed = 1;
2592
2593 return D3D_OK;
2594 }
2595
2596 DECL_SPECIAL(TEXLD)
2597 {
2598 struct ureg_program *ureg = tx->ureg;
2599 unsigned target;
2600 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2601 struct ureg_src src[2] = {
2602 tx_src_param(tx, &tx->insn.src[0]),
2603 tx_src_param(tx, &tx->insn.src[1])
2604 };
2605 assert(tx->insn.src[1].idx >= 0 &&
2606 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2607 target = tx->sampler_targets[tx->insn.src[1].idx];
2608
2609 switch (tx->insn.flags) {
2610 case 0:
2611 ureg_TEX(ureg, dst, target, src[0], src[1]);
2612 break;
2613 case NINED3DSI_TEXLD_PROJECT:
2614 ureg_TXP(ureg, dst, target, src[0], src[1]);
2615 break;
2616 case NINED3DSI_TEXLD_BIAS:
2617 ureg_TXB(ureg, dst, target, src[0], src[1]);
2618 break;
2619 default:
2620 assert(0);
2621 return D3DERR_INVALIDCALL;
2622 }
2623 return D3D_OK;
2624 }
2625
2626 DECL_SPECIAL(TEXLD_14)
2627 {
2628 struct ureg_program *ureg = tx->ureg;
2629 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2630 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2631 const unsigned s = tx->insn.dst[0].idx;
2632 const unsigned t = ps1x_sampler_type(tx->info, s);
2633
2634 tx->info->sampler_mask |= 1 << s;
2635 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2636
2637 return D3D_OK;
2638 }
2639
2640 DECL_SPECIAL(TEX)
2641 {
2642 struct ureg_program *ureg = tx->ureg;
2643 const unsigned s = tx->insn.dst[0].idx;
2644 const unsigned t = ps1x_sampler_type(tx->info, s);
2645 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2646 struct ureg_src src[2];
2647
2648 tx_texcoord_alloc(tx, s);
2649
2650 src[0] = tx->regs.vT[s];
2651 src[1] = ureg_DECL_sampler(ureg, s);
2652 tx->info->sampler_mask |= 1 << s;
2653
2654 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2655
2656 return D3D_OK;
2657 }
2658
2659 DECL_SPECIAL(TEXLDD)
2660 {
2661 unsigned target;
2662 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2663 struct ureg_src src[4] = {
2664 tx_src_param(tx, &tx->insn.src[0]),
2665 tx_src_param(tx, &tx->insn.src[1]),
2666 tx_src_param(tx, &tx->insn.src[2]),
2667 tx_src_param(tx, &tx->insn.src[3])
2668 };
2669 assert(tx->insn.src[1].idx >= 0 &&
2670 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2671 target = tx->sampler_targets[tx->insn.src[1].idx];
2672
2673 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2674 return D3D_OK;
2675 }
2676
2677 DECL_SPECIAL(TEXLDL)
2678 {
2679 unsigned target;
2680 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2681 struct ureg_src src[2] = {
2682 tx_src_param(tx, &tx->insn.src[0]),
2683 tx_src_param(tx, &tx->insn.src[1])
2684 };
2685 assert(tx->insn.src[1].idx >= 0 &&
2686 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2687 target = tx->sampler_targets[tx->insn.src[1].idx];
2688
2689 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2690 return D3D_OK;
2691 }
2692
/* SETP is not implemented.
 * NOTE(review): STUB is defined outside this section; presumably it reports
 * the missing implementation and returns the given HRESULT - confirm. */
DECL_SPECIAL(SETP)
{
    STUB(D3DERR_INVALIDCALL);
}
2697
/* BREAKP is not implemented.
 * NOTE(review): STUB is defined outside this section; presumably it reports
 * the missing implementation and returns the given HRESULT - confirm. */
DECL_SPECIAL(BREAKP)
{
    STUB(D3DERR_INVALIDCALL);
}
2702
2703 DECL_SPECIAL(PHASE)
2704 {
2705 return D3D_OK; /* we don't care about phase */
2706 }
2707
2708 DECL_SPECIAL(COMMENT)
2709 {
2710 return D3D_OK; /* nothing to do */
2711 }
2712
2713
/* Builds one struct sm1_op_info initializer.
 *   o        D3DSIO_ opcode suffix
 *   t        TGSI_OPCODE_ suffix
 *   vv1,vv2  first/last vertex shader version the entry applies to
 *   pv1,pv2  first/last pixel shader version the entry applies to
 *   d, s, h  remaining positional fields; presumably destination count,
 *            source count and special handler - TODO confirm against the
 *            struct sm1_op_info definition
 */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2716
/* Table of known SM1 instructions.
 *
 * Each row gives the D3D opcode, the TGSI opcode, the vertex shader and
 * pixel shader version ranges in which the row is valid, two counts and an
 * optional special handler (see _OPI).  The same D3D opcode may appear
 * several times with different version ranges; create_op_info_map() picks
 * the row matching the shader being translated, and when several rows
 * match, the one appearing last in the table wins.
 *
 * NOTE(review): V() is defined outside this section; it presumably packs
 * (major, minor) the same way create_op_info_map() does - confirm. */
struct sm1_op_info inst_table[] =
{
    _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
    _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
    _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
    _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
    _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
    _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
    _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
    _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
    _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
    _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
    _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
    _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
    _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
    _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
    _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
    _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
    _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
    _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
    _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */

    /* Matrix multiplies, all expanded by special handlers */
    _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
    _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
    _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
    _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
    _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),

    /* Subroutines and loops */
    _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
    _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
    _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
    _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
    _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
    _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),

    _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),

    _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
    _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
    _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
    _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */

    /* SINCOS is listed with three sources for 2.0-2.1 and one for 3.0 */
    _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
    _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

    /* More flow control */
    _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
    _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
    _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
    _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
    _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
    _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
    _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
    _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
    /* we don't write to the address register, but a normal register (copied
     * when needed to the address register), thus we don't use ARR */
    _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),

    _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
    _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),

    /* Pixel shader texture instructions; TEXCOORD and TEX have separate
     * rows (and handlers) per pixel shader version range */
    _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
    _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
    _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
    _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
    _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
    _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
    _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
    _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
    _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
    _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),

    _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
    _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),

    _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),

    /* More tex stuff */
    _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
    _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
    _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
    _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
    _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),

    /* Misc */
    _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
    _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
    _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
    _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
    _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
    _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
    _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
};
2824
/* Stand-alone entry for the PHASE marker (pixel shader 1.4 only); it is
 * kept outside inst_table. */
struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2827
/* Stand-alone entry for COMMENT tokens (valid in every shader version); it
 * is kept outside inst_table. */
struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2830
2831 static void
2832 create_op_info_map(struct shader_translator *tx)
2833 {
2834 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2835 unsigned i;
2836
2837 for (i = 0; i < Elements(tx->op_info_map); ++i)
2838 tx->op_info_map[i] = -1;
2839
2840 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2841 for (i = 0; i < Elements(inst_table); ++i) {
2842 assert(inst_table[i].sio < Elements(tx->op_info_map));
2843 if (inst_table[i].vert_version.min <= version &&
2844 inst_table[i].vert_version.max >= version)
2845 tx->op_info_map[inst_table[i].sio] = i;
2846 }
2847 } else {
2848 for (i = 0; i < Elements(inst_table); ++i) {
2849 assert(inst_table[i].sio < Elements(tx->op_info_map));
2850 if (inst_table[i].frag_version.min <= version &&
2851 inst_table[i].frag_version.max >= version)
2852 tx->op_info_map[inst_table[i].sio] = i;
2853 }
2854 }
2855 }
2856
2857 static inline HRESULT
2858 NineTranslateInstruction_Generic(struct shader_translator *tx)
2859 {
2860 struct ureg_dst dst[1];
2861 struct ureg_src src[4];
2862 unsigned i;
2863
2864 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2865 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2866 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2867 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2868
2869 ureg_insn(tx->ureg, tx->insn.info->opcode,
2870 dst, tx->insn.ndst,
2871 src, tx->insn.nsrc);
2872 return D3D_OK;
2873 }
2874
2875 static inline DWORD
2876 TOKEN_PEEK(struct shader_translator *tx)
2877 {
2878 return *(tx->parse);
2879 }
2880
2881 static inline DWORD
2882 TOKEN_NEXT(struct shader_translator *tx)
2883 {
2884 return *(tx->parse)++;
2885 }
2886
2887 static inline void
2888 TOKEN_JUMP(struct shader_translator *tx)
2889 {
2890 if (tx->parse_next && tx->parse != tx->parse_next) {
2891 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2892 tx->parse = tx->parse_next;
2893 }
2894 }
2895
2896 static inline boolean
2897 sm1_parse_eof(struct shader_translator *tx)
2898 {
2899 return TOKEN_PEEK(tx) == NINED3DSP_END;
2900 }
2901
2902 static void
2903 sm1_read_version(struct shader_translator *tx)
2904 {
2905 const DWORD tok = TOKEN_NEXT(tx);
2906
2907 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2908 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2909
2910 switch (tok >> 16) {
2911 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2912 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2913 default:
2914 DBG("Invalid shader type: %x\n", tok);
2915 tx->processor = ~0;
2916 break;
2917 }
2918 }
2919
2920 /* This is just to check if we parsed the instruction properly. */
2921 static void
2922 sm1_parse_get_skip(struct shader_translator *tx)
2923 {
2924 const DWORD tok = TOKEN_PEEK(tx);
2925
2926 if (tx->version.major >= 2) {
2927 tx->parse_next = tx->parse + 1 /* this */ +
2928 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2929 } else {
2930 tx->parse_next = NULL; /* TODO: determine from param count */
2931 }
2932 }
2933
2934 static void
2935 sm1_print_comment(const char *comment, UINT size)
2936 {
2937 if (!size)
2938 return;
2939 /* TODO */
2940 }
2941
2942 static void
2943 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2944 {
2945 DWORD tok = TOKEN_PEEK(tx);
2946
2947 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2948 {
2949 const char *comment = "";
2950 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2951 tx->parse += size + 1;
2952
2953 if (print)
2954 sm1_print_comment(comment, size);
2955
2956 tok = TOKEN_PEEK(tx);
2957 }
2958 }
2959
2960 static void
2961 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2962 {
2963 *reg = TOKEN_NEXT(tx);
2964
2965 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2966 {
2967 if (tx->version.major < 2)
2968 *rel = (1 << 31) |
2969 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2970 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2971 D3DSP_NOSWIZZLE;
2972 else
2973 *rel = TOKEN_NEXT(tx);
2974 }
2975 }
2976
2977 static void
2978 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2979 {
2980 uint8_t shift;
2981 dst->file =
2982 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2983 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2984 dst->type = TGSI_RETURN_TYPE_FLOAT;
2985 dst->idx = tok & D3DSP_REGNUM_MASK;
2986 dst->rel = NULL;
2987 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2988 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2989 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2990 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2991 }
2992
2993 static void
2994 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2995 {
2996 src->file =
2997 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2998 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2999 src->type = TGSI_RETURN_TYPE_FLOAT;
3000 src->idx = tok & D3DSP_REGNUM_MASK;
3001 src->rel = NULL;
3002 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3003 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3004
3005 switch (src->file) {
3006 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3007 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3008 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3009 default:
3010 break;
3011 }
3012 }
3013
3014 static void
3015 sm1_parse_immediate(struct shader_translator *tx,
3016 struct sm1_src_param *imm)
3017 {
3018 imm->file = NINED3DSPR_IMMEDIATE;
3019 imm->idx = INT_MIN;
3020 imm->rel = NULL;
3021 imm->swizzle = NINED3DSP_NOSWIZZLE;
3022 imm->mod = 0;
3023 switch (tx->insn.opcode) {
3024 case D3DSIO_DEF:
3025 imm->type = NINED3DSPTYPE_FLOAT4;
3026 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3027 tx->parse += 4;
3028 break;
3029 case D3DSIO_DEFI:
3030 imm->type = NINED3DSPTYPE_INT4;
3031 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3032 tx->parse += 4;
3033 break;
3034 case D3DSIO_DEFB:
3035 imm->type = NINED3DSPTYPE_BOOL;
3036 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3037 tx->parse += 1;
3038 break;
3039 default:
3040 assert(0);
3041 break;
3042 }
3043 }
3044
3045 static void
3046 sm1_read_dst_param(struct shader_translator *tx,
3047 struct sm1_dst_param *dst,
3048 struct sm1_src_param *rel)
3049 {
3050 DWORD tok_dst, tok_rel = 0;
3051
3052 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3053 sm1_parse_dst_param(dst, tok_dst);
3054 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3055 sm1_parse_src_param(rel, tok_rel);
3056 dst->rel = rel;
3057 }
3058 }
3059
3060 static void
3061 sm1_read_src_param(struct shader_translator *tx,
3062 struct sm1_src_param *src,
3063 struct sm1_src_param *rel)
3064 {
3065 DWORD tok_src, tok_rel = 0;
3066
3067 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3068 sm1_parse_src_param(src, tok_src);
3069 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3070 assert(rel);
3071 sm1_parse_src_param(rel, tok_rel);
3072 src->rel = rel;
3073 }
3074 }
3075
3076 static void
3077 sm1_read_semantic(struct shader_translator *tx,
3078 struct sm1_semantic *sem)
3079 {
3080 const DWORD tok_usg = TOKEN_NEXT(tx);
3081 const DWORD tok_dst = TOKEN_NEXT(tx);
3082
3083 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3084 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3085 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3086
3087 sm1_parse_dst_param(&sem->reg, tok_dst);
3088 }
3089
3090 static void
3091 sm1_parse_instruction(struct shader_translator *tx)
3092 {
3093 struct sm1_instruction *insn = &tx->insn;
3094 DWORD tok;
3095 struct sm1_op_info *info = NULL;
3096 unsigned i;
3097
3098 sm1_parse_comments(tx, TRUE);
3099 sm1_parse_get_skip(tx);
3100
3101 tok = TOKEN_NEXT(tx);
3102
3103 insn->opcode = tok & D3DSI_OPCODE_MASK;
3104 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3105 insn->coissue = !!(tok & D3DSI_COISSUE);
3106 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3107
3108 if (insn->opcode < Elements(tx->op_info_map)) {
3109 int k = tx->op_info_map[insn->opcode];
3110 if (k >= 0) {
3111 assert(k < Elements(inst_table));
3112 info = &inst_table[k];
3113 }
3114 } else {
3115 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3116 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3117 }
3118 if (!info) {
3119 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3120 TOKEN_JUMP(tx);
3121 return;
3122 }
3123 insn->info = info;
3124 insn->ndst = info->ndst;
3125 insn->nsrc = info->nsrc;
3126
3127 assert(!insn->predicated && "TODO: predicated instructions");
3128
3129 /* check version */
3130 {
3131 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3132 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3133 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3134 if (ver < min || ver > max) {
3135 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3136 min, ver, max);
3137 return;
3138 }
3139 }
3140
3141 for (i = 0; i < insn->ndst; ++i)
3142 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3143 if (insn->predicated)
3144 sm1_read_src_param(tx, &insn->pred, NULL);
3145 for (i = 0; i < insn->nsrc; ++i)
3146 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3147
3148 /* parse here so we can dump them before processing */
3149 if (insn->opcode == D3DSIO_DEF ||
3150 insn->opcode == D3DSIO_DEFI ||
3151 insn->opcode == D3DSIO_DEFB)
3152 sm1_parse_immediate(tx, &tx->insn.src[0]);
3153
3154 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3155 sm1_instruction_check(insn);
3156
3157 if (info->handler)
3158 info->handler(tx);
3159 else
3160 NineTranslateInstruction_Generic(tx);
3161 tx_apply_dst0_modifiers(tx);
3162
3163 tx->num_scratch = 0; /* reset */
3164
3165 TOKEN_JUMP(tx);
3166 }
3167
3168 static void
3169 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3170 {
3171 unsigned i;
3172
3173 tx->info = info;
3174
3175 tx->byte_code = info->byte_code;
3176 tx->parse = info->byte_code;
3177
3178 for (i = 0; i < Elements(info->input_map); ++i)
3179 info->input_map[i] = NINE_DECLUSAGE_NONE;
3180 info->num_inputs = 0;
3181
3182 info->position_t = FALSE;
3183 info->point_size = FALSE;
3184
3185 tx->info->const_float_slots = 0;
3186 tx->info->const_int_slots = 0;
3187 tx->info->const_bool_slots = 0;
3188
3189 info->sampler_mask = 0x0;
3190 info->rt_mask = 0x0;
3191
3192 info->lconstf.data = NULL;
3193 info->lconstf.ranges = NULL;
3194
3195 info->bumpenvmat_needed = 0;
3196
3197 for (i = 0; i < Elements(tx->regs.rL); ++i) {
3198 tx->regs.rL[i] = ureg_dst_undef();
3199 }
3200 tx->regs.address = ureg_dst_undef();
3201 tx->regs.a0 = ureg_dst_undef();
3202 tx->regs.p = ureg_dst_undef();
3203 tx->regs.oDepth = ureg_dst_undef();
3204 tx->regs.vPos = ureg_src_undef();
3205 tx->regs.vFace = ureg_src_undef();
3206 for (i = 0; i < Elements(tx->regs.o); ++i)
3207 tx->regs.o[i] = ureg_dst_undef();
3208 for (i = 0; i < Elements(tx->regs.oCol); ++i)
3209 tx->regs.oCol[i] = ureg_dst_undef();
3210 for (i = 0; i < Elements(tx->regs.vC); ++i)
3211 tx->regs.vC[i] = ureg_src_undef();
3212 for (i = 0; i < Elements(tx->regs.vT); ++i)
3213 tx->regs.vT[i] = ureg_src_undef();
3214
3215 for (i = 0; i < Elements(tx->lconsti); ++i)
3216 tx->lconsti[i].idx = -1;
3217 for (i = 0; i < Elements(tx->lconstb); ++i)
3218 tx->lconstb[i].idx = -1;
3219
3220 sm1_read_version(tx);
3221
3222 info->version = (tx->version.major << 4) | tx->version.minor;
3223
3224 create_op_info_map(tx);
3225 }
3226
3227 static void
3228 tx_dtor(struct shader_translator *tx)
3229 {
3230 if (tx->num_inst_labels)
3231 FREE(tx->inst_labels);
3232 FREE(tx->lconstf);
3233 FREE(tx->regs.r);
3234 FREE(tx);
3235 }
3236
3237 static inline unsigned
3238 tgsi_processor_from_type(unsigned shader_type)
3239 {
3240 switch (shader_type) {
3241 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
3242 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
3243 default:
3244 return ~0;
3245 }
3246 }
3247
3248 static void
3249 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3250 {
3251 struct ureg_program *ureg = tx->ureg;
3252 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3253 struct ureg_src fog_end, fog_coeff, fog_density;
3254 struct ureg_src fog_vs, depth, fog_color;
3255 struct ureg_dst fog_factor;
3256
3257 if (!tx->info->fog_enable) {
3258 ureg_MOV(ureg, oCol0, src_col);
3259 return;
3260 }
3261
3262 if (tx->info->fog_mode != D3DFOG_NONE)
3263 depth = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
3264 TGSI_INTERPOLATE_LINEAR),
3265 TGSI_SWIZZLE_Z);
3266
3267 nine_info_mark_const_f_used(tx->info, 33);
3268 fog_color = NINE_CONSTANT_SRC(32);
3269 fog_factor = tx_scratch_scalar(tx);
3270
3271 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3272 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3273 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3274 ureg_SUB(ureg, fog_factor, fog_end, depth);
3275 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3276 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3277 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3278 ureg_MUL(ureg, fog_factor, depth, fog_density);
3279 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3280 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3281 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3282 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3283 ureg_MUL(ureg, fog_factor, depth, fog_density);
3284 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3285 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3286 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3287 } else {
3288 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3289 TGSI_INTERPOLATE_PERSPECTIVE),
3290 TGSI_SWIZZLE_X);
3291 ureg_MOV(ureg, fog_factor, fog_vs);
3292 }
3293
3294 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3295 tx_src_scalar(fog_factor), src_col, fog_color);
3296 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3297 }
3298
/* Shorthands for querying the pipe screen.
 *
 * Both macros expand to expressions that use an identifier named `device`
 * from the enclosing scope; GET_SHADER_CAP additionally uses `info` and
 * passes info->type as the shader argument of get_shader_param.
 * `n` is the capability name without its PIPE_CAP_ / PIPE_SHADER_CAP_ prefix.
 */
#define GET_CAP(n) device->screen->get_param( \
    device->screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) device->screen->get_shader_param( \
    device->screen, info->type, PIPE_SHADER_CAP_##n)
3303
3304 HRESULT
3305 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3306 {
3307 struct shader_translator *tx;
3308 HRESULT hr = D3D_OK;
3309 const unsigned processor = tgsi_processor_from_type(info->type);
3310 unsigned s, slot_max;
3311 unsigned max_const_f;
3312
3313 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3314
3315 tx = CALLOC_STRUCT(shader_translator);
3316 if (!tx)
3317 return E_OUTOFMEMORY;
3318 tx_ctor(tx, info);
3319
3320 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3321 hr = D3DERR_INVALIDCALL;
3322 DBG("Unsupported shader version: %u.%u !\n",
3323 tx->version.major, tx->version.minor);
3324 goto out;
3325 }
3326 if (tx->processor != processor) {
3327 hr = D3DERR_INVALIDCALL;
3328 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3329 goto out;
3330 }
3331 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
3332 tx->version.major, tx->version.minor);
3333
3334 tx->ureg = ureg_create(processor);
3335 if (!tx->ureg) {
3336 hr = E_OUTOFMEMORY;
3337 goto out;
3338 }
3339
3340 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3341 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3342 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3343 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3344 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3345 tx->texcoord_sn = tx->want_texcoord ?
3346 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3347
3348 if (IS_VS) {
3349 tx->num_constf_allowed = NINE_MAX_CONST_F;
3350 } else if (tx->version.major < 2) {/* IS_PS v1 */
3351 tx->num_constf_allowed = 8;
3352 } else if (tx->version.major == 2) {/* IS_PS v2 */
3353 tx->num_constf_allowed = 32;
3354 } else {/* IS_PS v3 */
3355 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3356 }
3357
3358 if (tx->version.major < 2) {
3359 tx->num_consti_allowed = 0;
3360 tx->num_constb_allowed = 0;
3361 } else {
3362 tx->num_consti_allowed = NINE_MAX_CONST_I;
3363 tx->num_constb_allowed = NINE_MAX_CONST_B;
3364 }
3365
3366 /* VS must always write position. Declare it here to make it the 1st output.
3367 * (Some drivers like nv50 are buggy and rely on that.)
3368 */
3369 if (IS_VS) {
3370 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3371 } else {
3372 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3373 if (!tx->shift_wpos)
3374 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3375 }
3376
3377 while (!sm1_parse_eof(tx) && !tx->failure)
3378 sm1_parse_instruction(tx);
3379 tx->parse++; /* for byte_size */
3380
3381 if (tx->failure) {
3382 ERR("Encountered buggy shader\n");
3383 ureg_destroy(tx->ureg);
3384 hr = D3DERR_INVALIDCALL;
3385 goto out;
3386 }
3387
3388 if (IS_PS && tx->version.major < 3) {
3389 if (tx->version.major < 2) {
3390 assert(tx->num_temp); /* there must be color output */
3391 info->rt_mask |= 0x1;
3392 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3393 } else {
3394 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3395 }
3396 }
3397
3398 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3399 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3400 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3401 }
3402
3403 /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
3404 if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) {
3405 struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1);
3406 ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1]));
3407 ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f));
3408 }
3409
3410 if (info->position_t)
3411 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3412
3413 ureg_END(tx->ureg);
3414
3415 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
3416 info->point_size = TRUE;
3417
3418 /* record local constants */
3419 if (tx->num_lconstf && tx->indirect_const_access) {
3420 struct nine_range *ranges;
3421 float *data;
3422 int *indices;
3423 unsigned i, k, n;
3424
3425 hr = E_OUTOFMEMORY;
3426
3427 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3428 if (!data)
3429 goto out;
3430 info->lconstf.data = data;
3431
3432 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3433 if (!indices)
3434 goto out;
3435
3436 /* lazy sort, num_lconstf should be small */
3437 for (n = 0; n < tx->num_lconstf; ++n) {
3438 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3439 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3440 k = i;
3441 }
3442 indices[n] = tx->lconstf[k].idx;
3443 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3444 tx->lconstf[k].idx = INT_MAX;
3445 }
3446
3447 /* count ranges */
3448 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3449 if (indices[i] != indices[i - 1] + 1)
3450 ++n;
3451 ranges = MALLOC(n * sizeof(ranges[0]));
3452 if (!ranges) {
3453 FREE(indices);
3454 goto out;
3455 }
3456 info->lconstf.ranges = ranges;
3457
3458 k = 0;
3459 ranges[k].bgn = indices[0];
3460 for (i = 1; i < tx->num_lconstf; ++i) {
3461 if (indices[i] != indices[i - 1] + 1) {
3462 ranges[k].next = &ranges[k + 1];
3463 ranges[k].end = indices[i - 1] + 1;
3464 ++k;
3465 ranges[k].bgn = indices[i];
3466 }
3467 }
3468 ranges[k].end = indices[i - 1] + 1;
3469 ranges[k].next = NULL;
3470 assert(n == (k + 1));
3471
3472 FREE(indices);
3473 hr = D3D_OK;
3474 }
3475
3476 /* r500 */
3477 if (info->const_float_slots > device->max_vs_const_f &&
3478 (info->const_int_slots || info->const_bool_slots))
3479 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3480
3481
3482 if (tx->indirect_const_access) /* vs only */
3483 info->const_float_slots = device->max_vs_const_f;
3484
3485 max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3486 slot_max = info->const_bool_slots > 0 ?
3487 max_const_f + NINE_MAX_CONST_I
3488 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3489 info->const_int_slots > 0 ?
3490 max_const_f + info->const_int_slots :
3491 info->const_float_slots;
3492
3493 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3494
3495 for (s = 0; s < slot_max; s++)
3496 ureg_DECL_constant(tx->ureg, s);
3497
3498 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3499 unsigned count;
3500 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3501 tgsi_dump(toks, 0);
3502 ureg_free_tokens(toks);
3503 }
3504
3505 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3506 if (!info->cso) {
3507 hr = D3DERR_DRIVERINTERNALERROR;
3508 FREE(info->lconstf.data);
3509 FREE(info->lconstf.ranges);
3510 goto out;
3511 }
3512
3513 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3514 out:
3515 tx_dtor(tx);
3516 return hr;
3517 }