st/nine: Fix rounding issue with vs1.1 a0 reg
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
36
37 #define DBG_CHANNEL DBG_SHADER
38
39 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
40
41
42 struct shader_translator;
43
44 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
45
46 static inline const char *d3dsio_to_string(unsigned opcode);
47
48
49 #define NINED3D_SM1_VS 0xfffe
50 #define NINED3D_SM1_PS 0xffff
51
52 #define NINE_MAX_COND_DEPTH 64
53 #define NINE_MAX_LOOP_DEPTH 64
54
55 #define NINED3DSP_END 0x0000ffff
56
57 #define NINED3DSPTYPE_FLOAT4 0
58 #define NINED3DSPTYPE_INT4 1
59 #define NINED3DSPTYPE_BOOL 2
60
61 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
62
63 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
64 #define NINED3DSP_WRITEMASK_SHIFT 16
65
66 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
67
68 #define NINED3DSHADER_REL_OP_GT 1
69 #define NINED3DSHADER_REL_OP_EQ 2
70 #define NINED3DSHADER_REL_OP_GE 3
71 #define NINED3DSHADER_REL_OP_LT 4
72 #define NINED3DSHADER_REL_OP_NE 5
73 #define NINED3DSHADER_REL_OP_LE 6
74
75 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
76 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
77
78 #define NINED3DSI_TEXLD_PROJECT 0x1
79 #define NINED3DSI_TEXLD_BIAS 0x2
80
81 #define NINED3DSP_WRITEMASK_0 0x1
82 #define NINED3DSP_WRITEMASK_1 0x2
83 #define NINED3DSP_WRITEMASK_2 0x4
84 #define NINED3DSP_WRITEMASK_3 0x8
85 #define NINED3DSP_WRITEMASK_ALL 0xf
86
87 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
88
89 #define NINE_SWIZZLE4(x,y,z,w) \
90 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
91
92 #define NINE_CONSTANT_SRC(index) \
93 ureg_src_register(TGSI_FILE_CONSTANT, index)
94
95 #define NINE_APPLY_SWIZZLE(src, s) \
96 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
97
98 #define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
99 NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)
100
101 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
102 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
103 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
104
105 /*
106 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
107 * BIAS <= PS 1.4 (x-0.5)
108 * BIASNEG <= PS 1.4 (-(x-0.5))
109 * SIGN <= PS 1.4 (2(x-0.5))
110 * SIGNNEG <= PS 1.4 (-2(x-0.5))
111 * COMP <= PS 1.4 (1-x)
112 * X2 = PS 1.4 (2x)
113 * X2NEG = PS 1.4 (-2x)
114 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
115 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
116 * ABS >= SM 3.0 (abs(x))
117 * ABSNEG >= SM 3.0 (-abs(x))
118 * NOT >= SM 2.0 predication only
119 */
120 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
121 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
124 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
125 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
126 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
127 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
128 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
129 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
130 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
131 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
132 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
133 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
134
135 static const char *sm1_mod_str[] =
136 {
137 [NINED3DSPSM_NONE] = "",
138 [NINED3DSPSM_NEG] = "-",
139 [NINED3DSPSM_BIAS] = "bias",
140 [NINED3DSPSM_BIASNEG] = "biasneg",
141 [NINED3DSPSM_SIGN] = "sign",
142 [NINED3DSPSM_SIGNNEG] = "signneg",
143 [NINED3DSPSM_COMP] = "comp",
144 [NINED3DSPSM_X2] = "x2",
145 [NINED3DSPSM_X2NEG] = "x2neg",
146 [NINED3DSPSM_DZ] = "dz",
147 [NINED3DSPSM_DW] = "dw",
148 [NINED3DSPSM_ABS] = "abs",
149 [NINED3DSPSM_ABSNEG] = "-abs",
150 [NINED3DSPSM_NOT] = "not"
151 };
152
153 static void
154 sm1_dump_writemask(BYTE mask)
155 {
156 if (mask & 1) DUMP("x"); else DUMP("_");
157 if (mask & 2) DUMP("y"); else DUMP("_");
158 if (mask & 4) DUMP("z"); else DUMP("_");
159 if (mask & 8) DUMP("w"); else DUMP("_");
160 }
161
162 static void
163 sm1_dump_swizzle(BYTE s)
164 {
165 char c[4] = { 'x', 'y', 'z', 'w' };
166 DUMP("%c%c%c%c",
167 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
168 }
169
170 static const char sm1_file_char[] =
171 {
172 [D3DSPR_TEMP] = 'r',
173 [D3DSPR_INPUT] = 'v',
174 [D3DSPR_CONST] = 'c',
175 [D3DSPR_ADDR] = 'A',
176 [D3DSPR_RASTOUT] = 'R',
177 [D3DSPR_ATTROUT] = 'D',
178 [D3DSPR_OUTPUT] = 'o',
179 [D3DSPR_CONSTINT] = 'I',
180 [D3DSPR_COLOROUT] = 'C',
181 [D3DSPR_DEPTHOUT] = 'D',
182 [D3DSPR_SAMPLER] = 's',
183 [D3DSPR_CONST2] = 'c',
184 [D3DSPR_CONST3] = 'c',
185 [D3DSPR_CONST4] = 'c',
186 [D3DSPR_CONSTBOOL] = 'B',
187 [D3DSPR_LOOP] = 'L',
188 [D3DSPR_TEMPFLOAT16] = 'h',
189 [D3DSPR_MISCTYPE] = 'M',
190 [D3DSPR_LABEL] = 'X',
191 [D3DSPR_PREDICATE] = 'p'
192 };
193
194 static void
195 sm1_dump_reg(BYTE file, INT index)
196 {
197 switch (file) {
198 case D3DSPR_LOOP:
199 DUMP("aL");
200 break;
201 case D3DSPR_COLOROUT:
202 DUMP("oC%i", index);
203 break;
204 case D3DSPR_DEPTHOUT:
205 DUMP("oDepth");
206 break;
207 case D3DSPR_RASTOUT:
208 DUMP("oRast%i", index);
209 break;
210 case D3DSPR_CONSTINT:
211 DUMP("iconst[%i]", index);
212 break;
213 case D3DSPR_CONSTBOOL:
214 DUMP("bconst[%i]", index);
215 break;
216 default:
217 DUMP("%c%i", sm1_file_char[file], index);
218 break;
219 }
220 }
221
222 struct sm1_src_param
223 {
224 INT idx;
225 struct sm1_src_param *rel;
226 BYTE file;
227 BYTE swizzle;
228 BYTE mod;
229 BYTE type;
230 union {
231 DWORD d[4];
232 float f[4];
233 int i[4];
234 BOOL b;
235 } imm;
236 };
237 static void
238 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
239
240 struct sm1_dst_param
241 {
242 INT idx;
243 struct sm1_src_param *rel;
244 BYTE file;
245 BYTE mask;
246 BYTE mod;
247 int8_t shift; /* sint4 */
248 BYTE type;
249 };
250
251 static inline void
252 assert_replicate_swizzle(const struct ureg_src *reg)
253 {
254 assert(reg->SwizzleY == reg->SwizzleX &&
255 reg->SwizzleZ == reg->SwizzleX &&
256 reg->SwizzleW == reg->SwizzleX);
257 }
258
259 static void
260 sm1_dump_immediate(const struct sm1_src_param *param)
261 {
262 switch (param->type) {
263 case NINED3DSPTYPE_FLOAT4:
264 DUMP("{ %f %f %f %f }",
265 param->imm.f[0], param->imm.f[1],
266 param->imm.f[2], param->imm.f[3]);
267 break;
268 case NINED3DSPTYPE_INT4:
269 DUMP("{ %i %i %i %i }",
270 param->imm.i[0], param->imm.i[1],
271 param->imm.i[2], param->imm.i[3]);
272 break;
273 case NINED3DSPTYPE_BOOL:
274 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
275 break;
276 default:
277 assert(0);
278 break;
279 }
280 }
281
282 static void
283 sm1_dump_src_param(const struct sm1_src_param *param)
284 {
285 if (param->file == NINED3DSPR_IMMEDIATE) {
286 assert(!param->mod &&
287 !param->rel &&
288 param->swizzle == NINED3DSP_NOSWIZZLE);
289 sm1_dump_immediate(param);
290 return;
291 }
292
293 if (param->mod)
294 DUMP("%s(", sm1_mod_str[param->mod]);
295 if (param->rel) {
296 DUMP("%c[", sm1_file_char[param->file]);
297 sm1_dump_src_param(param->rel);
298 DUMP("+%i]", param->idx);
299 } else {
300 sm1_dump_reg(param->file, param->idx);
301 }
302 if (param->mod)
303 DUMP(")");
304 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
305 DUMP(".");
306 sm1_dump_swizzle(param->swizzle);
307 }
308 }
309
310 static void
311 sm1_dump_dst_param(const struct sm1_dst_param *param)
312 {
313 if (param->mod & NINED3DSPDM_SATURATE)
314 DUMP("sat ");
315 if (param->mod & NINED3DSPDM_PARTIALP)
316 DUMP("pp ");
317 if (param->mod & NINED3DSPDM_CENTROID)
318 DUMP("centroid ");
319 if (param->shift < 0)
320 DUMP("/%u ", 1 << -param->shift);
321 if (param->shift > 0)
322 DUMP("*%u ", 1 << param->shift);
323
324 if (param->rel) {
325 DUMP("%c[", sm1_file_char[param->file]);
326 sm1_dump_src_param(param->rel);
327 DUMP("+%i]", param->idx);
328 } else {
329 sm1_dump_reg(param->file, param->idx);
330 }
331 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
332 DUMP(".");
333 sm1_dump_writemask(param->mask);
334 }
335 }
336
337 struct sm1_semantic
338 {
339 struct sm1_dst_param reg;
340 BYTE sampler_type;
341 D3DDECLUSAGE usage;
342 BYTE usage_idx;
343 };
344
345 struct sm1_op_info
346 {
347 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
348 * should be ignored completely */
349 unsigned sio;
350 unsigned opcode; /* TGSI_OPCODE_x */
351
352 /* versions are still set even handler is set */
353 struct {
354 unsigned min;
355 unsigned max;
356 } vert_version, frag_version;
357
358 /* number of regs parsed outside of special handler */
359 unsigned ndst;
360 unsigned nsrc;
361
362 /* some instructions don't map perfectly, so use a special handler */
363 translate_instruction_func handler;
364 };
365
366 struct sm1_instruction
367 {
368 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
369 BYTE flags;
370 BOOL coissue;
371 BOOL predicated;
372 BYTE ndst;
373 BYTE nsrc;
374 struct sm1_src_param src[4];
375 struct sm1_src_param src_rel[4];
376 struct sm1_src_param pred;
377 struct sm1_src_param dst_rel[1];
378 struct sm1_dst_param dst[1];
379
380 struct sm1_op_info *info;
381 };
382
383 static void
384 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
385 {
386 unsigned i;
387
388 /* no info stored for these: */
389 if (insn->opcode == D3DSIO_DCL)
390 return;
391 for (i = 0; i < indent; ++i)
392 DUMP(" ");
393
394 if (insn->predicated) {
395 DUMP("@");
396 sm1_dump_src_param(&insn->pred);
397 DUMP(" ");
398 }
399 DUMP("%s", d3dsio_to_string(insn->opcode));
400 if (insn->flags) {
401 switch (insn->opcode) {
402 case D3DSIO_TEX:
403 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
404 break;
405 default:
406 DUMP("_%x", insn->flags);
407 break;
408 }
409 }
410 if (insn->coissue)
411 DUMP("_co");
412 DUMP(" ");
413
414 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
415 sm1_dump_dst_param(&insn->dst[i]);
416 DUMP(" ");
417 }
418
419 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
420 sm1_dump_src_param(&insn->src[i]);
421 DUMP(" ");
422 }
423 if (insn->opcode == D3DSIO_DEF ||
424 insn->opcode == D3DSIO_DEFI ||
425 insn->opcode == D3DSIO_DEFB)
426 sm1_dump_immediate(&insn->src[0]);
427
428 DUMP("\n");
429 }
430
431 struct sm1_local_const
432 {
433 INT idx;
434 struct ureg_src reg;
435 union {
436 boolean b;
437 float f[4];
438 int32_t i[4];
439 } imm;
440 };
441
442 struct shader_translator
443 {
444 const DWORD *byte_code;
445 const DWORD *parse;
446 const DWORD *parse_next;
447
448 struct ureg_program *ureg;
449
450 /* shader version */
451 struct {
452 BYTE major;
453 BYTE minor;
454 } version;
455 unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
456 unsigned num_constf_allowed;
457 unsigned num_consti_allowed;
458 unsigned num_constb_allowed;
459
460 boolean native_integers;
461 boolean inline_subroutines;
462 boolean lower_preds;
463 boolean want_texcoord;
464 boolean shift_wpos;
465 boolean wpos_is_sysval;
466 boolean face_is_sysval_integer;
467 unsigned texcoord_sn;
468
469 struct sm1_instruction insn; /* current instruction */
470
471 struct {
472 struct ureg_dst *r;
473 struct ureg_dst oPos;
474 struct ureg_dst oFog;
475 struct ureg_dst oPts;
476 struct ureg_dst oCol[4];
477 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
478 struct ureg_dst oDepth;
479 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
480 struct ureg_src vPos;
481 struct ureg_src vFace;
482 struct ureg_src s;
483 struct ureg_dst p;
484 struct ureg_dst address;
485 struct ureg_dst a0;
486 struct ureg_dst tS[8]; /* texture stage registers */
487 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
488 struct ureg_dst t[5]; /* scratch TEMPs */
489 struct ureg_src vC[2]; /* PS color in */
490 struct ureg_src vT[8]; /* PS texcoord in */
491 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
492 } regs;
493 unsigned num_temp; /* Elements(regs.r) */
494 unsigned num_scratch;
495 unsigned loop_depth;
496 unsigned loop_depth_max;
497 unsigned cond_depth;
498 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
499 unsigned cond_labels[NINE_MAX_COND_DEPTH];
500 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
501
502 unsigned *inst_labels; /* LABEL op */
503 unsigned num_inst_labels;
504
505 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
506
507 struct sm1_local_const *lconstf;
508 unsigned num_lconstf;
509 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
510 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
511
512 boolean indirect_const_access;
513 boolean failure;
514
515 struct nine_shader_info *info;
516
517 int16_t op_info_map[D3DSIO_BREAKP + 1];
518 };
519
520 #define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
521 #define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
522
523 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
524
525 static void
526 sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
527
528 static void
529 sm1_instruction_check(const struct sm1_instruction *insn)
530 {
531 if (insn->opcode == D3DSIO_CRS)
532 {
533 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
534 {
535 DBG("CRS.mask.w\n");
536 }
537 }
538 }
539
540 static boolean
541 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
542 {
543 INT i;
544 if (index < 0 || index >= tx->num_constf_allowed) {
545 tx->failure = TRUE;
546 return FALSE;
547 }
548 for (i = 0; i < tx->num_lconstf; ++i) {
549 if (tx->lconstf[i].idx == index) {
550 *src = tx->lconstf[i].reg;
551 return TRUE;
552 }
553 }
554 return FALSE;
555 }
556 static boolean
557 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
558 {
559 if (index < 0 || index >= tx->num_consti_allowed) {
560 tx->failure = TRUE;
561 return FALSE;
562 }
563 if (tx->lconsti[index].idx == index)
564 *src = tx->lconsti[index].reg;
565 return tx->lconsti[index].idx == index;
566 }
567 static boolean
568 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
569 {
570 if (index < 0 || index >= tx->num_constb_allowed) {
571 tx->failure = TRUE;
572 return FALSE;
573 }
574 if (tx->lconstb[index].idx == index)
575 *src = tx->lconstb[index].reg;
576 return tx->lconstb[index].idx == index;
577 }
578
579 static void
580 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
581 {
582 unsigned n;
583
584 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
585
586 for (n = 0; n < tx->num_lconstf; ++n)
587 if (tx->lconstf[n].idx == index)
588 break;
589 if (n == tx->num_lconstf) {
590 if ((n % 8) == 0) {
591 tx->lconstf = REALLOC(tx->lconstf,
592 (n + 0) * sizeof(tx->lconstf[0]),
593 (n + 8) * sizeof(tx->lconstf[0]));
594 assert(tx->lconstf);
595 }
596 tx->num_lconstf++;
597 }
598 tx->lconstf[n].idx = index;
599 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
600
601 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
602 }
603 static void
604 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
605 {
606 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
607 tx->lconsti[index].idx = index;
608 tx->lconsti[index].reg = tx->native_integers ?
609 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
610 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
611 }
612 static void
613 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
614 {
615 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
616 tx->lconstb[index].idx = index;
617 tx->lconstb[index].reg = tx->native_integers ?
618 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
619 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
620 }
621
622 static inline struct ureg_dst
623 tx_scratch(struct shader_translator *tx)
624 {
625 if (tx->num_scratch >= Elements(tx->regs.t)) {
626 tx->failure = TRUE;
627 return tx->regs.t[0];
628 }
629 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
630 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
631 return tx->regs.t[tx->num_scratch++];
632 }
633
634 static inline struct ureg_dst
635 tx_scratch_scalar(struct shader_translator *tx)
636 {
637 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
638 }
639
640 static inline struct ureg_src
641 tx_src_scalar(struct ureg_dst dst)
642 {
643 struct ureg_src src = ureg_src(dst);
644 int c = ffs(dst.WriteMask) - 1;
645 if (dst.WriteMask == (1 << c))
646 src = ureg_scalar(src, c);
647 return src;
648 }
649
650 static inline void
651 tx_temp_alloc(struct shader_translator *tx, INT idx)
652 {
653 assert(idx >= 0);
654 if (idx >= tx->num_temp) {
655 unsigned k = tx->num_temp;
656 unsigned n = idx + 1;
657 tx->regs.r = REALLOC(tx->regs.r,
658 k * sizeof(tx->regs.r[0]),
659 n * sizeof(tx->regs.r[0]));
660 for (; k < n; ++k)
661 tx->regs.r[k] = ureg_dst_undef();
662 tx->num_temp = n;
663 }
664 if (ureg_dst_is_undef(tx->regs.r[idx]))
665 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
666 }
667
668 static inline void
669 tx_addr_alloc(struct shader_translator *tx, INT idx)
670 {
671 assert(idx == 0);
672 if (ureg_dst_is_undef(tx->regs.address))
673 tx->regs.address = ureg_DECL_address(tx->ureg);
674 if (ureg_dst_is_undef(tx->regs.a0))
675 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
676 }
677
678 static inline void
679 tx_pred_alloc(struct shader_translator *tx, INT idx)
680 {
681 assert(idx == 0);
682 if (ureg_dst_is_undef(tx->regs.p))
683 tx->regs.p = ureg_DECL_predicate(tx->ureg);
684 }
685
686 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
687 * the projection should be applied on the texture. It doesn't
688 * apply on texkill.
689 * The doc is very imprecise here (it says the projection is done
690 * before rasterization, thus in vs, which seems wrong since ps instructions
691 * are affected differently)
692 * For now we only apply to the ps TEX instruction and TEXBEM.
693 * Perhaps some other instructions would need it */
694 static inline void
695 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
696 struct ureg_src src, INT idx)
697 {
698 struct ureg_dst tmp;
699 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
700
701 /* no projection */
702 if (dim == 1) {
703 ureg_MOV(tx->ureg, dst, src);
704 } else {
705 tmp = tx_scratch_scalar(tx);
706 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
707 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
708 }
709 }
710
711 static inline void
712 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
713 unsigned target, struct ureg_src src0,
714 struct ureg_src src1, INT idx)
715 {
716 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
717 struct ureg_dst tmp;
718
719 /* dim == 1: no projection
720 * Looks like must be disabled when it makes no
721 * sense according the texture dimensions
722 */
723 if (dim == 1 || dim <= target) {
724 ureg_TEX(tx->ureg, dst, target, src0, src1);
725 } else if (dim == 4) {
726 ureg_TXP(tx->ureg, dst, target, src0, src1);
727 } else {
728 tmp = tx_scratch(tx);
729 apply_ps1x_projection(tx, tmp, src0, idx);
730 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
731 }
732 }
733
734 static inline void
735 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
736 {
737 assert(IS_PS);
738 assert(idx >= 0 && idx < Elements(tx->regs.vT));
739 if (ureg_src_is_undef(tx->regs.vT[idx]))
740 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
741 TGSI_INTERPOLATE_PERSPECTIVE);
742 }
743
744 static inline unsigned *
745 tx_bgnloop(struct shader_translator *tx)
746 {
747 tx->loop_depth++;
748 if (tx->loop_depth_max < tx->loop_depth)
749 tx->loop_depth_max = tx->loop_depth;
750 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
751 return &tx->loop_labels[tx->loop_depth - 1];
752 }
753
754 static inline unsigned *
755 tx_endloop(struct shader_translator *tx)
756 {
757 assert(tx->loop_depth);
758 tx->loop_depth--;
759 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
760 ureg_get_instruction_number(tx->ureg));
761 return &tx->loop_labels[tx->loop_depth];
762 }
763
764 static struct ureg_dst
765 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
766 {
767 const unsigned l = tx->loop_depth - 1;
768
769 if (!tx->loop_depth)
770 {
771 DBG("loop counter requested outside of loop\n");
772 return ureg_dst_undef();
773 }
774
775 if (ureg_dst_is_undef(tx->regs.rL[l])) {
776 /* loop or rep ctr creation */
777 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
778 tx->loop_or_rep[l] = loop_or_rep;
779 }
780 /* loop - rep - endloop - endrep not allowed */
781 assert(tx->loop_or_rep[l] == loop_or_rep);
782
783 return tx->regs.rL[l];
784 }
785
786 static struct ureg_src
787 tx_get_loopal(struct shader_translator *tx)
788 {
789 int loop_level = tx->loop_depth - 1;
790
791 while (loop_level >= 0) {
792 /* handle loop - rep - endrep - endloop case */
793 if (tx->loop_or_rep[loop_level])
794 /* the value is in the loop counter y component (nine implementation) */
795 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
796 loop_level--;
797 }
798
799 DBG("aL counter requested outside of loop\n");
800 return ureg_src_undef();
801 }
802
803 static inline unsigned *
804 tx_cond(struct shader_translator *tx)
805 {
806 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
807 tx->cond_depth++;
808 return &tx->cond_labels[tx->cond_depth - 1];
809 }
810
811 static inline unsigned *
812 tx_elsecond(struct shader_translator *tx)
813 {
814 assert(tx->cond_depth);
815 return &tx->cond_labels[tx->cond_depth - 1];
816 }
817
818 static inline void
819 tx_endcond(struct shader_translator *tx)
820 {
821 assert(tx->cond_depth);
822 tx->cond_depth--;
823 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
824 ureg_get_instruction_number(tx->ureg));
825 }
826
827 static inline struct ureg_dst
828 nine_ureg_dst_register(unsigned file, int index)
829 {
830 return ureg_dst(ureg_src_register(file, index));
831 }
832
833 static struct ureg_src
834 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
835 {
836 struct ureg_program *ureg = tx->ureg;
837 struct ureg_src src;
838 struct ureg_dst tmp;
839
840 switch (param->file)
841 {
842 case D3DSPR_TEMP:
843 assert(!param->rel);
844 tx_temp_alloc(tx, param->idx);
845 src = ureg_src(tx->regs.r[param->idx]);
846 break;
847 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
848 case D3DSPR_ADDR:
849 assert(!param->rel);
850 if (IS_VS) {
851 assert(param->idx == 0);
852 /* the address register (vs only) must be
853 * assigned before use */
854 assert(!ureg_dst_is_undef(tx->regs.a0));
855 /* Round to lowest for vs1.1 (contrary to the doc), else
856 * round to nearest */
857 if (tx->version.major < 2 && tx->version.minor < 2)
858 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
859 else
860 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
861 src = ureg_src(tx->regs.address);
862 } else {
863 if (tx->version.major < 2 && tx->version.minor < 4) {
864 /* no subroutines, so should be defined */
865 src = ureg_src(tx->regs.tS[param->idx]);
866 } else {
867 tx_texcoord_alloc(tx, param->idx);
868 src = tx->regs.vT[param->idx];
869 }
870 }
871 break;
872 case D3DSPR_INPUT:
873 if (IS_VS) {
874 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
875 } else {
876 if (tx->version.major < 3) {
877 assert(!param->rel);
878 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
879 param->idx,
880 TGSI_INTERPOLATE_PERSPECTIVE);
881 } else {
882 assert(!param->rel); /* TODO */
883 assert(param->idx < Elements(tx->regs.v));
884 src = tx->regs.v[param->idx];
885 }
886 }
887 break;
888 case D3DSPR_PREDICATE:
889 assert(!param->rel);
890 tx_pred_alloc(tx, param->idx);
891 src = ureg_src(tx->regs.p);
892 break;
893 case D3DSPR_SAMPLER:
894 assert(param->mod == NINED3DSPSM_NONE);
895 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
896 assert(!param->rel);
897 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
898 break;
899 case D3DSPR_CONST:
900 assert(!param->rel || IS_VS);
901 if (param->rel)
902 tx->indirect_const_access = TRUE;
903 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
904 if (!param->rel)
905 nine_info_mark_const_f_used(tx->info, param->idx);
906 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
907 }
908 if (!IS_VS && tx->version.major < 2) {
909 /* ps 1.X clamps constants */
910 tmp = tx_scratch(tx);
911 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
912 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
913 src = ureg_src(tmp);
914 }
915 break;
916 case D3DSPR_CONST2:
917 case D3DSPR_CONST3:
918 case D3DSPR_CONST4:
919 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
920 assert(!"CONST2/3/4");
921 src = ureg_imm1f(ureg, 0.0f);
922 break;
923 case D3DSPR_CONSTINT:
924 /* relative adressing only possible for float constants in vs */
925 assert(!param->rel);
926 if (!tx_lconsti(tx, &src, param->idx)) {
927 nine_info_mark_const_i_used(tx->info, param->idx);
928 src = ureg_src_register(TGSI_FILE_CONSTANT,
929 tx->info->const_i_base + param->idx);
930 }
931 break;
932 case D3DSPR_CONSTBOOL:
933 assert(!param->rel);
934 if (!tx_lconstb(tx, &src, param->idx)) {
935 char r = param->idx / 4;
936 char s = param->idx & 3;
937 nine_info_mark_const_b_used(tx->info, param->idx);
938 src = ureg_src_register(TGSI_FILE_CONSTANT,
939 tx->info->const_b_base + r);
940 src = ureg_swizzle(src, s, s, s, s);
941 }
942 break;
943 case D3DSPR_LOOP:
944 if (ureg_dst_is_undef(tx->regs.address))
945 tx->regs.address = ureg_DECL_address(ureg);
946 if (!tx->native_integers)
947 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
948 else
949 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
950 src = ureg_src(tx->regs.address);
951 break;
952 case D3DSPR_MISCTYPE:
953 switch (param->idx) {
954 case D3DSMO_POSITION:
955 if (ureg_src_is_undef(tx->regs.vPos)) {
956 if (tx->wpos_is_sysval) {
957 tx->regs.vPos =
958 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
959 } else {
960 tx->regs.vPos =
961 ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
962 TGSI_INTERPOLATE_LINEAR);
963 }
964 }
965 if (tx->shift_wpos) {
966 /* TODO: do this only once */
967 struct ureg_dst wpos = tx_scratch(tx);
968 ureg_SUB(ureg, wpos, tx->regs.vPos,
969 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
970 src = ureg_src(wpos);
971 } else {
972 src = tx->regs.vPos;
973 }
974 break;
975 case D3DSMO_FACE:
976 if (ureg_src_is_undef(tx->regs.vFace)) {
977 if (tx->face_is_sysval_integer) {
978 tmp = tx_scratch(tx);
979 tx->regs.vFace =
980 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
981
982 /* convert bool to float */
983 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
984 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
985 tx->regs.vFace = ureg_src(tmp);
986 } else {
987 tx->regs.vFace = ureg_DECL_fs_input(ureg,
988 TGSI_SEMANTIC_FACE, 0,
989 TGSI_INTERPOLATE_CONSTANT);
990 }
991 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
992 }
993 src = tx->regs.vFace;
994 break;
995 default:
996 assert(!"invalid src D3DSMO");
997 break;
998 }
999 assert(!param->rel);
1000 break;
1001 case D3DSPR_TEMPFLOAT16:
1002 break;
1003 default:
1004 assert(!"invalid src D3DSPR");
1005 }
1006 if (param->rel)
1007 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1008
1009 switch (param->mod) {
1010 case NINED3DSPSM_DW:
1011 tmp = tx_scratch(tx);
1012 /* NOTE: app is not allowed to read w with this modifier */
1013 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
1014 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1015 src = ureg_src(tmp);
1016 break;
1017 case NINED3DSPSM_DZ:
1018 tmp = tx_scratch(tx);
1019 /* NOTE: app is not allowed to read z with this modifier */
1020 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
1021 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1022 src = ureg_src(tmp);
1023 break;
1024 default:
1025 break;
1026 }
1027
1028 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1029 src = ureg_swizzle(src,
1030 (param->swizzle >> 0) & 0x3,
1031 (param->swizzle >> 2) & 0x3,
1032 (param->swizzle >> 4) & 0x3,
1033 (param->swizzle >> 6) & 0x3);
1034
1035 switch (param->mod) {
1036 case NINED3DSPSM_ABS:
1037 src = ureg_abs(src);
1038 break;
1039 case NINED3DSPSM_ABSNEG:
1040 src = ureg_negate(ureg_abs(src));
1041 break;
1042 case NINED3DSPSM_NEG:
1043 src = ureg_negate(src);
1044 break;
1045 case NINED3DSPSM_BIAS:
1046 tmp = tx_scratch(tx);
1047 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
1048 src = ureg_src(tmp);
1049 break;
1050 case NINED3DSPSM_BIASNEG:
1051 tmp = tx_scratch(tx);
1052 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
1053 src = ureg_src(tmp);
1054 break;
1055 case NINED3DSPSM_NOT:
1056 if (tx->native_integers) {
1057 tmp = tx_scratch(tx);
1058 ureg_NOT(ureg, tmp, src);
1059 src = ureg_src(tmp);
1060 break;
1061 }
1062 /* fall through */
1063 case NINED3DSPSM_COMP:
1064 tmp = tx_scratch(tx);
1065 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1066 src = ureg_src(tmp);
1067 break;
1068 case NINED3DSPSM_DZ:
1069 case NINED3DSPSM_DW:
1070 /* Already handled*/
1071 break;
1072 case NINED3DSPSM_SIGN:
1073 tmp = tx_scratch(tx);
1074 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1075 src = ureg_src(tmp);
1076 break;
1077 case NINED3DSPSM_SIGNNEG:
1078 tmp = tx_scratch(tx);
1079 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1080 src = ureg_src(tmp);
1081 break;
1082 case NINED3DSPSM_X2:
1083 tmp = tx_scratch(tx);
1084 ureg_ADD(ureg, tmp, src, src);
1085 src = ureg_src(tmp);
1086 break;
1087 case NINED3DSPSM_X2NEG:
1088 tmp = tx_scratch(tx);
1089 ureg_ADD(ureg, tmp, src, src);
1090 src = ureg_negate(ureg_src(tmp));
1091 break;
1092 default:
1093 assert(param->mod == NINED3DSPSM_NONE);
1094 break;
1095 }
1096
1097 return src;
1098 }
1099
1100 static struct ureg_dst
1101 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1102 {
1103 struct ureg_dst dst;
1104
1105 switch (param->file)
1106 {
1107 case D3DSPR_TEMP:
1108 assert(!param->rel);
1109 tx_temp_alloc(tx, param->idx);
1110 dst = tx->regs.r[param->idx];
1111 break;
1112 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1113 case D3DSPR_ADDR:
1114 assert(!param->rel);
1115 if (tx->version.major < 2 && !IS_VS) {
1116 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1117 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1118 dst = tx->regs.tS[param->idx];
1119 } else
1120 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1121 tx_texcoord_alloc(tx, param->idx);
1122 dst = ureg_dst(tx->regs.vT[param->idx]);
1123 } else {
1124 tx_addr_alloc(tx, param->idx);
1125 dst = tx->regs.a0;
1126 }
1127 break;
1128 case D3DSPR_RASTOUT:
1129 assert(!param->rel);
1130 switch (param->idx) {
1131 case 0:
1132 if (ureg_dst_is_undef(tx->regs.oPos))
1133 tx->regs.oPos =
1134 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1135 dst = tx->regs.oPos;
1136 break;
1137 case 1:
1138 if (ureg_dst_is_undef(tx->regs.oFog))
1139 tx->regs.oFog =
1140 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1141 dst = tx->regs.oFog;
1142 break;
1143 case 2:
1144 if (ureg_dst_is_undef(tx->regs.oPts))
1145 tx->regs.oPts =
1146 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1147 dst = tx->regs.oPts;
1148 break;
1149 default:
1150 assert(0);
1151 break;
1152 }
1153 break;
1154 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1155 case D3DSPR_OUTPUT:
1156 if (tx->version.major < 3) {
1157 assert(!param->rel);
1158 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1159 } else {
1160 assert(!param->rel); /* TODO */
1161 assert(param->idx < Elements(tx->regs.o));
1162 dst = tx->regs.o[param->idx];
1163 }
1164 break;
1165 case D3DSPR_ATTROUT: /* VS */
1166 case D3DSPR_COLOROUT: /* PS */
1167 assert(param->idx >= 0 && param->idx < 4);
1168 assert(!param->rel);
1169 tx->info->rt_mask |= 1 << param->idx;
1170 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1171 /* ps < 3: oCol[0] will have fog blending afterward */
1172 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1173 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1174 } else {
1175 tx->regs.oCol[param->idx] =
1176 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1177 }
1178 }
1179 dst = tx->regs.oCol[param->idx];
1180 if (IS_VS && tx->version.major < 3)
1181 dst = ureg_saturate(dst);
1182 break;
1183 case D3DSPR_DEPTHOUT:
1184 assert(!param->rel);
1185 if (ureg_dst_is_undef(tx->regs.oDepth))
1186 tx->regs.oDepth =
1187 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1188 TGSI_WRITEMASK_Z, 0, 1);
1189 dst = tx->regs.oDepth; /* XXX: must write .z component */
1190 break;
1191 case D3DSPR_PREDICATE:
1192 assert(!param->rel);
1193 tx_pred_alloc(tx, param->idx);
1194 dst = tx->regs.p;
1195 break;
1196 case D3DSPR_TEMPFLOAT16:
1197 DBG("unhandled D3DSPR: %u\n", param->file);
1198 break;
1199 default:
1200 assert(!"invalid dst D3DSPR");
1201 break;
1202 }
1203 if (param->rel)
1204 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1205
1206 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1207 dst = ureg_writemask(dst, param->mask);
1208 if (param->mod & NINED3DSPDM_SATURATE)
1209 dst = ureg_saturate(dst);
1210
1211 return dst;
1212 }
1213
1214 static struct ureg_dst
1215 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1216 {
1217 if (param->shift) {
1218 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1219 return tx->regs.tdst;
1220 }
1221 return _tx_dst_param(tx, param);
1222 }
1223
1224 static void
1225 tx_apply_dst0_modifiers(struct shader_translator *tx)
1226 {
1227 struct ureg_dst rdst;
1228 float f;
1229
1230 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1231 return;
1232 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1233
1234 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1235
1236 if (tx->insn.dst[0].shift < 0)
1237 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1238 else
1239 f = 1 << tx->insn.dst[0].shift;
1240
1241 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1242 }
1243
1244 static struct ureg_src
1245 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1246 {
1247 struct ureg_src src;
1248
1249 assert(!param->shift);
1250 assert(!(param->mod & NINED3DSPDM_SATURATE));
1251
1252 switch (param->file) {
1253 case D3DSPR_INPUT:
1254 if (IS_VS) {
1255 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1256 } else {
1257 assert(!param->rel);
1258 assert(param->idx < Elements(tx->regs.v));
1259 src = tx->regs.v[param->idx];
1260 }
1261 break;
1262 default:
1263 src = ureg_src(tx_dst_param(tx, param));
1264 break;
1265 }
1266 if (param->rel)
1267 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1268
1269 if (!param->mask)
1270 WARN("mask is 0, using identity swizzle\n");
1271
1272 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1273 char s[4];
1274 int n;
1275 int c;
1276 for (n = 0, c = 0; c < 4; ++c)
1277 if (param->mask & (1 << c))
1278 s[n++] = c;
1279 assert(n);
1280 for (c = n; c < 4; ++c)
1281 s[c] = s[n - 1];
1282 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1283 }
1284 return src;
1285 }
1286
1287 static HRESULT
1288 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1289 {
1290 struct ureg_program *ureg = tx->ureg;
1291 struct ureg_dst dst;
1292 struct ureg_src src[2];
1293 struct sm1_src_param *src_mat = &tx->insn.src[1];
1294 unsigned i;
1295
1296 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1297 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1298
1299 for (i = 0; i < n; i++)
1300 {
1301 const unsigned m = (1 << i);
1302
1303 src[1] = tx_src_param(tx, src_mat);
1304 src_mat->idx++;
1305
1306 if (!(dst.WriteMask & m))
1307 continue;
1308
1309 /* XXX: src == dst case ? */
1310
1311 switch (k) {
1312 case 3:
1313 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1314 break;
1315 case 4:
1316 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1317 break;
1318 default:
1319 DBG("invalid operation: M%ux%u\n", m, n);
1320 break;
1321 }
1322 }
1323
1324 return D3D_OK;
1325 }
1326
/* Expands to two zero values; presumably the (min, max) version pair of an
 * instruction-table entry that is not supported - confirm against the table. */
1327 #define VNOTSUPPORTED 0, 0
/* Pack a shader version as (major << 8) | minor. */
1328 #define V(maj, min) (((maj) << 8) | (min))
1329
1330 static inline const char *
1331 d3dsio_to_string( unsigned opcode )
1332 {
1333 static const char *names[] = {
1334 "NOP",
1335 "MOV",
1336 "ADD",
1337 "SUB",
1338 "MAD",
1339 "MUL",
1340 "RCP",
1341 "RSQ",
1342 "DP3",
1343 "DP4",
1344 "MIN",
1345 "MAX",
1346 "SLT",
1347 "SGE",
1348 "EXP",
1349 "LOG",
1350 "LIT",
1351 "DST",
1352 "LRP",
1353 "FRC",
1354 "M4x4",
1355 "M4x3",
1356 "M3x4",
1357 "M3x3",
1358 "M3x2",
1359 "CALL",
1360 "CALLNZ",
1361 "LOOP",
1362 "RET",
1363 "ENDLOOP",
1364 "LABEL",
1365 "DCL",
1366 "POW",
1367 "CRS",
1368 "SGN",
1369 "ABS",
1370 "NRM",
1371 "SINCOS",
1372 "REP",
1373 "ENDREP",
1374 "IF",
1375 "IFC",
1376 "ELSE",
1377 "ENDIF",
1378 "BREAK",
1379 "BREAKC",
1380 "MOVA",
1381 "DEFB",
1382 "DEFI",
1383 NULL,
1384 NULL,
1385 NULL,
1386 NULL,
1387 NULL,
1388 NULL,
1389 NULL,
1390 NULL,
1391 NULL,
1392 NULL,
1393 NULL,
1394 NULL,
1395 NULL,
1396 NULL,
1397 NULL,
1398 "TEXCOORD",
1399 "TEXKILL",
1400 "TEX",
1401 "TEXBEM",
1402 "TEXBEML",
1403 "TEXREG2AR",
1404 "TEXREG2GB",
1405 "TEXM3x2PAD",
1406 "TEXM3x2TEX",
1407 "TEXM3x3PAD",
1408 "TEXM3x3TEX",
1409 NULL,
1410 "TEXM3x3SPEC",
1411 "TEXM3x3VSPEC",
1412 "EXPP",
1413 "LOGP",
1414 "CND",
1415 "DEF",
1416 "TEXREG2RGB",
1417 "TEXDP3TEX",
1418 "TEXM3x2DEPTH",
1419 "TEXDP3",
1420 "TEXM3x3",
1421 "TEXDEPTH",
1422 "CMP",
1423 "BEM",
1424 "DP2ADD",
1425 "DSX",
1426 "DSY",
1427 "TEXLDD",
1428 "SETP",
1429 "TEXLDL",
1430 "BREAKP"
1431 };
1432
1433 if (opcode < Elements(names)) return names[opcode];
1434
1435 switch (opcode) {
1436 case D3DSIO_PHASE: return "PHASE";
1437 case D3DSIO_COMMENT: return "COMMENT";
1438 case D3DSIO_END: return "END";
1439 default:
1440 return NULL;
1441 }
1442 }
1443
/* Brace initializer made only of zeros and NULL; presumably an empty
 * instruction-table entry - confirm against the table's struct layout. */
1444 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when any of the four vertex/fragment version bounds of inst is
 * non-zero. */
1445 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1446 (inst).vert_version.max | \
1447 (inst).frag_version.min | \
1448 (inst).frag_version.max)
1449
/* Name of the translation function for opcode `name`. */
1450 #define SPECIAL(name) \
1451 NineTranslateInstruction_##name
1452
/* Opens the definition of the translation function for opcode `name`; the
 * translator is available in the body as `tx`. */
1453 #define DECL_SPECIAL(name) \
1454 static HRESULT \
1455 NineTranslateInstruction_##name( struct shader_translator *tx )
1456
/* Forward declaration; the definition is not within this part of the file. */
1457 static HRESULT
1458 NineTranslateInstruction_Generic(struct shader_translator *);
1459
1460 DECL_SPECIAL(M4x4)
1461 {
1462 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1463 }
1464
1465 DECL_SPECIAL(M4x3)
1466 {
1467 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1468 }
1469
1470 DECL_SPECIAL(M3x4)
1471 {
1472 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1473 }
1474
1475 DECL_SPECIAL(M3x3)
1476 {
1477 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1478 }
1479
1480 DECL_SPECIAL(M3x2)
1481 {
1482 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1483 }
1484
1485 DECL_SPECIAL(CMP)
1486 {
1487 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1488 tx_src_param(tx, &tx->insn.src[0]),
1489 tx_src_param(tx, &tx->insn.src[2]),
1490 tx_src_param(tx, &tx->insn.src[1]));
1491 return D3D_OK;
1492 }
1493
1494 DECL_SPECIAL(CND)
1495 {
1496 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1497 struct ureg_dst cgt;
1498 struct ureg_src cnd;
1499
1500 /* the coissue flag was a tip for compilers to advise to
1501 * execute two operations at the same time, in cases
1502 * the two executions had same dst with different channels.
1503 * It has no effect on current hw. However it seems CND
1504 * is affected. The handling of this very specific case
1505 * handled below mimick wine behaviour */
1506 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1507 ureg_MOV(tx->ureg,
1508 dst, tx_src_param(tx, &tx->insn.src[1]));
1509 return D3D_OK;
1510 }
1511
1512 cnd = tx_src_param(tx, &tx->insn.src[0]);
1513 cgt = tx_scratch(tx);
1514
1515 if (tx->version.major == 1 && tx->version.minor < 4)
1516 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1517
1518 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1519
1520 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1521 tx_src_param(tx, &tx->insn.src[1]),
1522 tx_src_param(tx, &tx->insn.src[2]));
1523 return D3D_OK;
1524 }
1525
1526 DECL_SPECIAL(CALL)
1527 {
1528 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1529 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1530 return D3D_OK;
1531 }
1532
1533 DECL_SPECIAL(CALLNZ)
1534 {
1535 struct ureg_program *ureg = tx->ureg;
1536 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1537
1538 if (!tx->native_integers)
1539 ureg_IF(ureg, src, tx_cond(tx));
1540 else
1541 ureg_UIF(ureg, src, tx_cond(tx));
1542 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1543 tx_endcond(tx);
1544 ureg_ENDIF(ureg);
1545 return D3D_OK;
1546 }
1547
1548 DECL_SPECIAL(LOOP)
1549 {
1550 struct ureg_program *ureg = tx->ureg;
1551 unsigned *label;
1552 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1553 struct ureg_dst ctr;
1554 struct ureg_dst tmp;
1555 struct ureg_src ctrx;
1556
1557 label = tx_bgnloop(tx);
1558 ctr = tx_get_loopctr(tx, TRUE);
1559 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1560
1561 /* src: num_iterations - start_value of al - step for al - 0 */
1562 ureg_MOV(ureg, ctr, src);
1563 ureg_BGNLOOP(tx->ureg, label);
1564 tmp = tx_scratch_scalar(tx);
1565 /* Initially ctr.x contains the number of iterations.
1566 * ctr.y will contain the updated value of al.
1567 * We decrease ctr.x at the end of every iteration,
1568 * and stop when it reaches 0. */
1569
1570 if (!tx->native_integers) {
1571 /* case src and ctr contain floats */
1572 /* to avoid precision issue, we stop when ctr <= 0.5 */
1573 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1574 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1575 } else {
1576 /* case src and ctr contain integers */
1577 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1578 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1579 }
1580 ureg_BRK(ureg);
1581 tx_endcond(tx);
1582 ureg_ENDIF(ureg);
1583 return D3D_OK;
1584 }
1585
1586 DECL_SPECIAL(RET)
1587 {
1588 ureg_RET(tx->ureg);
1589 return D3D_OK;
1590 }
1591
1592 DECL_SPECIAL(ENDLOOP)
1593 {
1594 struct ureg_program *ureg = tx->ureg;
1595 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1596 struct ureg_dst dst_ctrx, dst_al;
1597 struct ureg_src src_ctr, al_counter;
1598
1599 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1600 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1601 src_ctr = ureg_src(ctr);
1602 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1603
1604 /* ctr.x -= 1
1605 * ctr.y (aL) += step */
1606 if (!tx->native_integers) {
1607 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1608 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1609 } else {
1610 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1611 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1612 }
1613 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1614 return D3D_OK;
1615 }
1616
1617 DECL_SPECIAL(LABEL)
1618 {
1619 unsigned k = tx->num_inst_labels;
1620 unsigned n = tx->insn.src[0].idx;
1621 assert(n < 2048);
1622 if (n >= k)
1623 tx->inst_labels = REALLOC(tx->inst_labels,
1624 k * sizeof(tx->inst_labels[0]),
1625 n * sizeof(tx->inst_labels[0]));
1626
1627 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1628 return D3D_OK;
1629 }
1630
1631 DECL_SPECIAL(SINCOS)
1632 {
1633 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1634 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1635
1636 assert(!(dst.WriteMask & 0xc));
1637
1638 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1639 ureg_SCS(tx->ureg, dst, src);
1640 return D3D_OK;
1641 }
1642
1643 DECL_SPECIAL(SGN)
1644 {
1645 ureg_SSG(tx->ureg,
1646 tx_dst_param(tx, &tx->insn.dst[0]),
1647 tx_src_param(tx, &tx->insn.src[0]));
1648 return D3D_OK;
1649 }
1650
1651 DECL_SPECIAL(REP)
1652 {
1653 struct ureg_program *ureg = tx->ureg;
1654 unsigned *label;
1655 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1656 struct ureg_dst ctr;
1657 struct ureg_dst tmp;
1658 struct ureg_src ctrx;
1659
1660 label = tx_bgnloop(tx);
1661 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1662 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1663
1664 /* NOTE: rep must be constant, so we don't have to save the count */
1665 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1666
1667 /* rep: num_iterations - 0 - 0 - 0 */
1668 ureg_MOV(ureg, ctr, rep);
1669 ureg_BGNLOOP(ureg, label);
1670 tmp = tx_scratch_scalar(tx);
1671 /* Initially ctr.x contains the number of iterations.
1672 * We decrease ctr.x at the end of every iteration,
1673 * and stop when it reaches 0. */
1674
1675 if (!tx->native_integers) {
1676 /* case src and ctr contain floats */
1677 /* to avoid precision issue, we stop when ctr <= 0.5 */
1678 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1679 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1680 } else {
1681 /* case src and ctr contain integers */
1682 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1683 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1684 }
1685 ureg_BRK(ureg);
1686 tx_endcond(tx);
1687 ureg_ENDIF(ureg);
1688
1689 return D3D_OK;
1690 }
1691
1692 DECL_SPECIAL(ENDREP)
1693 {
1694 struct ureg_program *ureg = tx->ureg;
1695 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1696 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1697 struct ureg_src src_ctr = ureg_src(ctr);
1698
1699 /* ctr.x -= 1 */
1700 if (!tx->native_integers)
1701 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1702 else
1703 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1704
1705 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1706 return D3D_OK;
1707 }
1708
1709 DECL_SPECIAL(ENDIF)
1710 {
1711 tx_endcond(tx);
1712 ureg_ENDIF(tx->ureg);
1713 return D3D_OK;
1714 }
1715
1716 DECL_SPECIAL(IF)
1717 {
1718 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1719
1720 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1721 ureg_UIF(tx->ureg, src, tx_cond(tx));
1722 else
1723 ureg_IF(tx->ureg, src, tx_cond(tx));
1724
1725 return D3D_OK;
1726 }
1727
1728 static inline unsigned
1729 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1730 {
1731 switch (flags) {
1732 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1733 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1734 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1735 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1736 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1737 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1738 default:
1739 assert(!"invalid comparison flags");
1740 return TGSI_OPCODE_SGT;
1741 }
1742 }
1743
1744 DECL_SPECIAL(IFC)
1745 {
1746 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1747 struct ureg_src src[2];
1748 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1749 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1750 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1751 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1752 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1753 return D3D_OK;
1754 }
1755
1756 DECL_SPECIAL(ELSE)
1757 {
1758 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1759 return D3D_OK;
1760 }
1761
1762 DECL_SPECIAL(BREAKC)
1763 {
1764 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1765 struct ureg_src src[2];
1766 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1767 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1768 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1769 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1770 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1771 ureg_BRK(tx->ureg);
1772 tx_endcond(tx);
1773 ureg_ENDIF(tx->ureg);
1774 return D3D_OK;
1775 }
1776
/* Printable names of the D3DDECLUSAGE_* values, indexed by the usage value.
 * Used by the DCL debug dump. */
1777 static const char *sm1_declusage_names[] =
1778 {
1779 [D3DDECLUSAGE_POSITION] = "POSITION",
1780 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1781 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1782 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1783 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1784 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1785 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1786 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1787 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1788 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1789 [D3DDECLUSAGE_COLOR] = "COLOR",
1790 [D3DDECLUSAGE_FOG] = "FOG",
1791 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1792 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1793 };
1794
1795 static inline unsigned
1796 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1797 {
1798 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1799 }
1800
1801 static void
1802 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1803 boolean tc,
1804 struct sm1_semantic *dcl)
1805 {
1806 BYTE index = dcl->usage_idx;
1807
1808 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1809 * we match to a TGSI_SEMANTIC_GENERIC with index.
1810 *
1811 * The index can be anything UINT16 and usage_idx is BYTE,
1812 * so we can fit everything. It doesn't matter if indices
1813 * are close together or low.
1814 *
1815 *
1816 * POSITION >= 1: 10 * index + 6
1817 * COLOR >= 2: 10 * (index-1) + 7
1818 * TEXCOORD[0..15]: index
1819 * BLENDWEIGHT: 10 * index + 18
1820 * BLENDINDICES: 10 * index + 19
1821 * NORMAL: 10 * index + 20
1822 * TANGENT: 10 * index + 21
1823 * BINORMAL: 10 * index + 22
1824 * TESSFACTOR: 10 * index + 23
1825 */
1826
1827 switch (dcl->usage) {
1828 case D3DDECLUSAGE_POSITION:
1829 case D3DDECLUSAGE_POSITIONT:
1830 case D3DDECLUSAGE_DEPTH:
1831 if (index == 0) {
1832 sem->Name = TGSI_SEMANTIC_POSITION;
1833 sem->Index = 0;
1834 } else {
1835 sem->Name = TGSI_SEMANTIC_GENERIC;
1836 sem->Index = 10 * index + 6;
1837 }
1838 break;
1839 case D3DDECLUSAGE_COLOR:
1840 if (index < 2) {
1841 sem->Name = TGSI_SEMANTIC_COLOR;
1842 sem->Index = index;
1843 } else {
1844 sem->Name = TGSI_SEMANTIC_GENERIC;
1845 sem->Index = 10 * (index-1) + 7;
1846 }
1847 break;
1848 case D3DDECLUSAGE_FOG:
1849 assert(index == 0);
1850 sem->Name = TGSI_SEMANTIC_FOG;
1851 sem->Index = 0;
1852 break;
1853 case D3DDECLUSAGE_PSIZE:
1854 assert(index == 0);
1855 sem->Name = TGSI_SEMANTIC_PSIZE;
1856 sem->Index = 0;
1857 break;
1858 case D3DDECLUSAGE_TEXCOORD:
1859 assert(index < 16);
1860 if (index < 8 && tc)
1861 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1862 else
1863 sem->Name = TGSI_SEMANTIC_GENERIC;
1864 sem->Index = index;
1865 break;
1866 case D3DDECLUSAGE_BLENDWEIGHT:
1867 sem->Name = TGSI_SEMANTIC_GENERIC;
1868 sem->Index = 10 * index + 18;
1869 break;
1870 case D3DDECLUSAGE_BLENDINDICES:
1871 sem->Name = TGSI_SEMANTIC_GENERIC;
1872 sem->Index = 10 * index + 19;
1873 break;
1874 case D3DDECLUSAGE_NORMAL:
1875 sem->Name = TGSI_SEMANTIC_GENERIC;
1876 sem->Index = 10 * index + 20;
1877 break;
1878 case D3DDECLUSAGE_TANGENT:
1879 sem->Name = TGSI_SEMANTIC_GENERIC;
1880 sem->Index = 10 * index + 21;
1881 break;
1882 case D3DDECLUSAGE_BINORMAL:
1883 sem->Name = TGSI_SEMANTIC_GENERIC;
1884 sem->Index = 10 * index + 22;
1885 break;
1886 case D3DDECLUSAGE_TESSFACTOR:
1887 sem->Name = TGSI_SEMANTIC_GENERIC;
1888 sem->Index = 10 * index + 23;
1889 break;
1890 case D3DDECLUSAGE_SAMPLE:
1891 sem->Name = TGSI_SEMANTIC_COUNT;
1892 sem->Index = 0;
1893 break;
1894 default:
1895 unreachable(!"Invalid DECLUSAGE.");
1896 break;
1897 }
1898 }
1899
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT;
 * these are the values the sampler-type helpers below switch on. */
1900 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
1901 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
1902 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
1903 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1904 static inline unsigned
1905 d3dstt_to_tgsi_tex(BYTE sampler_type)
1906 {
1907 switch (sampler_type) {
1908 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1909 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1910 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1911 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1912 default:
1913 assert(0);
1914 return TGSI_TEXTURE_UNKNOWN;
1915 }
1916 }
1917 static inline unsigned
1918 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1919 {
1920 switch (sampler_type) {
1921 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1922 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1923 case NINED3DSTT_VOLUME:
1924 case NINED3DSTT_CUBE:
1925 default:
1926 assert(0);
1927 return TGSI_TEXTURE_UNKNOWN;
1928 }
1929 }
1930 static inline unsigned
1931 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1932 {
1933 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1934 case 1: return TGSI_TEXTURE_1D;
1935 case 0: return TGSI_TEXTURE_2D;
1936 case 3: return TGSI_TEXTURE_3D;
1937 default:
1938 return TGSI_TEXTURE_CUBE;
1939 }
1940 }
1941
1942 static const char *
1943 sm1_sampler_type_name(BYTE sampler_type)
1944 {
1945 switch (sampler_type) {
1946 case NINED3DSTT_1D: return "1D";
1947 case NINED3DSTT_2D: return "2D";
1948 case NINED3DSTT_VOLUME: return "VOLUME";
1949 case NINED3DSTT_CUBE: return "CUBE";
1950 default:
1951 return "(D3DSTT_?)";
1952 }
1953 }
1954
1955 static inline unsigned
1956 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1957 {
1958 switch (sem->Name) {
1959 case TGSI_SEMANTIC_POSITION:
1960 case TGSI_SEMANTIC_NORMAL:
1961 return TGSI_INTERPOLATE_LINEAR;
1962 case TGSI_SEMANTIC_BCOLOR:
1963 case TGSI_SEMANTIC_COLOR:
1964 case TGSI_SEMANTIC_FOG:
1965 case TGSI_SEMANTIC_GENERIC:
1966 case TGSI_SEMANTIC_TEXCOORD:
1967 case TGSI_SEMANTIC_CLIPDIST:
1968 case TGSI_SEMANTIC_CLIPVERTEX:
1969 return TGSI_INTERPOLATE_PERSPECTIVE;
1970 case TGSI_SEMANTIC_EDGEFLAG:
1971 case TGSI_SEMANTIC_FACE:
1972 case TGSI_SEMANTIC_INSTANCEID:
1973 case TGSI_SEMANTIC_PCOORD:
1974 case TGSI_SEMANTIC_PRIMID:
1975 case TGSI_SEMANTIC_PSIZE:
1976 case TGSI_SEMANTIC_VERTEXID:
1977 return TGSI_INTERPOLATE_CONSTANT;
1978 default:
1979 assert(0);
1980 return TGSI_INTERPOLATE_CONSTANT;
1981 }
1982 }
1983
1984 DECL_SPECIAL(DCL)
1985 {
1986 struct ureg_program *ureg = tx->ureg;
1987 boolean is_input;
1988 boolean is_sampler;
1989 struct tgsi_declaration_semantic tgsi;
1990 struct sm1_semantic sem;
1991 sm1_read_semantic(tx, &sem);
1992
1993 is_input = sem.reg.file == D3DSPR_INPUT;
1994 is_sampler =
1995 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1996
1997 DUMP("DCL ");
1998 sm1_dump_dst_param(&sem.reg);
1999 if (is_sampler)
2000 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2001 else
2002 if (tx->version.major >= 3)
2003 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2004 else
2005 if (sem.usage | sem.usage_idx)
2006 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2007 else
2008 DUMP("\n");
2009
2010 if (is_sampler) {
2011 const unsigned m = 1 << sem.reg.idx;
2012 ureg_DECL_sampler(ureg, sem.reg.idx);
2013 tx->info->sampler_mask |= m;
2014 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2015 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2016 d3dstt_to_tgsi_tex(sem.sampler_type);
2017 return D3D_OK;
2018 }
2019
2020 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2021 if (IS_VS) {
2022 if (is_input) {
2023 /* linkage outside of shader with vertex declaration */
2024 ureg_DECL_vs_input(ureg, sem.reg.idx);
2025 assert(sem.reg.idx < Elements(tx->info->input_map));
2026 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2027 tx->info->num_inputs = sem.reg.idx + 1;
2028 /* NOTE: preserving order in case of indirect access */
2029 } else
2030 if (tx->version.major >= 3) {
2031 /* SM2 output semantic determined by file */
2032 assert(sem.reg.mask != 0);
2033 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2034 tx->info->position_t = TRUE;
2035 assert(sem.reg.idx < Elements(tx->regs.o));
2036 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2037 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2038
2039 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
2040 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2041 }
2042 } else {
2043 if (is_input && tx->version.major >= 3) {
2044 /* SM3 only, SM2 input semantic determined by file */
2045 assert(sem.reg.idx < Elements(tx->regs.v));
2046 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2047 ureg, tgsi.Name, tgsi.Index,
2048 nine_tgsi_to_interp_mode(&tgsi),
2049 0, /* cylwrap */
2050 sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
2051 } else
2052 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2053 /* FragColor or FragDepth */
2054 assert(sem.reg.mask != 0);
2055 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2056 0, 1);
2057 }
2058 }
2059 return D3D_OK;
2060 }
2061
2062 DECL_SPECIAL(DEF)
2063 {
2064 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2065 return D3D_OK;
2066 }
2067
2068 DECL_SPECIAL(DEFB)
2069 {
2070 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2071 return D3D_OK;
2072 }
2073
2074 DECL_SPECIAL(DEFI)
2075 {
2076 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2077 return D3D_OK;
2078 }
2079
2080 DECL_SPECIAL(POW)
2081 {
2082 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2083 struct ureg_src src[2] = {
2084 tx_src_param(tx, &tx->insn.src[0]),
2085 tx_src_param(tx, &tx->insn.src[1])
2086 };
2087 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2088 return D3D_OK;
2089 }
2090
2091 DECL_SPECIAL(RSQ)
2092 {
2093 struct ureg_program *ureg = tx->ureg;
2094 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2095 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2096 struct ureg_dst tmp = tx_scratch(tx);
2097 ureg_RSQ(ureg, tmp, ureg_abs(src));
2098 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2099 return D3D_OK;
2100 }
2101
2102 DECL_SPECIAL(LOG)
2103 {
2104 struct ureg_program *ureg = tx->ureg;
2105 struct ureg_dst tmp = tx_scratch_scalar(tx);
2106 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2107 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2108 ureg_LG2(ureg, tmp, ureg_abs(src));
2109 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2110 return D3D_OK;
2111 }
2112
2113 DECL_SPECIAL(LIT)
2114 {
2115 struct ureg_program *ureg = tx->ureg;
2116 struct ureg_dst tmp = tx_scratch(tx);
2117 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2118 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2119 ureg_LIT(ureg, tmp, src);
2120 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2121 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2122 * it 0^0 if src.w=0, which value is driver dependent. */
2123 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2124 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2125 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2126 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2127 return D3D_OK;
2128 }
2129
2130 DECL_SPECIAL(NRM)
2131 {
2132 struct ureg_program *ureg = tx->ureg;
2133 struct ureg_dst tmp = tx_scratch_scalar(tx);
2134 struct ureg_src nrm = tx_src_scalar(tmp);
2135 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2136 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2137 ureg_DP3(ureg, tmp, src, src);
2138 ureg_RSQ(ureg, tmp, nrm);
2139 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2140 ureg_MUL(ureg, dst, src, nrm);
2141 return D3D_OK;
2142 }
2143
2144 DECL_SPECIAL(DP2ADD)
2145 {
2146 struct ureg_dst tmp = tx_scratch_scalar(tx);
2147 struct ureg_src dp2 = tx_src_scalar(tmp);
2148 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2149 struct ureg_src src[3];
2150 int i;
2151 for (i = 0; i < 3; ++i)
2152 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2153 assert_replicate_swizzle(&src[2]);
2154
2155 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2156 ureg_ADD(tx->ureg, dst, src[2], dp2);
2157
2158 return D3D_OK;
2159 }
2160
2161 DECL_SPECIAL(TEXCOORD)
2162 {
2163 struct ureg_program *ureg = tx->ureg;
2164 const unsigned s = tx->insn.dst[0].idx;
2165 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2166
2167 tx_texcoord_alloc(tx, s);
2168 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2169 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2170
2171 return D3D_OK;
2172 }
2173
2174 DECL_SPECIAL(TEXCOORD_ps14)
2175 {
2176 struct ureg_program *ureg = tx->ureg;
2177 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2178 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2179
2180 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2181
2182 ureg_MOV(ureg, dst, src);
2183
2184 return D3D_OK;
2185 }
2186
2187 DECL_SPECIAL(TEXKILL)
2188 {
2189 struct ureg_src reg;
2190
2191 if (tx->version.major > 1 || tx->version.minor > 3) {
2192 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2193 } else {
2194 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2195 reg = tx->regs.vT[tx->insn.dst[0].idx];
2196 }
2197 if (tx->version.major < 2)
2198 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2199 ureg_KILL_IF(tx->ureg, reg);
2200
2201 return D3D_OK;
2202 }
2203
2204 DECL_SPECIAL(TEXBEM)
2205 {
2206 struct ureg_program *ureg = tx->ureg;
2207 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2208 struct ureg_dst tmp, tmp2, texcoord;
2209 struct ureg_src sample, m00, m01, m10, m11;
2210 struct ureg_src bumpenvlscale, bumpenvloffset;
2211 const int m = tx->insn.dst[0].idx;
2212 const int n = tx->insn.src[0].idx;
2213
2214 assert(tx->version.major == 1);
2215
2216 sample = ureg_DECL_sampler(ureg, m);
2217 tx->info->sampler_mask |= 1 << m;
2218
2219 tx_texcoord_alloc(tx, m);
2220
2221 tmp = tx_scratch(tx);
2222 tmp2 = tx_scratch(tx);
2223 texcoord = tx_scratch(tx);
2224 /*
2225 * Bump-env-matrix:
2226 * 00 is X
2227 * 01 is Y
2228 * 10 is Z
2229 * 11 is W
2230 */
2231 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2232 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2233 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2234 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2235 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2236
2237 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2238 if (m % 2 == 0) {
2239 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2240 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2241 } else {
2242 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2243 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2244 }
2245
2246 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2247
2248 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2249 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2250 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2251 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2252 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2253 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2254 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2255
2256 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2257 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2258 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2259 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2260 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2261 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2262 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2263
2264 /* Now the texture coordinates are in tmp.xy */
2265
2266 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2267 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2268 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2269 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2270 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2271 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2272 bumpenvlscale, bumpenvloffset);
2273 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2274 }
2275
2276 tx->info->bumpenvmat_needed = 1;
2277
2278 return D3D_OK;
2279 }
2280
2281 DECL_SPECIAL(TEXREG2AR)
2282 {
2283 struct ureg_program *ureg = tx->ureg;
2284 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2285 struct ureg_src sample;
2286 const int m = tx->insn.dst[0].idx;
2287 const int n = tx->insn.src[0].idx;
2288 assert(m >= 0 && m > n);
2289
2290 sample = ureg_DECL_sampler(ureg, m);
2291 tx->info->sampler_mask |= 1 << m;
2292 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2293
2294 return D3D_OK;
2295 }
2296
2297 DECL_SPECIAL(TEXREG2GB)
2298 {
2299 struct ureg_program *ureg = tx->ureg;
2300 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2301 struct ureg_src sample;
2302 const int m = tx->insn.dst[0].idx;
2303 const int n = tx->insn.src[0].idx;
2304 assert(m >= 0 && m > n);
2305
2306 sample = ureg_DECL_sampler(ureg, m);
2307 tx->info->sampler_mask |= 1 << m;
2308 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2309
2310 return D3D_OK;
2311 }
2312
2313 DECL_SPECIAL(TEXM3x2PAD)
2314 {
2315 return D3D_OK; /* this is just padding */
2316 }
2317
2318 DECL_SPECIAL(TEXM3x2TEX)
2319 {
2320 struct ureg_program *ureg = tx->ureg;
2321 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2322 struct ureg_src sample;
2323 const int m = tx->insn.dst[0].idx - 1;
2324 const int n = tx->insn.src[0].idx;
2325 assert(m >= 0 && m > n);
2326
2327 tx_texcoord_alloc(tx, m);
2328 tx_texcoord_alloc(tx, m+1);
2329
2330 /* performs the matrix multiplication */
2331 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2332 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2333
2334 sample = ureg_DECL_sampler(ureg, m + 1);
2335 tx->info->sampler_mask |= 1 << (m + 1);
2336 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2337
2338 return D3D_OK;
2339 }
2340
2341 DECL_SPECIAL(TEXM3x3PAD)
2342 {
2343 return D3D_OK; /* this is just padding */
2344 }
2345
2346 DECL_SPECIAL(TEXM3x3SPEC)
2347 {
2348 struct ureg_program *ureg = tx->ureg;
2349 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2350 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2351 struct ureg_src sample;
2352 struct ureg_dst tmp;
2353 const int m = tx->insn.dst[0].idx - 2;
2354 const int n = tx->insn.src[0].idx;
2355 assert(m >= 0 && m > n);
2356
2357 tx_texcoord_alloc(tx, m);
2358 tx_texcoord_alloc(tx, m+1);
2359 tx_texcoord_alloc(tx, m+2);
2360
2361 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2362 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2363 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2364
2365 sample = ureg_DECL_sampler(ureg, m + 2);
2366 tx->info->sampler_mask |= 1 << (m + 2);
2367 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2368
2369 /* At this step, dst = N = (u', w', z').
2370 * We want dst to be the texture sampled at (u'', w'', z''), with
2371 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2372 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2373 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2374 /* at this step tmp.x = 1/N.N */
2375 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2376 /* at this step tmp.y = N.E */
2377 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2378 /* at this step tmp.x = N.E/N.N */
2379 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2380 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2381 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2382 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2383 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2384
2385 return D3D_OK;
2386 }
2387
2388 DECL_SPECIAL(TEXREG2RGB)
2389 {
2390 struct ureg_program *ureg = tx->ureg;
2391 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2392 struct ureg_src sample;
2393 const int m = tx->insn.dst[0].idx;
2394 const int n = tx->insn.src[0].idx;
2395 assert(m >= 0 && m > n);
2396
2397 sample = ureg_DECL_sampler(ureg, m);
2398 tx->info->sampler_mask |= 1 << m;
2399 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2400
2401 return D3D_OK;
2402 }
2403
2404 DECL_SPECIAL(TEXDP3TEX)
2405 {
2406 struct ureg_program *ureg = tx->ureg;
2407 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2408 struct ureg_dst tmp;
2409 struct ureg_src sample;
2410 const int m = tx->insn.dst[0].idx;
2411 const int n = tx->insn.src[0].idx;
2412 assert(m >= 0 && m > n);
2413
2414 tx_texcoord_alloc(tx, m);
2415
2416 tmp = tx_scratch(tx);
2417 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2418 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2419
2420 sample = ureg_DECL_sampler(ureg, m);
2421 tx->info->sampler_mask |= 1 << m;
2422 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2423
2424 return D3D_OK;
2425 }
2426
2427 DECL_SPECIAL(TEXM3x2DEPTH)
2428 {
2429 struct ureg_program *ureg = tx->ureg;
2430 struct ureg_dst tmp;
2431 const int m = tx->insn.dst[0].idx - 1;
2432 const int n = tx->insn.src[0].idx;
2433 assert(m >= 0 && m > n);
2434
2435 tx_texcoord_alloc(tx, m);
2436 tx_texcoord_alloc(tx, m+1);
2437
2438 tmp = tx_scratch(tx);
2439
2440 /* performs the matrix multiplication */
2441 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2442 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2443
2444 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2445 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2446 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2447 /* res = 'w' == 0 ? 1.0 : z/w */
2448 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2449 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2450 /* replace the depth for depth testing with the result */
2451 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2452 TGSI_WRITEMASK_Z, 0, 1);
2453 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2454 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2455 return D3D_OK;
2456 }
2457
2458 DECL_SPECIAL(TEXDP3)
2459 {
2460 struct ureg_program *ureg = tx->ureg;
2461 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2462 const int m = tx->insn.dst[0].idx;
2463 const int n = tx->insn.src[0].idx;
2464 assert(m >= 0 && m > n);
2465
2466 tx_texcoord_alloc(tx, m);
2467
2468 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2469
2470 return D3D_OK;
2471 }
2472
2473 DECL_SPECIAL(TEXM3x3)
2474 {
2475 struct ureg_program *ureg = tx->ureg;
2476 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2477 struct ureg_src sample;
2478 struct ureg_dst E, tmp;
2479 const int m = tx->insn.dst[0].idx - 2;
2480 const int n = tx->insn.src[0].idx;
2481 assert(m >= 0 && m > n);
2482
2483 tx_texcoord_alloc(tx, m);
2484 tx_texcoord_alloc(tx, m+1);
2485 tx_texcoord_alloc(tx, m+2);
2486
2487 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2488 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2489 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2490
2491 switch (tx->insn.opcode) {
2492 case D3DSIO_TEXM3x3:
2493 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2494 break;
2495 case D3DSIO_TEXM3x3TEX:
2496 sample = ureg_DECL_sampler(ureg, m + 2);
2497 tx->info->sampler_mask |= 1 << (m + 2);
2498 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2499 break;
2500 case D3DSIO_TEXM3x3VSPEC:
2501 sample = ureg_DECL_sampler(ureg, m + 2);
2502 tx->info->sampler_mask |= 1 << (m + 2);
2503 E = tx_scratch(tx);
2504 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2505 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2506 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2507 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2508 /* At this step, dst = N = (u', w', z').
2509 * We want dst to be the texture sampled at (u'', w'', z''), with
2510 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2511 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2512 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2513 /* at this step tmp.x = 1/N.N */
2514 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2515 /* at this step tmp.y = N.E */
2516 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2517 /* at this step tmp.x = N.E/N.N */
2518 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2519 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2520 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2521 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2522 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2523 break;
2524 default:
2525 return D3DERR_INVALIDCALL;
2526 }
2527 return D3D_OK;
2528 }
2529
2530 DECL_SPECIAL(TEXDEPTH)
2531 {
2532 struct ureg_program *ureg = tx->ureg;
2533 struct ureg_dst r5;
2534 struct ureg_src r5r, r5g;
2535
2536 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2537
2538 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2539 * r5 won't be used afterward, thus we can use r5.ba */
2540 r5 = tx->regs.r[5];
2541 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2542 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2543
2544 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2545 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2546 /* r5.r = r/g */
2547 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2548 r5r, ureg_imm1f(ureg, 1.0f));
2549 /* replace the depth for depth testing with the result */
2550 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2551 TGSI_WRITEMASK_Z, 0, 1);
2552 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2553
2554 return D3D_OK;
2555 }
2556
2557 DECL_SPECIAL(BEM)
2558 {
2559 struct ureg_program *ureg = tx->ureg;
2560 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2561 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2562 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2563 struct ureg_src m00, m01, m10, m11;
2564 const int m = tx->insn.dst[0].idx;
2565 struct ureg_dst tmp;
2566 /*
2567 * Bump-env-matrix:
2568 * 00 is X
2569 * 01 is Y
2570 * 10 is Z
2571 * 11 is W
2572 */
2573 nine_info_mark_const_f_used(tx->info, 8 + m);
2574 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2575 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2576 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2577 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2578 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2579 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2580 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2581 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2582 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2583 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2584
2585 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2586 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2587 NINE_APPLY_SWIZZLE(src1, X), src0);
2588 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2589 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2590 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2591 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2592
2593 tx->info->bumpenvmat_needed = 1;
2594
2595 return D3D_OK;
2596 }
2597
2598 DECL_SPECIAL(TEXLD)
2599 {
2600 struct ureg_program *ureg = tx->ureg;
2601 unsigned target;
2602 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2603 struct ureg_src src[2] = {
2604 tx_src_param(tx, &tx->insn.src[0]),
2605 tx_src_param(tx, &tx->insn.src[1])
2606 };
2607 assert(tx->insn.src[1].idx >= 0 &&
2608 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2609 target = tx->sampler_targets[tx->insn.src[1].idx];
2610
2611 switch (tx->insn.flags) {
2612 case 0:
2613 ureg_TEX(ureg, dst, target, src[0], src[1]);
2614 break;
2615 case NINED3DSI_TEXLD_PROJECT:
2616 ureg_TXP(ureg, dst, target, src[0], src[1]);
2617 break;
2618 case NINED3DSI_TEXLD_BIAS:
2619 ureg_TXB(ureg, dst, target, src[0], src[1]);
2620 break;
2621 default:
2622 assert(0);
2623 return D3DERR_INVALIDCALL;
2624 }
2625 return D3D_OK;
2626 }
2627
2628 DECL_SPECIAL(TEXLD_14)
2629 {
2630 struct ureg_program *ureg = tx->ureg;
2631 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2632 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2633 const unsigned s = tx->insn.dst[0].idx;
2634 const unsigned t = ps1x_sampler_type(tx->info, s);
2635
2636 tx->info->sampler_mask |= 1 << s;
2637 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2638
2639 return D3D_OK;
2640 }
2641
2642 DECL_SPECIAL(TEX)
2643 {
2644 struct ureg_program *ureg = tx->ureg;
2645 const unsigned s = tx->insn.dst[0].idx;
2646 const unsigned t = ps1x_sampler_type(tx->info, s);
2647 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2648 struct ureg_src src[2];
2649
2650 tx_texcoord_alloc(tx, s);
2651
2652 src[0] = tx->regs.vT[s];
2653 src[1] = ureg_DECL_sampler(ureg, s);
2654 tx->info->sampler_mask |= 1 << s;
2655
2656 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2657
2658 return D3D_OK;
2659 }
2660
2661 DECL_SPECIAL(TEXLDD)
2662 {
2663 unsigned target;
2664 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2665 struct ureg_src src[4] = {
2666 tx_src_param(tx, &tx->insn.src[0]),
2667 tx_src_param(tx, &tx->insn.src[1]),
2668 tx_src_param(tx, &tx->insn.src[2]),
2669 tx_src_param(tx, &tx->insn.src[3])
2670 };
2671 assert(tx->insn.src[1].idx >= 0 &&
2672 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2673 target = tx->sampler_targets[tx->insn.src[1].idx];
2674
2675 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2676 return D3D_OK;
2677 }
2678
2679 DECL_SPECIAL(TEXLDL)
2680 {
2681 unsigned target;
2682 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2683 struct ureg_src src[2] = {
2684 tx_src_param(tx, &tx->insn.src[0]),
2685 tx_src_param(tx, &tx->insn.src[1])
2686 };
2687 assert(tx->insn.src[1].idx >= 0 &&
2688 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2689 target = tx->sampler_targets[tx->insn.src[1].idx];
2690
2691 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2692 return D3D_OK;
2693 }
2694
2695 DECL_SPECIAL(SETP)
2696 {
2697 STUB(D3DERR_INVALIDCALL);
2698 }
2699
2700 DECL_SPECIAL(BREAKP)
2701 {
2702 STUB(D3DERR_INVALIDCALL);
2703 }
2704
2705 DECL_SPECIAL(PHASE)
2706 {
2707 return D3D_OK; /* we don't care about phase */
2708 }
2709
2710 DECL_SPECIAL(COMMENT)
2711 {
2712 return D3D_OK; /* nothing to do */
2713 }
2714
2715
2716 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
2717 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2718
2719 struct sm1_op_info inst_table[] =
2720 {
2721 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2722 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2723 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2724 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2725 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2726 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2727 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2728 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2729 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2730 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2731 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2732 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2733 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2734 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2735 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2736 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2737 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2738 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2739 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2740 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2741
2742 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2743 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2744 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2745 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2746 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2747
2748 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2749 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2750 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2751 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2752 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2753 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2754
2755 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2756
2757 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2758 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2759 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2760 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2761 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2762
2763 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2764 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2765
2766 /* More flow control */
2767 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2768 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2769 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2770 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2771 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2772 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2773 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2774 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2775 /* we don't write to the address register, but a normal register (copied
2776 * when needed to the address register), thus we don't use ARR */
2777 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2778
2779 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2780 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2781
2782 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2783 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2784 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2785 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2786 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2787 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2788 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2789 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2790 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2791 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2792 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2793 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2794 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2795 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2796 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2797 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2798
2799 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2800 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2801 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2802 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2803
2804 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2805
2806 /* More tex stuff */
2807 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2808 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2809 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2810 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2811 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2812 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2813
2814 /* Misc */
2815 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2816 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2817 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2818 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2819 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2820 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2821 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2822 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2823 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2824 };
2825
2826 struct sm1_op_info inst_phase =
2827 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2828
2829 struct sm1_op_info inst_comment =
2830 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2831
2832 static void
2833 create_op_info_map(struct shader_translator *tx)
2834 {
2835 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2836 unsigned i;
2837
2838 for (i = 0; i < Elements(tx->op_info_map); ++i)
2839 tx->op_info_map[i] = -1;
2840
2841 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2842 for (i = 0; i < Elements(inst_table); ++i) {
2843 assert(inst_table[i].sio < Elements(tx->op_info_map));
2844 if (inst_table[i].vert_version.min <= version &&
2845 inst_table[i].vert_version.max >= version)
2846 tx->op_info_map[inst_table[i].sio] = i;
2847 }
2848 } else {
2849 for (i = 0; i < Elements(inst_table); ++i) {
2850 assert(inst_table[i].sio < Elements(tx->op_info_map));
2851 if (inst_table[i].frag_version.min <= version &&
2852 inst_table[i].frag_version.max >= version)
2853 tx->op_info_map[inst_table[i].sio] = i;
2854 }
2855 }
2856 }
2857
2858 static inline HRESULT
2859 NineTranslateInstruction_Generic(struct shader_translator *tx)
2860 {
2861 struct ureg_dst dst[1];
2862 struct ureg_src src[4];
2863 unsigned i;
2864
2865 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2866 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2867 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2868 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2869
2870 ureg_insn(tx->ureg, tx->insn.info->opcode,
2871 dst, tx->insn.ndst,
2872 src, tx->insn.nsrc);
2873 return D3D_OK;
2874 }
2875
2876 static inline DWORD
2877 TOKEN_PEEK(struct shader_translator *tx)
2878 {
2879 return *(tx->parse);
2880 }
2881
2882 static inline DWORD
2883 TOKEN_NEXT(struct shader_translator *tx)
2884 {
2885 return *(tx->parse)++;
2886 }
2887
2888 static inline void
2889 TOKEN_JUMP(struct shader_translator *tx)
2890 {
2891 if (tx->parse_next && tx->parse != tx->parse_next) {
2892 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2893 tx->parse = tx->parse_next;
2894 }
2895 }
2896
2897 static inline boolean
2898 sm1_parse_eof(struct shader_translator *tx)
2899 {
2900 return TOKEN_PEEK(tx) == NINED3DSP_END;
2901 }
2902
2903 static void
2904 sm1_read_version(struct shader_translator *tx)
2905 {
2906 const DWORD tok = TOKEN_NEXT(tx);
2907
2908 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2909 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2910
2911 switch (tok >> 16) {
2912 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2913 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2914 default:
2915 DBG("Invalid shader type: %x\n", tok);
2916 tx->processor = ~0;
2917 break;
2918 }
2919 }
2920
2921 /* This is just to check if we parsed the instruction properly. */
2922 static void
2923 sm1_parse_get_skip(struct shader_translator *tx)
2924 {
2925 const DWORD tok = TOKEN_PEEK(tx);
2926
2927 if (tx->version.major >= 2) {
2928 tx->parse_next = tx->parse + 1 /* this */ +
2929 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2930 } else {
2931 tx->parse_next = NULL; /* TODO: determine from param count */
2932 }
2933 }
2934
2935 static void
2936 sm1_print_comment(const char *comment, UINT size)
2937 {
2938 if (!size)
2939 return;
2940 /* TODO */
2941 }
2942
2943 static void
2944 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2945 {
2946 DWORD tok = TOKEN_PEEK(tx);
2947
2948 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2949 {
2950 const char *comment = "";
2951 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2952 tx->parse += size + 1;
2953
2954 if (print)
2955 sm1_print_comment(comment, size);
2956
2957 tok = TOKEN_PEEK(tx);
2958 }
2959 }
2960
2961 static void
2962 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2963 {
2964 *reg = TOKEN_NEXT(tx);
2965
2966 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2967 {
2968 if (tx->version.major < 2)
2969 *rel = (1 << 31) |
2970 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2971 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2972 D3DSP_NOSWIZZLE;
2973 else
2974 *rel = TOKEN_NEXT(tx);
2975 }
2976 }
2977
2978 static void
2979 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2980 {
2981 uint8_t shift;
2982 dst->file =
2983 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2984 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2985 dst->type = TGSI_RETURN_TYPE_FLOAT;
2986 dst->idx = tok & D3DSP_REGNUM_MASK;
2987 dst->rel = NULL;
2988 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2989 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2990 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2991 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2992 }
2993
2994 static void
2995 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2996 {
2997 src->file =
2998 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2999 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3000 src->type = TGSI_RETURN_TYPE_FLOAT;
3001 src->idx = tok & D3DSP_REGNUM_MASK;
3002 src->rel = NULL;
3003 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3004 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3005
3006 switch (src->file) {
3007 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3008 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3009 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3010 default:
3011 break;
3012 }
3013 }
3014
3015 static void
3016 sm1_parse_immediate(struct shader_translator *tx,
3017 struct sm1_src_param *imm)
3018 {
3019 imm->file = NINED3DSPR_IMMEDIATE;
3020 imm->idx = INT_MIN;
3021 imm->rel = NULL;
3022 imm->swizzle = NINED3DSP_NOSWIZZLE;
3023 imm->mod = 0;
3024 switch (tx->insn.opcode) {
3025 case D3DSIO_DEF:
3026 imm->type = NINED3DSPTYPE_FLOAT4;
3027 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3028 tx->parse += 4;
3029 break;
3030 case D3DSIO_DEFI:
3031 imm->type = NINED3DSPTYPE_INT4;
3032 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3033 tx->parse += 4;
3034 break;
3035 case D3DSIO_DEFB:
3036 imm->type = NINED3DSPTYPE_BOOL;
3037 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3038 tx->parse += 1;
3039 break;
3040 default:
3041 assert(0);
3042 break;
3043 }
3044 }
3045
3046 static void
3047 sm1_read_dst_param(struct shader_translator *tx,
3048 struct sm1_dst_param *dst,
3049 struct sm1_src_param *rel)
3050 {
3051 DWORD tok_dst, tok_rel = 0;
3052
3053 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3054 sm1_parse_dst_param(dst, tok_dst);
3055 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3056 sm1_parse_src_param(rel, tok_rel);
3057 dst->rel = rel;
3058 }
3059 }
3060
3061 static void
3062 sm1_read_src_param(struct shader_translator *tx,
3063 struct sm1_src_param *src,
3064 struct sm1_src_param *rel)
3065 {
3066 DWORD tok_src, tok_rel = 0;
3067
3068 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3069 sm1_parse_src_param(src, tok_src);
3070 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3071 assert(rel);
3072 sm1_parse_src_param(rel, tok_rel);
3073 src->rel = rel;
3074 }
3075 }
3076
3077 static void
3078 sm1_read_semantic(struct shader_translator *tx,
3079 struct sm1_semantic *sem)
3080 {
3081 const DWORD tok_usg = TOKEN_NEXT(tx);
3082 const DWORD tok_dst = TOKEN_NEXT(tx);
3083
3084 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3085 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3086 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3087
3088 sm1_parse_dst_param(&sem->reg, tok_dst);
3089 }
3090
3091 static void
3092 sm1_parse_instruction(struct shader_translator *tx)
3093 {
3094 struct sm1_instruction *insn = &tx->insn;
3095 DWORD tok;
3096 struct sm1_op_info *info = NULL;
3097 unsigned i;
3098
3099 sm1_parse_comments(tx, TRUE);
3100 sm1_parse_get_skip(tx);
3101
3102 tok = TOKEN_NEXT(tx);
3103
3104 insn->opcode = tok & D3DSI_OPCODE_MASK;
3105 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3106 insn->coissue = !!(tok & D3DSI_COISSUE);
3107 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3108
3109 if (insn->opcode < Elements(tx->op_info_map)) {
3110 int k = tx->op_info_map[insn->opcode];
3111 if (k >= 0) {
3112 assert(k < Elements(inst_table));
3113 info = &inst_table[k];
3114 }
3115 } else {
3116 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3117 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3118 }
3119 if (!info) {
3120 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3121 TOKEN_JUMP(tx);
3122 return;
3123 }
3124 insn->info = info;
3125 insn->ndst = info->ndst;
3126 insn->nsrc = info->nsrc;
3127
3128 assert(!insn->predicated && "TODO: predicated instructions");
3129
3130 /* check version */
3131 {
3132 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3133 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3134 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3135 if (ver < min || ver > max) {
3136 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3137 min, ver, max);
3138 return;
3139 }
3140 }
3141
3142 for (i = 0; i < insn->ndst; ++i)
3143 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3144 if (insn->predicated)
3145 sm1_read_src_param(tx, &insn->pred, NULL);
3146 for (i = 0; i < insn->nsrc; ++i)
3147 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3148
3149 /* parse here so we can dump them before processing */
3150 if (insn->opcode == D3DSIO_DEF ||
3151 insn->opcode == D3DSIO_DEFI ||
3152 insn->opcode == D3DSIO_DEFB)
3153 sm1_parse_immediate(tx, &tx->insn.src[0]);
3154
3155 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3156 sm1_instruction_check(insn);
3157
3158 if (info->handler)
3159 info->handler(tx);
3160 else
3161 NineTranslateInstruction_Generic(tx);
3162 tx_apply_dst0_modifiers(tx);
3163
3164 tx->num_scratch = 0; /* reset */
3165
3166 TOKEN_JUMP(tx);
3167 }
3168
3169 static void
3170 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3171 {
3172 unsigned i;
3173
3174 tx->info = info;
3175
3176 tx->byte_code = info->byte_code;
3177 tx->parse = info->byte_code;
3178
3179 for (i = 0; i < Elements(info->input_map); ++i)
3180 info->input_map[i] = NINE_DECLUSAGE_NONE;
3181 info->num_inputs = 0;
3182
3183 info->position_t = FALSE;
3184 info->point_size = FALSE;
3185
3186 tx->info->const_float_slots = 0;
3187 tx->info->const_int_slots = 0;
3188 tx->info->const_bool_slots = 0;
3189
3190 info->sampler_mask = 0x0;
3191 info->rt_mask = 0x0;
3192
3193 info->lconstf.data = NULL;
3194 info->lconstf.ranges = NULL;
3195
3196 info->bumpenvmat_needed = 0;
3197
3198 for (i = 0; i < Elements(tx->regs.rL); ++i) {
3199 tx->regs.rL[i] = ureg_dst_undef();
3200 }
3201 tx->regs.address = ureg_dst_undef();
3202 tx->regs.a0 = ureg_dst_undef();
3203 tx->regs.p = ureg_dst_undef();
3204 tx->regs.oDepth = ureg_dst_undef();
3205 tx->regs.vPos = ureg_src_undef();
3206 tx->regs.vFace = ureg_src_undef();
3207 for (i = 0; i < Elements(tx->regs.o); ++i)
3208 tx->regs.o[i] = ureg_dst_undef();
3209 for (i = 0; i < Elements(tx->regs.oCol); ++i)
3210 tx->regs.oCol[i] = ureg_dst_undef();
3211 for (i = 0; i < Elements(tx->regs.vC); ++i)
3212 tx->regs.vC[i] = ureg_src_undef();
3213 for (i = 0; i < Elements(tx->regs.vT); ++i)
3214 tx->regs.vT[i] = ureg_src_undef();
3215
3216 for (i = 0; i < Elements(tx->lconsti); ++i)
3217 tx->lconsti[i].idx = -1;
3218 for (i = 0; i < Elements(tx->lconstb); ++i)
3219 tx->lconstb[i].idx = -1;
3220
3221 sm1_read_version(tx);
3222
3223 info->version = (tx->version.major << 4) | tx->version.minor;
3224
3225 create_op_info_map(tx);
3226 }
3227
3228 static void
3229 tx_dtor(struct shader_translator *tx)
3230 {
3231 if (tx->num_inst_labels)
3232 FREE(tx->inst_labels);
3233 FREE(tx->lconstf);
3234 FREE(tx->regs.r);
3235 FREE(tx);
3236 }
3237
3238 static inline unsigned
3239 tgsi_processor_from_type(unsigned shader_type)
3240 {
3241 switch (shader_type) {
3242 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
3243 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
3244 default:
3245 return ~0;
3246 }
3247 }
3248
3249 static void
3250 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3251 {
3252 struct ureg_program *ureg = tx->ureg;
3253 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3254 struct ureg_src fog_end, fog_coeff, fog_density;
3255 struct ureg_src fog_vs, depth, fog_color;
3256 struct ureg_dst fog_factor;
3257
3258 if (!tx->info->fog_enable) {
3259 ureg_MOV(ureg, oCol0, src_col);
3260 return;
3261 }
3262
3263 if (tx->info->fog_mode != D3DFOG_NONE) {
3264 if (tx->wpos_is_sysval) {
3265 depth = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
3266 } else {
3267 depth = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
3268 TGSI_INTERPOLATE_LINEAR);
3269 }
3270 depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3271 }
3272
3273 nine_info_mark_const_f_used(tx->info, 33);
3274 fog_color = NINE_CONSTANT_SRC(32);
3275 fog_factor = tx_scratch_scalar(tx);
3276
3277 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3278 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3279 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3280 ureg_SUB(ureg, fog_factor, fog_end, depth);
3281 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3282 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3283 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3284 ureg_MUL(ureg, fog_factor, depth, fog_density);
3285 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3286 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3287 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3288 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3289 ureg_MUL(ureg, fog_factor, depth, fog_density);
3290 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3291 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3292 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3293 } else {
3294 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3295 TGSI_INTERPOLATE_PERSPECTIVE),
3296 TGSI_SWIZZLE_X);
3297 ureg_MOV(ureg, fog_factor, fog_vs);
3298 }
3299
3300 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3301 tx_src_scalar(fog_factor), src_col, fog_color);
3302 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3303 }
3304
/* Query screen capability PIPE_CAP_<n>; expects `device` to be in scope. */
#define GET_CAP(n) \
    device->screen->get_param(device->screen, PIPE_CAP_##n)
/* Query shader capability PIPE_SHADER_CAP_<n> for the shader type info->type;
 * expects `device` and `info` to be in scope. */
#define GET_SHADER_CAP(n) \
    device->screen->get_shader_param(device->screen, info->type, \
                                     PIPE_SHADER_CAP_##n)
3309
3310 HRESULT
3311 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3312 {
3313 struct shader_translator *tx;
3314 HRESULT hr = D3D_OK;
3315 const unsigned processor = tgsi_processor_from_type(info->type);
3316 unsigned s, slot_max;
3317 unsigned max_const_f;
3318
3319 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3320
3321 tx = CALLOC_STRUCT(shader_translator);
3322 if (!tx)
3323 return E_OUTOFMEMORY;
3324 tx_ctor(tx, info);
3325
3326 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3327 hr = D3DERR_INVALIDCALL;
3328 DBG("Unsupported shader version: %u.%u !\n",
3329 tx->version.major, tx->version.minor);
3330 goto out;
3331 }
3332 if (tx->processor != processor) {
3333 hr = D3DERR_INVALIDCALL;
3334 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3335 goto out;
3336 }
3337 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
3338 tx->version.major, tx->version.minor);
3339
3340 tx->ureg = ureg_create(processor);
3341 if (!tx->ureg) {
3342 hr = E_OUTOFMEMORY;
3343 goto out;
3344 }
3345
3346 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3347 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3348 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3349 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3350 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3351 tx->texcoord_sn = tx->want_texcoord ?
3352 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3353 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3354 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3355
3356 if (IS_VS) {
3357 tx->num_constf_allowed = NINE_MAX_CONST_F;
3358 } else if (tx->version.major < 2) {/* IS_PS v1 */
3359 tx->num_constf_allowed = 8;
3360 } else if (tx->version.major == 2) {/* IS_PS v2 */
3361 tx->num_constf_allowed = 32;
3362 } else {/* IS_PS v3 */
3363 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3364 }
3365
3366 if (tx->version.major < 2) {
3367 tx->num_consti_allowed = 0;
3368 tx->num_constb_allowed = 0;
3369 } else {
3370 tx->num_consti_allowed = NINE_MAX_CONST_I;
3371 tx->num_constb_allowed = NINE_MAX_CONST_B;
3372 }
3373
3374 /* VS must always write position. Declare it here to make it the 1st output.
3375 * (Some drivers like nv50 are buggy and rely on that.)
3376 */
3377 if (IS_VS) {
3378 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3379 } else {
3380 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3381 if (!tx->shift_wpos)
3382 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3383 }
3384
3385 while (!sm1_parse_eof(tx) && !tx->failure)
3386 sm1_parse_instruction(tx);
3387 tx->parse++; /* for byte_size */
3388
3389 if (tx->failure) {
3390 ERR("Encountered buggy shader\n");
3391 ureg_destroy(tx->ureg);
3392 hr = D3DERR_INVALIDCALL;
3393 goto out;
3394 }
3395
3396 if (IS_PS && tx->version.major < 3) {
3397 if (tx->version.major < 2) {
3398 assert(tx->num_temp); /* there must be color output */
3399 info->rt_mask |= 0x1;
3400 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3401 } else {
3402 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3403 }
3404 }
3405
3406 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3407 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3408 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3409 }
3410
3411 if (info->position_t)
3412 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3413
3414 ureg_END(tx->ureg);
3415
3416 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
3417 info->point_size = TRUE;
3418
3419 /* record local constants */
3420 if (tx->num_lconstf && tx->indirect_const_access) {
3421 struct nine_range *ranges;
3422 float *data;
3423 int *indices;
3424 unsigned i, k, n;
3425
3426 hr = E_OUTOFMEMORY;
3427
3428 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3429 if (!data)
3430 goto out;
3431 info->lconstf.data = data;
3432
3433 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3434 if (!indices)
3435 goto out;
3436
3437 /* lazy sort, num_lconstf should be small */
3438 for (n = 0; n < tx->num_lconstf; ++n) {
3439 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3440 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3441 k = i;
3442 }
3443 indices[n] = tx->lconstf[k].idx;
3444 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3445 tx->lconstf[k].idx = INT_MAX;
3446 }
3447
3448 /* count ranges */
3449 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3450 if (indices[i] != indices[i - 1] + 1)
3451 ++n;
3452 ranges = MALLOC(n * sizeof(ranges[0]));
3453 if (!ranges) {
3454 FREE(indices);
3455 goto out;
3456 }
3457 info->lconstf.ranges = ranges;
3458
3459 k = 0;
3460 ranges[k].bgn = indices[0];
3461 for (i = 1; i < tx->num_lconstf; ++i) {
3462 if (indices[i] != indices[i - 1] + 1) {
3463 ranges[k].next = &ranges[k + 1];
3464 ranges[k].end = indices[i - 1] + 1;
3465 ++k;
3466 ranges[k].bgn = indices[i];
3467 }
3468 }
3469 ranges[k].end = indices[i - 1] + 1;
3470 ranges[k].next = NULL;
3471 assert(n == (k + 1));
3472
3473 FREE(indices);
3474 hr = D3D_OK;
3475 }
3476
3477 /* r500 */
3478 if (info->const_float_slots > device->max_vs_const_f &&
3479 (info->const_int_slots || info->const_bool_slots))
3480 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3481
3482
3483 if (tx->indirect_const_access) /* vs only */
3484 info->const_float_slots = device->max_vs_const_f;
3485
3486 max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3487 slot_max = info->const_bool_slots > 0 ?
3488 max_const_f + NINE_MAX_CONST_I
3489 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3490 info->const_int_slots > 0 ?
3491 max_const_f + info->const_int_slots :
3492 info->const_float_slots;
3493
3494 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3495
3496 for (s = 0; s < slot_max; s++)
3497 ureg_DECL_constant(tx->ureg, s);
3498
3499 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3500 unsigned count;
3501 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3502 tgsi_dump(toks, 0);
3503 ureg_free_tokens(toks);
3504 }
3505
3506 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3507 if (!info->cso) {
3508 hr = D3DERR_DRIVERINTERNALERROR;
3509 FREE(info->lconstf.data);
3510 FREE(info->lconstf.ranges);
3511 goto out;
3512 }
3513
3514 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3515 out:
3516 tx_dtor(tx);
3517 return hr;
3518 }