st/nine: fix unreachable() typo
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
36
37 #define DBG_CHANNEL DBG_SHADER
38
/* Print to the shader debug channel, passing NULL as the second argument of
 * _nine_debug_printf. Written with the standard C99 variadic macro syntax
 * (the GNU-only "args..." form is not portable); at least one argument,
 * the format string, is required. */
#define DUMP(...) _nine_debug_printf(DBG_CHANNEL, NULL, __VA_ARGS__)
40
41
42 struct shader_translator;
43
44 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
45
46 static inline const char *d3dsio_to_string(unsigned opcode);
47
48
/* Shader type tags for vertex (VS) and pixel (PS) shaders. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the conditional and loop nesting stacks kept by the translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Immediate / constant value types. */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Extra register file, numbered right after the last D3DSPR_* value. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operators. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Position of the per-opcode flag bits. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values for the TEXLD opcode. */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Write mask bits, one per component. */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, x in the lowest bits. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four TGSI_SWIZZLE_* arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Source operand for constant register "index". */
#define NINE_CONSTANT_SRC(index) \
    ureg_src_register(TGSI_FILE_CONSTANT, index)

/* Replicate component "s" of src to all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
    NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)

/* Destination modifiers, shifted down by D3DSP_DSTMOD_SHIFT. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
104
/*
 * Source modifiers, shifted down by D3DSP_SRCMOD_SHIFT so they can be used
 * as small indices. Availability per shader model:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
134
135 static const char *sm1_mod_str[] =
136 {
137 [NINED3DSPSM_NONE] = "",
138 [NINED3DSPSM_NEG] = "-",
139 [NINED3DSPSM_BIAS] = "bias",
140 [NINED3DSPSM_BIASNEG] = "biasneg",
141 [NINED3DSPSM_SIGN] = "sign",
142 [NINED3DSPSM_SIGNNEG] = "signneg",
143 [NINED3DSPSM_COMP] = "comp",
144 [NINED3DSPSM_X2] = "x2",
145 [NINED3DSPSM_X2NEG] = "x2neg",
146 [NINED3DSPSM_DZ] = "dz",
147 [NINED3DSPSM_DW] = "dw",
148 [NINED3DSPSM_ABS] = "abs",
149 [NINED3DSPSM_ABSNEG] = "-abs",
150 [NINED3DSPSM_NOT] = "not"
151 };
152
153 static void
154 sm1_dump_writemask(BYTE mask)
155 {
156 if (mask & 1) DUMP("x"); else DUMP("_");
157 if (mask & 2) DUMP("y"); else DUMP("_");
158 if (mask & 4) DUMP("z"); else DUMP("_");
159 if (mask & 8) DUMP("w"); else DUMP("_");
160 }
161
162 static void
163 sm1_dump_swizzle(BYTE s)
164 {
165 char c[4] = { 'x', 'y', 'z', 'w' };
166 DUMP("%c%c%c%c",
167 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
168 }
169
170 static const char sm1_file_char[] =
171 {
172 [D3DSPR_TEMP] = 'r',
173 [D3DSPR_INPUT] = 'v',
174 [D3DSPR_CONST] = 'c',
175 [D3DSPR_ADDR] = 'A',
176 [D3DSPR_RASTOUT] = 'R',
177 [D3DSPR_ATTROUT] = 'D',
178 [D3DSPR_OUTPUT] = 'o',
179 [D3DSPR_CONSTINT] = 'I',
180 [D3DSPR_COLOROUT] = 'C',
181 [D3DSPR_DEPTHOUT] = 'D',
182 [D3DSPR_SAMPLER] = 's',
183 [D3DSPR_CONST2] = 'c',
184 [D3DSPR_CONST3] = 'c',
185 [D3DSPR_CONST4] = 'c',
186 [D3DSPR_CONSTBOOL] = 'B',
187 [D3DSPR_LOOP] = 'L',
188 [D3DSPR_TEMPFLOAT16] = 'h',
189 [D3DSPR_MISCTYPE] = 'M',
190 [D3DSPR_LABEL] = 'X',
191 [D3DSPR_PREDICATE] = 'p'
192 };
193
194 static void
195 sm1_dump_reg(BYTE file, INT index)
196 {
197 switch (file) {
198 case D3DSPR_LOOP:
199 DUMP("aL");
200 break;
201 case D3DSPR_COLOROUT:
202 DUMP("oC%i", index);
203 break;
204 case D3DSPR_DEPTHOUT:
205 DUMP("oDepth");
206 break;
207 case D3DSPR_RASTOUT:
208 DUMP("oRast%i", index);
209 break;
210 case D3DSPR_CONSTINT:
211 DUMP("iconst[%i]", index);
212 break;
213 case D3DSPR_CONSTBOOL:
214 DUMP("bconst[%i]", index);
215 break;
216 default:
217 DUMP("%c%i", sm1_file_char[file], index);
218 break;
219 }
220 }
221
222 struct sm1_src_param
223 {
224 INT idx;
225 struct sm1_src_param *rel;
226 BYTE file;
227 BYTE swizzle;
228 BYTE mod;
229 BYTE type;
230 union {
231 DWORD d[4];
232 float f[4];
233 int i[4];
234 BOOL b;
235 } imm;
236 };
237 static void
238 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
239
240 struct sm1_dst_param
241 {
242 INT idx;
243 struct sm1_src_param *rel;
244 BYTE file;
245 BYTE mask;
246 BYTE mod;
247 int8_t shift; /* sint4 */
248 BYTE type;
249 };
250
251 static inline void
252 assert_replicate_swizzle(const struct ureg_src *reg)
253 {
254 assert(reg->SwizzleY == reg->SwizzleX &&
255 reg->SwizzleZ == reg->SwizzleX &&
256 reg->SwizzleW == reg->SwizzleX);
257 }
258
259 static void
260 sm1_dump_immediate(const struct sm1_src_param *param)
261 {
262 switch (param->type) {
263 case NINED3DSPTYPE_FLOAT4:
264 DUMP("{ %f %f %f %f }",
265 param->imm.f[0], param->imm.f[1],
266 param->imm.f[2], param->imm.f[3]);
267 break;
268 case NINED3DSPTYPE_INT4:
269 DUMP("{ %i %i %i %i }",
270 param->imm.i[0], param->imm.i[1],
271 param->imm.i[2], param->imm.i[3]);
272 break;
273 case NINED3DSPTYPE_BOOL:
274 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
275 break;
276 default:
277 assert(0);
278 break;
279 }
280 }
281
282 static void
283 sm1_dump_src_param(const struct sm1_src_param *param)
284 {
285 if (param->file == NINED3DSPR_IMMEDIATE) {
286 assert(!param->mod &&
287 !param->rel &&
288 param->swizzle == NINED3DSP_NOSWIZZLE);
289 sm1_dump_immediate(param);
290 return;
291 }
292
293 if (param->mod)
294 DUMP("%s(", sm1_mod_str[param->mod]);
295 if (param->rel) {
296 DUMP("%c[", sm1_file_char[param->file]);
297 sm1_dump_src_param(param->rel);
298 DUMP("+%i]", param->idx);
299 } else {
300 sm1_dump_reg(param->file, param->idx);
301 }
302 if (param->mod)
303 DUMP(")");
304 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
305 DUMP(".");
306 sm1_dump_swizzle(param->swizzle);
307 }
308 }
309
310 static void
311 sm1_dump_dst_param(const struct sm1_dst_param *param)
312 {
313 if (param->mod & NINED3DSPDM_SATURATE)
314 DUMP("sat ");
315 if (param->mod & NINED3DSPDM_PARTIALP)
316 DUMP("pp ");
317 if (param->mod & NINED3DSPDM_CENTROID)
318 DUMP("centroid ");
319 if (param->shift < 0)
320 DUMP("/%u ", 1 << -param->shift);
321 if (param->shift > 0)
322 DUMP("*%u ", 1 << param->shift);
323
324 if (param->rel) {
325 DUMP("%c[", sm1_file_char[param->file]);
326 sm1_dump_src_param(param->rel);
327 DUMP("+%i]", param->idx);
328 } else {
329 sm1_dump_reg(param->file, param->idx);
330 }
331 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
332 DUMP(".");
333 sm1_dump_writemask(param->mask);
334 }
335 }
336
337 struct sm1_semantic
338 {
339 struct sm1_dst_param reg;
340 BYTE sampler_type;
341 D3DDECLUSAGE usage;
342 BYTE usage_idx;
343 };
344
345 struct sm1_op_info
346 {
347 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
348 * should be ignored completely */
349 unsigned sio;
350 unsigned opcode; /* TGSI_OPCODE_x */
351
352 /* versions are still set even handler is set */
353 struct {
354 unsigned min;
355 unsigned max;
356 } vert_version, frag_version;
357
358 /* number of regs parsed outside of special handler */
359 unsigned ndst;
360 unsigned nsrc;
361
362 /* some instructions don't map perfectly, so use a special handler */
363 translate_instruction_func handler;
364 };
365
366 struct sm1_instruction
367 {
368 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
369 BYTE flags;
370 BOOL coissue;
371 BOOL predicated;
372 BYTE ndst;
373 BYTE nsrc;
374 struct sm1_src_param src[4];
375 struct sm1_src_param src_rel[4];
376 struct sm1_src_param pred;
377 struct sm1_src_param dst_rel[1];
378 struct sm1_dst_param dst[1];
379
380 struct sm1_op_info *info;
381 };
382
383 static void
384 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
385 {
386 unsigned i;
387
388 /* no info stored for these: */
389 if (insn->opcode == D3DSIO_DCL)
390 return;
391 for (i = 0; i < indent; ++i)
392 DUMP(" ");
393
394 if (insn->predicated) {
395 DUMP("@");
396 sm1_dump_src_param(&insn->pred);
397 DUMP(" ");
398 }
399 DUMP("%s", d3dsio_to_string(insn->opcode));
400 if (insn->flags) {
401 switch (insn->opcode) {
402 case D3DSIO_TEX:
403 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
404 break;
405 default:
406 DUMP("_%x", insn->flags);
407 break;
408 }
409 }
410 if (insn->coissue)
411 DUMP("_co");
412 DUMP(" ");
413
414 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
415 sm1_dump_dst_param(&insn->dst[i]);
416 DUMP(" ");
417 }
418
419 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
420 sm1_dump_src_param(&insn->src[i]);
421 DUMP(" ");
422 }
423 if (insn->opcode == D3DSIO_DEF ||
424 insn->opcode == D3DSIO_DEFI ||
425 insn->opcode == D3DSIO_DEFB)
426 sm1_dump_immediate(&insn->src[0]);
427
428 DUMP("\n");
429 }
430
431 struct sm1_local_const
432 {
433 INT idx;
434 struct ureg_src reg;
435 union {
436 boolean b;
437 float f[4];
438 int32_t i[4];
439 } imm;
440 };
441
442 struct shader_translator
443 {
444 const DWORD *byte_code;
445 const DWORD *parse;
446 const DWORD *parse_next;
447
448 struct ureg_program *ureg;
449
450 /* shader version */
451 struct {
452 BYTE major;
453 BYTE minor;
454 } version;
455 unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
456 unsigned num_constf_allowed;
457 unsigned num_consti_allowed;
458 unsigned num_constb_allowed;
459
460 boolean native_integers;
461 boolean inline_subroutines;
462 boolean lower_preds;
463 boolean want_texcoord;
464 boolean shift_wpos;
465 boolean wpos_is_sysval;
466 boolean face_is_sysval_integer;
467 unsigned texcoord_sn;
468
469 struct sm1_instruction insn; /* current instruction */
470
471 struct {
472 struct ureg_dst *r;
473 struct ureg_dst oPos;
474 struct ureg_dst oFog;
475 struct ureg_dst oPts;
476 struct ureg_dst oCol[4];
477 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
478 struct ureg_dst oDepth;
479 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
480 struct ureg_src vPos;
481 struct ureg_src vFace;
482 struct ureg_src s;
483 struct ureg_dst p;
484 struct ureg_dst address;
485 struct ureg_dst a0;
486 struct ureg_dst tS[8]; /* texture stage registers */
487 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
488 struct ureg_dst t[5]; /* scratch TEMPs */
489 struct ureg_src vC[2]; /* PS color in */
490 struct ureg_src vT[8]; /* PS texcoord in */
491 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
492 } regs;
493 unsigned num_temp; /* ARRAY_SIZE(regs.r) */
494 unsigned num_scratch;
495 unsigned loop_depth;
496 unsigned loop_depth_max;
497 unsigned cond_depth;
498 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
499 unsigned cond_labels[NINE_MAX_COND_DEPTH];
500 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
501
502 unsigned *inst_labels; /* LABEL op */
503 unsigned num_inst_labels;
504
505 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
506
507 struct sm1_local_const *lconstf;
508 unsigned num_lconstf;
509 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
510 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
511
512 boolean indirect_const_access;
513 boolean failure;
514
515 struct nine_shader_info *info;
516
517 int16_t op_info_map[D3DSIO_BREAKP + 1];
518 };
519
/* Shader-type tests; both expect a "tx" translator pointer in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* Flag the translation as failed and return from a void function when cond
 * holds. Expects "tx" in scope. Call sites in this file use it without a
 * trailing semicolon, so it deliberately stays a bare if-statement. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
524
525 static void
526 sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
527
528 static void
529 sm1_instruction_check(const struct sm1_instruction *insn)
530 {
531 if (insn->opcode == D3DSIO_CRS)
532 {
533 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
534 {
535 DBG("CRS.mask.w\n");
536 }
537 }
538 }
539
540 static boolean
541 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
542 {
543 INT i;
544 if (index < 0 || index >= tx->num_constf_allowed) {
545 tx->failure = TRUE;
546 return FALSE;
547 }
548 for (i = 0; i < tx->num_lconstf; ++i) {
549 if (tx->lconstf[i].idx == index) {
550 *src = tx->lconstf[i].reg;
551 return TRUE;
552 }
553 }
554 return FALSE;
555 }
556 static boolean
557 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
558 {
559 if (index < 0 || index >= tx->num_consti_allowed) {
560 tx->failure = TRUE;
561 return FALSE;
562 }
563 if (tx->lconsti[index].idx == index)
564 *src = tx->lconsti[index].reg;
565 return tx->lconsti[index].idx == index;
566 }
567 static boolean
568 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
569 {
570 if (index < 0 || index >= tx->num_constb_allowed) {
571 tx->failure = TRUE;
572 return FALSE;
573 }
574 if (tx->lconstb[index].idx == index)
575 *src = tx->lconstb[index].reg;
576 return tx->lconstb[index].idx == index;
577 }
578
579 static void
580 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
581 {
582 unsigned n;
583
584 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
585
586 for (n = 0; n < tx->num_lconstf; ++n)
587 if (tx->lconstf[n].idx == index)
588 break;
589 if (n == tx->num_lconstf) {
590 if ((n % 8) == 0) {
591 tx->lconstf = REALLOC(tx->lconstf,
592 (n + 0) * sizeof(tx->lconstf[0]),
593 (n + 8) * sizeof(tx->lconstf[0]));
594 assert(tx->lconstf);
595 }
596 tx->num_lconstf++;
597 }
598 tx->lconstf[n].idx = index;
599 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
600
601 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
602 }
603 static void
604 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
605 {
606 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
607 tx->lconsti[index].idx = index;
608 tx->lconsti[index].reg = tx->native_integers ?
609 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
610 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
611 }
612 static void
613 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
614 {
615 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
616 tx->lconstb[index].idx = index;
617 tx->lconstb[index].reg = tx->native_integers ?
618 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
619 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
620 }
621
622 static inline struct ureg_dst
623 tx_scratch(struct shader_translator *tx)
624 {
625 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
626 tx->failure = TRUE;
627 return tx->regs.t[0];
628 }
629 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
630 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
631 return tx->regs.t[tx->num_scratch++];
632 }
633
634 static inline struct ureg_dst
635 tx_scratch_scalar(struct shader_translator *tx)
636 {
637 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
638 }
639
640 static inline struct ureg_src
641 tx_src_scalar(struct ureg_dst dst)
642 {
643 struct ureg_src src = ureg_src(dst);
644 int c = ffs(dst.WriteMask) - 1;
645 if (dst.WriteMask == (1 << c))
646 src = ureg_scalar(src, c);
647 return src;
648 }
649
650 static inline void
651 tx_temp_alloc(struct shader_translator *tx, INT idx)
652 {
653 assert(idx >= 0);
654 if (idx >= tx->num_temp) {
655 unsigned k = tx->num_temp;
656 unsigned n = idx + 1;
657 tx->regs.r = REALLOC(tx->regs.r,
658 k * sizeof(tx->regs.r[0]),
659 n * sizeof(tx->regs.r[0]));
660 for (; k < n; ++k)
661 tx->regs.r[k] = ureg_dst_undef();
662 tx->num_temp = n;
663 }
664 if (ureg_dst_is_undef(tx->regs.r[idx]))
665 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
666 }
667
668 static inline void
669 tx_addr_alloc(struct shader_translator *tx, INT idx)
670 {
671 assert(idx == 0);
672 if (ureg_dst_is_undef(tx->regs.address))
673 tx->regs.address = ureg_DECL_address(tx->ureg);
674 if (ureg_dst_is_undef(tx->regs.a0))
675 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
676 }
677
678 static inline void
679 tx_pred_alloc(struct shader_translator *tx, INT idx)
680 {
681 assert(idx == 0);
682 if (ureg_dst_is_undef(tx->regs.p))
683 tx->regs.p = ureg_DECL_predicate(tx->ureg);
684 }
685
686 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
687 * the projection should be applied on the texture. It doesn't
688 * apply on texkill.
689 * The doc is very imprecise here (it says the projection is done
690 * before rasterization, thus in vs, which seems wrong since ps instructions
691 * are affected differently)
692 * For now we only apply to the ps TEX instruction and TEXBEM.
693 * Perhaps some other instructions would need it */
694 static inline void
695 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
696 struct ureg_src src, INT idx)
697 {
698 struct ureg_dst tmp;
699 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
700
701 /* no projection */
702 if (dim == 1) {
703 ureg_MOV(tx->ureg, dst, src);
704 } else {
705 tmp = tx_scratch_scalar(tx);
706 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
707 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
708 }
709 }
710
711 static inline void
712 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
713 unsigned target, struct ureg_src src0,
714 struct ureg_src src1, INT idx)
715 {
716 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
717 struct ureg_dst tmp;
718
719 /* dim == 1: no projection
720 * Looks like must be disabled when it makes no
721 * sense according the texture dimensions
722 */
723 if (dim == 1 || dim <= target) {
724 ureg_TEX(tx->ureg, dst, target, src0, src1);
725 } else if (dim == 4) {
726 ureg_TXP(tx->ureg, dst, target, src0, src1);
727 } else {
728 tmp = tx_scratch(tx);
729 apply_ps1x_projection(tx, tmp, src0, idx);
730 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
731 }
732 }
733
734 static inline void
735 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
736 {
737 assert(IS_PS);
738 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
739 if (ureg_src_is_undef(tx->regs.vT[idx]))
740 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
741 TGSI_INTERPOLATE_PERSPECTIVE);
742 }
743
744 static inline unsigned *
745 tx_bgnloop(struct shader_translator *tx)
746 {
747 tx->loop_depth++;
748 if (tx->loop_depth_max < tx->loop_depth)
749 tx->loop_depth_max = tx->loop_depth;
750 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
751 return &tx->loop_labels[tx->loop_depth - 1];
752 }
753
754 static inline unsigned *
755 tx_endloop(struct shader_translator *tx)
756 {
757 assert(tx->loop_depth);
758 tx->loop_depth--;
759 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
760 ureg_get_instruction_number(tx->ureg));
761 return &tx->loop_labels[tx->loop_depth];
762 }
763
764 static struct ureg_dst
765 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
766 {
767 const unsigned l = tx->loop_depth - 1;
768
769 if (!tx->loop_depth)
770 {
771 DBG("loop counter requested outside of loop\n");
772 return ureg_dst_undef();
773 }
774
775 if (ureg_dst_is_undef(tx->regs.rL[l])) {
776 /* loop or rep ctr creation */
777 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
778 tx->loop_or_rep[l] = loop_or_rep;
779 }
780 /* loop - rep - endloop - endrep not allowed */
781 assert(tx->loop_or_rep[l] == loop_or_rep);
782
783 return tx->regs.rL[l];
784 }
785
786 static struct ureg_src
787 tx_get_loopal(struct shader_translator *tx)
788 {
789 int loop_level = tx->loop_depth - 1;
790
791 while (loop_level >= 0) {
792 /* handle loop - rep - endrep - endloop case */
793 if (tx->loop_or_rep[loop_level])
794 /* the value is in the loop counter y component (nine implementation) */
795 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
796 loop_level--;
797 }
798
799 DBG("aL counter requested outside of loop\n");
800 return ureg_src_undef();
801 }
802
803 static inline unsigned *
804 tx_cond(struct shader_translator *tx)
805 {
806 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
807 tx->cond_depth++;
808 return &tx->cond_labels[tx->cond_depth - 1];
809 }
810
811 static inline unsigned *
812 tx_elsecond(struct shader_translator *tx)
813 {
814 assert(tx->cond_depth);
815 return &tx->cond_labels[tx->cond_depth - 1];
816 }
817
818 static inline void
819 tx_endcond(struct shader_translator *tx)
820 {
821 assert(tx->cond_depth);
822 tx->cond_depth--;
823 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
824 ureg_get_instruction_number(tx->ureg));
825 }
826
827 static inline struct ureg_dst
828 nine_ureg_dst_register(unsigned file, int index)
829 {
830 return ureg_dst(ureg_src_register(file, index));
831 }
832
833 static inline struct ureg_src
834 nine_get_position_input(struct shader_translator *tx)
835 {
836 struct ureg_program *ureg = tx->ureg;
837
838 if (tx->wpos_is_sysval)
839 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
840 else
841 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
842 0, TGSI_INTERPOLATE_LINEAR);
843 }
844
845 static struct ureg_src
846 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
847 {
848 struct ureg_program *ureg = tx->ureg;
849 struct ureg_src src;
850 struct ureg_dst tmp;
851
852 switch (param->file)
853 {
854 case D3DSPR_TEMP:
855 assert(!param->rel);
856 tx_temp_alloc(tx, param->idx);
857 src = ureg_src(tx->regs.r[param->idx]);
858 break;
859 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
860 case D3DSPR_ADDR:
861 assert(!param->rel);
862 if (IS_VS) {
863 assert(param->idx == 0);
864 /* the address register (vs only) must be
865 * assigned before use */
866 assert(!ureg_dst_is_undef(tx->regs.a0));
867 /* Round to lowest for vs1.1 (contrary to the doc), else
868 * round to nearest */
869 if (tx->version.major < 2 && tx->version.minor < 2)
870 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
871 else
872 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
873 src = ureg_src(tx->regs.address);
874 } else {
875 if (tx->version.major < 2 && tx->version.minor < 4) {
876 /* no subroutines, so should be defined */
877 src = ureg_src(tx->regs.tS[param->idx]);
878 } else {
879 tx_texcoord_alloc(tx, param->idx);
880 src = tx->regs.vT[param->idx];
881 }
882 }
883 break;
884 case D3DSPR_INPUT:
885 if (IS_VS) {
886 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
887 } else {
888 if (tx->version.major < 3) {
889 assert(!param->rel);
890 src = ureg_DECL_fs_input_cyl_centroid(
891 ureg, TGSI_SEMANTIC_COLOR, param->idx,
892 TGSI_INTERPOLATE_COLOR, 0,
893 tx->info->force_color_in_centroid ?
894 TGSI_INTERPOLATE_LOC_CENTROID : 0,
895 0, 1);
896 } else {
897 assert(!param->rel); /* TODO */
898 assert(param->idx < ARRAY_SIZE(tx->regs.v));
899 src = tx->regs.v[param->idx];
900 }
901 }
902 break;
903 case D3DSPR_PREDICATE:
904 assert(!param->rel);
905 tx_pred_alloc(tx, param->idx);
906 src = ureg_src(tx->regs.p);
907 break;
908 case D3DSPR_SAMPLER:
909 assert(param->mod == NINED3DSPSM_NONE);
910 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
911 assert(!param->rel);
912 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
913 break;
914 case D3DSPR_CONST:
915 assert(!param->rel || IS_VS);
916 if (param->rel)
917 tx->indirect_const_access = TRUE;
918 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
919 if (!param->rel)
920 nine_info_mark_const_f_used(tx->info, param->idx);
921 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
922 }
923 if (!IS_VS && tx->version.major < 2) {
924 /* ps 1.X clamps constants */
925 tmp = tx_scratch(tx);
926 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
927 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
928 src = ureg_src(tmp);
929 }
930 break;
931 case D3DSPR_CONST2:
932 case D3DSPR_CONST3:
933 case D3DSPR_CONST4:
934 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
935 assert(!"CONST2/3/4");
936 src = ureg_imm1f(ureg, 0.0f);
937 break;
938 case D3DSPR_CONSTINT:
939 /* relative adressing only possible for float constants in vs */
940 assert(!param->rel);
941 if (!tx_lconsti(tx, &src, param->idx)) {
942 nine_info_mark_const_i_used(tx->info, param->idx);
943 src = ureg_src_register(TGSI_FILE_CONSTANT,
944 tx->info->const_i_base + param->idx);
945 }
946 break;
947 case D3DSPR_CONSTBOOL:
948 assert(!param->rel);
949 if (!tx_lconstb(tx, &src, param->idx)) {
950 char r = param->idx / 4;
951 char s = param->idx & 3;
952 nine_info_mark_const_b_used(tx->info, param->idx);
953 src = ureg_src_register(TGSI_FILE_CONSTANT,
954 tx->info->const_b_base + r);
955 src = ureg_swizzle(src, s, s, s, s);
956 }
957 break;
958 case D3DSPR_LOOP:
959 if (ureg_dst_is_undef(tx->regs.address))
960 tx->regs.address = ureg_DECL_address(ureg);
961 if (!tx->native_integers)
962 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
963 else
964 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
965 src = ureg_src(tx->regs.address);
966 break;
967 case D3DSPR_MISCTYPE:
968 switch (param->idx) {
969 case D3DSMO_POSITION:
970 if (ureg_src_is_undef(tx->regs.vPos))
971 tx->regs.vPos = nine_get_position_input(tx);
972 if (tx->shift_wpos) {
973 /* TODO: do this only once */
974 struct ureg_dst wpos = tx_scratch(tx);
975 ureg_SUB(ureg, wpos, tx->regs.vPos,
976 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
977 src = ureg_src(wpos);
978 } else {
979 src = tx->regs.vPos;
980 }
981 break;
982 case D3DSMO_FACE:
983 if (ureg_src_is_undef(tx->regs.vFace)) {
984 if (tx->face_is_sysval_integer) {
985 tmp = tx_scratch(tx);
986 tx->regs.vFace =
987 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
988
989 /* convert bool to float */
990 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
991 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
992 tx->regs.vFace = ureg_src(tmp);
993 } else {
994 tx->regs.vFace = ureg_DECL_fs_input(ureg,
995 TGSI_SEMANTIC_FACE, 0,
996 TGSI_INTERPOLATE_CONSTANT);
997 }
998 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
999 }
1000 src = tx->regs.vFace;
1001 break;
1002 default:
1003 assert(!"invalid src D3DSMO");
1004 break;
1005 }
1006 assert(!param->rel);
1007 break;
1008 case D3DSPR_TEMPFLOAT16:
1009 break;
1010 default:
1011 assert(!"invalid src D3DSPR");
1012 }
1013 if (param->rel)
1014 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1015
1016 switch (param->mod) {
1017 case NINED3DSPSM_DW:
1018 tmp = tx_scratch(tx);
1019 /* NOTE: app is not allowed to read w with this modifier */
1020 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
1021 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1022 src = ureg_src(tmp);
1023 break;
1024 case NINED3DSPSM_DZ:
1025 tmp = tx_scratch(tx);
1026 /* NOTE: app is not allowed to read z with this modifier */
1027 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
1028 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1029 src = ureg_src(tmp);
1030 break;
1031 default:
1032 break;
1033 }
1034
1035 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1036 src = ureg_swizzle(src,
1037 (param->swizzle >> 0) & 0x3,
1038 (param->swizzle >> 2) & 0x3,
1039 (param->swizzle >> 4) & 0x3,
1040 (param->swizzle >> 6) & 0x3);
1041
1042 switch (param->mod) {
1043 case NINED3DSPSM_ABS:
1044 src = ureg_abs(src);
1045 break;
1046 case NINED3DSPSM_ABSNEG:
1047 src = ureg_negate(ureg_abs(src));
1048 break;
1049 case NINED3DSPSM_NEG:
1050 src = ureg_negate(src);
1051 break;
1052 case NINED3DSPSM_BIAS:
1053 tmp = tx_scratch(tx);
1054 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
1055 src = ureg_src(tmp);
1056 break;
1057 case NINED3DSPSM_BIASNEG:
1058 tmp = tx_scratch(tx);
1059 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
1060 src = ureg_src(tmp);
1061 break;
1062 case NINED3DSPSM_NOT:
1063 if (tx->native_integers) {
1064 tmp = tx_scratch(tx);
1065 ureg_NOT(ureg, tmp, src);
1066 src = ureg_src(tmp);
1067 break;
1068 }
1069 /* fall through */
1070 case NINED3DSPSM_COMP:
1071 tmp = tx_scratch(tx);
1072 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1073 src = ureg_src(tmp);
1074 break;
1075 case NINED3DSPSM_DZ:
1076 case NINED3DSPSM_DW:
1077 /* Already handled*/
1078 break;
1079 case NINED3DSPSM_SIGN:
1080 tmp = tx_scratch(tx);
1081 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1082 src = ureg_src(tmp);
1083 break;
1084 case NINED3DSPSM_SIGNNEG:
1085 tmp = tx_scratch(tx);
1086 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1087 src = ureg_src(tmp);
1088 break;
1089 case NINED3DSPSM_X2:
1090 tmp = tx_scratch(tx);
1091 ureg_ADD(ureg, tmp, src, src);
1092 src = ureg_src(tmp);
1093 break;
1094 case NINED3DSPSM_X2NEG:
1095 tmp = tx_scratch(tx);
1096 ureg_ADD(ureg, tmp, src, src);
1097 src = ureg_negate(ureg_src(tmp));
1098 break;
1099 default:
1100 assert(param->mod == NINED3DSPSM_NONE);
1101 break;
1102 }
1103
1104 return src;
1105 }
1106
1107 static struct ureg_dst
1108 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1109 {
1110 struct ureg_dst dst;
1111
1112 switch (param->file)
1113 {
1114 case D3DSPR_TEMP:
1115 assert(!param->rel);
1116 tx_temp_alloc(tx, param->idx);
1117 dst = tx->regs.r[param->idx];
1118 break;
1119 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1120 case D3DSPR_ADDR:
1121 assert(!param->rel);
1122 if (tx->version.major < 2 && !IS_VS) {
1123 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1124 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1125 dst = tx->regs.tS[param->idx];
1126 } else
1127 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1128 tx_texcoord_alloc(tx, param->idx);
1129 dst = ureg_dst(tx->regs.vT[param->idx]);
1130 } else {
1131 tx_addr_alloc(tx, param->idx);
1132 dst = tx->regs.a0;
1133 }
1134 break;
1135 case D3DSPR_RASTOUT:
1136 assert(!param->rel);
1137 switch (param->idx) {
1138 case 0:
1139 if (ureg_dst_is_undef(tx->regs.oPos))
1140 tx->regs.oPos =
1141 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1142 dst = tx->regs.oPos;
1143 break;
1144 case 1:
1145 if (ureg_dst_is_undef(tx->regs.oFog))
1146 tx->regs.oFog =
1147 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1148 dst = tx->regs.oFog;
1149 break;
1150 case 2:
1151 if (ureg_dst_is_undef(tx->regs.oPts))
1152 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1153 dst = tx->regs.oPts;
1154 break;
1155 default:
1156 assert(0);
1157 break;
1158 }
1159 break;
1160 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1161 case D3DSPR_OUTPUT:
1162 if (tx->version.major < 3) {
1163 assert(!param->rel);
1164 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1165 } else {
1166 assert(!param->rel); /* TODO */
1167 assert(param->idx < ARRAY_SIZE(tx->regs.o));
1168 dst = tx->regs.o[param->idx];
1169 }
1170 break;
1171 case D3DSPR_ATTROUT: /* VS */
1172 case D3DSPR_COLOROUT: /* PS */
1173 assert(param->idx >= 0 && param->idx < 4);
1174 assert(!param->rel);
1175 tx->info->rt_mask |= 1 << param->idx;
1176 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1177 /* ps < 3: oCol[0] will have fog blending afterward */
1178 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1179 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1180 } else {
1181 tx->regs.oCol[param->idx] =
1182 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1183 }
1184 }
1185 dst = tx->regs.oCol[param->idx];
1186 if (IS_VS && tx->version.major < 3)
1187 dst = ureg_saturate(dst);
1188 break;
1189 case D3DSPR_DEPTHOUT:
1190 assert(!param->rel);
1191 if (ureg_dst_is_undef(tx->regs.oDepth))
1192 tx->regs.oDepth =
1193 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1194 TGSI_WRITEMASK_Z, 0, 1);
1195 dst = tx->regs.oDepth; /* XXX: must write .z component */
1196 break;
1197 case D3DSPR_PREDICATE:
1198 assert(!param->rel);
1199 tx_pred_alloc(tx, param->idx);
1200 dst = tx->regs.p;
1201 break;
1202 case D3DSPR_TEMPFLOAT16:
1203 DBG("unhandled D3DSPR: %u\n", param->file);
1204 break;
1205 default:
1206 assert(!"invalid dst D3DSPR");
1207 break;
1208 }
1209 if (param->rel)
1210 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1211
1212 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1213 dst = ureg_writemask(dst, param->mask);
1214 if (param->mod & NINED3DSPDM_SATURATE)
1215 dst = ureg_saturate(dst);
1216
1217 return dst;
1218 }
1219
1220 static struct ureg_dst
1221 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1222 {
1223 if (param->shift) {
1224 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1225 return tx->regs.tdst;
1226 }
1227 return _tx_dst_param(tx, param);
1228 }
1229
1230 static void
1231 tx_apply_dst0_modifiers(struct shader_translator *tx)
1232 {
1233 struct ureg_dst rdst;
1234 float f;
1235
1236 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1237 return;
1238 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1239
1240 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1241
1242 if (tx->insn.dst[0].shift < 0)
1243 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1244 else
1245 f = 1 << tx->insn.dst[0].shift;
1246
1247 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1248 }
1249
1250 static struct ureg_src
1251 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1252 {
1253 struct ureg_src src;
1254
1255 assert(!param->shift);
1256 assert(!(param->mod & NINED3DSPDM_SATURATE));
1257
1258 switch (param->file) {
1259 case D3DSPR_INPUT:
1260 if (IS_VS) {
1261 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1262 } else {
1263 assert(!param->rel);
1264 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1265 src = tx->regs.v[param->idx];
1266 }
1267 break;
1268 default:
1269 src = ureg_src(tx_dst_param(tx, param));
1270 break;
1271 }
1272 if (param->rel)
1273 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1274
1275 if (!param->mask)
1276 WARN("mask is 0, using identity swizzle\n");
1277
1278 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1279 char s[4];
1280 int n;
1281 int c;
1282 for (n = 0, c = 0; c < 4; ++c)
1283 if (param->mask & (1 << c))
1284 s[n++] = c;
1285 assert(n);
1286 for (c = n; c < 4; ++c)
1287 s[c] = s[n - 1];
1288 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1289 }
1290 return src;
1291 }
1292
1293 static HRESULT
1294 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1295 {
1296 struct ureg_program *ureg = tx->ureg;
1297 struct ureg_dst dst;
1298 struct ureg_src src[2];
1299 struct sm1_src_param *src_mat = &tx->insn.src[1];
1300 unsigned i;
1301
1302 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1303 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1304
1305 for (i = 0; i < n; i++)
1306 {
1307 const unsigned m = (1 << i);
1308
1309 src[1] = tx_src_param(tx, src_mat);
1310 src_mat->idx++;
1311
1312 if (!(dst.WriteMask & m))
1313 continue;
1314
1315 /* XXX: src == dst case ? */
1316
1317 switch (k) {
1318 case 3:
1319 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1320 break;
1321 case 4:
1322 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1323 break;
1324 default:
1325 DBG("invalid operation: M%ux%u\n", m, n);
1326 break;
1327 }
1328 }
1329
1330 return D3D_OK;
1331 }
1332
/* Expands to two comma-separated zeros. Presumably used to fill a
 * {min, max} version pair for unsupported opcodes -- TODO confirm against
 * the table that uses it. */
#define VNOTSUPPORTED 0, 0
/* Pack a (major, minor) version into one integer: major shifted left by 8,
 * OR'ed with minor. */
#define V(maj, min) (((maj) << 8) | (min))
1335
1336 static inline const char *
1337 d3dsio_to_string( unsigned opcode )
1338 {
1339 static const char *names[] = {
1340 "NOP",
1341 "MOV",
1342 "ADD",
1343 "SUB",
1344 "MAD",
1345 "MUL",
1346 "RCP",
1347 "RSQ",
1348 "DP3",
1349 "DP4",
1350 "MIN",
1351 "MAX",
1352 "SLT",
1353 "SGE",
1354 "EXP",
1355 "LOG",
1356 "LIT",
1357 "DST",
1358 "LRP",
1359 "FRC",
1360 "M4x4",
1361 "M4x3",
1362 "M3x4",
1363 "M3x3",
1364 "M3x2",
1365 "CALL",
1366 "CALLNZ",
1367 "LOOP",
1368 "RET",
1369 "ENDLOOP",
1370 "LABEL",
1371 "DCL",
1372 "POW",
1373 "CRS",
1374 "SGN",
1375 "ABS",
1376 "NRM",
1377 "SINCOS",
1378 "REP",
1379 "ENDREP",
1380 "IF",
1381 "IFC",
1382 "ELSE",
1383 "ENDIF",
1384 "BREAK",
1385 "BREAKC",
1386 "MOVA",
1387 "DEFB",
1388 "DEFI",
1389 NULL,
1390 NULL,
1391 NULL,
1392 NULL,
1393 NULL,
1394 NULL,
1395 NULL,
1396 NULL,
1397 NULL,
1398 NULL,
1399 NULL,
1400 NULL,
1401 NULL,
1402 NULL,
1403 NULL,
1404 "TEXCOORD",
1405 "TEXKILL",
1406 "TEX",
1407 "TEXBEM",
1408 "TEXBEML",
1409 "TEXREG2AR",
1410 "TEXREG2GB",
1411 "TEXM3x2PAD",
1412 "TEXM3x2TEX",
1413 "TEXM3x3PAD",
1414 "TEXM3x3TEX",
1415 NULL,
1416 "TEXM3x3SPEC",
1417 "TEXM3x3VSPEC",
1418 "EXPP",
1419 "LOGP",
1420 "CND",
1421 "DEF",
1422 "TEXREG2RGB",
1423 "TEXDP3TEX",
1424 "TEXM3x2DEPTH",
1425 "TEXDP3",
1426 "TEXM3x3",
1427 "TEXDEPTH",
1428 "CMP",
1429 "BEM",
1430 "DP2ADD",
1431 "DSX",
1432 "DSY",
1433 "TEXLDD",
1434 "SETP",
1435 "TEXLDL",
1436 "BREAKP"
1437 };
1438
1439 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1440
1441 switch (opcode) {
1442 case D3DSIO_PHASE: return "PHASE";
1443 case D3DSIO_COMMENT: return "COMMENT";
1444 case D3DSIO_END: return "END";
1445 default:
1446 return NULL;
1447 }
1448 }
1449
/* Aggregate initializer in which every member is zero / NULL. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when any of the four vertex/fragment version bounds of `inst`
 * is non-zero; an entry built from NULL_INSTRUCTION therefore tests as
 * invalid. */
#define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
                                    (inst).vert_version.max | \
                                    (inst).frag_version.min | \
                                    (inst).frag_version.max)
1455
/* Name of the translation handler for opcode `name`. */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the handler for opcode `name`; the function body
 * follows the macro invocation and receives the translator as `tx`. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration; the definition is not in this part of the file. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1465
1466 DECL_SPECIAL(M4x4)
1467 {
1468 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1469 }
1470
1471 DECL_SPECIAL(M4x3)
1472 {
1473 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1474 }
1475
1476 DECL_SPECIAL(M3x4)
1477 {
1478 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1479 }
1480
1481 DECL_SPECIAL(M3x3)
1482 {
1483 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1484 }
1485
1486 DECL_SPECIAL(M3x2)
1487 {
1488 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1489 }
1490
1491 DECL_SPECIAL(CMP)
1492 {
1493 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1494 tx_src_param(tx, &tx->insn.src[0]),
1495 tx_src_param(tx, &tx->insn.src[2]),
1496 tx_src_param(tx, &tx->insn.src[1]));
1497 return D3D_OK;
1498 }
1499
1500 DECL_SPECIAL(CND)
1501 {
1502 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1503 struct ureg_dst cgt;
1504 struct ureg_src cnd;
1505
1506 /* the coissue flag was a tip for compilers to advise to
1507 * execute two operations at the same time, in cases
1508 * the two executions had same dst with different channels.
1509 * It has no effect on current hw. However it seems CND
1510 * is affected. The handling of this very specific case
1511 * handled below mimick wine behaviour */
1512 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1513 ureg_MOV(tx->ureg,
1514 dst, tx_src_param(tx, &tx->insn.src[1]));
1515 return D3D_OK;
1516 }
1517
1518 cnd = tx_src_param(tx, &tx->insn.src[0]);
1519 cgt = tx_scratch(tx);
1520
1521 if (tx->version.major == 1 && tx->version.minor < 4)
1522 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1523
1524 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1525
1526 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1527 tx_src_param(tx, &tx->insn.src[1]),
1528 tx_src_param(tx, &tx->insn.src[2]));
1529 return D3D_OK;
1530 }
1531
1532 DECL_SPECIAL(CALL)
1533 {
1534 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1535 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1536 return D3D_OK;
1537 }
1538
1539 DECL_SPECIAL(CALLNZ)
1540 {
1541 struct ureg_program *ureg = tx->ureg;
1542 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1543
1544 if (!tx->native_integers)
1545 ureg_IF(ureg, src, tx_cond(tx));
1546 else
1547 ureg_UIF(ureg, src, tx_cond(tx));
1548 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1549 tx_endcond(tx);
1550 ureg_ENDIF(ureg);
1551 return D3D_OK;
1552 }
1553
1554 DECL_SPECIAL(LOOP)
1555 {
1556 struct ureg_program *ureg = tx->ureg;
1557 unsigned *label;
1558 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1559 struct ureg_dst ctr;
1560 struct ureg_dst tmp;
1561 struct ureg_src ctrx;
1562
1563 label = tx_bgnloop(tx);
1564 ctr = tx_get_loopctr(tx, TRUE);
1565 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1566
1567 /* src: num_iterations - start_value of al - step for al - 0 */
1568 ureg_MOV(ureg, ctr, src);
1569 ureg_BGNLOOP(tx->ureg, label);
1570 tmp = tx_scratch_scalar(tx);
1571 /* Initially ctr.x contains the number of iterations.
1572 * ctr.y will contain the updated value of al.
1573 * We decrease ctr.x at the end of every iteration,
1574 * and stop when it reaches 0. */
1575
1576 if (!tx->native_integers) {
1577 /* case src and ctr contain floats */
1578 /* to avoid precision issue, we stop when ctr <= 0.5 */
1579 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1580 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1581 } else {
1582 /* case src and ctr contain integers */
1583 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1584 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1585 }
1586 ureg_BRK(ureg);
1587 tx_endcond(tx);
1588 ureg_ENDIF(ureg);
1589 return D3D_OK;
1590 }
1591
1592 DECL_SPECIAL(RET)
1593 {
1594 ureg_RET(tx->ureg);
1595 return D3D_OK;
1596 }
1597
1598 DECL_SPECIAL(ENDLOOP)
1599 {
1600 struct ureg_program *ureg = tx->ureg;
1601 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1602 struct ureg_dst dst_ctrx, dst_al;
1603 struct ureg_src src_ctr, al_counter;
1604
1605 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1606 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1607 src_ctr = ureg_src(ctr);
1608 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1609
1610 /* ctr.x -= 1
1611 * ctr.y (aL) += step */
1612 if (!tx->native_integers) {
1613 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1614 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1615 } else {
1616 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1617 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1618 }
1619 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1620 return D3D_OK;
1621 }
1622
1623 DECL_SPECIAL(LABEL)
1624 {
1625 unsigned k = tx->num_inst_labels;
1626 unsigned n = tx->insn.src[0].idx;
1627 assert(n < 2048);
1628 if (n >= k)
1629 tx->inst_labels = REALLOC(tx->inst_labels,
1630 k * sizeof(tx->inst_labels[0]),
1631 n * sizeof(tx->inst_labels[0]));
1632
1633 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1634 return D3D_OK;
1635 }
1636
1637 DECL_SPECIAL(SINCOS)
1638 {
1639 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1640 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1641
1642 assert(!(dst.WriteMask & 0xc));
1643
1644 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1645 ureg_SCS(tx->ureg, dst, src);
1646 return D3D_OK;
1647 }
1648
1649 DECL_SPECIAL(SGN)
1650 {
1651 ureg_SSG(tx->ureg,
1652 tx_dst_param(tx, &tx->insn.dst[0]),
1653 tx_src_param(tx, &tx->insn.src[0]));
1654 return D3D_OK;
1655 }
1656
1657 DECL_SPECIAL(REP)
1658 {
1659 struct ureg_program *ureg = tx->ureg;
1660 unsigned *label;
1661 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1662 struct ureg_dst ctr;
1663 struct ureg_dst tmp;
1664 struct ureg_src ctrx;
1665
1666 label = tx_bgnloop(tx);
1667 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1668 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1669
1670 /* NOTE: rep must be constant, so we don't have to save the count */
1671 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1672
1673 /* rep: num_iterations - 0 - 0 - 0 */
1674 ureg_MOV(ureg, ctr, rep);
1675 ureg_BGNLOOP(ureg, label);
1676 tmp = tx_scratch_scalar(tx);
1677 /* Initially ctr.x contains the number of iterations.
1678 * We decrease ctr.x at the end of every iteration,
1679 * and stop when it reaches 0. */
1680
1681 if (!tx->native_integers) {
1682 /* case src and ctr contain floats */
1683 /* to avoid precision issue, we stop when ctr <= 0.5 */
1684 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1685 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1686 } else {
1687 /* case src and ctr contain integers */
1688 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1689 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1690 }
1691 ureg_BRK(ureg);
1692 tx_endcond(tx);
1693 ureg_ENDIF(ureg);
1694
1695 return D3D_OK;
1696 }
1697
1698 DECL_SPECIAL(ENDREP)
1699 {
1700 struct ureg_program *ureg = tx->ureg;
1701 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1702 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1703 struct ureg_src src_ctr = ureg_src(ctr);
1704
1705 /* ctr.x -= 1 */
1706 if (!tx->native_integers)
1707 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1708 else
1709 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1710
1711 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1712 return D3D_OK;
1713 }
1714
1715 DECL_SPECIAL(ENDIF)
1716 {
1717 tx_endcond(tx);
1718 ureg_ENDIF(tx->ureg);
1719 return D3D_OK;
1720 }
1721
1722 DECL_SPECIAL(IF)
1723 {
1724 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1725
1726 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1727 ureg_UIF(tx->ureg, src, tx_cond(tx));
1728 else
1729 ureg_IF(tx->ureg, src, tx_cond(tx));
1730
1731 return D3D_OK;
1732 }
1733
1734 static inline unsigned
1735 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1736 {
1737 switch (flags) {
1738 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1739 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1740 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1741 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1742 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1743 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1744 default:
1745 assert(!"invalid comparison flags");
1746 return TGSI_OPCODE_SGT;
1747 }
1748 }
1749
1750 DECL_SPECIAL(IFC)
1751 {
1752 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1753 struct ureg_src src[2];
1754 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1755 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1756 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1757 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1758 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1759 return D3D_OK;
1760 }
1761
1762 DECL_SPECIAL(ELSE)
1763 {
1764 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1765 return D3D_OK;
1766 }
1767
1768 DECL_SPECIAL(BREAKC)
1769 {
1770 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1771 struct ureg_src src[2];
1772 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1773 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1774 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1775 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1776 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1777 ureg_BRK(tx->ureg);
1778 tx_endcond(tx);
1779 ureg_ENDIF(tx->ureg);
1780 return D3D_OK;
1781 }
1782
1783 static const char *sm1_declusage_names[] =
1784 {
1785 [D3DDECLUSAGE_POSITION] = "POSITION",
1786 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1787 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1788 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1789 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1790 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1791 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1792 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1793 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1794 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1795 [D3DDECLUSAGE_COLOR] = "COLOR",
1796 [D3DDECLUSAGE_FOG] = "FOG",
1797 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1798 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1799 };
1800
1801 static inline unsigned
1802 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1803 {
1804 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1805 }
1806
1807 static void
1808 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1809 boolean tc,
1810 struct sm1_semantic *dcl)
1811 {
1812 BYTE index = dcl->usage_idx;
1813
1814 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1815 * we match to a TGSI_SEMANTIC_GENERIC with index.
1816 *
1817 * The index can be anything UINT16 and usage_idx is BYTE,
1818 * so we can fit everything. It doesn't matter if indices
1819 * are close together or low.
1820 *
1821 *
1822 * POSITION >= 1: 10 * index + 6
1823 * COLOR >= 2: 10 * (index-1) + 7
1824 * TEXCOORD[0..15]: index
1825 * BLENDWEIGHT: 10 * index + 18
1826 * BLENDINDICES: 10 * index + 19
1827 * NORMAL: 10 * index + 20
1828 * TANGENT: 10 * index + 21
1829 * BINORMAL: 10 * index + 22
1830 * TESSFACTOR: 10 * index + 23
1831 */
1832
1833 switch (dcl->usage) {
1834 case D3DDECLUSAGE_POSITION:
1835 case D3DDECLUSAGE_POSITIONT:
1836 case D3DDECLUSAGE_DEPTH:
1837 if (index == 0) {
1838 sem->Name = TGSI_SEMANTIC_POSITION;
1839 sem->Index = 0;
1840 } else {
1841 sem->Name = TGSI_SEMANTIC_GENERIC;
1842 sem->Index = 10 * index + 6;
1843 }
1844 break;
1845 case D3DDECLUSAGE_COLOR:
1846 if (index < 2) {
1847 sem->Name = TGSI_SEMANTIC_COLOR;
1848 sem->Index = index;
1849 } else {
1850 sem->Name = TGSI_SEMANTIC_GENERIC;
1851 sem->Index = 10 * (index-1) + 7;
1852 }
1853 break;
1854 case D3DDECLUSAGE_FOG:
1855 assert(index == 0);
1856 sem->Name = TGSI_SEMANTIC_FOG;
1857 sem->Index = 0;
1858 break;
1859 case D3DDECLUSAGE_PSIZE:
1860 assert(index == 0);
1861 sem->Name = TGSI_SEMANTIC_PSIZE;
1862 sem->Index = 0;
1863 break;
1864 case D3DDECLUSAGE_TEXCOORD:
1865 assert(index < 16);
1866 if (index < 8 && tc)
1867 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1868 else
1869 sem->Name = TGSI_SEMANTIC_GENERIC;
1870 sem->Index = index;
1871 break;
1872 case D3DDECLUSAGE_BLENDWEIGHT:
1873 sem->Name = TGSI_SEMANTIC_GENERIC;
1874 sem->Index = 10 * index + 18;
1875 break;
1876 case D3DDECLUSAGE_BLENDINDICES:
1877 sem->Name = TGSI_SEMANTIC_GENERIC;
1878 sem->Index = 10 * index + 19;
1879 break;
1880 case D3DDECLUSAGE_NORMAL:
1881 sem->Name = TGSI_SEMANTIC_GENERIC;
1882 sem->Index = 10 * index + 20;
1883 break;
1884 case D3DDECLUSAGE_TANGENT:
1885 sem->Name = TGSI_SEMANTIC_GENERIC;
1886 sem->Index = 10 * index + 21;
1887 break;
1888 case D3DDECLUSAGE_BINORMAL:
1889 sem->Name = TGSI_SEMANTIC_GENERIC;
1890 sem->Index = 10 * index + 22;
1891 break;
1892 case D3DDECLUSAGE_TESSFACTOR:
1893 sem->Name = TGSI_SEMANTIC_GENERIC;
1894 sem->Index = 10 * index + 23;
1895 break;
1896 case D3DDECLUSAGE_SAMPLE:
1897 sem->Name = TGSI_SEMANTIC_COUNT;
1898 sem->Index = 0;
1899 break;
1900 default:
1901 unreachable("Invalid DECLUSAGE.");
1902 break;
1903 }
1904 }
1905
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT,
 * for comparison with an already-extracted sampler type field. */
#define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1910 static inline unsigned
1911 d3dstt_to_tgsi_tex(BYTE sampler_type)
1912 {
1913 switch (sampler_type) {
1914 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1915 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1916 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1917 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1918 default:
1919 assert(0);
1920 return TGSI_TEXTURE_UNKNOWN;
1921 }
1922 }
1923 static inline unsigned
1924 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1925 {
1926 switch (sampler_type) {
1927 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1928 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1929 case NINED3DSTT_VOLUME:
1930 case NINED3DSTT_CUBE:
1931 default:
1932 assert(0);
1933 return TGSI_TEXTURE_UNKNOWN;
1934 }
1935 }
1936 static inline unsigned
1937 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1938 {
1939 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1940 case 1: return TGSI_TEXTURE_1D;
1941 case 0: return TGSI_TEXTURE_2D;
1942 case 3: return TGSI_TEXTURE_3D;
1943 default:
1944 return TGSI_TEXTURE_CUBE;
1945 }
1946 }
1947
1948 static const char *
1949 sm1_sampler_type_name(BYTE sampler_type)
1950 {
1951 switch (sampler_type) {
1952 case NINED3DSTT_1D: return "1D";
1953 case NINED3DSTT_2D: return "2D";
1954 case NINED3DSTT_VOLUME: return "VOLUME";
1955 case NINED3DSTT_CUBE: return "CUBE";
1956 default:
1957 return "(D3DSTT_?)";
1958 }
1959 }
1960
1961 static inline unsigned
1962 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1963 {
1964 switch (sem->Name) {
1965 case TGSI_SEMANTIC_POSITION:
1966 case TGSI_SEMANTIC_NORMAL:
1967 return TGSI_INTERPOLATE_LINEAR;
1968 case TGSI_SEMANTIC_BCOLOR:
1969 case TGSI_SEMANTIC_COLOR:
1970 return TGSI_INTERPOLATE_COLOR;
1971 case TGSI_SEMANTIC_FOG:
1972 case TGSI_SEMANTIC_GENERIC:
1973 case TGSI_SEMANTIC_TEXCOORD:
1974 case TGSI_SEMANTIC_CLIPDIST:
1975 case TGSI_SEMANTIC_CLIPVERTEX:
1976 return TGSI_INTERPOLATE_PERSPECTIVE;
1977 case TGSI_SEMANTIC_EDGEFLAG:
1978 case TGSI_SEMANTIC_FACE:
1979 case TGSI_SEMANTIC_INSTANCEID:
1980 case TGSI_SEMANTIC_PCOORD:
1981 case TGSI_SEMANTIC_PRIMID:
1982 case TGSI_SEMANTIC_PSIZE:
1983 case TGSI_SEMANTIC_VERTEXID:
1984 return TGSI_INTERPOLATE_CONSTANT;
1985 default:
1986 assert(0);
1987 return TGSI_INTERPOLATE_CONSTANT;
1988 }
1989 }
1990
1991 DECL_SPECIAL(DCL)
1992 {
1993 struct ureg_program *ureg = tx->ureg;
1994 boolean is_input;
1995 boolean is_sampler;
1996 struct tgsi_declaration_semantic tgsi;
1997 struct sm1_semantic sem;
1998 sm1_read_semantic(tx, &sem);
1999
2000 is_input = sem.reg.file == D3DSPR_INPUT;
2001 is_sampler =
2002 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2003
2004 DUMP("DCL ");
2005 sm1_dump_dst_param(&sem.reg);
2006 if (is_sampler)
2007 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2008 else
2009 if (tx->version.major >= 3)
2010 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2011 else
2012 if (sem.usage | sem.usage_idx)
2013 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2014 else
2015 DUMP("\n");
2016
2017 if (is_sampler) {
2018 const unsigned m = 1 << sem.reg.idx;
2019 ureg_DECL_sampler(ureg, sem.reg.idx);
2020 tx->info->sampler_mask |= m;
2021 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2022 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2023 d3dstt_to_tgsi_tex(sem.sampler_type);
2024 return D3D_OK;
2025 }
2026
2027 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2028 if (IS_VS) {
2029 if (is_input) {
2030 /* linkage outside of shader with vertex declaration */
2031 ureg_DECL_vs_input(ureg, sem.reg.idx);
2032 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2033 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2034 tx->info->num_inputs = sem.reg.idx + 1;
2035 /* NOTE: preserving order in case of indirect access */
2036 } else
2037 if (tx->version.major >= 3) {
2038 /* SM2 output semantic determined by file */
2039 assert(sem.reg.mask != 0);
2040 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2041 tx->info->position_t = TRUE;
2042 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2043 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2044 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2045 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2046
2047 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2048 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2049 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2050 }
2051 }
2052 } else {
2053 if (is_input && tx->version.major >= 3) {
2054 unsigned interp_location = 0;
2055 /* SM3 only, SM2 input semantic determined by file */
2056 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2057 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2058 /* PositionT and tessfactor forbidden */
2059 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2060 return D3DERR_INVALIDCALL;
2061
2062 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2063 /* Position0 is forbidden (likely because vPos already does that) */
2064 if (sem.usage == D3DDECLUSAGE_POSITION)
2065 return D3DERR_INVALIDCALL;
2066 /* Following code is for depth */
2067 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2068 return D3D_OK;
2069 }
2070
2071 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2072 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2073 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2074
2075 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2076 ureg, tgsi.Name, tgsi.Index,
2077 nine_tgsi_to_interp_mode(&tgsi),
2078 0, /* cylwrap */
2079 interp_location, 0, 1);
2080 } else
2081 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2082 /* FragColor or FragDepth */
2083 assert(sem.reg.mask != 0);
2084 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2085 0, 1);
2086 }
2087 }
2088 return D3D_OK;
2089 }
2090
2091 DECL_SPECIAL(DEF)
2092 {
2093 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2094 return D3D_OK;
2095 }
2096
2097 DECL_SPECIAL(DEFB)
2098 {
2099 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2100 return D3D_OK;
2101 }
2102
2103 DECL_SPECIAL(DEFI)
2104 {
2105 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2106 return D3D_OK;
2107 }
2108
2109 DECL_SPECIAL(POW)
2110 {
2111 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2112 struct ureg_src src[2] = {
2113 tx_src_param(tx, &tx->insn.src[0]),
2114 tx_src_param(tx, &tx->insn.src[1])
2115 };
2116 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2117 return D3D_OK;
2118 }
2119
2120 DECL_SPECIAL(RSQ)
2121 {
2122 struct ureg_program *ureg = tx->ureg;
2123 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2124 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2125 struct ureg_dst tmp = tx_scratch(tx);
2126 ureg_RSQ(ureg, tmp, ureg_abs(src));
2127 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2128 return D3D_OK;
2129 }
2130
2131 DECL_SPECIAL(LOG)
2132 {
2133 struct ureg_program *ureg = tx->ureg;
2134 struct ureg_dst tmp = tx_scratch_scalar(tx);
2135 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2136 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2137 ureg_LG2(ureg, tmp, ureg_abs(src));
2138 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2139 return D3D_OK;
2140 }
2141
2142 DECL_SPECIAL(LIT)
2143 {
2144 struct ureg_program *ureg = tx->ureg;
2145 struct ureg_dst tmp = tx_scratch(tx);
2146 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2147 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2148 ureg_LIT(ureg, tmp, src);
2149 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2150 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2151 * it 0^0 if src.w=0, which value is driver dependent. */
2152 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2153 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2154 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2155 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2156 return D3D_OK;
2157 }
2158
2159 DECL_SPECIAL(NRM)
2160 {
2161 struct ureg_program *ureg = tx->ureg;
2162 struct ureg_dst tmp = tx_scratch_scalar(tx);
2163 struct ureg_src nrm = tx_src_scalar(tmp);
2164 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2165 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2166 ureg_DP3(ureg, tmp, src, src);
2167 ureg_RSQ(ureg, tmp, nrm);
2168 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2169 ureg_MUL(ureg, dst, src, nrm);
2170 return D3D_OK;
2171 }
2172
2173 DECL_SPECIAL(DP2ADD)
2174 {
2175 struct ureg_dst tmp = tx_scratch_scalar(tx);
2176 struct ureg_src dp2 = tx_src_scalar(tmp);
2177 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2178 struct ureg_src src[3];
2179 int i;
2180 for (i = 0; i < 3; ++i)
2181 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2182 assert_replicate_swizzle(&src[2]);
2183
2184 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2185 ureg_ADD(tx->ureg, dst, src[2], dp2);
2186
2187 return D3D_OK;
2188 }
2189
2190 DECL_SPECIAL(TEXCOORD)
2191 {
2192 struct ureg_program *ureg = tx->ureg;
2193 const unsigned s = tx->insn.dst[0].idx;
2194 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2195
2196 tx_texcoord_alloc(tx, s);
2197 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2198 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2199
2200 return D3D_OK;
2201 }
2202
2203 DECL_SPECIAL(TEXCOORD_ps14)
2204 {
2205 struct ureg_program *ureg = tx->ureg;
2206 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2207 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2208
2209 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2210
2211 ureg_MOV(ureg, dst, src);
2212
2213 return D3D_OK;
2214 }
2215
2216 DECL_SPECIAL(TEXKILL)
2217 {
2218 struct ureg_src reg;
2219
2220 if (tx->version.major > 1 || tx->version.minor > 3) {
2221 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2222 } else {
2223 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2224 reg = tx->regs.vT[tx->insn.dst[0].idx];
2225 }
2226 if (tx->version.major < 2)
2227 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2228 ureg_KILL_IF(tx->ureg, reg);
2229
2230 return D3D_OK;
2231 }
2232
2233 DECL_SPECIAL(TEXBEM)
2234 {
2235 struct ureg_program *ureg = tx->ureg;
2236 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2237 struct ureg_dst tmp, tmp2, texcoord;
2238 struct ureg_src sample, m00, m01, m10, m11;
2239 struct ureg_src bumpenvlscale, bumpenvloffset;
2240 const int m = tx->insn.dst[0].idx;
2241 const int n = tx->insn.src[0].idx;
2242
2243 assert(tx->version.major == 1);
2244
2245 sample = ureg_DECL_sampler(ureg, m);
2246 tx->info->sampler_mask |= 1 << m;
2247
2248 tx_texcoord_alloc(tx, m);
2249
2250 tmp = tx_scratch(tx);
2251 tmp2 = tx_scratch(tx);
2252 texcoord = tx_scratch(tx);
2253 /*
2254 * Bump-env-matrix:
2255 * 00 is X
2256 * 01 is Y
2257 * 10 is Z
2258 * 11 is W
2259 */
2260 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2261 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2262 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2263 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2264 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2265
2266 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2267 if (m % 2 == 0) {
2268 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2269 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2270 } else {
2271 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2272 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2273 }
2274
2275 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2276
2277 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2278 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2279 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2280 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2281 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2282 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2283 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2284
2285 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2286 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2287 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2288 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2289 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2290 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2291 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2292
2293 /* Now the texture coordinates are in tmp.xy */
2294
2295 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2296 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2297 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2298 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2299 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2300 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2301 bumpenvlscale, bumpenvloffset);
2302 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2303 }
2304
2305 tx->info->bumpenvmat_needed = 1;
2306
2307 return D3D_OK;
2308 }
2309
2310 DECL_SPECIAL(TEXREG2AR)
2311 {
2312 struct ureg_program *ureg = tx->ureg;
2313 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2314 struct ureg_src sample;
2315 const int m = tx->insn.dst[0].idx;
2316 const int n = tx->insn.src[0].idx;
2317 assert(m >= 0 && m > n);
2318
2319 sample = ureg_DECL_sampler(ureg, m);
2320 tx->info->sampler_mask |= 1 << m;
2321 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2322
2323 return D3D_OK;
2324 }
2325
2326 DECL_SPECIAL(TEXREG2GB)
2327 {
2328 struct ureg_program *ureg = tx->ureg;
2329 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2330 struct ureg_src sample;
2331 const int m = tx->insn.dst[0].idx;
2332 const int n = tx->insn.src[0].idx;
2333 assert(m >= 0 && m > n);
2334
2335 sample = ureg_DECL_sampler(ureg, m);
2336 tx->info->sampler_mask |= 1 << m;
2337 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2338
2339 return D3D_OK;
2340 }
2341
2342 DECL_SPECIAL(TEXM3x2PAD)
2343 {
2344 return D3D_OK; /* this is just padding */
2345 }
2346
2347 DECL_SPECIAL(TEXM3x2TEX)
2348 {
2349 struct ureg_program *ureg = tx->ureg;
2350 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2351 struct ureg_src sample;
2352 const int m = tx->insn.dst[0].idx - 1;
2353 const int n = tx->insn.src[0].idx;
2354 assert(m >= 0 && m > n);
2355
2356 tx_texcoord_alloc(tx, m);
2357 tx_texcoord_alloc(tx, m+1);
2358
2359 /* performs the matrix multiplication */
2360 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2361 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2362
2363 sample = ureg_DECL_sampler(ureg, m + 1);
2364 tx->info->sampler_mask |= 1 << (m + 1);
2365 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2366
2367 return D3D_OK;
2368 }
2369
2370 DECL_SPECIAL(TEXM3x3PAD)
2371 {
2372 return D3D_OK; /* this is just padding */
2373 }
2374
2375 DECL_SPECIAL(TEXM3x3SPEC)
2376 {
2377 struct ureg_program *ureg = tx->ureg;
2378 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2379 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2380 struct ureg_src sample;
2381 struct ureg_dst tmp;
2382 const int m = tx->insn.dst[0].idx - 2;
2383 const int n = tx->insn.src[0].idx;
2384 assert(m >= 0 && m > n);
2385
2386 tx_texcoord_alloc(tx, m);
2387 tx_texcoord_alloc(tx, m+1);
2388 tx_texcoord_alloc(tx, m+2);
2389
2390 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2391 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2392 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2393
2394 sample = ureg_DECL_sampler(ureg, m + 2);
2395 tx->info->sampler_mask |= 1 << (m + 2);
2396 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2397
2398 /* At this step, dst = N = (u', w', z').
2399 * We want dst to be the texture sampled at (u'', w'', z''), with
2400 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2401 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2402 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2403 /* at this step tmp.x = 1/N.N */
2404 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2405 /* at this step tmp.y = N.E */
2406 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2407 /* at this step tmp.x = N.E/N.N */
2408 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2409 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2410 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2411 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2412 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2413
2414 return D3D_OK;
2415 }
2416
2417 DECL_SPECIAL(TEXREG2RGB)
2418 {
2419 struct ureg_program *ureg = tx->ureg;
2420 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2421 struct ureg_src sample;
2422 const int m = tx->insn.dst[0].idx;
2423 const int n = tx->insn.src[0].idx;
2424 assert(m >= 0 && m > n);
2425
2426 sample = ureg_DECL_sampler(ureg, m);
2427 tx->info->sampler_mask |= 1 << m;
2428 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2429
2430 return D3D_OK;
2431 }
2432
2433 DECL_SPECIAL(TEXDP3TEX)
2434 {
2435 struct ureg_program *ureg = tx->ureg;
2436 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2437 struct ureg_dst tmp;
2438 struct ureg_src sample;
2439 const int m = tx->insn.dst[0].idx;
2440 const int n = tx->insn.src[0].idx;
2441 assert(m >= 0 && m > n);
2442
2443 tx_texcoord_alloc(tx, m);
2444
2445 tmp = tx_scratch(tx);
2446 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2447 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2448
2449 sample = ureg_DECL_sampler(ureg, m);
2450 tx->info->sampler_mask |= 1 << m;
2451 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2452
2453 return D3D_OK;
2454 }
2455
2456 DECL_SPECIAL(TEXM3x2DEPTH)
2457 {
2458 struct ureg_program *ureg = tx->ureg;
2459 struct ureg_dst tmp;
2460 const int m = tx->insn.dst[0].idx - 1;
2461 const int n = tx->insn.src[0].idx;
2462 assert(m >= 0 && m > n);
2463
2464 tx_texcoord_alloc(tx, m);
2465 tx_texcoord_alloc(tx, m+1);
2466
2467 tmp = tx_scratch(tx);
2468
2469 /* performs the matrix multiplication */
2470 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2471 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2472
2473 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2474 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2475 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2476 /* res = 'w' == 0 ? 1.0 : z/w */
2477 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2478 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2479 /* replace the depth for depth testing with the result */
2480 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2481 TGSI_WRITEMASK_Z, 0, 1);
2482 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2483 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2484 return D3D_OK;
2485 }
2486
2487 DECL_SPECIAL(TEXDP3)
2488 {
2489 struct ureg_program *ureg = tx->ureg;
2490 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2491 const int m = tx->insn.dst[0].idx;
2492 const int n = tx->insn.src[0].idx;
2493 assert(m >= 0 && m > n);
2494
2495 tx_texcoord_alloc(tx, m);
2496
2497 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2498
2499 return D3D_OK;
2500 }
2501
2502 DECL_SPECIAL(TEXM3x3)
2503 {
2504 struct ureg_program *ureg = tx->ureg;
2505 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2506 struct ureg_src sample;
2507 struct ureg_dst E, tmp;
2508 const int m = tx->insn.dst[0].idx - 2;
2509 const int n = tx->insn.src[0].idx;
2510 assert(m >= 0 && m > n);
2511
2512 tx_texcoord_alloc(tx, m);
2513 tx_texcoord_alloc(tx, m+1);
2514 tx_texcoord_alloc(tx, m+2);
2515
2516 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2517 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2518 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2519
2520 switch (tx->insn.opcode) {
2521 case D3DSIO_TEXM3x3:
2522 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2523 break;
2524 case D3DSIO_TEXM3x3TEX:
2525 sample = ureg_DECL_sampler(ureg, m + 2);
2526 tx->info->sampler_mask |= 1 << (m + 2);
2527 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2528 break;
2529 case D3DSIO_TEXM3x3VSPEC:
2530 sample = ureg_DECL_sampler(ureg, m + 2);
2531 tx->info->sampler_mask |= 1 << (m + 2);
2532 E = tx_scratch(tx);
2533 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2534 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2535 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2536 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2537 /* At this step, dst = N = (u', w', z').
2538 * We want dst to be the texture sampled at (u'', w'', z''), with
2539 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2540 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2541 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2542 /* at this step tmp.x = 1/N.N */
2543 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2544 /* at this step tmp.y = N.E */
2545 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2546 /* at this step tmp.x = N.E/N.N */
2547 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2548 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2549 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2550 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2551 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2552 break;
2553 default:
2554 return D3DERR_INVALIDCALL;
2555 }
2556 return D3D_OK;
2557 }
2558
2559 DECL_SPECIAL(TEXDEPTH)
2560 {
2561 struct ureg_program *ureg = tx->ureg;
2562 struct ureg_dst r5;
2563 struct ureg_src r5r, r5g;
2564
2565 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2566
2567 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2568 * r5 won't be used afterward, thus we can use r5.ba */
2569 r5 = tx->regs.r[5];
2570 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2571 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2572
2573 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2574 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2575 /* r5.r = r/g */
2576 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2577 r5r, ureg_imm1f(ureg, 1.0f));
2578 /* replace the depth for depth testing with the result */
2579 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2580 TGSI_WRITEMASK_Z, 0, 1);
2581 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2582
2583 return D3D_OK;
2584 }
2585
2586 DECL_SPECIAL(BEM)
2587 {
2588 struct ureg_program *ureg = tx->ureg;
2589 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2590 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2591 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2592 struct ureg_src m00, m01, m10, m11;
2593 const int m = tx->insn.dst[0].idx;
2594 struct ureg_dst tmp;
2595 /*
2596 * Bump-env-matrix:
2597 * 00 is X
2598 * 01 is Y
2599 * 10 is Z
2600 * 11 is W
2601 */
2602 nine_info_mark_const_f_used(tx->info, 8 + m);
2603 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2604 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2605 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2606 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2607 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2608 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2609 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2610 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2611 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2612 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2613
2614 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2615 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2616 NINE_APPLY_SWIZZLE(src1, X), src0);
2617 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2618 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2619 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2620 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2621
2622 tx->info->bumpenvmat_needed = 1;
2623
2624 return D3D_OK;
2625 }
2626
2627 DECL_SPECIAL(TEXLD)
2628 {
2629 struct ureg_program *ureg = tx->ureg;
2630 unsigned target;
2631 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2632 struct ureg_src src[2] = {
2633 tx_src_param(tx, &tx->insn.src[0]),
2634 tx_src_param(tx, &tx->insn.src[1])
2635 };
2636 assert(tx->insn.src[1].idx >= 0 &&
2637 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2638 target = tx->sampler_targets[tx->insn.src[1].idx];
2639
2640 switch (tx->insn.flags) {
2641 case 0:
2642 ureg_TEX(ureg, dst, target, src[0], src[1]);
2643 break;
2644 case NINED3DSI_TEXLD_PROJECT:
2645 ureg_TXP(ureg, dst, target, src[0], src[1]);
2646 break;
2647 case NINED3DSI_TEXLD_BIAS:
2648 ureg_TXB(ureg, dst, target, src[0], src[1]);
2649 break;
2650 default:
2651 assert(0);
2652 return D3DERR_INVALIDCALL;
2653 }
2654 return D3D_OK;
2655 }
2656
2657 DECL_SPECIAL(TEXLD_14)
2658 {
2659 struct ureg_program *ureg = tx->ureg;
2660 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2661 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2662 const unsigned s = tx->insn.dst[0].idx;
2663 const unsigned t = ps1x_sampler_type(tx->info, s);
2664
2665 tx->info->sampler_mask |= 1 << s;
2666 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2667
2668 return D3D_OK;
2669 }
2670
2671 DECL_SPECIAL(TEX)
2672 {
2673 struct ureg_program *ureg = tx->ureg;
2674 const unsigned s = tx->insn.dst[0].idx;
2675 const unsigned t = ps1x_sampler_type(tx->info, s);
2676 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2677 struct ureg_src src[2];
2678
2679 tx_texcoord_alloc(tx, s);
2680
2681 src[0] = tx->regs.vT[s];
2682 src[1] = ureg_DECL_sampler(ureg, s);
2683 tx->info->sampler_mask |= 1 << s;
2684
2685 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2686
2687 return D3D_OK;
2688 }
2689
2690 DECL_SPECIAL(TEXLDD)
2691 {
2692 unsigned target;
2693 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2694 struct ureg_src src[4] = {
2695 tx_src_param(tx, &tx->insn.src[0]),
2696 tx_src_param(tx, &tx->insn.src[1]),
2697 tx_src_param(tx, &tx->insn.src[2]),
2698 tx_src_param(tx, &tx->insn.src[3])
2699 };
2700 assert(tx->insn.src[1].idx >= 0 &&
2701 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2702 target = tx->sampler_targets[tx->insn.src[1].idx];
2703
2704 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2705 return D3D_OK;
2706 }
2707
2708 DECL_SPECIAL(TEXLDL)
2709 {
2710 unsigned target;
2711 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2712 struct ureg_src src[2] = {
2713 tx_src_param(tx, &tx->insn.src[0]),
2714 tx_src_param(tx, &tx->insn.src[1])
2715 };
2716 assert(tx->insn.src[1].idx >= 0 &&
2717 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2718 target = tx->sampler_targets[tx->insn.src[1].idx];
2719
2720 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2721 return D3D_OK;
2722 }
2723
2724 DECL_SPECIAL(SETP)
2725 {
2726 STUB(D3DERR_INVALIDCALL);
2727 }
2728
2729 DECL_SPECIAL(BREAKP)
2730 {
2731 STUB(D3DERR_INVALIDCALL);
2732 }
2733
2734 DECL_SPECIAL(PHASE)
2735 {
2736 return D3D_OK; /* we don't care about phase */
2737 }
2738
2739 DECL_SPECIAL(COMMENT)
2740 {
2741 return D3D_OK; /* nothing to do */
2742 }
2743
2744
2745 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
2746 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2747
2748 struct sm1_op_info inst_table[] =
2749 {
2750 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2751 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2752 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2753 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2754 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2755 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2756 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2757 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2758 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2759 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2760 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2761 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2762 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2763 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2764 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2765 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2766 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2767 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2768 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2769 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2770
2771 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2772 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2773 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2774 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2775 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2776
2777 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2778 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2779 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2780 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2781 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2782 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2783
2784 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2785
2786 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2787 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2788 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2789 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2790 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2791
2792 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2793 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2794
2795 /* More flow control */
2796 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2797 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2798 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2799 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2800 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2801 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2802 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2803 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2804 /* we don't write to the address register, but a normal register (copied
2805 * when needed to the address register), thus we don't use ARR */
2806 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2807
2808 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2809 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2810
2811 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2812 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2813 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2814 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2815 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2816 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2817 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2818 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2819 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2820 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2821 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2822 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2823 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2824 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2825 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2826 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2827
2828 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2829 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2830 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2831 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2832
2833 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2834
2835 /* More tex stuff */
2836 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2837 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2838 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2839 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2840 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2841 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2842
2843 /* Misc */
2844 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2845 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2846 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2847 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2848 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2849 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2850 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2851 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2852 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2853 };
2854
2855 struct sm1_op_info inst_phase =
2856 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2857
2858 struct sm1_op_info inst_comment =
2859 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2860
2861 static void
2862 create_op_info_map(struct shader_translator *tx)
2863 {
2864 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2865 unsigned i;
2866
2867 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
2868 tx->op_info_map[i] = -1;
2869
2870 if (tx->processor == PIPE_SHADER_VERTEX) {
2871 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
2872 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
2873 if (inst_table[i].vert_version.min <= version &&
2874 inst_table[i].vert_version.max >= version)
2875 tx->op_info_map[inst_table[i].sio] = i;
2876 }
2877 } else {
2878 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
2879 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
2880 if (inst_table[i].frag_version.min <= version &&
2881 inst_table[i].frag_version.max >= version)
2882 tx->op_info_map[inst_table[i].sio] = i;
2883 }
2884 }
2885 }
2886
2887 static inline HRESULT
2888 NineTranslateInstruction_Generic(struct shader_translator *tx)
2889 {
2890 struct ureg_dst dst[1];
2891 struct ureg_src src[4];
2892 unsigned i;
2893
2894 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
2895 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2896 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
2897 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2898
2899 ureg_insn(tx->ureg, tx->insn.info->opcode,
2900 dst, tx->insn.ndst,
2901 src, tx->insn.nsrc);
2902 return D3D_OK;
2903 }
2904
2905 static inline DWORD
2906 TOKEN_PEEK(struct shader_translator *tx)
2907 {
2908 return *(tx->parse);
2909 }
2910
2911 static inline DWORD
2912 TOKEN_NEXT(struct shader_translator *tx)
2913 {
2914 return *(tx->parse)++;
2915 }
2916
2917 static inline void
2918 TOKEN_JUMP(struct shader_translator *tx)
2919 {
2920 if (tx->parse_next && tx->parse != tx->parse_next) {
2921 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2922 tx->parse = tx->parse_next;
2923 }
2924 }
2925
2926 static inline boolean
2927 sm1_parse_eof(struct shader_translator *tx)
2928 {
2929 return TOKEN_PEEK(tx) == NINED3DSP_END;
2930 }
2931
2932 static void
2933 sm1_read_version(struct shader_translator *tx)
2934 {
2935 const DWORD tok = TOKEN_NEXT(tx);
2936
2937 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2938 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2939
2940 switch (tok >> 16) {
2941 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
2942 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
2943 default:
2944 DBG("Invalid shader type: %x\n", tok);
2945 tx->processor = ~0;
2946 break;
2947 }
2948 }
2949
2950 /* This is just to check if we parsed the instruction properly. */
2951 static void
2952 sm1_parse_get_skip(struct shader_translator *tx)
2953 {
2954 const DWORD tok = TOKEN_PEEK(tx);
2955
2956 if (tx->version.major >= 2) {
2957 tx->parse_next = tx->parse + 1 /* this */ +
2958 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2959 } else {
2960 tx->parse_next = NULL; /* TODO: determine from param count */
2961 }
2962 }
2963
2964 static void
2965 sm1_print_comment(const char *comment, UINT size)
2966 {
2967 if (!size)
2968 return;
2969 /* TODO */
2970 }
2971
2972 static void
2973 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2974 {
2975 DWORD tok = TOKEN_PEEK(tx);
2976
2977 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2978 {
2979 const char *comment = "";
2980 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2981 tx->parse += size + 1;
2982
2983 if (print)
2984 sm1_print_comment(comment, size);
2985
2986 tok = TOKEN_PEEK(tx);
2987 }
2988 }
2989
2990 static void
2991 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2992 {
2993 *reg = TOKEN_NEXT(tx);
2994
2995 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2996 {
2997 if (tx->version.major < 2)
2998 *rel = (1 << 31) |
2999 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3000 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
3001 D3DSP_NOSWIZZLE;
3002 else
3003 *rel = TOKEN_NEXT(tx);
3004 }
3005 }
3006
3007 static void
3008 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3009 {
3010 int8_t shift;
3011 dst->file =
3012 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3013 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3014 dst->type = TGSI_RETURN_TYPE_FLOAT;
3015 dst->idx = tok & D3DSP_REGNUM_MASK;
3016 dst->rel = NULL;
3017 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3018 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3019 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3020 dst->shift = (shift & 0x7) - (shift & 0x8);
3021 }
3022
3023 static void
3024 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3025 {
3026 src->file =
3027 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3028 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3029 src->type = TGSI_RETURN_TYPE_FLOAT;
3030 src->idx = tok & D3DSP_REGNUM_MASK;
3031 src->rel = NULL;
3032 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3033 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3034
3035 switch (src->file) {
3036 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3037 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3038 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3039 default:
3040 break;
3041 }
3042 }
3043
3044 static void
3045 sm1_parse_immediate(struct shader_translator *tx,
3046 struct sm1_src_param *imm)
3047 {
3048 imm->file = NINED3DSPR_IMMEDIATE;
3049 imm->idx = INT_MIN;
3050 imm->rel = NULL;
3051 imm->swizzle = NINED3DSP_NOSWIZZLE;
3052 imm->mod = 0;
3053 switch (tx->insn.opcode) {
3054 case D3DSIO_DEF:
3055 imm->type = NINED3DSPTYPE_FLOAT4;
3056 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3057 tx->parse += 4;
3058 break;
3059 case D3DSIO_DEFI:
3060 imm->type = NINED3DSPTYPE_INT4;
3061 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3062 tx->parse += 4;
3063 break;
3064 case D3DSIO_DEFB:
3065 imm->type = NINED3DSPTYPE_BOOL;
3066 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3067 tx->parse += 1;
3068 break;
3069 default:
3070 assert(0);
3071 break;
3072 }
3073 }
3074
3075 static void
3076 sm1_read_dst_param(struct shader_translator *tx,
3077 struct sm1_dst_param *dst,
3078 struct sm1_src_param *rel)
3079 {
3080 DWORD tok_dst, tok_rel = 0;
3081
3082 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3083 sm1_parse_dst_param(dst, tok_dst);
3084 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3085 sm1_parse_src_param(rel, tok_rel);
3086 dst->rel = rel;
3087 }
3088 }
3089
3090 static void
3091 sm1_read_src_param(struct shader_translator *tx,
3092 struct sm1_src_param *src,
3093 struct sm1_src_param *rel)
3094 {
3095 DWORD tok_src, tok_rel = 0;
3096
3097 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3098 sm1_parse_src_param(src, tok_src);
3099 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3100 assert(rel);
3101 sm1_parse_src_param(rel, tok_rel);
3102 src->rel = rel;
3103 }
3104 }
3105
3106 static void
3107 sm1_read_semantic(struct shader_translator *tx,
3108 struct sm1_semantic *sem)
3109 {
3110 const DWORD tok_usg = TOKEN_NEXT(tx);
3111 const DWORD tok_dst = TOKEN_NEXT(tx);
3112
3113 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3114 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3115 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3116
3117 sm1_parse_dst_param(&sem->reg, tok_dst);
3118 }
3119
3120 static void
3121 sm1_parse_instruction(struct shader_translator *tx)
3122 {
3123 struct sm1_instruction *insn = &tx->insn;
3124 HRESULT hr;
3125 DWORD tok;
3126 struct sm1_op_info *info = NULL;
3127 unsigned i;
3128
3129 sm1_parse_comments(tx, TRUE);
3130 sm1_parse_get_skip(tx);
3131
3132 tok = TOKEN_NEXT(tx);
3133
3134 insn->opcode = tok & D3DSI_OPCODE_MASK;
3135 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3136 insn->coissue = !!(tok & D3DSI_COISSUE);
3137 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3138
3139 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3140 int k = tx->op_info_map[insn->opcode];
3141 if (k >= 0) {
3142 assert(k < ARRAY_SIZE(inst_table));
3143 info = &inst_table[k];
3144 }
3145 } else {
3146 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3147 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3148 }
3149 if (!info) {
3150 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3151 TOKEN_JUMP(tx);
3152 return;
3153 }
3154 insn->info = info;
3155 insn->ndst = info->ndst;
3156 insn->nsrc = info->nsrc;
3157
3158 assert(!insn->predicated && "TODO: predicated instructions");
3159
3160 /* check version */
3161 {
3162 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3163 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3164 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3165 if (ver < min || ver > max) {
3166 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3167 min, ver, max);
3168 return;
3169 }
3170 }
3171
3172 for (i = 0; i < insn->ndst; ++i)
3173 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3174 if (insn->predicated)
3175 sm1_read_src_param(tx, &insn->pred, NULL);
3176 for (i = 0; i < insn->nsrc; ++i)
3177 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3178
3179 /* parse here so we can dump them before processing */
3180 if (insn->opcode == D3DSIO_DEF ||
3181 insn->opcode == D3DSIO_DEFI ||
3182 insn->opcode == D3DSIO_DEFB)
3183 sm1_parse_immediate(tx, &tx->insn.src[0]);
3184
3185 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3186 sm1_instruction_check(insn);
3187
3188 if (info->handler)
3189 hr = info->handler(tx);
3190 else
3191 hr = NineTranslateInstruction_Generic(tx);
3192 tx_apply_dst0_modifiers(tx);
3193
3194 if (hr != D3D_OK)
3195 tx->failure = TRUE;
3196 tx->num_scratch = 0; /* reset */
3197
3198 TOKEN_JUMP(tx);
3199 }
3200
3201 static void
3202 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3203 {
3204 unsigned i;
3205
3206 tx->info = info;
3207
3208 tx->byte_code = info->byte_code;
3209 tx->parse = info->byte_code;
3210
3211 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3212 info->input_map[i] = NINE_DECLUSAGE_NONE;
3213 info->num_inputs = 0;
3214
3215 info->position_t = FALSE;
3216 info->point_size = FALSE;
3217
3218 tx->info->const_float_slots = 0;
3219 tx->info->const_int_slots = 0;
3220 tx->info->const_bool_slots = 0;
3221
3222 info->sampler_mask = 0x0;
3223 info->rt_mask = 0x0;
3224
3225 info->lconstf.data = NULL;
3226 info->lconstf.ranges = NULL;
3227
3228 info->bumpenvmat_needed = 0;
3229
3230 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3231 tx->regs.rL[i] = ureg_dst_undef();
3232 }
3233 tx->regs.address = ureg_dst_undef();
3234 tx->regs.a0 = ureg_dst_undef();
3235 tx->regs.p = ureg_dst_undef();
3236 tx->regs.oDepth = ureg_dst_undef();
3237 tx->regs.vPos = ureg_src_undef();
3238 tx->regs.vFace = ureg_src_undef();
3239 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3240 tx->regs.o[i] = ureg_dst_undef();
3241 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3242 tx->regs.oCol[i] = ureg_dst_undef();
3243 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3244 tx->regs.vC[i] = ureg_src_undef();
3245 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3246 tx->regs.vT[i] = ureg_src_undef();
3247
3248 for (i = 0; i < ARRAY_SIZE(tx->lconsti); ++i)
3249 tx->lconsti[i].idx = -1;
3250 for (i = 0; i < ARRAY_SIZE(tx->lconstb); ++i)
3251 tx->lconstb[i].idx = -1;
3252
3253 sm1_read_version(tx);
3254
3255 info->version = (tx->version.major << 4) | tx->version.minor;
3256
3257 create_op_info_map(tx);
3258 }
3259
3260 static void
3261 tx_dtor(struct shader_translator *tx)
3262 {
3263 if (tx->num_inst_labels)
3264 FREE(tx->inst_labels);
3265 FREE(tx->lconstf);
3266 FREE(tx->regs.r);
3267 FREE(tx);
3268 }
3269
3270 static void
3271 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3272 {
3273 struct ureg_program *ureg = tx->ureg;
3274 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3275 struct ureg_src fog_end, fog_coeff, fog_density;
3276 struct ureg_src fog_vs, depth, fog_color;
3277 struct ureg_dst fog_factor;
3278
3279 if (!tx->info->fog_enable) {
3280 ureg_MOV(ureg, oCol0, src_col);
3281 return;
3282 }
3283
3284 if (tx->info->fog_mode != D3DFOG_NONE) {
3285 depth = nine_get_position_input(tx);
3286 depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3287 }
3288
3289 nine_info_mark_const_f_used(tx->info, 33);
3290 fog_color = NINE_CONSTANT_SRC(32);
3291 fog_factor = tx_scratch_scalar(tx);
3292
3293 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3294 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3295 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3296 ureg_SUB(ureg, fog_factor, fog_end, depth);
3297 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3298 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3299 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3300 ureg_MUL(ureg, fog_factor, depth, fog_density);
3301 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3302 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3303 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3304 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3305 ureg_MUL(ureg, fog_factor, depth, fog_density);
3306 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3307 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3308 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3309 } else {
3310 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3311 TGSI_INTERPOLATE_PERSPECTIVE),
3312 TGSI_SWIZZLE_X);
3313 ureg_MOV(ureg, fog_factor, fog_vs);
3314 }
3315
3316 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3317 tx_src_scalar(fog_factor), src_col, fog_color);
3318 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3319 }
3320
/* Query a screen capability; expects a variable named `device` in scope. */
#define GET_CAP(n) \
    (device->screen->get_param(device->screen, PIPE_CAP_##n))
/* Query a capability of the shader stage given by info->type; expects
 * variables named `device` and `info` in scope. */
#define GET_SHADER_CAP(n) \
    (device->screen->get_shader_param(device->screen, info->type, \
                                      PIPE_SHADER_CAP_##n))
3325
3326 HRESULT
3327 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3328 {
3329 struct shader_translator *tx;
3330 HRESULT hr = D3D_OK;
3331 const unsigned processor = info->type;
3332 unsigned s, slot_max;
3333 unsigned max_const_f;
3334
3335 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3336
3337 tx = CALLOC_STRUCT(shader_translator);
3338 if (!tx)
3339 return E_OUTOFMEMORY;
3340 tx_ctor(tx, info);
3341
3342 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3343 hr = D3DERR_INVALIDCALL;
3344 DBG("Unsupported shader version: %u.%u !\n",
3345 tx->version.major, tx->version.minor);
3346 goto out;
3347 }
3348 if (tx->processor != processor) {
3349 hr = D3DERR_INVALIDCALL;
3350 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3351 goto out;
3352 }
3353 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3354 tx->version.major, tx->version.minor);
3355
3356 tx->ureg = ureg_create(processor);
3357 if (!tx->ureg) {
3358 hr = E_OUTOFMEMORY;
3359 goto out;
3360 }
3361
3362 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3363 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3364 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3365 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3366 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3367 tx->texcoord_sn = tx->want_texcoord ?
3368 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3369 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3370 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3371
3372 if (IS_VS) {
3373 tx->num_constf_allowed = NINE_MAX_CONST_F;
3374 } else if (tx->version.major < 2) {/* IS_PS v1 */
3375 tx->num_constf_allowed = 8;
3376 } else if (tx->version.major == 2) {/* IS_PS v2 */
3377 tx->num_constf_allowed = 32;
3378 } else {/* IS_PS v3 */
3379 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3380 }
3381
3382 if (tx->version.major < 2) {
3383 tx->num_consti_allowed = 0;
3384 tx->num_constb_allowed = 0;
3385 } else {
3386 tx->num_consti_allowed = NINE_MAX_CONST_I;
3387 tx->num_constb_allowed = NINE_MAX_CONST_B;
3388 }
3389
3390 /* VS must always write position. Declare it here to make it the 1st output.
3391 * (Some drivers like nv50 are buggy and rely on that.)
3392 */
3393 if (IS_VS) {
3394 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3395 } else {
3396 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3397 if (!tx->shift_wpos)
3398 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3399 }
3400
3401 while (!sm1_parse_eof(tx) && !tx->failure)
3402 sm1_parse_instruction(tx);
3403 tx->parse++; /* for byte_size */
3404
3405 if (tx->failure) {
3406 ERR("Encountered buggy shader\n");
3407 ureg_destroy(tx->ureg);
3408 hr = D3DERR_INVALIDCALL;
3409 goto out;
3410 }
3411
3412 if (IS_PS && tx->version.major < 3) {
3413 if (tx->version.major < 2) {
3414 assert(tx->num_temp); /* there must be color output */
3415 info->rt_mask |= 0x1;
3416 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3417 } else {
3418 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3419 }
3420 }
3421
3422 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3423 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3424 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3425 }
3426
3427 if (info->position_t)
3428 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3429
3430 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3431 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3432 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3433 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3434 info->point_size = TRUE;
3435 }
3436
3437 ureg_END(tx->ureg);
3438
3439 /* record local constants */
3440 if (tx->num_lconstf && tx->indirect_const_access) {
3441 struct nine_range *ranges;
3442 float *data;
3443 int *indices;
3444 unsigned i, k, n;
3445
3446 hr = E_OUTOFMEMORY;
3447
3448 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3449 if (!data)
3450 goto out;
3451 info->lconstf.data = data;
3452
3453 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3454 if (!indices)
3455 goto out;
3456
3457 /* lazy sort, num_lconstf should be small */
3458 for (n = 0; n < tx->num_lconstf; ++n) {
3459 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3460 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3461 k = i;
3462 }
3463 indices[n] = tx->lconstf[k].idx;
3464 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3465 tx->lconstf[k].idx = INT_MAX;
3466 }
3467
3468 /* count ranges */
3469 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3470 if (indices[i] != indices[i - 1] + 1)
3471 ++n;
3472 ranges = MALLOC(n * sizeof(ranges[0]));
3473 if (!ranges) {
3474 FREE(indices);
3475 goto out;
3476 }
3477 info->lconstf.ranges = ranges;
3478
3479 k = 0;
3480 ranges[k].bgn = indices[0];
3481 for (i = 1; i < tx->num_lconstf; ++i) {
3482 if (indices[i] != indices[i - 1] + 1) {
3483 ranges[k].next = &ranges[k + 1];
3484 ranges[k].end = indices[i - 1] + 1;
3485 ++k;
3486 ranges[k].bgn = indices[i];
3487 }
3488 }
3489 ranges[k].end = indices[i - 1] + 1;
3490 ranges[k].next = NULL;
3491 assert(n == (k + 1));
3492
3493 FREE(indices);
3494 hr = D3D_OK;
3495 }
3496
3497 /* r500 */
3498 if (info->const_float_slots > device->max_vs_const_f &&
3499 (info->const_int_slots || info->const_bool_slots))
3500 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3501
3502
3503 if (tx->indirect_const_access) /* vs only */
3504 info->const_float_slots = device->max_vs_const_f;
3505
3506 max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3507 slot_max = info->const_bool_slots > 0 ?
3508 max_const_f + NINE_MAX_CONST_I
3509 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3510 info->const_int_slots > 0 ?
3511 max_const_f + info->const_int_slots :
3512 info->const_float_slots;
3513
3514 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3515
3516 for (s = 0; s < slot_max; s++)
3517 ureg_DECL_constant(tx->ureg, s);
3518
3519 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3520 unsigned count;
3521 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3522 tgsi_dump(toks, 0);
3523 ureg_free_tokens(toks);
3524 }
3525
3526 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3527 if (!info->cso) {
3528 hr = D3DERR_DRIVERINTERNALERROR;
3529 FREE(info->lconstf.data);
3530 FREE(info->lconstf.ranges);
3531 goto out;
3532 }
3533
3534 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3535 out:
3536 tx_dtor(tx);
3537 return hr;
3538 }