gallium: remove helpers converting to/from TGSI_PROCESSOR_*
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
36
/* Debug channel handed to _nine_debug_printf by DUMP() below. */
37 #define DBG_CHANNEL DBG_SHADER
38
/* printf-style dump helper: forwards its arguments to _nine_debug_printf on
 * DBG_CHANNEL, passing NULL as the second argument. */
39 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
40
41
42 struct shader_translator;
43
/* Signature shared by the per-instruction translation handlers; each handler
 * receives the translator state and returns an HRESULT. */
44 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
45
/* Forward declaration; used by sm1_dump_instruction() to print opcode names. */
46 static inline const char *d3dsio_to_string(unsigned opcode);
47
48
/* Shader-type tags for vertex and pixel shaders
 * (NOTE(review): presumably the high word of the version token - confirm). */
49 #define NINED3D_SM1_VS 0xfffe
50 #define NINED3D_SM1_PS 0xffff
51
/* Capacity of the cond_labels[] / loop_labels[] stacks in shader_translator. */
52 #define NINE_MAX_COND_DEPTH 64
53 #define NINE_MAX_LOOP_DEPTH 64
54
55 #define NINED3DSP_END 0x0000ffff
56
/* Value types of an immediate (see sm1_src_param.type). */
57 #define NINED3DSPTYPE_FLOAT4 0
58 #define NINED3DSPTYPE_INT4 1
59 #define NINED3DSPTYPE_BOOL 2
60
/* Pseudo register file used for immediates; one past the last D3DSPR_* file. */
61 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
62
63 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
64 #define NINED3DSP_WRITEMASK_SHIFT 16
65
66 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
67
/* Comparison selectors (greater, equal, greater-or-equal, ...). */
68 #define NINED3DSHADER_REL_OP_GT 1
69 #define NINED3DSHADER_REL_OP_EQ 2
70 #define NINED3DSHADER_REL_OP_GE 3
71 #define NINED3DSHADER_REL_OP_LT 4
72 #define NINED3DSHADER_REL_OP_NE 5
73 #define NINED3DSHADER_REL_OP_LE 6
74
/* Position and mask of the 8-bit opcode-specific flags field. */
75 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
76 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
77
/* Flag values for the TEX opcode; sm1_dump_instruction() prints them as the
 * "p" / "b" suffix. */
78 #define NINED3DSI_TEXLD_PROJECT 0x1
79 #define NINED3DSI_TEXLD_BIAS 0x2
80
/* Write-mask bits, one per component (bit 0 = x ... bit 3 = w). */
81 #define NINED3DSP_WRITEMASK_0 0x1
82 #define NINED3DSP_WRITEMASK_1 0x2
83 #define NINED3DSP_WRITEMASK_2 0x4
84 #define NINED3DSP_WRITEMASK_3 0x8
85 #define NINED3DSP_WRITEMASK_ALL 0xf
86
/* Identity swizzle (.xyzw): two bits per destination component. */
87 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
88
/* Expands to the four TGSI_SWIZZLE_* arguments expected by ureg_swizzle(). */
89 #define NINE_SWIZZLE4(x,y,z,w) \
90 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
91
/* Source operand referring to slot `index` of TGSI_FILE_CONSTANT. */
92 #define NINE_CONSTANT_SRC(index) \
93 ureg_src_register(TGSI_FILE_CONSTANT, index)
94
/* Replicate component `s` of `src` into all four channels. */
95 #define NINE_APPLY_SWIZZLE(src, s) \
96 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
97
/* Constant slot `index` with component `s` replicated. */
98 #define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
99 NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)
100
/* Destination modifiers, shifted down so they fit sm1_dst_param.mod. */
101 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
102 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
103 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
104
105 /*
106 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
107 * BIAS <= PS 1.4 (x-0.5)
108 * BIASNEG <= PS 1.4 (-(x-0.5))
109 * SIGN <= PS 1.4 (2(x-0.5))
110 * SIGNNEG <= PS 1.4 (-2(x-0.5))
111 * COMP <= PS 1.4 (1-x)
112 * X2 = PS 1.4 (2x)
113 * X2NEG = PS 1.4 (-2x)
114 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
115 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
116 * ABS >= SM 3.0 (abs(x))
117 * ABSNEG >= SM 3.0 (-abs(x))
118 * NOT >= SM 2.0 predication only
119 */
/* Source modifiers shifted down to small integers; these values index
 * sm1_mod_str[] and are stored in sm1_src_param.mod. */
120 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
121 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
124 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
125 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
126 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
127 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
128 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
129 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
130 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
131 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
132 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
133 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
134
/* Printable prefix for each source modifier, indexed by NINED3DSPSM_*.
 * sm1_dump_src_param() prints it as "<prefix>(<register>)"; the NONE entry
 * is empty and never printed because that function skips a zero modifier. */
135 static const char *sm1_mod_str[] =
136 {
137 [NINED3DSPSM_NONE] = "",
138 [NINED3DSPSM_NEG] = "-",
139 [NINED3DSPSM_BIAS] = "bias",
140 [NINED3DSPSM_BIASNEG] = "biasneg",
141 [NINED3DSPSM_SIGN] = "sign",
142 [NINED3DSPSM_SIGNNEG] = "signneg",
143 [NINED3DSPSM_COMP] = "comp",
144 [NINED3DSPSM_X2] = "x2",
145 [NINED3DSPSM_X2NEG] = "x2neg",
146 [NINED3DSPSM_DZ] = "dz",
147 [NINED3DSPSM_DW] = "dw",
148 [NINED3DSPSM_ABS] = "abs",
149 [NINED3DSPSM_ABSNEG] = "-abs",
150 [NINED3DSPSM_NOT] = "not"
151 };
152
153 static void
154 sm1_dump_writemask(BYTE mask)
155 {
156 if (mask & 1) DUMP("x"); else DUMP("_");
157 if (mask & 2) DUMP("y"); else DUMP("_");
158 if (mask & 4) DUMP("z"); else DUMP("_");
159 if (mask & 8) DUMP("w"); else DUMP("_");
160 }
161
162 static void
163 sm1_dump_swizzle(BYTE s)
164 {
165 char c[4] = { 'x', 'y', 'z', 'w' };
166 DUMP("%c%c%c%c",
167 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
168 }
169
/* One-letter tag per D3DSPR_* register file, used by the dump helpers.
 * Several files share a letter ('c' for CONST/CONST2/CONST3/CONST4, 'D' for
 * ATTROUT and DEPTHOUT). The last designated entry is D3DSPR_PREDICATE, so
 * NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1) is NOT a valid index. */
170 static const char sm1_file_char[] =
171 {
172 [D3DSPR_TEMP] = 'r',
173 [D3DSPR_INPUT] = 'v',
174 [D3DSPR_CONST] = 'c',
175 [D3DSPR_ADDR] = 'A',
176 [D3DSPR_RASTOUT] = 'R',
177 [D3DSPR_ATTROUT] = 'D',
178 [D3DSPR_OUTPUT] = 'o',
179 [D3DSPR_CONSTINT] = 'I',
180 [D3DSPR_COLOROUT] = 'C',
181 [D3DSPR_DEPTHOUT] = 'D',
182 [D3DSPR_SAMPLER] = 's',
183 [D3DSPR_CONST2] = 'c',
184 [D3DSPR_CONST3] = 'c',
185 [D3DSPR_CONST4] = 'c',
186 [D3DSPR_CONSTBOOL] = 'B',
187 [D3DSPR_LOOP] = 'L',
188 [D3DSPR_TEMPFLOAT16] = 'h',
189 [D3DSPR_MISCTYPE] = 'M',
190 [D3DSPR_LABEL] = 'X',
191 [D3DSPR_PREDICATE] = 'p'
192 };
193
194 static void
195 sm1_dump_reg(BYTE file, INT index)
196 {
197 switch (file) {
198 case D3DSPR_LOOP:
199 DUMP("aL");
200 break;
201 case D3DSPR_COLOROUT:
202 DUMP("oC%i", index);
203 break;
204 case D3DSPR_DEPTHOUT:
205 DUMP("oDepth");
206 break;
207 case D3DSPR_RASTOUT:
208 DUMP("oRast%i", index);
209 break;
210 case D3DSPR_CONSTINT:
211 DUMP("iconst[%i]", index);
212 break;
213 case D3DSPR_CONSTBOOL:
214 DUMP("bconst[%i]", index);
215 break;
216 default:
217 DUMP("%c%i", sm1_file_char[file], index);
218 break;
219 }
220 }
221
/* Parsed form of one SM1 source operand. */
222 struct sm1_src_param
223 {
224 INT idx; /* register index (offset when rel is set) */
225 struct sm1_src_param *rel; /* relative-addressing operand, NULL if none */
226 BYTE file; /* D3DSPR_* or NINED3DSPR_IMMEDIATE */
227 BYTE swizzle; /* 2 bits per component; NINED3DSP_NOSWIZZLE = .xyzw */
228 BYTE mod; /* NINED3DSPSM_* source modifier */
229 BYTE type; /* NINED3DSPTYPE_*; selects the imm member for immediates */
230 union {
231 DWORD d[4];
232 float f[4];
233 int i[4];
234 BOOL b;
235 } imm; /* immediate value, see sm1_dump_immediate() */
236 };
/* Forward declaration; the definition is not part of this section. */
237 static void
238 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
239
/* Parsed form of one SM1 destination operand. */
240 struct sm1_dst_param
241 {
242 INT idx; /* register index (offset when rel is set) */
243 struct sm1_src_param *rel; /* relative-addressing operand, NULL if none */
244 BYTE file; /* D3DSPR_* register file */
245 BYTE mask; /* write mask, NINED3DSP_WRITEMASK_* bits */
246 BYTE mod; /* NINED3DSPDM_* bits (saturate / partial precision / centroid) */
247 int8_t shift; /* sint4 */ /* >0 dumps as "*2^n", <0 as "/2^n" */
248 BYTE type;
249 };
250
251 static inline void
252 assert_replicate_swizzle(const struct ureg_src *reg)
253 {
254 assert(reg->SwizzleY == reg->SwizzleX &&
255 reg->SwizzleZ == reg->SwizzleX &&
256 reg->SwizzleW == reg->SwizzleX);
257 }
258
259 static void
260 sm1_dump_immediate(const struct sm1_src_param *param)
261 {
262 switch (param->type) {
263 case NINED3DSPTYPE_FLOAT4:
264 DUMP("{ %f %f %f %f }",
265 param->imm.f[0], param->imm.f[1],
266 param->imm.f[2], param->imm.f[3]);
267 break;
268 case NINED3DSPTYPE_INT4:
269 DUMP("{ %i %i %i %i }",
270 param->imm.i[0], param->imm.i[1],
271 param->imm.i[2], param->imm.i[3]);
272 break;
273 case NINED3DSPTYPE_BOOL:
274 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
275 break;
276 default:
277 assert(0);
278 break;
279 }
280 }
281
282 static void
283 sm1_dump_src_param(const struct sm1_src_param *param)
284 {
285 if (param->file == NINED3DSPR_IMMEDIATE) {
286 assert(!param->mod &&
287 !param->rel &&
288 param->swizzle == NINED3DSP_NOSWIZZLE);
289 sm1_dump_immediate(param);
290 return;
291 }
292
293 if (param->mod)
294 DUMP("%s(", sm1_mod_str[param->mod]);
295 if (param->rel) {
296 DUMP("%c[", sm1_file_char[param->file]);
297 sm1_dump_src_param(param->rel);
298 DUMP("+%i]", param->idx);
299 } else {
300 sm1_dump_reg(param->file, param->idx);
301 }
302 if (param->mod)
303 DUMP(")");
304 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
305 DUMP(".");
306 sm1_dump_swizzle(param->swizzle);
307 }
308 }
309
310 static void
311 sm1_dump_dst_param(const struct sm1_dst_param *param)
312 {
313 if (param->mod & NINED3DSPDM_SATURATE)
314 DUMP("sat ");
315 if (param->mod & NINED3DSPDM_PARTIALP)
316 DUMP("pp ");
317 if (param->mod & NINED3DSPDM_CENTROID)
318 DUMP("centroid ");
319 if (param->shift < 0)
320 DUMP("/%u ", 1 << -param->shift);
321 if (param->shift > 0)
322 DUMP("*%u ", 1 << param->shift);
323
324 if (param->rel) {
325 DUMP("%c[", sm1_file_char[param->file]);
326 sm1_dump_src_param(param->rel);
327 DUMP("+%i]", param->idx);
328 } else {
329 sm1_dump_reg(param->file, param->idx);
330 }
331 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
332 DUMP(".");
333 sm1_dump_writemask(param->mask);
334 }
335 }
336
/* Declaration data filled in by sm1_read_semantic(): the declared register
 * plus its usage / sampler information. */
337 struct sm1_semantic
338 {
339 struct sm1_dst_param reg; /* declared register */
340 BYTE sampler_type;
341 D3DDECLUSAGE usage;
342 BYTE usage_idx;
343 };
344
/* Static description of one SM1 opcode: how it maps to TGSI, which shader
 * versions accept it and how many operands the generic parser reads. */
345 struct sm1_op_info
346 {
347 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
348 * should be ignored completely */
349 unsigned sio;
350 unsigned opcode; /* TGSI_OPCODE_x */
351
352 /* versions are still set even if handler is set */
353 struct {
354 unsigned min;
355 unsigned max;
356 } vert_version, frag_version;
357
358 /* number of regs parsed outside of special handler */
359 unsigned ndst;
360 unsigned nsrc;
361
362 /* some instructions don't map perfectly, so use a special handler */
363 translate_instruction_func handler;
364 };
365
/* One decoded SM1 instruction with storage for all of its operands. */
366 struct sm1_instruction
367 {
368 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
369 BYTE flags; /* opcode-specific flags, e.g. NINED3DSI_TEXLD_* for TEX */
370 BOOL coissue; /* dumped as the "_co" suffix */
371 BOOL predicated; /* when set, pred holds the predicate operand */
372 BYTE ndst; /* number of valid entries in dst[] */
373 BYTE nsrc; /* number of valid entries in src[] */
374 struct sm1_src_param src[4];
375 struct sm1_src_param src_rel[4]; /* NOTE(review): presumably backing store for src[i].rel - confirm */
376 struct sm1_src_param pred;
377 struct sm1_src_param dst_rel[1]; /* NOTE(review): presumably backing store for dst[0].rel - confirm */
378 struct sm1_dst_param dst[1];
379
380 struct sm1_op_info *info;
381 };
382
383 static void
384 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
385 {
386 unsigned i;
387
388 /* no info stored for these: */
389 if (insn->opcode == D3DSIO_DCL)
390 return;
391 for (i = 0; i < indent; ++i)
392 DUMP(" ");
393
394 if (insn->predicated) {
395 DUMP("@");
396 sm1_dump_src_param(&insn->pred);
397 DUMP(" ");
398 }
399 DUMP("%s", d3dsio_to_string(insn->opcode));
400 if (insn->flags) {
401 switch (insn->opcode) {
402 case D3DSIO_TEX:
403 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
404 break;
405 default:
406 DUMP("_%x", insn->flags);
407 break;
408 }
409 }
410 if (insn->coissue)
411 DUMP("_co");
412 DUMP(" ");
413
414 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
415 sm1_dump_dst_param(&insn->dst[i]);
416 DUMP(" ");
417 }
418
419 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
420 sm1_dump_src_param(&insn->src[i]);
421 DUMP(" ");
422 }
423 if (insn->opcode == D3DSIO_DEF ||
424 insn->opcode == D3DSIO_DEFI ||
425 insn->opcode == D3DSIO_DEFB)
426 sm1_dump_immediate(&insn->src[0]);
427
428 DUMP("\n");
429 }
430
/* A constant defined inside the shader itself (see tx_set_lconst{f,i,b}). */
431 struct sm1_local_const
432 {
433 INT idx; /* constant register index this entry overrides */
434 struct ureg_src reg; /* ureg immediate holding the value */
435 union {
436 boolean b;
437 float f[4];
438 int32_t i[4];
439 } imm; /* raw value; in this section only tx_set_lconstf() fills it */
440 };
441
/* Complete state of one SM1 -> TGSI translation: bytecode cursor, target
 * ureg program, lazily declared registers, control-flow stacks and
 * shader-local constants. */
442 struct shader_translator
443 {
444 const DWORD *byte_code;
445 const DWORD *parse;
446 const DWORD *parse_next;
447
448 struct ureg_program *ureg; /* TGSI program being built */
449
450 /* shader version */
451 struct {
452 BYTE major;
453 BYTE minor;
454 } version;
455 unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
456 unsigned num_constf_allowed; /* upper bounds checked by tx_lconst{f,i,b} */
457 unsigned num_consti_allowed;
458 unsigned num_constb_allowed;
459
460 boolean native_integers; /* emit integer immediates/ops instead of floats */
461 boolean inline_subroutines;
462 boolean lower_preds;
463 boolean want_texcoord;
464 boolean shift_wpos; /* subtract 0.5 from the position input's x/y */
465 boolean wpos_is_sysval; /* position is a system value, not an fs input */
466 boolean face_is_sysval_integer; /* face is an integer system value */
467 unsigned texcoord_sn; /* semantic name used for texcoord inputs/outputs */
468
469 struct sm1_instruction insn; /* current instruction */
470
/* Registers declared on demand; an entry stays "undef" until first use. */
471 struct {
472 struct ureg_dst *r; /* temporaries, grown by tx_temp_alloc() */
473 struct ureg_dst oPos;
474 struct ureg_dst oFog;
475 struct ureg_dst oPts;
476 struct ureg_dst oCol[4];
477 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
478 struct ureg_dst oDepth;
479 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
480 struct ureg_src vPos;
481 struct ureg_src vFace;
482 struct ureg_src s;
483 struct ureg_dst p; /* predicate register */
484 struct ureg_dst address; /* TGSI address register */
485 struct ureg_dst a0; /* temporary that is loaded into `address` before use */
486 struct ureg_dst tS[8]; /* texture stage registers */
487 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
488 struct ureg_dst t[5]; /* scratch TEMPs */
489 struct ureg_src vC[2]; /* PS color in */
490 struct ureg_src vT[8]; /* PS texcoord in */
491 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
492 } regs;
493 unsigned num_temp; /* Elements(regs.r) */
494 unsigned num_scratch; /* number of regs.t[] entries handed out so far */
495 unsigned loop_depth; /* current loop/rep nesting level */
496 unsigned loop_depth_max;
497 unsigned cond_depth; /* current conditional nesting level */
498 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
499 unsigned cond_labels[NINE_MAX_COND_DEPTH];
500 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
501
502 unsigned *inst_labels; /* LABEL op */
503 unsigned num_inst_labels;
504
505 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
506
507 struct sm1_local_const *lconstf; /* grown in steps of 8 by tx_set_lconstf() */
508 unsigned num_lconstf;
509 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
510 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
511
512 boolean indirect_const_access; /* set when a float constant is read with relative addressing */
513 boolean failure; /* sticky error flag set by the helpers on invalid input */
514
515 struct nine_shader_info *info;
516
517 int16_t op_info_map[D3DSIO_BREAKP + 1];
518 };
519
/* Shader-stage tests; both expect a `tx` translator pointer in scope. */
520 #define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
521 #define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
522
/* Flag the translation as failed and return from a void function when `cond`
 * holds. NOTE: this expands to a bare `if` block and callers in this file
 * invoke it WITHOUT a trailing semicolon, so it must not be turned into a
 * do { } while (0) macro without updating every call site. */
523 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
524
/* Forward declaration; the definition is not part of this section. */
525 static void
526 sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
527
528 static void
529 sm1_instruction_check(const struct sm1_instruction *insn)
530 {
531 if (insn->opcode == D3DSIO_CRS)
532 {
533 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
534 {
535 DBG("CRS.mask.w\n");
536 }
537 }
538 }
539
540 static boolean
541 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
542 {
543 INT i;
544 if (index < 0 || index >= tx->num_constf_allowed) {
545 tx->failure = TRUE;
546 return FALSE;
547 }
548 for (i = 0; i < tx->num_lconstf; ++i) {
549 if (tx->lconstf[i].idx == index) {
550 *src = tx->lconstf[i].reg;
551 return TRUE;
552 }
553 }
554 return FALSE;
555 }
556 static boolean
557 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
558 {
559 if (index < 0 || index >= tx->num_consti_allowed) {
560 tx->failure = TRUE;
561 return FALSE;
562 }
563 if (tx->lconsti[index].idx == index)
564 *src = tx->lconsti[index].reg;
565 return tx->lconsti[index].idx == index;
566 }
567 static boolean
568 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
569 {
570 if (index < 0 || index >= tx->num_constb_allowed) {
571 tx->failure = TRUE;
572 return FALSE;
573 }
574 if (tx->lconstb[index].idx == index)
575 *src = tx->lconstb[index].reg;
576 return tx->lconstb[index].idx == index;
577 }
578
579 static void
580 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
581 {
582 unsigned n;
583
584 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
585
586 for (n = 0; n < tx->num_lconstf; ++n)
587 if (tx->lconstf[n].idx == index)
588 break;
589 if (n == tx->num_lconstf) {
590 if ((n % 8) == 0) {
591 tx->lconstf = REALLOC(tx->lconstf,
592 (n + 0) * sizeof(tx->lconstf[0]),
593 (n + 8) * sizeof(tx->lconstf[0]));
594 assert(tx->lconstf);
595 }
596 tx->num_lconstf++;
597 }
598 tx->lconstf[n].idx = index;
599 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
600
601 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
602 }
603 static void
604 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
605 {
606 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
607 tx->lconsti[index].idx = index;
608 tx->lconsti[index].reg = tx->native_integers ?
609 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
610 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
611 }
612 static void
613 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
614 {
615 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
616 tx->lconstb[index].idx = index;
617 tx->lconstb[index].reg = tx->native_integers ?
618 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
619 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
620 }
621
622 static inline struct ureg_dst
623 tx_scratch(struct shader_translator *tx)
624 {
625 if (tx->num_scratch >= Elements(tx->regs.t)) {
626 tx->failure = TRUE;
627 return tx->regs.t[0];
628 }
629 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
630 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
631 return tx->regs.t[tx->num_scratch++];
632 }
633
634 static inline struct ureg_dst
635 tx_scratch_scalar(struct shader_translator *tx)
636 {
637 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
638 }
639
640 static inline struct ureg_src
641 tx_src_scalar(struct ureg_dst dst)
642 {
643 struct ureg_src src = ureg_src(dst);
644 int c = ffs(dst.WriteMask) - 1;
645 if (dst.WriteMask == (1 << c))
646 src = ureg_scalar(src, c);
647 return src;
648 }
649
650 static inline void
651 tx_temp_alloc(struct shader_translator *tx, INT idx)
652 {
653 assert(idx >= 0);
654 if (idx >= tx->num_temp) {
655 unsigned k = tx->num_temp;
656 unsigned n = idx + 1;
657 tx->regs.r = REALLOC(tx->regs.r,
658 k * sizeof(tx->regs.r[0]),
659 n * sizeof(tx->regs.r[0]));
660 for (; k < n; ++k)
661 tx->regs.r[k] = ureg_dst_undef();
662 tx->num_temp = n;
663 }
664 if (ureg_dst_is_undef(tx->regs.r[idx]))
665 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
666 }
667
668 static inline void
669 tx_addr_alloc(struct shader_translator *tx, INT idx)
670 {
671 assert(idx == 0);
672 if (ureg_dst_is_undef(tx->regs.address))
673 tx->regs.address = ureg_DECL_address(tx->ureg);
674 if (ureg_dst_is_undef(tx->regs.a0))
675 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
676 }
677
678 static inline void
679 tx_pred_alloc(struct shader_translator *tx, INT idx)
680 {
681 assert(idx == 0);
682 if (ureg_dst_is_undef(tx->regs.p))
683 tx->regs.p = ureg_DECL_predicate(tx->ureg);
684 }
685
686 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
687 * the projection should be applied on the texture. It doesn't
688 * apply on texkill.
689 * The doc is very imprecise here (it says the projection is done
690 * before rasterization, thus in vs, which seems wrong since ps instructions
691 * are affected differently)
692 * For now we only apply to the ps TEX instruction and TEXBEM.
693 * Perhaps some other instructions would need it */
694 static inline void
695 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
696 struct ureg_src src, INT idx)
697 {
698 struct ureg_dst tmp;
699 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
700
701 /* no projection */
702 if (dim == 1) {
703 ureg_MOV(tx->ureg, dst, src);
704 } else {
705 tmp = tx_scratch_scalar(tx);
706 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
707 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
708 }
709 }
710
711 static inline void
712 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
713 unsigned target, struct ureg_src src0,
714 struct ureg_src src1, INT idx)
715 {
716 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
717 struct ureg_dst tmp;
718
719 /* dim == 1: no projection
720 * Looks like must be disabled when it makes no
721 * sense according the texture dimensions
722 */
723 if (dim == 1 || dim <= target) {
724 ureg_TEX(tx->ureg, dst, target, src0, src1);
725 } else if (dim == 4) {
726 ureg_TXP(tx->ureg, dst, target, src0, src1);
727 } else {
728 tmp = tx_scratch(tx);
729 apply_ps1x_projection(tx, tmp, src0, idx);
730 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
731 }
732 }
733
734 static inline void
735 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
736 {
737 assert(IS_PS);
738 assert(idx >= 0 && idx < Elements(tx->regs.vT));
739 if (ureg_src_is_undef(tx->regs.vT[idx]))
740 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
741 TGSI_INTERPOLATE_PERSPECTIVE);
742 }
743
744 static inline unsigned *
745 tx_bgnloop(struct shader_translator *tx)
746 {
747 tx->loop_depth++;
748 if (tx->loop_depth_max < tx->loop_depth)
749 tx->loop_depth_max = tx->loop_depth;
750 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
751 return &tx->loop_labels[tx->loop_depth - 1];
752 }
753
754 static inline unsigned *
755 tx_endloop(struct shader_translator *tx)
756 {
757 assert(tx->loop_depth);
758 tx->loop_depth--;
759 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
760 ureg_get_instruction_number(tx->ureg));
761 return &tx->loop_labels[tx->loop_depth];
762 }
763
764 static struct ureg_dst
765 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
766 {
767 const unsigned l = tx->loop_depth - 1;
768
769 if (!tx->loop_depth)
770 {
771 DBG("loop counter requested outside of loop\n");
772 return ureg_dst_undef();
773 }
774
775 if (ureg_dst_is_undef(tx->regs.rL[l])) {
776 /* loop or rep ctr creation */
777 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
778 tx->loop_or_rep[l] = loop_or_rep;
779 }
780 /* loop - rep - endloop - endrep not allowed */
781 assert(tx->loop_or_rep[l] == loop_or_rep);
782
783 return tx->regs.rL[l];
784 }
785
786 static struct ureg_src
787 tx_get_loopal(struct shader_translator *tx)
788 {
789 int loop_level = tx->loop_depth - 1;
790
791 while (loop_level >= 0) {
792 /* handle loop - rep - endrep - endloop case */
793 if (tx->loop_or_rep[loop_level])
794 /* the value is in the loop counter y component (nine implementation) */
795 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
796 loop_level--;
797 }
798
799 DBG("aL counter requested outside of loop\n");
800 return ureg_src_undef();
801 }
802
803 static inline unsigned *
804 tx_cond(struct shader_translator *tx)
805 {
806 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
807 tx->cond_depth++;
808 return &tx->cond_labels[tx->cond_depth - 1];
809 }
810
811 static inline unsigned *
812 tx_elsecond(struct shader_translator *tx)
813 {
814 assert(tx->cond_depth);
815 return &tx->cond_labels[tx->cond_depth - 1];
816 }
817
818 static inline void
819 tx_endcond(struct shader_translator *tx)
820 {
821 assert(tx->cond_depth);
822 tx->cond_depth--;
823 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
824 ureg_get_instruction_number(tx->ureg));
825 }
826
827 static inline struct ureg_dst
828 nine_ureg_dst_register(unsigned file, int index)
829 {
830 return ureg_dst(ureg_src_register(file, index));
831 }
832
833 static inline struct ureg_src
834 nine_get_position_input(struct shader_translator *tx)
835 {
836 struct ureg_program *ureg = tx->ureg;
837
838 if (tx->wpos_is_sysval)
839 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
840 else
841 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
842 0, TGSI_INTERPOLATE_LINEAR);
843 }
844
845 static struct ureg_src
846 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
847 {
848 struct ureg_program *ureg = tx->ureg;
849 struct ureg_src src;
850 struct ureg_dst tmp;
851
852 switch (param->file)
853 {
854 case D3DSPR_TEMP:
855 assert(!param->rel);
856 tx_temp_alloc(tx, param->idx);
857 src = ureg_src(tx->regs.r[param->idx]);
858 break;
859 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
860 case D3DSPR_ADDR:
861 assert(!param->rel);
862 if (IS_VS) {
863 assert(param->idx == 0);
864 /* the address register (vs only) must be
865 * assigned before use */
866 assert(!ureg_dst_is_undef(tx->regs.a0));
867 /* Round to lowest for vs1.1 (contrary to the doc), else
868 * round to nearest */
869 if (tx->version.major < 2 && tx->version.minor < 2)
870 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
871 else
872 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
873 src = ureg_src(tx->regs.address);
874 } else {
875 if (tx->version.major < 2 && tx->version.minor < 4) {
876 /* no subroutines, so should be defined */
877 src = ureg_src(tx->regs.tS[param->idx]);
878 } else {
879 tx_texcoord_alloc(tx, param->idx);
880 src = tx->regs.vT[param->idx];
881 }
882 }
883 break;
884 case D3DSPR_INPUT:
885 if (IS_VS) {
886 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
887 } else {
888 if (tx->version.major < 3) {
889 assert(!param->rel);
890 src = ureg_DECL_fs_input_cyl_centroid(
891 ureg, TGSI_SEMANTIC_COLOR, param->idx,
892 TGSI_INTERPOLATE_COLOR, 0,
893 tx->info->force_color_in_centroid ?
894 TGSI_INTERPOLATE_LOC_CENTROID : 0,
895 0, 1);
896 } else {
897 assert(!param->rel); /* TODO */
898 assert(param->idx < Elements(tx->regs.v));
899 src = tx->regs.v[param->idx];
900 }
901 }
902 break;
903 case D3DSPR_PREDICATE:
904 assert(!param->rel);
905 tx_pred_alloc(tx, param->idx);
906 src = ureg_src(tx->regs.p);
907 break;
908 case D3DSPR_SAMPLER:
909 assert(param->mod == NINED3DSPSM_NONE);
910 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
911 assert(!param->rel);
912 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
913 break;
914 case D3DSPR_CONST:
915 assert(!param->rel || IS_VS);
916 if (param->rel)
917 tx->indirect_const_access = TRUE;
918 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
919 if (!param->rel)
920 nine_info_mark_const_f_used(tx->info, param->idx);
921 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
922 }
923 if (!IS_VS && tx->version.major < 2) {
924 /* ps 1.X clamps constants */
925 tmp = tx_scratch(tx);
926 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
927 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
928 src = ureg_src(tmp);
929 }
930 break;
931 case D3DSPR_CONST2:
932 case D3DSPR_CONST3:
933 case D3DSPR_CONST4:
934 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
935 assert(!"CONST2/3/4");
936 src = ureg_imm1f(ureg, 0.0f);
937 break;
938 case D3DSPR_CONSTINT:
939 /* relative adressing only possible for float constants in vs */
940 assert(!param->rel);
941 if (!tx_lconsti(tx, &src, param->idx)) {
942 nine_info_mark_const_i_used(tx->info, param->idx);
943 src = ureg_src_register(TGSI_FILE_CONSTANT,
944 tx->info->const_i_base + param->idx);
945 }
946 break;
947 case D3DSPR_CONSTBOOL:
948 assert(!param->rel);
949 if (!tx_lconstb(tx, &src, param->idx)) {
950 char r = param->idx / 4;
951 char s = param->idx & 3;
952 nine_info_mark_const_b_used(tx->info, param->idx);
953 src = ureg_src_register(TGSI_FILE_CONSTANT,
954 tx->info->const_b_base + r);
955 src = ureg_swizzle(src, s, s, s, s);
956 }
957 break;
958 case D3DSPR_LOOP:
959 if (ureg_dst_is_undef(tx->regs.address))
960 tx->regs.address = ureg_DECL_address(ureg);
961 if (!tx->native_integers)
962 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
963 else
964 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
965 src = ureg_src(tx->regs.address);
966 break;
967 case D3DSPR_MISCTYPE:
968 switch (param->idx) {
969 case D3DSMO_POSITION:
970 if (ureg_src_is_undef(tx->regs.vPos))
971 tx->regs.vPos = nine_get_position_input(tx);
972 if (tx->shift_wpos) {
973 /* TODO: do this only once */
974 struct ureg_dst wpos = tx_scratch(tx);
975 ureg_SUB(ureg, wpos, tx->regs.vPos,
976 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
977 src = ureg_src(wpos);
978 } else {
979 src = tx->regs.vPos;
980 }
981 break;
982 case D3DSMO_FACE:
983 if (ureg_src_is_undef(tx->regs.vFace)) {
984 if (tx->face_is_sysval_integer) {
985 tmp = tx_scratch(tx);
986 tx->regs.vFace =
987 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
988
989 /* convert bool to float */
990 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
991 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
992 tx->regs.vFace = ureg_src(tmp);
993 } else {
994 tx->regs.vFace = ureg_DECL_fs_input(ureg,
995 TGSI_SEMANTIC_FACE, 0,
996 TGSI_INTERPOLATE_CONSTANT);
997 }
998 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
999 }
1000 src = tx->regs.vFace;
1001 break;
1002 default:
1003 assert(!"invalid src D3DSMO");
1004 break;
1005 }
1006 assert(!param->rel);
1007 break;
1008 case D3DSPR_TEMPFLOAT16:
1009 break;
1010 default:
1011 assert(!"invalid src D3DSPR");
1012 }
1013 if (param->rel)
1014 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1015
1016 switch (param->mod) {
1017 case NINED3DSPSM_DW:
1018 tmp = tx_scratch(tx);
1019 /* NOTE: app is not allowed to read w with this modifier */
1020 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
1021 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1022 src = ureg_src(tmp);
1023 break;
1024 case NINED3DSPSM_DZ:
1025 tmp = tx_scratch(tx);
1026 /* NOTE: app is not allowed to read z with this modifier */
1027 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
1028 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1029 src = ureg_src(tmp);
1030 break;
1031 default:
1032 break;
1033 }
1034
1035 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1036 src = ureg_swizzle(src,
1037 (param->swizzle >> 0) & 0x3,
1038 (param->swizzle >> 2) & 0x3,
1039 (param->swizzle >> 4) & 0x3,
1040 (param->swizzle >> 6) & 0x3);
1041
1042 switch (param->mod) {
1043 case NINED3DSPSM_ABS:
1044 src = ureg_abs(src);
1045 break;
1046 case NINED3DSPSM_ABSNEG:
1047 src = ureg_negate(ureg_abs(src));
1048 break;
1049 case NINED3DSPSM_NEG:
1050 src = ureg_negate(src);
1051 break;
1052 case NINED3DSPSM_BIAS:
1053 tmp = tx_scratch(tx);
1054 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
1055 src = ureg_src(tmp);
1056 break;
1057 case NINED3DSPSM_BIASNEG:
1058 tmp = tx_scratch(tx);
1059 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
1060 src = ureg_src(tmp);
1061 break;
1062 case NINED3DSPSM_NOT:
1063 if (tx->native_integers) {
1064 tmp = tx_scratch(tx);
1065 ureg_NOT(ureg, tmp, src);
1066 src = ureg_src(tmp);
1067 break;
1068 }
1069 /* fall through */
1070 case NINED3DSPSM_COMP:
1071 tmp = tx_scratch(tx);
1072 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1073 src = ureg_src(tmp);
1074 break;
1075 case NINED3DSPSM_DZ:
1076 case NINED3DSPSM_DW:
1077 /* Already handled*/
1078 break;
1079 case NINED3DSPSM_SIGN:
1080 tmp = tx_scratch(tx);
1081 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1082 src = ureg_src(tmp);
1083 break;
1084 case NINED3DSPSM_SIGNNEG:
1085 tmp = tx_scratch(tx);
1086 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1087 src = ureg_src(tmp);
1088 break;
1089 case NINED3DSPSM_X2:
1090 tmp = tx_scratch(tx);
1091 ureg_ADD(ureg, tmp, src, src);
1092 src = ureg_src(tmp);
1093 break;
1094 case NINED3DSPSM_X2NEG:
1095 tmp = tx_scratch(tx);
1096 ureg_ADD(ureg, tmp, src, src);
1097 src = ureg_negate(ureg_src(tmp));
1098 break;
1099 default:
1100 assert(param->mod == NINED3DSPSM_NONE);
1101 break;
1102 }
1103
1104 return src;
1105 }
1106
1107 static struct ureg_dst
1108 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1109 {
1110 struct ureg_dst dst;
1111
1112 switch (param->file)
1113 {
1114 case D3DSPR_TEMP:
1115 assert(!param->rel);
1116 tx_temp_alloc(tx, param->idx);
1117 dst = tx->regs.r[param->idx];
1118 break;
1119 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1120 case D3DSPR_ADDR:
1121 assert(!param->rel);
1122 if (tx->version.major < 2 && !IS_VS) {
1123 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1124 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1125 dst = tx->regs.tS[param->idx];
1126 } else
1127 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1128 tx_texcoord_alloc(tx, param->idx);
1129 dst = ureg_dst(tx->regs.vT[param->idx]);
1130 } else {
1131 tx_addr_alloc(tx, param->idx);
1132 dst = tx->regs.a0;
1133 }
1134 break;
1135 case D3DSPR_RASTOUT:
1136 assert(!param->rel);
1137 switch (param->idx) {
1138 case 0:
1139 if (ureg_dst_is_undef(tx->regs.oPos))
1140 tx->regs.oPos =
1141 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1142 dst = tx->regs.oPos;
1143 break;
1144 case 1:
1145 if (ureg_dst_is_undef(tx->regs.oFog))
1146 tx->regs.oFog =
1147 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1148 dst = tx->regs.oFog;
1149 break;
1150 case 2:
1151 if (ureg_dst_is_undef(tx->regs.oPts))
1152 tx->regs.oPts =
1153 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1154 dst = tx->regs.oPts;
1155 break;
1156 default:
1157 assert(0);
1158 break;
1159 }
1160 break;
1161 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1162 case D3DSPR_OUTPUT:
1163 if (tx->version.major < 3) {
1164 assert(!param->rel);
1165 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1166 } else {
1167 assert(!param->rel); /* TODO */
1168 assert(param->idx < Elements(tx->regs.o));
1169 dst = tx->regs.o[param->idx];
1170 }
1171 break;
1172 case D3DSPR_ATTROUT: /* VS */
1173 case D3DSPR_COLOROUT: /* PS */
1174 assert(param->idx >= 0 && param->idx < 4);
1175 assert(!param->rel);
1176 tx->info->rt_mask |= 1 << param->idx;
1177 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1178 /* ps < 3: oCol[0] will have fog blending afterward */
1179 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1180 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1181 } else {
1182 tx->regs.oCol[param->idx] =
1183 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1184 }
1185 }
1186 dst = tx->regs.oCol[param->idx];
1187 if (IS_VS && tx->version.major < 3)
1188 dst = ureg_saturate(dst);
1189 break;
1190 case D3DSPR_DEPTHOUT:
1191 assert(!param->rel);
1192 if (ureg_dst_is_undef(tx->regs.oDepth))
1193 tx->regs.oDepth =
1194 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1195 TGSI_WRITEMASK_Z, 0, 1);
1196 dst = tx->regs.oDepth; /* XXX: must write .z component */
1197 break;
1198 case D3DSPR_PREDICATE:
1199 assert(!param->rel);
1200 tx_pred_alloc(tx, param->idx);
1201 dst = tx->regs.p;
1202 break;
1203 case D3DSPR_TEMPFLOAT16:
1204 DBG("unhandled D3DSPR: %u\n", param->file);
1205 break;
1206 default:
1207 assert(!"invalid dst D3DSPR");
1208 break;
1209 }
1210 if (param->rel)
1211 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1212
1213 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1214 dst = ureg_writemask(dst, param->mask);
1215 if (param->mod & NINED3DSPDM_SATURATE)
1216 dst = ureg_saturate(dst);
1217
1218 return dst;
1219 }
1220
1221 static struct ureg_dst
1222 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1223 {
1224 if (param->shift) {
1225 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1226 return tx->regs.tdst;
1227 }
1228 return _tx_dst_param(tx, param);
1229 }
1230
1231 static void
1232 tx_apply_dst0_modifiers(struct shader_translator *tx)
1233 {
1234 struct ureg_dst rdst;
1235 float f;
1236
1237 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1238 return;
1239 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1240
1241 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1242
1243 if (tx->insn.dst[0].shift < 0)
1244 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1245 else
1246 f = 1 << tx->insn.dst[0].shift;
1247
1248 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1249 }
1250
1251 static struct ureg_src
1252 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1253 {
1254 struct ureg_src src;
1255
1256 assert(!param->shift);
1257 assert(!(param->mod & NINED3DSPDM_SATURATE));
1258
1259 switch (param->file) {
1260 case D3DSPR_INPUT:
1261 if (IS_VS) {
1262 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1263 } else {
1264 assert(!param->rel);
1265 assert(param->idx < Elements(tx->regs.v));
1266 src = tx->regs.v[param->idx];
1267 }
1268 break;
1269 default:
1270 src = ureg_src(tx_dst_param(tx, param));
1271 break;
1272 }
1273 if (param->rel)
1274 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1275
1276 if (!param->mask)
1277 WARN("mask is 0, using identity swizzle\n");
1278
1279 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1280 char s[4];
1281 int n;
1282 int c;
1283 for (n = 0, c = 0; c < 4; ++c)
1284 if (param->mask & (1 << c))
1285 s[n++] = c;
1286 assert(n);
1287 for (c = n; c < 4; ++c)
1288 s[c] = s[n - 1];
1289 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1290 }
1291 return src;
1292 }
1293
1294 static HRESULT
1295 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1296 {
1297 struct ureg_program *ureg = tx->ureg;
1298 struct ureg_dst dst;
1299 struct ureg_src src[2];
1300 struct sm1_src_param *src_mat = &tx->insn.src[1];
1301 unsigned i;
1302
1303 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1304 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1305
1306 for (i = 0; i < n; i++)
1307 {
1308 const unsigned m = (1 << i);
1309
1310 src[1] = tx_src_param(tx, src_mat);
1311 src_mat->idx++;
1312
1313 if (!(dst.WriteMask & m))
1314 continue;
1315
1316 /* XXX: src == dst case ? */
1317
1318 switch (k) {
1319 case 3:
1320 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1321 break;
1322 case 4:
1323 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1324 break;
1325 default:
1326 DBG("invalid operation: M%ux%u\n", m, n);
1327 break;
1328 }
1329 }
1330
1331 return D3D_OK;
1332 }
1333
/* Expands to two zero arguments; presumably the (min, max) version pair of
 * an instruction that a shader type does not support -- confirm against the
 * instruction table. */
#define VNOTSUPPORTED 0, 0
/* Packs a shader version into one integer: (major << 8) | minor. */
#define V(maj, min) (((maj) << 8) | (min))
1336
1337 static inline const char *
1338 d3dsio_to_string( unsigned opcode )
1339 {
1340 static const char *names[] = {
1341 "NOP",
1342 "MOV",
1343 "ADD",
1344 "SUB",
1345 "MAD",
1346 "MUL",
1347 "RCP",
1348 "RSQ",
1349 "DP3",
1350 "DP4",
1351 "MIN",
1352 "MAX",
1353 "SLT",
1354 "SGE",
1355 "EXP",
1356 "LOG",
1357 "LIT",
1358 "DST",
1359 "LRP",
1360 "FRC",
1361 "M4x4",
1362 "M4x3",
1363 "M3x4",
1364 "M3x3",
1365 "M3x2",
1366 "CALL",
1367 "CALLNZ",
1368 "LOOP",
1369 "RET",
1370 "ENDLOOP",
1371 "LABEL",
1372 "DCL",
1373 "POW",
1374 "CRS",
1375 "SGN",
1376 "ABS",
1377 "NRM",
1378 "SINCOS",
1379 "REP",
1380 "ENDREP",
1381 "IF",
1382 "IFC",
1383 "ELSE",
1384 "ENDIF",
1385 "BREAK",
1386 "BREAKC",
1387 "MOVA",
1388 "DEFB",
1389 "DEFI",
1390 NULL,
1391 NULL,
1392 NULL,
1393 NULL,
1394 NULL,
1395 NULL,
1396 NULL,
1397 NULL,
1398 NULL,
1399 NULL,
1400 NULL,
1401 NULL,
1402 NULL,
1403 NULL,
1404 NULL,
1405 "TEXCOORD",
1406 "TEXKILL",
1407 "TEX",
1408 "TEXBEM",
1409 "TEXBEML",
1410 "TEXREG2AR",
1411 "TEXREG2GB",
1412 "TEXM3x2PAD",
1413 "TEXM3x2TEX",
1414 "TEXM3x3PAD",
1415 "TEXM3x3TEX",
1416 NULL,
1417 "TEXM3x3SPEC",
1418 "TEXM3x3VSPEC",
1419 "EXPP",
1420 "LOGP",
1421 "CND",
1422 "DEF",
1423 "TEXREG2RGB",
1424 "TEXDP3TEX",
1425 "TEXM3x2DEPTH",
1426 "TEXDP3",
1427 "TEXM3x3",
1428 "TEXDEPTH",
1429 "CMP",
1430 "BEM",
1431 "DP2ADD",
1432 "DSX",
1433 "DSY",
1434 "TEXLDD",
1435 "SETP",
1436 "TEXLDL",
1437 "BREAKP"
1438 };
1439
1440 if (opcode < Elements(names)) return names[opcode];
1441
1442 switch (opcode) {
1443 case D3DSIO_PHASE: return "PHASE";
1444 case D3DSIO_COMMENT: return "COMMENT";
1445 case D3DSIO_END: return "END";
1446 default:
1447 return NULL;
1448 }
1449 }
1450
/* Initializer with every field zero/NULL. NOTE(review): presumably the
 * "empty slot" entry of the opcode table, for which IS_VALID_INSTRUCTION()
 * yields 0 -- confirm against the table's struct definition. */
#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when any of the min/max version fields of 'inst' (vertex or
 * fragment) is non-zero. */
#define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
                                    (inst).vert_version.max | \
                                    (inst).frag_version.min | \
                                    (inst).frag_version.max)

/* Name of the translation function dedicated to instruction 'name'. */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the translation function for instruction 'name';
 * the body that follows receives the translator as 'tx'. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration; the definition is not in this part of the file. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1466
1467 DECL_SPECIAL(M4x4)
1468 {
1469 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1470 }
1471
1472 DECL_SPECIAL(M4x3)
1473 {
1474 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1475 }
1476
1477 DECL_SPECIAL(M3x4)
1478 {
1479 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1480 }
1481
1482 DECL_SPECIAL(M3x3)
1483 {
1484 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1485 }
1486
1487 DECL_SPECIAL(M3x2)
1488 {
1489 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1490 }
1491
1492 DECL_SPECIAL(CMP)
1493 {
1494 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1495 tx_src_param(tx, &tx->insn.src[0]),
1496 tx_src_param(tx, &tx->insn.src[2]),
1497 tx_src_param(tx, &tx->insn.src[1]));
1498 return D3D_OK;
1499 }
1500
1501 DECL_SPECIAL(CND)
1502 {
1503 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1504 struct ureg_dst cgt;
1505 struct ureg_src cnd;
1506
1507 /* the coissue flag was a tip for compilers to advise to
1508 * execute two operations at the same time, in cases
1509 * the two executions had same dst with different channels.
1510 * It has no effect on current hw. However it seems CND
1511 * is affected. The handling of this very specific case
1512 * handled below mimick wine behaviour */
1513 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1514 ureg_MOV(tx->ureg,
1515 dst, tx_src_param(tx, &tx->insn.src[1]));
1516 return D3D_OK;
1517 }
1518
1519 cnd = tx_src_param(tx, &tx->insn.src[0]);
1520 cgt = tx_scratch(tx);
1521
1522 if (tx->version.major == 1 && tx->version.minor < 4)
1523 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1524
1525 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1526
1527 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1528 tx_src_param(tx, &tx->insn.src[1]),
1529 tx_src_param(tx, &tx->insn.src[2]));
1530 return D3D_OK;
1531 }
1532
1533 DECL_SPECIAL(CALL)
1534 {
1535 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1536 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1537 return D3D_OK;
1538 }
1539
1540 DECL_SPECIAL(CALLNZ)
1541 {
1542 struct ureg_program *ureg = tx->ureg;
1543 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1544
1545 if (!tx->native_integers)
1546 ureg_IF(ureg, src, tx_cond(tx));
1547 else
1548 ureg_UIF(ureg, src, tx_cond(tx));
1549 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1550 tx_endcond(tx);
1551 ureg_ENDIF(ureg);
1552 return D3D_OK;
1553 }
1554
1555 DECL_SPECIAL(LOOP)
1556 {
1557 struct ureg_program *ureg = tx->ureg;
1558 unsigned *label;
1559 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1560 struct ureg_dst ctr;
1561 struct ureg_dst tmp;
1562 struct ureg_src ctrx;
1563
1564 label = tx_bgnloop(tx);
1565 ctr = tx_get_loopctr(tx, TRUE);
1566 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1567
1568 /* src: num_iterations - start_value of al - step for al - 0 */
1569 ureg_MOV(ureg, ctr, src);
1570 ureg_BGNLOOP(tx->ureg, label);
1571 tmp = tx_scratch_scalar(tx);
1572 /* Initially ctr.x contains the number of iterations.
1573 * ctr.y will contain the updated value of al.
1574 * We decrease ctr.x at the end of every iteration,
1575 * and stop when it reaches 0. */
1576
1577 if (!tx->native_integers) {
1578 /* case src and ctr contain floats */
1579 /* to avoid precision issue, we stop when ctr <= 0.5 */
1580 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1581 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1582 } else {
1583 /* case src and ctr contain integers */
1584 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1585 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1586 }
1587 ureg_BRK(ureg);
1588 tx_endcond(tx);
1589 ureg_ENDIF(ureg);
1590 return D3D_OK;
1591 }
1592
1593 DECL_SPECIAL(RET)
1594 {
1595 ureg_RET(tx->ureg);
1596 return D3D_OK;
1597 }
1598
1599 DECL_SPECIAL(ENDLOOP)
1600 {
1601 struct ureg_program *ureg = tx->ureg;
1602 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1603 struct ureg_dst dst_ctrx, dst_al;
1604 struct ureg_src src_ctr, al_counter;
1605
1606 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1607 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1608 src_ctr = ureg_src(ctr);
1609 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1610
1611 /* ctr.x -= 1
1612 * ctr.y (aL) += step */
1613 if (!tx->native_integers) {
1614 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1615 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1616 } else {
1617 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1618 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1619 }
1620 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1621 return D3D_OK;
1622 }
1623
1624 DECL_SPECIAL(LABEL)
1625 {
1626 unsigned k = tx->num_inst_labels;
1627 unsigned n = tx->insn.src[0].idx;
1628 assert(n < 2048);
1629 if (n >= k)
1630 tx->inst_labels = REALLOC(tx->inst_labels,
1631 k * sizeof(tx->inst_labels[0]),
1632 n * sizeof(tx->inst_labels[0]));
1633
1634 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1635 return D3D_OK;
1636 }
1637
1638 DECL_SPECIAL(SINCOS)
1639 {
1640 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1641 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1642
1643 assert(!(dst.WriteMask & 0xc));
1644
1645 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1646 ureg_SCS(tx->ureg, dst, src);
1647 return D3D_OK;
1648 }
1649
1650 DECL_SPECIAL(SGN)
1651 {
1652 ureg_SSG(tx->ureg,
1653 tx_dst_param(tx, &tx->insn.dst[0]),
1654 tx_src_param(tx, &tx->insn.src[0]));
1655 return D3D_OK;
1656 }
1657
1658 DECL_SPECIAL(REP)
1659 {
1660 struct ureg_program *ureg = tx->ureg;
1661 unsigned *label;
1662 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1663 struct ureg_dst ctr;
1664 struct ureg_dst tmp;
1665 struct ureg_src ctrx;
1666
1667 label = tx_bgnloop(tx);
1668 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1669 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1670
1671 /* NOTE: rep must be constant, so we don't have to save the count */
1672 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1673
1674 /* rep: num_iterations - 0 - 0 - 0 */
1675 ureg_MOV(ureg, ctr, rep);
1676 ureg_BGNLOOP(ureg, label);
1677 tmp = tx_scratch_scalar(tx);
1678 /* Initially ctr.x contains the number of iterations.
1679 * We decrease ctr.x at the end of every iteration,
1680 * and stop when it reaches 0. */
1681
1682 if (!tx->native_integers) {
1683 /* case src and ctr contain floats */
1684 /* to avoid precision issue, we stop when ctr <= 0.5 */
1685 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1686 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1687 } else {
1688 /* case src and ctr contain integers */
1689 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1690 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1691 }
1692 ureg_BRK(ureg);
1693 tx_endcond(tx);
1694 ureg_ENDIF(ureg);
1695
1696 return D3D_OK;
1697 }
1698
1699 DECL_SPECIAL(ENDREP)
1700 {
1701 struct ureg_program *ureg = tx->ureg;
1702 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1703 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1704 struct ureg_src src_ctr = ureg_src(ctr);
1705
1706 /* ctr.x -= 1 */
1707 if (!tx->native_integers)
1708 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1709 else
1710 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1711
1712 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1713 return D3D_OK;
1714 }
1715
1716 DECL_SPECIAL(ENDIF)
1717 {
1718 tx_endcond(tx);
1719 ureg_ENDIF(tx->ureg);
1720 return D3D_OK;
1721 }
1722
1723 DECL_SPECIAL(IF)
1724 {
1725 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1726
1727 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1728 ureg_UIF(tx->ureg, src, tx_cond(tx));
1729 else
1730 ureg_IF(tx->ureg, src, tx_cond(tx));
1731
1732 return D3D_OK;
1733 }
1734
1735 static inline unsigned
1736 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1737 {
1738 switch (flags) {
1739 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1740 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1741 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1742 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1743 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1744 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1745 default:
1746 assert(!"invalid comparison flags");
1747 return TGSI_OPCODE_SGT;
1748 }
1749 }
1750
1751 DECL_SPECIAL(IFC)
1752 {
1753 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1754 struct ureg_src src[2];
1755 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1756 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1757 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1758 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1759 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1760 return D3D_OK;
1761 }
1762
1763 DECL_SPECIAL(ELSE)
1764 {
1765 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1766 return D3D_OK;
1767 }
1768
1769 DECL_SPECIAL(BREAKC)
1770 {
1771 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1772 struct ureg_src src[2];
1773 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1774 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1775 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1776 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1777 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1778 ureg_BRK(tx->ureg);
1779 tx_endcond(tx);
1780 ureg_ENDIF(tx->ureg);
1781 return D3D_OK;
1782 }
1783
/* Printable names of the D3DDECLUSAGE_* values, indexed by the usage value
 * itself; used when dumping DCL instructions. Usages not listed here have
 * no entry. */
static const char *sm1_declusage_names[] =
{
    [D3DDECLUSAGE_POSITION] = "POSITION",
    [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
    [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
    [D3DDECLUSAGE_NORMAL] = "NORMAL",
    [D3DDECLUSAGE_PSIZE] = "PSIZE",
    [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
    [D3DDECLUSAGE_TANGENT] = "TANGENT",
    [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
    [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
    [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
    [D3DDECLUSAGE_COLOR] = "COLOR",
    [D3DDECLUSAGE_FOG] = "FOG",
    [D3DDECLUSAGE_DEPTH] = "DEPTH",
    [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
1801
1802 static inline unsigned
1803 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1804 {
1805 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1806 }
1807
1808 static void
1809 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1810 boolean tc,
1811 struct sm1_semantic *dcl)
1812 {
1813 BYTE index = dcl->usage_idx;
1814
1815 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1816 * we match to a TGSI_SEMANTIC_GENERIC with index.
1817 *
1818 * The index can be anything UINT16 and usage_idx is BYTE,
1819 * so we can fit everything. It doesn't matter if indices
1820 * are close together or low.
1821 *
1822 *
1823 * POSITION >= 1: 10 * index + 6
1824 * COLOR >= 2: 10 * (index-1) + 7
1825 * TEXCOORD[0..15]: index
1826 * BLENDWEIGHT: 10 * index + 18
1827 * BLENDINDICES: 10 * index + 19
1828 * NORMAL: 10 * index + 20
1829 * TANGENT: 10 * index + 21
1830 * BINORMAL: 10 * index + 22
1831 * TESSFACTOR: 10 * index + 23
1832 */
1833
1834 switch (dcl->usage) {
1835 case D3DDECLUSAGE_POSITION:
1836 case D3DDECLUSAGE_POSITIONT:
1837 case D3DDECLUSAGE_DEPTH:
1838 if (index == 0) {
1839 sem->Name = TGSI_SEMANTIC_POSITION;
1840 sem->Index = 0;
1841 } else {
1842 sem->Name = TGSI_SEMANTIC_GENERIC;
1843 sem->Index = 10 * index + 6;
1844 }
1845 break;
1846 case D3DDECLUSAGE_COLOR:
1847 if (index < 2) {
1848 sem->Name = TGSI_SEMANTIC_COLOR;
1849 sem->Index = index;
1850 } else {
1851 sem->Name = TGSI_SEMANTIC_GENERIC;
1852 sem->Index = 10 * (index-1) + 7;
1853 }
1854 break;
1855 case D3DDECLUSAGE_FOG:
1856 assert(index == 0);
1857 sem->Name = TGSI_SEMANTIC_FOG;
1858 sem->Index = 0;
1859 break;
1860 case D3DDECLUSAGE_PSIZE:
1861 assert(index == 0);
1862 sem->Name = TGSI_SEMANTIC_PSIZE;
1863 sem->Index = 0;
1864 break;
1865 case D3DDECLUSAGE_TEXCOORD:
1866 assert(index < 16);
1867 if (index < 8 && tc)
1868 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1869 else
1870 sem->Name = TGSI_SEMANTIC_GENERIC;
1871 sem->Index = index;
1872 break;
1873 case D3DDECLUSAGE_BLENDWEIGHT:
1874 sem->Name = TGSI_SEMANTIC_GENERIC;
1875 sem->Index = 10 * index + 18;
1876 break;
1877 case D3DDECLUSAGE_BLENDINDICES:
1878 sem->Name = TGSI_SEMANTIC_GENERIC;
1879 sem->Index = 10 * index + 19;
1880 break;
1881 case D3DDECLUSAGE_NORMAL:
1882 sem->Name = TGSI_SEMANTIC_GENERIC;
1883 sem->Index = 10 * index + 20;
1884 break;
1885 case D3DDECLUSAGE_TANGENT:
1886 sem->Name = TGSI_SEMANTIC_GENERIC;
1887 sem->Index = 10 * index + 21;
1888 break;
1889 case D3DDECLUSAGE_BINORMAL:
1890 sem->Name = TGSI_SEMANTIC_GENERIC;
1891 sem->Index = 10 * index + 22;
1892 break;
1893 case D3DDECLUSAGE_TESSFACTOR:
1894 sem->Name = TGSI_SEMANTIC_GENERIC;
1895 sem->Index = 10 * index + 23;
1896 break;
1897 case D3DDECLUSAGE_SAMPLE:
1898 sem->Name = TGSI_SEMANTIC_COUNT;
1899 sem->Index = 0;
1900 break;
1901 default:
1902 unreachable(!"Invalid DECLUSAGE.");
1903 break;
1904 }
1905 }
1906
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT,
 * i.e. in the form compared against the BYTE sampler_type values below. */
#define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1911 static inline unsigned
1912 d3dstt_to_tgsi_tex(BYTE sampler_type)
1913 {
1914 switch (sampler_type) {
1915 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1916 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1917 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1918 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1919 default:
1920 assert(0);
1921 return TGSI_TEXTURE_UNKNOWN;
1922 }
1923 }
1924 static inline unsigned
1925 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1926 {
1927 switch (sampler_type) {
1928 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1929 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1930 case NINED3DSTT_VOLUME:
1931 case NINED3DSTT_CUBE:
1932 default:
1933 assert(0);
1934 return TGSI_TEXTURE_UNKNOWN;
1935 }
1936 }
1937 static inline unsigned
1938 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1939 {
1940 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1941 case 1: return TGSI_TEXTURE_1D;
1942 case 0: return TGSI_TEXTURE_2D;
1943 case 3: return TGSI_TEXTURE_3D;
1944 default:
1945 return TGSI_TEXTURE_CUBE;
1946 }
1947 }
1948
1949 static const char *
1950 sm1_sampler_type_name(BYTE sampler_type)
1951 {
1952 switch (sampler_type) {
1953 case NINED3DSTT_1D: return "1D";
1954 case NINED3DSTT_2D: return "2D";
1955 case NINED3DSTT_VOLUME: return "VOLUME";
1956 case NINED3DSTT_CUBE: return "CUBE";
1957 default:
1958 return "(D3DSTT_?)";
1959 }
1960 }
1961
1962 static inline unsigned
1963 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1964 {
1965 switch (sem->Name) {
1966 case TGSI_SEMANTIC_POSITION:
1967 case TGSI_SEMANTIC_NORMAL:
1968 return TGSI_INTERPOLATE_LINEAR;
1969 case TGSI_SEMANTIC_BCOLOR:
1970 case TGSI_SEMANTIC_COLOR:
1971 return TGSI_INTERPOLATE_COLOR;
1972 case TGSI_SEMANTIC_FOG:
1973 case TGSI_SEMANTIC_GENERIC:
1974 case TGSI_SEMANTIC_TEXCOORD:
1975 case TGSI_SEMANTIC_CLIPDIST:
1976 case TGSI_SEMANTIC_CLIPVERTEX:
1977 return TGSI_INTERPOLATE_PERSPECTIVE;
1978 case TGSI_SEMANTIC_EDGEFLAG:
1979 case TGSI_SEMANTIC_FACE:
1980 case TGSI_SEMANTIC_INSTANCEID:
1981 case TGSI_SEMANTIC_PCOORD:
1982 case TGSI_SEMANTIC_PRIMID:
1983 case TGSI_SEMANTIC_PSIZE:
1984 case TGSI_SEMANTIC_VERTEXID:
1985 return TGSI_INTERPOLATE_CONSTANT;
1986 default:
1987 assert(0);
1988 return TGSI_INTERPOLATE_CONSTANT;
1989 }
1990 }
1991
1992 DECL_SPECIAL(DCL)
1993 {
1994 struct ureg_program *ureg = tx->ureg;
1995 boolean is_input;
1996 boolean is_sampler;
1997 struct tgsi_declaration_semantic tgsi;
1998 struct sm1_semantic sem;
1999 sm1_read_semantic(tx, &sem);
2000
2001 is_input = sem.reg.file == D3DSPR_INPUT;
2002 is_sampler =
2003 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2004
2005 DUMP("DCL ");
2006 sm1_dump_dst_param(&sem.reg);
2007 if (is_sampler)
2008 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2009 else
2010 if (tx->version.major >= 3)
2011 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2012 else
2013 if (sem.usage | sem.usage_idx)
2014 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2015 else
2016 DUMP("\n");
2017
2018 if (is_sampler) {
2019 const unsigned m = 1 << sem.reg.idx;
2020 ureg_DECL_sampler(ureg, sem.reg.idx);
2021 tx->info->sampler_mask |= m;
2022 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2023 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2024 d3dstt_to_tgsi_tex(sem.sampler_type);
2025 return D3D_OK;
2026 }
2027
2028 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2029 if (IS_VS) {
2030 if (is_input) {
2031 /* linkage outside of shader with vertex declaration */
2032 ureg_DECL_vs_input(ureg, sem.reg.idx);
2033 assert(sem.reg.idx < Elements(tx->info->input_map));
2034 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2035 tx->info->num_inputs = sem.reg.idx + 1;
2036 /* NOTE: preserving order in case of indirect access */
2037 } else
2038 if (tx->version.major >= 3) {
2039 /* SM2 output semantic determined by file */
2040 assert(sem.reg.mask != 0);
2041 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2042 tx->info->position_t = TRUE;
2043 assert(sem.reg.idx < Elements(tx->regs.o));
2044 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2045 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2046
2047 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
2048 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2049 }
2050 } else {
2051 if (is_input && tx->version.major >= 3) {
2052 unsigned interp_location = 0;
2053 /* SM3 only, SM2 input semantic determined by file */
2054 assert(sem.reg.idx < Elements(tx->regs.v));
2055
2056 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2057 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2058 return D3D_OK;
2059 }
2060
2061 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2062 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2063 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2064
2065 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2066 ureg, tgsi.Name, tgsi.Index,
2067 nine_tgsi_to_interp_mode(&tgsi),
2068 0, /* cylwrap */
2069 interp_location, 0, 1);
2070 } else
2071 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2072 /* FragColor or FragDepth */
2073 assert(sem.reg.mask != 0);
2074 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2075 0, 1);
2076 }
2077 }
2078 return D3D_OK;
2079 }
2080
2081 DECL_SPECIAL(DEF)
2082 {
2083 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2084 return D3D_OK;
2085 }
2086
2087 DECL_SPECIAL(DEFB)
2088 {
2089 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2090 return D3D_OK;
2091 }
2092
2093 DECL_SPECIAL(DEFI)
2094 {
2095 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2096 return D3D_OK;
2097 }
2098
2099 DECL_SPECIAL(POW)
2100 {
2101 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2102 struct ureg_src src[2] = {
2103 tx_src_param(tx, &tx->insn.src[0]),
2104 tx_src_param(tx, &tx->insn.src[1])
2105 };
2106 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2107 return D3D_OK;
2108 }
2109
2110 DECL_SPECIAL(RSQ)
2111 {
2112 struct ureg_program *ureg = tx->ureg;
2113 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2114 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2115 struct ureg_dst tmp = tx_scratch(tx);
2116 ureg_RSQ(ureg, tmp, ureg_abs(src));
2117 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2118 return D3D_OK;
2119 }
2120
2121 DECL_SPECIAL(LOG)
2122 {
2123 struct ureg_program *ureg = tx->ureg;
2124 struct ureg_dst tmp = tx_scratch_scalar(tx);
2125 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2126 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2127 ureg_LG2(ureg, tmp, ureg_abs(src));
2128 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2129 return D3D_OK;
2130 }
2131
2132 DECL_SPECIAL(LIT)
2133 {
2134 struct ureg_program *ureg = tx->ureg;
2135 struct ureg_dst tmp = tx_scratch(tx);
2136 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2137 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2138 ureg_LIT(ureg, tmp, src);
2139 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2140 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2141 * it 0^0 if src.w=0, which value is driver dependent. */
2142 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2143 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2144 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2145 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2146 return D3D_OK;
2147 }
2148
2149 DECL_SPECIAL(NRM)
2150 {
2151 struct ureg_program *ureg = tx->ureg;
2152 struct ureg_dst tmp = tx_scratch_scalar(tx);
2153 struct ureg_src nrm = tx_src_scalar(tmp);
2154 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2155 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2156 ureg_DP3(ureg, tmp, src, src);
2157 ureg_RSQ(ureg, tmp, nrm);
2158 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2159 ureg_MUL(ureg, dst, src, nrm);
2160 return D3D_OK;
2161 }
2162
2163 DECL_SPECIAL(DP2ADD)
2164 {
2165 struct ureg_dst tmp = tx_scratch_scalar(tx);
2166 struct ureg_src dp2 = tx_src_scalar(tmp);
2167 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2168 struct ureg_src src[3];
2169 int i;
2170 for (i = 0; i < 3; ++i)
2171 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2172 assert_replicate_swizzle(&src[2]);
2173
2174 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2175 ureg_ADD(tx->ureg, dst, src[2], dp2);
2176
2177 return D3D_OK;
2178 }
2179
2180 DECL_SPECIAL(TEXCOORD)
2181 {
2182 struct ureg_program *ureg = tx->ureg;
2183 const unsigned s = tx->insn.dst[0].idx;
2184 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2185
2186 tx_texcoord_alloc(tx, s);
2187 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2188 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2189
2190 return D3D_OK;
2191 }
2192
2193 DECL_SPECIAL(TEXCOORD_ps14)
2194 {
2195 struct ureg_program *ureg = tx->ureg;
2196 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2197 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2198
2199 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2200
2201 ureg_MOV(ureg, dst, src);
2202
2203 return D3D_OK;
2204 }
2205
2206 DECL_SPECIAL(TEXKILL)
2207 {
2208 struct ureg_src reg;
2209
2210 if (tx->version.major > 1 || tx->version.minor > 3) {
2211 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2212 } else {
2213 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2214 reg = tx->regs.vT[tx->insn.dst[0].idx];
2215 }
2216 if (tx->version.major < 2)
2217 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2218 ureg_KILL_IF(tx->ureg, reg);
2219
2220 return D3D_OK;
2221 }
2222
2223 DECL_SPECIAL(TEXBEM)
2224 {
2225 struct ureg_program *ureg = tx->ureg;
2226 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2227 struct ureg_dst tmp, tmp2, texcoord;
2228 struct ureg_src sample, m00, m01, m10, m11;
2229 struct ureg_src bumpenvlscale, bumpenvloffset;
2230 const int m = tx->insn.dst[0].idx;
2231 const int n = tx->insn.src[0].idx;
2232
2233 assert(tx->version.major == 1);
2234
2235 sample = ureg_DECL_sampler(ureg, m);
2236 tx->info->sampler_mask |= 1 << m;
2237
2238 tx_texcoord_alloc(tx, m);
2239
2240 tmp = tx_scratch(tx);
2241 tmp2 = tx_scratch(tx);
2242 texcoord = tx_scratch(tx);
2243 /*
2244 * Bump-env-matrix:
2245 * 00 is X
2246 * 01 is Y
2247 * 10 is Z
2248 * 11 is W
2249 */
2250 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2251 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2252 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2253 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2254 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2255
2256 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2257 if (m % 2 == 0) {
2258 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2259 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2260 } else {
2261 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2262 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2263 }
2264
2265 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2266
2267 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2268 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2269 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2270 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2271 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2272 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2273 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2274
2275 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2276 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2277 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2278 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2279 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2280 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2281 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2282
2283 /* Now the texture coordinates are in tmp.xy */
2284
2285 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2286 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2287 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2288 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2289 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2290 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2291 bumpenvlscale, bumpenvloffset);
2292 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2293 }
2294
2295 tx->info->bumpenvmat_needed = 1;
2296
2297 return D3D_OK;
2298 }
2299
2300 DECL_SPECIAL(TEXREG2AR)
2301 {
2302 struct ureg_program *ureg = tx->ureg;
2303 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2304 struct ureg_src sample;
2305 const int m = tx->insn.dst[0].idx;
2306 const int n = tx->insn.src[0].idx;
2307 assert(m >= 0 && m > n);
2308
2309 sample = ureg_DECL_sampler(ureg, m);
2310 tx->info->sampler_mask |= 1 << m;
2311 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2312
2313 return D3D_OK;
2314 }
2315
2316 DECL_SPECIAL(TEXREG2GB)
2317 {
2318 struct ureg_program *ureg = tx->ureg;
2319 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2320 struct ureg_src sample;
2321 const int m = tx->insn.dst[0].idx;
2322 const int n = tx->insn.src[0].idx;
2323 assert(m >= 0 && m > n);
2324
2325 sample = ureg_DECL_sampler(ureg, m);
2326 tx->info->sampler_mask |= 1 << m;
2327 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2328
2329 return D3D_OK;
2330 }
2331
/* TEXM3x2PAD: emits no TGSI; the TEXM3x2TEX and TEXM3x2DEPTH handlers
 * compute both matrix rows themselves. */
DECL_SPECIAL(TEXM3x2PAD)
{
    return D3D_OK; /* this is just padding */
}
2336
2337 DECL_SPECIAL(TEXM3x2TEX)
2338 {
2339 struct ureg_program *ureg = tx->ureg;
2340 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2341 struct ureg_src sample;
2342 const int m = tx->insn.dst[0].idx - 1;
2343 const int n = tx->insn.src[0].idx;
2344 assert(m >= 0 && m > n);
2345
2346 tx_texcoord_alloc(tx, m);
2347 tx_texcoord_alloc(tx, m+1);
2348
2349 /* performs the matrix multiplication */
2350 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2351 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2352
2353 sample = ureg_DECL_sampler(ureg, m + 1);
2354 tx->info->sampler_mask |= 1 << (m + 1);
2355 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2356
2357 return D3D_OK;
2358 }
2359
/* TEXM3x3PAD: emits no TGSI; the TEXM3x3* handlers compute all three
 * matrix rows themselves. */
DECL_SPECIAL(TEXM3x3PAD)
{
    return D3D_OK; /* this is just padding */
}
2364
2365 DECL_SPECIAL(TEXM3x3SPEC)
2366 {
2367 struct ureg_program *ureg = tx->ureg;
2368 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2369 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2370 struct ureg_src sample;
2371 struct ureg_dst tmp;
2372 const int m = tx->insn.dst[0].idx - 2;
2373 const int n = tx->insn.src[0].idx;
2374 assert(m >= 0 && m > n);
2375
2376 tx_texcoord_alloc(tx, m);
2377 tx_texcoord_alloc(tx, m+1);
2378 tx_texcoord_alloc(tx, m+2);
2379
2380 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2381 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2382 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2383
2384 sample = ureg_DECL_sampler(ureg, m + 2);
2385 tx->info->sampler_mask |= 1 << (m + 2);
2386 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2387
2388 /* At this step, dst = N = (u', w', z').
2389 * We want dst to be the texture sampled at (u'', w'', z''), with
2390 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2391 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2392 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2393 /* at this step tmp.x = 1/N.N */
2394 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2395 /* at this step tmp.y = N.E */
2396 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2397 /* at this step tmp.x = N.E/N.N */
2398 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2399 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2400 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2401 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2402 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2403
2404 return D3D_OK;
2405 }
2406
2407 DECL_SPECIAL(TEXREG2RGB)
2408 {
2409 struct ureg_program *ureg = tx->ureg;
2410 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2411 struct ureg_src sample;
2412 const int m = tx->insn.dst[0].idx;
2413 const int n = tx->insn.src[0].idx;
2414 assert(m >= 0 && m > n);
2415
2416 sample = ureg_DECL_sampler(ureg, m);
2417 tx->info->sampler_mask |= 1 << m;
2418 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2419
2420 return D3D_OK;
2421 }
2422
2423 DECL_SPECIAL(TEXDP3TEX)
2424 {
2425 struct ureg_program *ureg = tx->ureg;
2426 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2427 struct ureg_dst tmp;
2428 struct ureg_src sample;
2429 const int m = tx->insn.dst[0].idx;
2430 const int n = tx->insn.src[0].idx;
2431 assert(m >= 0 && m > n);
2432
2433 tx_texcoord_alloc(tx, m);
2434
2435 tmp = tx_scratch(tx);
2436 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2437 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2438
2439 sample = ureg_DECL_sampler(ureg, m);
2440 tx->info->sampler_mask |= 1 << m;
2441 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2442
2443 return D3D_OK;
2444 }
2445
2446 DECL_SPECIAL(TEXM3x2DEPTH)
2447 {
2448 struct ureg_program *ureg = tx->ureg;
2449 struct ureg_dst tmp;
2450 const int m = tx->insn.dst[0].idx - 1;
2451 const int n = tx->insn.src[0].idx;
2452 assert(m >= 0 && m > n);
2453
2454 tx_texcoord_alloc(tx, m);
2455 tx_texcoord_alloc(tx, m+1);
2456
2457 tmp = tx_scratch(tx);
2458
2459 /* performs the matrix multiplication */
2460 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2461 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2462
2463 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2464 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2465 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2466 /* res = 'w' == 0 ? 1.0 : z/w */
2467 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2468 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2469 /* replace the depth for depth testing with the result */
2470 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2471 TGSI_WRITEMASK_Z, 0, 1);
2472 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2473 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2474 return D3D_OK;
2475 }
2476
2477 DECL_SPECIAL(TEXDP3)
2478 {
2479 struct ureg_program *ureg = tx->ureg;
2480 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2481 const int m = tx->insn.dst[0].idx;
2482 const int n = tx->insn.src[0].idx;
2483 assert(m >= 0 && m > n);
2484
2485 tx_texcoord_alloc(tx, m);
2486
2487 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2488
2489 return D3D_OK;
2490 }
2491
2492 DECL_SPECIAL(TEXM3x3)
2493 {
2494 struct ureg_program *ureg = tx->ureg;
2495 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2496 struct ureg_src sample;
2497 struct ureg_dst E, tmp;
2498 const int m = tx->insn.dst[0].idx - 2;
2499 const int n = tx->insn.src[0].idx;
2500 assert(m >= 0 && m > n);
2501
2502 tx_texcoord_alloc(tx, m);
2503 tx_texcoord_alloc(tx, m+1);
2504 tx_texcoord_alloc(tx, m+2);
2505
2506 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2507 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2508 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2509
2510 switch (tx->insn.opcode) {
2511 case D3DSIO_TEXM3x3:
2512 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2513 break;
2514 case D3DSIO_TEXM3x3TEX:
2515 sample = ureg_DECL_sampler(ureg, m + 2);
2516 tx->info->sampler_mask |= 1 << (m + 2);
2517 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2518 break;
2519 case D3DSIO_TEXM3x3VSPEC:
2520 sample = ureg_DECL_sampler(ureg, m + 2);
2521 tx->info->sampler_mask |= 1 << (m + 2);
2522 E = tx_scratch(tx);
2523 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2524 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2525 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2526 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2527 /* At this step, dst = N = (u', w', z').
2528 * We want dst to be the texture sampled at (u'', w'', z''), with
2529 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2530 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2531 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2532 /* at this step tmp.x = 1/N.N */
2533 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2534 /* at this step tmp.y = N.E */
2535 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2536 /* at this step tmp.x = N.E/N.N */
2537 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2538 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2539 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2540 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2541 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2542 break;
2543 default:
2544 return D3DERR_INVALIDCALL;
2545 }
2546 return D3D_OK;
2547 }
2548
2549 DECL_SPECIAL(TEXDEPTH)
2550 {
2551 struct ureg_program *ureg = tx->ureg;
2552 struct ureg_dst r5;
2553 struct ureg_src r5r, r5g;
2554
2555 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2556
2557 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2558 * r5 won't be used afterward, thus we can use r5.ba */
2559 r5 = tx->regs.r[5];
2560 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2561 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2562
2563 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2564 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2565 /* r5.r = r/g */
2566 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2567 r5r, ureg_imm1f(ureg, 1.0f));
2568 /* replace the depth for depth testing with the result */
2569 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2570 TGSI_WRITEMASK_Z, 0, 1);
2571 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2572
2573 return D3D_OK;
2574 }
2575
2576 DECL_SPECIAL(BEM)
2577 {
2578 struct ureg_program *ureg = tx->ureg;
2579 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2580 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2581 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2582 struct ureg_src m00, m01, m10, m11;
2583 const int m = tx->insn.dst[0].idx;
2584 struct ureg_dst tmp;
2585 /*
2586 * Bump-env-matrix:
2587 * 00 is X
2588 * 01 is Y
2589 * 10 is Z
2590 * 11 is W
2591 */
2592 nine_info_mark_const_f_used(tx->info, 8 + m);
2593 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2594 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2595 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2596 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2597 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2598 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2599 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2600 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2601 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2602 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2603
2604 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2605 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2606 NINE_APPLY_SWIZZLE(src1, X), src0);
2607 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2608 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2609 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2610 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2611
2612 tx->info->bumpenvmat_needed = 1;
2613
2614 return D3D_OK;
2615 }
2616
2617 DECL_SPECIAL(TEXLD)
2618 {
2619 struct ureg_program *ureg = tx->ureg;
2620 unsigned target;
2621 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2622 struct ureg_src src[2] = {
2623 tx_src_param(tx, &tx->insn.src[0]),
2624 tx_src_param(tx, &tx->insn.src[1])
2625 };
2626 assert(tx->insn.src[1].idx >= 0 &&
2627 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2628 target = tx->sampler_targets[tx->insn.src[1].idx];
2629
2630 switch (tx->insn.flags) {
2631 case 0:
2632 ureg_TEX(ureg, dst, target, src[0], src[1]);
2633 break;
2634 case NINED3DSI_TEXLD_PROJECT:
2635 ureg_TXP(ureg, dst, target, src[0], src[1]);
2636 break;
2637 case NINED3DSI_TEXLD_BIAS:
2638 ureg_TXB(ureg, dst, target, src[0], src[1]);
2639 break;
2640 default:
2641 assert(0);
2642 return D3DERR_INVALIDCALL;
2643 }
2644 return D3D_OK;
2645 }
2646
2647 DECL_SPECIAL(TEXLD_14)
2648 {
2649 struct ureg_program *ureg = tx->ureg;
2650 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2651 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2652 const unsigned s = tx->insn.dst[0].idx;
2653 const unsigned t = ps1x_sampler_type(tx->info, s);
2654
2655 tx->info->sampler_mask |= 1 << s;
2656 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2657
2658 return D3D_OK;
2659 }
2660
2661 DECL_SPECIAL(TEX)
2662 {
2663 struct ureg_program *ureg = tx->ureg;
2664 const unsigned s = tx->insn.dst[0].idx;
2665 const unsigned t = ps1x_sampler_type(tx->info, s);
2666 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2667 struct ureg_src src[2];
2668
2669 tx_texcoord_alloc(tx, s);
2670
2671 src[0] = tx->regs.vT[s];
2672 src[1] = ureg_DECL_sampler(ureg, s);
2673 tx->info->sampler_mask |= 1 << s;
2674
2675 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2676
2677 return D3D_OK;
2678 }
2679
2680 DECL_SPECIAL(TEXLDD)
2681 {
2682 unsigned target;
2683 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2684 struct ureg_src src[4] = {
2685 tx_src_param(tx, &tx->insn.src[0]),
2686 tx_src_param(tx, &tx->insn.src[1]),
2687 tx_src_param(tx, &tx->insn.src[2]),
2688 tx_src_param(tx, &tx->insn.src[3])
2689 };
2690 assert(tx->insn.src[1].idx >= 0 &&
2691 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2692 target = tx->sampler_targets[tx->insn.src[1].idx];
2693
2694 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2695 return D3D_OK;
2696 }
2697
2698 DECL_SPECIAL(TEXLDL)
2699 {
2700 unsigned target;
2701 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2702 struct ureg_src src[2] = {
2703 tx_src_param(tx, &tx->insn.src[0]),
2704 tx_src_param(tx, &tx->insn.src[1])
2705 };
2706 assert(tx->insn.src[1].idx >= 0 &&
2707 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2708 target = tx->sampler_targets[tx->insn.src[1].idx];
2709
2710 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2711 return D3D_OK;
2712 }
2713
/* SETP: not implemented; handled by the STUB macro with
 * D3DERR_INVALIDCALL. */
DECL_SPECIAL(SETP)
{
    STUB(D3DERR_INVALIDCALL);
}
2718
/* BREAKP: not implemented; handled by the STUB macro with
 * D3DERR_INVALIDCALL. */
DECL_SPECIAL(BREAKP)
{
    STUB(D3DERR_INVALIDCALL);
}
2723
/* PHASE: accepted but emits nothing. */
DECL_SPECIAL(PHASE)
{
    return D3D_OK; /* we don't care about phase */
}
2728
/* COMMENT: accepted but emits nothing. */
DECL_SPECIAL(COMMENT)
{
    return D3D_OK; /* nothing to do */
}
2733
2734
/* Build one struct sm1_op_info initializer:
 *   o        D3D opcode name (expands to D3DSIO_<o>)
 *   t        TGSI opcode name (expands to TGSI_OPCODE_<t>)
 *   vv1,vv2  first and second member of the vertex-shader version pair
 *   pv1,pv2  first and second member of the pixel-shader version pair
 *   d, s     presumably the destination and source operand counts
 *   h        handler, or NULL
 */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2737
/* Table of known SM1 instructions.
 * Columns, in _OPI order: D3D opcode, TGSI opcode, vertex-shader version
 * range (min, max), pixel-shader version range (min, max), then two counts
 * (presumably destination and source operands) and the handler.
 * create_op_info_map() selects, per opcode, the entry whose version range
 * for the current shader type contains the shader's version; an opcode may
 * therefore appear several times with different ranges. */
struct sm1_op_info inst_table[] =
{
    _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
    _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
    _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
    _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
    _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
    _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
    _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
    _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
    _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
    _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
    _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
    _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
    _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
    _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
    _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
    _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
    _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
    _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */

    _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
    _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
    _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
    _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
    _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),

    _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
    _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
    _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
    _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
    _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
    _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),

    _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),

    _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
    _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
    _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
    _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */

    _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
    _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

    /* More flow control */
    _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
    _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
    _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
    _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
    _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
    _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
    _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
    _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
    /* we don't write to the address register, but a normal register (copied
     * when needed to the address register), thus we don't use ARR */
    _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),

    _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
    _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),

    _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
    _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
    _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
    _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
    _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
    _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
    _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
    _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
    _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
    _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),

    _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
    _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),

    _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),

    /* More tex stuff */
    _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
    _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
    _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
    _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
    _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),

    /* Misc */
    _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
    _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
    _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
    _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
    _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
    _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
    _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
};
2844
/* Stand-alone op info for PHASE, kept outside inst_table; its pixel-shader
 * version range is 1.4 only. */
struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2847
/* Stand-alone op info for COMMENT, kept outside inst_table; valid for every
 * vertex and pixel shader version up to 3.0. */
struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2850
2851 static void
2852 create_op_info_map(struct shader_translator *tx)
2853 {
2854 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2855 unsigned i;
2856
2857 for (i = 0; i < Elements(tx->op_info_map); ++i)
2858 tx->op_info_map[i] = -1;
2859
2860 if (tx->processor == PIPE_SHADER_VERTEX) {
2861 for (i = 0; i < Elements(inst_table); ++i) {
2862 assert(inst_table[i].sio < Elements(tx->op_info_map));
2863 if (inst_table[i].vert_version.min <= version &&
2864 inst_table[i].vert_version.max >= version)
2865 tx->op_info_map[inst_table[i].sio] = i;
2866 }
2867 } else {
2868 for (i = 0; i < Elements(inst_table); ++i) {
2869 assert(inst_table[i].sio < Elements(tx->op_info_map));
2870 if (inst_table[i].frag_version.min <= version &&
2871 inst_table[i].frag_version.max >= version)
2872 tx->op_info_map[inst_table[i].sio] = i;
2873 }
2874 }
2875 }
2876
2877 static inline HRESULT
2878 NineTranslateInstruction_Generic(struct shader_translator *tx)
2879 {
2880 struct ureg_dst dst[1];
2881 struct ureg_src src[4];
2882 unsigned i;
2883
2884 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2885 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2886 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2887 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2888
2889 ureg_insn(tx->ureg, tx->insn.info->opcode,
2890 dst, tx->insn.ndst,
2891 src, tx->insn.nsrc);
2892 return D3D_OK;
2893 }
2894
2895 static inline DWORD
2896 TOKEN_PEEK(struct shader_translator *tx)
2897 {
2898 return *(tx->parse);
2899 }
2900
2901 static inline DWORD
2902 TOKEN_NEXT(struct shader_translator *tx)
2903 {
2904 return *(tx->parse)++;
2905 }
2906
2907 static inline void
2908 TOKEN_JUMP(struct shader_translator *tx)
2909 {
2910 if (tx->parse_next && tx->parse != tx->parse_next) {
2911 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2912 tx->parse = tx->parse_next;
2913 }
2914 }
2915
2916 static inline boolean
2917 sm1_parse_eof(struct shader_translator *tx)
2918 {
2919 return TOKEN_PEEK(tx) == NINED3DSP_END;
2920 }
2921
2922 static void
2923 sm1_read_version(struct shader_translator *tx)
2924 {
2925 const DWORD tok = TOKEN_NEXT(tx);
2926
2927 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2928 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2929
2930 switch (tok >> 16) {
2931 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
2932 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
2933 default:
2934 DBG("Invalid shader type: %x\n", tok);
2935 tx->processor = ~0;
2936 break;
2937 }
2938 }
2939
2940 /* This is just to check if we parsed the instruction properly. */
2941 static void
2942 sm1_parse_get_skip(struct shader_translator *tx)
2943 {
2944 const DWORD tok = TOKEN_PEEK(tx);
2945
2946 if (tx->version.major >= 2) {
2947 tx->parse_next = tx->parse + 1 /* this */ +
2948 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2949 } else {
2950 tx->parse_next = NULL; /* TODO: determine from param count */
2951 }
2952 }
2953
/* Placeholder for printing a shader comment block; currently prints
 * nothing regardless of its arguments. */
static void
sm1_print_comment(const char *comment, UINT size)
{
    if (!size)
        return;
    /* TODO */
}
2961
2962 static void
2963 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2964 {
2965 DWORD tok = TOKEN_PEEK(tx);
2966
2967 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2968 {
2969 const char *comment = "";
2970 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2971 tx->parse += size + 1;
2972
2973 if (print)
2974 sm1_print_comment(comment, size);
2975
2976 tok = TOKEN_PEEK(tx);
2977 }
2978 }
2979
2980 static void
2981 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2982 {
2983 *reg = TOKEN_NEXT(tx);
2984
2985 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2986 {
2987 if (tx->version.major < 2)
2988 *rel = (1 << 31) |
2989 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2990 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2991 D3DSP_NOSWIZZLE;
2992 else
2993 *rel = TOKEN_NEXT(tx);
2994 }
2995 }
2996
2997 static void
2998 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2999 {
3000 uint8_t shift;
3001 dst->file =
3002 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3003 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3004 dst->type = TGSI_RETURN_TYPE_FLOAT;
3005 dst->idx = tok & D3DSP_REGNUM_MASK;
3006 dst->rel = NULL;
3007 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3008 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3009 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3010 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
3011 }
3012
3013 static void
3014 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3015 {
3016 src->file =
3017 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3018 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3019 src->type = TGSI_RETURN_TYPE_FLOAT;
3020 src->idx = tok & D3DSP_REGNUM_MASK;
3021 src->rel = NULL;
3022 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3023 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3024
3025 switch (src->file) {
3026 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3027 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3028 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3029 default:
3030 break;
3031 }
3032 }
3033
3034 static void
3035 sm1_parse_immediate(struct shader_translator *tx,
3036 struct sm1_src_param *imm)
3037 {
3038 imm->file = NINED3DSPR_IMMEDIATE;
3039 imm->idx = INT_MIN;
3040 imm->rel = NULL;
3041 imm->swizzle = NINED3DSP_NOSWIZZLE;
3042 imm->mod = 0;
3043 switch (tx->insn.opcode) {
3044 case D3DSIO_DEF:
3045 imm->type = NINED3DSPTYPE_FLOAT4;
3046 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3047 tx->parse += 4;
3048 break;
3049 case D3DSIO_DEFI:
3050 imm->type = NINED3DSPTYPE_INT4;
3051 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3052 tx->parse += 4;
3053 break;
3054 case D3DSIO_DEFB:
3055 imm->type = NINED3DSPTYPE_BOOL;
3056 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3057 tx->parse += 1;
3058 break;
3059 default:
3060 assert(0);
3061 break;
3062 }
3063 }
3064
3065 static void
3066 sm1_read_dst_param(struct shader_translator *tx,
3067 struct sm1_dst_param *dst,
3068 struct sm1_src_param *rel)
3069 {
3070 DWORD tok_dst, tok_rel = 0;
3071
3072 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3073 sm1_parse_dst_param(dst, tok_dst);
3074 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3075 sm1_parse_src_param(rel, tok_rel);
3076 dst->rel = rel;
3077 }
3078 }
3079
3080 static void
3081 sm1_read_src_param(struct shader_translator *tx,
3082 struct sm1_src_param *src,
3083 struct sm1_src_param *rel)
3084 {
3085 DWORD tok_src, tok_rel = 0;
3086
3087 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3088 sm1_parse_src_param(src, tok_src);
3089 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3090 assert(rel);
3091 sm1_parse_src_param(rel, tok_rel);
3092 src->rel = rel;
3093 }
3094 }
3095
3096 static void
3097 sm1_read_semantic(struct shader_translator *tx,
3098 struct sm1_semantic *sem)
3099 {
3100 const DWORD tok_usg = TOKEN_NEXT(tx);
3101 const DWORD tok_dst = TOKEN_NEXT(tx);
3102
3103 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3104 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3105 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3106
3107 sm1_parse_dst_param(&sem->reg, tok_dst);
3108 }
3109
3110 static void
3111 sm1_parse_instruction(struct shader_translator *tx)
3112 {
3113 struct sm1_instruction *insn = &tx->insn;
3114 DWORD tok;
3115 struct sm1_op_info *info = NULL;
3116 unsigned i;
3117
3118 sm1_parse_comments(tx, TRUE);
3119 sm1_parse_get_skip(tx);
3120
3121 tok = TOKEN_NEXT(tx);
3122
3123 insn->opcode = tok & D3DSI_OPCODE_MASK;
3124 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3125 insn->coissue = !!(tok & D3DSI_COISSUE);
3126 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3127
3128 if (insn->opcode < Elements(tx->op_info_map)) {
3129 int k = tx->op_info_map[insn->opcode];
3130 if (k >= 0) {
3131 assert(k < Elements(inst_table));
3132 info = &inst_table[k];
3133 }
3134 } else {
3135 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3136 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3137 }
3138 if (!info) {
3139 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3140 TOKEN_JUMP(tx);
3141 return;
3142 }
3143 insn->info = info;
3144 insn->ndst = info->ndst;
3145 insn->nsrc = info->nsrc;
3146
3147 assert(!insn->predicated && "TODO: predicated instructions");
3148
3149 /* check version */
3150 {
3151 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3152 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3153 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3154 if (ver < min || ver > max) {
3155 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3156 min, ver, max);
3157 return;
3158 }
3159 }
3160
3161 for (i = 0; i < insn->ndst; ++i)
3162 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3163 if (insn->predicated)
3164 sm1_read_src_param(tx, &insn->pred, NULL);
3165 for (i = 0; i < insn->nsrc; ++i)
3166 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3167
3168 /* parse here so we can dump them before processing */
3169 if (insn->opcode == D3DSIO_DEF ||
3170 insn->opcode == D3DSIO_DEFI ||
3171 insn->opcode == D3DSIO_DEFB)
3172 sm1_parse_immediate(tx, &tx->insn.src[0]);
3173
3174 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3175 sm1_instruction_check(insn);
3176
3177 if (info->handler)
3178 info->handler(tx);
3179 else
3180 NineTranslateInstruction_Generic(tx);
3181 tx_apply_dst0_modifiers(tx);
3182
3183 tx->num_scratch = 0; /* reset */
3184
3185 TOKEN_JUMP(tx);
3186 }
3187
3188 static void
3189 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3190 {
3191 unsigned i;
3192
3193 tx->info = info;
3194
3195 tx->byte_code = info->byte_code;
3196 tx->parse = info->byte_code;
3197
3198 for (i = 0; i < Elements(info->input_map); ++i)
3199 info->input_map[i] = NINE_DECLUSAGE_NONE;
3200 info->num_inputs = 0;
3201
3202 info->position_t = FALSE;
3203 info->point_size = FALSE;
3204
3205 tx->info->const_float_slots = 0;
3206 tx->info->const_int_slots = 0;
3207 tx->info->const_bool_slots = 0;
3208
3209 info->sampler_mask = 0x0;
3210 info->rt_mask = 0x0;
3211
3212 info->lconstf.data = NULL;
3213 info->lconstf.ranges = NULL;
3214
3215 info->bumpenvmat_needed = 0;
3216
3217 for (i = 0; i < Elements(tx->regs.rL); ++i) {
3218 tx->regs.rL[i] = ureg_dst_undef();
3219 }
3220 tx->regs.address = ureg_dst_undef();
3221 tx->regs.a0 = ureg_dst_undef();
3222 tx->regs.p = ureg_dst_undef();
3223 tx->regs.oDepth = ureg_dst_undef();
3224 tx->regs.vPos = ureg_src_undef();
3225 tx->regs.vFace = ureg_src_undef();
3226 for (i = 0; i < Elements(tx->regs.o); ++i)
3227 tx->regs.o[i] = ureg_dst_undef();
3228 for (i = 0; i < Elements(tx->regs.oCol); ++i)
3229 tx->regs.oCol[i] = ureg_dst_undef();
3230 for (i = 0; i < Elements(tx->regs.vC); ++i)
3231 tx->regs.vC[i] = ureg_src_undef();
3232 for (i = 0; i < Elements(tx->regs.vT); ++i)
3233 tx->regs.vT[i] = ureg_src_undef();
3234
3235 for (i = 0; i < Elements(tx->lconsti); ++i)
3236 tx->lconsti[i].idx = -1;
3237 for (i = 0; i < Elements(tx->lconstb); ++i)
3238 tx->lconstb[i].idx = -1;
3239
3240 sm1_read_version(tx);
3241
3242 info->version = (tx->version.major << 4) | tx->version.minor;
3243
3244 create_op_info_map(tx);
3245 }
3246
3247 static void
3248 tx_dtor(struct shader_translator *tx)
3249 {
3250 if (tx->num_inst_labels)
3251 FREE(tx->inst_labels);
3252 FREE(tx->lconstf);
3253 FREE(tx->regs.r);
3254 FREE(tx);
3255 }
3256
3257 static void
3258 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3259 {
3260 struct ureg_program *ureg = tx->ureg;
3261 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3262 struct ureg_src fog_end, fog_coeff, fog_density;
3263 struct ureg_src fog_vs, depth, fog_color;
3264 struct ureg_dst fog_factor;
3265
3266 if (!tx->info->fog_enable) {
3267 ureg_MOV(ureg, oCol0, src_col);
3268 return;
3269 }
3270
3271 if (tx->info->fog_mode != D3DFOG_NONE) {
3272 depth = nine_get_position_input(tx);
3273 depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3274 }
3275
3276 nine_info_mark_const_f_used(tx->info, 33);
3277 fog_color = NINE_CONSTANT_SRC(32);
3278 fog_factor = tx_scratch_scalar(tx);
3279
3280 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3281 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3282 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3283 ureg_SUB(ureg, fog_factor, fog_end, depth);
3284 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3285 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3286 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3287 ureg_MUL(ureg, fog_factor, depth, fog_density);
3288 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3289 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3290 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3291 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3292 ureg_MUL(ureg, fog_factor, depth, fog_density);
3293 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3294 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3295 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3296 } else {
3297 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3298 TGSI_INTERPOLATE_PERSPECTIVE),
3299 TGSI_SWIZZLE_X);
3300 ureg_MOV(ureg, fog_factor, fog_vs);
3301 }
3302
3303 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3304 tx_src_scalar(fog_factor), src_col, fog_color);
3305 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3306 }
3307
/* Query screen capability PIPE_CAP_<n>; expects `device` to be in scope. */
#define GET_CAP(n) \
    device->screen->get_param(device->screen, PIPE_CAP_##n)
/* Query shader capability PIPE_SHADER_CAP_<n> for the shader type in
 * info->type; expects `device` and `info` to be in scope.
 */
#define GET_SHADER_CAP(n) \
    device->screen->get_shader_param(device->screen, info->type, \
                                     PIPE_SHADER_CAP_##n)
3312
3313 HRESULT
3314 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3315 {
3316 struct shader_translator *tx;
3317 HRESULT hr = D3D_OK;
3318 const unsigned processor = info->type;
3319 unsigned s, slot_max;
3320 unsigned max_const_f;
3321
3322 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3323
3324 tx = CALLOC_STRUCT(shader_translator);
3325 if (!tx)
3326 return E_OUTOFMEMORY;
3327 tx_ctor(tx, info);
3328
3329 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3330 hr = D3DERR_INVALIDCALL;
3331 DBG("Unsupported shader version: %u.%u !\n",
3332 tx->version.major, tx->version.minor);
3333 goto out;
3334 }
3335 if (tx->processor != processor) {
3336 hr = D3DERR_INVALIDCALL;
3337 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3338 goto out;
3339 }
3340 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3341 tx->version.major, tx->version.minor);
3342
3343 tx->ureg = ureg_create(processor);
3344 if (!tx->ureg) {
3345 hr = E_OUTOFMEMORY;
3346 goto out;
3347 }
3348
3349 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3350 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3351 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3352 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3353 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3354 tx->texcoord_sn = tx->want_texcoord ?
3355 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3356 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3357 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3358
3359 if (IS_VS) {
3360 tx->num_constf_allowed = NINE_MAX_CONST_F;
3361 } else if (tx->version.major < 2) {/* IS_PS v1 */
3362 tx->num_constf_allowed = 8;
3363 } else if (tx->version.major == 2) {/* IS_PS v2 */
3364 tx->num_constf_allowed = 32;
3365 } else {/* IS_PS v3 */
3366 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3367 }
3368
3369 if (tx->version.major < 2) {
3370 tx->num_consti_allowed = 0;
3371 tx->num_constb_allowed = 0;
3372 } else {
3373 tx->num_consti_allowed = NINE_MAX_CONST_I;
3374 tx->num_constb_allowed = NINE_MAX_CONST_B;
3375 }
3376
3377 /* VS must always write position. Declare it here to make it the 1st output.
3378 * (Some drivers like nv50 are buggy and rely on that.)
3379 */
3380 if (IS_VS) {
3381 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3382 } else {
3383 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3384 if (!tx->shift_wpos)
3385 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3386 }
3387
3388 while (!sm1_parse_eof(tx) && !tx->failure)
3389 sm1_parse_instruction(tx);
3390 tx->parse++; /* for byte_size */
3391
3392 if (tx->failure) {
3393 ERR("Encountered buggy shader\n");
3394 ureg_destroy(tx->ureg);
3395 hr = D3DERR_INVALIDCALL;
3396 goto out;
3397 }
3398
3399 if (IS_PS && tx->version.major < 3) {
3400 if (tx->version.major < 2) {
3401 assert(tx->num_temp); /* there must be color output */
3402 info->rt_mask |= 0x1;
3403 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3404 } else {
3405 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3406 }
3407 }
3408
3409 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3410 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3411 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3412 }
3413
3414 if (info->position_t)
3415 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3416
3417 ureg_END(tx->ureg);
3418
3419 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
3420 info->point_size = TRUE;
3421
3422 /* record local constants */
3423 if (tx->num_lconstf && tx->indirect_const_access) {
3424 struct nine_range *ranges;
3425 float *data;
3426 int *indices;
3427 unsigned i, k, n;
3428
3429 hr = E_OUTOFMEMORY;
3430
3431 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3432 if (!data)
3433 goto out;
3434 info->lconstf.data = data;
3435
3436 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3437 if (!indices)
3438 goto out;
3439
3440 /* lazy sort, num_lconstf should be small */
3441 for (n = 0; n < tx->num_lconstf; ++n) {
3442 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3443 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3444 k = i;
3445 }
3446 indices[n] = tx->lconstf[k].idx;
3447 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3448 tx->lconstf[k].idx = INT_MAX;
3449 }
3450
3451 /* count ranges */
3452 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3453 if (indices[i] != indices[i - 1] + 1)
3454 ++n;
3455 ranges = MALLOC(n * sizeof(ranges[0]));
3456 if (!ranges) {
3457 FREE(indices);
3458 goto out;
3459 }
3460 info->lconstf.ranges = ranges;
3461
3462 k = 0;
3463 ranges[k].bgn = indices[0];
3464 for (i = 1; i < tx->num_lconstf; ++i) {
3465 if (indices[i] != indices[i - 1] + 1) {
3466 ranges[k].next = &ranges[k + 1];
3467 ranges[k].end = indices[i - 1] + 1;
3468 ++k;
3469 ranges[k].bgn = indices[i];
3470 }
3471 }
3472 ranges[k].end = indices[i - 1] + 1;
3473 ranges[k].next = NULL;
3474 assert(n == (k + 1));
3475
3476 FREE(indices);
3477 hr = D3D_OK;
3478 }
3479
3480 /* r500 */
3481 if (info->const_float_slots > device->max_vs_const_f &&
3482 (info->const_int_slots || info->const_bool_slots))
3483 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3484
3485
3486 if (tx->indirect_const_access) /* vs only */
3487 info->const_float_slots = device->max_vs_const_f;
3488
3489 max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3490 slot_max = info->const_bool_slots > 0 ?
3491 max_const_f + NINE_MAX_CONST_I
3492 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3493 info->const_int_slots > 0 ?
3494 max_const_f + info->const_int_slots :
3495 info->const_float_slots;
3496
3497 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3498
3499 for (s = 0; s < slot_max; s++)
3500 ureg_DECL_constant(tx->ureg, s);
3501
3502 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3503 unsigned count;
3504 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3505 tgsi_dump(toks, 0);
3506 ureg_free_tokens(toks);
3507 }
3508
3509 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3510 if (!info->cso) {
3511 hr = D3DERR_DRIVERINTERNALERROR;
3512 FREE(info->lconstf.data);
3513 FREE(info->lconstf.ranges);
3514 goto out;
3515 }
3516
3517 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3518 out:
3519 tx_dtor(tx);
3520 return hr;
3521 }