a7a7da27903e17da95d16d92f86cb2c3aa0ba580
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
36
/* Debug channel handed to _nine_debug_printf by DUMP below. */
#define DBG_CHANNEL DBG_SHADER

/* Print through _nine_debug_printf on DBG_CHANNEL, passing NULL as its second
 * argument; args is a printf-style format string followed by its values. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)


struct shader_translator;

/* Signature of a per-instruction handler (see sm1_op_info.handler). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Opcode-to-name helper used by sm1_dump_instruction; its definition is not
 * in this part of the file. */
static inline const char *d3dsio_to_string(unsigned opcode);
47
48
/* Shader type tags; the names suggest SM1 vertex / pixel shader version
 * tokens, but they are used outside this part of the file - TODO confirm. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the translator's cond_labels[] and loop_labels[] / rL[] stacks. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Values of sm1_src_param.type, selecting the member of the imm union. */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Extra register file value, one past D3DSPR_PREDICATE, marking a source
 * parameter that carries an immediate (see sm1_dump_src_param). */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Values of sm1_instruction.flags for D3DSIO_TEX (see sm1_dump_instruction). */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Per-component write mask bits, already shifted down to bit 0. */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, x in bits 0-1 up to w in 6-7. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

#define NINE_CONSTANT_SRC(index) \
    ureg_src_register(TGSI_FILE_CONSTANT, index)

/* Replicate component s of src to all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
    NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)

/* Destination modifier bits, shifted down to bit 0 (tested on
 * sm1_dst_param.mod in sm1_dump_dst_param). */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
104
/*
 * Source modifiers, shifted down to bit 0 (stored in sm1_src_param.mod).
 * Availability per shader version:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
134
/* Printable prefix for each source modifier, indexed by NINED3DSPSM_* value.
 * sm1_dump_src_param prints the entry followed by the operand in
 * parentheses. */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
152
153 static void
154 sm1_dump_writemask(BYTE mask)
155 {
156 if (mask & 1) DUMP("x"); else DUMP("_");
157 if (mask & 2) DUMP("y"); else DUMP("_");
158 if (mask & 4) DUMP("z"); else DUMP("_");
159 if (mask & 8) DUMP("w"); else DUMP("_");
160 }
161
162 static void
163 sm1_dump_swizzle(BYTE s)
164 {
165 char c[4] = { 'x', 'y', 'z', 'w' };
166 DUMP("%c%c%c%c",
167 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
168 }
169
/* One-letter tag per D3DSPR_* register file, used by the dump helpers.
 * Some files share a letter: 'D' is used for both ATTROUT and DEPTHOUT, and
 * 'c' for CONST and CONST2..4. The table ends at D3DSPR_PREDICATE, so larger
 * file values (e.g. NINED3DSPR_IMMEDIATE) have no entry. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
193
194 static void
195 sm1_dump_reg(BYTE file, INT index)
196 {
197 switch (file) {
198 case D3DSPR_LOOP:
199 DUMP("aL");
200 break;
201 case D3DSPR_COLOROUT:
202 DUMP("oC%i", index);
203 break;
204 case D3DSPR_DEPTHOUT:
205 DUMP("oDepth");
206 break;
207 case D3DSPR_RASTOUT:
208 DUMP("oRast%i", index);
209 break;
210 case D3DSPR_CONSTINT:
211 DUMP("iconst[%i]", index);
212 break;
213 case D3DSPR_CONSTBOOL:
214 DUMP("bconst[%i]", index);
215 break;
216 default:
217 DUMP("%c%i", sm1_file_char[file], index);
218 break;
219 }
220 }
221
/* Decoded source parameter of an SM1 instruction. */
struct sm1_src_param
{
    INT idx;                   /* register index within 'file' */
    struct sm1_src_param *rel; /* relative-addressing operand, or NULL */
    BYTE file;                 /* D3DSPR_* or NINED3DSPR_IMMEDIATE */
    BYTE swizzle;              /* 2 bits per channel; NINED3DSP_NOSWIZZLE = xyzw */
    BYTE mod;                  /* NINED3DSPSM_* source modifier */
    BYTE type;                 /* NINED3DSPTYPE_*: selects the imm member */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm;                     /* value when file == NINED3DSPR_IMMEDIATE */
};
/* Defined outside this part of the file. */
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
239
/* Decoded destination parameter of an SM1 instruction. */
struct sm1_dst_param
{
    INT idx;                   /* register index within 'file' */
    struct sm1_src_param *rel; /* relative-addressing operand, or NULL */
    BYTE file;                 /* D3DSPR_* register file */
    BYTE mask;                 /* write mask, NINED3DSP_WRITEMASK_* bits */
    BYTE mod;                  /* NINED3DSPDM_* destination modifier bits */
    int8_t shift; /* sint4 */  /* dumped as *2^shift (>0) or /2^-shift (<0) */
    BYTE type;
};
250
251 static inline void
252 assert_replicate_swizzle(const struct ureg_src *reg)
253 {
254 assert(reg->SwizzleY == reg->SwizzleX &&
255 reg->SwizzleZ == reg->SwizzleX &&
256 reg->SwizzleW == reg->SwizzleX);
257 }
258
259 static void
260 sm1_dump_immediate(const struct sm1_src_param *param)
261 {
262 switch (param->type) {
263 case NINED3DSPTYPE_FLOAT4:
264 DUMP("{ %f %f %f %f }",
265 param->imm.f[0], param->imm.f[1],
266 param->imm.f[2], param->imm.f[3]);
267 break;
268 case NINED3DSPTYPE_INT4:
269 DUMP("{ %i %i %i %i }",
270 param->imm.i[0], param->imm.i[1],
271 param->imm.i[2], param->imm.i[3]);
272 break;
273 case NINED3DSPTYPE_BOOL:
274 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
275 break;
276 default:
277 assert(0);
278 break;
279 }
280 }
281
282 static void
283 sm1_dump_src_param(const struct sm1_src_param *param)
284 {
285 if (param->file == NINED3DSPR_IMMEDIATE) {
286 assert(!param->mod &&
287 !param->rel &&
288 param->swizzle == NINED3DSP_NOSWIZZLE);
289 sm1_dump_immediate(param);
290 return;
291 }
292
293 if (param->mod)
294 DUMP("%s(", sm1_mod_str[param->mod]);
295 if (param->rel) {
296 DUMP("%c[", sm1_file_char[param->file]);
297 sm1_dump_src_param(param->rel);
298 DUMP("+%i]", param->idx);
299 } else {
300 sm1_dump_reg(param->file, param->idx);
301 }
302 if (param->mod)
303 DUMP(")");
304 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
305 DUMP(".");
306 sm1_dump_swizzle(param->swizzle);
307 }
308 }
309
310 static void
311 sm1_dump_dst_param(const struct sm1_dst_param *param)
312 {
313 if (param->mod & NINED3DSPDM_SATURATE)
314 DUMP("sat ");
315 if (param->mod & NINED3DSPDM_PARTIALP)
316 DUMP("pp ");
317 if (param->mod & NINED3DSPDM_CENTROID)
318 DUMP("centroid ");
319 if (param->shift < 0)
320 DUMP("/%u ", 1 << -param->shift);
321 if (param->shift > 0)
322 DUMP("*%u ", 1 << param->shift);
323
324 if (param->rel) {
325 DUMP("%c[", sm1_file_char[param->file]);
326 sm1_dump_src_param(param->rel);
327 DUMP("+%i]", param->idx);
328 } else {
329 sm1_dump_reg(param->file, param->idx);
330 }
331 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
332 DUMP(".");
333 sm1_dump_writemask(param->mask);
334 }
335 }
336
/* Declaration info filled in by sm1_read_semantic (defined outside this part
 * of the file): the declared register plus its usage / sampler type. */
struct sm1_semantic
{
    struct sm1_dst_param reg;
    BYTE sampler_type;
    D3DDECLUSAGE usage;
    BYTE usage_idx;
};
344
/* Static description of one SM1 opcode: its TGSI mapping, the shader
 * versions it is valid in, its operand counts and an optional special
 * handler. */
struct sm1_op_info
{
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
     * should be ignored completely */
    unsigned sio;
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
365
/* One decoded SM1 instruction (the translator keeps the current one in
 * shader_translator.insn). */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags;       /* opcode-specific, e.g. NINED3DSI_TEXLD_* for TEX */
    BOOL coissue;
    BOOL predicated;  /* when set, 'pred' holds the predicate operand */
    BYTE ndst;        /* number of valid entries in dst[] */
    BYTE nsrc;        /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    /* presumably backing storage for src[i].rel / dst[i].rel - the parser is
     * not in this part of the file, TODO confirm */
    struct sm1_src_param src_rel[4];
    struct sm1_src_param pred;
    struct sm1_src_param dst_rel[1];
    struct sm1_dst_param dst[1];

    struct sm1_op_info *info;
};
382
383 static void
384 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
385 {
386 unsigned i;
387
388 /* no info stored for these: */
389 if (insn->opcode == D3DSIO_DCL)
390 return;
391 for (i = 0; i < indent; ++i)
392 DUMP(" ");
393
394 if (insn->predicated) {
395 DUMP("@");
396 sm1_dump_src_param(&insn->pred);
397 DUMP(" ");
398 }
399 DUMP("%s", d3dsio_to_string(insn->opcode));
400 if (insn->flags) {
401 switch (insn->opcode) {
402 case D3DSIO_TEX:
403 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
404 break;
405 default:
406 DUMP("_%x", insn->flags);
407 break;
408 }
409 }
410 if (insn->coissue)
411 DUMP("_co");
412 DUMP(" ");
413
414 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
415 sm1_dump_dst_param(&insn->dst[i]);
416 DUMP(" ");
417 }
418
419 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
420 sm1_dump_src_param(&insn->src[i]);
421 DUMP(" ");
422 }
423 if (insn->opcode == D3DSIO_DEF ||
424 insn->opcode == D3DSIO_DEFI ||
425 insn->opcode == D3DSIO_DEFB)
426 sm1_dump_immediate(&insn->src[0]);
427
428 DUMP("\n");
429 }
430
/* A constant defined inside the shader itself (see tx_set_lconst{f,i,b}). */
struct sm1_local_const
{
    INT idx;             /* constant register index this entry defines */
    struct ureg_src reg; /* TGSI immediate holding the value */
    union {
        boolean b;
        float f[4];
        int32_t i[4];
    } imm;               /* raw value; only f is written in this part of the file */
};
441
/* State of one SM1 -> TGSI shader translation. */
struct shader_translator
{
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg; /* TGSI program being built */

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAGMENT */
    /* exclusive upper bounds on constant indices, enforced by tx_lconst* and
     * tx_set_lconst* */
    unsigned num_constf_allowed;
    unsigned num_consti_allowed;
    unsigned num_constb_allowed;

    boolean native_integers; /* emit integer immediates/ops instead of float */
    boolean inline_subroutines;
    boolean lower_preds;
    boolean want_texcoord;
    boolean shift_wpos;      /* subtract 0.5 from vPos.xy when it is read */
    boolean wpos_is_sysval;  /* declare vPos as a system value, not an input */
    boolean face_is_sysval_integer; /* vFace is a sysval converted to +/-1.0 */
    unsigned texcoord_sn;    /* semantic name used for texcoord in/outputs */

    struct sm1_instruction insn; /* current instruction */

    struct {
        struct ureg_dst *r;
        struct ureg_dst oPos;
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p;
        struct ureg_dst address;
        struct ureg_dst a0;
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[5]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* Elements(regs.r) */
    unsigned num_scratch; /* number of regs.t entries handed out by tx_scratch */
    unsigned loop_depth;
    unsigned loop_depth_max;
    unsigned cond_depth;
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    struct sm1_local_const *lconstf; /* grown in steps of 8 by tx_set_lconstf */
    unsigned num_lconstf;
    struct sm1_local_const lconsti[NINE_MAX_CONST_I];
    struct sm1_local_const lconstb[NINE_MAX_CONST_B];

    boolean indirect_const_access; /* set when a float constant is read with
                                    * relative addressing */
    boolean failure; /* set when translation hits an unsupported/invalid case */

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
519
/* Both macros expect a 'struct shader_translator *tx' in scope. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)

/* If cond holds, flag the translation as failed and return from the
 * enclosing void function. Expands to a complete if-statement; callers in
 * this file invoke it WITHOUT a trailing semicolon, so it must not be turned
 * into a do { } while (0) form without updating them. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

/* Defined outside this part of the file. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
527
528 static void
529 sm1_instruction_check(const struct sm1_instruction *insn)
530 {
531 if (insn->opcode == D3DSIO_CRS)
532 {
533 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
534 {
535 DBG("CRS.mask.w\n");
536 }
537 }
538 }
539
540 static boolean
541 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
542 {
543 INT i;
544 if (index < 0 || index >= tx->num_constf_allowed) {
545 tx->failure = TRUE;
546 return FALSE;
547 }
548 for (i = 0; i < tx->num_lconstf; ++i) {
549 if (tx->lconstf[i].idx == index) {
550 *src = tx->lconstf[i].reg;
551 return TRUE;
552 }
553 }
554 return FALSE;
555 }
556 static boolean
557 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
558 {
559 if (index < 0 || index >= tx->num_consti_allowed) {
560 tx->failure = TRUE;
561 return FALSE;
562 }
563 if (tx->lconsti[index].idx == index)
564 *src = tx->lconsti[index].reg;
565 return tx->lconsti[index].idx == index;
566 }
567 static boolean
568 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
569 {
570 if (index < 0 || index >= tx->num_constb_allowed) {
571 tx->failure = TRUE;
572 return FALSE;
573 }
574 if (tx->lconstb[index].idx == index)
575 *src = tx->lconstb[index].reg;
576 return tx->lconstb[index].idx == index;
577 }
578
579 static void
580 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
581 {
582 unsigned n;
583
584 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
585
586 for (n = 0; n < tx->num_lconstf; ++n)
587 if (tx->lconstf[n].idx == index)
588 break;
589 if (n == tx->num_lconstf) {
590 if ((n % 8) == 0) {
591 tx->lconstf = REALLOC(tx->lconstf,
592 (n + 0) * sizeof(tx->lconstf[0]),
593 (n + 8) * sizeof(tx->lconstf[0]));
594 assert(tx->lconstf);
595 }
596 tx->num_lconstf++;
597 }
598 tx->lconstf[n].idx = index;
599 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
600
601 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
602 }
603 static void
604 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
605 {
606 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
607 tx->lconsti[index].idx = index;
608 tx->lconsti[index].reg = tx->native_integers ?
609 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
610 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
611 }
612 static void
613 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
614 {
615 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
616 tx->lconstb[index].idx = index;
617 tx->lconstb[index].reg = tx->native_integers ?
618 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
619 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
620 }
621
622 static inline struct ureg_dst
623 tx_scratch(struct shader_translator *tx)
624 {
625 if (tx->num_scratch >= Elements(tx->regs.t)) {
626 tx->failure = TRUE;
627 return tx->regs.t[0];
628 }
629 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
630 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
631 return tx->regs.t[tx->num_scratch++];
632 }
633
634 static inline struct ureg_dst
635 tx_scratch_scalar(struct shader_translator *tx)
636 {
637 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
638 }
639
640 static inline struct ureg_src
641 tx_src_scalar(struct ureg_dst dst)
642 {
643 struct ureg_src src = ureg_src(dst);
644 int c = ffs(dst.WriteMask) - 1;
645 if (dst.WriteMask == (1 << c))
646 src = ureg_scalar(src, c);
647 return src;
648 }
649
650 static inline void
651 tx_temp_alloc(struct shader_translator *tx, INT idx)
652 {
653 assert(idx >= 0);
654 if (idx >= tx->num_temp) {
655 unsigned k = tx->num_temp;
656 unsigned n = idx + 1;
657 tx->regs.r = REALLOC(tx->regs.r,
658 k * sizeof(tx->regs.r[0]),
659 n * sizeof(tx->regs.r[0]));
660 for (; k < n; ++k)
661 tx->regs.r[k] = ureg_dst_undef();
662 tx->num_temp = n;
663 }
664 if (ureg_dst_is_undef(tx->regs.r[idx]))
665 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
666 }
667
668 static inline void
669 tx_addr_alloc(struct shader_translator *tx, INT idx)
670 {
671 assert(idx == 0);
672 if (ureg_dst_is_undef(tx->regs.address))
673 tx->regs.address = ureg_DECL_address(tx->ureg);
674 if (ureg_dst_is_undef(tx->regs.a0))
675 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
676 }
677
678 static inline void
679 tx_pred_alloc(struct shader_translator *tx, INT idx)
680 {
681 assert(idx == 0);
682 if (ureg_dst_is_undef(tx->regs.p))
683 tx->regs.p = ureg_DECL_predicate(tx->ureg);
684 }
685
686 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
687 * the projection should be applied on the texture. It doesn't
688 * apply on texkill.
689 * The doc is very imprecise here (it says the projection is done
690 * before rasterization, thus in vs, which seems wrong since ps instructions
691 * are affected differently)
692 * For now we only apply to the ps TEX instruction and TEXBEM.
693 * Perhaps some other instructions would need it */
694 static inline void
695 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
696 struct ureg_src src, INT idx)
697 {
698 struct ureg_dst tmp;
699 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
700
701 /* no projection */
702 if (dim == 1) {
703 ureg_MOV(tx->ureg, dst, src);
704 } else {
705 tmp = tx_scratch_scalar(tx);
706 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
707 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
708 }
709 }
710
711 static inline void
712 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
713 unsigned target, struct ureg_src src0,
714 struct ureg_src src1, INT idx)
715 {
716 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
717 struct ureg_dst tmp;
718
719 /* dim == 1: no projection
720 * Looks like must be disabled when it makes no
721 * sense according the texture dimensions
722 */
723 if (dim == 1 || dim <= target) {
724 ureg_TEX(tx->ureg, dst, target, src0, src1);
725 } else if (dim == 4) {
726 ureg_TXP(tx->ureg, dst, target, src0, src1);
727 } else {
728 tmp = tx_scratch(tx);
729 apply_ps1x_projection(tx, tmp, src0, idx);
730 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
731 }
732 }
733
734 static inline void
735 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
736 {
737 assert(IS_PS);
738 assert(idx >= 0 && idx < Elements(tx->regs.vT));
739 if (ureg_src_is_undef(tx->regs.vT[idx]))
740 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
741 TGSI_INTERPOLATE_PERSPECTIVE);
742 }
743
744 static inline unsigned *
745 tx_bgnloop(struct shader_translator *tx)
746 {
747 tx->loop_depth++;
748 if (tx->loop_depth_max < tx->loop_depth)
749 tx->loop_depth_max = tx->loop_depth;
750 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
751 return &tx->loop_labels[tx->loop_depth - 1];
752 }
753
754 static inline unsigned *
755 tx_endloop(struct shader_translator *tx)
756 {
757 assert(tx->loop_depth);
758 tx->loop_depth--;
759 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
760 ureg_get_instruction_number(tx->ureg));
761 return &tx->loop_labels[tx->loop_depth];
762 }
763
764 static struct ureg_dst
765 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
766 {
767 const unsigned l = tx->loop_depth - 1;
768
769 if (!tx->loop_depth)
770 {
771 DBG("loop counter requested outside of loop\n");
772 return ureg_dst_undef();
773 }
774
775 if (ureg_dst_is_undef(tx->regs.rL[l])) {
776 /* loop or rep ctr creation */
777 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
778 tx->loop_or_rep[l] = loop_or_rep;
779 }
780 /* loop - rep - endloop - endrep not allowed */
781 assert(tx->loop_or_rep[l] == loop_or_rep);
782
783 return tx->regs.rL[l];
784 }
785
786 static struct ureg_src
787 tx_get_loopal(struct shader_translator *tx)
788 {
789 int loop_level = tx->loop_depth - 1;
790
791 while (loop_level >= 0) {
792 /* handle loop - rep - endrep - endloop case */
793 if (tx->loop_or_rep[loop_level])
794 /* the value is in the loop counter y component (nine implementation) */
795 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
796 loop_level--;
797 }
798
799 DBG("aL counter requested outside of loop\n");
800 return ureg_src_undef();
801 }
802
803 static inline unsigned *
804 tx_cond(struct shader_translator *tx)
805 {
806 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
807 tx->cond_depth++;
808 return &tx->cond_labels[tx->cond_depth - 1];
809 }
810
811 static inline unsigned *
812 tx_elsecond(struct shader_translator *tx)
813 {
814 assert(tx->cond_depth);
815 return &tx->cond_labels[tx->cond_depth - 1];
816 }
817
818 static inline void
819 tx_endcond(struct shader_translator *tx)
820 {
821 assert(tx->cond_depth);
822 tx->cond_depth--;
823 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
824 ureg_get_instruction_number(tx->ureg));
825 }
826
827 static inline struct ureg_dst
828 nine_ureg_dst_register(unsigned file, int index)
829 {
830 return ureg_dst(ureg_src_register(file, index));
831 }
832
833 static struct ureg_src
834 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
835 {
836 struct ureg_program *ureg = tx->ureg;
837 struct ureg_src src;
838 struct ureg_dst tmp;
839
840 switch (param->file)
841 {
842 case D3DSPR_TEMP:
843 assert(!param->rel);
844 tx_temp_alloc(tx, param->idx);
845 src = ureg_src(tx->regs.r[param->idx]);
846 break;
847 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
848 case D3DSPR_ADDR:
849 assert(!param->rel);
850 if (IS_VS) {
851 assert(param->idx == 0);
852 /* the address register (vs only) must be
853 * assigned before use */
854 assert(!ureg_dst_is_undef(tx->regs.a0));
855 /* Round to lowest for vs1.1 (contrary to the doc), else
856 * round to nearest */
857 if (tx->version.major < 2 && tx->version.minor < 2)
858 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
859 else
860 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
861 src = ureg_src(tx->regs.address);
862 } else {
863 if (tx->version.major < 2 && tx->version.minor < 4) {
864 /* no subroutines, so should be defined */
865 src = ureg_src(tx->regs.tS[param->idx]);
866 } else {
867 tx_texcoord_alloc(tx, param->idx);
868 src = tx->regs.vT[param->idx];
869 }
870 }
871 break;
872 case D3DSPR_INPUT:
873 if (IS_VS) {
874 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
875 } else {
876 if (tx->version.major < 3) {
877 assert(!param->rel);
878 src = ureg_DECL_fs_input_cyl_centroid(
879 ureg, TGSI_SEMANTIC_COLOR, param->idx,
880 TGSI_INTERPOLATE_COLOR, 0,
881 tx->info->force_color_in_centroid ?
882 TGSI_INTERPOLATE_LOC_CENTROID : 0,
883 0, 1);
884 } else {
885 assert(!param->rel); /* TODO */
886 assert(param->idx < Elements(tx->regs.v));
887 src = tx->regs.v[param->idx];
888 }
889 }
890 break;
891 case D3DSPR_PREDICATE:
892 assert(!param->rel);
893 tx_pred_alloc(tx, param->idx);
894 src = ureg_src(tx->regs.p);
895 break;
896 case D3DSPR_SAMPLER:
897 assert(param->mod == NINED3DSPSM_NONE);
898 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
899 assert(!param->rel);
900 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
901 break;
902 case D3DSPR_CONST:
903 assert(!param->rel || IS_VS);
904 if (param->rel)
905 tx->indirect_const_access = TRUE;
906 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
907 if (!param->rel)
908 nine_info_mark_const_f_used(tx->info, param->idx);
909 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
910 }
911 if (!IS_VS && tx->version.major < 2) {
912 /* ps 1.X clamps constants */
913 tmp = tx_scratch(tx);
914 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
915 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
916 src = ureg_src(tmp);
917 }
918 break;
919 case D3DSPR_CONST2:
920 case D3DSPR_CONST3:
921 case D3DSPR_CONST4:
922 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
923 assert(!"CONST2/3/4");
924 src = ureg_imm1f(ureg, 0.0f);
925 break;
926 case D3DSPR_CONSTINT:
927 /* relative adressing only possible for float constants in vs */
928 assert(!param->rel);
929 if (!tx_lconsti(tx, &src, param->idx)) {
930 nine_info_mark_const_i_used(tx->info, param->idx);
931 src = ureg_src_register(TGSI_FILE_CONSTANT,
932 tx->info->const_i_base + param->idx);
933 }
934 break;
935 case D3DSPR_CONSTBOOL:
936 assert(!param->rel);
937 if (!tx_lconstb(tx, &src, param->idx)) {
938 char r = param->idx / 4;
939 char s = param->idx & 3;
940 nine_info_mark_const_b_used(tx->info, param->idx);
941 src = ureg_src_register(TGSI_FILE_CONSTANT,
942 tx->info->const_b_base + r);
943 src = ureg_swizzle(src, s, s, s, s);
944 }
945 break;
946 case D3DSPR_LOOP:
947 if (ureg_dst_is_undef(tx->regs.address))
948 tx->regs.address = ureg_DECL_address(ureg);
949 if (!tx->native_integers)
950 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
951 else
952 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
953 src = ureg_src(tx->regs.address);
954 break;
955 case D3DSPR_MISCTYPE:
956 switch (param->idx) {
957 case D3DSMO_POSITION:
958 if (ureg_src_is_undef(tx->regs.vPos)) {
959 if (tx->wpos_is_sysval) {
960 tx->regs.vPos =
961 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
962 } else {
963 tx->regs.vPos =
964 ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
965 TGSI_INTERPOLATE_LINEAR);
966 }
967 }
968 if (tx->shift_wpos) {
969 /* TODO: do this only once */
970 struct ureg_dst wpos = tx_scratch(tx);
971 ureg_SUB(ureg, wpos, tx->regs.vPos,
972 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
973 src = ureg_src(wpos);
974 } else {
975 src = tx->regs.vPos;
976 }
977 break;
978 case D3DSMO_FACE:
979 if (ureg_src_is_undef(tx->regs.vFace)) {
980 if (tx->face_is_sysval_integer) {
981 tmp = tx_scratch(tx);
982 tx->regs.vFace =
983 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
984
985 /* convert bool to float */
986 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
987 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
988 tx->regs.vFace = ureg_src(tmp);
989 } else {
990 tx->regs.vFace = ureg_DECL_fs_input(ureg,
991 TGSI_SEMANTIC_FACE, 0,
992 TGSI_INTERPOLATE_CONSTANT);
993 }
994 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
995 }
996 src = tx->regs.vFace;
997 break;
998 default:
999 assert(!"invalid src D3DSMO");
1000 break;
1001 }
1002 assert(!param->rel);
1003 break;
1004 case D3DSPR_TEMPFLOAT16:
1005 break;
1006 default:
1007 assert(!"invalid src D3DSPR");
1008 }
1009 if (param->rel)
1010 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1011
1012 switch (param->mod) {
1013 case NINED3DSPSM_DW:
1014 tmp = tx_scratch(tx);
1015 /* NOTE: app is not allowed to read w with this modifier */
1016 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
1017 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1018 src = ureg_src(tmp);
1019 break;
1020 case NINED3DSPSM_DZ:
1021 tmp = tx_scratch(tx);
1022 /* NOTE: app is not allowed to read z with this modifier */
1023 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
1024 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1025 src = ureg_src(tmp);
1026 break;
1027 default:
1028 break;
1029 }
1030
1031 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1032 src = ureg_swizzle(src,
1033 (param->swizzle >> 0) & 0x3,
1034 (param->swizzle >> 2) & 0x3,
1035 (param->swizzle >> 4) & 0x3,
1036 (param->swizzle >> 6) & 0x3);
1037
1038 switch (param->mod) {
1039 case NINED3DSPSM_ABS:
1040 src = ureg_abs(src);
1041 break;
1042 case NINED3DSPSM_ABSNEG:
1043 src = ureg_negate(ureg_abs(src));
1044 break;
1045 case NINED3DSPSM_NEG:
1046 src = ureg_negate(src);
1047 break;
1048 case NINED3DSPSM_BIAS:
1049 tmp = tx_scratch(tx);
1050 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
1051 src = ureg_src(tmp);
1052 break;
1053 case NINED3DSPSM_BIASNEG:
1054 tmp = tx_scratch(tx);
1055 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
1056 src = ureg_src(tmp);
1057 break;
1058 case NINED3DSPSM_NOT:
1059 if (tx->native_integers) {
1060 tmp = tx_scratch(tx);
1061 ureg_NOT(ureg, tmp, src);
1062 src = ureg_src(tmp);
1063 break;
1064 }
1065 /* fall through */
1066 case NINED3DSPSM_COMP:
1067 tmp = tx_scratch(tx);
1068 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1069 src = ureg_src(tmp);
1070 break;
1071 case NINED3DSPSM_DZ:
1072 case NINED3DSPSM_DW:
1073 /* Already handled*/
1074 break;
1075 case NINED3DSPSM_SIGN:
1076 tmp = tx_scratch(tx);
1077 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1078 src = ureg_src(tmp);
1079 break;
1080 case NINED3DSPSM_SIGNNEG:
1081 tmp = tx_scratch(tx);
1082 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1083 src = ureg_src(tmp);
1084 break;
1085 case NINED3DSPSM_X2:
1086 tmp = tx_scratch(tx);
1087 ureg_ADD(ureg, tmp, src, src);
1088 src = ureg_src(tmp);
1089 break;
1090 case NINED3DSPSM_X2NEG:
1091 tmp = tx_scratch(tx);
1092 ureg_ADD(ureg, tmp, src, src);
1093 src = ureg_negate(ureg_src(tmp));
1094 break;
1095 default:
1096 assert(param->mod == NINED3DSPSM_NONE);
1097 break;
1098 }
1099
1100 return src;
1101 }
1102
1103 static struct ureg_dst
1104 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1105 {
1106 struct ureg_dst dst;
1107
1108 switch (param->file)
1109 {
1110 case D3DSPR_TEMP:
1111 assert(!param->rel);
1112 tx_temp_alloc(tx, param->idx);
1113 dst = tx->regs.r[param->idx];
1114 break;
1115 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1116 case D3DSPR_ADDR:
1117 assert(!param->rel);
1118 if (tx->version.major < 2 && !IS_VS) {
1119 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1120 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1121 dst = tx->regs.tS[param->idx];
1122 } else
1123 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1124 tx_texcoord_alloc(tx, param->idx);
1125 dst = ureg_dst(tx->regs.vT[param->idx]);
1126 } else {
1127 tx_addr_alloc(tx, param->idx);
1128 dst = tx->regs.a0;
1129 }
1130 break;
1131 case D3DSPR_RASTOUT:
1132 assert(!param->rel);
1133 switch (param->idx) {
1134 case 0:
1135 if (ureg_dst_is_undef(tx->regs.oPos))
1136 tx->regs.oPos =
1137 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1138 dst = tx->regs.oPos;
1139 break;
1140 case 1:
1141 if (ureg_dst_is_undef(tx->regs.oFog))
1142 tx->regs.oFog =
1143 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1144 dst = tx->regs.oFog;
1145 break;
1146 case 2:
1147 if (ureg_dst_is_undef(tx->regs.oPts))
1148 tx->regs.oPts =
1149 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1150 dst = tx->regs.oPts;
1151 break;
1152 default:
1153 assert(0);
1154 break;
1155 }
1156 break;
1157 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1158 case D3DSPR_OUTPUT:
1159 if (tx->version.major < 3) {
1160 assert(!param->rel);
1161 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1162 } else {
1163 assert(!param->rel); /* TODO */
1164 assert(param->idx < Elements(tx->regs.o));
1165 dst = tx->regs.o[param->idx];
1166 }
1167 break;
1168 case D3DSPR_ATTROUT: /* VS */
1169 case D3DSPR_COLOROUT: /* PS */
1170 assert(param->idx >= 0 && param->idx < 4);
1171 assert(!param->rel);
1172 tx->info->rt_mask |= 1 << param->idx;
1173 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1174 /* ps < 3: oCol[0] will have fog blending afterward */
1175 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1176 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1177 } else {
1178 tx->regs.oCol[param->idx] =
1179 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1180 }
1181 }
1182 dst = tx->regs.oCol[param->idx];
1183 if (IS_VS && tx->version.major < 3)
1184 dst = ureg_saturate(dst);
1185 break;
1186 case D3DSPR_DEPTHOUT:
1187 assert(!param->rel);
1188 if (ureg_dst_is_undef(tx->regs.oDepth))
1189 tx->regs.oDepth =
1190 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1191 TGSI_WRITEMASK_Z, 0, 1);
1192 dst = tx->regs.oDepth; /* XXX: must write .z component */
1193 break;
1194 case D3DSPR_PREDICATE:
1195 assert(!param->rel);
1196 tx_pred_alloc(tx, param->idx);
1197 dst = tx->regs.p;
1198 break;
1199 case D3DSPR_TEMPFLOAT16:
1200 DBG("unhandled D3DSPR: %u\n", param->file);
1201 break;
1202 default:
1203 assert(!"invalid dst D3DSPR");
1204 break;
1205 }
1206 if (param->rel)
1207 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1208
1209 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1210 dst = ureg_writemask(dst, param->mask);
1211 if (param->mod & NINED3DSPDM_SATURATE)
1212 dst = ureg_saturate(dst);
1213
1214 return dst;
1215 }
1216
1217 static struct ureg_dst
1218 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1219 {
1220 if (param->shift) {
1221 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1222 return tx->regs.tdst;
1223 }
1224 return _tx_dst_param(tx, param);
1225 }
1226
1227 static void
1228 tx_apply_dst0_modifiers(struct shader_translator *tx)
1229 {
1230 struct ureg_dst rdst;
1231 float f;
1232
1233 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1234 return;
1235 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1236
1237 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1238
1239 if (tx->insn.dst[0].shift < 0)
1240 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1241 else
1242 f = 1 << tx->insn.dst[0].shift;
1243
1244 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1245 }
1246
1247 static struct ureg_src
1248 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1249 {
1250 struct ureg_src src;
1251
1252 assert(!param->shift);
1253 assert(!(param->mod & NINED3DSPDM_SATURATE));
1254
1255 switch (param->file) {
1256 case D3DSPR_INPUT:
1257 if (IS_VS) {
1258 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1259 } else {
1260 assert(!param->rel);
1261 assert(param->idx < Elements(tx->regs.v));
1262 src = tx->regs.v[param->idx];
1263 }
1264 break;
1265 default:
1266 src = ureg_src(tx_dst_param(tx, param));
1267 break;
1268 }
1269 if (param->rel)
1270 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1271
1272 if (!param->mask)
1273 WARN("mask is 0, using identity swizzle\n");
1274
1275 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1276 char s[4];
1277 int n;
1278 int c;
1279 for (n = 0, c = 0; c < 4; ++c)
1280 if (param->mask & (1 << c))
1281 s[n++] = c;
1282 assert(n);
1283 for (c = n; c < 4; ++c)
1284 s[c] = s[n - 1];
1285 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1286 }
1287 return src;
1288 }
1289
1290 static HRESULT
1291 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1292 {
1293 struct ureg_program *ureg = tx->ureg;
1294 struct ureg_dst dst;
1295 struct ureg_src src[2];
1296 struct sm1_src_param *src_mat = &tx->insn.src[1];
1297 unsigned i;
1298
1299 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1300 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1301
1302 for (i = 0; i < n; i++)
1303 {
1304 const unsigned m = (1 << i);
1305
1306 src[1] = tx_src_param(tx, src_mat);
1307 src_mat->idx++;
1308
1309 if (!(dst.WriteMask & m))
1310 continue;
1311
1312 /* XXX: src == dst case ? */
1313
1314 switch (k) {
1315 case 3:
1316 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1317 break;
1318 case 4:
1319 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1320 break;
1321 default:
1322 DBG("invalid operation: M%ux%u\n", m, n);
1323 break;
1324 }
1325 }
1326
1327 return D3D_OK;
1328 }
1329
/* Expands to two zero values; presumably used as a (min, max) shader version
 * pair for instructions a shader type does not support -- confirm at the
 * use sites. */
#define VNOTSUPPORTED 0, 0
/* Pack a version as (major << 8) | minor. */
#define V(maj, min) (((maj) << 8) | (min))
1332
1333 static inline const char *
1334 d3dsio_to_string( unsigned opcode )
1335 {
1336 static const char *names[] = {
1337 "NOP",
1338 "MOV",
1339 "ADD",
1340 "SUB",
1341 "MAD",
1342 "MUL",
1343 "RCP",
1344 "RSQ",
1345 "DP3",
1346 "DP4",
1347 "MIN",
1348 "MAX",
1349 "SLT",
1350 "SGE",
1351 "EXP",
1352 "LOG",
1353 "LIT",
1354 "DST",
1355 "LRP",
1356 "FRC",
1357 "M4x4",
1358 "M4x3",
1359 "M3x4",
1360 "M3x3",
1361 "M3x2",
1362 "CALL",
1363 "CALLNZ",
1364 "LOOP",
1365 "RET",
1366 "ENDLOOP",
1367 "LABEL",
1368 "DCL",
1369 "POW",
1370 "CRS",
1371 "SGN",
1372 "ABS",
1373 "NRM",
1374 "SINCOS",
1375 "REP",
1376 "ENDREP",
1377 "IF",
1378 "IFC",
1379 "ELSE",
1380 "ENDIF",
1381 "BREAK",
1382 "BREAKC",
1383 "MOVA",
1384 "DEFB",
1385 "DEFI",
1386 NULL,
1387 NULL,
1388 NULL,
1389 NULL,
1390 NULL,
1391 NULL,
1392 NULL,
1393 NULL,
1394 NULL,
1395 NULL,
1396 NULL,
1397 NULL,
1398 NULL,
1399 NULL,
1400 NULL,
1401 "TEXCOORD",
1402 "TEXKILL",
1403 "TEX",
1404 "TEXBEM",
1405 "TEXBEML",
1406 "TEXREG2AR",
1407 "TEXREG2GB",
1408 "TEXM3x2PAD",
1409 "TEXM3x2TEX",
1410 "TEXM3x3PAD",
1411 "TEXM3x3TEX",
1412 NULL,
1413 "TEXM3x3SPEC",
1414 "TEXM3x3VSPEC",
1415 "EXPP",
1416 "LOGP",
1417 "CND",
1418 "DEF",
1419 "TEXREG2RGB",
1420 "TEXDP3TEX",
1421 "TEXM3x2DEPTH",
1422 "TEXDP3",
1423 "TEXM3x3",
1424 "TEXDEPTH",
1425 "CMP",
1426 "BEM",
1427 "DP2ADD",
1428 "DSX",
1429 "DSY",
1430 "TEXLDD",
1431 "SETP",
1432 "TEXLDL",
1433 "BREAKP"
1434 };
1435
1436 if (opcode < Elements(names)) return names[opcode];
1437
1438 switch (opcode) {
1439 case D3DSIO_PHASE: return "PHASE";
1440 case D3DSIO_COMMENT: return "COMMENT";
1441 case D3DSIO_END: return "END";
1442 default:
1443 return NULL;
1444 }
1445 }
1446
/* All-zero initializer for an instruction description. NOTE(review): the
 * two { 0, 0 } pairs presumably are the vertex/fragment version ranges
 * tested by IS_VALID_INSTRUCTION -- confirm against the struct. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero if any of the four version bounds of inst is non-zero. */
#define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
                                     (inst).vert_version.max | \
                                     (inst).frag_version.min | \
                                     (inst).frag_version.max)

/* Name of the translation function for instruction `name`. */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the translation function for instruction `name`;
 * the function body follows the macro invocation and receives the
 * translator as `tx`. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration; the definition is not part of this excerpt. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1462
1463 DECL_SPECIAL(M4x4)
1464 {
1465 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1466 }
1467
1468 DECL_SPECIAL(M4x3)
1469 {
1470 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1471 }
1472
1473 DECL_SPECIAL(M3x4)
1474 {
1475 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1476 }
1477
1478 DECL_SPECIAL(M3x3)
1479 {
1480 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1481 }
1482
1483 DECL_SPECIAL(M3x2)
1484 {
1485 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1486 }
1487
1488 DECL_SPECIAL(CMP)
1489 {
1490 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1491 tx_src_param(tx, &tx->insn.src[0]),
1492 tx_src_param(tx, &tx->insn.src[2]),
1493 tx_src_param(tx, &tx->insn.src[1]));
1494 return D3D_OK;
1495 }
1496
1497 DECL_SPECIAL(CND)
1498 {
1499 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1500 struct ureg_dst cgt;
1501 struct ureg_src cnd;
1502
1503 /* the coissue flag was a tip for compilers to advise to
1504 * execute two operations at the same time, in cases
1505 * the two executions had same dst with different channels.
1506 * It has no effect on current hw. However it seems CND
1507 * is affected. The handling of this very specific case
1508 * handled below mimick wine behaviour */
1509 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1510 ureg_MOV(tx->ureg,
1511 dst, tx_src_param(tx, &tx->insn.src[1]));
1512 return D3D_OK;
1513 }
1514
1515 cnd = tx_src_param(tx, &tx->insn.src[0]);
1516 cgt = tx_scratch(tx);
1517
1518 if (tx->version.major == 1 && tx->version.minor < 4)
1519 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1520
1521 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1522
1523 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1524 tx_src_param(tx, &tx->insn.src[1]),
1525 tx_src_param(tx, &tx->insn.src[2]));
1526 return D3D_OK;
1527 }
1528
1529 DECL_SPECIAL(CALL)
1530 {
1531 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1532 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1533 return D3D_OK;
1534 }
1535
1536 DECL_SPECIAL(CALLNZ)
1537 {
1538 struct ureg_program *ureg = tx->ureg;
1539 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1540
1541 if (!tx->native_integers)
1542 ureg_IF(ureg, src, tx_cond(tx));
1543 else
1544 ureg_UIF(ureg, src, tx_cond(tx));
1545 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1546 tx_endcond(tx);
1547 ureg_ENDIF(ureg);
1548 return D3D_OK;
1549 }
1550
1551 DECL_SPECIAL(LOOP)
1552 {
1553 struct ureg_program *ureg = tx->ureg;
1554 unsigned *label;
1555 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1556 struct ureg_dst ctr;
1557 struct ureg_dst tmp;
1558 struct ureg_src ctrx;
1559
1560 label = tx_bgnloop(tx);
1561 ctr = tx_get_loopctr(tx, TRUE);
1562 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1563
1564 /* src: num_iterations - start_value of al - step for al - 0 */
1565 ureg_MOV(ureg, ctr, src);
1566 ureg_BGNLOOP(tx->ureg, label);
1567 tmp = tx_scratch_scalar(tx);
1568 /* Initially ctr.x contains the number of iterations.
1569 * ctr.y will contain the updated value of al.
1570 * We decrease ctr.x at the end of every iteration,
1571 * and stop when it reaches 0. */
1572
1573 if (!tx->native_integers) {
1574 /* case src and ctr contain floats */
1575 /* to avoid precision issue, we stop when ctr <= 0.5 */
1576 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1577 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1578 } else {
1579 /* case src and ctr contain integers */
1580 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1581 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1582 }
1583 ureg_BRK(ureg);
1584 tx_endcond(tx);
1585 ureg_ENDIF(ureg);
1586 return D3D_OK;
1587 }
1588
1589 DECL_SPECIAL(RET)
1590 {
1591 ureg_RET(tx->ureg);
1592 return D3D_OK;
1593 }
1594
1595 DECL_SPECIAL(ENDLOOP)
1596 {
1597 struct ureg_program *ureg = tx->ureg;
1598 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1599 struct ureg_dst dst_ctrx, dst_al;
1600 struct ureg_src src_ctr, al_counter;
1601
1602 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1603 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1604 src_ctr = ureg_src(ctr);
1605 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1606
1607 /* ctr.x -= 1
1608 * ctr.y (aL) += step */
1609 if (!tx->native_integers) {
1610 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1611 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1612 } else {
1613 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1614 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1615 }
1616 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1617 return D3D_OK;
1618 }
1619
1620 DECL_SPECIAL(LABEL)
1621 {
1622 unsigned k = tx->num_inst_labels;
1623 unsigned n = tx->insn.src[0].idx;
1624 assert(n < 2048);
1625 if (n >= k)
1626 tx->inst_labels = REALLOC(tx->inst_labels,
1627 k * sizeof(tx->inst_labels[0]),
1628 n * sizeof(tx->inst_labels[0]));
1629
1630 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1631 return D3D_OK;
1632 }
1633
1634 DECL_SPECIAL(SINCOS)
1635 {
1636 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1637 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1638
1639 assert(!(dst.WriteMask & 0xc));
1640
1641 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1642 ureg_SCS(tx->ureg, dst, src);
1643 return D3D_OK;
1644 }
1645
1646 DECL_SPECIAL(SGN)
1647 {
1648 ureg_SSG(tx->ureg,
1649 tx_dst_param(tx, &tx->insn.dst[0]),
1650 tx_src_param(tx, &tx->insn.src[0]));
1651 return D3D_OK;
1652 }
1653
1654 DECL_SPECIAL(REP)
1655 {
1656 struct ureg_program *ureg = tx->ureg;
1657 unsigned *label;
1658 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1659 struct ureg_dst ctr;
1660 struct ureg_dst tmp;
1661 struct ureg_src ctrx;
1662
1663 label = tx_bgnloop(tx);
1664 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1665 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1666
1667 /* NOTE: rep must be constant, so we don't have to save the count */
1668 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1669
1670 /* rep: num_iterations - 0 - 0 - 0 */
1671 ureg_MOV(ureg, ctr, rep);
1672 ureg_BGNLOOP(ureg, label);
1673 tmp = tx_scratch_scalar(tx);
1674 /* Initially ctr.x contains the number of iterations.
1675 * We decrease ctr.x at the end of every iteration,
1676 * and stop when it reaches 0. */
1677
1678 if (!tx->native_integers) {
1679 /* case src and ctr contain floats */
1680 /* to avoid precision issue, we stop when ctr <= 0.5 */
1681 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1682 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1683 } else {
1684 /* case src and ctr contain integers */
1685 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1686 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1687 }
1688 ureg_BRK(ureg);
1689 tx_endcond(tx);
1690 ureg_ENDIF(ureg);
1691
1692 return D3D_OK;
1693 }
1694
1695 DECL_SPECIAL(ENDREP)
1696 {
1697 struct ureg_program *ureg = tx->ureg;
1698 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1699 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1700 struct ureg_src src_ctr = ureg_src(ctr);
1701
1702 /* ctr.x -= 1 */
1703 if (!tx->native_integers)
1704 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1705 else
1706 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1707
1708 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1709 return D3D_OK;
1710 }
1711
1712 DECL_SPECIAL(ENDIF)
1713 {
1714 tx_endcond(tx);
1715 ureg_ENDIF(tx->ureg);
1716 return D3D_OK;
1717 }
1718
1719 DECL_SPECIAL(IF)
1720 {
1721 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1722
1723 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1724 ureg_UIF(tx->ureg, src, tx_cond(tx));
1725 else
1726 ureg_IF(tx->ureg, src, tx_cond(tx));
1727
1728 return D3D_OK;
1729 }
1730
1731 static inline unsigned
1732 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1733 {
1734 switch (flags) {
1735 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1736 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1737 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1738 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1739 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1740 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1741 default:
1742 assert(!"invalid comparison flags");
1743 return TGSI_OPCODE_SGT;
1744 }
1745 }
1746
1747 DECL_SPECIAL(IFC)
1748 {
1749 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1750 struct ureg_src src[2];
1751 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1752 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1753 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1754 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1755 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1756 return D3D_OK;
1757 }
1758
1759 DECL_SPECIAL(ELSE)
1760 {
1761 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1762 return D3D_OK;
1763 }
1764
1765 DECL_SPECIAL(BREAKC)
1766 {
1767 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1768 struct ureg_src src[2];
1769 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1770 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1771 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1772 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1773 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1774 ureg_BRK(tx->ureg);
1775 tx_endcond(tx);
1776 ureg_ENDIF(tx->ureg);
1777 return D3D_OK;
1778 }
1779
/* Printable names of the D3DDECLUSAGE values, indexed by the usage value.
 * Used for debug dumps of DCL instructions. */
static const char *sm1_declusage_names[] =
{
    [D3DDECLUSAGE_POSITION] = "POSITION",
    [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
    [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
    [D3DDECLUSAGE_NORMAL] = "NORMAL",
    [D3DDECLUSAGE_PSIZE] = "PSIZE",
    [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
    [D3DDECLUSAGE_TANGENT] = "TANGENT",
    [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
    [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
    [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
    [D3DDECLUSAGE_COLOR] = "COLOR",
    [D3DDECLUSAGE_FOG] = "FOG",
    [D3DDECLUSAGE_DEPTH] = "DEPTH",
    [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
1797
1798 static inline unsigned
1799 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1800 {
1801 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1802 }
1803
1804 static void
1805 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1806 boolean tc,
1807 struct sm1_semantic *dcl)
1808 {
1809 BYTE index = dcl->usage_idx;
1810
1811 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1812 * we match to a TGSI_SEMANTIC_GENERIC with index.
1813 *
1814 * The index can be anything UINT16 and usage_idx is BYTE,
1815 * so we can fit everything. It doesn't matter if indices
1816 * are close together or low.
1817 *
1818 *
1819 * POSITION >= 1: 10 * index + 6
1820 * COLOR >= 2: 10 * (index-1) + 7
1821 * TEXCOORD[0..15]: index
1822 * BLENDWEIGHT: 10 * index + 18
1823 * BLENDINDICES: 10 * index + 19
1824 * NORMAL: 10 * index + 20
1825 * TANGENT: 10 * index + 21
1826 * BINORMAL: 10 * index + 22
1827 * TESSFACTOR: 10 * index + 23
1828 */
1829
1830 switch (dcl->usage) {
1831 case D3DDECLUSAGE_POSITION:
1832 case D3DDECLUSAGE_POSITIONT:
1833 case D3DDECLUSAGE_DEPTH:
1834 if (index == 0) {
1835 sem->Name = TGSI_SEMANTIC_POSITION;
1836 sem->Index = 0;
1837 } else {
1838 sem->Name = TGSI_SEMANTIC_GENERIC;
1839 sem->Index = 10 * index + 6;
1840 }
1841 break;
1842 case D3DDECLUSAGE_COLOR:
1843 if (index < 2) {
1844 sem->Name = TGSI_SEMANTIC_COLOR;
1845 sem->Index = index;
1846 } else {
1847 sem->Name = TGSI_SEMANTIC_GENERIC;
1848 sem->Index = 10 * (index-1) + 7;
1849 }
1850 break;
1851 case D3DDECLUSAGE_FOG:
1852 assert(index == 0);
1853 sem->Name = TGSI_SEMANTIC_FOG;
1854 sem->Index = 0;
1855 break;
1856 case D3DDECLUSAGE_PSIZE:
1857 assert(index == 0);
1858 sem->Name = TGSI_SEMANTIC_PSIZE;
1859 sem->Index = 0;
1860 break;
1861 case D3DDECLUSAGE_TEXCOORD:
1862 assert(index < 16);
1863 if (index < 8 && tc)
1864 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1865 else
1866 sem->Name = TGSI_SEMANTIC_GENERIC;
1867 sem->Index = index;
1868 break;
1869 case D3DDECLUSAGE_BLENDWEIGHT:
1870 sem->Name = TGSI_SEMANTIC_GENERIC;
1871 sem->Index = 10 * index + 18;
1872 break;
1873 case D3DDECLUSAGE_BLENDINDICES:
1874 sem->Name = TGSI_SEMANTIC_GENERIC;
1875 sem->Index = 10 * index + 19;
1876 break;
1877 case D3DDECLUSAGE_NORMAL:
1878 sem->Name = TGSI_SEMANTIC_GENERIC;
1879 sem->Index = 10 * index + 20;
1880 break;
1881 case D3DDECLUSAGE_TANGENT:
1882 sem->Name = TGSI_SEMANTIC_GENERIC;
1883 sem->Index = 10 * index + 21;
1884 break;
1885 case D3DDECLUSAGE_BINORMAL:
1886 sem->Name = TGSI_SEMANTIC_GENERIC;
1887 sem->Index = 10 * index + 22;
1888 break;
1889 case D3DDECLUSAGE_TESSFACTOR:
1890 sem->Name = TGSI_SEMANTIC_GENERIC;
1891 sem->Index = 10 * index + 23;
1892 break;
1893 case D3DDECLUSAGE_SAMPLE:
1894 sem->Name = TGSI_SEMANTIC_COUNT;
1895 sem->Index = 0;
1896 break;
1897 default:
1898 unreachable(!"Invalid DECLUSAGE.");
1899 break;
1900 }
1901 }
1902
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT;
 * these are the values the sampler-type helpers below switch on. */
#define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1907 static inline unsigned
1908 d3dstt_to_tgsi_tex(BYTE sampler_type)
1909 {
1910 switch (sampler_type) {
1911 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1912 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1913 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1914 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1915 default:
1916 assert(0);
1917 return TGSI_TEXTURE_UNKNOWN;
1918 }
1919 }
1920 static inline unsigned
1921 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1922 {
1923 switch (sampler_type) {
1924 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1925 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1926 case NINED3DSTT_VOLUME:
1927 case NINED3DSTT_CUBE:
1928 default:
1929 assert(0);
1930 return TGSI_TEXTURE_UNKNOWN;
1931 }
1932 }
1933 static inline unsigned
1934 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1935 {
1936 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1937 case 1: return TGSI_TEXTURE_1D;
1938 case 0: return TGSI_TEXTURE_2D;
1939 case 3: return TGSI_TEXTURE_3D;
1940 default:
1941 return TGSI_TEXTURE_CUBE;
1942 }
1943 }
1944
1945 static const char *
1946 sm1_sampler_type_name(BYTE sampler_type)
1947 {
1948 switch (sampler_type) {
1949 case NINED3DSTT_1D: return "1D";
1950 case NINED3DSTT_2D: return "2D";
1951 case NINED3DSTT_VOLUME: return "VOLUME";
1952 case NINED3DSTT_CUBE: return "CUBE";
1953 default:
1954 return "(D3DSTT_?)";
1955 }
1956 }
1957
1958 static inline unsigned
1959 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1960 {
1961 switch (sem->Name) {
1962 case TGSI_SEMANTIC_POSITION:
1963 case TGSI_SEMANTIC_NORMAL:
1964 return TGSI_INTERPOLATE_LINEAR;
1965 case TGSI_SEMANTIC_BCOLOR:
1966 case TGSI_SEMANTIC_COLOR:
1967 return TGSI_INTERPOLATE_COLOR;
1968 case TGSI_SEMANTIC_FOG:
1969 case TGSI_SEMANTIC_GENERIC:
1970 case TGSI_SEMANTIC_TEXCOORD:
1971 case TGSI_SEMANTIC_CLIPDIST:
1972 case TGSI_SEMANTIC_CLIPVERTEX:
1973 return TGSI_INTERPOLATE_PERSPECTIVE;
1974 case TGSI_SEMANTIC_EDGEFLAG:
1975 case TGSI_SEMANTIC_FACE:
1976 case TGSI_SEMANTIC_INSTANCEID:
1977 case TGSI_SEMANTIC_PCOORD:
1978 case TGSI_SEMANTIC_PRIMID:
1979 case TGSI_SEMANTIC_PSIZE:
1980 case TGSI_SEMANTIC_VERTEXID:
1981 return TGSI_INTERPOLATE_CONSTANT;
1982 default:
1983 assert(0);
1984 return TGSI_INTERPOLATE_CONSTANT;
1985 }
1986 }
1987
1988 DECL_SPECIAL(DCL)
1989 {
1990 struct ureg_program *ureg = tx->ureg;
1991 boolean is_input;
1992 boolean is_sampler;
1993 struct tgsi_declaration_semantic tgsi;
1994 struct sm1_semantic sem;
1995 sm1_read_semantic(tx, &sem);
1996
1997 is_input = sem.reg.file == D3DSPR_INPUT;
1998 is_sampler =
1999 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2000
2001 DUMP("DCL ");
2002 sm1_dump_dst_param(&sem.reg);
2003 if (is_sampler)
2004 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2005 else
2006 if (tx->version.major >= 3)
2007 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2008 else
2009 if (sem.usage | sem.usage_idx)
2010 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2011 else
2012 DUMP("\n");
2013
2014 if (is_sampler) {
2015 const unsigned m = 1 << sem.reg.idx;
2016 ureg_DECL_sampler(ureg, sem.reg.idx);
2017 tx->info->sampler_mask |= m;
2018 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2019 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2020 d3dstt_to_tgsi_tex(sem.sampler_type);
2021 return D3D_OK;
2022 }
2023
2024 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2025 if (IS_VS) {
2026 if (is_input) {
2027 /* linkage outside of shader with vertex declaration */
2028 ureg_DECL_vs_input(ureg, sem.reg.idx);
2029 assert(sem.reg.idx < Elements(tx->info->input_map));
2030 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2031 tx->info->num_inputs = sem.reg.idx + 1;
2032 /* NOTE: preserving order in case of indirect access */
2033 } else
2034 if (tx->version.major >= 3) {
2035 /* SM2 output semantic determined by file */
2036 assert(sem.reg.mask != 0);
2037 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2038 tx->info->position_t = TRUE;
2039 assert(sem.reg.idx < Elements(tx->regs.o));
2040 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2041 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2042
2043 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
2044 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2045 }
2046 } else {
2047 if (is_input && tx->version.major >= 3) {
2048 unsigned interp_location = 0;
2049 /* SM3 only, SM2 input semantic determined by file */
2050 assert(sem.reg.idx < Elements(tx->regs.v));
2051 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2052 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2053 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2054 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2055 ureg, tgsi.Name, tgsi.Index,
2056 nine_tgsi_to_interp_mode(&tgsi),
2057 0, /* cylwrap */
2058 interp_location, 0, 1);
2059 } else
2060 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2061 /* FragColor or FragDepth */
2062 assert(sem.reg.mask != 0);
2063 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2064 0, 1);
2065 }
2066 }
2067 return D3D_OK;
2068 }
2069
2070 DECL_SPECIAL(DEF)
2071 {
2072 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2073 return D3D_OK;
2074 }
2075
2076 DECL_SPECIAL(DEFB)
2077 {
2078 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2079 return D3D_OK;
2080 }
2081
2082 DECL_SPECIAL(DEFI)
2083 {
2084 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2085 return D3D_OK;
2086 }
2087
2088 DECL_SPECIAL(POW)
2089 {
2090 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2091 struct ureg_src src[2] = {
2092 tx_src_param(tx, &tx->insn.src[0]),
2093 tx_src_param(tx, &tx->insn.src[1])
2094 };
2095 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2096 return D3D_OK;
2097 }
2098
2099 DECL_SPECIAL(RSQ)
2100 {
2101 struct ureg_program *ureg = tx->ureg;
2102 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2103 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2104 struct ureg_dst tmp = tx_scratch(tx);
2105 ureg_RSQ(ureg, tmp, ureg_abs(src));
2106 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2107 return D3D_OK;
2108 }
2109
2110 DECL_SPECIAL(LOG)
2111 {
2112 struct ureg_program *ureg = tx->ureg;
2113 struct ureg_dst tmp = tx_scratch_scalar(tx);
2114 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2115 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2116 ureg_LG2(ureg, tmp, ureg_abs(src));
2117 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2118 return D3D_OK;
2119 }
2120
2121 DECL_SPECIAL(LIT)
2122 {
2123 struct ureg_program *ureg = tx->ureg;
2124 struct ureg_dst tmp = tx_scratch(tx);
2125 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2126 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2127 ureg_LIT(ureg, tmp, src);
2128 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2129 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2130 * it 0^0 if src.w=0, which value is driver dependent. */
2131 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2132 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2133 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2134 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2135 return D3D_OK;
2136 }
2137
2138 DECL_SPECIAL(NRM)
2139 {
2140 struct ureg_program *ureg = tx->ureg;
2141 struct ureg_dst tmp = tx_scratch_scalar(tx);
2142 struct ureg_src nrm = tx_src_scalar(tmp);
2143 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2144 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2145 ureg_DP3(ureg, tmp, src, src);
2146 ureg_RSQ(ureg, tmp, nrm);
2147 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2148 ureg_MUL(ureg, dst, src, nrm);
2149 return D3D_OK;
2150 }
2151
2152 DECL_SPECIAL(DP2ADD)
2153 {
2154 struct ureg_dst tmp = tx_scratch_scalar(tx);
2155 struct ureg_src dp2 = tx_src_scalar(tmp);
2156 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2157 struct ureg_src src[3];
2158 int i;
2159 for (i = 0; i < 3; ++i)
2160 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2161 assert_replicate_swizzle(&src[2]);
2162
2163 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2164 ureg_ADD(tx->ureg, dst, src[2], dp2);
2165
2166 return D3D_OK;
2167 }
2168
2169 DECL_SPECIAL(TEXCOORD)
2170 {
2171 struct ureg_program *ureg = tx->ureg;
2172 const unsigned s = tx->insn.dst[0].idx;
2173 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2174
2175 tx_texcoord_alloc(tx, s);
2176 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2177 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2178
2179 return D3D_OK;
2180 }
2181
2182 DECL_SPECIAL(TEXCOORD_ps14)
2183 {
2184 struct ureg_program *ureg = tx->ureg;
2185 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2186 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2187
2188 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2189
2190 ureg_MOV(ureg, dst, src);
2191
2192 return D3D_OK;
2193 }
2194
2195 DECL_SPECIAL(TEXKILL)
2196 {
2197 struct ureg_src reg;
2198
2199 if (tx->version.major > 1 || tx->version.minor > 3) {
2200 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2201 } else {
2202 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2203 reg = tx->regs.vT[tx->insn.dst[0].idx];
2204 }
2205 if (tx->version.major < 2)
2206 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2207 ureg_KILL_IF(tx->ureg, reg);
2208
2209 return D3D_OK;
2210 }
2211
2212 DECL_SPECIAL(TEXBEM)
2213 {
2214 struct ureg_program *ureg = tx->ureg;
2215 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2216 struct ureg_dst tmp, tmp2, texcoord;
2217 struct ureg_src sample, m00, m01, m10, m11;
2218 struct ureg_src bumpenvlscale, bumpenvloffset;
2219 const int m = tx->insn.dst[0].idx;
2220 const int n = tx->insn.src[0].idx;
2221
2222 assert(tx->version.major == 1);
2223
2224 sample = ureg_DECL_sampler(ureg, m);
2225 tx->info->sampler_mask |= 1 << m;
2226
2227 tx_texcoord_alloc(tx, m);
2228
2229 tmp = tx_scratch(tx);
2230 tmp2 = tx_scratch(tx);
2231 texcoord = tx_scratch(tx);
2232 /*
2233 * Bump-env-matrix:
2234 * 00 is X
2235 * 01 is Y
2236 * 10 is Z
2237 * 11 is W
2238 */
2239 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2240 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2241 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2242 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2243 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2244
2245 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2246 if (m % 2 == 0) {
2247 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2248 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2249 } else {
2250 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2251 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2252 }
2253
2254 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2255
2256 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2257 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2258 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2259 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2260 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2261 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2262 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2263
2264 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2265 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2266 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2267 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2268 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2269 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2270 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2271
2272 /* Now the texture coordinates are in tmp.xy */
2273
2274 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2275 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2276 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2277 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2278 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2279 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2280 bumpenvlscale, bumpenvloffset);
2281 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2282 }
2283
2284 tx->info->bumpenvmat_needed = 1;
2285
2286 return D3D_OK;
2287 }
2288
2289 DECL_SPECIAL(TEXREG2AR)
2290 {
2291 struct ureg_program *ureg = tx->ureg;
2292 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2293 struct ureg_src sample;
2294 const int m = tx->insn.dst[0].idx;
2295 const int n = tx->insn.src[0].idx;
2296 assert(m >= 0 && m > n);
2297
2298 sample = ureg_DECL_sampler(ureg, m);
2299 tx->info->sampler_mask |= 1 << m;
2300 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2301
2302 return D3D_OK;
2303 }
2304
2305 DECL_SPECIAL(TEXREG2GB)
2306 {
2307 struct ureg_program *ureg = tx->ureg;
2308 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2309 struct ureg_src sample;
2310 const int m = tx->insn.dst[0].idx;
2311 const int n = tx->insn.src[0].idx;
2312 assert(m >= 0 && m > n);
2313
2314 sample = ureg_DECL_sampler(ureg, m);
2315 tx->info->sampler_mask |= 1 << m;
2316 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2317
2318 return D3D_OK;
2319 }
2320
2321 DECL_SPECIAL(TEXM3x2PAD)
2322 {
2323 return D3D_OK; /* this is just padding */
2324 }
2325
2326 DECL_SPECIAL(TEXM3x2TEX)
2327 {
2328 struct ureg_program *ureg = tx->ureg;
2329 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2330 struct ureg_src sample;
2331 const int m = tx->insn.dst[0].idx - 1;
2332 const int n = tx->insn.src[0].idx;
2333 assert(m >= 0 && m > n);
2334
2335 tx_texcoord_alloc(tx, m);
2336 tx_texcoord_alloc(tx, m+1);
2337
2338 /* performs the matrix multiplication */
2339 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2340 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2341
2342 sample = ureg_DECL_sampler(ureg, m + 1);
2343 tx->info->sampler_mask |= 1 << (m + 1);
2344 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2345
2346 return D3D_OK;
2347 }
2348
2349 DECL_SPECIAL(TEXM3x3PAD)
2350 {
2351 return D3D_OK; /* this is just padding */
2352 }
2353
2354 DECL_SPECIAL(TEXM3x3SPEC)
2355 {
2356 struct ureg_program *ureg = tx->ureg;
2357 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2358 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2359 struct ureg_src sample;
2360 struct ureg_dst tmp;
2361 const int m = tx->insn.dst[0].idx - 2;
2362 const int n = tx->insn.src[0].idx;
2363 assert(m >= 0 && m > n);
2364
2365 tx_texcoord_alloc(tx, m);
2366 tx_texcoord_alloc(tx, m+1);
2367 tx_texcoord_alloc(tx, m+2);
2368
2369 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2370 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2371 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2372
2373 sample = ureg_DECL_sampler(ureg, m + 2);
2374 tx->info->sampler_mask |= 1 << (m + 2);
2375 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2376
2377 /* At this step, dst = N = (u', w', z').
2378 * We want dst to be the texture sampled at (u'', w'', z''), with
2379 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2380 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2381 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2382 /* at this step tmp.x = 1/N.N */
2383 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2384 /* at this step tmp.y = N.E */
2385 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2386 /* at this step tmp.x = N.E/N.N */
2387 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2388 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2389 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2390 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2391 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2392
2393 return D3D_OK;
2394 }
2395
2396 DECL_SPECIAL(TEXREG2RGB)
2397 {
2398 struct ureg_program *ureg = tx->ureg;
2399 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2400 struct ureg_src sample;
2401 const int m = tx->insn.dst[0].idx;
2402 const int n = tx->insn.src[0].idx;
2403 assert(m >= 0 && m > n);
2404
2405 sample = ureg_DECL_sampler(ureg, m);
2406 tx->info->sampler_mask |= 1 << m;
2407 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2408
2409 return D3D_OK;
2410 }
2411
2412 DECL_SPECIAL(TEXDP3TEX)
2413 {
2414 struct ureg_program *ureg = tx->ureg;
2415 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2416 struct ureg_dst tmp;
2417 struct ureg_src sample;
2418 const int m = tx->insn.dst[0].idx;
2419 const int n = tx->insn.src[0].idx;
2420 assert(m >= 0 && m > n);
2421
2422 tx_texcoord_alloc(tx, m);
2423
2424 tmp = tx_scratch(tx);
2425 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2426 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2427
2428 sample = ureg_DECL_sampler(ureg, m);
2429 tx->info->sampler_mask |= 1 << m;
2430 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2431
2432 return D3D_OK;
2433 }
2434
2435 DECL_SPECIAL(TEXM3x2DEPTH)
2436 {
2437 struct ureg_program *ureg = tx->ureg;
2438 struct ureg_dst tmp;
2439 const int m = tx->insn.dst[0].idx - 1;
2440 const int n = tx->insn.src[0].idx;
2441 assert(m >= 0 && m > n);
2442
2443 tx_texcoord_alloc(tx, m);
2444 tx_texcoord_alloc(tx, m+1);
2445
2446 tmp = tx_scratch(tx);
2447
2448 /* performs the matrix multiplication */
2449 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2450 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2451
2452 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2453 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2454 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2455 /* res = 'w' == 0 ? 1.0 : z/w */
2456 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2457 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2458 /* replace the depth for depth testing with the result */
2459 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2460 TGSI_WRITEMASK_Z, 0, 1);
2461 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2462 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2463 return D3D_OK;
2464 }
2465
2466 DECL_SPECIAL(TEXDP3)
2467 {
2468 struct ureg_program *ureg = tx->ureg;
2469 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2470 const int m = tx->insn.dst[0].idx;
2471 const int n = tx->insn.src[0].idx;
2472 assert(m >= 0 && m > n);
2473
2474 tx_texcoord_alloc(tx, m);
2475
2476 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2477
2478 return D3D_OK;
2479 }
2480
2481 DECL_SPECIAL(TEXM3x3)
2482 {
2483 struct ureg_program *ureg = tx->ureg;
2484 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2485 struct ureg_src sample;
2486 struct ureg_dst E, tmp;
2487 const int m = tx->insn.dst[0].idx - 2;
2488 const int n = tx->insn.src[0].idx;
2489 assert(m >= 0 && m > n);
2490
2491 tx_texcoord_alloc(tx, m);
2492 tx_texcoord_alloc(tx, m+1);
2493 tx_texcoord_alloc(tx, m+2);
2494
2495 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2496 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2497 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2498
2499 switch (tx->insn.opcode) {
2500 case D3DSIO_TEXM3x3:
2501 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2502 break;
2503 case D3DSIO_TEXM3x3TEX:
2504 sample = ureg_DECL_sampler(ureg, m + 2);
2505 tx->info->sampler_mask |= 1 << (m + 2);
2506 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2507 break;
2508 case D3DSIO_TEXM3x3VSPEC:
2509 sample = ureg_DECL_sampler(ureg, m + 2);
2510 tx->info->sampler_mask |= 1 << (m + 2);
2511 E = tx_scratch(tx);
2512 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2513 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2514 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2515 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2516 /* At this step, dst = N = (u', w', z').
2517 * We want dst to be the texture sampled at (u'', w'', z''), with
2518 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2519 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2520 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2521 /* at this step tmp.x = 1/N.N */
2522 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2523 /* at this step tmp.y = N.E */
2524 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2525 /* at this step tmp.x = N.E/N.N */
2526 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2527 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2528 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2529 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2530 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2531 break;
2532 default:
2533 return D3DERR_INVALIDCALL;
2534 }
2535 return D3D_OK;
2536 }
2537
2538 DECL_SPECIAL(TEXDEPTH)
2539 {
2540 struct ureg_program *ureg = tx->ureg;
2541 struct ureg_dst r5;
2542 struct ureg_src r5r, r5g;
2543
2544 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2545
2546 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2547 * r5 won't be used afterward, thus we can use r5.ba */
2548 r5 = tx->regs.r[5];
2549 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2550 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2551
2552 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2553 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2554 /* r5.r = r/g */
2555 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2556 r5r, ureg_imm1f(ureg, 1.0f));
2557 /* replace the depth for depth testing with the result */
2558 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2559 TGSI_WRITEMASK_Z, 0, 1);
2560 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2561
2562 return D3D_OK;
2563 }
2564
2565 DECL_SPECIAL(BEM)
2566 {
2567 struct ureg_program *ureg = tx->ureg;
2568 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2569 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2570 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2571 struct ureg_src m00, m01, m10, m11;
2572 const int m = tx->insn.dst[0].idx;
2573 struct ureg_dst tmp;
2574 /*
2575 * Bump-env-matrix:
2576 * 00 is X
2577 * 01 is Y
2578 * 10 is Z
2579 * 11 is W
2580 */
2581 nine_info_mark_const_f_used(tx->info, 8 + m);
2582 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2583 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2584 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2585 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2586 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2587 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2588 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2589 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2590 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2591 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2592
2593 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2594 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2595 NINE_APPLY_SWIZZLE(src1, X), src0);
2596 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2597 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2598 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2599 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2600
2601 tx->info->bumpenvmat_needed = 1;
2602
2603 return D3D_OK;
2604 }
2605
2606 DECL_SPECIAL(TEXLD)
2607 {
2608 struct ureg_program *ureg = tx->ureg;
2609 unsigned target;
2610 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2611 struct ureg_src src[2] = {
2612 tx_src_param(tx, &tx->insn.src[0]),
2613 tx_src_param(tx, &tx->insn.src[1])
2614 };
2615 assert(tx->insn.src[1].idx >= 0 &&
2616 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2617 target = tx->sampler_targets[tx->insn.src[1].idx];
2618
2619 switch (tx->insn.flags) {
2620 case 0:
2621 ureg_TEX(ureg, dst, target, src[0], src[1]);
2622 break;
2623 case NINED3DSI_TEXLD_PROJECT:
2624 ureg_TXP(ureg, dst, target, src[0], src[1]);
2625 break;
2626 case NINED3DSI_TEXLD_BIAS:
2627 ureg_TXB(ureg, dst, target, src[0], src[1]);
2628 break;
2629 default:
2630 assert(0);
2631 return D3DERR_INVALIDCALL;
2632 }
2633 return D3D_OK;
2634 }
2635
2636 DECL_SPECIAL(TEXLD_14)
2637 {
2638 struct ureg_program *ureg = tx->ureg;
2639 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2640 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2641 const unsigned s = tx->insn.dst[0].idx;
2642 const unsigned t = ps1x_sampler_type(tx->info, s);
2643
2644 tx->info->sampler_mask |= 1 << s;
2645 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2646
2647 return D3D_OK;
2648 }
2649
2650 DECL_SPECIAL(TEX)
2651 {
2652 struct ureg_program *ureg = tx->ureg;
2653 const unsigned s = tx->insn.dst[0].idx;
2654 const unsigned t = ps1x_sampler_type(tx->info, s);
2655 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2656 struct ureg_src src[2];
2657
2658 tx_texcoord_alloc(tx, s);
2659
2660 src[0] = tx->regs.vT[s];
2661 src[1] = ureg_DECL_sampler(ureg, s);
2662 tx->info->sampler_mask |= 1 << s;
2663
2664 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2665
2666 return D3D_OK;
2667 }
2668
2669 DECL_SPECIAL(TEXLDD)
2670 {
2671 unsigned target;
2672 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2673 struct ureg_src src[4] = {
2674 tx_src_param(tx, &tx->insn.src[0]),
2675 tx_src_param(tx, &tx->insn.src[1]),
2676 tx_src_param(tx, &tx->insn.src[2]),
2677 tx_src_param(tx, &tx->insn.src[3])
2678 };
2679 assert(tx->insn.src[1].idx >= 0 &&
2680 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2681 target = tx->sampler_targets[tx->insn.src[1].idx];
2682
2683 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2684 return D3D_OK;
2685 }
2686
2687 DECL_SPECIAL(TEXLDL)
2688 {
2689 unsigned target;
2690 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2691 struct ureg_src src[2] = {
2692 tx_src_param(tx, &tx->insn.src[0]),
2693 tx_src_param(tx, &tx->insn.src[1])
2694 };
2695 assert(tx->insn.src[1].idx >= 0 &&
2696 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2697 target = tx->sampler_targets[tx->insn.src[1].idx];
2698
2699 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2700 return D3D_OK;
2701 }
2702
2703 DECL_SPECIAL(SETP)
2704 {
2705 STUB(D3DERR_INVALIDCALL);
2706 }
2707
2708 DECL_SPECIAL(BREAKP)
2709 {
2710 STUB(D3DERR_INVALIDCALL);
2711 }
2712
2713 DECL_SPECIAL(PHASE)
2714 {
2715 return D3D_OK; /* we don't care about phase */
2716 }
2717
2718 DECL_SPECIAL(COMMENT)
2719 {
2720 return D3D_OK; /* nothing to do */
2721 }
2722
2723
/* Expands to one struct sm1_op_info initializer:
 *   o        D3DSIO_ opcode suffix
 *   t        TGSI_OPCODE_ suffix
 *   vv1/vv2  first/second member of the vertex shader version pair
 *   pv1/pv2  first/second member of the pixel shader version pair
 *   d, s     presumably the destination / source parameter counts
 *   h        SPECIAL() handler, or NULL
 */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2 }, d, s, h }
2726
2727 struct sm1_op_info inst_table[] =
2728 {
2729 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2730 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2731 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2732 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2733 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2734 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2735 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2736 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2737 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2738 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2739 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2740 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2741 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2742 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2743 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2744 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2745 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2746 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2747 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2748 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2749
2750 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2751 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2752 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2753 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2754 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2755
2756 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2757 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2758 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2759 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2760 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2761 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2762
2763 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2764
2765 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2766 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2767 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2768 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2769 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2770
2771 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2772 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2773
2774 /* More flow control */
2775 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2776 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2777 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2778 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2779 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2780 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2781 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2782 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2783 /* we don't write to the address register, but a normal register (copied
2784 * when needed to the address register), thus we don't use ARR */
2785 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2786
2787 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2788 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2789
2790 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2791 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2792 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2793 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2794 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2795 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2796 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2797 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2798 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2799 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2800 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2801 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2802 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2803 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2804 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2805 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2806
2807 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2808 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2809 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2810 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2811
2812 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2813
2814 /* More tex stuff */
2815 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2816 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2817 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2818 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2819 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2820 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2821
2822 /* Misc */
2823 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2824 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2825 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2826 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2827 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2828 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2829 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2830 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2831 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2832 };
2833
2834 struct sm1_op_info inst_phase =
2835 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2836
2837 struct sm1_op_info inst_comment =
2838 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2839
2840 static void
2841 create_op_info_map(struct shader_translator *tx)
2842 {
2843 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2844 unsigned i;
2845
2846 for (i = 0; i < Elements(tx->op_info_map); ++i)
2847 tx->op_info_map[i] = -1;
2848
2849 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2850 for (i = 0; i < Elements(inst_table); ++i) {
2851 assert(inst_table[i].sio < Elements(tx->op_info_map));
2852 if (inst_table[i].vert_version.min <= version &&
2853 inst_table[i].vert_version.max >= version)
2854 tx->op_info_map[inst_table[i].sio] = i;
2855 }
2856 } else {
2857 for (i = 0; i < Elements(inst_table); ++i) {
2858 assert(inst_table[i].sio < Elements(tx->op_info_map));
2859 if (inst_table[i].frag_version.min <= version &&
2860 inst_table[i].frag_version.max >= version)
2861 tx->op_info_map[inst_table[i].sio] = i;
2862 }
2863 }
2864 }
2865
2866 static inline HRESULT
2867 NineTranslateInstruction_Generic(struct shader_translator *tx)
2868 {
2869 struct ureg_dst dst[1];
2870 struct ureg_src src[4];
2871 unsigned i;
2872
2873 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2874 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2875 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2876 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2877
2878 ureg_insn(tx->ureg, tx->insn.info->opcode,
2879 dst, tx->insn.ndst,
2880 src, tx->insn.nsrc);
2881 return D3D_OK;
2882 }
2883
2884 static inline DWORD
2885 TOKEN_PEEK(struct shader_translator *tx)
2886 {
2887 return *(tx->parse);
2888 }
2889
2890 static inline DWORD
2891 TOKEN_NEXT(struct shader_translator *tx)
2892 {
2893 return *(tx->parse)++;
2894 }
2895
2896 static inline void
2897 TOKEN_JUMP(struct shader_translator *tx)
2898 {
2899 if (tx->parse_next && tx->parse != tx->parse_next) {
2900 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2901 tx->parse = tx->parse_next;
2902 }
2903 }
2904
2905 static inline boolean
2906 sm1_parse_eof(struct shader_translator *tx)
2907 {
2908 return TOKEN_PEEK(tx) == NINED3DSP_END;
2909 }
2910
2911 static void
2912 sm1_read_version(struct shader_translator *tx)
2913 {
2914 const DWORD tok = TOKEN_NEXT(tx);
2915
2916 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2917 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2918
2919 switch (tok >> 16) {
2920 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2921 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2922 default:
2923 DBG("Invalid shader type: %x\n", tok);
2924 tx->processor = ~0;
2925 break;
2926 }
2927 }
2928
2929 /* This is just to check if we parsed the instruction properly. */
2930 static void
2931 sm1_parse_get_skip(struct shader_translator *tx)
2932 {
2933 const DWORD tok = TOKEN_PEEK(tx);
2934
2935 if (tx->version.major >= 2) {
2936 tx->parse_next = tx->parse + 1 /* this */ +
2937 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2938 } else {
2939 tx->parse_next = NULL; /* TODO: determine from param count */
2940 }
2941 }
2942
2943 static void
2944 sm1_print_comment(const char *comment, UINT size)
2945 {
2946 if (!size)
2947 return;
2948 /* TODO */
2949 }
2950
2951 static void
2952 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2953 {
2954 DWORD tok = TOKEN_PEEK(tx);
2955
2956 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2957 {
2958 const char *comment = "";
2959 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2960 tx->parse += size + 1;
2961
2962 if (print)
2963 sm1_print_comment(comment, size);
2964
2965 tok = TOKEN_PEEK(tx);
2966 }
2967 }
2968
2969 static void
2970 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2971 {
2972 *reg = TOKEN_NEXT(tx);
2973
2974 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2975 {
2976 if (tx->version.major < 2)
2977 *rel = (1 << 31) |
2978 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2979 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2980 D3DSP_NOSWIZZLE;
2981 else
2982 *rel = TOKEN_NEXT(tx);
2983 }
2984 }
2985
2986 static void
2987 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2988 {
2989 uint8_t shift;
2990 dst->file =
2991 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2992 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2993 dst->type = TGSI_RETURN_TYPE_FLOAT;
2994 dst->idx = tok & D3DSP_REGNUM_MASK;
2995 dst->rel = NULL;
2996 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2997 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2998 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2999 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
3000 }
3001
3002 static void
3003 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3004 {
3005 src->file =
3006 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3007 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3008 src->type = TGSI_RETURN_TYPE_FLOAT;
3009 src->idx = tok & D3DSP_REGNUM_MASK;
3010 src->rel = NULL;
3011 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3012 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3013
3014 switch (src->file) {
3015 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3016 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3017 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3018 default:
3019 break;
3020 }
3021 }
3022
3023 static void
3024 sm1_parse_immediate(struct shader_translator *tx,
3025 struct sm1_src_param *imm)
3026 {
3027 imm->file = NINED3DSPR_IMMEDIATE;
3028 imm->idx = INT_MIN;
3029 imm->rel = NULL;
3030 imm->swizzle = NINED3DSP_NOSWIZZLE;
3031 imm->mod = 0;
3032 switch (tx->insn.opcode) {
3033 case D3DSIO_DEF:
3034 imm->type = NINED3DSPTYPE_FLOAT4;
3035 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3036 tx->parse += 4;
3037 break;
3038 case D3DSIO_DEFI:
3039 imm->type = NINED3DSPTYPE_INT4;
3040 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3041 tx->parse += 4;
3042 break;
3043 case D3DSIO_DEFB:
3044 imm->type = NINED3DSPTYPE_BOOL;
3045 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3046 tx->parse += 1;
3047 break;
3048 default:
3049 assert(0);
3050 break;
3051 }
3052 }
3053
3054 static void
3055 sm1_read_dst_param(struct shader_translator *tx,
3056 struct sm1_dst_param *dst,
3057 struct sm1_src_param *rel)
3058 {
3059 DWORD tok_dst, tok_rel = 0;
3060
3061 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3062 sm1_parse_dst_param(dst, tok_dst);
3063 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3064 sm1_parse_src_param(rel, tok_rel);
3065 dst->rel = rel;
3066 }
3067 }
3068
3069 static void
3070 sm1_read_src_param(struct shader_translator *tx,
3071 struct sm1_src_param *src,
3072 struct sm1_src_param *rel)
3073 {
3074 DWORD tok_src, tok_rel = 0;
3075
3076 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3077 sm1_parse_src_param(src, tok_src);
3078 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3079 assert(rel);
3080 sm1_parse_src_param(rel, tok_rel);
3081 src->rel = rel;
3082 }
3083 }
3084
3085 static void
3086 sm1_read_semantic(struct shader_translator *tx,
3087 struct sm1_semantic *sem)
3088 {
3089 const DWORD tok_usg = TOKEN_NEXT(tx);
3090 const DWORD tok_dst = TOKEN_NEXT(tx);
3091
3092 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3093 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3094 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3095
3096 sm1_parse_dst_param(&sem->reg, tok_dst);
3097 }
3098
3099 static void
3100 sm1_parse_instruction(struct shader_translator *tx)
3101 {
3102 struct sm1_instruction *insn = &tx->insn;
3103 DWORD tok;
3104 struct sm1_op_info *info = NULL;
3105 unsigned i;
3106
3107 sm1_parse_comments(tx, TRUE);
3108 sm1_parse_get_skip(tx);
3109
3110 tok = TOKEN_NEXT(tx);
3111
3112 insn->opcode = tok & D3DSI_OPCODE_MASK;
3113 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3114 insn->coissue = !!(tok & D3DSI_COISSUE);
3115 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3116
3117 if (insn->opcode < Elements(tx->op_info_map)) {
3118 int k = tx->op_info_map[insn->opcode];
3119 if (k >= 0) {
3120 assert(k < Elements(inst_table));
3121 info = &inst_table[k];
3122 }
3123 } else {
3124 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3125 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3126 }
3127 if (!info) {
3128 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3129 TOKEN_JUMP(tx);
3130 return;
3131 }
3132 insn->info = info;
3133 insn->ndst = info->ndst;
3134 insn->nsrc = info->nsrc;
3135
3136 assert(!insn->predicated && "TODO: predicated instructions");
3137
3138 /* check version */
3139 {
3140 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3141 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3142 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3143 if (ver < min || ver > max) {
3144 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3145 min, ver, max);
3146 return;
3147 }
3148 }
3149
3150 for (i = 0; i < insn->ndst; ++i)
3151 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3152 if (insn->predicated)
3153 sm1_read_src_param(tx, &insn->pred, NULL);
3154 for (i = 0; i < insn->nsrc; ++i)
3155 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3156
3157 /* parse here so we can dump them before processing */
3158 if (insn->opcode == D3DSIO_DEF ||
3159 insn->opcode == D3DSIO_DEFI ||
3160 insn->opcode == D3DSIO_DEFB)
3161 sm1_parse_immediate(tx, &tx->insn.src[0]);
3162
3163 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3164 sm1_instruction_check(insn);
3165
3166 if (info->handler)
3167 info->handler(tx);
3168 else
3169 NineTranslateInstruction_Generic(tx);
3170 tx_apply_dst0_modifiers(tx);
3171
3172 tx->num_scratch = 0; /* reset */
3173
3174 TOKEN_JUMP(tx);
3175 }
3176
3177 static void
3178 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3179 {
3180 unsigned i;
3181
3182 tx->info = info;
3183
3184 tx->byte_code = info->byte_code;
3185 tx->parse = info->byte_code;
3186
3187 for (i = 0; i < Elements(info->input_map); ++i)
3188 info->input_map[i] = NINE_DECLUSAGE_NONE;
3189 info->num_inputs = 0;
3190
3191 info->position_t = FALSE;
3192 info->point_size = FALSE;
3193
3194 tx->info->const_float_slots = 0;
3195 tx->info->const_int_slots = 0;
3196 tx->info->const_bool_slots = 0;
3197
3198 info->sampler_mask = 0x0;
3199 info->rt_mask = 0x0;
3200
3201 info->lconstf.data = NULL;
3202 info->lconstf.ranges = NULL;
3203
3204 info->bumpenvmat_needed = 0;
3205
3206 for (i = 0; i < Elements(tx->regs.rL); ++i) {
3207 tx->regs.rL[i] = ureg_dst_undef();
3208 }
3209 tx->regs.address = ureg_dst_undef();
3210 tx->regs.a0 = ureg_dst_undef();
3211 tx->regs.p = ureg_dst_undef();
3212 tx->regs.oDepth = ureg_dst_undef();
3213 tx->regs.vPos = ureg_src_undef();
3214 tx->regs.vFace = ureg_src_undef();
3215 for (i = 0; i < Elements(tx->regs.o); ++i)
3216 tx->regs.o[i] = ureg_dst_undef();
3217 for (i = 0; i < Elements(tx->regs.oCol); ++i)
3218 tx->regs.oCol[i] = ureg_dst_undef();
3219 for (i = 0; i < Elements(tx->regs.vC); ++i)
3220 tx->regs.vC[i] = ureg_src_undef();
3221 for (i = 0; i < Elements(tx->regs.vT); ++i)
3222 tx->regs.vT[i] = ureg_src_undef();
3223
3224 for (i = 0; i < Elements(tx->lconsti); ++i)
3225 tx->lconsti[i].idx = -1;
3226 for (i = 0; i < Elements(tx->lconstb); ++i)
3227 tx->lconstb[i].idx = -1;
3228
3229 sm1_read_version(tx);
3230
3231 info->version = (tx->version.major << 4) | tx->version.minor;
3232
3233 create_op_info_map(tx);
3234 }
3235
3236 static void
3237 tx_dtor(struct shader_translator *tx)
3238 {
3239 if (tx->num_inst_labels)
3240 FREE(tx->inst_labels);
3241 FREE(tx->lconstf);
3242 FREE(tx->regs.r);
3243 FREE(tx);
3244 }
3245
3246 static inline unsigned
3247 tgsi_processor_from_type(unsigned shader_type)
3248 {
3249 switch (shader_type) {
3250 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
3251 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
3252 default:
3253 return ~0;
3254 }
3255 }
3256
3257 static void
3258 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3259 {
3260 struct ureg_program *ureg = tx->ureg;
3261 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3262 struct ureg_src fog_end, fog_coeff, fog_density;
3263 struct ureg_src fog_vs, depth, fog_color;
3264 struct ureg_dst fog_factor;
3265
3266 if (!tx->info->fog_enable) {
3267 ureg_MOV(ureg, oCol0, src_col);
3268 return;
3269 }
3270
3271 if (tx->info->fog_mode != D3DFOG_NONE) {
3272 if (tx->wpos_is_sysval) {
3273 depth = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
3274 } else {
3275 depth = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
3276 TGSI_INTERPOLATE_LINEAR);
3277 }
3278 depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3279 }
3280
3281 nine_info_mark_const_f_used(tx->info, 33);
3282 fog_color = NINE_CONSTANT_SRC(32);
3283 fog_factor = tx_scratch_scalar(tx);
3284
3285 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3286 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3287 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3288 ureg_SUB(ureg, fog_factor, fog_end, depth);
3289 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3290 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3291 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3292 ureg_MUL(ureg, fog_factor, depth, fog_density);
3293 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3294 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3295 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3296 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3297 ureg_MUL(ureg, fog_factor, depth, fog_density);
3298 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3299 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3300 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3301 } else {
3302 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3303 TGSI_INTERPOLATE_PERSPECTIVE),
3304 TGSI_SWIZZLE_X);
3305 ureg_MOV(ureg, fog_factor, fog_vs);
3306 }
3307
3308 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3309 tx_src_scalar(fog_factor), src_col, fog_color);
3310 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3311 }
3312
3313 #define GET_CAP(n) device->screen->get_param( \
3314 device->screen, PIPE_CAP_##n)
3315 #define GET_SHADER_CAP(n) device->screen->get_shader_param( \
3316 device->screen, info->type, PIPE_SHADER_CAP_##n)
3317
3318 HRESULT
3319 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3320 {
3321 struct shader_translator *tx;
3322 HRESULT hr = D3D_OK;
3323 const unsigned processor = tgsi_processor_from_type(info->type);
3324 unsigned s, slot_max;
3325 unsigned max_const_f;
3326
3327 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3328
3329 tx = CALLOC_STRUCT(shader_translator);
3330 if (!tx)
3331 return E_OUTOFMEMORY;
3332 tx_ctor(tx, info);
3333
3334 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3335 hr = D3DERR_INVALIDCALL;
3336 DBG("Unsupported shader version: %u.%u !\n",
3337 tx->version.major, tx->version.minor);
3338 goto out;
3339 }
3340 if (tx->processor != processor) {
3341 hr = D3DERR_INVALIDCALL;
3342 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3343 goto out;
3344 }
3345 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
3346 tx->version.major, tx->version.minor);
3347
3348 tx->ureg = ureg_create(processor);
3349 if (!tx->ureg) {
3350 hr = E_OUTOFMEMORY;
3351 goto out;
3352 }
3353
3354 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3355 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3356 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3357 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3358 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3359 tx->texcoord_sn = tx->want_texcoord ?
3360 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3361 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3362 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3363
3364 if (IS_VS) {
3365 tx->num_constf_allowed = NINE_MAX_CONST_F;
3366 } else if (tx->version.major < 2) {/* IS_PS v1 */
3367 tx->num_constf_allowed = 8;
3368 } else if (tx->version.major == 2) {/* IS_PS v2 */
3369 tx->num_constf_allowed = 32;
3370 } else {/* IS_PS v3 */
3371 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3372 }
3373
3374 if (tx->version.major < 2) {
3375 tx->num_consti_allowed = 0;
3376 tx->num_constb_allowed = 0;
3377 } else {
3378 tx->num_consti_allowed = NINE_MAX_CONST_I;
3379 tx->num_constb_allowed = NINE_MAX_CONST_B;
3380 }
3381
3382 /* VS must always write position. Declare it here to make it the 1st output.
3383 * (Some drivers like nv50 are buggy and rely on that.)
3384 */
3385 if (IS_VS) {
3386 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3387 } else {
3388 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3389 if (!tx->shift_wpos)
3390 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3391 }
3392
3393 while (!sm1_parse_eof(tx) && !tx->failure)
3394 sm1_parse_instruction(tx);
3395 tx->parse++; /* for byte_size */
3396
3397 if (tx->failure) {
3398 ERR("Encountered buggy shader\n");
3399 ureg_destroy(tx->ureg);
3400 hr = D3DERR_INVALIDCALL;
3401 goto out;
3402 }
3403
3404 if (IS_PS && tx->version.major < 3) {
3405 if (tx->version.major < 2) {
3406 assert(tx->num_temp); /* there must be color output */
3407 info->rt_mask |= 0x1;
3408 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3409 } else {
3410 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3411 }
3412 }
3413
3414 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3415 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3416 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3417 }
3418
3419 if (info->position_t)
3420 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3421
3422 ureg_END(tx->ureg);
3423
3424 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
3425 info->point_size = TRUE;
3426
3427 /* record local constants */
3428 if (tx->num_lconstf && tx->indirect_const_access) {
3429 struct nine_range *ranges;
3430 float *data;
3431 int *indices;
3432 unsigned i, k, n;
3433
3434 hr = E_OUTOFMEMORY;
3435
3436 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3437 if (!data)
3438 goto out;
3439 info->lconstf.data = data;
3440
3441 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3442 if (!indices)
3443 goto out;
3444
3445 /* lazy sort, num_lconstf should be small */
3446 for (n = 0; n < tx->num_lconstf; ++n) {
3447 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3448 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3449 k = i;
3450 }
3451 indices[n] = tx->lconstf[k].idx;
3452 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3453 tx->lconstf[k].idx = INT_MAX;
3454 }
3455
3456 /* count ranges */
3457 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3458 if (indices[i] != indices[i - 1] + 1)
3459 ++n;
3460 ranges = MALLOC(n * sizeof(ranges[0]));
3461 if (!ranges) {
3462 FREE(indices);
3463 goto out;
3464 }
3465 info->lconstf.ranges = ranges;
3466
3467 k = 0;
3468 ranges[k].bgn = indices[0];
3469 for (i = 1; i < tx->num_lconstf; ++i) {
3470 if (indices[i] != indices[i - 1] + 1) {
3471 ranges[k].next = &ranges[k + 1];
3472 ranges[k].end = indices[i - 1] + 1;
3473 ++k;
3474 ranges[k].bgn = indices[i];
3475 }
3476 }
3477 ranges[k].end = indices[i - 1] + 1;
3478 ranges[k].next = NULL;
3479 assert(n == (k + 1));
3480
3481 FREE(indices);
3482 hr = D3D_OK;
3483 }
3484
3485 /* r500 */
3486 if (info->const_float_slots > device->max_vs_const_f &&
3487 (info->const_int_slots || info->const_bool_slots))
3488 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3489
3490
3491 if (tx->indirect_const_access) /* vs only */
3492 info->const_float_slots = device->max_vs_const_f;
3493
3494 max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3495 slot_max = info->const_bool_slots > 0 ?
3496 max_const_f + NINE_MAX_CONST_I
3497 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3498 info->const_int_slots > 0 ?
3499 max_const_f + info->const_int_slots :
3500 info->const_float_slots;
3501
3502 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3503
3504 for (s = 0; s < slot_max; s++)
3505 ureg_DECL_constant(tx->ureg, s);
3506
3507 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3508 unsigned count;
3509 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3510 tgsi_dump(toks, 0);
3511 ureg_free_tokens(toks);
3512 }
3513
3514 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3515 if (!info->cso) {
3516 hr = D3DERR_DRIVERINTERNALERROR;
3517 FREE(info->lconstf.data);
3518 FREE(info->lconstf.ranges);
3519 goto out;
3520 }
3521
3522 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3523 out:
3524 tx_dtor(tx);
3525 return hr;
3526 }