Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
36
37 #define DBG_CHANNEL DBG_SHADER
38
39 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
40
41
42 struct shader_translator;
43
44 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
45
46 static inline const char *d3dsio_to_string(unsigned opcode);
47
48
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Nesting limits; they size the per-depth arrays in struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Value type of an immediate; selects the member of sm1_src_param::imm. */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Pseudo register file for immediates: first value past D3DSPR_PREDICATE. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Values of sm1_instruction::flags for D3DSIO_TEX. */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Write mask bits, one per component (0 = x ... 3 = w). */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: components 0,1,2,3 packed into successive 2-bit fields. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four TGSI_SWIZZLE_* arguments, e.g. NINE_SWIZZLE4(W,W,W,W). */
#define NINE_SWIZZLE4(x,y,z,w) \
   TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* D3D destination modifier flags shifted down by D3DSP_DSTMOD_SHIFT. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
95
96 /*
97 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
98 * BIAS <= PS 1.4 (x-0.5)
99 * BIASNEG <= PS 1.4 (-(x-0.5))
100 * SIGN <= PS 1.4 (2(x-0.5))
101 * SIGNNEG <= PS 1.4 (-2(x-0.5))
102 * COMP <= PS 1.4 (1-x)
103 * X2 = PS 1.4 (2x)
104 * X2NEG = PS 1.4 (-2x)
105 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
106 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
107 * ABS >= SM 3.0 (abs(x))
108 * ABSNEG >= SM 3.0 (-abs(x))
109 * NOT >= SM 2.0 predication only
110 */
/* D3D source modifiers shifted down by D3DSP_SRCMOD_SHIFT, so that they can
 * be stored in sm1_src_param::mod and used as indices into sm1_mod_str. */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
125
126 static const char *sm1_mod_str[] =
127 {
128 [NINED3DSPSM_NONE] = "",
129 [NINED3DSPSM_NEG] = "-",
130 [NINED3DSPSM_BIAS] = "bias",
131 [NINED3DSPSM_BIASNEG] = "biasneg",
132 [NINED3DSPSM_SIGN] = "sign",
133 [NINED3DSPSM_SIGNNEG] = "signneg",
134 [NINED3DSPSM_COMP] = "comp",
135 [NINED3DSPSM_X2] = "x2",
136 [NINED3DSPSM_X2NEG] = "x2neg",
137 [NINED3DSPSM_DZ] = "dz",
138 [NINED3DSPSM_DW] = "dw",
139 [NINED3DSPSM_ABS] = "abs",
140 [NINED3DSPSM_ABSNEG] = "-abs",
141 [NINED3DSPSM_NOT] = "not"
142 };
143
144 static void
145 sm1_dump_writemask(BYTE mask)
146 {
147 if (mask & 1) DUMP("x"); else DUMP("_");
148 if (mask & 2) DUMP("y"); else DUMP("_");
149 if (mask & 4) DUMP("z"); else DUMP("_");
150 if (mask & 8) DUMP("w"); else DUMP("_");
151 }
152
153 static void
154 sm1_dump_swizzle(BYTE s)
155 {
156 char c[4] = { 'x', 'y', 'z', 'w' };
157 DUMP("%c%c%c%c",
158 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
159 }
160
161 static const char sm1_file_char[] =
162 {
163 [D3DSPR_TEMP] = 'r',
164 [D3DSPR_INPUT] = 'v',
165 [D3DSPR_CONST] = 'c',
166 [D3DSPR_ADDR] = 'A',
167 [D3DSPR_RASTOUT] = 'R',
168 [D3DSPR_ATTROUT] = 'D',
169 [D3DSPR_OUTPUT] = 'o',
170 [D3DSPR_CONSTINT] = 'I',
171 [D3DSPR_COLOROUT] = 'C',
172 [D3DSPR_DEPTHOUT] = 'D',
173 [D3DSPR_SAMPLER] = 's',
174 [D3DSPR_CONST2] = 'c',
175 [D3DSPR_CONST3] = 'c',
176 [D3DSPR_CONST4] = 'c',
177 [D3DSPR_CONSTBOOL] = 'B',
178 [D3DSPR_LOOP] = 'L',
179 [D3DSPR_TEMPFLOAT16] = 'h',
180 [D3DSPR_MISCTYPE] = 'M',
181 [D3DSPR_LABEL] = 'X',
182 [D3DSPR_PREDICATE] = 'p'
183 };
184
185 static void
186 sm1_dump_reg(BYTE file, INT index)
187 {
188 switch (file) {
189 case D3DSPR_LOOP:
190 DUMP("aL");
191 break;
192 case D3DSPR_COLOROUT:
193 DUMP("oC%i", index);
194 break;
195 case D3DSPR_DEPTHOUT:
196 DUMP("oDepth");
197 break;
198 case D3DSPR_RASTOUT:
199 DUMP("oRast%i", index);
200 break;
201 case D3DSPR_CONSTINT:
202 DUMP("iconst[%i]", index);
203 break;
204 case D3DSPR_CONSTBOOL:
205 DUMP("bconst[%i]", index);
206 break;
207 default:
208 DUMP("%c%i", sm1_file_char[file], index);
209 break;
210 }
211 }
212
213 struct sm1_src_param
214 {
215 INT idx;
216 struct sm1_src_param *rel;
217 BYTE file;
218 BYTE swizzle;
219 BYTE mod;
220 BYTE type;
221 union {
222 DWORD d[4];
223 float f[4];
224 int i[4];
225 BOOL b;
226 } imm;
227 };
228 static void
229 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
230
231 struct sm1_dst_param
232 {
233 INT idx;
234 struct sm1_src_param *rel;
235 BYTE file;
236 BYTE mask;
237 BYTE mod;
238 int8_t shift; /* sint4 */
239 BYTE type;
240 };
241
242 static inline void
243 assert_replicate_swizzle(const struct ureg_src *reg)
244 {
245 assert(reg->SwizzleY == reg->SwizzleX &&
246 reg->SwizzleZ == reg->SwizzleX &&
247 reg->SwizzleW == reg->SwizzleX);
248 }
249
250 static void
251 sm1_dump_immediate(const struct sm1_src_param *param)
252 {
253 switch (param->type) {
254 case NINED3DSPTYPE_FLOAT4:
255 DUMP("{ %f %f %f %f }",
256 param->imm.f[0], param->imm.f[1],
257 param->imm.f[2], param->imm.f[3]);
258 break;
259 case NINED3DSPTYPE_INT4:
260 DUMP("{ %i %i %i %i }",
261 param->imm.i[0], param->imm.i[1],
262 param->imm.i[2], param->imm.i[3]);
263 break;
264 case NINED3DSPTYPE_BOOL:
265 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
266 break;
267 default:
268 assert(0);
269 break;
270 }
271 }
272
273 static void
274 sm1_dump_src_param(const struct sm1_src_param *param)
275 {
276 if (param->file == NINED3DSPR_IMMEDIATE) {
277 assert(!param->mod &&
278 !param->rel &&
279 param->swizzle == NINED3DSP_NOSWIZZLE);
280 sm1_dump_immediate(param);
281 return;
282 }
283
284 if (param->mod)
285 DUMP("%s(", sm1_mod_str[param->mod]);
286 if (param->rel) {
287 DUMP("%c[", sm1_file_char[param->file]);
288 sm1_dump_src_param(param->rel);
289 DUMP("+%i]", param->idx);
290 } else {
291 sm1_dump_reg(param->file, param->idx);
292 }
293 if (param->mod)
294 DUMP(")");
295 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
296 DUMP(".");
297 sm1_dump_swizzle(param->swizzle);
298 }
299 }
300
301 static void
302 sm1_dump_dst_param(const struct sm1_dst_param *param)
303 {
304 if (param->mod & NINED3DSPDM_SATURATE)
305 DUMP("sat ");
306 if (param->mod & NINED3DSPDM_PARTIALP)
307 DUMP("pp ");
308 if (param->mod & NINED3DSPDM_CENTROID)
309 DUMP("centroid ");
310 if (param->shift < 0)
311 DUMP("/%u ", 1 << -param->shift);
312 if (param->shift > 0)
313 DUMP("*%u ", 1 << param->shift);
314
315 if (param->rel) {
316 DUMP("%c[", sm1_file_char[param->file]);
317 sm1_dump_src_param(param->rel);
318 DUMP("+%i]", param->idx);
319 } else {
320 sm1_dump_reg(param->file, param->idx);
321 }
322 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
323 DUMP(".");
324 sm1_dump_writemask(param->mask);
325 }
326 }
327
328 struct sm1_semantic
329 {
330 struct sm1_dst_param reg;
331 BYTE sampler_type;
332 D3DDECLUSAGE usage;
333 BYTE usage_idx;
334 };
335
336 struct sm1_op_info
337 {
338 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
339 * should be ignored completely */
340 unsigned sio;
341 unsigned opcode; /* TGSI_OPCODE_x */
342
343 /* versions are still set even handler is set */
344 struct {
345 unsigned min;
346 unsigned max;
347 } vert_version, frag_version;
348
349 /* number of regs parsed outside of special handler */
350 unsigned ndst;
351 unsigned nsrc;
352
353 /* some instructions don't map perfectly, so use a special handler */
354 translate_instruction_func handler;
355 };
356
357 struct sm1_instruction
358 {
359 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
360 BYTE flags;
361 BOOL coissue;
362 BOOL predicated;
363 BYTE ndst;
364 BYTE nsrc;
365 struct sm1_src_param src[4];
366 struct sm1_src_param src_rel[4];
367 struct sm1_src_param pred;
368 struct sm1_src_param dst_rel[1];
369 struct sm1_dst_param dst[1];
370
371 struct sm1_op_info *info;
372 };
373
374 static void
375 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
376 {
377 unsigned i;
378
379 /* no info stored for these: */
380 if (insn->opcode == D3DSIO_DCL)
381 return;
382 for (i = 0; i < indent; ++i)
383 DUMP(" ");
384
385 if (insn->predicated) {
386 DUMP("@");
387 sm1_dump_src_param(&insn->pred);
388 DUMP(" ");
389 }
390 DUMP("%s", d3dsio_to_string(insn->opcode));
391 if (insn->flags) {
392 switch (insn->opcode) {
393 case D3DSIO_TEX:
394 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
395 break;
396 default:
397 DUMP("_%x", insn->flags);
398 break;
399 }
400 }
401 if (insn->coissue)
402 DUMP("_co");
403 DUMP(" ");
404
405 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
406 sm1_dump_dst_param(&insn->dst[i]);
407 DUMP(" ");
408 }
409
410 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
411 sm1_dump_src_param(&insn->src[i]);
412 DUMP(" ");
413 }
414 if (insn->opcode == D3DSIO_DEF ||
415 insn->opcode == D3DSIO_DEFI ||
416 insn->opcode == D3DSIO_DEFB)
417 sm1_dump_immediate(&insn->src[0]);
418
419 DUMP("\n");
420 }
421
422 struct sm1_local_const
423 {
424 INT idx;
425 struct ureg_src reg;
426 union {
427 boolean b;
428 float f[4];
429 int32_t i[4];
430 } imm;
431 };
432
433 struct shader_translator
434 {
435 const DWORD *byte_code;
436 const DWORD *parse;
437 const DWORD *parse_next;
438
439 struct ureg_program *ureg;
440
441 /* shader version */
442 struct {
443 BYTE major;
444 BYTE minor;
445 } version;
446 unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
447
448 boolean native_integers;
449 boolean inline_subroutines;
450 boolean lower_preds;
451 boolean want_texcoord;
452 boolean shift_wpos;
453 unsigned texcoord_sn;
454
455 struct sm1_instruction insn; /* current instruction */
456
457 struct {
458 struct ureg_dst *r;
459 struct ureg_dst oPos;
460 struct ureg_dst oFog;
461 struct ureg_dst oPts;
462 struct ureg_dst oCol[4];
463 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
464 struct ureg_dst oDepth;
465 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
466 struct ureg_src vPos;
467 struct ureg_src vFace;
468 struct ureg_src s;
469 struct ureg_dst p;
470 struct ureg_dst address;
471 struct ureg_dst a0;
472 struct ureg_dst tS[8]; /* texture stage registers */
473 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
474 struct ureg_dst t[5]; /* scratch TEMPs */
475 struct ureg_src vC[2]; /* PS color in */
476 struct ureg_src vT[8]; /* PS texcoord in */
477 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
478 } regs;
479 unsigned num_temp; /* Elements(regs.r) */
480 unsigned num_scratch;
481 unsigned loop_depth;
482 unsigned loop_depth_max;
483 unsigned cond_depth;
484 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
485 unsigned cond_labels[NINE_MAX_COND_DEPTH];
486 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
487
488 unsigned *inst_labels; /* LABEL op */
489 unsigned num_inst_labels;
490
491 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
492
493 struct sm1_local_const *lconstf;
494 unsigned num_lconstf;
495 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
496 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
497
498 boolean indirect_const_access;
499 boolean failure;
500
501 struct nine_shader_info *info;
502
503 int16_t op_info_map[D3DSIO_BREAKP + 1];
504 };
505
/* These macros expect a `struct shader_translator *tx` in scope. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
/* Number of float constants addressable by the shader being translated. */
#define NINE_MAX_CONST_F_SHADER (IS_VS ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3)

/* Flag the translation as failed and return from a void function when cond
 * holds. Expands to a complete if-statement; callers use it without a
 * trailing semicolon. */
#define FAILURE_VOID(cond) if ((cond)) { tx->failure = 1; return; }

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
514
515 static void
516 sm1_instruction_check(const struct sm1_instruction *insn)
517 {
518 if (insn->opcode == D3DSIO_CRS)
519 {
520 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
521 {
522 DBG("CRS.mask.w\n");
523 }
524 }
525 }
526
527 static boolean
528 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
529 {
530 INT i;
531 if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) {
532 tx->failure = TRUE;
533 return FALSE;
534 }
535 for (i = 0; i < tx->num_lconstf; ++i) {
536 if (tx->lconstf[i].idx == index) {
537 *src = tx->lconstf[i].reg;
538 return TRUE;
539 }
540 }
541 return FALSE;
542 }
543 static boolean
544 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
545 {
546 if (index < 0 || index >= NINE_MAX_CONST_I) {
547 tx->failure = TRUE;
548 return FALSE;
549 }
550 if (tx->lconsti[index].idx == index)
551 *src = tx->lconsti[index].reg;
552 return tx->lconsti[index].idx == index;
553 }
554 static boolean
555 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
556 {
557 if (index < 0 || index >= NINE_MAX_CONST_B) {
558 tx->failure = TRUE;
559 return FALSE;
560 }
561 if (tx->lconstb[index].idx == index)
562 *src = tx->lconstb[index].reg;
563 return tx->lconstb[index].idx == index;
564 }
565
566 static void
567 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
568 {
569 unsigned n;
570
571 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER)
572 if (IS_VS && index >= NINE_MAX_CONST_F_SHADER)
573 WARN("lconstf index %i too high, indirect access won't work\n", index);
574
575 for (n = 0; n < tx->num_lconstf; ++n)
576 if (tx->lconstf[n].idx == index)
577 break;
578 if (n == tx->num_lconstf) {
579 if ((n % 8) == 0) {
580 tx->lconstf = REALLOC(tx->lconstf,
581 (n + 0) * sizeof(tx->lconstf[0]),
582 (n + 8) * sizeof(tx->lconstf[0]));
583 assert(tx->lconstf);
584 }
585 tx->num_lconstf++;
586 }
587 tx->lconstf[n].idx = index;
588 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
589
590 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
591 }
592 static void
593 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
594 {
595 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I)
596 tx->lconsti[index].idx = index;
597 tx->lconsti[index].reg = tx->native_integers ?
598 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
599 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
600 }
601 static void
602 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
603 {
604 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B)
605 tx->lconstb[index].idx = index;
606 tx->lconstb[index].reg = tx->native_integers ?
607 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
608 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
609 }
610
611 static inline struct ureg_dst
612 tx_scratch(struct shader_translator *tx)
613 {
614 if (tx->num_scratch >= Elements(tx->regs.t)) {
615 tx->failure = TRUE;
616 return tx->regs.t[0];
617 }
618 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
619 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
620 return tx->regs.t[tx->num_scratch++];
621 }
622
623 static inline struct ureg_dst
624 tx_scratch_scalar(struct shader_translator *tx)
625 {
626 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
627 }
628
629 static inline struct ureg_src
630 tx_src_scalar(struct ureg_dst dst)
631 {
632 struct ureg_src src = ureg_src(dst);
633 int c = ffs(dst.WriteMask) - 1;
634 if (dst.WriteMask == (1 << c))
635 src = ureg_scalar(src, c);
636 return src;
637 }
638
639 static inline void
640 tx_temp_alloc(struct shader_translator *tx, INT idx)
641 {
642 assert(idx >= 0);
643 if (idx >= tx->num_temp) {
644 unsigned k = tx->num_temp;
645 unsigned n = idx + 1;
646 tx->regs.r = REALLOC(tx->regs.r,
647 k * sizeof(tx->regs.r[0]),
648 n * sizeof(tx->regs.r[0]));
649 for (; k < n; ++k)
650 tx->regs.r[k] = ureg_dst_undef();
651 tx->num_temp = n;
652 }
653 if (ureg_dst_is_undef(tx->regs.r[idx]))
654 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
655 }
656
657 static inline void
658 tx_addr_alloc(struct shader_translator *tx, INT idx)
659 {
660 assert(idx == 0);
661 if (ureg_dst_is_undef(tx->regs.address))
662 tx->regs.address = ureg_DECL_address(tx->ureg);
663 if (ureg_dst_is_undef(tx->regs.a0))
664 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
665 }
666
667 static inline void
668 tx_pred_alloc(struct shader_translator *tx, INT idx)
669 {
670 assert(idx == 0);
671 if (ureg_dst_is_undef(tx->regs.p))
672 tx->regs.p = ureg_DECL_predicate(tx->ureg);
673 }
674
675 static inline void
676 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
677 {
678 assert(IS_PS);
679 assert(idx >= 0 && idx < Elements(tx->regs.vT));
680 if (ureg_src_is_undef(tx->regs.vT[idx]))
681 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
682 TGSI_INTERPOLATE_PERSPECTIVE);
683 }
684
685 static inline unsigned *
686 tx_bgnloop(struct shader_translator *tx)
687 {
688 tx->loop_depth++;
689 if (tx->loop_depth_max < tx->loop_depth)
690 tx->loop_depth_max = tx->loop_depth;
691 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
692 return &tx->loop_labels[tx->loop_depth - 1];
693 }
694
695 static inline unsigned *
696 tx_endloop(struct shader_translator *tx)
697 {
698 assert(tx->loop_depth);
699 tx->loop_depth--;
700 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
701 ureg_get_instruction_number(tx->ureg));
702 return &tx->loop_labels[tx->loop_depth];
703 }
704
705 static struct ureg_dst
706 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
707 {
708 const unsigned l = tx->loop_depth - 1;
709
710 if (!tx->loop_depth)
711 {
712 DBG("loop counter requested outside of loop\n");
713 return ureg_dst_undef();
714 }
715
716 if (ureg_dst_is_undef(tx->regs.rL[l])) {
717 /* loop or rep ctr creation */
718 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
719 tx->loop_or_rep[l] = loop_or_rep;
720 }
721 /* loop - rep - endloop - endrep not allowed */
722 assert(tx->loop_or_rep[l] == loop_or_rep);
723
724 return tx->regs.rL[l];
725 }
726
727 static struct ureg_src
728 tx_get_loopal(struct shader_translator *tx)
729 {
730 int loop_level = tx->loop_depth - 1;
731
732 while (loop_level >= 0) {
733 /* handle loop - rep - endrep - endloop case */
734 if (tx->loop_or_rep[loop_level])
735 /* the value is in the loop counter y component (nine implementation) */
736 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
737 loop_level--;
738 }
739
740 DBG("aL counter requested outside of loop\n");
741 return ureg_src_undef();
742 }
743
744 static inline unsigned *
745 tx_cond(struct shader_translator *tx)
746 {
747 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
748 tx->cond_depth++;
749 return &tx->cond_labels[tx->cond_depth - 1];
750 }
751
752 static inline unsigned *
753 tx_elsecond(struct shader_translator *tx)
754 {
755 assert(tx->cond_depth);
756 return &tx->cond_labels[tx->cond_depth - 1];
757 }
758
759 static inline void
760 tx_endcond(struct shader_translator *tx)
761 {
762 assert(tx->cond_depth);
763 tx->cond_depth--;
764 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
765 ureg_get_instruction_number(tx->ureg));
766 }
767
768 static inline struct ureg_dst
769 nine_ureg_dst_register(unsigned file, int index)
770 {
771 return ureg_dst(ureg_src_register(file, index));
772 }
773
774 static struct ureg_src
775 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
776 {
777 struct ureg_program *ureg = tx->ureg;
778 struct ureg_src src;
779 struct ureg_dst tmp;
780
781 switch (param->file)
782 {
783 case D3DSPR_TEMP:
784 assert(!param->rel);
785 tx_temp_alloc(tx, param->idx);
786 src = ureg_src(tx->regs.r[param->idx]);
787 break;
788 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
789 case D3DSPR_ADDR:
790 assert(!param->rel);
791 if (IS_VS) {
792 assert(param->idx == 0);
793 /* the address register (vs only) must be
794 * assigned before use */
795 assert(!ureg_dst_is_undef(tx->regs.a0));
796 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
797 src = ureg_src(tx->regs.address);
798 } else {
799 if (tx->version.major < 2 && tx->version.minor < 4) {
800 /* no subroutines, so should be defined */
801 src = ureg_src(tx->regs.tS[param->idx]);
802 } else {
803 tx_texcoord_alloc(tx, param->idx);
804 src = tx->regs.vT[param->idx];
805 }
806 }
807 break;
808 case D3DSPR_INPUT:
809 if (IS_VS) {
810 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
811 } else {
812 if (tx->version.major < 3) {
813 assert(!param->rel);
814 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
815 param->idx,
816 TGSI_INTERPOLATE_PERSPECTIVE);
817 } else {
818 assert(!param->rel); /* TODO */
819 assert(param->idx < Elements(tx->regs.v));
820 src = tx->regs.v[param->idx];
821 }
822 }
823 break;
824 case D3DSPR_PREDICATE:
825 assert(!param->rel);
826 tx_pred_alloc(tx, param->idx);
827 src = ureg_src(tx->regs.p);
828 break;
829 case D3DSPR_SAMPLER:
830 assert(param->mod == NINED3DSPSM_NONE);
831 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
832 assert(!param->rel);
833 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
834 break;
835 case D3DSPR_CONST:
836 assert(!param->rel || IS_VS);
837 if (param->rel)
838 tx->indirect_const_access = TRUE;
839 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
840 if (!param->rel)
841 nine_info_mark_const_f_used(tx->info, param->idx);
842 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
843 }
844 if (!IS_VS && tx->version.major < 2) {
845 /* ps 1.X clamps constants */
846 tmp = tx_scratch(tx);
847 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
848 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
849 src = ureg_src(tmp);
850 }
851 break;
852 case D3DSPR_CONST2:
853 case D3DSPR_CONST3:
854 case D3DSPR_CONST4:
855 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
856 assert(!"CONST2/3/4");
857 src = ureg_imm1f(ureg, 0.0f);
858 break;
859 case D3DSPR_CONSTINT:
860 /* relative adressing only possible for float constants in vs */
861 assert(!param->rel);
862 if (!tx_lconsti(tx, &src, param->idx)) {
863 nine_info_mark_const_i_used(tx->info, param->idx);
864 src = ureg_src_register(TGSI_FILE_CONSTANT,
865 tx->info->const_i_base + param->idx);
866 }
867 break;
868 case D3DSPR_CONSTBOOL:
869 assert(!param->rel);
870 if (!tx_lconstb(tx, &src, param->idx)) {
871 char r = param->idx / 4;
872 char s = param->idx & 3;
873 nine_info_mark_const_b_used(tx->info, param->idx);
874 src = ureg_src_register(TGSI_FILE_CONSTANT,
875 tx->info->const_b_base + r);
876 src = ureg_swizzle(src, s, s, s, s);
877 }
878 break;
879 case D3DSPR_LOOP:
880 if (ureg_dst_is_undef(tx->regs.address))
881 tx->regs.address = ureg_DECL_address(ureg);
882 if (!tx->native_integers)
883 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
884 else
885 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
886 src = ureg_src(tx->regs.address);
887 break;
888 case D3DSPR_MISCTYPE:
889 switch (param->idx) {
890 case D3DSMO_POSITION:
891 if (ureg_src_is_undef(tx->regs.vPos))
892 tx->regs.vPos = ureg_DECL_fs_input(ureg,
893 TGSI_SEMANTIC_POSITION, 0,
894 TGSI_INTERPOLATE_LINEAR);
895 if (tx->shift_wpos) {
896 /* TODO: do this only once */
897 struct ureg_dst wpos = tx_scratch(tx);
898 ureg_SUB(ureg, wpos, tx->regs.vPos,
899 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
900 src = ureg_src(wpos);
901 } else {
902 src = tx->regs.vPos;
903 }
904 break;
905 case D3DSMO_FACE:
906 if (ureg_src_is_undef(tx->regs.vFace)) {
907 tx->regs.vFace = ureg_DECL_fs_input(ureg,
908 TGSI_SEMANTIC_FACE, 0,
909 TGSI_INTERPOLATE_CONSTANT);
910 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
911 }
912 src = tx->regs.vFace;
913 break;
914 default:
915 assert(!"invalid src D3DSMO");
916 break;
917 }
918 assert(!param->rel);
919 break;
920 case D3DSPR_TEMPFLOAT16:
921 break;
922 default:
923 assert(!"invalid src D3DSPR");
924 }
925 if (param->rel)
926 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
927
928 switch (param->mod) {
929 case NINED3DSPSM_DW:
930 tmp = tx_scratch(tx);
931 /* NOTE: app is not allowed to read w with this modifier */
932 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
933 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
934 src = ureg_src(tmp);
935 break;
936 case NINED3DSPSM_DZ:
937 tmp = tx_scratch(tx);
938 /* NOTE: app is not allowed to read z with this modifier */
939 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
940 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
941 src = ureg_src(tmp);
942 break;
943 default:
944 break;
945 }
946
947 if (param->swizzle != NINED3DSP_NOSWIZZLE)
948 src = ureg_swizzle(src,
949 (param->swizzle >> 0) & 0x3,
950 (param->swizzle >> 2) & 0x3,
951 (param->swizzle >> 4) & 0x3,
952 (param->swizzle >> 6) & 0x3);
953
954 switch (param->mod) {
955 case NINED3DSPSM_ABS:
956 src = ureg_abs(src);
957 break;
958 case NINED3DSPSM_ABSNEG:
959 src = ureg_negate(ureg_abs(src));
960 break;
961 case NINED3DSPSM_NEG:
962 src = ureg_negate(src);
963 break;
964 case NINED3DSPSM_BIAS:
965 tmp = tx_scratch(tx);
966 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
967 src = ureg_src(tmp);
968 break;
969 case NINED3DSPSM_BIASNEG:
970 tmp = tx_scratch(tx);
971 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
972 src = ureg_src(tmp);
973 break;
974 case NINED3DSPSM_NOT:
975 if (tx->native_integers) {
976 tmp = tx_scratch(tx);
977 ureg_NOT(ureg, tmp, src);
978 src = ureg_src(tmp);
979 break;
980 }
981 /* fall through */
982 case NINED3DSPSM_COMP:
983 tmp = tx_scratch(tx);
984 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
985 src = ureg_src(tmp);
986 break;
987 case NINED3DSPSM_DZ:
988 case NINED3DSPSM_DW:
989 /* Already handled*/
990 break;
991 case NINED3DSPSM_SIGN:
992 tmp = tx_scratch(tx);
993 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
994 src = ureg_src(tmp);
995 break;
996 case NINED3DSPSM_SIGNNEG:
997 tmp = tx_scratch(tx);
998 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
999 src = ureg_src(tmp);
1000 break;
1001 case NINED3DSPSM_X2:
1002 tmp = tx_scratch(tx);
1003 ureg_ADD(ureg, tmp, src, src);
1004 src = ureg_src(tmp);
1005 break;
1006 case NINED3DSPSM_X2NEG:
1007 tmp = tx_scratch(tx);
1008 ureg_ADD(ureg, tmp, src, src);
1009 src = ureg_negate(ureg_src(tmp));
1010 break;
1011 default:
1012 assert(param->mod == NINED3DSPSM_NONE);
1013 break;
1014 }
1015
1016 return src;
1017 }
1018
1019 static struct ureg_dst
1020 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1021 {
1022 struct ureg_dst dst;
1023
1024 switch (param->file)
1025 {
1026 case D3DSPR_TEMP:
1027 assert(!param->rel);
1028 tx_temp_alloc(tx, param->idx);
1029 dst = tx->regs.r[param->idx];
1030 break;
1031 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1032 case D3DSPR_ADDR:
1033 assert(!param->rel);
1034 if (tx->version.major < 2 && !IS_VS) {
1035 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1036 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1037 dst = tx->regs.tS[param->idx];
1038 } else
1039 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1040 tx_texcoord_alloc(tx, param->idx);
1041 dst = ureg_dst(tx->regs.vT[param->idx]);
1042 } else {
1043 tx_addr_alloc(tx, param->idx);
1044 dst = tx->regs.a0;
1045 }
1046 break;
1047 case D3DSPR_RASTOUT:
1048 assert(!param->rel);
1049 switch (param->idx) {
1050 case 0:
1051 if (ureg_dst_is_undef(tx->regs.oPos))
1052 tx->regs.oPos =
1053 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1054 dst = tx->regs.oPos;
1055 break;
1056 case 1:
1057 if (ureg_dst_is_undef(tx->regs.oFog))
1058 tx->regs.oFog =
1059 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1060 dst = tx->regs.oFog;
1061 break;
1062 case 2:
1063 if (ureg_dst_is_undef(tx->regs.oPts))
1064 tx->regs.oPts =
1065 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1066 dst = tx->regs.oPts;
1067 break;
1068 default:
1069 assert(0);
1070 break;
1071 }
1072 break;
1073 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1074 case D3DSPR_OUTPUT:
1075 if (tx->version.major < 3) {
1076 assert(!param->rel);
1077 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1078 } else {
1079 assert(!param->rel); /* TODO */
1080 assert(param->idx < Elements(tx->regs.o));
1081 dst = tx->regs.o[param->idx];
1082 }
1083 break;
1084 case D3DSPR_ATTROUT: /* VS */
1085 case D3DSPR_COLOROUT: /* PS */
1086 assert(param->idx >= 0 && param->idx < 4);
1087 assert(!param->rel);
1088 tx->info->rt_mask |= 1 << param->idx;
1089 if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
1090 tx->regs.oCol[param->idx] =
1091 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1092 dst = tx->regs.oCol[param->idx];
1093 if (IS_VS && tx->version.major < 3)
1094 dst = ureg_saturate(dst);
1095 break;
1096 case D3DSPR_DEPTHOUT:
1097 assert(!param->rel);
1098 if (ureg_dst_is_undef(tx->regs.oDepth))
1099 tx->regs.oDepth =
1100 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1101 TGSI_WRITEMASK_Z, 0, 1);
1102 dst = tx->regs.oDepth; /* XXX: must write .z component */
1103 break;
1104 case D3DSPR_PREDICATE:
1105 assert(!param->rel);
1106 tx_pred_alloc(tx, param->idx);
1107 dst = tx->regs.p;
1108 break;
1109 case D3DSPR_TEMPFLOAT16:
1110 DBG("unhandled D3DSPR: %u\n", param->file);
1111 break;
1112 default:
1113 assert(!"invalid dst D3DSPR");
1114 break;
1115 }
1116 if (param->rel)
1117 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1118
1119 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1120 dst = ureg_writemask(dst, param->mask);
1121 if (param->mod & NINED3DSPDM_SATURATE)
1122 dst = ureg_saturate(dst);
1123
1124 return dst;
1125 }
1126
1127 static struct ureg_dst
1128 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1129 {
1130 if (param->shift) {
1131 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1132 return tx->regs.tdst;
1133 }
1134 return _tx_dst_param(tx, param);
1135 }
1136
1137 static void
1138 tx_apply_dst0_modifiers(struct shader_translator *tx)
1139 {
1140 struct ureg_dst rdst;
1141 float f;
1142
1143 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1144 return;
1145 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1146
1147 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1148
1149 if (tx->insn.dst[0].shift < 0)
1150 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1151 else
1152 f = 1 << tx->insn.dst[0].shift;
1153
1154 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1155 }
1156
1157 static struct ureg_src
1158 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1159 {
1160 struct ureg_src src;
1161
1162 assert(!param->shift);
1163 assert(!(param->mod & NINED3DSPDM_SATURATE));
1164
1165 switch (param->file) {
1166 case D3DSPR_INPUT:
1167 if (IS_VS) {
1168 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1169 } else {
1170 assert(!param->rel);
1171 assert(param->idx < Elements(tx->regs.v));
1172 src = tx->regs.v[param->idx];
1173 }
1174 break;
1175 default:
1176 src = ureg_src(tx_dst_param(tx, param));
1177 break;
1178 }
1179 if (param->rel)
1180 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1181
1182 if (!param->mask)
1183 WARN("mask is 0, using identity swizzle\n");
1184
1185 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1186 char s[4];
1187 int n;
1188 int c;
1189 for (n = 0, c = 0; c < 4; ++c)
1190 if (param->mask & (1 << c))
1191 s[n++] = c;
1192 assert(n);
1193 for (c = n; c < 4; ++c)
1194 s[c] = s[n - 1];
1195 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1196 }
1197 return src;
1198 }
1199
1200 static HRESULT
1201 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1202 {
1203 struct ureg_program *ureg = tx->ureg;
1204 struct ureg_dst dst;
1205 struct ureg_src src[2];
1206 struct sm1_src_param *src_mat = &tx->insn.src[1];
1207 unsigned i;
1208
1209 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1210 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1211
1212 for (i = 0; i < n; i++)
1213 {
1214 const unsigned m = (1 << i);
1215
1216 src[1] = tx_src_param(tx, src_mat);
1217 src_mat->idx++;
1218
1219 if (!(dst.WriteMask & m))
1220 continue;
1221
1222 /* XXX: src == dst case ? */
1223
1224 switch (k) {
1225 case 3:
1226 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1227 break;
1228 case 4:
1229 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1230 break;
1231 default:
1232 DBG("invalid operation: M%ux%u\n", m, n);
1233 break;
1234 }
1235 }
1236
1237 return D3D_OK;
1238 }
1239
/* Expands to the two values "0, 0"; presumably the (min, max) version pair of
 * an opcode a shader stage does not support - confirm against the table. */
#define VNOTSUPPORTED 0, 0
/* Pack a shader version as (major << 8) | minor. */
#define V(major, minor) (((major) << 8) | (minor))
1242
1243 static inline const char *
1244 d3dsio_to_string( unsigned opcode )
1245 {
1246 static const char *names[] = {
1247 "NOP",
1248 "MOV",
1249 "ADD",
1250 "SUB",
1251 "MAD",
1252 "MUL",
1253 "RCP",
1254 "RSQ",
1255 "DP3",
1256 "DP4",
1257 "MIN",
1258 "MAX",
1259 "SLT",
1260 "SGE",
1261 "EXP",
1262 "LOG",
1263 "LIT",
1264 "DST",
1265 "LRP",
1266 "FRC",
1267 "M4x4",
1268 "M4x3",
1269 "M3x4",
1270 "M3x3",
1271 "M3x2",
1272 "CALL",
1273 "CALLNZ",
1274 "LOOP",
1275 "RET",
1276 "ENDLOOP",
1277 "LABEL",
1278 "DCL",
1279 "POW",
1280 "CRS",
1281 "SGN",
1282 "ABS",
1283 "NRM",
1284 "SINCOS",
1285 "REP",
1286 "ENDREP",
1287 "IF",
1288 "IFC",
1289 "ELSE",
1290 "ENDIF",
1291 "BREAK",
1292 "BREAKC",
1293 "MOVA",
1294 "DEFB",
1295 "DEFI",
1296 NULL,
1297 NULL,
1298 NULL,
1299 NULL,
1300 NULL,
1301 NULL,
1302 NULL,
1303 NULL,
1304 NULL,
1305 NULL,
1306 NULL,
1307 NULL,
1308 NULL,
1309 NULL,
1310 NULL,
1311 "TEXCOORD",
1312 "TEXKILL",
1313 "TEX",
1314 "TEXBEM",
1315 "TEXBEML",
1316 "TEXREG2AR",
1317 "TEXREG2GB",
1318 "TEXM3x2PAD",
1319 "TEXM3x2TEX",
1320 "TEXM3x3PAD",
1321 "TEXM3x3TEX",
1322 NULL,
1323 "TEXM3x3SPEC",
1324 "TEXM3x3VSPEC",
1325 "EXPP",
1326 "LOGP",
1327 "CND",
1328 "DEF",
1329 "TEXREG2RGB",
1330 "TEXDP3TEX",
1331 "TEXM3x2DEPTH",
1332 "TEXDP3",
1333 "TEXM3x3",
1334 "TEXDEPTH",
1335 "CMP",
1336 "BEM",
1337 "DP2ADD",
1338 "DSX",
1339 "DSY",
1340 "TEXLDD",
1341 "SETP",
1342 "TEXLDL",
1343 "BREAKP"
1344 };
1345
1346 if (opcode < Elements(names)) return names[opcode];
1347
1348 switch (opcode) {
1349 case D3DSIO_PHASE: return "PHASE";
1350 case D3DSIO_COMMENT: return "COMMENT";
1351 case D3DSIO_END: return "END";
1352 default:
1353 return NULL;
1354 }
1355 }
1356
/* All-zero initializer for an instruction table entry. */
#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }

/* Non-zero when any of the entry's vertex/fragment version bounds is set. */
#define IS_VALID_INSTRUCTION(insn_info) \
    ((insn_info).vert_version.min | (insn_info).vert_version.max | \
     (insn_info).frag_version.min | (insn_info).frag_version.max)

/* Name of the dedicated translation function for opcode `name`. */
#define SPECIAL(name) NineTranslateInstruction_##name

/* Function header of the dedicated translation function for opcode `name`;
 * the translator is available in the body as `tx`. */
#define DECL_SPECIAL(name) \
    static HRESULT NineTranslateInstruction_##name( struct shader_translator *tx )
1369
1370 static HRESULT
1371 NineTranslateInstruction_Generic(struct shader_translator *);
1372
1373 DECL_SPECIAL(M4x4)
1374 {
1375 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1376 }
1377
1378 DECL_SPECIAL(M4x3)
1379 {
1380 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1381 }
1382
1383 DECL_SPECIAL(M3x4)
1384 {
1385 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1386 }
1387
1388 DECL_SPECIAL(M3x3)
1389 {
1390 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1391 }
1392
1393 DECL_SPECIAL(M3x2)
1394 {
1395 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1396 }
1397
1398 DECL_SPECIAL(CMP)
1399 {
1400 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1401 tx_src_param(tx, &tx->insn.src[0]),
1402 tx_src_param(tx, &tx->insn.src[2]),
1403 tx_src_param(tx, &tx->insn.src[1]));
1404 return D3D_OK;
1405 }
1406
1407 DECL_SPECIAL(CND)
1408 {
1409 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1410 struct ureg_dst cgt;
1411 struct ureg_src cnd;
1412
1413 /* the coissue flag was a tip for compilers to advise to
1414 * execute two operations at the same time, in cases
1415 * the two executions had same dst with different channels.
1416 * It has no effect on current hw. However it seems CND
1417 * is affected. The handling of this very specific case
1418 * handled below mimick wine behaviour */
1419 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1420 ureg_MOV(tx->ureg,
1421 dst, tx_src_param(tx, &tx->insn.src[1]));
1422 return D3D_OK;
1423 }
1424
1425 cnd = tx_src_param(tx, &tx->insn.src[0]);
1426 cgt = tx_scratch(tx);
1427
1428 if (tx->version.major == 1 && tx->version.minor < 4)
1429 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1430
1431 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1432
1433 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1434 tx_src_param(tx, &tx->insn.src[1]),
1435 tx_src_param(tx, &tx->insn.src[2]));
1436 return D3D_OK;
1437 }
1438
1439 DECL_SPECIAL(CALL)
1440 {
1441 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1442 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1443 return D3D_OK;
1444 }
1445
1446 DECL_SPECIAL(CALLNZ)
1447 {
1448 struct ureg_program *ureg = tx->ureg;
1449 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1450
1451 if (!tx->native_integers)
1452 ureg_IF(ureg, src, tx_cond(tx));
1453 else
1454 ureg_UIF(ureg, src, tx_cond(tx));
1455 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1456 tx_endcond(tx);
1457 ureg_ENDIF(ureg);
1458 return D3D_OK;
1459 }
1460
1461 DECL_SPECIAL(MOV_vs1x)
1462 {
1463 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1464 /* Implementation note: We don't write directly
1465 * to the addr register, but to an intermediate
1466 * float register.
1467 * Contrary to the doc, when writing to ADDR here,
1468 * the rounding is not to nearest, but to lowest
1469 * (wine test).
1470 * Since we use ARR next, substract 0.5. */
1471 ureg_SUB(tx->ureg,
1472 tx_dst_param(tx, &tx->insn.dst[0]),
1473 tx_src_param(tx, &tx->insn.src[0]),
1474 ureg_imm1f(tx->ureg, 0.5f));
1475 return D3D_OK;
1476 }
1477 return NineTranslateInstruction_Generic(tx);
1478 }
1479
1480 DECL_SPECIAL(LOOP)
1481 {
1482 struct ureg_program *ureg = tx->ureg;
1483 unsigned *label;
1484 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1485 struct ureg_dst ctr;
1486 struct ureg_dst tmp;
1487 struct ureg_src ctrx;
1488
1489 label = tx_bgnloop(tx);
1490 ctr = tx_get_loopctr(tx, TRUE);
1491 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1492
1493 /* src: num_iterations - start_value of al - step for al - 0 */
1494 ureg_MOV(ureg, ctr, src);
1495 ureg_BGNLOOP(tx->ureg, label);
1496 tmp = tx_scratch_scalar(tx);
1497 /* Initially ctr.x contains the number of iterations.
1498 * ctr.y will contain the updated value of al.
1499 * We decrease ctr.x at the end of every iteration,
1500 * and stop when it reaches 0. */
1501
1502 if (!tx->native_integers) {
1503 /* case src and ctr contain floats */
1504 /* to avoid precision issue, we stop when ctr <= 0.5 */
1505 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1506 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1507 } else {
1508 /* case src and ctr contain integers */
1509 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1510 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1511 }
1512 ureg_BRK(ureg);
1513 tx_endcond(tx);
1514 ureg_ENDIF(ureg);
1515 return D3D_OK;
1516 }
1517
1518 DECL_SPECIAL(RET)
1519 {
1520 ureg_RET(tx->ureg);
1521 return D3D_OK;
1522 }
1523
1524 DECL_SPECIAL(ENDLOOP)
1525 {
1526 struct ureg_program *ureg = tx->ureg;
1527 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1528 struct ureg_dst dst_ctrx, dst_al;
1529 struct ureg_src src_ctr, al_counter;
1530
1531 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1532 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1533 src_ctr = ureg_src(ctr);
1534 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1535
1536 /* ctr.x -= 1
1537 * ctr.y (aL) += step */
1538 if (!tx->native_integers) {
1539 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1540 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1541 } else {
1542 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1543 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1544 }
1545 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1546 return D3D_OK;
1547 }
1548
1549 DECL_SPECIAL(LABEL)
1550 {
1551 unsigned k = tx->num_inst_labels;
1552 unsigned n = tx->insn.src[0].idx;
1553 assert(n < 2048);
1554 if (n >= k)
1555 tx->inst_labels = REALLOC(tx->inst_labels,
1556 k * sizeof(tx->inst_labels[0]),
1557 n * sizeof(tx->inst_labels[0]));
1558
1559 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1560 return D3D_OK;
1561 }
1562
1563 DECL_SPECIAL(SINCOS)
1564 {
1565 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1566 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1567
1568 assert(!(dst.WriteMask & 0xc));
1569
1570 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1571 ureg_SCS(tx->ureg, dst, src);
1572 return D3D_OK;
1573 }
1574
1575 DECL_SPECIAL(SGN)
1576 {
1577 ureg_SSG(tx->ureg,
1578 tx_dst_param(tx, &tx->insn.dst[0]),
1579 tx_src_param(tx, &tx->insn.src[0]));
1580 return D3D_OK;
1581 }
1582
1583 DECL_SPECIAL(REP)
1584 {
1585 struct ureg_program *ureg = tx->ureg;
1586 unsigned *label;
1587 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1588 struct ureg_dst ctr;
1589 struct ureg_dst tmp;
1590 struct ureg_src ctrx;
1591
1592 label = tx_bgnloop(tx);
1593 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1594 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1595
1596 /* NOTE: rep must be constant, so we don't have to save the count */
1597 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1598
1599 /* rep: num_iterations - 0 - 0 - 0 */
1600 ureg_MOV(ureg, ctr, rep);
1601 ureg_BGNLOOP(ureg, label);
1602 tmp = tx_scratch_scalar(tx);
1603 /* Initially ctr.x contains the number of iterations.
1604 * We decrease ctr.x at the end of every iteration,
1605 * and stop when it reaches 0. */
1606
1607 if (!tx->native_integers) {
1608 /* case src and ctr contain floats */
1609 /* to avoid precision issue, we stop when ctr <= 0.5 */
1610 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1611 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1612 } else {
1613 /* case src and ctr contain integers */
1614 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1615 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1616 }
1617 ureg_BRK(ureg);
1618 tx_endcond(tx);
1619 ureg_ENDIF(ureg);
1620
1621 return D3D_OK;
1622 }
1623
1624 DECL_SPECIAL(ENDREP)
1625 {
1626 struct ureg_program *ureg = tx->ureg;
1627 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1628 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1629 struct ureg_src src_ctr = ureg_src(ctr);
1630
1631 /* ctr.x -= 1 */
1632 if (!tx->native_integers)
1633 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1634 else
1635 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1636
1637 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1638 return D3D_OK;
1639 }
1640
1641 DECL_SPECIAL(ENDIF)
1642 {
1643 tx_endcond(tx);
1644 ureg_ENDIF(tx->ureg);
1645 return D3D_OK;
1646 }
1647
1648 DECL_SPECIAL(IF)
1649 {
1650 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1651
1652 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1653 ureg_UIF(tx->ureg, src, tx_cond(tx));
1654 else
1655 ureg_IF(tx->ureg, src, tx_cond(tx));
1656
1657 return D3D_OK;
1658 }
1659
1660 static inline unsigned
1661 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1662 {
1663 switch (flags) {
1664 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1665 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1666 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1667 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1668 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1669 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1670 default:
1671 assert(!"invalid comparison flags");
1672 return TGSI_OPCODE_SGT;
1673 }
1674 }
1675
1676 DECL_SPECIAL(IFC)
1677 {
1678 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1679 struct ureg_src src[2];
1680 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1681 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1682 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1683 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1684 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1685 return D3D_OK;
1686 }
1687
1688 DECL_SPECIAL(ELSE)
1689 {
1690 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1691 return D3D_OK;
1692 }
1693
1694 DECL_SPECIAL(BREAKC)
1695 {
1696 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1697 struct ureg_src src[2];
1698 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1699 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1700 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1701 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1702 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1703 ureg_BRK(tx->ureg);
1704 tx_endcond(tx);
1705 ureg_ENDIF(tx->ureg);
1706 return D3D_OK;
1707 }
1708
1709 static const char *sm1_declusage_names[] =
1710 {
1711 [D3DDECLUSAGE_POSITION] = "POSITION",
1712 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1713 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1714 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1715 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1716 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1717 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1718 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1719 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1720 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1721 [D3DDECLUSAGE_COLOR] = "COLOR",
1722 [D3DDECLUSAGE_FOG] = "FOG",
1723 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1724 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1725 };
1726
1727 static inline unsigned
1728 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1729 {
1730 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1731 }
1732
1733 static void
1734 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1735 boolean tc,
1736 struct sm1_semantic *dcl)
1737 {
1738 BYTE index = dcl->usage_idx;
1739
1740 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1741 * we match to a TGSI_SEMANTIC_GENERIC with index.
1742 *
1743 * The index can be anything UINT16 and usage_idx is BYTE,
1744 * so we can fit everything. It doesn't matter if indices
1745 * are close together or low.
1746 *
1747 *
1748 * POSITION >= 1: 10 * index + 6
1749 * COLOR >= 2: 10 * (index-1) + 7
1750 * TEXCOORD[0..15]: index
1751 * BLENDWEIGHT: 10 * index + 18
1752 * BLENDINDICES: 10 * index + 19
1753 * NORMAL: 10 * index + 20
1754 * TANGENT: 10 * index + 21
1755 * BINORMAL: 10 * index + 22
1756 * TESSFACTOR: 10 * index + 23
1757 */
1758
1759 switch (dcl->usage) {
1760 case D3DDECLUSAGE_POSITION:
1761 case D3DDECLUSAGE_POSITIONT:
1762 case D3DDECLUSAGE_DEPTH:
1763 if (index == 0) {
1764 sem->Name = TGSI_SEMANTIC_POSITION;
1765 sem->Index = 0;
1766 } else {
1767 sem->Name = TGSI_SEMANTIC_GENERIC;
1768 sem->Index = 10 * index + 6;
1769 }
1770 break;
1771 case D3DDECLUSAGE_COLOR:
1772 if (index < 2) {
1773 sem->Name = TGSI_SEMANTIC_COLOR;
1774 sem->Index = index;
1775 } else {
1776 sem->Name = TGSI_SEMANTIC_GENERIC;
1777 sem->Index = 10 * (index-1) + 7;
1778 }
1779 break;
1780 case D3DDECLUSAGE_FOG:
1781 assert(index == 0);
1782 sem->Name = TGSI_SEMANTIC_FOG;
1783 sem->Index = 0;
1784 break;
1785 case D3DDECLUSAGE_PSIZE:
1786 assert(index == 0);
1787 sem->Name = TGSI_SEMANTIC_PSIZE;
1788 sem->Index = 0;
1789 break;
1790 case D3DDECLUSAGE_TEXCOORD:
1791 assert(index < 16);
1792 if (index < 8 && tc)
1793 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1794 else
1795 sem->Name = TGSI_SEMANTIC_GENERIC;
1796 sem->Index = index;
1797 break;
1798 case D3DDECLUSAGE_BLENDWEIGHT:
1799 sem->Name = TGSI_SEMANTIC_GENERIC;
1800 sem->Index = 10 * index + 18;
1801 break;
1802 case D3DDECLUSAGE_BLENDINDICES:
1803 sem->Name = TGSI_SEMANTIC_GENERIC;
1804 sem->Index = 10 * index + 19;
1805 break;
1806 case D3DDECLUSAGE_NORMAL:
1807 sem->Name = TGSI_SEMANTIC_GENERIC;
1808 sem->Index = 10 * index + 20;
1809 break;
1810 case D3DDECLUSAGE_TANGENT:
1811 sem->Name = TGSI_SEMANTIC_GENERIC;
1812 sem->Index = 10 * index + 21;
1813 break;
1814 case D3DDECLUSAGE_BINORMAL:
1815 sem->Name = TGSI_SEMANTIC_GENERIC;
1816 sem->Index = 10 * index + 22;
1817 break;
1818 case D3DDECLUSAGE_TESSFACTOR:
1819 sem->Name = TGSI_SEMANTIC_GENERIC;
1820 sem->Index = 10 * index + 23;
1821 break;
1822 case D3DDECLUSAGE_SAMPLE:
1823 sem->Name = TGSI_SEMANTIC_COUNT;
1824 sem->Index = 0;
1825 break;
1826 default:
1827 assert(!"Invalid DECLUSAGE.");
1828 break;
1829 }
1830 }
1831
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT. */
#define NINED3DSTT_1D       (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D       (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME   (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE     (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1836 static inline unsigned
1837 d3dstt_to_tgsi_tex(BYTE sampler_type)
1838 {
1839 switch (sampler_type) {
1840 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1841 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1842 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1843 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1844 default:
1845 assert(0);
1846 return TGSI_TEXTURE_UNKNOWN;
1847 }
1848 }
1849 static inline unsigned
1850 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1851 {
1852 switch (sampler_type) {
1853 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1854 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1855 case NINED3DSTT_VOLUME:
1856 case NINED3DSTT_CUBE:
1857 default:
1858 assert(0);
1859 return TGSI_TEXTURE_UNKNOWN;
1860 }
1861 }
1862 static inline unsigned
1863 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1864 {
1865 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1866 case 1: return TGSI_TEXTURE_1D;
1867 case 0: return TGSI_TEXTURE_2D;
1868 case 3: return TGSI_TEXTURE_3D;
1869 default:
1870 return TGSI_TEXTURE_CUBE;
1871 }
1872 }
1873
1874 static const char *
1875 sm1_sampler_type_name(BYTE sampler_type)
1876 {
1877 switch (sampler_type) {
1878 case NINED3DSTT_1D: return "1D";
1879 case NINED3DSTT_2D: return "2D";
1880 case NINED3DSTT_VOLUME: return "VOLUME";
1881 case NINED3DSTT_CUBE: return "CUBE";
1882 default:
1883 return "(D3DSTT_?)";
1884 }
1885 }
1886
1887 static inline unsigned
1888 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1889 {
1890 switch (sem->Name) {
1891 case TGSI_SEMANTIC_POSITION:
1892 case TGSI_SEMANTIC_NORMAL:
1893 return TGSI_INTERPOLATE_LINEAR;
1894 case TGSI_SEMANTIC_BCOLOR:
1895 case TGSI_SEMANTIC_COLOR:
1896 case TGSI_SEMANTIC_FOG:
1897 case TGSI_SEMANTIC_GENERIC:
1898 case TGSI_SEMANTIC_TEXCOORD:
1899 case TGSI_SEMANTIC_CLIPDIST:
1900 case TGSI_SEMANTIC_CLIPVERTEX:
1901 return TGSI_INTERPOLATE_PERSPECTIVE;
1902 case TGSI_SEMANTIC_EDGEFLAG:
1903 case TGSI_SEMANTIC_FACE:
1904 case TGSI_SEMANTIC_INSTANCEID:
1905 case TGSI_SEMANTIC_PCOORD:
1906 case TGSI_SEMANTIC_PRIMID:
1907 case TGSI_SEMANTIC_PSIZE:
1908 case TGSI_SEMANTIC_VERTEXID:
1909 return TGSI_INTERPOLATE_CONSTANT;
1910 default:
1911 assert(0);
1912 return TGSI_INTERPOLATE_CONSTANT;
1913 }
1914 }
1915
1916 DECL_SPECIAL(DCL)
1917 {
1918 struct ureg_program *ureg = tx->ureg;
1919 boolean is_input;
1920 boolean is_sampler;
1921 struct tgsi_declaration_semantic tgsi;
1922 struct sm1_semantic sem;
1923 sm1_read_semantic(tx, &sem);
1924
1925 is_input = sem.reg.file == D3DSPR_INPUT;
1926 is_sampler =
1927 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1928
1929 DUMP("DCL ");
1930 sm1_dump_dst_param(&sem.reg);
1931 if (is_sampler)
1932 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1933 else
1934 if (tx->version.major >= 3)
1935 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1936 else
1937 if (sem.usage | sem.usage_idx)
1938 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1939 else
1940 DUMP("\n");
1941
1942 if (is_sampler) {
1943 const unsigned m = 1 << sem.reg.idx;
1944 ureg_DECL_sampler(ureg, sem.reg.idx);
1945 tx->info->sampler_mask |= m;
1946 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1947 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1948 d3dstt_to_tgsi_tex(sem.sampler_type);
1949 return D3D_OK;
1950 }
1951
1952 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1953 if (IS_VS) {
1954 if (is_input) {
1955 /* linkage outside of shader with vertex declaration */
1956 ureg_DECL_vs_input(ureg, sem.reg.idx);
1957 assert(sem.reg.idx < Elements(tx->info->input_map));
1958 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1959 tx->info->num_inputs = sem.reg.idx + 1;
1960 /* NOTE: preserving order in case of indirect access */
1961 } else
1962 if (tx->version.major >= 3) {
1963 /* SM2 output semantic determined by file */
1964 assert(sem.reg.mask != 0);
1965 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1966 tx->info->position_t = TRUE;
1967 assert(sem.reg.idx < Elements(tx->regs.o));
1968 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1969 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
1970
1971 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1972 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1973 }
1974 } else {
1975 if (is_input && tx->version.major >= 3) {
1976 /* SM3 only, SM2 input semantic determined by file */
1977 assert(sem.reg.idx < Elements(tx->regs.v));
1978 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1979 ureg, tgsi.Name, tgsi.Index,
1980 nine_tgsi_to_interp_mode(&tgsi),
1981 0, /* cylwrap */
1982 sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
1983 } else
1984 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
1985 /* FragColor or FragDepth */
1986 assert(sem.reg.mask != 0);
1987 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
1988 0, 1);
1989 }
1990 }
1991 return D3D_OK;
1992 }
1993
1994 DECL_SPECIAL(DEF)
1995 {
1996 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
1997 return D3D_OK;
1998 }
1999
2000 DECL_SPECIAL(DEFB)
2001 {
2002 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2003 return D3D_OK;
2004 }
2005
2006 DECL_SPECIAL(DEFI)
2007 {
2008 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2009 return D3D_OK;
2010 }
2011
2012 DECL_SPECIAL(POW)
2013 {
2014 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2015 struct ureg_src src[2] = {
2016 tx_src_param(tx, &tx->insn.src[0]),
2017 tx_src_param(tx, &tx->insn.src[1])
2018 };
2019 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2020 return D3D_OK;
2021 }
2022
2023 DECL_SPECIAL(RSQ)
2024 {
2025 struct ureg_program *ureg = tx->ureg;
2026 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2027 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2028 struct ureg_dst tmp = tx_scratch(tx);
2029 ureg_RSQ(ureg, tmp, ureg_abs(src));
2030 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2031 return D3D_OK;
2032 }
2033
2034 DECL_SPECIAL(LOG)
2035 {
2036 struct ureg_program *ureg = tx->ureg;
2037 struct ureg_dst tmp = tx_scratch_scalar(tx);
2038 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2039 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2040 ureg_LG2(ureg, tmp, ureg_abs(src));
2041 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2042 return D3D_OK;
2043 }
2044
2045 DECL_SPECIAL(LIT)
2046 {
2047 struct ureg_program *ureg = tx->ureg;
2048 struct ureg_dst tmp = tx_scratch(tx);
2049 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2050 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2051 ureg_LIT(ureg, tmp, src);
2052 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2053 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2054 * it 0^0 if src.w=0, which value is driver dependent. */
2055 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2056 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2057 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2058 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2059 return D3D_OK;
2060 }
2061
2062 DECL_SPECIAL(NRM)
2063 {
2064 struct ureg_program *ureg = tx->ureg;
2065 struct ureg_dst tmp = tx_scratch_scalar(tx);
2066 struct ureg_src nrm = tx_src_scalar(tmp);
2067 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2068 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2069 ureg_DP3(ureg, tmp, src, src);
2070 ureg_RSQ(ureg, tmp, nrm);
2071 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2072 ureg_MUL(ureg, dst, src, nrm);
2073 return D3D_OK;
2074 }
2075
2076 DECL_SPECIAL(DP2ADD)
2077 {
2078 struct ureg_dst tmp = tx_scratch_scalar(tx);
2079 struct ureg_src dp2 = tx_src_scalar(tmp);
2080 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2081 struct ureg_src src[3];
2082 int i;
2083 for (i = 0; i < 3; ++i)
2084 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2085 assert_replicate_swizzle(&src[2]);
2086
2087 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2088 ureg_ADD(tx->ureg, dst, src[2], dp2);
2089
2090 return D3D_OK;
2091 }
2092
2093 DECL_SPECIAL(TEXCOORD)
2094 {
2095 struct ureg_program *ureg = tx->ureg;
2096 const unsigned s = tx->insn.dst[0].idx;
2097 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2098
2099 tx_texcoord_alloc(tx, s);
2100 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2101 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2102
2103 return D3D_OK;
2104 }
2105
2106 DECL_SPECIAL(TEXCOORD_ps14)
2107 {
2108 struct ureg_program *ureg = tx->ureg;
2109 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2110 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2111
2112 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2113
2114 ureg_MOV(ureg, dst, src);
2115
2116 return D3D_OK;
2117 }
2118
2119 DECL_SPECIAL(TEXKILL)
2120 {
2121 struct ureg_src reg;
2122
2123 if (tx->version.major > 1 || tx->version.minor > 3) {
2124 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2125 } else {
2126 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2127 reg = tx->regs.vT[tx->insn.dst[0].idx];
2128 }
2129 if (tx->version.major < 2)
2130 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2131 ureg_KILL_IF(tx->ureg, reg);
2132
2133 return D3D_OK;
2134 }
2135
/* texbem: no translation is emitted here; the body only invokes STUB() with
 * D3DERR_INVALIDCALL.
 * NOTE(review): presumably STUB() reports the missing implementation and
 * returns its argument - confirm against the macro's definition. */
2136 DECL_SPECIAL(TEXBEM)
2137 {
2138 STUB(D3DERR_INVALIDCALL);
2139 }
2140
/* texbeml: no translation is emitted here; the body only invokes STUB() with
 * D3DERR_INVALIDCALL.
 * NOTE(review): presumably STUB() reports the missing implementation and
 * returns its argument - confirm against the macro's definition. */
2141 DECL_SPECIAL(TEXBEML)
2142 {
2143 STUB(D3DERR_INVALIDCALL);
2144 }
2145
2146 DECL_SPECIAL(TEXREG2AR)
2147 {
2148 struct ureg_program *ureg = tx->ureg;
2149 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2150 struct ureg_src sample;
2151 const int m = tx->insn.dst[0].idx;
2152 const int n = tx->insn.src[0].idx;
2153 assert(m >= 0 && m > n);
2154
2155 sample = ureg_DECL_sampler(ureg, m);
2156 tx->info->sampler_mask |= 1 << m;
2157 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2158
2159 return D3D_OK;
2160 }
2161
2162 DECL_SPECIAL(TEXREG2GB)
2163 {
2164 struct ureg_program *ureg = tx->ureg;
2165 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2166 struct ureg_src sample;
2167 const int m = tx->insn.dst[0].idx;
2168 const int n = tx->insn.src[0].idx;
2169 assert(m >= 0 && m > n);
2170
2171 sample = ureg_DECL_sampler(ureg, m);
2172 tx->info->sampler_mask |= 1 << m;
2173 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2174
2175 return D3D_OK;
2176 }
2177
2178 DECL_SPECIAL(TEXM3x2PAD)
2179 {
2180 return D3D_OK; /* this is just padding */
2181 }
2182
2183 DECL_SPECIAL(TEXM3x2TEX)
2184 {
2185 struct ureg_program *ureg = tx->ureg;
2186 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2187 struct ureg_src sample;
2188 const int m = tx->insn.dst[0].idx - 1;
2189 const int n = tx->insn.src[0].idx;
2190 assert(m >= 0 && m > n);
2191
2192 tx_texcoord_alloc(tx, m);
2193 tx_texcoord_alloc(tx, m+1);
2194
2195 /* performs the matrix multiplication */
2196 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2197 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2198
2199 sample = ureg_DECL_sampler(ureg, m + 1);
2200 tx->info->sampler_mask |= 1 << (m + 1);
2201 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2202
2203 return D3D_OK;
2204 }
2205
2206 DECL_SPECIAL(TEXM3x3PAD)
2207 {
2208 return D3D_OK; /* this is just padding */
2209 }
2210
2211 DECL_SPECIAL(TEXM3x3SPEC)
2212 {
2213 struct ureg_program *ureg = tx->ureg;
2214 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2215 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2216 struct ureg_src sample;
2217 struct ureg_dst tmp;
2218 const int m = tx->insn.dst[0].idx - 2;
2219 const int n = tx->insn.src[0].idx;
2220 assert(m >= 0 && m > n);
2221
2222 tx_texcoord_alloc(tx, m);
2223 tx_texcoord_alloc(tx, m+1);
2224 tx_texcoord_alloc(tx, m+2);
2225
2226 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2227 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2228 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2229
2230 sample = ureg_DECL_sampler(ureg, m + 2);
2231 tx->info->sampler_mask |= 1 << (m + 2);
2232 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2233
2234 /* At this step, dst = N = (u', w', z').
2235 * We want dst to be the texture sampled at (u'', w'', z''), with
2236 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2237 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2238 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2239 /* at this step tmp.x = 1/N.N */
2240 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2241 /* at this step tmp.y = N.E */
2242 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2243 /* at this step tmp.x = N.E/N.N */
2244 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2245 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2246 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2247 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2248 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2249
2250 return D3D_OK;
2251 }
2252
2253 DECL_SPECIAL(TEXREG2RGB)
2254 {
2255 struct ureg_program *ureg = tx->ureg;
2256 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2257 struct ureg_src sample;
2258 const int m = tx->insn.dst[0].idx;
2259 const int n = tx->insn.src[0].idx;
2260 assert(m >= 0 && m > n);
2261
2262 sample = ureg_DECL_sampler(ureg, m);
2263 tx->info->sampler_mask |= 1 << m;
2264 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2265
2266 return D3D_OK;
2267 }
2268
2269 DECL_SPECIAL(TEXDP3TEX)
2270 {
2271 struct ureg_program *ureg = tx->ureg;
2272 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2273 struct ureg_dst tmp;
2274 struct ureg_src sample;
2275 const int m = tx->insn.dst[0].idx;
2276 const int n = tx->insn.src[0].idx;
2277 assert(m >= 0 && m > n);
2278
2279 tx_texcoord_alloc(tx, m);
2280
2281 tmp = tx_scratch(tx);
2282 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2283 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2284
2285 sample = ureg_DECL_sampler(ureg, m);
2286 tx->info->sampler_mask |= 1 << m;
2287 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2288
2289 return D3D_OK;
2290 }
2291
2292 DECL_SPECIAL(TEXM3x2DEPTH)
2293 {
2294 struct ureg_program *ureg = tx->ureg;
2295 struct ureg_dst tmp;
2296 const int m = tx->insn.dst[0].idx - 1;
2297 const int n = tx->insn.src[0].idx;
2298 assert(m >= 0 && m > n);
2299
2300 tx_texcoord_alloc(tx, m);
2301 tx_texcoord_alloc(tx, m+1);
2302
2303 tmp = tx_scratch(tx);
2304
2305 /* performs the matrix multiplication */
2306 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2307 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2308
2309 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2310 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2311 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2312 /* res = 'w' == 0 ? 1.0 : z/w */
2313 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2314 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2315 /* replace the depth for depth testing with the result */
2316 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2317 TGSI_WRITEMASK_Z, 0, 1);
2318 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2319 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2320 return D3D_OK;
2321 }
2322
2323 DECL_SPECIAL(TEXDP3)
2324 {
2325 struct ureg_program *ureg = tx->ureg;
2326 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2327 const int m = tx->insn.dst[0].idx;
2328 const int n = tx->insn.src[0].idx;
2329 assert(m >= 0 && m > n);
2330
2331 tx_texcoord_alloc(tx, m);
2332
2333 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2334
2335 return D3D_OK;
2336 }
2337
2338 DECL_SPECIAL(TEXM3x3)
2339 {
2340 struct ureg_program *ureg = tx->ureg;
2341 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2342 struct ureg_src sample;
2343 struct ureg_dst E, tmp;
2344 const int m = tx->insn.dst[0].idx - 2;
2345 const int n = tx->insn.src[0].idx;
2346 assert(m >= 0 && m > n);
2347
2348 tx_texcoord_alloc(tx, m);
2349 tx_texcoord_alloc(tx, m+1);
2350 tx_texcoord_alloc(tx, m+2);
2351
2352 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2353 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2354 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2355
2356 switch (tx->insn.opcode) {
2357 case D3DSIO_TEXM3x3:
2358 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2359 break;
2360 case D3DSIO_TEXM3x3TEX:
2361 sample = ureg_DECL_sampler(ureg, m + 2);
2362 tx->info->sampler_mask |= 1 << (m + 2);
2363 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2364 break;
2365 case D3DSIO_TEXM3x3VSPEC:
2366 sample = ureg_DECL_sampler(ureg, m + 2);
2367 tx->info->sampler_mask |= 1 << (m + 2);
2368 E = tx_scratch(tx);
2369 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2370 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2371 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2372 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2373 /* At this step, dst = N = (u', w', z').
2374 * We want dst to be the texture sampled at (u'', w'', z''), with
2375 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2376 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2377 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2378 /* at this step tmp.x = 1/N.N */
2379 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2380 /* at this step tmp.y = N.E */
2381 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2382 /* at this step tmp.x = N.E/N.N */
2383 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2384 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2385 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2386 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2387 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2388 break;
2389 default:
2390 return D3DERR_INVALIDCALL;
2391 }
2392 return D3D_OK;
2393 }
2394
2395 DECL_SPECIAL(TEXDEPTH)
2396 {
2397 struct ureg_program *ureg = tx->ureg;
2398 struct ureg_dst r5;
2399 struct ureg_src r5r, r5g;
2400
2401 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2402
2403 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2404 * r5 won't be used afterward, thus we can use r5.ba */
2405 r5 = tx->regs.r[5];
2406 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2407 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2408
2409 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2410 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2411 /* r5.r = r/g */
2412 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2413 r5r, ureg_imm1f(ureg, 1.0f));
2414 /* replace the depth for depth testing with the result */
2415 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2416 TGSI_WRITEMASK_Z, 0, 1);
2417 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2418
2419 return D3D_OK;
2420 }
2421
2422 DECL_SPECIAL(BEM)
2423 {
2424 STUB(D3DERR_INVALIDCALL);
2425 }
2426
2427 DECL_SPECIAL(TEXLD)
2428 {
2429 struct ureg_program *ureg = tx->ureg;
2430 unsigned target;
2431 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2432 struct ureg_src src[2] = {
2433 tx_src_param(tx, &tx->insn.src[0]),
2434 tx_src_param(tx, &tx->insn.src[1])
2435 };
2436 assert(tx->insn.src[1].idx >= 0 &&
2437 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2438 target = tx->sampler_targets[tx->insn.src[1].idx];
2439
2440 switch (tx->insn.flags) {
2441 case 0:
2442 ureg_TEX(ureg, dst, target, src[0], src[1]);
2443 break;
2444 case NINED3DSI_TEXLD_PROJECT:
2445 ureg_TXP(ureg, dst, target, src[0], src[1]);
2446 break;
2447 case NINED3DSI_TEXLD_BIAS:
2448 ureg_TXB(ureg, dst, target, src[0], src[1]);
2449 break;
2450 default:
2451 assert(0);
2452 return D3DERR_INVALIDCALL;
2453 }
2454 return D3D_OK;
2455 }
2456
2457 DECL_SPECIAL(TEXLD_14)
2458 {
2459 struct ureg_program *ureg = tx->ureg;
2460 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2461 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2462 const unsigned s = tx->insn.dst[0].idx;
2463 const unsigned t = ps1x_sampler_type(tx->info, s);
2464
2465 tx->info->sampler_mask |= 1 << s;
2466 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2467
2468 return D3D_OK;
2469 }
2470
2471 DECL_SPECIAL(TEX)
2472 {
2473 struct ureg_program *ureg = tx->ureg;
2474 const unsigned s = tx->insn.dst[0].idx;
2475 const unsigned t = ps1x_sampler_type(tx->info, s);
2476 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2477 struct ureg_src src[2];
2478
2479 tx_texcoord_alloc(tx, s);
2480
2481 src[0] = tx->regs.vT[s];
2482 src[1] = ureg_DECL_sampler(ureg, s);
2483 tx->info->sampler_mask |= 1 << s;
2484
2485 ureg_TEX(ureg, dst, t, src[0], src[1]);
2486
2487 return D3D_OK;
2488 }
2489
2490 DECL_SPECIAL(TEXLDD)
2491 {
2492 unsigned target;
2493 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2494 struct ureg_src src[4] = {
2495 tx_src_param(tx, &tx->insn.src[0]),
2496 tx_src_param(tx, &tx->insn.src[1]),
2497 tx_src_param(tx, &tx->insn.src[2]),
2498 tx_src_param(tx, &tx->insn.src[3])
2499 };
2500 assert(tx->insn.src[1].idx >= 0 &&
2501 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2502 target = tx->sampler_targets[tx->insn.src[1].idx];
2503
2504 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2505 return D3D_OK;
2506 }
2507
2508 DECL_SPECIAL(TEXLDL)
2509 {
2510 unsigned target;
2511 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2512 struct ureg_src src[2] = {
2513 tx_src_param(tx, &tx->insn.src[0]),
2514 tx_src_param(tx, &tx->insn.src[1])
2515 };
2516 assert(tx->insn.src[1].idx >= 0 &&
2517 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2518 target = tx->sampler_targets[tx->insn.src[1].idx];
2519
2520 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2521 return D3D_OK;
2522 }
2523
2524 DECL_SPECIAL(SETP)
2525 {
2526 STUB(D3DERR_INVALIDCALL);
2527 }
2528
2529 DECL_SPECIAL(BREAKP)
2530 {
2531 STUB(D3DERR_INVALIDCALL);
2532 }
2533
2534 DECL_SPECIAL(PHASE)
2535 {
2536 return D3D_OK; /* we don't care about phase */
2537 }
2538
2539 DECL_SPECIAL(COMMENT)
2540 {
2541 return D3D_OK; /* nothing to do */
2542 }
2543
2544
/* Expands to one positional initializer for struct sm1_op_info:
 *   sio / tgsi       opcode name suffixes, pasted onto D3DSIO_ / TGSI_OPCODE_
 *   vs_min, vs_max   first pair of version bounds (vertex shaders)
 *   ps_min, ps_max   second pair of version bounds (pixel shaders)
 *   ndst, nsrc       number of destination / source parameters
 *   handler          special translation function, or NULL
 * NOTE(review): the values are assigned in the declaration order of
 * struct sm1_op_info, which is not visible here - confirm it matches. */
#define _OPI(sio, tgsi, vs_min, vs_max, ps_min, ps_max, ndst, nsrc, handler) \
    { D3DSIO_##sio, TGSI_OPCODE_##tgsi, { vs_min, vs_max }, { ps_min, ps_max, }, ndst, nsrc, handler }
2547
2548 struct sm1_op_info inst_table[] =
2549 {
2550 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2551 _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
2552 _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2553 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2554 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2555 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2556 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2557 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2558 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2559 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2560 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2561 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2562 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2563 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2564 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2565 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2566 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2567 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2568 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2569 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2570 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2571
2572 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2573 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2574 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2575 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2576 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2577
2578 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2579 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2580 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2581 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2582 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2583 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2584
2585 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2586
2587 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2588 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2589 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2590 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2591 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2592
2593 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2594 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2595
2596 /* More flow control */
2597 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2598 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2599 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2600 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2601 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2602 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2603 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2604 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2605 /* we don't write to the address register, but a normal register (copied
2606 * when needed to the address register), thus we don't use ARR */
2607 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2608
2609 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2610 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2611
2612 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2613 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2614 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2615 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2616 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2617 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2618 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2619 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
2620 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2621 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2622 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2623 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2624 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2625 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2626 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2627 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2628
2629 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2630 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2631 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2632 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2633
2634 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2635
2636 /* More tex stuff */
2637 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2638 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2639 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2640 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2641 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2642 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2643
2644 /* Misc */
2645 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2646 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2647 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2648 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2649 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2650 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2651 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2652 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2653 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2654 };
2655
2656 struct sm1_op_info inst_phase =
2657 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2658
2659 struct sm1_op_info inst_comment =
2660 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2661
2662 static void
2663 create_op_info_map(struct shader_translator *tx)
2664 {
2665 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2666 unsigned i;
2667
2668 for (i = 0; i < Elements(tx->op_info_map); ++i)
2669 tx->op_info_map[i] = -1;
2670
2671 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2672 for (i = 0; i < Elements(inst_table); ++i) {
2673 assert(inst_table[i].sio < Elements(tx->op_info_map));
2674 if (inst_table[i].vert_version.min <= version &&
2675 inst_table[i].vert_version.max >= version)
2676 tx->op_info_map[inst_table[i].sio] = i;
2677 }
2678 } else {
2679 for (i = 0; i < Elements(inst_table); ++i) {
2680 assert(inst_table[i].sio < Elements(tx->op_info_map));
2681 if (inst_table[i].frag_version.min <= version &&
2682 inst_table[i].frag_version.max >= version)
2683 tx->op_info_map[inst_table[i].sio] = i;
2684 }
2685 }
2686 }
2687
2688 static inline HRESULT
2689 NineTranslateInstruction_Generic(struct shader_translator *tx)
2690 {
2691 struct ureg_dst dst[1];
2692 struct ureg_src src[4];
2693 unsigned i;
2694
2695 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2696 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2697 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2698 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2699
2700 ureg_insn(tx->ureg, tx->insn.info->opcode,
2701 dst, tx->insn.ndst,
2702 src, tx->insn.nsrc);
2703 return D3D_OK;
2704 }
2705
2706 static inline DWORD
2707 TOKEN_PEEK(struct shader_translator *tx)
2708 {
2709 return *(tx->parse);
2710 }
2711
2712 static inline DWORD
2713 TOKEN_NEXT(struct shader_translator *tx)
2714 {
2715 return *(tx->parse)++;
2716 }
2717
2718 static inline void
2719 TOKEN_JUMP(struct shader_translator *tx)
2720 {
2721 if (tx->parse_next && tx->parse != tx->parse_next) {
2722 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2723 tx->parse = tx->parse_next;
2724 }
2725 }
2726
2727 static inline boolean
2728 sm1_parse_eof(struct shader_translator *tx)
2729 {
2730 return TOKEN_PEEK(tx) == NINED3DSP_END;
2731 }
2732
2733 static void
2734 sm1_read_version(struct shader_translator *tx)
2735 {
2736 const DWORD tok = TOKEN_NEXT(tx);
2737
2738 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2739 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2740
2741 switch (tok >> 16) {
2742 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2743 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2744 default:
2745 DBG("Invalid shader type: %x\n", tok);
2746 tx->processor = ~0;
2747 break;
2748 }
2749 }
2750
2751 /* This is just to check if we parsed the instruction properly. */
2752 static void
2753 sm1_parse_get_skip(struct shader_translator *tx)
2754 {
2755 const DWORD tok = TOKEN_PEEK(tx);
2756
2757 if (tx->version.major >= 2) {
2758 tx->parse_next = tx->parse + 1 /* this */ +
2759 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2760 } else {
2761 tx->parse_next = NULL; /* TODO: determine from param count */
2762 }
2763 }
2764
2765 static void
2766 sm1_print_comment(const char *comment, UINT size)
2767 {
2768 if (!size)
2769 return;
2770 /* TODO */
2771 }
2772
2773 static void
2774 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2775 {
2776 DWORD tok = TOKEN_PEEK(tx);
2777
2778 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2779 {
2780 const char *comment = "";
2781 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2782 tx->parse += size + 1;
2783
2784 if (print)
2785 sm1_print_comment(comment, size);
2786
2787 tok = TOKEN_PEEK(tx);
2788 }
2789 }
2790
2791 static void
2792 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2793 {
2794 *reg = TOKEN_NEXT(tx);
2795
2796 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2797 {
2798 if (tx->version.major < 2)
2799 *rel = (1 << 31) |
2800 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2801 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2802 D3DSP_NOSWIZZLE;
2803 else
2804 *rel = TOKEN_NEXT(tx);
2805 }
2806 }
2807
2808 static void
2809 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2810 {
2811 uint8_t shift;
2812 dst->file =
2813 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2814 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2815 dst->type = TGSI_RETURN_TYPE_FLOAT;
2816 dst->idx = tok & D3DSP_REGNUM_MASK;
2817 dst->rel = NULL;
2818 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2819 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2820 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2821 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2822 }
2823
2824 static void
2825 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2826 {
2827 src->file =
2828 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2829 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2830 src->type = TGSI_RETURN_TYPE_FLOAT;
2831 src->idx = tok & D3DSP_REGNUM_MASK;
2832 src->rel = NULL;
2833 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2834 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2835
2836 switch (src->file) {
2837 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2838 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2839 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2840 default:
2841 break;
2842 }
2843 }
2844
2845 static void
2846 sm1_parse_immediate(struct shader_translator *tx,
2847 struct sm1_src_param *imm)
2848 {
2849 imm->file = NINED3DSPR_IMMEDIATE;
2850 imm->idx = INT_MIN;
2851 imm->rel = NULL;
2852 imm->swizzle = NINED3DSP_NOSWIZZLE;
2853 imm->mod = 0;
2854 switch (tx->insn.opcode) {
2855 case D3DSIO_DEF:
2856 imm->type = NINED3DSPTYPE_FLOAT4;
2857 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2858 tx->parse += 4;
2859 break;
2860 case D3DSIO_DEFI:
2861 imm->type = NINED3DSPTYPE_INT4;
2862 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2863 tx->parse += 4;
2864 break;
2865 case D3DSIO_DEFB:
2866 imm->type = NINED3DSPTYPE_BOOL;
2867 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2868 tx->parse += 1;
2869 break;
2870 default:
2871 assert(0);
2872 break;
2873 }
2874 }
2875
2876 static void
2877 sm1_read_dst_param(struct shader_translator *tx,
2878 struct sm1_dst_param *dst,
2879 struct sm1_src_param *rel)
2880 {
2881 DWORD tok_dst, tok_rel = 0;
2882
2883 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
2884 sm1_parse_dst_param(dst, tok_dst);
2885 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
2886 sm1_parse_src_param(rel, tok_rel);
2887 dst->rel = rel;
2888 }
2889 }
2890
2891 static void
2892 sm1_read_src_param(struct shader_translator *tx,
2893 struct sm1_src_param *src,
2894 struct sm1_src_param *rel)
2895 {
2896 DWORD tok_src, tok_rel = 0;
2897
2898 sm1_parse_get_param(tx, &tok_src, &tok_rel);
2899 sm1_parse_src_param(src, tok_src);
2900 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
2901 assert(rel);
2902 sm1_parse_src_param(rel, tok_rel);
2903 src->rel = rel;
2904 }
2905 }
2906
2907 static void
2908 sm1_read_semantic(struct shader_translator *tx,
2909 struct sm1_semantic *sem)
2910 {
2911 const DWORD tok_usg = TOKEN_NEXT(tx);
2912 const DWORD tok_dst = TOKEN_NEXT(tx);
2913
2914 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
2915 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
2916 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
2917
2918 sm1_parse_dst_param(&sem->reg, tok_dst);
2919 }
2920
2921 static void
2922 sm1_parse_instruction(struct shader_translator *tx)
2923 {
2924 struct sm1_instruction *insn = &tx->insn;
2925 DWORD tok;
2926 struct sm1_op_info *info = NULL;
2927 unsigned i;
2928
2929 sm1_parse_comments(tx, TRUE);
2930 sm1_parse_get_skip(tx);
2931
2932 tok = TOKEN_NEXT(tx);
2933
2934 insn->opcode = tok & D3DSI_OPCODE_MASK;
2935 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
2936 insn->coissue = !!(tok & D3DSI_COISSUE);
2937 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
2938
2939 if (insn->opcode < Elements(tx->op_info_map)) {
2940 int k = tx->op_info_map[insn->opcode];
2941 if (k >= 0) {
2942 assert(k < Elements(inst_table));
2943 info = &inst_table[k];
2944 }
2945 } else {
2946 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
2947 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
2948 }
2949 if (!info) {
2950 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
2951 TOKEN_JUMP(tx);
2952 return;
2953 }
2954 insn->info = info;
2955 insn->ndst = info->ndst;
2956 insn->nsrc = info->nsrc;
2957
2958 assert(!insn->predicated && "TODO: predicated instructions");
2959
2960 /* check version */
2961 {
2962 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
2963 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
2964 unsigned ver = (tx->version.major << 8) | tx->version.minor;
2965 if (ver < min || ver > max) {
2966 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
2967 min, ver, max);
2968 return;
2969 }
2970 }
2971
2972 for (i = 0; i < insn->ndst; ++i)
2973 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
2974 if (insn->predicated)
2975 sm1_read_src_param(tx, &insn->pred, NULL);
2976 for (i = 0; i < insn->nsrc; ++i)
2977 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
2978
2979 /* parse here so we can dump them before processing */
2980 if (insn->opcode == D3DSIO_DEF ||
2981 insn->opcode == D3DSIO_DEFI ||
2982 insn->opcode == D3DSIO_DEFB)
2983 sm1_parse_immediate(tx, &tx->insn.src[0]);
2984
2985 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
2986 sm1_instruction_check(insn);
2987
2988 if (info->handler)
2989 info->handler(tx);
2990 else
2991 NineTranslateInstruction_Generic(tx);
2992 tx_apply_dst0_modifiers(tx);
2993
2994 tx->num_scratch = 0; /* reset */
2995
2996 TOKEN_JUMP(tx);
2997 }
2998
2999 static void
3000 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3001 {
3002 unsigned i;
3003
3004 tx->info = info;
3005
3006 tx->byte_code = info->byte_code;
3007 tx->parse = info->byte_code;
3008
3009 for (i = 0; i < Elements(info->input_map); ++i)
3010 info->input_map[i] = NINE_DECLUSAGE_NONE;
3011 info->num_inputs = 0;
3012
3013 info->position_t = FALSE;
3014 info->point_size = FALSE;
3015
3016 tx->info->const_float_slots = 0;
3017 tx->info->const_int_slots = 0;
3018 tx->info->const_bool_slots = 0;
3019
3020 info->sampler_mask = 0x0;
3021 info->rt_mask = 0x0;
3022
3023 info->lconstf.data = NULL;
3024 info->lconstf.ranges = NULL;
3025
3026 for (i = 0; i < Elements(tx->regs.rL); ++i) {
3027 tx->regs.rL[i] = ureg_dst_undef();
3028 }
3029 tx->regs.address = ureg_dst_undef();
3030 tx->regs.a0 = ureg_dst_undef();
3031 tx->regs.p = ureg_dst_undef();
3032 tx->regs.oDepth = ureg_dst_undef();
3033 tx->regs.vPos = ureg_src_undef();
3034 tx->regs.vFace = ureg_src_undef();
3035 for (i = 0; i < Elements(tx->regs.o); ++i)
3036 tx->regs.o[i] = ureg_dst_undef();
3037 for (i = 0; i < Elements(tx->regs.oCol); ++i)
3038 tx->regs.oCol[i] = ureg_dst_undef();
3039 for (i = 0; i < Elements(tx->regs.vC); ++i)
3040 tx->regs.vC[i] = ureg_src_undef();
3041 for (i = 0; i < Elements(tx->regs.vT); ++i)
3042 tx->regs.vT[i] = ureg_src_undef();
3043
3044 for (i = 0; i < Elements(tx->lconsti); ++i)
3045 tx->lconsti[i].idx = -1;
3046 for (i = 0; i < Elements(tx->lconstb); ++i)
3047 tx->lconstb[i].idx = -1;
3048
3049 sm1_read_version(tx);
3050
3051 info->version = (tx->version.major << 4) | tx->version.minor;
3052
3053 create_op_info_map(tx);
3054 }
3055
3056 static void
3057 tx_dtor(struct shader_translator *tx)
3058 {
3059 if (tx->num_inst_labels)
3060 FREE(tx->inst_labels);
3061 FREE(tx->lconstf);
3062 FREE(tx->regs.r);
3063 FREE(tx);
3064 }
3065
3066 static inline unsigned
3067 tgsi_processor_from_type(unsigned shader_type)
3068 {
3069 switch (shader_type) {
3070 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
3071 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
3072 default:
3073 return ~0;
3074 }
3075 }
3076
/* Shorthands for querying the pipe screen.
 * GET_CAP(n) reads PIPE_CAP_<n> and needs `device` in scope.
 * GET_SHADER_CAP(n) reads PIPE_SHADER_CAP_<n> for the shader type in
 * info->type and needs both `device` and `info` in scope. */
#define GET_CAP(n) \
    device->screen->get_param(device->screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) \
    device->screen->get_shader_param(device->screen, info->type, \
                                     PIPE_SHADER_CAP_##n)
3081
3082 HRESULT
3083 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3084 {
3085 struct shader_translator *tx;
3086 HRESULT hr = D3D_OK;
3087 const unsigned processor = tgsi_processor_from_type(info->type);
3088 unsigned s, slot_max;
3089 unsigned max_const_f;
3090
3091 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3092
3093 tx = CALLOC_STRUCT(shader_translator);
3094 if (!tx)
3095 return E_OUTOFMEMORY;
3096 tx_ctor(tx, info);
3097
3098 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3099 hr = D3DERR_INVALIDCALL;
3100 DBG("Unsupported shader version: %u.%u !\n",
3101 tx->version.major, tx->version.minor);
3102 goto out;
3103 }
3104 if (tx->processor != processor) {
3105 hr = D3DERR_INVALIDCALL;
3106 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3107 goto out;
3108 }
3109 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
3110 tx->version.major, tx->version.minor);
3111
3112 tx->ureg = ureg_create(processor);
3113 if (!tx->ureg) {
3114 hr = E_OUTOFMEMORY;
3115 goto out;
3116 }
3117
3118 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3119 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3120 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3121 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3122 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3123 tx->texcoord_sn = tx->want_texcoord ?
3124 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3125
3126 /* VS must always write position. Declare it here to make it the 1st output.
3127 * (Some drivers like nv50 are buggy and rely on that.)
3128 */
3129 if (IS_VS) {
3130 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3131 } else {
3132 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3133 if (!tx->shift_wpos)
3134 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3135 }
3136
3137 while (!sm1_parse_eof(tx) && !tx->failure)
3138 sm1_parse_instruction(tx);
3139 tx->parse++; /* for byte_size */
3140
3141 if (tx->failure) {
3142 ERR("Encountered buggy shader\n");
3143 ureg_destroy(tx->ureg);
3144 hr = D3DERR_INVALIDCALL;
3145 goto out;
3146 }
3147
3148 if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
3149 ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
3150 ureg_src(tx->regs.r[0]));
3151 info->rt_mask |= 0x1;
3152 }
3153
3154 if (info->position_t)
3155 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3156
3157 ureg_END(tx->ureg);
3158
3159 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
3160 info->point_size = TRUE;
3161
3162 /* record local constants */
3163 if (tx->num_lconstf && tx->indirect_const_access) {
3164 struct nine_range *ranges;
3165 float *data;
3166 int *indices;
3167 unsigned i, k, n;
3168
3169 hr = E_OUTOFMEMORY;
3170
3171 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3172 if (!data)
3173 goto out;
3174 info->lconstf.data = data;
3175
3176 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3177 if (!indices)
3178 goto out;
3179
3180 /* lazy sort, num_lconstf should be small */
3181 for (n = 0; n < tx->num_lconstf; ++n) {
3182 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3183 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3184 k = i;
3185 }
3186 indices[n] = tx->lconstf[k].idx;
3187 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3188 tx->lconstf[k].idx = INT_MAX;
3189 }
3190
3191 /* count ranges */
3192 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3193 if (indices[i] != indices[i - 1] + 1)
3194 ++n;
3195 ranges = MALLOC(n * sizeof(ranges[0]));
3196 if (!ranges) {
3197 FREE(indices);
3198 goto out;
3199 }
3200 info->lconstf.ranges = ranges;
3201
3202 k = 0;
3203 ranges[k].bgn = indices[0];
3204 for (i = 1; i < tx->num_lconstf; ++i) {
3205 if (indices[i] != indices[i - 1] + 1) {
3206 ranges[k].next = &ranges[k + 1];
3207 ranges[k].end = indices[i - 1] + 1;
3208 ++k;
3209 ranges[k].bgn = indices[i];
3210 }
3211 }
3212 ranges[k].end = indices[i - 1] + 1;
3213 ranges[k].next = NULL;
3214 assert(n == (k + 1));
3215
3216 FREE(indices);
3217 hr = D3D_OK;
3218 }
3219
3220 /* r500 */
3221 if (info->const_float_slots > device->max_vs_const_f &&
3222 (info->const_int_slots || info->const_bool_slots))
3223 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3224
3225
3226 if (tx->indirect_const_access) /* vs only */
3227 info->const_float_slots = device->max_vs_const_f;
3228
3229 max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3230 slot_max = info->const_bool_slots > 0 ?
3231 max_const_f + NINE_MAX_CONST_I
3232 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3233 info->const_int_slots > 0 ?
3234 max_const_f + info->const_int_slots :
3235 info->const_float_slots;
3236 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3237
3238 for (s = 0; s < slot_max; s++)
3239 ureg_DECL_constant(tx->ureg, s);
3240
3241 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3242 unsigned count;
3243 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3244 tgsi_dump(toks, 0);
3245 ureg_free_tokens(toks);
3246 }
3247
3248 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3249 if (!info->cso) {
3250 hr = D3DERR_DRIVERINTERNALERROR;
3251 FREE(info->lconstf.data);
3252 FREE(info->lconstf.ranges);
3253 goto out;
3254 }
3255
3256 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3257 out:
3258 tx_dtor(tx);
3259 return hr;
3260 }