st/nine: Fix computation of const_used_size
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
36
/* Debug channel used by the debug macros in this file. */
#define DBG_CHANNEL DBG_SHADER

/* Raw print on the shader debug channel; NULL is passed in the slot after the
 * channel. Uses the standard C99 variadic form rather than the GNU-only
 * "args..." named-variadic extension. Every caller supplies at least a
 * format string, so __VA_ARGS__ is never empty. */
#define DUMP(...) _nine_debug_printf(DBG_CHANNEL, NULL, __VA_ARGS__)
40
41
42 struct shader_translator;
43
44 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
45
46 static INLINE const char *d3dsio_to_string(unsigned opcode);
47
48
49 #define NINED3D_SM1_VS 0xfffe
50 #define NINED3D_SM1_PS 0xffff
51
52 #define NINE_MAX_COND_DEPTH 64
53 #define NINE_MAX_LOOP_DEPTH 64
54
55 #define NINED3DSP_END 0x0000ffff
56
57 #define NINED3DSPTYPE_FLOAT4 0
58 #define NINED3DSPTYPE_INT4 1
59 #define NINED3DSPTYPE_BOOL 2
60
61 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
62
63 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
64 #define NINED3DSP_WRITEMASK_SHIFT 16
65
66 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
67
68 #define NINED3DSHADER_REL_OP_GT 1
69 #define NINED3DSHADER_REL_OP_EQ 2
70 #define NINED3DSHADER_REL_OP_GE 3
71 #define NINED3DSHADER_REL_OP_LT 4
72 #define NINED3DSHADER_REL_OP_NE 5
73 #define NINED3DSHADER_REL_OP_LE 6
74
75 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
76 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
77
78 #define NINED3DSI_TEXLD_PROJECT 0x1
79 #define NINED3DSI_TEXLD_BIAS 0x2
80
81 #define NINED3DSP_WRITEMASK_0 0x1
82 #define NINED3DSP_WRITEMASK_1 0x2
83 #define NINED3DSP_WRITEMASK_2 0x4
84 #define NINED3DSP_WRITEMASK_3 0x8
85 #define NINED3DSP_WRITEMASK_ALL 0xf
86
87 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
88
89 #define NINE_SWIZZLE4(x,y,z,w) \
90 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
91
92 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
93 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
94 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
95
96 /*
97 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
98 * BIAS <= PS 1.4 (x-0.5)
99 * BIASNEG <= PS 1.4 (-(x-0.5))
100 * SIGN <= PS 1.4 (2(x-0.5))
101 * SIGNNEG <= PS 1.4 (-2(x-0.5))
102 * COMP <= PS 1.4 (1-x)
103 * X2 = PS 1.4 (2x)
104 * X2NEG = PS 1.4 (-2x)
105 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
106 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
107 * ABS >= SM 3.0 (abs(x))
108 * ABSNEG >= SM 3.0 (-abs(x))
109 * NOT >= SM 2.0 predication only
110 */
/* Source modifiers, shifted down so they can be used as small indices
 * (sm1_mod_str is indexed by these values). */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
125
126 static const char *sm1_mod_str[] =
127 {
128 [NINED3DSPSM_NONE] = "",
129 [NINED3DSPSM_NEG] = "-",
130 [NINED3DSPSM_BIAS] = "bias",
131 [NINED3DSPSM_BIASNEG] = "biasneg",
132 [NINED3DSPSM_SIGN] = "sign",
133 [NINED3DSPSM_SIGNNEG] = "signneg",
134 [NINED3DSPSM_COMP] = "comp",
135 [NINED3DSPSM_X2] = "x2",
136 [NINED3DSPSM_X2NEG] = "x2neg",
137 [NINED3DSPSM_DZ] = "dz",
138 [NINED3DSPSM_DW] = "dw",
139 [NINED3DSPSM_ABS] = "abs",
140 [NINED3DSPSM_ABSNEG] = "-abs",
141 [NINED3DSPSM_NOT] = "not"
142 };
143
144 static void
145 sm1_dump_writemask(BYTE mask)
146 {
147 if (mask & 1) DUMP("x"); else DUMP("_");
148 if (mask & 2) DUMP("y"); else DUMP("_");
149 if (mask & 4) DUMP("z"); else DUMP("_");
150 if (mask & 8) DUMP("w"); else DUMP("_");
151 }
152
153 static void
154 sm1_dump_swizzle(BYTE s)
155 {
156 char c[4] = { 'x', 'y', 'z', 'w' };
157 DUMP("%c%c%c%c",
158 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
159 }
160
161 static const char sm1_file_char[] =
162 {
163 [D3DSPR_TEMP] = 'r',
164 [D3DSPR_INPUT] = 'v',
165 [D3DSPR_CONST] = 'c',
166 [D3DSPR_ADDR] = 'A',
167 [D3DSPR_RASTOUT] = 'R',
168 [D3DSPR_ATTROUT] = 'D',
169 [D3DSPR_OUTPUT] = 'o',
170 [D3DSPR_CONSTINT] = 'I',
171 [D3DSPR_COLOROUT] = 'C',
172 [D3DSPR_DEPTHOUT] = 'D',
173 [D3DSPR_SAMPLER] = 's',
174 [D3DSPR_CONST2] = 'c',
175 [D3DSPR_CONST3] = 'c',
176 [D3DSPR_CONST4] = 'c',
177 [D3DSPR_CONSTBOOL] = 'B',
178 [D3DSPR_LOOP] = 'L',
179 [D3DSPR_TEMPFLOAT16] = 'h',
180 [D3DSPR_MISCTYPE] = 'M',
181 [D3DSPR_LABEL] = 'X',
182 [D3DSPR_PREDICATE] = 'p'
183 };
184
185 static void
186 sm1_dump_reg(BYTE file, INT index)
187 {
188 switch (file) {
189 case D3DSPR_LOOP:
190 DUMP("aL");
191 break;
192 case D3DSPR_COLOROUT:
193 DUMP("oC%i", index);
194 break;
195 case D3DSPR_DEPTHOUT:
196 DUMP("oDepth");
197 break;
198 case D3DSPR_RASTOUT:
199 DUMP("oRast%i", index);
200 break;
201 case D3DSPR_CONSTINT:
202 DUMP("iconst[%i]", index);
203 break;
204 case D3DSPR_CONSTBOOL:
205 DUMP("bconst[%i]", index);
206 break;
207 default:
208 DUMP("%c%i", sm1_file_char[file], index);
209 break;
210 }
211 }
212
213 struct sm1_src_param
214 {
215 INT idx;
216 struct sm1_src_param *rel;
217 BYTE file;
218 BYTE swizzle;
219 BYTE mod;
220 BYTE type;
221 union {
222 DWORD d[4];
223 float f[4];
224 int i[4];
225 BOOL b;
226 } imm;
227 };
228 static void
229 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
230
231 struct sm1_dst_param
232 {
233 INT idx;
234 struct sm1_src_param *rel;
235 BYTE file;
236 BYTE mask;
237 BYTE mod;
238 int8_t shift; /* sint4 */
239 BYTE type;
240 };
241
242 static INLINE void
243 assert_replicate_swizzle(const struct ureg_src *reg)
244 {
245 assert(reg->SwizzleY == reg->SwizzleX &&
246 reg->SwizzleZ == reg->SwizzleX &&
247 reg->SwizzleW == reg->SwizzleX);
248 }
249
250 static void
251 sm1_dump_immediate(const struct sm1_src_param *param)
252 {
253 switch (param->type) {
254 case NINED3DSPTYPE_FLOAT4:
255 DUMP("{ %f %f %f %f }",
256 param->imm.f[0], param->imm.f[1],
257 param->imm.f[2], param->imm.f[3]);
258 break;
259 case NINED3DSPTYPE_INT4:
260 DUMP("{ %i %i %i %i }",
261 param->imm.i[0], param->imm.i[1],
262 param->imm.i[2], param->imm.i[3]);
263 break;
264 case NINED3DSPTYPE_BOOL:
265 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
266 break;
267 default:
268 assert(0);
269 break;
270 }
271 }
272
273 static void
274 sm1_dump_src_param(const struct sm1_src_param *param)
275 {
276 if (param->file == NINED3DSPR_IMMEDIATE) {
277 assert(!param->mod &&
278 !param->rel &&
279 param->swizzle == NINED3DSP_NOSWIZZLE);
280 sm1_dump_immediate(param);
281 return;
282 }
283
284 if (param->mod)
285 DUMP("%s(", sm1_mod_str[param->mod]);
286 if (param->rel) {
287 DUMP("%c[", sm1_file_char[param->file]);
288 sm1_dump_src_param(param->rel);
289 DUMP("+%i]", param->idx);
290 } else {
291 sm1_dump_reg(param->file, param->idx);
292 }
293 if (param->mod)
294 DUMP(")");
295 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
296 DUMP(".");
297 sm1_dump_swizzle(param->swizzle);
298 }
299 }
300
301 static void
302 sm1_dump_dst_param(const struct sm1_dst_param *param)
303 {
304 if (param->mod & NINED3DSPDM_SATURATE)
305 DUMP("sat ");
306 if (param->mod & NINED3DSPDM_PARTIALP)
307 DUMP("pp ");
308 if (param->mod & NINED3DSPDM_CENTROID)
309 DUMP("centroid ");
310 if (param->shift < 0)
311 DUMP("/%u ", 1 << -param->shift);
312 if (param->shift > 0)
313 DUMP("*%u ", 1 << param->shift);
314
315 if (param->rel) {
316 DUMP("%c[", sm1_file_char[param->file]);
317 sm1_dump_src_param(param->rel);
318 DUMP("+%i]", param->idx);
319 } else {
320 sm1_dump_reg(param->file, param->idx);
321 }
322 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
323 DUMP(".");
324 sm1_dump_writemask(param->mask);
325 }
326 }
327
328 struct sm1_semantic
329 {
330 struct sm1_dst_param reg;
331 BYTE sampler_type;
332 D3DDECLUSAGE usage;
333 BYTE usage_idx;
334 };
335
336 struct sm1_op_info
337 {
338 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
339 * should be ignored completely */
340 unsigned sio;
341 unsigned opcode; /* TGSI_OPCODE_x */
342
343 /* versions are still set even handler is set */
344 struct {
345 unsigned min;
346 unsigned max;
347 } vert_version, frag_version;
348
349 /* number of regs parsed outside of special handler */
350 unsigned ndst;
351 unsigned nsrc;
352
353 /* some instructions don't map perfectly, so use a special handler */
354 translate_instruction_func handler;
355 };
356
357 struct sm1_instruction
358 {
359 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
360 BYTE flags;
361 BOOL coissue;
362 BOOL predicated;
363 BYTE ndst;
364 BYTE nsrc;
365 struct sm1_src_param src[4];
366 struct sm1_src_param src_rel[4];
367 struct sm1_src_param pred;
368 struct sm1_src_param dst_rel[1];
369 struct sm1_dst_param dst[1];
370
371 struct sm1_op_info *info;
372 };
373
374 static void
375 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
376 {
377 unsigned i;
378
379 /* no info stored for these: */
380 if (insn->opcode == D3DSIO_DCL)
381 return;
382 for (i = 0; i < indent; ++i)
383 DUMP(" ");
384
385 if (insn->predicated) {
386 DUMP("@");
387 sm1_dump_src_param(&insn->pred);
388 DUMP(" ");
389 }
390 DUMP("%s", d3dsio_to_string(insn->opcode));
391 if (insn->flags) {
392 switch (insn->opcode) {
393 case D3DSIO_TEX:
394 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
395 break;
396 default:
397 DUMP("_%x", insn->flags);
398 break;
399 }
400 }
401 if (insn->coissue)
402 DUMP("_co");
403 DUMP(" ");
404
405 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
406 sm1_dump_dst_param(&insn->dst[i]);
407 DUMP(" ");
408 }
409
410 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
411 sm1_dump_src_param(&insn->src[i]);
412 DUMP(" ");
413 }
414 if (insn->opcode == D3DSIO_DEF ||
415 insn->opcode == D3DSIO_DEFI ||
416 insn->opcode == D3DSIO_DEFB)
417 sm1_dump_immediate(&insn->src[0]);
418
419 DUMP("\n");
420 }
421
422 struct sm1_local_const
423 {
424 INT idx;
425 struct ureg_src reg;
426 union {
427 boolean b;
428 float f[4];
429 int32_t i[4];
430 } imm;
431 };
432
433 struct shader_translator
434 {
435 const DWORD *byte_code;
436 const DWORD *parse;
437 const DWORD *parse_next;
438
439 struct ureg_program *ureg;
440
441 /* shader version */
442 struct {
443 BYTE major;
444 BYTE minor;
445 } version;
446 unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
447
448 boolean native_integers;
449 boolean inline_subroutines;
450 boolean lower_preds;
451 boolean want_texcoord;
452 boolean shift_wpos;
453 unsigned texcoord_sn;
454
455 struct sm1_instruction insn; /* current instruction */
456
457 struct {
458 struct ureg_dst *r;
459 struct ureg_dst oPos;
460 struct ureg_dst oFog;
461 struct ureg_dst oPts;
462 struct ureg_dst oCol[4];
463 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
464 struct ureg_dst oDepth;
465 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
466 struct ureg_src vPos;
467 struct ureg_src vFace;
468 struct ureg_src s;
469 struct ureg_dst p;
470 struct ureg_dst address;
471 struct ureg_dst a0;
472 struct ureg_dst tS[8]; /* texture stage registers */
473 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
474 struct ureg_dst t[5]; /* scratch TEMPs */
475 struct ureg_src vC[2]; /* PS color in */
476 struct ureg_src vT[8]; /* PS texcoord in */
477 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
478 } regs;
479 unsigned num_temp; /* Elements(regs.r) */
480 unsigned num_scratch;
481 unsigned loop_depth;
482 unsigned loop_depth_max;
483 unsigned cond_depth;
484 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
485 unsigned cond_labels[NINE_MAX_COND_DEPTH];
486 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
487
488 unsigned *inst_labels; /* LABEL op */
489 unsigned num_inst_labels;
490
491 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
492
493 struct sm1_local_const *lconstf;
494 unsigned num_lconstf;
495 struct sm1_local_const lconsti[NINE_MAX_CONST_I];
496 struct sm1_local_const lconstb[NINE_MAX_CONST_B];
497
498 boolean indirect_const_access;
499 boolean failure;
500
501 struct nine_shader_info *info;
502
503 int16_t op_info_map[D3DSIO_BREAKP + 1];
504 };
505
/* Convenience macros; all of them expect a 'tx' translator in scope. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
/* Number of float constants available to the current shader stage. */
#define NINE_MAX_CONST_F_SHADER (IS_VS ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3)

/* Flag the translation as failed and return from a void function when
 * 'cond' holds. Callers in this file invoke it without a trailing
 * semicolon, so it has to stay a bare if-statement. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
514
515 static void
516 sm1_instruction_check(const struct sm1_instruction *insn)
517 {
518 if (insn->opcode == D3DSIO_CRS)
519 {
520 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
521 {
522 DBG("CRS.mask.w\n");
523 }
524 }
525 }
526
527 static boolean
528 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
529 {
530 INT i;
531 if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) {
532 tx->failure = TRUE;
533 return FALSE;
534 }
535 for (i = 0; i < tx->num_lconstf; ++i) {
536 if (tx->lconstf[i].idx == index) {
537 *src = tx->lconstf[i].reg;
538 return TRUE;
539 }
540 }
541 return FALSE;
542 }
543 static boolean
544 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
545 {
546 if (index < 0 || index >= NINE_MAX_CONST_I) {
547 tx->failure = TRUE;
548 return FALSE;
549 }
550 if (tx->lconsti[index].idx == index)
551 *src = tx->lconsti[index].reg;
552 return tx->lconsti[index].idx == index;
553 }
554 static boolean
555 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
556 {
557 if (index < 0 || index >= NINE_MAX_CONST_B) {
558 tx->failure = TRUE;
559 return FALSE;
560 }
561 if (tx->lconstb[index].idx == index)
562 *src = tx->lconstb[index].reg;
563 return tx->lconstb[index].idx == index;
564 }
565
566 static void
567 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
568 {
569 unsigned n;
570
571 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER)
572 if (IS_VS && index >= NINE_MAX_CONST_F_SHADER)
573 WARN("lconstf index %i too high, indirect access won't work\n", index);
574
575 for (n = 0; n < tx->num_lconstf; ++n)
576 if (tx->lconstf[n].idx == index)
577 break;
578 if (n == tx->num_lconstf) {
579 if ((n % 8) == 0) {
580 tx->lconstf = REALLOC(tx->lconstf,
581 (n + 0) * sizeof(tx->lconstf[0]),
582 (n + 8) * sizeof(tx->lconstf[0]));
583 assert(tx->lconstf);
584 }
585 tx->num_lconstf++;
586 }
587 tx->lconstf[n].idx = index;
588 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
589
590 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
591 }
592 static void
593 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
594 {
595 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I)
596 tx->lconsti[index].idx = index;
597 tx->lconsti[index].reg = tx->native_integers ?
598 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
599 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
600 }
601 static void
602 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
603 {
604 FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B)
605 tx->lconstb[index].idx = index;
606 tx->lconstb[index].reg = tx->native_integers ?
607 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
608 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
609 }
610
611 static INLINE struct ureg_dst
612 tx_scratch(struct shader_translator *tx)
613 {
614 if (tx->num_scratch >= Elements(tx->regs.t)) {
615 tx->failure = TRUE;
616 return tx->regs.t[0];
617 }
618 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
619 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
620 return tx->regs.t[tx->num_scratch++];
621 }
622
623 static INLINE struct ureg_dst
624 tx_scratch_scalar(struct shader_translator *tx)
625 {
626 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
627 }
628
629 static INLINE struct ureg_src
630 tx_src_scalar(struct ureg_dst dst)
631 {
632 struct ureg_src src = ureg_src(dst);
633 int c = ffs(dst.WriteMask) - 1;
634 if (dst.WriteMask == (1 << c))
635 src = ureg_scalar(src, c);
636 return src;
637 }
638
639 static INLINE void
640 tx_temp_alloc(struct shader_translator *tx, INT idx)
641 {
642 assert(idx >= 0);
643 if (idx >= tx->num_temp) {
644 unsigned k = tx->num_temp;
645 unsigned n = idx + 1;
646 tx->regs.r = REALLOC(tx->regs.r,
647 k * sizeof(tx->regs.r[0]),
648 n * sizeof(tx->regs.r[0]));
649 for (; k < n; ++k)
650 tx->regs.r[k] = ureg_dst_undef();
651 tx->num_temp = n;
652 }
653 if (ureg_dst_is_undef(tx->regs.r[idx]))
654 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
655 }
656
657 static INLINE void
658 tx_addr_alloc(struct shader_translator *tx, INT idx)
659 {
660 assert(idx == 0);
661 if (ureg_dst_is_undef(tx->regs.address))
662 tx->regs.address = ureg_DECL_address(tx->ureg);
663 if (ureg_dst_is_undef(tx->regs.a0))
664 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
665 }
666
667 static INLINE void
668 tx_pred_alloc(struct shader_translator *tx, INT idx)
669 {
670 assert(idx == 0);
671 if (ureg_dst_is_undef(tx->regs.p))
672 tx->regs.p = ureg_DECL_predicate(tx->ureg);
673 }
674
675 static INLINE void
676 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
677 {
678 assert(IS_PS);
679 assert(idx >= 0 && idx < Elements(tx->regs.vT));
680 if (ureg_src_is_undef(tx->regs.vT[idx]))
681 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
682 TGSI_INTERPOLATE_PERSPECTIVE);
683 }
684
685 static INLINE unsigned *
686 tx_bgnloop(struct shader_translator *tx)
687 {
688 tx->loop_depth++;
689 if (tx->loop_depth_max < tx->loop_depth)
690 tx->loop_depth_max = tx->loop_depth;
691 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
692 return &tx->loop_labels[tx->loop_depth - 1];
693 }
694
695 static INLINE unsigned *
696 tx_endloop(struct shader_translator *tx)
697 {
698 assert(tx->loop_depth);
699 tx->loop_depth--;
700 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
701 ureg_get_instruction_number(tx->ureg));
702 return &tx->loop_labels[tx->loop_depth];
703 }
704
705 static struct ureg_dst
706 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
707 {
708 const unsigned l = tx->loop_depth - 1;
709
710 if (!tx->loop_depth)
711 {
712 DBG("loop counter requested outside of loop\n");
713 return ureg_dst_undef();
714 }
715
716 if (ureg_dst_is_undef(tx->regs.rL[l])) {
717 /* loop or rep ctr creation */
718 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
719 tx->loop_or_rep[l] = loop_or_rep;
720 }
721 /* loop - rep - endloop - endrep not allowed */
722 assert(tx->loop_or_rep[l] == loop_or_rep);
723
724 return tx->regs.rL[l];
725 }
726
727 static struct ureg_src
728 tx_get_loopal(struct shader_translator *tx)
729 {
730 int loop_level = tx->loop_depth - 1;
731
732 while (loop_level >= 0) {
733 /* handle loop - rep - endrep - endloop case */
734 if (tx->loop_or_rep[loop_level])
735 /* the value is in the loop counter y component (nine implementation) */
736 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
737 loop_level--;
738 }
739
740 DBG("aL counter requested outside of loop\n");
741 return ureg_src_undef();
742 }
743
744 static INLINE unsigned *
745 tx_cond(struct shader_translator *tx)
746 {
747 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
748 tx->cond_depth++;
749 return &tx->cond_labels[tx->cond_depth - 1];
750 }
751
752 static INLINE unsigned *
753 tx_elsecond(struct shader_translator *tx)
754 {
755 assert(tx->cond_depth);
756 return &tx->cond_labels[tx->cond_depth - 1];
757 }
758
759 static INLINE void
760 tx_endcond(struct shader_translator *tx)
761 {
762 assert(tx->cond_depth);
763 tx->cond_depth--;
764 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
765 ureg_get_instruction_number(tx->ureg));
766 }
767
768 static INLINE struct ureg_dst
769 nine_ureg_dst_register(unsigned file, int index)
770 {
771 return ureg_dst(ureg_src_register(file, index));
772 }
773
774 static struct ureg_src
775 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
776 {
777 struct ureg_program *ureg = tx->ureg;
778 struct ureg_src src;
779 struct ureg_dst tmp;
780
781 switch (param->file)
782 {
783 case D3DSPR_TEMP:
784 assert(!param->rel);
785 tx_temp_alloc(tx, param->idx);
786 src = ureg_src(tx->regs.r[param->idx]);
787 break;
788 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
789 case D3DSPR_ADDR:
790 assert(!param->rel);
791 if (IS_VS) {
792 assert(param->idx == 0);
793 /* the address register (vs only) must be
794 * assigned before use */
795 assert(!ureg_dst_is_undef(tx->regs.a0));
796 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
797 src = ureg_src(tx->regs.address);
798 } else {
799 if (tx->version.major < 2 && tx->version.minor < 4) {
800 /* no subroutines, so should be defined */
801 src = ureg_src(tx->regs.tS[param->idx]);
802 } else {
803 tx_texcoord_alloc(tx, param->idx);
804 src = tx->regs.vT[param->idx];
805 }
806 }
807 break;
808 case D3DSPR_INPUT:
809 if (IS_VS) {
810 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
811 } else {
812 if (tx->version.major < 3) {
813 assert(!param->rel);
814 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
815 param->idx,
816 TGSI_INTERPOLATE_PERSPECTIVE);
817 } else {
818 assert(!param->rel); /* TODO */
819 assert(param->idx < Elements(tx->regs.v));
820 src = tx->regs.v[param->idx];
821 }
822 }
823 break;
824 case D3DSPR_PREDICATE:
825 assert(!param->rel);
826 tx_pred_alloc(tx, param->idx);
827 src = ureg_src(tx->regs.p);
828 break;
829 case D3DSPR_SAMPLER:
830 assert(param->mod == NINED3DSPSM_NONE);
831 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
832 assert(!param->rel);
833 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
834 break;
835 case D3DSPR_CONST:
836 assert(!param->rel || IS_VS);
837 if (param->rel)
838 tx->indirect_const_access = TRUE;
839 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
840 if (!param->rel)
841 nine_info_mark_const_f_used(tx->info, param->idx);
842 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
843 }
844 if (!IS_VS && tx->version.major < 2) {
845 /* ps 1.X clamps constants */
846 tmp = tx_scratch(tx);
847 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
848 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
849 src = ureg_src(tmp);
850 }
851 break;
852 case D3DSPR_CONST2:
853 case D3DSPR_CONST3:
854 case D3DSPR_CONST4:
855 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
856 assert(!"CONST2/3/4");
857 src = ureg_imm1f(ureg, 0.0f);
858 break;
859 case D3DSPR_CONSTINT:
860 /* relative adressing only possible for float constants in vs */
861 assert(!param->rel);
862 if (!tx_lconsti(tx, &src, param->idx)) {
863 nine_info_mark_const_i_used(tx->info, param->idx);
864 src = ureg_src_register(TGSI_FILE_CONSTANT,
865 tx->info->const_i_base + param->idx);
866 }
867 break;
868 case D3DSPR_CONSTBOOL:
869 assert(!param->rel);
870 if (!tx_lconstb(tx, &src, param->idx)) {
871 char r = param->idx / 4;
872 char s = param->idx & 3;
873 nine_info_mark_const_b_used(tx->info, param->idx);
874 src = ureg_src_register(TGSI_FILE_CONSTANT,
875 tx->info->const_b_base + r);
876 src = ureg_swizzle(src, s, s, s, s);
877 }
878 break;
879 case D3DSPR_LOOP:
880 if (ureg_dst_is_undef(tx->regs.address))
881 tx->regs.address = ureg_DECL_address(ureg);
882 if (!tx->native_integers)
883 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
884 else
885 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
886 src = ureg_src(tx->regs.address);
887 break;
888 case D3DSPR_MISCTYPE:
889 switch (param->idx) {
890 case D3DSMO_POSITION:
891 if (ureg_src_is_undef(tx->regs.vPos))
892 tx->regs.vPos = ureg_DECL_fs_input(ureg,
893 TGSI_SEMANTIC_POSITION, 0,
894 TGSI_INTERPOLATE_LINEAR);
895 if (tx->shift_wpos) {
896 /* TODO: do this only once */
897 struct ureg_dst wpos = tx_scratch(tx);
898 ureg_SUB(ureg, wpos, tx->regs.vPos,
899 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
900 src = ureg_src(wpos);
901 } else {
902 src = tx->regs.vPos;
903 }
904 break;
905 case D3DSMO_FACE:
906 if (ureg_src_is_undef(tx->regs.vFace)) {
907 tx->regs.vFace = ureg_DECL_fs_input(ureg,
908 TGSI_SEMANTIC_FACE, 0,
909 TGSI_INTERPOLATE_CONSTANT);
910 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
911 }
912 src = tx->regs.vFace;
913 break;
914 default:
915 assert(!"invalid src D3DSMO");
916 break;
917 }
918 assert(!param->rel);
919 break;
920 case D3DSPR_TEMPFLOAT16:
921 break;
922 default:
923 assert(!"invalid src D3DSPR");
924 }
925 if (param->rel)
926 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
927
928 switch (param->mod) {
929 case NINED3DSPSM_DW:
930 tmp = tx_scratch(tx);
931 /* NOTE: app is not allowed to read w with this modifier */
932 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
933 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
934 src = ureg_src(tmp);
935 break;
936 case NINED3DSPSM_DZ:
937 tmp = tx_scratch(tx);
938 /* NOTE: app is not allowed to read z with this modifier */
939 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
940 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
941 src = ureg_src(tmp);
942 break;
943 default:
944 break;
945 }
946
947 if (param->swizzle != NINED3DSP_NOSWIZZLE)
948 src = ureg_swizzle(src,
949 (param->swizzle >> 0) & 0x3,
950 (param->swizzle >> 2) & 0x3,
951 (param->swizzle >> 4) & 0x3,
952 (param->swizzle >> 6) & 0x3);
953
954 switch (param->mod) {
955 case NINED3DSPSM_ABS:
956 src = ureg_abs(src);
957 break;
958 case NINED3DSPSM_ABSNEG:
959 src = ureg_negate(ureg_abs(src));
960 break;
961 case NINED3DSPSM_NEG:
962 src = ureg_negate(src);
963 break;
964 case NINED3DSPSM_BIAS:
965 tmp = tx_scratch(tx);
966 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
967 src = ureg_src(tmp);
968 break;
969 case NINED3DSPSM_BIASNEG:
970 tmp = tx_scratch(tx);
971 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
972 src = ureg_src(tmp);
973 break;
974 case NINED3DSPSM_NOT:
975 if (tx->native_integers) {
976 tmp = tx_scratch(tx);
977 ureg_NOT(ureg, tmp, src);
978 src = ureg_src(tmp);
979 break;
980 }
981 /* fall through */
982 case NINED3DSPSM_COMP:
983 tmp = tx_scratch(tx);
984 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
985 src = ureg_src(tmp);
986 break;
987 case NINED3DSPSM_DZ:
988 case NINED3DSPSM_DW:
989 /* Already handled*/
990 break;
991 case NINED3DSPSM_SIGN:
992 tmp = tx_scratch(tx);
993 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
994 src = ureg_src(tmp);
995 break;
996 case NINED3DSPSM_SIGNNEG:
997 tmp = tx_scratch(tx);
998 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
999 src = ureg_src(tmp);
1000 break;
1001 case NINED3DSPSM_X2:
1002 tmp = tx_scratch(tx);
1003 ureg_ADD(ureg, tmp, src, src);
1004 src = ureg_src(tmp);
1005 break;
1006 case NINED3DSPSM_X2NEG:
1007 tmp = tx_scratch(tx);
1008 ureg_ADD(ureg, tmp, src, src);
1009 src = ureg_negate(ureg_src(tmp));
1010 break;
1011 default:
1012 assert(param->mod == NINED3DSPSM_NONE);
1013 break;
1014 }
1015
1016 return src;
1017 }
1018
1019 static struct ureg_dst
1020 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1021 {
1022 struct ureg_dst dst;
1023
1024 switch (param->file)
1025 {
1026 case D3DSPR_TEMP:
1027 assert(!param->rel);
1028 tx_temp_alloc(tx, param->idx);
1029 dst = tx->regs.r[param->idx];
1030 break;
1031 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1032 case D3DSPR_ADDR:
1033 assert(!param->rel);
1034 if (tx->version.major < 2 && !IS_VS) {
1035 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1036 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1037 dst = tx->regs.tS[param->idx];
1038 } else
1039 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1040 tx_texcoord_alloc(tx, param->idx);
1041 dst = ureg_dst(tx->regs.vT[param->idx]);
1042 } else {
1043 tx_addr_alloc(tx, param->idx);
1044 dst = tx->regs.a0;
1045 }
1046 break;
1047 case D3DSPR_RASTOUT:
1048 assert(!param->rel);
1049 switch (param->idx) {
1050 case 0:
1051 if (ureg_dst_is_undef(tx->regs.oPos))
1052 tx->regs.oPos =
1053 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1054 dst = tx->regs.oPos;
1055 break;
1056 case 1:
1057 if (ureg_dst_is_undef(tx->regs.oFog))
1058 tx->regs.oFog =
1059 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1060 dst = tx->regs.oFog;
1061 break;
1062 case 2:
1063 if (ureg_dst_is_undef(tx->regs.oPts))
1064 tx->regs.oPts =
1065 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1066 dst = tx->regs.oPts;
1067 break;
1068 default:
1069 assert(0);
1070 break;
1071 }
1072 break;
1073 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1074 case D3DSPR_OUTPUT:
1075 if (tx->version.major < 3) {
1076 assert(!param->rel);
1077 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1078 } else {
1079 assert(!param->rel); /* TODO */
1080 assert(param->idx < Elements(tx->regs.o));
1081 dst = tx->regs.o[param->idx];
1082 }
1083 break;
1084 case D3DSPR_ATTROUT: /* VS */
1085 case D3DSPR_COLOROUT: /* PS */
1086 assert(param->idx >= 0 && param->idx < 4);
1087 assert(!param->rel);
1088 tx->info->rt_mask |= 1 << param->idx;
1089 if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
1090 tx->regs.oCol[param->idx] =
1091 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1092 dst = tx->regs.oCol[param->idx];
1093 if (IS_VS && tx->version.major < 3)
1094 dst = ureg_saturate(dst);
1095 break;
1096 case D3DSPR_DEPTHOUT:
1097 assert(!param->rel);
1098 if (ureg_dst_is_undef(tx->regs.oDepth))
1099 tx->regs.oDepth =
1100 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1101 TGSI_WRITEMASK_Z);
1102 dst = tx->regs.oDepth; /* XXX: must write .z component */
1103 break;
1104 case D3DSPR_PREDICATE:
1105 assert(!param->rel);
1106 tx_pred_alloc(tx, param->idx);
1107 dst = tx->regs.p;
1108 break;
1109 case D3DSPR_TEMPFLOAT16:
1110 DBG("unhandled D3DSPR: %u\n", param->file);
1111 break;
1112 default:
1113 assert(!"invalid dst D3DSPR");
1114 break;
1115 }
1116 if (param->rel)
1117 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1118
1119 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1120 dst = ureg_writemask(dst, param->mask);
1121 if (param->mod & NINED3DSPDM_SATURATE)
1122 dst = ureg_saturate(dst);
1123
1124 return dst;
1125 }
1126
1127 static struct ureg_dst
1128 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1129 {
1130 if (param->shift) {
1131 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1132 return tx->regs.tdst;
1133 }
1134 return _tx_dst_param(tx, param);
1135 }
1136
1137 static void
1138 tx_apply_dst0_modifiers(struct shader_translator *tx)
1139 {
1140 struct ureg_dst rdst;
1141 float f;
1142
1143 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1144 return;
1145 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1146
1147 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1148
1149 if (tx->insn.dst[0].shift < 0)
1150 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1151 else
1152 f = 1 << tx->insn.dst[0].shift;
1153
1154 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1155 }
1156
1157 static struct ureg_src
1158 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1159 {
1160 struct ureg_src src;
1161
1162 assert(!param->shift);
1163 assert(!(param->mod & NINED3DSPDM_SATURATE));
1164
1165 switch (param->file) {
1166 case D3DSPR_INPUT:
1167 if (IS_VS) {
1168 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1169 } else {
1170 assert(!param->rel);
1171 assert(param->idx < Elements(tx->regs.v));
1172 src = tx->regs.v[param->idx];
1173 }
1174 break;
1175 default:
1176 src = ureg_src(tx_dst_param(tx, param));
1177 break;
1178 }
1179 if (param->rel)
1180 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1181
1182 if (!param->mask)
1183 WARN("mask is 0, using identity swizzle\n");
1184
1185 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1186 char s[4];
1187 int n;
1188 int c;
1189 for (n = 0, c = 0; c < 4; ++c)
1190 if (param->mask & (1 << c))
1191 s[n++] = c;
1192 assert(n);
1193 for (c = n; c < 4; ++c)
1194 s[c] = s[n - 1];
1195 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1196 }
1197 return src;
1198 }
1199
1200 static HRESULT
1201 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1202 {
1203 struct ureg_program *ureg = tx->ureg;
1204 struct ureg_dst dst;
1205 struct ureg_src src[2];
1206 struct sm1_src_param *src_mat = &tx->insn.src[1];
1207 unsigned i;
1208
1209 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1210 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1211
1212 for (i = 0; i < n; i++)
1213 {
1214 const unsigned m = (1 << i);
1215
1216 src[1] = tx_src_param(tx, src_mat);
1217 src_mat->idx++;
1218
1219 if (!(dst.WriteMask & m))
1220 continue;
1221
1222 /* XXX: src == dst case ? */
1223
1224 switch (k) {
1225 case 3:
1226 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1227 break;
1228 case 4:
1229 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1230 break;
1231 default:
1232 DBG("invalid operation: M%ux%u\n", m, n);
1233 break;
1234 }
1235 }
1236
1237 return D3D_OK;
1238 }
1239
/* Expands to two zero values (a comma-separated pair).
 * NOTE(review): presumably used as a min/max version pair meaning "not
 * supported" in the instruction table - confirm against its users. */
1240 #define VNOTSUPPORTED 0, 0
/* Packs a version number into one integer: major in bits 8 and up, minor in
 * the low bits. */
1241 #define V(maj, min) (((maj) << 8) | (min))
1242
1243 static INLINE const char *
1244 d3dsio_to_string( unsigned opcode )
1245 {
1246 static const char *names[] = {
1247 "NOP",
1248 "MOV",
1249 "ADD",
1250 "SUB",
1251 "MAD",
1252 "MUL",
1253 "RCP",
1254 "RSQ",
1255 "DP3",
1256 "DP4",
1257 "MIN",
1258 "MAX",
1259 "SLT",
1260 "SGE",
1261 "EXP",
1262 "LOG",
1263 "LIT",
1264 "DST",
1265 "LRP",
1266 "FRC",
1267 "M4x4",
1268 "M4x3",
1269 "M3x4",
1270 "M3x3",
1271 "M3x2",
1272 "CALL",
1273 "CALLNZ",
1274 "LOOP",
1275 "RET",
1276 "ENDLOOP",
1277 "LABEL",
1278 "DCL",
1279 "POW",
1280 "CRS",
1281 "SGN",
1282 "ABS",
1283 "NRM",
1284 "SINCOS",
1285 "REP",
1286 "ENDREP",
1287 "IF",
1288 "IFC",
1289 "ELSE",
1290 "ENDIF",
1291 "BREAK",
1292 "BREAKC",
1293 "MOVA",
1294 "DEFB",
1295 "DEFI",
1296 NULL,
1297 NULL,
1298 NULL,
1299 NULL,
1300 NULL,
1301 NULL,
1302 NULL,
1303 NULL,
1304 NULL,
1305 NULL,
1306 NULL,
1307 NULL,
1308 NULL,
1309 NULL,
1310 NULL,
1311 "TEXCOORD",
1312 "TEXKILL",
1313 "TEX",
1314 "TEXBEM",
1315 "TEXBEML",
1316 "TEXREG2AR",
1317 "TEXREG2GB",
1318 "TEXM3x2PAD",
1319 "TEXM3x2TEX",
1320 "TEXM3x3PAD",
1321 "TEXM3x3TEX",
1322 NULL,
1323 "TEXM3x3SPEC",
1324 "TEXM3x3VSPEC",
1325 "EXPP",
1326 "LOGP",
1327 "CND",
1328 "DEF",
1329 "TEXREG2RGB",
1330 "TEXDP3TEX",
1331 "TEXM3x2DEPTH",
1332 "TEXDP3",
1333 "TEXM3x3",
1334 "TEXDEPTH",
1335 "CMP",
1336 "BEM",
1337 "DP2ADD",
1338 "DSX",
1339 "DSY",
1340 "TEXLDD",
1341 "SETP",
1342 "TEXLDL",
1343 "BREAKP"
1344 };
1345
1346 if (opcode < Elements(names)) return names[opcode];
1347
1348 switch (opcode) {
1349 case D3DSIO_PHASE: return "PHASE";
1350 case D3DSIO_COMMENT: return "COMMENT";
1351 case D3DSIO_END: return "END";
1352 default:
1353 return NULL;
1354 }
1355 }
1356
/* Brace initializer with every field zero / NULL. */
1357 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when at least one of the four version bounds (vertex/fragment,
 * min/max) of inst is non-zero. */
1358 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1359 (inst).vert_version.max | \
1360 (inst).frag_version.min | \
1361 (inst).frag_version.max)
1362
/* Name of the translation handler for instruction `name`. */
1363 #define SPECIAL(name) \
1364 NineTranslateInstruction_##name
1365
/* Opens the definition of the translation handler for instruction `name`:
 * a static function taking the translator `tx` and returning an HRESULT. */
1366 #define DECL_SPECIAL(name) \
1367 static HRESULT \
1368 NineTranslateInstruction_##name( struct shader_translator *tx )
1369
/* Forward declaration; MOV_vs1x falls back to this handler. */
1370 static HRESULT
1371 NineTranslateInstruction_Generic(struct shader_translator *);
1372
1373 DECL_SPECIAL(M4x4)
1374 {
1375 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1376 }
1377
1378 DECL_SPECIAL(M4x3)
1379 {
1380 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1381 }
1382
1383 DECL_SPECIAL(M3x4)
1384 {
1385 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1386 }
1387
1388 DECL_SPECIAL(M3x3)
1389 {
1390 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1391 }
1392
1393 DECL_SPECIAL(M3x2)
1394 {
1395 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1396 }
1397
1398 DECL_SPECIAL(CMP)
1399 {
1400 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1401 tx_src_param(tx, &tx->insn.src[0]),
1402 tx_src_param(tx, &tx->insn.src[2]),
1403 tx_src_param(tx, &tx->insn.src[1]));
1404 return D3D_OK;
1405 }
1406
1407 DECL_SPECIAL(CND)
1408 {
1409 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1410 struct ureg_dst cgt;
1411 struct ureg_src cnd;
1412
1413 /* the coissue flag was a tip for compilers to advise to
1414 * execute two operations at the same time, in cases
1415 * the two executions had same dst with different channels.
1416 * It has no effect on current hw. However it seems CND
1417 * is affected. The handling of this very specific case
1418 * handled below mimick wine behaviour */
1419 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1420 ureg_MOV(tx->ureg,
1421 dst, tx_src_param(tx, &tx->insn.src[1]));
1422 return D3D_OK;
1423 }
1424
1425 cnd = tx_src_param(tx, &tx->insn.src[0]);
1426 cgt = tx_scratch(tx);
1427
1428 if (tx->version.major == 1 && tx->version.minor < 4)
1429 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1430
1431 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1432
1433 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1434 tx_src_param(tx, &tx->insn.src[1]),
1435 tx_src_param(tx, &tx->insn.src[2]));
1436 return D3D_OK;
1437 }
1438
1439 DECL_SPECIAL(CALL)
1440 {
1441 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1442 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1443 return D3D_OK;
1444 }
1445
1446 DECL_SPECIAL(CALLNZ)
1447 {
1448 struct ureg_program *ureg = tx->ureg;
1449 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1450
1451 if (!tx->native_integers)
1452 ureg_IF(ureg, src, tx_cond(tx));
1453 else
1454 ureg_UIF(ureg, src, tx_cond(tx));
1455 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1456 tx_endcond(tx);
1457 ureg_ENDIF(ureg);
1458 return D3D_OK;
1459 }
1460
1461 DECL_SPECIAL(MOV_vs1x)
1462 {
1463 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1464 /* Implementation note: We don't write directly
1465 * to the addr register, but to an intermediate
1466 * float register.
1467 * Contrary to the doc, when writing to ADDR here,
1468 * the rounding is not to nearest, but to lowest
1469 * (wine test).
1470 * Since we use ARR next, substract 0.5. */
1471 ureg_SUB(tx->ureg,
1472 tx_dst_param(tx, &tx->insn.dst[0]),
1473 tx_src_param(tx, &tx->insn.src[0]),
1474 ureg_imm1f(tx->ureg, 0.5f));
1475 return D3D_OK;
1476 }
1477 return NineTranslateInstruction_Generic(tx);
1478 }
1479
1480 DECL_SPECIAL(LOOP)
1481 {
1482 struct ureg_program *ureg = tx->ureg;
1483 unsigned *label;
1484 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1485 struct ureg_dst ctr;
1486 struct ureg_dst tmp;
1487 struct ureg_src ctrx;
1488
1489 label = tx_bgnloop(tx);
1490 ctr = tx_get_loopctr(tx, TRUE);
1491 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1492
1493 /* src: num_iterations - start_value of al - step for al - 0 */
1494 ureg_MOV(ureg, ctr, src);
1495 ureg_BGNLOOP(tx->ureg, label);
1496 tmp = tx_scratch_scalar(tx);
1497 /* Initially ctr.x contains the number of iterations.
1498 * ctr.y will contain the updated value of al.
1499 * We decrease ctr.x at the end of every iteration,
1500 * and stop when it reaches 0. */
1501
1502 if (!tx->native_integers) {
1503 /* case src and ctr contain floats */
1504 /* to avoid precision issue, we stop when ctr <= 0.5 */
1505 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1506 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1507 } else {
1508 /* case src and ctr contain integers */
1509 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1510 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1511 }
1512 ureg_BRK(ureg);
1513 tx_endcond(tx);
1514 ureg_ENDIF(ureg);
1515 return D3D_OK;
1516 }
1517
1518 DECL_SPECIAL(RET)
1519 {
1520 ureg_RET(tx->ureg);
1521 return D3D_OK;
1522 }
1523
1524 DECL_SPECIAL(ENDLOOP)
1525 {
1526 struct ureg_program *ureg = tx->ureg;
1527 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1528 struct ureg_dst dst_ctrx, dst_al;
1529 struct ureg_src src_ctr, al_counter;
1530
1531 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1532 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1533 src_ctr = ureg_src(ctr);
1534 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1535
1536 /* ctr.x -= 1
1537 * ctr.y (aL) += step */
1538 if (!tx->native_integers) {
1539 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1540 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1541 } else {
1542 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1543 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1544 }
1545 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1546 return D3D_OK;
1547 }
1548
1549 DECL_SPECIAL(LABEL)
1550 {
1551 unsigned k = tx->num_inst_labels;
1552 unsigned n = tx->insn.src[0].idx;
1553 assert(n < 2048);
1554 if (n >= k)
1555 tx->inst_labels = REALLOC(tx->inst_labels,
1556 k * sizeof(tx->inst_labels[0]),
1557 n * sizeof(tx->inst_labels[0]));
1558
1559 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1560 return D3D_OK;
1561 }
1562
1563 DECL_SPECIAL(SINCOS)
1564 {
1565 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1566 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1567
1568 assert(!(dst.WriteMask & 0xc));
1569
1570 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1571 ureg_SCS(tx->ureg, dst, src);
1572 return D3D_OK;
1573 }
1574
1575 DECL_SPECIAL(SGN)
1576 {
1577 ureg_SSG(tx->ureg,
1578 tx_dst_param(tx, &tx->insn.dst[0]),
1579 tx_src_param(tx, &tx->insn.src[0]));
1580 return D3D_OK;
1581 }
1582
1583 DECL_SPECIAL(REP)
1584 {
1585 struct ureg_program *ureg = tx->ureg;
1586 unsigned *label;
1587 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1588 struct ureg_dst ctr;
1589 struct ureg_dst tmp;
1590 struct ureg_src ctrx;
1591
1592 label = tx_bgnloop(tx);
1593 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1594 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1595
1596 /* NOTE: rep must be constant, so we don't have to save the count */
1597 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1598
1599 /* rep: num_iterations - 0 - 0 - 0 */
1600 ureg_MOV(ureg, ctr, rep);
1601 ureg_BGNLOOP(ureg, label);
1602 tmp = tx_scratch_scalar(tx);
1603 /* Initially ctr.x contains the number of iterations.
1604 * We decrease ctr.x at the end of every iteration,
1605 * and stop when it reaches 0. */
1606
1607 if (!tx->native_integers) {
1608 /* case src and ctr contain floats */
1609 /* to avoid precision issue, we stop when ctr <= 0.5 */
1610 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1611 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1612 } else {
1613 /* case src and ctr contain integers */
1614 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1615 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1616 }
1617 ureg_BRK(ureg);
1618 tx_endcond(tx);
1619 ureg_ENDIF(ureg);
1620
1621 return D3D_OK;
1622 }
1623
1624 DECL_SPECIAL(ENDREP)
1625 {
1626 struct ureg_program *ureg = tx->ureg;
1627 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1628 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1629 struct ureg_src src_ctr = ureg_src(ctr);
1630
1631 /* ctr.x -= 1 */
1632 if (!tx->native_integers)
1633 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1634 else
1635 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1636
1637 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1638 return D3D_OK;
1639 }
1640
1641 DECL_SPECIAL(ENDIF)
1642 {
1643 tx_endcond(tx);
1644 ureg_ENDIF(tx->ureg);
1645 return D3D_OK;
1646 }
1647
1648 DECL_SPECIAL(IF)
1649 {
1650 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1651
1652 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1653 ureg_UIF(tx->ureg, src, tx_cond(tx));
1654 else
1655 ureg_IF(tx->ureg, src, tx_cond(tx));
1656
1657 return D3D_OK;
1658 }
1659
1660 static INLINE unsigned
1661 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1662 {
1663 switch (flags) {
1664 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1665 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1666 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1667 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1668 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1669 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1670 default:
1671 assert(!"invalid comparison flags");
1672 return TGSI_OPCODE_SGT;
1673 }
1674 }
1675
1676 DECL_SPECIAL(IFC)
1677 {
1678 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1679 struct ureg_src src[2];
1680 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1681 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1682 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1683 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1684 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1685 return D3D_OK;
1686 }
1687
1688 DECL_SPECIAL(ELSE)
1689 {
1690 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1691 return D3D_OK;
1692 }
1693
1694 DECL_SPECIAL(BREAKC)
1695 {
1696 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1697 struct ureg_src src[2];
1698 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1699 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1700 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1701 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1702 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1703 ureg_BRK(tx->ureg);
1704 tx_endcond(tx);
1705 ureg_ENDIF(tx->ureg);
1706 return D3D_OK;
1707 }
1708
1709 static const char *sm1_declusage_names[] =
1710 {
1711 [D3DDECLUSAGE_POSITION] = "POSITION",
1712 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1713 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1714 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1715 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1716 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1717 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1718 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1719 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1720 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1721 [D3DDECLUSAGE_COLOR] = "COLOR",
1722 [D3DDECLUSAGE_FOG] = "FOG",
1723 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1724 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1725 };
1726
1727 static INLINE unsigned
1728 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1729 {
1730 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1731 }
1732
1733 static void
1734 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1735 boolean tc,
1736 struct sm1_semantic *dcl)
1737 {
1738 BYTE index = dcl->usage_idx;
1739
1740 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1741 * we match to a TGSI_SEMANTIC_GENERIC with index.
1742 *
1743 * The index can be anything UINT16 and usage_idx is BYTE,
1744 * so we can fit everything. It doesn't matter if indices
1745 * are close together or low.
1746 *
1747 *
1748 * POSITION >= 1: 10 * index + 6
1749 * COLOR >= 2: 10 * (index-1) + 7
1750 * TEXCOORD[0..15]: index
1751 * BLENDWEIGHT: 10 * index + 18
1752 * BLENDINDICES: 10 * index + 19
1753 * NORMAL: 10 * index + 20
1754 * TANGENT: 10 * index + 21
1755 * BINORMAL: 10 * index + 22
1756 * TESSFACTOR: 10 * index + 23
1757 */
1758
1759 switch (dcl->usage) {
1760 case D3DDECLUSAGE_POSITION:
1761 case D3DDECLUSAGE_POSITIONT:
1762 case D3DDECLUSAGE_DEPTH:
1763 if (index == 0) {
1764 sem->Name = TGSI_SEMANTIC_POSITION;
1765 sem->Index = 0;
1766 } else {
1767 sem->Name = TGSI_SEMANTIC_GENERIC;
1768 sem->Index = 10 * index + 6;
1769 }
1770 break;
1771 case D3DDECLUSAGE_COLOR:
1772 if (index < 2) {
1773 sem->Name = TGSI_SEMANTIC_COLOR;
1774 sem->Index = index;
1775 } else {
1776 sem->Name = TGSI_SEMANTIC_GENERIC;
1777 sem->Index = 10 * (index-1) + 7;
1778 }
1779 break;
1780 case D3DDECLUSAGE_FOG:
1781 assert(index == 0);
1782 sem->Name = TGSI_SEMANTIC_FOG;
1783 sem->Index = 0;
1784 break;
1785 case D3DDECLUSAGE_PSIZE:
1786 assert(index == 0);
1787 sem->Name = TGSI_SEMANTIC_PSIZE;
1788 sem->Index = 0;
1789 break;
1790 case D3DDECLUSAGE_TEXCOORD:
1791 assert(index < 16);
1792 if (index < 8 && tc)
1793 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1794 else
1795 sem->Name = TGSI_SEMANTIC_GENERIC;
1796 sem->Index = index;
1797 break;
1798 case D3DDECLUSAGE_BLENDWEIGHT:
1799 sem->Name = TGSI_SEMANTIC_GENERIC;
1800 sem->Index = 10 * index + 18;
1801 break;
1802 case D3DDECLUSAGE_BLENDINDICES:
1803 sem->Name = TGSI_SEMANTIC_GENERIC;
1804 sem->Index = 10 * index + 19;
1805 break;
1806 case D3DDECLUSAGE_NORMAL:
1807 sem->Name = TGSI_SEMANTIC_GENERIC;
1808 sem->Index = 10 * index + 20;
1809 break;
1810 case D3DDECLUSAGE_TANGENT:
1811 sem->Name = TGSI_SEMANTIC_GENERIC;
1812 sem->Index = 10 * index + 21;
1813 break;
1814 case D3DDECLUSAGE_BINORMAL:
1815 sem->Name = TGSI_SEMANTIC_GENERIC;
1816 sem->Index = 10 * index + 22;
1817 break;
1818 case D3DDECLUSAGE_TESSFACTOR:
1819 sem->Name = TGSI_SEMANTIC_GENERIC;
1820 sem->Index = 10 * index + 23;
1821 break;
1822 case D3DDECLUSAGE_SAMPLE:
1823 sem->Name = TGSI_SEMANTIC_COUNT;
1824 sem->Index = 0;
1825 break;
1826 default:
1827 assert(!"Invalid DECLUSAGE.");
1828 break;
1829 }
1830 }
1831
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT.
 * These are the values the sampler-type helpers below switch on. */
1832 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
1833 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
1834 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
1835 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1836 static INLINE unsigned
1837 d3dstt_to_tgsi_tex(BYTE sampler_type)
1838 {
1839 switch (sampler_type) {
1840 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1841 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1842 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1843 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1844 default:
1845 assert(0);
1846 return TGSI_TEXTURE_UNKNOWN;
1847 }
1848 }
1849 static INLINE unsigned
1850 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1851 {
1852 switch (sampler_type) {
1853 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1854 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1855 case NINED3DSTT_VOLUME:
1856 case NINED3DSTT_CUBE:
1857 default:
1858 assert(0);
1859 return TGSI_TEXTURE_UNKNOWN;
1860 }
1861 }
1862 static INLINE unsigned
1863 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1864 {
1865 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1866 case 1: return TGSI_TEXTURE_1D;
1867 case 0: return TGSI_TEXTURE_2D;
1868 case 3: return TGSI_TEXTURE_3D;
1869 default:
1870 return TGSI_TEXTURE_CUBE;
1871 }
1872 }
1873
1874 static const char *
1875 sm1_sampler_type_name(BYTE sampler_type)
1876 {
1877 switch (sampler_type) {
1878 case NINED3DSTT_1D: return "1D";
1879 case NINED3DSTT_2D: return "2D";
1880 case NINED3DSTT_VOLUME: return "VOLUME";
1881 case NINED3DSTT_CUBE: return "CUBE";
1882 default:
1883 return "(D3DSTT_?)";
1884 }
1885 }
1886
1887 static INLINE unsigned
1888 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1889 {
1890 switch (sem->Name) {
1891 case TGSI_SEMANTIC_POSITION:
1892 case TGSI_SEMANTIC_NORMAL:
1893 return TGSI_INTERPOLATE_LINEAR;
1894 case TGSI_SEMANTIC_BCOLOR:
1895 case TGSI_SEMANTIC_COLOR:
1896 case TGSI_SEMANTIC_FOG:
1897 case TGSI_SEMANTIC_GENERIC:
1898 case TGSI_SEMANTIC_TEXCOORD:
1899 case TGSI_SEMANTIC_CLIPDIST:
1900 case TGSI_SEMANTIC_CLIPVERTEX:
1901 return TGSI_INTERPOLATE_PERSPECTIVE;
1902 case TGSI_SEMANTIC_EDGEFLAG:
1903 case TGSI_SEMANTIC_FACE:
1904 case TGSI_SEMANTIC_INSTANCEID:
1905 case TGSI_SEMANTIC_PCOORD:
1906 case TGSI_SEMANTIC_PRIMID:
1907 case TGSI_SEMANTIC_PSIZE:
1908 case TGSI_SEMANTIC_VERTEXID:
1909 return TGSI_INTERPOLATE_CONSTANT;
1910 default:
1911 assert(0);
1912 return TGSI_INTERPOLATE_CONSTANT;
1913 }
1914 }
1915
1916 DECL_SPECIAL(DCL)
1917 {
1918 struct ureg_program *ureg = tx->ureg;
1919 boolean is_input;
1920 boolean is_sampler;
1921 struct tgsi_declaration_semantic tgsi;
1922 struct sm1_semantic sem;
1923 sm1_read_semantic(tx, &sem);
1924
1925 is_input = sem.reg.file == D3DSPR_INPUT;
1926 is_sampler =
1927 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1928
1929 DUMP("DCL ");
1930 sm1_dump_dst_param(&sem.reg);
1931 if (is_sampler)
1932 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1933 else
1934 if (tx->version.major >= 3)
1935 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1936 else
1937 if (sem.usage | sem.usage_idx)
1938 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1939 else
1940 DUMP("\n");
1941
1942 if (is_sampler) {
1943 const unsigned m = 1 << sem.reg.idx;
1944 ureg_DECL_sampler(ureg, sem.reg.idx);
1945 tx->info->sampler_mask |= m;
1946 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1947 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1948 d3dstt_to_tgsi_tex(sem.sampler_type);
1949 return D3D_OK;
1950 }
1951
1952 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1953 if (IS_VS) {
1954 if (is_input) {
1955 /* linkage outside of shader with vertex declaration */
1956 ureg_DECL_vs_input(ureg, sem.reg.idx);
1957 assert(sem.reg.idx < Elements(tx->info->input_map));
1958 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1959 tx->info->num_inputs = sem.reg.idx + 1;
1960 /* NOTE: preserving order in case of indirect access */
1961 } else
1962 if (tx->version.major >= 3) {
1963 /* SM2 output semantic determined by file */
1964 assert(sem.reg.mask != 0);
1965 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1966 tx->info->position_t = TRUE;
1967 assert(sem.reg.idx < Elements(tx->regs.o));
1968 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1969 ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1970
1971 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1972 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1973 }
1974 } else {
1975 if (is_input && tx->version.major >= 3) {
1976 /* SM3 only, SM2 input semantic determined by file */
1977 assert(sem.reg.idx < Elements(tx->regs.v));
1978 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1979 ureg, tgsi.Name, tgsi.Index,
1980 nine_tgsi_to_interp_mode(&tgsi),
1981 0, /* cylwrap */
1982 sem.reg.mod & NINED3DSPDM_CENTROID);
1983 } else
1984 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
1985 /* FragColor or FragDepth */
1986 assert(sem.reg.mask != 0);
1987 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1988 }
1989 }
1990 return D3D_OK;
1991 }
1992
1993 DECL_SPECIAL(DEF)
1994 {
1995 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
1996 return D3D_OK;
1997 }
1998
1999 DECL_SPECIAL(DEFB)
2000 {
2001 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2002 return D3D_OK;
2003 }
2004
2005 DECL_SPECIAL(DEFI)
2006 {
2007 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2008 return D3D_OK;
2009 }
2010
2011 DECL_SPECIAL(POW)
2012 {
2013 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2014 struct ureg_src src[2] = {
2015 tx_src_param(tx, &tx->insn.src[0]),
2016 tx_src_param(tx, &tx->insn.src[1])
2017 };
2018 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2019 return D3D_OK;
2020 }
2021
2022 DECL_SPECIAL(RSQ)
2023 {
2024 struct ureg_program *ureg = tx->ureg;
2025 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2026 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2027 struct ureg_dst tmp = tx_scratch(tx);
2028 ureg_RSQ(ureg, tmp, ureg_abs(src));
2029 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2030 return D3D_OK;
2031 }
2032
2033 DECL_SPECIAL(LOG)
2034 {
2035 struct ureg_program *ureg = tx->ureg;
2036 struct ureg_dst tmp = tx_scratch_scalar(tx);
2037 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2038 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2039 ureg_LG2(ureg, tmp, ureg_abs(src));
2040 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2041 return D3D_OK;
2042 }
2043
2044 DECL_SPECIAL(LIT)
2045 {
2046 struct ureg_program *ureg = tx->ureg;
2047 struct ureg_dst tmp = tx_scratch(tx);
2048 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2049 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2050 ureg_LIT(ureg, tmp, src);
2051 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2052 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2053 * it 0^0 if src.w=0, which value is driver dependent. */
2054 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2055 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2056 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2057 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2058 return D3D_OK;
2059 }
2060
2061 DECL_SPECIAL(NRM)
2062 {
2063 struct ureg_program *ureg = tx->ureg;
2064 struct ureg_dst tmp = tx_scratch_scalar(tx);
2065 struct ureg_src nrm = tx_src_scalar(tmp);
2066 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2067 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2068 ureg_DP3(ureg, tmp, src, src);
2069 ureg_RSQ(ureg, tmp, nrm);
2070 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2071 ureg_MUL(ureg, dst, src, nrm);
2072 return D3D_OK;
2073 }
2074
2075 DECL_SPECIAL(DP2ADD)
2076 {
2077 struct ureg_dst tmp = tx_scratch_scalar(tx);
2078 struct ureg_src dp2 = tx_src_scalar(tmp);
2079 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2080 struct ureg_src src[3];
2081 int i;
2082 for (i = 0; i < 3; ++i)
2083 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2084 assert_replicate_swizzle(&src[2]);
2085
2086 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2087 ureg_ADD(tx->ureg, dst, src[2], dp2);
2088
2089 return D3D_OK;
2090 }
2091
2092 DECL_SPECIAL(TEXCOORD)
2093 {
2094 struct ureg_program *ureg = tx->ureg;
2095 const unsigned s = tx->insn.dst[0].idx;
2096 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2097
2098 tx_texcoord_alloc(tx, s);
2099 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2100 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2101
2102 return D3D_OK;
2103 }
2104
2105 DECL_SPECIAL(TEXCOORD_ps14)
2106 {
2107 struct ureg_program *ureg = tx->ureg;
2108 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2109 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2110
2111 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2112
2113 ureg_MOV(ureg, dst, src);
2114
2115 return D3D_OK;
2116 }
2117
2118 DECL_SPECIAL(TEXKILL)
2119 {
2120 struct ureg_src reg;
2121
2122 if (tx->version.major > 1 || tx->version.minor > 3) {
2123 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2124 } else {
2125 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2126 reg = tx->regs.vT[tx->insn.dst[0].idx];
2127 }
2128 if (tx->version.major < 2)
2129 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2130 ureg_KILL_IF(tx->ureg, reg);
2131
2132 return D3D_OK;
2133 }
2134
2135 DECL_SPECIAL(TEXBEM)
2136 {
2137 STUB(D3DERR_INVALIDCALL);
2138 }
2139
2140 DECL_SPECIAL(TEXBEML)
2141 {
2142 STUB(D3DERR_INVALIDCALL);
2143 }
2144
2145 DECL_SPECIAL(TEXREG2AR)
2146 {
2147 struct ureg_program *ureg = tx->ureg;
2148 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2149 struct ureg_src sample;
2150 const int m = tx->insn.dst[0].idx;
2151 const int n = tx->insn.src[0].idx;
2152 assert(m >= 0 && m > n);
2153
2154 sample = ureg_DECL_sampler(ureg, m);
2155 tx->info->sampler_mask |= 1 << m;
2156 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2157
2158 return D3D_OK;
2159 }
2160
2161 DECL_SPECIAL(TEXREG2GB)
2162 {
2163 struct ureg_program *ureg = tx->ureg;
2164 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2165 struct ureg_src sample;
2166 const int m = tx->insn.dst[0].idx;
2167 const int n = tx->insn.src[0].idx;
2168 assert(m >= 0 && m > n);
2169
2170 sample = ureg_DECL_sampler(ureg, m);
2171 tx->info->sampler_mask |= 1 << m;
2172 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2173
2174 return D3D_OK;
2175 }
2176
2177 DECL_SPECIAL(TEXM3x2PAD)
2178 {
2179 return D3D_OK; /* this is just padding */
2180 }
2181
2182 DECL_SPECIAL(TEXM3x2TEX)
2183 {
2184 struct ureg_program *ureg = tx->ureg;
2185 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2186 struct ureg_src sample;
2187 const int m = tx->insn.dst[0].idx - 1;
2188 const int n = tx->insn.src[0].idx;
2189 assert(m >= 0 && m > n);
2190
2191 tx_texcoord_alloc(tx, m);
2192 tx_texcoord_alloc(tx, m+1);
2193
2194 /* performs the matrix multiplication */
2195 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2196 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2197
2198 sample = ureg_DECL_sampler(ureg, m + 1);
2199 tx->info->sampler_mask |= 1 << (m + 1);
2200 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2201
2202 return D3D_OK;
2203 }
2204
2205 DECL_SPECIAL(TEXM3x3PAD)
2206 {
2207 return D3D_OK; /* this is just padding */
2208 }
2209
2210 DECL_SPECIAL(TEXM3x3SPEC)
2211 {
2212 struct ureg_program *ureg = tx->ureg;
2213 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2214 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2215 struct ureg_src sample;
2216 struct ureg_dst tmp;
2217 const int m = tx->insn.dst[0].idx - 2;
2218 const int n = tx->insn.src[0].idx;
2219 assert(m >= 0 && m > n);
2220
2221 tx_texcoord_alloc(tx, m);
2222 tx_texcoord_alloc(tx, m+1);
2223 tx_texcoord_alloc(tx, m+2);
2224
2225 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2226 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2227 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2228
2229 sample = ureg_DECL_sampler(ureg, m + 2);
2230 tx->info->sampler_mask |= 1 << (m + 2);
2231 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2232
2233 /* At this step, dst = N = (u', w', z').
2234 * We want dst to be the texture sampled at (u'', w'', z''), with
2235 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2236 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2237 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2238 /* at this step tmp.x = 1/N.N */
2239 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2240 /* at this step tmp.y = N.E */
2241 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2242 /* at this step tmp.x = N.E/N.N */
2243 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2244 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2245 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2246 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2247 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2248
2249 return D3D_OK;
2250 }
2251
2252 DECL_SPECIAL(TEXREG2RGB)
2253 {
2254 struct ureg_program *ureg = tx->ureg;
2255 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2256 struct ureg_src sample;
2257 const int m = tx->insn.dst[0].idx;
2258 const int n = tx->insn.src[0].idx;
2259 assert(m >= 0 && m > n);
2260
2261 sample = ureg_DECL_sampler(ureg, m);
2262 tx->info->sampler_mask |= 1 << m;
2263 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2264
2265 return D3D_OK;
2266 }
2267
2268 DECL_SPECIAL(TEXDP3TEX)
2269 {
2270 struct ureg_program *ureg = tx->ureg;
2271 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2272 struct ureg_dst tmp;
2273 struct ureg_src sample;
2274 const int m = tx->insn.dst[0].idx;
2275 const int n = tx->insn.src[0].idx;
2276 assert(m >= 0 && m > n);
2277
2278 tx_texcoord_alloc(tx, m);
2279
2280 tmp = tx_scratch(tx);
2281 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2282 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2283
2284 sample = ureg_DECL_sampler(ureg, m);
2285 tx->info->sampler_mask |= 1 << m;
2286 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2287
2288 return D3D_OK;
2289 }
2290
2291 DECL_SPECIAL(TEXM3x2DEPTH)
2292 {
2293 struct ureg_program *ureg = tx->ureg;
2294 struct ureg_dst tmp;
2295 const int m = tx->insn.dst[0].idx - 1;
2296 const int n = tx->insn.src[0].idx;
2297 assert(m >= 0 && m > n);
2298
2299 tx_texcoord_alloc(tx, m);
2300 tx_texcoord_alloc(tx, m+1);
2301
2302 tmp = tx_scratch(tx);
2303
2304 /* performs the matrix multiplication */
2305 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2306 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2307
2308 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2309 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2310 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2311 /* res = 'w' == 0 ? 1.0 : z/w */
2312 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2313 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2314 /* replace the depth for depth testing with the result */
2315 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z);
2316 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2317 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2318 return D3D_OK;
2319 }
2320
2321 DECL_SPECIAL(TEXDP3)
2322 {
2323 struct ureg_program *ureg = tx->ureg;
2324 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2325 const int m = tx->insn.dst[0].idx;
2326 const int n = tx->insn.src[0].idx;
2327 assert(m >= 0 && m > n);
2328
2329 tx_texcoord_alloc(tx, m);
2330
2331 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2332
2333 return D3D_OK;
2334 }
2335
2336 DECL_SPECIAL(TEXM3x3)
2337 {
2338 struct ureg_program *ureg = tx->ureg;
2339 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2340 struct ureg_src sample;
2341 struct ureg_dst E, tmp;
2342 const int m = tx->insn.dst[0].idx - 2;
2343 const int n = tx->insn.src[0].idx;
2344 assert(m >= 0 && m > n);
2345
2346 tx_texcoord_alloc(tx, m);
2347 tx_texcoord_alloc(tx, m+1);
2348 tx_texcoord_alloc(tx, m+2);
2349
2350 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2351 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2352 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2353
2354 switch (tx->insn.opcode) {
2355 case D3DSIO_TEXM3x3:
2356 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2357 break;
2358 case D3DSIO_TEXM3x3TEX:
2359 sample = ureg_DECL_sampler(ureg, m + 2);
2360 tx->info->sampler_mask |= 1 << (m + 2);
2361 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2362 break;
2363 case D3DSIO_TEXM3x3VSPEC:
2364 sample = ureg_DECL_sampler(ureg, m + 2);
2365 tx->info->sampler_mask |= 1 << (m + 2);
2366 E = tx_scratch(tx);
2367 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2368 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2369 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2370 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2371 /* At this step, dst = N = (u', w', z').
2372 * We want dst to be the texture sampled at (u'', w'', z''), with
2373 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2374 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2375 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2376 /* at this step tmp.x = 1/N.N */
2377 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2378 /* at this step tmp.y = N.E */
2379 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2380 /* at this step tmp.x = N.E/N.N */
2381 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2382 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2383 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2384 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2385 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2386 break;
2387 default:
2388 return D3DERR_INVALIDCALL;
2389 }
2390 return D3D_OK;
2391 }
2392
2393 DECL_SPECIAL(TEXDEPTH)
2394 {
2395 struct ureg_program *ureg = tx->ureg;
2396 struct ureg_dst r5;
2397 struct ureg_src r5r, r5g;
2398
2399 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2400
2401 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2402 * r5 won't be used afterward, thus we can use r5.ba */
2403 r5 = tx->regs.r[5];
2404 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2405 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2406
2407 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2408 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2409 /* r5.r = r/g */
2410 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2411 r5r, ureg_imm1f(ureg, 1.0f));
2412 /* replace the depth for depth testing with the result */
2413 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z);
2414 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2415
2416 return D3D_OK;
2417 }
2418
2419 DECL_SPECIAL(BEM)
2420 {
2421 STUB(D3DERR_INVALIDCALL);
2422 }
2423
2424 DECL_SPECIAL(TEXLD)
2425 {
2426 struct ureg_program *ureg = tx->ureg;
2427 unsigned target;
2428 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2429 struct ureg_src src[2] = {
2430 tx_src_param(tx, &tx->insn.src[0]),
2431 tx_src_param(tx, &tx->insn.src[1])
2432 };
2433 assert(tx->insn.src[1].idx >= 0 &&
2434 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2435 target = tx->sampler_targets[tx->insn.src[1].idx];
2436
2437 switch (tx->insn.flags) {
2438 case 0:
2439 ureg_TEX(ureg, dst, target, src[0], src[1]);
2440 break;
2441 case NINED3DSI_TEXLD_PROJECT:
2442 ureg_TXP(ureg, dst, target, src[0], src[1]);
2443 break;
2444 case NINED3DSI_TEXLD_BIAS:
2445 ureg_TXB(ureg, dst, target, src[0], src[1]);
2446 break;
2447 default:
2448 assert(0);
2449 return D3DERR_INVALIDCALL;
2450 }
2451 return D3D_OK;
2452 }
2453
2454 DECL_SPECIAL(TEXLD_14)
2455 {
2456 struct ureg_program *ureg = tx->ureg;
2457 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2458 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2459 const unsigned s = tx->insn.dst[0].idx;
2460 const unsigned t = ps1x_sampler_type(tx->info, s);
2461
2462 tx->info->sampler_mask |= 1 << s;
2463 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2464
2465 return D3D_OK;
2466 }
2467
2468 DECL_SPECIAL(TEX)
2469 {
2470 struct ureg_program *ureg = tx->ureg;
2471 const unsigned s = tx->insn.dst[0].idx;
2472 const unsigned t = ps1x_sampler_type(tx->info, s);
2473 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2474 struct ureg_src src[2];
2475
2476 tx_texcoord_alloc(tx, s);
2477
2478 src[0] = tx->regs.vT[s];
2479 src[1] = ureg_DECL_sampler(ureg, s);
2480 tx->info->sampler_mask |= 1 << s;
2481
2482 ureg_TEX(ureg, dst, t, src[0], src[1]);
2483
2484 return D3D_OK;
2485 }
2486
2487 DECL_SPECIAL(TEXLDD)
2488 {
2489 unsigned target;
2490 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2491 struct ureg_src src[4] = {
2492 tx_src_param(tx, &tx->insn.src[0]),
2493 tx_src_param(tx, &tx->insn.src[1]),
2494 tx_src_param(tx, &tx->insn.src[2]),
2495 tx_src_param(tx, &tx->insn.src[3])
2496 };
2497 assert(tx->insn.src[1].idx >= 0 &&
2498 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2499 target = tx->sampler_targets[tx->insn.src[1].idx];
2500
2501 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2502 return D3D_OK;
2503 }
2504
2505 DECL_SPECIAL(TEXLDL)
2506 {
2507 unsigned target;
2508 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2509 struct ureg_src src[2] = {
2510 tx_src_param(tx, &tx->insn.src[0]),
2511 tx_src_param(tx, &tx->insn.src[1])
2512 };
2513 assert(tx->insn.src[1].idx >= 0 &&
2514 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2515 target = tx->sampler_targets[tx->insn.src[1].idx];
2516
2517 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2518 return D3D_OK;
2519 }
2520
2521 DECL_SPECIAL(SETP)
2522 {
2523 STUB(D3DERR_INVALIDCALL);
2524 }
2525
2526 DECL_SPECIAL(BREAKP)
2527 {
2528 STUB(D3DERR_INVALIDCALL);
2529 }
2530
2531 DECL_SPECIAL(PHASE)
2532 {
2533 return D3D_OK; /* we don't care about phase */
2534 }
2535
2536 DECL_SPECIAL(COMMENT)
2537 {
2538 return D3D_OK; /* nothing to do */
2539 }
2540
2541
/* Builds one struct sm1_op_info initializer:
 *   sio_name / tgsi_name : suffixes pasted onto D3DSIO_ / TGSI_OPCODE_
 *   vs_min, vs_max       : vertex shader version range
 *   ps_min, ps_max       : pixel shader version range
 *   n_dst, n_src         : number of destination / source parameters
 *   fn                   : special handler, or NULL for the generic path */
#define _OPI(sio_name, tgsi_name, vs_min, vs_max, ps_min, ps_max, n_dst, n_src, fn) \
    { D3DSIO_##sio_name, TGSI_OPCODE_##tgsi_name, \
      { vs_min, vs_max }, { ps_min, ps_max }, \
      n_dst, n_src, fn }
2544
2545 struct sm1_op_info inst_table[] =
2546 {
2547 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
2548 _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
2549 _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2550 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2551 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2552 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2553 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2554 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2555 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2556 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2557 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2558 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2559 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2560 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2561 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2562 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2563 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2564 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2565 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2566 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2567 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2568
2569 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2570 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2571 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2572 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2573 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2574
2575 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2576 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2577 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2578 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2579 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2580 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2581
2582 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2583
2584 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2585 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2586 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2587 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2588 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2589
2590 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2591 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2592
2593 /* More flow control */
2594 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2595 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2596 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2597 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2598 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2599 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2600 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2601 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2602 /* we don't write to the address register, but a normal register (copied
2603 * when needed to the address register), thus we don't use ARR */
2604 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2605
2606 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2607 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2608
2609 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2610 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2611 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2612 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2613 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2614 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2615 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2616 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
2617 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2618 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2619 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2620 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2621 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2622 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2623 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2624 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2625
2626 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2627 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2628 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2629 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2630
2631 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2632
2633 /* More tex stuff */
2634 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2635 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2636 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2637 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2638 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2639 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2640
2641 /* Misc */
2642 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2643 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2644 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2645 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2646 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2647 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2648 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2649 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2650 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2651 };
2652
2653 struct sm1_op_info inst_phase =
2654 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2655
2656 struct sm1_op_info inst_comment =
2657 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2658
2659 static void
2660 create_op_info_map(struct shader_translator *tx)
2661 {
2662 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2663 unsigned i;
2664
2665 for (i = 0; i < Elements(tx->op_info_map); ++i)
2666 tx->op_info_map[i] = -1;
2667
2668 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2669 for (i = 0; i < Elements(inst_table); ++i) {
2670 assert(inst_table[i].sio < Elements(tx->op_info_map));
2671 if (inst_table[i].vert_version.min <= version &&
2672 inst_table[i].vert_version.max >= version)
2673 tx->op_info_map[inst_table[i].sio] = i;
2674 }
2675 } else {
2676 for (i = 0; i < Elements(inst_table); ++i) {
2677 assert(inst_table[i].sio < Elements(tx->op_info_map));
2678 if (inst_table[i].frag_version.min <= version &&
2679 inst_table[i].frag_version.max >= version)
2680 tx->op_info_map[inst_table[i].sio] = i;
2681 }
2682 }
2683 }
2684
2685 static INLINE HRESULT
2686 NineTranslateInstruction_Generic(struct shader_translator *tx)
2687 {
2688 struct ureg_dst dst[1];
2689 struct ureg_src src[4];
2690 unsigned i;
2691
2692 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2693 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2694 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2695 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2696
2697 ureg_insn(tx->ureg, tx->insn.info->opcode,
2698 dst, tx->insn.ndst,
2699 src, tx->insn.nsrc);
2700 return D3D_OK;
2701 }
2702
2703 static INLINE DWORD
2704 TOKEN_PEEK(struct shader_translator *tx)
2705 {
2706 return *(tx->parse);
2707 }
2708
2709 static INLINE DWORD
2710 TOKEN_NEXT(struct shader_translator *tx)
2711 {
2712 return *(tx->parse)++;
2713 }
2714
2715 static INLINE void
2716 TOKEN_JUMP(struct shader_translator *tx)
2717 {
2718 if (tx->parse_next && tx->parse != tx->parse_next) {
2719 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2720 tx->parse = tx->parse_next;
2721 }
2722 }
2723
2724 static INLINE boolean
2725 sm1_parse_eof(struct shader_translator *tx)
2726 {
2727 return TOKEN_PEEK(tx) == NINED3DSP_END;
2728 }
2729
2730 static void
2731 sm1_read_version(struct shader_translator *tx)
2732 {
2733 const DWORD tok = TOKEN_NEXT(tx);
2734
2735 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2736 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2737
2738 switch (tok >> 16) {
2739 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2740 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2741 default:
2742 DBG("Invalid shader type: %x\n", tok);
2743 tx->processor = ~0;
2744 break;
2745 }
2746 }
2747
2748 /* This is just to check if we parsed the instruction properly. */
2749 static void
2750 sm1_parse_get_skip(struct shader_translator *tx)
2751 {
2752 const DWORD tok = TOKEN_PEEK(tx);
2753
2754 if (tx->version.major >= 2) {
2755 tx->parse_next = tx->parse + 1 /* this */ +
2756 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2757 } else {
2758 tx->parse_next = NULL; /* TODO: determine from param count */
2759 }
2760 }
2761
2762 static void
2763 sm1_print_comment(const char *comment, UINT size)
2764 {
2765 if (!size)
2766 return;
2767 /* TODO */
2768 }
2769
2770 static void
2771 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2772 {
2773 DWORD tok = TOKEN_PEEK(tx);
2774
2775 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2776 {
2777 const char *comment = "";
2778 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2779 tx->parse += size + 1;
2780
2781 if (print)
2782 sm1_print_comment(comment, size);
2783
2784 tok = TOKEN_PEEK(tx);
2785 }
2786 }
2787
2788 static void
2789 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2790 {
2791 *reg = TOKEN_NEXT(tx);
2792
2793 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2794 {
2795 if (tx->version.major < 2)
2796 *rel = (1 << 31) |
2797 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2798 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2799 D3DSP_NOSWIZZLE;
2800 else
2801 *rel = TOKEN_NEXT(tx);
2802 }
2803 }
2804
2805 static void
2806 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2807 {
2808 uint8_t shift;
2809 dst->file =
2810 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2811 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2812 dst->type = TGSI_RETURN_TYPE_FLOAT;
2813 dst->idx = tok & D3DSP_REGNUM_MASK;
2814 dst->rel = NULL;
2815 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2816 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2817 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2818 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2819 }
2820
2821 static void
2822 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2823 {
2824 src->file =
2825 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2826 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2827 src->type = TGSI_RETURN_TYPE_FLOAT;
2828 src->idx = tok & D3DSP_REGNUM_MASK;
2829 src->rel = NULL;
2830 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2831 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2832
2833 switch (src->file) {
2834 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2835 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2836 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2837 default:
2838 break;
2839 }
2840 }
2841
2842 static void
2843 sm1_parse_immediate(struct shader_translator *tx,
2844 struct sm1_src_param *imm)
2845 {
2846 imm->file = NINED3DSPR_IMMEDIATE;
2847 imm->idx = INT_MIN;
2848 imm->rel = NULL;
2849 imm->swizzle = NINED3DSP_NOSWIZZLE;
2850 imm->mod = 0;
2851 switch (tx->insn.opcode) {
2852 case D3DSIO_DEF:
2853 imm->type = NINED3DSPTYPE_FLOAT4;
2854 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2855 tx->parse += 4;
2856 break;
2857 case D3DSIO_DEFI:
2858 imm->type = NINED3DSPTYPE_INT4;
2859 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2860 tx->parse += 4;
2861 break;
2862 case D3DSIO_DEFB:
2863 imm->type = NINED3DSPTYPE_BOOL;
2864 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2865 tx->parse += 1;
2866 break;
2867 default:
2868 assert(0);
2869 break;
2870 }
2871 }
2872
2873 static void
2874 sm1_read_dst_param(struct shader_translator *tx,
2875 struct sm1_dst_param *dst,
2876 struct sm1_src_param *rel)
2877 {
2878 DWORD tok_dst, tok_rel = 0;
2879
2880 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
2881 sm1_parse_dst_param(dst, tok_dst);
2882 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
2883 sm1_parse_src_param(rel, tok_rel);
2884 dst->rel = rel;
2885 }
2886 }
2887
2888 static void
2889 sm1_read_src_param(struct shader_translator *tx,
2890 struct sm1_src_param *src,
2891 struct sm1_src_param *rel)
2892 {
2893 DWORD tok_src, tok_rel = 0;
2894
2895 sm1_parse_get_param(tx, &tok_src, &tok_rel);
2896 sm1_parse_src_param(src, tok_src);
2897 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
2898 assert(rel);
2899 sm1_parse_src_param(rel, tok_rel);
2900 src->rel = rel;
2901 }
2902 }
2903
2904 static void
2905 sm1_read_semantic(struct shader_translator *tx,
2906 struct sm1_semantic *sem)
2907 {
2908 const DWORD tok_usg = TOKEN_NEXT(tx);
2909 const DWORD tok_dst = TOKEN_NEXT(tx);
2910
2911 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
2912 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
2913 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
2914
2915 sm1_parse_dst_param(&sem->reg, tok_dst);
2916 }
2917
2918 static void
2919 sm1_parse_instruction(struct shader_translator *tx)
2920 {
2921 struct sm1_instruction *insn = &tx->insn;
2922 DWORD tok;
2923 struct sm1_op_info *info = NULL;
2924 unsigned i;
2925
2926 sm1_parse_comments(tx, TRUE);
2927 sm1_parse_get_skip(tx);
2928
2929 tok = TOKEN_NEXT(tx);
2930
2931 insn->opcode = tok & D3DSI_OPCODE_MASK;
2932 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
2933 insn->coissue = !!(tok & D3DSI_COISSUE);
2934 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
2935
2936 if (insn->opcode < Elements(tx->op_info_map)) {
2937 int k = tx->op_info_map[insn->opcode];
2938 if (k >= 0) {
2939 assert(k < Elements(inst_table));
2940 info = &inst_table[k];
2941 }
2942 } else {
2943 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
2944 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
2945 }
2946 if (!info) {
2947 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
2948 TOKEN_JUMP(tx);
2949 return;
2950 }
2951 insn->info = info;
2952 insn->ndst = info->ndst;
2953 insn->nsrc = info->nsrc;
2954
2955 assert(!insn->predicated && "TODO: predicated instructions");
2956
2957 /* check version */
2958 {
2959 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
2960 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
2961 unsigned ver = (tx->version.major << 8) | tx->version.minor;
2962 if (ver < min || ver > max) {
2963 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
2964 min, ver, max);
2965 return;
2966 }
2967 }
2968
2969 for (i = 0; i < insn->ndst; ++i)
2970 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
2971 if (insn->predicated)
2972 sm1_read_src_param(tx, &insn->pred, NULL);
2973 for (i = 0; i < insn->nsrc; ++i)
2974 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
2975
2976 /* parse here so we can dump them before processing */
2977 if (insn->opcode == D3DSIO_DEF ||
2978 insn->opcode == D3DSIO_DEFI ||
2979 insn->opcode == D3DSIO_DEFB)
2980 sm1_parse_immediate(tx, &tx->insn.src[0]);
2981
2982 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
2983 sm1_instruction_check(insn);
2984
2985 if (info->handler)
2986 info->handler(tx);
2987 else
2988 NineTranslateInstruction_Generic(tx);
2989 tx_apply_dst0_modifiers(tx);
2990
2991 tx->num_scratch = 0; /* reset */
2992
2993 TOKEN_JUMP(tx);
2994 }
2995
2996 static void
2997 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
2998 {
2999 unsigned i;
3000
3001 tx->info = info;
3002
3003 tx->byte_code = info->byte_code;
3004 tx->parse = info->byte_code;
3005
3006 for (i = 0; i < Elements(info->input_map); ++i)
3007 info->input_map[i] = NINE_DECLUSAGE_NONE;
3008 info->num_inputs = 0;
3009
3010 info->position_t = FALSE;
3011 info->point_size = FALSE;
3012
3013 tx->info->const_float_slots = 0;
3014 tx->info->const_int_slots = 0;
3015 tx->info->const_bool_slots = 0;
3016
3017 info->sampler_mask = 0x0;
3018 info->rt_mask = 0x0;
3019
3020 info->lconstf.data = NULL;
3021 info->lconstf.ranges = NULL;
3022
3023 for (i = 0; i < Elements(tx->regs.rL); ++i) {
3024 tx->regs.rL[i] = ureg_dst_undef();
3025 }
3026 tx->regs.address = ureg_dst_undef();
3027 tx->regs.a0 = ureg_dst_undef();
3028 tx->regs.p = ureg_dst_undef();
3029 tx->regs.oDepth = ureg_dst_undef();
3030 tx->regs.vPos = ureg_src_undef();
3031 tx->regs.vFace = ureg_src_undef();
3032 for (i = 0; i < Elements(tx->regs.o); ++i)
3033 tx->regs.o[i] = ureg_dst_undef();
3034 for (i = 0; i < Elements(tx->regs.oCol); ++i)
3035 tx->regs.oCol[i] = ureg_dst_undef();
3036 for (i = 0; i < Elements(tx->regs.vC); ++i)
3037 tx->regs.vC[i] = ureg_src_undef();
3038 for (i = 0; i < Elements(tx->regs.vT); ++i)
3039 tx->regs.vT[i] = ureg_src_undef();
3040
3041 for (i = 0; i < Elements(tx->lconsti); ++i)
3042 tx->lconsti[i].idx = -1;
3043 for (i = 0; i < Elements(tx->lconstb); ++i)
3044 tx->lconstb[i].idx = -1;
3045
3046 sm1_read_version(tx);
3047
3048 info->version = (tx->version.major << 4) | tx->version.minor;
3049
3050 create_op_info_map(tx);
3051 }
3052
3053 static void
3054 tx_dtor(struct shader_translator *tx)
3055 {
3056 if (tx->num_inst_labels)
3057 FREE(tx->inst_labels);
3058 FREE(tx->lconstf);
3059 FREE(tx->regs.r);
3060 FREE(tx);
3061 }
3062
3063 static INLINE unsigned
3064 tgsi_processor_from_type(unsigned shader_type)
3065 {
3066 switch (shader_type) {
3067 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
3068 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
3069 default:
3070 return ~0;
3071 }
3072 }
3073
/* Query helpers; both expect a variable named `device` in scope, and
 * GET_SHADER_CAP additionally expects `info` (for info->type). */
#define GET_CAP(cap) \
    device->screen->get_param(device->screen, PIPE_CAP_##cap)
#define GET_SHADER_CAP(cap) \
    device->screen->get_shader_param(device->screen, info->type, \
                                     PIPE_SHADER_CAP_##cap)
3078
3079 HRESULT
3080 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3081 {
3082 struct shader_translator *tx;
3083 HRESULT hr = D3D_OK;
3084 const unsigned processor = tgsi_processor_from_type(info->type);
3085 unsigned s, slot_max;
3086 unsigned max_const_f;
3087
3088 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3089
3090 tx = CALLOC_STRUCT(shader_translator);
3091 if (!tx)
3092 return E_OUTOFMEMORY;
3093 tx_ctor(tx, info);
3094
3095 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3096 hr = D3DERR_INVALIDCALL;
3097 DBG("Unsupported shader version: %u.%u !\n",
3098 tx->version.major, tx->version.minor);
3099 goto out;
3100 }
3101 if (tx->processor != processor) {
3102 hr = D3DERR_INVALIDCALL;
3103 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3104 goto out;
3105 }
3106 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
3107 tx->version.major, tx->version.minor);
3108
3109 tx->ureg = ureg_create(processor);
3110 if (!tx->ureg) {
3111 hr = E_OUTOFMEMORY;
3112 goto out;
3113 }
3114
3115 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3116 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3117 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3118 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3119 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3120 tx->texcoord_sn = tx->want_texcoord ?
3121 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3122
3123 /* VS must always write position. Declare it here to make it the 1st output.
3124 * (Some drivers like nv50 are buggy and rely on that.)
3125 */
3126 if (IS_VS) {
3127 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3128 } else {
3129 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3130 if (!tx->shift_wpos)
3131 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3132 }
3133
3134 while (!sm1_parse_eof(tx) && !tx->failure)
3135 sm1_parse_instruction(tx);
3136 tx->parse++; /* for byte_size */
3137
3138 if (tx->failure) {
3139 ERR("Encountered buggy shader\n");
3140 ureg_destroy(tx->ureg);
3141 hr = D3DERR_INVALIDCALL;
3142 goto out;
3143 }
3144
3145 if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
3146 ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
3147 ureg_src(tx->regs.r[0]));
3148 info->rt_mask |= 0x1;
3149 }
3150
3151 if (info->position_t)
3152 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3153
3154 ureg_END(tx->ureg);
3155
3156 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
3157 info->point_size = TRUE;
3158
3159 /* record local constants */
3160 if (tx->num_lconstf && tx->indirect_const_access) {
3161 struct nine_range *ranges;
3162 float *data;
3163 int *indices;
3164 unsigned i, k, n;
3165
3166 hr = E_OUTOFMEMORY;
3167
3168 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3169 if (!data)
3170 goto out;
3171 info->lconstf.data = data;
3172
3173 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3174 if (!indices)
3175 goto out;
3176
3177 /* lazy sort, num_lconstf should be small */
3178 for (n = 0; n < tx->num_lconstf; ++n) {
3179 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3180 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3181 k = i;
3182 }
3183 indices[n] = tx->lconstf[k].idx;
3184 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3185 tx->lconstf[k].idx = INT_MAX;
3186 }
3187
3188 /* count ranges */
3189 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3190 if (indices[i] != indices[i - 1] + 1)
3191 ++n;
3192 ranges = MALLOC(n * sizeof(ranges[0]));
3193 if (!ranges) {
3194 FREE(indices);
3195 goto out;
3196 }
3197 info->lconstf.ranges = ranges;
3198
3199 k = 0;
3200 ranges[k].bgn = indices[0];
3201 for (i = 1; i < tx->num_lconstf; ++i) {
3202 if (indices[i] != indices[i - 1] + 1) {
3203 ranges[k].next = &ranges[k + 1];
3204 ranges[k].end = indices[i - 1] + 1;
3205 ++k;
3206 ranges[k].bgn = indices[i];
3207 }
3208 }
3209 ranges[k].end = indices[i - 1] + 1;
3210 ranges[k].next = NULL;
3211 assert(n == (k + 1));
3212
3213 FREE(indices);
3214 hr = D3D_OK;
3215 }
3216
3217 /* r500 */
3218 if (info->const_float_slots > device->max_vs_const_f &&
3219 (info->const_int_slots || info->const_bool_slots))
3220 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3221
3222
3223 if (tx->indirect_const_access) /* vs only */
3224 info->const_float_slots = device->max_vs_const_f;
3225
3226 max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3227 slot_max = info->const_bool_slots > 0 ?
3228 max_const_f + NINE_MAX_CONST_I
3229 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3230 info->const_int_slots > 0 ?
3231 max_const_f + info->const_int_slots :
3232 info->const_float_slots;
3233 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3234
3235 for (s = 0; s < slot_max; s++)
3236 ureg_DECL_constant(tx->ureg, s);
3237
3238 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3239 unsigned count;
3240 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3241 tgsi_dump(toks, 0);
3242 ureg_free_tokens(toks);
3243 }
3244
3245 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3246 if (!info->cso) {
3247 hr = D3DERR_DRIVERINTERNALERROR;
3248 FREE(info->lconstf.data);
3249 FREE(info->lconstf.ranges);
3250 goto out;
3251 }
3252
3253 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3254 out:
3255 tx_dtor(tx);
3256 return hr;
3257 }