st/nine: Refactor how user constbufs sizes are calculated
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_ureg.h"
34 #include "tgsi/tgsi_dump.h"
35
/* Debug channel that this file's debug output is attributed to. */
#define DBG_CHANNEL DBG_SHADER

/* printf-style output on the shader debug channel; used by the sm1_dump_*
 * helpers in this file. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)


struct shader_translator;

/* Signature of a per-opcode special handler (see sm1_op_info::handler). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Returns the printable name of a D3DSIO_* opcode (used when dumping). */
static INLINE const char *d3dsio_to_string(unsigned opcode);


/* Shader type tags.
 * NOTE(review): presumably the high word of the SM1 version token - confirm
 * against the parser, which is not in this part of the file. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Capacities of the cond_labels / loop_labels / rL arrays in
 * struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Values of sm1_src_param::type; they select which member of the immediate
 * union is meaningful (see sm1_dump_immediate). */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Pseudo register file, one past the last D3DSPR_* value, that marks a
 * source parameter as an immediate. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operators.
 * NOTE(review): presumably the comparison encoded in the opcode flags of
 * compare-style instructions - confirm at the users. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Values of sm1_instruction::flags for D3DSIO_TEX; dumped as the "p" / "b"
 * opcode suffix by sm1_dump_instruction. */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Per-component write mask bits (x, y, z, w) as stored in
 * sm1_dst_param::mask. */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: four 2-bit selectors, x in the lowest bits. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Destination modifier bits, shifted down so they can be tested against
 * sm1_dst_param::mod. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
94
/*
 * Source modifiers, shifted down so they can be used as values of
 * sm1_src_param::mod and as indices into sm1_mod_str.
 *
 * Availability and meaning of each modifier:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
124
/* Printable name of each source modifier, indexed by NINED3DSPSM_*.
 * sm1_dump_src_param prints the entry followed by "(" in front of the
 * register, so NONE maps to the empty string. */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
142
143 static void
144 sm1_dump_writemask(BYTE mask)
145 {
146 if (mask & 1) DUMP("x"); else DUMP("_");
147 if (mask & 2) DUMP("y"); else DUMP("_");
148 if (mask & 4) DUMP("z"); else DUMP("_");
149 if (mask & 8) DUMP("w"); else DUMP("_");
150 }
151
152 static void
153 sm1_dump_swizzle(BYTE s)
154 {
155 char c[4] = { 'x', 'y', 'z', 'w' };
156 DUMP("%c%c%c%c",
157 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
158 }
159
/* One-character prefix used when dumping a register, indexed by D3DSPR_*
 * register file. Files that have no initializer here are left as '\0'. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
183
184 static void
185 sm1_dump_reg(BYTE file, INT index)
186 {
187 switch (file) {
188 case D3DSPR_LOOP:
189 DUMP("aL");
190 break;
191 case D3DSPR_COLOROUT:
192 DUMP("oC%i", index);
193 break;
194 case D3DSPR_DEPTHOUT:
195 DUMP("oDepth");
196 break;
197 case D3DSPR_RASTOUT:
198 DUMP("oRast%i", index);
199 break;
200 case D3DSPR_CONSTINT:
201 DUMP("iconst[%i]", index);
202 break;
203 case D3DSPR_CONSTBOOL:
204 DUMP("bconst[%i]", index);
205 break;
206 default:
207 DUMP("%c%i", sm1_file_char[file], index);
208 break;
209 }
210 }
211
/* A parsed SM1 source parameter. */
struct sm1_src_param
{
    INT idx;                   /* register index within 'file' */
    struct sm1_src_param *rel; /* relative-addressing operand, NULL if none */
    BYTE file;                 /* D3DSPR_* or NINED3DSPR_IMMEDIATE */
    BYTE swizzle;              /* four 2-bit selectors, x in the lowest bits */
    BYTE mod;                  /* NINED3DSPSM_* source modifier */
    BYTE type;                 /* NINED3DSPTYPE_*: which 'imm' member is valid */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm;                     /* immediate value, interpreted per 'type' */
};
/* Forward declaration; the definition is not in this part of the file. */
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
229
/* A parsed SM1 destination parameter. */
struct sm1_dst_param
{
    INT idx;                   /* register index within 'file' */
    struct sm1_src_param *rel; /* relative-addressing operand, NULL if none */
    BYTE file;                 /* D3DSPR_* register file */
    BYTE mask;                 /* NINED3DSP_WRITEMASK_* bits */
    BYTE mod;                  /* NINED3DSPDM_* bits */
    /* Result scale as a power of two: > 0 multiplies by (1 << shift),
     * < 0 divides by (1 << -shift), 0 means no scaling. */
    int8_t shift; /* sint4 */
    BYTE type;
};
240
241 static INLINE void
242 assert_replicate_swizzle(const struct ureg_src *reg)
243 {
244 assert(reg->SwizzleY == reg->SwizzleX &&
245 reg->SwizzleZ == reg->SwizzleX &&
246 reg->SwizzleW == reg->SwizzleX);
247 }
248
249 static void
250 sm1_dump_immediate(const struct sm1_src_param *param)
251 {
252 switch (param->type) {
253 case NINED3DSPTYPE_FLOAT4:
254 DUMP("{ %f %f %f %f }",
255 param->imm.f[0], param->imm.f[1],
256 param->imm.f[2], param->imm.f[3]);
257 break;
258 case NINED3DSPTYPE_INT4:
259 DUMP("{ %i %i %i %i }",
260 param->imm.i[0], param->imm.i[1],
261 param->imm.i[2], param->imm.i[3]);
262 break;
263 case NINED3DSPTYPE_BOOL:
264 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
265 break;
266 default:
267 assert(0);
268 break;
269 }
270 }
271
272 static void
273 sm1_dump_src_param(const struct sm1_src_param *param)
274 {
275 if (param->file == NINED3DSPR_IMMEDIATE) {
276 assert(!param->mod &&
277 !param->rel &&
278 param->swizzle == NINED3DSP_NOSWIZZLE);
279 sm1_dump_immediate(param);
280 return;
281 }
282
283 if (param->mod)
284 DUMP("%s(", sm1_mod_str[param->mod]);
285 if (param->rel) {
286 DUMP("%c[", sm1_file_char[param->file]);
287 sm1_dump_src_param(param->rel);
288 DUMP("+%i]", param->idx);
289 } else {
290 sm1_dump_reg(param->file, param->idx);
291 }
292 if (param->mod)
293 DUMP(")");
294 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
295 DUMP(".");
296 sm1_dump_swizzle(param->swizzle);
297 }
298 }
299
300 static void
301 sm1_dump_dst_param(const struct sm1_dst_param *param)
302 {
303 if (param->mod & NINED3DSPDM_SATURATE)
304 DUMP("sat ");
305 if (param->mod & NINED3DSPDM_PARTIALP)
306 DUMP("pp ");
307 if (param->mod & NINED3DSPDM_CENTROID)
308 DUMP("centroid ");
309 if (param->shift < 0)
310 DUMP("/%u ", 1 << -param->shift);
311 if (param->shift > 0)
312 DUMP("*%u ", 1 << param->shift);
313
314 if (param->rel) {
315 DUMP("%c[", sm1_file_char[param->file]);
316 sm1_dump_src_param(param->rel);
317 DUMP("+%i]", param->idx);
318 } else {
319 sm1_dump_reg(param->file, param->idx);
320 }
321 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
322 DUMP(".");
323 sm1_dump_writemask(param->mask);
324 }
325 }
326
/* A register together with its declared usage.
 * NOTE(review): presumably filled in by sm1_read_semantic() when a DCL
 * instruction is parsed - confirm at the definition, which is not in this
 * part of the file. */
struct sm1_semantic
{
    struct sm1_dst_param reg; /* the declared register */
    BYTE sampler_type;
    D3DDECLUSAGE usage;
    BYTE usage_idx;
};
334
/* Static description of one SM1 opcode. */
struct sm1_op_info
{
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
     * should be ignored completely */
    unsigned sio;
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even if handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
355
/* One parsed SM1 instruction. */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags;      /* opcode-specific flags, e.g. NINED3DSI_TEXLD_* for TEX */
    BOOL coissue;
    BOOL predicated; /* when set, 'pred' holds the predicate operand */
    BYTE ndst;       /* number of valid entries in dst[] */
    BYTE nsrc;       /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    /* NOTE(review): presumably the storage that src[i].rel points at when a
     * source uses relative addressing - confirm in the parser. */
    struct sm1_src_param src_rel[4];
    struct sm1_src_param pred;
    /* NOTE(review): presumably the storage that dst[0].rel points at -
     * confirm in the parser. */
    struct sm1_src_param dst_rel[1];
    struct sm1_dst_param dst[1];

    struct sm1_op_info *info;
};
372
373 static void
374 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
375 {
376 unsigned i;
377
378 /* no info stored for these: */
379 if (insn->opcode == D3DSIO_DCL)
380 return;
381 for (i = 0; i < indent; ++i)
382 DUMP(" ");
383
384 if (insn->predicated) {
385 DUMP("@");
386 sm1_dump_src_param(&insn->pred);
387 DUMP(" ");
388 }
389 DUMP("%s", d3dsio_to_string(insn->opcode));
390 if (insn->flags) {
391 switch (insn->opcode) {
392 case D3DSIO_TEX:
393 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
394 break;
395 default:
396 DUMP("_%x", insn->flags);
397 break;
398 }
399 }
400 if (insn->coissue)
401 DUMP("_co");
402 DUMP(" ");
403
404 for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
405 sm1_dump_dst_param(&insn->dst[i]);
406 DUMP(" ");
407 }
408
409 for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
410 sm1_dump_src_param(&insn->src[i]);
411 DUMP(" ");
412 }
413 if (insn->opcode == D3DSIO_DEF ||
414 insn->opcode == D3DSIO_DEFI ||
415 insn->opcode == D3DSIO_DEFB)
416 sm1_dump_immediate(&insn->src[0]);
417
418 DUMP("\n");
419 }
420
/* A constant defined inside the shader itself (as opposed to one supplied
 * through the constant buffer). */
struct sm1_local_const
{
    INT idx;             /* constant register index this entry overrides */
    struct ureg_src reg; /* TGSI immediate holding the value */
    union {
        boolean b;
        float f[4];
        int32_t i[4];
    } imm;               /* copy of the raw value */
};
431
/* All state of one SM1 -> TGSI translation run. */
struct shader_translator
{
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg; /* TGSI program being built */

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAGMENT */

    boolean native_integers; /* selects integer vs. float immediates/ops */
    boolean inline_subroutines;
    boolean lower_preds;
    boolean want_texcoord;
    boolean shift_wpos; /* subtract (0.5, 0.5) from the position input */
    unsigned texcoord_sn; /* semantic name used for texcoord inputs/outputs */

    struct sm1_instruction insn; /* current instruction */

    /* TGSI registers backing the D3D register files; entries are declared
     * lazily and stay "undef" until first use. */
    struct {
        struct ureg_dst *r; /* temporaries, grown by tx_temp_alloc */
        struct ureg_dst oPos;
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p; /* predicate register */
        struct ureg_dst address; /* TGSI address register */
        struct ureg_dst a0; /* temporary shadowing the D3D address register */
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[5]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* Elements(regs.r) */
    unsigned num_scratch; /* number of regs.t entries handed out */
    unsigned loop_depth;
    unsigned loop_depth_max;
    unsigned cond_depth;
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    struct sm1_local_const *lconstf; /* grown in steps of 8 entries */
    unsigned num_lconstf;
    struct sm1_local_const lconsti[NINE_MAX_CONST_I]; /* indexed by register */
    struct sm1_local_const lconstb[NINE_MAX_CONST_B]; /* indexed by register */

    boolean indirect_const_access; /* a constant was read with relative addressing */

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
503
/* Shader stage tests; both expect a 'struct shader_translator *tx' to be
 * in scope at the point of use. */
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)

/* Forward declaration; the definition is not in this part of the file. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
509
510 static void
511 sm1_instruction_check(const struct sm1_instruction *insn)
512 {
513 if (insn->opcode == D3DSIO_CRS)
514 {
515 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
516 {
517 DBG("CRS.mask.w\n");
518 }
519 }
520 }
521
522 static boolean
523 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
524 {
525 INT i;
526 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
527 for (i = 0; i < tx->num_lconstf; ++i) {
528 if (tx->lconstf[i].idx == index) {
529 *src = tx->lconstf[i].reg;
530 return TRUE;
531 }
532 }
533 return FALSE;
534 }
535 static boolean
536 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
537 {
538 assert(index >= 0 && index < NINE_MAX_CONST_I);
539 if (tx->lconsti[index].idx == index)
540 *src = tx->lconsti[index].reg;
541 return tx->lconsti[index].idx == index;
542 }
543 static boolean
544 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
545 {
546 assert(index >= 0 && index < NINE_MAX_CONST_B);
547 if (tx->lconstb[index].idx == index)
548 *src = tx->lconstb[index].reg;
549 return tx->lconstb[index].idx == index;
550 }
551
552 static void
553 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
554 {
555 unsigned n;
556
557 /* Anno1404 sets out of range constants. */
558 assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
559 if (index >= NINE_MAX_CONST_F)
560 WARN("lconstf index %i too high, indirect access won't work\n", index);
561
562 for (n = 0; n < tx->num_lconstf; ++n)
563 if (tx->lconstf[n].idx == index)
564 break;
565 if (n == tx->num_lconstf) {
566 if ((n % 8) == 0) {
567 tx->lconstf = REALLOC(tx->lconstf,
568 (n + 0) * sizeof(tx->lconstf[0]),
569 (n + 8) * sizeof(tx->lconstf[0]));
570 assert(tx->lconstf);
571 }
572 tx->num_lconstf++;
573 }
574 tx->lconstf[n].idx = index;
575 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
576
577 memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
578 }
579 static void
580 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
581 {
582 assert(index >= 0 && index < NINE_MAX_CONST_I);
583 tx->lconsti[index].idx = index;
584 tx->lconsti[index].reg = tx->native_integers ?
585 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
586 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
587 }
588 static void
589 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
590 {
591 assert(index >= 0 && index < NINE_MAX_CONST_B);
592 tx->lconstb[index].idx = index;
593 tx->lconstb[index].reg = tx->native_integers ?
594 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
595 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
596 }
597
598 static INLINE struct ureg_dst
599 tx_scratch(struct shader_translator *tx)
600 {
601 assert(tx->num_scratch < Elements(tx->regs.t));
602 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
603 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
604 return tx->regs.t[tx->num_scratch++];
605 }
606
607 static INLINE struct ureg_dst
608 tx_scratch_scalar(struct shader_translator *tx)
609 {
610 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
611 }
612
613 static INLINE struct ureg_src
614 tx_src_scalar(struct ureg_dst dst)
615 {
616 struct ureg_src src = ureg_src(dst);
617 int c = ffs(dst.WriteMask) - 1;
618 if (dst.WriteMask == (1 << c))
619 src = ureg_scalar(src, c);
620 return src;
621 }
622
623 /* Need to declare all constants if indirect addressing is used,
624 * otherwise we could scan the shader to determine the maximum.
625 * TODO: It doesn't really matter for nv50 so I won't do the scan,
626 * but radeon drivers might care, if they don't infer it from TGSI.
627 */
628 static void
629 tx_decl_constants(struct shader_translator *tx)
630 {
631 unsigned i, n = 0;
632
633 for (i = 0; i < NINE_MAX_CONST_F; ++i)
634 ureg_DECL_constant(tx->ureg, n++);
635 for (i = 0; i < NINE_MAX_CONST_I; ++i)
636 ureg_DECL_constant(tx->ureg, n++);
637 for (i = 0; i < (NINE_MAX_CONST_B / 4); ++i)
638 ureg_DECL_constant(tx->ureg, n++);
639 }
640
641 static INLINE void
642 tx_temp_alloc(struct shader_translator *tx, INT idx)
643 {
644 assert(idx >= 0);
645 if (idx >= tx->num_temp) {
646 unsigned k = tx->num_temp;
647 unsigned n = idx + 1;
648 tx->regs.r = REALLOC(tx->regs.r,
649 k * sizeof(tx->regs.r[0]),
650 n * sizeof(tx->regs.r[0]));
651 for (; k < n; ++k)
652 tx->regs.r[k] = ureg_dst_undef();
653 tx->num_temp = n;
654 }
655 if (ureg_dst_is_undef(tx->regs.r[idx]))
656 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
657 }
658
659 static INLINE void
660 tx_addr_alloc(struct shader_translator *tx, INT idx)
661 {
662 assert(idx == 0);
663 if (ureg_dst_is_undef(tx->regs.address))
664 tx->regs.address = ureg_DECL_address(tx->ureg);
665 if (ureg_dst_is_undef(tx->regs.a0))
666 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
667 }
668
669 static INLINE void
670 tx_pred_alloc(struct shader_translator *tx, INT idx)
671 {
672 assert(idx == 0);
673 if (ureg_dst_is_undef(tx->regs.p))
674 tx->regs.p = ureg_DECL_predicate(tx->ureg);
675 }
676
677 static INLINE void
678 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
679 {
680 assert(IS_PS);
681 assert(idx >= 0 && idx < Elements(tx->regs.vT));
682 if (ureg_src_is_undef(tx->regs.vT[idx]))
683 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
684 TGSI_INTERPOLATE_PERSPECTIVE);
685 }
686
687 static INLINE unsigned *
688 tx_bgnloop(struct shader_translator *tx)
689 {
690 tx->loop_depth++;
691 if (tx->loop_depth_max < tx->loop_depth)
692 tx->loop_depth_max = tx->loop_depth;
693 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
694 return &tx->loop_labels[tx->loop_depth - 1];
695 }
696
697 static INLINE unsigned *
698 tx_endloop(struct shader_translator *tx)
699 {
700 assert(tx->loop_depth);
701 tx->loop_depth--;
702 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
703 ureg_get_instruction_number(tx->ureg));
704 return &tx->loop_labels[tx->loop_depth];
705 }
706
707 static struct ureg_dst
708 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
709 {
710 const unsigned l = tx->loop_depth - 1;
711
712 if (!tx->loop_depth)
713 {
714 DBG("loop counter requested outside of loop\n");
715 return ureg_dst_undef();
716 }
717
718 if (ureg_dst_is_undef(tx->regs.rL[l])) {
719 /* loop or rep ctr creation */
720 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
721 tx->loop_or_rep[l] = loop_or_rep;
722 }
723 /* loop - rep - endloop - endrep not allowed */
724 assert(tx->loop_or_rep[l] == loop_or_rep);
725
726 return tx->regs.rL[l];
727 }
728
729 static struct ureg_src
730 tx_get_loopal(struct shader_translator *tx)
731 {
732 int loop_level = tx->loop_depth - 1;
733
734 while (loop_level >= 0) {
735 /* handle loop - rep - endrep - endloop case */
736 if (tx->loop_or_rep[loop_level])
737 /* the value is in the loop counter y component (nine implementation) */
738 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
739 loop_level--;
740 }
741
742 DBG("aL counter requested outside of loop\n");
743 return ureg_src_undef();
744 }
745
746 static INLINE unsigned *
747 tx_cond(struct shader_translator *tx)
748 {
749 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
750 tx->cond_depth++;
751 return &tx->cond_labels[tx->cond_depth - 1];
752 }
753
754 static INLINE unsigned *
755 tx_elsecond(struct shader_translator *tx)
756 {
757 assert(tx->cond_depth);
758 return &tx->cond_labels[tx->cond_depth - 1];
759 }
760
761 static INLINE void
762 tx_endcond(struct shader_translator *tx)
763 {
764 assert(tx->cond_depth);
765 tx->cond_depth--;
766 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
767 ureg_get_instruction_number(tx->ureg));
768 }
769
770 static INLINE struct ureg_dst
771 nine_ureg_dst_register(unsigned file, int index)
772 {
773 return ureg_dst(ureg_src_register(file, index));
774 }
775
776 static struct ureg_src
777 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
778 {
779 struct ureg_program *ureg = tx->ureg;
780 struct ureg_src src;
781 struct ureg_dst tmp;
782
783 switch (param->file)
784 {
785 case D3DSPR_TEMP:
786 assert(!param->rel);
787 tx_temp_alloc(tx, param->idx);
788 src = ureg_src(tx->regs.r[param->idx]);
789 break;
790 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
791 case D3DSPR_ADDR:
792 assert(!param->rel);
793 if (IS_VS) {
794 assert(param->idx == 0);
795 /* the address register (vs only) must be
796 * assigned before use */
797 assert(!ureg_dst_is_undef(tx->regs.a0));
798 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
799 src = ureg_src(tx->regs.address);
800 } else {
801 if (tx->version.major < 2 && tx->version.minor < 4) {
802 /* no subroutines, so should be defined */
803 src = ureg_src(tx->regs.tS[param->idx]);
804 } else {
805 tx_texcoord_alloc(tx, param->idx);
806 src = tx->regs.vT[param->idx];
807 }
808 }
809 break;
810 case D3DSPR_INPUT:
811 if (IS_VS) {
812 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
813 } else {
814 if (tx->version.major < 3) {
815 assert(!param->rel);
816 src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
817 param->idx,
818 TGSI_INTERPOLATE_PERSPECTIVE);
819 } else {
820 assert(!param->rel); /* TODO */
821 assert(param->idx < Elements(tx->regs.v));
822 src = tx->regs.v[param->idx];
823 }
824 }
825 break;
826 case D3DSPR_PREDICATE:
827 assert(!param->rel);
828 tx_pred_alloc(tx, param->idx);
829 src = ureg_src(tx->regs.p);
830 break;
831 case D3DSPR_SAMPLER:
832 assert(param->mod == NINED3DSPSM_NONE);
833 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
834 assert(!param->rel);
835 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
836 break;
837 case D3DSPR_CONST:
838 assert(!param->rel || IS_VS);
839 if (param->rel)
840 tx->indirect_const_access = TRUE;
841 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
842 if (!param->rel)
843 nine_info_mark_const_f_used(tx->info, param->idx);
844 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
845 }
846 if (!IS_VS && tx->version.major < 2) {
847 /* ps 1.X clamps constants */
848 tmp = tx_scratch(tx);
849 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
850 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
851 src = ureg_src(tmp);
852 }
853 break;
854 case D3DSPR_CONST2:
855 case D3DSPR_CONST3:
856 case D3DSPR_CONST4:
857 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
858 assert(!"CONST2/3/4");
859 src = ureg_imm1f(ureg, 0.0f);
860 break;
861 case D3DSPR_CONSTINT:
862 /* relative adressing only possible for float constants in vs */
863 assert(!param->rel);
864 if (!tx_lconsti(tx, &src, param->idx)) {
865 nine_info_mark_const_i_used(tx->info, param->idx);
866 src = ureg_src_register(TGSI_FILE_CONSTANT,
867 tx->info->const_i_base + param->idx);
868 }
869 break;
870 case D3DSPR_CONSTBOOL:
871 assert(!param->rel);
872 if (!tx_lconstb(tx, &src, param->idx)) {
873 char r = param->idx / 4;
874 char s = param->idx & 3;
875 nine_info_mark_const_b_used(tx->info, param->idx);
876 src = ureg_src_register(TGSI_FILE_CONSTANT,
877 tx->info->const_b_base + r);
878 src = ureg_swizzle(src, s, s, s, s);
879 }
880 break;
881 case D3DSPR_LOOP:
882 if (ureg_dst_is_undef(tx->regs.address))
883 tx->regs.address = ureg_DECL_address(ureg);
884 if (!tx->native_integers)
885 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
886 else
887 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
888 src = ureg_src(tx->regs.address);
889 break;
890 case D3DSPR_MISCTYPE:
891 switch (param->idx) {
892 case D3DSMO_POSITION:
893 if (ureg_src_is_undef(tx->regs.vPos))
894 tx->regs.vPos = ureg_DECL_fs_input(ureg,
895 TGSI_SEMANTIC_POSITION, 0,
896 TGSI_INTERPOLATE_LINEAR);
897 if (tx->shift_wpos) {
898 /* TODO: do this only once */
899 struct ureg_dst wpos = tx_scratch(tx);
900 ureg_SUB(ureg, wpos, tx->regs.vPos,
901 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
902 src = ureg_src(wpos);
903 } else {
904 src = tx->regs.vPos;
905 }
906 break;
907 case D3DSMO_FACE:
908 if (ureg_src_is_undef(tx->regs.vFace)) {
909 tx->regs.vFace = ureg_DECL_fs_input(ureg,
910 TGSI_SEMANTIC_FACE, 0,
911 TGSI_INTERPOLATE_CONSTANT);
912 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
913 }
914 src = tx->regs.vFace;
915 break;
916 default:
917 assert(!"invalid src D3DSMO");
918 break;
919 }
920 assert(!param->rel);
921 break;
922 case D3DSPR_TEMPFLOAT16:
923 break;
924 default:
925 assert(!"invalid src D3DSPR");
926 }
927 if (param->rel)
928 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
929
930 switch (param->mod) {
931 case NINED3DSPSM_DW:
932 tmp = tx_scratch(tx);
933 /* NOTE: app is not allowed to read w with this modifier */
934 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
935 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
936 src = ureg_src(tmp);
937 break;
938 case NINED3DSPSM_DZ:
939 tmp = tx_scratch(tx);
940 /* NOTE: app is not allowed to read z with this modifier */
941 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
942 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
943 src = ureg_src(tmp);
944 break;
945 default:
946 break;
947 }
948
949 if (param->swizzle != NINED3DSP_NOSWIZZLE)
950 src = ureg_swizzle(src,
951 (param->swizzle >> 0) & 0x3,
952 (param->swizzle >> 2) & 0x3,
953 (param->swizzle >> 4) & 0x3,
954 (param->swizzle >> 6) & 0x3);
955
956 switch (param->mod) {
957 case NINED3DSPSM_ABS:
958 src = ureg_abs(src);
959 break;
960 case NINED3DSPSM_ABSNEG:
961 src = ureg_negate(ureg_abs(src));
962 break;
963 case NINED3DSPSM_NEG:
964 src = ureg_negate(src);
965 break;
966 case NINED3DSPSM_BIAS:
967 tmp = tx_scratch(tx);
968 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
969 src = ureg_src(tmp);
970 break;
971 case NINED3DSPSM_BIASNEG:
972 tmp = tx_scratch(tx);
973 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
974 src = ureg_src(tmp);
975 break;
976 case NINED3DSPSM_NOT:
977 if (tx->native_integers) {
978 tmp = tx_scratch(tx);
979 ureg_NOT(ureg, tmp, src);
980 src = ureg_src(tmp);
981 break;
982 }
983 /* fall through */
984 case NINED3DSPSM_COMP:
985 tmp = tx_scratch(tx);
986 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
987 src = ureg_src(tmp);
988 break;
989 case NINED3DSPSM_DZ:
990 case NINED3DSPSM_DW:
991 /* Already handled*/
992 break;
993 case NINED3DSPSM_SIGN:
994 tmp = tx_scratch(tx);
995 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
996 src = ureg_src(tmp);
997 break;
998 case NINED3DSPSM_SIGNNEG:
999 tmp = tx_scratch(tx);
1000 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1001 src = ureg_src(tmp);
1002 break;
1003 case NINED3DSPSM_X2:
1004 tmp = tx_scratch(tx);
1005 ureg_ADD(ureg, tmp, src, src);
1006 src = ureg_src(tmp);
1007 break;
1008 case NINED3DSPSM_X2NEG:
1009 tmp = tx_scratch(tx);
1010 ureg_ADD(ureg, tmp, src, src);
1011 src = ureg_negate(ureg_src(tmp));
1012 break;
1013 default:
1014 assert(param->mod == NINED3DSPSM_NONE);
1015 break;
1016 }
1017
1018 return src;
1019 }
1020
1021 static struct ureg_dst
1022 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1023 {
1024 struct ureg_dst dst;
1025
1026 switch (param->file)
1027 {
1028 case D3DSPR_TEMP:
1029 assert(!param->rel);
1030 tx_temp_alloc(tx, param->idx);
1031 dst = tx->regs.r[param->idx];
1032 break;
1033 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1034 case D3DSPR_ADDR:
1035 assert(!param->rel);
1036 if (tx->version.major < 2 && !IS_VS) {
1037 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1038 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1039 dst = tx->regs.tS[param->idx];
1040 } else
1041 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1042 tx_texcoord_alloc(tx, param->idx);
1043 dst = ureg_dst(tx->regs.vT[param->idx]);
1044 } else {
1045 tx_addr_alloc(tx, param->idx);
1046 dst = tx->regs.a0;
1047 }
1048 break;
1049 case D3DSPR_RASTOUT:
1050 assert(!param->rel);
1051 switch (param->idx) {
1052 case 0:
1053 if (ureg_dst_is_undef(tx->regs.oPos))
1054 tx->regs.oPos =
1055 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1056 dst = tx->regs.oPos;
1057 break;
1058 case 1:
1059 if (ureg_dst_is_undef(tx->regs.oFog))
1060 tx->regs.oFog =
1061 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1062 dst = tx->regs.oFog;
1063 break;
1064 case 2:
1065 if (ureg_dst_is_undef(tx->regs.oPts))
1066 tx->regs.oPts =
1067 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
1068 dst = tx->regs.oPts;
1069 break;
1070 default:
1071 assert(0);
1072 break;
1073 }
1074 break;
1075 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1076 case D3DSPR_OUTPUT:
1077 if (tx->version.major < 3) {
1078 assert(!param->rel);
1079 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1080 } else {
1081 assert(!param->rel); /* TODO */
1082 assert(param->idx < Elements(tx->regs.o));
1083 dst = tx->regs.o[param->idx];
1084 }
1085 break;
1086 case D3DSPR_ATTROUT: /* VS */
1087 case D3DSPR_COLOROUT: /* PS */
1088 assert(param->idx >= 0 && param->idx < 4);
1089 assert(!param->rel);
1090 tx->info->rt_mask |= 1 << param->idx;
1091 if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
1092 tx->regs.oCol[param->idx] =
1093 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1094 dst = tx->regs.oCol[param->idx];
1095 if (IS_VS && tx->version.major < 3)
1096 dst = ureg_saturate(dst);
1097 break;
1098 case D3DSPR_DEPTHOUT:
1099 assert(!param->rel);
1100 if (ureg_dst_is_undef(tx->regs.oDepth))
1101 tx->regs.oDepth =
1102 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1103 TGSI_WRITEMASK_Z);
1104 dst = tx->regs.oDepth; /* XXX: must write .z component */
1105 break;
1106 case D3DSPR_PREDICATE:
1107 assert(!param->rel);
1108 tx_pred_alloc(tx, param->idx);
1109 dst = tx->regs.p;
1110 break;
1111 case D3DSPR_TEMPFLOAT16:
1112 DBG("unhandled D3DSPR: %u\n", param->file);
1113 break;
1114 default:
1115 assert(!"invalid dst D3DSPR");
1116 break;
1117 }
1118 if (param->rel)
1119 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1120
1121 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1122 dst = ureg_writemask(dst, param->mask);
1123 if (param->mod & NINED3DSPDM_SATURATE)
1124 dst = ureg_saturate(dst);
1125
1126 return dst;
1127 }
1128
1129 static struct ureg_dst
1130 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1131 {
1132 if (param->shift) {
1133 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1134 return tx->regs.tdst;
1135 }
1136 return _tx_dst_param(tx, param);
1137 }
1138
1139 static void
1140 tx_apply_dst0_modifiers(struct shader_translator *tx)
1141 {
1142 struct ureg_dst rdst;
1143 float f;
1144
1145 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1146 return;
1147 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1148
1149 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1150
1151 if (tx->insn.dst[0].shift < 0)
1152 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1153 else
1154 f = 1 << tx->insn.dst[0].shift;
1155
1156 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1157 }
1158
1159 static struct ureg_src
1160 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1161 {
1162 struct ureg_src src;
1163
1164 assert(!param->shift);
1165 assert(!(param->mod & NINED3DSPDM_SATURATE));
1166
1167 switch (param->file) {
1168 case D3DSPR_INPUT:
1169 if (IS_VS) {
1170 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1171 } else {
1172 assert(!param->rel);
1173 assert(param->idx < Elements(tx->regs.v));
1174 src = tx->regs.v[param->idx];
1175 }
1176 break;
1177 default:
1178 src = ureg_src(tx_dst_param(tx, param));
1179 break;
1180 }
1181 if (param->rel)
1182 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1183
1184 if (!param->mask)
1185 WARN("mask is 0, using identity swizzle\n");
1186
1187 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1188 char s[4];
1189 int n;
1190 int c;
1191 for (n = 0, c = 0; c < 4; ++c)
1192 if (param->mask & (1 << c))
1193 s[n++] = c;
1194 assert(n);
1195 for (c = n; c < 4; ++c)
1196 s[c] = s[n - 1];
1197 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1198 }
1199 return src;
1200 }
1201
1202 static HRESULT
1203 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1204 {
1205 struct ureg_program *ureg = tx->ureg;
1206 struct ureg_dst dst;
1207 struct ureg_src src[2];
1208 struct sm1_src_param *src_mat = &tx->insn.src[1];
1209 unsigned i;
1210
1211 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1212 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1213
1214 for (i = 0; i < n; i++)
1215 {
1216 const unsigned m = (1 << i);
1217
1218 src[1] = tx_src_param(tx, src_mat);
1219 src_mat->idx++;
1220
1221 if (!(dst.WriteMask & m))
1222 continue;
1223
1224 /* XXX: src == dst case ? */
1225
1226 switch (k) {
1227 case 3:
1228 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1229 break;
1230 case 4:
1231 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1232 break;
1233 default:
1234 DBG("invalid operation: M%ux%u\n", m, n);
1235 break;
1236 }
1237 }
1238
1239 return D3D_OK;
1240 }
1241
/* VNOTSUPPORTED expands to two zero values (presumably the min/max pair of
 * a version range in the instruction table - confirm at the use site).
 * V(major, minor) packs a shader version into one integer, with the major
 * number in bits 8 and up and the minor number in the low bits. */
#define VNOTSUPPORTED 0, 0
#define V(major_, minor_) (((major_) << 8) | (minor_))
1244
1245 static INLINE const char *
1246 d3dsio_to_string( unsigned opcode )
1247 {
1248 static const char *names[] = {
1249 "NOP",
1250 "MOV",
1251 "ADD",
1252 "SUB",
1253 "MAD",
1254 "MUL",
1255 "RCP",
1256 "RSQ",
1257 "DP3",
1258 "DP4",
1259 "MIN",
1260 "MAX",
1261 "SLT",
1262 "SGE",
1263 "EXP",
1264 "LOG",
1265 "LIT",
1266 "DST",
1267 "LRP",
1268 "FRC",
1269 "M4x4",
1270 "M4x3",
1271 "M3x4",
1272 "M3x3",
1273 "M3x2",
1274 "CALL",
1275 "CALLNZ",
1276 "LOOP",
1277 "RET",
1278 "ENDLOOP",
1279 "LABEL",
1280 "DCL",
1281 "POW",
1282 "CRS",
1283 "SGN",
1284 "ABS",
1285 "NRM",
1286 "SINCOS",
1287 "REP",
1288 "ENDREP",
1289 "IF",
1290 "IFC",
1291 "ELSE",
1292 "ENDIF",
1293 "BREAK",
1294 "BREAKC",
1295 "MOVA",
1296 "DEFB",
1297 "DEFI",
1298 NULL,
1299 NULL,
1300 NULL,
1301 NULL,
1302 NULL,
1303 NULL,
1304 NULL,
1305 NULL,
1306 NULL,
1307 NULL,
1308 NULL,
1309 NULL,
1310 NULL,
1311 NULL,
1312 NULL,
1313 "TEXCOORD",
1314 "TEXKILL",
1315 "TEX",
1316 "TEXBEM",
1317 "TEXBEML",
1318 "TEXREG2AR",
1319 "TEXREG2GB",
1320 "TEXM3x2PAD",
1321 "TEXM3x2TEX",
1322 "TEXM3x3PAD",
1323 "TEXM3x3TEX",
1324 NULL,
1325 "TEXM3x3SPEC",
1326 "TEXM3x3VSPEC",
1327 "EXPP",
1328 "LOGP",
1329 "CND",
1330 "DEF",
1331 "TEXREG2RGB",
1332 "TEXDP3TEX",
1333 "TEXM3x2DEPTH",
1334 "TEXDP3",
1335 "TEXM3x3",
1336 "TEXDEPTH",
1337 "CMP",
1338 "BEM",
1339 "DP2ADD",
1340 "DSX",
1341 "DSY",
1342 "TEXLDD",
1343 "SETP",
1344 "TEXLDL",
1345 "BREAKP"
1346 };
1347
1348 if (opcode < Elements(names)) return names[opcode];
1349
1350 switch (opcode) {
1351 case D3DSIO_PHASE: return "PHASE";
1352 case D3DSIO_COMMENT: return "COMMENT";
1353 case D3DSIO_END: return "END";
1354 default:
1355 return NULL;
1356 }
1357 }
1358
/* All-zero initializer for an instruction table entry. */
#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }

/* Non-zero when any of the four version bounds of `insn` is non-zero. */
#define IS_VALID_INSTRUCTION(insn) ((insn).vert_version.min | \
                                    (insn).vert_version.max | \
                                    (insn).frag_version.min | \
                                    (insn).frag_version.max)

/* Name of the translation handler for opcode `opname`. */
#define SPECIAL(opname) \
    NineTranslateInstruction_##opname

/* Opens the definition of the translation handler for opcode `opname`;
 * the handler receives the translator as `tx` and returns an HRESULT. */
#define DECL_SPECIAL(opname) \
    static HRESULT \
    NineTranslateInstruction_##opname( struct shader_translator *tx )
1371
1372 static HRESULT
1373 NineTranslateInstruction_Generic(struct shader_translator *);
1374
1375 DECL_SPECIAL(M4x4)
1376 {
1377 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1378 }
1379
1380 DECL_SPECIAL(M4x3)
1381 {
1382 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1383 }
1384
1385 DECL_SPECIAL(M3x4)
1386 {
1387 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1388 }
1389
1390 DECL_SPECIAL(M3x3)
1391 {
1392 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1393 }
1394
1395 DECL_SPECIAL(M3x2)
1396 {
1397 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1398 }
1399
1400 DECL_SPECIAL(CMP)
1401 {
1402 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1403 tx_src_param(tx, &tx->insn.src[0]),
1404 tx_src_param(tx, &tx->insn.src[2]),
1405 tx_src_param(tx, &tx->insn.src[1]));
1406 return D3D_OK;
1407 }
1408
1409 DECL_SPECIAL(CND)
1410 {
1411 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1412 struct ureg_dst cgt;
1413 struct ureg_src cnd;
1414
1415 /* the coissue flag was a tip for compilers to advise to
1416 * execute two operations at the same time, in cases
1417 * the two executions had same dst with different channels.
1418 * It has no effect on current hw. However it seems CND
1419 * is affected. The handling of this very specific case
1420 * handled below mimick wine behaviour */
1421 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1422 ureg_MOV(tx->ureg,
1423 dst, tx_src_param(tx, &tx->insn.src[1]));
1424 return D3D_OK;
1425 }
1426
1427 cnd = tx_src_param(tx, &tx->insn.src[0]);
1428 cgt = tx_scratch(tx);
1429
1430 if (tx->version.major == 1 && tx->version.minor < 4)
1431 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1432
1433 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1434
1435 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1436 tx_src_param(tx, &tx->insn.src[1]),
1437 tx_src_param(tx, &tx->insn.src[2]));
1438 return D3D_OK;
1439 }
1440
1441 DECL_SPECIAL(CALL)
1442 {
1443 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1444 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1445 return D3D_OK;
1446 }
1447
1448 DECL_SPECIAL(CALLNZ)
1449 {
1450 struct ureg_program *ureg = tx->ureg;
1451 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1452
1453 if (!tx->native_integers)
1454 ureg_IF(ureg, src, tx_cond(tx));
1455 else
1456 ureg_UIF(ureg, src, tx_cond(tx));
1457 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1458 tx_endcond(tx);
1459 ureg_ENDIF(ureg);
1460 return D3D_OK;
1461 }
1462
1463 DECL_SPECIAL(MOV_vs1x)
1464 {
1465 if (tx->insn.dst[0].file == D3DSPR_ADDR) {
1466 /* Implementation note: We don't write directly
1467 * to the addr register, but to an intermediate
1468 * float register.
1469 * Contrary to the doc, when writing to ADDR here,
1470 * the rounding is not to nearest, but to lowest
1471 * (wine test).
1472 * Since we use ARR next, substract 0.5. */
1473 ureg_SUB(tx->ureg,
1474 tx_dst_param(tx, &tx->insn.dst[0]),
1475 tx_src_param(tx, &tx->insn.src[0]),
1476 ureg_imm1f(tx->ureg, 0.5f));
1477 return D3D_OK;
1478 }
1479 return NineTranslateInstruction_Generic(tx);
1480 }
1481
1482 DECL_SPECIAL(LOOP)
1483 {
1484 struct ureg_program *ureg = tx->ureg;
1485 unsigned *label;
1486 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1487 struct ureg_dst ctr;
1488 struct ureg_dst tmp;
1489 struct ureg_src ctrx;
1490
1491 label = tx_bgnloop(tx);
1492 ctr = tx_get_loopctr(tx, TRUE);
1493 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1494
1495 /* src: num_iterations - start_value of al - step for al - 0 */
1496 ureg_MOV(ureg, ctr, src);
1497 ureg_BGNLOOP(tx->ureg, label);
1498 tmp = tx_scratch_scalar(tx);
1499 /* Initially ctr.x contains the number of iterations.
1500 * ctr.y will contain the updated value of al.
1501 * We decrease ctr.x at the end of every iteration,
1502 * and stop when it reaches 0. */
1503
1504 if (!tx->native_integers) {
1505 /* case src and ctr contain floats */
1506 /* to avoid precision issue, we stop when ctr <= 0.5 */
1507 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1508 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1509 } else {
1510 /* case src and ctr contain integers */
1511 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1512 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1513 }
1514 ureg_BRK(ureg);
1515 tx_endcond(tx);
1516 ureg_ENDIF(ureg);
1517 return D3D_OK;
1518 }
1519
1520 DECL_SPECIAL(RET)
1521 {
1522 ureg_RET(tx->ureg);
1523 return D3D_OK;
1524 }
1525
1526 DECL_SPECIAL(ENDLOOP)
1527 {
1528 struct ureg_program *ureg = tx->ureg;
1529 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1530 struct ureg_dst dst_ctrx, dst_al;
1531 struct ureg_src src_ctr, al_counter;
1532
1533 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1534 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1535 src_ctr = ureg_src(ctr);
1536 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1537
1538 /* ctr.x -= 1
1539 * ctr.y (aL) += step */
1540 if (!tx->native_integers) {
1541 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1542 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1543 } else {
1544 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1545 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1546 }
1547 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1548 return D3D_OK;
1549 }
1550
1551 DECL_SPECIAL(LABEL)
1552 {
1553 unsigned k = tx->num_inst_labels;
1554 unsigned n = tx->insn.src[0].idx;
1555 assert(n < 2048);
1556 if (n >= k)
1557 tx->inst_labels = REALLOC(tx->inst_labels,
1558 k * sizeof(tx->inst_labels[0]),
1559 n * sizeof(tx->inst_labels[0]));
1560
1561 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1562 return D3D_OK;
1563 }
1564
1565 DECL_SPECIAL(SINCOS)
1566 {
1567 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1568 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1569
1570 assert(!(dst.WriteMask & 0xc));
1571
1572 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1573 ureg_SCS(tx->ureg, dst, src);
1574 return D3D_OK;
1575 }
1576
1577 DECL_SPECIAL(SGN)
1578 {
1579 ureg_SSG(tx->ureg,
1580 tx_dst_param(tx, &tx->insn.dst[0]),
1581 tx_src_param(tx, &tx->insn.src[0]));
1582 return D3D_OK;
1583 }
1584
1585 DECL_SPECIAL(REP)
1586 {
1587 struct ureg_program *ureg = tx->ureg;
1588 unsigned *label;
1589 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1590 struct ureg_dst ctr;
1591 struct ureg_dst tmp;
1592 struct ureg_src ctrx;
1593
1594 label = tx_bgnloop(tx);
1595 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1596 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1597
1598 /* NOTE: rep must be constant, so we don't have to save the count */
1599 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1600
1601 /* rep: num_iterations - 0 - 0 - 0 */
1602 ureg_MOV(ureg, ctr, rep);
1603 ureg_BGNLOOP(ureg, label);
1604 tmp = tx_scratch_scalar(tx);
1605 /* Initially ctr.x contains the number of iterations.
1606 * We decrease ctr.x at the end of every iteration,
1607 * and stop when it reaches 0. */
1608
1609 if (!tx->native_integers) {
1610 /* case src and ctr contain floats */
1611 /* to avoid precision issue, we stop when ctr <= 0.5 */
1612 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1613 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1614 } else {
1615 /* case src and ctr contain integers */
1616 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1617 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1618 }
1619 ureg_BRK(ureg);
1620 tx_endcond(tx);
1621 ureg_ENDIF(ureg);
1622
1623 return D3D_OK;
1624 }
1625
1626 DECL_SPECIAL(ENDREP)
1627 {
1628 struct ureg_program *ureg = tx->ureg;
1629 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1630 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1631 struct ureg_src src_ctr = ureg_src(ctr);
1632
1633 /* ctr.x -= 1 */
1634 if (!tx->native_integers)
1635 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1636 else
1637 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1638
1639 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1640 return D3D_OK;
1641 }
1642
1643 DECL_SPECIAL(ENDIF)
1644 {
1645 tx_endcond(tx);
1646 ureg_ENDIF(tx->ureg);
1647 return D3D_OK;
1648 }
1649
1650 DECL_SPECIAL(IF)
1651 {
1652 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1653
1654 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1655 ureg_UIF(tx->ureg, src, tx_cond(tx));
1656 else
1657 ureg_IF(tx->ureg, src, tx_cond(tx));
1658
1659 return D3D_OK;
1660 }
1661
1662 static INLINE unsigned
1663 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1664 {
1665 switch (flags) {
1666 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1667 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1668 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1669 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1670 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1671 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1672 default:
1673 assert(!"invalid comparison flags");
1674 return TGSI_OPCODE_SGT;
1675 }
1676 }
1677
1678 DECL_SPECIAL(IFC)
1679 {
1680 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1681 struct ureg_src src[2];
1682 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1683 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1684 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1685 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1686 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1687 return D3D_OK;
1688 }
1689
1690 DECL_SPECIAL(ELSE)
1691 {
1692 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1693 return D3D_OK;
1694 }
1695
1696 DECL_SPECIAL(BREAKC)
1697 {
1698 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1699 struct ureg_src src[2];
1700 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1701 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1702 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1703 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1704 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1705 ureg_BRK(tx->ureg);
1706 tx_endcond(tx);
1707 ureg_ENDIF(tx->ureg);
1708 return D3D_OK;
1709 }
1710
1711 static const char *sm1_declusage_names[] =
1712 {
1713 [D3DDECLUSAGE_POSITION] = "POSITION",
1714 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1715 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1716 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1717 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1718 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1719 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1720 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1721 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1722 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1723 [D3DDECLUSAGE_COLOR] = "COLOR",
1724 [D3DDECLUSAGE_FOG] = "FOG",
1725 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1726 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1727 };
1728
1729 static INLINE unsigned
1730 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1731 {
1732 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1733 }
1734
1735 static void
1736 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1737 boolean tc,
1738 struct sm1_semantic *dcl)
1739 {
1740 BYTE index = dcl->usage_idx;
1741
1742 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1743 * we match to a TGSI_SEMANTIC_GENERIC with index.
1744 *
1745 * The index can be anything UINT16 and usage_idx is BYTE,
1746 * so we can fit everything. It doesn't matter if indices
1747 * are close together or low.
1748 *
1749 *
1750 * POSITION >= 1: 10 * index + 6
1751 * COLOR >= 2: 10 * (index-1) + 7
1752 * TEXCOORD[0..15]: index
1753 * BLENDWEIGHT: 10 * index + 18
1754 * BLENDINDICES: 10 * index + 19
1755 * NORMAL: 10 * index + 20
1756 * TANGENT: 10 * index + 21
1757 * BINORMAL: 10 * index + 22
1758 * TESSFACTOR: 10 * index + 23
1759 */
1760
1761 switch (dcl->usage) {
1762 case D3DDECLUSAGE_POSITION:
1763 case D3DDECLUSAGE_POSITIONT:
1764 case D3DDECLUSAGE_DEPTH:
1765 if (index == 0) {
1766 sem->Name = TGSI_SEMANTIC_POSITION;
1767 sem->Index = 0;
1768 } else {
1769 sem->Name = TGSI_SEMANTIC_GENERIC;
1770 sem->Index = 10 * index + 6;
1771 }
1772 break;
1773 case D3DDECLUSAGE_COLOR:
1774 if (index < 2) {
1775 sem->Name = TGSI_SEMANTIC_COLOR;
1776 sem->Index = index;
1777 } else {
1778 sem->Name = TGSI_SEMANTIC_GENERIC;
1779 sem->Index = 10 * (index-1) + 7;
1780 }
1781 break;
1782 case D3DDECLUSAGE_FOG:
1783 assert(index == 0);
1784 sem->Name = TGSI_SEMANTIC_FOG;
1785 sem->Index = 0;
1786 break;
1787 case D3DDECLUSAGE_PSIZE:
1788 assert(index == 0);
1789 sem->Name = TGSI_SEMANTIC_PSIZE;
1790 sem->Index = 0;
1791 break;
1792 case D3DDECLUSAGE_TEXCOORD:
1793 assert(index < 16);
1794 if (index < 8 && tc)
1795 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1796 else
1797 sem->Name = TGSI_SEMANTIC_GENERIC;
1798 sem->Index = index;
1799 break;
1800 case D3DDECLUSAGE_BLENDWEIGHT:
1801 sem->Name = TGSI_SEMANTIC_GENERIC;
1802 sem->Index = 10 * index + 18;
1803 break;
1804 case D3DDECLUSAGE_BLENDINDICES:
1805 sem->Name = TGSI_SEMANTIC_GENERIC;
1806 sem->Index = 10 * index + 19;
1807 break;
1808 case D3DDECLUSAGE_NORMAL:
1809 sem->Name = TGSI_SEMANTIC_GENERIC;
1810 sem->Index = 10 * index + 20;
1811 break;
1812 case D3DDECLUSAGE_TANGENT:
1813 sem->Name = TGSI_SEMANTIC_GENERIC;
1814 sem->Index = 10 * index + 21;
1815 break;
1816 case D3DDECLUSAGE_BINORMAL:
1817 sem->Name = TGSI_SEMANTIC_GENERIC;
1818 sem->Index = 10 * index + 22;
1819 break;
1820 case D3DDECLUSAGE_TESSFACTOR:
1821 sem->Name = TGSI_SEMANTIC_GENERIC;
1822 sem->Index = 10 * index + 23;
1823 break;
1824 case D3DDECLUSAGE_SAMPLE:
1825 sem->Name = TGSI_SEMANTIC_COUNT;
1826 sem->Index = 0;
1827 break;
1828 default:
1829 assert(!"Invalid DECLUSAGE.");
1830 break;
1831 }
1832 }
1833
1834 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
1835 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
1836 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
1837 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1838 static INLINE unsigned
1839 d3dstt_to_tgsi_tex(BYTE sampler_type)
1840 {
1841 switch (sampler_type) {
1842 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
1843 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
1844 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
1845 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
1846 default:
1847 assert(0);
1848 return TGSI_TEXTURE_UNKNOWN;
1849 }
1850 }
1851 static INLINE unsigned
1852 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
1853 {
1854 switch (sampler_type) {
1855 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
1856 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
1857 case NINED3DSTT_VOLUME:
1858 case NINED3DSTT_CUBE:
1859 default:
1860 assert(0);
1861 return TGSI_TEXTURE_UNKNOWN;
1862 }
1863 }
1864 static INLINE unsigned
1865 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
1866 {
1867 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
1868 case 1: return TGSI_TEXTURE_1D;
1869 case 0: return TGSI_TEXTURE_2D;
1870 case 3: return TGSI_TEXTURE_3D;
1871 default:
1872 return TGSI_TEXTURE_CUBE;
1873 }
1874 }
1875
1876 static const char *
1877 sm1_sampler_type_name(BYTE sampler_type)
1878 {
1879 switch (sampler_type) {
1880 case NINED3DSTT_1D: return "1D";
1881 case NINED3DSTT_2D: return "2D";
1882 case NINED3DSTT_VOLUME: return "VOLUME";
1883 case NINED3DSTT_CUBE: return "CUBE";
1884 default:
1885 return "(D3DSTT_?)";
1886 }
1887 }
1888
1889 static INLINE unsigned
1890 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
1891 {
1892 switch (sem->Name) {
1893 case TGSI_SEMANTIC_POSITION:
1894 case TGSI_SEMANTIC_NORMAL:
1895 return TGSI_INTERPOLATE_LINEAR;
1896 case TGSI_SEMANTIC_BCOLOR:
1897 case TGSI_SEMANTIC_COLOR:
1898 case TGSI_SEMANTIC_FOG:
1899 case TGSI_SEMANTIC_GENERIC:
1900 case TGSI_SEMANTIC_TEXCOORD:
1901 case TGSI_SEMANTIC_CLIPDIST:
1902 case TGSI_SEMANTIC_CLIPVERTEX:
1903 return TGSI_INTERPOLATE_PERSPECTIVE;
1904 case TGSI_SEMANTIC_EDGEFLAG:
1905 case TGSI_SEMANTIC_FACE:
1906 case TGSI_SEMANTIC_INSTANCEID:
1907 case TGSI_SEMANTIC_PCOORD:
1908 case TGSI_SEMANTIC_PRIMID:
1909 case TGSI_SEMANTIC_PSIZE:
1910 case TGSI_SEMANTIC_VERTEXID:
1911 return TGSI_INTERPOLATE_CONSTANT;
1912 default:
1913 assert(0);
1914 return TGSI_INTERPOLATE_CONSTANT;
1915 }
1916 }
1917
1918 DECL_SPECIAL(DCL)
1919 {
1920 struct ureg_program *ureg = tx->ureg;
1921 boolean is_input;
1922 boolean is_sampler;
1923 struct tgsi_declaration_semantic tgsi;
1924 struct sm1_semantic sem;
1925 sm1_read_semantic(tx, &sem);
1926
1927 is_input = sem.reg.file == D3DSPR_INPUT;
1928 is_sampler =
1929 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
1930
1931 DUMP("DCL ");
1932 sm1_dump_dst_param(&sem.reg);
1933 if (is_sampler)
1934 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
1935 else
1936 if (tx->version.major >= 3)
1937 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
1938 else
1939 if (sem.usage | sem.usage_idx)
1940 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
1941 else
1942 DUMP("\n");
1943
1944 if (is_sampler) {
1945 const unsigned m = 1 << sem.reg.idx;
1946 ureg_DECL_sampler(ureg, sem.reg.idx);
1947 tx->info->sampler_mask |= m;
1948 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
1949 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
1950 d3dstt_to_tgsi_tex(sem.sampler_type);
1951 return D3D_OK;
1952 }
1953
1954 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
1955 if (IS_VS) {
1956 if (is_input) {
1957 /* linkage outside of shader with vertex declaration */
1958 ureg_DECL_vs_input(ureg, sem.reg.idx);
1959 assert(sem.reg.idx < Elements(tx->info->input_map));
1960 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
1961 tx->info->num_inputs = sem.reg.idx + 1;
1962 /* NOTE: preserving order in case of indirect access */
1963 } else
1964 if (tx->version.major >= 3) {
1965 /* SM2 output semantic determined by file */
1966 assert(sem.reg.mask != 0);
1967 if (sem.usage == D3DDECLUSAGE_POSITIONT)
1968 tx->info->position_t = TRUE;
1969 assert(sem.reg.idx < Elements(tx->regs.o));
1970 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
1971 ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1972
1973 if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
1974 tx->regs.oPts = tx->regs.o[sem.reg.idx];
1975 }
1976 } else {
1977 if (is_input && tx->version.major >= 3) {
1978 /* SM3 only, SM2 input semantic determined by file */
1979 assert(sem.reg.idx < Elements(tx->regs.v));
1980 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
1981 ureg, tgsi.Name, tgsi.Index,
1982 nine_tgsi_to_interp_mode(&tgsi),
1983 0, /* cylwrap */
1984 sem.reg.mod & NINED3DSPDM_CENTROID);
1985 } else
1986 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
1987 /* FragColor or FragDepth */
1988 assert(sem.reg.mask != 0);
1989 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
1990 }
1991 }
1992 return D3D_OK;
1993 }
1994
1995 DECL_SPECIAL(DEF)
1996 {
1997 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
1998 return D3D_OK;
1999 }
2000
2001 DECL_SPECIAL(DEFB)
2002 {
2003 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2004 return D3D_OK;
2005 }
2006
2007 DECL_SPECIAL(DEFI)
2008 {
2009 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2010 return D3D_OK;
2011 }
2012
2013 DECL_SPECIAL(POW)
2014 {
2015 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2016 struct ureg_src src[2] = {
2017 tx_src_param(tx, &tx->insn.src[0]),
2018 tx_src_param(tx, &tx->insn.src[1])
2019 };
2020 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2021 return D3D_OK;
2022 }
2023
2024 DECL_SPECIAL(RSQ)
2025 {
2026 struct ureg_program *ureg = tx->ureg;
2027 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2028 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2029 struct ureg_dst tmp = tx_scratch(tx);
2030 ureg_RSQ(ureg, tmp, ureg_abs(src));
2031 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2032 return D3D_OK;
2033 }
2034
2035 DECL_SPECIAL(LOG)
2036 {
2037 struct ureg_program *ureg = tx->ureg;
2038 struct ureg_dst tmp = tx_scratch_scalar(tx);
2039 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2040 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2041 ureg_LG2(ureg, tmp, ureg_abs(src));
2042 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2043 return D3D_OK;
2044 }
2045
2046 DECL_SPECIAL(NRM)
2047 {
2048 struct ureg_program *ureg = tx->ureg;
2049 struct ureg_dst tmp = tx_scratch_scalar(tx);
2050 struct ureg_src nrm = tx_src_scalar(tmp);
2051 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2052 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2053 ureg_DP3(ureg, tmp, src, src);
2054 ureg_RSQ(ureg, tmp, nrm);
2055 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2056 ureg_MUL(ureg, dst, src, nrm);
2057 return D3D_OK;
2058 }
2059
2060 DECL_SPECIAL(DP2ADD)
2061 {
2062 struct ureg_dst tmp = tx_scratch_scalar(tx);
2063 struct ureg_src dp2 = tx_src_scalar(tmp);
2064 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2065 struct ureg_src src[3];
2066 int i;
2067 for (i = 0; i < 3; ++i)
2068 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2069 assert_replicate_swizzle(&src[2]);
2070
2071 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2072 ureg_ADD(tx->ureg, dst, src[2], dp2);
2073
2074 return D3D_OK;
2075 }
2076
2077 DECL_SPECIAL(TEXCOORD)
2078 {
2079 struct ureg_program *ureg = tx->ureg;
2080 const unsigned s = tx->insn.dst[0].idx;
2081 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2082
2083 tx_texcoord_alloc(tx, s);
2084 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2085 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2086
2087 return D3D_OK;
2088 }
2089
2090 DECL_SPECIAL(TEXCOORD_ps14)
2091 {
2092 struct ureg_program *ureg = tx->ureg;
2093 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2094 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2095
2096 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2097
2098 ureg_MOV(ureg, dst, src);
2099
2100 return D3D_OK;
2101 }
2102
2103 DECL_SPECIAL(TEXKILL)
2104 {
2105 struct ureg_src reg;
2106
2107 if (tx->version.major > 1 || tx->version.minor > 3) {
2108 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2109 } else {
2110 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2111 reg = tx->regs.vT[tx->insn.dst[0].idx];
2112 }
2113 if (tx->version.major < 2)
2114 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2115 ureg_KILL_IF(tx->ureg, reg);
2116
2117 return D3D_OK;
2118 }
2119
2120 DECL_SPECIAL(TEXBEM)
2121 {
2122 STUB(D3DERR_INVALIDCALL);
2123 }
2124
2125 DECL_SPECIAL(TEXBEML)
2126 {
2127 STUB(D3DERR_INVALIDCALL);
2128 }
2129
2130 DECL_SPECIAL(TEXREG2AR)
2131 {
2132 struct ureg_program *ureg = tx->ureg;
2133 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2134 struct ureg_src sample;
2135 const int m = tx->insn.dst[0].idx;
2136 const int n = tx->insn.src[0].idx;
2137 assert(m >= 0 && m > n);
2138
2139 sample = ureg_DECL_sampler(ureg, m);
2140 tx->info->sampler_mask |= 1 << m;
2141 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2142
2143 return D3D_OK;
2144 }
2145
2146 DECL_SPECIAL(TEXREG2GB)
2147 {
2148 struct ureg_program *ureg = tx->ureg;
2149 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2150 struct ureg_src sample;
2151 const int m = tx->insn.dst[0].idx;
2152 const int n = tx->insn.src[0].idx;
2153 assert(m >= 0 && m > n);
2154
2155 sample = ureg_DECL_sampler(ureg, m);
2156 tx->info->sampler_mask |= 1 << m;
2157 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2158
2159 return D3D_OK;
2160 }
2161
2162 DECL_SPECIAL(TEXM3x2PAD)
2163 {
2164 return D3D_OK; /* this is just padding */
2165 }
2166
2167 DECL_SPECIAL(TEXM3x2TEX)
2168 {
2169 struct ureg_program *ureg = tx->ureg;
2170 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2171 struct ureg_src sample;
2172 const int m = tx->insn.dst[0].idx - 1;
2173 const int n = tx->insn.src[0].idx;
2174 assert(m >= 0 && m > n);
2175
2176 tx_texcoord_alloc(tx, m);
2177 tx_texcoord_alloc(tx, m+1);
2178
2179 /* performs the matrix multiplication */
2180 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2181 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2182
2183 sample = ureg_DECL_sampler(ureg, m + 1);
2184 tx->info->sampler_mask |= 1 << (m + 1);
2185 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2186
2187 return D3D_OK;
2188 }
2189
2190 DECL_SPECIAL(TEXM3x3PAD)
2191 {
2192 return D3D_OK; /* this is just padding */
2193 }
2194
2195 DECL_SPECIAL(TEXM3x3SPEC)
2196 {
2197 struct ureg_program *ureg = tx->ureg;
2198 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2199 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2200 struct ureg_src sample;
2201 struct ureg_dst tmp;
2202 const int m = tx->insn.dst[0].idx - 2;
2203 const int n = tx->insn.src[0].idx;
2204 assert(m >= 0 && m > n);
2205
2206 tx_texcoord_alloc(tx, m);
2207 tx_texcoord_alloc(tx, m+1);
2208 tx_texcoord_alloc(tx, m+2);
2209
2210 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2211 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2212 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2213
2214 sample = ureg_DECL_sampler(ureg, m + 2);
2215 tx->info->sampler_mask |= 1 << (m + 2);
2216 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2217
2218 /* At this step, dst = N = (u', w', z').
2219 * We want dst to be the texture sampled at (u'', w'', z''), with
2220 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2221 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2222 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2223 /* at this step tmp.x = 1/N.N */
2224 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2225 /* at this step tmp.y = N.E */
2226 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2227 /* at this step tmp.x = N.E/N.N */
2228 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2229 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2230 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2231 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2232 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2233
2234 return D3D_OK;
2235 }
2236
2237 DECL_SPECIAL(TEXREG2RGB)
2238 {
2239 struct ureg_program *ureg = tx->ureg;
2240 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2241 struct ureg_src sample;
2242 const int m = tx->insn.dst[0].idx;
2243 const int n = tx->insn.src[0].idx;
2244 assert(m >= 0 && m > n);
2245
2246 sample = ureg_DECL_sampler(ureg, m);
2247 tx->info->sampler_mask |= 1 << m;
2248 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2249
2250 return D3D_OK;
2251 }
2252
2253 DECL_SPECIAL(TEXDP3TEX)
2254 {
2255 struct ureg_program *ureg = tx->ureg;
2256 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2257 struct ureg_dst tmp;
2258 struct ureg_src sample;
2259 const int m = tx->insn.dst[0].idx;
2260 const int n = tx->insn.src[0].idx;
2261 assert(m >= 0 && m > n);
2262
2263 tx_texcoord_alloc(tx, m);
2264
2265 tmp = tx_scratch(tx);
2266 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2267 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2268
2269 sample = ureg_DECL_sampler(ureg, m);
2270 tx->info->sampler_mask |= 1 << m;
2271 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2272
2273 return D3D_OK;
2274 }
2275
2276 DECL_SPECIAL(TEXM3x2DEPTH)
2277 {
2278 struct ureg_program *ureg = tx->ureg;
2279 struct ureg_dst tmp;
2280 const int m = tx->insn.dst[0].idx - 1;
2281 const int n = tx->insn.src[0].idx;
2282 assert(m >= 0 && m > n);
2283
2284 tx_texcoord_alloc(tx, m);
2285 tx_texcoord_alloc(tx, m+1);
2286
2287 tmp = tx_scratch(tx);
2288
2289 /* performs the matrix multiplication */
2290 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2291 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2292
2293 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2294 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2295 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2296 /* res = 'w' == 0 ? 1.0 : z/w */
2297 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2298 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2299 /* replace the depth for depth testing with the result */
2300 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z);
2301 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2302 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2303 return D3D_OK;
2304 }
2305
2306 DECL_SPECIAL(TEXDP3)
2307 {
2308 struct ureg_program *ureg = tx->ureg;
2309 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2310 const int m = tx->insn.dst[0].idx;
2311 const int n = tx->insn.src[0].idx;
2312 assert(m >= 0 && m > n);
2313
2314 tx_texcoord_alloc(tx, m);
2315
2316 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2317
2318 return D3D_OK;
2319 }
2320
2321 DECL_SPECIAL(TEXM3x3)
2322 {
2323 struct ureg_program *ureg = tx->ureg;
2324 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2325 struct ureg_src sample;
2326 struct ureg_dst E, tmp;
2327 const int m = tx->insn.dst[0].idx - 2;
2328 const int n = tx->insn.src[0].idx;
2329 assert(m >= 0 && m > n);
2330
2331 tx_texcoord_alloc(tx, m);
2332 tx_texcoord_alloc(tx, m+1);
2333 tx_texcoord_alloc(tx, m+2);
2334
2335 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2336 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2337 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2338
2339 switch (tx->insn.opcode) {
2340 case D3DSIO_TEXM3x3:
2341 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2342 break;
2343 case D3DSIO_TEXM3x3TEX:
2344 sample = ureg_DECL_sampler(ureg, m + 2);
2345 tx->info->sampler_mask |= 1 << (m + 2);
2346 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2347 break;
2348 case D3DSIO_TEXM3x3VSPEC:
2349 sample = ureg_DECL_sampler(ureg, m + 2);
2350 tx->info->sampler_mask |= 1 << (m + 2);
2351 E = tx_scratch(tx);
2352 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2353 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2354 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2355 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2356 /* At this step, dst = N = (u', w', z').
2357 * We want dst to be the texture sampled at (u'', w'', z''), with
2358 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2359 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2360 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2361 /* at this step tmp.x = 1/N.N */
2362 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2363 /* at this step tmp.y = N.E */
2364 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2365 /* at this step tmp.x = N.E/N.N */
2366 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2367 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2368 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2369 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2370 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2371 break;
2372 default:
2373 return D3DERR_INVALIDCALL;
2374 }
2375 return D3D_OK;
2376 }
2377
2378 DECL_SPECIAL(TEXDEPTH)
2379 {
2380 struct ureg_program *ureg = tx->ureg;
2381 struct ureg_dst r5;
2382 struct ureg_src r5r, r5g;
2383
2384 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2385
2386 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2387 * r5 won't be used afterward, thus we can use r5.ba */
2388 r5 = tx->regs.r[5];
2389 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2390 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2391
2392 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2393 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2394 /* r5.r = r/g */
2395 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2396 r5r, ureg_imm1f(ureg, 1.0f));
2397 /* replace the depth for depth testing with the result */
2398 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z);
2399 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2400
2401 return D3D_OK;
2402 }
2403
/* Handler for BEM: not implemented. STUB is defined outside this section;
 * presumably it reports the stub and returns the given HRESULT - confirm. */
DECL_SPECIAL(BEM)
{
    STUB(D3DERR_INVALIDCALL);
}
2408
2409 DECL_SPECIAL(TEXLD)
2410 {
2411 struct ureg_program *ureg = tx->ureg;
2412 unsigned target;
2413 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2414 struct ureg_src src[2] = {
2415 tx_src_param(tx, &tx->insn.src[0]),
2416 tx_src_param(tx, &tx->insn.src[1])
2417 };
2418 assert(tx->insn.src[1].idx >= 0 &&
2419 tx->insn.src[1].idx < Elements(tx->sampler_targets));
2420 target = tx->sampler_targets[tx->insn.src[1].idx];
2421
2422 switch (tx->insn.flags) {
2423 case 0:
2424 ureg_TEX(ureg, dst, target, src[0], src[1]);
2425 break;
2426 case NINED3DSI_TEXLD_PROJECT:
2427 ureg_TXP(ureg, dst, target, src[0], src[1]);
2428 break;
2429 case NINED3DSI_TEXLD_BIAS:
2430 ureg_TXB(ureg, dst, target, src[0], src[1]);
2431 break;
2432 default:
2433 assert(0);
2434 return D3DERR_INVALIDCALL;
2435 }
2436 return D3D_OK;
2437 }
2438
2439 DECL_SPECIAL(TEXLD_14)
2440 {
2441 struct ureg_program *ureg = tx->ureg;
2442 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2443 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2444 const unsigned s = tx->insn.dst[0].idx;
2445 const unsigned t = ps1x_sampler_type(tx->info, s);
2446
2447 tx->info->sampler_mask |= 1 << s;
2448 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2449
2450 return D3D_OK;
2451 }
2452
2453 DECL_SPECIAL(TEX)
2454 {
2455 struct ureg_program *ureg = tx->ureg;
2456 const unsigned s = tx->insn.dst[0].idx;
2457 const unsigned t = ps1x_sampler_type(tx->info, s);
2458 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2459 struct ureg_src src[2];
2460
2461 tx_texcoord_alloc(tx, s);
2462
2463 src[0] = tx->regs.vT[s];
2464 src[1] = ureg_DECL_sampler(ureg, s);
2465 tx->info->sampler_mask |= 1 << s;
2466
2467 ureg_TEX(ureg, dst, t, src[0], src[1]);
2468
2469 return D3D_OK;
2470 }
2471
2472 DECL_SPECIAL(TEXLDD)
2473 {
2474 unsigned target;
2475 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2476 struct ureg_src src[4] = {
2477 tx_src_param(tx, &tx->insn.src[0]),
2478 tx_src_param(tx, &tx->insn.src[1]),
2479 tx_src_param(tx, &tx->insn.src[2]),
2480 tx_src_param(tx, &tx->insn.src[3])
2481 };
2482 assert(tx->insn.src[3].idx >= 0 &&
2483 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2484 target = tx->sampler_targets[tx->insn.src[1].idx];
2485
2486 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2487 return D3D_OK;
2488 }
2489
2490 DECL_SPECIAL(TEXLDL)
2491 {
2492 unsigned target;
2493 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2494 struct ureg_src src[2] = {
2495 tx_src_param(tx, &tx->insn.src[0]),
2496 tx_src_param(tx, &tx->insn.src[1])
2497 };
2498 assert(tx->insn.src[3].idx >= 0 &&
2499 tx->insn.src[3].idx < Elements(tx->sampler_targets));
2500 target = tx->sampler_targets[tx->insn.src[1].idx];
2501
2502 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2503 return D3D_OK;
2504 }
2505
/* Handler for SETP: not implemented. STUB is defined outside this section;
 * presumably it reports the stub and returns the given HRESULT - confirm. */
DECL_SPECIAL(SETP)
{
    STUB(D3DERR_INVALIDCALL);
}
2510
/* Handler for BREAKP: not implemented. STUB is defined outside this section;
 * presumably it reports the stub and returns the given HRESULT - confirm. */
DECL_SPECIAL(BREAKP)
{
    STUB(D3DERR_INVALIDCALL);
}
2515
/* Handler for PHASE: emits no code and always reports success. */
DECL_SPECIAL(PHASE)
{
    return D3D_OK; /* we don't care about phase */
}
2520
/* Handler for COMMENT: emits no code and always reports success. */
DECL_SPECIAL(COMMENT)
{
    return D3D_OK; /* nothing to do */
}
2525
2526
/* Builds one struct sm1_op_info initializer:
 *   o        D3D opcode suffix (expands to D3DSIO_<o>)
 *   t        TGSI opcode suffix (expands to TGSI_OPCODE_<t>)
 *   vv1,vv2  first pair of version bounds
 *   pv1,pv2  second pair of version bounds
 *   d, s     destination and source operand counts
 *   h        special handler, or NULL for the generic translation path
 * The positional order presumably matches the struct's fields (sio, opcode,
 * vert_version {min,max}, frag_version {min,max}, ndst, nsrc, handler);
 * the struct is declared outside this section - confirm. */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }

/* Opcode description table. An opcode may appear several times with
 * different version ranges; create_op_info_map() picks the entry whose
 * range contains the shader version being translated. */
struct sm1_op_info inst_table[] =
{
    _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
    _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
    _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
    _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
    _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
    _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
    _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
    _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
    _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
    _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
    _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
    _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
    _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
    _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
    _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
    _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
    _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
    _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
    _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */

    /* Matrix multiplies: no single TGSI opcode, always handled specially */
    _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
    _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
    _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
    _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
    _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),

    /* Subroutines and loops */
    _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
    _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
    _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
    _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
    _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
    _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),

    _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),

    _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
    _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
    _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
    _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */

    /* SINCOS takes three sources up to version 2.1 and one source in 3.0 */
    _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
    _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

    /* More flow control */
    _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
    _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
    _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
    _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
    _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
    _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
    _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
    _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
    /* we don't write to the address register, but a normal register (copied
     * when needed to the address register), thus we don't use ARR */
    _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),

    _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
    _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),

    /* Texture instructions; TEXCOORD and TEX have one entry per operand layout */
    _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
    _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
    _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
    _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
    _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
    _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
    _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
    _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
    _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
    _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
    _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),

    _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
    _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),

    _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),

    /* More tex stuff */
    _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
    _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
    _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
    _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
    _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),

    /* Misc */
    _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
    _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
    _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
    _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
    _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
    _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
    _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
};
2637
/* PHASE and COMMENT are kept outside inst_table: sm1_parse_instruction()
 * selects these entries directly when the opcode is not covered by
 * tx->op_info_map. */
struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));

struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2643
2644 static void
2645 create_op_info_map(struct shader_translator *tx)
2646 {
2647 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2648 unsigned i;
2649
2650 for (i = 0; i < Elements(tx->op_info_map); ++i)
2651 tx->op_info_map[i] = -1;
2652
2653 if (tx->processor == TGSI_PROCESSOR_VERTEX) {
2654 for (i = 0; i < Elements(inst_table); ++i) {
2655 assert(inst_table[i].sio < Elements(tx->op_info_map));
2656 if (inst_table[i].vert_version.min <= version &&
2657 inst_table[i].vert_version.max >= version)
2658 tx->op_info_map[inst_table[i].sio] = i;
2659 }
2660 } else {
2661 for (i = 0; i < Elements(inst_table); ++i) {
2662 assert(inst_table[i].sio < Elements(tx->op_info_map));
2663 if (inst_table[i].frag_version.min <= version &&
2664 inst_table[i].frag_version.max >= version)
2665 tx->op_info_map[inst_table[i].sio] = i;
2666 }
2667 }
2668 }
2669
2670 static INLINE HRESULT
2671 NineTranslateInstruction_Generic(struct shader_translator *tx)
2672 {
2673 struct ureg_dst dst[1];
2674 struct ureg_src src[4];
2675 unsigned i;
2676
2677 for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
2678 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
2679 for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
2680 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2681
2682 ureg_insn(tx->ureg, tx->insn.info->opcode,
2683 dst, tx->insn.ndst,
2684 src, tx->insn.nsrc);
2685 return D3D_OK;
2686 }
2687
2688 static INLINE DWORD
2689 TOKEN_PEEK(struct shader_translator *tx)
2690 {
2691 return *(tx->parse);
2692 }
2693
2694 static INLINE DWORD
2695 TOKEN_NEXT(struct shader_translator *tx)
2696 {
2697 return *(tx->parse)++;
2698 }
2699
2700 static INLINE void
2701 TOKEN_JUMP(struct shader_translator *tx)
2702 {
2703 if (tx->parse_next && tx->parse != tx->parse_next) {
2704 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
2705 tx->parse = tx->parse_next;
2706 }
2707 }
2708
2709 static INLINE boolean
2710 sm1_parse_eof(struct shader_translator *tx)
2711 {
2712 return TOKEN_PEEK(tx) == NINED3DSP_END;
2713 }
2714
2715 static void
2716 sm1_read_version(struct shader_translator *tx)
2717 {
2718 const DWORD tok = TOKEN_NEXT(tx);
2719
2720 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
2721 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
2722
2723 switch (tok >> 16) {
2724 case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
2725 case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
2726 default:
2727 DBG("Invalid shader type: %x\n", tok);
2728 tx->processor = ~0;
2729 break;
2730 }
2731 }
2732
2733 /* This is just to check if we parsed the instruction properly. */
2734 static void
2735 sm1_parse_get_skip(struct shader_translator *tx)
2736 {
2737 const DWORD tok = TOKEN_PEEK(tx);
2738
2739 if (tx->version.major >= 2) {
2740 tx->parse_next = tx->parse + 1 /* this */ +
2741 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
2742 } else {
2743 tx->parse_next = NULL; /* TODO: determine from param count */
2744 }
2745 }
2746
2747 static void
2748 sm1_print_comment(const char *comment, UINT size)
2749 {
2750 if (!size)
2751 return;
2752 /* TODO */
2753 }
2754
2755 static void
2756 sm1_parse_comments(struct shader_translator *tx, BOOL print)
2757 {
2758 DWORD tok = TOKEN_PEEK(tx);
2759
2760 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
2761 {
2762 const char *comment = "";
2763 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2764 tx->parse += size + 1;
2765
2766 if (print)
2767 sm1_print_comment(comment, size);
2768
2769 tok = TOKEN_PEEK(tx);
2770 }
2771 }
2772
2773 static void
2774 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
2775 {
2776 *reg = TOKEN_NEXT(tx);
2777
2778 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
2779 {
2780 if (tx->version.major < 2)
2781 *rel = (1 << 31) |
2782 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
2783 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
2784 (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT);
2785 else
2786 *rel = TOKEN_NEXT(tx);
2787 }
2788 }
2789
2790 static void
2791 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
2792 {
2793 uint8_t shift;
2794 dst->file =
2795 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
2796 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
2797 dst->type = TGSI_RETURN_TYPE_FLOAT;
2798 dst->idx = tok & D3DSP_REGNUM_MASK;
2799 dst->rel = NULL;
2800 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
2801 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
2802 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2803 dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
2804 }
2805
2806 static void
2807 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
2808 {
2809 src->file =
2810 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
2811 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
2812 src->type = TGSI_RETURN_TYPE_FLOAT;
2813 src->idx = tok & D3DSP_REGNUM_MASK;
2814 src->rel = NULL;
2815 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
2816 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
2817
2818 switch (src->file) {
2819 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
2820 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
2821 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
2822 default:
2823 break;
2824 }
2825 }
2826
2827 static void
2828 sm1_parse_immediate(struct shader_translator *tx,
2829 struct sm1_src_param *imm)
2830 {
2831 imm->file = NINED3DSPR_IMMEDIATE;
2832 imm->idx = INT_MIN;
2833 imm->rel = NULL;
2834 imm->swizzle = NINED3DSP_NOSWIZZLE;
2835 imm->mod = 0;
2836 switch (tx->insn.opcode) {
2837 case D3DSIO_DEF:
2838 imm->type = NINED3DSPTYPE_FLOAT4;
2839 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2840 tx->parse += 4;
2841 break;
2842 case D3DSIO_DEFI:
2843 imm->type = NINED3DSPTYPE_INT4;
2844 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
2845 tx->parse += 4;
2846 break;
2847 case D3DSIO_DEFB:
2848 imm->type = NINED3DSPTYPE_BOOL;
2849 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
2850 tx->parse += 1;
2851 break;
2852 default:
2853 assert(0);
2854 break;
2855 }
2856 }
2857
2858 static void
2859 sm1_read_dst_param(struct shader_translator *tx,
2860 struct sm1_dst_param *dst,
2861 struct sm1_src_param *rel)
2862 {
2863 DWORD tok_dst, tok_rel = 0;
2864
2865 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
2866 sm1_parse_dst_param(dst, tok_dst);
2867 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
2868 sm1_parse_src_param(rel, tok_rel);
2869 dst->rel = rel;
2870 }
2871 }
2872
2873 static void
2874 sm1_read_src_param(struct shader_translator *tx,
2875 struct sm1_src_param *src,
2876 struct sm1_src_param *rel)
2877 {
2878 DWORD tok_src, tok_rel = 0;
2879
2880 sm1_parse_get_param(tx, &tok_src, &tok_rel);
2881 sm1_parse_src_param(src, tok_src);
2882 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
2883 assert(rel);
2884 sm1_parse_src_param(rel, tok_rel);
2885 src->rel = rel;
2886 }
2887 }
2888
2889 static void
2890 sm1_read_semantic(struct shader_translator *tx,
2891 struct sm1_semantic *sem)
2892 {
2893 const DWORD tok_usg = TOKEN_NEXT(tx);
2894 const DWORD tok_dst = TOKEN_NEXT(tx);
2895
2896 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
2897 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
2898 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
2899
2900 sm1_parse_dst_param(&sem->reg, tok_dst);
2901 }
2902
2903 static void
2904 sm1_parse_instruction(struct shader_translator *tx)
2905 {
2906 struct sm1_instruction *insn = &tx->insn;
2907 DWORD tok;
2908 struct sm1_op_info *info = NULL;
2909 unsigned i;
2910
2911 sm1_parse_comments(tx, TRUE);
2912 sm1_parse_get_skip(tx);
2913
2914 tok = TOKEN_NEXT(tx);
2915
2916 insn->opcode = tok & D3DSI_OPCODE_MASK;
2917 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
2918 insn->coissue = !!(tok & D3DSI_COISSUE);
2919 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
2920
2921 if (insn->opcode < Elements(tx->op_info_map)) {
2922 int k = tx->op_info_map[insn->opcode];
2923 if (k >= 0) {
2924 assert(k < Elements(inst_table));
2925 info = &inst_table[k];
2926 }
2927 } else {
2928 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
2929 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
2930 }
2931 if (!info) {
2932 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
2933 TOKEN_JUMP(tx);
2934 return;
2935 }
2936 insn->info = info;
2937 insn->ndst = info->ndst;
2938 insn->nsrc = info->nsrc;
2939
2940 assert(!insn->predicated && "TODO: predicated instructions");
2941
2942 /* check version */
2943 {
2944 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
2945 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
2946 unsigned ver = (tx->version.major << 8) | tx->version.minor;
2947 if (ver < min || ver > max) {
2948 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
2949 min, ver, max);
2950 return;
2951 }
2952 }
2953
2954 for (i = 0; i < insn->ndst; ++i)
2955 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
2956 if (insn->predicated)
2957 sm1_read_src_param(tx, &insn->pred, NULL);
2958 for (i = 0; i < insn->nsrc; ++i)
2959 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
2960
2961 /* parse here so we can dump them before processing */
2962 if (insn->opcode == D3DSIO_DEF ||
2963 insn->opcode == D3DSIO_DEFI ||
2964 insn->opcode == D3DSIO_DEFB)
2965 sm1_parse_immediate(tx, &tx->insn.src[0]);
2966
2967 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
2968 sm1_instruction_check(insn);
2969
2970 if (info->handler)
2971 info->handler(tx);
2972 else
2973 NineTranslateInstruction_Generic(tx);
2974 tx_apply_dst0_modifiers(tx);
2975
2976 tx->num_scratch = 0; /* reset */
2977
2978 TOKEN_JUMP(tx);
2979 }
2980
2981 static void
2982 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
2983 {
2984 unsigned i;
2985
2986 tx->info = info;
2987
2988 tx->byte_code = info->byte_code;
2989 tx->parse = info->byte_code;
2990
2991 for (i = 0; i < Elements(info->input_map); ++i)
2992 info->input_map[i] = NINE_DECLUSAGE_NONE;
2993 info->num_inputs = 0;
2994
2995 info->position_t = FALSE;
2996 info->point_size = FALSE;
2997
2998 tx->info->const_float_slots = 0;
2999 tx->info->const_int_slots = 0;
3000 tx->info->const_bool_slots = 0;
3001
3002 info->sampler_mask = 0x0;
3003 info->rt_mask = 0x0;
3004
3005 info->lconstf.data = NULL;
3006 info->lconstf.ranges = NULL;
3007
3008 for (i = 0; i < Elements(tx->regs.rL); ++i) {
3009 tx->regs.rL[i] = ureg_dst_undef();
3010 }
3011 tx->regs.address = ureg_dst_undef();
3012 tx->regs.a0 = ureg_dst_undef();
3013 tx->regs.p = ureg_dst_undef();
3014 tx->regs.oDepth = ureg_dst_undef();
3015 tx->regs.vPos = ureg_src_undef();
3016 tx->regs.vFace = ureg_src_undef();
3017 for (i = 0; i < Elements(tx->regs.o); ++i)
3018 tx->regs.o[i] = ureg_dst_undef();
3019 for (i = 0; i < Elements(tx->regs.oCol); ++i)
3020 tx->regs.oCol[i] = ureg_dst_undef();
3021 for (i = 0; i < Elements(tx->regs.vC); ++i)
3022 tx->regs.vC[i] = ureg_src_undef();
3023 for (i = 0; i < Elements(tx->regs.vT); ++i)
3024 tx->regs.vT[i] = ureg_src_undef();
3025
3026 for (i = 0; i < Elements(tx->lconsti); ++i)
3027 tx->lconsti[i].idx = -1;
3028 for (i = 0; i < Elements(tx->lconstb); ++i)
3029 tx->lconstb[i].idx = -1;
3030
3031 sm1_read_version(tx);
3032
3033 info->version = (tx->version.major << 4) | tx->version.minor;
3034
3035 create_op_info_map(tx);
3036 }
3037
3038 static void
3039 tx_dtor(struct shader_translator *tx)
3040 {
3041 if (tx->num_inst_labels)
3042 FREE(tx->inst_labels);
3043 FREE(tx->lconstf);
3044 FREE(tx->regs.r);
3045 FREE(tx);
3046 }
3047
3048 static INLINE unsigned
3049 tgsi_processor_from_type(unsigned shader_type)
3050 {
3051 switch (shader_type) {
3052 case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
3053 case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
3054 default:
3055 return ~0;
3056 }
3057 }
3058
/* Capability query shorthands. Both expect a variable named `device` in
 * scope; GET_SHADER_CAP additionally expects `info` and queries the
 * capability for the shader type stored in info->type. */
#define GET_CAP(n) device->screen->get_param( \
    device->screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) device->screen->get_shader_param( \
    device->screen, info->type, PIPE_SHADER_CAP_##n)
3063
3064 HRESULT
3065 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3066 {
3067 struct shader_translator *tx;
3068 HRESULT hr = D3D_OK;
3069 const unsigned processor = tgsi_processor_from_type(info->type);
3070 unsigned slot_max;
3071
3072 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3073
3074 tx = CALLOC_STRUCT(shader_translator);
3075 if (!tx)
3076 return E_OUTOFMEMORY;
3077 tx_ctor(tx, info);
3078
3079 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3080 hr = D3DERR_INVALIDCALL;
3081 DBG("Unsupported shader version: %u.%u !\n",
3082 tx->version.major, tx->version.minor);
3083 goto out;
3084 }
3085 if (tx->processor != processor) {
3086 hr = D3DERR_INVALIDCALL;
3087 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3088 goto out;
3089 }
3090 DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
3091 tx->version.major, tx->version.minor);
3092
3093 tx->ureg = ureg_create(processor);
3094 if (!tx->ureg) {
3095 hr = E_OUTOFMEMORY;
3096 goto out;
3097 }
3098 tx_decl_constants(tx);
3099
3100 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3101 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3102 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3103 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3104 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3105 tx->texcoord_sn = tx->want_texcoord ?
3106 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3107
3108 /* VS must always write position. Declare it here to make it the 1st output.
3109 * (Some drivers like nv50 are buggy and rely on that.)
3110 */
3111 if (IS_VS) {
3112 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3113 } else {
3114 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3115 if (!tx->shift_wpos)
3116 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3117 }
3118
3119 while (!sm1_parse_eof(tx))
3120 sm1_parse_instruction(tx);
3121 tx->parse++; /* for byte_size */
3122
3123 if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
3124 ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
3125 ureg_src(tx->regs.r[0]));
3126 info->rt_mask |= 0x1;
3127 }
3128
3129 if (info->position_t)
3130 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3131
3132 ureg_END(tx->ureg);
3133
3134 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
3135 info->point_size = TRUE;
3136
3137 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3138 unsigned count;
3139 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3140 tgsi_dump(toks, 0);
3141 ureg_free_tokens(toks);
3142 }
3143
3144 /* record local constants */
3145 if (tx->num_lconstf && tx->indirect_const_access) {
3146 struct nine_range *ranges;
3147 float *data;
3148 int *indices;
3149 unsigned i, k, n;
3150
3151 hr = E_OUTOFMEMORY;
3152
3153 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3154 if (!data)
3155 goto out;
3156 info->lconstf.data = data;
3157
3158 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3159 if (!indices)
3160 goto out;
3161
3162 /* lazy sort, num_lconstf should be small */
3163 for (n = 0; n < tx->num_lconstf; ++n) {
3164 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3165 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3166 k = i;
3167 }
3168 indices[n] = tx->lconstf[k].idx;
3169 memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
3170 tx->lconstf[k].idx = INT_MAX;
3171 }
3172
3173 /* count ranges */
3174 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3175 if (indices[i] != indices[i - 1] + 1)
3176 ++n;
3177 ranges = MALLOC(n * sizeof(ranges[0]));
3178 if (!ranges) {
3179 FREE(indices);
3180 goto out;
3181 }
3182 info->lconstf.ranges = ranges;
3183
3184 k = 0;
3185 ranges[k].bgn = indices[0];
3186 for (i = 1; i < tx->num_lconstf; ++i) {
3187 if (indices[i] != indices[i - 1] + 1) {
3188 ranges[k].next = &ranges[k + 1];
3189 ranges[k].end = indices[i - 1] + 1;
3190 ++k;
3191 ranges[k].bgn = indices[i];
3192 }
3193 }
3194 ranges[k].end = indices[i - 1] + 1;
3195 ranges[k].next = NULL;
3196 assert(n == (k + 1));
3197
3198 FREE(indices);
3199 hr = D3D_OK;
3200 }
3201
3202 if (tx->indirect_const_access) /* vs only */
3203 info->const_float_slots = device->max_vs_const_f;
3204
3205 slot_max = info->const_bool_slots > 0 ?
3206 device->max_vs_const_f + NINE_MAX_CONST_I
3207 + info->const_bool_slots :
3208 info->const_int_slots > 0 ?
3209 device->max_vs_const_f + info->const_int_slots :
3210 info->const_float_slots;
3211 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3212
3213 info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
3214 if (!info->cso) {
3215 hr = D3DERR_DRIVERINTERNALERROR;
3216 FREE(info->lconstf.data);
3217 FREE(info->lconstf.ranges);
3218 goto out;
3219 }
3220
3221 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3222 out:
3223 tx_dtor(tx);
3224 return hr;
3225 }