st/nine: Fix support for ps 1.4 dw and dz modifiers
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37
38 #define DBG_CHANNEL DBG_SHADER
39
40 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
41
42
43 struct shader_translator;
44
45 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
46
47 static inline const char *d3dsio_to_string(unsigned opcode);
48
49
50 #define NINED3D_SM1_VS 0xfffe
51 #define NINED3D_SM1_PS 0xffff
52
53 #define NINE_MAX_COND_DEPTH 64
54 #define NINE_MAX_LOOP_DEPTH 64
55
56 #define NINED3DSP_END 0x0000ffff
57
58 #define NINED3DSPTYPE_FLOAT4 0
59 #define NINED3DSPTYPE_INT4 1
60 #define NINED3DSPTYPE_BOOL 2
61
62 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
63
64 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
65 #define NINED3DSP_WRITEMASK_SHIFT 16
66
67 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
68
69 #define NINED3DSHADER_REL_OP_GT 1
70 #define NINED3DSHADER_REL_OP_EQ 2
71 #define NINED3DSHADER_REL_OP_GE 3
72 #define NINED3DSHADER_REL_OP_LT 4
73 #define NINED3DSHADER_REL_OP_NE 5
74 #define NINED3DSHADER_REL_OP_LE 6
75
76 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
77 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
78
79 #define NINED3DSI_TEXLD_PROJECT 0x1
80 #define NINED3DSI_TEXLD_BIAS 0x2
81
82 #define NINED3DSP_WRITEMASK_0 0x1
83 #define NINED3DSP_WRITEMASK_1 0x2
84 #define NINED3DSP_WRITEMASK_2 0x4
85 #define NINED3DSP_WRITEMASK_3 0x8
86 #define NINED3DSP_WRITEMASK_ALL 0xf
87
88 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
89
90 #define NINE_SWIZZLE4(x,y,z,w) \
91 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
92
93 #define NINE_CONSTANT_SRC(index) \
94 ureg_src_register(TGSI_FILE_CONSTANT, index)
95
96 #define NINE_APPLY_SWIZZLE(src, s) \
97 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
98
99 #define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
100 NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)
101
102 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
103 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
104 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
105
106 /*
107 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
108 * BIAS <= PS 1.4 (x-0.5)
109 * BIASNEG <= PS 1.4 (-(x-0.5))
110 * SIGN <= PS 1.4 (2(x-0.5))
111 * SIGNNEG <= PS 1.4 (-2(x-0.5))
112 * COMP <= PS 1.4 (1-x)
113 * X2 = PS 1.4 (2x)
114 * X2NEG = PS 1.4 (-2x)
115 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
116 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
117 * ABS >= SM 3.0 (abs(x))
118 * ABSNEG >= SM 3.0 (-abs(x))
119 * NOT >= SM 2.0 predication only
120 */
121 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
124 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
125 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
126 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
127 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
128 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
129 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
130 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
131 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
132 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
133 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
134 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
135
136 static const char *sm1_mod_str[] =
137 {
138 [NINED3DSPSM_NONE] = "",
139 [NINED3DSPSM_NEG] = "-",
140 [NINED3DSPSM_BIAS] = "bias",
141 [NINED3DSPSM_BIASNEG] = "biasneg",
142 [NINED3DSPSM_SIGN] = "sign",
143 [NINED3DSPSM_SIGNNEG] = "signneg",
144 [NINED3DSPSM_COMP] = "comp",
145 [NINED3DSPSM_X2] = "x2",
146 [NINED3DSPSM_X2NEG] = "x2neg",
147 [NINED3DSPSM_DZ] = "dz",
148 [NINED3DSPSM_DW] = "dw",
149 [NINED3DSPSM_ABS] = "abs",
150 [NINED3DSPSM_ABSNEG] = "-abs",
151 [NINED3DSPSM_NOT] = "not"
152 };
153
154 static void
155 sm1_dump_writemask(BYTE mask)
156 {
157 if (mask & 1) DUMP("x"); else DUMP("_");
158 if (mask & 2) DUMP("y"); else DUMP("_");
159 if (mask & 4) DUMP("z"); else DUMP("_");
160 if (mask & 8) DUMP("w"); else DUMP("_");
161 }
162
163 static void
164 sm1_dump_swizzle(BYTE s)
165 {
166 char c[4] = { 'x', 'y', 'z', 'w' };
167 DUMP("%c%c%c%c",
168 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
169 }
170
171 static const char sm1_file_char[] =
172 {
173 [D3DSPR_TEMP] = 'r',
174 [D3DSPR_INPUT] = 'v',
175 [D3DSPR_CONST] = 'c',
176 [D3DSPR_ADDR] = 'A',
177 [D3DSPR_RASTOUT] = 'R',
178 [D3DSPR_ATTROUT] = 'D',
179 [D3DSPR_OUTPUT] = 'o',
180 [D3DSPR_CONSTINT] = 'I',
181 [D3DSPR_COLOROUT] = 'C',
182 [D3DSPR_DEPTHOUT] = 'D',
183 [D3DSPR_SAMPLER] = 's',
184 [D3DSPR_CONST2] = 'c',
185 [D3DSPR_CONST3] = 'c',
186 [D3DSPR_CONST4] = 'c',
187 [D3DSPR_CONSTBOOL] = 'B',
188 [D3DSPR_LOOP] = 'L',
189 [D3DSPR_TEMPFLOAT16] = 'h',
190 [D3DSPR_MISCTYPE] = 'M',
191 [D3DSPR_LABEL] = 'X',
192 [D3DSPR_PREDICATE] = 'p'
193 };
194
195 static void
196 sm1_dump_reg(BYTE file, INT index)
197 {
198 switch (file) {
199 case D3DSPR_LOOP:
200 DUMP("aL");
201 break;
202 case D3DSPR_COLOROUT:
203 DUMP("oC%i", index);
204 break;
205 case D3DSPR_DEPTHOUT:
206 DUMP("oDepth");
207 break;
208 case D3DSPR_RASTOUT:
209 DUMP("oRast%i", index);
210 break;
211 case D3DSPR_CONSTINT:
212 DUMP("iconst[%i]", index);
213 break;
214 case D3DSPR_CONSTBOOL:
215 DUMP("bconst[%i]", index);
216 break;
217 default:
218 DUMP("%c%i", sm1_file_char[file], index);
219 break;
220 }
221 }
222
223 struct sm1_src_param
224 {
225 INT idx;
226 struct sm1_src_param *rel;
227 BYTE file;
228 BYTE swizzle;
229 BYTE mod;
230 BYTE type;
231 union {
232 DWORD d[4];
233 float f[4];
234 int i[4];
235 BOOL b;
236 } imm;
237 };
238 static void
239 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
240
241 struct sm1_dst_param
242 {
243 INT idx;
244 struct sm1_src_param *rel;
245 BYTE file;
246 BYTE mask;
247 BYTE mod;
248 int8_t shift; /* sint4 */
249 BYTE type;
250 };
251
252 static inline void
253 assert_replicate_swizzle(const struct ureg_src *reg)
254 {
255 assert(reg->SwizzleY == reg->SwizzleX &&
256 reg->SwizzleZ == reg->SwizzleX &&
257 reg->SwizzleW == reg->SwizzleX);
258 }
259
260 static void
261 sm1_dump_immediate(const struct sm1_src_param *param)
262 {
263 switch (param->type) {
264 case NINED3DSPTYPE_FLOAT4:
265 DUMP("{ %f %f %f %f }",
266 param->imm.f[0], param->imm.f[1],
267 param->imm.f[2], param->imm.f[3]);
268 break;
269 case NINED3DSPTYPE_INT4:
270 DUMP("{ %i %i %i %i }",
271 param->imm.i[0], param->imm.i[1],
272 param->imm.i[2], param->imm.i[3]);
273 break;
274 case NINED3DSPTYPE_BOOL:
275 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
276 break;
277 default:
278 assert(0);
279 break;
280 }
281 }
282
283 static void
284 sm1_dump_src_param(const struct sm1_src_param *param)
285 {
286 if (param->file == NINED3DSPR_IMMEDIATE) {
287 assert(!param->mod &&
288 !param->rel &&
289 param->swizzle == NINED3DSP_NOSWIZZLE);
290 sm1_dump_immediate(param);
291 return;
292 }
293
294 if (param->mod)
295 DUMP("%s(", sm1_mod_str[param->mod]);
296 if (param->rel) {
297 DUMP("%c[", sm1_file_char[param->file]);
298 sm1_dump_src_param(param->rel);
299 DUMP("+%i]", param->idx);
300 } else {
301 sm1_dump_reg(param->file, param->idx);
302 }
303 if (param->mod)
304 DUMP(")");
305 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
306 DUMP(".");
307 sm1_dump_swizzle(param->swizzle);
308 }
309 }
310
311 static void
312 sm1_dump_dst_param(const struct sm1_dst_param *param)
313 {
314 if (param->mod & NINED3DSPDM_SATURATE)
315 DUMP("sat ");
316 if (param->mod & NINED3DSPDM_PARTIALP)
317 DUMP("pp ");
318 if (param->mod & NINED3DSPDM_CENTROID)
319 DUMP("centroid ");
320 if (param->shift < 0)
321 DUMP("/%u ", 1 << -param->shift);
322 if (param->shift > 0)
323 DUMP("*%u ", 1 << param->shift);
324
325 if (param->rel) {
326 DUMP("%c[", sm1_file_char[param->file]);
327 sm1_dump_src_param(param->rel);
328 DUMP("+%i]", param->idx);
329 } else {
330 sm1_dump_reg(param->file, param->idx);
331 }
332 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
333 DUMP(".");
334 sm1_dump_writemask(param->mask);
335 }
336 }
337
338 struct sm1_semantic
339 {
340 struct sm1_dst_param reg;
341 BYTE sampler_type;
342 D3DDECLUSAGE usage;
343 BYTE usage_idx;
344 };
345
346 struct sm1_op_info
347 {
348 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
349 * should be ignored completely */
350 unsigned sio;
351 unsigned opcode; /* TGSI_OPCODE_x */
352
353 /* versions are still set even handler is set */
354 struct {
355 unsigned min;
356 unsigned max;
357 } vert_version, frag_version;
358
359 /* number of regs parsed outside of special handler */
360 unsigned ndst;
361 unsigned nsrc;
362
363 /* some instructions don't map perfectly, so use a special handler */
364 translate_instruction_func handler;
365 };
366
367 struct sm1_instruction
368 {
369 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
370 BYTE flags;
371 BOOL coissue;
372 BOOL predicated;
373 BYTE ndst;
374 BYTE nsrc;
375 struct sm1_src_param src[4];
376 struct sm1_src_param src_rel[4];
377 struct sm1_src_param pred;
378 struct sm1_src_param dst_rel[1];
379 struct sm1_dst_param dst[1];
380
381 struct sm1_op_info *info;
382 };
383
384 static void
385 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
386 {
387 unsigned i;
388
389 /* no info stored for these: */
390 if (insn->opcode == D3DSIO_DCL)
391 return;
392 for (i = 0; i < indent; ++i)
393 DUMP(" ");
394
395 if (insn->predicated) {
396 DUMP("@");
397 sm1_dump_src_param(&insn->pred);
398 DUMP(" ");
399 }
400 DUMP("%s", d3dsio_to_string(insn->opcode));
401 if (insn->flags) {
402 switch (insn->opcode) {
403 case D3DSIO_TEX:
404 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
405 break;
406 default:
407 DUMP("_%x", insn->flags);
408 break;
409 }
410 }
411 if (insn->coissue)
412 DUMP("_co");
413 DUMP(" ");
414
415 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
416 sm1_dump_dst_param(&insn->dst[i]);
417 DUMP(" ");
418 }
419
420 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
421 sm1_dump_src_param(&insn->src[i]);
422 DUMP(" ");
423 }
424 if (insn->opcode == D3DSIO_DEF ||
425 insn->opcode == D3DSIO_DEFI ||
426 insn->opcode == D3DSIO_DEFB)
427 sm1_dump_immediate(&insn->src[0]);
428
429 DUMP("\n");
430 }
431
432 struct sm1_local_const
433 {
434 INT idx;
435 struct ureg_src reg;
436 float f[4]; /* for indirect addressing of float constants */
437 };
438
439 struct shader_translator
440 {
441 const DWORD *byte_code;
442 const DWORD *parse;
443 const DWORD *parse_next;
444
445 struct ureg_program *ureg;
446
447 /* shader version */
448 struct {
449 BYTE major;
450 BYTE minor;
451 } version;
452 unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
453 unsigned num_constf_allowed;
454 unsigned num_consti_allowed;
455 unsigned num_constb_allowed;
456
457 boolean native_integers;
458 boolean inline_subroutines;
459 boolean want_texcoord;
460 boolean shift_wpos;
461 boolean wpos_is_sysval;
462 boolean face_is_sysval_integer;
463 unsigned texcoord_sn;
464
465 struct sm1_instruction insn; /* current instruction */
466
467 struct {
468 struct ureg_dst *r;
469 struct ureg_dst oPos;
470 struct ureg_dst oPos_out; /* the real output when doing streamout */
471 struct ureg_dst oFog;
472 struct ureg_dst oPts;
473 struct ureg_dst oCol[4];
474 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
475 struct ureg_dst oDepth;
476 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
477 struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
478 struct ureg_src vPos;
479 struct ureg_src vFace;
480 struct ureg_src s;
481 struct ureg_dst p;
482 struct ureg_dst address;
483 struct ureg_dst a0;
484 struct ureg_dst tS[8]; /* texture stage registers */
485 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
486 struct ureg_dst t[5]; /* scratch TEMPs */
487 struct ureg_src vC[2]; /* PS color in */
488 struct ureg_src vT[8]; /* PS texcoord in */
489 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
490 } regs;
491 unsigned num_temp; /* ARRAY_SIZE(regs.r) */
492 unsigned num_scratch;
493 unsigned loop_depth;
494 unsigned loop_depth_max;
495 unsigned cond_depth;
496 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
497 unsigned cond_labels[NINE_MAX_COND_DEPTH];
498 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
499
500 unsigned *inst_labels; /* LABEL op */
501 unsigned num_inst_labels;
502
503 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
504
505 struct sm1_local_const *lconstf;
506 unsigned num_lconstf;
507 struct sm1_local_const *lconsti;
508 unsigned num_lconsti;
509 struct sm1_local_const *lconstb;
510 unsigned num_lconstb;
511
512 boolean indirect_const_access;
513 boolean failure;
514
515 struct nine_vs_output_info output_info[16];
516 int num_outputs;
517
518 struct nine_shader_info *info;
519
520 int16_t op_info_map[D3DSIO_BREAKP + 1];
521 };
522
523 #define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
524 #define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
525
526 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
527
528 static void
529 sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
530
531 static void
532 sm1_instruction_check(const struct sm1_instruction *insn)
533 {
534 if (insn->opcode == D3DSIO_CRS)
535 {
536 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
537 {
538 DBG("CRS.mask.w\n");
539 }
540 }
541 }
542
543 static void
544 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
545 int mask, int output_index)
546 {
547 tx->output_info[tx->num_outputs].output_semantic = Usage;
548 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
549 tx->output_info[tx->num_outputs].mask = mask;
550 tx->output_info[tx->num_outputs].output_index = output_index;
551 tx->num_outputs++;
552 }
553
554 static boolean
555 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
556 {
557 INT i;
558
559 if (index < 0 || index >= tx->num_constf_allowed) {
560 tx->failure = TRUE;
561 return FALSE;
562 }
563 for (i = 0; i < tx->num_lconstf; ++i) {
564 if (tx->lconstf[i].idx == index) {
565 *src = tx->lconstf[i].reg;
566 return TRUE;
567 }
568 }
569 return FALSE;
570 }
571 static boolean
572 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
573 {
574 int i;
575
576 if (index < 0 || index >= tx->num_consti_allowed) {
577 tx->failure = TRUE;
578 return FALSE;
579 }
580 for (i = 0; i < tx->num_lconsti; ++i) {
581 if (tx->lconsti[i].idx == index) {
582 *src = tx->lconsti[i].reg;
583 return TRUE;
584 }
585 }
586 return FALSE;
587 }
588 static boolean
589 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
590 {
591 int i;
592
593 if (index < 0 || index >= tx->num_constb_allowed) {
594 tx->failure = TRUE;
595 return FALSE;
596 }
597 for (i = 0; i < tx->num_lconstb; ++i) {
598 if (tx->lconstb[i].idx == index) {
599 *src = tx->lconstb[i].reg;
600 return TRUE;
601 }
602 }
603 return FALSE;
604 }
605
606 static void
607 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
608 {
609 unsigned n;
610
611 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
612
613 for (n = 0; n < tx->num_lconstf; ++n)
614 if (tx->lconstf[n].idx == index)
615 break;
616 if (n == tx->num_lconstf) {
617 if ((n % 8) == 0) {
618 tx->lconstf = REALLOC(tx->lconstf,
619 (n + 0) * sizeof(tx->lconstf[0]),
620 (n + 8) * sizeof(tx->lconstf[0]));
621 assert(tx->lconstf);
622 }
623 tx->num_lconstf++;
624 }
625 tx->lconstf[n].idx = index;
626 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
627
628 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
629 }
630 static void
631 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
632 {
633 unsigned n;
634
635 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
636
637 for (n = 0; n < tx->num_lconsti; ++n)
638 if (tx->lconsti[n].idx == index)
639 break;
640 if (n == tx->num_lconsti) {
641 if ((n % 8) == 0) {
642 tx->lconsti = REALLOC(tx->lconsti,
643 (n + 0) * sizeof(tx->lconsti[0]),
644 (n + 8) * sizeof(tx->lconsti[0]));
645 assert(tx->lconsti);
646 }
647 tx->num_lconsti++;
648 }
649
650 tx->lconsti[n].idx = index;
651 tx->lconsti[n].reg = tx->native_integers ?
652 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
653 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
654 }
655 static void
656 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
657 {
658 unsigned n;
659
660 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
661
662 for (n = 0; n < tx->num_lconstb; ++n)
663 if (tx->lconstb[n].idx == index)
664 break;
665 if (n == tx->num_lconstb) {
666 if ((n % 8) == 0) {
667 tx->lconstb = REALLOC(tx->lconstb,
668 (n + 0) * sizeof(tx->lconstb[0]),
669 (n + 8) * sizeof(tx->lconstb[0]));
670 assert(tx->lconstb);
671 }
672 tx->num_lconstb++;
673 }
674
675 tx->lconstb[n].idx = index;
676 tx->lconstb[n].reg = tx->native_integers ?
677 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
678 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
679 }
680
681 static inline struct ureg_dst
682 tx_scratch(struct shader_translator *tx)
683 {
684 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
685 tx->failure = TRUE;
686 return tx->regs.t[0];
687 }
688 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
689 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
690 return tx->regs.t[tx->num_scratch++];
691 }
692
693 static inline struct ureg_dst
694 tx_scratch_scalar(struct shader_translator *tx)
695 {
696 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
697 }
698
699 static inline struct ureg_src
700 tx_src_scalar(struct ureg_dst dst)
701 {
702 struct ureg_src src = ureg_src(dst);
703 int c = ffs(dst.WriteMask) - 1;
704 if (dst.WriteMask == (1 << c))
705 src = ureg_scalar(src, c);
706 return src;
707 }
708
709 static inline void
710 tx_temp_alloc(struct shader_translator *tx, INT idx)
711 {
712 assert(idx >= 0);
713 if (idx >= tx->num_temp) {
714 unsigned k = tx->num_temp;
715 unsigned n = idx + 1;
716 tx->regs.r = REALLOC(tx->regs.r,
717 k * sizeof(tx->regs.r[0]),
718 n * sizeof(tx->regs.r[0]));
719 for (; k < n; ++k)
720 tx->regs.r[k] = ureg_dst_undef();
721 tx->num_temp = n;
722 }
723 if (ureg_dst_is_undef(tx->regs.r[idx]))
724 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
725 }
726
727 static inline void
728 tx_addr_alloc(struct shader_translator *tx, INT idx)
729 {
730 assert(idx == 0);
731 if (ureg_dst_is_undef(tx->regs.address))
732 tx->regs.address = ureg_DECL_address(tx->ureg);
733 if (ureg_dst_is_undef(tx->regs.a0))
734 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
735 }
736
737 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
738 * the projection should be applied on the texture. It doesn't
739 * apply on texkill.
740 * The doc is very imprecise here (it says the projection is done
741 * before rasterization, thus in vs, which seems wrong since ps instructions
742 * are affected differently)
743 * For now we only apply to the ps TEX instruction and TEXBEM.
744 * Perhaps some other instructions would need it */
745 static inline void
746 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
747 struct ureg_src src, INT idx)
748 {
749 struct ureg_dst tmp;
750 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
751
752 /* no projection */
753 if (dim == 1) {
754 ureg_MOV(tx->ureg, dst, src);
755 } else {
756 tmp = tx_scratch_scalar(tx);
757 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
758 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
759 }
760 }
761
762 static inline void
763 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
764 unsigned target, struct ureg_src src0,
765 struct ureg_src src1, INT idx)
766 {
767 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
768 struct ureg_dst tmp;
769
770 /* dim == 1: no projection
771 * Looks like must be disabled when it makes no
772 * sense according the texture dimensions
773 */
774 if (dim == 1 || dim <= target) {
775 ureg_TEX(tx->ureg, dst, target, src0, src1);
776 } else if (dim == 4) {
777 ureg_TXP(tx->ureg, dst, target, src0, src1);
778 } else {
779 tmp = tx_scratch(tx);
780 apply_ps1x_projection(tx, tmp, src0, idx);
781 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
782 }
783 }
784
785 static inline void
786 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
787 {
788 assert(IS_PS);
789 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
790 if (ureg_src_is_undef(tx->regs.vT[idx]))
791 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
792 TGSI_INTERPOLATE_PERSPECTIVE);
793 }
794
795 static inline unsigned *
796 tx_bgnloop(struct shader_translator *tx)
797 {
798 tx->loop_depth++;
799 if (tx->loop_depth_max < tx->loop_depth)
800 tx->loop_depth_max = tx->loop_depth;
801 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
802 return &tx->loop_labels[tx->loop_depth - 1];
803 }
804
805 static inline unsigned *
806 tx_endloop(struct shader_translator *tx)
807 {
808 assert(tx->loop_depth);
809 tx->loop_depth--;
810 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
811 ureg_get_instruction_number(tx->ureg));
812 return &tx->loop_labels[tx->loop_depth];
813 }
814
815 static struct ureg_dst
816 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
817 {
818 const unsigned l = tx->loop_depth - 1;
819
820 if (!tx->loop_depth)
821 {
822 DBG("loop counter requested outside of loop\n");
823 return ureg_dst_undef();
824 }
825
826 if (ureg_dst_is_undef(tx->regs.rL[l])) {
827 /* loop or rep ctr creation */
828 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
829 tx->loop_or_rep[l] = loop_or_rep;
830 }
831 /* loop - rep - endloop - endrep not allowed */
832 assert(tx->loop_or_rep[l] == loop_or_rep);
833
834 return tx->regs.rL[l];
835 }
836
837 static struct ureg_src
838 tx_get_loopal(struct shader_translator *tx)
839 {
840 int loop_level = tx->loop_depth - 1;
841
842 while (loop_level >= 0) {
843 /* handle loop - rep - endrep - endloop case */
844 if (tx->loop_or_rep[loop_level])
845 /* the value is in the loop counter y component (nine implementation) */
846 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
847 loop_level--;
848 }
849
850 DBG("aL counter requested outside of loop\n");
851 return ureg_src_undef();
852 }
853
854 static inline unsigned *
855 tx_cond(struct shader_translator *tx)
856 {
857 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
858 tx->cond_depth++;
859 return &tx->cond_labels[tx->cond_depth - 1];
860 }
861
862 static inline unsigned *
863 tx_elsecond(struct shader_translator *tx)
864 {
865 assert(tx->cond_depth);
866 return &tx->cond_labels[tx->cond_depth - 1];
867 }
868
869 static inline void
870 tx_endcond(struct shader_translator *tx)
871 {
872 assert(tx->cond_depth);
873 tx->cond_depth--;
874 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
875 ureg_get_instruction_number(tx->ureg));
876 }
877
878 static inline struct ureg_dst
879 nine_ureg_dst_register(unsigned file, int index)
880 {
881 return ureg_dst(ureg_src_register(file, index));
882 }
883
884 static inline struct ureg_src
885 nine_get_position_input(struct shader_translator *tx)
886 {
887 struct ureg_program *ureg = tx->ureg;
888
889 if (tx->wpos_is_sysval)
890 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
891 else
892 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
893 0, TGSI_INTERPOLATE_LINEAR);
894 }
895
896 static struct ureg_src
897 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
898 {
899 struct ureg_program *ureg = tx->ureg;
900 struct ureg_src src;
901 struct ureg_dst tmp;
902
903 switch (param->file)
904 {
905 case D3DSPR_TEMP:
906 assert(!param->rel);
907 tx_temp_alloc(tx, param->idx);
908 src = ureg_src(tx->regs.r[param->idx]);
909 break;
910 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
911 case D3DSPR_ADDR:
912 assert(!param->rel);
913 if (IS_VS) {
914 assert(param->idx == 0);
915 /* the address register (vs only) must be
916 * assigned before use */
917 assert(!ureg_dst_is_undef(tx->regs.a0));
918 /* Round to lowest for vs1.1 (contrary to the doc), else
919 * round to nearest */
920 if (tx->version.major < 2 && tx->version.minor < 2)
921 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
922 else
923 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
924 src = ureg_src(tx->regs.address);
925 } else {
926 if (tx->version.major < 2 && tx->version.minor < 4) {
927 /* no subroutines, so should be defined */
928 src = ureg_src(tx->regs.tS[param->idx]);
929 } else {
930 tx_texcoord_alloc(tx, param->idx);
931 src = tx->regs.vT[param->idx];
932 }
933 }
934 break;
935 case D3DSPR_INPUT:
936 if (IS_VS) {
937 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
938 } else {
939 if (tx->version.major < 3) {
940 assert(!param->rel);
941 src = ureg_DECL_fs_input_cyl_centroid(
942 ureg, TGSI_SEMANTIC_COLOR, param->idx,
943 TGSI_INTERPOLATE_COLOR, 0,
944 tx->info->force_color_in_centroid ?
945 TGSI_INTERPOLATE_LOC_CENTROID : 0,
946 0, 1);
947 } else {
948 if(param->rel) {
949 /* Copy all inputs (non consecutive)
950 * to temp array (consecutive).
951 * This is not good for performance.
952 * A better way would be to have inputs
953 * consecutive (would need implement alternative
954 * way to match vs outputs and ps inputs).
955 * However even with the better way, the temp array
956 * copy would need to be used if some inputs
957 * are not GENERIC or if they have different
958 * interpolation flag. */
959 if (ureg_src_is_undef(tx->regs.v_consecutive)) {
960 int i;
961 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
962 for (i = 0; i < 10; i++) {
963 if (!ureg_src_is_undef(tx->regs.v[i]))
964 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
965 else
966 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
967 }
968 }
969 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
970 } else {
971 assert(param->idx < ARRAY_SIZE(tx->regs.v));
972 src = tx->regs.v[param->idx];
973 }
974 }
975 }
976 break;
977 case D3DSPR_PREDICATE:
978 assert(!"D3DSPR_PREDICATE");
979 break;
980 case D3DSPR_SAMPLER:
981 assert(param->mod == NINED3DSPSM_NONE);
982 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
983 assert(!param->rel);
984 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
985 break;
986 case D3DSPR_CONST:
987 assert(!param->rel || IS_VS);
988 if (param->rel)
989 tx->indirect_const_access = TRUE;
990 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
991 if (!param->rel)
992 nine_info_mark_const_f_used(tx->info, param->idx);
993 /* vswp constant handling: we use two buffers
994 * to fit all the float constants. The special handling
995 * doesn't need to be elsewhere, because all the instructions
996 * accessing the constants directly are VS1, and swvp
997 * is VS >= 2 */
998 if (IS_VS && tx->info->swvp_on) {
999 if (!param->rel) {
1000 if (param->idx < 4096) {
1001 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1002 src = ureg_src_dimension(src, 0);
1003 } else {
1004 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096);
1005 src = ureg_src_dimension(src, 1);
1006 }
1007 } else {
1008 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */
1009 src = ureg_src_dimension(src, 0);
1010 }
1011 } else
1012 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1013 }
1014 if (!IS_VS && tx->version.major < 2) {
1015 /* ps 1.X clamps constants */
1016 tmp = tx_scratch(tx);
1017 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1018 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1019 src = ureg_src(tmp);
1020 }
1021 break;
1022 case D3DSPR_CONST2:
1023 case D3DSPR_CONST3:
1024 case D3DSPR_CONST4:
1025 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1026 assert(!"CONST2/3/4");
1027 src = ureg_imm1f(ureg, 0.0f);
1028 break;
1029 case D3DSPR_CONSTINT:
1030 /* relative adressing only possible for float constants in vs */
1031 assert(!param->rel);
1032 if (!tx_lconsti(tx, &src, param->idx)) {
1033 nine_info_mark_const_i_used(tx->info, param->idx);
1034 if (IS_VS && tx->info->swvp_on) {
1035 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1036 src = ureg_src_dimension(src, 2);
1037 } else
1038 src = ureg_src_register(TGSI_FILE_CONSTANT,
1039 tx->info->const_i_base + param->idx);
1040 }
1041 break;
1042 case D3DSPR_CONSTBOOL:
1043 assert(!param->rel);
1044 if (!tx_lconstb(tx, &src, param->idx)) {
1045 char r = param->idx / 4;
1046 char s = param->idx & 3;
1047 nine_info_mark_const_b_used(tx->info, param->idx);
1048 if (IS_VS && tx->info->swvp_on) {
1049 src = ureg_src_register(TGSI_FILE_CONSTANT, r);
1050 src = ureg_src_dimension(src, 3);
1051 } else
1052 src = ureg_src_register(TGSI_FILE_CONSTANT,
1053 tx->info->const_b_base + r);
1054 src = ureg_swizzle(src, s, s, s, s);
1055 }
1056 break;
1057 case D3DSPR_LOOP:
1058 if (ureg_dst_is_undef(tx->regs.address))
1059 tx->regs.address = ureg_DECL_address(ureg);
1060 if (!tx->native_integers)
1061 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1062 else
1063 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1064 src = ureg_src(tx->regs.address);
1065 break;
1066 case D3DSPR_MISCTYPE:
1067 switch (param->idx) {
1068 case D3DSMO_POSITION:
1069 if (ureg_src_is_undef(tx->regs.vPos))
1070 tx->regs.vPos = nine_get_position_input(tx);
1071 if (tx->shift_wpos) {
1072 /* TODO: do this only once */
1073 struct ureg_dst wpos = tx_scratch(tx);
1074 ureg_ADD(ureg, wpos, tx->regs.vPos,
1075 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1076 src = ureg_src(wpos);
1077 } else {
1078 src = tx->regs.vPos;
1079 }
1080 break;
1081 case D3DSMO_FACE:
1082 if (ureg_src_is_undef(tx->regs.vFace)) {
1083 if (tx->face_is_sysval_integer) {
1084 tmp = tx_scratch(tx);
1085 tx->regs.vFace =
1086 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1087
1088 /* convert bool to float */
1089 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1090 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1091 tx->regs.vFace = ureg_src(tmp);
1092 } else {
1093 tx->regs.vFace = ureg_DECL_fs_input(ureg,
1094 TGSI_SEMANTIC_FACE, 0,
1095 TGSI_INTERPOLATE_CONSTANT);
1096 }
1097 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1098 }
1099 src = tx->regs.vFace;
1100 break;
1101 default:
1102 assert(!"invalid src D3DSMO");
1103 break;
1104 }
1105 assert(!param->rel);
1106 break;
1107 case D3DSPR_TEMPFLOAT16:
1108 break;
1109 default:
1110 assert(!"invalid src D3DSPR");
1111 }
1112 if (param->rel)
1113 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1114
1115 switch (param->mod) {
1116 case NINED3DSPSM_DW:
1117 tmp = tx_scratch(tx);
1118 /* NOTE: app is not allowed to read w with this modifier */
1119 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1120 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1121 src = ureg_src(tmp);
1122 break;
1123 case NINED3DSPSM_DZ:
1124 tmp = tx_scratch(tx);
1125 /* NOTE: app is not allowed to read z with this modifier */
1126 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1127 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1128 src = ureg_src(tmp);
1129 break;
1130 default:
1131 break;
1132 }
1133
1134 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1135 src = ureg_swizzle(src,
1136 (param->swizzle >> 0) & 0x3,
1137 (param->swizzle >> 2) & 0x3,
1138 (param->swizzle >> 4) & 0x3,
1139 (param->swizzle >> 6) & 0x3);
1140
1141 switch (param->mod) {
1142 case NINED3DSPSM_ABS:
1143 src = ureg_abs(src);
1144 break;
1145 case NINED3DSPSM_ABSNEG:
1146 src = ureg_negate(ureg_abs(src));
1147 break;
1148 case NINED3DSPSM_NEG:
1149 src = ureg_negate(src);
1150 break;
1151 case NINED3DSPSM_BIAS:
1152 tmp = tx_scratch(tx);
1153 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1154 src = ureg_src(tmp);
1155 break;
1156 case NINED3DSPSM_BIASNEG:
1157 tmp = tx_scratch(tx);
1158 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1159 src = ureg_src(tmp);
1160 break;
1161 case NINED3DSPSM_NOT:
1162 if (tx->native_integers) {
1163 tmp = tx_scratch(tx);
1164 ureg_NOT(ureg, tmp, src);
1165 src = ureg_src(tmp);
1166 break;
1167 }
1168 /* fall through */
1169 case NINED3DSPSM_COMP:
1170 tmp = tx_scratch(tx);
1171 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1172 src = ureg_src(tmp);
1173 break;
1174 case NINED3DSPSM_DZ:
1175 case NINED3DSPSM_DW:
1176 /* Already handled*/
1177 break;
1178 case NINED3DSPSM_SIGN:
1179 tmp = tx_scratch(tx);
1180 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1181 src = ureg_src(tmp);
1182 break;
1183 case NINED3DSPSM_SIGNNEG:
1184 tmp = tx_scratch(tx);
1185 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1186 src = ureg_src(tmp);
1187 break;
1188 case NINED3DSPSM_X2:
1189 tmp = tx_scratch(tx);
1190 ureg_ADD(ureg, tmp, src, src);
1191 src = ureg_src(tmp);
1192 break;
1193 case NINED3DSPSM_X2NEG:
1194 tmp = tx_scratch(tx);
1195 ureg_ADD(ureg, tmp, src, src);
1196 src = ureg_negate(ureg_src(tmp));
1197 break;
1198 default:
1199 assert(param->mod == NINED3DSPSM_NONE);
1200 break;
1201 }
1202
1203 return src;
1204 }
1205
1206 static struct ureg_dst
1207 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1208 {
1209 struct ureg_dst dst;
1210
1211 switch (param->file)
1212 {
1213 case D3DSPR_TEMP:
1214 assert(!param->rel);
1215 tx_temp_alloc(tx, param->idx);
1216 dst = tx->regs.r[param->idx];
1217 break;
1218 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1219 case D3DSPR_ADDR:
1220 assert(!param->rel);
1221 if (tx->version.major < 2 && !IS_VS) {
1222 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1223 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1224 dst = tx->regs.tS[param->idx];
1225 } else
1226 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1227 tx_texcoord_alloc(tx, param->idx);
1228 dst = ureg_dst(tx->regs.vT[param->idx]);
1229 } else {
1230 tx_addr_alloc(tx, param->idx);
1231 dst = tx->regs.a0;
1232 }
1233 break;
1234 case D3DSPR_RASTOUT:
1235 assert(!param->rel);
1236 switch (param->idx) {
1237 case 0:
1238 if (ureg_dst_is_undef(tx->regs.oPos))
1239 tx->regs.oPos =
1240 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1241 dst = tx->regs.oPos;
1242 break;
1243 case 1:
1244 if (ureg_dst_is_undef(tx->regs.oFog))
1245 tx->regs.oFog =
1246 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1247 dst = tx->regs.oFog;
1248 break;
1249 case 2:
1250 if (ureg_dst_is_undef(tx->regs.oPts))
1251 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1252 dst = tx->regs.oPts;
1253 break;
1254 default:
1255 assert(0);
1256 break;
1257 }
1258 break;
1259 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1260 case D3DSPR_OUTPUT:
1261 if (tx->version.major < 3) {
1262 assert(!param->rel);
1263 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1264 } else {
1265 assert(!param->rel); /* TODO */
1266 assert(param->idx < ARRAY_SIZE(tx->regs.o));
1267 dst = tx->regs.o[param->idx];
1268 }
1269 break;
1270 case D3DSPR_ATTROUT: /* VS */
1271 case D3DSPR_COLOROUT: /* PS */
1272 assert(param->idx >= 0 && param->idx < 4);
1273 assert(!param->rel);
1274 tx->info->rt_mask |= 1 << param->idx;
1275 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1276 /* ps < 3: oCol[0] will have fog blending afterward */
1277 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1278 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1279 } else {
1280 tx->regs.oCol[param->idx] =
1281 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1282 }
1283 }
1284 dst = tx->regs.oCol[param->idx];
1285 if (IS_VS && tx->version.major < 3)
1286 dst = ureg_saturate(dst);
1287 break;
1288 case D3DSPR_DEPTHOUT:
1289 assert(!param->rel);
1290 if (ureg_dst_is_undef(tx->regs.oDepth))
1291 tx->regs.oDepth =
1292 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1293 TGSI_WRITEMASK_Z, 0, 1);
1294 dst = tx->regs.oDepth; /* XXX: must write .z component */
1295 break;
1296 case D3DSPR_PREDICATE:
1297 assert(!"D3DSPR_PREDICATE");
1298 break;
1299 case D3DSPR_TEMPFLOAT16:
1300 DBG("unhandled D3DSPR: %u\n", param->file);
1301 break;
1302 default:
1303 assert(!"invalid dst D3DSPR");
1304 break;
1305 }
1306 if (param->rel)
1307 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1308
1309 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1310 dst = ureg_writemask(dst, param->mask);
1311 if (param->mod & NINED3DSPDM_SATURATE)
1312 dst = ureg_saturate(dst);
1313
1314 return dst;
1315 }
1316
1317 static struct ureg_dst
1318 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1319 {
1320 if (param->shift) {
1321 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1322 return tx->regs.tdst;
1323 }
1324 return _tx_dst_param(tx, param);
1325 }
1326
1327 static void
1328 tx_apply_dst0_modifiers(struct shader_translator *tx)
1329 {
1330 struct ureg_dst rdst;
1331 float f;
1332
1333 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1334 return;
1335 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1336
1337 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1338
1339 if (tx->insn.dst[0].shift < 0)
1340 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1341 else
1342 f = 1 << tx->insn.dst[0].shift;
1343
1344 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1345 }
1346
1347 static struct ureg_src
1348 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1349 {
1350 struct ureg_src src;
1351
1352 assert(!param->shift);
1353 assert(!(param->mod & NINED3DSPDM_SATURATE));
1354
1355 switch (param->file) {
1356 case D3DSPR_INPUT:
1357 if (IS_VS) {
1358 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1359 } else {
1360 assert(!param->rel);
1361 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1362 src = tx->regs.v[param->idx];
1363 }
1364 break;
1365 default:
1366 src = ureg_src(tx_dst_param(tx, param));
1367 break;
1368 }
1369 if (param->rel)
1370 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1371
1372 if (!param->mask)
1373 WARN("mask is 0, using identity swizzle\n");
1374
1375 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1376 char s[4];
1377 int n;
1378 int c;
1379 for (n = 0, c = 0; c < 4; ++c)
1380 if (param->mask & (1 << c))
1381 s[n++] = c;
1382 assert(n);
1383 for (c = n; c < 4; ++c)
1384 s[c] = s[n - 1];
1385 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1386 }
1387 return src;
1388 }
1389
1390 static HRESULT
1391 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1392 {
1393 struct ureg_program *ureg = tx->ureg;
1394 struct ureg_dst dst;
1395 struct ureg_src src[2];
1396 struct sm1_src_param *src_mat = &tx->insn.src[1];
1397 unsigned i;
1398
1399 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1400 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1401
1402 for (i = 0; i < n; i++)
1403 {
1404 const unsigned m = (1 << i);
1405
1406 src[1] = tx_src_param(tx, src_mat);
1407 src_mat->idx++;
1408
1409 if (!(dst.WriteMask & m))
1410 continue;
1411
1412 /* XXX: src == dst case ? */
1413
1414 switch (k) {
1415 case 3:
1416 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1417 break;
1418 case 4:
1419 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1420 break;
1421 default:
1422 DBG("invalid operation: M%ux%u\n", m, n);
1423 break;
1424 }
1425 }
1426
1427 return D3D_OK;
1428 }
1429
/* Expands to two zero values; presumably the min/max version pair of an
 * instruction that is unavailable for a shader type (confirm at use site). */
#define VNOTSUPPORTED 0, 0
/* Pack a shader version: major number in bits 8 and up, minor in the low bits. */
#define V(maj, min) ((min) | ((maj) << 8))
1432
1433 static inline const char *
1434 d3dsio_to_string( unsigned opcode )
1435 {
1436 static const char *names[] = {
1437 "NOP",
1438 "MOV",
1439 "ADD",
1440 "SUB",
1441 "MAD",
1442 "MUL",
1443 "RCP",
1444 "RSQ",
1445 "DP3",
1446 "DP4",
1447 "MIN",
1448 "MAX",
1449 "SLT",
1450 "SGE",
1451 "EXP",
1452 "LOG",
1453 "LIT",
1454 "DST",
1455 "LRP",
1456 "FRC",
1457 "M4x4",
1458 "M4x3",
1459 "M3x4",
1460 "M3x3",
1461 "M3x2",
1462 "CALL",
1463 "CALLNZ",
1464 "LOOP",
1465 "RET",
1466 "ENDLOOP",
1467 "LABEL",
1468 "DCL",
1469 "POW",
1470 "CRS",
1471 "SGN",
1472 "ABS",
1473 "NRM",
1474 "SINCOS",
1475 "REP",
1476 "ENDREP",
1477 "IF",
1478 "IFC",
1479 "ELSE",
1480 "ENDIF",
1481 "BREAK",
1482 "BREAKC",
1483 "MOVA",
1484 "DEFB",
1485 "DEFI",
1486 NULL,
1487 NULL,
1488 NULL,
1489 NULL,
1490 NULL,
1491 NULL,
1492 NULL,
1493 NULL,
1494 NULL,
1495 NULL,
1496 NULL,
1497 NULL,
1498 NULL,
1499 NULL,
1500 NULL,
1501 "TEXCOORD",
1502 "TEXKILL",
1503 "TEX",
1504 "TEXBEM",
1505 "TEXBEML",
1506 "TEXREG2AR",
1507 "TEXREG2GB",
1508 "TEXM3x2PAD",
1509 "TEXM3x2TEX",
1510 "TEXM3x3PAD",
1511 "TEXM3x3TEX",
1512 NULL,
1513 "TEXM3x3SPEC",
1514 "TEXM3x3VSPEC",
1515 "EXPP",
1516 "LOGP",
1517 "CND",
1518 "DEF",
1519 "TEXREG2RGB",
1520 "TEXDP3TEX",
1521 "TEXM3x2DEPTH",
1522 "TEXDP3",
1523 "TEXM3x3",
1524 "TEXDEPTH",
1525 "CMP",
1526 "BEM",
1527 "DP2ADD",
1528 "DSX",
1529 "DSY",
1530 "TEXLDD",
1531 "SETP",
1532 "TEXLDL",
1533 "BREAKP"
1534 };
1535
1536 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1537
1538 switch (opcode) {
1539 case D3DSIO_PHASE: return "PHASE";
1540 case D3DSIO_COMMENT: return "COMMENT";
1541 case D3DSIO_END: return "END";
1542 default:
1543 return NULL;
1544 }
1545 }
1546
1547 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1548 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1549 (inst).vert_version.max | \
1550 (inst).frag_version.min | \
1551 (inst).frag_version.max)
1552
1553 #define SPECIAL(name) \
1554 NineTranslateInstruction_##name
1555
1556 #define DECL_SPECIAL(name) \
1557 static HRESULT \
1558 NineTranslateInstruction_##name( struct shader_translator *tx )
1559
1560 static HRESULT
1561 NineTranslateInstruction_Generic(struct shader_translator *);
1562
1563 DECL_SPECIAL(NOP)
1564 {
1565 /* Nothing to do. NOP was used to avoid hangs
1566 * with very old d3d drivers. */
1567 return D3D_OK;
1568 }
1569
1570 DECL_SPECIAL(SUB)
1571 {
1572 struct ureg_program *ureg = tx->ureg;
1573 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1574 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1575 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1576
1577 ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1578 return D3D_OK;
1579 }
1580
1581 DECL_SPECIAL(ABS)
1582 {
1583 struct ureg_program *ureg = tx->ureg;
1584 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1585 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1586
1587 ureg_MOV(ureg, dst, ureg_abs(src));
1588 return D3D_OK;
1589 }
1590
1591 DECL_SPECIAL(M4x4)
1592 {
1593 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1594 }
1595
1596 DECL_SPECIAL(M4x3)
1597 {
1598 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1599 }
1600
1601 DECL_SPECIAL(M3x4)
1602 {
1603 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1604 }
1605
1606 DECL_SPECIAL(M3x3)
1607 {
1608 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1609 }
1610
1611 DECL_SPECIAL(M3x2)
1612 {
1613 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1614 }
1615
1616 DECL_SPECIAL(CMP)
1617 {
1618 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1619 tx_src_param(tx, &tx->insn.src[0]),
1620 tx_src_param(tx, &tx->insn.src[2]),
1621 tx_src_param(tx, &tx->insn.src[1]));
1622 return D3D_OK;
1623 }
1624
1625 DECL_SPECIAL(CND)
1626 {
1627 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1628 struct ureg_dst cgt;
1629 struct ureg_src cnd;
1630
1631 /* the coissue flag was a tip for compilers to advise to
1632 * execute two operations at the same time, in cases
1633 * the two executions had same dst with different channels.
1634 * It has no effect on current hw. However it seems CND
1635 * is affected. The handling of this very specific case
1636 * handled below mimick wine behaviour */
1637 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1638 ureg_MOV(tx->ureg,
1639 dst, tx_src_param(tx, &tx->insn.src[1]));
1640 return D3D_OK;
1641 }
1642
1643 cnd = tx_src_param(tx, &tx->insn.src[0]);
1644 cgt = tx_scratch(tx);
1645
1646 if (tx->version.major == 1 && tx->version.minor < 4)
1647 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1648
1649 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1650
1651 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1652 tx_src_param(tx, &tx->insn.src[1]),
1653 tx_src_param(tx, &tx->insn.src[2]));
1654 return D3D_OK;
1655 }
1656
1657 DECL_SPECIAL(CALL)
1658 {
1659 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1660 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1661 return D3D_OK;
1662 }
1663
1664 DECL_SPECIAL(CALLNZ)
1665 {
1666 struct ureg_program *ureg = tx->ureg;
1667 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1668
1669 if (!tx->native_integers)
1670 ureg_IF(ureg, src, tx_cond(tx));
1671 else
1672 ureg_UIF(ureg, src, tx_cond(tx));
1673 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1674 tx_endcond(tx);
1675 ureg_ENDIF(ureg);
1676 return D3D_OK;
1677 }
1678
1679 DECL_SPECIAL(LOOP)
1680 {
1681 struct ureg_program *ureg = tx->ureg;
1682 unsigned *label;
1683 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1684 struct ureg_dst ctr;
1685 struct ureg_dst tmp;
1686 struct ureg_src ctrx;
1687
1688 label = tx_bgnloop(tx);
1689 ctr = tx_get_loopctr(tx, TRUE);
1690 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1691
1692 /* src: num_iterations - start_value of al - step for al - 0 */
1693 ureg_MOV(ureg, ctr, src);
1694 ureg_BGNLOOP(tx->ureg, label);
1695 tmp = tx_scratch_scalar(tx);
1696 /* Initially ctr.x contains the number of iterations.
1697 * ctr.y will contain the updated value of al.
1698 * We decrease ctr.x at the end of every iteration,
1699 * and stop when it reaches 0. */
1700
1701 if (!tx->native_integers) {
1702 /* case src and ctr contain floats */
1703 /* to avoid precision issue, we stop when ctr <= 0.5 */
1704 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1705 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1706 } else {
1707 /* case src and ctr contain integers */
1708 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1709 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1710 }
1711 ureg_BRK(ureg);
1712 tx_endcond(tx);
1713 ureg_ENDIF(ureg);
1714 return D3D_OK;
1715 }
1716
1717 DECL_SPECIAL(RET)
1718 {
1719 ureg_RET(tx->ureg);
1720 return D3D_OK;
1721 }
1722
1723 DECL_SPECIAL(ENDLOOP)
1724 {
1725 struct ureg_program *ureg = tx->ureg;
1726 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1727 struct ureg_dst dst_ctrx, dst_al;
1728 struct ureg_src src_ctr, al_counter;
1729
1730 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1731 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1732 src_ctr = ureg_src(ctr);
1733 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1734
1735 /* ctr.x -= 1
1736 * ctr.y (aL) += step */
1737 if (!tx->native_integers) {
1738 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1739 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1740 } else {
1741 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1742 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1743 }
1744 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1745 return D3D_OK;
1746 }
1747
1748 DECL_SPECIAL(LABEL)
1749 {
1750 unsigned k = tx->num_inst_labels;
1751 unsigned n = tx->insn.src[0].idx;
1752 assert(n < 2048);
1753 if (n >= k)
1754 tx->inst_labels = REALLOC(tx->inst_labels,
1755 k * sizeof(tx->inst_labels[0]),
1756 n * sizeof(tx->inst_labels[0]));
1757
1758 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1759 return D3D_OK;
1760 }
1761
1762 DECL_SPECIAL(SINCOS)
1763 {
1764 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1765 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1766
1767 assert(!(dst.WriteMask & 0xc));
1768
1769 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1770 ureg_SCS(tx->ureg, dst, src);
1771 return D3D_OK;
1772 }
1773
1774 DECL_SPECIAL(SGN)
1775 {
1776 ureg_SSG(tx->ureg,
1777 tx_dst_param(tx, &tx->insn.dst[0]),
1778 tx_src_param(tx, &tx->insn.src[0]));
1779 return D3D_OK;
1780 }
1781
1782 DECL_SPECIAL(REP)
1783 {
1784 struct ureg_program *ureg = tx->ureg;
1785 unsigned *label;
1786 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1787 struct ureg_dst ctr;
1788 struct ureg_dst tmp;
1789 struct ureg_src ctrx;
1790
1791 label = tx_bgnloop(tx);
1792 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1793 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1794
1795 /* NOTE: rep must be constant, so we don't have to save the count */
1796 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1797
1798 /* rep: num_iterations - 0 - 0 - 0 */
1799 ureg_MOV(ureg, ctr, rep);
1800 ureg_BGNLOOP(ureg, label);
1801 tmp = tx_scratch_scalar(tx);
1802 /* Initially ctr.x contains the number of iterations.
1803 * We decrease ctr.x at the end of every iteration,
1804 * and stop when it reaches 0. */
1805
1806 if (!tx->native_integers) {
1807 /* case src and ctr contain floats */
1808 /* to avoid precision issue, we stop when ctr <= 0.5 */
1809 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1810 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1811 } else {
1812 /* case src and ctr contain integers */
1813 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1814 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1815 }
1816 ureg_BRK(ureg);
1817 tx_endcond(tx);
1818 ureg_ENDIF(ureg);
1819
1820 return D3D_OK;
1821 }
1822
1823 DECL_SPECIAL(ENDREP)
1824 {
1825 struct ureg_program *ureg = tx->ureg;
1826 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1827 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1828 struct ureg_src src_ctr = ureg_src(ctr);
1829
1830 /* ctr.x -= 1 */
1831 if (!tx->native_integers)
1832 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1833 else
1834 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1835
1836 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1837 return D3D_OK;
1838 }
1839
1840 DECL_SPECIAL(ENDIF)
1841 {
1842 tx_endcond(tx);
1843 ureg_ENDIF(tx->ureg);
1844 return D3D_OK;
1845 }
1846
1847 DECL_SPECIAL(IF)
1848 {
1849 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1850
1851 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1852 ureg_UIF(tx->ureg, src, tx_cond(tx));
1853 else
1854 ureg_IF(tx->ureg, src, tx_cond(tx));
1855
1856 return D3D_OK;
1857 }
1858
1859 static inline unsigned
1860 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1861 {
1862 switch (flags) {
1863 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1864 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1865 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1866 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1867 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1868 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1869 default:
1870 assert(!"invalid comparison flags");
1871 return TGSI_OPCODE_SGT;
1872 }
1873 }
1874
1875 DECL_SPECIAL(IFC)
1876 {
1877 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1878 struct ureg_src src[2];
1879 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1880 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1881 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1882 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1883 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1884 return D3D_OK;
1885 }
1886
1887 DECL_SPECIAL(ELSE)
1888 {
1889 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1890 return D3D_OK;
1891 }
1892
1893 DECL_SPECIAL(BREAKC)
1894 {
1895 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1896 struct ureg_src src[2];
1897 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1898 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1899 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1900 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1901 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1902 ureg_BRK(tx->ureg);
1903 tx_endcond(tx);
1904 ureg_ENDIF(tx->ureg);
1905 return D3D_OK;
1906 }
1907
1908 static const char *sm1_declusage_names[] =
1909 {
1910 [D3DDECLUSAGE_POSITION] = "POSITION",
1911 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1912 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1913 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1914 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1915 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1916 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1917 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1918 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1919 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1920 [D3DDECLUSAGE_COLOR] = "COLOR",
1921 [D3DDECLUSAGE_FOG] = "FOG",
1922 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1923 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1924 };
1925
1926 static inline unsigned
1927 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1928 {
1929 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1930 }
1931
1932 static void
1933 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1934 boolean tc,
1935 struct sm1_semantic *dcl)
1936 {
1937 BYTE index = dcl->usage_idx;
1938
1939 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1940 * we match to a TGSI_SEMANTIC_GENERIC with index.
1941 *
1942 * The index can be anything UINT16 and usage_idx is BYTE,
1943 * so we can fit everything. It doesn't matter if indices
1944 * are close together or low.
1945 *
1946 *
1947 * POSITION >= 1: 10 * index + 6
1948 * COLOR >= 2: 10 * (index-1) + 7
1949 * TEXCOORD[0..15]: index
1950 * BLENDWEIGHT: 10 * index + 18
1951 * BLENDINDICES: 10 * index + 19
1952 * NORMAL: 10 * index + 20
1953 * TANGENT: 10 * index + 21
1954 * BINORMAL: 10 * index + 22
1955 * TESSFACTOR: 10 * index + 23
1956 */
1957
1958 switch (dcl->usage) {
1959 case D3DDECLUSAGE_POSITION:
1960 case D3DDECLUSAGE_POSITIONT:
1961 case D3DDECLUSAGE_DEPTH:
1962 if (index == 0) {
1963 sem->Name = TGSI_SEMANTIC_POSITION;
1964 sem->Index = 0;
1965 } else {
1966 sem->Name = TGSI_SEMANTIC_GENERIC;
1967 sem->Index = 10 * index + 6;
1968 }
1969 break;
1970 case D3DDECLUSAGE_COLOR:
1971 if (index < 2) {
1972 sem->Name = TGSI_SEMANTIC_COLOR;
1973 sem->Index = index;
1974 } else {
1975 sem->Name = TGSI_SEMANTIC_GENERIC;
1976 sem->Index = 10 * (index-1) + 7;
1977 }
1978 break;
1979 case D3DDECLUSAGE_FOG:
1980 assert(index == 0);
1981 sem->Name = TGSI_SEMANTIC_FOG;
1982 sem->Index = 0;
1983 break;
1984 case D3DDECLUSAGE_PSIZE:
1985 assert(index == 0);
1986 sem->Name = TGSI_SEMANTIC_PSIZE;
1987 sem->Index = 0;
1988 break;
1989 case D3DDECLUSAGE_TEXCOORD:
1990 assert(index < 16);
1991 if (index < 8 && tc)
1992 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1993 else
1994 sem->Name = TGSI_SEMANTIC_GENERIC;
1995 sem->Index = index;
1996 break;
1997 case D3DDECLUSAGE_BLENDWEIGHT:
1998 sem->Name = TGSI_SEMANTIC_GENERIC;
1999 sem->Index = 10 * index + 18;
2000 break;
2001 case D3DDECLUSAGE_BLENDINDICES:
2002 sem->Name = TGSI_SEMANTIC_GENERIC;
2003 sem->Index = 10 * index + 19;
2004 break;
2005 case D3DDECLUSAGE_NORMAL:
2006 sem->Name = TGSI_SEMANTIC_GENERIC;
2007 sem->Index = 10 * index + 20;
2008 break;
2009 case D3DDECLUSAGE_TANGENT:
2010 sem->Name = TGSI_SEMANTIC_GENERIC;
2011 sem->Index = 10 * index + 21;
2012 break;
2013 case D3DDECLUSAGE_BINORMAL:
2014 sem->Name = TGSI_SEMANTIC_GENERIC;
2015 sem->Index = 10 * index + 22;
2016 break;
2017 case D3DDECLUSAGE_TESSFACTOR:
2018 sem->Name = TGSI_SEMANTIC_GENERIC;
2019 sem->Index = 10 * index + 23;
2020 break;
2021 case D3DDECLUSAGE_SAMPLE:
2022 sem->Name = TGSI_SEMANTIC_COUNT;
2023 sem->Index = 0;
2024 break;
2025 default:
2026 unreachable("Invalid DECLUSAGE.");
2027 break;
2028 }
2029 }
2030
2031 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2032 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2033 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2034 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2035 static inline unsigned
2036 d3dstt_to_tgsi_tex(BYTE sampler_type)
2037 {
2038 switch (sampler_type) {
2039 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
2040 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
2041 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2042 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
2043 default:
2044 assert(0);
2045 return TGSI_TEXTURE_UNKNOWN;
2046 }
2047 }
2048 static inline unsigned
2049 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2050 {
2051 switch (sampler_type) {
2052 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2053 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2054 case NINED3DSTT_VOLUME:
2055 case NINED3DSTT_CUBE:
2056 default:
2057 assert(0);
2058 return TGSI_TEXTURE_UNKNOWN;
2059 }
2060 }
2061 static inline unsigned
2062 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2063 {
2064 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2065 case 1: return TGSI_TEXTURE_1D;
2066 case 0: return TGSI_TEXTURE_2D;
2067 case 3: return TGSI_TEXTURE_3D;
2068 default:
2069 return TGSI_TEXTURE_CUBE;
2070 }
2071 }
2072
2073 static const char *
2074 sm1_sampler_type_name(BYTE sampler_type)
2075 {
2076 switch (sampler_type) {
2077 case NINED3DSTT_1D: return "1D";
2078 case NINED3DSTT_2D: return "2D";
2079 case NINED3DSTT_VOLUME: return "VOLUME";
2080 case NINED3DSTT_CUBE: return "CUBE";
2081 default:
2082 return "(D3DSTT_?)";
2083 }
2084 }
2085
2086 static inline unsigned
2087 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2088 {
2089 switch (sem->Name) {
2090 case TGSI_SEMANTIC_POSITION:
2091 case TGSI_SEMANTIC_NORMAL:
2092 return TGSI_INTERPOLATE_LINEAR;
2093 case TGSI_SEMANTIC_BCOLOR:
2094 case TGSI_SEMANTIC_COLOR:
2095 return TGSI_INTERPOLATE_COLOR;
2096 case TGSI_SEMANTIC_FOG:
2097 case TGSI_SEMANTIC_GENERIC:
2098 case TGSI_SEMANTIC_TEXCOORD:
2099 case TGSI_SEMANTIC_CLIPDIST:
2100 case TGSI_SEMANTIC_CLIPVERTEX:
2101 return TGSI_INTERPOLATE_PERSPECTIVE;
2102 case TGSI_SEMANTIC_EDGEFLAG:
2103 case TGSI_SEMANTIC_FACE:
2104 case TGSI_SEMANTIC_INSTANCEID:
2105 case TGSI_SEMANTIC_PCOORD:
2106 case TGSI_SEMANTIC_PRIMID:
2107 case TGSI_SEMANTIC_PSIZE:
2108 case TGSI_SEMANTIC_VERTEXID:
2109 return TGSI_INTERPOLATE_CONSTANT;
2110 default:
2111 assert(0);
2112 return TGSI_INTERPOLATE_CONSTANT;
2113 }
2114 }
2115
2116 DECL_SPECIAL(DCL)
2117 {
2118 struct ureg_program *ureg = tx->ureg;
2119 boolean is_input;
2120 boolean is_sampler;
2121 struct tgsi_declaration_semantic tgsi;
2122 struct sm1_semantic sem;
2123 sm1_read_semantic(tx, &sem);
2124
2125 is_input = sem.reg.file == D3DSPR_INPUT;
2126 is_sampler =
2127 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2128
2129 DUMP("DCL ");
2130 sm1_dump_dst_param(&sem.reg);
2131 if (is_sampler)
2132 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2133 else
2134 if (tx->version.major >= 3)
2135 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2136 else
2137 if (sem.usage | sem.usage_idx)
2138 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2139 else
2140 DUMP("\n");
2141
2142 if (is_sampler) {
2143 const unsigned m = 1 << sem.reg.idx;
2144 ureg_DECL_sampler(ureg, sem.reg.idx);
2145 tx->info->sampler_mask |= m;
2146 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2147 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2148 d3dstt_to_tgsi_tex(sem.sampler_type);
2149 return D3D_OK;
2150 }
2151
2152 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2153 if (IS_VS) {
2154 if (is_input) {
2155 /* linkage outside of shader with vertex declaration */
2156 ureg_DECL_vs_input(ureg, sem.reg.idx);
2157 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2158 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2159 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2160 /* NOTE: preserving order in case of indirect access */
2161 } else
2162 if (tx->version.major >= 3) {
2163 /* SM2 output semantic determined by file */
2164 assert(sem.reg.mask != 0);
2165 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2166 tx->info->position_t = TRUE;
2167 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2168 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2169 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2170 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2171 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2172 if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2173 tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2174 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2175 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2176 }
2177
2178 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2179 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2180 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2181 }
2182 }
2183 } else {
2184 if (is_input && tx->version.major >= 3) {
2185 unsigned interp_location = 0;
2186 /* SM3 only, SM2 input semantic determined by file */
2187 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2188 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2189 /* PositionT and tessfactor forbidden */
2190 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2191 return D3DERR_INVALIDCALL;
2192
2193 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2194 /* Position0 is forbidden (likely because vPos already does that) */
2195 if (sem.usage == D3DDECLUSAGE_POSITION)
2196 return D3DERR_INVALIDCALL;
2197 /* Following code is for depth */
2198 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2199 return D3D_OK;
2200 }
2201
2202 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2203 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2204 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2205
2206 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2207 ureg, tgsi.Name, tgsi.Index,
2208 nine_tgsi_to_interp_mode(&tgsi),
2209 0, /* cylwrap */
2210 interp_location, 0, 1);
2211 } else
2212 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2213 /* FragColor or FragDepth */
2214 assert(sem.reg.mask != 0);
2215 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2216 0, 1);
2217 }
2218 }
2219 return D3D_OK;
2220 }
2221
2222 DECL_SPECIAL(DEF)
2223 {
2224 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2225 return D3D_OK;
2226 }
2227
2228 DECL_SPECIAL(DEFB)
2229 {
2230 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2231 return D3D_OK;
2232 }
2233
2234 DECL_SPECIAL(DEFI)
2235 {
2236 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2237 return D3D_OK;
2238 }
2239
2240 DECL_SPECIAL(POW)
2241 {
2242 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2243 struct ureg_src src[2] = {
2244 tx_src_param(tx, &tx->insn.src[0]),
2245 tx_src_param(tx, &tx->insn.src[1])
2246 };
2247 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2248 return D3D_OK;
2249 }
2250
2251 DECL_SPECIAL(RSQ)
2252 {
2253 struct ureg_program *ureg = tx->ureg;
2254 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2255 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2256 struct ureg_dst tmp = tx_scratch(tx);
2257 ureg_RSQ(ureg, tmp, ureg_abs(src));
2258 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2259 return D3D_OK;
2260 }
2261
2262 DECL_SPECIAL(LOG)
2263 {
2264 struct ureg_program *ureg = tx->ureg;
2265 struct ureg_dst tmp = tx_scratch_scalar(tx);
2266 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2267 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2268 ureg_LG2(ureg, tmp, ureg_abs(src));
2269 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2270 return D3D_OK;
2271 }
2272
2273 DECL_SPECIAL(LIT)
2274 {
2275 struct ureg_program *ureg = tx->ureg;
2276 struct ureg_dst tmp = tx_scratch(tx);
2277 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2278 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2279 ureg_LIT(ureg, tmp, src);
2280 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2281 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2282 * it 0^0 if src.w=0, which value is driver dependent. */
2283 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2284 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2285 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2286 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2287 return D3D_OK;
2288 }
2289
2290 DECL_SPECIAL(NRM)
2291 {
2292 struct ureg_program *ureg = tx->ureg;
2293 struct ureg_dst tmp = tx_scratch_scalar(tx);
2294 struct ureg_src nrm = tx_src_scalar(tmp);
2295 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2296 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2297 ureg_DP3(ureg, tmp, src, src);
2298 ureg_RSQ(ureg, tmp, nrm);
2299 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2300 ureg_MUL(ureg, dst, src, nrm);
2301 return D3D_OK;
2302 }
2303
2304 DECL_SPECIAL(DP2ADD)
2305 {
2306 struct ureg_dst tmp = tx_scratch_scalar(tx);
2307 struct ureg_src dp2 = tx_src_scalar(tmp);
2308 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2309 struct ureg_src src[3];
2310 int i;
2311 for (i = 0; i < 3; ++i)
2312 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2313 assert_replicate_swizzle(&src[2]);
2314
2315 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2316 ureg_ADD(tx->ureg, dst, src[2], dp2);
2317
2318 return D3D_OK;
2319 }
2320
2321 DECL_SPECIAL(TEXCOORD)
2322 {
2323 struct ureg_program *ureg = tx->ureg;
2324 const unsigned s = tx->insn.dst[0].idx;
2325 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2326
2327 tx_texcoord_alloc(tx, s);
2328 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2329 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2330
2331 return D3D_OK;
2332 }
2333
2334 DECL_SPECIAL(TEXCOORD_ps14)
2335 {
2336 struct ureg_program *ureg = tx->ureg;
2337 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2338 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2339
2340 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2341
2342 ureg_MOV(ureg, dst, src);
2343
2344 return D3D_OK;
2345 }
2346
2347 DECL_SPECIAL(TEXKILL)
2348 {
2349 struct ureg_src reg;
2350
2351 if (tx->version.major > 1 || tx->version.minor > 3) {
2352 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2353 } else {
2354 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2355 reg = tx->regs.vT[tx->insn.dst[0].idx];
2356 }
2357 if (tx->version.major < 2)
2358 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2359 ureg_KILL_IF(tx->ureg, reg);
2360
2361 return D3D_OK;
2362 }
2363
2364 DECL_SPECIAL(TEXBEM)
2365 {
2366 struct ureg_program *ureg = tx->ureg;
2367 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2368 struct ureg_dst tmp, tmp2, texcoord;
2369 struct ureg_src sample, m00, m01, m10, m11;
2370 struct ureg_src bumpenvlscale, bumpenvloffset;
2371 const int m = tx->insn.dst[0].idx;
2372 const int n = tx->insn.src[0].idx;
2373
2374 assert(tx->version.major == 1);
2375
2376 sample = ureg_DECL_sampler(ureg, m);
2377 tx->info->sampler_mask |= 1 << m;
2378
2379 tx_texcoord_alloc(tx, m);
2380
2381 tmp = tx_scratch(tx);
2382 tmp2 = tx_scratch(tx);
2383 texcoord = tx_scratch(tx);
2384 /*
2385 * Bump-env-matrix:
2386 * 00 is X
2387 * 01 is Y
2388 * 10 is Z
2389 * 11 is W
2390 */
2391 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2392 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2393 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2394 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2395 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2396
2397 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2398 if (m % 2 == 0) {
2399 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2400 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2401 } else {
2402 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2403 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2404 }
2405
2406 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2407
2408 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2409 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2410 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2411 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2412 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2413 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2414 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2415
2416 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2417 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2418 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2419 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2420 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2421 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2422 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2423
2424 /* Now the texture coordinates are in tmp.xy */
2425
2426 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2427 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2428 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2429 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2430 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2431 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2432 bumpenvlscale, bumpenvloffset);
2433 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2434 }
2435
2436 tx->info->bumpenvmat_needed = 1;
2437
2438 return D3D_OK;
2439 }
2440
2441 DECL_SPECIAL(TEXREG2AR)
2442 {
2443 struct ureg_program *ureg = tx->ureg;
2444 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2445 struct ureg_src sample;
2446 const int m = tx->insn.dst[0].idx;
2447 const int n = tx->insn.src[0].idx;
2448 assert(m >= 0 && m > n);
2449
2450 sample = ureg_DECL_sampler(ureg, m);
2451 tx->info->sampler_mask |= 1 << m;
2452 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2453
2454 return D3D_OK;
2455 }
2456
2457 DECL_SPECIAL(TEXREG2GB)
2458 {
2459 struct ureg_program *ureg = tx->ureg;
2460 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2461 struct ureg_src sample;
2462 const int m = tx->insn.dst[0].idx;
2463 const int n = tx->insn.src[0].idx;
2464 assert(m >= 0 && m > n);
2465
2466 sample = ureg_DECL_sampler(ureg, m);
2467 tx->info->sampler_mask |= 1 << m;
2468 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2469
2470 return D3D_OK;
2471 }
2472
2473 DECL_SPECIAL(TEXM3x2PAD)
2474 {
2475 return D3D_OK; /* this is just padding */
2476 }
2477
2478 DECL_SPECIAL(TEXM3x2TEX)
2479 {
2480 struct ureg_program *ureg = tx->ureg;
2481 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2482 struct ureg_src sample;
2483 const int m = tx->insn.dst[0].idx - 1;
2484 const int n = tx->insn.src[0].idx;
2485 assert(m >= 0 && m > n);
2486
2487 tx_texcoord_alloc(tx, m);
2488 tx_texcoord_alloc(tx, m+1);
2489
2490 /* performs the matrix multiplication */
2491 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2492 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2493
2494 sample = ureg_DECL_sampler(ureg, m + 1);
2495 tx->info->sampler_mask |= 1 << (m + 1);
2496 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2497
2498 return D3D_OK;
2499 }
2500
2501 DECL_SPECIAL(TEXM3x3PAD)
2502 {
2503 return D3D_OK; /* this is just padding */
2504 }
2505
2506 DECL_SPECIAL(TEXM3x3SPEC)
2507 {
2508 struct ureg_program *ureg = tx->ureg;
2509 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2510 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2511 struct ureg_src sample;
2512 struct ureg_dst tmp;
2513 const int m = tx->insn.dst[0].idx - 2;
2514 const int n = tx->insn.src[0].idx;
2515 assert(m >= 0 && m > n);
2516
2517 tx_texcoord_alloc(tx, m);
2518 tx_texcoord_alloc(tx, m+1);
2519 tx_texcoord_alloc(tx, m+2);
2520
2521 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2522 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2523 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2524
2525 sample = ureg_DECL_sampler(ureg, m + 2);
2526 tx->info->sampler_mask |= 1 << (m + 2);
2527 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2528
2529 /* At this step, dst = N = (u', w', z').
2530 * We want dst to be the texture sampled at (u'', w'', z''), with
2531 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2532 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2533 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2534 /* at this step tmp.x = 1/N.N */
2535 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2536 /* at this step tmp.y = N.E */
2537 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2538 /* at this step tmp.x = N.E/N.N */
2539 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2540 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2541 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2542 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2543 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2544
2545 return D3D_OK;
2546 }
2547
2548 DECL_SPECIAL(TEXREG2RGB)
2549 {
2550 struct ureg_program *ureg = tx->ureg;
2551 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2552 struct ureg_src sample;
2553 const int m = tx->insn.dst[0].idx;
2554 const int n = tx->insn.src[0].idx;
2555 assert(m >= 0 && m > n);
2556
2557 sample = ureg_DECL_sampler(ureg, m);
2558 tx->info->sampler_mask |= 1 << m;
2559 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2560
2561 return D3D_OK;
2562 }
2563
2564 DECL_SPECIAL(TEXDP3TEX)
2565 {
2566 struct ureg_program *ureg = tx->ureg;
2567 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2568 struct ureg_dst tmp;
2569 struct ureg_src sample;
2570 const int m = tx->insn.dst[0].idx;
2571 const int n = tx->insn.src[0].idx;
2572 assert(m >= 0 && m > n);
2573
2574 tx_texcoord_alloc(tx, m);
2575
2576 tmp = tx_scratch(tx);
2577 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2578 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2579
2580 sample = ureg_DECL_sampler(ureg, m);
2581 tx->info->sampler_mask |= 1 << m;
2582 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2583
2584 return D3D_OK;
2585 }
2586
2587 DECL_SPECIAL(TEXM3x2DEPTH)
2588 {
2589 struct ureg_program *ureg = tx->ureg;
2590 struct ureg_dst tmp;
2591 const int m = tx->insn.dst[0].idx - 1;
2592 const int n = tx->insn.src[0].idx;
2593 assert(m >= 0 && m > n);
2594
2595 tx_texcoord_alloc(tx, m);
2596 tx_texcoord_alloc(tx, m+1);
2597
2598 tmp = tx_scratch(tx);
2599
2600 /* performs the matrix multiplication */
2601 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2602 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2603
2604 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2605 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2606 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2607 /* res = 'w' == 0 ? 1.0 : z/w */
2608 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2609 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2610 /* replace the depth for depth testing with the result */
2611 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2612 TGSI_WRITEMASK_Z, 0, 1);
2613 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2614 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2615 return D3D_OK;
2616 }
2617
2618 DECL_SPECIAL(TEXDP3)
2619 {
2620 struct ureg_program *ureg = tx->ureg;
2621 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2622 const int m = tx->insn.dst[0].idx;
2623 const int n = tx->insn.src[0].idx;
2624 assert(m >= 0 && m > n);
2625
2626 tx_texcoord_alloc(tx, m);
2627
2628 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2629
2630 return D3D_OK;
2631 }
2632
2633 DECL_SPECIAL(TEXM3x3)
2634 {
2635 struct ureg_program *ureg = tx->ureg;
2636 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2637 struct ureg_src sample;
2638 struct ureg_dst E, tmp;
2639 const int m = tx->insn.dst[0].idx - 2;
2640 const int n = tx->insn.src[0].idx;
2641 assert(m >= 0 && m > n);
2642
2643 tx_texcoord_alloc(tx, m);
2644 tx_texcoord_alloc(tx, m+1);
2645 tx_texcoord_alloc(tx, m+2);
2646
2647 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2648 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2649 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2650
2651 switch (tx->insn.opcode) {
2652 case D3DSIO_TEXM3x3:
2653 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2654 break;
2655 case D3DSIO_TEXM3x3TEX:
2656 sample = ureg_DECL_sampler(ureg, m + 2);
2657 tx->info->sampler_mask |= 1 << (m + 2);
2658 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2659 break;
2660 case D3DSIO_TEXM3x3VSPEC:
2661 sample = ureg_DECL_sampler(ureg, m + 2);
2662 tx->info->sampler_mask |= 1 << (m + 2);
2663 E = tx_scratch(tx);
2664 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2665 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2666 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2667 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2668 /* At this step, dst = N = (u', w', z').
2669 * We want dst to be the texture sampled at (u'', w'', z''), with
2670 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2671 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2672 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2673 /* at this step tmp.x = 1/N.N */
2674 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2675 /* at this step tmp.y = N.E */
2676 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2677 /* at this step tmp.x = N.E/N.N */
2678 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2679 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2680 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2681 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2682 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2683 break;
2684 default:
2685 return D3DERR_INVALIDCALL;
2686 }
2687 return D3D_OK;
2688 }
2689
2690 DECL_SPECIAL(TEXDEPTH)
2691 {
2692 struct ureg_program *ureg = tx->ureg;
2693 struct ureg_dst r5;
2694 struct ureg_src r5r, r5g;
2695
2696 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2697
2698 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2699 * r5 won't be used afterward, thus we can use r5.ba */
2700 r5 = tx->regs.r[5];
2701 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2702 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2703
2704 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2705 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2706 /* r5.r = r/g */
2707 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2708 r5r, ureg_imm1f(ureg, 1.0f));
2709 /* replace the depth for depth testing with the result */
2710 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2711 TGSI_WRITEMASK_Z, 0, 1);
2712 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2713
2714 return D3D_OK;
2715 }
2716
2717 DECL_SPECIAL(BEM)
2718 {
2719 struct ureg_program *ureg = tx->ureg;
2720 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2721 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2722 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2723 struct ureg_src m00, m01, m10, m11;
2724 const int m = tx->insn.dst[0].idx;
2725 struct ureg_dst tmp;
2726 /*
2727 * Bump-env-matrix:
2728 * 00 is X
2729 * 01 is Y
2730 * 10 is Z
2731 * 11 is W
2732 */
2733 nine_info_mark_const_f_used(tx->info, 8 + m);
2734 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2735 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2736 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2737 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2738 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2739 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2740 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2741 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2742 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2743 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2744
2745 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2746 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2747 NINE_APPLY_SWIZZLE(src1, X), src0);
2748 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2749 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2750 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2751 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2752
2753 tx->info->bumpenvmat_needed = 1;
2754
2755 return D3D_OK;
2756 }
2757
2758 DECL_SPECIAL(TEXLD)
2759 {
2760 struct ureg_program *ureg = tx->ureg;
2761 unsigned target;
2762 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2763 struct ureg_src src[2] = {
2764 tx_src_param(tx, &tx->insn.src[0]),
2765 tx_src_param(tx, &tx->insn.src[1])
2766 };
2767 assert(tx->insn.src[1].idx >= 0 &&
2768 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2769 target = tx->sampler_targets[tx->insn.src[1].idx];
2770
2771 switch (tx->insn.flags) {
2772 case 0:
2773 ureg_TEX(ureg, dst, target, src[0], src[1]);
2774 break;
2775 case NINED3DSI_TEXLD_PROJECT:
2776 ureg_TXP(ureg, dst, target, src[0], src[1]);
2777 break;
2778 case NINED3DSI_TEXLD_BIAS:
2779 ureg_TXB(ureg, dst, target, src[0], src[1]);
2780 break;
2781 default:
2782 assert(0);
2783 return D3DERR_INVALIDCALL;
2784 }
2785 return D3D_OK;
2786 }
2787
2788 DECL_SPECIAL(TEXLD_14)
2789 {
2790 struct ureg_program *ureg = tx->ureg;
2791 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2792 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2793 const unsigned s = tx->insn.dst[0].idx;
2794 const unsigned t = ps1x_sampler_type(tx->info, s);
2795
2796 tx->info->sampler_mask |= 1 << s;
2797 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2798
2799 return D3D_OK;
2800 }
2801
2802 DECL_SPECIAL(TEX)
2803 {
2804 struct ureg_program *ureg = tx->ureg;
2805 const unsigned s = tx->insn.dst[0].idx;
2806 const unsigned t = ps1x_sampler_type(tx->info, s);
2807 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2808 struct ureg_src src[2];
2809
2810 tx_texcoord_alloc(tx, s);
2811
2812 src[0] = tx->regs.vT[s];
2813 src[1] = ureg_DECL_sampler(ureg, s);
2814 tx->info->sampler_mask |= 1 << s;
2815
2816 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2817
2818 return D3D_OK;
2819 }
2820
2821 DECL_SPECIAL(TEXLDD)
2822 {
2823 unsigned target;
2824 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2825 struct ureg_src src[4] = {
2826 tx_src_param(tx, &tx->insn.src[0]),
2827 tx_src_param(tx, &tx->insn.src[1]),
2828 tx_src_param(tx, &tx->insn.src[2]),
2829 tx_src_param(tx, &tx->insn.src[3])
2830 };
2831 assert(tx->insn.src[1].idx >= 0 &&
2832 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2833 target = tx->sampler_targets[tx->insn.src[1].idx];
2834
2835 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2836 return D3D_OK;
2837 }
2838
2839 DECL_SPECIAL(TEXLDL)
2840 {
2841 unsigned target;
2842 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2843 struct ureg_src src[2] = {
2844 tx_src_param(tx, &tx->insn.src[0]),
2845 tx_src_param(tx, &tx->insn.src[1])
2846 };
2847 assert(tx->insn.src[1].idx >= 0 &&
2848 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2849 target = tx->sampler_targets[tx->insn.src[1].idx];
2850
2851 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2852 return D3D_OK;
2853 }
2854
/* SETP: not implemented. The whole body is the STUB() macro invoked with
 * D3DERR_INVALIDCALL.
 * NOTE(review): STUB() is defined outside this section; presumably it
 * returns the given error code - confirm against its definition. */
DECL_SPECIAL(SETP)
{
    STUB(D3DERR_INVALIDCALL);
}
2859
/* BREAKP: not implemented. The whole body is the STUB() macro invoked
 * with D3DERR_INVALIDCALL.
 * NOTE(review): STUB() is defined outside this section; presumably it
 * returns the given error code - confirm against its definition. */
DECL_SPECIAL(BREAKP)
{
    STUB(D3DERR_INVALIDCALL);
}
2864
2865 DECL_SPECIAL(PHASE)
2866 {
2867 return D3D_OK; /* we don't care about phase */
2868 }
2869
2870 DECL_SPECIAL(COMMENT)
2871 {
2872 return D3D_OK; /* nothing to do */
2873 }
2874
2875
/* Builds one struct sm1_op_info initializer.
 *   o        D3D opcode name, pasted onto D3DSIO_
 *   t        TGSI opcode name, pasted onto TGSI_OPCODE_
 *   vv1,vv2  first version pair  (presumably min/max vertex shader version)
 *   pv1,pv2  second version pair (presumably min/max pixel shader version)
 *   d,s,h    remaining fields, in order (presumably destination count,
 *            source count and special handler - confirm against the
 *            struct sm1_op_info declaration) */
#define _OPI(o, t, vv1, vv2, pv1, pv2, d, s, h) \
    {                                           \
        D3DSIO_##o,                             \
        TGSI_OPCODE_##t,                        \
        { vv1, vv2 },                           \
        { pv1, pv2 },                           \
        d, s, h                                 \
    }
2878
2879 struct sm1_op_info inst_table[] =
2880 {
2881 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
2882 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2883 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2884 _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
2885 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2886 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2887 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2888 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2889 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2890 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2891 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2892 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2893 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2894 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2895 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2896 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2897 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2898 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2899 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2900 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2901
2902 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2903 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2904 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2905 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2906 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2907
2908 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2909 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2910 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2911 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2912 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2913 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2914
2915 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2916
2917 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2918 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2919 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2920 _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
2921 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2922
2923 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2924 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2925
2926 /* More flow control */
2927 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2928 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2929 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2930 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2931 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2932 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2933 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2934 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2935 /* we don't write to the address register, but a normal register (copied
2936 * when needed to the address register), thus we don't use ARR */
2937 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2938
2939 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2940 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2941
2942 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2943 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2944 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2945 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2946 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2947 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2948 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2949 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2950 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2951 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2952 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2953 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2954 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2955 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2956 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2957 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2958
2959 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2960 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2961 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2962 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2963
2964 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2965
2966 /* More tex stuff */
2967 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2968 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2969 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2970 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2971 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2972 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2973
2974 /* Misc */
2975 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2976 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2977 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2978 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2979 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2980 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2981 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2982 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2983 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2984 };
2985
2986 struct sm1_op_info inst_phase =
2987 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2988
2989 struct sm1_op_info inst_comment =
2990 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2991
2992 static void
2993 create_op_info_map(struct shader_translator *tx)
2994 {
2995 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2996 unsigned i;
2997
2998 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
2999 tx->op_info_map[i] = -1;
3000
3001 if (tx->processor == PIPE_SHADER_VERTEX) {
3002 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3003 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3004 if (inst_table[i].vert_version.min <= version &&
3005 inst_table[i].vert_version.max >= version)
3006 tx->op_info_map[inst_table[i].sio] = i;
3007 }
3008 } else {
3009 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3010 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3011 if (inst_table[i].frag_version.min <= version &&
3012 inst_table[i].frag_version.max >= version)
3013 tx->op_info_map[inst_table[i].sio] = i;
3014 }
3015 }
3016 }
3017
3018 static inline HRESULT
3019 NineTranslateInstruction_Generic(struct shader_translator *tx)
3020 {
3021 struct ureg_dst dst[1];
3022 struct ureg_src src[4];
3023 unsigned i;
3024
3025 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3026 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3027 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3028 src[i] = tx_src_param(tx, &tx->insn.src[i]);
3029
3030 ureg_insn(tx->ureg, tx->insn.info->opcode,
3031 dst, tx->insn.ndst,
3032 src, tx->insn.nsrc);
3033 return D3D_OK;
3034 }
3035
3036 static inline DWORD
3037 TOKEN_PEEK(struct shader_translator *tx)
3038 {
3039 return *(tx->parse);
3040 }
3041
3042 static inline DWORD
3043 TOKEN_NEXT(struct shader_translator *tx)
3044 {
3045 return *(tx->parse)++;
3046 }
3047
3048 static inline void
3049 TOKEN_JUMP(struct shader_translator *tx)
3050 {
3051 if (tx->parse_next && tx->parse != tx->parse_next) {
3052 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3053 tx->parse = tx->parse_next;
3054 }
3055 }
3056
3057 static inline boolean
3058 sm1_parse_eof(struct shader_translator *tx)
3059 {
3060 return TOKEN_PEEK(tx) == NINED3DSP_END;
3061 }
3062
3063 static void
3064 sm1_read_version(struct shader_translator *tx)
3065 {
3066 const DWORD tok = TOKEN_NEXT(tx);
3067
3068 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3069 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3070
3071 switch (tok >> 16) {
3072 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3073 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3074 default:
3075 DBG("Invalid shader type: %x\n", tok);
3076 tx->processor = ~0;
3077 break;
3078 }
3079 }
3080
3081 /* This is just to check if we parsed the instruction properly. */
3082 static void
3083 sm1_parse_get_skip(struct shader_translator *tx)
3084 {
3085 const DWORD tok = TOKEN_PEEK(tx);
3086
3087 if (tx->version.major >= 2) {
3088 tx->parse_next = tx->parse + 1 /* this */ +
3089 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3090 } else {
3091 tx->parse_next = NULL; /* TODO: determine from param count */
3092 }
3093 }
3094
3095 static void
3096 sm1_print_comment(const char *comment, UINT size)
3097 {
3098 if (!size)
3099 return;
3100 /* TODO */
3101 }
3102
3103 static void
3104 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3105 {
3106 DWORD tok = TOKEN_PEEK(tx);
3107
3108 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3109 {
3110 const char *comment = "";
3111 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3112 tx->parse += size + 1;
3113
3114 if (print)
3115 sm1_print_comment(comment, size);
3116
3117 tok = TOKEN_PEEK(tx);
3118 }
3119 }
3120
3121 static void
3122 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3123 {
3124 *reg = TOKEN_NEXT(tx);
3125
3126 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3127 {
3128 if (tx->version.major < 2)
3129 *rel = (1 << 31) |
3130 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3131 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
3132 D3DSP_NOSWIZZLE;
3133 else
3134 *rel = TOKEN_NEXT(tx);
3135 }
3136 }
3137
3138 static void
3139 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3140 {
3141 int8_t shift;
3142 dst->file =
3143 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3144 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3145 dst->type = TGSI_RETURN_TYPE_FLOAT;
3146 dst->idx = tok & D3DSP_REGNUM_MASK;
3147 dst->rel = NULL;
3148 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3149 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3150 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3151 dst->shift = (shift & 0x7) - (shift & 0x8);
3152 }
3153
3154 static void
3155 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3156 {
3157 src->file =
3158 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3159 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3160 src->type = TGSI_RETURN_TYPE_FLOAT;
3161 src->idx = tok & D3DSP_REGNUM_MASK;
3162 src->rel = NULL;
3163 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3164 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3165
3166 switch (src->file) {
3167 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3168 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3169 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3170 default:
3171 break;
3172 }
3173 }
3174
3175 static void
3176 sm1_parse_immediate(struct shader_translator *tx,
3177 struct sm1_src_param *imm)
3178 {
3179 imm->file = NINED3DSPR_IMMEDIATE;
3180 imm->idx = INT_MIN;
3181 imm->rel = NULL;
3182 imm->swizzle = NINED3DSP_NOSWIZZLE;
3183 imm->mod = 0;
3184 switch (tx->insn.opcode) {
3185 case D3DSIO_DEF:
3186 imm->type = NINED3DSPTYPE_FLOAT4;
3187 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3188 tx->parse += 4;
3189 break;
3190 case D3DSIO_DEFI:
3191 imm->type = NINED3DSPTYPE_INT4;
3192 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3193 tx->parse += 4;
3194 break;
3195 case D3DSIO_DEFB:
3196 imm->type = NINED3DSPTYPE_BOOL;
3197 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3198 tx->parse += 1;
3199 break;
3200 default:
3201 assert(0);
3202 break;
3203 }
3204 }
3205
3206 static void
3207 sm1_read_dst_param(struct shader_translator *tx,
3208 struct sm1_dst_param *dst,
3209 struct sm1_src_param *rel)
3210 {
3211 DWORD tok_dst, tok_rel = 0;
3212
3213 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3214 sm1_parse_dst_param(dst, tok_dst);
3215 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3216 sm1_parse_src_param(rel, tok_rel);
3217 dst->rel = rel;
3218 }
3219 }
3220
3221 static void
3222 sm1_read_src_param(struct shader_translator *tx,
3223 struct sm1_src_param *src,
3224 struct sm1_src_param *rel)
3225 {
3226 DWORD tok_src, tok_rel = 0;
3227
3228 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3229 sm1_parse_src_param(src, tok_src);
3230 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3231 assert(rel);
3232 sm1_parse_src_param(rel, tok_rel);
3233 src->rel = rel;
3234 }
3235 }
3236
3237 static void
3238 sm1_read_semantic(struct shader_translator *tx,
3239 struct sm1_semantic *sem)
3240 {
3241 const DWORD tok_usg = TOKEN_NEXT(tx);
3242 const DWORD tok_dst = TOKEN_NEXT(tx);
3243
3244 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3245 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3246 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3247
3248 sm1_parse_dst_param(&sem->reg, tok_dst);
3249 }
3250
/* Parse the instruction at the current position and translate it.
 *
 * Steps, in order: skip leading comments, note where the next instruction is
 * expected to start, decode the instruction token, look up the opcode's info
 * entry, read destination / predicate / source parameters (plus the literal
 * payload of DEF/DEFI/DEFB), dump and check the instruction, then run the
 * opcode's handler (or the generic translation) and apply the destination
 * modifiers.
 *
 * A handler result other than D3D_OK sets tx->failure. Unknown opcodes are
 * logged and skipped via TOKEN_JUMP; an opcode outside its supported version
 * window is logged and the function returns without reading its parameters. */
static void
sm1_parse_instruction(struct shader_translator *tx)
{
    struct sm1_instruction *insn = &tx->insn;
    HRESULT hr;
    DWORD tok;
    struct sm1_op_info *info = NULL;
    unsigned i;

    sm1_parse_comments(tx, TRUE);
    /* Must run before the instruction token is consumed: it peeks at it. */
    sm1_parse_get_skip(tx);

    tok = TOKEN_NEXT(tx);

    insn->opcode = tok & D3DSI_OPCODE_MASK;
    insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
    insn->coissue = !!(tok & D3DSI_COISSUE);
    insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);

    /* Opcodes that fit in the map go through inst_table; PHASE and COMMENT
     * have opcode values beyond the map and use their own descriptors. */
    if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
        int k = tx->op_info_map[insn->opcode];
        if (k >= 0) {
            assert(k < ARRAY_SIZE(inst_table));
            info = &inst_table[k];
        }
    } else {
       if (insn->opcode == D3DSIO_PHASE)   info = &inst_phase;
       if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
    }
    if (!info) {
       DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
       TOKEN_JUMP(tx);
       return;
    }
    insn->info = info;
    insn->ndst = info->ndst;
    insn->nsrc = info->nsrc;

    assert(!insn->predicated && "TODO: predicated instructions");

    /* check version */
    {
        unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
        unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
        unsigned ver = (tx->version.major << 8) | tx->version.minor;
        if (ver < min || ver > max) {
            DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
                min, ver, max);
            return;
        }
    }

    /* Parameters appear in the stream as: destinations, predicate, sources. */
    for (i = 0; i < insn->ndst; ++i)
        sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
    if (insn->predicated)
        sm1_read_src_param(tx, &insn->pred, NULL);
    for (i = 0; i < insn->nsrc; ++i)
        sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);

    /* parse here so we can dump them before processing */
    if (insn->opcode == D3DSIO_DEF ||
        insn->opcode == D3DSIO_DEFI ||
        insn->opcode == D3DSIO_DEFB)
        sm1_parse_immediate(tx, &tx->insn.src[0]);

    sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
    sm1_instruction_check(insn);

    if (info->handler)
        hr = info->handler(tx);
    else
        hr = NineTranslateInstruction_Generic(tx);
    /* Applied whether or not the handler succeeded. */
    tx_apply_dst0_modifiers(tx);

    if (hr != D3D_OK)
        tx->failure = TRUE;
    tx->num_scratch = 0; /* reset */

    TOKEN_JUMP(tx);
}
3331
3332 static void
3333 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3334 {
3335 unsigned i;
3336
3337 tx->info = info;
3338
3339 tx->byte_code = info->byte_code;
3340 tx->parse = info->byte_code;
3341
3342 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3343 info->input_map[i] = NINE_DECLUSAGE_NONE;
3344 info->num_inputs = 0;
3345
3346 info->position_t = FALSE;
3347 info->point_size = FALSE;
3348
3349 tx->info->const_float_slots = 0;
3350 tx->info->const_int_slots = 0;
3351 tx->info->const_bool_slots = 0;
3352
3353 info->sampler_mask = 0x0;
3354 info->rt_mask = 0x0;
3355
3356 info->lconstf.data = NULL;
3357 info->lconstf.ranges = NULL;
3358
3359 info->bumpenvmat_needed = 0;
3360
3361 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3362 tx->regs.rL[i] = ureg_dst_undef();
3363 }
3364 tx->regs.address = ureg_dst_undef();
3365 tx->regs.a0 = ureg_dst_undef();
3366 tx->regs.p = ureg_dst_undef();
3367 tx->regs.oDepth = ureg_dst_undef();
3368 tx->regs.vPos = ureg_src_undef();
3369 tx->regs.vFace = ureg_src_undef();
3370 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3371 tx->regs.o[i] = ureg_dst_undef();
3372 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3373 tx->regs.oCol[i] = ureg_dst_undef();
3374 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3375 tx->regs.vC[i] = ureg_src_undef();
3376 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3377 tx->regs.vT[i] = ureg_src_undef();
3378
3379 sm1_read_version(tx);
3380
3381 info->version = (tx->version.major << 4) | tx->version.minor;
3382
3383 tx->num_outputs = 0;
3384
3385 create_op_info_map(tx);
3386 }
3387
3388 static void
3389 tx_dtor(struct shader_translator *tx)
3390 {
3391 if (tx->num_inst_labels)
3392 FREE(tx->inst_labels);
3393 FREE(tx->lconstf);
3394 FREE(tx->regs.r);
3395 FREE(tx);
3396 }
3397
3398 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3399 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3400 static void
3401 shader_add_vs_viewport_transform(struct shader_translator *tx)
3402 {
3403 struct ureg_program *ureg = tx->ureg;
3404 struct ureg_src c0 = NINE_CONSTANT_SRC(0);
3405 struct ureg_src c1 = NINE_CONSTANT_SRC(1);
3406 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3407
3408 c0 = ureg_src_dimension(c0, 4);
3409 c1 = ureg_src_dimension(c1, 4);
3410 /* TODO: find out when we need to apply the viewport transformation or not.
3411 * Likely will be XYZ vs XYZRHW in vdecl_out
3412 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3413 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3414 */
3415 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3416 }
3417
3418 static void
3419 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3420 {
3421 struct ureg_program *ureg = tx->ureg;
3422 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3423 struct ureg_src fog_end, fog_coeff, fog_density;
3424 struct ureg_src fog_vs, depth, fog_color;
3425 struct ureg_dst fog_factor;
3426
3427 if (!tx->info->fog_enable) {
3428 ureg_MOV(ureg, oCol0, src_col);
3429 return;
3430 }
3431
3432 if (tx->info->fog_mode != D3DFOG_NONE) {
3433 depth = nine_get_position_input(tx);
3434 depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3435 }
3436
3437 nine_info_mark_const_f_used(tx->info, 33);
3438 fog_color = NINE_CONSTANT_SRC(32);
3439 fog_factor = tx_scratch_scalar(tx);
3440
3441 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3442 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3443 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3444 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(depth));
3445 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3446 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3447 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3448 ureg_MUL(ureg, fog_factor, depth, fog_density);
3449 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3450 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3451 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3452 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3453 ureg_MUL(ureg, fog_factor, depth, fog_density);
3454 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3455 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3456 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3457 } else {
3458 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3459 TGSI_INTERPOLATE_PERSPECTIVE),
3460 TGSI_SWIZZLE_X);
3461 ureg_MOV(ureg, fog_factor, fog_vs);
3462 }
3463
3464 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3465 tx_src_scalar(fog_factor), src_col, fog_color);
3466 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3467 }
3468
/* Query a PIPE_CAP_* value; expects a `screen` variable in scope. */
#define GET_CAP(n) screen->get_param( \
      screen, PIPE_CAP_##n)
/* Query a PIPE_SHADER_CAP_* value for the shader stage in info->type;
 * expects `screen` and `info` variables in scope. */
#define GET_SHADER_CAP(n) screen->get_shader_param( \
      screen, info->type, PIPE_SHADER_CAP_##n)
3473
3474 HRESULT
3475 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3476 {
3477 struct shader_translator *tx;
3478 HRESULT hr = D3D_OK;
3479 const unsigned processor = info->type;
3480 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3481
3482 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3483
3484 tx = CALLOC_STRUCT(shader_translator);
3485 if (!tx)
3486 return E_OUTOFMEMORY;
3487 tx_ctor(tx, info);
3488
3489 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3490 hr = D3DERR_INVALIDCALL;
3491 DBG("Unsupported shader version: %u.%u !\n",
3492 tx->version.major, tx->version.minor);
3493 goto out;
3494 }
3495 if (tx->processor != processor) {
3496 hr = D3DERR_INVALIDCALL;
3497 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3498 goto out;
3499 }
3500 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3501 tx->version.major, tx->version.minor);
3502
3503 tx->ureg = ureg_create(processor);
3504 if (!tx->ureg) {
3505 hr = E_OUTOFMEMORY;
3506 goto out;
3507 }
3508
3509 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3510 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3511 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3512 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3513 tx->texcoord_sn = tx->want_texcoord ?
3514 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3515 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3516 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3517
3518 if (IS_VS) {
3519 tx->num_constf_allowed = NINE_MAX_CONST_F;
3520 } else if (tx->version.major < 2) {/* IS_PS v1 */
3521 tx->num_constf_allowed = 8;
3522 } else if (tx->version.major == 2) {/* IS_PS v2 */
3523 tx->num_constf_allowed = 32;
3524 } else {/* IS_PS v3 */
3525 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3526 }
3527
3528 if (tx->version.major < 2) {
3529 tx->num_consti_allowed = 0;
3530 tx->num_constb_allowed = 0;
3531 } else {
3532 tx->num_consti_allowed = NINE_MAX_CONST_I;
3533 tx->num_constb_allowed = NINE_MAX_CONST_B;
3534 }
3535
3536 if (IS_VS && tx->version.major >= 2 && info->swvp_on) {
3537 tx->num_constf_allowed = 8192;
3538 tx->num_consti_allowed = 2048;
3539 tx->num_constb_allowed = 2048;
3540 }
3541
3542 /* VS must always write position. Declare it here to make it the 1st output.
3543 * (Some drivers like nv50 are buggy and rely on that.)
3544 */
3545 if (IS_VS) {
3546 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3547 } else {
3548 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3549 if (!tx->shift_wpos)
3550 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3551 }
3552
3553 if (GET_CAP(TGSI_MUL_ZERO_WINS))
3554 ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);
3555
3556 while (!sm1_parse_eof(tx) && !tx->failure)
3557 sm1_parse_instruction(tx);
3558 tx->parse++; /* for byte_size */
3559
3560 if (tx->failure) {
3561 /* For VS shaders, we print the warning later,
3562 * we first try with swvp. */
3563 if (IS_PS)
3564 ERR("Encountered buggy shader\n");
3565 ureg_destroy(tx->ureg);
3566 hr = D3DERR_INVALIDCALL;
3567 goto out;
3568 }
3569
3570 if (IS_PS && tx->version.major < 3) {
3571 if (tx->version.major < 2) {
3572 assert(tx->num_temp); /* there must be color output */
3573 info->rt_mask |= 0x1;
3574 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3575 } else {
3576 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3577 }
3578 }
3579
3580 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3581 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3582 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3583 }
3584
3585 if (info->position_t)
3586 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3587
3588 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3589 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3590 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3591 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3592 info->point_size = TRUE;
3593 }
3594
3595 if (info->process_vertices)
3596 shader_add_vs_viewport_transform(tx);
3597
3598 ureg_END(tx->ureg);
3599
3600 /* record local constants */
3601 if (tx->num_lconstf && tx->indirect_const_access) {
3602 struct nine_range *ranges;
3603 float *data;
3604 int *indices;
3605 unsigned i, k, n;
3606
3607 hr = E_OUTOFMEMORY;
3608
3609 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3610 if (!data)
3611 goto out;
3612 info->lconstf.data = data;
3613
3614 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3615 if (!indices)
3616 goto out;
3617
3618 /* lazy sort, num_lconstf should be small */
3619 for (n = 0; n < tx->num_lconstf; ++n) {
3620 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3621 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3622 k = i;
3623 }
3624 indices[n] = tx->lconstf[k].idx;
3625 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
3626 tx->lconstf[k].idx = INT_MAX;
3627 }
3628
3629 /* count ranges */
3630 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3631 if (indices[i] != indices[i - 1] + 1)
3632 ++n;
3633 ranges = MALLOC(n * sizeof(ranges[0]));
3634 if (!ranges) {
3635 FREE(indices);
3636 goto out;
3637 }
3638 info->lconstf.ranges = ranges;
3639
3640 k = 0;
3641 ranges[k].bgn = indices[0];
3642 for (i = 1; i < tx->num_lconstf; ++i) {
3643 if (indices[i] != indices[i - 1] + 1) {
3644 ranges[k].next = &ranges[k + 1];
3645 ranges[k].end = indices[i - 1] + 1;
3646 ++k;
3647 ranges[k].bgn = indices[i];
3648 }
3649 }
3650 ranges[k].end = indices[i - 1] + 1;
3651 ranges[k].next = NULL;
3652 assert(n == (k + 1));
3653
3654 FREE(indices);
3655 hr = D3D_OK;
3656 }
3657
3658 /* r500 */
3659 if (info->const_float_slots > device->max_vs_const_f &&
3660 (info->const_int_slots || info->const_bool_slots) &&
3661 (!IS_VS || !info->swvp_on))
3662 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3663
3664
3665 if (tx->indirect_const_access) /* vs only */
3666 info->const_float_slots = device->max_vs_const_f;
3667
3668 if (!IS_VS || !info->swvp_on) {
3669 unsigned s, slot_max;
3670 unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3671
3672 slot_max = info->const_bool_slots > 0 ?
3673 max_const_f + NINE_MAX_CONST_I
3674 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3675 info->const_int_slots > 0 ?
3676 max_const_f + info->const_int_slots :
3677 info->const_float_slots;
3678
3679 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3680
3681 for (s = 0; s < slot_max; s++)
3682 ureg_DECL_constant(tx->ureg, s);
3683 } else {
3684 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
3685 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
3686 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
3687 ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
3688 }
3689
3690 if (info->process_vertices)
3691 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
3692
3693 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3694 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
3695 tgsi_dump(toks, 0);
3696 ureg_free_tokens(toks);
3697 }
3698
3699 if (info->process_vertices) {
3700 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
3701 tx->output_info,
3702 tx->num_outputs,
3703 &(info->so));
3704 info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
3705 } else
3706 info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
3707 if (!info->cso) {
3708 hr = D3DERR_DRIVERINTERNALERROR;
3709 FREE(info->lconstf.data);
3710 FREE(info->lconstf.ranges);
3711 goto out;
3712 }
3713
3714 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3715 out:
3716 tx_dtor(tx);
3717 return hr;
3718 }