st/nine: Access pipe_context via NineDevice9_GetPipe
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37
38 #define DBG_CHANNEL DBG_SHADER
39
/* Print to the DBG_CHANNEL debug channel through _nine_debug_printf().
 * The second argument is always NULL here (presumably the function-name
 * prefix -- confirm in nine_debug.h). Uses the GNU named-variadic macro
 * syntax. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)


struct shader_translator;

/* Signature of a custom instruction translator: it receives the translator
 * state and returns an HRESULT. */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Forward declaration; maps a D3DSIO_* opcode to a printable name. */
static inline const char *d3dsio_to_string(unsigned opcode);
48
49
/* Shader-type tags for vertex / pixel shader byte code.
 * NOTE(review): presumably the high word of the SM1 version token -- confirm
 * against the parser. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Capacity of the conditional / loop label stacks in shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

/* NOTE(review): presumably the end-of-shader token -- confirm. */
#define NINED3DSP_END 0x0000ffff

/* Value kinds of an immediate; selects which member of
 * sm1_src_param::imm is meaningful. */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Pseudo register file used for immediates; one past D3DSPR_PREDICATE. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

/* Position of the write mask inside a parameter token. */
#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

/* Bit 28: instruction-is-predicated flag. */
#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operators (greater, equal, greater-or-equal, less, not-equal,
 * less-or-equal). */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Opcode-specific flags occupy bits 16..23. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values for D3DSIO_TEX (dumped as the "p" / "b" suffix). */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Per-component write mask bits: 0 = x, 1 = y, 2 = z, 3 = w. */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, selecting x, y, z, w in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Source operand for slot 'index' of the TGSI constant file. */
#define NINE_CONSTANT_SRC(index) \
    ureg_src_register(TGSI_FILE_CONSTANT, index)

/* Replicate component 's' (X, Y, Z or W) of 'src' to all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Constant 'index' with component 's' replicated to all channels. */
#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
    NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)

/* Destination modifier flags, shifted down by D3DSP_DSTMOD_SHIFT. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
105
/*
 * Source modifiers and where they are available:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2      = PS 1.4 (2x)
 * X2NEG   = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 *
 * The NINED3DSPSM_* values are the D3DSPSM_* constants shifted down by
 * D3DSP_SRCMOD_SHIFT; they are used as indices into sm1_mod_str[].
 */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
135
/* Printable name of each source modifier, indexed by NINED3DSPSM_*.
 * Used by the debug dump only; the array has no entries past
 * NINED3DSPSM_NOT. */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
153
154 static void
155 sm1_dump_writemask(BYTE mask)
156 {
157 if (mask & 1) DUMP("x"); else DUMP("_");
158 if (mask & 2) DUMP("y"); else DUMP("_");
159 if (mask & 4) DUMP("z"); else DUMP("_");
160 if (mask & 8) DUMP("w"); else DUMP("_");
161 }
162
163 static void
164 sm1_dump_swizzle(BYTE s)
165 {
166 char c[4] = { 'x', 'y', 'z', 'w' };
167 DUMP("%c%c%c%c",
168 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
169 }
170
/* One-character prefix for each D3DSPR_* register file, used by the debug
 * dump. Note that D3DSPR_ATTROUT and D3DSPR_DEPTHOUT share 'D' and that all
 * four CONST files print as 'c'. The table ends at D3DSPR_PREDICATE, so
 * NINED3DSPR_IMMEDIATE has no entry. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
194
195 static void
196 sm1_dump_reg(BYTE file, INT index)
197 {
198 switch (file) {
199 case D3DSPR_LOOP:
200 DUMP("aL");
201 break;
202 case D3DSPR_COLOROUT:
203 DUMP("oC%i", index);
204 break;
205 case D3DSPR_DEPTHOUT:
206 DUMP("oDepth");
207 break;
208 case D3DSPR_RASTOUT:
209 DUMP("oRast%i", index);
210 break;
211 case D3DSPR_CONSTINT:
212 DUMP("iconst[%i]", index);
213 break;
214 case D3DSPR_CONSTBOOL:
215 DUMP("bconst[%i]", index);
216 break;
217 default:
218 DUMP("%c%i", sm1_file_char[file], index);
219 break;
220 }
221 }
222
/* Decoded SM1 source parameter. */
struct sm1_src_param
{
    INT idx;                   /* register index within 'file' */
    struct sm1_src_param *rel; /* relative-addressing register, or NULL */
    BYTE file;                 /* D3DSPR_* or NINED3DSPR_IMMEDIATE */
    BYTE swizzle;              /* 2 bits per component; NINED3DSP_NOSWIZZLE = identity */
    BYTE mod;                  /* NINED3DSPSM_* source modifier */
    BYTE type;                 /* NINED3DSPTYPE_*; selects the valid 'imm' member */
    union {
        DWORD d[4];
        float f[4];            /* NINED3DSPTYPE_FLOAT4 */
        int i[4];              /* NINED3DSPTYPE_INT4 */
        BOOL b;                /* NINED3DSPTYPE_BOOL */
    } imm;                     /* payload when file == NINED3DSPR_IMMEDIATE */
};
/* Forward declaration; presumably fills 'imm' from the token stream of the
 * translator -- confirm at the definition. */
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
240
/* Decoded SM1 destination parameter. */
struct sm1_dst_param
{
    INT idx;                   /* register index within 'file' */
    struct sm1_src_param *rel; /* relative-addressing register, or NULL */
    BYTE file;                 /* D3DSPR_* register file */
    BYTE mask;                 /* write mask, NINED3DSP_WRITEMASK_* bits */
    BYTE mod;                  /* NINED3DSPDM_* flags (tested bitwise) */
    int8_t shift; /* sint4; the dump prints <0 as a divide and >0 as a multiply by 2^|shift| */
    BYTE type;                 /* NOTE(review): presumably NINED3DSPTYPE_* -- confirm */
};
251
252 static inline void
253 assert_replicate_swizzle(const struct ureg_src *reg)
254 {
255 assert(reg->SwizzleY == reg->SwizzleX &&
256 reg->SwizzleZ == reg->SwizzleX &&
257 reg->SwizzleW == reg->SwizzleX);
258 }
259
260 static void
261 sm1_dump_immediate(const struct sm1_src_param *param)
262 {
263 switch (param->type) {
264 case NINED3DSPTYPE_FLOAT4:
265 DUMP("{ %f %f %f %f }",
266 param->imm.f[0], param->imm.f[1],
267 param->imm.f[2], param->imm.f[3]);
268 break;
269 case NINED3DSPTYPE_INT4:
270 DUMP("{ %i %i %i %i }",
271 param->imm.i[0], param->imm.i[1],
272 param->imm.i[2], param->imm.i[3]);
273 break;
274 case NINED3DSPTYPE_BOOL:
275 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
276 break;
277 default:
278 assert(0);
279 break;
280 }
281 }
282
283 static void
284 sm1_dump_src_param(const struct sm1_src_param *param)
285 {
286 if (param->file == NINED3DSPR_IMMEDIATE) {
287 assert(!param->mod &&
288 !param->rel &&
289 param->swizzle == NINED3DSP_NOSWIZZLE);
290 sm1_dump_immediate(param);
291 return;
292 }
293
294 if (param->mod)
295 DUMP("%s(", sm1_mod_str[param->mod]);
296 if (param->rel) {
297 DUMP("%c[", sm1_file_char[param->file]);
298 sm1_dump_src_param(param->rel);
299 DUMP("+%i]", param->idx);
300 } else {
301 sm1_dump_reg(param->file, param->idx);
302 }
303 if (param->mod)
304 DUMP(")");
305 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
306 DUMP(".");
307 sm1_dump_swizzle(param->swizzle);
308 }
309 }
310
311 static void
312 sm1_dump_dst_param(const struct sm1_dst_param *param)
313 {
314 if (param->mod & NINED3DSPDM_SATURATE)
315 DUMP("sat ");
316 if (param->mod & NINED3DSPDM_PARTIALP)
317 DUMP("pp ");
318 if (param->mod & NINED3DSPDM_CENTROID)
319 DUMP("centroid ");
320 if (param->shift < 0)
321 DUMP("/%u ", 1 << -param->shift);
322 if (param->shift > 0)
323 DUMP("*%u ", 1 << param->shift);
324
325 if (param->rel) {
326 DUMP("%c[", sm1_file_char[param->file]);
327 sm1_dump_src_param(param->rel);
328 DUMP("+%i]", param->idx);
329 } else {
330 sm1_dump_reg(param->file, param->idx);
331 }
332 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
333 DUMP(".");
334 sm1_dump_writemask(param->mask);
335 }
336 }
337
/* A declared register together with its semantic. Filled by
 * sm1_read_semantic() (declared further down in this file). */
struct sm1_semantic
{
    struct sm1_dst_param reg; /* the register being declared */
    BYTE sampler_type;        /* NOTE(review): presumably only meaningful for sampler declarations -- confirm */
    D3DDECLUSAGE usage;       /* D3D declaration usage */
    BYTE usage_idx;           /* index within 'usage' */
};
345
/* Static description of one SM1 opcode and how it is translated. */
struct sm1_op_info
{
    unsigned sio;    /* NOTE(review): presumably the D3DSIO_* opcode -- confirm */
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
     * should be ignored completely */
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even if handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
366
/* One decoded SM1 instruction. */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags;                         /* opcode-specific flags, e.g. NINED3DSI_TEXLD_* for D3DSIO_TEX */
    BOOL coissue;                       /* dumped as the "_co" suffix */
    BOOL predicated;                    /* when set, 'pred' holds the predicate operand */
    BYTE ndst;                          /* number of valid entries in dst[] */
    BYTE nsrc;                          /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    struct sm1_src_param src_rel[4];    /* NOTE(review): presumably storage for src[i].rel -- confirm */
    struct sm1_src_param pred;
    struct sm1_src_param dst_rel[1];    /* NOTE(review): presumably storage for dst[0].rel -- confirm */
    struct sm1_dst_param dst[1];

    struct sm1_op_info *info;           /* static description of 'opcode' */
};
383
384 static void
385 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
386 {
387 unsigned i;
388
389 /* no info stored for these: */
390 if (insn->opcode == D3DSIO_DCL)
391 return;
392 for (i = 0; i < indent; ++i)
393 DUMP(" ");
394
395 if (insn->predicated) {
396 DUMP("@");
397 sm1_dump_src_param(&insn->pred);
398 DUMP(" ");
399 }
400 DUMP("%s", d3dsio_to_string(insn->opcode));
401 if (insn->flags) {
402 switch (insn->opcode) {
403 case D3DSIO_TEX:
404 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
405 break;
406 default:
407 DUMP("_%x", insn->flags);
408 break;
409 }
410 }
411 if (insn->coissue)
412 DUMP("_co");
413 DUMP(" ");
414
415 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
416 sm1_dump_dst_param(&insn->dst[i]);
417 DUMP(" ");
418 }
419
420 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
421 sm1_dump_src_param(&insn->src[i]);
422 DUMP(" ");
423 }
424 if (insn->opcode == D3DSIO_DEF ||
425 insn->opcode == D3DSIO_DEFI ||
426 insn->opcode == D3DSIO_DEFB)
427 sm1_dump_immediate(&insn->src[0]);
428
429 DUMP("\n");
430 }
431
/* A constant defined inside the shader itself (see tx_set_lconst*()). */
struct sm1_local_const
{
    INT idx;             /* D3D constant register index it overrides */
    struct ureg_src reg; /* TGSI immediate holding the value */
    float f[4]; /* for indirect addressing of float constants */
};
438
/* All state of one SM1 -> TGSI translation. */
struct shader_translator
{
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg; /* TGSI program being built */

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
    /* upper bounds (exclusive) for float / int / bool constant indices */
    unsigned num_constf_allowed;
    unsigned num_consti_allowed;
    unsigned num_constb_allowed;

    boolean native_integers; /* selects integer vs. float immediates for int/bool constants */
    boolean inline_subroutines;
    boolean lower_preds;
    boolean want_texcoord;
    boolean shift_wpos; /* subtract (0.5, 0.5) from the position input */
    boolean wpos_is_sysval; /* position is a system value rather than an fs input */
    boolean face_is_sysval_integer; /* face is an integer system value that needs converting to float */
    unsigned texcoord_sn; /* semantic name used when declaring PS texcoord inputs */

    struct sm1_instruction insn; /* current instruction */

    struct {
        struct ureg_dst *r; /* temporaries, grown on demand by tx_temp_alloc() */
        struct ureg_dst oPos;
        struct ureg_dst oPos_out; /* the real output when doing streamout */
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p; /* predicate register */
        struct ureg_dst address; /* TGSI address register */
        struct ureg_dst a0; /* temporary backing the D3D address register */
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[5]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* number of entries allocated in regs.r */
    unsigned num_scratch; /* number of regs.t entries handed out by tx_scratch() */
    unsigned loop_depth; /* current loop nesting level */
    unsigned loop_depth_max; /* deepest nesting level seen so far */
    unsigned cond_depth; /* current conditional nesting level */
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    /* shader-local float / int / bool constants; each array grows in steps
     * of 8 entries */
    struct sm1_local_const *lconstf;
    unsigned num_lconstf;
    struct sm1_local_const *lconsti;
    unsigned num_lconsti;
    struct sm1_local_const *lconstb;
    unsigned num_lconstb;

    boolean indirect_const_access; /* set when a float constant is read with relative addressing */
    boolean failure; /* sticky error flag set by the tx_* helpers */

    struct nine_vs_output_info output_info[16]; /* filled by nine_record_outputs() */
    int num_outputs; /* number of valid output_info entries */

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
523
/* Shader-stage tests; both expect a 'tx' translator pointer in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* Flag the translation as failed and return from a void function when
 * 'cond' holds. Expects 'tx' in scope. This is a bare if-statement (not a
 * do/while(0) wrapper) and is invoked without a trailing semicolon in this
 * file, so do not follow it with an 'else'. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

/* Forward declaration; the definition is outside this part of the file. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
531
532 static void
533 sm1_instruction_check(const struct sm1_instruction *insn)
534 {
535 if (insn->opcode == D3DSIO_CRS)
536 {
537 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
538 {
539 DBG("CRS.mask.w\n");
540 }
541 }
542 }
543
544 static void
545 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
546 int mask, int output_index)
547 {
548 tx->output_info[tx->num_outputs].output_semantic = Usage;
549 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
550 tx->output_info[tx->num_outputs].mask = mask;
551 tx->output_info[tx->num_outputs].output_index = output_index;
552 tx->num_outputs++;
553 }
554
555 static boolean
556 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
557 {
558 INT i;
559
560 if (index < 0 || index >= tx->num_constf_allowed) {
561 tx->failure = TRUE;
562 return FALSE;
563 }
564 for (i = 0; i < tx->num_lconstf; ++i) {
565 if (tx->lconstf[i].idx == index) {
566 *src = tx->lconstf[i].reg;
567 return TRUE;
568 }
569 }
570 return FALSE;
571 }
572 static boolean
573 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
574 {
575 int i;
576
577 if (index < 0 || index >= tx->num_consti_allowed) {
578 tx->failure = TRUE;
579 return FALSE;
580 }
581 for (i = 0; i < tx->num_lconsti; ++i) {
582 if (tx->lconsti[i].idx == index) {
583 *src = tx->lconsti[i].reg;
584 return TRUE;
585 }
586 }
587 return FALSE;
588 }
589 static boolean
590 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
591 {
592 int i;
593
594 if (index < 0 || index >= tx->num_constb_allowed) {
595 tx->failure = TRUE;
596 return FALSE;
597 }
598 for (i = 0; i < tx->num_lconstb; ++i) {
599 if (tx->lconstb[i].idx == index) {
600 *src = tx->lconstb[i].reg;
601 return TRUE;
602 }
603 }
604 return FALSE;
605 }
606
607 static void
608 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
609 {
610 unsigned n;
611
612 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
613
614 for (n = 0; n < tx->num_lconstf; ++n)
615 if (tx->lconstf[n].idx == index)
616 break;
617 if (n == tx->num_lconstf) {
618 if ((n % 8) == 0) {
619 tx->lconstf = REALLOC(tx->lconstf,
620 (n + 0) * sizeof(tx->lconstf[0]),
621 (n + 8) * sizeof(tx->lconstf[0]));
622 assert(tx->lconstf);
623 }
624 tx->num_lconstf++;
625 }
626 tx->lconstf[n].idx = index;
627 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
628
629 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
630 }
631 static void
632 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
633 {
634 unsigned n;
635
636 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
637
638 for (n = 0; n < tx->num_lconsti; ++n)
639 if (tx->lconsti[n].idx == index)
640 break;
641 if (n == tx->num_lconsti) {
642 if ((n % 8) == 0) {
643 tx->lconsti = REALLOC(tx->lconsti,
644 (n + 0) * sizeof(tx->lconsti[0]),
645 (n + 8) * sizeof(tx->lconsti[0]));
646 assert(tx->lconsti);
647 }
648 tx->num_lconsti++;
649 }
650
651 tx->lconsti[n].idx = index;
652 tx->lconsti[n].reg = tx->native_integers ?
653 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
654 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
655 }
656 static void
657 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
658 {
659 unsigned n;
660
661 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
662
663 for (n = 0; n < tx->num_lconstb; ++n)
664 if (tx->lconstb[n].idx == index)
665 break;
666 if (n == tx->num_lconstb) {
667 if ((n % 8) == 0) {
668 tx->lconstb = REALLOC(tx->lconstb,
669 (n + 0) * sizeof(tx->lconstb[0]),
670 (n + 8) * sizeof(tx->lconstb[0]));
671 assert(tx->lconstb);
672 }
673 tx->num_lconstb++;
674 }
675
676 tx->lconstb[n].idx = index;
677 tx->lconstb[n].reg = tx->native_integers ?
678 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
679 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
680 }
681
682 static inline struct ureg_dst
683 tx_scratch(struct shader_translator *tx)
684 {
685 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
686 tx->failure = TRUE;
687 return tx->regs.t[0];
688 }
689 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
690 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
691 return tx->regs.t[tx->num_scratch++];
692 }
693
694 static inline struct ureg_dst
695 tx_scratch_scalar(struct shader_translator *tx)
696 {
697 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
698 }
699
700 static inline struct ureg_src
701 tx_src_scalar(struct ureg_dst dst)
702 {
703 struct ureg_src src = ureg_src(dst);
704 int c = ffs(dst.WriteMask) - 1;
705 if (dst.WriteMask == (1 << c))
706 src = ureg_scalar(src, c);
707 return src;
708 }
709
710 static inline void
711 tx_temp_alloc(struct shader_translator *tx, INT idx)
712 {
713 assert(idx >= 0);
714 if (idx >= tx->num_temp) {
715 unsigned k = tx->num_temp;
716 unsigned n = idx + 1;
717 tx->regs.r = REALLOC(tx->regs.r,
718 k * sizeof(tx->regs.r[0]),
719 n * sizeof(tx->regs.r[0]));
720 for (; k < n; ++k)
721 tx->regs.r[k] = ureg_dst_undef();
722 tx->num_temp = n;
723 }
724 if (ureg_dst_is_undef(tx->regs.r[idx]))
725 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
726 }
727
728 static inline void
729 tx_addr_alloc(struct shader_translator *tx, INT idx)
730 {
731 assert(idx == 0);
732 if (ureg_dst_is_undef(tx->regs.address))
733 tx->regs.address = ureg_DECL_address(tx->ureg);
734 if (ureg_dst_is_undef(tx->regs.a0))
735 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
736 }
737
738 static inline void
739 tx_pred_alloc(struct shader_translator *tx, INT idx)
740 {
741 assert(idx == 0);
742 if (ureg_dst_is_undef(tx->regs.p))
743 tx->regs.p = ureg_DECL_predicate(tx->ureg);
744 }
745
746 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
747 * the projection should be applied on the texture. It doesn't
748 * apply on texkill.
749 * The doc is very imprecise here (it says the projection is done
750 * before rasterization, thus in vs, which seems wrong since ps instructions
751 * are affected differently)
752 * For now we only apply to the ps TEX instruction and TEXBEM.
753 * Perhaps some other instructions would need it */
754 static inline void
755 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
756 struct ureg_src src, INT idx)
757 {
758 struct ureg_dst tmp;
759 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
760
761 /* no projection */
762 if (dim == 1) {
763 ureg_MOV(tx->ureg, dst, src);
764 } else {
765 tmp = tx_scratch_scalar(tx);
766 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
767 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
768 }
769 }
770
771 static inline void
772 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
773 unsigned target, struct ureg_src src0,
774 struct ureg_src src1, INT idx)
775 {
776 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
777 struct ureg_dst tmp;
778
779 /* dim == 1: no projection
780 * Looks like must be disabled when it makes no
781 * sense according the texture dimensions
782 */
783 if (dim == 1 || dim <= target) {
784 ureg_TEX(tx->ureg, dst, target, src0, src1);
785 } else if (dim == 4) {
786 ureg_TXP(tx->ureg, dst, target, src0, src1);
787 } else {
788 tmp = tx_scratch(tx);
789 apply_ps1x_projection(tx, tmp, src0, idx);
790 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
791 }
792 }
793
794 static inline void
795 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
796 {
797 assert(IS_PS);
798 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
799 if (ureg_src_is_undef(tx->regs.vT[idx]))
800 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
801 TGSI_INTERPOLATE_PERSPECTIVE);
802 }
803
804 static inline unsigned *
805 tx_bgnloop(struct shader_translator *tx)
806 {
807 tx->loop_depth++;
808 if (tx->loop_depth_max < tx->loop_depth)
809 tx->loop_depth_max = tx->loop_depth;
810 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
811 return &tx->loop_labels[tx->loop_depth - 1];
812 }
813
814 static inline unsigned *
815 tx_endloop(struct shader_translator *tx)
816 {
817 assert(tx->loop_depth);
818 tx->loop_depth--;
819 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
820 ureg_get_instruction_number(tx->ureg));
821 return &tx->loop_labels[tx->loop_depth];
822 }
823
824 static struct ureg_dst
825 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
826 {
827 const unsigned l = tx->loop_depth - 1;
828
829 if (!tx->loop_depth)
830 {
831 DBG("loop counter requested outside of loop\n");
832 return ureg_dst_undef();
833 }
834
835 if (ureg_dst_is_undef(tx->regs.rL[l])) {
836 /* loop or rep ctr creation */
837 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
838 tx->loop_or_rep[l] = loop_or_rep;
839 }
840 /* loop - rep - endloop - endrep not allowed */
841 assert(tx->loop_or_rep[l] == loop_or_rep);
842
843 return tx->regs.rL[l];
844 }
845
846 static struct ureg_src
847 tx_get_loopal(struct shader_translator *tx)
848 {
849 int loop_level = tx->loop_depth - 1;
850
851 while (loop_level >= 0) {
852 /* handle loop - rep - endrep - endloop case */
853 if (tx->loop_or_rep[loop_level])
854 /* the value is in the loop counter y component (nine implementation) */
855 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
856 loop_level--;
857 }
858
859 DBG("aL counter requested outside of loop\n");
860 return ureg_src_undef();
861 }
862
863 static inline unsigned *
864 tx_cond(struct shader_translator *tx)
865 {
866 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
867 tx->cond_depth++;
868 return &tx->cond_labels[tx->cond_depth - 1];
869 }
870
871 static inline unsigned *
872 tx_elsecond(struct shader_translator *tx)
873 {
874 assert(tx->cond_depth);
875 return &tx->cond_labels[tx->cond_depth - 1];
876 }
877
878 static inline void
879 tx_endcond(struct shader_translator *tx)
880 {
881 assert(tx->cond_depth);
882 tx->cond_depth--;
883 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
884 ureg_get_instruction_number(tx->ureg));
885 }
886
887 static inline struct ureg_dst
888 nine_ureg_dst_register(unsigned file, int index)
889 {
890 return ureg_dst(ureg_src_register(file, index));
891 }
892
893 static inline struct ureg_src
894 nine_get_position_input(struct shader_translator *tx)
895 {
896 struct ureg_program *ureg = tx->ureg;
897
898 if (tx->wpos_is_sysval)
899 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
900 else
901 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
902 0, TGSI_INTERPOLATE_LINEAR);
903 }
904
905 static struct ureg_src
906 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
907 {
908 struct ureg_program *ureg = tx->ureg;
909 struct ureg_src src;
910 struct ureg_dst tmp;
911
912 switch (param->file)
913 {
914 case D3DSPR_TEMP:
915 assert(!param->rel);
916 tx_temp_alloc(tx, param->idx);
917 src = ureg_src(tx->regs.r[param->idx]);
918 break;
919 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
920 case D3DSPR_ADDR:
921 assert(!param->rel);
922 if (IS_VS) {
923 assert(param->idx == 0);
924 /* the address register (vs only) must be
925 * assigned before use */
926 assert(!ureg_dst_is_undef(tx->regs.a0));
927 /* Round to lowest for vs1.1 (contrary to the doc), else
928 * round to nearest */
929 if (tx->version.major < 2 && tx->version.minor < 2)
930 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
931 else
932 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
933 src = ureg_src(tx->regs.address);
934 } else {
935 if (tx->version.major < 2 && tx->version.minor < 4) {
936 /* no subroutines, so should be defined */
937 src = ureg_src(tx->regs.tS[param->idx]);
938 } else {
939 tx_texcoord_alloc(tx, param->idx);
940 src = tx->regs.vT[param->idx];
941 }
942 }
943 break;
944 case D3DSPR_INPUT:
945 if (IS_VS) {
946 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
947 } else {
948 if (tx->version.major < 3) {
949 assert(!param->rel);
950 src = ureg_DECL_fs_input_cyl_centroid(
951 ureg, TGSI_SEMANTIC_COLOR, param->idx,
952 TGSI_INTERPOLATE_COLOR, 0,
953 tx->info->force_color_in_centroid ?
954 TGSI_INTERPOLATE_LOC_CENTROID : 0,
955 0, 1);
956 } else {
957 if(param->rel) {
958 /* Copy all inputs (non consecutive)
959 * to temp array (consecutive).
960 * This is not good for performance.
961 * A better way would be to have inputs
962 * consecutive (would need implement alternative
963 * way to match vs outputs and ps inputs).
964 * However even with the better way, the temp array
965 * copy would need to be used if some inputs
966 * are not GENERIC or if they have different
967 * interpolation flag. */
968 if (ureg_src_is_undef(tx->regs.v_consecutive)) {
969 int i;
970 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
971 for (i = 0; i < 10; i++) {
972 if (!ureg_src_is_undef(tx->regs.v[i]))
973 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
974 else
975 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
976 }
977 }
978 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
979 } else {
980 assert(param->idx < ARRAY_SIZE(tx->regs.v));
981 src = tx->regs.v[param->idx];
982 }
983 }
984 }
985 break;
986 case D3DSPR_PREDICATE:
987 assert(!param->rel);
988 tx_pred_alloc(tx, param->idx);
989 src = ureg_src(tx->regs.p);
990 break;
991 case D3DSPR_SAMPLER:
992 assert(param->mod == NINED3DSPSM_NONE);
993 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
994 assert(!param->rel);
995 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
996 break;
997 case D3DSPR_CONST:
998 assert(!param->rel || IS_VS);
999 if (param->rel)
1000 tx->indirect_const_access = TRUE;
1001 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1002 if (!param->rel)
1003 nine_info_mark_const_f_used(tx->info, param->idx);
1004 /* vswp constant handling: we use two buffers
1005 * to fit all the float constants. The special handling
1006 * doesn't need to be elsewhere, because all the instructions
1007 * accessing the constants directly are VS1, and swvp
1008 * is VS >= 2 */
1009 if (IS_VS && tx->info->swvp_on) {
1010 if (!param->rel) {
1011 if (param->idx < 4096) {
1012 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1013 src = ureg_src_dimension(src, 0);
1014 } else {
1015 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096);
1016 src = ureg_src_dimension(src, 1);
1017 }
1018 } else {
1019 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */
1020 src = ureg_src_dimension(src, 0);
1021 }
1022 } else
1023 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1024 }
1025 if (!IS_VS && tx->version.major < 2) {
1026 /* ps 1.X clamps constants */
1027 tmp = tx_scratch(tx);
1028 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1029 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1030 src = ureg_src(tmp);
1031 }
1032 break;
1033 case D3DSPR_CONST2:
1034 case D3DSPR_CONST3:
1035 case D3DSPR_CONST4:
1036 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1037 assert(!"CONST2/3/4");
1038 src = ureg_imm1f(ureg, 0.0f);
1039 break;
1040 case D3DSPR_CONSTINT:
1041 /* relative adressing only possible for float constants in vs */
1042 assert(!param->rel);
1043 if (!tx_lconsti(tx, &src, param->idx)) {
1044 nine_info_mark_const_i_used(tx->info, param->idx);
1045 if (IS_VS && tx->info->swvp_on) {
1046 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1047 src = ureg_src_dimension(src, 2);
1048 } else
1049 src = ureg_src_register(TGSI_FILE_CONSTANT,
1050 tx->info->const_i_base + param->idx);
1051 }
1052 break;
1053 case D3DSPR_CONSTBOOL:
1054 assert(!param->rel);
1055 if (!tx_lconstb(tx, &src, param->idx)) {
1056 char r = param->idx / 4;
1057 char s = param->idx & 3;
1058 nine_info_mark_const_b_used(tx->info, param->idx);
1059 if (IS_VS && tx->info->swvp_on) {
1060 src = ureg_src_register(TGSI_FILE_CONSTANT, r);
1061 src = ureg_src_dimension(src, 3);
1062 } else
1063 src = ureg_src_register(TGSI_FILE_CONSTANT,
1064 tx->info->const_b_base + r);
1065 src = ureg_swizzle(src, s, s, s, s);
1066 }
1067 break;
1068 case D3DSPR_LOOP:
1069 if (ureg_dst_is_undef(tx->regs.address))
1070 tx->regs.address = ureg_DECL_address(ureg);
1071 if (!tx->native_integers)
1072 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1073 else
1074 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1075 src = ureg_src(tx->regs.address);
1076 break;
1077 case D3DSPR_MISCTYPE:
1078 switch (param->idx) {
1079 case D3DSMO_POSITION:
1080 if (ureg_src_is_undef(tx->regs.vPos))
1081 tx->regs.vPos = nine_get_position_input(tx);
1082 if (tx->shift_wpos) {
1083 /* TODO: do this only once */
1084 struct ureg_dst wpos = tx_scratch(tx);
1085 ureg_SUB(ureg, wpos, tx->regs.vPos,
1086 ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
1087 src = ureg_src(wpos);
1088 } else {
1089 src = tx->regs.vPos;
1090 }
1091 break;
1092 case D3DSMO_FACE:
1093 if (ureg_src_is_undef(tx->regs.vFace)) {
1094 if (tx->face_is_sysval_integer) {
1095 tmp = tx_scratch(tx);
1096 tx->regs.vFace =
1097 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1098
1099 /* convert bool to float */
1100 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1101 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1102 tx->regs.vFace = ureg_src(tmp);
1103 } else {
1104 tx->regs.vFace = ureg_DECL_fs_input(ureg,
1105 TGSI_SEMANTIC_FACE, 0,
1106 TGSI_INTERPOLATE_CONSTANT);
1107 }
1108 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1109 }
1110 src = tx->regs.vFace;
1111 break;
1112 default:
1113 assert(!"invalid src D3DSMO");
1114 break;
1115 }
1116 assert(!param->rel);
1117 break;
1118 case D3DSPR_TEMPFLOAT16:
1119 break;
1120 default:
1121 assert(!"invalid src D3DSPR");
1122 }
1123 if (param->rel)
1124 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1125
1126 switch (param->mod) {
1127 case NINED3DSPSM_DW:
1128 tmp = tx_scratch(tx);
1129 /* NOTE: app is not allowed to read w with this modifier */
1130 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
1131 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1132 src = ureg_src(tmp);
1133 break;
1134 case NINED3DSPSM_DZ:
1135 tmp = tx_scratch(tx);
1136 /* NOTE: app is not allowed to read z with this modifier */
1137 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
1138 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1139 src = ureg_src(tmp);
1140 break;
1141 default:
1142 break;
1143 }
1144
1145 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1146 src = ureg_swizzle(src,
1147 (param->swizzle >> 0) & 0x3,
1148 (param->swizzle >> 2) & 0x3,
1149 (param->swizzle >> 4) & 0x3,
1150 (param->swizzle >> 6) & 0x3);
1151
1152 switch (param->mod) {
1153 case NINED3DSPSM_ABS:
1154 src = ureg_abs(src);
1155 break;
1156 case NINED3DSPSM_ABSNEG:
1157 src = ureg_negate(ureg_abs(src));
1158 break;
1159 case NINED3DSPSM_NEG:
1160 src = ureg_negate(src);
1161 break;
1162 case NINED3DSPSM_BIAS:
1163 tmp = tx_scratch(tx);
1164 ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
1165 src = ureg_src(tmp);
1166 break;
1167 case NINED3DSPSM_BIASNEG:
1168 tmp = tx_scratch(tx);
1169 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
1170 src = ureg_src(tmp);
1171 break;
1172 case NINED3DSPSM_NOT:
1173 if (tx->native_integers) {
1174 tmp = tx_scratch(tx);
1175 ureg_NOT(ureg, tmp, src);
1176 src = ureg_src(tmp);
1177 break;
1178 }
1179 /* fall through */
1180 case NINED3DSPSM_COMP:
1181 tmp = tx_scratch(tx);
1182 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
1183 src = ureg_src(tmp);
1184 break;
1185 case NINED3DSPSM_DZ:
1186 case NINED3DSPSM_DW:
1187 /* Already handled*/
1188 break;
1189 case NINED3DSPSM_SIGN:
1190 tmp = tx_scratch(tx);
1191 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1192 src = ureg_src(tmp);
1193 break;
1194 case NINED3DSPSM_SIGNNEG:
1195 tmp = tx_scratch(tx);
1196 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1197 src = ureg_src(tmp);
1198 break;
1199 case NINED3DSPSM_X2:
1200 tmp = tx_scratch(tx);
1201 ureg_ADD(ureg, tmp, src, src);
1202 src = ureg_src(tmp);
1203 break;
1204 case NINED3DSPSM_X2NEG:
1205 tmp = tx_scratch(tx);
1206 ureg_ADD(ureg, tmp, src, src);
1207 src = ureg_negate(ureg_src(tmp));
1208 break;
1209 default:
1210 assert(param->mod == NINED3DSPSM_NONE);
1211 break;
1212 }
1213
1214 return src;
1215 }
1216
1217 static struct ureg_dst
1218 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1219 {
1220 struct ureg_dst dst;
1221
1222 switch (param->file)
1223 {
1224 case D3DSPR_TEMP:
1225 assert(!param->rel);
1226 tx_temp_alloc(tx, param->idx);
1227 dst = tx->regs.r[param->idx];
1228 break;
1229 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1230 case D3DSPR_ADDR:
1231 assert(!param->rel);
1232 if (tx->version.major < 2 && !IS_VS) {
1233 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1234 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1235 dst = tx->regs.tS[param->idx];
1236 } else
1237 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1238 tx_texcoord_alloc(tx, param->idx);
1239 dst = ureg_dst(tx->regs.vT[param->idx]);
1240 } else {
1241 tx_addr_alloc(tx, param->idx);
1242 dst = tx->regs.a0;
1243 }
1244 break;
1245 case D3DSPR_RASTOUT:
1246 assert(!param->rel);
1247 switch (param->idx) {
1248 case 0:
1249 if (ureg_dst_is_undef(tx->regs.oPos))
1250 tx->regs.oPos =
1251 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1252 dst = tx->regs.oPos;
1253 break;
1254 case 1:
1255 if (ureg_dst_is_undef(tx->regs.oFog))
1256 tx->regs.oFog =
1257 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1258 dst = tx->regs.oFog;
1259 break;
1260 case 2:
1261 if (ureg_dst_is_undef(tx->regs.oPts))
1262 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1263 dst = tx->regs.oPts;
1264 break;
1265 default:
1266 assert(0);
1267 break;
1268 }
1269 break;
1270 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1271 case D3DSPR_OUTPUT:
1272 if (tx->version.major < 3) {
1273 assert(!param->rel);
1274 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1275 } else {
1276 assert(!param->rel); /* TODO */
1277 assert(param->idx < ARRAY_SIZE(tx->regs.o));
1278 dst = tx->regs.o[param->idx];
1279 }
1280 break;
1281 case D3DSPR_ATTROUT: /* VS */
1282 case D3DSPR_COLOROUT: /* PS */
1283 assert(param->idx >= 0 && param->idx < 4);
1284 assert(!param->rel);
1285 tx->info->rt_mask |= 1 << param->idx;
1286 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1287 /* ps < 3: oCol[0] will have fog blending afterward */
1288 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1289 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1290 } else {
1291 tx->regs.oCol[param->idx] =
1292 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1293 }
1294 }
1295 dst = tx->regs.oCol[param->idx];
1296 if (IS_VS && tx->version.major < 3)
1297 dst = ureg_saturate(dst);
1298 break;
1299 case D3DSPR_DEPTHOUT:
1300 assert(!param->rel);
1301 if (ureg_dst_is_undef(tx->regs.oDepth))
1302 tx->regs.oDepth =
1303 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1304 TGSI_WRITEMASK_Z, 0, 1);
1305 dst = tx->regs.oDepth; /* XXX: must write .z component */
1306 break;
1307 case D3DSPR_PREDICATE:
1308 assert(!param->rel);
1309 tx_pred_alloc(tx, param->idx);
1310 dst = tx->regs.p;
1311 break;
1312 case D3DSPR_TEMPFLOAT16:
1313 DBG("unhandled D3DSPR: %u\n", param->file);
1314 break;
1315 default:
1316 assert(!"invalid dst D3DSPR");
1317 break;
1318 }
1319 if (param->rel)
1320 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1321
1322 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1323 dst = ureg_writemask(dst, param->mask);
1324 if (param->mod & NINED3DSPDM_SATURATE)
1325 dst = ureg_saturate(dst);
1326
1327 return dst;
1328 }
1329
1330 static struct ureg_dst
1331 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1332 {
1333 if (param->shift) {
1334 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1335 return tx->regs.tdst;
1336 }
1337 return _tx_dst_param(tx, param);
1338 }
1339
1340 static void
1341 tx_apply_dst0_modifiers(struct shader_translator *tx)
1342 {
1343 struct ureg_dst rdst;
1344 float f;
1345
1346 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1347 return;
1348 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1349
1350 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1351
1352 if (tx->insn.dst[0].shift < 0)
1353 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1354 else
1355 f = 1 << tx->insn.dst[0].shift;
1356
1357 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1358 }
1359
1360 static struct ureg_src
1361 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1362 {
1363 struct ureg_src src;
1364
1365 assert(!param->shift);
1366 assert(!(param->mod & NINED3DSPDM_SATURATE));
1367
1368 switch (param->file) {
1369 case D3DSPR_INPUT:
1370 if (IS_VS) {
1371 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1372 } else {
1373 assert(!param->rel);
1374 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1375 src = tx->regs.v[param->idx];
1376 }
1377 break;
1378 default:
1379 src = ureg_src(tx_dst_param(tx, param));
1380 break;
1381 }
1382 if (param->rel)
1383 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1384
1385 if (!param->mask)
1386 WARN("mask is 0, using identity swizzle\n");
1387
1388 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1389 char s[4];
1390 int n;
1391 int c;
1392 for (n = 0, c = 0; c < 4; ++c)
1393 if (param->mask & (1 << c))
1394 s[n++] = c;
1395 assert(n);
1396 for (c = n; c < 4; ++c)
1397 s[c] = s[n - 1];
1398 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1399 }
1400 return src;
1401 }
1402
1403 static HRESULT
1404 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1405 {
1406 struct ureg_program *ureg = tx->ureg;
1407 struct ureg_dst dst;
1408 struct ureg_src src[2];
1409 struct sm1_src_param *src_mat = &tx->insn.src[1];
1410 unsigned i;
1411
1412 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1413 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1414
1415 for (i = 0; i < n; i++)
1416 {
1417 const unsigned m = (1 << i);
1418
1419 src[1] = tx_src_param(tx, src_mat);
1420 src_mat->idx++;
1421
1422 if (!(dst.WriteMask & m))
1423 continue;
1424
1425 /* XXX: src == dst case ? */
1426
1427 switch (k) {
1428 case 3:
1429 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1430 break;
1431 case 4:
1432 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1433 break;
1434 default:
1435 DBG("invalid operation: M%ux%u\n", m, n);
1436 break;
1437 }
1438 }
1439
1440 return D3D_OK;
1441 }
1442
/* Version helpers: V() packs a major/minor pair into a single value with the
 * major number in bits 8 and up; VNOTSUPPORTED expands to a 0, 0 pair. */
#define VNOTSUPPORTED   0, 0
#define V(maj, min)     (((maj) << 8) | (min))
1445
1446 static inline const char *
1447 d3dsio_to_string( unsigned opcode )
1448 {
1449 static const char *names[] = {
1450 "NOP",
1451 "MOV",
1452 "ADD",
1453 "SUB",
1454 "MAD",
1455 "MUL",
1456 "RCP",
1457 "RSQ",
1458 "DP3",
1459 "DP4",
1460 "MIN",
1461 "MAX",
1462 "SLT",
1463 "SGE",
1464 "EXP",
1465 "LOG",
1466 "LIT",
1467 "DST",
1468 "LRP",
1469 "FRC",
1470 "M4x4",
1471 "M4x3",
1472 "M3x4",
1473 "M3x3",
1474 "M3x2",
1475 "CALL",
1476 "CALLNZ",
1477 "LOOP",
1478 "RET",
1479 "ENDLOOP",
1480 "LABEL",
1481 "DCL",
1482 "POW",
1483 "CRS",
1484 "SGN",
1485 "ABS",
1486 "NRM",
1487 "SINCOS",
1488 "REP",
1489 "ENDREP",
1490 "IF",
1491 "IFC",
1492 "ELSE",
1493 "ENDIF",
1494 "BREAK",
1495 "BREAKC",
1496 "MOVA",
1497 "DEFB",
1498 "DEFI",
1499 NULL,
1500 NULL,
1501 NULL,
1502 NULL,
1503 NULL,
1504 NULL,
1505 NULL,
1506 NULL,
1507 NULL,
1508 NULL,
1509 NULL,
1510 NULL,
1511 NULL,
1512 NULL,
1513 NULL,
1514 "TEXCOORD",
1515 "TEXKILL",
1516 "TEX",
1517 "TEXBEM",
1518 "TEXBEML",
1519 "TEXREG2AR",
1520 "TEXREG2GB",
1521 "TEXM3x2PAD",
1522 "TEXM3x2TEX",
1523 "TEXM3x3PAD",
1524 "TEXM3x3TEX",
1525 NULL,
1526 "TEXM3x3SPEC",
1527 "TEXM3x3VSPEC",
1528 "EXPP",
1529 "LOGP",
1530 "CND",
1531 "DEF",
1532 "TEXREG2RGB",
1533 "TEXDP3TEX",
1534 "TEXM3x2DEPTH",
1535 "TEXDP3",
1536 "TEXM3x3",
1537 "TEXDEPTH",
1538 "CMP",
1539 "BEM",
1540 "DP2ADD",
1541 "DSX",
1542 "DSY",
1543 "TEXLDD",
1544 "SETP",
1545 "TEXLDL",
1546 "BREAKP"
1547 };
1548
1549 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1550
1551 switch (opcode) {
1552 case D3DSIO_PHASE: return "PHASE";
1553 case D3DSIO_COMMENT: return "COMMENT";
1554 case D3DSIO_END: return "END";
1555 default:
1556 return NULL;
1557 }
1558 }
1559
1560 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1561 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1562 (inst).vert_version.max | \
1563 (inst).frag_version.min | \
1564 (inst).frag_version.max)
1565
1566 #define SPECIAL(name) \
1567 NineTranslateInstruction_##name
1568
1569 #define DECL_SPECIAL(name) \
1570 static HRESULT \
1571 NineTranslateInstruction_##name( struct shader_translator *tx )
1572
1573 static HRESULT
1574 NineTranslateInstruction_Generic(struct shader_translator *);
1575
1576 DECL_SPECIAL(NOP)
1577 {
1578 /* Nothing to do. NOP was used to avoid hangs
1579 * with very old d3d drivers. */
1580 return D3D_OK;
1581 }
1582
1583 DECL_SPECIAL(M4x4)
1584 {
1585 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1586 }
1587
1588 DECL_SPECIAL(M4x3)
1589 {
1590 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1591 }
1592
1593 DECL_SPECIAL(M3x4)
1594 {
1595 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1596 }
1597
1598 DECL_SPECIAL(M3x3)
1599 {
1600 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1601 }
1602
1603 DECL_SPECIAL(M3x2)
1604 {
1605 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1606 }
1607
1608 DECL_SPECIAL(CMP)
1609 {
1610 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1611 tx_src_param(tx, &tx->insn.src[0]),
1612 tx_src_param(tx, &tx->insn.src[2]),
1613 tx_src_param(tx, &tx->insn.src[1]));
1614 return D3D_OK;
1615 }
1616
1617 DECL_SPECIAL(CND)
1618 {
1619 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1620 struct ureg_dst cgt;
1621 struct ureg_src cnd;
1622
1623 /* the coissue flag was a tip for compilers to advise to
1624 * execute two operations at the same time, in cases
1625 * the two executions had same dst with different channels.
1626 * It has no effect on current hw. However it seems CND
1627 * is affected. The handling of this very specific case
1628 * handled below mimick wine behaviour */
1629 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1630 ureg_MOV(tx->ureg,
1631 dst, tx_src_param(tx, &tx->insn.src[1]));
1632 return D3D_OK;
1633 }
1634
1635 cnd = tx_src_param(tx, &tx->insn.src[0]);
1636 cgt = tx_scratch(tx);
1637
1638 if (tx->version.major == 1 && tx->version.minor < 4)
1639 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1640
1641 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1642
1643 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1644 tx_src_param(tx, &tx->insn.src[1]),
1645 tx_src_param(tx, &tx->insn.src[2]));
1646 return D3D_OK;
1647 }
1648
1649 DECL_SPECIAL(CALL)
1650 {
1651 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1652 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1653 return D3D_OK;
1654 }
1655
1656 DECL_SPECIAL(CALLNZ)
1657 {
1658 struct ureg_program *ureg = tx->ureg;
1659 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1660
1661 if (!tx->native_integers)
1662 ureg_IF(ureg, src, tx_cond(tx));
1663 else
1664 ureg_UIF(ureg, src, tx_cond(tx));
1665 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1666 tx_endcond(tx);
1667 ureg_ENDIF(ureg);
1668 return D3D_OK;
1669 }
1670
1671 DECL_SPECIAL(LOOP)
1672 {
1673 struct ureg_program *ureg = tx->ureg;
1674 unsigned *label;
1675 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1676 struct ureg_dst ctr;
1677 struct ureg_dst tmp;
1678 struct ureg_src ctrx;
1679
1680 label = tx_bgnloop(tx);
1681 ctr = tx_get_loopctr(tx, TRUE);
1682 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1683
1684 /* src: num_iterations - start_value of al - step for al - 0 */
1685 ureg_MOV(ureg, ctr, src);
1686 ureg_BGNLOOP(tx->ureg, label);
1687 tmp = tx_scratch_scalar(tx);
1688 /* Initially ctr.x contains the number of iterations.
1689 * ctr.y will contain the updated value of al.
1690 * We decrease ctr.x at the end of every iteration,
1691 * and stop when it reaches 0. */
1692
1693 if (!tx->native_integers) {
1694 /* case src and ctr contain floats */
1695 /* to avoid precision issue, we stop when ctr <= 0.5 */
1696 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1697 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1698 } else {
1699 /* case src and ctr contain integers */
1700 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1701 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1702 }
1703 ureg_BRK(ureg);
1704 tx_endcond(tx);
1705 ureg_ENDIF(ureg);
1706 return D3D_OK;
1707 }
1708
1709 DECL_SPECIAL(RET)
1710 {
1711 ureg_RET(tx->ureg);
1712 return D3D_OK;
1713 }
1714
1715 DECL_SPECIAL(ENDLOOP)
1716 {
1717 struct ureg_program *ureg = tx->ureg;
1718 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1719 struct ureg_dst dst_ctrx, dst_al;
1720 struct ureg_src src_ctr, al_counter;
1721
1722 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1723 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1724 src_ctr = ureg_src(ctr);
1725 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1726
1727 /* ctr.x -= 1
1728 * ctr.y (aL) += step */
1729 if (!tx->native_integers) {
1730 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1731 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1732 } else {
1733 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1734 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1735 }
1736 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1737 return D3D_OK;
1738 }
1739
1740 DECL_SPECIAL(LABEL)
1741 {
1742 unsigned k = tx->num_inst_labels;
1743 unsigned n = tx->insn.src[0].idx;
1744 assert(n < 2048);
1745 if (n >= k)
1746 tx->inst_labels = REALLOC(tx->inst_labels,
1747 k * sizeof(tx->inst_labels[0]),
1748 n * sizeof(tx->inst_labels[0]));
1749
1750 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1751 return D3D_OK;
1752 }
1753
1754 DECL_SPECIAL(SINCOS)
1755 {
1756 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1757 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1758
1759 assert(!(dst.WriteMask & 0xc));
1760
1761 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1762 ureg_SCS(tx->ureg, dst, src);
1763 return D3D_OK;
1764 }
1765
1766 DECL_SPECIAL(SGN)
1767 {
1768 ureg_SSG(tx->ureg,
1769 tx_dst_param(tx, &tx->insn.dst[0]),
1770 tx_src_param(tx, &tx->insn.src[0]));
1771 return D3D_OK;
1772 }
1773
1774 DECL_SPECIAL(REP)
1775 {
1776 struct ureg_program *ureg = tx->ureg;
1777 unsigned *label;
1778 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1779 struct ureg_dst ctr;
1780 struct ureg_dst tmp;
1781 struct ureg_src ctrx;
1782
1783 label = tx_bgnloop(tx);
1784 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1785 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1786
1787 /* NOTE: rep must be constant, so we don't have to save the count */
1788 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1789
1790 /* rep: num_iterations - 0 - 0 - 0 */
1791 ureg_MOV(ureg, ctr, rep);
1792 ureg_BGNLOOP(ureg, label);
1793 tmp = tx_scratch_scalar(tx);
1794 /* Initially ctr.x contains the number of iterations.
1795 * We decrease ctr.x at the end of every iteration,
1796 * and stop when it reaches 0. */
1797
1798 if (!tx->native_integers) {
1799 /* case src and ctr contain floats */
1800 /* to avoid precision issue, we stop when ctr <= 0.5 */
1801 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1802 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1803 } else {
1804 /* case src and ctr contain integers */
1805 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1806 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1807 }
1808 ureg_BRK(ureg);
1809 tx_endcond(tx);
1810 ureg_ENDIF(ureg);
1811
1812 return D3D_OK;
1813 }
1814
1815 DECL_SPECIAL(ENDREP)
1816 {
1817 struct ureg_program *ureg = tx->ureg;
1818 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1819 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1820 struct ureg_src src_ctr = ureg_src(ctr);
1821
1822 /* ctr.x -= 1 */
1823 if (!tx->native_integers)
1824 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1825 else
1826 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1827
1828 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1829 return D3D_OK;
1830 }
1831
1832 DECL_SPECIAL(ENDIF)
1833 {
1834 tx_endcond(tx);
1835 ureg_ENDIF(tx->ureg);
1836 return D3D_OK;
1837 }
1838
1839 DECL_SPECIAL(IF)
1840 {
1841 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1842
1843 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1844 ureg_UIF(tx->ureg, src, tx_cond(tx));
1845 else
1846 ureg_IF(tx->ureg, src, tx_cond(tx));
1847
1848 return D3D_OK;
1849 }
1850
1851 static inline unsigned
1852 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1853 {
1854 switch (flags) {
1855 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1856 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1857 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1858 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1859 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1860 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1861 default:
1862 assert(!"invalid comparison flags");
1863 return TGSI_OPCODE_SGT;
1864 }
1865 }
1866
1867 DECL_SPECIAL(IFC)
1868 {
1869 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1870 struct ureg_src src[2];
1871 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1872 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1873 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1874 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1875 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1876 return D3D_OK;
1877 }
1878
1879 DECL_SPECIAL(ELSE)
1880 {
1881 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1882 return D3D_OK;
1883 }
1884
1885 DECL_SPECIAL(BREAKC)
1886 {
1887 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1888 struct ureg_src src[2];
1889 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1890 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1891 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1892 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1893 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1894 ureg_BRK(tx->ureg);
1895 tx_endcond(tx);
1896 ureg_ENDIF(tx->ureg);
1897 return D3D_OK;
1898 }
1899
1900 static const char *sm1_declusage_names[] =
1901 {
1902 [D3DDECLUSAGE_POSITION] = "POSITION",
1903 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1904 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1905 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1906 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1907 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1908 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1909 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1910 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1911 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1912 [D3DDECLUSAGE_COLOR] = "COLOR",
1913 [D3DDECLUSAGE_FOG] = "FOG",
1914 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1915 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1916 };
1917
1918 static inline unsigned
1919 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1920 {
1921 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1922 }
1923
1924 static void
1925 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1926 boolean tc,
1927 struct sm1_semantic *dcl)
1928 {
1929 BYTE index = dcl->usage_idx;
1930
1931 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1932 * we match to a TGSI_SEMANTIC_GENERIC with index.
1933 *
1934 * The index can be anything UINT16 and usage_idx is BYTE,
1935 * so we can fit everything. It doesn't matter if indices
1936 * are close together or low.
1937 *
1938 *
1939 * POSITION >= 1: 10 * index + 6
1940 * COLOR >= 2: 10 * (index-1) + 7
1941 * TEXCOORD[0..15]: index
1942 * BLENDWEIGHT: 10 * index + 18
1943 * BLENDINDICES: 10 * index + 19
1944 * NORMAL: 10 * index + 20
1945 * TANGENT: 10 * index + 21
1946 * BINORMAL: 10 * index + 22
1947 * TESSFACTOR: 10 * index + 23
1948 */
1949
1950 switch (dcl->usage) {
1951 case D3DDECLUSAGE_POSITION:
1952 case D3DDECLUSAGE_POSITIONT:
1953 case D3DDECLUSAGE_DEPTH:
1954 if (index == 0) {
1955 sem->Name = TGSI_SEMANTIC_POSITION;
1956 sem->Index = 0;
1957 } else {
1958 sem->Name = TGSI_SEMANTIC_GENERIC;
1959 sem->Index = 10 * index + 6;
1960 }
1961 break;
1962 case D3DDECLUSAGE_COLOR:
1963 if (index < 2) {
1964 sem->Name = TGSI_SEMANTIC_COLOR;
1965 sem->Index = index;
1966 } else {
1967 sem->Name = TGSI_SEMANTIC_GENERIC;
1968 sem->Index = 10 * (index-1) + 7;
1969 }
1970 break;
1971 case D3DDECLUSAGE_FOG:
1972 assert(index == 0);
1973 sem->Name = TGSI_SEMANTIC_FOG;
1974 sem->Index = 0;
1975 break;
1976 case D3DDECLUSAGE_PSIZE:
1977 assert(index == 0);
1978 sem->Name = TGSI_SEMANTIC_PSIZE;
1979 sem->Index = 0;
1980 break;
1981 case D3DDECLUSAGE_TEXCOORD:
1982 assert(index < 16);
1983 if (index < 8 && tc)
1984 sem->Name = TGSI_SEMANTIC_TEXCOORD;
1985 else
1986 sem->Name = TGSI_SEMANTIC_GENERIC;
1987 sem->Index = index;
1988 break;
1989 case D3DDECLUSAGE_BLENDWEIGHT:
1990 sem->Name = TGSI_SEMANTIC_GENERIC;
1991 sem->Index = 10 * index + 18;
1992 break;
1993 case D3DDECLUSAGE_BLENDINDICES:
1994 sem->Name = TGSI_SEMANTIC_GENERIC;
1995 sem->Index = 10 * index + 19;
1996 break;
1997 case D3DDECLUSAGE_NORMAL:
1998 sem->Name = TGSI_SEMANTIC_GENERIC;
1999 sem->Index = 10 * index + 20;
2000 break;
2001 case D3DDECLUSAGE_TANGENT:
2002 sem->Name = TGSI_SEMANTIC_GENERIC;
2003 sem->Index = 10 * index + 21;
2004 break;
2005 case D3DDECLUSAGE_BINORMAL:
2006 sem->Name = TGSI_SEMANTIC_GENERIC;
2007 sem->Index = 10 * index + 22;
2008 break;
2009 case D3DDECLUSAGE_TESSFACTOR:
2010 sem->Name = TGSI_SEMANTIC_GENERIC;
2011 sem->Index = 10 * index + 23;
2012 break;
2013 case D3DDECLUSAGE_SAMPLE:
2014 sem->Name = TGSI_SEMANTIC_COUNT;
2015 sem->Index = 0;
2016 break;
2017 default:
2018 unreachable("Invalid DECLUSAGE.");
2019 break;
2020 }
2021 }
2022
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT. */
#define NINED3DSTT_1D       (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D       (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME   (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE     (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2027 static inline unsigned
2028 d3dstt_to_tgsi_tex(BYTE sampler_type)
2029 {
2030 switch (sampler_type) {
2031 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
2032 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
2033 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2034 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
2035 default:
2036 assert(0);
2037 return TGSI_TEXTURE_UNKNOWN;
2038 }
2039 }
2040 static inline unsigned
2041 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2042 {
2043 switch (sampler_type) {
2044 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2045 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2046 case NINED3DSTT_VOLUME:
2047 case NINED3DSTT_CUBE:
2048 default:
2049 assert(0);
2050 return TGSI_TEXTURE_UNKNOWN;
2051 }
2052 }
2053 static inline unsigned
2054 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2055 {
2056 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2057 case 1: return TGSI_TEXTURE_1D;
2058 case 0: return TGSI_TEXTURE_2D;
2059 case 3: return TGSI_TEXTURE_3D;
2060 default:
2061 return TGSI_TEXTURE_CUBE;
2062 }
2063 }
2064
2065 static const char *
2066 sm1_sampler_type_name(BYTE sampler_type)
2067 {
2068 switch (sampler_type) {
2069 case NINED3DSTT_1D: return "1D";
2070 case NINED3DSTT_2D: return "2D";
2071 case NINED3DSTT_VOLUME: return "VOLUME";
2072 case NINED3DSTT_CUBE: return "CUBE";
2073 default:
2074 return "(D3DSTT_?)";
2075 }
2076 }
2077
2078 static inline unsigned
2079 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2080 {
2081 switch (sem->Name) {
2082 case TGSI_SEMANTIC_POSITION:
2083 case TGSI_SEMANTIC_NORMAL:
2084 return TGSI_INTERPOLATE_LINEAR;
2085 case TGSI_SEMANTIC_BCOLOR:
2086 case TGSI_SEMANTIC_COLOR:
2087 return TGSI_INTERPOLATE_COLOR;
2088 case TGSI_SEMANTIC_FOG:
2089 case TGSI_SEMANTIC_GENERIC:
2090 case TGSI_SEMANTIC_TEXCOORD:
2091 case TGSI_SEMANTIC_CLIPDIST:
2092 case TGSI_SEMANTIC_CLIPVERTEX:
2093 return TGSI_INTERPOLATE_PERSPECTIVE;
2094 case TGSI_SEMANTIC_EDGEFLAG:
2095 case TGSI_SEMANTIC_FACE:
2096 case TGSI_SEMANTIC_INSTANCEID:
2097 case TGSI_SEMANTIC_PCOORD:
2098 case TGSI_SEMANTIC_PRIMID:
2099 case TGSI_SEMANTIC_PSIZE:
2100 case TGSI_SEMANTIC_VERTEXID:
2101 return TGSI_INTERPOLATE_CONSTANT;
2102 default:
2103 assert(0);
2104 return TGSI_INTERPOLATE_CONSTANT;
2105 }
2106 }
2107
2108 DECL_SPECIAL(DCL)
2109 {
2110 struct ureg_program *ureg = tx->ureg;
2111 boolean is_input;
2112 boolean is_sampler;
2113 struct tgsi_declaration_semantic tgsi;
2114 struct sm1_semantic sem;
2115 sm1_read_semantic(tx, &sem);
2116
2117 is_input = sem.reg.file == D3DSPR_INPUT;
2118 is_sampler =
2119 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2120
2121 DUMP("DCL ");
2122 sm1_dump_dst_param(&sem.reg);
2123 if (is_sampler)
2124 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2125 else
2126 if (tx->version.major >= 3)
2127 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2128 else
2129 if (sem.usage | sem.usage_idx)
2130 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2131 else
2132 DUMP("\n");
2133
2134 if (is_sampler) {
2135 const unsigned m = 1 << sem.reg.idx;
2136 ureg_DECL_sampler(ureg, sem.reg.idx);
2137 tx->info->sampler_mask |= m;
2138 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2139 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2140 d3dstt_to_tgsi_tex(sem.sampler_type);
2141 return D3D_OK;
2142 }
2143
2144 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2145 if (IS_VS) {
2146 if (is_input) {
2147 /* linkage outside of shader with vertex declaration */
2148 ureg_DECL_vs_input(ureg, sem.reg.idx);
2149 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2150 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2151 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2152 /* NOTE: preserving order in case of indirect access */
2153 } else
2154 if (tx->version.major >= 3) {
2155 /* SM2 output semantic determined by file */
2156 assert(sem.reg.mask != 0);
2157 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2158 tx->info->position_t = TRUE;
2159 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2160 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2161 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2162 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2163 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2164 if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2165 tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2166 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2167 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2168 }
2169
2170 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2171 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2172 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2173 }
2174 }
2175 } else {
2176 if (is_input && tx->version.major >= 3) {
2177 unsigned interp_location = 0;
2178 /* SM3 only, SM2 input semantic determined by file */
2179 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2180 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2181 /* PositionT and tessfactor forbidden */
2182 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2183 return D3DERR_INVALIDCALL;
2184
2185 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2186 /* Position0 is forbidden (likely because vPos already does that) */
2187 if (sem.usage == D3DDECLUSAGE_POSITION)
2188 return D3DERR_INVALIDCALL;
2189 /* Following code is for depth */
2190 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2191 return D3D_OK;
2192 }
2193
2194 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2195 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2196 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2197
2198 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2199 ureg, tgsi.Name, tgsi.Index,
2200 nine_tgsi_to_interp_mode(&tgsi),
2201 0, /* cylwrap */
2202 interp_location, 0, 1);
2203 } else
2204 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2205 /* FragColor or FragDepth */
2206 assert(sem.reg.mask != 0);
2207 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2208 0, 1);
2209 }
2210 }
2211 return D3D_OK;
2212 }
2213
2214 DECL_SPECIAL(DEF)
2215 {
2216 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2217 return D3D_OK;
2218 }
2219
2220 DECL_SPECIAL(DEFB)
2221 {
2222 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2223 return D3D_OK;
2224 }
2225
2226 DECL_SPECIAL(DEFI)
2227 {
2228 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2229 return D3D_OK;
2230 }
2231
2232 DECL_SPECIAL(POW)
2233 {
2234 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2235 struct ureg_src src[2] = {
2236 tx_src_param(tx, &tx->insn.src[0]),
2237 tx_src_param(tx, &tx->insn.src[1])
2238 };
2239 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2240 return D3D_OK;
2241 }
2242
2243 DECL_SPECIAL(RSQ)
2244 {
2245 struct ureg_program *ureg = tx->ureg;
2246 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2247 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2248 struct ureg_dst tmp = tx_scratch(tx);
2249 ureg_RSQ(ureg, tmp, ureg_abs(src));
2250 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2251 return D3D_OK;
2252 }
2253
2254 DECL_SPECIAL(LOG)
2255 {
2256 struct ureg_program *ureg = tx->ureg;
2257 struct ureg_dst tmp = tx_scratch_scalar(tx);
2258 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2259 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2260 ureg_LG2(ureg, tmp, ureg_abs(src));
2261 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2262 return D3D_OK;
2263 }
2264
2265 DECL_SPECIAL(LIT)
2266 {
2267 struct ureg_program *ureg = tx->ureg;
2268 struct ureg_dst tmp = tx_scratch(tx);
2269 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2270 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2271 ureg_LIT(ureg, tmp, src);
2272 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2273 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2274 * it 0^0 if src.w=0, which value is driver dependent. */
2275 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2276 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2277 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2278 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2279 return D3D_OK;
2280 }
2281
2282 DECL_SPECIAL(NRM)
2283 {
2284 struct ureg_program *ureg = tx->ureg;
2285 struct ureg_dst tmp = tx_scratch_scalar(tx);
2286 struct ureg_src nrm = tx_src_scalar(tmp);
2287 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2288 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2289 ureg_DP3(ureg, tmp, src, src);
2290 ureg_RSQ(ureg, tmp, nrm);
2291 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2292 ureg_MUL(ureg, dst, src, nrm);
2293 return D3D_OK;
2294 }
2295
2296 DECL_SPECIAL(DP2ADD)
2297 {
2298 struct ureg_dst tmp = tx_scratch_scalar(tx);
2299 struct ureg_src dp2 = tx_src_scalar(tmp);
2300 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2301 struct ureg_src src[3];
2302 int i;
2303 for (i = 0; i < 3; ++i)
2304 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2305 assert_replicate_swizzle(&src[2]);
2306
2307 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2308 ureg_ADD(tx->ureg, dst, src[2], dp2);
2309
2310 return D3D_OK;
2311 }
2312
2313 DECL_SPECIAL(TEXCOORD)
2314 {
2315 struct ureg_program *ureg = tx->ureg;
2316 const unsigned s = tx->insn.dst[0].idx;
2317 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2318
2319 tx_texcoord_alloc(tx, s);
2320 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2321 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2322
2323 return D3D_OK;
2324 }
2325
2326 DECL_SPECIAL(TEXCOORD_ps14)
2327 {
2328 struct ureg_program *ureg = tx->ureg;
2329 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2330 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2331
2332 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2333
2334 ureg_MOV(ureg, dst, src);
2335
2336 return D3D_OK;
2337 }
2338
2339 DECL_SPECIAL(TEXKILL)
2340 {
2341 struct ureg_src reg;
2342
2343 if (tx->version.major > 1 || tx->version.minor > 3) {
2344 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2345 } else {
2346 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2347 reg = tx->regs.vT[tx->insn.dst[0].idx];
2348 }
2349 if (tx->version.major < 2)
2350 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2351 ureg_KILL_IF(tx->ureg, reg);
2352
2353 return D3D_OK;
2354 }
2355
2356 DECL_SPECIAL(TEXBEM)
2357 {
2358 struct ureg_program *ureg = tx->ureg;
2359 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2360 struct ureg_dst tmp, tmp2, texcoord;
2361 struct ureg_src sample, m00, m01, m10, m11;
2362 struct ureg_src bumpenvlscale, bumpenvloffset;
2363 const int m = tx->insn.dst[0].idx;
2364 const int n = tx->insn.src[0].idx;
2365
2366 assert(tx->version.major == 1);
2367
2368 sample = ureg_DECL_sampler(ureg, m);
2369 tx->info->sampler_mask |= 1 << m;
2370
2371 tx_texcoord_alloc(tx, m);
2372
2373 tmp = tx_scratch(tx);
2374 tmp2 = tx_scratch(tx);
2375 texcoord = tx_scratch(tx);
2376 /*
2377 * Bump-env-matrix:
2378 * 00 is X
2379 * 01 is Y
2380 * 10 is Z
2381 * 11 is W
2382 */
2383 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2384 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2385 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2386 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2387 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2388
2389 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2390 if (m % 2 == 0) {
2391 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2392 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2393 } else {
2394 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2395 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2396 }
2397
2398 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2399
2400 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2401 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2402 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2403 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2404 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2405 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2406 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2407
2408 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2409 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2410 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2411 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2412 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2413 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2414 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2415
2416 /* Now the texture coordinates are in tmp.xy */
2417
2418 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2419 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2420 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2421 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2422 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2423 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2424 bumpenvlscale, bumpenvloffset);
2425 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2426 }
2427
2428 tx->info->bumpenvmat_needed = 1;
2429
2430 return D3D_OK;
2431 }
2432
2433 DECL_SPECIAL(TEXREG2AR)
2434 {
2435 struct ureg_program *ureg = tx->ureg;
2436 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2437 struct ureg_src sample;
2438 const int m = tx->insn.dst[0].idx;
2439 const int n = tx->insn.src[0].idx;
2440 assert(m >= 0 && m > n);
2441
2442 sample = ureg_DECL_sampler(ureg, m);
2443 tx->info->sampler_mask |= 1 << m;
2444 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2445
2446 return D3D_OK;
2447 }
2448
2449 DECL_SPECIAL(TEXREG2GB)
2450 {
2451 struct ureg_program *ureg = tx->ureg;
2452 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2453 struct ureg_src sample;
2454 const int m = tx->insn.dst[0].idx;
2455 const int n = tx->insn.src[0].idx;
2456 assert(m >= 0 && m > n);
2457
2458 sample = ureg_DECL_sampler(ureg, m);
2459 tx->info->sampler_mask |= 1 << m;
2460 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2461
2462 return D3D_OK;
2463 }
2464
2465 DECL_SPECIAL(TEXM3x2PAD)
2466 {
2467 return D3D_OK; /* this is just padding */
2468 }
2469
2470 DECL_SPECIAL(TEXM3x2TEX)
2471 {
2472 struct ureg_program *ureg = tx->ureg;
2473 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2474 struct ureg_src sample;
2475 const int m = tx->insn.dst[0].idx - 1;
2476 const int n = tx->insn.src[0].idx;
2477 assert(m >= 0 && m > n);
2478
2479 tx_texcoord_alloc(tx, m);
2480 tx_texcoord_alloc(tx, m+1);
2481
2482 /* performs the matrix multiplication */
2483 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2484 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2485
2486 sample = ureg_DECL_sampler(ureg, m + 1);
2487 tx->info->sampler_mask |= 1 << (m + 1);
2488 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2489
2490 return D3D_OK;
2491 }
2492
2493 DECL_SPECIAL(TEXM3x3PAD)
2494 {
2495 return D3D_OK; /* this is just padding */
2496 }
2497
2498 DECL_SPECIAL(TEXM3x3SPEC)
2499 {
2500 struct ureg_program *ureg = tx->ureg;
2501 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2502 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2503 struct ureg_src sample;
2504 struct ureg_dst tmp;
2505 const int m = tx->insn.dst[0].idx - 2;
2506 const int n = tx->insn.src[0].idx;
2507 assert(m >= 0 && m > n);
2508
2509 tx_texcoord_alloc(tx, m);
2510 tx_texcoord_alloc(tx, m+1);
2511 tx_texcoord_alloc(tx, m+2);
2512
2513 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2514 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2515 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2516
2517 sample = ureg_DECL_sampler(ureg, m + 2);
2518 tx->info->sampler_mask |= 1 << (m + 2);
2519 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2520
2521 /* At this step, dst = N = (u', w', z').
2522 * We want dst to be the texture sampled at (u'', w'', z''), with
2523 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2524 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2525 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2526 /* at this step tmp.x = 1/N.N */
2527 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2528 /* at this step tmp.y = N.E */
2529 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2530 /* at this step tmp.x = N.E/N.N */
2531 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2532 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2533 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2534 ureg_SUB(ureg, tmp, ureg_src(tmp), E);
2535 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2536
2537 return D3D_OK;
2538 }
2539
2540 DECL_SPECIAL(TEXREG2RGB)
2541 {
2542 struct ureg_program *ureg = tx->ureg;
2543 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2544 struct ureg_src sample;
2545 const int m = tx->insn.dst[0].idx;
2546 const int n = tx->insn.src[0].idx;
2547 assert(m >= 0 && m > n);
2548
2549 sample = ureg_DECL_sampler(ureg, m);
2550 tx->info->sampler_mask |= 1 << m;
2551 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2552
2553 return D3D_OK;
2554 }
2555
2556 DECL_SPECIAL(TEXDP3TEX)
2557 {
2558 struct ureg_program *ureg = tx->ureg;
2559 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2560 struct ureg_dst tmp;
2561 struct ureg_src sample;
2562 const int m = tx->insn.dst[0].idx;
2563 const int n = tx->insn.src[0].idx;
2564 assert(m >= 0 && m > n);
2565
2566 tx_texcoord_alloc(tx, m);
2567
2568 tmp = tx_scratch(tx);
2569 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2570 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2571
2572 sample = ureg_DECL_sampler(ureg, m);
2573 tx->info->sampler_mask |= 1 << m;
2574 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2575
2576 return D3D_OK;
2577 }
2578
2579 DECL_SPECIAL(TEXM3x2DEPTH)
2580 {
2581 struct ureg_program *ureg = tx->ureg;
2582 struct ureg_dst tmp;
2583 const int m = tx->insn.dst[0].idx - 1;
2584 const int n = tx->insn.src[0].idx;
2585 assert(m >= 0 && m > n);
2586
2587 tx_texcoord_alloc(tx, m);
2588 tx_texcoord_alloc(tx, m+1);
2589
2590 tmp = tx_scratch(tx);
2591
2592 /* performs the matrix multiplication */
2593 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2594 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2595
2596 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2597 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2598 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2599 /* res = 'w' == 0 ? 1.0 : z/w */
2600 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2601 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2602 /* replace the depth for depth testing with the result */
2603 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2604 TGSI_WRITEMASK_Z, 0, 1);
2605 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2606 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2607 return D3D_OK;
2608 }
2609
2610 DECL_SPECIAL(TEXDP3)
2611 {
2612 struct ureg_program *ureg = tx->ureg;
2613 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2614 const int m = tx->insn.dst[0].idx;
2615 const int n = tx->insn.src[0].idx;
2616 assert(m >= 0 && m > n);
2617
2618 tx_texcoord_alloc(tx, m);
2619
2620 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2621
2622 return D3D_OK;
2623 }
2624
2625 DECL_SPECIAL(TEXM3x3)
2626 {
2627 struct ureg_program *ureg = tx->ureg;
2628 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2629 struct ureg_src sample;
2630 struct ureg_dst E, tmp;
2631 const int m = tx->insn.dst[0].idx - 2;
2632 const int n = tx->insn.src[0].idx;
2633 assert(m >= 0 && m > n);
2634
2635 tx_texcoord_alloc(tx, m);
2636 tx_texcoord_alloc(tx, m+1);
2637 tx_texcoord_alloc(tx, m+2);
2638
2639 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2640 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2641 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2642
2643 switch (tx->insn.opcode) {
2644 case D3DSIO_TEXM3x3:
2645 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2646 break;
2647 case D3DSIO_TEXM3x3TEX:
2648 sample = ureg_DECL_sampler(ureg, m + 2);
2649 tx->info->sampler_mask |= 1 << (m + 2);
2650 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2651 break;
2652 case D3DSIO_TEXM3x3VSPEC:
2653 sample = ureg_DECL_sampler(ureg, m + 2);
2654 tx->info->sampler_mask |= 1 << (m + 2);
2655 E = tx_scratch(tx);
2656 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2657 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2658 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2659 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2660 /* At this step, dst = N = (u', w', z').
2661 * We want dst to be the texture sampled at (u'', w'', z''), with
2662 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2663 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2664 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2665 /* at this step tmp.x = 1/N.N */
2666 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2667 /* at this step tmp.y = N.E */
2668 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2669 /* at this step tmp.x = N.E/N.N */
2670 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2671 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2672 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2673 ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
2674 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2675 break;
2676 default:
2677 return D3DERR_INVALIDCALL;
2678 }
2679 return D3D_OK;
2680 }
2681
2682 DECL_SPECIAL(TEXDEPTH)
2683 {
2684 struct ureg_program *ureg = tx->ureg;
2685 struct ureg_dst r5;
2686 struct ureg_src r5r, r5g;
2687
2688 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2689
2690 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2691 * r5 won't be used afterward, thus we can use r5.ba */
2692 r5 = tx->regs.r[5];
2693 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2694 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2695
2696 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2697 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2698 /* r5.r = r/g */
2699 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2700 r5r, ureg_imm1f(ureg, 1.0f));
2701 /* replace the depth for depth testing with the result */
2702 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2703 TGSI_WRITEMASK_Z, 0, 1);
2704 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2705
2706 return D3D_OK;
2707 }
2708
2709 DECL_SPECIAL(BEM)
2710 {
2711 struct ureg_program *ureg = tx->ureg;
2712 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2713 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2714 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2715 struct ureg_src m00, m01, m10, m11;
2716 const int m = tx->insn.dst[0].idx;
2717 struct ureg_dst tmp;
2718 /*
2719 * Bump-env-matrix:
2720 * 00 is X
2721 * 01 is Y
2722 * 10 is Z
2723 * 11 is W
2724 */
2725 nine_info_mark_const_f_used(tx->info, 8 + m);
2726 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2727 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2728 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2729 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2730 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2731 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2732 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2733 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2734 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2735 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2736
2737 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2738 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2739 NINE_APPLY_SWIZZLE(src1, X), src0);
2740 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2741 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2742 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2743 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2744
2745 tx->info->bumpenvmat_needed = 1;
2746
2747 return D3D_OK;
2748 }
2749
2750 DECL_SPECIAL(TEXLD)
2751 {
2752 struct ureg_program *ureg = tx->ureg;
2753 unsigned target;
2754 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2755 struct ureg_src src[2] = {
2756 tx_src_param(tx, &tx->insn.src[0]),
2757 tx_src_param(tx, &tx->insn.src[1])
2758 };
2759 assert(tx->insn.src[1].idx >= 0 &&
2760 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2761 target = tx->sampler_targets[tx->insn.src[1].idx];
2762
2763 switch (tx->insn.flags) {
2764 case 0:
2765 ureg_TEX(ureg, dst, target, src[0], src[1]);
2766 break;
2767 case NINED3DSI_TEXLD_PROJECT:
2768 ureg_TXP(ureg, dst, target, src[0], src[1]);
2769 break;
2770 case NINED3DSI_TEXLD_BIAS:
2771 ureg_TXB(ureg, dst, target, src[0], src[1]);
2772 break;
2773 default:
2774 assert(0);
2775 return D3DERR_INVALIDCALL;
2776 }
2777 return D3D_OK;
2778 }
2779
2780 DECL_SPECIAL(TEXLD_14)
2781 {
2782 struct ureg_program *ureg = tx->ureg;
2783 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2784 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2785 const unsigned s = tx->insn.dst[0].idx;
2786 const unsigned t = ps1x_sampler_type(tx->info, s);
2787
2788 tx->info->sampler_mask |= 1 << s;
2789 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2790
2791 return D3D_OK;
2792 }
2793
2794 DECL_SPECIAL(TEX)
2795 {
2796 struct ureg_program *ureg = tx->ureg;
2797 const unsigned s = tx->insn.dst[0].idx;
2798 const unsigned t = ps1x_sampler_type(tx->info, s);
2799 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2800 struct ureg_src src[2];
2801
2802 tx_texcoord_alloc(tx, s);
2803
2804 src[0] = tx->regs.vT[s];
2805 src[1] = ureg_DECL_sampler(ureg, s);
2806 tx->info->sampler_mask |= 1 << s;
2807
2808 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2809
2810 return D3D_OK;
2811 }
2812
2813 DECL_SPECIAL(TEXLDD)
2814 {
2815 unsigned target;
2816 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2817 struct ureg_src src[4] = {
2818 tx_src_param(tx, &tx->insn.src[0]),
2819 tx_src_param(tx, &tx->insn.src[1]),
2820 tx_src_param(tx, &tx->insn.src[2]),
2821 tx_src_param(tx, &tx->insn.src[3])
2822 };
2823 assert(tx->insn.src[1].idx >= 0 &&
2824 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2825 target = tx->sampler_targets[tx->insn.src[1].idx];
2826
2827 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2828 return D3D_OK;
2829 }
2830
2831 DECL_SPECIAL(TEXLDL)
2832 {
2833 unsigned target;
2834 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2835 struct ureg_src src[2] = {
2836 tx_src_param(tx, &tx->insn.src[0]),
2837 tx_src_param(tx, &tx->insn.src[1])
2838 };
2839 assert(tx->insn.src[1].idx >= 0 &&
2840 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2841 target = tx->sampler_targets[tx->insn.src[1].idx];
2842
2843 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2844 return D3D_OK;
2845 }
2846
2847 DECL_SPECIAL(SETP)
2848 {
2849 STUB(D3DERR_INVALIDCALL);
2850 }
2851
2852 DECL_SPECIAL(BREAKP)
2853 {
2854 STUB(D3DERR_INVALIDCALL);
2855 }
2856
2857 DECL_SPECIAL(PHASE)
2858 {
2859 return D3D_OK; /* we don't care about phase */
2860 }
2861
2862 DECL_SPECIAL(COMMENT)
2863 {
2864 return D3D_OK; /* nothing to do */
2865 }
2866
2867
/* Builds one struct sm1_op_info initializer.
 *   sio, tgsi       - suffixes pasted onto D3DSIO_ and TGSI_OPCODE_
 *   vs_min, vs_max  - first sub-initializer (vertex shader version range)
 *   ps_min, ps_max  - second sub-initializer (pixel shader version range)
 *   ndst, nsrc, fn  - the three trailing members, in declaration order
 *                     (presumably destination count, source count and
 *                     handler; confirm against struct sm1_op_info) */
#define _OPI(sio, tgsi, vs_min, vs_max, ps_min, ps_max, ndst, nsrc, fn) \
    { D3DSIO_##sio, TGSI_OPCODE_##tgsi,                                 \
      { vs_min, vs_max }, { ps_min, ps_max },                           \
      ndst, nsrc, fn }
2870
2871 struct sm1_op_info inst_table[] =
2872 {
2873 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
2874 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2875 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2876 _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
2877 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2878 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2879 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2880 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2881 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2882 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2883 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2884 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2885 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2886 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2887 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2888 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2889 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2890 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2891 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2892 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2893
2894 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2895 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2896 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2897 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2898 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2899
2900 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2901 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2902 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2903 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2904 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2905 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2906
2907 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2908
2909 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2910 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2911 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2912 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2913 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2914
2915 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2916 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2917
2918 /* More flow control */
2919 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2920 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2921 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2922 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2923 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2924 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2925 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2926 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2927 /* we don't write to the address register, but a normal register (copied
2928 * when needed to the address register), thus we don't use ARR */
2929 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2930
2931 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2932 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2933
2934 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2935 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2936 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2937 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2938 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2939 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2940 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2941 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2942 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2943 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2944 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2945 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2946 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2947 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2948 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2949 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2950
2951 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2952 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2953 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2954 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2955
2956 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2957
2958 /* More tex stuff */
2959 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2960 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2961 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2962 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2963 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2964 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2965
2966 /* Misc */
2967 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2968 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2969 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2970 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2971 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2972 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2973 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2974 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2975 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2976 };
2977
2978 struct sm1_op_info inst_phase =
2979 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
2980
2981 struct sm1_op_info inst_comment =
2982 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
2983
2984 static void
2985 create_op_info_map(struct shader_translator *tx)
2986 {
2987 const unsigned version = (tx->version.major << 8) | tx->version.minor;
2988 unsigned i;
2989
2990 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
2991 tx->op_info_map[i] = -1;
2992
2993 if (tx->processor == PIPE_SHADER_VERTEX) {
2994 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
2995 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
2996 if (inst_table[i].vert_version.min <= version &&
2997 inst_table[i].vert_version.max >= version)
2998 tx->op_info_map[inst_table[i].sio] = i;
2999 }
3000 } else {
3001 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3002 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3003 if (inst_table[i].frag_version.min <= version &&
3004 inst_table[i].frag_version.max >= version)
3005 tx->op_info_map[inst_table[i].sio] = i;
3006 }
3007 }
3008 }
3009
3010 static inline HRESULT
3011 NineTranslateInstruction_Generic(struct shader_translator *tx)
3012 {
3013 struct ureg_dst dst[1];
3014 struct ureg_src src[4];
3015 unsigned i;
3016
3017 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3018 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3019 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3020 src[i] = tx_src_param(tx, &tx->insn.src[i]);
3021
3022 ureg_insn(tx->ureg, tx->insn.info->opcode,
3023 dst, tx->insn.ndst,
3024 src, tx->insn.nsrc);
3025 return D3D_OK;
3026 }
3027
3028 static inline DWORD
3029 TOKEN_PEEK(struct shader_translator *tx)
3030 {
3031 return *(tx->parse);
3032 }
3033
3034 static inline DWORD
3035 TOKEN_NEXT(struct shader_translator *tx)
3036 {
3037 return *(tx->parse)++;
3038 }
3039
3040 static inline void
3041 TOKEN_JUMP(struct shader_translator *tx)
3042 {
3043 if (tx->parse_next && tx->parse != tx->parse_next) {
3044 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3045 tx->parse = tx->parse_next;
3046 }
3047 }
3048
3049 static inline boolean
3050 sm1_parse_eof(struct shader_translator *tx)
3051 {
3052 return TOKEN_PEEK(tx) == NINED3DSP_END;
3053 }
3054
3055 static void
3056 sm1_read_version(struct shader_translator *tx)
3057 {
3058 const DWORD tok = TOKEN_NEXT(tx);
3059
3060 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3061 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3062
3063 switch (tok >> 16) {
3064 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3065 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3066 default:
3067 DBG("Invalid shader type: %x\n", tok);
3068 tx->processor = ~0;
3069 break;
3070 }
3071 }
3072
3073 /* This is just to check if we parsed the instruction properly. */
3074 static void
3075 sm1_parse_get_skip(struct shader_translator *tx)
3076 {
3077 const DWORD tok = TOKEN_PEEK(tx);
3078
3079 if (tx->version.major >= 2) {
3080 tx->parse_next = tx->parse + 1 /* this */ +
3081 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3082 } else {
3083 tx->parse_next = NULL; /* TODO: determine from param count */
3084 }
3085 }
3086
3087 static void
3088 sm1_print_comment(const char *comment, UINT size)
3089 {
3090 if (!size)
3091 return;
3092 /* TODO */
3093 }
3094
3095 static void
3096 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3097 {
3098 DWORD tok = TOKEN_PEEK(tx);
3099
3100 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3101 {
3102 const char *comment = "";
3103 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3104 tx->parse += size + 1;
3105
3106 if (print)
3107 sm1_print_comment(comment, size);
3108
3109 tok = TOKEN_PEEK(tx);
3110 }
3111 }
3112
3113 static void
3114 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3115 {
3116 *reg = TOKEN_NEXT(tx);
3117
3118 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3119 {
3120 if (tx->version.major < 2)
3121 *rel = (1 << 31) |
3122 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3123 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
3124 D3DSP_NOSWIZZLE;
3125 else
3126 *rel = TOKEN_NEXT(tx);
3127 }
3128 }
3129
3130 static void
3131 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3132 {
3133 int8_t shift;
3134 dst->file =
3135 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3136 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3137 dst->type = TGSI_RETURN_TYPE_FLOAT;
3138 dst->idx = tok & D3DSP_REGNUM_MASK;
3139 dst->rel = NULL;
3140 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3141 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3142 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3143 dst->shift = (shift & 0x7) - (shift & 0x8);
3144 }
3145
3146 static void
3147 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3148 {
3149 src->file =
3150 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3151 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3152 src->type = TGSI_RETURN_TYPE_FLOAT;
3153 src->idx = tok & D3DSP_REGNUM_MASK;
3154 src->rel = NULL;
3155 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3156 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3157
3158 switch (src->file) {
3159 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3160 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3161 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3162 default:
3163 break;
3164 }
3165 }
3166
3167 static void
3168 sm1_parse_immediate(struct shader_translator *tx,
3169 struct sm1_src_param *imm)
3170 {
3171 imm->file = NINED3DSPR_IMMEDIATE;
3172 imm->idx = INT_MIN;
3173 imm->rel = NULL;
3174 imm->swizzle = NINED3DSP_NOSWIZZLE;
3175 imm->mod = 0;
3176 switch (tx->insn.opcode) {
3177 case D3DSIO_DEF:
3178 imm->type = NINED3DSPTYPE_FLOAT4;
3179 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3180 tx->parse += 4;
3181 break;
3182 case D3DSIO_DEFI:
3183 imm->type = NINED3DSPTYPE_INT4;
3184 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3185 tx->parse += 4;
3186 break;
3187 case D3DSIO_DEFB:
3188 imm->type = NINED3DSPTYPE_BOOL;
3189 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3190 tx->parse += 1;
3191 break;
3192 default:
3193 assert(0);
3194 break;
3195 }
3196 }
3197
3198 static void
3199 sm1_read_dst_param(struct shader_translator *tx,
3200 struct sm1_dst_param *dst,
3201 struct sm1_src_param *rel)
3202 {
3203 DWORD tok_dst, tok_rel = 0;
3204
3205 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3206 sm1_parse_dst_param(dst, tok_dst);
3207 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3208 sm1_parse_src_param(rel, tok_rel);
3209 dst->rel = rel;
3210 }
3211 }
3212
3213 static void
3214 sm1_read_src_param(struct shader_translator *tx,
3215 struct sm1_src_param *src,
3216 struct sm1_src_param *rel)
3217 {
3218 DWORD tok_src, tok_rel = 0;
3219
3220 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3221 sm1_parse_src_param(src, tok_src);
3222 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3223 assert(rel);
3224 sm1_parse_src_param(rel, tok_rel);
3225 src->rel = rel;
3226 }
3227 }
3228
3229 static void
3230 sm1_read_semantic(struct shader_translator *tx,
3231 struct sm1_semantic *sem)
3232 {
3233 const DWORD tok_usg = TOKEN_NEXT(tx);
3234 const DWORD tok_dst = TOKEN_NEXT(tx);
3235
3236 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3237 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3238 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3239
3240 sm1_parse_dst_param(&sem->reg, tok_dst);
3241 }
3242
3243 static void
3244 sm1_parse_instruction(struct shader_translator *tx)
3245 {
3246 struct sm1_instruction *insn = &tx->insn;
3247 HRESULT hr;
3248 DWORD tok;
3249 struct sm1_op_info *info = NULL;
3250 unsigned i;
3251
3252 sm1_parse_comments(tx, TRUE);
3253 sm1_parse_get_skip(tx);
3254
3255 tok = TOKEN_NEXT(tx);
3256
3257 insn->opcode = tok & D3DSI_OPCODE_MASK;
3258 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3259 insn->coissue = !!(tok & D3DSI_COISSUE);
3260 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3261
3262 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3263 int k = tx->op_info_map[insn->opcode];
3264 if (k >= 0) {
3265 assert(k < ARRAY_SIZE(inst_table));
3266 info = &inst_table[k];
3267 }
3268 } else {
3269 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3270 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3271 }
3272 if (!info) {
3273 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3274 TOKEN_JUMP(tx);
3275 return;
3276 }
3277 insn->info = info;
3278 insn->ndst = info->ndst;
3279 insn->nsrc = info->nsrc;
3280
3281 assert(!insn->predicated && "TODO: predicated instructions");
3282
3283 /* check version */
3284 {
3285 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3286 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3287 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3288 if (ver < min || ver > max) {
3289 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3290 min, ver, max);
3291 return;
3292 }
3293 }
3294
3295 for (i = 0; i < insn->ndst; ++i)
3296 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3297 if (insn->predicated)
3298 sm1_read_src_param(tx, &insn->pred, NULL);
3299 for (i = 0; i < insn->nsrc; ++i)
3300 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3301
3302 /* parse here so we can dump them before processing */
3303 if (insn->opcode == D3DSIO_DEF ||
3304 insn->opcode == D3DSIO_DEFI ||
3305 insn->opcode == D3DSIO_DEFB)
3306 sm1_parse_immediate(tx, &tx->insn.src[0]);
3307
3308 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3309 sm1_instruction_check(insn);
3310
3311 if (info->handler)
3312 hr = info->handler(tx);
3313 else
3314 hr = NineTranslateInstruction_Generic(tx);
3315 tx_apply_dst0_modifiers(tx);
3316
3317 if (hr != D3D_OK)
3318 tx->failure = TRUE;
3319 tx->num_scratch = 0; /* reset */
3320
3321 TOKEN_JUMP(tx);
3322 }
3323
3324 static void
3325 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3326 {
3327 unsigned i;
3328
3329 tx->info = info;
3330
3331 tx->byte_code = info->byte_code;
3332 tx->parse = info->byte_code;
3333
3334 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3335 info->input_map[i] = NINE_DECLUSAGE_NONE;
3336 info->num_inputs = 0;
3337
3338 info->position_t = FALSE;
3339 info->point_size = FALSE;
3340
3341 tx->info->const_float_slots = 0;
3342 tx->info->const_int_slots = 0;
3343 tx->info->const_bool_slots = 0;
3344
3345 info->sampler_mask = 0x0;
3346 info->rt_mask = 0x0;
3347
3348 info->lconstf.data = NULL;
3349 info->lconstf.ranges = NULL;
3350
3351 info->bumpenvmat_needed = 0;
3352
3353 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3354 tx->regs.rL[i] = ureg_dst_undef();
3355 }
3356 tx->regs.address = ureg_dst_undef();
3357 tx->regs.a0 = ureg_dst_undef();
3358 tx->regs.p = ureg_dst_undef();
3359 tx->regs.oDepth = ureg_dst_undef();
3360 tx->regs.vPos = ureg_src_undef();
3361 tx->regs.vFace = ureg_src_undef();
3362 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3363 tx->regs.o[i] = ureg_dst_undef();
3364 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3365 tx->regs.oCol[i] = ureg_dst_undef();
3366 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3367 tx->regs.vC[i] = ureg_src_undef();
3368 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3369 tx->regs.vT[i] = ureg_src_undef();
3370
3371 sm1_read_version(tx);
3372
3373 info->version = (tx->version.major << 4) | tx->version.minor;
3374
3375 tx->num_outputs = 0;
3376
3377 create_op_info_map(tx);
3378 }
3379
3380 static void
3381 tx_dtor(struct shader_translator *tx)
3382 {
3383 if (tx->num_inst_labels)
3384 FREE(tx->inst_labels);
3385 FREE(tx->lconstf);
3386 FREE(tx->regs.r);
3387 FREE(tx);
3388 }
3389
3390 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3391 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3392 static void
3393 shader_add_vs_viewport_transform(struct shader_translator *tx)
3394 {
3395 struct ureg_program *ureg = tx->ureg;
3396 struct ureg_src c0 = NINE_CONSTANT_SRC(0);
3397 struct ureg_src c1 = NINE_CONSTANT_SRC(1);
3398 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3399
3400 c0 = ureg_src_dimension(c0, 4);
3401 c1 = ureg_src_dimension(c1, 4);
3402 /* TODO: find out when we need to apply the viewport transformation or not.
3403 * Likely will be XYZ vs XYZRHW in vdecl_out
3404 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3405 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3406 */
3407 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3408 }
3409
3410 static void
3411 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3412 {
3413 struct ureg_program *ureg = tx->ureg;
3414 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3415 struct ureg_src fog_end, fog_coeff, fog_density;
3416 struct ureg_src fog_vs, depth, fog_color;
3417 struct ureg_dst fog_factor;
3418
3419 if (!tx->info->fog_enable) {
3420 ureg_MOV(ureg, oCol0, src_col);
3421 return;
3422 }
3423
3424 if (tx->info->fog_mode != D3DFOG_NONE) {
3425 depth = nine_get_position_input(tx);
3426 depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3427 }
3428
3429 nine_info_mark_const_f_used(tx->info, 33);
3430 fog_color = NINE_CONSTANT_SRC(32);
3431 fog_factor = tx_scratch_scalar(tx);
3432
3433 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3434 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3435 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3436 ureg_SUB(ureg, fog_factor, fog_end, depth);
3437 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3438 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3439 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3440 ureg_MUL(ureg, fog_factor, depth, fog_density);
3441 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3442 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3443 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3444 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3445 ureg_MUL(ureg, fog_factor, depth, fog_density);
3446 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3447 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3448 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3449 } else {
3450 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3451 TGSI_INTERPOLATE_PERSPECTIVE),
3452 TGSI_SWIZZLE_X);
3453 ureg_MOV(ureg, fog_factor, fog_vs);
3454 }
3455
3456 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3457 tx_src_scalar(fog_factor), src_col, fog_color);
3458 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3459 }
3460
3461 #define GET_CAP(n) screen->get_param( \
3462 screen, PIPE_CAP_##n)
3463 #define GET_SHADER_CAP(n) screen->get_shader_param( \
3464 screen, info->type, PIPE_SHADER_CAP_##n)
3465
3466 HRESULT
3467 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
3468 {
3469 struct shader_translator *tx;
3470 HRESULT hr = D3D_OK;
3471 const unsigned processor = info->type;
3472 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3473 struct pipe_context *pipe = info->process_vertices ? device->pipe_sw : NineDevice9_GetPipe(device);
3474
3475 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3476
3477 tx = CALLOC_STRUCT(shader_translator);
3478 if (!tx)
3479 return E_OUTOFMEMORY;
3480 tx_ctor(tx, info);
3481
3482 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3483 hr = D3DERR_INVALIDCALL;
3484 DBG("Unsupported shader version: %u.%u !\n",
3485 tx->version.major, tx->version.minor);
3486 goto out;
3487 }
3488 if (tx->processor != processor) {
3489 hr = D3DERR_INVALIDCALL;
3490 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3491 goto out;
3492 }
3493 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3494 tx->version.major, tx->version.minor);
3495
3496 tx->ureg = ureg_create(processor);
3497 if (!tx->ureg) {
3498 hr = E_OUTOFMEMORY;
3499 goto out;
3500 }
3501
3502 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3503 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3504 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3505 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3506 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3507 tx->texcoord_sn = tx->want_texcoord ?
3508 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3509 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3510 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3511
3512 if (IS_VS) {
3513 tx->num_constf_allowed = NINE_MAX_CONST_F;
3514 } else if (tx->version.major < 2) {/* IS_PS v1 */
3515 tx->num_constf_allowed = 8;
3516 } else if (tx->version.major == 2) {/* IS_PS v2 */
3517 tx->num_constf_allowed = 32;
3518 } else {/* IS_PS v3 */
3519 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3520 }
3521
3522 if (tx->version.major < 2) {
3523 tx->num_consti_allowed = 0;
3524 tx->num_constb_allowed = 0;
3525 } else {
3526 tx->num_consti_allowed = NINE_MAX_CONST_I;
3527 tx->num_constb_allowed = NINE_MAX_CONST_B;
3528 }
3529
3530 if (IS_VS && tx->version.major >= 2 && info->swvp_on) {
3531 tx->num_constf_allowed = 8192;
3532 tx->num_consti_allowed = 2048;
3533 tx->num_constb_allowed = 2048;
3534 }
3535
3536 /* VS must always write position. Declare it here to make it the 1st output.
3537 * (Some drivers like nv50 are buggy and rely on that.)
3538 */
3539 if (IS_VS) {
3540 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3541 } else {
3542 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3543 if (!tx->shift_wpos)
3544 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3545 }
3546
3547 while (!sm1_parse_eof(tx) && !tx->failure)
3548 sm1_parse_instruction(tx);
3549 tx->parse++; /* for byte_size */
3550
3551 if (tx->failure) {
3552 /* For VS shaders, we print the warning later,
3553 * we first try with swvp. */
3554 if (IS_PS)
3555 ERR("Encountered buggy shader\n");
3556 ureg_destroy(tx->ureg);
3557 hr = D3DERR_INVALIDCALL;
3558 goto out;
3559 }
3560
3561 if (IS_PS && tx->version.major < 3) {
3562 if (tx->version.major < 2) {
3563 assert(tx->num_temp); /* there must be color output */
3564 info->rt_mask |= 0x1;
3565 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3566 } else {
3567 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3568 }
3569 }
3570
3571 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3572 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3573 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3574 }
3575
3576 if (info->position_t)
3577 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3578
3579 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3580 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3581 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3582 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3583 info->point_size = TRUE;
3584 }
3585
3586 if (info->process_vertices)
3587 shader_add_vs_viewport_transform(tx);
3588
3589 ureg_END(tx->ureg);
3590
3591 /* record local constants */
3592 if (tx->num_lconstf && tx->indirect_const_access) {
3593 struct nine_range *ranges;
3594 float *data;
3595 int *indices;
3596 unsigned i, k, n;
3597
3598 hr = E_OUTOFMEMORY;
3599
3600 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3601 if (!data)
3602 goto out;
3603 info->lconstf.data = data;
3604
3605 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3606 if (!indices)
3607 goto out;
3608
3609 /* lazy sort, num_lconstf should be small */
3610 for (n = 0; n < tx->num_lconstf; ++n) {
3611 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3612 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3613 k = i;
3614 }
3615 indices[n] = tx->lconstf[k].idx;
3616 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
3617 tx->lconstf[k].idx = INT_MAX;
3618 }
3619
3620 /* count ranges */
3621 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3622 if (indices[i] != indices[i - 1] + 1)
3623 ++n;
3624 ranges = MALLOC(n * sizeof(ranges[0]));
3625 if (!ranges) {
3626 FREE(indices);
3627 goto out;
3628 }
3629 info->lconstf.ranges = ranges;
3630
3631 k = 0;
3632 ranges[k].bgn = indices[0];
3633 for (i = 1; i < tx->num_lconstf; ++i) {
3634 if (indices[i] != indices[i - 1] + 1) {
3635 ranges[k].next = &ranges[k + 1];
3636 ranges[k].end = indices[i - 1] + 1;
3637 ++k;
3638 ranges[k].bgn = indices[i];
3639 }
3640 }
3641 ranges[k].end = indices[i - 1] + 1;
3642 ranges[k].next = NULL;
3643 assert(n == (k + 1));
3644
3645 FREE(indices);
3646 hr = D3D_OK;
3647 }
3648
3649 /* r500 */
3650 if (info->const_float_slots > device->max_vs_const_f &&
3651 (info->const_int_slots || info->const_bool_slots) &&
3652 (!IS_VS || !info->swvp_on))
3653 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3654
3655
3656 if (tx->indirect_const_access) /* vs only */
3657 info->const_float_slots = device->max_vs_const_f;
3658
3659 if (!IS_VS || !info->swvp_on) {
3660 unsigned s, slot_max;
3661 unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3662
3663 slot_max = info->const_bool_slots > 0 ?
3664 max_const_f + NINE_MAX_CONST_I
3665 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3666 info->const_int_slots > 0 ?
3667 max_const_f + info->const_int_slots :
3668 info->const_float_slots;
3669
3670 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3671
3672 for (s = 0; s < slot_max; s++)
3673 ureg_DECL_constant(tx->ureg, s);
3674 } else {
3675 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
3676 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
3677 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
3678 ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
3679 }
3680
3681 if (info->process_vertices)
3682 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
3683
3684 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3685 unsigned count;
3686 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3687 tgsi_dump(toks, 0);
3688 ureg_free_tokens(toks);
3689 }
3690
3691 if (info->process_vertices) {
3692 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
3693 tx->output_info,
3694 tx->num_outputs,
3695 &(info->so));
3696 info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
3697 } else
3698 info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
3699 if (!info->cso) {
3700 hr = D3DERR_DRIVERINTERNALERROR;
3701 FREE(info->lconstf.data);
3702 FREE(info->lconstf.ranges);
3703 goto out;
3704 }
3705
3706 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3707 out:
3708 tx_dtor(tx);
3709 return hr;
3710 }