b651f065585d4e53c9a39cc9d4f953f762d6fbba
[mesa.git] / src / gallium / state_trackers / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37
#define DBG_CHANNEL DBG_SHADER

/* Unprefixed debug print on the shader channel (no function name is passed).
 * Uses the standard C99 variadic form rather than the GNU named-argument
 * extension; every caller passes at least a format string. */
#define DUMP(...) _nine_debug_printf(DBG_CHANNEL, NULL, __VA_ARGS__)
41
42
/* Translation context, defined further down in this file. */
struct shader_translator;

/* Signature of a per-opcode special handler (see sm1_op_info::handler). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Returns a printable name for a D3DSIO_x opcode; used by the dump code. */
static inline const char *d3dsio_to_string(unsigned opcode);
48
49
/* Shader kind tags (NOTE(review): presumably the high word of the version
 * token -- confirm against the parser). */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the label stacks kept in struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Values stored in sm1_src_param::type / sm1_dst_param::type. */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Pseudo register file for immediates: one past the last D3D register file. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison selectors. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Values of sm1_instruction::flags for D3DSIO_TEX. */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Per-component write mask bits (x, y, z, w). */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle (.xyzw): two bits per destination component. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four TGSI_SWIZZLE_x arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Replicates component s of src to all four components. */
#define NINE_APPLY_SWIZZLE(src, s) \
    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Destination modifier bits, shifted down to start at bit 0. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
99
/*
 * Source modifiers and the shader versions they apply to:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2      = PS 1.4 (2x)
 * X2NEG   = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 *
 * The values below are the D3D modifiers shifted down to start at bit 0,
 * which makes them usable as indices into sm1_mod_str[].
 */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
129
/* Printable prefix for each source modifier, indexed by NINED3DSPSM_x.
 * The dump code prints it as "<prefix>(<register>)". */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
147
148 static void
149 sm1_dump_writemask(BYTE mask)
150 {
151 if (mask & 1) DUMP("x"); else DUMP("_");
152 if (mask & 2) DUMP("y"); else DUMP("_");
153 if (mask & 4) DUMP("z"); else DUMP("_");
154 if (mask & 8) DUMP("w"); else DUMP("_");
155 }
156
157 static void
158 sm1_dump_swizzle(BYTE s)
159 {
160 char c[4] = { 'x', 'y', 'z', 'w' };
161 DUMP("%c%c%c%c",
162 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
163 }
164
/* One-letter prefix for each D3DSPR_x register file, used by the dump code.
 * Note that D3DSPR_ATTROUT and D3DSPR_DEPTHOUT share 'D', and that
 * D3DSPR_ADDR has the same value as D3DSPR_TEXTURE (see tx_src_param). */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
188
189 static void
190 sm1_dump_reg(BYTE file, INT index)
191 {
192 switch (file) {
193 case D3DSPR_LOOP:
194 DUMP("aL");
195 break;
196 case D3DSPR_COLOROUT:
197 DUMP("oC%i", index);
198 break;
199 case D3DSPR_DEPTHOUT:
200 DUMP("oDepth");
201 break;
202 case D3DSPR_RASTOUT:
203 DUMP("oRast%i", index);
204 break;
205 case D3DSPR_CONSTINT:
206 DUMP("iconst[%i]", index);
207 break;
208 case D3DSPR_CONSTBOOL:
209 DUMP("bconst[%i]", index);
210 break;
211 default:
212 DUMP("%c%i", sm1_file_char[file], index);
213 break;
214 }
215 }
216
/* Decoded source parameter of an SM1 instruction. */
struct sm1_src_param
{
    INT idx;                   /* register index within 'file' */
    struct sm1_src_param *rel; /* relative-addressing register, or NULL */
    BYTE file;                 /* D3DSPR_x or NINED3DSPR_IMMEDIATE */
    BYTE swizzle;              /* two bits per component, see NINED3DSP_NOSWIZZLE */
    BYTE mod;                  /* NINED3DSPSM_x */
    BYTE type;                 /* NINED3DSPTYPE_x, selects the valid 'imm' member */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm;                     /* immediate value when file is NINED3DSPR_IMMEDIATE */
};
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
234
/* Decoded destination parameter of an SM1 instruction. */
struct sm1_dst_param
{
    INT idx;                   /* register index within 'file' */
    struct sm1_src_param *rel; /* relative-addressing register, or NULL */
    BYTE file;                 /* D3DSPR_x */
    BYTE mask;                 /* NINED3DSP_WRITEMASK_x bits */
    BYTE mod;                  /* NINED3DSPDM_x bits */
    int8_t shift; /* sint4 */  /* dumped as "*2^n" (positive) or "/2^n" (negative) */
    BYTE type;
};
245
246 static inline void
247 assert_replicate_swizzle(const struct ureg_src *reg)
248 {
249 assert(reg->SwizzleY == reg->SwizzleX &&
250 reg->SwizzleZ == reg->SwizzleX &&
251 reg->SwizzleW == reg->SwizzleX);
252 }
253
254 static void
255 sm1_dump_immediate(const struct sm1_src_param *param)
256 {
257 switch (param->type) {
258 case NINED3DSPTYPE_FLOAT4:
259 DUMP("{ %f %f %f %f }",
260 param->imm.f[0], param->imm.f[1],
261 param->imm.f[2], param->imm.f[3]);
262 break;
263 case NINED3DSPTYPE_INT4:
264 DUMP("{ %i %i %i %i }",
265 param->imm.i[0], param->imm.i[1],
266 param->imm.i[2], param->imm.i[3]);
267 break;
268 case NINED3DSPTYPE_BOOL:
269 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
270 break;
271 default:
272 assert(0);
273 break;
274 }
275 }
276
277 static void
278 sm1_dump_src_param(const struct sm1_src_param *param)
279 {
280 if (param->file == NINED3DSPR_IMMEDIATE) {
281 assert(!param->mod &&
282 !param->rel &&
283 param->swizzle == NINED3DSP_NOSWIZZLE);
284 sm1_dump_immediate(param);
285 return;
286 }
287
288 if (param->mod)
289 DUMP("%s(", sm1_mod_str[param->mod]);
290 if (param->rel) {
291 DUMP("%c[", sm1_file_char[param->file]);
292 sm1_dump_src_param(param->rel);
293 DUMP("+%i]", param->idx);
294 } else {
295 sm1_dump_reg(param->file, param->idx);
296 }
297 if (param->mod)
298 DUMP(")");
299 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
300 DUMP(".");
301 sm1_dump_swizzle(param->swizzle);
302 }
303 }
304
305 static void
306 sm1_dump_dst_param(const struct sm1_dst_param *param)
307 {
308 if (param->mod & NINED3DSPDM_SATURATE)
309 DUMP("sat ");
310 if (param->mod & NINED3DSPDM_PARTIALP)
311 DUMP("pp ");
312 if (param->mod & NINED3DSPDM_CENTROID)
313 DUMP("centroid ");
314 if (param->shift < 0)
315 DUMP("/%u ", 1 << -param->shift);
316 if (param->shift > 0)
317 DUMP("*%u ", 1 << param->shift);
318
319 if (param->rel) {
320 DUMP("%c[", sm1_file_char[param->file]);
321 sm1_dump_src_param(param->rel);
322 DUMP("+%i]", param->idx);
323 } else {
324 sm1_dump_reg(param->file, param->idx);
325 }
326 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
327 DUMP(".");
328 sm1_dump_writemask(param->mask);
329 }
330 }
331
/* Declaration info filled in by sm1_read_semantic(): the declared register
 * together with its usage/usage index and sampler type. */
struct sm1_semantic
{
    struct sm1_dst_param reg;
    BYTE sampler_type;
    D3DDECLUSAGE usage;
    BYTE usage_idx;
};
339
/* Static description of one SM1 opcode and how it is translated. */
struct sm1_op_info
{
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, the 'opcode'
     * member should be ignored completely */
    unsigned sio;    /* D3DSIO_x */
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even if handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
360
/* One decoded SM1 instruction. */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags;      /* opcode specific, e.g. NINED3DSI_TEXLD_x for D3DSIO_TEX */
    BOOL coissue;
    BOOL predicated; /* when set, 'pred' holds the predicate source */
    BYTE ndst;       /* number of valid entries in dst[] */
    BYTE nsrc;       /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    /* NOTE(review): presumably the storage that src[i].rel / dst[0].rel
     * point at -- confirm against the parser */
    struct sm1_src_param src_rel[4];
    struct sm1_src_param pred;
    struct sm1_src_param dst_rel[1];
    struct sm1_dst_param dst[1];

    const struct sm1_op_info *info;
};
377
378 static void
379 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
380 {
381 unsigned i;
382
383 /* no info stored for these: */
384 if (insn->opcode == D3DSIO_DCL)
385 return;
386 for (i = 0; i < indent; ++i)
387 DUMP(" ");
388
389 if (insn->predicated) {
390 DUMP("@");
391 sm1_dump_src_param(&insn->pred);
392 DUMP(" ");
393 }
394 DUMP("%s", d3dsio_to_string(insn->opcode));
395 if (insn->flags) {
396 switch (insn->opcode) {
397 case D3DSIO_TEX:
398 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
399 break;
400 default:
401 DUMP("_%x", insn->flags);
402 break;
403 }
404 }
405 if (insn->coissue)
406 DUMP("_co");
407 DUMP(" ");
408
409 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
410 sm1_dump_dst_param(&insn->dst[i]);
411 DUMP(" ");
412 }
413
414 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
415 sm1_dump_src_param(&insn->src[i]);
416 DUMP(" ");
417 }
418 if (insn->opcode == D3DSIO_DEF ||
419 insn->opcode == D3DSIO_DEFI ||
420 insn->opcode == D3DSIO_DEFB)
421 sm1_dump_immediate(&insn->src[0]);
422
423 DUMP("\n");
424 }
425
/* A constant defined inside the shader (DEF/DEFI/DEFB): the constant index
 * it overrides and the TGSI immediate holding its value. */
struct sm1_local_const
{
    INT idx;
    struct ureg_src reg;
    float f[4]; /* for indirect addressing of float constants */
};
432
/* State of one SM1 byte code -> TGSI translation. */
struct shader_translator
{
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg;

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
    /* exclusive upper bounds for constant indices (see tx_lconst{f,i,b}) */
    unsigned num_constf_allowed;
    unsigned num_consti_allowed;
    unsigned num_constb_allowed;

    boolean native_integers;
    boolean inline_subroutines;
    boolean want_texcoord;
    boolean shift_wpos;
    boolean wpos_is_sysval;
    boolean face_is_sysval_integer;
    boolean mul_zero_wins;
    unsigned texcoord_sn;

    struct sm1_instruction insn; /* current instruction */

    struct {
        struct ureg_dst *r;
        struct ureg_dst oPos;
        struct ureg_dst oPos_out; /* the real output when doing streamout */
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p;
        struct ureg_dst address;
        struct ureg_dst a0;
        struct ureg_dst predicate;
        struct ureg_dst predicate_tmp;
        struct ureg_dst predicate_dst;
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[8]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* number of entries allocated in regs.r */
    unsigned num_scratch; /* number of regs.t entries handed out by tx_scratch() */
    unsigned loop_depth;
    unsigned loop_depth_max;
    unsigned cond_depth;
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
    boolean predicated_activated;

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    /* shader-local constants; the arrays grow in steps of 8 entries */
    struct sm1_local_const *lconstf;
    unsigned num_lconstf;
    struct sm1_local_const *lconsti;
    unsigned num_lconsti;
    struct sm1_local_const *lconstb;
    unsigned num_lconstb;

    boolean slots_used[NINE_MAX_CONST_ALL];
    unsigned num_slots;

    boolean indirect_const_access;
    boolean failure; /* set when translation hit an error */

    struct nine_vs_output_info output_info[16];
    int num_outputs; /* number of valid entries in output_info */

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
524
/* Both macros expect a 'struct shader_translator *tx' in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* Flag the translation as failed and return from a void function when cond
 * holds. Expands to a bare 'if' block; existing callers invoke it without a
 * trailing semicolon, so do not use it as the body of an unbraced if/else. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
532
533 static void
534 sm1_instruction_check(const struct sm1_instruction *insn)
535 {
536 if (insn->opcode == D3DSIO_CRS)
537 {
538 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
539 {
540 DBG("CRS.mask.w\n");
541 }
542 }
543 }
544
545 static void
546 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
547 int mask, int output_index)
548 {
549 tx->output_info[tx->num_outputs].output_semantic = Usage;
550 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
551 tx->output_info[tx->num_outputs].mask = mask;
552 tx->output_info[tx->num_outputs].output_index = output_index;
553 tx->num_outputs++;
554 }
555
556 static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
557 {
558 struct ureg_src src;
559 /* vswp constant handling: we use two buffers
560 * to fit all the float constants. The special handling
561 * doesn't need to be elsewhere, because all the instructions
562 * accessing the constants directly are VS1, and swvp
563 * is VS >= 2 */
564 if (tx->info->swvp_on && idx >= 4096) {
565 /* TODO: swvp rel is broken if many constants are used */
566 src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
567 src = ureg_src_dimension(src, 1);
568 } else {
569 src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
570 src = ureg_src_dimension(src, 0);
571 }
572
573 if (!tx->info->swvp_on)
574 tx->slots_used[idx] = TRUE;
575 if (tx->info->const_float_slots < (idx + 1))
576 tx->info->const_float_slots = idx + 1;
577 if (tx->num_slots < (idx + 1))
578 tx->num_slots = idx + 1;
579
580 return src;
581 }
582
583 static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
584 {
585 struct ureg_src src;
586
587 if (tx->info->swvp_on) {
588 src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
589 src = ureg_src_dimension(src, 2);
590 } else {
591 unsigned slot_idx = tx->info->const_i_base + idx;
592 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
593 src = ureg_src_dimension(src, 0);
594 tx->slots_used[slot_idx] = TRUE;
595 tx->info->int_slots_used[idx] = TRUE;
596 if (tx->num_slots < (slot_idx + 1))
597 tx->num_slots = slot_idx + 1;
598 }
599
600 if (tx->info->const_int_slots < (idx + 1))
601 tx->info->const_int_slots = idx + 1;
602
603 return src;
604 }
605
606 static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
607 {
608 struct ureg_src src;
609
610 char r = idx / 4;
611 char s = idx & 3;
612
613 if (tx->info->swvp_on) {
614 src = ureg_src_register(TGSI_FILE_CONSTANT, r);
615 src = ureg_src_dimension(src, 3);
616 } else {
617 unsigned slot_idx = tx->info->const_b_base + r;
618 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
619 src = ureg_src_dimension(src, 0);
620 tx->slots_used[slot_idx] = TRUE;
621 tx->info->bool_slots_used[idx] = TRUE;
622 if (tx->num_slots < (slot_idx + 1))
623 tx->num_slots = slot_idx + 1;
624 }
625 src = ureg_swizzle(src, s, s, s, s);
626
627 if (tx->info->const_bool_slots < (idx + 1))
628 tx->info->const_bool_slots = idx + 1;
629
630 return src;
631 }
632
633 static boolean
634 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
635 {
636 INT i;
637
638 if (index < 0 || index >= tx->num_constf_allowed) {
639 tx->failure = TRUE;
640 return FALSE;
641 }
642 for (i = 0; i < tx->num_lconstf; ++i) {
643 if (tx->lconstf[i].idx == index) {
644 *src = tx->lconstf[i].reg;
645 return TRUE;
646 }
647 }
648 return FALSE;
649 }
650 static boolean
651 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
652 {
653 int i;
654
655 if (index < 0 || index >= tx->num_consti_allowed) {
656 tx->failure = TRUE;
657 return FALSE;
658 }
659 for (i = 0; i < tx->num_lconsti; ++i) {
660 if (tx->lconsti[i].idx == index) {
661 *src = tx->lconsti[i].reg;
662 return TRUE;
663 }
664 }
665 return FALSE;
666 }
667 static boolean
668 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
669 {
670 int i;
671
672 if (index < 0 || index >= tx->num_constb_allowed) {
673 tx->failure = TRUE;
674 return FALSE;
675 }
676 for (i = 0; i < tx->num_lconstb; ++i) {
677 if (tx->lconstb[i].idx == index) {
678 *src = tx->lconstb[i].reg;
679 return TRUE;
680 }
681 }
682 return FALSE;
683 }
684
685 static void
686 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
687 {
688 unsigned n;
689
690 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
691
692 for (n = 0; n < tx->num_lconstf; ++n)
693 if (tx->lconstf[n].idx == index)
694 break;
695 if (n == tx->num_lconstf) {
696 if ((n % 8) == 0) {
697 tx->lconstf = REALLOC(tx->lconstf,
698 (n + 0) * sizeof(tx->lconstf[0]),
699 (n + 8) * sizeof(tx->lconstf[0]));
700 assert(tx->lconstf);
701 }
702 tx->num_lconstf++;
703 }
704 tx->lconstf[n].idx = index;
705 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
706
707 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
708 }
709 static void
710 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
711 {
712 unsigned n;
713
714 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
715
716 for (n = 0; n < tx->num_lconsti; ++n)
717 if (tx->lconsti[n].idx == index)
718 break;
719 if (n == tx->num_lconsti) {
720 if ((n % 8) == 0) {
721 tx->lconsti = REALLOC(tx->lconsti,
722 (n + 0) * sizeof(tx->lconsti[0]),
723 (n + 8) * sizeof(tx->lconsti[0]));
724 assert(tx->lconsti);
725 }
726 tx->num_lconsti++;
727 }
728
729 tx->lconsti[n].idx = index;
730 tx->lconsti[n].reg = tx->native_integers ?
731 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
732 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
733 }
734 static void
735 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
736 {
737 unsigned n;
738
739 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
740
741 for (n = 0; n < tx->num_lconstb; ++n)
742 if (tx->lconstb[n].idx == index)
743 break;
744 if (n == tx->num_lconstb) {
745 if ((n % 8) == 0) {
746 tx->lconstb = REALLOC(tx->lconstb,
747 (n + 0) * sizeof(tx->lconstb[0]),
748 (n + 8) * sizeof(tx->lconstb[0]));
749 assert(tx->lconstb);
750 }
751 tx->num_lconstb++;
752 }
753
754 tx->lconstb[n].idx = index;
755 tx->lconstb[n].reg = tx->native_integers ?
756 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
757 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
758 }
759
760 static inline struct ureg_dst
761 tx_scratch(struct shader_translator *tx)
762 {
763 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
764 tx->failure = TRUE;
765 return tx->regs.t[0];
766 }
767 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
768 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
769 return tx->regs.t[tx->num_scratch++];
770 }
771
772 static inline struct ureg_dst
773 tx_scratch_scalar(struct shader_translator *tx)
774 {
775 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
776 }
777
778 static inline struct ureg_src
779 tx_src_scalar(struct ureg_dst dst)
780 {
781 struct ureg_src src = ureg_src(dst);
782 int c = ffs(dst.WriteMask) - 1;
783 if (dst.WriteMask == (1 << c))
784 src = ureg_scalar(src, c);
785 return src;
786 }
787
788 static inline void
789 tx_temp_alloc(struct shader_translator *tx, INT idx)
790 {
791 assert(idx >= 0);
792 if (idx >= tx->num_temp) {
793 unsigned k = tx->num_temp;
794 unsigned n = idx + 1;
795 tx->regs.r = REALLOC(tx->regs.r,
796 k * sizeof(tx->regs.r[0]),
797 n * sizeof(tx->regs.r[0]));
798 for (; k < n; ++k)
799 tx->regs.r[k] = ureg_dst_undef();
800 tx->num_temp = n;
801 }
802 if (ureg_dst_is_undef(tx->regs.r[idx]))
803 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
804 }
805
806 static inline void
807 tx_addr_alloc(struct shader_translator *tx, INT idx)
808 {
809 assert(idx == 0);
810 if (ureg_dst_is_undef(tx->regs.address))
811 tx->regs.address = ureg_DECL_address(tx->ureg);
812 if (ureg_dst_is_undef(tx->regs.a0))
813 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
814 }
815
816 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
817 * the projection should be applied on the texture. It doesn't
818 * apply on texkill.
819 * The doc is very imprecise here (it says the projection is done
820 * before rasterization, thus in vs, which seems wrong since ps instructions
821 * are affected differently)
822 * For now we only apply to the ps TEX instruction and TEXBEM.
823 * Perhaps some other instructions would need it */
824 static inline void
825 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
826 struct ureg_src src, INT idx)
827 {
828 struct ureg_dst tmp;
829 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
830
831 /* no projection */
832 if (dim == 1) {
833 ureg_MOV(tx->ureg, dst, src);
834 } else {
835 tmp = tx_scratch_scalar(tx);
836 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
837 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
838 }
839 }
840
841 static inline void
842 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
843 unsigned target, struct ureg_src src0,
844 struct ureg_src src1, INT idx)
845 {
846 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
847 struct ureg_dst tmp;
848 boolean shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
849
850 /* dim == 1: no projection
851 * Looks like must be disabled when it makes no
852 * sense according the texture dimensions
853 */
854 if (dim == 1 || (dim <= target && !shadow)) {
855 ureg_TEX(tx->ureg, dst, target, src0, src1);
856 } else if (dim == 4) {
857 ureg_TXP(tx->ureg, dst, target, src0, src1);
858 } else {
859 tmp = tx_scratch(tx);
860 apply_ps1x_projection(tx, tmp, src0, idx);
861 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
862 }
863 }
864
865 static inline void
866 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
867 {
868 assert(IS_PS);
869 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
870 if (ureg_src_is_undef(tx->regs.vT[idx]))
871 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
872 TGSI_INTERPOLATE_PERSPECTIVE);
873 }
874
875 static inline unsigned *
876 tx_bgnloop(struct shader_translator *tx)
877 {
878 tx->loop_depth++;
879 if (tx->loop_depth_max < tx->loop_depth)
880 tx->loop_depth_max = tx->loop_depth;
881 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
882 return &tx->loop_labels[tx->loop_depth - 1];
883 }
884
885 static inline unsigned *
886 tx_endloop(struct shader_translator *tx)
887 {
888 assert(tx->loop_depth);
889 tx->loop_depth--;
890 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
891 ureg_get_instruction_number(tx->ureg));
892 return &tx->loop_labels[tx->loop_depth];
893 }
894
895 static struct ureg_dst
896 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
897 {
898 const unsigned l = tx->loop_depth - 1;
899
900 if (!tx->loop_depth)
901 {
902 DBG("loop counter requested outside of loop\n");
903 return ureg_dst_undef();
904 }
905
906 if (ureg_dst_is_undef(tx->regs.rL[l])) {
907 /* loop or rep ctr creation */
908 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
909 tx->loop_or_rep[l] = loop_or_rep;
910 }
911 /* loop - rep - endloop - endrep not allowed */
912 assert(tx->loop_or_rep[l] == loop_or_rep);
913
914 return tx->regs.rL[l];
915 }
916
917 static struct ureg_src
918 tx_get_loopal(struct shader_translator *tx)
919 {
920 int loop_level = tx->loop_depth - 1;
921
922 while (loop_level >= 0) {
923 /* handle loop - rep - endrep - endloop case */
924 if (tx->loop_or_rep[loop_level])
925 /* the value is in the loop counter y component (nine implementation) */
926 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
927 loop_level--;
928 }
929
930 DBG("aL counter requested outside of loop\n");
931 return ureg_src_undef();
932 }
933
934 static inline unsigned *
935 tx_cond(struct shader_translator *tx)
936 {
937 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
938 tx->cond_depth++;
939 return &tx->cond_labels[tx->cond_depth - 1];
940 }
941
942 static inline unsigned *
943 tx_elsecond(struct shader_translator *tx)
944 {
945 assert(tx->cond_depth);
946 return &tx->cond_labels[tx->cond_depth - 1];
947 }
948
949 static inline void
950 tx_endcond(struct shader_translator *tx)
951 {
952 assert(tx->cond_depth);
953 tx->cond_depth--;
954 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
955 ureg_get_instruction_number(tx->ureg));
956 }
957
958 static inline struct ureg_dst
959 nine_ureg_dst_register(unsigned file, int index)
960 {
961 return ureg_dst(ureg_src_register(file, index));
962 }
963
964 static inline struct ureg_src
965 nine_get_position_input(struct shader_translator *tx)
966 {
967 struct ureg_program *ureg = tx->ureg;
968
969 if (tx->wpos_is_sysval)
970 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
971 else
972 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
973 0, TGSI_INTERPOLATE_LINEAR);
974 }
975
976 static struct ureg_src
977 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
978 {
979 struct ureg_program *ureg = tx->ureg;
980 struct ureg_src src;
981 struct ureg_dst tmp;
982
983 assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
984 (D3DSPR_ADDR && tx->version.major == 3));
985
986 switch (param->file)
987 {
988 case D3DSPR_TEMP:
989 tx_temp_alloc(tx, param->idx);
990 src = ureg_src(tx->regs.r[param->idx]);
991 break;
992 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
993 case D3DSPR_ADDR:
994 if (IS_VS) {
995 assert(param->idx == 0);
996 /* the address register (vs only) must be
997 * assigned before use */
998 assert(!ureg_dst_is_undef(tx->regs.a0));
999 /* Round to lowest for vs1.1 (contrary to the doc), else
1000 * round to nearest */
1001 if (tx->version.major < 2 && tx->version.minor < 2)
1002 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1003 else
1004 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1005 src = ureg_src(tx->regs.address);
1006 } else {
1007 if (tx->version.major < 2 && tx->version.minor < 4) {
1008 /* no subroutines, so should be defined */
1009 src = ureg_src(tx->regs.tS[param->idx]);
1010 } else {
1011 tx_texcoord_alloc(tx, param->idx);
1012 src = tx->regs.vT[param->idx];
1013 }
1014 }
1015 break;
1016 case D3DSPR_INPUT:
1017 if (IS_VS) {
1018 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1019 } else {
1020 if (tx->version.major < 3) {
1021 src = ureg_DECL_fs_input_cyl_centroid(
1022 ureg, TGSI_SEMANTIC_COLOR, param->idx,
1023 TGSI_INTERPOLATE_COLOR, 0,
1024 tx->info->force_color_in_centroid ?
1025 TGSI_INTERPOLATE_LOC_CENTROID : 0,
1026 0, 1);
1027 } else {
1028 if(param->rel) {
1029 /* Copy all inputs (non consecutive)
1030 * to temp array (consecutive).
1031 * This is not good for performance.
1032 * A better way would be to have inputs
1033 * consecutive (would need implement alternative
1034 * way to match vs outputs and ps inputs).
1035 * However even with the better way, the temp array
1036 * copy would need to be used if some inputs
1037 * are not GENERIC or if they have different
1038 * interpolation flag. */
1039 if (ureg_src_is_undef(tx->regs.v_consecutive)) {
1040 int i;
1041 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
1042 for (i = 0; i < 10; i++) {
1043 if (!ureg_src_is_undef(tx->regs.v[i]))
1044 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
1045 else
1046 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
1047 }
1048 }
1049 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
1050 } else {
1051 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1052 src = tx->regs.v[param->idx];
1053 }
1054 }
1055 }
1056 if (param->rel)
1057 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1058 break;
1059 case D3DSPR_PREDICATE:
1060 if (ureg_dst_is_undef(tx->regs.predicate)) {
1061 /* Forbidden to use the predicate register before being set */
1062 tx->failure = TRUE;
1063 tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1064 }
1065 src = ureg_src(tx->regs.predicate);
1066 break;
1067 case D3DSPR_SAMPLER:
1068 assert(param->mod == NINED3DSPSM_NONE);
1069 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
1070 src = ureg_DECL_sampler(ureg, param->idx);
1071 break;
1072 case D3DSPR_CONST:
1073 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1074 src = nine_float_constant_src(tx, param->idx);
1075 if (param->rel) {
1076 tx->indirect_const_access = TRUE;
1077 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1078 }
1079 }
1080 if (!IS_VS && tx->version.major < 2) {
1081 /* ps 1.X clamps constants */
1082 tmp = tx_scratch(tx);
1083 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1084 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1085 src = ureg_src(tmp);
1086 }
1087 break;
1088 case D3DSPR_CONST2:
1089 case D3DSPR_CONST3:
1090 case D3DSPR_CONST4:
1091 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1092 assert(!"CONST2/3/4");
1093 src = ureg_imm1f(ureg, 0.0f);
1094 break;
1095 case D3DSPR_CONSTINT:
1096 /* relative adressing only possible for float constants in vs */
1097 if (!tx_lconsti(tx, &src, param->idx))
1098 src = nine_integer_constant_src(tx, param->idx);
1099 break;
1100 case D3DSPR_CONSTBOOL:
1101 if (!tx_lconstb(tx, &src, param->idx))
1102 src = nine_boolean_constant_src(tx, param->idx);
1103 break;
1104 case D3DSPR_LOOP:
1105 if (ureg_dst_is_undef(tx->regs.address))
1106 tx->regs.address = ureg_DECL_address(ureg);
1107 if (!tx->native_integers)
1108 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1109 else
1110 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1111 src = ureg_src(tx->regs.address);
1112 break;
1113 case D3DSPR_MISCTYPE:
1114 switch (param->idx) {
1115 case D3DSMO_POSITION:
1116 if (ureg_src_is_undef(tx->regs.vPos))
1117 tx->regs.vPos = nine_get_position_input(tx);
1118 if (tx->shift_wpos) {
1119 /* TODO: do this only once */
1120 struct ureg_dst wpos = tx_scratch(tx);
1121 ureg_ADD(ureg, wpos, tx->regs.vPos,
1122 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1123 src = ureg_src(wpos);
1124 } else {
1125 src = tx->regs.vPos;
1126 }
1127 break;
1128 case D3DSMO_FACE:
1129 if (ureg_src_is_undef(tx->regs.vFace)) {
1130 if (tx->face_is_sysval_integer) {
1131 tmp = ureg_DECL_temporary(ureg);
1132 tx->regs.vFace =
1133 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1134
1135 /* convert bool to float */
1136 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1137 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1138 tx->regs.vFace = ureg_src(tmp);
1139 } else {
1140 tx->regs.vFace = ureg_DECL_fs_input(ureg,
1141 TGSI_SEMANTIC_FACE, 0,
1142 TGSI_INTERPOLATE_CONSTANT);
1143 }
1144 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1145 }
1146 src = tx->regs.vFace;
1147 break;
1148 default:
1149 assert(!"invalid src D3DSMO");
1150 break;
1151 }
1152 break;
1153 case D3DSPR_TEMPFLOAT16:
1154 break;
1155 default:
1156 assert(!"invalid src D3DSPR");
1157 }
1158
1159 switch (param->mod) {
1160 case NINED3DSPSM_DW:
1161 tmp = tx_scratch(tx);
1162 /* NOTE: app is not allowed to read w with this modifier */
1163 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1164 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1165 src = ureg_src(tmp);
1166 break;
1167 case NINED3DSPSM_DZ:
1168 tmp = tx_scratch(tx);
1169 /* NOTE: app is not allowed to read z with this modifier */
1170 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1171 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1172 src = ureg_src(tmp);
1173 break;
1174 default:
1175 break;
1176 }
1177
1178 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1179 src = ureg_swizzle(src,
1180 (param->swizzle >> 0) & 0x3,
1181 (param->swizzle >> 2) & 0x3,
1182 (param->swizzle >> 4) & 0x3,
1183 (param->swizzle >> 6) & 0x3);
1184
1185 switch (param->mod) {
1186 case NINED3DSPSM_ABS:
1187 src = ureg_abs(src);
1188 break;
1189 case NINED3DSPSM_ABSNEG:
1190 src = ureg_negate(ureg_abs(src));
1191 break;
1192 case NINED3DSPSM_NEG:
1193 src = ureg_negate(src);
1194 break;
1195 case NINED3DSPSM_BIAS:
1196 tmp = tx_scratch(tx);
1197 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1198 src = ureg_src(tmp);
1199 break;
1200 case NINED3DSPSM_BIASNEG:
1201 tmp = tx_scratch(tx);
1202 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1203 src = ureg_src(tmp);
1204 break;
1205 case NINED3DSPSM_NOT:
1206 if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
1207 tmp = tx_scratch(tx);
1208 ureg_NOT(ureg, tmp, src);
1209 src = ureg_src(tmp);
1210 break;
1211 } else { /* predicate */
1212 tmp = tx_scratch(tx);
1213 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1214 src = ureg_src(tmp);
1215 }
1216 /* fall through */
1217 case NINED3DSPSM_COMP:
1218 tmp = tx_scratch(tx);
1219 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1220 src = ureg_src(tmp);
1221 break;
1222 case NINED3DSPSM_DZ:
1223 case NINED3DSPSM_DW:
1224 /* Already handled*/
1225 break;
1226 case NINED3DSPSM_SIGN:
1227 tmp = tx_scratch(tx);
1228 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1229 src = ureg_src(tmp);
1230 break;
1231 case NINED3DSPSM_SIGNNEG:
1232 tmp = tx_scratch(tx);
1233 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1234 src = ureg_src(tmp);
1235 break;
1236 case NINED3DSPSM_X2:
1237 tmp = tx_scratch(tx);
1238 ureg_ADD(ureg, tmp, src, src);
1239 src = ureg_src(tmp);
1240 break;
1241 case NINED3DSPSM_X2NEG:
1242 tmp = tx_scratch(tx);
1243 ureg_ADD(ureg, tmp, src, src);
1244 src = ureg_negate(ureg_src(tmp));
1245 break;
1246 default:
1247 assert(param->mod == NINED3DSPSM_NONE);
1248 break;
1249 }
1250
1251 return src;
1252 }
1253
1254 static struct ureg_dst
1255 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1256 {
1257 struct ureg_dst dst;
1258
1259 switch (param->file)
1260 {
1261 case D3DSPR_TEMP:
1262 assert(!param->rel);
1263 tx_temp_alloc(tx, param->idx);
1264 dst = tx->regs.r[param->idx];
1265 break;
1266 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1267 case D3DSPR_ADDR:
1268 assert(!param->rel);
1269 if (tx->version.major < 2 && !IS_VS) {
1270 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1271 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1272 dst = tx->regs.tS[param->idx];
1273 } else
1274 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1275 tx_texcoord_alloc(tx, param->idx);
1276 dst = ureg_dst(tx->regs.vT[param->idx]);
1277 } else {
1278 tx_addr_alloc(tx, param->idx);
1279 dst = tx->regs.a0;
1280 }
1281 break;
1282 case D3DSPR_RASTOUT:
1283 assert(!param->rel);
1284 switch (param->idx) {
1285 case 0:
1286 if (ureg_dst_is_undef(tx->regs.oPos))
1287 tx->regs.oPos =
1288 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1289 dst = tx->regs.oPos;
1290 break;
1291 case 1:
1292 if (ureg_dst_is_undef(tx->regs.oFog))
1293 tx->regs.oFog =
1294 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1295 dst = tx->regs.oFog;
1296 break;
1297 case 2:
1298 if (ureg_dst_is_undef(tx->regs.oPts))
1299 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1300 dst = tx->regs.oPts;
1301 break;
1302 default:
1303 assert(0);
1304 break;
1305 }
1306 break;
1307 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1308 case D3DSPR_OUTPUT:
1309 if (tx->version.major < 3) {
1310 assert(!param->rel);
1311 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1312 } else {
1313 assert(!param->rel); /* TODO */
1314 assert(param->idx < ARRAY_SIZE(tx->regs.o));
1315 dst = tx->regs.o[param->idx];
1316 }
1317 break;
1318 case D3DSPR_ATTROUT: /* VS */
1319 case D3DSPR_COLOROUT: /* PS */
1320 assert(param->idx >= 0 && param->idx < 4);
1321 assert(!param->rel);
1322 tx->info->rt_mask |= 1 << param->idx;
1323 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1324 /* ps < 3: oCol[0] will have fog blending afterward */
1325 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1326 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1327 } else {
1328 tx->regs.oCol[param->idx] =
1329 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1330 }
1331 }
1332 dst = tx->regs.oCol[param->idx];
1333 if (IS_VS && tx->version.major < 3)
1334 dst = ureg_saturate(dst);
1335 break;
1336 case D3DSPR_DEPTHOUT:
1337 assert(!param->rel);
1338 if (ureg_dst_is_undef(tx->regs.oDepth))
1339 tx->regs.oDepth =
1340 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1341 TGSI_WRITEMASK_Z, 0, 1);
1342 dst = tx->regs.oDepth; /* XXX: must write .z component */
1343 break;
1344 case D3DSPR_PREDICATE:
1345 if (ureg_dst_is_undef(tx->regs.predicate))
1346 tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1347 dst = tx->regs.predicate;
1348 break;
1349 case D3DSPR_TEMPFLOAT16:
1350 DBG("unhandled D3DSPR: %u\n", param->file);
1351 break;
1352 default:
1353 assert(!"invalid dst D3DSPR");
1354 break;
1355 }
1356 if (param->rel)
1357 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1358
1359 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1360 dst = ureg_writemask(dst, param->mask);
1361 if (param->mod & NINED3DSPDM_SATURATE)
1362 dst = ureg_saturate(dst);
1363
1364 if (tx->predicated_activated) {
1365 tx->regs.predicate_dst = dst;
1366 dst = tx->regs.predicate_tmp;
1367 }
1368
1369 return dst;
1370 }
1371
1372 static struct ureg_dst
1373 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1374 {
1375 if (param->shift) {
1376 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1377 return tx->regs.tdst;
1378 }
1379 return _tx_dst_param(tx, param);
1380 }
1381
1382 static void
1383 tx_apply_dst0_modifiers(struct shader_translator *tx)
1384 {
1385 struct ureg_dst rdst;
1386 float f;
1387
1388 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1389 return;
1390 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1391
1392 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1393
1394 if (tx->insn.dst[0].shift < 0)
1395 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1396 else
1397 f = 1 << tx->insn.dst[0].shift;
1398
1399 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1400 }
1401
1402 static struct ureg_src
1403 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1404 {
1405 struct ureg_src src;
1406
1407 assert(!param->shift);
1408 assert(!(param->mod & NINED3DSPDM_SATURATE));
1409
1410 switch (param->file) {
1411 case D3DSPR_INPUT:
1412 if (IS_VS) {
1413 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1414 } else {
1415 assert(!param->rel);
1416 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1417 src = tx->regs.v[param->idx];
1418 }
1419 break;
1420 default:
1421 src = ureg_src(tx_dst_param(tx, param));
1422 break;
1423 }
1424 if (param->rel)
1425 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1426
1427 if (!param->mask)
1428 WARN("mask is 0, using identity swizzle\n");
1429
1430 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1431 char s[4];
1432 int n;
1433 int c;
1434 for (n = 0, c = 0; c < 4; ++c)
1435 if (param->mask & (1 << c))
1436 s[n++] = c;
1437 assert(n);
1438 for (c = n; c < 4; ++c)
1439 s[c] = s[n - 1];
1440 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1441 }
1442 return src;
1443 }
1444
1445 static HRESULT
1446 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1447 {
1448 struct ureg_program *ureg = tx->ureg;
1449 struct ureg_dst dst;
1450 struct ureg_src src[2];
1451 struct sm1_src_param *src_mat = &tx->insn.src[1];
1452 unsigned i;
1453
1454 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1455 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1456
1457 for (i = 0; i < n; i++)
1458 {
1459 const unsigned m = (1 << i);
1460
1461 src[1] = tx_src_param(tx, src_mat);
1462 src_mat->idx++;
1463
1464 if (!(dst.WriteMask & m))
1465 continue;
1466
1467 /* XXX: src == dst case ? */
1468
1469 switch (k) {
1470 case 3:
1471 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1472 break;
1473 case 4:
1474 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1475 break;
1476 default:
1477 DBG("invalid operation: M%ux%u\n", m, n);
1478 break;
1479 }
1480 }
1481
1482 return D3D_OK;
1483 }
1484
/* Expands to two zero initializers; presumably fills a {min, max} version
 * pair for opcodes a shader stage does not support (confirm at use site). */
#define VNOTSUPPORTED 0, 0
/* Pack a version number with the major part above the low 8 bits. */
#define V(maj, min) ((min) | ((maj) << 8))
1487
1488 static inline const char *
1489 d3dsio_to_string( unsigned opcode )
1490 {
1491 static const char *names[] = {
1492 "NOP",
1493 "MOV",
1494 "ADD",
1495 "SUB",
1496 "MAD",
1497 "MUL",
1498 "RCP",
1499 "RSQ",
1500 "DP3",
1501 "DP4",
1502 "MIN",
1503 "MAX",
1504 "SLT",
1505 "SGE",
1506 "EXP",
1507 "LOG",
1508 "LIT",
1509 "DST",
1510 "LRP",
1511 "FRC",
1512 "M4x4",
1513 "M4x3",
1514 "M3x4",
1515 "M3x3",
1516 "M3x2",
1517 "CALL",
1518 "CALLNZ",
1519 "LOOP",
1520 "RET",
1521 "ENDLOOP",
1522 "LABEL",
1523 "DCL",
1524 "POW",
1525 "CRS",
1526 "SGN",
1527 "ABS",
1528 "NRM",
1529 "SINCOS",
1530 "REP",
1531 "ENDREP",
1532 "IF",
1533 "IFC",
1534 "ELSE",
1535 "ENDIF",
1536 "BREAK",
1537 "BREAKC",
1538 "MOVA",
1539 "DEFB",
1540 "DEFI",
1541 NULL,
1542 NULL,
1543 NULL,
1544 NULL,
1545 NULL,
1546 NULL,
1547 NULL,
1548 NULL,
1549 NULL,
1550 NULL,
1551 NULL,
1552 NULL,
1553 NULL,
1554 NULL,
1555 NULL,
1556 "TEXCOORD",
1557 "TEXKILL",
1558 "TEX",
1559 "TEXBEM",
1560 "TEXBEML",
1561 "TEXREG2AR",
1562 "TEXREG2GB",
1563 "TEXM3x2PAD",
1564 "TEXM3x2TEX",
1565 "TEXM3x3PAD",
1566 "TEXM3x3TEX",
1567 NULL,
1568 "TEXM3x3SPEC",
1569 "TEXM3x3VSPEC",
1570 "EXPP",
1571 "LOGP",
1572 "CND",
1573 "DEF",
1574 "TEXREG2RGB",
1575 "TEXDP3TEX",
1576 "TEXM3x2DEPTH",
1577 "TEXDP3",
1578 "TEXM3x3",
1579 "TEXDEPTH",
1580 "CMP",
1581 "BEM",
1582 "DP2ADD",
1583 "DSX",
1584 "DSY",
1585 "TEXLDD",
1586 "SETP",
1587 "TEXLDL",
1588 "BREAKP"
1589 };
1590
1591 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1592
1593 switch (opcode) {
1594 case D3DSIO_PHASE: return "PHASE";
1595 case D3DSIO_COMMENT: return "COMMENT";
1596 case D3DSIO_END: return "END";
1597 default:
1598 return NULL;
1599 }
1600 }
1601
1602 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1603 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1604 (inst).vert_version.max | \
1605 (inst).frag_version.min | \
1606 (inst).frag_version.max)
1607
1608 #define SPECIAL(name) \
1609 NineTranslateInstruction_##name
1610
1611 #define DECL_SPECIAL(name) \
1612 static HRESULT \
1613 NineTranslateInstruction_##name( struct shader_translator *tx )
1614
1615 static HRESULT
1616 NineTranslateInstruction_Generic(struct shader_translator *);
1617
1618 DECL_SPECIAL(NOP)
1619 {
1620 /* Nothing to do. NOP was used to avoid hangs
1621 * with very old d3d drivers. */
1622 return D3D_OK;
1623 }
1624
1625 DECL_SPECIAL(SUB)
1626 {
1627 struct ureg_program *ureg = tx->ureg;
1628 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1629 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1630 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1631
1632 ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1633 return D3D_OK;
1634 }
1635
1636 DECL_SPECIAL(ABS)
1637 {
1638 struct ureg_program *ureg = tx->ureg;
1639 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1640 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1641
1642 ureg_MOV(ureg, dst, ureg_abs(src));
1643 return D3D_OK;
1644 }
1645
1646 DECL_SPECIAL(XPD)
1647 {
1648 struct ureg_program *ureg = tx->ureg;
1649 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1650 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1651 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1652
1653 ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1654 ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1655 TGSI_SWIZZLE_X, 0),
1656 ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1657 TGSI_SWIZZLE_Y, 0));
1658 ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1659 ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1660 TGSI_SWIZZLE_Y, 0),
1661 ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1662 TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1663 ureg_src(dst));
1664 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1665 ureg_imm1f(ureg, 1));
1666 return D3D_OK;
1667 }
1668
1669 DECL_SPECIAL(M4x4)
1670 {
1671 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1672 }
1673
1674 DECL_SPECIAL(M4x3)
1675 {
1676 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1677 }
1678
1679 DECL_SPECIAL(M3x4)
1680 {
1681 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1682 }
1683
1684 DECL_SPECIAL(M3x3)
1685 {
1686 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1687 }
1688
1689 DECL_SPECIAL(M3x2)
1690 {
1691 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1692 }
1693
1694 DECL_SPECIAL(CMP)
1695 {
1696 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1697 tx_src_param(tx, &tx->insn.src[0]),
1698 tx_src_param(tx, &tx->insn.src[2]),
1699 tx_src_param(tx, &tx->insn.src[1]));
1700 return D3D_OK;
1701 }
1702
1703 DECL_SPECIAL(CND)
1704 {
1705 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1706 struct ureg_dst cgt;
1707 struct ureg_src cnd;
1708
1709 /* the coissue flag was a tip for compilers to advise to
1710 * execute two operations at the same time, in cases
1711 * the two executions had same dst with different channels.
1712 * It has no effect on current hw. However it seems CND
1713 * is affected. The handling of this very specific case
1714 * handled below mimick wine behaviour */
1715 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1716 ureg_MOV(tx->ureg,
1717 dst, tx_src_param(tx, &tx->insn.src[1]));
1718 return D3D_OK;
1719 }
1720
1721 cnd = tx_src_param(tx, &tx->insn.src[0]);
1722 cgt = tx_scratch(tx);
1723
1724 if (tx->version.major == 1 && tx->version.minor < 4)
1725 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1726
1727 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1728
1729 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1730 tx_src_param(tx, &tx->insn.src[1]),
1731 tx_src_param(tx, &tx->insn.src[2]));
1732 return D3D_OK;
1733 }
1734
1735 DECL_SPECIAL(CALL)
1736 {
1737 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1738 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1739 return D3D_OK;
1740 }
1741
1742 DECL_SPECIAL(CALLNZ)
1743 {
1744 struct ureg_program *ureg = tx->ureg;
1745 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1746
1747 if (!tx->native_integers)
1748 ureg_IF(ureg, src, tx_cond(tx));
1749 else
1750 ureg_UIF(ureg, src, tx_cond(tx));
1751 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1752 tx_endcond(tx);
1753 ureg_ENDIF(ureg);
1754 return D3D_OK;
1755 }
1756
1757 DECL_SPECIAL(LOOP)
1758 {
1759 struct ureg_program *ureg = tx->ureg;
1760 unsigned *label;
1761 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1762 struct ureg_dst ctr;
1763 struct ureg_dst tmp;
1764 struct ureg_src ctrx;
1765
1766 label = tx_bgnloop(tx);
1767 ctr = tx_get_loopctr(tx, TRUE);
1768 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1769
1770 /* src: num_iterations - start_value of al - step for al - 0 */
1771 ureg_MOV(ureg, ctr, src);
1772 ureg_BGNLOOP(tx->ureg, label);
1773 tmp = tx_scratch_scalar(tx);
1774 /* Initially ctr.x contains the number of iterations.
1775 * ctr.y will contain the updated value of al.
1776 * We decrease ctr.x at the end of every iteration,
1777 * and stop when it reaches 0. */
1778
1779 if (!tx->native_integers) {
1780 /* case src and ctr contain floats */
1781 /* to avoid precision issue, we stop when ctr <= 0.5 */
1782 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1783 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1784 } else {
1785 /* case src and ctr contain integers */
1786 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1787 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1788 }
1789 ureg_BRK(ureg);
1790 tx_endcond(tx);
1791 ureg_ENDIF(ureg);
1792 return D3D_OK;
1793 }
1794
1795 DECL_SPECIAL(RET)
1796 {
1797 ureg_RET(tx->ureg);
1798 return D3D_OK;
1799 }
1800
1801 DECL_SPECIAL(ENDLOOP)
1802 {
1803 struct ureg_program *ureg = tx->ureg;
1804 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1805 struct ureg_dst dst_ctrx, dst_al;
1806 struct ureg_src src_ctr, al_counter;
1807
1808 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1809 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1810 src_ctr = ureg_src(ctr);
1811 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1812
1813 /* ctr.x -= 1
1814 * ctr.y (aL) += step */
1815 if (!tx->native_integers) {
1816 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1817 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1818 } else {
1819 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1820 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1821 }
1822 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1823 return D3D_OK;
1824 }
1825
1826 DECL_SPECIAL(LABEL)
1827 {
1828 unsigned k = tx->num_inst_labels;
1829 unsigned n = tx->insn.src[0].idx;
1830 assert(n < 2048);
1831 if (n >= k)
1832 tx->inst_labels = REALLOC(tx->inst_labels,
1833 k * sizeof(tx->inst_labels[0]),
1834 n * sizeof(tx->inst_labels[0]));
1835
1836 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1837 return D3D_OK;
1838 }
1839
1840 DECL_SPECIAL(SINCOS)
1841 {
1842 struct ureg_program *ureg = tx->ureg;
1843 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1844 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1845 struct ureg_dst tmp = tx_scratch_scalar(tx);
1846
1847 assert(!(dst.WriteMask & 0xc));
1848
1849 /* Copying to a temporary register avoids src/dst aliasing.
1850 * src is supposed to have replicated swizzle. */
1851 ureg_MOV(ureg, tmp, src);
1852
1853 /* z undefined, w untouched */
1854 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1855 tx_src_scalar(tmp));
1856 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1857 tx_src_scalar(tmp));
1858 return D3D_OK;
1859 }
1860
1861 DECL_SPECIAL(SGN)
1862 {
1863 ureg_SSG(tx->ureg,
1864 tx_dst_param(tx, &tx->insn.dst[0]),
1865 tx_src_param(tx, &tx->insn.src[0]));
1866 return D3D_OK;
1867 }
1868
1869 DECL_SPECIAL(REP)
1870 {
1871 struct ureg_program *ureg = tx->ureg;
1872 unsigned *label;
1873 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1874 struct ureg_dst ctr;
1875 struct ureg_dst tmp;
1876 struct ureg_src ctrx;
1877
1878 label = tx_bgnloop(tx);
1879 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1880 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1881
1882 /* NOTE: rep must be constant, so we don't have to save the count */
1883 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1884
1885 /* rep: num_iterations - 0 - 0 - 0 */
1886 ureg_MOV(ureg, ctr, rep);
1887 ureg_BGNLOOP(ureg, label);
1888 tmp = tx_scratch_scalar(tx);
1889 /* Initially ctr.x contains the number of iterations.
1890 * We decrease ctr.x at the end of every iteration,
1891 * and stop when it reaches 0. */
1892
1893 if (!tx->native_integers) {
1894 /* case src and ctr contain floats */
1895 /* to avoid precision issue, we stop when ctr <= 0.5 */
1896 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1897 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1898 } else {
1899 /* case src and ctr contain integers */
1900 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1901 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1902 }
1903 ureg_BRK(ureg);
1904 tx_endcond(tx);
1905 ureg_ENDIF(ureg);
1906
1907 return D3D_OK;
1908 }
1909
1910 DECL_SPECIAL(ENDREP)
1911 {
1912 struct ureg_program *ureg = tx->ureg;
1913 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1914 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1915 struct ureg_src src_ctr = ureg_src(ctr);
1916
1917 /* ctr.x -= 1 */
1918 if (!tx->native_integers)
1919 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1920 else
1921 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1922
1923 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1924 return D3D_OK;
1925 }
1926
1927 DECL_SPECIAL(ENDIF)
1928 {
1929 tx_endcond(tx);
1930 ureg_ENDIF(tx->ureg);
1931 return D3D_OK;
1932 }
1933
1934 DECL_SPECIAL(IF)
1935 {
1936 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1937
1938 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1939 ureg_UIF(tx->ureg, src, tx_cond(tx));
1940 else
1941 ureg_IF(tx->ureg, src, tx_cond(tx));
1942
1943 return D3D_OK;
1944 }
1945
1946 static inline unsigned
1947 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1948 {
1949 switch (flags) {
1950 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1951 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1952 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1953 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1954 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1955 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1956 default:
1957 assert(!"invalid comparison flags");
1958 return TGSI_OPCODE_SGT;
1959 }
1960 }
1961
1962 DECL_SPECIAL(IFC)
1963 {
1964 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1965 struct ureg_src src[2];
1966 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1967 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1968 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1969 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
1970 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1971 return D3D_OK;
1972 }
1973
1974 DECL_SPECIAL(ELSE)
1975 {
1976 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1977 return D3D_OK;
1978 }
1979
1980 DECL_SPECIAL(BREAKC)
1981 {
1982 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1983 struct ureg_src src[2];
1984 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1985 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1986 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1987 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
1988 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1989 ureg_BRK(tx->ureg);
1990 tx_endcond(tx);
1991 ureg_ENDIF(tx->ureg);
1992 return D3D_OK;
1993 }
1994
1995 static const char *sm1_declusage_names[] =
1996 {
1997 [D3DDECLUSAGE_POSITION] = "POSITION",
1998 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1999 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
2000 [D3DDECLUSAGE_NORMAL] = "NORMAL",
2001 [D3DDECLUSAGE_PSIZE] = "PSIZE",
2002 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
2003 [D3DDECLUSAGE_TANGENT] = "TANGENT",
2004 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
2005 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
2006 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
2007 [D3DDECLUSAGE_COLOR] = "COLOR",
2008 [D3DDECLUSAGE_FOG] = "FOG",
2009 [D3DDECLUSAGE_DEPTH] = "DEPTH",
2010 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
2011 };
2012
2013 static inline unsigned
2014 sm1_to_nine_declusage(struct sm1_semantic *dcl)
2015 {
2016 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
2017 }
2018
2019 static void
2020 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
2021 boolean tc,
2022 struct sm1_semantic *dcl)
2023 {
2024 BYTE index = dcl->usage_idx;
2025
2026 /* For everything that is not matching to a TGSI_SEMANTIC_****,
2027 * we match to a TGSI_SEMANTIC_GENERIC with index.
2028 *
2029 * The index can be anything UINT16 and usage_idx is BYTE,
2030 * so we can fit everything. It doesn't matter if indices
2031 * are close together or low.
2032 *
2033 *
2034 * POSITION >= 1: 10 * index + 6
2035 * COLOR >= 2: 10 * (index-1) + 7
2036 * TEXCOORD[0..15]: index
2037 * BLENDWEIGHT: 10 * index + 18
2038 * BLENDINDICES: 10 * index + 19
2039 * NORMAL: 10 * index + 20
2040 * TANGENT: 10 * index + 21
2041 * BINORMAL: 10 * index + 22
2042 * TESSFACTOR: 10 * index + 23
2043 */
2044
2045 switch (dcl->usage) {
2046 case D3DDECLUSAGE_POSITION:
2047 case D3DDECLUSAGE_POSITIONT:
2048 case D3DDECLUSAGE_DEPTH:
2049 if (index == 0) {
2050 sem->Name = TGSI_SEMANTIC_POSITION;
2051 sem->Index = 0;
2052 } else {
2053 sem->Name = TGSI_SEMANTIC_GENERIC;
2054 sem->Index = 10 * index + 6;
2055 }
2056 break;
2057 case D3DDECLUSAGE_COLOR:
2058 if (index < 2) {
2059 sem->Name = TGSI_SEMANTIC_COLOR;
2060 sem->Index = index;
2061 } else {
2062 sem->Name = TGSI_SEMANTIC_GENERIC;
2063 sem->Index = 10 * (index-1) + 7;
2064 }
2065 break;
2066 case D3DDECLUSAGE_FOG:
2067 assert(index == 0);
2068 sem->Name = TGSI_SEMANTIC_FOG;
2069 sem->Index = 0;
2070 break;
2071 case D3DDECLUSAGE_PSIZE:
2072 assert(index == 0);
2073 sem->Name = TGSI_SEMANTIC_PSIZE;
2074 sem->Index = 0;
2075 break;
2076 case D3DDECLUSAGE_TEXCOORD:
2077 assert(index < 16);
2078 if (index < 8 && tc)
2079 sem->Name = TGSI_SEMANTIC_TEXCOORD;
2080 else
2081 sem->Name = TGSI_SEMANTIC_GENERIC;
2082 sem->Index = index;
2083 break;
2084 case D3DDECLUSAGE_BLENDWEIGHT:
2085 sem->Name = TGSI_SEMANTIC_GENERIC;
2086 sem->Index = 10 * index + 18;
2087 break;
2088 case D3DDECLUSAGE_BLENDINDICES:
2089 sem->Name = TGSI_SEMANTIC_GENERIC;
2090 sem->Index = 10 * index + 19;
2091 break;
2092 case D3DDECLUSAGE_NORMAL:
2093 sem->Name = TGSI_SEMANTIC_GENERIC;
2094 sem->Index = 10 * index + 20;
2095 break;
2096 case D3DDECLUSAGE_TANGENT:
2097 sem->Name = TGSI_SEMANTIC_GENERIC;
2098 sem->Index = 10 * index + 21;
2099 break;
2100 case D3DDECLUSAGE_BINORMAL:
2101 sem->Name = TGSI_SEMANTIC_GENERIC;
2102 sem->Index = 10 * index + 22;
2103 break;
2104 case D3DDECLUSAGE_TESSFACTOR:
2105 sem->Name = TGSI_SEMANTIC_GENERIC;
2106 sem->Index = 10 * index + 23;
2107 break;
2108 case D3DDECLUSAGE_SAMPLE:
2109 sem->Name = TGSI_SEMANTIC_COUNT;
2110 sem->Index = 0;
2111 break;
2112 default:
2113 unreachable("Invalid DECLUSAGE.");
2114 break;
2115 }
2116 }
2117
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT,
 * for comparison against the BYTE-sized sampler_type values used below. */
#define NINED3DSTT_1D     (D3DSTT_1D     >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D     (D3DSTT_2D     >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE   (D3DSTT_CUBE   >> D3DSP_TEXTURETYPE_SHIFT)
2122 static inline unsigned
2123 d3dstt_to_tgsi_tex(BYTE sampler_type)
2124 {
2125 switch (sampler_type) {
2126 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
2127 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
2128 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2129 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
2130 default:
2131 assert(0);
2132 return TGSI_TEXTURE_UNKNOWN;
2133 }
2134 }
2135 static inline unsigned
2136 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2137 {
2138 switch (sampler_type) {
2139 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2140 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2141 case NINED3DSTT_VOLUME:
2142 case NINED3DSTT_CUBE:
2143 default:
2144 assert(0);
2145 return TGSI_TEXTURE_UNKNOWN;
2146 }
2147 }
2148 static inline unsigned
2149 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2150 {
2151 boolean shadow = !!(info->sampler_mask_shadow & (1 << stage));
2152 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2153 case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
2154 case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
2155 case 3: return TGSI_TEXTURE_3D;
2156 default:
2157 return TGSI_TEXTURE_CUBE;
2158 }
2159 }
2160
2161 static const char *
2162 sm1_sampler_type_name(BYTE sampler_type)
2163 {
2164 switch (sampler_type) {
2165 case NINED3DSTT_1D: return "1D";
2166 case NINED3DSTT_2D: return "2D";
2167 case NINED3DSTT_VOLUME: return "VOLUME";
2168 case NINED3DSTT_CUBE: return "CUBE";
2169 default:
2170 return "(D3DSTT_?)";
2171 }
2172 }
2173
2174 static inline unsigned
2175 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2176 {
2177 switch (sem->Name) {
2178 case TGSI_SEMANTIC_POSITION:
2179 case TGSI_SEMANTIC_NORMAL:
2180 return TGSI_INTERPOLATE_LINEAR;
2181 case TGSI_SEMANTIC_BCOLOR:
2182 case TGSI_SEMANTIC_COLOR:
2183 return TGSI_INTERPOLATE_COLOR;
2184 case TGSI_SEMANTIC_FOG:
2185 case TGSI_SEMANTIC_GENERIC:
2186 case TGSI_SEMANTIC_TEXCOORD:
2187 case TGSI_SEMANTIC_CLIPDIST:
2188 case TGSI_SEMANTIC_CLIPVERTEX:
2189 return TGSI_INTERPOLATE_PERSPECTIVE;
2190 case TGSI_SEMANTIC_EDGEFLAG:
2191 case TGSI_SEMANTIC_FACE:
2192 case TGSI_SEMANTIC_INSTANCEID:
2193 case TGSI_SEMANTIC_PCOORD:
2194 case TGSI_SEMANTIC_PRIMID:
2195 case TGSI_SEMANTIC_PSIZE:
2196 case TGSI_SEMANTIC_VERTEXID:
2197 return TGSI_INTERPOLATE_CONSTANT;
2198 default:
2199 assert(0);
2200 return TGSI_INTERPOLATE_CONSTANT;
2201 }
2202 }
2203
2204 DECL_SPECIAL(DCL)
2205 {
2206 struct ureg_program *ureg = tx->ureg;
2207 boolean is_input;
2208 boolean is_sampler;
2209 struct tgsi_declaration_semantic tgsi;
2210 struct sm1_semantic sem;
2211 sm1_read_semantic(tx, &sem);
2212
2213 is_input = sem.reg.file == D3DSPR_INPUT;
2214 is_sampler =
2215 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2216
2217 DUMP("DCL ");
2218 sm1_dump_dst_param(&sem.reg);
2219 if (is_sampler)
2220 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2221 else
2222 if (tx->version.major >= 3)
2223 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2224 else
2225 if (sem.usage | sem.usage_idx)
2226 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2227 else
2228 DUMP("\n");
2229
2230 if (is_sampler) {
2231 const unsigned m = 1 << sem.reg.idx;
2232 ureg_DECL_sampler(ureg, sem.reg.idx);
2233 tx->info->sampler_mask |= m;
2234 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2235 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2236 d3dstt_to_tgsi_tex(sem.sampler_type);
2237 return D3D_OK;
2238 }
2239
2240 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2241 if (IS_VS) {
2242 if (is_input) {
2243 /* linkage outside of shader with vertex declaration */
2244 ureg_DECL_vs_input(ureg, sem.reg.idx);
2245 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2246 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2247 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2248 /* NOTE: preserving order in case of indirect access */
2249 } else
2250 if (tx->version.major >= 3) {
2251 /* SM2 output semantic determined by file */
2252 assert(sem.reg.mask != 0);
2253 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2254 tx->info->position_t = TRUE;
2255 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2256 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2257 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2258 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2259 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2260 if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2261 tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2262 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2263 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2264 }
2265
2266 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2267 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2268 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2269 }
2270 }
2271 } else {
2272 if (is_input && tx->version.major >= 3) {
2273 unsigned interp_location = 0;
2274 /* SM3 only, SM2 input semantic determined by file */
2275 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2276 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2277 /* PositionT and tessfactor forbidden */
2278 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2279 return D3DERR_INVALIDCALL;
2280
2281 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2282 /* Position0 is forbidden (likely because vPos already does that) */
2283 if (sem.usage == D3DDECLUSAGE_POSITION)
2284 return D3DERR_INVALIDCALL;
2285 /* Following code is for depth */
2286 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2287 return D3D_OK;
2288 }
2289
2290 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2291 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2292 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2293
2294 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2295 ureg, tgsi.Name, tgsi.Index,
2296 nine_tgsi_to_interp_mode(&tgsi),
2297 0, /* cylwrap */
2298 interp_location, 0, 1);
2299 } else
2300 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2301 /* FragColor or FragDepth */
2302 assert(sem.reg.mask != 0);
2303 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2304 0, 1);
2305 }
2306 }
2307 return D3D_OK;
2308 }
2309
2310 DECL_SPECIAL(DEF)
2311 {
2312 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2313 return D3D_OK;
2314 }
2315
2316 DECL_SPECIAL(DEFB)
2317 {
2318 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2319 return D3D_OK;
2320 }
2321
2322 DECL_SPECIAL(DEFI)
2323 {
2324 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2325 return D3D_OK;
2326 }
2327
2328 DECL_SPECIAL(POW)
2329 {
2330 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2331 struct ureg_src src[2] = {
2332 tx_src_param(tx, &tx->insn.src[0]),
2333 tx_src_param(tx, &tx->insn.src[1])
2334 };
2335 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2336 return D3D_OK;
2337 }
2338
/* Test results on Win 10:
 * NV (NVIDIA GeForce GT 635M)
 * AMD (AMD Radeon HD 7730M)
 * INTEL (Intel(R) HD Graphics 4000)
 * PS2 and PS3:
 * RCP and RSQ can generate inf on NV and AMD.
 * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
 * NV: log not clamped
 * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
 * INTEL: log(0) is -FLT_MAX and log(inf) is 127
 * All devices have 0*anything = 0
 *
 * INTEL VS2 and VS3: same behaviour.
 * Some differences between VS2 and VS3 for constants defined with inf/NaN.
 * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
 * VS2 seems to clamp to zero (may be test failure).
 * AMD VS2: unknown, VS3: very likely behaviour of PS3
 * NV VS2 and VS3: very likely behaviour of PS3
 * For both, Inf in VS becomes NaN in PS
 * "Very likely" because the test was less extensive.
 *
 * Thus all clamping can be removed for shaders 2 and 3,
 * as long as 0*anything = 0.
 * Else clamps to enforce 0*anything = 0 (anything being then
 * neither inf nor NaN, the user being unlikely to pass them
 * as constant).
 * The status for VS1 and PS1 is unknown.
 */
2367
2368 DECL_SPECIAL(RCP)
2369 {
2370 struct ureg_program *ureg = tx->ureg;
2371 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2372 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2373 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2374 ureg_RCP(ureg, tmp, src);
2375 if (!tx->mul_zero_wins) {
2376 /* FLT_MAX has issues with Rayman */
2377 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
2378 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
2379 }
2380 return D3D_OK;
2381 }
2382
2383 DECL_SPECIAL(RSQ)
2384 {
2385 struct ureg_program *ureg = tx->ureg;
2386 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2387 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2388 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2389 ureg_RSQ(ureg, tmp, ureg_abs(src));
2390 if (!tx->mul_zero_wins)
2391 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2392 return D3D_OK;
2393 }
2394
2395 DECL_SPECIAL(LOG)
2396 {
2397 struct ureg_program *ureg = tx->ureg;
2398 struct ureg_dst tmp = tx_scratch_scalar(tx);
2399 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2400 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2401 ureg_LG2(ureg, tmp, ureg_abs(src));
2402 if (tx->mul_zero_wins) {
2403 ureg_MOV(ureg, dst, tx_src_scalar(tmp));
2404 } else {
2405 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2406 }
2407 return D3D_OK;
2408 }
2409
2410 DECL_SPECIAL(LIT)
2411 {
2412 struct ureg_program *ureg = tx->ureg;
2413 struct ureg_dst tmp = tx_scratch(tx);
2414 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2415 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2416 ureg_LIT(ureg, tmp, src);
2417 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2418 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2419 * it 0^0 if src.w=0, which value is driver dependent. */
2420 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2421 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2422 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2423 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2424 return D3D_OK;
2425 }
2426
2427 DECL_SPECIAL(NRM)
2428 {
2429 struct ureg_program *ureg = tx->ureg;
2430 struct ureg_dst tmp = tx_scratch_scalar(tx);
2431 struct ureg_src nrm = tx_src_scalar(tmp);
2432 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2433 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2434 ureg_DP3(ureg, tmp, src, src);
2435 ureg_RSQ(ureg, tmp, nrm);
2436 if (!tx->mul_zero_wins)
2437 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2438 ureg_MUL(ureg, dst, src, nrm);
2439 return D3D_OK;
2440 }
2441
2442 DECL_SPECIAL(DP2ADD)
2443 {
2444 struct ureg_dst tmp = tx_scratch_scalar(tx);
2445 struct ureg_src dp2 = tx_src_scalar(tmp);
2446 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2447 struct ureg_src src[3];
2448 int i;
2449 for (i = 0; i < 3; ++i)
2450 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2451 assert_replicate_swizzle(&src[2]);
2452
2453 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2454 ureg_ADD(tx->ureg, dst, src[2], dp2);
2455
2456 return D3D_OK;
2457 }
2458
2459 DECL_SPECIAL(TEXCOORD)
2460 {
2461 struct ureg_program *ureg = tx->ureg;
2462 const unsigned s = tx->insn.dst[0].idx;
2463 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2464
2465 tx_texcoord_alloc(tx, s);
2466 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2467 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2468
2469 return D3D_OK;
2470 }
2471
2472 DECL_SPECIAL(TEXCOORD_ps14)
2473 {
2474 struct ureg_program *ureg = tx->ureg;
2475 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2476 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2477
2478 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2479
2480 ureg_MOV(ureg, dst, src);
2481
2482 return D3D_OK;
2483 }
2484
2485 DECL_SPECIAL(TEXKILL)
2486 {
2487 struct ureg_src reg;
2488
2489 if (tx->version.major > 1 || tx->version.minor > 3) {
2490 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2491 } else {
2492 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2493 reg = tx->regs.vT[tx->insn.dst[0].idx];
2494 }
2495 if (tx->version.major < 2)
2496 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2497 ureg_KILL_IF(tx->ureg, reg);
2498
2499 return D3D_OK;
2500 }
2501
2502 DECL_SPECIAL(TEXBEM)
2503 {
2504 struct ureg_program *ureg = tx->ureg;
2505 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2506 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2507 struct ureg_dst tmp, tmp2, texcoord;
2508 struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
2509 struct ureg_src bumpenvlscale, bumpenvloffset;
2510 const int m = tx->insn.dst[0].idx;
2511
2512 assert(tx->version.major == 1);
2513
2514 sample = ureg_DECL_sampler(ureg, m);
2515 tx->info->sampler_mask |= 1 << m;
2516
2517 tx_texcoord_alloc(tx, m);
2518
2519 tmp = tx_scratch(tx);
2520 tmp2 = tx_scratch(tx);
2521 texcoord = tx_scratch(tx);
2522 /*
2523 * Bump-env-matrix:
2524 * 00 is X
2525 * 01 is Y
2526 * 10 is Z
2527 * 11 is W
2528 */
2529 c8m = nine_float_constant_src(tx, 8+m);
2530 c16m2 = nine_float_constant_src(tx, 8+8+m/2);
2531
2532 m00 = NINE_APPLY_SWIZZLE(c8m, X);
2533 m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2534 m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2535 m11 = NINE_APPLY_SWIZZLE(c8m, W);
2536
2537 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2538 if (m % 2 == 0) {
2539 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
2540 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
2541 } else {
2542 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
2543 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
2544 }
2545
2546 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2547
2548 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2549 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2550 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2551 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2552 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2553 NINE_APPLY_SWIZZLE(src, Y),
2554 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2555
2556 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2557 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2558 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2559 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2560 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2561 NINE_APPLY_SWIZZLE(src, Y),
2562 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2563
2564 /* Now the texture coordinates are in tmp.xy */
2565
2566 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2567 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2568 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2569 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2570 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2571 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
2572 bumpenvlscale, bumpenvloffset);
2573 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2574 }
2575
2576 tx->info->bumpenvmat_needed = 1;
2577
2578 return D3D_OK;
2579 }
2580
2581 DECL_SPECIAL(TEXREG2AR)
2582 {
2583 struct ureg_program *ureg = tx->ureg;
2584 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2585 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2586 struct ureg_src sample;
2587 const int m = tx->insn.dst[0].idx;
2588 const int n = tx->insn.src[0].idx;
2589 assert(m >= 0 && m > n);
2590
2591 sample = ureg_DECL_sampler(ureg, m);
2592 tx->info->sampler_mask |= 1 << m;
2593 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
2594
2595 return D3D_OK;
2596 }
2597
2598 DECL_SPECIAL(TEXREG2GB)
2599 {
2600 struct ureg_program *ureg = tx->ureg;
2601 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2602 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2603 struct ureg_src sample;
2604 const int m = tx->insn.dst[0].idx;
2605 const int n = tx->insn.src[0].idx;
2606 assert(m >= 0 && m > n);
2607
2608 sample = ureg_DECL_sampler(ureg, m);
2609 tx->info->sampler_mask |= 1 << m;
2610 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2611
2612 return D3D_OK;
2613 }
2614
2615 DECL_SPECIAL(TEXM3x2PAD)
2616 {
2617 return D3D_OK; /* this is just padding */
2618 }
2619
2620 DECL_SPECIAL(TEXM3x2TEX)
2621 {
2622 struct ureg_program *ureg = tx->ureg;
2623 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2624 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2625 struct ureg_src sample;
2626 const int m = tx->insn.dst[0].idx - 1;
2627 const int n = tx->insn.src[0].idx;
2628 assert(m >= 0 && m > n);
2629
2630 tx_texcoord_alloc(tx, m);
2631 tx_texcoord_alloc(tx, m+1);
2632
2633 /* performs the matrix multiplication */
2634 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2635 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2636
2637 sample = ureg_DECL_sampler(ureg, m + 1);
2638 tx->info->sampler_mask |= 1 << (m + 1);
2639 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2640
2641 return D3D_OK;
2642 }
2643
2644 DECL_SPECIAL(TEXM3x3PAD)
2645 {
2646 return D3D_OK; /* this is just padding */
2647 }
2648
2649 DECL_SPECIAL(TEXM3x3SPEC)
2650 {
2651 struct ureg_program *ureg = tx->ureg;
2652 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2653 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2654 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2655 struct ureg_src sample;
2656 struct ureg_dst tmp;
2657 const int m = tx->insn.dst[0].idx - 2;
2658 const int n = tx->insn.src[0].idx;
2659 assert(m >= 0 && m > n);
2660
2661 tx_texcoord_alloc(tx, m);
2662 tx_texcoord_alloc(tx, m+1);
2663 tx_texcoord_alloc(tx, m+2);
2664
2665 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2666 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2667 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2668
2669 sample = ureg_DECL_sampler(ureg, m + 2);
2670 tx->info->sampler_mask |= 1 << (m + 2);
2671 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2672
2673 /* At this step, dst = N = (u', w', z').
2674 * We want dst to be the texture sampled at (u'', w'', z''), with
2675 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2676 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2677 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2678 /* at this step tmp.x = 1/N.N */
2679 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2680 /* at this step tmp.y = N.E */
2681 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2682 /* at this step tmp.x = N.E/N.N */
2683 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2684 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2685 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2686 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2687 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2688
2689 return D3D_OK;
2690 }
2691
2692 DECL_SPECIAL(TEXREG2RGB)
2693 {
2694 struct ureg_program *ureg = tx->ureg;
2695 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2696 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2697 struct ureg_src sample;
2698 const int m = tx->insn.dst[0].idx;
2699 const int n = tx->insn.src[0].idx;
2700 assert(m >= 0 && m > n);
2701
2702 sample = ureg_DECL_sampler(ureg, m);
2703 tx->info->sampler_mask |= 1 << m;
2704 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
2705
2706 return D3D_OK;
2707 }
2708
2709 DECL_SPECIAL(TEXDP3TEX)
2710 {
2711 struct ureg_program *ureg = tx->ureg;
2712 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2713 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2714 struct ureg_dst tmp;
2715 struct ureg_src sample;
2716 const int m = tx->insn.dst[0].idx;
2717 const int n = tx->insn.src[0].idx;
2718 assert(m >= 0 && m > n);
2719
2720 tx_texcoord_alloc(tx, m);
2721
2722 tmp = tx_scratch(tx);
2723 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2724 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2725
2726 sample = ureg_DECL_sampler(ureg, m);
2727 tx->info->sampler_mask |= 1 << m;
2728 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2729
2730 return D3D_OK;
2731 }
2732
2733 DECL_SPECIAL(TEXM3x2DEPTH)
2734 {
2735 struct ureg_program *ureg = tx->ureg;
2736 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2737 struct ureg_dst tmp;
2738 const int m = tx->insn.dst[0].idx - 1;
2739 const int n = tx->insn.src[0].idx;
2740 assert(m >= 0 && m > n);
2741
2742 tx_texcoord_alloc(tx, m);
2743 tx_texcoord_alloc(tx, m+1);
2744
2745 tmp = tx_scratch(tx);
2746
2747 /* performs the matrix multiplication */
2748 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2749 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2750
2751 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2752 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2753 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2754 /* res = 'w' == 0 ? 1.0 : z/w */
2755 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2756 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2757 /* replace the depth for depth testing with the result */
2758 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2759 TGSI_WRITEMASK_Z, 0, 1);
2760 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2761 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2762 return D3D_OK;
2763 }
2764
2765 DECL_SPECIAL(TEXDP3)
2766 {
2767 struct ureg_program *ureg = tx->ureg;
2768 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2769 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2770 const int m = tx->insn.dst[0].idx;
2771 const int n = tx->insn.src[0].idx;
2772 assert(m >= 0 && m > n);
2773
2774 tx_texcoord_alloc(tx, m);
2775
2776 ureg_DP3(ureg, dst, tx->regs.vT[m], src);
2777
2778 return D3D_OK;
2779 }
2780
2781 DECL_SPECIAL(TEXM3x3)
2782 {
2783 struct ureg_program *ureg = tx->ureg;
2784 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2785 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2786 struct ureg_src sample;
2787 struct ureg_dst E, tmp;
2788 const int m = tx->insn.dst[0].idx - 2;
2789 const int n = tx->insn.src[0].idx;
2790 assert(m >= 0 && m > n);
2791
2792 tx_texcoord_alloc(tx, m);
2793 tx_texcoord_alloc(tx, m+1);
2794 tx_texcoord_alloc(tx, m+2);
2795
2796 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2797 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2798 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2799
2800 switch (tx->insn.opcode) {
2801 case D3DSIO_TEXM3x3:
2802 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2803 break;
2804 case D3DSIO_TEXM3x3TEX:
2805 sample = ureg_DECL_sampler(ureg, m + 2);
2806 tx->info->sampler_mask |= 1 << (m + 2);
2807 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2808 break;
2809 case D3DSIO_TEXM3x3VSPEC:
2810 sample = ureg_DECL_sampler(ureg, m + 2);
2811 tx->info->sampler_mask |= 1 << (m + 2);
2812 E = tx_scratch(tx);
2813 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2814 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2815 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2816 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2817 /* At this step, dst = N = (u', w', z').
2818 * We want dst to be the texture sampled at (u'', w'', z''), with
2819 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2820 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2821 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2822 /* at this step tmp.x = 1/N.N */
2823 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2824 /* at this step tmp.y = N.E */
2825 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2826 /* at this step tmp.x = N.E/N.N */
2827 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2828 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2829 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2830 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2831 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2832 break;
2833 default:
2834 return D3DERR_INVALIDCALL;
2835 }
2836 return D3D_OK;
2837 }
2838
2839 DECL_SPECIAL(TEXDEPTH)
2840 {
2841 struct ureg_program *ureg = tx->ureg;
2842 struct ureg_dst r5;
2843 struct ureg_src r5r, r5g;
2844
2845 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2846
2847 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2848 * r5 won't be used afterward, thus we can use r5.ba */
2849 r5 = tx->regs.r[5];
2850 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2851 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2852
2853 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2854 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2855 /* r5.r = r/g */
2856 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2857 r5r, ureg_imm1f(ureg, 1.0f));
2858 /* replace the depth for depth testing with the result */
2859 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2860 TGSI_WRITEMASK_Z, 0, 1);
2861 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2862
2863 return D3D_OK;
2864 }
2865
2866 DECL_SPECIAL(BEM)
2867 {
2868 struct ureg_program *ureg = tx->ureg;
2869 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2870 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2871 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2872 struct ureg_src m00, m01, m10, m11, c8m;
2873 const int m = tx->insn.dst[0].idx;
2874 struct ureg_dst tmp;
2875 /*
2876 * Bump-env-matrix:
2877 * 00 is X
2878 * 01 is Y
2879 * 10 is Z
2880 * 11 is W
2881 */
2882 c8m = nine_float_constant_src(tx, 8+m);
2883 m00 = NINE_APPLY_SWIZZLE(c8m, X);
2884 m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2885 m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2886 m11 = NINE_APPLY_SWIZZLE(c8m, W);
2887 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2888 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2889 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2890 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2891 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2892 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2893
2894 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2895 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2896 NINE_APPLY_SWIZZLE(src1, X), src0);
2897 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2898 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2899 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2900 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2901
2902 tx->info->bumpenvmat_needed = 1;
2903
2904 return D3D_OK;
2905 }
2906
2907 DECL_SPECIAL(TEXLD)
2908 {
2909 struct ureg_program *ureg = tx->ureg;
2910 unsigned target;
2911 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2912 struct ureg_src src[2] = {
2913 tx_src_param(tx, &tx->insn.src[0]),
2914 tx_src_param(tx, &tx->insn.src[1])
2915 };
2916 assert(tx->insn.src[1].idx >= 0 &&
2917 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2918 target = tx->sampler_targets[tx->insn.src[1].idx];
2919
2920 switch (tx->insn.flags) {
2921 case 0:
2922 ureg_TEX(ureg, dst, target, src[0], src[1]);
2923 break;
2924 case NINED3DSI_TEXLD_PROJECT:
2925 ureg_TXP(ureg, dst, target, src[0], src[1]);
2926 break;
2927 case NINED3DSI_TEXLD_BIAS:
2928 ureg_TXB(ureg, dst, target, src[0], src[1]);
2929 break;
2930 default:
2931 assert(0);
2932 return D3DERR_INVALIDCALL;
2933 }
2934 return D3D_OK;
2935 }
2936
2937 DECL_SPECIAL(TEXLD_14)
2938 {
2939 struct ureg_program *ureg = tx->ureg;
2940 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2941 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2942 const unsigned s = tx->insn.dst[0].idx;
2943 const unsigned t = ps1x_sampler_type(tx->info, s);
2944
2945 tx->info->sampler_mask |= 1 << s;
2946 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2947
2948 return D3D_OK;
2949 }
2950
2951 DECL_SPECIAL(TEX)
2952 {
2953 struct ureg_program *ureg = tx->ureg;
2954 const unsigned s = tx->insn.dst[0].idx;
2955 const unsigned t = ps1x_sampler_type(tx->info, s);
2956 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2957 struct ureg_src src[2];
2958
2959 tx_texcoord_alloc(tx, s);
2960
2961 src[0] = tx->regs.vT[s];
2962 src[1] = ureg_DECL_sampler(ureg, s);
2963 tx->info->sampler_mask |= 1 << s;
2964
2965 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2966
2967 return D3D_OK;
2968 }
2969
2970 DECL_SPECIAL(TEXLDD)
2971 {
2972 unsigned target;
2973 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2974 struct ureg_src src[4] = {
2975 tx_src_param(tx, &tx->insn.src[0]),
2976 tx_src_param(tx, &tx->insn.src[1]),
2977 tx_src_param(tx, &tx->insn.src[2]),
2978 tx_src_param(tx, &tx->insn.src[3])
2979 };
2980 assert(tx->insn.src[1].idx >= 0 &&
2981 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2982 target = tx->sampler_targets[tx->insn.src[1].idx];
2983
2984 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2985 return D3D_OK;
2986 }
2987
2988 DECL_SPECIAL(TEXLDL)
2989 {
2990 unsigned target;
2991 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2992 struct ureg_src src[2] = {
2993 tx_src_param(tx, &tx->insn.src[0]),
2994 tx_src_param(tx, &tx->insn.src[1])
2995 };
2996 assert(tx->insn.src[1].idx >= 0 &&
2997 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2998 target = tx->sampler_targets[tx->insn.src[1].idx];
2999
3000 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
3001 return D3D_OK;
3002 }
3003
3004 DECL_SPECIAL(SETP)
3005 {
3006 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
3007 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3008 struct ureg_src src[2] = {
3009 tx_src_param(tx, &tx->insn.src[0]),
3010 tx_src_param(tx, &tx->insn.src[1])
3011 };
3012 ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
3013 return D3D_OK;
3014 }
3015
3016 DECL_SPECIAL(BREAKP)
3017 {
3018 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3019 ureg_IF(tx->ureg, src, tx_cond(tx));
3020 ureg_BRK(tx->ureg);
3021 tx_endcond(tx);
3022 ureg_ENDIF(tx->ureg);
3023 return D3D_OK;
3024 }
3025
3026 DECL_SPECIAL(PHASE)
3027 {
3028 return D3D_OK; /* we don't care about phase */
3029 }
3030
3031 DECL_SPECIAL(COMMENT)
3032 {
3033 return D3D_OK; /* nothing to do */
3034 }
3035
3036
/* Build one positional initializer for an instruction table entry:
 *   op        suffix appended to D3DSIO_
 *   tgsi_op   suffix appended to TGSI_OPCODE_
 *   v_lo/v_hi first pair of version bounds (presumably the vertex shader
 *             version range the opcode is valid for - TODO confirm)
 *   p_lo/p_hi second pair of version bounds (presumably the pixel shader
 *             version range - TODO confirm)
 *   n_dst     presumably the number of destination operands
 *   n_src     presumably the number of source operands
 *   handler   special translation handler, or NULL
 */
#define _OPI(op, tgsi_op, v_lo, v_hi, p_lo, p_hi, n_dst, n_src, handler) \
    { D3DSIO_##op, TGSI_OPCODE_##tgsi_op, \
      { v_lo, v_hi }, { p_lo, p_hi }, \
      n_dst, n_src, handler }
3039
3040 static const struct sm1_op_info inst_table[] =
3041 {
3042 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
3043 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
3044 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
3045 _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
3046 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
3047 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
3048 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
3049 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
3050 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
3051 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
3052 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
3053 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
3054 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
3055 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
3056 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
3057 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
3058 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
3059 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
3060 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
3061 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
3062
3063 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
3064 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
3065 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
3066 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
3067 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
3068
3069 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
3070 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
3071 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
3072 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
3073 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
3074 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
3075
3076 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
3077
3078 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
3079 _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
3080 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
3081 _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
3082 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
3083
3084 _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
3085 _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
3086
3087 /* More flow control */
3088 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
3089 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
3090 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
3091 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
3092 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
3093 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
3094 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
3095 _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
3096 /* we don't write to the address register, but a normal register (copied
3097 * when needed to the address register), thus we don't use ARR */
3098 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3099
3100 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
3101 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
3102
3103 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
3104 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
3105 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
3106 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
3107 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
3108 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
3109 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3110 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3111 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
3112 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
3113 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
3114 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
3115 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
3116 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3117 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
3118 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3119
3120 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
3121 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3122 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
3123 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
3124
3125 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
3126
3127 /* More tex stuff */
3128 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
3129 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
3130 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
3131 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
3132 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3133 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
3134
3135 /* Misc */
3136 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
3137 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
3138 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
3139 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3140 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3141 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
3142 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
3143 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
3144 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
3145 };
3146
3147 static const struct sm1_op_info inst_phase =
3148 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3149
3150 static const struct sm1_op_info inst_comment =
3151 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3152
3153 static void
3154 create_op_info_map(struct shader_translator *tx)
3155 {
3156 const unsigned version = (tx->version.major << 8) | tx->version.minor;
3157 unsigned i;
3158
3159 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3160 tx->op_info_map[i] = -1;
3161
3162 if (tx->processor == PIPE_SHADER_VERTEX) {
3163 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3164 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3165 if (inst_table[i].vert_version.min <= version &&
3166 inst_table[i].vert_version.max >= version)
3167 tx->op_info_map[inst_table[i].sio] = i;
3168 }
3169 } else {
3170 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3171 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3172 if (inst_table[i].frag_version.min <= version &&
3173 inst_table[i].frag_version.max >= version)
3174 tx->op_info_map[inst_table[i].sio] = i;
3175 }
3176 }
3177 }
3178
3179 static inline HRESULT
3180 NineTranslateInstruction_Generic(struct shader_translator *tx)
3181 {
3182 struct ureg_dst dst[1];
3183 struct ureg_src src[4];
3184 unsigned i;
3185
3186 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3187 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3188 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3189 src[i] = tx_src_param(tx, &tx->insn.src[i]);
3190
3191 ureg_insn(tx->ureg, tx->insn.info->opcode,
3192 dst, tx->insn.ndst,
3193 src, tx->insn.nsrc, 0);
3194 return D3D_OK;
3195 }
3196
3197 static inline DWORD
3198 TOKEN_PEEK(struct shader_translator *tx)
3199 {
3200 return *(tx->parse);
3201 }
3202
3203 static inline DWORD
3204 TOKEN_NEXT(struct shader_translator *tx)
3205 {
3206 return *(tx->parse)++;
3207 }
3208
3209 static inline void
3210 TOKEN_JUMP(struct shader_translator *tx)
3211 {
3212 if (tx->parse_next && tx->parse != tx->parse_next) {
3213 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3214 tx->parse = tx->parse_next;
3215 }
3216 }
3217
3218 static inline boolean
3219 sm1_parse_eof(struct shader_translator *tx)
3220 {
3221 return TOKEN_PEEK(tx) == NINED3DSP_END;
3222 }
3223
3224 static void
3225 sm1_read_version(struct shader_translator *tx)
3226 {
3227 const DWORD tok = TOKEN_NEXT(tx);
3228
3229 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3230 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3231
3232 switch (tok >> 16) {
3233 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3234 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3235 default:
3236 DBG("Invalid shader type: %x\n", tok);
3237 tx->processor = ~0;
3238 break;
3239 }
3240 }
3241
3242 /* This is just to check if we parsed the instruction properly. */
3243 static void
3244 sm1_parse_get_skip(struct shader_translator *tx)
3245 {
3246 const DWORD tok = TOKEN_PEEK(tx);
3247
3248 if (tx->version.major >= 2) {
3249 tx->parse_next = tx->parse + 1 /* this */ +
3250 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3251 } else {
3252 tx->parse_next = NULL; /* TODO: determine from param count */
3253 }
3254 }
3255
3256 static void
3257 sm1_print_comment(const char *comment, UINT size)
3258 {
3259 if (!size)
3260 return;
3261 /* TODO */
3262 }
3263
3264 static void
3265 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3266 {
3267 DWORD tok = TOKEN_PEEK(tx);
3268
3269 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3270 {
3271 const char *comment = "";
3272 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3273 tx->parse += size + 1;
3274
3275 if (print)
3276 sm1_print_comment(comment, size);
3277
3278 tok = TOKEN_PEEK(tx);
3279 }
3280 }
3281
3282 static void
3283 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3284 {
3285 *reg = TOKEN_NEXT(tx);
3286
3287 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3288 {
3289 if (tx->version.major < 2)
3290 *rel = (1 << 31) |
3291 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3292 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
3293 D3DSP_NOSWIZZLE;
3294 else
3295 *rel = TOKEN_NEXT(tx);
3296 }
3297 }
3298
3299 static void
3300 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3301 {
3302 int8_t shift;
3303 dst->file =
3304 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3305 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3306 dst->type = TGSI_RETURN_TYPE_FLOAT;
3307 dst->idx = tok & D3DSP_REGNUM_MASK;
3308 dst->rel = NULL;
3309 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3310 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3311 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3312 dst->shift = (shift & 0x7) - (shift & 0x8);
3313 }
3314
3315 static void
3316 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3317 {
3318 src->file =
3319 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3320 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3321 src->type = TGSI_RETURN_TYPE_FLOAT;
3322 src->idx = tok & D3DSP_REGNUM_MASK;
3323 src->rel = NULL;
3324 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3325 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3326
3327 switch (src->file) {
3328 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3329 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3330 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3331 default:
3332 break;
3333 }
3334 }
3335
3336 static void
3337 sm1_parse_immediate(struct shader_translator *tx,
3338 struct sm1_src_param *imm)
3339 {
3340 imm->file = NINED3DSPR_IMMEDIATE;
3341 imm->idx = INT_MIN;
3342 imm->rel = NULL;
3343 imm->swizzle = NINED3DSP_NOSWIZZLE;
3344 imm->mod = 0;
3345 switch (tx->insn.opcode) {
3346 case D3DSIO_DEF:
3347 imm->type = NINED3DSPTYPE_FLOAT4;
3348 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3349 tx->parse += 4;
3350 break;
3351 case D3DSIO_DEFI:
3352 imm->type = NINED3DSPTYPE_INT4;
3353 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3354 tx->parse += 4;
3355 break;
3356 case D3DSIO_DEFB:
3357 imm->type = NINED3DSPTYPE_BOOL;
3358 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3359 tx->parse += 1;
3360 break;
3361 default:
3362 assert(0);
3363 break;
3364 }
3365 }
3366
3367 static void
3368 sm1_read_dst_param(struct shader_translator *tx,
3369 struct sm1_dst_param *dst,
3370 struct sm1_src_param *rel)
3371 {
3372 DWORD tok_dst, tok_rel = 0;
3373
3374 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3375 sm1_parse_dst_param(dst, tok_dst);
3376 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3377 sm1_parse_src_param(rel, tok_rel);
3378 dst->rel = rel;
3379 }
3380 }
3381
3382 static void
3383 sm1_read_src_param(struct shader_translator *tx,
3384 struct sm1_src_param *src,
3385 struct sm1_src_param *rel)
3386 {
3387 DWORD tok_src, tok_rel = 0;
3388
3389 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3390 sm1_parse_src_param(src, tok_src);
3391 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3392 assert(rel);
3393 sm1_parse_src_param(rel, tok_rel);
3394 src->rel = rel;
3395 }
3396 }
3397
3398 static void
3399 sm1_read_semantic(struct shader_translator *tx,
3400 struct sm1_semantic *sem)
3401 {
3402 const DWORD tok_usg = TOKEN_NEXT(tx);
3403 const DWORD tok_dst = TOKEN_NEXT(tx);
3404
3405 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3406 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3407 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3408
3409 sm1_parse_dst_param(&sem->reg, tok_dst);
3410 }
3411
3412 static void
3413 sm1_parse_instruction(struct shader_translator *tx)
3414 {
3415 struct sm1_instruction *insn = &tx->insn;
3416 HRESULT hr;
3417 DWORD tok;
3418 const struct sm1_op_info *info = NULL;
3419 unsigned i;
3420
3421 sm1_parse_comments(tx, TRUE);
3422 sm1_parse_get_skip(tx);
3423
3424 tok = TOKEN_NEXT(tx);
3425
3426 insn->opcode = tok & D3DSI_OPCODE_MASK;
3427 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3428 insn->coissue = !!(tok & D3DSI_COISSUE);
3429 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3430
3431 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3432 int k = tx->op_info_map[insn->opcode];
3433 if (k >= 0) {
3434 assert(k < ARRAY_SIZE(inst_table));
3435 info = &inst_table[k];
3436 }
3437 } else {
3438 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3439 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3440 }
3441 if (!info) {
3442 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3443 TOKEN_JUMP(tx);
3444 return;
3445 }
3446 insn->info = info;
3447 insn->ndst = info->ndst;
3448 insn->nsrc = info->nsrc;
3449
3450 /* check version */
3451 {
3452 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3453 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3454 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3455 if (ver < min || ver > max) {
3456 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3457 min, ver, max);
3458 return;
3459 }
3460 }
3461
3462 for (i = 0; i < insn->ndst; ++i)
3463 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3464 if (insn->predicated)
3465 sm1_read_src_param(tx, &insn->pred, NULL);
3466 for (i = 0; i < insn->nsrc; ++i)
3467 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3468
3469 /* parse here so we can dump them before processing */
3470 if (insn->opcode == D3DSIO_DEF ||
3471 insn->opcode == D3DSIO_DEFI ||
3472 insn->opcode == D3DSIO_DEFB)
3473 sm1_parse_immediate(tx, &tx->insn.src[0]);
3474
3475 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3476 sm1_instruction_check(insn);
3477
3478 if (insn->predicated) {
3479 tx->predicated_activated = true;
3480 if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
3481 tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
3482 tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
3483 }
3484 }
3485
3486 if (info->handler)
3487 hr = info->handler(tx);
3488 else
3489 hr = NineTranslateInstruction_Generic(tx);
3490 tx_apply_dst0_modifiers(tx);
3491
3492 if (insn->predicated) {
3493 tx->predicated_activated = false;
3494 /* TODO: predicate might be allowed on outputs,
3495 * which cannot be src. Workaround it. */
3496 ureg_CMP(tx->ureg, tx->regs.predicate_dst,
3497 ureg_negate(tx_src_param(tx, &insn->pred)),
3498 ureg_src(tx->regs.predicate_tmp),
3499 ureg_src(tx->regs.predicate_dst));
3500 }
3501
3502 if (hr != D3D_OK)
3503 tx->failure = TRUE;
3504 tx->num_scratch = 0; /* reset */
3505
3506 TOKEN_JUMP(tx);
3507 }
3508
/* Capability query shorthands. Both expect a `screen` variable in scope;
 * GET_SHADER_CAP additionally expects `info` and queries for info->type. */
#define GET_CAP(n) \
    screen->get_param(screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) \
    screen->get_shader_param(screen, info->type, PIPE_SHADER_CAP_##n)
3513
3514 static HRESULT
3515 tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
3516 {
3517 unsigned i;
3518
3519 memset(tx, 0, sizeof(*tx));
3520
3521 tx->info = info;
3522
3523 tx->byte_code = info->byte_code;
3524 tx->parse = info->byte_code;
3525
3526 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3527 info->input_map[i] = NINE_DECLUSAGE_NONE;
3528 info->num_inputs = 0;
3529
3530 info->position_t = FALSE;
3531 info->point_size = FALSE;
3532
3533 memset(tx->slots_used, 0, sizeof(tx->slots_used));
3534 memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
3535 memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
3536
3537 tx->info->const_float_slots = 0;
3538 tx->info->const_int_slots = 0;
3539 tx->info->const_bool_slots = 0;
3540
3541 info->sampler_mask = 0x0;
3542 info->rt_mask = 0x0;
3543
3544 info->lconstf.data = NULL;
3545 info->lconstf.ranges = NULL;
3546
3547 info->bumpenvmat_needed = 0;
3548
3549 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3550 tx->regs.rL[i] = ureg_dst_undef();
3551 }
3552 tx->regs.address = ureg_dst_undef();
3553 tx->regs.a0 = ureg_dst_undef();
3554 tx->regs.p = ureg_dst_undef();
3555 tx->regs.oDepth = ureg_dst_undef();
3556 tx->regs.vPos = ureg_src_undef();
3557 tx->regs.vFace = ureg_src_undef();
3558 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3559 tx->regs.o[i] = ureg_dst_undef();
3560 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3561 tx->regs.oCol[i] = ureg_dst_undef();
3562 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3563 tx->regs.vC[i] = ureg_src_undef();
3564 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3565 tx->regs.vT[i] = ureg_src_undef();
3566
3567 sm1_read_version(tx);
3568
3569 info->version = (tx->version.major << 4) | tx->version.minor;
3570
3571 tx->num_outputs = 0;
3572
3573 create_op_info_map(tx);
3574
3575 tx->ureg = ureg_create(info->type);
3576 if (!tx->ureg) {
3577 return E_OUTOFMEMORY;
3578 }
3579
3580 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3581 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3582 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3583 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3584 tx->texcoord_sn = tx->want_texcoord ?
3585 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3586 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3587 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3588
3589 if (IS_VS) {
3590 tx->num_constf_allowed = NINE_MAX_CONST_F;
3591 } else if (tx->version.major < 2) {/* IS_PS v1 */
3592 tx->num_constf_allowed = 8;
3593 } else if (tx->version.major == 2) {/* IS_PS v2 */
3594 tx->num_constf_allowed = 32;
3595 } else {/* IS_PS v3 */
3596 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3597 }
3598
3599 if (tx->version.major < 2) {
3600 tx->num_consti_allowed = 0;
3601 tx->num_constb_allowed = 0;
3602 } else {
3603 tx->num_consti_allowed = NINE_MAX_CONST_I;
3604 tx->num_constb_allowed = NINE_MAX_CONST_B;
3605 }
3606
3607 if (info->swvp_on && tx->version.major >= 2) {
3608 tx->num_constf_allowed = 8192;
3609 tx->num_consti_allowed = 2048;
3610 tx->num_constb_allowed = 2048;
3611 }
3612
3613 /* VS must always write position. Declare it here to make it the 1st output.
3614 * (Some drivers like nv50 are buggy and rely on that.)
3615 */
3616 if (IS_VS) {
3617 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3618 } else {
3619 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3620 if (!tx->shift_wpos)
3621 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3622 }
3623
3624 tx->mul_zero_wins = GET_CAP(TGSI_MUL_ZERO_WINS);
3625 if (tx->mul_zero_wins)
3626 ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);
3627
3628 /* Add additional definition of constants */
3629 if (info->add_constants_defs.c_combination) {
3630 unsigned i;
3631
3632 assert(info->add_constants_defs.int_const_added);
3633 assert(info->add_constants_defs.bool_const_added);
3634 /* We only add constants that are used by the shader
3635 * and that are not defined in the shader */
3636 for (i = 0; i < NINE_MAX_CONST_I; ++i) {
3637 if ((*info->add_constants_defs.int_const_added)[i]) {
3638 DBG("Defining const i%i : { %i %i %i %i }\n", i,
3639 info->add_constants_defs.c_combination->const_i[i][0],
3640 info->add_constants_defs.c_combination->const_i[i][1],
3641 info->add_constants_defs.c_combination->const_i[i][2],
3642 info->add_constants_defs.c_combination->const_i[i][3]);
3643 tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
3644 }
3645 }
3646 for (i = 0; i < NINE_MAX_CONST_B; ++i) {
3647 if ((*info->add_constants_defs.bool_const_added)[i]) {
3648 DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
3649 tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
3650 }
3651 }
3652 }
3653 return D3D_OK;
3654 }
3655
3656 static void
3657 tx_dtor(struct shader_translator *tx)
3658 {
3659 if (tx->num_inst_labels)
3660 FREE(tx->inst_labels);
3661 FREE(tx->lconstf);
3662 FREE(tx->regs.r);
3663 FREE(tx);
3664 }
3665
3666 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3667 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3668 static void
3669 shader_add_vs_viewport_transform(struct shader_translator *tx)
3670 {
3671 struct ureg_program *ureg = tx->ureg;
3672 struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
3673 struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
3674 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3675
3676 c0 = ureg_src_dimension(c0, 4);
3677 c1 = ureg_src_dimension(c1, 4);
3678 /* TODO: find out when we need to apply the viewport transformation or not.
3679 * Likely will be XYZ vs XYZRHW in vdecl_out
3680 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3681 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3682 */
3683 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3684 }
3685
3686 static void
3687 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3688 {
3689 struct ureg_program *ureg = tx->ureg;
3690 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3691 struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3692 struct ureg_src fog_vs, fog_color;
3693 struct ureg_dst fog_factor, depth;
3694
3695 if (!tx->info->fog_enable) {
3696 ureg_MOV(ureg, oCol0, src_col);
3697 return;
3698 }
3699
3700 if (tx->info->fog_mode != D3DFOG_NONE) {
3701 depth = tx_scratch_scalar(tx);
3702 /* Depth used for fog is perspective interpolated */
3703 ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3704 ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3705 }
3706
3707 fog_color = nine_float_constant_src(tx, 32);
3708 fog_params = nine_float_constant_src(tx, 33);
3709 fog_factor = tx_scratch_scalar(tx);
3710
3711 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3712 fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
3713 fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
3714 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
3715 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3716 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3717 fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3718 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3719 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3720 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3721 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3722 fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3723 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3724 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3725 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3726 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3727 } else {
3728 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3729 TGSI_INTERPOLATE_PERSPECTIVE),
3730 TGSI_SWIZZLE_X);
3731 ureg_MOV(ureg, fog_factor, fog_vs);
3732 }
3733
3734 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3735 tx_src_scalar(fog_factor), src_col, fog_color);
3736 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3737 }
3738
3739 HRESULT
3740 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3741 {
3742 struct shader_translator *tx;
3743 HRESULT hr = D3D_OK;
3744 const unsigned processor = info->type;
3745 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3746
3747 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3748
3749 tx = MALLOC_STRUCT(shader_translator);
3750 if (!tx)
3751 return E_OUTOFMEMORY;
3752
3753 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
3754 hr = E_OUTOFMEMORY;
3755 goto out;
3756 }
3757
3758 assert(IS_VS || !info->swvp_on);
3759
3760 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3761 hr = D3DERR_INVALIDCALL;
3762 DBG("Unsupported shader version: %u.%u !\n",
3763 tx->version.major, tx->version.minor);
3764 goto out;
3765 }
3766 if (tx->processor != processor) {
3767 hr = D3DERR_INVALIDCALL;
3768 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3769 goto out;
3770 }
3771 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3772 tx->version.major, tx->version.minor);
3773
3774 while (!sm1_parse_eof(tx) && !tx->failure)
3775 sm1_parse_instruction(tx);
3776 tx->parse++; /* for byte_size */
3777
3778 if (tx->failure) {
3779 /* For VS shaders, we print the warning later,
3780 * we first try with swvp. */
3781 if (IS_PS)
3782 ERR("Encountered buggy shader\n");
3783 ureg_destroy(tx->ureg);
3784 hr = D3DERR_INVALIDCALL;
3785 goto out;
3786 }
3787
3788 if (IS_PS && tx->version.major < 3) {
3789 if (tx->version.major < 2) {
3790 assert(tx->num_temp); /* there must be color output */
3791 info->rt_mask |= 0x1;
3792 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3793 } else {
3794 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3795 }
3796 }
3797
3798 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3799 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3800 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3801 }
3802
3803 if (info->position_t)
3804 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3805
3806 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3807 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3808 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3809 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3810 info->point_size = TRUE;
3811 }
3812
3813 if (info->process_vertices)
3814 shader_add_vs_viewport_transform(tx);
3815
3816 ureg_END(tx->ureg);
3817
3818 /* record local constants */
3819 if (tx->num_lconstf && tx->indirect_const_access) {
3820 struct nine_range *ranges;
3821 float *data;
3822 int *indices;
3823 unsigned i, k, n;
3824
3825 hr = E_OUTOFMEMORY;
3826
3827 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3828 if (!data)
3829 goto out;
3830 info->lconstf.data = data;
3831
3832 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3833 if (!indices)
3834 goto out;
3835
3836 /* lazy sort, num_lconstf should be small */
3837 for (n = 0; n < tx->num_lconstf; ++n) {
3838 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3839 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3840 k = i;
3841 }
3842 indices[n] = tx->lconstf[k].idx;
3843 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
3844 tx->lconstf[k].idx = INT_MAX;
3845 }
3846
3847 /* count ranges */
3848 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3849 if (indices[i] != indices[i - 1] + 1)
3850 ++n;
3851 ranges = MALLOC(n * sizeof(ranges[0]));
3852 if (!ranges) {
3853 FREE(indices);
3854 goto out;
3855 }
3856 info->lconstf.ranges = ranges;
3857
3858 k = 0;
3859 ranges[k].bgn = indices[0];
3860 for (i = 1; i < tx->num_lconstf; ++i) {
3861 if (indices[i] != indices[i - 1] + 1) {
3862 ranges[k].next = &ranges[k + 1];
3863 ranges[k].end = indices[i - 1] + 1;
3864 ++k;
3865 ranges[k].bgn = indices[i];
3866 }
3867 }
3868 ranges[k].end = indices[i - 1] + 1;
3869 ranges[k].next = NULL;
3870 assert(n == (k + 1));
3871
3872 FREE(indices);
3873 hr = D3D_OK;
3874 }
3875
3876 /* r500 */
3877 if (info->const_float_slots > device->max_vs_const_f &&
3878 (info->const_int_slots || info->const_bool_slots) &&
3879 !info->swvp_on)
3880 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3881
3882
3883 if (tx->indirect_const_access) { /* vs only */
3884 info->const_float_slots = device->max_vs_const_f;
3885 tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
3886 }
3887
3888 if (!info->swvp_on) {
3889 info->const_used_size = sizeof(float[4]) * tx->num_slots;
3890 if (tx->num_slots)
3891 ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
3892 } else {
3893 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
3894 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
3895 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
3896 ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
3897 }
3898
3899 if (info->process_vertices)
3900 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
3901
3902 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3903 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
3904 tgsi_dump(toks, 0);
3905 ureg_free_tokens(toks);
3906 }
3907
3908 if (info->process_vertices) {
3909 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
3910 tx->output_info,
3911 tx->num_outputs,
3912 &(info->so));
3913 info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
3914 } else
3915 info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
3916 if (!info->cso) {
3917 hr = D3DERR_DRIVERINTERNALERROR;
3918 FREE(info->lconstf.data);
3919 FREE(info->lconstf.ranges);
3920 goto out;
3921 }
3922
3923 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3924 out:
3925 tx_dtor(tx);
3926 return hr;
3927 }