st/nine: Pass more adapter formats for CheckDepthStencilMatch
[mesa.git] / src / gallium / frontends / nine / nine_shader.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "nir/tgsi_to_nir.h"
38
39 #define DBG_CHANNEL DBG_SHADER
40
41 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
42
43
44 struct shader_translator;
45
46 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
47
48 static inline const char *d3dsio_to_string(unsigned opcode);
49
50
/* Shader type tags (presumably the high word of the version token -
 * TODO confirm against the parser). */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Capacity of the cond_labels / loop_labels stacks in shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Value type of an immediate parameter (sm1_src_param::type). */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Pseudo register file used for immediates; one past the last D3DSPR_x. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison selectors. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Position of the per-opcode flag bits inside an instruction token. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values tested for D3DSIO_TEX (printed as "p" / "b" by the dumper). */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Write mask bits, already shifted down: bit 0 = x ... bit 3 = w. */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, x in the lowest pair. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_x arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Replicate component s of src to all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Destination modifier flags, shifted down so they fit sm1_dst_param::mod. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
100
101 /*
102 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
103 * BIAS <= PS 1.4 (x-0.5)
104 * BIASNEG <= PS 1.4 (-(x-0.5))
105 * SIGN <= PS 1.4 (2(x-0.5))
106 * SIGNNEG <= PS 1.4 (-2(x-0.5))
107 * COMP <= PS 1.4 (1-x)
108 * X2 = PS 1.4 (2x)
109 * X2NEG = PS 1.4 (-2x)
110 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
111 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
112 * ABS >= SM 3.0 (abs(x))
113 * ABSNEG >= SM 3.0 (-abs(x))
114 * NOT >= SM 2.0 predication only
115 */
/* Source modifiers, shifted down so they can be stored in
 * sm1_src_param::mod and used as indices into sm1_mod_str. */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
130
131 static const char *sm1_mod_str[] =
132 {
133 [NINED3DSPSM_NONE] = "",
134 [NINED3DSPSM_NEG] = "-",
135 [NINED3DSPSM_BIAS] = "bias",
136 [NINED3DSPSM_BIASNEG] = "biasneg",
137 [NINED3DSPSM_SIGN] = "sign",
138 [NINED3DSPSM_SIGNNEG] = "signneg",
139 [NINED3DSPSM_COMP] = "comp",
140 [NINED3DSPSM_X2] = "x2",
141 [NINED3DSPSM_X2NEG] = "x2neg",
142 [NINED3DSPSM_DZ] = "dz",
143 [NINED3DSPSM_DW] = "dw",
144 [NINED3DSPSM_ABS] = "abs",
145 [NINED3DSPSM_ABSNEG] = "-abs",
146 [NINED3DSPSM_NOT] = "not"
147 };
148
149 static void
150 sm1_dump_writemask(BYTE mask)
151 {
152 if (mask & 1) DUMP("x"); else DUMP("_");
153 if (mask & 2) DUMP("y"); else DUMP("_");
154 if (mask & 4) DUMP("z"); else DUMP("_");
155 if (mask & 8) DUMP("w"); else DUMP("_");
156 }
157
158 static void
159 sm1_dump_swizzle(BYTE s)
160 {
161 char c[4] = { 'x', 'y', 'z', 'w' };
162 DUMP("%c%c%c%c",
163 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
164 }
165
166 static const char sm1_file_char[] =
167 {
168 [D3DSPR_TEMP] = 'r',
169 [D3DSPR_INPUT] = 'v',
170 [D3DSPR_CONST] = 'c',
171 [D3DSPR_ADDR] = 'A',
172 [D3DSPR_RASTOUT] = 'R',
173 [D3DSPR_ATTROUT] = 'D',
174 [D3DSPR_OUTPUT] = 'o',
175 [D3DSPR_CONSTINT] = 'I',
176 [D3DSPR_COLOROUT] = 'C',
177 [D3DSPR_DEPTHOUT] = 'D',
178 [D3DSPR_SAMPLER] = 's',
179 [D3DSPR_CONST2] = 'c',
180 [D3DSPR_CONST3] = 'c',
181 [D3DSPR_CONST4] = 'c',
182 [D3DSPR_CONSTBOOL] = 'B',
183 [D3DSPR_LOOP] = 'L',
184 [D3DSPR_TEMPFLOAT16] = 'h',
185 [D3DSPR_MISCTYPE] = 'M',
186 [D3DSPR_LABEL] = 'X',
187 [D3DSPR_PREDICATE] = 'p'
188 };
189
190 static void
191 sm1_dump_reg(BYTE file, INT index)
192 {
193 switch (file) {
194 case D3DSPR_LOOP:
195 DUMP("aL");
196 break;
197 case D3DSPR_COLOROUT:
198 DUMP("oC%i", index);
199 break;
200 case D3DSPR_DEPTHOUT:
201 DUMP("oDepth");
202 break;
203 case D3DSPR_RASTOUT:
204 DUMP("oRast%i", index);
205 break;
206 case D3DSPR_CONSTINT:
207 DUMP("iconst[%i]", index);
208 break;
209 case D3DSPR_CONSTBOOL:
210 DUMP("bconst[%i]", index);
211 break;
212 default:
213 DUMP("%c%i", sm1_file_char[file], index);
214 break;
215 }
216 }
217
218 struct sm1_src_param
219 {
220 INT idx;
221 struct sm1_src_param *rel;
222 BYTE file;
223 BYTE swizzle;
224 BYTE mod;
225 BYTE type;
226 union {
227 DWORD d[4];
228 float f[4];
229 int i[4];
230 BOOL b;
231 } imm;
232 };
233 static void
234 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
235
236 struct sm1_dst_param
237 {
238 INT idx;
239 struct sm1_src_param *rel;
240 BYTE file;
241 BYTE mask;
242 BYTE mod;
243 int8_t shift; /* sint4 */
244 BYTE type;
245 };
246
247 static inline void
248 assert_replicate_swizzle(const struct ureg_src *reg)
249 {
250 assert(reg->SwizzleY == reg->SwizzleX &&
251 reg->SwizzleZ == reg->SwizzleX &&
252 reg->SwizzleW == reg->SwizzleX);
253 }
254
255 static void
256 sm1_dump_immediate(const struct sm1_src_param *param)
257 {
258 switch (param->type) {
259 case NINED3DSPTYPE_FLOAT4:
260 DUMP("{ %f %f %f %f }",
261 param->imm.f[0], param->imm.f[1],
262 param->imm.f[2], param->imm.f[3]);
263 break;
264 case NINED3DSPTYPE_INT4:
265 DUMP("{ %i %i %i %i }",
266 param->imm.i[0], param->imm.i[1],
267 param->imm.i[2], param->imm.i[3]);
268 break;
269 case NINED3DSPTYPE_BOOL:
270 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
271 break;
272 default:
273 assert(0);
274 break;
275 }
276 }
277
278 static void
279 sm1_dump_src_param(const struct sm1_src_param *param)
280 {
281 if (param->file == NINED3DSPR_IMMEDIATE) {
282 assert(!param->mod &&
283 !param->rel &&
284 param->swizzle == NINED3DSP_NOSWIZZLE);
285 sm1_dump_immediate(param);
286 return;
287 }
288
289 if (param->mod)
290 DUMP("%s(", sm1_mod_str[param->mod]);
291 if (param->rel) {
292 DUMP("%c[", sm1_file_char[param->file]);
293 sm1_dump_src_param(param->rel);
294 DUMP("+%i]", param->idx);
295 } else {
296 sm1_dump_reg(param->file, param->idx);
297 }
298 if (param->mod)
299 DUMP(")");
300 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
301 DUMP(".");
302 sm1_dump_swizzle(param->swizzle);
303 }
304 }
305
306 static void
307 sm1_dump_dst_param(const struct sm1_dst_param *param)
308 {
309 if (param->mod & NINED3DSPDM_SATURATE)
310 DUMP("sat ");
311 if (param->mod & NINED3DSPDM_PARTIALP)
312 DUMP("pp ");
313 if (param->mod & NINED3DSPDM_CENTROID)
314 DUMP("centroid ");
315 if (param->shift < 0)
316 DUMP("/%u ", 1 << -param->shift);
317 if (param->shift > 0)
318 DUMP("*%u ", 1 << param->shift);
319
320 if (param->rel) {
321 DUMP("%c[", sm1_file_char[param->file]);
322 sm1_dump_src_param(param->rel);
323 DUMP("+%i]", param->idx);
324 } else {
325 sm1_dump_reg(param->file, param->idx);
326 }
327 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
328 DUMP(".");
329 sm1_dump_writemask(param->mask);
330 }
331 }
332
333 struct sm1_semantic
334 {
335 struct sm1_dst_param reg;
336 BYTE sampler_type;
337 D3DDECLUSAGE usage;
338 BYTE usage_idx;
339 };
340
341 struct sm1_op_info
342 {
343 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
344 * should be ignored completely */
345 unsigned sio;
346 unsigned opcode; /* TGSI_OPCODE_x */
347
348 /* versions are still set even handler is set */
349 struct {
350 unsigned min;
351 unsigned max;
352 } vert_version, frag_version;
353
354 /* number of regs parsed outside of special handler */
355 unsigned ndst;
356 unsigned nsrc;
357
358 /* some instructions don't map perfectly, so use a special handler */
359 translate_instruction_func handler;
360 };
361
362 struct sm1_instruction
363 {
364 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
365 BYTE flags;
366 BOOL coissue;
367 BOOL predicated;
368 BYTE ndst;
369 BYTE nsrc;
370 struct sm1_src_param src[4];
371 struct sm1_src_param src_rel[4];
372 struct sm1_src_param pred;
373 struct sm1_src_param dst_rel[1];
374 struct sm1_dst_param dst[1];
375
376 const struct sm1_op_info *info;
377 };
378
379 static void
380 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
381 {
382 unsigned i;
383
384 /* no info stored for these: */
385 if (insn->opcode == D3DSIO_DCL)
386 return;
387 for (i = 0; i < indent; ++i)
388 DUMP(" ");
389
390 if (insn->predicated) {
391 DUMP("@");
392 sm1_dump_src_param(&insn->pred);
393 DUMP(" ");
394 }
395 DUMP("%s", d3dsio_to_string(insn->opcode));
396 if (insn->flags) {
397 switch (insn->opcode) {
398 case D3DSIO_TEX:
399 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
400 break;
401 default:
402 DUMP("_%x", insn->flags);
403 break;
404 }
405 }
406 if (insn->coissue)
407 DUMP("_co");
408 DUMP(" ");
409
410 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
411 sm1_dump_dst_param(&insn->dst[i]);
412 DUMP(" ");
413 }
414
415 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
416 sm1_dump_src_param(&insn->src[i]);
417 DUMP(" ");
418 }
419 if (insn->opcode == D3DSIO_DEF ||
420 insn->opcode == D3DSIO_DEFI ||
421 insn->opcode == D3DSIO_DEFB)
422 sm1_dump_immediate(&insn->src[0]);
423
424 DUMP("\n");
425 }
426
427 struct sm1_local_const
428 {
429 INT idx;
430 struct ureg_src reg;
431 float f[4]; /* for indirect addressing of float constants */
432 };
433
434 struct shader_translator
435 {
436 const DWORD *byte_code;
437 const DWORD *parse;
438 const DWORD *parse_next;
439
440 struct ureg_program *ureg;
441
442 /* shader version */
443 struct {
444 BYTE major;
445 BYTE minor;
446 } version;
447 unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
448 unsigned num_constf_allowed;
449 unsigned num_consti_allowed;
450 unsigned num_constb_allowed;
451
452 boolean native_integers;
453 boolean inline_subroutines;
454 boolean want_texcoord;
455 boolean shift_wpos;
456 boolean wpos_is_sysval;
457 boolean face_is_sysval_integer;
458 boolean mul_zero_wins;
459 unsigned texcoord_sn;
460
461 struct sm1_instruction insn; /* current instruction */
462
463 struct {
464 struct ureg_dst *r;
465 struct ureg_dst oPos;
466 struct ureg_dst oPos_out; /* the real output when doing streamout */
467 struct ureg_dst oFog;
468 struct ureg_dst oPts;
469 struct ureg_dst oCol[4];
470 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
471 struct ureg_dst oDepth;
472 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
473 struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
474 struct ureg_src vPos;
475 struct ureg_src vFace;
476 struct ureg_src s;
477 struct ureg_dst p;
478 struct ureg_dst address;
479 struct ureg_dst a0;
480 struct ureg_dst predicate;
481 struct ureg_dst predicate_tmp;
482 struct ureg_dst predicate_dst;
483 struct ureg_dst tS[8]; /* texture stage registers */
484 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
485 struct ureg_dst t[8]; /* scratch TEMPs */
486 struct ureg_src vC[2]; /* PS color in */
487 struct ureg_src vT[8]; /* PS texcoord in */
488 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
489 } regs;
490 unsigned num_temp; /* ARRAY_SIZE(regs.r) */
491 unsigned num_scratch;
492 unsigned loop_depth;
493 unsigned loop_depth_max;
494 unsigned cond_depth;
495 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
496 unsigned cond_labels[NINE_MAX_COND_DEPTH];
497 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
498 boolean predicated_activated;
499
500 unsigned *inst_labels; /* LABEL op */
501 unsigned num_inst_labels;
502
503 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
504
505 struct sm1_local_const *lconstf;
506 unsigned num_lconstf;
507 struct sm1_local_const *lconsti;
508 unsigned num_lconsti;
509 struct sm1_local_const *lconstb;
510 unsigned num_lconstb;
511
512 boolean slots_used[NINE_MAX_CONST_ALL];
513 unsigned *slot_map;
514 unsigned num_slots;
515
516 boolean indirect_const_access;
517 boolean failure;
518
519 struct nine_vs_output_info output_info[16];
520 int num_outputs;
521
522 struct nine_shader_info *info;
523
524 int16_t op_info_map[D3DSIO_BREAKP + 1];
525 };
526
/* Shader stage tests; both expect a struct shader_translator *tx in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* Flag the translation as failed and return from a void function when cond
 * holds. This expands to a bare if statement: call sites in this file use it
 * without a trailing semicolon, and it must not be placed directly in front
 * of an else. */
#define FAILURE_VOID(cond) if ((cond)) { tx->failure = 1; return; }

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
534
535 static void
536 sm1_instruction_check(const struct sm1_instruction *insn)
537 {
538 if (insn->opcode == D3DSIO_CRS)
539 {
540 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
541 {
542 DBG("CRS.mask.w\n");
543 }
544 }
545 }
546
547 static void
548 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
549 int mask, int output_index)
550 {
551 tx->output_info[tx->num_outputs].output_semantic = Usage;
552 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
553 tx->output_info[tx->num_outputs].mask = mask;
554 tx->output_info[tx->num_outputs].output_index = output_index;
555 tx->num_outputs++;
556 }
557
558 static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
559 {
560 struct ureg_src src;
561
562 if (tx->slot_map)
563 idx = tx->slot_map[idx];
564 /* vswp constant handling: we use two buffers
565 * to fit all the float constants. The special handling
566 * doesn't need to be elsewhere, because all the instructions
567 * accessing the constants directly are VS1, and swvp
568 * is VS >= 2 */
569 if (tx->info->swvp_on && idx >= 4096) {
570 /* TODO: swvp rel is broken if many constants are used */
571 src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
572 src = ureg_src_dimension(src, 1);
573 } else {
574 src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
575 src = ureg_src_dimension(src, 0);
576 }
577
578 if (!tx->info->swvp_on)
579 tx->slots_used[idx] = TRUE;
580 if (tx->info->const_float_slots < (idx + 1))
581 tx->info->const_float_slots = idx + 1;
582 if (tx->num_slots < (idx + 1))
583 tx->num_slots = idx + 1;
584
585 return src;
586 }
587
588 static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
589 {
590 struct ureg_src src;
591
592 if (tx->info->swvp_on) {
593 src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
594 src = ureg_src_dimension(src, 2);
595 } else {
596 unsigned slot_idx = tx->info->const_i_base + idx;
597 if (tx->slot_map)
598 slot_idx = tx->slot_map[slot_idx];
599 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
600 src = ureg_src_dimension(src, 0);
601 tx->slots_used[slot_idx] = TRUE;
602 tx->info->int_slots_used[idx] = TRUE;
603 if (tx->num_slots < (slot_idx + 1))
604 tx->num_slots = slot_idx + 1;
605 }
606
607 if (tx->info->const_int_slots < (idx + 1))
608 tx->info->const_int_slots = idx + 1;
609
610 return src;
611 }
612
613 static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
614 {
615 struct ureg_src src;
616
617 char r = idx / 4;
618 char s = idx & 3;
619
620 if (tx->info->swvp_on) {
621 src = ureg_src_register(TGSI_FILE_CONSTANT, r);
622 src = ureg_src_dimension(src, 3);
623 } else {
624 unsigned slot_idx = tx->info->const_b_base + r;
625 if (tx->slot_map)
626 slot_idx = tx->slot_map[slot_idx];
627 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
628 src = ureg_src_dimension(src, 0);
629 tx->slots_used[slot_idx] = TRUE;
630 tx->info->bool_slots_used[idx] = TRUE;
631 if (tx->num_slots < (slot_idx + 1))
632 tx->num_slots = slot_idx + 1;
633 }
634 src = ureg_swizzle(src, s, s, s, s);
635
636 if (tx->info->const_bool_slots < (idx + 1))
637 tx->info->const_bool_slots = idx + 1;
638
639 return src;
640 }
641
642 static boolean
643 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
644 {
645 INT i;
646
647 if (index < 0 || index >= tx->num_constf_allowed) {
648 tx->failure = TRUE;
649 return FALSE;
650 }
651 for (i = 0; i < tx->num_lconstf; ++i) {
652 if (tx->lconstf[i].idx == index) {
653 *src = tx->lconstf[i].reg;
654 return TRUE;
655 }
656 }
657 return FALSE;
658 }
659 static boolean
660 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
661 {
662 int i;
663
664 if (index < 0 || index >= tx->num_consti_allowed) {
665 tx->failure = TRUE;
666 return FALSE;
667 }
668 for (i = 0; i < tx->num_lconsti; ++i) {
669 if (tx->lconsti[i].idx == index) {
670 *src = tx->lconsti[i].reg;
671 return TRUE;
672 }
673 }
674 return FALSE;
675 }
676 static boolean
677 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
678 {
679 int i;
680
681 if (index < 0 || index >= tx->num_constb_allowed) {
682 tx->failure = TRUE;
683 return FALSE;
684 }
685 for (i = 0; i < tx->num_lconstb; ++i) {
686 if (tx->lconstb[i].idx == index) {
687 *src = tx->lconstb[i].reg;
688 return TRUE;
689 }
690 }
691 return FALSE;
692 }
693
694 static void
695 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
696 {
697 unsigned n;
698
699 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
700
701 for (n = 0; n < tx->num_lconstf; ++n)
702 if (tx->lconstf[n].idx == index)
703 break;
704 if (n == tx->num_lconstf) {
705 if ((n % 8) == 0) {
706 tx->lconstf = REALLOC(tx->lconstf,
707 (n + 0) * sizeof(tx->lconstf[0]),
708 (n + 8) * sizeof(tx->lconstf[0]));
709 assert(tx->lconstf);
710 }
711 tx->num_lconstf++;
712 }
713 tx->lconstf[n].idx = index;
714 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
715
716 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
717 }
718 static void
719 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
720 {
721 unsigned n;
722
723 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
724
725 for (n = 0; n < tx->num_lconsti; ++n)
726 if (tx->lconsti[n].idx == index)
727 break;
728 if (n == tx->num_lconsti) {
729 if ((n % 8) == 0) {
730 tx->lconsti = REALLOC(tx->lconsti,
731 (n + 0) * sizeof(tx->lconsti[0]),
732 (n + 8) * sizeof(tx->lconsti[0]));
733 assert(tx->lconsti);
734 }
735 tx->num_lconsti++;
736 }
737
738 tx->lconsti[n].idx = index;
739 tx->lconsti[n].reg = tx->native_integers ?
740 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
741 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
742 }
743 static void
744 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
745 {
746 unsigned n;
747
748 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
749
750 for (n = 0; n < tx->num_lconstb; ++n)
751 if (tx->lconstb[n].idx == index)
752 break;
753 if (n == tx->num_lconstb) {
754 if ((n % 8) == 0) {
755 tx->lconstb = REALLOC(tx->lconstb,
756 (n + 0) * sizeof(tx->lconstb[0]),
757 (n + 8) * sizeof(tx->lconstb[0]));
758 assert(tx->lconstb);
759 }
760 tx->num_lconstb++;
761 }
762
763 tx->lconstb[n].idx = index;
764 tx->lconstb[n].reg = tx->native_integers ?
765 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
766 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
767 }
768
769 static inline struct ureg_dst
770 tx_scratch(struct shader_translator *tx)
771 {
772 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
773 tx->failure = TRUE;
774 return tx->regs.t[0];
775 }
776 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
777 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
778 return tx->regs.t[tx->num_scratch++];
779 }
780
781 static inline struct ureg_dst
782 tx_scratch_scalar(struct shader_translator *tx)
783 {
784 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
785 }
786
787 static inline struct ureg_src
788 tx_src_scalar(struct ureg_dst dst)
789 {
790 struct ureg_src src = ureg_src(dst);
791 int c = ffs(dst.WriteMask) - 1;
792 if (dst.WriteMask == (1 << c))
793 src = ureg_scalar(src, c);
794 return src;
795 }
796
797 static inline void
798 tx_temp_alloc(struct shader_translator *tx, INT idx)
799 {
800 assert(idx >= 0);
801 if (idx >= tx->num_temp) {
802 unsigned k = tx->num_temp;
803 unsigned n = idx + 1;
804 tx->regs.r = REALLOC(tx->regs.r,
805 k * sizeof(tx->regs.r[0]),
806 n * sizeof(tx->regs.r[0]));
807 for (; k < n; ++k)
808 tx->regs.r[k] = ureg_dst_undef();
809 tx->num_temp = n;
810 }
811 if (ureg_dst_is_undef(tx->regs.r[idx]))
812 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
813 }
814
815 static inline void
816 tx_addr_alloc(struct shader_translator *tx, INT idx)
817 {
818 assert(idx == 0);
819 if (ureg_dst_is_undef(tx->regs.address))
820 tx->regs.address = ureg_DECL_address(tx->ureg);
821 if (ureg_dst_is_undef(tx->regs.a0))
822 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
823 }
824
825 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
826 * the projection should be applied on the texture. It doesn't
827 * apply on texkill.
828 * The doc is very imprecise here (it says the projection is done
829 * before rasterization, thus in vs, which seems wrong since ps instructions
830 * are affected differently)
831 * For now we only apply to the ps TEX instruction and TEXBEM.
832 * Perhaps some other instructions would need it */
833 static inline void
834 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
835 struct ureg_src src, INT idx)
836 {
837 struct ureg_dst tmp;
838 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
839
840 /* no projection */
841 if (dim == 1) {
842 ureg_MOV(tx->ureg, dst, src);
843 } else {
844 tmp = tx_scratch_scalar(tx);
845 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
846 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
847 }
848 }
849
850 static inline void
851 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
852 unsigned target, struct ureg_src src0,
853 struct ureg_src src1, INT idx)
854 {
855 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
856 struct ureg_dst tmp;
857 boolean shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
858
859 /* dim == 1: no projection
860 * Looks like must be disabled when it makes no
861 * sense according the texture dimensions
862 */
863 if (dim == 1 || (dim <= target && !shadow)) {
864 ureg_TEX(tx->ureg, dst, target, src0, src1);
865 } else if (dim == 4) {
866 ureg_TXP(tx->ureg, dst, target, src0, src1);
867 } else {
868 tmp = tx_scratch(tx);
869 apply_ps1x_projection(tx, tmp, src0, idx);
870 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
871 }
872 }
873
874 static inline void
875 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
876 {
877 assert(IS_PS);
878 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
879 if (ureg_src_is_undef(tx->regs.vT[idx]))
880 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
881 TGSI_INTERPOLATE_PERSPECTIVE);
882 }
883
884 static inline unsigned *
885 tx_bgnloop(struct shader_translator *tx)
886 {
887 tx->loop_depth++;
888 if (tx->loop_depth_max < tx->loop_depth)
889 tx->loop_depth_max = tx->loop_depth;
890 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
891 return &tx->loop_labels[tx->loop_depth - 1];
892 }
893
894 static inline unsigned *
895 tx_endloop(struct shader_translator *tx)
896 {
897 assert(tx->loop_depth);
898 tx->loop_depth--;
899 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
900 ureg_get_instruction_number(tx->ureg));
901 return &tx->loop_labels[tx->loop_depth];
902 }
903
904 static struct ureg_dst
905 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
906 {
907 const unsigned l = tx->loop_depth - 1;
908
909 if (!tx->loop_depth)
910 {
911 DBG("loop counter requested outside of loop\n");
912 return ureg_dst_undef();
913 }
914
915 if (ureg_dst_is_undef(tx->regs.rL[l])) {
916 /* loop or rep ctr creation */
917 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
918 tx->loop_or_rep[l] = loop_or_rep;
919 }
920 /* loop - rep - endloop - endrep not allowed */
921 assert(tx->loop_or_rep[l] == loop_or_rep);
922
923 return tx->regs.rL[l];
924 }
925
926 static struct ureg_src
927 tx_get_loopal(struct shader_translator *tx)
928 {
929 int loop_level = tx->loop_depth - 1;
930
931 while (loop_level >= 0) {
932 /* handle loop - rep - endrep - endloop case */
933 if (tx->loop_or_rep[loop_level])
934 /* the value is in the loop counter y component (nine implementation) */
935 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
936 loop_level--;
937 }
938
939 DBG("aL counter requested outside of loop\n");
940 return ureg_src_undef();
941 }
942
943 static inline unsigned *
944 tx_cond(struct shader_translator *tx)
945 {
946 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
947 tx->cond_depth++;
948 return &tx->cond_labels[tx->cond_depth - 1];
949 }
950
951 static inline unsigned *
952 tx_elsecond(struct shader_translator *tx)
953 {
954 assert(tx->cond_depth);
955 return &tx->cond_labels[tx->cond_depth - 1];
956 }
957
958 static inline void
959 tx_endcond(struct shader_translator *tx)
960 {
961 assert(tx->cond_depth);
962 tx->cond_depth--;
963 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
964 ureg_get_instruction_number(tx->ureg));
965 }
966
967 static inline struct ureg_dst
968 nine_ureg_dst_register(unsigned file, int index)
969 {
970 return ureg_dst(ureg_src_register(file, index));
971 }
972
973 static inline struct ureg_src
974 nine_get_position_input(struct shader_translator *tx)
975 {
976 struct ureg_program *ureg = tx->ureg;
977
978 if (tx->wpos_is_sysval)
979 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
980 else
981 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
982 0, TGSI_INTERPOLATE_LINEAR);
983 }
984
985 static struct ureg_src
986 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
987 {
988 struct ureg_program *ureg = tx->ureg;
989 struct ureg_src src;
990 struct ureg_dst tmp;
991
992 assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
993 (D3DSPR_ADDR && tx->version.major == 3));
994
995 switch (param->file)
996 {
997 case D3DSPR_TEMP:
998 tx_temp_alloc(tx, param->idx);
999 src = ureg_src(tx->regs.r[param->idx]);
1000 break;
1001 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1002 case D3DSPR_ADDR:
1003 if (IS_VS) {
1004 assert(param->idx == 0);
1005 /* the address register (vs only) must be
1006 * assigned before use */
1007 assert(!ureg_dst_is_undef(tx->regs.a0));
1008 /* Round to lowest for vs1.1 (contrary to the doc), else
1009 * round to nearest */
1010 if (tx->version.major < 2 && tx->version.minor < 2)
1011 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1012 else
1013 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1014 src = ureg_src(tx->regs.address);
1015 } else {
1016 if (tx->version.major < 2 && tx->version.minor < 4) {
1017 /* no subroutines, so should be defined */
1018 src = ureg_src(tx->regs.tS[param->idx]);
1019 } else {
1020 tx_texcoord_alloc(tx, param->idx);
1021 src = tx->regs.vT[param->idx];
1022 }
1023 }
1024 break;
1025 case D3DSPR_INPUT:
1026 if (IS_VS) {
1027 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1028 } else {
1029 if (tx->version.major < 3) {
1030 src = ureg_DECL_fs_input_cyl_centroid(
1031 ureg, TGSI_SEMANTIC_COLOR, param->idx,
1032 TGSI_INTERPOLATE_COLOR, 0,
1033 tx->info->force_color_in_centroid ?
1034 TGSI_INTERPOLATE_LOC_CENTROID : 0,
1035 0, 1);
1036 } else {
1037 if(param->rel) {
1038 /* Copy all inputs (non consecutive)
1039 * to temp array (consecutive).
1040 * This is not good for performance.
1041 * A better way would be to have inputs
1042 * consecutive (would need implement alternative
1043 * way to match vs outputs and ps inputs).
1044 * However even with the better way, the temp array
1045 * copy would need to be used if some inputs
1046 * are not GENERIC or if they have different
1047 * interpolation flag. */
1048 if (ureg_src_is_undef(tx->regs.v_consecutive)) {
1049 int i;
1050 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
1051 for (i = 0; i < 10; i++) {
1052 if (!ureg_src_is_undef(tx->regs.v[i]))
1053 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
1054 else
1055 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
1056 }
1057 }
1058 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
1059 } else {
1060 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1061 src = tx->regs.v[param->idx];
1062 }
1063 }
1064 }
1065 if (param->rel)
1066 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1067 break;
1068 case D3DSPR_PREDICATE:
1069 if (ureg_dst_is_undef(tx->regs.predicate)) {
1070 /* Forbidden to use the predicate register before being set */
1071 tx->failure = TRUE;
1072 tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1073 }
1074 src = ureg_src(tx->regs.predicate);
1075 break;
1076 case D3DSPR_SAMPLER:
1077 assert(param->mod == NINED3DSPSM_NONE);
1078 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
1079 src = ureg_DECL_sampler(ureg, param->idx);
1080 break;
1081 case D3DSPR_CONST:
1082 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1083 src = nine_float_constant_src(tx, param->idx);
1084 if (param->rel) {
1085 tx->indirect_const_access = TRUE;
1086 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1087 }
1088 }
1089 if (!IS_VS && tx->version.major < 2) {
1090 /* ps 1.X clamps constants */
1091 tmp = tx_scratch(tx);
1092 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1093 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1094 src = ureg_src(tmp);
1095 }
1096 break;
1097 case D3DSPR_CONST2:
1098 case D3DSPR_CONST3:
1099 case D3DSPR_CONST4:
1100 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1101 assert(!"CONST2/3/4");
1102 src = ureg_imm1f(ureg, 0.0f);
1103 break;
1104 case D3DSPR_CONSTINT:
1105 /* relative addressing only possible for float constants in vs */
1106 if (!tx_lconsti(tx, &src, param->idx))
1107 src = nine_integer_constant_src(tx, param->idx);
1108 break;
1109 case D3DSPR_CONSTBOOL:
1110 if (!tx_lconstb(tx, &src, param->idx))
1111 src = nine_boolean_constant_src(tx, param->idx);
1112 break;
1113 case D3DSPR_LOOP:
1114 if (ureg_dst_is_undef(tx->regs.address))
1115 tx->regs.address = ureg_DECL_address(ureg);
1116 if (!tx->native_integers)
1117 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1118 else
1119 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1120 src = ureg_src(tx->regs.address);
1121 break;
1122 case D3DSPR_MISCTYPE:
1123 switch (param->idx) {
1124 case D3DSMO_POSITION:
1125 if (ureg_src_is_undef(tx->regs.vPos))
1126 tx->regs.vPos = nine_get_position_input(tx);
1127 if (tx->shift_wpos) {
1128 /* TODO: do this only once */
1129 struct ureg_dst wpos = tx_scratch(tx);
1130 ureg_ADD(ureg, wpos, tx->regs.vPos,
1131 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1132 src = ureg_src(wpos);
1133 } else {
1134 src = tx->regs.vPos;
1135 }
1136 break;
1137 case D3DSMO_FACE:
1138 if (ureg_src_is_undef(tx->regs.vFace)) {
1139 if (tx->face_is_sysval_integer) {
1140 tmp = ureg_DECL_temporary(ureg);
1141 tx->regs.vFace =
1142 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1143
1144 /* convert bool to float */
1145 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1146 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1147 tx->regs.vFace = ureg_src(tmp);
1148 } else {
1149 tx->regs.vFace = ureg_DECL_fs_input(ureg,
1150 TGSI_SEMANTIC_FACE, 0,
1151 TGSI_INTERPOLATE_CONSTANT);
1152 }
1153 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1154 }
1155 src = tx->regs.vFace;
1156 break;
1157 default:
1158 assert(!"invalid src D3DSMO");
1159 break;
1160 }
1161 break;
1162 case D3DSPR_TEMPFLOAT16:
1163 break;
1164 default:
1165 assert(!"invalid src D3DSPR");
1166 }
1167
1168 switch (param->mod) {
1169 case NINED3DSPSM_DW:
1170 tmp = tx_scratch(tx);
1171 /* NOTE: app is not allowed to read w with this modifier */
1172 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1173 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1174 src = ureg_src(tmp);
1175 break;
1176 case NINED3DSPSM_DZ:
1177 tmp = tx_scratch(tx);
1178 /* NOTE: app is not allowed to read z with this modifier */
1179 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1180 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1181 src = ureg_src(tmp);
1182 break;
1183 default:
1184 break;
1185 }
1186
1187 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1188 src = ureg_swizzle(src,
1189 (param->swizzle >> 0) & 0x3,
1190 (param->swizzle >> 2) & 0x3,
1191 (param->swizzle >> 4) & 0x3,
1192 (param->swizzle >> 6) & 0x3);
1193
1194 switch (param->mod) {
1195 case NINED3DSPSM_ABS:
1196 src = ureg_abs(src);
1197 break;
1198 case NINED3DSPSM_ABSNEG:
1199 src = ureg_negate(ureg_abs(src));
1200 break;
1201 case NINED3DSPSM_NEG:
1202 src = ureg_negate(src);
1203 break;
1204 case NINED3DSPSM_BIAS:
1205 tmp = tx_scratch(tx);
1206 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1207 src = ureg_src(tmp);
1208 break;
1209 case NINED3DSPSM_BIASNEG:
1210 tmp = tx_scratch(tx);
1211 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1212 src = ureg_src(tmp);
1213 break;
1214 case NINED3DSPSM_NOT:
1215 if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
1216 tmp = tx_scratch(tx);
1217 ureg_NOT(ureg, tmp, src);
1218 src = ureg_src(tmp);
1219 break;
1220 } else { /* predicate */
1221 tmp = tx_scratch(tx);
1222 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1223 src = ureg_src(tmp);
1224 }
1225 /* fall through */
1226 case NINED3DSPSM_COMP:
1227 tmp = tx_scratch(tx);
1228 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1229 src = ureg_src(tmp);
1230 break;
1231 case NINED3DSPSM_DZ:
1232 case NINED3DSPSM_DW:
1233 /* Already handled*/
1234 break;
1235 case NINED3DSPSM_SIGN:
1236 tmp = tx_scratch(tx);
1237 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1238 src = ureg_src(tmp);
1239 break;
1240 case NINED3DSPSM_SIGNNEG:
1241 tmp = tx_scratch(tx);
1242 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1243 src = ureg_src(tmp);
1244 break;
1245 case NINED3DSPSM_X2:
1246 tmp = tx_scratch(tx);
1247 ureg_ADD(ureg, tmp, src, src);
1248 src = ureg_src(tmp);
1249 break;
1250 case NINED3DSPSM_X2NEG:
1251 tmp = tx_scratch(tx);
1252 ureg_ADD(ureg, tmp, src, src);
1253 src = ureg_negate(ureg_src(tmp));
1254 break;
1255 default:
1256 assert(param->mod == NINED3DSPSM_NONE);
1257 break;
1258 }
1259
1260 return src;
1261 }
1262
1263 static struct ureg_dst
1264 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1265 {
1266 struct ureg_dst dst;
1267
1268 switch (param->file)
1269 {
1270 case D3DSPR_TEMP:
1271 assert(!param->rel);
1272 tx_temp_alloc(tx, param->idx);
1273 dst = tx->regs.r[param->idx];
1274 break;
1275 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1276 case D3DSPR_ADDR:
1277 assert(!param->rel);
1278 if (tx->version.major < 2 && !IS_VS) {
1279 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1280 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1281 dst = tx->regs.tS[param->idx];
1282 } else
1283 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1284 tx_texcoord_alloc(tx, param->idx);
1285 dst = ureg_dst(tx->regs.vT[param->idx]);
1286 } else {
1287 tx_addr_alloc(tx, param->idx);
1288 dst = tx->regs.a0;
1289 }
1290 break;
1291 case D3DSPR_RASTOUT:
1292 assert(!param->rel);
1293 switch (param->idx) {
1294 case 0:
1295 if (ureg_dst_is_undef(tx->regs.oPos))
1296 tx->regs.oPos =
1297 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1298 dst = tx->regs.oPos;
1299 break;
1300 case 1:
1301 if (ureg_dst_is_undef(tx->regs.oFog))
1302 tx->regs.oFog =
1303 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
1304 dst = tx->regs.oFog;
1305 break;
1306 case 2:
1307 if (ureg_dst_is_undef(tx->regs.oPts))
1308 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1309 dst = tx->regs.oPts;
1310 break;
1311 default:
1312 assert(0);
1313 break;
1314 }
1315 break;
1316 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1317 case D3DSPR_OUTPUT:
1318 if (tx->version.major < 3) {
1319 assert(!param->rel);
1320 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1321 } else {
1322 assert(!param->rel); /* TODO */
1323 assert(param->idx < ARRAY_SIZE(tx->regs.o));
1324 dst = tx->regs.o[param->idx];
1325 }
1326 break;
1327 case D3DSPR_ATTROUT: /* VS */
1328 case D3DSPR_COLOROUT: /* PS */
1329 assert(param->idx >= 0 && param->idx < 4);
1330 assert(!param->rel);
1331 tx->info->rt_mask |= 1 << param->idx;
1332 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1333 /* ps < 3: oCol[0] will have fog blending afterward */
1334 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1335 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1336 } else {
1337 tx->regs.oCol[param->idx] =
1338 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1339 }
1340 }
1341 dst = tx->regs.oCol[param->idx];
1342 if (IS_VS && tx->version.major < 3)
1343 dst = ureg_saturate(dst);
1344 break;
1345 case D3DSPR_DEPTHOUT:
1346 assert(!param->rel);
1347 if (ureg_dst_is_undef(tx->regs.oDepth))
1348 tx->regs.oDepth =
1349 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1350 TGSI_WRITEMASK_Z, 0, 1);
1351 dst = tx->regs.oDepth; /* XXX: must write .z component */
1352 break;
1353 case D3DSPR_PREDICATE:
1354 if (ureg_dst_is_undef(tx->regs.predicate))
1355 tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1356 dst = tx->regs.predicate;
1357 break;
1358 case D3DSPR_TEMPFLOAT16:
1359 DBG("unhandled D3DSPR: %u\n", param->file);
1360 break;
1361 default:
1362 assert(!"invalid dst D3DSPR");
1363 break;
1364 }
1365 if (param->rel)
1366 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1367
1368 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1369 dst = ureg_writemask(dst, param->mask);
1370 if (param->mod & NINED3DSPDM_SATURATE)
1371 dst = ureg_saturate(dst);
1372
1373 if (tx->predicated_activated) {
1374 tx->regs.predicate_dst = dst;
1375 dst = tx->regs.predicate_tmp;
1376 }
1377
1378 return dst;
1379 }
1380
1381 static struct ureg_dst
1382 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1383 {
1384 if (param->shift) {
1385 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1386 return tx->regs.tdst;
1387 }
1388 return _tx_dst_param(tx, param);
1389 }
1390
1391 static void
1392 tx_apply_dst0_modifiers(struct shader_translator *tx)
1393 {
1394 struct ureg_dst rdst;
1395 float f;
1396
1397 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1398 return;
1399 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1400
1401 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1402
1403 if (tx->insn.dst[0].shift < 0)
1404 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1405 else
1406 f = 1 << tx->insn.dst[0].shift;
1407
1408 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1409 }
1410
1411 static struct ureg_src
1412 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1413 {
1414 struct ureg_src src;
1415
1416 assert(!param->shift);
1417 assert(!(param->mod & NINED3DSPDM_SATURATE));
1418
1419 switch (param->file) {
1420 case D3DSPR_INPUT:
1421 if (IS_VS) {
1422 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1423 } else {
1424 assert(!param->rel);
1425 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1426 src = tx->regs.v[param->idx];
1427 }
1428 break;
1429 default:
1430 src = ureg_src(tx_dst_param(tx, param));
1431 break;
1432 }
1433 if (param->rel)
1434 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1435
1436 if (!param->mask)
1437 WARN("mask is 0, using identity swizzle\n");
1438
1439 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1440 char s[4];
1441 int n;
1442 int c;
1443 for (n = 0, c = 0; c < 4; ++c)
1444 if (param->mask & (1 << c))
1445 s[n++] = c;
1446 assert(n);
1447 for (c = n; c < 4; ++c)
1448 s[c] = s[n - 1];
1449 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1450 }
1451 return src;
1452 }
1453
1454 static HRESULT
1455 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1456 {
1457 struct ureg_program *ureg = tx->ureg;
1458 struct ureg_dst dst;
1459 struct ureg_src src[2];
1460 struct sm1_src_param *src_mat = &tx->insn.src[1];
1461 unsigned i;
1462
1463 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1464 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1465
1466 for (i = 0; i < n; i++)
1467 {
1468 const unsigned m = (1 << i);
1469
1470 src[1] = tx_src_param(tx, src_mat);
1471 src_mat->idx++;
1472
1473 if (!(dst.WriteMask & m))
1474 continue;
1475
1476 /* XXX: src == dst case ? */
1477
1478 switch (k) {
1479 case 3:
1480 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1481 break;
1482 case 4:
1483 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1484 break;
1485 default:
1486 DBG("invalid operation: M%ux%u\n", m, n);
1487 break;
1488 }
1489 }
1490
1491 return D3D_OK;
1492 }
1493
/* Expands to two zero initializers.
 * NOTE(review): presumably the {min, max} shader-version pair of an opcode
 * table entry, marking the opcode as unsupported - confirm against the table. */
#define VNOTSUPPORTED 0, 0
/* Pack a shader version into one integer: major in bits 8 and up, minor in
 * the low bits. */
#define V(maj, min) (((maj) << 8) | (min))
1496
1497 static inline const char *
1498 d3dsio_to_string( unsigned opcode )
1499 {
1500 static const char *names[] = {
1501 "NOP",
1502 "MOV",
1503 "ADD",
1504 "SUB",
1505 "MAD",
1506 "MUL",
1507 "RCP",
1508 "RSQ",
1509 "DP3",
1510 "DP4",
1511 "MIN",
1512 "MAX",
1513 "SLT",
1514 "SGE",
1515 "EXP",
1516 "LOG",
1517 "LIT",
1518 "DST",
1519 "LRP",
1520 "FRC",
1521 "M4x4",
1522 "M4x3",
1523 "M3x4",
1524 "M3x3",
1525 "M3x2",
1526 "CALL",
1527 "CALLNZ",
1528 "LOOP",
1529 "RET",
1530 "ENDLOOP",
1531 "LABEL",
1532 "DCL",
1533 "POW",
1534 "CRS",
1535 "SGN",
1536 "ABS",
1537 "NRM",
1538 "SINCOS",
1539 "REP",
1540 "ENDREP",
1541 "IF",
1542 "IFC",
1543 "ELSE",
1544 "ENDIF",
1545 "BREAK",
1546 "BREAKC",
1547 "MOVA",
1548 "DEFB",
1549 "DEFI",
1550 NULL,
1551 NULL,
1552 NULL,
1553 NULL,
1554 NULL,
1555 NULL,
1556 NULL,
1557 NULL,
1558 NULL,
1559 NULL,
1560 NULL,
1561 NULL,
1562 NULL,
1563 NULL,
1564 NULL,
1565 "TEXCOORD",
1566 "TEXKILL",
1567 "TEX",
1568 "TEXBEM",
1569 "TEXBEML",
1570 "TEXREG2AR",
1571 "TEXREG2GB",
1572 "TEXM3x2PAD",
1573 "TEXM3x2TEX",
1574 "TEXM3x3PAD",
1575 "TEXM3x3TEX",
1576 NULL,
1577 "TEXM3x3SPEC",
1578 "TEXM3x3VSPEC",
1579 "EXPP",
1580 "LOGP",
1581 "CND",
1582 "DEF",
1583 "TEXREG2RGB",
1584 "TEXDP3TEX",
1585 "TEXM3x2DEPTH",
1586 "TEXDP3",
1587 "TEXM3x3",
1588 "TEXDEPTH",
1589 "CMP",
1590 "BEM",
1591 "DP2ADD",
1592 "DSX",
1593 "DSY",
1594 "TEXLDD",
1595 "SETP",
1596 "TEXLDL",
1597 "BREAKP"
1598 };
1599
1600 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1601
1602 switch (opcode) {
1603 case D3DSIO_PHASE: return "PHASE";
1604 case D3DSIO_COMMENT: return "COMMENT";
1605 case D3DSIO_END: return "END";
1606 default:
1607 return NULL;
1608 }
1609 }
1610
/* Initializer made only of zeros and a NULL pointer.
 * NOTE(review): presumably an empty opcode-table entry whose version fields
 * are zero, so that IS_VALID_INSTRUCTION() rejects it - the entry type is
 * declared outside this chunk, confirm there. */
#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when any of the vertex/fragment min/max version fields of inst
 * is non-zero. */
#define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
                                    (inst).vert_version.max | \
                                    (inst).frag_version.min | \
                                    (inst).frag_version.max)

/* Name of the translation handler for opcode "name". */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the handler for opcode "name"; the handler
 * receives the translator as "tx" and returns an HRESULT. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration; the definition is not part of this section. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1626
1627 DECL_SPECIAL(NOP)
1628 {
1629 /* Nothing to do. NOP was used to avoid hangs
1630 * with very old d3d drivers. */
1631 return D3D_OK;
1632 }
1633
1634 DECL_SPECIAL(SUB)
1635 {
1636 struct ureg_program *ureg = tx->ureg;
1637 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1638 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1639 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1640
1641 ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1642 return D3D_OK;
1643 }
1644
1645 DECL_SPECIAL(ABS)
1646 {
1647 struct ureg_program *ureg = tx->ureg;
1648 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1649 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1650
1651 ureg_MOV(ureg, dst, ureg_abs(src));
1652 return D3D_OK;
1653 }
1654
1655 DECL_SPECIAL(XPD)
1656 {
1657 struct ureg_program *ureg = tx->ureg;
1658 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1659 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1660 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1661
1662 ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1663 ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1664 TGSI_SWIZZLE_X, 0),
1665 ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1666 TGSI_SWIZZLE_Y, 0));
1667 ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1668 ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1669 TGSI_SWIZZLE_Y, 0),
1670 ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1671 TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1672 ureg_src(dst));
1673 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1674 ureg_imm1f(ureg, 1));
1675 return D3D_OK;
1676 }
1677
1678 DECL_SPECIAL(M4x4)
1679 {
1680 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1681 }
1682
1683 DECL_SPECIAL(M4x3)
1684 {
1685 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1686 }
1687
1688 DECL_SPECIAL(M3x4)
1689 {
1690 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1691 }
1692
1693 DECL_SPECIAL(M3x3)
1694 {
1695 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1696 }
1697
1698 DECL_SPECIAL(M3x2)
1699 {
1700 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1701 }
1702
1703 DECL_SPECIAL(CMP)
1704 {
1705 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1706 tx_src_param(tx, &tx->insn.src[0]),
1707 tx_src_param(tx, &tx->insn.src[2]),
1708 tx_src_param(tx, &tx->insn.src[1]));
1709 return D3D_OK;
1710 }
1711
1712 DECL_SPECIAL(CND)
1713 {
1714 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1715 struct ureg_dst cgt;
1716 struct ureg_src cnd;
1717
1718 /* the coissue flag was a tip for compilers to advise to
1719 * execute two operations at the same time, in cases
1720 * the two executions had same dst with different channels.
1721 * It has no effect on current hw. However it seems CND
1722 * is affected. The handling of this very specific case
1723 * handled below mimick wine behaviour */
1724 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1725 ureg_MOV(tx->ureg,
1726 dst, tx_src_param(tx, &tx->insn.src[1]));
1727 return D3D_OK;
1728 }
1729
1730 cnd = tx_src_param(tx, &tx->insn.src[0]);
1731 cgt = tx_scratch(tx);
1732
1733 if (tx->version.major == 1 && tx->version.minor < 4)
1734 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1735
1736 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1737
1738 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1739 tx_src_param(tx, &tx->insn.src[1]),
1740 tx_src_param(tx, &tx->insn.src[2]));
1741 return D3D_OK;
1742 }
1743
1744 DECL_SPECIAL(CALL)
1745 {
1746 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1747 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1748 return D3D_OK;
1749 }
1750
1751 DECL_SPECIAL(CALLNZ)
1752 {
1753 struct ureg_program *ureg = tx->ureg;
1754 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1755
1756 if (!tx->native_integers)
1757 ureg_IF(ureg, src, tx_cond(tx));
1758 else
1759 ureg_UIF(ureg, src, tx_cond(tx));
1760 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1761 tx_endcond(tx);
1762 ureg_ENDIF(ureg);
1763 return D3D_OK;
1764 }
1765
1766 DECL_SPECIAL(LOOP)
1767 {
1768 struct ureg_program *ureg = tx->ureg;
1769 unsigned *label;
1770 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1771 struct ureg_dst ctr;
1772 struct ureg_dst tmp;
1773 struct ureg_src ctrx;
1774
1775 label = tx_bgnloop(tx);
1776 ctr = tx_get_loopctr(tx, TRUE);
1777 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1778
1779 /* src: num_iterations - start_value of al - step for al - 0 */
1780 ureg_MOV(ureg, ctr, src);
1781 ureg_BGNLOOP(tx->ureg, label);
1782 tmp = tx_scratch_scalar(tx);
1783 /* Initially ctr.x contains the number of iterations.
1784 * ctr.y will contain the updated value of al.
1785 * We decrease ctr.x at the end of every iteration,
1786 * and stop when it reaches 0. */
1787
1788 if (!tx->native_integers) {
1789 /* case src and ctr contain floats */
1790 /* to avoid precision issue, we stop when ctr <= 0.5 */
1791 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1792 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1793 } else {
1794 /* case src and ctr contain integers */
1795 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1796 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1797 }
1798 ureg_BRK(ureg);
1799 tx_endcond(tx);
1800 ureg_ENDIF(ureg);
1801 return D3D_OK;
1802 }
1803
1804 DECL_SPECIAL(RET)
1805 {
1806 /* RET as a last instruction could be safely ignored.
1807 * Remove it to prevent crashes/warnings in case underlying
1808 * driver doesn't implement arbitrary returns.
1809 */
1810 if (*(tx->parse_next) != NINED3DSP_END) {
1811 ureg_RET(tx->ureg);
1812 }
1813 return D3D_OK;
1814 }
1815
1816 DECL_SPECIAL(ENDLOOP)
1817 {
1818 struct ureg_program *ureg = tx->ureg;
1819 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1820 struct ureg_dst dst_ctrx, dst_al;
1821 struct ureg_src src_ctr, al_counter;
1822
1823 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1824 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1825 src_ctr = ureg_src(ctr);
1826 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1827
1828 /* ctr.x -= 1
1829 * ctr.y (aL) += step */
1830 if (!tx->native_integers) {
1831 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1832 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1833 } else {
1834 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1835 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1836 }
1837 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1838 return D3D_OK;
1839 }
1840
1841 DECL_SPECIAL(LABEL)
1842 {
1843 unsigned k = tx->num_inst_labels;
1844 unsigned n = tx->insn.src[0].idx;
1845 assert(n < 2048);
1846 if (n >= k)
1847 tx->inst_labels = REALLOC(tx->inst_labels,
1848 k * sizeof(tx->inst_labels[0]),
1849 n * sizeof(tx->inst_labels[0]));
1850
1851 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1852 return D3D_OK;
1853 }
1854
1855 DECL_SPECIAL(SINCOS)
1856 {
1857 struct ureg_program *ureg = tx->ureg;
1858 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1859 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1860 struct ureg_dst tmp = tx_scratch_scalar(tx);
1861
1862 assert(!(dst.WriteMask & 0xc));
1863
1864 /* Copying to a temporary register avoids src/dst aliasing.
1865 * src is supposed to have replicated swizzle. */
1866 ureg_MOV(ureg, tmp, src);
1867
1868 /* z undefined, w untouched */
1869 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1870 tx_src_scalar(tmp));
1871 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1872 tx_src_scalar(tmp));
1873 return D3D_OK;
1874 }
1875
1876 DECL_SPECIAL(SGN)
1877 {
1878 ureg_SSG(tx->ureg,
1879 tx_dst_param(tx, &tx->insn.dst[0]),
1880 tx_src_param(tx, &tx->insn.src[0]));
1881 return D3D_OK;
1882 }
1883
1884 DECL_SPECIAL(REP)
1885 {
1886 struct ureg_program *ureg = tx->ureg;
1887 unsigned *label;
1888 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1889 struct ureg_dst ctr;
1890 struct ureg_dst tmp;
1891 struct ureg_src ctrx;
1892
1893 label = tx_bgnloop(tx);
1894 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1895 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1896
1897 /* NOTE: rep must be constant, so we don't have to save the count */
1898 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1899
1900 /* rep: num_iterations - 0 - 0 - 0 */
1901 ureg_MOV(ureg, ctr, rep);
1902 ureg_BGNLOOP(ureg, label);
1903 tmp = tx_scratch_scalar(tx);
1904 /* Initially ctr.x contains the number of iterations.
1905 * We decrease ctr.x at the end of every iteration,
1906 * and stop when it reaches 0. */
1907
1908 if (!tx->native_integers) {
1909 /* case src and ctr contain floats */
1910 /* to avoid precision issue, we stop when ctr <= 0.5 */
1911 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1912 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1913 } else {
1914 /* case src and ctr contain integers */
1915 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1916 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1917 }
1918 ureg_BRK(ureg);
1919 tx_endcond(tx);
1920 ureg_ENDIF(ureg);
1921
1922 return D3D_OK;
1923 }
1924
1925 DECL_SPECIAL(ENDREP)
1926 {
1927 struct ureg_program *ureg = tx->ureg;
1928 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1929 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1930 struct ureg_src src_ctr = ureg_src(ctr);
1931
1932 /* ctr.x -= 1 */
1933 if (!tx->native_integers)
1934 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1935 else
1936 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1937
1938 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1939 return D3D_OK;
1940 }
1941
1942 DECL_SPECIAL(ENDIF)
1943 {
1944 tx_endcond(tx);
1945 ureg_ENDIF(tx->ureg);
1946 return D3D_OK;
1947 }
1948
1949 DECL_SPECIAL(IF)
1950 {
1951 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1952
1953 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1954 ureg_UIF(tx->ureg, src, tx_cond(tx));
1955 else
1956 ureg_IF(tx->ureg, src, tx_cond(tx));
1957
1958 return D3D_OK;
1959 }
1960
1961 static inline unsigned
1962 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1963 {
1964 switch (flags) {
1965 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1966 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1967 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1968 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1969 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1970 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1971 default:
1972 assert(!"invalid comparison flags");
1973 return TGSI_OPCODE_SGT;
1974 }
1975 }
1976
1977 DECL_SPECIAL(IFC)
1978 {
1979 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1980 struct ureg_src src[2];
1981 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1982 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1983 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1984 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
1985 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1986 return D3D_OK;
1987 }
1988
1989 DECL_SPECIAL(ELSE)
1990 {
1991 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1992 return D3D_OK;
1993 }
1994
1995 DECL_SPECIAL(BREAKC)
1996 {
1997 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1998 struct ureg_src src[2];
1999 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2000 src[0] = tx_src_param(tx, &tx->insn.src[0]);
2001 src[1] = tx_src_param(tx, &tx->insn.src[1]);
2002 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2003 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2004 ureg_BRK(tx->ureg);
2005 tx_endcond(tx);
2006 ureg_ENDIF(tx->ureg);
2007 return D3D_OK;
2008 }
2009
2010 static const char *sm1_declusage_names[] =
2011 {
2012 [D3DDECLUSAGE_POSITION] = "POSITION",
2013 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
2014 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
2015 [D3DDECLUSAGE_NORMAL] = "NORMAL",
2016 [D3DDECLUSAGE_PSIZE] = "PSIZE",
2017 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
2018 [D3DDECLUSAGE_TANGENT] = "TANGENT",
2019 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
2020 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
2021 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
2022 [D3DDECLUSAGE_COLOR] = "COLOR",
2023 [D3DDECLUSAGE_FOG] = "FOG",
2024 [D3DDECLUSAGE_DEPTH] = "DEPTH",
2025 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
2026 };
2027
2028 static inline unsigned
2029 sm1_to_nine_declusage(struct sm1_semantic *dcl)
2030 {
2031 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
2032 }
2033
2034 static void
2035 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
2036 boolean tc,
2037 struct sm1_semantic *dcl)
2038 {
2039 BYTE index = dcl->usage_idx;
2040
2041 /* For everything that is not matching to a TGSI_SEMANTIC_****,
2042 * we match to a TGSI_SEMANTIC_GENERIC with index.
2043 *
2044 * The index can be anything UINT16 and usage_idx is BYTE,
2045 * so we can fit everything. It doesn't matter if indices
2046 * are close together or low.
2047 *
2048 *
2049 * POSITION >= 1: 10 * index + 7
2050 * COLOR >= 2: 10 * (index-1) + 8
2051 * FOG: 16
2052 * TEXCOORD[0..15]: index
2053 * BLENDWEIGHT: 10 * index + 19
2054 * BLENDINDICES: 10 * index + 20
2055 * NORMAL: 10 * index + 21
2056 * TANGENT: 10 * index + 22
2057 * BINORMAL: 10 * index + 23
2058 * TESSFACTOR: 10 * index + 24
2059 */
2060
2061 switch (dcl->usage) {
2062 case D3DDECLUSAGE_POSITION:
2063 case D3DDECLUSAGE_POSITIONT:
2064 case D3DDECLUSAGE_DEPTH:
2065 if (index == 0) {
2066 sem->Name = TGSI_SEMANTIC_POSITION;
2067 sem->Index = 0;
2068 } else {
2069 sem->Name = TGSI_SEMANTIC_GENERIC;
2070 sem->Index = 10 * index + 7;
2071 }
2072 break;
2073 case D3DDECLUSAGE_COLOR:
2074 if (index < 2) {
2075 sem->Name = TGSI_SEMANTIC_COLOR;
2076 sem->Index = index;
2077 } else {
2078 sem->Name = TGSI_SEMANTIC_GENERIC;
2079 sem->Index = 10 * (index-1) + 8;
2080 }
2081 break;
2082 case D3DDECLUSAGE_FOG:
2083 assert(index == 0);
2084 sem->Name = TGSI_SEMANTIC_GENERIC;
2085 sem->Index = 16;
2086 break;
2087 case D3DDECLUSAGE_PSIZE:
2088 assert(index == 0);
2089 sem->Name = TGSI_SEMANTIC_PSIZE;
2090 sem->Index = 0;
2091 break;
2092 case D3DDECLUSAGE_TEXCOORD:
2093 assert(index < 16);
2094 if (index < 8 && tc)
2095 sem->Name = TGSI_SEMANTIC_TEXCOORD;
2096 else
2097 sem->Name = TGSI_SEMANTIC_GENERIC;
2098 sem->Index = index;
2099 break;
2100 case D3DDECLUSAGE_BLENDWEIGHT:
2101 sem->Name = TGSI_SEMANTIC_GENERIC;
2102 sem->Index = 10 * index + 19;
2103 break;
2104 case D3DDECLUSAGE_BLENDINDICES:
2105 sem->Name = TGSI_SEMANTIC_GENERIC;
2106 sem->Index = 10 * index + 20;
2107 break;
2108 case D3DDECLUSAGE_NORMAL:
2109 sem->Name = TGSI_SEMANTIC_GENERIC;
2110 sem->Index = 10 * index + 21;
2111 break;
2112 case D3DDECLUSAGE_TANGENT:
2113 sem->Name = TGSI_SEMANTIC_GENERIC;
2114 sem->Index = 10 * index + 22;
2115 break;
2116 case D3DDECLUSAGE_BINORMAL:
2117 sem->Name = TGSI_SEMANTIC_GENERIC;
2118 sem->Index = 10 * index + 23;
2119 break;
2120 case D3DDECLUSAGE_TESSFACTOR:
2121 sem->Name = TGSI_SEMANTIC_GENERIC;
2122 sem->Index = 10 * index + 24;
2123 break;
2124 case D3DDECLUSAGE_SAMPLE:
2125 sem->Name = TGSI_SEMANTIC_COUNT;
2126 sem->Index = 0;
2127 break;
2128 default:
2129 unreachable("Invalid DECLUSAGE.");
2130 break;
2131 }
2132 }
2133
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT.
 * The sampler-type helpers in this file compare their BYTE sampler_type
 * argument against these values. */
#define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2138 static inline unsigned
2139 d3dstt_to_tgsi_tex(BYTE sampler_type)
2140 {
2141 switch (sampler_type) {
2142 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
2143 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
2144 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2145 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
2146 default:
2147 assert(0);
2148 return TGSI_TEXTURE_UNKNOWN;
2149 }
2150 }
2151 static inline unsigned
2152 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2153 {
2154 switch (sampler_type) {
2155 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2156 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2157 case NINED3DSTT_VOLUME:
2158 case NINED3DSTT_CUBE:
2159 default:
2160 assert(0);
2161 return TGSI_TEXTURE_UNKNOWN;
2162 }
2163 }
2164 static inline unsigned
2165 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2166 {
2167 boolean shadow = !!(info->sampler_mask_shadow & (1 << stage));
2168 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2169 case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
2170 case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
2171 case 3: return TGSI_TEXTURE_3D;
2172 default:
2173 return TGSI_TEXTURE_CUBE;
2174 }
2175 }
2176
2177 static const char *
2178 sm1_sampler_type_name(BYTE sampler_type)
2179 {
2180 switch (sampler_type) {
2181 case NINED3DSTT_1D: return "1D";
2182 case NINED3DSTT_2D: return "2D";
2183 case NINED3DSTT_VOLUME: return "VOLUME";
2184 case NINED3DSTT_CUBE: return "CUBE";
2185 default:
2186 return "(D3DSTT_?)";
2187 }
2188 }
2189
2190 static inline unsigned
2191 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2192 {
2193 switch (sem->Name) {
2194 case TGSI_SEMANTIC_POSITION:
2195 case TGSI_SEMANTIC_NORMAL:
2196 return TGSI_INTERPOLATE_LINEAR;
2197 case TGSI_SEMANTIC_BCOLOR:
2198 case TGSI_SEMANTIC_COLOR:
2199 return TGSI_INTERPOLATE_COLOR;
2200 case TGSI_SEMANTIC_FOG:
2201 case TGSI_SEMANTIC_GENERIC:
2202 case TGSI_SEMANTIC_TEXCOORD:
2203 case TGSI_SEMANTIC_CLIPDIST:
2204 case TGSI_SEMANTIC_CLIPVERTEX:
2205 return TGSI_INTERPOLATE_PERSPECTIVE;
2206 case TGSI_SEMANTIC_EDGEFLAG:
2207 case TGSI_SEMANTIC_FACE:
2208 case TGSI_SEMANTIC_INSTANCEID:
2209 case TGSI_SEMANTIC_PCOORD:
2210 case TGSI_SEMANTIC_PRIMID:
2211 case TGSI_SEMANTIC_PSIZE:
2212 case TGSI_SEMANTIC_VERTEXID:
2213 return TGSI_INTERPOLATE_CONSTANT;
2214 default:
2215 assert(0);
2216 return TGSI_INTERPOLATE_CONSTANT;
2217 }
2218 }
2219
2220 DECL_SPECIAL(DCL)
2221 {
2222 struct ureg_program *ureg = tx->ureg;
2223 boolean is_input;
2224 boolean is_sampler;
2225 struct tgsi_declaration_semantic tgsi;
2226 struct sm1_semantic sem;
2227 sm1_read_semantic(tx, &sem);
2228
2229 is_input = sem.reg.file == D3DSPR_INPUT;
2230 is_sampler =
2231 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2232
2233 DUMP("DCL ");
2234 sm1_dump_dst_param(&sem.reg);
2235 if (is_sampler)
2236 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2237 else
2238 if (tx->version.major >= 3)
2239 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2240 else
2241 if (sem.usage | sem.usage_idx)
2242 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2243 else
2244 DUMP("\n");
2245
2246 if (is_sampler) {
2247 const unsigned m = 1 << sem.reg.idx;
2248 ureg_DECL_sampler(ureg, sem.reg.idx);
2249 tx->info->sampler_mask |= m;
2250 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2251 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2252 d3dstt_to_tgsi_tex(sem.sampler_type);
2253 return D3D_OK;
2254 }
2255
2256 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2257 if (IS_VS) {
2258 if (is_input) {
2259 /* linkage outside of shader with vertex declaration */
2260 ureg_DECL_vs_input(ureg, sem.reg.idx);
2261 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2262 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2263 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2264 /* NOTE: preserving order in case of indirect access */
2265 } else
2266 if (tx->version.major >= 3) {
2267 /* SM2 output semantic determined by file */
2268 assert(sem.reg.mask != 0);
2269 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2270 tx->info->position_t = TRUE;
2271 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2272 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2273 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2274 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2275 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2276 if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2277 tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2278 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2279 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2280 }
2281
2282 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2283 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2284 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2285 }
2286 }
2287 } else {
2288 if (is_input && tx->version.major >= 3) {
2289 unsigned interp_location = 0;
2290 /* SM3 only, SM2 input semantic determined by file */
2291 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2292 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2293 /* PositionT and tessfactor forbidden */
2294 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2295 return D3DERR_INVALIDCALL;
2296
2297 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2298 /* Position0 is forbidden (likely because vPos already does that) */
2299 if (sem.usage == D3DDECLUSAGE_POSITION)
2300 return D3DERR_INVALIDCALL;
2301 /* Following code is for depth */
2302 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2303 return D3D_OK;
2304 }
2305
2306 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2307 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2308 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2309
2310 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2311 ureg, tgsi.Name, tgsi.Index,
2312 nine_tgsi_to_interp_mode(&tgsi),
2313 0, /* cylwrap */
2314 interp_location, 0, 1);
2315 } else
2316 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2317 /* FragColor or FragDepth */
2318 assert(sem.reg.mask != 0);
2319 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2320 0, 1);
2321 }
2322 }
2323 return D3D_OK;
2324 }
2325
2326 DECL_SPECIAL(DEF)
2327 {
2328 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2329 return D3D_OK;
2330 }
2331
2332 DECL_SPECIAL(DEFB)
2333 {
2334 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2335 return D3D_OK;
2336 }
2337
2338 DECL_SPECIAL(DEFI)
2339 {
2340 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2341 return D3D_OK;
2342 }
2343
2344 DECL_SPECIAL(POW)
2345 {
2346 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2347 struct ureg_src src[2] = {
2348 tx_src_param(tx, &tx->insn.src[0]),
2349 tx_src_param(tx, &tx->insn.src[1])
2350 };
2351 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2352 return D3D_OK;
2353 }
2354
2355 /* Tests results on Win 10:
2356 * NV (NVIDIA GeForce GT 635M)
2357 * AMD (AMD Radeon HD 7730M)
2358 * INTEL (Intel(R) HD Graphics 4000)
2359 * PS2 and PS3:
2360 * RCP and RSQ can generate inf on NV and AMD.
2361 * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
2362 * NV: log not clamped
2363 * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
2364 * INTEL: log(0) is -FLT_MAX and log(inf) is 127
2365 * All devices have 0*anything = 0
2366 *
2367 * INTEL VS2 and VS3: same behaviour.
2368 * Some differences VS2 and VS3 for constants defined with inf/NaN.
2369 * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
2370 * VS2 seems to clamp to zero (may be test failure).
2371 * AMD VS2: unknown, VS3: very likely behaviour of PS3
2372 * NV VS2 and VS3: very likely behaviour of PS3
 * For both, Inf in VS becomes NaN in PS
2374 * "Very likely" because the test was less extensive.
2375 *
2376 * Thus all clamping can be removed for shaders 2 and 3,
2377 * as long as 0*anything = 0.
 * Otherwise, clamp to enforce 0*anything = 0 (anything then being
 * neither inf nor NaN, the user being unlikely to pass them
 * as constants).
2381 * The status for VS1 and PS1 is unknown.
2382 */
2383
2384 DECL_SPECIAL(RCP)
2385 {
2386 struct ureg_program *ureg = tx->ureg;
2387 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2388 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2389 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2390 ureg_RCP(ureg, tmp, src);
2391 if (!tx->mul_zero_wins) {
2392 /* FLT_MAX has issues with Rayman */
2393 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
2394 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
2395 }
2396 return D3D_OK;
2397 }
2398
2399 DECL_SPECIAL(RSQ)
2400 {
2401 struct ureg_program *ureg = tx->ureg;
2402 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2403 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2404 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2405 ureg_RSQ(ureg, tmp, ureg_abs(src));
2406 if (!tx->mul_zero_wins)
2407 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2408 return D3D_OK;
2409 }
2410
2411 DECL_SPECIAL(LOG)
2412 {
2413 struct ureg_program *ureg = tx->ureg;
2414 struct ureg_dst tmp = tx_scratch_scalar(tx);
2415 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2416 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2417 ureg_LG2(ureg, tmp, ureg_abs(src));
2418 if (tx->mul_zero_wins) {
2419 ureg_MOV(ureg, dst, tx_src_scalar(tmp));
2420 } else {
2421 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2422 }
2423 return D3D_OK;
2424 }
2425
2426 DECL_SPECIAL(LIT)
2427 {
2428 struct ureg_program *ureg = tx->ureg;
2429 struct ureg_dst tmp = tx_scratch(tx);
2430 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2431 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2432 ureg_LIT(ureg, tmp, src);
2433 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2434 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2435 * it 0^0 if src.w=0, which value is driver dependent. */
2436 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2437 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2438 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2439 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2440 return D3D_OK;
2441 }
2442
2443 DECL_SPECIAL(NRM)
2444 {
2445 struct ureg_program *ureg = tx->ureg;
2446 struct ureg_dst tmp = tx_scratch_scalar(tx);
2447 struct ureg_src nrm = tx_src_scalar(tmp);
2448 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2449 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2450 ureg_DP3(ureg, tmp, src, src);
2451 ureg_RSQ(ureg, tmp, nrm);
2452 if (!tx->mul_zero_wins)
2453 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2454 ureg_MUL(ureg, dst, src, nrm);
2455 return D3D_OK;
2456 }
2457
2458 DECL_SPECIAL(DP2ADD)
2459 {
2460 struct ureg_dst tmp = tx_scratch_scalar(tx);
2461 struct ureg_src dp2 = tx_src_scalar(tmp);
2462 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2463 struct ureg_src src[3];
2464 int i;
2465 for (i = 0; i < 3; ++i)
2466 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2467 assert_replicate_swizzle(&src[2]);
2468
2469 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2470 ureg_ADD(tx->ureg, dst, src[2], dp2);
2471
2472 return D3D_OK;
2473 }
2474
2475 DECL_SPECIAL(TEXCOORD)
2476 {
2477 struct ureg_program *ureg = tx->ureg;
2478 const unsigned s = tx->insn.dst[0].idx;
2479 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2480
2481 tx_texcoord_alloc(tx, s);
2482 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2483 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2484
2485 return D3D_OK;
2486 }
2487
2488 DECL_SPECIAL(TEXCOORD_ps14)
2489 {
2490 struct ureg_program *ureg = tx->ureg;
2491 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2492 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2493
2494 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2495
2496 ureg_MOV(ureg, dst, src);
2497
2498 return D3D_OK;
2499 }
2500
2501 DECL_SPECIAL(TEXKILL)
2502 {
2503 struct ureg_src reg;
2504
2505 if (tx->version.major > 1 || tx->version.minor > 3) {
2506 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2507 } else {
2508 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2509 reg = tx->regs.vT[tx->insn.dst[0].idx];
2510 }
2511 if (tx->version.major < 2)
2512 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2513 ureg_KILL_IF(tx->ureg, reg);
2514
2515 return D3D_OK;
2516 }
2517
2518 DECL_SPECIAL(TEXBEM)
2519 {
2520 struct ureg_program *ureg = tx->ureg;
2521 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2522 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2523 struct ureg_dst tmp, tmp2, texcoord;
2524 struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
2525 struct ureg_src bumpenvlscale, bumpenvloffset;
2526 const int m = tx->insn.dst[0].idx;
2527
2528 assert(tx->version.major == 1);
2529
2530 sample = ureg_DECL_sampler(ureg, m);
2531 tx->info->sampler_mask |= 1 << m;
2532
2533 tx_texcoord_alloc(tx, m);
2534
2535 tmp = tx_scratch(tx);
2536 tmp2 = tx_scratch(tx);
2537 texcoord = tx_scratch(tx);
2538 /*
2539 * Bump-env-matrix:
2540 * 00 is X
2541 * 01 is Y
2542 * 10 is Z
2543 * 11 is W
2544 */
2545 c8m = nine_float_constant_src(tx, 8+m);
2546 c16m2 = nine_float_constant_src(tx, 8+8+m/2);
2547
2548 m00 = NINE_APPLY_SWIZZLE(c8m, X);
2549 m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2550 m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2551 m11 = NINE_APPLY_SWIZZLE(c8m, W);
2552
2553 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2554 if (m % 2 == 0) {
2555 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
2556 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
2557 } else {
2558 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
2559 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
2560 }
2561
2562 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2563
2564 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2565 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2566 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2567 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2568 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2569 NINE_APPLY_SWIZZLE(src, Y),
2570 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2571
2572 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2573 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2574 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2575 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2576 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2577 NINE_APPLY_SWIZZLE(src, Y),
2578 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2579
2580 /* Now the texture coordinates are in tmp.xy */
2581
2582 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2583 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2584 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2585 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2586 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2587 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
2588 bumpenvlscale, bumpenvloffset);
2589 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2590 }
2591
2592 tx->info->bumpenvmat_needed = 1;
2593
2594 return D3D_OK;
2595 }
2596
2597 DECL_SPECIAL(TEXREG2AR)
2598 {
2599 struct ureg_program *ureg = tx->ureg;
2600 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2601 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2602 struct ureg_src sample;
2603 const int m = tx->insn.dst[0].idx;
2604 ASSERTED const int n = tx->insn.src[0].idx;
2605 assert(m >= 0 && m > n);
2606
2607 sample = ureg_DECL_sampler(ureg, m);
2608 tx->info->sampler_mask |= 1 << m;
2609 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
2610
2611 return D3D_OK;
2612 }
2613
2614 DECL_SPECIAL(TEXREG2GB)
2615 {
2616 struct ureg_program *ureg = tx->ureg;
2617 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2618 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2619 struct ureg_src sample;
2620 const int m = tx->insn.dst[0].idx;
2621 ASSERTED const int n = tx->insn.src[0].idx;
2622 assert(m >= 0 && m > n);
2623
2624 sample = ureg_DECL_sampler(ureg, m);
2625 tx->info->sampler_mask |= 1 << m;
2626 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2627
2628 return D3D_OK;
2629 }
2630
2631 DECL_SPECIAL(TEXM3x2PAD)
2632 {
2633 return D3D_OK; /* this is just padding */
2634 }
2635
2636 DECL_SPECIAL(TEXM3x2TEX)
2637 {
2638 struct ureg_program *ureg = tx->ureg;
2639 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2640 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2641 struct ureg_src sample;
2642 const int m = tx->insn.dst[0].idx - 1;
2643 ASSERTED const int n = tx->insn.src[0].idx;
2644 assert(m >= 0 && m > n);
2645
2646 tx_texcoord_alloc(tx, m);
2647 tx_texcoord_alloc(tx, m+1);
2648
2649 /* performs the matrix multiplication */
2650 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2651 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2652
2653 sample = ureg_DECL_sampler(ureg, m + 1);
2654 tx->info->sampler_mask |= 1 << (m + 1);
2655 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2656
2657 return D3D_OK;
2658 }
2659
2660 DECL_SPECIAL(TEXM3x3PAD)
2661 {
2662 return D3D_OK; /* this is just padding */
2663 }
2664
2665 DECL_SPECIAL(TEXM3x3SPEC)
2666 {
2667 struct ureg_program *ureg = tx->ureg;
2668 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2669 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2670 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2671 struct ureg_src sample;
2672 struct ureg_dst tmp;
2673 const int m = tx->insn.dst[0].idx - 2;
2674 ASSERTED const int n = tx->insn.src[0].idx;
2675 assert(m >= 0 && m > n);
2676
2677 tx_texcoord_alloc(tx, m);
2678 tx_texcoord_alloc(tx, m+1);
2679 tx_texcoord_alloc(tx, m+2);
2680
2681 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2682 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2683 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2684
2685 sample = ureg_DECL_sampler(ureg, m + 2);
2686 tx->info->sampler_mask |= 1 << (m + 2);
2687 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2688
2689 /* At this step, dst = N = (u', w', z').
2690 * We want dst to be the texture sampled at (u'', w'', z''), with
2691 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2692 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2693 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2694 /* at this step tmp.x = 1/N.N */
2695 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2696 /* at this step tmp.y = N.E */
2697 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2698 /* at this step tmp.x = N.E/N.N */
2699 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2700 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2701 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2702 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2703 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2704
2705 return D3D_OK;
2706 }
2707
2708 DECL_SPECIAL(TEXREG2RGB)
2709 {
2710 struct ureg_program *ureg = tx->ureg;
2711 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2712 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2713 struct ureg_src sample;
2714 const int m = tx->insn.dst[0].idx;
2715 ASSERTED const int n = tx->insn.src[0].idx;
2716 assert(m >= 0 && m > n);
2717
2718 sample = ureg_DECL_sampler(ureg, m);
2719 tx->info->sampler_mask |= 1 << m;
2720 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
2721
2722 return D3D_OK;
2723 }
2724
2725 DECL_SPECIAL(TEXDP3TEX)
2726 {
2727 struct ureg_program *ureg = tx->ureg;
2728 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2729 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2730 struct ureg_dst tmp;
2731 struct ureg_src sample;
2732 const int m = tx->insn.dst[0].idx;
2733 ASSERTED const int n = tx->insn.src[0].idx;
2734 assert(m >= 0 && m > n);
2735
2736 tx_texcoord_alloc(tx, m);
2737
2738 tmp = tx_scratch(tx);
2739 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2740 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2741
2742 sample = ureg_DECL_sampler(ureg, m);
2743 tx->info->sampler_mask |= 1 << m;
2744 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2745
2746 return D3D_OK;
2747 }
2748
2749 DECL_SPECIAL(TEXM3x2DEPTH)
2750 {
2751 struct ureg_program *ureg = tx->ureg;
2752 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2753 struct ureg_dst tmp;
2754 const int m = tx->insn.dst[0].idx - 1;
2755 ASSERTED const int n = tx->insn.src[0].idx;
2756 assert(m >= 0 && m > n);
2757
2758 tx_texcoord_alloc(tx, m);
2759 tx_texcoord_alloc(tx, m+1);
2760
2761 tmp = tx_scratch(tx);
2762
2763 /* performs the matrix multiplication */
2764 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2765 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2766
2767 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2768 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2769 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2770 /* res = 'w' == 0 ? 1.0 : z/w */
2771 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2772 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2773 /* replace the depth for depth testing with the result */
2774 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2775 TGSI_WRITEMASK_Z, 0, 1);
2776 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2777 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2778 return D3D_OK;
2779 }
2780
2781 DECL_SPECIAL(TEXDP3)
2782 {
2783 struct ureg_program *ureg = tx->ureg;
2784 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2785 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2786 const int m = tx->insn.dst[0].idx;
2787 ASSERTED const int n = tx->insn.src[0].idx;
2788 assert(m >= 0 && m > n);
2789
2790 tx_texcoord_alloc(tx, m);
2791
2792 ureg_DP3(ureg, dst, tx->regs.vT[m], src);
2793
2794 return D3D_OK;
2795 }
2796
2797 DECL_SPECIAL(TEXM3x3)
2798 {
2799 struct ureg_program *ureg = tx->ureg;
2800 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2801 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2802 struct ureg_src sample;
2803 struct ureg_dst E, tmp;
2804 const int m = tx->insn.dst[0].idx - 2;
2805 ASSERTED const int n = tx->insn.src[0].idx;
2806 assert(m >= 0 && m > n);
2807
2808 tx_texcoord_alloc(tx, m);
2809 tx_texcoord_alloc(tx, m+1);
2810 tx_texcoord_alloc(tx, m+2);
2811
2812 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2813 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2814 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2815
2816 switch (tx->insn.opcode) {
2817 case D3DSIO_TEXM3x3:
2818 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2819 break;
2820 case D3DSIO_TEXM3x3TEX:
2821 sample = ureg_DECL_sampler(ureg, m + 2);
2822 tx->info->sampler_mask |= 1 << (m + 2);
2823 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2824 break;
2825 case D3DSIO_TEXM3x3VSPEC:
2826 sample = ureg_DECL_sampler(ureg, m + 2);
2827 tx->info->sampler_mask |= 1 << (m + 2);
2828 E = tx_scratch(tx);
2829 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2830 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2831 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2832 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2833 /* At this step, dst = N = (u', w', z').
2834 * We want dst to be the texture sampled at (u'', w'', z''), with
2835 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2836 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2837 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2838 /* at this step tmp.x = 1/N.N */
2839 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2840 /* at this step tmp.y = N.E */
2841 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2842 /* at this step tmp.x = N.E/N.N */
2843 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2844 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2845 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2846 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2847 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2848 break;
2849 default:
2850 return D3DERR_INVALIDCALL;
2851 }
2852 return D3D_OK;
2853 }
2854
2855 DECL_SPECIAL(TEXDEPTH)
2856 {
2857 struct ureg_program *ureg = tx->ureg;
2858 struct ureg_dst r5;
2859 struct ureg_src r5r, r5g;
2860
2861 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2862
2863 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2864 * r5 won't be used afterward, thus we can use r5.ba */
2865 r5 = tx->regs.r[5];
2866 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2867 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2868
2869 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2870 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2871 /* r5.r = r/g */
2872 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2873 r5r, ureg_imm1f(ureg, 1.0f));
2874 /* replace the depth for depth testing with the result */
2875 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2876 TGSI_WRITEMASK_Z, 0, 1);
2877 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2878
2879 return D3D_OK;
2880 }
2881
2882 DECL_SPECIAL(BEM)
2883 {
2884 struct ureg_program *ureg = tx->ureg;
2885 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2886 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2887 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2888 struct ureg_src m00, m01, m10, m11, c8m;
2889 const int m = tx->insn.dst[0].idx;
2890 struct ureg_dst tmp;
2891 /*
2892 * Bump-env-matrix:
2893 * 00 is X
2894 * 01 is Y
2895 * 10 is Z
2896 * 11 is W
2897 */
2898 c8m = nine_float_constant_src(tx, 8+m);
2899 m00 = NINE_APPLY_SWIZZLE(c8m, X);
2900 m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2901 m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2902 m11 = NINE_APPLY_SWIZZLE(c8m, W);
2903 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2904 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2905 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2906 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2907 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2908 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2909
2910 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2911 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2912 NINE_APPLY_SWIZZLE(src1, X), src0);
2913 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2914 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2915 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2916 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2917
2918 tx->info->bumpenvmat_needed = 1;
2919
2920 return D3D_OK;
2921 }
2922
2923 DECL_SPECIAL(TEXLD)
2924 {
2925 struct ureg_program *ureg = tx->ureg;
2926 unsigned target;
2927 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2928 struct ureg_src src[2] = {
2929 tx_src_param(tx, &tx->insn.src[0]),
2930 tx_src_param(tx, &tx->insn.src[1])
2931 };
2932 assert(tx->insn.src[1].idx >= 0 &&
2933 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2934 target = tx->sampler_targets[tx->insn.src[1].idx];
2935
2936 switch (tx->insn.flags) {
2937 case 0:
2938 ureg_TEX(ureg, dst, target, src[0], src[1]);
2939 break;
2940 case NINED3DSI_TEXLD_PROJECT:
2941 ureg_TXP(ureg, dst, target, src[0], src[1]);
2942 break;
2943 case NINED3DSI_TEXLD_BIAS:
2944 ureg_TXB(ureg, dst, target, src[0], src[1]);
2945 break;
2946 default:
2947 assert(0);
2948 return D3DERR_INVALIDCALL;
2949 }
2950 return D3D_OK;
2951 }
2952
2953 DECL_SPECIAL(TEXLD_14)
2954 {
2955 struct ureg_program *ureg = tx->ureg;
2956 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2957 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2958 const unsigned s = tx->insn.dst[0].idx;
2959 const unsigned t = ps1x_sampler_type(tx->info, s);
2960
2961 tx->info->sampler_mask |= 1 << s;
2962 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2963
2964 return D3D_OK;
2965 }
2966
2967 DECL_SPECIAL(TEX)
2968 {
2969 struct ureg_program *ureg = tx->ureg;
2970 const unsigned s = tx->insn.dst[0].idx;
2971 const unsigned t = ps1x_sampler_type(tx->info, s);
2972 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2973 struct ureg_src src[2];
2974
2975 tx_texcoord_alloc(tx, s);
2976
2977 src[0] = tx->regs.vT[s];
2978 src[1] = ureg_DECL_sampler(ureg, s);
2979 tx->info->sampler_mask |= 1 << s;
2980
2981 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2982
2983 return D3D_OK;
2984 }
2985
2986 DECL_SPECIAL(TEXLDD)
2987 {
2988 unsigned target;
2989 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2990 struct ureg_src src[4] = {
2991 tx_src_param(tx, &tx->insn.src[0]),
2992 tx_src_param(tx, &tx->insn.src[1]),
2993 tx_src_param(tx, &tx->insn.src[2]),
2994 tx_src_param(tx, &tx->insn.src[3])
2995 };
2996 assert(tx->insn.src[1].idx >= 0 &&
2997 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2998 target = tx->sampler_targets[tx->insn.src[1].idx];
2999
3000 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
3001 return D3D_OK;
3002 }
3003
3004 DECL_SPECIAL(TEXLDL)
3005 {
3006 unsigned target;
3007 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3008 struct ureg_src src[2] = {
3009 tx_src_param(tx, &tx->insn.src[0]),
3010 tx_src_param(tx, &tx->insn.src[1])
3011 };
3012 assert(tx->insn.src[1].idx >= 0 &&
3013 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3014 target = tx->sampler_targets[tx->insn.src[1].idx];
3015
3016 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
3017 return D3D_OK;
3018 }
3019
3020 DECL_SPECIAL(SETP)
3021 {
3022 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
3023 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3024 struct ureg_src src[2] = {
3025 tx_src_param(tx, &tx->insn.src[0]),
3026 tx_src_param(tx, &tx->insn.src[1])
3027 };
3028 ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
3029 return D3D_OK;
3030 }
3031
3032 DECL_SPECIAL(BREAKP)
3033 {
3034 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3035 ureg_IF(tx->ureg, src, tx_cond(tx));
3036 ureg_BRK(tx->ureg);
3037 tx_endcond(tx);
3038 ureg_ENDIF(tx->ureg);
3039 return D3D_OK;
3040 }
3041
3042 DECL_SPECIAL(PHASE)
3043 {
3044 return D3D_OK; /* we don't care about phase */
3045 }
3046
3047 DECL_SPECIAL(COMMENT)
3048 {
3049 return D3D_OK; /* nothing to do */
3050 }
3051
3052
/* Build one struct sm1_op_info initializer for inst_table[].
 *
 *   d3d_op    D3D opcode name; expands to D3DSIO_<d3d_op>
 *   tgsi_op   TGSI opcode name; expands to TGSI_OPCODE_<tgsi_op>
 *   vv1, vv2  first version pair (presumably the min/max vertex shader
 *             versions accepting the opcode -- confirm against the struct)
 *   pv1, pv2  second version pair (presumably the pixel shader equivalent)
 *   ndst      looks like the number of destination operands
 *   nsrc      looks like the number of source operands
 *   handler   translation callback, or NULL when none is needed
 */
#define _OPI(d3d_op, tgsi_op, vv1, vv2, pv1, pv2, ndst, nsrc, handler) \
    { D3DSIO_##d3d_op, TGSI_OPCODE_##tgsi_op,                          \
      { vv1, vv2 }, { pv1, pv2, },                                     \
      ndst, nsrc, handler }
3055
/* Table of the SM1-SM3 instructions known to the translator.
 *
 * Each row is built with _OPI(): D3DSIO opcode, default TGSI opcode, two
 * V(major,minor) version pairs, the number of destination and source
 * parameters to parse, and an optional SPECIAL() handler (NULL selects the
 * generic one-to-one translation).
 *
 * Some opcodes (SINCOS, TEXCOORD, TEX, EXPP) appear several times with
 * different version ranges; create_op_info_map() keeps, for every opcode,
 * the index of the last row whose range contains the shader version.
 */
static const struct sm1_op_info inst_table[] =
{
    _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
    _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
    _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
    _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
    _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
    _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
    _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
    _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
    _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
    _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
    _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
    _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
    _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
    _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
    _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
    _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
    _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
    _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */

    /* Matrix multiplies */
    _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
    _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
    _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
    _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
    _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),

    /* Subroutines and loops */
    _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
    _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
    _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
    _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
    _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
    _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),

    /* DCL parses its own operands in the handler (0 dst / 0 src here) */
    _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),

    _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
    _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
    _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
    _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
    _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */

    /* SINCOS takes 3 sources in 2.x and a single source in 3.0 */
    _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
    _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

    /* More flow control */
    _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
    _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
    _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
    _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
    _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
    _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
    _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
    _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
    /* we don't write to the address register, but a normal register (copied
     * when needed to the address register), thus we don't use ARR */
    _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),

    _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
    _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),

    /* Texture instructions; the same opcode has different operand counts
     * depending on the shader version. */
    _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
    _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
    _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
    _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
    _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
    _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
    _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
    _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
    _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
    _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
    _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),

    _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
    _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),

    _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),

    /* More tex stuff */
    _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
    _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
    _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
    _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
    _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),

    /* Misc */
    _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
    _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
    _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
    _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
    _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
    _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
    _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
};
3162
/* Descriptor for D3DSIO_PHASE. It is kept outside inst_table and selected
 * directly by sm1_parse_instruction() when the opcode value does not fit
 * in the op_info_map lookup array. */
static const struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3165
/* Descriptor for D3DSIO_COMMENT. Like inst_phase it is selected directly by
 * sm1_parse_instruction() instead of going through op_info_map. */
static const struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3168
3169 static void
3170 create_op_info_map(struct shader_translator *tx)
3171 {
3172 const unsigned version = (tx->version.major << 8) | tx->version.minor;
3173 unsigned i;
3174
3175 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3176 tx->op_info_map[i] = -1;
3177
3178 if (tx->processor == PIPE_SHADER_VERTEX) {
3179 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3180 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3181 if (inst_table[i].vert_version.min <= version &&
3182 inst_table[i].vert_version.max >= version)
3183 tx->op_info_map[inst_table[i].sio] = i;
3184 }
3185 } else {
3186 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3187 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3188 if (inst_table[i].frag_version.min <= version &&
3189 inst_table[i].frag_version.max >= version)
3190 tx->op_info_map[inst_table[i].sio] = i;
3191 }
3192 }
3193 }
3194
3195 static inline HRESULT
3196 NineTranslateInstruction_Generic(struct shader_translator *tx)
3197 {
3198 struct ureg_dst dst[1];
3199 struct ureg_src src[4];
3200 unsigned i;
3201
3202 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3203 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3204 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3205 src[i] = tx_src_param(tx, &tx->insn.src[i]);
3206
3207 ureg_insn(tx->ureg, tx->insn.info->opcode,
3208 dst, tx->insn.ndst,
3209 src, tx->insn.nsrc, 0);
3210 return D3D_OK;
3211 }
3212
3213 static inline DWORD
3214 TOKEN_PEEK(struct shader_translator *tx)
3215 {
3216 return *(tx->parse);
3217 }
3218
3219 static inline DWORD
3220 TOKEN_NEXT(struct shader_translator *tx)
3221 {
3222 return *(tx->parse)++;
3223 }
3224
3225 static inline void
3226 TOKEN_JUMP(struct shader_translator *tx)
3227 {
3228 if (tx->parse_next && tx->parse != tx->parse_next) {
3229 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3230 tx->parse = tx->parse_next;
3231 }
3232 }
3233
3234 static inline boolean
3235 sm1_parse_eof(struct shader_translator *tx)
3236 {
3237 return TOKEN_PEEK(tx) == NINED3DSP_END;
3238 }
3239
3240 static void
3241 sm1_read_version(struct shader_translator *tx)
3242 {
3243 const DWORD tok = TOKEN_NEXT(tx);
3244
3245 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3246 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3247
3248 switch (tok >> 16) {
3249 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3250 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3251 default:
3252 DBG("Invalid shader type: %x\n", tok);
3253 tx->processor = ~0;
3254 break;
3255 }
3256 }
3257
3258 /* This is just to check if we parsed the instruction properly. */
3259 static void
3260 sm1_parse_get_skip(struct shader_translator *tx)
3261 {
3262 const DWORD tok = TOKEN_PEEK(tx);
3263
3264 if (tx->version.major >= 2) {
3265 tx->parse_next = tx->parse + 1 /* this */ +
3266 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3267 } else {
3268 tx->parse_next = NULL; /* TODO: determine from param count */
3269 }
3270 }
3271
3272 static void
3273 sm1_print_comment(const char *comment, UINT size)
3274 {
3275 if (!size)
3276 return;
3277 /* TODO */
3278 }
3279
3280 static void
3281 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3282 {
3283 DWORD tok = TOKEN_PEEK(tx);
3284
3285 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3286 {
3287 const char *comment = "";
3288 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3289 tx->parse += size + 1;
3290
3291 if (print)
3292 sm1_print_comment(comment, size);
3293
3294 tok = TOKEN_PEEK(tx);
3295 }
3296 }
3297
3298 static void
3299 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3300 {
3301 *reg = TOKEN_NEXT(tx);
3302
3303 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3304 {
3305 if (tx->version.major < 2)
3306 *rel = (1 << 31) |
3307 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3308 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
3309 D3DSP_NOSWIZZLE;
3310 else
3311 *rel = TOKEN_NEXT(tx);
3312 }
3313 }
3314
3315 static void
3316 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3317 {
3318 int8_t shift;
3319 dst->file =
3320 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3321 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3322 dst->type = TGSI_RETURN_TYPE_FLOAT;
3323 dst->idx = tok & D3DSP_REGNUM_MASK;
3324 dst->rel = NULL;
3325 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3326 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3327 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3328 dst->shift = (shift & 0x7) - (shift & 0x8);
3329 }
3330
3331 static void
3332 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3333 {
3334 src->file =
3335 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3336 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3337 src->type = TGSI_RETURN_TYPE_FLOAT;
3338 src->idx = tok & D3DSP_REGNUM_MASK;
3339 src->rel = NULL;
3340 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3341 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3342
3343 switch (src->file) {
3344 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3345 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3346 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3347 default:
3348 break;
3349 }
3350 }
3351
3352 static void
3353 sm1_parse_immediate(struct shader_translator *tx,
3354 struct sm1_src_param *imm)
3355 {
3356 imm->file = NINED3DSPR_IMMEDIATE;
3357 imm->idx = INT_MIN;
3358 imm->rel = NULL;
3359 imm->swizzle = NINED3DSP_NOSWIZZLE;
3360 imm->mod = 0;
3361 switch (tx->insn.opcode) {
3362 case D3DSIO_DEF:
3363 imm->type = NINED3DSPTYPE_FLOAT4;
3364 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3365 tx->parse += 4;
3366 break;
3367 case D3DSIO_DEFI:
3368 imm->type = NINED3DSPTYPE_INT4;
3369 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3370 tx->parse += 4;
3371 break;
3372 case D3DSIO_DEFB:
3373 imm->type = NINED3DSPTYPE_BOOL;
3374 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3375 tx->parse += 1;
3376 break;
3377 default:
3378 assert(0);
3379 break;
3380 }
3381 }
3382
3383 static void
3384 sm1_read_dst_param(struct shader_translator *tx,
3385 struct sm1_dst_param *dst,
3386 struct sm1_src_param *rel)
3387 {
3388 DWORD tok_dst, tok_rel = 0;
3389
3390 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3391 sm1_parse_dst_param(dst, tok_dst);
3392 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3393 sm1_parse_src_param(rel, tok_rel);
3394 dst->rel = rel;
3395 }
3396 }
3397
3398 static void
3399 sm1_read_src_param(struct shader_translator *tx,
3400 struct sm1_src_param *src,
3401 struct sm1_src_param *rel)
3402 {
3403 DWORD tok_src, tok_rel = 0;
3404
3405 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3406 sm1_parse_src_param(src, tok_src);
3407 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3408 assert(rel);
3409 sm1_parse_src_param(rel, tok_rel);
3410 src->rel = rel;
3411 }
3412 }
3413
3414 static void
3415 sm1_read_semantic(struct shader_translator *tx,
3416 struct sm1_semantic *sem)
3417 {
3418 const DWORD tok_usg = TOKEN_NEXT(tx);
3419 const DWORD tok_dst = TOKEN_NEXT(tx);
3420
3421 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3422 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3423 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3424
3425 sm1_parse_dst_param(&sem->reg, tok_dst);
3426 }
3427
3428 static void
3429 sm1_parse_instruction(struct shader_translator *tx)
3430 {
3431 struct sm1_instruction *insn = &tx->insn;
3432 HRESULT hr;
3433 DWORD tok;
3434 const struct sm1_op_info *info = NULL;
3435 unsigned i;
3436
3437 sm1_parse_comments(tx, TRUE);
3438 sm1_parse_get_skip(tx);
3439
3440 tok = TOKEN_NEXT(tx);
3441
3442 insn->opcode = tok & D3DSI_OPCODE_MASK;
3443 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3444 insn->coissue = !!(tok & D3DSI_COISSUE);
3445 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3446
3447 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3448 int k = tx->op_info_map[insn->opcode];
3449 if (k >= 0) {
3450 assert(k < ARRAY_SIZE(inst_table));
3451 info = &inst_table[k];
3452 }
3453 } else {
3454 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3455 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3456 }
3457 if (!info) {
3458 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3459 TOKEN_JUMP(tx);
3460 return;
3461 }
3462 insn->info = info;
3463 insn->ndst = info->ndst;
3464 insn->nsrc = info->nsrc;
3465
3466 /* check version */
3467 {
3468 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3469 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3470 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3471 if (ver < min || ver > max) {
3472 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3473 min, ver, max);
3474 return;
3475 }
3476 }
3477
3478 for (i = 0; i < insn->ndst; ++i)
3479 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3480 if (insn->predicated)
3481 sm1_read_src_param(tx, &insn->pred, NULL);
3482 for (i = 0; i < insn->nsrc; ++i)
3483 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3484
3485 /* parse here so we can dump them before processing */
3486 if (insn->opcode == D3DSIO_DEF ||
3487 insn->opcode == D3DSIO_DEFI ||
3488 insn->opcode == D3DSIO_DEFB)
3489 sm1_parse_immediate(tx, &tx->insn.src[0]);
3490
3491 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3492 sm1_instruction_check(insn);
3493
3494 if (insn->predicated) {
3495 tx->predicated_activated = true;
3496 if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
3497 tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
3498 tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
3499 }
3500 }
3501
3502 if (info->handler)
3503 hr = info->handler(tx);
3504 else
3505 hr = NineTranslateInstruction_Generic(tx);
3506 tx_apply_dst0_modifiers(tx);
3507
3508 if (insn->predicated) {
3509 tx->predicated_activated = false;
3510 /* TODO: predicate might be allowed on outputs,
3511 * which cannot be src. Workaround it. */
3512 ureg_CMP(tx->ureg, tx->regs.predicate_dst,
3513 ureg_negate(tx_src_param(tx, &insn->pred)),
3514 ureg_src(tx->regs.predicate_tmp),
3515 ureg_src(tx->regs.predicate_dst));
3516 }
3517
3518 if (hr != D3D_OK)
3519 tx->failure = TRUE;
3520 tx->num_scratch = 0; /* reset */
3521
3522 TOKEN_JUMP(tx);
3523 }
3524
/* Shorthands for querying driver capabilities.
 * Both expect a 'screen' pointer in the expanding scope; GET_SHADER_CAP
 * additionally expects an 'info' pointer whose 'type' member selects the
 * shader stage. */
#define GET_CAP(n) screen->get_param( \
    screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) screen->get_shader_param( \
    screen, info->type, PIPE_SHADER_CAP_##n)
3529
3530 static HRESULT
3531 tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
3532 {
3533 unsigned i;
3534
3535 memset(tx, 0, sizeof(*tx));
3536
3537 tx->info = info;
3538
3539 tx->byte_code = info->byte_code;
3540 tx->parse = info->byte_code;
3541
3542 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3543 info->input_map[i] = NINE_DECLUSAGE_NONE;
3544 info->num_inputs = 0;
3545
3546 info->position_t = FALSE;
3547 info->point_size = FALSE;
3548
3549 memset(tx->slots_used, 0, sizeof(tx->slots_used));
3550 memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
3551 memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
3552
3553 tx->info->const_float_slots = 0;
3554 tx->info->const_int_slots = 0;
3555 tx->info->const_bool_slots = 0;
3556
3557 info->sampler_mask = 0x0;
3558 info->rt_mask = 0x0;
3559
3560 info->lconstf.data = NULL;
3561 info->lconstf.ranges = NULL;
3562
3563 info->bumpenvmat_needed = 0;
3564
3565 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3566 tx->regs.rL[i] = ureg_dst_undef();
3567 }
3568 tx->regs.address = ureg_dst_undef();
3569 tx->regs.a0 = ureg_dst_undef();
3570 tx->regs.p = ureg_dst_undef();
3571 tx->regs.oDepth = ureg_dst_undef();
3572 tx->regs.vPos = ureg_src_undef();
3573 tx->regs.vFace = ureg_src_undef();
3574 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3575 tx->regs.o[i] = ureg_dst_undef();
3576 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3577 tx->regs.oCol[i] = ureg_dst_undef();
3578 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3579 tx->regs.vC[i] = ureg_src_undef();
3580 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3581 tx->regs.vT[i] = ureg_src_undef();
3582
3583 sm1_read_version(tx);
3584
3585 info->version = (tx->version.major << 4) | tx->version.minor;
3586
3587 tx->num_outputs = 0;
3588
3589 create_op_info_map(tx);
3590
3591 tx->ureg = ureg_create(info->type);
3592 if (!tx->ureg) {
3593 return E_OUTOFMEMORY;
3594 }
3595
3596 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3597 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3598 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3599 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3600 tx->texcoord_sn = tx->want_texcoord ?
3601 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3602 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3603 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3604
3605 if (IS_VS) {
3606 tx->num_constf_allowed = NINE_MAX_CONST_F;
3607 } else if (tx->version.major < 2) {/* IS_PS v1 */
3608 tx->num_constf_allowed = 8;
3609 } else if (tx->version.major == 2) {/* IS_PS v2 */
3610 tx->num_constf_allowed = 32;
3611 } else {/* IS_PS v3 */
3612 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3613 }
3614
3615 if (tx->version.major < 2) {
3616 tx->num_consti_allowed = 0;
3617 tx->num_constb_allowed = 0;
3618 } else {
3619 tx->num_consti_allowed = NINE_MAX_CONST_I;
3620 tx->num_constb_allowed = NINE_MAX_CONST_B;
3621 }
3622
3623 if (info->swvp_on && tx->version.major >= 2) {
3624 tx->num_constf_allowed = 8192;
3625 tx->num_consti_allowed = 2048;
3626 tx->num_constb_allowed = 2048;
3627 }
3628
3629 /* VS must always write position. Declare it here to make it the 1st output.
3630 * (Some drivers like nv50 are buggy and rely on that.)
3631 */
3632 if (IS_VS) {
3633 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3634 } else {
3635 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3636 if (!tx->shift_wpos)
3637 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3638 }
3639
3640 tx->mul_zero_wins = GET_CAP(TGSI_MUL_ZERO_WINS);
3641 if (tx->mul_zero_wins)
3642 ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);
3643
3644 /* Add additional definition of constants */
3645 if (info->add_constants_defs.c_combination) {
3646 unsigned i;
3647
3648 assert(info->add_constants_defs.int_const_added);
3649 assert(info->add_constants_defs.bool_const_added);
3650 /* We only add constants that are used by the shader
3651 * and that are not defined in the shader */
3652 for (i = 0; i < NINE_MAX_CONST_I; ++i) {
3653 if ((*info->add_constants_defs.int_const_added)[i]) {
3654 DBG("Defining const i%i : { %i %i %i %i }\n", i,
3655 info->add_constants_defs.c_combination->const_i[i][0],
3656 info->add_constants_defs.c_combination->const_i[i][1],
3657 info->add_constants_defs.c_combination->const_i[i][2],
3658 info->add_constants_defs.c_combination->const_i[i][3]);
3659 tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
3660 }
3661 }
3662 for (i = 0; i < NINE_MAX_CONST_B; ++i) {
3663 if ((*info->add_constants_defs.bool_const_added)[i]) {
3664 DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
3665 tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
3666 }
3667 }
3668 }
3669 return D3D_OK;
3670 }
3671
3672 static void
3673 tx_dtor(struct shader_translator *tx)
3674 {
3675 if (tx->slot_map)
3676 FREE(tx->slot_map);
3677 if (tx->num_inst_labels)
3678 FREE(tx->inst_labels);
3679 FREE(tx->lconstf);
3680 FREE(tx->regs.r);
3681 FREE(tx);
3682 }
3683
3684 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3685 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3686 static void
3687 shader_add_vs_viewport_transform(struct shader_translator *tx)
3688 {
3689 struct ureg_program *ureg = tx->ureg;
3690 struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
3691 struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
3692 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3693
3694 c0 = ureg_src_dimension(c0, 4);
3695 c1 = ureg_src_dimension(c1, 4);
3696 /* TODO: find out when we need to apply the viewport transformation or not.
3697 * Likely will be XYZ vs XYZRHW in vdecl_out
3698 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3699 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3700 */
3701 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3702 }
3703
3704 static void
3705 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3706 {
3707 struct ureg_program *ureg = tx->ureg;
3708 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3709 struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3710 struct ureg_src fog_vs, fog_color;
3711 struct ureg_dst fog_factor, depth;
3712
3713 if (!tx->info->fog_enable) {
3714 ureg_MOV(ureg, oCol0, src_col);
3715 return;
3716 }
3717
3718 if (tx->info->fog_mode != D3DFOG_NONE) {
3719 depth = tx_scratch_scalar(tx);
3720 /* Depth used for fog is perspective interpolated */
3721 ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3722 ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3723 }
3724
3725 fog_color = nine_float_constant_src(tx, 32);
3726 fog_params = nine_float_constant_src(tx, 33);
3727 fog_factor = tx_scratch_scalar(tx);
3728
3729 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3730 fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
3731 fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
3732 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
3733 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3734 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3735 fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3736 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3737 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3738 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3739 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3740 fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3741 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3742 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3743 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3744 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3745 } else {
3746 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
3747 TGSI_INTERPOLATE_PERSPECTIVE),
3748 TGSI_SWIZZLE_X);
3749 ureg_MOV(ureg, fog_factor, fog_vs);
3750 }
3751
3752 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3753 tx_src_scalar(fog_factor), src_col, fog_color);
3754 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3755 }
3756
3757 static void parse_shader(struct shader_translator *tx)
3758 {
3759 struct nine_shader_info *info = tx->info;
3760
3761 while (!sm1_parse_eof(tx) && !tx->failure)
3762 sm1_parse_instruction(tx);
3763 tx->parse++; /* for byte_size */
3764
3765 if (tx->failure)
3766 return;
3767
3768 if (IS_PS && tx->version.major < 3) {
3769 if (tx->version.major < 2) {
3770 assert(tx->num_temp); /* there must be color output */
3771 info->rt_mask |= 0x1;
3772 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3773 } else {
3774 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3775 }
3776 }
3777
3778 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3779 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
3780 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3781 }
3782
3783 if (info->position_t)
3784 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3785
3786 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3787 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3788 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3789 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3790 info->point_size = TRUE;
3791 }
3792
3793 if (info->process_vertices)
3794 shader_add_vs_viewport_transform(tx);
3795
3796 ureg_END(tx->ureg);
3797 }
3798
/* Bit flags selectable through the NINE_SHADER debug environment variable
 * (see nine_shader_debug_options for the matching option names). */
#define NINE_SHADER_DEBUG_OPTION_NIR_VS (1 << 0)
#define NINE_SHADER_DEBUG_OPTION_NIR_PS (1 << 1)
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS (1 << 2)
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS (1 << 3)
#define NINE_SHADER_DEBUG_OPTION_DUMP_NIR (1 << 4)
#define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI (1 << 5)
3805
/* Option name / flag / description triples passed to
 * debug_get_flags_option() when reading NINE_SHADER. */
static const struct debug_named_value nine_shader_debug_options[] = {
    { "nir_vs", NINE_SHADER_DEBUG_OPTION_NIR_VS, "Use NIR for vertex shaders even if the driver doesn't prefer it." },
    { "nir_ps", NINE_SHADER_DEBUG_OPTION_NIR_PS, "Use NIR for pixel shaders even if the driver doesn't prefer it." },
    { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
    { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
    { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
    { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
    DEBUG_NAMED_VALUE_END /* must be last */
};
3815
3816 static inline boolean
3817 nine_shader_get_debug_flag(uint64_t flag)
3818 {
3819 static uint64_t flags = 0;
3820 static boolean first_run = TRUE;
3821
3822 if (unlikely(first_run)) {
3823 first_run = FALSE;
3824 flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
3825
3826 // Check old TGSI dump envvar too
3827 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3828 flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
3829 }
3830 }
3831
3832 return !!(flags & flag);
3833 }
3834
3835 static void
3836 nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
3837 struct pipe_screen *screen)
3838 {
3839 struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, true);
3840
3841 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
3842 nir_print_shader(nir, stdout);
3843 }
3844
3845 state->type = PIPE_SHADER_IR_NIR;
3846 state->tokens = NULL;
3847 state->ir.nir = nir;
3848 memset(&state->stream_output, 0, sizeof(state->stream_output));
3849 }
3850
3851 static void *
3852 nine_ureg_create_shader(struct ureg_program *ureg,
3853 struct pipe_context *pipe,
3854 const struct pipe_stream_output_info *so)
3855 {
3856 struct pipe_shader_state state;
3857 const struct tgsi_token *tgsi_tokens;
3858 struct pipe_screen *screen = pipe->screen;
3859
3860 tgsi_tokens = ureg_finalize(ureg);
3861 if (!tgsi_tokens)
3862 return NULL;
3863
3864 assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
3865 enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
3866
3867 int preferred_ir = screen->get_shader_param(screen, shader_type, PIPE_SHADER_CAP_PREFERRED_IR);
3868 bool prefer_nir = (preferred_ir == PIPE_SHADER_IR_NIR);
3869 bool use_nir = prefer_nir ||
3870 ((shader_type == PIPE_SHADER_VERTEX) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_VS)) ||
3871 ((shader_type == PIPE_SHADER_FRAGMENT) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_PS));
3872
3873 /* Allow user to override preferred IR, this is very useful for debugging */
3874 if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
3875 use_nir = false;
3876 if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
3877 use_nir = false;
3878
3879 DUMP("shader type: %s, preferred IR: %s, selected IR: %s\n",
3880 shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
3881 prefer_nir ? "NIR" : "TGSI",
3882 use_nir ? "NIR" : "TGSI");
3883
3884 if (use_nir) {
3885 nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
3886 } else {
3887 pipe_shader_state_from_tgsi(&state, tgsi_tokens);
3888 }
3889
3890 assert(state.tokens || state.ir.nir);
3891
3892 if (so)
3893 state.stream_output = *so;
3894
3895 switch (shader_type) {
3896 case PIPE_SHADER_VERTEX:
3897 return pipe->create_vs_state(pipe, &state);
3898 case PIPE_SHADER_FRAGMENT:
3899 return pipe->create_fs_state(pipe, &state);
3900 default:
3901 unreachable("unsupported shader type");
3902 }
3903 }
3904
3905
/* Create a driver shader from the ureg program 'p' (optionally with stream
 * output info 'so') and destroy the program afterwards, whether or not the
 * creation succeeded.  Returns the value of nine_ureg_create_shader(). */
void *
nine_create_shader_with_so_and_destroy(struct ureg_program *p,
                                       struct pipe_context *pipe,
                                       const struct pipe_stream_output_info *so)
{
    void *cso;

    cso = nine_ureg_create_shader(p, pipe, so);
    ureg_destroy(p);

    return cso;
}
3915
3916 HRESULT
3917 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3918 {
3919 struct shader_translator *tx;
3920 HRESULT hr = D3D_OK;
3921 const unsigned processor = info->type;
3922 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3923 unsigned *const_ranges = NULL;
3924
3925 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3926
3927 tx = MALLOC_STRUCT(shader_translator);
3928 if (!tx)
3929 return E_OUTOFMEMORY;
3930
3931 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
3932 hr = E_OUTOFMEMORY;
3933 goto out;
3934 }
3935
3936 assert(IS_VS || !info->swvp_on);
3937
3938 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3939 hr = D3DERR_INVALIDCALL;
3940 DBG("Unsupported shader version: %u.%u !\n",
3941 tx->version.major, tx->version.minor);
3942 goto out;
3943 }
3944 if (tx->processor != processor) {
3945 hr = D3DERR_INVALIDCALL;
3946 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3947 goto out;
3948 }
3949 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3950 tx->version.major, tx->version.minor);
3951
3952 parse_shader(tx);
3953
3954 if (tx->failure) {
3955 /* For VS shaders, we print the warning later,
3956 * we first try with swvp. */
3957 if (IS_PS)
3958 ERR("Encountered buggy shader\n");
3959 ureg_destroy(tx->ureg);
3960 hr = D3DERR_INVALIDCALL;
3961 goto out;
3962 }
3963
3964 /* Recompile after compacting constant slots if possible */
3965 if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
3966 unsigned *slot_map;
3967 unsigned c;
3968 int i, j, num_ranges, prev;
3969
3970 DBG("Recompiling shader for constant compaction\n");
3971 ureg_destroy(tx->ureg);
3972
3973 if (tx->num_inst_labels)
3974 FREE(tx->inst_labels);
3975 FREE(tx->lconstf);
3976 FREE(tx->regs.r);
3977
3978 num_ranges = 0;
3979 prev = -2;
3980 for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
3981 if (tx->slots_used[i]) {
3982 if (prev != i - 1)
3983 num_ranges++;
3984 prev = i;
3985 }
3986 }
3987 slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned));
3988 const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
3989 if (!slot_map || !const_ranges) {
3990 hr = E_OUTOFMEMORY;
3991 goto out;
3992 }
3993 c = 0;
3994 j = -1;
3995 prev = -2;
3996 for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
3997 if (tx->slots_used[i]) {
3998 if (prev != i - 1)
3999 j++;
4000 /* Initialize first slot of the range */
4001 if (!const_ranges[2*j+1])
4002 const_ranges[2*j] = i;
4003 const_ranges[2*j+1]++;
4004 prev = i;
4005 slot_map[i] = c++;
4006 }
4007 }
4008
4009 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4010 hr = E_OUTOFMEMORY;
4011 goto out;
4012 }
4013 tx->slot_map = slot_map;
4014 parse_shader(tx);
4015 assert(!tx->failure);
4016 #if !defined(NDEBUG)
4017 i = 0;
4018 j = 0;
4019 while (const_ranges[i*2+1] != 0) {
4020 j += const_ranges[i*2+1];
4021 i++;
4022 }
4023 assert(j == tx->num_slots);
4024 #endif
4025 }
4026
4027 /* record local constants */
4028 if (tx->num_lconstf && tx->indirect_const_access) {
4029 struct nine_range *ranges;
4030 float *data;
4031 int *indices;
4032 unsigned i, k, n;
4033
4034 hr = E_OUTOFMEMORY;
4035
4036 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
4037 if (!data)
4038 goto out;
4039 info->lconstf.data = data;
4040
4041 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
4042 if (!indices)
4043 goto out;
4044
4045 /* lazy sort, num_lconstf should be small */
4046 for (n = 0; n < tx->num_lconstf; ++n) {
4047 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
4048 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
4049 k = i;
4050 }
4051 indices[n] = tx->lconstf[k].idx;
4052 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
4053 tx->lconstf[k].idx = INT_MAX;
4054 }
4055
4056 /* count ranges */
4057 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
4058 if (indices[i] != indices[i - 1] + 1)
4059 ++n;
4060 ranges = MALLOC(n * sizeof(ranges[0]));
4061 if (!ranges) {
4062 FREE(indices);
4063 goto out;
4064 }
4065 info->lconstf.ranges = ranges;
4066
4067 k = 0;
4068 ranges[k].bgn = indices[0];
4069 for (i = 1; i < tx->num_lconstf; ++i) {
4070 if (indices[i] != indices[i - 1] + 1) {
4071 ranges[k].next = &ranges[k + 1];
4072 ranges[k].end = indices[i - 1] + 1;
4073 ++k;
4074 ranges[k].bgn = indices[i];
4075 }
4076 }
4077 ranges[k].end = indices[i - 1] + 1;
4078 ranges[k].next = NULL;
4079 assert(n == (k + 1));
4080
4081 FREE(indices);
4082 hr = D3D_OK;
4083 }
4084
4085 /* r500 */
4086 if (info->const_float_slots > device->max_vs_const_f &&
4087 (info->const_int_slots || info->const_bool_slots) &&
4088 !info->swvp_on)
4089 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
4090
4091
4092 if (tx->indirect_const_access) { /* vs only */
4093 info->const_float_slots = device->max_vs_const_f;
4094 tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
4095 }
4096
4097 if (!info->swvp_on) {
4098 info->const_used_size = sizeof(float[4]) * tx->num_slots;
4099 if (tx->num_slots)
4100 ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
4101 } else {
4102 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
4103 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
4104 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
4105 ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
4106 }
4107
4108 if (info->process_vertices)
4109 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
4110
4111 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
4112 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
4113 tgsi_dump(toks, 0);
4114 ureg_free_tokens(toks);
4115 }
4116
4117 if (info->process_vertices) {
4118 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
4119 tx->output_info,
4120 tx->num_outputs,
4121 &(info->so));
4122 info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
4123 } else
4124 info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
4125 if (!info->cso) {
4126 hr = D3DERR_DRIVERINTERNALERROR;
4127 FREE(info->lconstf.data);
4128 FREE(info->lconstf.ranges);
4129 goto out;
4130 }
4131
4132 info->const_ranges = const_ranges;
4133 const_ranges = NULL;
4134 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
4135 out:
4136 if (const_ranges)
4137 FREE(const_ranges);
4138 tx_dtor(tx);
4139 return hr;
4140 }