2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
24 #include "nine_shader.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "nir/tgsi_to_nir.h"
39 #define DBG_CHANNEL DBG_SHADER
41 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
44 struct shader_translator
;
46 typedef HRESULT (*translate_instruction_func
)(struct shader_translator
*);
48 static inline const char *d3dsio_to_string(unsigned opcode
);
51 #define NINED3D_SM1_VS 0xfffe
52 #define NINED3D_SM1_PS 0xffff
54 #define NINE_MAX_COND_DEPTH 64
55 #define NINE_MAX_LOOP_DEPTH 64
57 #define NINED3DSP_END 0x0000ffff
59 #define NINED3DSPTYPE_FLOAT4 0
60 #define NINED3DSPTYPE_INT4 1
61 #define NINED3DSPTYPE_BOOL 2
63 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
65 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
66 #define NINED3DSP_WRITEMASK_SHIFT 16
68 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
70 #define NINED3DSHADER_REL_OP_GT 1
71 #define NINED3DSHADER_REL_OP_EQ 2
72 #define NINED3DSHADER_REL_OP_GE 3
73 #define NINED3DSHADER_REL_OP_LT 4
74 #define NINED3DSHADER_REL_OP_NE 5
75 #define NINED3DSHADER_REL_OP_LE 6
77 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
78 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
80 #define NINED3DSI_TEXLD_PROJECT 0x1
81 #define NINED3DSI_TEXLD_BIAS 0x2
83 #define NINED3DSP_WRITEMASK_0 0x1
84 #define NINED3DSP_WRITEMASK_1 0x2
85 #define NINED3DSP_WRITEMASK_2 0x4
86 #define NINED3DSP_WRITEMASK_3 0x8
87 #define NINED3DSP_WRITEMASK_ALL 0xf
89 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
91 #define NINE_SWIZZLE4(x,y,z,w) \
92 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
94 #define NINE_APPLY_SWIZZLE(src, s) \
95 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
97 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
98 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
99 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
102 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
103 * BIAS <= PS 1.4 (x-0.5)
104 * BIASNEG <= PS 1.4 (-(x-0.5))
105 * SIGN <= PS 1.4 (2(x-0.5))
106 * SIGNNEG <= PS 1.4 (-2(x-0.5))
107 * COMP <= PS 1.4 (1-x)
109 * X2NEG = PS 1.4 (-2x)
110 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
111 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
112 * ABS >= SM 3.0 (abs(x))
113 * ABSNEG >= SM 3.0 (-abs(x))
114 * NOT >= SM 2.0 predication only
116 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
117 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
118 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
119 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
120 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
121 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
124 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
125 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
126 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
127 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
128 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
129 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
131 static const char *sm1_mod_str
[] =
133 [NINED3DSPSM_NONE
] = "",
134 [NINED3DSPSM_NEG
] = "-",
135 [NINED3DSPSM_BIAS
] = "bias",
136 [NINED3DSPSM_BIASNEG
] = "biasneg",
137 [NINED3DSPSM_SIGN
] = "sign",
138 [NINED3DSPSM_SIGNNEG
] = "signneg",
139 [NINED3DSPSM_COMP
] = "comp",
140 [NINED3DSPSM_X2
] = "x2",
141 [NINED3DSPSM_X2NEG
] = "x2neg",
142 [NINED3DSPSM_DZ
] = "dz",
143 [NINED3DSPSM_DW
] = "dw",
144 [NINED3DSPSM_ABS
] = "abs",
145 [NINED3DSPSM_ABSNEG
] = "-abs",
146 [NINED3DSPSM_NOT
] = "not"
150 sm1_dump_writemask(BYTE mask
)
152 if (mask
& 1) DUMP("x"); else DUMP("_");
153 if (mask
& 2) DUMP("y"); else DUMP("_");
154 if (mask
& 4) DUMP("z"); else DUMP("_");
155 if (mask
& 8) DUMP("w"); else DUMP("_");
159 sm1_dump_swizzle(BYTE s
)
161 char c
[4] = { 'x', 'y', 'z', 'w' };
163 c
[(s
>> 0) & 3], c
[(s
>> 2) & 3], c
[(s
>> 4) & 3], c
[(s
>> 6) & 3]);
166 static const char sm1_file_char
[] =
169 [D3DSPR_INPUT
] = 'v',
170 [D3DSPR_CONST
] = 'c',
172 [D3DSPR_RASTOUT
] = 'R',
173 [D3DSPR_ATTROUT
] = 'D',
174 [D3DSPR_OUTPUT
] = 'o',
175 [D3DSPR_CONSTINT
] = 'I',
176 [D3DSPR_COLOROUT
] = 'C',
177 [D3DSPR_DEPTHOUT
] = 'D',
178 [D3DSPR_SAMPLER
] = 's',
179 [D3DSPR_CONST2
] = 'c',
180 [D3DSPR_CONST3
] = 'c',
181 [D3DSPR_CONST4
] = 'c',
182 [D3DSPR_CONSTBOOL
] = 'B',
184 [D3DSPR_TEMPFLOAT16
] = 'h',
185 [D3DSPR_MISCTYPE
] = 'M',
186 [D3DSPR_LABEL
] = 'X',
187 [D3DSPR_PREDICATE
] = 'p'
191 sm1_dump_reg(BYTE file
, INT index
)
197 case D3DSPR_COLOROUT
:
200 case D3DSPR_DEPTHOUT
:
204 DUMP("oRast%i", index
);
206 case D3DSPR_CONSTINT
:
207 DUMP("iconst[%i]", index
);
209 case D3DSPR_CONSTBOOL
:
210 DUMP("bconst[%i]", index
);
213 DUMP("%c%i", sm1_file_char
[file
], index
);
221 struct sm1_src_param
*rel
;
234 sm1_parse_immediate(struct shader_translator
*, struct sm1_src_param
*);
239 struct sm1_src_param
*rel
;
243 int8_t shift
; /* sint4 */
248 assert_replicate_swizzle(const struct ureg_src
*reg
)
250 assert(reg
->SwizzleY
== reg
->SwizzleX
&&
251 reg
->SwizzleZ
== reg
->SwizzleX
&&
252 reg
->SwizzleW
== reg
->SwizzleX
);
256 sm1_dump_immediate(const struct sm1_src_param
*param
)
258 switch (param
->type
) {
259 case NINED3DSPTYPE_FLOAT4
:
260 DUMP("{ %f %f %f %f }",
261 param
->imm
.f
[0], param
->imm
.f
[1],
262 param
->imm
.f
[2], param
->imm
.f
[3]);
264 case NINED3DSPTYPE_INT4
:
265 DUMP("{ %i %i %i %i }",
266 param
->imm
.i
[0], param
->imm
.i
[1],
267 param
->imm
.i
[2], param
->imm
.i
[3]);
269 case NINED3DSPTYPE_BOOL
:
270 DUMP("%s", param
->imm
.b
? "TRUE" : "FALSE");
279 sm1_dump_src_param(const struct sm1_src_param
*param
)
281 if (param
->file
== NINED3DSPR_IMMEDIATE
) {
282 assert(!param
->mod
&&
284 param
->swizzle
== NINED3DSP_NOSWIZZLE
);
285 sm1_dump_immediate(param
);
290 DUMP("%s(", sm1_mod_str
[param
->mod
]);
292 DUMP("%c[", sm1_file_char
[param
->file
]);
293 sm1_dump_src_param(param
->rel
);
294 DUMP("+%i]", param
->idx
);
296 sm1_dump_reg(param
->file
, param
->idx
);
300 if (param
->swizzle
!= NINED3DSP_NOSWIZZLE
) {
302 sm1_dump_swizzle(param
->swizzle
);
307 sm1_dump_dst_param(const struct sm1_dst_param
*param
)
309 if (param
->mod
& NINED3DSPDM_SATURATE
)
311 if (param
->mod
& NINED3DSPDM_PARTIALP
)
313 if (param
->mod
& NINED3DSPDM_CENTROID
)
315 if (param
->shift
< 0)
316 DUMP("/%u ", 1 << -param
->shift
);
317 if (param
->shift
> 0)
318 DUMP("*%u ", 1 << param
->shift
);
321 DUMP("%c[", sm1_file_char
[param
->file
]);
322 sm1_dump_src_param(param
->rel
);
323 DUMP("+%i]", param
->idx
);
325 sm1_dump_reg(param
->file
, param
->idx
);
327 if (param
->mask
!= NINED3DSP_WRITEMASK_ALL
) {
329 sm1_dump_writemask(param
->mask
);
335 struct sm1_dst_param reg
;
343 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
344 * should be ignored completely */
346 unsigned opcode
; /* TGSI_OPCODE_x */
348 /* versions are still set even handler is set */
352 } vert_version
, frag_version
;
354 /* number of regs parsed outside of special handler */
358 /* some instructions don't map perfectly, so use a special handler */
359 translate_instruction_func handler
;
362 struct sm1_instruction
364 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode
;
370 struct sm1_src_param src
[4];
371 struct sm1_src_param src_rel
[4];
372 struct sm1_src_param pred
;
373 struct sm1_src_param dst_rel
[1];
374 struct sm1_dst_param dst
[1];
376 const struct sm1_op_info
*info
;
380 sm1_dump_instruction(struct sm1_instruction
*insn
, unsigned indent
)
384 /* no info stored for these: */
385 if (insn
->opcode
== D3DSIO_DCL
)
387 for (i
= 0; i
< indent
; ++i
)
390 if (insn
->predicated
) {
392 sm1_dump_src_param(&insn
->pred
);
395 DUMP("%s", d3dsio_to_string(insn
->opcode
));
397 switch (insn
->opcode
) {
399 DUMP(insn
->flags
== NINED3DSI_TEXLD_PROJECT
? "p" : "b");
402 DUMP("_%x", insn
->flags
);
410 for (i
= 0; i
< insn
->ndst
&& i
< ARRAY_SIZE(insn
->dst
); ++i
) {
411 sm1_dump_dst_param(&insn
->dst
[i
]);
415 for (i
= 0; i
< insn
->nsrc
&& i
< ARRAY_SIZE(insn
->src
); ++i
) {
416 sm1_dump_src_param(&insn
->src
[i
]);
419 if (insn
->opcode
== D3DSIO_DEF
||
420 insn
->opcode
== D3DSIO_DEFI
||
421 insn
->opcode
== D3DSIO_DEFB
)
422 sm1_dump_immediate(&insn
->src
[0]);
427 struct sm1_local_const
431 float f
[4]; /* for indirect addressing of float constants */
434 struct shader_translator
436 const DWORD
*byte_code
;
438 const DWORD
*parse_next
;
440 struct ureg_program
*ureg
;
447 unsigned processor
; /* PIPE_SHADER_VERTEX/FRAMGENT */
448 unsigned num_constf_allowed
;
449 unsigned num_consti_allowed
;
450 unsigned num_constb_allowed
;
452 boolean native_integers
;
453 boolean inline_subroutines
;
454 boolean want_texcoord
;
456 boolean wpos_is_sysval
;
457 boolean face_is_sysval_integer
;
458 boolean mul_zero_wins
;
459 unsigned texcoord_sn
;
461 struct sm1_instruction insn
; /* current instruction */
465 struct ureg_dst oPos
;
466 struct ureg_dst oPos_out
; /* the real output when doing streamout */
467 struct ureg_dst oFog
;
468 struct ureg_dst oPts
;
469 struct ureg_dst oCol
[4];
470 struct ureg_dst o
[PIPE_MAX_SHADER_OUTPUTS
];
471 struct ureg_dst oDepth
;
472 struct ureg_src v
[PIPE_MAX_SHADER_INPUTS
];
473 struct ureg_src v_consecutive
; /* copy in temp array of ps inputs for rel addressing */
474 struct ureg_src vPos
;
475 struct ureg_src vFace
;
478 struct ureg_dst address
;
480 struct ureg_dst predicate
;
481 struct ureg_dst predicate_tmp
;
482 struct ureg_dst predicate_dst
;
483 struct ureg_dst tS
[8]; /* texture stage registers */
484 struct ureg_dst tdst
; /* scratch dst if we need extra modifiers */
485 struct ureg_dst t
[8]; /* scratch TEMPs */
486 struct ureg_src vC
[2]; /* PS color in */
487 struct ureg_src vT
[8]; /* PS texcoord in */
488 struct ureg_dst rL
[NINE_MAX_LOOP_DEPTH
]; /* loop ctr */
490 unsigned num_temp
; /* ARRAY_SIZE(regs.r) */
491 unsigned num_scratch
;
493 unsigned loop_depth_max
;
495 unsigned loop_labels
[NINE_MAX_LOOP_DEPTH
];
496 unsigned cond_labels
[NINE_MAX_COND_DEPTH
];
497 boolean loop_or_rep
[NINE_MAX_LOOP_DEPTH
]; /* true: loop, false: rep */
498 boolean predicated_activated
;
500 unsigned *inst_labels
; /* LABEL op */
501 unsigned num_inst_labels
;
503 unsigned sampler_targets
[NINE_MAX_SAMPLERS
]; /* TGSI_TEXTURE_x */
505 struct sm1_local_const
*lconstf
;
506 unsigned num_lconstf
;
507 struct sm1_local_const
*lconsti
;
508 unsigned num_lconsti
;
509 struct sm1_local_const
*lconstb
;
510 unsigned num_lconstb
;
512 boolean slots_used
[NINE_MAX_CONST_ALL
];
516 boolean indirect_const_access
;
519 struct nine_vs_output_info output_info
[16];
522 struct nine_shader_info
*info
;
524 int16_t op_info_map
[D3DSIO_BREAKP
+ 1];
527 #define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
528 #define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
530 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
533 sm1_read_semantic(struct shader_translator
*, struct sm1_semantic
*);
536 sm1_instruction_check(const struct sm1_instruction
*insn
)
538 if (insn
->opcode
== D3DSIO_CRS
)
540 if (insn
->dst
[0].mask
& NINED3DSP_WRITEMASK_3
)
548 nine_record_outputs(struct shader_translator
*tx
, BYTE Usage
, BYTE UsageIndex
,
549 int mask
, int output_index
)
551 tx
->output_info
[tx
->num_outputs
].output_semantic
= Usage
;
552 tx
->output_info
[tx
->num_outputs
].output_semantic_index
= UsageIndex
;
553 tx
->output_info
[tx
->num_outputs
].mask
= mask
;
554 tx
->output_info
[tx
->num_outputs
].output_index
= output_index
;
558 static struct ureg_src
nine_float_constant_src(struct shader_translator
*tx
, int idx
)
563 idx
= tx
->slot_map
[idx
];
564 /* vswp constant handling: we use two buffers
565 * to fit all the float constants. The special handling
566 * doesn't need to be elsewhere, because all the instructions
567 * accessing the constants directly are VS1, and swvp
569 if (tx
->info
->swvp_on
&& idx
>= 4096) {
570 /* TODO: swvp rel is broken if many constants are used */
571 src
= ureg_src_register(TGSI_FILE_CONSTANT
, idx
- 4096);
572 src
= ureg_src_dimension(src
, 1);
574 src
= ureg_src_register(TGSI_FILE_CONSTANT
, idx
);
575 src
= ureg_src_dimension(src
, 0);
578 if (!tx
->info
->swvp_on
)
579 tx
->slots_used
[idx
] = TRUE
;
580 if (tx
->info
->const_float_slots
< (idx
+ 1))
581 tx
->info
->const_float_slots
= idx
+ 1;
582 if (tx
->num_slots
< (idx
+ 1))
583 tx
->num_slots
= idx
+ 1;
588 static struct ureg_src
nine_integer_constant_src(struct shader_translator
*tx
, int idx
)
592 if (tx
->info
->swvp_on
) {
593 src
= ureg_src_register(TGSI_FILE_CONSTANT
, idx
);
594 src
= ureg_src_dimension(src
, 2);
596 unsigned slot_idx
= tx
->info
->const_i_base
+ idx
;
598 slot_idx
= tx
->slot_map
[slot_idx
];
599 src
= ureg_src_register(TGSI_FILE_CONSTANT
, slot_idx
);
600 src
= ureg_src_dimension(src
, 0);
601 tx
->slots_used
[slot_idx
] = TRUE
;
602 tx
->info
->int_slots_used
[idx
] = TRUE
;
603 if (tx
->num_slots
< (slot_idx
+ 1))
604 tx
->num_slots
= slot_idx
+ 1;
607 if (tx
->info
->const_int_slots
< (idx
+ 1))
608 tx
->info
->const_int_slots
= idx
+ 1;
613 static struct ureg_src
nine_boolean_constant_src(struct shader_translator
*tx
, int idx
)
620 if (tx
->info
->swvp_on
) {
621 src
= ureg_src_register(TGSI_FILE_CONSTANT
, r
);
622 src
= ureg_src_dimension(src
, 3);
624 unsigned slot_idx
= tx
->info
->const_b_base
+ r
;
626 slot_idx
= tx
->slot_map
[slot_idx
];
627 src
= ureg_src_register(TGSI_FILE_CONSTANT
, slot_idx
);
628 src
= ureg_src_dimension(src
, 0);
629 tx
->slots_used
[slot_idx
] = TRUE
;
630 tx
->info
->bool_slots_used
[idx
] = TRUE
;
631 if (tx
->num_slots
< (slot_idx
+ 1))
632 tx
->num_slots
= slot_idx
+ 1;
634 src
= ureg_swizzle(src
, s
, s
, s
, s
);
636 if (tx
->info
->const_bool_slots
< (idx
+ 1))
637 tx
->info
->const_bool_slots
= idx
+ 1;
643 tx_lconstf(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
647 if (index
< 0 || index
>= tx
->num_constf_allowed
) {
651 for (i
= 0; i
< tx
->num_lconstf
; ++i
) {
652 if (tx
->lconstf
[i
].idx
== index
) {
653 *src
= tx
->lconstf
[i
].reg
;
660 tx_lconsti(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
664 if (index
< 0 || index
>= tx
->num_consti_allowed
) {
668 for (i
= 0; i
< tx
->num_lconsti
; ++i
) {
669 if (tx
->lconsti
[i
].idx
== index
) {
670 *src
= tx
->lconsti
[i
].reg
;
677 tx_lconstb(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
681 if (index
< 0 || index
>= tx
->num_constb_allowed
) {
685 for (i
= 0; i
< tx
->num_lconstb
; ++i
) {
686 if (tx
->lconstb
[i
].idx
== index
) {
687 *src
= tx
->lconstb
[i
].reg
;
695 tx_set_lconstf(struct shader_translator
*tx
, INT index
, float f
[4])
699 FAILURE_VOID(index
< 0 || index
>= tx
->num_constf_allowed
)
701 for (n
= 0; n
< tx
->num_lconstf
; ++n
)
702 if (tx
->lconstf
[n
].idx
== index
)
704 if (n
== tx
->num_lconstf
) {
706 tx
->lconstf
= REALLOC(tx
->lconstf
,
707 (n
+ 0) * sizeof(tx
->lconstf
[0]),
708 (n
+ 8) * sizeof(tx
->lconstf
[0]));
713 tx
->lconstf
[n
].idx
= index
;
714 tx
->lconstf
[n
].reg
= ureg_imm4f(tx
->ureg
, f
[0], f
[1], f
[2], f
[3]);
716 memcpy(tx
->lconstf
[n
].f
, f
, sizeof(tx
->lconstf
[n
].f
));
719 tx_set_lconsti(struct shader_translator
*tx
, INT index
, int i
[4])
723 FAILURE_VOID(index
< 0 || index
>= tx
->num_consti_allowed
)
725 for (n
= 0; n
< tx
->num_lconsti
; ++n
)
726 if (tx
->lconsti
[n
].idx
== index
)
728 if (n
== tx
->num_lconsti
) {
730 tx
->lconsti
= REALLOC(tx
->lconsti
,
731 (n
+ 0) * sizeof(tx
->lconsti
[0]),
732 (n
+ 8) * sizeof(tx
->lconsti
[0]));
738 tx
->lconsti
[n
].idx
= index
;
739 tx
->lconsti
[n
].reg
= tx
->native_integers
?
740 ureg_imm4i(tx
->ureg
, i
[0], i
[1], i
[2], i
[3]) :
741 ureg_imm4f(tx
->ureg
, i
[0], i
[1], i
[2], i
[3]);
744 tx_set_lconstb(struct shader_translator
*tx
, INT index
, BOOL b
)
748 FAILURE_VOID(index
< 0 || index
>= tx
->num_constb_allowed
)
750 for (n
= 0; n
< tx
->num_lconstb
; ++n
)
751 if (tx
->lconstb
[n
].idx
== index
)
753 if (n
== tx
->num_lconstb
) {
755 tx
->lconstb
= REALLOC(tx
->lconstb
,
756 (n
+ 0) * sizeof(tx
->lconstb
[0]),
757 (n
+ 8) * sizeof(tx
->lconstb
[0]));
763 tx
->lconstb
[n
].idx
= index
;
764 tx
->lconstb
[n
].reg
= tx
->native_integers
?
765 ureg_imm1u(tx
->ureg
, b
? 0xffffffff : 0) :
766 ureg_imm1f(tx
->ureg
, b
? 1.0f
: 0.0f
);
769 static inline struct ureg_dst
770 tx_scratch(struct shader_translator
*tx
)
772 if (tx
->num_scratch
>= ARRAY_SIZE(tx
->regs
.t
)) {
774 return tx
->regs
.t
[0];
776 if (ureg_dst_is_undef(tx
->regs
.t
[tx
->num_scratch
]))
777 tx
->regs
.t
[tx
->num_scratch
] = ureg_DECL_local_temporary(tx
->ureg
);
778 return tx
->regs
.t
[tx
->num_scratch
++];
781 static inline struct ureg_dst
782 tx_scratch_scalar(struct shader_translator
*tx
)
784 return ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
787 static inline struct ureg_src
788 tx_src_scalar(struct ureg_dst dst
)
790 struct ureg_src src
= ureg_src(dst
);
791 int c
= ffs(dst
.WriteMask
) - 1;
792 if (dst
.WriteMask
== (1 << c
))
793 src
= ureg_scalar(src
, c
);
798 tx_temp_alloc(struct shader_translator
*tx
, INT idx
)
801 if (idx
>= tx
->num_temp
) {
802 unsigned k
= tx
->num_temp
;
803 unsigned n
= idx
+ 1;
804 tx
->regs
.r
= REALLOC(tx
->regs
.r
,
805 k
* sizeof(tx
->regs
.r
[0]),
806 n
* sizeof(tx
->regs
.r
[0]));
808 tx
->regs
.r
[k
] = ureg_dst_undef();
811 if (ureg_dst_is_undef(tx
->regs
.r
[idx
]))
812 tx
->regs
.r
[idx
] = ureg_DECL_temporary(tx
->ureg
);
816 tx_addr_alloc(struct shader_translator
*tx
, INT idx
)
819 if (ureg_dst_is_undef(tx
->regs
.address
))
820 tx
->regs
.address
= ureg_DECL_address(tx
->ureg
);
821 if (ureg_dst_is_undef(tx
->regs
.a0
))
822 tx
->regs
.a0
= ureg_DECL_temporary(tx
->ureg
);
825 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
826 * the projection should be applied on the texture. It doesn't
828 * The doc is very imprecise here (it says the projection is done
829 * before rasterization, thus in vs, which seems wrong since ps instructions
830 * are affected differently)
831 * For now we only apply to the ps TEX instruction and TEXBEM.
832 * Perhaps some other instructions would need it */
834 apply_ps1x_projection(struct shader_translator
*tx
, struct ureg_dst dst
,
835 struct ureg_src src
, INT idx
)
838 unsigned dim
= 1 + ((tx
->info
->projected
>> (2 * idx
)) & 3);
842 ureg_MOV(tx
->ureg
, dst
, src
);
844 tmp
= tx_scratch_scalar(tx
);
845 ureg_RCP(tx
->ureg
, tmp
, ureg_scalar(src
, dim
-1));
846 ureg_MUL(tx
->ureg
, dst
, tx_src_scalar(tmp
), src
);
851 TEX_with_ps1x_projection(struct shader_translator
*tx
, struct ureg_dst dst
,
852 unsigned target
, struct ureg_src src0
,
853 struct ureg_src src1
, INT idx
)
855 unsigned dim
= 1 + ((tx
->info
->projected
>> (2 * idx
)) & 3);
857 boolean shadow
= !!(tx
->info
->sampler_mask_shadow
& (1 << idx
));
859 /* dim == 1: no projection
860 * Looks like must be disabled when it makes no
861 * sense according the texture dimensions
863 if (dim
== 1 || (dim
<= target
&& !shadow
)) {
864 ureg_TEX(tx
->ureg
, dst
, target
, src0
, src1
);
865 } else if (dim
== 4) {
866 ureg_TXP(tx
->ureg
, dst
, target
, src0
, src1
);
868 tmp
= tx_scratch(tx
);
869 apply_ps1x_projection(tx
, tmp
, src0
, idx
);
870 ureg_TEX(tx
->ureg
, dst
, target
, ureg_src(tmp
), src1
);
875 tx_texcoord_alloc(struct shader_translator
*tx
, INT idx
)
878 assert(idx
>= 0 && idx
< ARRAY_SIZE(tx
->regs
.vT
));
879 if (ureg_src_is_undef(tx
->regs
.vT
[idx
]))
880 tx
->regs
.vT
[idx
] = ureg_DECL_fs_input(tx
->ureg
, tx
->texcoord_sn
, idx
,
881 TGSI_INTERPOLATE_PERSPECTIVE
);
884 static inline unsigned *
885 tx_bgnloop(struct shader_translator
*tx
)
888 if (tx
->loop_depth_max
< tx
->loop_depth
)
889 tx
->loop_depth_max
= tx
->loop_depth
;
890 assert(tx
->loop_depth
< NINE_MAX_LOOP_DEPTH
);
891 return &tx
->loop_labels
[tx
->loop_depth
- 1];
894 static inline unsigned *
895 tx_endloop(struct shader_translator
*tx
)
897 assert(tx
->loop_depth
);
899 ureg_fixup_label(tx
->ureg
, tx
->loop_labels
[tx
->loop_depth
],
900 ureg_get_instruction_number(tx
->ureg
));
901 return &tx
->loop_labels
[tx
->loop_depth
];
904 static struct ureg_dst
905 tx_get_loopctr(struct shader_translator
*tx
, boolean loop_or_rep
)
907 const unsigned l
= tx
->loop_depth
- 1;
911 DBG("loop counter requested outside of loop\n");
912 return ureg_dst_undef();
915 if (ureg_dst_is_undef(tx
->regs
.rL
[l
])) {
916 /* loop or rep ctr creation */
917 tx
->regs
.rL
[l
] = ureg_DECL_local_temporary(tx
->ureg
);
918 tx
->loop_or_rep
[l
] = loop_or_rep
;
920 /* loop - rep - endloop - endrep not allowed */
921 assert(tx
->loop_or_rep
[l
] == loop_or_rep
);
923 return tx
->regs
.rL
[l
];
926 static struct ureg_src
927 tx_get_loopal(struct shader_translator
*tx
)
929 int loop_level
= tx
->loop_depth
- 1;
931 while (loop_level
>= 0) {
932 /* handle loop - rep - endrep - endloop case */
933 if (tx
->loop_or_rep
[loop_level
])
934 /* the value is in the loop counter y component (nine implementation) */
935 return ureg_scalar(ureg_src(tx
->regs
.rL
[loop_level
]), TGSI_SWIZZLE_Y
);
939 DBG("aL counter requested outside of loop\n");
940 return ureg_src_undef();
943 static inline unsigned *
944 tx_cond(struct shader_translator
*tx
)
946 assert(tx
->cond_depth
<= NINE_MAX_COND_DEPTH
);
948 return &tx
->cond_labels
[tx
->cond_depth
- 1];
951 static inline unsigned *
952 tx_elsecond(struct shader_translator
*tx
)
954 assert(tx
->cond_depth
);
955 return &tx
->cond_labels
[tx
->cond_depth
- 1];
959 tx_endcond(struct shader_translator
*tx
)
961 assert(tx
->cond_depth
);
963 ureg_fixup_label(tx
->ureg
, tx
->cond_labels
[tx
->cond_depth
],
964 ureg_get_instruction_number(tx
->ureg
));
967 static inline struct ureg_dst
968 nine_ureg_dst_register(unsigned file
, int index
)
970 return ureg_dst(ureg_src_register(file
, index
));
973 static inline struct ureg_src
974 nine_get_position_input(struct shader_translator
*tx
)
976 struct ureg_program
*ureg
= tx
->ureg
;
978 if (tx
->wpos_is_sysval
)
979 return ureg_DECL_system_value(ureg
, TGSI_SEMANTIC_POSITION
, 0);
981 return ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_POSITION
,
982 0, TGSI_INTERPOLATE_LINEAR
);
985 static struct ureg_src
986 tx_src_param(struct shader_translator
*tx
, const struct sm1_src_param
*param
)
988 struct ureg_program
*ureg
= tx
->ureg
;
992 assert(!param
->rel
|| (IS_VS
&& param
->file
== D3DSPR_CONST
) ||
993 (D3DSPR_ADDR
&& tx
->version
.major
== 3));
998 tx_temp_alloc(tx
, param
->idx
);
999 src
= ureg_src(tx
->regs
.r
[param
->idx
]);
1001 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1004 assert(param
->idx
== 0);
1005 /* the address register (vs only) must be
1006 * assigned before use */
1007 assert(!ureg_dst_is_undef(tx
->regs
.a0
));
1008 /* Round to lowest for vs1.1 (contrary to the doc), else
1009 * round to nearest */
1010 if (tx
->version
.major
< 2 && tx
->version
.minor
< 2)
1011 ureg_ARL(ureg
, tx
->regs
.address
, ureg_src(tx
->regs
.a0
));
1013 ureg_ARR(ureg
, tx
->regs
.address
, ureg_src(tx
->regs
.a0
));
1014 src
= ureg_src(tx
->regs
.address
);
1016 if (tx
->version
.major
< 2 && tx
->version
.minor
< 4) {
1017 /* no subroutines, so should be defined */
1018 src
= ureg_src(tx
->regs
.tS
[param
->idx
]);
1020 tx_texcoord_alloc(tx
, param
->idx
);
1021 src
= tx
->regs
.vT
[param
->idx
];
1027 src
= ureg_src_register(TGSI_FILE_INPUT
, param
->idx
);
1029 if (tx
->version
.major
< 3) {
1030 src
= ureg_DECL_fs_input_cyl_centroid(
1031 ureg
, TGSI_SEMANTIC_COLOR
, param
->idx
,
1032 TGSI_INTERPOLATE_COLOR
, 0,
1033 tx
->info
->force_color_in_centroid
?
1034 TGSI_INTERPOLATE_LOC_CENTROID
: 0,
1038 /* Copy all inputs (non consecutive)
1039 * to temp array (consecutive).
1040 * This is not good for performance.
1041 * A better way would be to have inputs
1042 * consecutive (would need implement alternative
1043 * way to match vs outputs and ps inputs).
1044 * However even with the better way, the temp array
1045 * copy would need to be used if some inputs
1046 * are not GENERIC or if they have different
1047 * interpolation flag. */
1048 if (ureg_src_is_undef(tx
->regs
.v_consecutive
)) {
1050 tx
->regs
.v_consecutive
= ureg_src(ureg_DECL_array_temporary(ureg
, 10, 0));
1051 for (i
= 0; i
< 10; i
++) {
1052 if (!ureg_src_is_undef(tx
->regs
.v
[i
]))
1053 ureg_MOV(ureg
, ureg_dst_array_offset(ureg_dst(tx
->regs
.v_consecutive
), i
), tx
->regs
.v
[i
]);
1055 ureg_MOV(ureg
, ureg_dst_array_offset(ureg_dst(tx
->regs
.v_consecutive
), i
), ureg_imm4f(ureg
, 0.0f
, 0.0f
, 0.0f
, 1.0f
));
1058 src
= ureg_src_array_offset(tx
->regs
.v_consecutive
, param
->idx
);
1060 assert(param
->idx
< ARRAY_SIZE(tx
->regs
.v
));
1061 src
= tx
->regs
.v
[param
->idx
];
1066 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
1068 case D3DSPR_PREDICATE
:
1069 if (ureg_dst_is_undef(tx
->regs
.predicate
)) {
1070 /* Forbidden to use the predicate register before being set */
1072 tx
->regs
.predicate
= ureg_DECL_temporary(tx
->ureg
);
1074 src
= ureg_src(tx
->regs
.predicate
);
1076 case D3DSPR_SAMPLER
:
1077 assert(param
->mod
== NINED3DSPSM_NONE
);
1078 assert(param
->swizzle
== NINED3DSP_NOSWIZZLE
);
1079 src
= ureg_DECL_sampler(ureg
, param
->idx
);
1082 if (param
->rel
|| !tx_lconstf(tx
, &src
, param
->idx
)) {
1083 src
= nine_float_constant_src(tx
, param
->idx
);
1085 tx
->indirect_const_access
= TRUE
;
1086 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
1089 if (!IS_VS
&& tx
->version
.major
< 2) {
1090 /* ps 1.X clamps constants */
1091 tmp
= tx_scratch(tx
);
1092 ureg_MIN(ureg
, tmp
, src
, ureg_imm1f(ureg
, 1.0f
));
1093 ureg_MAX(ureg
, tmp
, ureg_src(tmp
), ureg_imm1f(ureg
, -1.0f
));
1094 src
= ureg_src(tmp
);
1100 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1101 assert(!"CONST2/3/4");
1102 src
= ureg_imm1f(ureg
, 0.0f
);
1104 case D3DSPR_CONSTINT
:
1105 /* relative adressing only possible for float constants in vs */
1106 if (!tx_lconsti(tx
, &src
, param
->idx
))
1107 src
= nine_integer_constant_src(tx
, param
->idx
);
1109 case D3DSPR_CONSTBOOL
:
1110 if (!tx_lconstb(tx
, &src
, param
->idx
))
1111 src
= nine_boolean_constant_src(tx
, param
->idx
);
1114 if (ureg_dst_is_undef(tx
->regs
.address
))
1115 tx
->regs
.address
= ureg_DECL_address(ureg
);
1116 if (!tx
->native_integers
)
1117 ureg_ARR(ureg
, tx
->regs
.address
, tx_get_loopal(tx
));
1119 ureg_UARL(ureg
, tx
->regs
.address
, tx_get_loopal(tx
));
1120 src
= ureg_src(tx
->regs
.address
);
1122 case D3DSPR_MISCTYPE
:
1123 switch (param
->idx
) {
1124 case D3DSMO_POSITION
:
1125 if (ureg_src_is_undef(tx
->regs
.vPos
))
1126 tx
->regs
.vPos
= nine_get_position_input(tx
);
1127 if (tx
->shift_wpos
) {
1128 /* TODO: do this only once */
1129 struct ureg_dst wpos
= tx_scratch(tx
);
1130 ureg_ADD(ureg
, wpos
, tx
->regs
.vPos
,
1131 ureg_imm4f(ureg
, -0.5f
, -0.5f
, 0.0f
, 0.0f
));
1132 src
= ureg_src(wpos
);
1134 src
= tx
->regs
.vPos
;
1138 if (ureg_src_is_undef(tx
->regs
.vFace
)) {
1139 if (tx
->face_is_sysval_integer
) {
1140 tmp
= ureg_DECL_temporary(ureg
);
1142 ureg_DECL_system_value(ureg
, TGSI_SEMANTIC_FACE
, 0);
1144 /* convert bool to float */
1145 ureg_UCMP(ureg
, tmp
, ureg_scalar(tx
->regs
.vFace
, TGSI_SWIZZLE_X
),
1146 ureg_imm1f(ureg
, 1), ureg_imm1f(ureg
, -1));
1147 tx
->regs
.vFace
= ureg_src(tmp
);
1149 tx
->regs
.vFace
= ureg_DECL_fs_input(ureg
,
1150 TGSI_SEMANTIC_FACE
, 0,
1151 TGSI_INTERPOLATE_CONSTANT
);
1153 tx
->regs
.vFace
= ureg_scalar(tx
->regs
.vFace
, TGSI_SWIZZLE_X
);
1155 src
= tx
->regs
.vFace
;
1158 assert(!"invalid src D3DSMO");
1162 case D3DSPR_TEMPFLOAT16
:
1165 assert(!"invalid src D3DSPR");
1168 switch (param
->mod
) {
1169 case NINED3DSPSM_DW
:
1170 tmp
= tx_scratch(tx
);
1171 /* NOTE: app is not allowed to read w with this modifier */
1172 ureg_RCP(ureg
, ureg_writemask(tmp
, NINED3DSP_WRITEMASK_3
), ureg_scalar(src
, TGSI_SWIZZLE_W
));
1173 ureg_MUL(ureg
, tmp
, src
, ureg_swizzle(ureg_src(tmp
), NINE_SWIZZLE4(W
,W
,W
,W
)));
1174 src
= ureg_src(tmp
);
1176 case NINED3DSPSM_DZ
:
1177 tmp
= tx_scratch(tx
);
1178 /* NOTE: app is not allowed to read z with this modifier */
1179 ureg_RCP(ureg
, ureg_writemask(tmp
, NINED3DSP_WRITEMASK_2
), ureg_scalar(src
, TGSI_SWIZZLE_Z
));
1180 ureg_MUL(ureg
, tmp
, src
, ureg_swizzle(ureg_src(tmp
), NINE_SWIZZLE4(Z
,Z
,Z
,Z
)));
1181 src
= ureg_src(tmp
);
1187 if (param
->swizzle
!= NINED3DSP_NOSWIZZLE
)
1188 src
= ureg_swizzle(src
,
1189 (param
->swizzle
>> 0) & 0x3,
1190 (param
->swizzle
>> 2) & 0x3,
1191 (param
->swizzle
>> 4) & 0x3,
1192 (param
->swizzle
>> 6) & 0x3);
1194 switch (param
->mod
) {
1195 case NINED3DSPSM_ABS
:
1196 src
= ureg_abs(src
);
1198 case NINED3DSPSM_ABSNEG
:
1199 src
= ureg_negate(ureg_abs(src
));
1201 case NINED3DSPSM_NEG
:
1202 src
= ureg_negate(src
);
1204 case NINED3DSPSM_BIAS
:
1205 tmp
= tx_scratch(tx
);
1206 ureg_ADD(ureg
, tmp
, src
, ureg_imm1f(ureg
, -0.5f
));
1207 src
= ureg_src(tmp
);
1209 case NINED3DSPSM_BIASNEG
:
1210 tmp
= tx_scratch(tx
);
1211 ureg_ADD(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), ureg_negate(src
));
1212 src
= ureg_src(tmp
);
1214 case NINED3DSPSM_NOT
:
1215 if (tx
->native_integers
&& param
->file
== D3DSPR_CONSTBOOL
) {
1216 tmp
= tx_scratch(tx
);
1217 ureg_NOT(ureg
, tmp
, src
);
1218 src
= ureg_src(tmp
);
1220 } else { /* predicate */
1221 tmp
= tx_scratch(tx
);
1222 ureg_ADD(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), ureg_negate(src
));
1223 src
= ureg_src(tmp
);
1226 case NINED3DSPSM_COMP
:
1227 tmp
= tx_scratch(tx
);
1228 ureg_ADD(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), ureg_negate(src
));
1229 src
= ureg_src(tmp
);
1231 case NINED3DSPSM_DZ
:
1232 case NINED3DSPSM_DW
:
1233 /* Already handled*/
1235 case NINED3DSPSM_SIGN
:
1236 tmp
= tx_scratch(tx
);
1237 ureg_MAD(ureg
, tmp
, src
, ureg_imm1f(ureg
, 2.0f
), ureg_imm1f(ureg
, -1.0f
));
1238 src
= ureg_src(tmp
);
1240 case NINED3DSPSM_SIGNNEG
:
1241 tmp
= tx_scratch(tx
);
1242 ureg_MAD(ureg
, tmp
, src
, ureg_imm1f(ureg
, -2.0f
), ureg_imm1f(ureg
, 1.0f
));
1243 src
= ureg_src(tmp
);
1245 case NINED3DSPSM_X2
:
1246 tmp
= tx_scratch(tx
);
1247 ureg_ADD(ureg
, tmp
, src
, src
);
1248 src
= ureg_src(tmp
);
1250 case NINED3DSPSM_X2NEG
:
1251 tmp
= tx_scratch(tx
);
1252 ureg_ADD(ureg
, tmp
, src
, src
);
1253 src
= ureg_negate(ureg_src(tmp
));
1256 assert(param
->mod
== NINED3DSPSM_NONE
);
1263 static struct ureg_dst
1264 _tx_dst_param(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1266 struct ureg_dst dst
;
1268 switch (param
->file
)
1271 assert(!param
->rel
);
1272 tx_temp_alloc(tx
, param
->idx
);
1273 dst
= tx
->regs
.r
[param
->idx
];
1275 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1277 assert(!param
->rel
);
1278 if (tx
->version
.major
< 2 && !IS_VS
) {
1279 if (ureg_dst_is_undef(tx
->regs
.tS
[param
->idx
]))
1280 tx
->regs
.tS
[param
->idx
] = ureg_DECL_temporary(tx
->ureg
);
1281 dst
= tx
->regs
.tS
[param
->idx
];
1283 if (!IS_VS
&& tx
->insn
.opcode
== D3DSIO_TEXKILL
) { /* maybe others, too */
1284 tx_texcoord_alloc(tx
, param
->idx
);
1285 dst
= ureg_dst(tx
->regs
.vT
[param
->idx
]);
1287 tx_addr_alloc(tx
, param
->idx
);
1291 case D3DSPR_RASTOUT
:
1292 assert(!param
->rel
);
1293 switch (param
->idx
) {
1295 if (ureg_dst_is_undef(tx
->regs
.oPos
))
1297 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0);
1298 dst
= tx
->regs
.oPos
;
1301 if (ureg_dst_is_undef(tx
->regs
.oFog
))
1303 ureg_saturate(ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_GENERIC
, 16));
1304 dst
= tx
->regs
.oFog
;
1307 if (ureg_dst_is_undef(tx
->regs
.oPts
))
1308 tx
->regs
.oPts
= ureg_DECL_temporary(tx
->ureg
);
1309 dst
= tx
->regs
.oPts
;
1316 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1318 if (tx
->version
.major
< 3) {
1319 assert(!param
->rel
);
1320 dst
= ureg_DECL_output(tx
->ureg
, tx
->texcoord_sn
, param
->idx
);
1322 assert(!param
->rel
); /* TODO */
1323 assert(param
->idx
< ARRAY_SIZE(tx
->regs
.o
));
1324 dst
= tx
->regs
.o
[param
->idx
];
1327 case D3DSPR_ATTROUT
: /* VS */
1328 case D3DSPR_COLOROUT
: /* PS */
1329 assert(param
->idx
>= 0 && param
->idx
< 4);
1330 assert(!param
->rel
);
1331 tx
->info
->rt_mask
|= 1 << param
->idx
;
1332 if (ureg_dst_is_undef(tx
->regs
.oCol
[param
->idx
])) {
1333 /* ps < 3: oCol[0] will have fog blending afterward */
1334 if (!IS_VS
&& tx
->version
.major
< 3 && param
->idx
== 0) {
1335 tx
->regs
.oCol
[0] = ureg_DECL_temporary(tx
->ureg
);
1337 tx
->regs
.oCol
[param
->idx
] =
1338 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_COLOR
, param
->idx
);
1341 dst
= tx
->regs
.oCol
[param
->idx
];
1342 if (IS_VS
&& tx
->version
.major
< 3)
1343 dst
= ureg_saturate(dst
);
1345 case D3DSPR_DEPTHOUT
:
1346 assert(!param
->rel
);
1347 if (ureg_dst_is_undef(tx
->regs
.oDepth
))
1349 ureg_DECL_output_masked(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0,
1350 TGSI_WRITEMASK_Z
, 0, 1);
1351 dst
= tx
->regs
.oDepth
; /* XXX: must write .z component */
1353 case D3DSPR_PREDICATE
:
1354 if (ureg_dst_is_undef(tx
->regs
.predicate
))
1355 tx
->regs
.predicate
= ureg_DECL_temporary(tx
->ureg
);
1356 dst
= tx
->regs
.predicate
;
1358 case D3DSPR_TEMPFLOAT16
:
1359 DBG("unhandled D3DSPR: %u\n", param
->file
);
1362 assert(!"invalid dst D3DSPR");
1366 dst
= ureg_dst_indirect(dst
, tx_src_param(tx
, param
->rel
));
1368 if (param
->mask
!= NINED3DSP_WRITEMASK_ALL
)
1369 dst
= ureg_writemask(dst
, param
->mask
);
1370 if (param
->mod
& NINED3DSPDM_SATURATE
)
1371 dst
= ureg_saturate(dst
);
1373 if (tx
->predicated_activated
) {
1374 tx
->regs
.predicate_dst
= dst
;
1375 dst
= tx
->regs
.predicate_tmp
;
1381 static struct ureg_dst
1382 tx_dst_param(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1385 tx
->regs
.tdst
= ureg_writemask(tx_scratch(tx
), param
->mask
);
1386 return tx
->regs
.tdst
;
1388 return _tx_dst_param(tx
, param
);
1392 tx_apply_dst0_modifiers(struct shader_translator
*tx
)
1394 struct ureg_dst rdst
;
1397 if (!tx
->insn
.ndst
|| !tx
->insn
.dst
[0].shift
|| tx
->insn
.opcode
== D3DSIO_TEXKILL
)
1399 rdst
= _tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1401 assert(rdst
.File
!= TGSI_FILE_ADDRESS
); /* this probably isn't possible */
1403 if (tx
->insn
.dst
[0].shift
< 0)
1404 f
= 1.0f
/ (1 << -tx
->insn
.dst
[0].shift
);
1406 f
= 1 << tx
->insn
.dst
[0].shift
;
1408 ureg_MUL(tx
->ureg
, rdst
, ureg_src(tx
->regs
.tdst
), ureg_imm1f(tx
->ureg
, f
));
1411 static struct ureg_src
1412 tx_dst_param_as_src(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1414 struct ureg_src src
;
1416 assert(!param
->shift
);
1417 assert(!(param
->mod
& NINED3DSPDM_SATURATE
));
1419 switch (param
->file
) {
1422 src
= ureg_src_register(TGSI_FILE_INPUT
, param
->idx
);
1424 assert(!param
->rel
);
1425 assert(param
->idx
< ARRAY_SIZE(tx
->regs
.v
));
1426 src
= tx
->regs
.v
[param
->idx
];
1430 src
= ureg_src(tx_dst_param(tx
, param
));
1434 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
1437 WARN("mask is 0, using identity swizzle\n");
1439 if (param
->mask
&& param
->mask
!= NINED3DSP_WRITEMASK_ALL
) {
1443 for (n
= 0, c
= 0; c
< 4; ++c
)
1444 if (param
->mask
& (1 << c
))
1447 for (c
= n
; c
< 4; ++c
)
1449 src
= ureg_swizzle(src
, s
[0], s
[1], s
[2], s
[3]);
1455 NineTranslateInstruction_Mkxn(struct shader_translator
*tx
, const unsigned k
, const unsigned n
)
1457 struct ureg_program
*ureg
= tx
->ureg
;
1458 struct ureg_dst dst
;
1459 struct ureg_src src
[2];
1460 struct sm1_src_param
*src_mat
= &tx
->insn
.src
[1];
1463 dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1464 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1466 for (i
= 0; i
< n
; i
++)
1468 const unsigned m
= (1 << i
);
1470 src
[1] = tx_src_param(tx
, src_mat
);
1473 if (!(dst
.WriteMask
& m
))
1476 /* XXX: src == dst case ? */
1480 ureg_DP3(ureg
, ureg_writemask(dst
, m
), src
[0], src
[1]);
1483 ureg_DP4(ureg
, ureg_writemask(dst
, m
), src
[0], src
[1]);
1486 DBG("invalid operation: M%ux%u\n", m
, n
);
1494 #define VNOTSUPPORTED 0, 0
1495 #define V(maj, min) (((maj) << 8) | (min))
1497 static inline const char *
1498 d3dsio_to_string( unsigned opcode
)
1500 static const char *names
[] = {
1600 if (opcode
< ARRAY_SIZE(names
)) return names
[opcode
];
1603 case D3DSIO_PHASE
: return "PHASE";
1604 case D3DSIO_COMMENT
: return "COMMENT";
1605 case D3DSIO_END
: return "END";
1611 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1612 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1613 (inst).vert_version.max | \
1614 (inst).frag_version.min | \
1615 (inst).frag_version.max)
1617 #define SPECIAL(name) \
1618 NineTranslateInstruction_##name
1620 #define DECL_SPECIAL(name) \
1622 NineTranslateInstruction_##name( struct shader_translator *tx )
1625 NineTranslateInstruction_Generic(struct shader_translator
*);
1629 /* Nothing to do. NOP was used to avoid hangs
1630 * with very old d3d drivers. */
1636 struct ureg_program
*ureg
= tx
->ureg
;
1637 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1638 struct ureg_src src0
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1639 struct ureg_src src1
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1641 ureg_ADD(ureg
, dst
, src0
, ureg_negate(src1
));
1647 struct ureg_program
*ureg
= tx
->ureg
;
1648 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1649 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1651 ureg_MOV(ureg
, dst
, ureg_abs(src
));
1657 struct ureg_program
*ureg
= tx
->ureg
;
1658 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1659 struct ureg_src src0
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1660 struct ureg_src src1
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1662 ureg_MUL(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XYZ
),
1663 ureg_swizzle(src0
, TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_Z
,
1665 ureg_swizzle(src1
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_X
,
1666 TGSI_SWIZZLE_Y
, 0));
1667 ureg_MAD(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XYZ
),
1668 ureg_swizzle(src0
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_X
,
1670 ureg_negate(ureg_swizzle(src1
, TGSI_SWIZZLE_Y
,
1671 TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_X
, 0)),
1673 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
),
1674 ureg_imm1f(ureg
, 1));
1680 return NineTranslateInstruction_Mkxn(tx
, 4, 4);
1685 return NineTranslateInstruction_Mkxn(tx
, 4, 3);
1690 return NineTranslateInstruction_Mkxn(tx
, 3, 4);
1695 return NineTranslateInstruction_Mkxn(tx
, 3, 3);
1700 return NineTranslateInstruction_Mkxn(tx
, 3, 2);
1705 ureg_CMP(tx
->ureg
, tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1706 tx_src_param(tx
, &tx
->insn
.src
[0]),
1707 tx_src_param(tx
, &tx
->insn
.src
[2]),
1708 tx_src_param(tx
, &tx
->insn
.src
[1]));
1714 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1715 struct ureg_dst cgt
;
1716 struct ureg_src cnd
;
1718 /* the coissue flag was a tip for compilers to advise to
1719 * execute two operations at the same time, in cases
1720 * the two executions had same dst with different channels.
1721 * It has no effect on current hw. However it seems CND
1722 * is affected. The handling of this very specific case
1723 * handled below mimick wine behaviour */
1724 if (tx
->insn
.coissue
&& tx
->version
.major
== 1 && tx
->version
.minor
< 4 && tx
->insn
.dst
[0].mask
!= NINED3DSP_WRITEMASK_3
) {
1726 dst
, tx_src_param(tx
, &tx
->insn
.src
[1]));
1730 cnd
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1731 cgt
= tx_scratch(tx
);
1733 if (tx
->version
.major
== 1 && tx
->version
.minor
< 4)
1734 cnd
= ureg_scalar(cnd
, TGSI_SWIZZLE_W
);
1736 ureg_SGT(tx
->ureg
, cgt
, cnd
, ureg_imm1f(tx
->ureg
, 0.5f
));
1738 ureg_CMP(tx
->ureg
, dst
, ureg_negate(ureg_src(cgt
)),
1739 tx_src_param(tx
, &tx
->insn
.src
[1]),
1740 tx_src_param(tx
, &tx
->insn
.src
[2]));
1746 assert(tx
->insn
.src
[0].idx
< tx
->num_inst_labels
);
1747 ureg_CAL(tx
->ureg
, &tx
->inst_labels
[tx
->insn
.src
[0].idx
]);
1751 DECL_SPECIAL(CALLNZ
)
1753 struct ureg_program
*ureg
= tx
->ureg
;
1754 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1756 if (!tx
->native_integers
)
1757 ureg_IF(ureg
, src
, tx_cond(tx
));
1759 ureg_UIF(ureg
, src
, tx_cond(tx
));
1760 ureg_CAL(ureg
, &tx
->inst_labels
[tx
->insn
.src
[0].idx
]);
1768 struct ureg_program
*ureg
= tx
->ureg
;
1770 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1771 struct ureg_dst ctr
;
1772 struct ureg_dst tmp
;
1773 struct ureg_src ctrx
;
1775 label
= tx_bgnloop(tx
);
1776 ctr
= tx_get_loopctr(tx
, TRUE
);
1777 ctrx
= ureg_scalar(ureg_src(ctr
), TGSI_SWIZZLE_X
);
1779 /* src: num_iterations - start_value of al - step for al - 0 */
1780 ureg_MOV(ureg
, ctr
, src
);
1781 ureg_BGNLOOP(tx
->ureg
, label
);
1782 tmp
= tx_scratch_scalar(tx
);
1783 /* Initially ctr.x contains the number of iterations.
1784 * ctr.y will contain the updated value of al.
1785 * We decrease ctr.x at the end of every iteration,
1786 * and stop when it reaches 0. */
1788 if (!tx
->native_integers
) {
1789 /* case src and ctr contain floats */
1790 /* to avoid precision issue, we stop when ctr <= 0.5 */
1791 ureg_SGE(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), ctrx
);
1792 ureg_IF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1794 /* case src and ctr contain integers */
1795 ureg_ISGE(ureg
, tmp
, ureg_imm1i(ureg
, 0), ctrx
);
1796 ureg_UIF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1806 /* RET as a last instruction could be safely ignored.
1807 * Remove it to prevent crashes/warnings in case underlying
1808 * driver doesn't implement arbitrary returns.
1810 if (*(tx
->parse_next
) != NINED3DSP_END
) {
1816 DECL_SPECIAL(ENDLOOP
)
1818 struct ureg_program
*ureg
= tx
->ureg
;
1819 struct ureg_dst ctr
= tx_get_loopctr(tx
, TRUE
);
1820 struct ureg_dst dst_ctrx
, dst_al
;
1821 struct ureg_src src_ctr
, al_counter
;
1823 dst_ctrx
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_0
);
1824 dst_al
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_1
);
1825 src_ctr
= ureg_src(ctr
);
1826 al_counter
= ureg_scalar(src_ctr
, TGSI_SWIZZLE_Z
);
1829 * ctr.y (aL) += step */
1830 if (!tx
->native_integers
) {
1831 ureg_ADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1f(ureg
, -1.0f
));
1832 ureg_ADD(ureg
, dst_al
, src_ctr
, al_counter
);
1834 ureg_UADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1i(ureg
, -1));
1835 ureg_UADD(ureg
, dst_al
, src_ctr
, al_counter
);
1837 ureg_ENDLOOP(tx
->ureg
, tx_endloop(tx
));
1843 unsigned k
= tx
->num_inst_labels
;
1844 unsigned n
= tx
->insn
.src
[0].idx
;
1847 tx
->inst_labels
= REALLOC(tx
->inst_labels
,
1848 k
* sizeof(tx
->inst_labels
[0]),
1849 n
* sizeof(tx
->inst_labels
[0]));
1851 tx
->inst_labels
[n
] = ureg_get_instruction_number(tx
->ureg
);
1855 DECL_SPECIAL(SINCOS
)
1857 struct ureg_program
*ureg
= tx
->ureg
;
1858 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1859 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1860 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
1862 assert(!(dst
.WriteMask
& 0xc));
1864 /* Copying to a temporary register avoids src/dst aliasing.
1865 * src is supposed to have replicated swizzle. */
1866 ureg_MOV(ureg
, tmp
, src
);
1868 /* z undefined, w untouched */
1869 ureg_COS(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
),
1870 tx_src_scalar(tmp
));
1871 ureg_SIN(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
),
1872 tx_src_scalar(tmp
));
1879 tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1880 tx_src_param(tx
, &tx
->insn
.src
[0]));
1886 struct ureg_program
*ureg
= tx
->ureg
;
1888 struct ureg_src rep
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1889 struct ureg_dst ctr
;
1890 struct ureg_dst tmp
;
1891 struct ureg_src ctrx
;
1893 label
= tx_bgnloop(tx
);
1894 ctr
= ureg_writemask(tx_get_loopctr(tx
, FALSE
), NINED3DSP_WRITEMASK_0
);
1895 ctrx
= ureg_scalar(ureg_src(ctr
), TGSI_SWIZZLE_X
);
1897 /* NOTE: rep must be constant, so we don't have to save the count */
1898 assert(rep
.File
== TGSI_FILE_CONSTANT
|| rep
.File
== TGSI_FILE_IMMEDIATE
);
1900 /* rep: num_iterations - 0 - 0 - 0 */
1901 ureg_MOV(ureg
, ctr
, rep
);
1902 ureg_BGNLOOP(ureg
, label
);
1903 tmp
= tx_scratch_scalar(tx
);
1904 /* Initially ctr.x contains the number of iterations.
1905 * We decrease ctr.x at the end of every iteration,
1906 * and stop when it reaches 0. */
1908 if (!tx
->native_integers
) {
1909 /* case src and ctr contain floats */
1910 /* to avoid precision issue, we stop when ctr <= 0.5 */
1911 ureg_SGE(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), ctrx
);
1912 ureg_IF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1914 /* case src and ctr contain integers */
1915 ureg_ISGE(ureg
, tmp
, ureg_imm1i(ureg
, 0), ctrx
);
1916 ureg_UIF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1925 DECL_SPECIAL(ENDREP
)
1927 struct ureg_program
*ureg
= tx
->ureg
;
1928 struct ureg_dst ctr
= tx_get_loopctr(tx
, FALSE
);
1929 struct ureg_dst dst_ctrx
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_0
);
1930 struct ureg_src src_ctr
= ureg_src(ctr
);
1933 if (!tx
->native_integers
)
1934 ureg_ADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1f(ureg
, -1.0f
));
1936 ureg_UADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1i(ureg
, -1));
1938 ureg_ENDLOOP(tx
->ureg
, tx_endloop(tx
));
1945 ureg_ENDIF(tx
->ureg
);
1951 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1953 if (tx
->native_integers
&& tx
->insn
.src
[0].file
== D3DSPR_CONSTBOOL
)
1954 ureg_UIF(tx
->ureg
, src
, tx_cond(tx
));
1956 ureg_IF(tx
->ureg
, src
, tx_cond(tx
));
1961 static inline unsigned
1962 sm1_insn_flags_to_tgsi_setop(BYTE flags
)
1965 case NINED3DSHADER_REL_OP_GT
: return TGSI_OPCODE_SGT
;
1966 case NINED3DSHADER_REL_OP_EQ
: return TGSI_OPCODE_SEQ
;
1967 case NINED3DSHADER_REL_OP_GE
: return TGSI_OPCODE_SGE
;
1968 case NINED3DSHADER_REL_OP_LT
: return TGSI_OPCODE_SLT
;
1969 case NINED3DSHADER_REL_OP_NE
: return TGSI_OPCODE_SNE
;
1970 case NINED3DSHADER_REL_OP_LE
: return TGSI_OPCODE_SLE
;
1972 assert(!"invalid comparison flags");
1973 return TGSI_OPCODE_SGT
;
1979 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
1980 struct ureg_src src
[2];
1981 struct ureg_dst tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
1982 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1983 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
1984 ureg_insn(tx
->ureg
, cmp_op
, &tmp
, 1, src
, 2, 0);
1985 ureg_IF(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), tx_cond(tx
));
1991 ureg_ELSE(tx
->ureg
, tx_elsecond(tx
));
1995 DECL_SPECIAL(BREAKC
)
1997 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
1998 struct ureg_src src
[2];
1999 struct ureg_dst tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
2000 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
2001 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
2002 ureg_insn(tx
->ureg
, cmp_op
, &tmp
, 1, src
, 2, 0);
2003 ureg_IF(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), tx_cond(tx
));
2006 ureg_ENDIF(tx
->ureg
);
2010 static const char *sm1_declusage_names
[] =
2012 [D3DDECLUSAGE_POSITION
] = "POSITION",
2013 [D3DDECLUSAGE_BLENDWEIGHT
] = "BLENDWEIGHT",
2014 [D3DDECLUSAGE_BLENDINDICES
] = "BLENDINDICES",
2015 [D3DDECLUSAGE_NORMAL
] = "NORMAL",
2016 [D3DDECLUSAGE_PSIZE
] = "PSIZE",
2017 [D3DDECLUSAGE_TEXCOORD
] = "TEXCOORD",
2018 [D3DDECLUSAGE_TANGENT
] = "TANGENT",
2019 [D3DDECLUSAGE_BINORMAL
] = "BINORMAL",
2020 [D3DDECLUSAGE_TESSFACTOR
] = "TESSFACTOR",
2021 [D3DDECLUSAGE_POSITIONT
] = "POSITIONT",
2022 [D3DDECLUSAGE_COLOR
] = "COLOR",
2023 [D3DDECLUSAGE_FOG
] = "FOG",
2024 [D3DDECLUSAGE_DEPTH
] = "DEPTH",
2025 [D3DDECLUSAGE_SAMPLE
] = "SAMPLE"
2028 static inline unsigned
2029 sm1_to_nine_declusage(struct sm1_semantic
*dcl
)
2031 return nine_d3d9_to_nine_declusage(dcl
->usage
, dcl
->usage_idx
);
2035 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic
*sem
,
2037 struct sm1_semantic
*dcl
)
2039 BYTE index
= dcl
->usage_idx
;
2041 /* For everything that is not matching to a TGSI_SEMANTIC_****,
2042 * we match to a TGSI_SEMANTIC_GENERIC with index.
2044 * The index can be anything UINT16 and usage_idx is BYTE,
2045 * so we can fit everything. It doesn't matter if indices
2046 * are close together or low.
2049 * POSITION >= 1: 10 * index + 7
2050 * COLOR >= 2: 10 * (index-1) + 8
2052 * TEXCOORD[0..15]: index
2053 * BLENDWEIGHT: 10 * index + 19
2054 * BLENDINDICES: 10 * index + 20
2055 * NORMAL: 10 * index + 21
2056 * TANGENT: 10 * index + 22
2057 * BINORMAL: 10 * index + 23
2058 * TESSFACTOR: 10 * index + 24
2061 switch (dcl
->usage
) {
2062 case D3DDECLUSAGE_POSITION
:
2063 case D3DDECLUSAGE_POSITIONT
:
2064 case D3DDECLUSAGE_DEPTH
:
2066 sem
->Name
= TGSI_SEMANTIC_POSITION
;
2069 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2070 sem
->Index
= 10 * index
+ 7;
2073 case D3DDECLUSAGE_COLOR
:
2075 sem
->Name
= TGSI_SEMANTIC_COLOR
;
2078 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2079 sem
->Index
= 10 * (index
-1) + 8;
2082 case D3DDECLUSAGE_FOG
:
2084 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2087 case D3DDECLUSAGE_PSIZE
:
2089 sem
->Name
= TGSI_SEMANTIC_PSIZE
;
2092 case D3DDECLUSAGE_TEXCOORD
:
2094 if (index
< 8 && tc
)
2095 sem
->Name
= TGSI_SEMANTIC_TEXCOORD
;
2097 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2100 case D3DDECLUSAGE_BLENDWEIGHT
:
2101 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2102 sem
->Index
= 10 * index
+ 19;
2104 case D3DDECLUSAGE_BLENDINDICES
:
2105 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2106 sem
->Index
= 10 * index
+ 20;
2108 case D3DDECLUSAGE_NORMAL
:
2109 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2110 sem
->Index
= 10 * index
+ 21;
2112 case D3DDECLUSAGE_TANGENT
:
2113 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2114 sem
->Index
= 10 * index
+ 22;
2116 case D3DDECLUSAGE_BINORMAL
:
2117 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2118 sem
->Index
= 10 * index
+ 23;
2120 case D3DDECLUSAGE_TESSFACTOR
:
2121 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2122 sem
->Index
= 10 * index
+ 24;
2124 case D3DDECLUSAGE_SAMPLE
:
2125 sem
->Name
= TGSI_SEMANTIC_COUNT
;
2129 unreachable("Invalid DECLUSAGE.");
2134 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2135 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2136 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2137 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2138 static inline unsigned
2139 d3dstt_to_tgsi_tex(BYTE sampler_type
)
2141 switch (sampler_type
) {
2142 case NINED3DSTT_1D
: return TGSI_TEXTURE_1D
;
2143 case NINED3DSTT_2D
: return TGSI_TEXTURE_2D
;
2144 case NINED3DSTT_VOLUME
: return TGSI_TEXTURE_3D
;
2145 case NINED3DSTT_CUBE
: return TGSI_TEXTURE_CUBE
;
2148 return TGSI_TEXTURE_UNKNOWN
;
2151 static inline unsigned
2152 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type
)
2154 switch (sampler_type
) {
2155 case NINED3DSTT_1D
: return TGSI_TEXTURE_SHADOW1D
;
2156 case NINED3DSTT_2D
: return TGSI_TEXTURE_SHADOW2D
;
2157 case NINED3DSTT_VOLUME
:
2158 case NINED3DSTT_CUBE
:
2161 return TGSI_TEXTURE_UNKNOWN
;
2164 static inline unsigned
2165 ps1x_sampler_type(const struct nine_shader_info
*info
, unsigned stage
)
2167 boolean shadow
= !!(info
->sampler_mask_shadow
& (1 << stage
));
2168 switch ((info
->sampler_ps1xtypes
>> (stage
* 2)) & 0x3) {
2169 case 1: return shadow
? TGSI_TEXTURE_SHADOW1D
: TGSI_TEXTURE_1D
;
2170 case 0: return shadow
? TGSI_TEXTURE_SHADOW2D
: TGSI_TEXTURE_2D
;
2171 case 3: return TGSI_TEXTURE_3D
;
2173 return TGSI_TEXTURE_CUBE
;
2178 sm1_sampler_type_name(BYTE sampler_type
)
2180 switch (sampler_type
) {
2181 case NINED3DSTT_1D
: return "1D";
2182 case NINED3DSTT_2D
: return "2D";
2183 case NINED3DSTT_VOLUME
: return "VOLUME";
2184 case NINED3DSTT_CUBE
: return "CUBE";
2186 return "(D3DSTT_?)";
2190 static inline unsigned
2191 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic
*sem
)
2193 switch (sem
->Name
) {
2194 case TGSI_SEMANTIC_POSITION
:
2195 case TGSI_SEMANTIC_NORMAL
:
2196 return TGSI_INTERPOLATE_LINEAR
;
2197 case TGSI_SEMANTIC_BCOLOR
:
2198 case TGSI_SEMANTIC_COLOR
:
2199 return TGSI_INTERPOLATE_COLOR
;
2200 case TGSI_SEMANTIC_FOG
:
2201 case TGSI_SEMANTIC_GENERIC
:
2202 case TGSI_SEMANTIC_TEXCOORD
:
2203 case TGSI_SEMANTIC_CLIPDIST
:
2204 case TGSI_SEMANTIC_CLIPVERTEX
:
2205 return TGSI_INTERPOLATE_PERSPECTIVE
;
2206 case TGSI_SEMANTIC_EDGEFLAG
:
2207 case TGSI_SEMANTIC_FACE
:
2208 case TGSI_SEMANTIC_INSTANCEID
:
2209 case TGSI_SEMANTIC_PCOORD
:
2210 case TGSI_SEMANTIC_PRIMID
:
2211 case TGSI_SEMANTIC_PSIZE
:
2212 case TGSI_SEMANTIC_VERTEXID
:
2213 return TGSI_INTERPOLATE_CONSTANT
;
2216 return TGSI_INTERPOLATE_CONSTANT
;
2222 struct ureg_program
*ureg
= tx
->ureg
;
2225 struct tgsi_declaration_semantic tgsi
;
2226 struct sm1_semantic sem
;
2227 sm1_read_semantic(tx
, &sem
);
2229 is_input
= sem
.reg
.file
== D3DSPR_INPUT
;
2231 sem
.usage
== D3DDECLUSAGE_SAMPLE
|| sem
.reg
.file
== D3DSPR_SAMPLER
;
2234 sm1_dump_dst_param(&sem
.reg
);
2236 DUMP(" %s\n", sm1_sampler_type_name(sem
.sampler_type
));
2238 if (tx
->version
.major
>= 3)
2239 DUMP(" %s%i\n", sm1_declusage_names
[sem
.usage
], sem
.usage_idx
);
2241 if (sem
.usage
| sem
.usage_idx
)
2242 DUMP(" %u[%u]\n", sem
.usage
, sem
.usage_idx
);
2247 const unsigned m
= 1 << sem
.reg
.idx
;
2248 ureg_DECL_sampler(ureg
, sem
.reg
.idx
);
2249 tx
->info
->sampler_mask
|= m
;
2250 tx
->sampler_targets
[sem
.reg
.idx
] = (tx
->info
->sampler_mask_shadow
& m
) ?
2251 d3dstt_to_tgsi_tex_shadow(sem
.sampler_type
) :
2252 d3dstt_to_tgsi_tex(sem
.sampler_type
);
2256 sm1_declusage_to_tgsi(&tgsi
, tx
->want_texcoord
, &sem
);
2259 /* linkage outside of shader with vertex declaration */
2260 ureg_DECL_vs_input(ureg
, sem
.reg
.idx
);
2261 assert(sem
.reg
.idx
< ARRAY_SIZE(tx
->info
->input_map
));
2262 tx
->info
->input_map
[sem
.reg
.idx
] = sm1_to_nine_declusage(&sem
);
2263 tx
->info
->num_inputs
= MAX2(tx
->info
->num_inputs
, sem
.reg
.idx
+ 1);
2264 /* NOTE: preserving order in case of indirect access */
2266 if (tx
->version
.major
>= 3) {
2267 /* SM2 output semantic determined by file */
2268 assert(sem
.reg
.mask
!= 0);
2269 if (sem
.usage
== D3DDECLUSAGE_POSITIONT
)
2270 tx
->info
->position_t
= TRUE
;
2271 assert(sem
.reg
.idx
< ARRAY_SIZE(tx
->regs
.o
));
2272 assert(ureg_dst_is_undef(tx
->regs
.o
[sem
.reg
.idx
]) && "Nine doesn't support yet packing");
2273 tx
->regs
.o
[sem
.reg
.idx
] = ureg_DECL_output_masked(
2274 ureg
, tgsi
.Name
, tgsi
.Index
, sem
.reg
.mask
, 0, 1);
2275 nine_record_outputs(tx
, sem
.usage
, sem
.usage_idx
, sem
.reg
.mask
, sem
.reg
.idx
);
2276 if (tx
->info
->process_vertices
&& sem
.usage
== D3DDECLUSAGE_POSITION
&& sem
.usage_idx
== 0) {
2277 tx
->regs
.oPos_out
= tx
->regs
.o
[sem
.reg
.idx
];
2278 tx
->regs
.o
[sem
.reg
.idx
] = ureg_DECL_temporary(ureg
);
2279 tx
->regs
.oPos
= tx
->regs
.o
[sem
.reg
.idx
];
2282 if (tgsi
.Name
== TGSI_SEMANTIC_PSIZE
) {
2283 tx
->regs
.o
[sem
.reg
.idx
] = ureg_DECL_temporary(ureg
);
2284 tx
->regs
.oPts
= tx
->regs
.o
[sem
.reg
.idx
];
2288 if (is_input
&& tx
->version
.major
>= 3) {
2289 unsigned interp_location
= 0;
2290 /* SM3 only, SM2 input semantic determined by file */
2291 assert(sem
.reg
.idx
< ARRAY_SIZE(tx
->regs
.v
));
2292 assert(ureg_src_is_undef(tx
->regs
.v
[sem
.reg
.idx
]) && "Nine doesn't support yet packing");
2293 /* PositionT and tessfactor forbidden */
2294 if (sem
.usage
== D3DDECLUSAGE_POSITIONT
|| sem
.usage
== D3DDECLUSAGE_TESSFACTOR
)
2295 return D3DERR_INVALIDCALL
;
2297 if (tgsi
.Name
== TGSI_SEMANTIC_POSITION
) {
2298 /* Position0 is forbidden (likely because vPos already does that) */
2299 if (sem
.usage
== D3DDECLUSAGE_POSITION
)
2300 return D3DERR_INVALIDCALL
;
2301 /* Following code is for depth */
2302 tx
->regs
.v
[sem
.reg
.idx
] = nine_get_position_input(tx
);
2306 if (sem
.reg
.mod
& NINED3DSPDM_CENTROID
||
2307 (tgsi
.Name
== TGSI_SEMANTIC_COLOR
&& tx
->info
->force_color_in_centroid
))
2308 interp_location
= TGSI_INTERPOLATE_LOC_CENTROID
;
2310 tx
->regs
.v
[sem
.reg
.idx
] = ureg_DECL_fs_input_cyl_centroid(
2311 ureg
, tgsi
.Name
, tgsi
.Index
,
2312 nine_tgsi_to_interp_mode(&tgsi
),
2314 interp_location
, 0, 1);
2316 if (!is_input
&& 0) { /* declare in COLOROUT/DEPTHOUT case */
2317 /* FragColor or FragDepth */
2318 assert(sem
.reg
.mask
!= 0);
2319 ureg_DECL_output_masked(ureg
, tgsi
.Name
, tgsi
.Index
, sem
.reg
.mask
,
2328 tx_set_lconstf(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.f
);
2334 tx_set_lconstb(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.b
);
2340 tx_set_lconsti(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.i
);
2346 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2347 struct ureg_src src
[2] = {
2348 tx_src_param(tx
, &tx
->insn
.src
[0]),
2349 tx_src_param(tx
, &tx
->insn
.src
[1])
2351 ureg_POW(tx
->ureg
, dst
, ureg_abs(src
[0]), src
[1]);
2355 /* Tests results on Win 10:
2356 * NV (NVIDIA GeForce GT 635M)
2357 * AMD (AMD Radeon HD 7730M)
2358 * INTEL (Intel(R) HD Graphics 4000)
2360 * RCP and RSQ can generate inf on NV and AMD.
2361 * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
2362 * NV: log not clamped
2363 * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
2364 * INTEL: log(0) is -FLT_MAX and log(inf) is 127
2365 * All devices have 0*anything = 0
2367 * INTEL VS2 and VS3: same behaviour.
2368 * Some differences VS2 and VS3 for constants defined with inf/NaN.
2369 * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
2370 * VS2 seems to clamp to zero (may be test failure).
2371 * AMD VS2: unknown, VS3: very likely behaviour of PS3
2372 * NV VS2 and VS3: very likely behaviour of PS3
2373 * For both, Inf in VS becomes NaN in PS
2374 * "Very likely" because the test was less extensive.
2376 * Thus all clamping can be removed for shaders 2 and 3,
2377 * as long as 0*anything = 0.
2378 * Else clamps to enforce 0*anything = 0 (anything being then
2379 * neither inf or NaN, the user being unlikely to pass them
2381 * The status for VS1 and PS1 is unknown.
2386 struct ureg_program
*ureg
= tx
->ureg
;
2387 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2388 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2389 struct ureg_dst tmp
= tx
->mul_zero_wins
? dst
: tx_scratch(tx
);
2390 ureg_RCP(ureg
, tmp
, src
);
2391 if (!tx
->mul_zero_wins
) {
2392 /* FLT_MAX has issues with Rayman */
2393 ureg_MIN(ureg
, tmp
, ureg_imm1f(ureg
, FLT_MAX
/2.f
), ureg_src(tmp
));
2394 ureg_MAX(ureg
, dst
, ureg_imm1f(ureg
, -FLT_MAX
/2.f
), ureg_src(tmp
));
2401 struct ureg_program
*ureg
= tx
->ureg
;
2402 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2403 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2404 struct ureg_dst tmp
= tx
->mul_zero_wins
? dst
: tx_scratch(tx
);
2405 ureg_RSQ(ureg
, tmp
, ureg_abs(src
));
2406 if (!tx
->mul_zero_wins
)
2407 ureg_MIN(ureg
, dst
, ureg_imm1f(ureg
, FLT_MAX
), ureg_src(tmp
));
2413 struct ureg_program
*ureg
= tx
->ureg
;
2414 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2415 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2416 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2417 ureg_LG2(ureg
, tmp
, ureg_abs(src
));
2418 if (tx
->mul_zero_wins
) {
2419 ureg_MOV(ureg
, dst
, tx_src_scalar(tmp
));
2421 ureg_MAX(ureg
, dst
, ureg_imm1f(ureg
, -FLT_MAX
), tx_src_scalar(tmp
));
2428 struct ureg_program
*ureg
= tx
->ureg
;
2429 struct ureg_dst tmp
= tx_scratch(tx
);
2430 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2431 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2432 ureg_LIT(ureg
, tmp
, src
);
2433 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2434 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2435 * it 0^0 if src.w=0, which value is driver dependent. */
2436 ureg_CMP(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
),
2437 ureg_negate(ureg_scalar(src
, TGSI_SWIZZLE_Y
)),
2438 ureg_src(tmp
), ureg_imm1f(ureg
, 0.0f
));
2439 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XYW
), ureg_src(tmp
));
2445 struct ureg_program
*ureg
= tx
->ureg
;
2446 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2447 struct ureg_src nrm
= tx_src_scalar(tmp
);
2448 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2449 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2450 ureg_DP3(ureg
, tmp
, src
, src
);
2451 ureg_RSQ(ureg
, tmp
, nrm
);
2452 if (!tx
->mul_zero_wins
)
2453 ureg_MIN(ureg
, tmp
, ureg_imm1f(ureg
, FLT_MAX
), nrm
);
2454 ureg_MUL(ureg
, dst
, src
, nrm
);
2458 DECL_SPECIAL(DP2ADD
)
2460 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2461 struct ureg_src dp2
= tx_src_scalar(tmp
);
2462 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2463 struct ureg_src src
[3];
2465 for (i
= 0; i
< 3; ++i
)
2466 src
[i
] = tx_src_param(tx
, &tx
->insn
.src
[i
]);
2467 assert_replicate_swizzle(&src
[2]);
2469 ureg_DP2(tx
->ureg
, tmp
, src
[0], src
[1]);
2470 ureg_ADD(tx
->ureg
, dst
, src
[2], dp2
);
2475 DECL_SPECIAL(TEXCOORD
)
2477 struct ureg_program
*ureg
= tx
->ureg
;
2478 const unsigned s
= tx
->insn
.dst
[0].idx
;
2479 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2481 tx_texcoord_alloc(tx
, s
);
2482 ureg_MOV(ureg
, ureg_writemask(ureg_saturate(dst
), TGSI_WRITEMASK_XYZ
), tx
->regs
.vT
[s
]);
2483 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
), ureg_imm1f(tx
->ureg
, 1.0f
));
2488 DECL_SPECIAL(TEXCOORD_ps14
)
2490 struct ureg_program
*ureg
= tx
->ureg
;
2491 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2492 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2494 assert(tx
->insn
.src
[0].file
== D3DSPR_TEXTURE
);
2496 ureg_MOV(ureg
, dst
, src
);
2501 DECL_SPECIAL(TEXKILL
)
2503 struct ureg_src reg
;
2505 if (tx
->version
.major
> 1 || tx
->version
.minor
> 3) {
2506 reg
= tx_dst_param_as_src(tx
, &tx
->insn
.dst
[0]);
2508 tx_texcoord_alloc(tx
, tx
->insn
.dst
[0].idx
);
2509 reg
= tx
->regs
.vT
[tx
->insn
.dst
[0].idx
];
2511 if (tx
->version
.major
< 2)
2512 reg
= ureg_swizzle(reg
, NINE_SWIZZLE4(X
,Y
,Z
,Z
));
2513 ureg_KILL_IF(tx
->ureg
, reg
);
2518 DECL_SPECIAL(TEXBEM
)
2520 struct ureg_program
*ureg
= tx
->ureg
;
2521 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2522 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]); /* t[n] */
2523 struct ureg_dst tmp
, tmp2
, texcoord
;
2524 struct ureg_src sample
, m00
, m01
, m10
, m11
, c8m
, c16m2
;
2525 struct ureg_src bumpenvlscale
, bumpenvloffset
;
2526 const int m
= tx
->insn
.dst
[0].idx
;
2528 assert(tx
->version
.major
== 1);
2530 sample
= ureg_DECL_sampler(ureg
, m
);
2531 tx
->info
->sampler_mask
|= 1 << m
;
2533 tx_texcoord_alloc(tx
, m
);
2535 tmp
= tx_scratch(tx
);
2536 tmp2
= tx_scratch(tx
);
2537 texcoord
= tx_scratch(tx
);
2545 c8m
= nine_float_constant_src(tx
, 8+m
);
2546 c16m2
= nine_float_constant_src(tx
, 8+8+m
/2);
2548 m00
= NINE_APPLY_SWIZZLE(c8m
, X
);
2549 m01
= NINE_APPLY_SWIZZLE(c8m
, Y
);
2550 m10
= NINE_APPLY_SWIZZLE(c8m
, Z
);
2551 m11
= NINE_APPLY_SWIZZLE(c8m
, W
);
2553 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2555 bumpenvlscale
= NINE_APPLY_SWIZZLE(c16m2
, X
);
2556 bumpenvloffset
= NINE_APPLY_SWIZZLE(c16m2
, Y
);
2558 bumpenvlscale
= NINE_APPLY_SWIZZLE(c16m2
, Z
);
2559 bumpenvloffset
= NINE_APPLY_SWIZZLE(c16m2
, W
);
2562 apply_ps1x_projection(tx
, texcoord
, tx
->regs
.vT
[m
], m
);
2564 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2565 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m00
,
2566 NINE_APPLY_SWIZZLE(src
, X
), ureg_src(texcoord
));
2567 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2568 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m10
,
2569 NINE_APPLY_SWIZZLE(src
, Y
),
2570 NINE_APPLY_SWIZZLE(ureg_src(tmp
), X
));
2572 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2573 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m01
,
2574 NINE_APPLY_SWIZZLE(src
, X
), ureg_src(texcoord
));
2575 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2576 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m11
,
2577 NINE_APPLY_SWIZZLE(src
, Y
),
2578 NINE_APPLY_SWIZZLE(ureg_src(tmp
), Y
));
2580 /* Now the texture coordinates are in tmp.xy */
2582 if (tx
->insn
.opcode
== D3DSIO_TEXBEM
) {
2583 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tmp
), sample
);
2584 } else if (tx
->insn
.opcode
== D3DSIO_TEXBEML
) {
2585 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2586 ureg_TEX(ureg
, tmp
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tmp
), sample
);
2587 ureg_MAD(ureg
, tmp2
, NINE_APPLY_SWIZZLE(src
, Z
),
2588 bumpenvlscale
, bumpenvloffset
);
2589 ureg_MUL(ureg
, dst
, ureg_src(tmp
), ureg_src(tmp2
));
2592 tx
->info
->bumpenvmat_needed
= 1;
2597 DECL_SPECIAL(TEXREG2AR
)
2599 struct ureg_program
*ureg
= tx
->ureg
;
2600 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2601 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]); /* t[n] */
2602 struct ureg_src sample
;
2603 const int m
= tx
->insn
.dst
[0].idx
;
2604 ASSERTED
const int n
= tx
->insn
.src
[0].idx
;
2605 assert(m
>= 0 && m
> n
);
2607 sample
= ureg_DECL_sampler(ureg
, m
);
2608 tx
->info
->sampler_mask
|= 1 << m
;
2609 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_swizzle(src
, NINE_SWIZZLE4(W
,X
,X
,X
)), sample
);
2614 DECL_SPECIAL(TEXREG2GB
)
2616 struct ureg_program
*ureg
= tx
->ureg
;
2617 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2618 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]); /* t[n] */
2619 struct ureg_src sample
;
2620 const int m
= tx
->insn
.dst
[0].idx
;
2621 ASSERTED
const int n
= tx
->insn
.src
[0].idx
;
2622 assert(m
>= 0 && m
> n
);
2624 sample
= ureg_DECL_sampler(ureg
, m
);
2625 tx
->info
->sampler_mask
|= 1 << m
;
2626 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_swizzle(src
, NINE_SWIZZLE4(Y
,Z
,Z
,Z
)), sample
);
2631 DECL_SPECIAL(TEXM3x2PAD
)
2633 return D3D_OK
; /* this is just padding */
2636 DECL_SPECIAL(TEXM3x2TEX
)
2638 struct ureg_program
*ureg
= tx
->ureg
;
2639 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2640 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]); /* t[n] */
2641 struct ureg_src sample
;
2642 const int m
= tx
->insn
.dst
[0].idx
- 1;
2643 ASSERTED
const int n
= tx
->insn
.src
[0].idx
;
2644 assert(m
>= 0 && m
> n
);
2646 tx_texcoord_alloc(tx
, m
);
2647 tx_texcoord_alloc(tx
, m
+1);
2649 /* performs the matrix multiplication */
2650 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], src
);
2651 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], src
);
2653 sample
= ureg_DECL_sampler(ureg
, m
+ 1);
2654 tx
->info
->sampler_mask
|= 1 << (m
+ 1);
2655 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 1), ureg_src(dst
), sample
);
2660 DECL_SPECIAL(TEXM3x3PAD
)
2662 return D3D_OK
; /* this is just padding */
2665 DECL_SPECIAL(TEXM3x3SPEC
)
2667 struct ureg_program
*ureg
= tx
->ureg
;
2668 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2669 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]); /* t[n] */
2670 struct ureg_src E
= tx_src_param(tx
, &tx
->insn
.src
[1]);
2671 struct ureg_src sample
;
2672 struct ureg_dst tmp
;
2673 const int m
= tx
->insn
.dst
[0].idx
- 2;
2674 ASSERTED
const int n
= tx
->insn
.src
[0].idx
;
2675 assert(m
>= 0 && m
> n
);
2677 tx_texcoord_alloc(tx
, m
);
2678 tx_texcoord_alloc(tx
, m
+1);
2679 tx_texcoord_alloc(tx
, m
+2);
2681 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], src
);
2682 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], src
);
2683 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
), tx
->regs
.vT
[m
+2], src
);
2685 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2686 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2687 tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_XYZ
);
2689 /* At this step, dst = N = (u', w', z').
2690 * We want dst to be the texture sampled at (u'', w'', z''), with
2691 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2692 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(dst
), ureg_src(dst
));
2693 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2694 /* at this step tmp.x = 1/N.N */
2695 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(dst
), E
);
2696 /* at this step tmp.y = N.E */
2697 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2698 /* at this step tmp.x = N.E/N.N */
2699 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 2.0f
));
2700 ureg_MUL(ureg
, tmp
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_src(dst
));
2701 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2702 ureg_ADD(ureg
, tmp
, ureg_src(tmp
), ureg_negate(E
));
2703 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(tmp
), sample
);
2708 DECL_SPECIAL(TEXREG2RGB
)
2710 struct ureg_program
*ureg
= tx
->ureg
;
2711 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2712 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]); /* t[n] */
2713 struct ureg_src sample
;
2714 const int m
= tx
->insn
.dst
[0].idx
;
2715 ASSERTED
const int n
= tx
->insn
.src
[0].idx
;
2716 assert(m
>= 0 && m
> n
);
2718 sample
= ureg_DECL_sampler(ureg
, m
);
2719 tx
->info
->sampler_mask
|= 1 << m
;
2720 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), src
, sample
);
2725 DECL_SPECIAL(TEXDP3TEX
)
2727 struct ureg_program
*ureg
= tx
->ureg
;
2728 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2729 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]); /* t[n] */
2730 struct ureg_dst tmp
;
2731 struct ureg_src sample
;
2732 const int m
= tx
->insn
.dst
[0].idx
;
2733 ASSERTED
const int n
= tx
->insn
.src
[0].idx
;
2734 assert(m
>= 0 && m
> n
);
2736 tx_texcoord_alloc(tx
, m
);
2738 tmp
= tx_scratch(tx
);
2739 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], src
);
2740 ureg_MOV(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_YZ
), ureg_imm1f(ureg
, 0.0f
));
2742 sample
= ureg_DECL_sampler(ureg
, m
);
2743 tx
->info
->sampler_mask
|= 1 << m
;
2744 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tmp
), sample
);
2749 DECL_SPECIAL(TEXM3x2DEPTH
)
2751 struct ureg_program
*ureg
= tx
->ureg
;
2752 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]); /* t[n] */
2753 struct ureg_dst tmp
;
2754 const int m
= tx
->insn
.dst
[0].idx
- 1;
2755 ASSERTED
const int n
= tx
->insn
.src
[0].idx
;
2756 assert(m
>= 0 && m
> n
);
2758 tx_texcoord_alloc(tx
, m
);
2759 tx_texcoord_alloc(tx
, m
+1);
2761 tmp
= tx_scratch(tx
);
2763 /* performs the matrix multiplication */
2764 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], src
);
2765 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], src
);
2767 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Z
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2768 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2769 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Z
));
2770 /* res = 'w' == 0 ? 1.0 : z/w */
2771 ureg_CMP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
))),
2772 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 1.0f
));
2773 /* replace the depth for depth testing with the result */
2774 tx
->regs
.oDepth
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_POSITION
, 0,
2775 TGSI_WRITEMASK_Z
, 0, 1);
2776 ureg_MOV(ureg
, tx
->regs
.oDepth
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2777 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2781 DECL_SPECIAL(TEXDP3
)
2783 struct ureg_program
*ureg
= tx
->ureg
;
2784 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2785 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]); /* t[n] */
2786 const int m
= tx
->insn
.dst
[0].idx
;
2787 ASSERTED
const int n
= tx
->insn
.src
[0].idx
;
2788 assert(m
>= 0 && m
> n
);
2790 tx_texcoord_alloc(tx
, m
);
2792 ureg_DP3(ureg
, dst
, tx
->regs
.vT
[m
], src
);
/* Handler whose body switches on D3DSIO_TEXM3x3, D3DSIO_TEXM3x3TEX and
 * D3DSIO_TEXM3x3VSPEC.
 * With m = destination index - 2, it first writes dst.xyz as the DP3 of the
 * source register with vT[m], vT[m+1] and vT[m+2]. Then, per opcode:
 *  - TEXM3x3: sets dst.w to 1.0;
 *  - TEXM3x3TEX: samples sampler m+2 at dst;
 *  - TEXM3x3VSPEC: builds E from the w components of vT[m..m+2] and samples
 *    sampler m+2 at 2 * (N.E / N.N) * N - E, with N = dst.
 * Any other opcode yields D3DERR_INVALIDCALL. */
2797 DECL_SPECIAL(TEXM3x3
)
2799 struct ureg_program
*ureg
= tx
->ureg
;
2800 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2801 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]); /* t[n] */
2802 struct ureg_src sample
;
2803 struct ureg_dst E
, tmp
;
2804 const int m
= tx
->insn
.dst
[0].idx
- 2;
2805 ASSERTED
const int n
= tx
->insn
.src
[0].idx
;
2806 assert(m
>= 0 && m
> n
);
2808 tx_texcoord_alloc(tx
, m
);
2809 tx_texcoord_alloc(tx
, m
+1);
2810 tx_texcoord_alloc(tx
, m
+2);
2812 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], src
);
2813 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], src
);
2814 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
), tx
->regs
.vT
[m
+2], src
);
2816 switch (tx
->insn
.opcode
) {
2817 case D3DSIO_TEXM3x3
:
2818 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
2820 case D3DSIO_TEXM3x3TEX
:
2821 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2822 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2823 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(dst
), sample
);
2825 case D3DSIO_TEXM3x3VSPEC
:
2826 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2827 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2829 tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_XYZ
);
2830 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_X
), ureg_scalar(tx
->regs
.vT
[m
], TGSI_SWIZZLE_W
));
2831 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_Y
), ureg_scalar(tx
->regs
.vT
[m
+1], TGSI_SWIZZLE_W
));
2832 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_Z
), ureg_scalar(tx
->regs
.vT
[m
+2], TGSI_SWIZZLE_W
));
2833 /* At this step, dst = N = (u', w', z').
2834 * We want dst to be the texture sampled at (u'', w'', z''), with
2835 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2836 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(dst
), ureg_src(dst
));
2837 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2838 /* at this step tmp.x = 1/N.N */
2839 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(dst
), ureg_src(E
));
2840 /* at this step tmp.y = N.E */
2841 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2842 /* at this step tmp.x = N.E/N.N */
2843 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 2.0f
));
2844 ureg_MUL(ureg
, tmp
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_src(dst
));
2845 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2846 ureg_ADD(ureg
, tmp
, ureg_src(tmp
), ureg_negate(ureg_src(E
)));
2847 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(tmp
), sample
);
2850 return D3DERR_INVALIDCALL
;
/* texdepth: replaces the fragment depth with r5.r / r5.g, or 1.0 when r5.g
 * is zero. The destination register must be r5 (asserted). r5.z is used as
 * scratch for the reciprocal and r5.x receives the final value, which is then
 * moved into a POSITION output declared with a Z-only writemask. */
2855 DECL_SPECIAL(TEXDEPTH
)
2857 struct ureg_program
*ureg
= tx
->ureg
;
2859 struct ureg_src r5r
, r5g
;
2861 assert(tx
->insn
.dst
[0].idx
== 5); /* instruction must get r5 here */
2863 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2864 * r5 won't be used afterward, thus we can use r5.ba */
2866 r5r
= ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_X
);
2867 r5g
= ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_Y
);
2869 ureg_RCP(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_Z
), r5g
);
2870 ureg_MUL(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_X
), r5r
, ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_Z
));
2872 ureg_CMP(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_X
), ureg_negate(ureg_abs(r5g
)),
2873 r5r
, ureg_imm1f(ureg
, 1.0f
));
2874 /* replace the depth for depth testing with the result */
2875 tx
->regs
.oDepth
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_POSITION
, 0,
2876 TGSI_WRITEMASK_Z
, 0, 1);
2877 ureg_MOV(ureg
, tx
->regs
.oDepth
, r5r
);
/* NOTE(review): presumably the body of the handler registered as
 * SPECIAL(BEM) in inst_table -- confirm against the handler's header.
 *
 * Applies a 2x2 bump-environment matrix, read from float constant 8+m
 * (m00 = .x, m01 = .y, m10 = .z, m11 = .w; m is the destination index):
 *   dst.x = src0.x + m00 * src1.x + m10 * src1.y
 *   dst.y = src0.y + m01 * src1.x + m11 * src1.y
 * The result is accumulated in tmp and then moved to dst.xy. The shader info
 * is flagged as needing the bump-environment matrix. */
2884 struct ureg_program
*ureg
= tx
->ureg
;
2885 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2886 struct ureg_src src0
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2887 struct ureg_src src1
= tx_src_param(tx
, &tx
->insn
.src
[1]);
2888 struct ureg_src m00
, m01
, m10
, m11
, c8m
;
2889 const int m
= tx
->insn
.dst
[0].idx
;
2890 struct ureg_dst tmp
;
2898 c8m
= nine_float_constant_src(tx
, 8+m
);
2899 m00
= NINE_APPLY_SWIZZLE(c8m
, X
);
2900 m01
= NINE_APPLY_SWIZZLE(c8m
, Y
);
2901 m10
= NINE_APPLY_SWIZZLE(c8m
, Z
);
2902 m11
= NINE_APPLY_SWIZZLE(c8m
, W
);
2903 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2904 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m00
,
2905 NINE_APPLY_SWIZZLE(src1
, X
), NINE_APPLY_SWIZZLE(src0
, X
));
2906 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2907 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m10
,
2908 NINE_APPLY_SWIZZLE(src1
, Y
), NINE_APPLY_SWIZZLE(ureg_src(tmp
), X
));
2910 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2911 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m01
,
2912 NINE_APPLY_SWIZZLE(src1
, X
), src0
);
2913 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2914 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m11
,
2915 NINE_APPLY_SWIZZLE(src1
, Y
), NINE_APPLY_SWIZZLE(ureg_src(tmp
), Y
));
2916 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XY
), ureg_src(tmp
));
2918 tx
->info
->bumpenvmat_needed
= 1;
/* NOTE(review): presumably the body of the handler registered as
 * SPECIAL(TEXLD) in inst_table -- confirm against the handler's header.
 *
 * Texture load with coordinates in src[0] and the sampler in src[1]. The
 * texture target is looked up in tx->sampler_targets by the sampler index
 * (bounds asserted). The instruction flags select the emitted opcode: TEX,
 * TXP for NINED3DSI_TEXLD_PROJECT, TXB for NINED3DSI_TEXLD_BIAS; an
 * unrecognised value returns D3DERR_INVALIDCALL. */
2925 struct ureg_program
*ureg
= tx
->ureg
;
2927 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2928 struct ureg_src src
[2] = {
2929 tx_src_param(tx
, &tx
->insn
.src
[0]),
2930 tx_src_param(tx
, &tx
->insn
.src
[1])
2932 assert(tx
->insn
.src
[1].idx
>= 0 &&
2933 tx
->insn
.src
[1].idx
< ARRAY_SIZE(tx
->sampler_targets
));
2934 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2936 switch (tx
->insn
.flags
) {
2938 ureg_TEX(ureg
, dst
, target
, src
[0], src
[1]);
2940 case NINED3DSI_TEXLD_PROJECT
:
2941 ureg_TXP(ureg
, dst
, target
, src
[0], src
[1]);
2943 case NINED3DSI_TEXLD_BIAS
:
2944 ureg_TXB(ureg
, dst
, target
, src
[0], src
[1]);
2948 return D3DERR_INVALIDCALL
;
2953 DECL_SPECIAL(TEXLD_14
)
2955 struct ureg_program
*ureg
= tx
->ureg
;
2956 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2957 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2958 const unsigned s
= tx
->insn
.dst
[0].idx
;
2959 const unsigned t
= ps1x_sampler_type(tx
->info
, s
);
2961 tx
->info
->sampler_mask
|= 1 << s
;
2962 ureg_TEX(ureg
, dst
, t
, src
, ureg_DECL_sampler(ureg
, s
));
/* NOTE(review): presumably the body of the handler registered as
 * SPECIAL(TEX) in inst_table -- confirm against the handler's header.
 *
 * Samples sampler s, where s is the destination register index, using the
 * texture coordinate register vT[s] (allocated on demand). Marks the sampler
 * as used and emits the lookup through TEX_with_ps1x_projection(). */
2969 struct ureg_program
*ureg
= tx
->ureg
;
2970 const unsigned s
= tx
->insn
.dst
[0].idx
;
2971 const unsigned t
= ps1x_sampler_type(tx
->info
, s
);
2972 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2973 struct ureg_src src
[2];
2975 tx_texcoord_alloc(tx
, s
);
2977 src
[0] = tx
->regs
.vT
[s
];
2978 src
[1] = ureg_DECL_sampler(ureg
, s
);
2979 tx
->info
->sampler_mask
|= 1 << s
;
2981 TEX_with_ps1x_projection(tx
, dst
, t
, src
[0], src
[1], s
);
2986 DECL_SPECIAL(TEXLDD
)
2989 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2990 struct ureg_src src
[4] = {
2991 tx_src_param(tx
, &tx
->insn
.src
[0]),
2992 tx_src_param(tx
, &tx
->insn
.src
[1]),
2993 tx_src_param(tx
, &tx
->insn
.src
[2]),
2994 tx_src_param(tx
, &tx
->insn
.src
[3])
2996 assert(tx
->insn
.src
[1].idx
>= 0 &&
2997 tx
->insn
.src
[1].idx
< ARRAY_SIZE(tx
->sampler_targets
));
2998 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
3000 ureg_TXD(tx
->ureg
, dst
, target
, src
[0], src
[2], src
[3], src
[1]);
3004 DECL_SPECIAL(TEXLDL
)
3007 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
3008 struct ureg_src src
[2] = {
3009 tx_src_param(tx
, &tx
->insn
.src
[0]),
3010 tx_src_param(tx
, &tx
->insn
.src
[1])
3012 assert(tx
->insn
.src
[1].idx
>= 0 &&
3013 tx
->insn
.src
[1].idx
< ARRAY_SIZE(tx
->sampler_targets
));
3014 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
3016 ureg_TXL(tx
->ureg
, dst
, target
, src
[0], src
[1]);
/* NOTE(review): presumably the body of the handler registered as
 * SPECIAL(SETP) in inst_table -- confirm against the handler's header.
 *
 * Emits a two-source comparison instruction whose TGSI opcode is derived
 * from the instruction flags by sm1_insn_flags_to_tgsi_setop(). */
3022 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
3023 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
3024 struct ureg_src src
[2] = {
3025 tx_src_param(tx
, &tx
->insn
.src
[0]),
3026 tx_src_param(tx
, &tx
->insn
.src
[1])
3028 ureg_insn(tx
->ureg
, cmp_op
, &dst
, 1, src
, 2, 0);
/* breakp: opens an IF on the source operand (label from tx_cond()) and
 * closes it with ENDIF. */
3032 DECL_SPECIAL(BREAKP
)
3034 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
3035 ureg_IF(tx
->ureg
, src
, tx_cond(tx
));
3038 ureg_ENDIF(tx
->ureg
);
/* NOTE(review): presumably the body of the handler registered as
 * SPECIAL(PHASE) for inst_phase -- confirm. It emits nothing. */
3044 return D3D_OK
; /* we don't care about phase */
3047 DECL_SPECIAL(COMMENT
)
3049 return D3D_OK
; /* nothing to do */
/* Builds one sm1_op_info initializer:
 *   op       D3DSIO_ opcode suffix
 *   tgsi     TGSI_OPCODE_ suffix
 *   vmin/vmax  version pair for the vertex-shader range
 *   pmin/pmax  version pair for the pixel-shader range
 *   nd, ns   destination and source parameter counts
 *   fn       handler, or NULL */
#define _OPI(op,tgsi,vmin,vmax,pmin,pmax,nd,ns,fn) \
    { D3DSIO_##op, TGSI_OPCODE_##tgsi, { vmin, vmax }, { pmin, pmax, }, nd, ns, fn }
3056 static const struct sm1_op_info inst_table
[] =
3058 _OPI(NOP
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP
)), /* 0 */
3059 _OPI(MOV
, MOV
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
),
3060 _OPI(ADD
, ADD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 2 */
3061 _OPI(SUB
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB
)), /* 3 */
3062 _OPI(MAD
, MAD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL
), /* 4 */
3063 _OPI(MUL
, MUL
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 5 */
3064 _OPI(RCP
, RCP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP
)), /* 6 */
3065 _OPI(RSQ
, RSQ
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ
)), /* 7 */
3066 _OPI(DP3
, DP3
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 8 */
3067 _OPI(DP4
, DP4
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 9 */
3068 _OPI(MIN
, MIN
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 10 */
3069 _OPI(MAX
, MAX
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 11 */
3070 _OPI(SLT
, SLT
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 12 */
3071 _OPI(SGE
, SGE
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 13 */
3072 _OPI(EXP
, EX2
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 14 */
3073 _OPI(LOG
, LG2
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG
)), /* 15 */
3074 _OPI(LIT
, LIT
, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT
)), /* 16 */
3075 _OPI(DST
, DST
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 17 */
3076 _OPI(LRP
, LRP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL
), /* 18 */
3077 _OPI(FRC
, FRC
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 19 */
3079 _OPI(M4x4
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4
)),
3080 _OPI(M4x3
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3
)),
3081 _OPI(M3x4
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4
)),
3082 _OPI(M3x3
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3
)),
3083 _OPI(M3x2
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2
)),
3085 _OPI(CALL
, CAL
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL
)),
3086 _OPI(CALLNZ
, CAL
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ
)),
3087 _OPI(LOOP
, BGNLOOP
, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP
)),
3088 _OPI(RET
, RET
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET
)),
3089 _OPI(ENDLOOP
, ENDLOOP
, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP
)),
3090 _OPI(LABEL
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL
)),
3092 _OPI(DCL
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL
)),
3094 _OPI(POW
, POW
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW
)),
3095 _OPI(CRS
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD
)), /* XXX: .w */
3096 _OPI(SGN
, SSG
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN
)), /* ignore src1,2 */
3097 _OPI(ABS
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS
)),
3098 _OPI(NRM
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM
)), /* NRM doesn't fit */
3100 _OPI(SINCOS
, NOP
, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS
)),
3101 _OPI(SINCOS
, NOP
, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS
)),
3103 /* More flow control */
3104 _OPI(REP
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP
)),
3105 _OPI(ENDREP
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP
)),
3106 _OPI(IF
, IF
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF
)),
3107 _OPI(IFC
, IF
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC
)),
3108 _OPI(ELSE
, ELSE
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE
)),
3109 _OPI(ENDIF
, ENDIF
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF
)),
3110 _OPI(BREAK
, BRK
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL
),
3111 _OPI(BREAKC
, NOP
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC
)),
3112 /* we don't write to the address register, but a normal register (copied
3113 * when needed to the address register), thus we don't use ARR */
3114 _OPI(MOVA
, MOV
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
3116 _OPI(DEFB
, NOP
, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB
)),
3117 _OPI(DEFI
, NOP
, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI
)),
3119 _OPI(TEXCOORD
, NOP
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD
)),
3120 _OPI(TEXCOORD
, MOV
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14
)),
3121 _OPI(TEXKILL
, KILL_IF
, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL
)),
3122 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX
)),
3123 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14
)),
3124 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD
)),
3125 _OPI(TEXBEM
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM
)),
3126 _OPI(TEXBEML
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM
)),
3127 _OPI(TEXREG2AR
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR
)),
3128 _OPI(TEXREG2GB
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB
)),
3129 _OPI(TEXM3x2PAD
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD
)),
3130 _OPI(TEXM3x2TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX
)),
3131 _OPI(TEXM3x3PAD
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD
)),
3132 _OPI(TEXM3x3TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
3133 _OPI(TEXM3x3SPEC
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC
)),
3134 _OPI(TEXM3x3VSPEC
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
3136 _OPI(EXPP
, EXP
, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL
),
3137 _OPI(EXPP
, EX2
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
3138 _OPI(LOGP
, LG2
, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG
)),
3139 _OPI(CND
, NOP
, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND
)),
3141 _OPI(DEF
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF
)),
3143 /* More tex stuff */
3144 _OPI(TEXREG2RGB
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB
)),
3145 _OPI(TEXDP3TEX
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX
)),
3146 _OPI(TEXM3x2DEPTH
, TEX
, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH
)),
3147 _OPI(TEXDP3
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3
)),
3148 _OPI(TEXM3x3
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
3149 _OPI(TEXDEPTH
, TEX
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH
)),
3152 _OPI(CMP
, CMP
, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP
)), /* reversed */
3153 _OPI(BEM
, NOP
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM
)),
3154 _OPI(DP2ADD
, NOP
, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD
)),
3155 _OPI(DSX
, DDX
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL
),
3156 _OPI(DSY
, DDY
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL
),
3157 _OPI(TEXLDD
, TXD
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD
)),
3158 _OPI(SETP
, NOP
, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP
)),
3159 _OPI(TEXLDL
, TXL
, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL
)),
3160 _OPI(BREAKP
, BRK
, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP
))
3163 static const struct sm1_op_info inst_phase
=
3164 _OPI(PHASE
, NOP
, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE
));
3166 static const struct sm1_op_info inst_comment
=
3167 _OPI(COMMENT
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT
));
/* Fills tx->op_info_map, which maps a D3DSIO opcode to an index into
 * inst_table, or -1 when the opcode has no entry for this shader.
 * The shader version is packed as (major << 8) | minor and compared against
 * the vertex-shader version range for vertex shaders, or the fragment-shader
 * range otherwise. When several table entries match the same opcode, the
 * last one in table order wins. */
3170 create_op_info_map(struct shader_translator
*tx
)
3172 const unsigned version
= (tx
->version
.major
<< 8) | tx
->version
.minor
;
3175 for (i
= 0; i
< ARRAY_SIZE(tx
->op_info_map
); ++i
)
3176 tx
->op_info_map
[i
] = -1;
3178 if (tx
->processor
== PIPE_SHADER_VERTEX
) {
3179 for (i
= 0; i
< ARRAY_SIZE(inst_table
); ++i
) {
3180 assert(inst_table
[i
].sio
< ARRAY_SIZE(tx
->op_info_map
));
3181 if (inst_table
[i
].vert_version
.min
<= version
&&
3182 inst_table
[i
].vert_version
.max
>= version
)
3183 tx
->op_info_map
[inst_table
[i
].sio
] = i
;
3186 for (i
= 0; i
< ARRAY_SIZE(inst_table
); ++i
) {
3187 assert(inst_table
[i
].sio
< ARRAY_SIZE(tx
->op_info_map
));
3188 if (inst_table
[i
].frag_version
.min
<= version
&&
3189 inst_table
[i
].frag_version
.max
>= version
)
3190 tx
->op_info_map
[inst_table
[i
].sio
] = i
;
/* Default translation for opcodes that map directly onto one TGSI opcode.
 * Translates up to one destination and up to four source parameters of the
 * current instruction, then emits a single instruction with the TGSI opcode
 * taken from the instruction's op info. */
3195 static inline HRESULT
3196 NineTranslateInstruction_Generic(struct shader_translator
*tx
)
3198 struct ureg_dst dst
[1];
3199 struct ureg_src src
[4];
3202 for (i
= 0; i
< tx
->insn
.ndst
&& i
< ARRAY_SIZE(dst
); ++i
)
3203 dst
[i
] = tx_dst_param(tx
, &tx
->insn
.dst
[i
]);
3204 for (i
= 0; i
< tx
->insn
.nsrc
&& i
< ARRAY_SIZE(src
); ++i
)
3205 src
[i
] = tx_src_param(tx
, &tx
->insn
.src
[i
]);
3207 ureg_insn(tx
->ureg
, tx
->insn
.info
->opcode
,
3209 src
, tx
->insn
.nsrc
, 0);
/* Returns the token at the current parse position without advancing. */
3214 TOKEN_PEEK(struct shader_translator
*tx
)
3216 return *(tx
->parse
);
/* Returns the token at the current parse position and advances past it. */
3220 TOKEN_NEXT(struct shader_translator
*tx
)
3222 return *(tx
->parse
)++;
/* Resynchronises the parser with the expected start of the next instruction:
 * when tx->parse_next is set and differs from the current position, a
 * warning is logged and the parse position is moved to tx->parse_next. */
3226 TOKEN_JUMP(struct shader_translator
*tx
)
3228 if (tx
->parse_next
&& tx
->parse
!= tx
->parse_next
) {
3229 WARN("parse(%p) != parse_next(%p) !\n", tx
->parse
, tx
->parse_next
);
3230 tx
->parse
= tx
->parse_next
;
3234 static inline boolean
3235 sm1_parse_eof(struct shader_translator
*tx
)
3237 return TOKEN_PEEK(tx
) == NINED3DSP_END
;
/* Consumes the version token: stores the major/minor version in tx->version
 * and derives the shader stage from the upper 16 bits (NINED3D_SM1_VS ->
 * vertex, NINED3D_SM1_PS -> fragment). Any other value is reported through
 * DBG as an invalid shader type. */
3241 sm1_read_version(struct shader_translator
*tx
)
3243 const DWORD tok
= TOKEN_NEXT(tx
);
3245 tx
->version
.major
= D3DSHADER_VERSION_MAJOR(tok
);
3246 tx
->version
.minor
= D3DSHADER_VERSION_MINOR(tok
);
3248 switch (tok
>> 16) {
3249 case NINED3D_SM1_VS
: tx
->processor
= PIPE_SHADER_VERTEX
; break;
3250 case NINED3D_SM1_PS
: tx
->processor
= PIPE_SHADER_FRAGMENT
; break;
3252 DBG("Invalid shader type: %x\n", tok
);
/* Records in tx->parse_next where the instruction following the current one
 * is expected to start. For shader model 2 and above the instruction token
 * carries its own length, so the position is the current one plus one (the
 * instruction token itself) plus that length. For older versions the length
 * is not available and parse_next is set to NULL. The token is only peeked,
 * not consumed. */
3258 /* This is just to check if we parsed the instruction properly. */
3260 sm1_parse_get_skip(struct shader_translator
*tx
)
3262 const DWORD tok
= TOKEN_PEEK(tx
);
3264 if (tx
->version
.major
>= 2) {
3265 tx
->parse_next
= tx
->parse
+ 1 /* this */ +
3266 ((tok
& D3DSI_INSTLENGTH_MASK
) >> D3DSI_INSTLENGTH_SHIFT
);
3268 tx
->parse_next
= NULL
; /* TODO: determine from param count */
/* Hook called by sm1_parse_comments() with the comment text and its size
 * field. NOTE(review): the body is not visible here; confirm what, if
 * anything, it prints. */
3273 sm1_print_comment(const char *comment
, UINT size
)
/* Skips every consecutive comment token at the current parse position.
 * For each one, the size field of the token gives the number of tokens of
 * payload; the parse position advances by that size plus one (the comment
 * token itself). sm1_print_comment() is invoked with an empty string and the
 * size. */
3281 sm1_parse_comments(struct shader_translator
*tx
, BOOL print
)
3283 DWORD tok
= TOKEN_PEEK(tx
);
3285 while ((tok
& D3DSI_OPCODE_MASK
) == D3DSIO_COMMENT
)
3287 const char *comment
= "";
3288 UINT size
= (tok
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
3289 tx
->parse
+= size
+ 1;
3292 sm1_print_comment(comment
, size
);
3294 tok
= TOKEN_PEEK(tx
);
/* Reads one parameter token into *reg. When the token has the relative
 * addressing bit set, *rel is also filled in: for shader versions below 2 it
 * is built from constants describing the D3DSPR_ADDR register (no extra
 * token is read on that path in the visible code); the other visible
 * assignment reads it from the next token in the stream. */
3299 sm1_parse_get_param(struct shader_translator
*tx
, DWORD
*reg
, DWORD
*rel
)
3301 *reg
= TOKEN_NEXT(tx
);
3303 if (*reg
& D3DSHADER_ADDRMODE_RELATIVE
)
3305 if (tx
->version
.major
< 2)
3307 ((D3DSPR_ADDR
<< D3DSP_REGTYPE_SHIFT2
) & D3DSP_REGTYPE_MASK2
) |
3308 ((D3DSPR_ADDR
<< D3DSP_REGTYPE_SHIFT
) & D3DSP_REGTYPE_MASK
) |
3311 *rel
= TOKEN_NEXT(tx
);
/* Decodes a destination parameter token into *dst.
 * The register type is split across two bit fields of the token
 * (REGTYPE_MASK and REGTYPE_MASK2), which are shifted down and OR-ed
 * together. The value type is set to float, and the register number, write
 * mask and destination modifier are extracted with their masks.
 * The 4-bit shift field is converted to a signed value: the low three bits
 * minus the weight of bit 3, giving a range of -8..7. */
3316 sm1_parse_dst_param(struct sm1_dst_param
*dst
, DWORD tok
)
3320 (tok
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
|
3321 (tok
& D3DSP_REGTYPE_MASK2
) >> D3DSP_REGTYPE_SHIFT2
;
3322 dst
->type
= TGSI_RETURN_TYPE_FLOAT
;
3323 dst
->idx
= tok
& D3DSP_REGNUM_MASK
;
3325 dst
->mask
= (tok
& NINED3DSP_WRITEMASK_MASK
) >> NINED3DSP_WRITEMASK_SHIFT
;
3326 dst
->mod
= (tok
& D3DSP_DSTMOD_MASK
) >> D3DSP_DSTMOD_SHIFT
;
3327 shift
= (tok
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
3328 dst
->shift
= (shift
& 0x7) - (shift
& 0x8);
/* Decodes a source parameter token into *src.
 * The register type is assembled from the two REGTYPE bit fields of the
 * token; the value type is set to float, and the register number, swizzle
 * and source modifier are extracted with their masks.
 * The additional constant banks are folded into the single D3DSPR_CONST
 * file by offsetting the index: CONST2 by 2048, CONST3 by 4096 and CONST4
 * by 6144. */
3332 sm1_parse_src_param(struct sm1_src_param
*src
, DWORD tok
)
3335 ((tok
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) |
3336 ((tok
& D3DSP_REGTYPE_MASK2
) >> D3DSP_REGTYPE_SHIFT2
);
3337 src
->type
= TGSI_RETURN_TYPE_FLOAT
;
3338 src
->idx
= tok
& D3DSP_REGNUM_MASK
;
3340 src
->swizzle
= (tok
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
3341 src
->mod
= (tok
& D3DSP_SRCMOD_MASK
) >> D3DSP_SRCMOD_SHIFT
;
3343 switch (src
->file
) {
3344 case D3DSPR_CONST2
: src
->file
= D3DSPR_CONST
; src
->idx
+= 2048; break;
3345 case D3DSPR_CONST3
: src
->file
= D3DSPR_CONST
; src
->idx
+= 4096; break;
3346 case D3DSPR_CONST4
: src
->file
= D3DSPR_CONST
; src
->idx
+= 6144; break;
/* Fills *imm as an immediate operand (file NINED3DSPR_IMMEDIATE, no swizzle)
 * from the raw tokens at the current parse position. The switch on the
 * current opcode selects the immediate type and how many DWORDs are copied:
 * four for the FLOAT4 and INT4 cases, one for the BOOL case. */
3353 sm1_parse_immediate(struct shader_translator
*tx
,
3354 struct sm1_src_param
*imm
)
3356 imm
->file
= NINED3DSPR_IMMEDIATE
;
3359 imm
->swizzle
= NINED3DSP_NOSWIZZLE
;
3361 switch (tx
->insn
.opcode
) {
3363 imm
->type
= NINED3DSPTYPE_FLOAT4
;
3364 memcpy(&imm
->imm
.d
[0], tx
->parse
, 4 * sizeof(DWORD
));
3368 imm
->type
= NINED3DSPTYPE_INT4
;
3369 memcpy(&imm
->imm
.d
[0], tx
->parse
, 4 * sizeof(DWORD
));
3373 imm
->type
= NINED3DSPTYPE_BOOL
;
3374 memcpy(&imm
->imm
.d
[0], tx
->parse
, 1 * sizeof(DWORD
));
/* Reads and decodes one destination parameter from the token stream.
 * When the parameter uses relative addressing, the relative-address token
 * is decoded into *rel as a source parameter. */
3384 sm1_read_dst_param(struct shader_translator
*tx
,
3385 struct sm1_dst_param
*dst
,
3386 struct sm1_src_param
*rel
)
3388 DWORD tok_dst
, tok_rel
= 0;
3390 sm1_parse_get_param(tx
, &tok_dst
, &tok_rel
);
3391 sm1_parse_dst_param(dst
, tok_dst
);
3392 if (tok_dst
& D3DSHADER_ADDRMODE_RELATIVE
) {
3393 sm1_parse_src_param(rel
, tok_rel
);
/* Reads and decodes one source parameter from the token stream.
 * When the parameter uses relative addressing, the relative-address token
 * is decoded into *rel as a source parameter. */
3399 sm1_read_src_param(struct shader_translator
*tx
,
3400 struct sm1_src_param
*src
,
3401 struct sm1_src_param
*rel
)
3403 DWORD tok_src
, tok_rel
= 0;
3405 sm1_parse_get_param(tx
, &tok_src
, &tok_rel
);
3406 sm1_parse_src_param(src
, tok_src
);
3407 if (tok_src
& D3DSHADER_ADDRMODE_RELATIVE
) {
3409 sm1_parse_src_param(rel
, tok_rel
);
/* Reads the two tokens of a declaration: a usage token followed by a
 * destination register token. The sampler texture type, usage and usage
 * index are extracted from the first; the second is decoded into sem->reg. */
3415 sm1_read_semantic(struct shader_translator
*tx
,
3416 struct sm1_semantic
*sem
)
3418 const DWORD tok_usg
= TOKEN_NEXT(tx
);
3419 const DWORD tok_dst
= TOKEN_NEXT(tx
);
3421 sem
->sampler_type
= (tok_usg
& D3DSP_TEXTURETYPE_MASK
) >> D3DSP_TEXTURETYPE_SHIFT
;
3422 sem
->usage
= (tok_usg
& D3DSP_DCL_USAGE_MASK
) >> D3DSP_DCL_USAGE_SHIFT
;
3423 sem
->usage_idx
= (tok_usg
& D3DSP_DCL_USAGEINDEX_MASK
) >> D3DSP_DCL_USAGEINDEX_SHIFT
;
3425 sm1_parse_dst_param(&sem
->reg
, tok_dst
);
/* Parses and translates one instruction. Visible steps, in order:
 *  1. skip leading comment tokens and record the expected position of the
 *     next instruction;
 *  2. read the instruction token and decode opcode, flags, co-issue and
 *     predication bits;
 *  3. look the opcode up through tx->op_info_map / inst_table; PHASE and
 *     COMMENT use their dedicated entries; an unknown opcode is reported
 *     with DBG;
 *  4. take the parameter counts from the op info and check that the shader
 *     version lies within the entry's range for the current stage;
 *  5. read destination parameters, the predicate (if any), then the source
 *     parameters; for DEF/DEFI/DEFB the immediate is parsed into src[0];
 *  6. dump and sanity-check the instruction;
 *  7. for predicated instructions, lazily declare the two predicate
 *     temporaries and mark predication active while the instruction is
 *     translated;
 *  8. translate via the op's handler or the generic translator, followed by
 *     tx_apply_dst0_modifiers();
 *  9. for predicated instructions, merge the result into predicate_dst with
 *     a CMP on the negated predicate;
 * 10. reset the scratch register counter. */
3429 sm1_parse_instruction(struct shader_translator
*tx
)
3431 struct sm1_instruction
*insn
= &tx
->insn
;
3434 const struct sm1_op_info
*info
= NULL
;
3437 sm1_parse_comments(tx
, TRUE
);
3438 sm1_parse_get_skip(tx
);
3440 tok
= TOKEN_NEXT(tx
);
3442 insn
->opcode
= tok
& D3DSI_OPCODE_MASK
;
3443 insn
->flags
= (tok
& NINED3DSIO_OPCODE_FLAGS_MASK
) >> NINED3DSIO_OPCODE_FLAGS_SHIFT
;
3444 insn
->coissue
= !!(tok
& D3DSI_COISSUE
);
3445 insn
->predicated
= !!(tok
& NINED3DSHADER_INST_PREDICATED
);
3447 if (insn
->opcode
< ARRAY_SIZE(tx
->op_info_map
)) {
3448 int k
= tx
->op_info_map
[insn
->opcode
];
3450 assert(k
< ARRAY_SIZE(inst_table
));
3451 info
= &inst_table
[k
];
3454 if (insn
->opcode
== D3DSIO_PHASE
) info
= &inst_phase
;
3455 if (insn
->opcode
== D3DSIO_COMMENT
) info
= &inst_comment
;
3458 DBG("illegal or unhandled opcode: %08x\n", insn
->opcode
);
3463 insn
->ndst
= info
->ndst
;
3464 insn
->nsrc
= info
->nsrc
;
3468 unsigned min
= IS_VS
? info
->vert_version
.min
: info
->frag_version
.min
;
3469 unsigned max
= IS_VS
? info
->vert_version
.max
: info
->frag_version
.max
;
3470 unsigned ver
= (tx
->version
.major
<< 8) | tx
->version
.minor
;
3471 if (ver
< min
|| ver
> max
) {
3472 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3478 for (i
= 0; i
< insn
->ndst
; ++i
)
3479 sm1_read_dst_param(tx
, &insn
->dst
[i
], &insn
->dst_rel
[i
]);
3480 if (insn
->predicated
)
3481 sm1_read_src_param(tx
, &insn
->pred
, NULL
);
3482 for (i
= 0; i
< insn
->nsrc
; ++i
)
3483 sm1_read_src_param(tx
, &insn
->src
[i
], &insn
->src_rel
[i
]);
3485 /* parse here so we can dump them before processing */
3486 if (insn
->opcode
== D3DSIO_DEF
||
3487 insn
->opcode
== D3DSIO_DEFI
||
3488 insn
->opcode
== D3DSIO_DEFB
)
3489 sm1_parse_immediate(tx
, &tx
->insn
.src
[0]);
3491 sm1_dump_instruction(insn
, tx
->cond_depth
+ tx
->loop_depth
);
3492 sm1_instruction_check(insn
);
3494 if (insn
->predicated
) {
3495 tx
->predicated_activated
= true;
3496 if (ureg_dst_is_undef(tx
->regs
.predicate_tmp
)) {
3497 tx
->regs
.predicate_tmp
= ureg_DECL_temporary(tx
->ureg
);
3498 tx
->regs
.predicate_dst
= ureg_DECL_temporary(tx
->ureg
);
3503 hr
= info
->handler(tx
);
3505 hr
= NineTranslateInstruction_Generic(tx
);
3506 tx_apply_dst0_modifiers(tx
);
3508 if (insn
->predicated
) {
3509 tx
->predicated_activated
= false;
3510 /* TODO: predicate might be allowed on outputs,
3511 * which cannot be src. Workaround it. */
3512 ureg_CMP(tx
->ureg
, tx
->regs
.predicate_dst
,
3513 ureg_negate(tx_src_param(tx
, &insn
->pred
)),
3514 ureg_src(tx
->regs
.predicate_tmp
),
3515 ureg_src(tx
->regs
.predicate_dst
));
3520 tx
->num_scratch
= 0; /* reset */
/* Capability query shorthands.
 * Both macros expand to a call through a variable named `screen`;
 * GET_SHADER_CAP additionally reads `info->type`. Those names must be in
 * scope wherever the macros are used. */
#define GET_CAP(n) screen->get_param(screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) \
    screen->get_shader_param(screen, info->type, PIPE_SHADER_CAP_##n)
3531 tx_ctor(struct shader_translator
*tx
, struct pipe_screen
*screen
, struct nine_shader_info
*info
)
3535 memset(tx
, 0, sizeof(*tx
));
3539 tx
->byte_code
= info
->byte_code
;
3540 tx
->parse
= info
->byte_code
;
3542 for (i
= 0; i
< ARRAY_SIZE(info
->input_map
); ++i
)
3543 info
->input_map
[i
] = NINE_DECLUSAGE_NONE
;
3544 info
->num_inputs
= 0;
3546 info
->position_t
= FALSE
;
3547 info
->point_size
= FALSE
;
3549 memset(tx
->slots_used
, 0, sizeof(tx
->slots_used
));
3550 memset(info
->int_slots_used
, 0, sizeof(info
->int_slots_used
));
3551 memset(info
->bool_slots_used
, 0, sizeof(info
->bool_slots_used
));
3553 tx
->info
->const_float_slots
= 0;
3554 tx
->info
->const_int_slots
= 0;
3555 tx
->info
->const_bool_slots
= 0;
3557 info
->sampler_mask
= 0x0;
3558 info
->rt_mask
= 0x0;
3560 info
->lconstf
.data
= NULL
;
3561 info
->lconstf
.ranges
= NULL
;
3563 info
->bumpenvmat_needed
= 0;
3565 for (i
= 0; i
< ARRAY_SIZE(tx
->regs
.rL
); ++i
) {
3566 tx
->regs
.rL
[i
] = ureg_dst_undef();
3568 tx
->regs
.address
= ureg_dst_undef();
3569 tx
->regs
.a0
= ureg_dst_undef();
3570 tx
->regs
.p
= ureg_dst_undef();
3571 tx
->regs
.oDepth
= ureg_dst_undef();
3572 tx
->regs
.vPos
= ureg_src_undef();
3573 tx
->regs
.vFace
= ureg_src_undef();
3574 for (i
= 0; i
< ARRAY_SIZE(tx
->regs
.o
); ++i
)
3575 tx
->regs
.o
[i
] = ureg_dst_undef();
3576 for (i
= 0; i
< ARRAY_SIZE(tx
->regs
.oCol
); ++i
)
3577 tx
->regs
.oCol
[i
] = ureg_dst_undef();
3578 for (i
= 0; i
< ARRAY_SIZE(tx
->regs
.vC
); ++i
)
3579 tx
->regs
.vC
[i
] = ureg_src_undef();
3580 for (i
= 0; i
< ARRAY_SIZE(tx
->regs
.vT
); ++i
)
3581 tx
->regs
.vT
[i
] = ureg_src_undef();
3583 sm1_read_version(tx
);
3585 info
->version
= (tx
->version
.major
<< 4) | tx
->version
.minor
;
3587 tx
->num_outputs
= 0;
3589 create_op_info_map(tx
);
3591 tx
->ureg
= ureg_create(info
->type
);
3593 return E_OUTOFMEMORY
;
3596 tx
->native_integers
= GET_SHADER_CAP(INTEGERS
);
3597 tx
->inline_subroutines
= !GET_SHADER_CAP(SUBROUTINES
);
3598 tx
->want_texcoord
= GET_CAP(TGSI_TEXCOORD
);
3599 tx
->shift_wpos
= !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
3600 tx
->texcoord_sn
= tx
->want_texcoord
?
3601 TGSI_SEMANTIC_TEXCOORD
: TGSI_SEMANTIC_GENERIC
;
3602 tx
->wpos_is_sysval
= GET_CAP(TGSI_FS_POSITION_IS_SYSVAL
);
3603 tx
->face_is_sysval_integer
= GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL
);
3606 tx
->num_constf_allowed
= NINE_MAX_CONST_F
;
3607 } else if (tx
->version
.major
< 2) {/* IS_PS v1 */
3608 tx
->num_constf_allowed
= 8;
3609 } else if (tx
->version
.major
== 2) {/* IS_PS v2 */
3610 tx
->num_constf_allowed
= 32;
3611 } else {/* IS_PS v3 */
3612 tx
->num_constf_allowed
= NINE_MAX_CONST_F_PS3
;
3615 if (tx
->version
.major
< 2) {
3616 tx
->num_consti_allowed
= 0;
3617 tx
->num_constb_allowed
= 0;
3619 tx
->num_consti_allowed
= NINE_MAX_CONST_I
;
3620 tx
->num_constb_allowed
= NINE_MAX_CONST_B
;
3623 if (info
->swvp_on
&& tx
->version
.major
>= 2) {
3624 tx
->num_constf_allowed
= 8192;
3625 tx
->num_consti_allowed
= 2048;
3626 tx
->num_constb_allowed
= 2048;
3629 /* VS must always write position. Declare it here to make it the 1st output.
3630 * (Some drivers like nv50 are buggy and rely on that.)
3633 tx
->regs
.oPos
= ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0);
3635 ureg_property(tx
->ureg
, TGSI_PROPERTY_FS_COORD_ORIGIN
, TGSI_FS_COORD_ORIGIN_UPPER_LEFT
);
3636 if (!tx
->shift_wpos
)
3637 ureg_property(tx
->ureg
, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
, TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
3640 tx
->mul_zero_wins
= GET_CAP(TGSI_MUL_ZERO_WINS
);
3641 if (tx
->mul_zero_wins
)
3642 ureg_property(tx
->ureg
, TGSI_PROPERTY_MUL_ZERO_WINS
, 1);
3644 /* Add additional definition of constants */
3645 if (info
->add_constants_defs
.c_combination
) {
3648 assert(info
->add_constants_defs
.int_const_added
);
3649 assert(info
->add_constants_defs
.bool_const_added
);
3650 /* We only add constants that are used by the shader
3651 * and that are not defined in the shader */
3652 for (i
= 0; i
< NINE_MAX_CONST_I
; ++i
) {
3653 if ((*info
->add_constants_defs
.int_const_added
)[i
]) {
3654 DBG("Defining const i%i : { %i %i %i %i }\n", i
,
3655 info
->add_constants_defs
.c_combination
->const_i
[i
][0],
3656 info
->add_constants_defs
.c_combination
->const_i
[i
][1],
3657 info
->add_constants_defs
.c_combination
->const_i
[i
][2],
3658 info
->add_constants_defs
.c_combination
->const_i
[i
][3]);
3659 tx_set_lconsti(tx
, i
, info
->add_constants_defs
.c_combination
->const_i
[i
]);
3662 for (i
= 0; i
< NINE_MAX_CONST_B
; ++i
) {
3663 if ((*info
->add_constants_defs
.bool_const_added
)[i
]) {
3664 DBG("Defining const b%i : %i\n", i
, (int)(info
->add_constants_defs
.c_combination
->const_b
[i
] != 0));
3665 tx_set_lconstb(tx
, i
, info
->add_constants_defs
.c_combination
->const_b
[i
]);
3673 tx_dtor(struct shader_translator
*tx
)
3677 if (tx
->num_inst_labels
)
3678 FREE(tx
->inst_labels
);
3684 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3685 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3687 shader_add_vs_viewport_transform(struct shader_translator
*tx
)
3689 struct ureg_program
*ureg
= tx
->ureg
;
3690 struct ureg_src c0
= ureg_src_register(TGSI_FILE_CONSTANT
, 0);
3691 struct ureg_src c1
= ureg_src_register(TGSI_FILE_CONSTANT
, 1);
3692 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3694 c0
= ureg_src_dimension(c0
, 4);
3695 c1
= ureg_src_dimension(c1
, 4);
3696 /* TODO: find out when we need to apply the viewport transformation or not.
3697 * Likely will be XYZ vs XYZRHW in vdecl_out
3698 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3699 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3701 ureg_MOV(ureg
, ureg_writemask(tx
->regs
.oPos_out
, TGSI_WRITEMASK_XYZ
), ureg_src(tx
->regs
.oPos
));
3705 shader_add_ps_fog_stage(struct shader_translator
*tx
, struct ureg_src src_col
)
3707 struct ureg_program
*ureg
= tx
->ureg
;
3708 struct ureg_dst oCol0
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 0);
3709 struct ureg_src fog_end
, fog_coeff
, fog_density
, fog_params
;
3710 struct ureg_src fog_vs
, fog_color
;
3711 struct ureg_dst fog_factor
, depth
;
3713 if (!tx
->info
->fog_enable
) {
3714 ureg_MOV(ureg
, oCol0
, src_col
);
3718 if (tx
->info
->fog_mode
!= D3DFOG_NONE
) {
3719 depth
= tx_scratch_scalar(tx
);
3720 /* Depth used for fog is perspective interpolated */
3721 ureg_RCP(ureg
, depth
, ureg_scalar(nine_get_position_input(tx
), TGSI_SWIZZLE_W
));
3722 ureg_MUL(ureg
, depth
, ureg_src(depth
), ureg_scalar(nine_get_position_input(tx
), TGSI_SWIZZLE_Z
));
3725 fog_color
= nine_float_constant_src(tx
, 32);
3726 fog_params
= nine_float_constant_src(tx
, 33);
3727 fog_factor
= tx_scratch_scalar(tx
);
3729 if (tx
->info
->fog_mode
== D3DFOG_LINEAR
) {
3730 fog_end
= NINE_APPLY_SWIZZLE(fog_params
, X
);
3731 fog_coeff
= NINE_APPLY_SWIZZLE(fog_params
, Y
);
3732 ureg_ADD(ureg
, fog_factor
, fog_end
, ureg_negate(ureg_src(depth
)));
3733 ureg_MUL(ureg
, ureg_saturate(fog_factor
), tx_src_scalar(fog_factor
), fog_coeff
);
3734 } else if (tx
->info
->fog_mode
== D3DFOG_EXP
) {
3735 fog_density
= NINE_APPLY_SWIZZLE(fog_params
, X
);
3736 ureg_MUL(ureg
, fog_factor
, ureg_src(depth
), fog_density
);
3737 ureg_MUL(ureg
, fog_factor
, tx_src_scalar(fog_factor
), ureg_imm1f(ureg
, -1.442695f
));
3738 ureg_EX2(ureg
, fog_factor
, tx_src_scalar(fog_factor
));
3739 } else if (tx
->info
->fog_mode
== D3DFOG_EXP2
) {
3740 fog_density
= NINE_APPLY_SWIZZLE(fog_params
, X
);
3741 ureg_MUL(ureg
, fog_factor
, ureg_src(depth
), fog_density
);
3742 ureg_MUL(ureg
, fog_factor
, tx_src_scalar(fog_factor
), tx_src_scalar(fog_factor
));
3743 ureg_MUL(ureg
, fog_factor
, tx_src_scalar(fog_factor
), ureg_imm1f(ureg
, -1.442695f
));
3744 ureg_EX2(ureg
, fog_factor
, tx_src_scalar(fog_factor
));
3746 fog_vs
= ureg_scalar(ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_GENERIC
, 16,
3747 TGSI_INTERPOLATE_PERSPECTIVE
),
3749 ureg_MOV(ureg
, fog_factor
, fog_vs
);
3752 ureg_LRP(ureg
, ureg_writemask(oCol0
, TGSI_WRITEMASK_XYZ
),
3753 tx_src_scalar(fog_factor
), src_col
, fog_color
);
3754 ureg_MOV(ureg
, ureg_writemask(oCol0
, TGSI_WRITEMASK_W
), src_col
);
3757 static void parse_shader(struct shader_translator
*tx
)
3759 struct nine_shader_info
*info
= tx
->info
;
3761 while (!sm1_parse_eof(tx
) && !tx
->failure
)
3762 sm1_parse_instruction(tx
);
3763 tx
->parse
++; /* for byte_size */
3768 if (IS_PS
&& tx
->version
.major
< 3) {
3769 if (tx
->version
.major
< 2) {
3770 assert(tx
->num_temp
); /* there must be color output */
3771 info
->rt_mask
|= 0x1;
3772 shader_add_ps_fog_stage(tx
, ureg_src(tx
->regs
.r
[0]));
3774 shader_add_ps_fog_stage(tx
, ureg_src(tx
->regs
.oCol
[0]));
3778 if (IS_VS
&& tx
->version
.major
< 3 && ureg_dst_is_undef(tx
->regs
.oFog
) && info
->fog_enable
) {
3779 tx
->regs
.oFog
= ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_GENERIC
, 16);
3780 ureg_MOV(tx
->ureg
, ureg_writemask(tx
->regs
.oFog
, TGSI_WRITEMASK_X
), ureg_imm1f(tx
->ureg
, 0.0f
));
3783 if (info
->position_t
)
3784 ureg_property(tx
->ureg
, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION
, TRUE
);
3786 if (IS_VS
&& !ureg_dst_is_undef(tx
->regs
.oPts
)) {
3787 struct ureg_dst oPts
= ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_PSIZE
, 0);
3788 ureg_MAX(tx
->ureg
, tx
->regs
.oPts
, ureg_src(tx
->regs
.oPts
), ureg_imm1f(tx
->ureg
, info
->point_size_min
));
3789 ureg_MIN(tx
->ureg
, oPts
, ureg_src(tx
->regs
.oPts
), ureg_imm1f(tx
->ureg
, info
->point_size_max
));
3790 info
->point_size
= TRUE
;
3793 if (info
->process_vertices
)
3794 shader_add_vs_viewport_transform(tx
);
/* Bit flags selectable through the NINE_SHADER debug option. */
#define NINE_SHADER_DEBUG_OPTION_NIR_VS           (1 << 0)
#define NINE_SHADER_DEBUG_OPTION_NIR_PS           (1 << 1)
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS        (1 << 2)
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS        (1 << 3)
#define NINE_SHADER_DEBUG_OPTION_DUMP_NIR         (1 << 4)
#define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI        (1 << 5)

3806 static const struct debug_named_value nine_shader_debug_options
[] = {
3807 { "nir_vs", NINE_SHADER_DEBUG_OPTION_NIR_VS
, "Use NIR for vertex shaders even if the driver doesn't prefer it." },
3808 { "nir_ps", NINE_SHADER_DEBUG_OPTION_NIR_PS
, "Use NIR for pixel shaders even if the driver doesn't prefer it." },
3809 { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS
, "Never use NIR for vertex shaders even if the driver prefers it." },
3810 { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS
, "Never use NIR for pixel shaders even if the driver prefers it." },
3811 { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR
, "Print translated NIR shaders." },
3812 { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI
, "Print TGSI shaders." },
3813 DEBUG_NAMED_VALUE_END
/* must be last */
3816 static inline boolean
3817 nine_shader_get_debug_flag(uint64_t flag
)
3819 static uint64_t flags
= 0;
3820 static boolean first_run
= TRUE
;
3822 if (unlikely(first_run
)) {
3824 flags
= debug_get_flags_option("NINE_SHADER", nine_shader_debug_options
, 0);
3826 // Check old TGSI dump envvar too
3827 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE
)) {
3828 flags
|= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI
;
3832 return !!(flags
& flag
);
3836 nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state
*state
, const struct tgsi_token
*tgsi_tokens
,
3837 struct pipe_screen
*screen
)
3839 struct nir_shader
*nir
= tgsi_to_nir(tgsi_tokens
, screen
, true);
3841 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR
))) {
3842 nir_print_shader(nir
, stdout
);
3845 state
->type
= PIPE_SHADER_IR_NIR
;
3846 state
->tokens
= NULL
;
3847 state
->ir
.nir
= nir
;
3848 memset(&state
->stream_output
, 0, sizeof(state
->stream_output
));
3852 nine_ureg_create_shader(struct ureg_program
*ureg
,
3853 struct pipe_context
*pipe
,
3854 const struct pipe_stream_output_info
*so
)
3856 struct pipe_shader_state state
;
3857 const struct tgsi_token
*tgsi_tokens
;
3858 struct pipe_screen
*screen
= pipe
->screen
;
3860 tgsi_tokens
= ureg_finalize(ureg
);
3864 assert(((struct tgsi_header
*) &tgsi_tokens
[0])->HeaderSize
>= 2);
3865 enum pipe_shader_type shader_type
= ((struct tgsi_processor
*) &tgsi_tokens
[1])->Processor
;
3867 int preferred_ir
= screen
->get_shader_param(screen
, shader_type
, PIPE_SHADER_CAP_PREFERRED_IR
);
3868 bool prefer_nir
= (preferred_ir
== PIPE_SHADER_IR_NIR
);
3869 bool use_nir
= prefer_nir
||
3870 ((shader_type
== PIPE_SHADER_VERTEX
) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_VS
)) ||
3871 ((shader_type
== PIPE_SHADER_FRAGMENT
) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_PS
));
3873 /* Allow user to override preferred IR, this is very useful for debugging */
3874 if (unlikely(shader_type
== PIPE_SHADER_VERTEX
&& nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS
)))
3876 if (unlikely(shader_type
== PIPE_SHADER_FRAGMENT
&& nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS
)))
3879 DUMP("shader type: %s, preferred IR: %s, selected IR: %s\n",
3880 shader_type
== PIPE_SHADER_VERTEX
? "VS" : "PS",
3881 prefer_nir
? "NIR" : "TGSI",
3882 use_nir
? "NIR" : "TGSI");
3885 nine_pipe_nir_shader_state_from_tgsi(&state
, tgsi_tokens
, screen
);
3887 pipe_shader_state_from_tgsi(&state
, tgsi_tokens
);
3890 assert(state
.tokens
|| state
.ir
.nir
);
3893 state
.stream_output
= *so
;
3895 switch (shader_type
) {
3896 case PIPE_SHADER_VERTEX
:
3897 return pipe
->create_vs_state(pipe
, &state
);
3898 case PIPE_SHADER_FRAGMENT
:
3899 return pipe
->create_fs_state(pipe
, &state
);
3901 unreachable("unsupported shader type");
/* Create a driver shader from `p` (see nine_ureg_create_shader) and then
 * destroy the ureg program, whether or not creation succeeded.
 *
 * p:    program to compile; consumed by this call.
 * pipe: context used to create the shader state.
 * so:   optional stream output info, may be NULL.
 *
 * Returns the created shader object, or NULL on failure.
 */
void *
nine_create_shader_with_so_and_destroy(struct ureg_program                  *p,
                                       struct pipe_context                  *pipe,
                                       const struct pipe_stream_output_info *so)
{
    void *result = nine_ureg_create_shader(p, pipe, so);
    ureg_destroy(p);
    return result;
}

3917 nine_translate_shader(struct NineDevice9
*device
, struct nine_shader_info
*info
, struct pipe_context
*pipe
)
3919 struct shader_translator
*tx
;
3920 HRESULT hr
= D3D_OK
;
3921 const unsigned processor
= info
->type
;
3922 struct pipe_screen
*screen
= info
->process_vertices
? device
->screen_sw
: device
->screen
;
3923 unsigned *const_ranges
= NULL
;
3925 user_assert(processor
!= ~0, D3DERR_INVALIDCALL
);
3927 tx
= MALLOC_STRUCT(shader_translator
);
3929 return E_OUTOFMEMORY
;
3931 if (tx_ctor(tx
, screen
, info
) == E_OUTOFMEMORY
) {
3936 assert(IS_VS
|| !info
->swvp_on
);
3938 if (((tx
->version
.major
<< 16) | tx
->version
.minor
) > 0x00030000) {
3939 hr
= D3DERR_INVALIDCALL
;
3940 DBG("Unsupported shader version: %u.%u !\n",
3941 tx
->version
.major
, tx
->version
.minor
);
3944 if (tx
->processor
!= processor
) {
3945 hr
= D3DERR_INVALIDCALL
;
3946 DBG("Shader type mismatch: %u / %u !\n", tx
->processor
, processor
);
3949 DUMP("%s%u.%u\n", processor
== PIPE_SHADER_VERTEX
? "VS" : "PS",
3950 tx
->version
.major
, tx
->version
.minor
);
3955 /* For VS shaders, we print the warning later,
3956 * we first try with swvp. */
3958 ERR("Encountered buggy shader\n");
3959 ureg_destroy(tx
->ureg
);
3960 hr
= D3DERR_INVALIDCALL
;
3964 /* Recompile after compacting constant slots if possible */
3965 if (!tx
->indirect_const_access
&& !info
->swvp_on
&& tx
->num_slots
> 0) {
3968 int i
, j
, num_ranges
, prev
;
3970 DBG("Recompiling shader for constant compaction\n");
3971 ureg_destroy(tx
->ureg
);
3973 if (tx
->num_inst_labels
)
3974 FREE(tx
->inst_labels
);
3980 for (i
= 0; i
< NINE_MAX_CONST_ALL
; i
++) {
3981 if (tx
->slots_used
[i
]) {
3987 slot_map
= MALLOC(NINE_MAX_CONST_ALL
* sizeof(unsigned));
3988 const_ranges
= CALLOC(num_ranges
+ 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
3989 if (!slot_map
|| !const_ranges
) {
3996 for (i
= 0; i
< NINE_MAX_CONST_ALL
; i
++) {
3997 if (tx
->slots_used
[i
]) {
4000 /* Initialize first slot of the range */
4001 if (!const_ranges
[2*j
+1])
4002 const_ranges
[2*j
] = i
;
4003 const_ranges
[2*j
+1]++;
4009 if (tx_ctor(tx
, screen
, info
) == E_OUTOFMEMORY
) {
4013 tx
->slot_map
= slot_map
;
4015 assert(!tx
->failure
);
4016 #if !defined(NDEBUG)
4019 while (const_ranges
[i
*2+1] != 0) {
4020 j
+= const_ranges
[i
*2+1];
4023 assert(j
== tx
->num_slots
);
4027 /* record local constants */
4028 if (tx
->num_lconstf
&& tx
->indirect_const_access
) {
4029 struct nine_range
*ranges
;
4036 data
= MALLOC(tx
->num_lconstf
* 4 * sizeof(float));
4039 info
->lconstf
.data
= data
;
4041 indices
= MALLOC(tx
->num_lconstf
* sizeof(indices
[0]));
4045 /* lazy sort, num_lconstf should be small */
4046 for (n
= 0; n
< tx
->num_lconstf
; ++n
) {
4047 for (k
= 0, i
= 0; i
< tx
->num_lconstf
; ++i
) {
4048 if (tx
->lconstf
[i
].idx
< tx
->lconstf
[k
].idx
)
4051 indices
[n
] = tx
->lconstf
[k
].idx
;
4052 memcpy(&data
[n
* 4], &tx
->lconstf
[k
].f
[0], 4 * sizeof(float));
4053 tx
->lconstf
[k
].idx
= INT_MAX
;
4057 for (n
= 1, i
= 1; i
< tx
->num_lconstf
; ++i
)
4058 if (indices
[i
] != indices
[i
- 1] + 1)
4060 ranges
= MALLOC(n
* sizeof(ranges
[0]));
4065 info
->lconstf
.ranges
= ranges
;
4068 ranges
[k
].bgn
= indices
[0];
4069 for (i
= 1; i
< tx
->num_lconstf
; ++i
) {
4070 if (indices
[i
] != indices
[i
- 1] + 1) {
4071 ranges
[k
].next
= &ranges
[k
+ 1];
4072 ranges
[k
].end
= indices
[i
- 1] + 1;
4074 ranges
[k
].bgn
= indices
[i
];
4077 ranges
[k
].end
= indices
[i
- 1] + 1;
4078 ranges
[k
].next
= NULL
;
4079 assert(n
== (k
+ 1));
4086 if (info
->const_float_slots
> device
->max_vs_const_f
&&
4087 (info
->const_int_slots
|| info
->const_bool_slots
) &&
4089 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
4092 if (tx
->indirect_const_access
) { /* vs only */
4093 info
->const_float_slots
= device
->max_vs_const_f
;
4094 tx
->num_slots
= MAX2(tx
->num_slots
, device
->max_vs_const_f
);
4097 if (!info
->swvp_on
) {
4098 info
->const_used_size
= sizeof(float[4]) * tx
->num_slots
;
4100 ureg_DECL_constant2D(tx
->ureg
, 0, tx
->num_slots
-1, 0);
4102 ureg_DECL_constant2D(tx
->ureg
, 0, 4095, 0);
4103 ureg_DECL_constant2D(tx
->ureg
, 0, 4095, 1);
4104 ureg_DECL_constant2D(tx
->ureg
, 0, 2047, 2);
4105 ureg_DECL_constant2D(tx
->ureg
, 0, 511, 3);
4108 if (info
->process_vertices
)
4109 ureg_DECL_constant2D(tx
->ureg
, 0, 2, 4); /* Viewport data */
4111 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI
))) {
4112 const struct tgsi_token
*toks
= ureg_get_tokens(tx
->ureg
, NULL
);
4114 ureg_free_tokens(toks
);
4117 if (info
->process_vertices
) {
4118 NineVertexDeclaration9_FillStreamOutputInfo(info
->vdecl_out
,
4122 info
->cso
= nine_create_shader_with_so_and_destroy(tx
->ureg
, pipe
, &(info
->so
));
4124 info
->cso
= nine_create_shader_with_so_and_destroy(tx
->ureg
, pipe
, NULL
);
4126 hr
= D3DERR_DRIVERINTERNALERROR
;
4127 FREE(info
->lconstf
.data
);
4128 FREE(info
->lconstf
.ranges
);
4132 info
->const_ranges
= const_ranges
;
4133 const_ranges
= NULL
;
4134 info
->byte_size
= (tx
->parse
- tx
->byte_code
) * sizeof(DWORD
);