2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
24 #include "nine_shader.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
37 #define DBG_CHANNEL DBG_SHADER
39 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
42 struct shader_translator
;
44 typedef HRESULT (*translate_instruction_func
)(struct shader_translator
*);
46 static inline const char *d3dsio_to_string(unsigned opcode
);
49 #define NINED3D_SM1_VS 0xfffe
50 #define NINED3D_SM1_PS 0xffff
52 #define NINE_MAX_COND_DEPTH 64
53 #define NINE_MAX_LOOP_DEPTH 64
55 #define NINED3DSP_END 0x0000ffff
57 #define NINED3DSPTYPE_FLOAT4 0
58 #define NINED3DSPTYPE_INT4 1
59 #define NINED3DSPTYPE_BOOL 2
61 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
63 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
64 #define NINED3DSP_WRITEMASK_SHIFT 16
66 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
68 #define NINED3DSHADER_REL_OP_GT 1
69 #define NINED3DSHADER_REL_OP_EQ 2
70 #define NINED3DSHADER_REL_OP_GE 3
71 #define NINED3DSHADER_REL_OP_LT 4
72 #define NINED3DSHADER_REL_OP_NE 5
73 #define NINED3DSHADER_REL_OP_LE 6
75 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
76 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
78 #define NINED3DSI_TEXLD_PROJECT 0x1
79 #define NINED3DSI_TEXLD_BIAS 0x2
81 #define NINED3DSP_WRITEMASK_0 0x1
82 #define NINED3DSP_WRITEMASK_1 0x2
83 #define NINED3DSP_WRITEMASK_2 0x4
84 #define NINED3DSP_WRITEMASK_3 0x8
85 #define NINED3DSP_WRITEMASK_ALL 0xf
87 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
89 #define NINE_SWIZZLE4(x,y,z,w) \
90 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
92 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
93 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
94 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
97 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
98 * BIAS <= PS 1.4 (x-0.5)
99 * BIASNEG <= PS 1.4 (-(x-0.5))
100 * SIGN <= PS 1.4 (2(x-0.5))
101 * SIGNNEG <= PS 1.4 (-2(x-0.5))
102 * COMP <= PS 1.4 (1-x)
104 * X2NEG = PS 1.4 (-2x)
105 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
106 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
107 * ABS >= SM 3.0 (abs(x))
108 * ABSNEG >= SM 3.0 (-abs(x))
109 * NOT >= SM 2.0 predication only
111 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
112 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
113 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
114 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
115 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
116 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
117 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
118 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
119 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
120 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
121 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
124 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
126 static const char *sm1_mod_str
[] =
128 [NINED3DSPSM_NONE
] = "",
129 [NINED3DSPSM_NEG
] = "-",
130 [NINED3DSPSM_BIAS
] = "bias",
131 [NINED3DSPSM_BIASNEG
] = "biasneg",
132 [NINED3DSPSM_SIGN
] = "sign",
133 [NINED3DSPSM_SIGNNEG
] = "signneg",
134 [NINED3DSPSM_COMP
] = "comp",
135 [NINED3DSPSM_X2
] = "x2",
136 [NINED3DSPSM_X2NEG
] = "x2neg",
137 [NINED3DSPSM_DZ
] = "dz",
138 [NINED3DSPSM_DW
] = "dw",
139 [NINED3DSPSM_ABS
] = "abs",
140 [NINED3DSPSM_ABSNEG
] = "-abs",
141 [NINED3DSPSM_NOT
] = "not"
145 sm1_dump_writemask(BYTE mask
)
147 if (mask
& 1) DUMP("x"); else DUMP("_");
148 if (mask
& 2) DUMP("y"); else DUMP("_");
149 if (mask
& 4) DUMP("z"); else DUMP("_");
150 if (mask
& 8) DUMP("w"); else DUMP("_");
154 sm1_dump_swizzle(BYTE s
)
156 char c
[4] = { 'x', 'y', 'z', 'w' };
158 c
[(s
>> 0) & 3], c
[(s
>> 2) & 3], c
[(s
>> 4) & 3], c
[(s
>> 6) & 3]);
161 static const char sm1_file_char
[] =
164 [D3DSPR_INPUT
] = 'v',
165 [D3DSPR_CONST
] = 'c',
167 [D3DSPR_RASTOUT
] = 'R',
168 [D3DSPR_ATTROUT
] = 'D',
169 [D3DSPR_OUTPUT
] = 'o',
170 [D3DSPR_CONSTINT
] = 'I',
171 [D3DSPR_COLOROUT
] = 'C',
172 [D3DSPR_DEPTHOUT
] = 'D',
173 [D3DSPR_SAMPLER
] = 's',
174 [D3DSPR_CONST2
] = 'c',
175 [D3DSPR_CONST3
] = 'c',
176 [D3DSPR_CONST4
] = 'c',
177 [D3DSPR_CONSTBOOL
] = 'B',
179 [D3DSPR_TEMPFLOAT16
] = 'h',
180 [D3DSPR_MISCTYPE
] = 'M',
181 [D3DSPR_LABEL
] = 'X',
182 [D3DSPR_PREDICATE
] = 'p'
186 sm1_dump_reg(BYTE file
, INT index
)
192 case D3DSPR_COLOROUT
:
195 case D3DSPR_DEPTHOUT
:
199 DUMP("oRast%i", index
);
201 case D3DSPR_CONSTINT
:
202 DUMP("iconst[%i]", index
);
204 case D3DSPR_CONSTBOOL
:
205 DUMP("bconst[%i]", index
);
208 DUMP("%c%i", sm1_file_char
[file
], index
);
216 struct sm1_src_param
*rel
;
229 sm1_parse_immediate(struct shader_translator
*, struct sm1_src_param
*);
234 struct sm1_src_param
*rel
;
238 int8_t shift
; /* sint4 */
243 assert_replicate_swizzle(const struct ureg_src
*reg
)
245 assert(reg
->SwizzleY
== reg
->SwizzleX
&&
246 reg
->SwizzleZ
== reg
->SwizzleX
&&
247 reg
->SwizzleW
== reg
->SwizzleX
);
251 sm1_dump_immediate(const struct sm1_src_param
*param
)
253 switch (param
->type
) {
254 case NINED3DSPTYPE_FLOAT4
:
255 DUMP("{ %f %f %f %f }",
256 param
->imm
.f
[0], param
->imm
.f
[1],
257 param
->imm
.f
[2], param
->imm
.f
[3]);
259 case NINED3DSPTYPE_INT4
:
260 DUMP("{ %i %i %i %i }",
261 param
->imm
.i
[0], param
->imm
.i
[1],
262 param
->imm
.i
[2], param
->imm
.i
[3]);
264 case NINED3DSPTYPE_BOOL
:
265 DUMP("%s", param
->imm
.b
? "TRUE" : "FALSE");
274 sm1_dump_src_param(const struct sm1_src_param
*param
)
276 if (param
->file
== NINED3DSPR_IMMEDIATE
) {
277 assert(!param
->mod
&&
279 param
->swizzle
== NINED3DSP_NOSWIZZLE
);
280 sm1_dump_immediate(param
);
285 DUMP("%s(", sm1_mod_str
[param
->mod
]);
287 DUMP("%c[", sm1_file_char
[param
->file
]);
288 sm1_dump_src_param(param
->rel
);
289 DUMP("+%i]", param
->idx
);
291 sm1_dump_reg(param
->file
, param
->idx
);
295 if (param
->swizzle
!= NINED3DSP_NOSWIZZLE
) {
297 sm1_dump_swizzle(param
->swizzle
);
302 sm1_dump_dst_param(const struct sm1_dst_param
*param
)
304 if (param
->mod
& NINED3DSPDM_SATURATE
)
306 if (param
->mod
& NINED3DSPDM_PARTIALP
)
308 if (param
->mod
& NINED3DSPDM_CENTROID
)
310 if (param
->shift
< 0)
311 DUMP("/%u ", 1 << -param
->shift
);
312 if (param
->shift
> 0)
313 DUMP("*%u ", 1 << param
->shift
);
316 DUMP("%c[", sm1_file_char
[param
->file
]);
317 sm1_dump_src_param(param
->rel
);
318 DUMP("+%i]", param
->idx
);
320 sm1_dump_reg(param
->file
, param
->idx
);
322 if (param
->mask
!= NINED3DSP_WRITEMASK_ALL
) {
324 sm1_dump_writemask(param
->mask
);
330 struct sm1_dst_param reg
;
338 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
339 * should be ignored completely */
341 unsigned opcode
; /* TGSI_OPCODE_x */
343 /* versions are still set even if handler is set */
347 } vert_version
, frag_version
;
349 /* number of regs parsed outside of special handler */
353 /* some instructions don't map perfectly, so use a special handler */
354 translate_instruction_func handler
;
357 struct sm1_instruction
359 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode
;
365 struct sm1_src_param src
[4];
366 struct sm1_src_param src_rel
[4];
367 struct sm1_src_param pred
;
368 struct sm1_src_param dst_rel
[1];
369 struct sm1_dst_param dst
[1];
371 struct sm1_op_info
*info
;
375 sm1_dump_instruction(struct sm1_instruction
*insn
, unsigned indent
)
379 /* no info stored for these: */
380 if (insn
->opcode
== D3DSIO_DCL
)
382 for (i
= 0; i
< indent
; ++i
)
385 if (insn
->predicated
) {
387 sm1_dump_src_param(&insn
->pred
);
390 DUMP("%s", d3dsio_to_string(insn
->opcode
));
392 switch (insn
->opcode
) {
394 DUMP(insn
->flags
== NINED3DSI_TEXLD_PROJECT
? "p" : "b");
397 DUMP("_%x", insn
->flags
);
405 for (i
= 0; i
< insn
->ndst
&& i
< Elements(insn
->dst
); ++i
) {
406 sm1_dump_dst_param(&insn
->dst
[i
]);
410 for (i
= 0; i
< insn
->nsrc
&& i
< Elements(insn
->src
); ++i
) {
411 sm1_dump_src_param(&insn
->src
[i
]);
414 if (insn
->opcode
== D3DSIO_DEF
||
415 insn
->opcode
== D3DSIO_DEFI
||
416 insn
->opcode
== D3DSIO_DEFB
)
417 sm1_dump_immediate(&insn
->src
[0]);
422 struct sm1_local_const
433 struct shader_translator
435 const DWORD
*byte_code
;
437 const DWORD
*parse_next
;
439 struct ureg_program
*ureg
;
446 unsigned processor
; /* TGSI_PROCESSOR_VERTEX/FRAGMENT */
448 boolean native_integers
;
449 boolean inline_subroutines
;
451 boolean want_texcoord
;
453 unsigned texcoord_sn
;
455 struct sm1_instruction insn
; /* current instruction */
459 struct ureg_dst oPos
;
460 struct ureg_dst oFog
;
461 struct ureg_dst oPts
;
462 struct ureg_dst oCol
[4];
463 struct ureg_dst o
[PIPE_MAX_SHADER_OUTPUTS
];
464 struct ureg_dst oDepth
;
465 struct ureg_src v
[PIPE_MAX_SHADER_INPUTS
];
466 struct ureg_src vPos
;
467 struct ureg_src vFace
;
470 struct ureg_dst address
;
472 struct ureg_dst tS
[8]; /* texture stage registers */
473 struct ureg_dst tdst
; /* scratch dst if we need extra modifiers */
474 struct ureg_dst t
[5]; /* scratch TEMPs */
475 struct ureg_src vC
[2]; /* PS color in */
476 struct ureg_src vT
[8]; /* PS texcoord in */
477 struct ureg_dst rL
[NINE_MAX_LOOP_DEPTH
]; /* loop ctr */
479 unsigned num_temp
; /* Elements(regs.r) */
480 unsigned num_scratch
;
482 unsigned loop_depth_max
;
484 unsigned loop_labels
[NINE_MAX_LOOP_DEPTH
];
485 unsigned cond_labels
[NINE_MAX_COND_DEPTH
];
486 boolean loop_or_rep
[NINE_MAX_LOOP_DEPTH
]; /* true: loop, false: rep */
488 unsigned *inst_labels
; /* LABEL op */
489 unsigned num_inst_labels
;
491 unsigned sampler_targets
[NINE_MAX_SAMPLERS
]; /* TGSI_TEXTURE_x */
493 struct sm1_local_const
*lconstf
;
494 unsigned num_lconstf
;
495 struct sm1_local_const lconsti
[NINE_MAX_CONST_I
];
496 struct sm1_local_const lconstb
[NINE_MAX_CONST_B
];
498 boolean indirect_const_access
;
501 struct nine_shader_info
*info
;
503 int16_t op_info_map
[D3DSIO_BREAKP
+ 1];
506 #define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
507 #define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
508 #define NINE_MAX_CONST_F_SHADER (tx->processor == TGSI_PROCESSOR_VERTEX ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3)
510 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
513 sm1_read_semantic(struct shader_translator
*, struct sm1_semantic
*);
516 sm1_instruction_check(const struct sm1_instruction
*insn
)
518 if (insn
->opcode
== D3DSIO_CRS
)
520 if (insn
->dst
[0].mask
& NINED3DSP_WRITEMASK_3
)
528 tx_lconstf(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
531 if (index
< 0 || index
>= NINE_MAX_CONST_F_SHADER
) {
535 for (i
= 0; i
< tx
->num_lconstf
; ++i
) {
536 if (tx
->lconstf
[i
].idx
== index
) {
537 *src
= tx
->lconstf
[i
].reg
;
544 tx_lconsti(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
546 if (index
< 0 || index
>= NINE_MAX_CONST_I
) {
550 if (tx
->lconsti
[index
].idx
== index
)
551 *src
= tx
->lconsti
[index
].reg
;
552 return tx
->lconsti
[index
].idx
== index
;
555 tx_lconstb(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
557 if (index
< 0 || index
>= NINE_MAX_CONST_B
) {
561 if (tx
->lconstb
[index
].idx
== index
)
562 *src
= tx
->lconstb
[index
].reg
;
563 return tx
->lconstb
[index
].idx
== index
;
567 tx_set_lconstf(struct shader_translator
*tx
, INT index
, float f
[4])
571 FAILURE_VOID(index
< 0 || index
>= NINE_MAX_CONST_F_SHADER
)
572 if (IS_VS
&& index
>= NINE_MAX_CONST_F_SHADER
)
573 WARN("lconstf index %i too high, indirect access won't work\n", index
);
575 for (n
= 0; n
< tx
->num_lconstf
; ++n
)
576 if (tx
->lconstf
[n
].idx
== index
)
578 if (n
== tx
->num_lconstf
) {
580 tx
->lconstf
= REALLOC(tx
->lconstf
,
581 (n
+ 0) * sizeof(tx
->lconstf
[0]),
582 (n
+ 8) * sizeof(tx
->lconstf
[0]));
587 tx
->lconstf
[n
].idx
= index
;
588 tx
->lconstf
[n
].reg
= ureg_imm4f(tx
->ureg
, f
[0], f
[1], f
[2], f
[3]);
590 memcpy(tx
->lconstf
[n
].imm
.f
, f
, sizeof(tx
->lconstf
[n
].imm
.f
));
593 tx_set_lconsti(struct shader_translator
*tx
, INT index
, int i
[4])
595 FAILURE_VOID(index
< 0 || index
>= NINE_MAX_CONST_I
)
596 tx
->lconsti
[index
].idx
= index
;
597 tx
->lconsti
[index
].reg
= tx
->native_integers
?
598 ureg_imm4i(tx
->ureg
, i
[0], i
[1], i
[2], i
[3]) :
599 ureg_imm4f(tx
->ureg
, i
[0], i
[1], i
[2], i
[3]);
602 tx_set_lconstb(struct shader_translator
*tx
, INT index
, BOOL b
)
604 FAILURE_VOID(index
< 0 || index
>= NINE_MAX_CONST_B
)
605 tx
->lconstb
[index
].idx
= index
;
606 tx
->lconstb
[index
].reg
= tx
->native_integers
?
607 ureg_imm1u(tx
->ureg
, b
? 0xffffffff : 0) :
608 ureg_imm1f(tx
->ureg
, b
? 1.0f
: 0.0f
);
611 static inline struct ureg_dst
612 tx_scratch(struct shader_translator
*tx
)
614 if (tx
->num_scratch
>= Elements(tx
->regs
.t
)) {
616 return tx
->regs
.t
[0];
618 if (ureg_dst_is_undef(tx
->regs
.t
[tx
->num_scratch
]))
619 tx
->regs
.t
[tx
->num_scratch
] = ureg_DECL_local_temporary(tx
->ureg
);
620 return tx
->regs
.t
[tx
->num_scratch
++];
623 static inline struct ureg_dst
624 tx_scratch_scalar(struct shader_translator
*tx
)
626 return ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
629 static inline struct ureg_src
630 tx_src_scalar(struct ureg_dst dst
)
632 struct ureg_src src
= ureg_src(dst
);
633 int c
= ffs(dst
.WriteMask
) - 1;
634 if (dst
.WriteMask
== (1 << c
))
635 src
= ureg_scalar(src
, c
);
640 tx_temp_alloc(struct shader_translator
*tx
, INT idx
)
643 if (idx
>= tx
->num_temp
) {
644 unsigned k
= tx
->num_temp
;
645 unsigned n
= idx
+ 1;
646 tx
->regs
.r
= REALLOC(tx
->regs
.r
,
647 k
* sizeof(tx
->regs
.r
[0]),
648 n
* sizeof(tx
->regs
.r
[0]));
650 tx
->regs
.r
[k
] = ureg_dst_undef();
653 if (ureg_dst_is_undef(tx
->regs
.r
[idx
]))
654 tx
->regs
.r
[idx
] = ureg_DECL_temporary(tx
->ureg
);
658 tx_addr_alloc(struct shader_translator
*tx
, INT idx
)
661 if (ureg_dst_is_undef(tx
->regs
.address
))
662 tx
->regs
.address
= ureg_DECL_address(tx
->ureg
);
663 if (ureg_dst_is_undef(tx
->regs
.a0
))
664 tx
->regs
.a0
= ureg_DECL_temporary(tx
->ureg
);
668 tx_pred_alloc(struct shader_translator
*tx
, INT idx
)
671 if (ureg_dst_is_undef(tx
->regs
.p
))
672 tx
->regs
.p
= ureg_DECL_predicate(tx
->ureg
);
676 tx_texcoord_alloc(struct shader_translator
*tx
, INT idx
)
679 assert(idx
>= 0 && idx
< Elements(tx
->regs
.vT
));
680 if (ureg_src_is_undef(tx
->regs
.vT
[idx
]))
681 tx
->regs
.vT
[idx
] = ureg_DECL_fs_input(tx
->ureg
, tx
->texcoord_sn
, idx
,
682 TGSI_INTERPOLATE_PERSPECTIVE
);
685 static inline unsigned *
686 tx_bgnloop(struct shader_translator
*tx
)
689 if (tx
->loop_depth_max
< tx
->loop_depth
)
690 tx
->loop_depth_max
= tx
->loop_depth
;
691 assert(tx
->loop_depth
< NINE_MAX_LOOP_DEPTH
);
692 return &tx
->loop_labels
[tx
->loop_depth
- 1];
695 static inline unsigned *
696 tx_endloop(struct shader_translator
*tx
)
698 assert(tx
->loop_depth
);
700 ureg_fixup_label(tx
->ureg
, tx
->loop_labels
[tx
->loop_depth
],
701 ureg_get_instruction_number(tx
->ureg
));
702 return &tx
->loop_labels
[tx
->loop_depth
];
705 static struct ureg_dst
706 tx_get_loopctr(struct shader_translator
*tx
, boolean loop_or_rep
)
708 const unsigned l
= tx
->loop_depth
- 1;
712 DBG("loop counter requested outside of loop\n");
713 return ureg_dst_undef();
716 if (ureg_dst_is_undef(tx
->regs
.rL
[l
])) {
717 /* loop or rep ctr creation */
718 tx
->regs
.rL
[l
] = ureg_DECL_local_temporary(tx
->ureg
);
719 tx
->loop_or_rep
[l
] = loop_or_rep
;
721 /* loop - rep - endloop - endrep not allowed */
722 assert(tx
->loop_or_rep
[l
] == loop_or_rep
);
724 return tx
->regs
.rL
[l
];
727 static struct ureg_src
728 tx_get_loopal(struct shader_translator
*tx
)
730 int loop_level
= tx
->loop_depth
- 1;
732 while (loop_level
>= 0) {
733 /* handle loop - rep - endrep - endloop case */
734 if (tx
->loop_or_rep
[loop_level
])
735 /* the value is in the loop counter y component (nine implementation) */
736 return ureg_scalar(ureg_src(tx
->regs
.rL
[loop_level
]), TGSI_SWIZZLE_Y
);
740 DBG("aL counter requested outside of loop\n");
741 return ureg_src_undef();
744 static inline unsigned *
745 tx_cond(struct shader_translator
*tx
)
747 assert(tx
->cond_depth
<= NINE_MAX_COND_DEPTH
);
749 return &tx
->cond_labels
[tx
->cond_depth
- 1];
752 static inline unsigned *
753 tx_elsecond(struct shader_translator
*tx
)
755 assert(tx
->cond_depth
);
756 return &tx
->cond_labels
[tx
->cond_depth
- 1];
760 tx_endcond(struct shader_translator
*tx
)
762 assert(tx
->cond_depth
);
764 ureg_fixup_label(tx
->ureg
, tx
->cond_labels
[tx
->cond_depth
],
765 ureg_get_instruction_number(tx
->ureg
));
768 static inline struct ureg_dst
769 nine_ureg_dst_register(unsigned file
, int index
)
771 return ureg_dst(ureg_src_register(file
, index
));
774 static struct ureg_src
775 tx_src_param(struct shader_translator
*tx
, const struct sm1_src_param
*param
)
777 struct ureg_program
*ureg
= tx
->ureg
;
785 tx_temp_alloc(tx
, param
->idx
);
786 src
= ureg_src(tx
->regs
.r
[param
->idx
]);
788 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
792 assert(param
->idx
== 0);
793 /* the address register (vs only) must be
794 * assigned before use */
795 assert(!ureg_dst_is_undef(tx
->regs
.a0
));
796 ureg_ARR(ureg
, tx
->regs
.address
, ureg_src(tx
->regs
.a0
));
797 src
= ureg_src(tx
->regs
.address
);
799 if (tx
->version
.major
< 2 && tx
->version
.minor
< 4) {
800 /* no subroutines, so should be defined */
801 src
= ureg_src(tx
->regs
.tS
[param
->idx
]);
803 tx_texcoord_alloc(tx
, param
->idx
);
804 src
= tx
->regs
.vT
[param
->idx
];
810 src
= ureg_src_register(TGSI_FILE_INPUT
, param
->idx
);
812 if (tx
->version
.major
< 3) {
814 src
= ureg_DECL_fs_input(tx
->ureg
, TGSI_SEMANTIC_COLOR
,
816 TGSI_INTERPOLATE_PERSPECTIVE
);
818 assert(!param
->rel
); /* TODO */
819 assert(param
->idx
< Elements(tx
->regs
.v
));
820 src
= tx
->regs
.v
[param
->idx
];
824 case D3DSPR_PREDICATE
:
826 tx_pred_alloc(tx
, param
->idx
);
827 src
= ureg_src(tx
->regs
.p
);
830 assert(param
->mod
== NINED3DSPSM_NONE
);
831 assert(param
->swizzle
== NINED3DSP_NOSWIZZLE
);
833 src
= ureg_src_register(TGSI_FILE_SAMPLER
, param
->idx
);
836 assert(!param
->rel
|| IS_VS
);
838 tx
->indirect_const_access
= TRUE
;
839 if (param
->rel
|| !tx_lconstf(tx
, &src
, param
->idx
)) {
841 nine_info_mark_const_f_used(tx
->info
, param
->idx
);
842 src
= ureg_src_register(TGSI_FILE_CONSTANT
, param
->idx
);
844 if (!IS_VS
&& tx
->version
.major
< 2) {
845 /* ps 1.X clamps constants */
846 tmp
= tx_scratch(tx
);
847 ureg_MIN(ureg
, tmp
, src
, ureg_imm1f(ureg
, 1.0f
));
848 ureg_MAX(ureg
, tmp
, ureg_src(tmp
), ureg_imm1f(ureg
, -1.0f
));
855 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
856 assert(!"CONST2/3/4");
857 src
= ureg_imm1f(ureg
, 0.0f
);
859 case D3DSPR_CONSTINT
:
860 /* relative addressing only possible for float constants in vs */
862 if (!tx_lconsti(tx
, &src
, param
->idx
)) {
863 nine_info_mark_const_i_used(tx
->info
, param
->idx
);
864 src
= ureg_src_register(TGSI_FILE_CONSTANT
,
865 tx
->info
->const_i_base
+ param
->idx
);
868 case D3DSPR_CONSTBOOL
:
870 if (!tx_lconstb(tx
, &src
, param
->idx
)) {
871 char r
= param
->idx
/ 4;
872 char s
= param
->idx
& 3;
873 nine_info_mark_const_b_used(tx
->info
, param
->idx
);
874 src
= ureg_src_register(TGSI_FILE_CONSTANT
,
875 tx
->info
->const_b_base
+ r
);
876 src
= ureg_swizzle(src
, s
, s
, s
, s
);
880 if (ureg_dst_is_undef(tx
->regs
.address
))
881 tx
->regs
.address
= ureg_DECL_address(ureg
);
882 if (!tx
->native_integers
)
883 ureg_ARR(ureg
, tx
->regs
.address
, tx_get_loopal(tx
));
885 ureg_UARL(ureg
, tx
->regs
.address
, tx_get_loopal(tx
));
886 src
= ureg_src(tx
->regs
.address
);
888 case D3DSPR_MISCTYPE
:
889 switch (param
->idx
) {
890 case D3DSMO_POSITION
:
891 if (ureg_src_is_undef(tx
->regs
.vPos
))
892 tx
->regs
.vPos
= ureg_DECL_fs_input(ureg
,
893 TGSI_SEMANTIC_POSITION
, 0,
894 TGSI_INTERPOLATE_LINEAR
);
895 if (tx
->shift_wpos
) {
896 /* TODO: do this only once */
897 struct ureg_dst wpos
= tx_scratch(tx
);
898 ureg_SUB(ureg
, wpos
, tx
->regs
.vPos
,
899 ureg_imm4f(ureg
, 0.5f
, 0.5f
, 0.0f
, 0.0f
));
900 src
= ureg_src(wpos
);
906 if (ureg_src_is_undef(tx
->regs
.vFace
)) {
907 tx
->regs
.vFace
= ureg_DECL_fs_input(ureg
,
908 TGSI_SEMANTIC_FACE
, 0,
909 TGSI_INTERPOLATE_CONSTANT
);
910 tx
->regs
.vFace
= ureg_scalar(tx
->regs
.vFace
, TGSI_SWIZZLE_X
);
912 src
= tx
->regs
.vFace
;
915 assert(!"invalid src D3DSMO");
920 case D3DSPR_TEMPFLOAT16
:
923 assert(!"invalid src D3DSPR");
926 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
928 switch (param
->mod
) {
930 tmp
= tx_scratch(tx
);
931 /* NOTE: app is not allowed to read w with this modifier */
932 ureg_RCP(ureg
, ureg_writemask(tmp
, NINED3DSP_WRITEMASK_3
), src
);
933 ureg_MUL(ureg
, tmp
, src
, ureg_swizzle(ureg_src(tmp
), NINE_SWIZZLE4(W
,W
,W
,W
)));
937 tmp
= tx_scratch(tx
);
938 /* NOTE: app is not allowed to read z with this modifier */
939 ureg_RCP(ureg
, ureg_writemask(tmp
, NINED3DSP_WRITEMASK_2
), src
);
940 ureg_MUL(ureg
, tmp
, src
, ureg_swizzle(ureg_src(tmp
), NINE_SWIZZLE4(Z
,Z
,Z
,Z
)));
947 if (param
->swizzle
!= NINED3DSP_NOSWIZZLE
)
948 src
= ureg_swizzle(src
,
949 (param
->swizzle
>> 0) & 0x3,
950 (param
->swizzle
>> 2) & 0x3,
951 (param
->swizzle
>> 4) & 0x3,
952 (param
->swizzle
>> 6) & 0x3);
954 switch (param
->mod
) {
955 case NINED3DSPSM_ABS
:
958 case NINED3DSPSM_ABSNEG
:
959 src
= ureg_negate(ureg_abs(src
));
961 case NINED3DSPSM_NEG
:
962 src
= ureg_negate(src
);
964 case NINED3DSPSM_BIAS
:
965 tmp
= tx_scratch(tx
);
966 ureg_SUB(ureg
, tmp
, src
, ureg_imm1f(ureg
, 0.5f
));
969 case NINED3DSPSM_BIASNEG
:
970 tmp
= tx_scratch(tx
);
971 ureg_SUB(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), src
);
974 case NINED3DSPSM_NOT
:
975 if (tx
->native_integers
) {
976 tmp
= tx_scratch(tx
);
977 ureg_NOT(ureg
, tmp
, src
);
982 case NINED3DSPSM_COMP
:
983 tmp
= tx_scratch(tx
);
984 ureg_SUB(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), src
);
991 case NINED3DSPSM_SIGN
:
992 tmp
= tx_scratch(tx
);
993 ureg_MAD(ureg
, tmp
, src
, ureg_imm1f(ureg
, 2.0f
), ureg_imm1f(ureg
, -1.0f
));
996 case NINED3DSPSM_SIGNNEG
:
997 tmp
= tx_scratch(tx
);
998 ureg_MAD(ureg
, tmp
, src
, ureg_imm1f(ureg
, -2.0f
), ureg_imm1f(ureg
, 1.0f
));
1001 case NINED3DSPSM_X2
:
1002 tmp
= tx_scratch(tx
);
1003 ureg_ADD(ureg
, tmp
, src
, src
);
1004 src
= ureg_src(tmp
);
1006 case NINED3DSPSM_X2NEG
:
1007 tmp
= tx_scratch(tx
);
1008 ureg_ADD(ureg
, tmp
, src
, src
);
1009 src
= ureg_negate(ureg_src(tmp
));
1012 assert(param
->mod
== NINED3DSPSM_NONE
);
1019 static struct ureg_dst
1020 _tx_dst_param(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1022 struct ureg_dst dst
;
1024 switch (param
->file
)
1027 assert(!param
->rel
);
1028 tx_temp_alloc(tx
, param
->idx
);
1029 dst
= tx
->regs
.r
[param
->idx
];
1031 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1033 assert(!param
->rel
);
1034 if (tx
->version
.major
< 2 && !IS_VS
) {
1035 if (ureg_dst_is_undef(tx
->regs
.tS
[param
->idx
]))
1036 tx
->regs
.tS
[param
->idx
] = ureg_DECL_temporary(tx
->ureg
);
1037 dst
= tx
->regs
.tS
[param
->idx
];
1039 if (!IS_VS
&& tx
->insn
.opcode
== D3DSIO_TEXKILL
) { /* maybe others, too */
1040 tx_texcoord_alloc(tx
, param
->idx
);
1041 dst
= ureg_dst(tx
->regs
.vT
[param
->idx
]);
1043 tx_addr_alloc(tx
, param
->idx
);
1047 case D3DSPR_RASTOUT
:
1048 assert(!param
->rel
);
1049 switch (param
->idx
) {
1051 if (ureg_dst_is_undef(tx
->regs
.oPos
))
1053 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0);
1054 dst
= tx
->regs
.oPos
;
1057 if (ureg_dst_is_undef(tx
->regs
.oFog
))
1059 ureg_saturate(ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_FOG
, 0));
1060 dst
= tx
->regs
.oFog
;
1063 if (ureg_dst_is_undef(tx
->regs
.oPts
))
1065 ureg_saturate(ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_PSIZE
, 0));
1066 dst
= tx
->regs
.oPts
;
1073 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1075 if (tx
->version
.major
< 3) {
1076 assert(!param
->rel
);
1077 dst
= ureg_DECL_output(tx
->ureg
, tx
->texcoord_sn
, param
->idx
);
1079 assert(!param
->rel
); /* TODO */
1080 assert(param
->idx
< Elements(tx
->regs
.o
));
1081 dst
= tx
->regs
.o
[param
->idx
];
1084 case D3DSPR_ATTROUT
: /* VS */
1085 case D3DSPR_COLOROUT
: /* PS */
1086 assert(param
->idx
>= 0 && param
->idx
< 4);
1087 assert(!param
->rel
);
1088 tx
->info
->rt_mask
|= 1 << param
->idx
;
1089 if (ureg_dst_is_undef(tx
->regs
.oCol
[param
->idx
]))
1090 tx
->regs
.oCol
[param
->idx
] =
1091 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_COLOR
, param
->idx
);
1092 dst
= tx
->regs
.oCol
[param
->idx
];
1093 if (IS_VS
&& tx
->version
.major
< 3)
1094 dst
= ureg_saturate(dst
);
1096 case D3DSPR_DEPTHOUT
:
1097 assert(!param
->rel
);
1098 if (ureg_dst_is_undef(tx
->regs
.oDepth
))
1100 ureg_DECL_output_masked(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0,
1101 TGSI_WRITEMASK_Z
, 0, 1);
1102 dst
= tx
->regs
.oDepth
; /* XXX: must write .z component */
1104 case D3DSPR_PREDICATE
:
1105 assert(!param
->rel
);
1106 tx_pred_alloc(tx
, param
->idx
);
1109 case D3DSPR_TEMPFLOAT16
:
1110 DBG("unhandled D3DSPR: %u\n", param
->file
);
1113 assert(!"invalid dst D3DSPR");
1117 dst
= ureg_dst_indirect(dst
, tx_src_param(tx
, param
->rel
));
1119 if (param
->mask
!= NINED3DSP_WRITEMASK_ALL
)
1120 dst
= ureg_writemask(dst
, param
->mask
);
1121 if (param
->mod
& NINED3DSPDM_SATURATE
)
1122 dst
= ureg_saturate(dst
);
1127 static struct ureg_dst
1128 tx_dst_param(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1131 tx
->regs
.tdst
= ureg_writemask(tx_scratch(tx
), param
->mask
);
1132 return tx
->regs
.tdst
;
1134 return _tx_dst_param(tx
, param
);
1138 tx_apply_dst0_modifiers(struct shader_translator
*tx
)
1140 struct ureg_dst rdst
;
1143 if (!tx
->insn
.ndst
|| !tx
->insn
.dst
[0].shift
|| tx
->insn
.opcode
== D3DSIO_TEXKILL
)
1145 rdst
= _tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1147 assert(rdst
.File
!= TGSI_FILE_ADDRESS
); /* this probably isn't possible */
1149 if (tx
->insn
.dst
[0].shift
< 0)
1150 f
= 1.0f
/ (1 << -tx
->insn
.dst
[0].shift
);
1152 f
= 1 << tx
->insn
.dst
[0].shift
;
1154 ureg_MUL(tx
->ureg
, rdst
, ureg_src(tx
->regs
.tdst
), ureg_imm1f(tx
->ureg
, f
));
1157 static struct ureg_src
1158 tx_dst_param_as_src(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1160 struct ureg_src src
;
1162 assert(!param
->shift
);
1163 assert(!(param
->mod
& NINED3DSPDM_SATURATE
));
1165 switch (param
->file
) {
1168 src
= ureg_src_register(TGSI_FILE_INPUT
, param
->idx
);
1170 assert(!param
->rel
);
1171 assert(param
->idx
< Elements(tx
->regs
.v
));
1172 src
= tx
->regs
.v
[param
->idx
];
1176 src
= ureg_src(tx_dst_param(tx
, param
));
1180 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
1183 WARN("mask is 0, using identity swizzle\n");
1185 if (param
->mask
&& param
->mask
!= NINED3DSP_WRITEMASK_ALL
) {
1189 for (n
= 0, c
= 0; c
< 4; ++c
)
1190 if (param
->mask
& (1 << c
))
1193 for (c
= n
; c
< 4; ++c
)
1195 src
= ureg_swizzle(src
, s
[0], s
[1], s
[2], s
[3]);
1201 NineTranslateInstruction_Mkxn(struct shader_translator
*tx
, const unsigned k
, const unsigned n
)
1203 struct ureg_program
*ureg
= tx
->ureg
;
1204 struct ureg_dst dst
;
1205 struct ureg_src src
[2];
1206 struct sm1_src_param
*src_mat
= &tx
->insn
.src
[1];
1209 dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1210 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1212 for (i
= 0; i
< n
; i
++)
1214 const unsigned m
= (1 << i
);
1216 src
[1] = tx_src_param(tx
, src_mat
);
1219 if (!(dst
.WriteMask
& m
))
1222 /* XXX: src == dst case ? */
1226 ureg_DP3(ureg
, ureg_writemask(dst
, m
), src
[0], src
[1]);
1229 ureg_DP4(ureg
, ureg_writemask(dst
, m
), src
[0], src
[1]);
1232 DBG("invalid operation: M%ux%u\n", m
, n
);
1240 #define VNOTSUPPORTED 0, 0
1241 #define V(maj, min) (((maj) << 8) | (min))
1243 static inline const char *
1244 d3dsio_to_string( unsigned opcode
)
1246 static const char *names
[] = {
1346 if (opcode
< Elements(names
)) return names
[opcode
];
1349 case D3DSIO_PHASE
: return "PHASE";
1350 case D3DSIO_COMMENT
: return "COMMENT";
1351 case D3DSIO_END
: return "END";
1357 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1358 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1359 (inst).vert_version.max | \
1360 (inst).frag_version.min | \
1361 (inst).frag_version.max)
1363 #define SPECIAL(name) \
1364 NineTranslateInstruction_##name
1366 #define DECL_SPECIAL(name) \
1368 NineTranslateInstruction_##name( struct shader_translator *tx )
1371 NineTranslateInstruction_Generic(struct shader_translator
*);
1375 return NineTranslateInstruction_Mkxn(tx
, 4, 4);
1380 return NineTranslateInstruction_Mkxn(tx
, 4, 3);
1385 return NineTranslateInstruction_Mkxn(tx
, 3, 4);
1390 return NineTranslateInstruction_Mkxn(tx
, 3, 3);
1395 return NineTranslateInstruction_Mkxn(tx
, 3, 2);
1400 ureg_CMP(tx
->ureg
, tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1401 tx_src_param(tx
, &tx
->insn
.src
[0]),
1402 tx_src_param(tx
, &tx
->insn
.src
[2]),
1403 tx_src_param(tx
, &tx
->insn
.src
[1]));
1409 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1410 struct ureg_dst cgt
;
1411 struct ureg_src cnd
;
1413 /* the coissue flag was a tip for compilers to advise to
1414 * execute two operations at the same time, in cases
1415 * the two executions had same dst with different channels.
1416 * It has no effect on current hw. However it seems CND
1417 * is affected. The handling of this very specific case
1418 * below mimics wine behaviour */
1419 if (tx
->insn
.coissue
&& tx
->version
.major
== 1 && tx
->version
.minor
< 4 && tx
->insn
.dst
[0].mask
!= NINED3DSP_WRITEMASK_3
) {
1421 dst
, tx_src_param(tx
, &tx
->insn
.src
[1]));
1425 cnd
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1426 cgt
= tx_scratch(tx
);
1428 if (tx
->version
.major
== 1 && tx
->version
.minor
< 4)
1429 cnd
= ureg_scalar(cnd
, TGSI_SWIZZLE_W
);
1431 ureg_SGT(tx
->ureg
, cgt
, cnd
, ureg_imm1f(tx
->ureg
, 0.5f
));
1433 ureg_CMP(tx
->ureg
, dst
, ureg_negate(ureg_src(cgt
)),
1434 tx_src_param(tx
, &tx
->insn
.src
[1]),
1435 tx_src_param(tx
, &tx
->insn
.src
[2]));
1441 assert(tx
->insn
.src
[0].idx
< tx
->num_inst_labels
);
1442 ureg_CAL(tx
->ureg
, &tx
->inst_labels
[tx
->insn
.src
[0].idx
]);
1446 DECL_SPECIAL(CALLNZ
)
1448 struct ureg_program
*ureg
= tx
->ureg
;
1449 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1451 if (!tx
->native_integers
)
1452 ureg_IF(ureg
, src
, tx_cond(tx
));
1454 ureg_UIF(ureg
, src
, tx_cond(tx
));
1455 ureg_CAL(ureg
, &tx
->inst_labels
[tx
->insn
.src
[0].idx
]);
1461 DECL_SPECIAL(MOV_vs1x
)
1463 if (tx
->insn
.dst
[0].file
== D3DSPR_ADDR
) {
1464 /* Implementation note: We don't write directly
1465 * to the addr register, but to an intermediate
1467 * Contrary to the doc, when writing to ADDR here,
1468 * the rounding is not to nearest, but to lowest
1470 * Since we use ARR next, substract 0.5. */
1472 tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1473 tx_src_param(tx
, &tx
->insn
.src
[0]),
1474 ureg_imm1f(tx
->ureg
, 0.5f
));
1477 return NineTranslateInstruction_Generic(tx
);
1482 struct ureg_program
*ureg
= tx
->ureg
;
1484 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1485 struct ureg_dst ctr
;
1486 struct ureg_dst tmp
;
1487 struct ureg_src ctrx
;
1489 label
= tx_bgnloop(tx
);
1490 ctr
= tx_get_loopctr(tx
, TRUE
);
1491 ctrx
= ureg_scalar(ureg_src(ctr
), TGSI_SWIZZLE_X
);
1493 /* src: num_iterations - start_value of al - step for al - 0 */
1494 ureg_MOV(ureg
, ctr
, src
);
1495 ureg_BGNLOOP(tx
->ureg
, label
);
1496 tmp
= tx_scratch_scalar(tx
);
1497 /* Initially ctr.x contains the number of iterations.
1498 * ctr.y will contain the updated value of al.
1499 * We decrease ctr.x at the end of every iteration,
1500 * and stop when it reaches 0. */
1502 if (!tx
->native_integers
) {
1503 /* case src and ctr contain floats */
1504 /* to avoid precision issue, we stop when ctr <= 0.5 */
1505 ureg_SGE(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), ctrx
);
1506 ureg_IF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1508 /* case src and ctr contain integers */
1509 ureg_ISGE(ureg
, tmp
, ureg_imm1i(ureg
, 0), ctrx
);
1510 ureg_UIF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1524 DECL_SPECIAL(ENDLOOP
)
1526 struct ureg_program
*ureg
= tx
->ureg
;
1527 struct ureg_dst ctr
= tx_get_loopctr(tx
, TRUE
);
1528 struct ureg_dst dst_ctrx
, dst_al
;
1529 struct ureg_src src_ctr
, al_counter
;
1531 dst_ctrx
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_0
);
1532 dst_al
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_1
);
1533 src_ctr
= ureg_src(ctr
);
1534 al_counter
= ureg_scalar(src_ctr
, TGSI_SWIZZLE_Z
);
1537 * ctr.y (aL) += step */
1538 if (!tx
->native_integers
) {
1539 ureg_ADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1f(ureg
, -1.0f
));
1540 ureg_ADD(ureg
, dst_al
, src_ctr
, al_counter
);
1542 ureg_UADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1i(ureg
, -1));
1543 ureg_UADD(ureg
, dst_al
, src_ctr
, al_counter
);
1545 ureg_ENDLOOP(tx
->ureg
, tx_endloop(tx
));
1551 unsigned k
= tx
->num_inst_labels
;
1552 unsigned n
= tx
->insn
.src
[0].idx
;
1555 tx
->inst_labels
= REALLOC(tx
->inst_labels
,
1556 k
* sizeof(tx
->inst_labels
[0]),
1557 n
* sizeof(tx
->inst_labels
[0]));
1559 tx
->inst_labels
[n
] = ureg_get_instruction_number(tx
->ureg
);
1563 DECL_SPECIAL(SINCOS
)
1565 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1566 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1568 assert(!(dst
.WriteMask
& 0xc));
1570 dst
.WriteMask
&= TGSI_WRITEMASK_XY
; /* z undefined, w untouched */
1571 ureg_SCS(tx
->ureg
, dst
, src
);
1578 tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1579 tx_src_param(tx
, &tx
->insn
.src
[0]));
1585 struct ureg_program
*ureg
= tx
->ureg
;
1587 struct ureg_src rep
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1588 struct ureg_dst ctr
;
1589 struct ureg_dst tmp
;
1590 struct ureg_src ctrx
;
1592 label
= tx_bgnloop(tx
);
1593 ctr
= ureg_writemask(tx_get_loopctr(tx
, FALSE
), NINED3DSP_WRITEMASK_0
);
1594 ctrx
= ureg_scalar(ureg_src(ctr
), TGSI_SWIZZLE_X
);
1596 /* NOTE: rep must be constant, so we don't have to save the count */
1597 assert(rep
.File
== TGSI_FILE_CONSTANT
|| rep
.File
== TGSI_FILE_IMMEDIATE
);
1599 /* rep: num_iterations - 0 - 0 - 0 */
1600 ureg_MOV(ureg
, ctr
, rep
);
1601 ureg_BGNLOOP(ureg
, label
);
1602 tmp
= tx_scratch_scalar(tx
);
1603 /* Initially ctr.x contains the number of iterations.
1604 * We decrease ctr.x at the end of every iteration,
1605 * and stop when it reaches 0. */
1607 if (!tx
->native_integers
) {
1608 /* case src and ctr contain floats */
1609 /* to avoid precision issue, we stop when ctr <= 0.5 */
1610 ureg_SGE(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), ctrx
);
1611 ureg_IF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1613 /* case src and ctr contain integers */
1614 ureg_ISGE(ureg
, tmp
, ureg_imm1i(ureg
, 0), ctrx
);
1615 ureg_UIF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1624 DECL_SPECIAL(ENDREP
)
1626 struct ureg_program
*ureg
= tx
->ureg
;
1627 struct ureg_dst ctr
= tx_get_loopctr(tx
, FALSE
);
1628 struct ureg_dst dst_ctrx
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_0
);
1629 struct ureg_src src_ctr
= ureg_src(ctr
);
1632 if (!tx
->native_integers
)
1633 ureg_ADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1f(ureg
, -1.0f
));
1635 ureg_UADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1i(ureg
, -1));
1637 ureg_ENDLOOP(tx
->ureg
, tx_endloop(tx
));
1644 ureg_ENDIF(tx
->ureg
);
1650 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1652 if (tx
->native_integers
&& tx
->insn
.src
[0].file
== D3DSPR_CONSTBOOL
)
1653 ureg_UIF(tx
->ureg
, src
, tx_cond(tx
));
1655 ureg_IF(tx
->ureg
, src
, tx_cond(tx
));
1660 static inline unsigned
1661 sm1_insn_flags_to_tgsi_setop(BYTE flags
)
1664 case NINED3DSHADER_REL_OP_GT
: return TGSI_OPCODE_SGT
;
1665 case NINED3DSHADER_REL_OP_EQ
: return TGSI_OPCODE_SEQ
;
1666 case NINED3DSHADER_REL_OP_GE
: return TGSI_OPCODE_SGE
;
1667 case NINED3DSHADER_REL_OP_LT
: return TGSI_OPCODE_SLT
;
1668 case NINED3DSHADER_REL_OP_NE
: return TGSI_OPCODE_SNE
;
1669 case NINED3DSHADER_REL_OP_LE
: return TGSI_OPCODE_SLE
;
1671 assert(!"invalid comparison flags");
1672 return TGSI_OPCODE_SGT
;
1678 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
1679 struct ureg_src src
[2];
1680 struct ureg_dst tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
1681 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1682 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
1683 ureg_insn(tx
->ureg
, cmp_op
, &tmp
, 1, src
, 2);
1684 ureg_IF(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), tx_cond(tx
));
1690 ureg_ELSE(tx
->ureg
, tx_elsecond(tx
));
1694 DECL_SPECIAL(BREAKC
)
1696 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
1697 struct ureg_src src
[2];
1698 struct ureg_dst tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
1699 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1700 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
1701 ureg_insn(tx
->ureg
, cmp_op
, &tmp
, 1, src
, 2);
1702 ureg_IF(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), tx_cond(tx
));
1705 ureg_ENDIF(tx
->ureg
);
1709 static const char *sm1_declusage_names
[] =
1711 [D3DDECLUSAGE_POSITION
] = "POSITION",
1712 [D3DDECLUSAGE_BLENDWEIGHT
] = "BLENDWEIGHT",
1713 [D3DDECLUSAGE_BLENDINDICES
] = "BLENDINDICES",
1714 [D3DDECLUSAGE_NORMAL
] = "NORMAL",
1715 [D3DDECLUSAGE_PSIZE
] = "PSIZE",
1716 [D3DDECLUSAGE_TEXCOORD
] = "TEXCOORD",
1717 [D3DDECLUSAGE_TANGENT
] = "TANGENT",
1718 [D3DDECLUSAGE_BINORMAL
] = "BINORMAL",
1719 [D3DDECLUSAGE_TESSFACTOR
] = "TESSFACTOR",
1720 [D3DDECLUSAGE_POSITIONT
] = "POSITIONT",
1721 [D3DDECLUSAGE_COLOR
] = "COLOR",
1722 [D3DDECLUSAGE_FOG
] = "FOG",
1723 [D3DDECLUSAGE_DEPTH
] = "DEPTH",
1724 [D3DDECLUSAGE_SAMPLE
] = "SAMPLE"
1727 static inline unsigned
1728 sm1_to_nine_declusage(struct sm1_semantic
*dcl
)
1730 return nine_d3d9_to_nine_declusage(dcl
->usage
, dcl
->usage_idx
);
1734 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic
*sem
,
1736 struct sm1_semantic
*dcl
)
1738 BYTE index
= dcl
->usage_idx
;
1740 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1741 * we match to a TGSI_SEMANTIC_GENERIC with index.
1743 * The index can be anything UINT16 and usage_idx is BYTE,
1744 * so we can fit everything. It doesn't matter if indices
1745 * are close together or low.
1748 * POSITION >= 1: 10 * index + 6
1749 * COLOR >= 2: 10 * (index-1) + 7
1750 * TEXCOORD[0..15]: index
1751 * BLENDWEIGHT: 10 * index + 18
1752 * BLENDINDICES: 10 * index + 19
1753 * NORMAL: 10 * index + 20
1754 * TANGENT: 10 * index + 21
1755 * BINORMAL: 10 * index + 22
1756 * TESSFACTOR: 10 * index + 23
1759 switch (dcl
->usage
) {
1760 case D3DDECLUSAGE_POSITION
:
1761 case D3DDECLUSAGE_POSITIONT
:
1762 case D3DDECLUSAGE_DEPTH
:
1764 sem
->Name
= TGSI_SEMANTIC_POSITION
;
1767 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1768 sem
->Index
= 10 * index
+ 6;
1771 case D3DDECLUSAGE_COLOR
:
1773 sem
->Name
= TGSI_SEMANTIC_COLOR
;
1776 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1777 sem
->Index
= 10 * (index
-1) + 7;
1780 case D3DDECLUSAGE_FOG
:
1782 sem
->Name
= TGSI_SEMANTIC_FOG
;
1785 case D3DDECLUSAGE_PSIZE
:
1787 sem
->Name
= TGSI_SEMANTIC_PSIZE
;
1790 case D3DDECLUSAGE_TEXCOORD
:
1792 if (index
< 8 && tc
)
1793 sem
->Name
= TGSI_SEMANTIC_TEXCOORD
;
1795 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1798 case D3DDECLUSAGE_BLENDWEIGHT
:
1799 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1800 sem
->Index
= 10 * index
+ 18;
1802 case D3DDECLUSAGE_BLENDINDICES
:
1803 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1804 sem
->Index
= 10 * index
+ 19;
1806 case D3DDECLUSAGE_NORMAL
:
1807 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1808 sem
->Index
= 10 * index
+ 20;
1810 case D3DDECLUSAGE_TANGENT
:
1811 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1812 sem
->Index
= 10 * index
+ 21;
1814 case D3DDECLUSAGE_BINORMAL
:
1815 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1816 sem
->Index
= 10 * index
+ 22;
1818 case D3DDECLUSAGE_TESSFACTOR
:
1819 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1820 sem
->Index
= 10 * index
+ 23;
1822 case D3DDECLUSAGE_SAMPLE
:
1823 sem
->Name
= TGSI_SEMANTIC_COUNT
;
1827 assert(!"Invalid DECLUSAGE.");
1832 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
1833 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
1834 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
1835 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1836 static inline unsigned
1837 d3dstt_to_tgsi_tex(BYTE sampler_type
)
1839 switch (sampler_type
) {
1840 case NINED3DSTT_1D
: return TGSI_TEXTURE_1D
;
1841 case NINED3DSTT_2D
: return TGSI_TEXTURE_2D
;
1842 case NINED3DSTT_VOLUME
: return TGSI_TEXTURE_3D
;
1843 case NINED3DSTT_CUBE
: return TGSI_TEXTURE_CUBE
;
1846 return TGSI_TEXTURE_UNKNOWN
;
1849 static inline unsigned
1850 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type
)
1852 switch (sampler_type
) {
1853 case NINED3DSTT_1D
: return TGSI_TEXTURE_SHADOW1D
;
1854 case NINED3DSTT_2D
: return TGSI_TEXTURE_SHADOW2D
;
1855 case NINED3DSTT_VOLUME
:
1856 case NINED3DSTT_CUBE
:
1859 return TGSI_TEXTURE_UNKNOWN
;
1862 static inline unsigned
1863 ps1x_sampler_type(const struct nine_shader_info
*info
, unsigned stage
)
1865 switch ((info
->sampler_ps1xtypes
>> (stage
* 2)) & 0x3) {
1866 case 1: return TGSI_TEXTURE_1D
;
1867 case 0: return TGSI_TEXTURE_2D
;
1868 case 3: return TGSI_TEXTURE_3D
;
1870 return TGSI_TEXTURE_CUBE
;
1875 sm1_sampler_type_name(BYTE sampler_type
)
1877 switch (sampler_type
) {
1878 case NINED3DSTT_1D
: return "1D";
1879 case NINED3DSTT_2D
: return "2D";
1880 case NINED3DSTT_VOLUME
: return "VOLUME";
1881 case NINED3DSTT_CUBE
: return "CUBE";
1883 return "(D3DSTT_?)";
1887 static inline unsigned
1888 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic
*sem
)
1890 switch (sem
->Name
) {
1891 case TGSI_SEMANTIC_POSITION
:
1892 case TGSI_SEMANTIC_NORMAL
:
1893 return TGSI_INTERPOLATE_LINEAR
;
1894 case TGSI_SEMANTIC_BCOLOR
:
1895 case TGSI_SEMANTIC_COLOR
:
1896 case TGSI_SEMANTIC_FOG
:
1897 case TGSI_SEMANTIC_GENERIC
:
1898 case TGSI_SEMANTIC_TEXCOORD
:
1899 case TGSI_SEMANTIC_CLIPDIST
:
1900 case TGSI_SEMANTIC_CLIPVERTEX
:
1901 return TGSI_INTERPOLATE_PERSPECTIVE
;
1902 case TGSI_SEMANTIC_EDGEFLAG
:
1903 case TGSI_SEMANTIC_FACE
:
1904 case TGSI_SEMANTIC_INSTANCEID
:
1905 case TGSI_SEMANTIC_PCOORD
:
1906 case TGSI_SEMANTIC_PRIMID
:
1907 case TGSI_SEMANTIC_PSIZE
:
1908 case TGSI_SEMANTIC_VERTEXID
:
1909 return TGSI_INTERPOLATE_CONSTANT
;
1912 return TGSI_INTERPOLATE_CONSTANT
;
1918 struct ureg_program
*ureg
= tx
->ureg
;
1921 struct tgsi_declaration_semantic tgsi
;
1922 struct sm1_semantic sem
;
1923 sm1_read_semantic(tx
, &sem
);
1925 is_input
= sem
.reg
.file
== D3DSPR_INPUT
;
1927 sem
.usage
== D3DDECLUSAGE_SAMPLE
|| sem
.reg
.file
== D3DSPR_SAMPLER
;
1930 sm1_dump_dst_param(&sem
.reg
);
1932 DUMP(" %s\n", sm1_sampler_type_name(sem
.sampler_type
));
1934 if (tx
->version
.major
>= 3)
1935 DUMP(" %s%i\n", sm1_declusage_names
[sem
.usage
], sem
.usage_idx
);
1937 if (sem
.usage
| sem
.usage_idx
)
1938 DUMP(" %u[%u]\n", sem
.usage
, sem
.usage_idx
);
1943 const unsigned m
= 1 << sem
.reg
.idx
;
1944 ureg_DECL_sampler(ureg
, sem
.reg
.idx
);
1945 tx
->info
->sampler_mask
|= m
;
1946 tx
->sampler_targets
[sem
.reg
.idx
] = (tx
->info
->sampler_mask_shadow
& m
) ?
1947 d3dstt_to_tgsi_tex_shadow(sem
.sampler_type
) :
1948 d3dstt_to_tgsi_tex(sem
.sampler_type
);
1952 sm1_declusage_to_tgsi(&tgsi
, tx
->want_texcoord
, &sem
);
1955 /* linkage outside of shader with vertex declaration */
1956 ureg_DECL_vs_input(ureg
, sem
.reg
.idx
);
1957 assert(sem
.reg
.idx
< Elements(tx
->info
->input_map
));
1958 tx
->info
->input_map
[sem
.reg
.idx
] = sm1_to_nine_declusage(&sem
);
1959 tx
->info
->num_inputs
= sem
.reg
.idx
+ 1;
1960 /* NOTE: preserving order in case of indirect access */
1962 if (tx
->version
.major
>= 3) {
1963 /* SM2 output semantic determined by file */
1964 assert(sem
.reg
.mask
!= 0);
1965 if (sem
.usage
== D3DDECLUSAGE_POSITIONT
)
1966 tx
->info
->position_t
= TRUE
;
1967 assert(sem
.reg
.idx
< Elements(tx
->regs
.o
));
1968 tx
->regs
.o
[sem
.reg
.idx
] = ureg_DECL_output_masked(
1969 ureg
, tgsi
.Name
, tgsi
.Index
, sem
.reg
.mask
, 0, 1);
1971 if (tgsi
.Name
== TGSI_SEMANTIC_PSIZE
)
1972 tx
->regs
.oPts
= tx
->regs
.o
[sem
.reg
.idx
];
1975 if (is_input
&& tx
->version
.major
>= 3) {
1976 /* SM3 only, SM2 input semantic determined by file */
1977 assert(sem
.reg
.idx
< Elements(tx
->regs
.v
));
1978 tx
->regs
.v
[sem
.reg
.idx
] = ureg_DECL_fs_input_cyl_centroid(
1979 ureg
, tgsi
.Name
, tgsi
.Index
,
1980 nine_tgsi_to_interp_mode(&tgsi
),
1982 sem
.reg
.mod
& NINED3DSPDM_CENTROID
, 0, 1);
1984 if (!is_input
&& 0) { /* declare in COLOROUT/DEPTHOUT case */
1985 /* FragColor or FragDepth */
1986 assert(sem
.reg
.mask
!= 0);
1987 ureg_DECL_output_masked(ureg
, tgsi
.Name
, tgsi
.Index
, sem
.reg
.mask
,
1996 tx_set_lconstf(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.f
);
2002 tx_set_lconstb(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.b
);
2008 tx_set_lconsti(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.i
);
2014 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2015 struct ureg_src src
[2] = {
2016 tx_src_param(tx
, &tx
->insn
.src
[0]),
2017 tx_src_param(tx
, &tx
->insn
.src
[1])
2019 ureg_POW(tx
->ureg
, dst
, ureg_abs(src
[0]), src
[1]);
2025 struct ureg_program
*ureg
= tx
->ureg
;
2026 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2027 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2028 struct ureg_dst tmp
= tx_scratch(tx
);
2029 ureg_RSQ(ureg
, tmp
, ureg_abs(src
));
2030 ureg_MIN(ureg
, dst
, ureg_imm1f(ureg
, FLT_MAX
), ureg_src(tmp
));
2036 struct ureg_program
*ureg
= tx
->ureg
;
2037 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2038 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2039 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2040 ureg_LG2(ureg
, tmp
, ureg_abs(src
));
2041 ureg_MAX(ureg
, dst
, ureg_imm1f(ureg
, -FLT_MAX
), tx_src_scalar(tmp
));
2047 struct ureg_program
*ureg
= tx
->ureg
;
2048 struct ureg_dst tmp
= tx_scratch(tx
);
2049 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2050 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2051 ureg_LIT(ureg
, tmp
, src
);
2052 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2053 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2054 * it 0^0 if src.w=0, which value is driver dependent. */
2055 ureg_CMP(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
),
2056 ureg_negate(ureg_scalar(src
, TGSI_SWIZZLE_Y
)),
2057 ureg_src(tmp
), ureg_imm1f(ureg
, 0.0f
));
2058 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XYW
), ureg_src(tmp
));
2064 struct ureg_program
*ureg
= tx
->ureg
;
2065 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2066 struct ureg_src nrm
= tx_src_scalar(tmp
);
2067 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2068 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2069 ureg_DP3(ureg
, tmp
, src
, src
);
2070 ureg_RSQ(ureg
, tmp
, nrm
);
2071 ureg_MIN(ureg
, tmp
, ureg_imm1f(ureg
, FLT_MAX
), nrm
);
2072 ureg_MUL(ureg
, dst
, src
, nrm
);
2076 DECL_SPECIAL(DP2ADD
)
2078 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2079 struct ureg_src dp2
= tx_src_scalar(tmp
);
2080 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2081 struct ureg_src src
[3];
2083 for (i
= 0; i
< 3; ++i
)
2084 src
[i
] = tx_src_param(tx
, &tx
->insn
.src
[i
]);
2085 assert_replicate_swizzle(&src
[2]);
2087 ureg_DP2(tx
->ureg
, tmp
, src
[0], src
[1]);
2088 ureg_ADD(tx
->ureg
, dst
, src
[2], dp2
);
2093 DECL_SPECIAL(TEXCOORD
)
2095 struct ureg_program
*ureg
= tx
->ureg
;
2096 const unsigned s
= tx
->insn
.dst
[0].idx
;
2097 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2099 tx_texcoord_alloc(tx
, s
);
2100 ureg_MOV(ureg
, ureg_writemask(ureg_saturate(dst
), TGSI_WRITEMASK_XYZ
), tx
->regs
.vT
[s
]);
2101 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
), ureg_imm1f(tx
->ureg
, 1.0f
));
2106 DECL_SPECIAL(TEXCOORD_ps14
)
2108 struct ureg_program
*ureg
= tx
->ureg
;
2109 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2110 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2112 assert(tx
->insn
.src
[0].file
== D3DSPR_TEXTURE
);
2114 ureg_MOV(ureg
, dst
, src
);
2119 DECL_SPECIAL(TEXKILL
)
2121 struct ureg_src reg
;
2123 if (tx
->version
.major
> 1 || tx
->version
.minor
> 3) {
2124 reg
= tx_dst_param_as_src(tx
, &tx
->insn
.dst
[0]);
2126 tx_texcoord_alloc(tx
, tx
->insn
.dst
[0].idx
);
2127 reg
= tx
->regs
.vT
[tx
->insn
.dst
[0].idx
];
2129 if (tx
->version
.major
< 2)
2130 reg
= ureg_swizzle(reg
, NINE_SWIZZLE4(X
,Y
,Z
,Z
));
2131 ureg_KILL_IF(tx
->ureg
, reg
);
2136 DECL_SPECIAL(TEXBEM
)
2138 STUB(D3DERR_INVALIDCALL
);
2141 DECL_SPECIAL(TEXBEML
)
2143 STUB(D3DERR_INVALIDCALL
);
2146 DECL_SPECIAL(TEXREG2AR
)
2148 struct ureg_program
*ureg
= tx
->ureg
;
2149 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2150 struct ureg_src sample
;
2151 const int m
= tx
->insn
.dst
[0].idx
;
2152 const int n
= tx
->insn
.src
[0].idx
;
2153 assert(m
>= 0 && m
> n
);
2155 sample
= ureg_DECL_sampler(ureg
, m
);
2156 tx
->info
->sampler_mask
|= 1 << m
;
2157 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_swizzle(ureg_src(tx
->regs
.tS
[n
]), NINE_SWIZZLE4(W
,X
,X
,X
)), sample
);
2162 DECL_SPECIAL(TEXREG2GB
)
2164 struct ureg_program
*ureg
= tx
->ureg
;
2165 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2166 struct ureg_src sample
;
2167 const int m
= tx
->insn
.dst
[0].idx
;
2168 const int n
= tx
->insn
.src
[0].idx
;
2169 assert(m
>= 0 && m
> n
);
2171 sample
= ureg_DECL_sampler(ureg
, m
);
2172 tx
->info
->sampler_mask
|= 1 << m
;
2173 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_swizzle(ureg_src(tx
->regs
.tS
[n
]), NINE_SWIZZLE4(Y
,Z
,Z
,Z
)), sample
);
2178 DECL_SPECIAL(TEXM3x2PAD
)
2180 return D3D_OK
; /* this is just padding */
2183 DECL_SPECIAL(TEXM3x2TEX
)
2185 struct ureg_program
*ureg
= tx
->ureg
;
2186 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2187 struct ureg_src sample
;
2188 const int m
= tx
->insn
.dst
[0].idx
- 1;
2189 const int n
= tx
->insn
.src
[0].idx
;
2190 assert(m
>= 0 && m
> n
);
2192 tx_texcoord_alloc(tx
, m
);
2193 tx_texcoord_alloc(tx
, m
+1);
2195 /* performs the matrix multiplication */
2196 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2197 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2199 sample
= ureg_DECL_sampler(ureg
, m
+ 1);
2200 tx
->info
->sampler_mask
|= 1 << (m
+ 1);
2201 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 1), ureg_src(dst
), sample
);
2206 DECL_SPECIAL(TEXM3x3PAD
)
2208 return D3D_OK
; /* this is just padding */
2211 DECL_SPECIAL(TEXM3x3SPEC
)
2213 struct ureg_program
*ureg
= tx
->ureg
;
2214 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2215 struct ureg_src E
= tx_src_param(tx
, &tx
->insn
.src
[1]);
2216 struct ureg_src sample
;
2217 struct ureg_dst tmp
;
2218 const int m
= tx
->insn
.dst
[0].idx
- 2;
2219 const int n
= tx
->insn
.src
[0].idx
;
2220 assert(m
>= 0 && m
> n
);
2222 tx_texcoord_alloc(tx
, m
);
2223 tx_texcoord_alloc(tx
, m
+1);
2224 tx_texcoord_alloc(tx
, m
+2);
2226 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2227 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2228 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
), tx
->regs
.vT
[m
+2], ureg_src(tx
->regs
.tS
[n
]));
2230 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2231 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2232 tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_XYZ
);
2234 /* At this step, dst = N = (u', w', z').
2235 * We want dst to be the texture sampled at (u'', w'', z''), with
2236 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2237 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(dst
), ureg_src(dst
));
2238 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2239 /* at this step tmp.x = 1/N.N */
2240 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(dst
), E
);
2241 /* at this step tmp.y = N.E */
2242 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2243 /* at this step tmp.x = N.E/N.N */
2244 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 2.0f
));
2245 ureg_MUL(ureg
, tmp
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_src(dst
));
2246 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2247 ureg_SUB(ureg
, tmp
, ureg_src(tmp
), E
);
2248 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(tmp
), sample
);
2253 DECL_SPECIAL(TEXREG2RGB
)
2255 struct ureg_program
*ureg
= tx
->ureg
;
2256 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2257 struct ureg_src sample
;
2258 const int m
= tx
->insn
.dst
[0].idx
;
2259 const int n
= tx
->insn
.src
[0].idx
;
2260 assert(m
>= 0 && m
> n
);
2262 sample
= ureg_DECL_sampler(ureg
, m
);
2263 tx
->info
->sampler_mask
|= 1 << m
;
2264 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tx
->regs
.tS
[n
]), sample
);
2269 DECL_SPECIAL(TEXDP3TEX
)
2271 struct ureg_program
*ureg
= tx
->ureg
;
2272 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2273 struct ureg_dst tmp
;
2274 struct ureg_src sample
;
2275 const int m
= tx
->insn
.dst
[0].idx
;
2276 const int n
= tx
->insn
.src
[0].idx
;
2277 assert(m
>= 0 && m
> n
);
2279 tx_texcoord_alloc(tx
, m
);
2281 tmp
= tx_scratch(tx
);
2282 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2283 ureg_MOV(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_YZ
), ureg_imm1f(ureg
, 0.0f
));
2285 sample
= ureg_DECL_sampler(ureg
, m
);
2286 tx
->info
->sampler_mask
|= 1 << m
;
2287 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tmp
), sample
);
2292 DECL_SPECIAL(TEXM3x2DEPTH
)
2294 struct ureg_program
*ureg
= tx
->ureg
;
2295 struct ureg_dst tmp
;
2296 const int m
= tx
->insn
.dst
[0].idx
- 1;
2297 const int n
= tx
->insn
.src
[0].idx
;
2298 assert(m
>= 0 && m
> n
);
2300 tx_texcoord_alloc(tx
, m
);
2301 tx_texcoord_alloc(tx
, m
+1);
2303 tmp
= tx_scratch(tx
);
2305 /* performs the matrix multiplication */
2306 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2307 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2309 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Z
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2310 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2311 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Z
));
2312 /* res = 'w' == 0 ? 1.0 : z/w */
2313 ureg_CMP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
))),
2314 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 1.0f
));
2315 /* replace the depth for depth testing with the result */
2316 tx
->regs
.oDepth
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_POSITION
, 0,
2317 TGSI_WRITEMASK_Z
, 0, 1);
2318 ureg_MOV(ureg
, tx
->regs
.oDepth
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2319 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2323 DECL_SPECIAL(TEXDP3
)
2325 struct ureg_program
*ureg
= tx
->ureg
;
2326 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2327 const int m
= tx
->insn
.dst
[0].idx
;
2328 const int n
= tx
->insn
.src
[0].idx
;
2329 assert(m
>= 0 && m
> n
);
2331 tx_texcoord_alloc(tx
, m
);
2333 ureg_DP3(ureg
, dst
, tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2338 DECL_SPECIAL(TEXM3x3
)
2340 struct ureg_program
*ureg
= tx
->ureg
;
2341 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2342 struct ureg_src sample
;
2343 struct ureg_dst E
, tmp
;
2344 const int m
= tx
->insn
.dst
[0].idx
- 2;
2345 const int n
= tx
->insn
.src
[0].idx
;
2346 assert(m
>= 0 && m
> n
);
2348 tx_texcoord_alloc(tx
, m
);
2349 tx_texcoord_alloc(tx
, m
+1);
2350 tx_texcoord_alloc(tx
, m
+2);
2352 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2353 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2354 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
), tx
->regs
.vT
[m
+2], ureg_src(tx
->regs
.tS
[n
]));
2356 switch (tx
->insn
.opcode
) {
2357 case D3DSIO_TEXM3x3
:
2358 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
2360 case D3DSIO_TEXM3x3TEX
:
2361 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2362 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2363 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(dst
), sample
);
2365 case D3DSIO_TEXM3x3VSPEC
:
2366 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2367 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2369 tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_XYZ
);
2370 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_X
), ureg_scalar(tx
->regs
.vT
[m
], TGSI_SWIZZLE_W
));
2371 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_Y
), ureg_scalar(tx
->regs
.vT
[m
+1], TGSI_SWIZZLE_W
));
2372 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_Z
), ureg_scalar(tx
->regs
.vT
[m
+2], TGSI_SWIZZLE_W
));
2373 /* At this step, dst = N = (u', w', z').
2374 * We want dst to be the texture sampled at (u'', w'', z''), with
2375 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2376 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(dst
), ureg_src(dst
));
2377 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2378 /* at this step tmp.x = 1/N.N */
2379 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(dst
), ureg_src(E
));
2380 /* at this step tmp.y = N.E */
2381 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2382 /* at this step tmp.x = N.E/N.N */
2383 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 2.0f
));
2384 ureg_MUL(ureg
, tmp
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_src(dst
));
2385 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2386 ureg_SUB(ureg
, tmp
, ureg_src(tmp
), ureg_src(E
));
2387 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(tmp
), sample
);
2390 return D3DERR_INVALIDCALL
;
2395 DECL_SPECIAL(TEXDEPTH
)
2397 struct ureg_program
*ureg
= tx
->ureg
;
2399 struct ureg_src r5r
, r5g
;
2401 assert(tx
->insn
.dst
[0].idx
== 5); /* instruction must get r5 here */
2403 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2404 * r5 won't be used afterward, thus we can use r5.ba */
2406 r5r
= ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_X
);
2407 r5g
= ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_Y
);
2409 ureg_RCP(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_Z
), r5g
);
2410 ureg_MUL(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_X
), r5r
, ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_Z
));
2412 ureg_CMP(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_X
), ureg_negate(ureg_abs(r5g
)),
2413 r5r
, ureg_imm1f(ureg
, 1.0f
));
2414 /* replace the depth for depth testing with the result */
2415 tx
->regs
.oDepth
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_POSITION
, 0,
2416 TGSI_WRITEMASK_Z
, 0, 1);
2417 ureg_MOV(ureg
, tx
->regs
.oDepth
, r5r
);
2424 STUB(D3DERR_INVALIDCALL
);
2429 struct ureg_program
*ureg
= tx
->ureg
;
2431 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2432 struct ureg_src src
[2] = {
2433 tx_src_param(tx
, &tx
->insn
.src
[0]),
2434 tx_src_param(tx
, &tx
->insn
.src
[1])
2436 assert(tx
->insn
.src
[1].idx
>= 0 &&
2437 tx
->insn
.src
[1].idx
< Elements(tx
->sampler_targets
));
2438 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2440 switch (tx
->insn
.flags
) {
2442 ureg_TEX(ureg
, dst
, target
, src
[0], src
[1]);
2444 case NINED3DSI_TEXLD_PROJECT
:
2445 ureg_TXP(ureg
, dst
, target
, src
[0], src
[1]);
2447 case NINED3DSI_TEXLD_BIAS
:
2448 ureg_TXB(ureg
, dst
, target
, src
[0], src
[1]);
2452 return D3DERR_INVALIDCALL
;
2457 DECL_SPECIAL(TEXLD_14
)
2459 struct ureg_program
*ureg
= tx
->ureg
;
2460 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2461 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2462 const unsigned s
= tx
->insn
.dst
[0].idx
;
2463 const unsigned t
= ps1x_sampler_type(tx
->info
, s
);
2465 tx
->info
->sampler_mask
|= 1 << s
;
2466 ureg_TEX(ureg
, dst
, t
, src
, ureg_DECL_sampler(ureg
, s
));
2473 struct ureg_program
*ureg
= tx
->ureg
;
2474 const unsigned s
= tx
->insn
.dst
[0].idx
;
2475 const unsigned t
= ps1x_sampler_type(tx
->info
, s
);
2476 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2477 struct ureg_src src
[2];
2479 tx_texcoord_alloc(tx
, s
);
2481 src
[0] = tx
->regs
.vT
[s
];
2482 src
[1] = ureg_DECL_sampler(ureg
, s
);
2483 tx
->info
->sampler_mask
|= 1 << s
;
2485 ureg_TEX(ureg
, dst
, t
, src
[0], src
[1]);
2490 DECL_SPECIAL(TEXLDD
)
2493 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2494 struct ureg_src src
[4] = {
2495 tx_src_param(tx
, &tx
->insn
.src
[0]),
2496 tx_src_param(tx
, &tx
->insn
.src
[1]),
2497 tx_src_param(tx
, &tx
->insn
.src
[2]),
2498 tx_src_param(tx
, &tx
->insn
.src
[3])
2500 assert(tx
->insn
.src
[1].idx
>= 0 &&
2501 tx
->insn
.src
[1].idx
< Elements(tx
->sampler_targets
));
2502 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2504 ureg_TXD(tx
->ureg
, dst
, target
, src
[0], src
[2], src
[3], src
[1]);
2508 DECL_SPECIAL(TEXLDL
)
2511 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2512 struct ureg_src src
[2] = {
2513 tx_src_param(tx
, &tx
->insn
.src
[0]),
2514 tx_src_param(tx
, &tx
->insn
.src
[1])
2516 assert(tx
->insn
.src
[1].idx
>= 0 &&
2517 tx
->insn
.src
[1].idx
< Elements(tx
->sampler_targets
));
2518 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2520 ureg_TXL(tx
->ureg
, dst
, target
, src
[0], src
[1]);
2526 STUB(D3DERR_INVALIDCALL
);
2529 DECL_SPECIAL(BREAKP
)
2531 STUB(D3DERR_INVALIDCALL
);
2536 return D3D_OK
; /* we don't care about phase */
2539 DECL_SPECIAL(COMMENT
)
2541 return D3D_OK
; /* nothing to do */
/* Builds one struct sm1_op_info initializer:
 *   sio / tgsi       - suffixes pasted onto D3DSIO_ and TGSI_OPCODE_
 *   vs_min / vs_max  - first version pair (vertex shader range)
 *   ps_min / ps_max  - second version pair (pixel shader range)
 *   ndst / nsrc      - destination and source operand counts
 *   handler          - special translation callback, or NULL */
#define _OPI(sio,tgsi,vs_min,vs_max,ps_min,ps_max,ndst,nsrc,handler) \
    { D3DSIO_##sio, TGSI_OPCODE_##tgsi, \
      { vs_min, vs_max }, { ps_min, ps_max }, ndst, nsrc, handler }
2548 struct sm1_op_info inst_table
[] =
2550 _OPI(NOP
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL
), /* 0 */
2551 _OPI(MOV
, MOV
, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x
)),
2552 _OPI(MOV
, MOV
, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
),
2553 _OPI(ADD
, ADD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 2 */
2554 _OPI(SUB
, SUB
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 3 */
2555 _OPI(MAD
, MAD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL
), /* 4 */
2556 _OPI(MUL
, MUL
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 5 */
2557 _OPI(RCP
, RCP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 6 */
2558 _OPI(RSQ
, RSQ
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ
)), /* 7 */
2559 _OPI(DP3
, DP3
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 8 */
2560 _OPI(DP4
, DP4
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 9 */
2561 _OPI(MIN
, MIN
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 10 */
2562 _OPI(MAX
, MAX
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 11 */
2563 _OPI(SLT
, SLT
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 12 */
2564 _OPI(SGE
, SGE
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 13 */
2565 _OPI(EXP
, EX2
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 14 */
2566 _OPI(LOG
, LG2
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG
)), /* 15 */
2567 _OPI(LIT
, LIT
, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT
)), /* 16 */
2568 _OPI(DST
, DST
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 17 */
2569 _OPI(LRP
, LRP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL
), /* 18 */
2570 _OPI(FRC
, FRC
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 19 */
2572 _OPI(M4x4
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4
)),
2573 _OPI(M4x3
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3
)),
2574 _OPI(M3x4
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4
)),
2575 _OPI(M3x3
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3
)),
2576 _OPI(M3x2
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2
)),
2578 _OPI(CALL
, CAL
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL
)),
2579 _OPI(CALLNZ
, CAL
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ
)),
2580 _OPI(LOOP
, BGNLOOP
, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP
)),
2581 _OPI(RET
, RET
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET
)),
2582 _OPI(ENDLOOP
, ENDLOOP
, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP
)),
2583 _OPI(LABEL
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL
)),
2585 _OPI(DCL
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL
)),
2587 _OPI(POW
, POW
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW
)),
2588 _OPI(CRS
, XPD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* XXX: .w */
2589 _OPI(SGN
, SSG
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN
)), /* ignore src1,2 */
2590 _OPI(ABS
, ABS
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
),
2591 _OPI(NRM
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM
)), /* NRM doesn't fit */
2593 _OPI(SINCOS
, SCS
, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS
)),
2594 _OPI(SINCOS
, SCS
, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS
)),
2596 /* More flow control */
2597 _OPI(REP
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP
)),
2598 _OPI(ENDREP
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP
)),
2599 _OPI(IF
, IF
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF
)),
2600 _OPI(IFC
, IF
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC
)),
2601 _OPI(ELSE
, ELSE
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE
)),
2602 _OPI(ENDIF
, ENDIF
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF
)),
2603 _OPI(BREAK
, BRK
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL
),
2604 _OPI(BREAKC
, BREAKC
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC
)),
2605 /* we don't write to the address register, but a normal register (copied
2606 * when needed to the address register), thus we don't use ARR */
2607 _OPI(MOVA
, MOV
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
2609 _OPI(DEFB
, NOP
, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB
)),
2610 _OPI(DEFI
, NOP
, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI
)),
2612 _OPI(TEXCOORD
, NOP
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD
)),
2613 _OPI(TEXCOORD
, MOV
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14
)),
2614 _OPI(TEXKILL
, KILL_IF
, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL
)),
2615 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX
)),
2616 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14
)),
2617 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD
)),
2618 _OPI(TEXBEM
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM
)),
2619 _OPI(TEXBEML
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML
)),
2620 _OPI(TEXREG2AR
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR
)),
2621 _OPI(TEXREG2GB
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB
)),
2622 _OPI(TEXM3x2PAD
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD
)),
2623 _OPI(TEXM3x2TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX
)),
2624 _OPI(TEXM3x3PAD
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD
)),
2625 _OPI(TEXM3x3TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
2626 _OPI(TEXM3x3SPEC
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC
)),
2627 _OPI(TEXM3x3VSPEC
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
2629 _OPI(EXPP
, EXP
, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL
),
2630 _OPI(EXPP
, EX2
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
2631 _OPI(LOGP
, LG2
, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG
)),
2632 _OPI(CND
, NOP
, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND
)),
2634 _OPI(DEF
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF
)),
2636 /* More tex stuff */
2637 _OPI(TEXREG2RGB
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB
)),
2638 _OPI(TEXDP3TEX
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX
)),
2639 _OPI(TEXM3x2DEPTH
, TEX
, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH
)),
2640 _OPI(TEXDP3
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3
)),
2641 _OPI(TEXM3x3
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
2642 _OPI(TEXDEPTH
, TEX
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH
)),
2645 _OPI(CMP
, CMP
, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP
)), /* reversed */
2646 _OPI(BEM
, NOP
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM
)),
2647 _OPI(DP2ADD
, NOP
, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD
)),
2648 _OPI(DSX
, DDX
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL
),
2649 _OPI(DSY
, DDY
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL
),
2650 _OPI(TEXLDD
, TXD
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD
)),
2651 _OPI(SETP
, NOP
, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP
)),
2652 _OPI(TEXLDL
, TXL
, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL
)),
2653 _OPI(BREAKP
, BRK
, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP
))
2656 struct sm1_op_info inst_phase
=
2657 _OPI(PHASE
, NOP
, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE
));
2659 struct sm1_op_info inst_comment
=
2660 _OPI(COMMENT
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT
));
2663 create_op_info_map(struct shader_translator
*tx
)
2665 const unsigned version
= (tx
->version
.major
<< 8) | tx
->version
.minor
;
2668 for (i
= 0; i
< Elements(tx
->op_info_map
); ++i
)
2669 tx
->op_info_map
[i
] = -1;
2671 if (tx
->processor
== TGSI_PROCESSOR_VERTEX
) {
2672 for (i
= 0; i
< Elements(inst_table
); ++i
) {
2673 assert(inst_table
[i
].sio
< Elements(tx
->op_info_map
));
2674 if (inst_table
[i
].vert_version
.min
<= version
&&
2675 inst_table
[i
].vert_version
.max
>= version
)
2676 tx
->op_info_map
[inst_table
[i
].sio
] = i
;
2679 for (i
= 0; i
< Elements(inst_table
); ++i
) {
2680 assert(inst_table
[i
].sio
< Elements(tx
->op_info_map
));
2681 if (inst_table
[i
].frag_version
.min
<= version
&&
2682 inst_table
[i
].frag_version
.max
>= version
)
2683 tx
->op_info_map
[inst_table
[i
].sio
] = i
;
2688 static inline HRESULT
2689 NineTranslateInstruction_Generic(struct shader_translator
*tx
)
2691 struct ureg_dst dst
[1];
2692 struct ureg_src src
[4];
2695 for (i
= 0; i
< tx
->insn
.ndst
&& i
< Elements(dst
); ++i
)
2696 dst
[i
] = tx_dst_param(tx
, &tx
->insn
.dst
[i
]);
2697 for (i
= 0; i
< tx
->insn
.nsrc
&& i
< Elements(src
); ++i
)
2698 src
[i
] = tx_src_param(tx
, &tx
->insn
.src
[i
]);
2700 ureg_insn(tx
->ureg
, tx
->insn
.info
->opcode
,
2702 src
, tx
->insn
.nsrc
);
2707 TOKEN_PEEK(struct shader_translator
*tx
)
2709 return *(tx
->parse
);
2713 TOKEN_NEXT(struct shader_translator
*tx
)
2715 return *(tx
->parse
)++;
2719 TOKEN_JUMP(struct shader_translator
*tx
)
2721 if (tx
->parse_next
&& tx
->parse
!= tx
->parse_next
) {
2722 WARN("parse(%p) != parse_next(%p) !\n", tx
->parse
, tx
->parse_next
);
2723 tx
->parse
= tx
->parse_next
;
2727 static inline boolean
2728 sm1_parse_eof(struct shader_translator
*tx
)
2730 return TOKEN_PEEK(tx
) == NINED3DSP_END
;
2734 sm1_read_version(struct shader_translator
*tx
)
2736 const DWORD tok
= TOKEN_NEXT(tx
);
2738 tx
->version
.major
= D3DSHADER_VERSION_MAJOR(tok
);
2739 tx
->version
.minor
= D3DSHADER_VERSION_MINOR(tok
);
2741 switch (tok
>> 16) {
2742 case NINED3D_SM1_VS
: tx
->processor
= TGSI_PROCESSOR_VERTEX
; break;
2743 case NINED3D_SM1_PS
: tx
->processor
= TGSI_PROCESSOR_FRAGMENT
; break;
2745 DBG("Invalid shader type: %x\n", tok
);
2751 /* This is just to check if we parsed the instruction properly. */
2753 sm1_parse_get_skip(struct shader_translator
*tx
)
2755 const DWORD tok
= TOKEN_PEEK(tx
);
2757 if (tx
->version
.major
>= 2) {
2758 tx
->parse_next
= tx
->parse
+ 1 /* this */ +
2759 ((tok
& D3DSI_INSTLENGTH_MASK
) >> D3DSI_INSTLENGTH_SHIFT
);
2761 tx
->parse_next
= NULL
; /* TODO: determine from param count */
/* Hook for reporting a shader comment block.
 * comment: text pointer supplied by the caller.
 * size:    size value supplied by the caller; sm1_parse_comments() passes the
 *          comment-size field extracted from the comment token.
 * NOTE(review): the body is not shown here - confirm what, if anything, is
 * actually printed. */
2766 sm1_print_comment(const char *comment
, UINT size
)
2774 sm1_parse_comments(struct shader_translator
*tx
, BOOL print
)
2776 DWORD tok
= TOKEN_PEEK(tx
);
2778 while ((tok
& D3DSI_OPCODE_MASK
) == D3DSIO_COMMENT
)
2780 const char *comment
= "";
2781 UINT size
= (tok
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
2782 tx
->parse
+= size
+ 1;
2785 sm1_print_comment(comment
, size
);
2787 tok
= TOKEN_PEEK(tx
);
/* Fetch one raw parameter token into *reg and, when that token has the
 * relative-addressing flag set, also produce a relative-address token in
 * *rel. *rel is left untouched when the flag is clear, so callers should
 * pre-initialise it. */
2792 sm1_parse_get_param(struct shader_translator
*tx
, DWORD
*reg
, DWORD
*rel
)
/* The parameter token itself is always consumed. */
2794 *reg
= TOKEN_NEXT(tx
);
2796 if (*reg
& D3DSHADER_ADDRMODE_RELATIVE
)
/* Shader model 1 takes a different path from later versions: the visible
 * operands below build a token whose register type is D3DSPR_ADDR rather
 * than reading one from the stream.
 * NOTE(review): the remaining terms of that expression are not visible
 * here - confirm against the full source. */
2798 if (tx
->version
.major
< 2)
2800 ((D3DSPR_ADDR
<< D3DSP_REGTYPE_SHIFT2
) & D3DSP_REGTYPE_MASK2
) |
2801 ((D3DSPR_ADDR
<< D3DSP_REGTYPE_SHIFT
) & D3DSP_REGTYPE_MASK
) |
/* Otherwise the relative-address token is read from the stream. */
2804 *rel
= TOKEN_NEXT(tx
);
2809 sm1_parse_dst_param(struct sm1_dst_param
*dst
, DWORD tok
)
2813 (tok
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
|
2814 (tok
& D3DSP_REGTYPE_MASK2
) >> D3DSP_REGTYPE_SHIFT2
;
2815 dst
->type
= TGSI_RETURN_TYPE_FLOAT
;
2816 dst
->idx
= tok
& D3DSP_REGNUM_MASK
;
2818 dst
->mask
= (tok
& NINED3DSP_WRITEMASK_MASK
) >> NINED3DSP_WRITEMASK_SHIFT
;
2819 dst
->mod
= (tok
& D3DSP_DSTMOD_MASK
) >> D3DSP_DSTMOD_SHIFT
;
2820 shift
= (tok
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
2821 dst
->shift
= (shift
& 0x8) ? -(shift
& 0x7) : shift
& 0x7;
2825 sm1_parse_src_param(struct sm1_src_param
*src
, DWORD tok
)
2828 ((tok
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) |
2829 ((tok
& D3DSP_REGTYPE_MASK2
) >> D3DSP_REGTYPE_SHIFT2
);
2830 src
->type
= TGSI_RETURN_TYPE_FLOAT
;
2831 src
->idx
= tok
& D3DSP_REGNUM_MASK
;
2833 src
->swizzle
= (tok
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
2834 src
->mod
= (tok
& D3DSP_SRCMOD_MASK
) >> D3DSP_SRCMOD_SHIFT
;
2836 switch (src
->file
) {
2837 case D3DSPR_CONST2
: src
->file
= D3DSPR_CONST
; src
->idx
+= 2048; break;
2838 case D3DSPR_CONST3
: src
->file
= D3DSPR_CONST
; src
->idx
+= 4096; break;
2839 case D3DSPR_CONST4
: src
->file
= D3DSPR_CONST
; src
->idx
+= 6144; break;
/* Fill *imm with the immediate value found at the current parse position.
 * The parameter is tagged as NINED3DSPR_IMMEDIATE with an identity swizzle;
 * the value type and the number of DWORDs copied depend on tx->insn.opcode.
 * The caller (sm1_parse_instruction) invokes this for DEF, DEFI and DEFB.
 * NOTE(review): the case labels and any advance of tx->parse are not visible
 * here - confirm against the full source. */
2846 sm1_parse_immediate(struct shader_translator
*tx
,
2847 struct sm1_src_param
*imm
)
2849 imm
->file
= NINED3DSPR_IMMEDIATE
;
2852 imm
->swizzle
= NINED3DSP_NOSWIZZLE
;
2854 switch (tx
->insn
.opcode
) {
/* Float vector: four DWORDs are copied verbatim from the token stream. */
2856 imm
->type
= NINED3DSPTYPE_FLOAT4
;
2857 memcpy(&imm
->imm
.d
[0], tx
->parse
, 4 * sizeof(DWORD
));
/* Integer vector: four DWORDs. */
2861 imm
->type
= NINED3DSPTYPE_INT4
;
2862 memcpy(&imm
->imm
.d
[0], tx
->parse
, 4 * sizeof(DWORD
));
/* Boolean: a single DWORD. */
2866 imm
->type
= NINED3DSPTYPE_BOOL
;
2867 memcpy(&imm
->imm
.d
[0], tx
->parse
, 1 * sizeof(DWORD
));
2877 sm1_read_dst_param(struct shader_translator
*tx
,
2878 struct sm1_dst_param
*dst
,
2879 struct sm1_src_param
*rel
)
2881 DWORD tok_dst
, tok_rel
= 0;
2883 sm1_parse_get_param(tx
, &tok_dst
, &tok_rel
);
2884 sm1_parse_dst_param(dst
, tok_dst
);
2885 if (tok_dst
& D3DSHADER_ADDRMODE_RELATIVE
) {
2886 sm1_parse_src_param(rel
, tok_rel
);
2892 sm1_read_src_param(struct shader_translator
*tx
,
2893 struct sm1_src_param
*src
,
2894 struct sm1_src_param
*rel
)
2896 DWORD tok_src
, tok_rel
= 0;
2898 sm1_parse_get_param(tx
, &tok_src
, &tok_rel
);
2899 sm1_parse_src_param(src
, tok_src
);
2900 if (tok_src
& D3DSHADER_ADDRMODE_RELATIVE
) {
2902 sm1_parse_src_param(rel
, tok_rel
);
2908 sm1_read_semantic(struct shader_translator
*tx
,
2909 struct sm1_semantic
*sem
)
2911 const DWORD tok_usg
= TOKEN_NEXT(tx
);
2912 const DWORD tok_dst
= TOKEN_NEXT(tx
);
2914 sem
->sampler_type
= (tok_usg
& D3DSP_TEXTURETYPE_MASK
) >> D3DSP_TEXTURETYPE_SHIFT
;
2915 sem
->usage
= (tok_usg
& D3DSP_DCL_USAGE_MASK
) >> D3DSP_DCL_USAGE_SHIFT
;
2916 sem
->usage_idx
= (tok_usg
& D3DSP_DCL_USAGEINDEX_MASK
) >> D3DSP_DCL_USAGEINDEX_SHIFT
;
2918 sm1_parse_dst_param(&sem
->reg
, tok_dst
);
/* Parse one instruction from the token stream into tx->insn and translate
 * it. Visible steps, in order: skip comment blocks, record the expected end
 * of the instruction, decode the opcode token, look up the matching
 * sm1_op_info, check the shader version, read the operands, dump and
 * validate the instruction, emit code, and reset the scratch counter.
 * NOTE(review): several control-flow lines (bail-outs, handler dispatch) are
 * not visible here - confirm against the full source. */
2922 sm1_parse_instruction(struct shader_translator
*tx
)
2924 struct sm1_instruction
*insn
= &tx
->insn
;
2926 struct sm1_op_info
*info
= NULL
;
/* Comment blocks are skipped first; parse_next is computed from the opcode
 * token that follows them. */
2929 sm1_parse_comments(tx
, TRUE
);
2930 sm1_parse_get_skip(tx
);
2932 tok
= TOKEN_NEXT(tx
);
/* Split the opcode token into opcode, flags, co-issue and predication. */
2934 insn
->opcode
= tok
& D3DSI_OPCODE_MASK
;
2935 insn
->flags
= (tok
& NINED3DSIO_OPCODE_FLAGS_MASK
) >> NINED3DSIO_OPCODE_FLAGS_SHIFT
;
2936 insn
->coissue
= !!(tok
& D3DSI_COISSUE
);
2937 insn
->predicated
= !!(tok
& NINED3DSHADER_INST_PREDICATED
);
/* Opcodes inside the map range are resolved through op_info_map, which holds
 * an index into inst_table. */
2939 if (insn
->opcode
< Elements(tx
->op_info_map
)) {
2940 int k
= tx
->op_info_map
[insn
->opcode
];
2942 assert(k
< Elements(inst_table
));
2943 info
= &inst_table
[k
];
/* PHASE and COMMENT have dedicated descriptors outside the table. */
2946 if (insn
->opcode
== D3DSIO_PHASE
) info
= &inst_phase
;
2947 if (insn
->opcode
== D3DSIO_COMMENT
) info
= &inst_comment
;
2950 DBG("illegal or unhandled opcode: %08x\n", insn
->opcode
);
/* Operand counts come from the descriptor, not from the token stream. */
2955 insn
->ndst
= info
->ndst
;
2956 insn
->nsrc
= info
->nsrc
;
2958 assert(!insn
->predicated
&& "TODO: predicated instructions");
/* Version check: same (major << 8) | minor packing as create_op_info_map. */
2962 unsigned min
= IS_VS
? info
->vert_version
.min
: info
->frag_version
.min
;
2963 unsigned max
= IS_VS
? info
->vert_version
.max
: info
->frag_version
.max
;
2964 unsigned ver
= (tx
->version
.major
<< 8) | tx
->version
.minor
;
2965 if (ver
< min
|| ver
> max
) {
2966 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
/* Operand order in the stream: destinations, optional predicate, sources. */
2972 for (i
= 0; i
< insn
->ndst
; ++i
)
2973 sm1_read_dst_param(tx
, &insn
->dst
[i
], &insn
->dst_rel
[i
]);
2974 if (insn
->predicated
)
2975 sm1_read_src_param(tx
, &insn
->pred
, NULL
);
2976 for (i
= 0; i
< insn
->nsrc
; ++i
)
2977 sm1_read_src_param(tx
, &insn
->src
[i
], &insn
->src_rel
[i
]);
2979 /* parse here so we can dump them before processing */
2980 if (insn
->opcode
== D3DSIO_DEF
||
2981 insn
->opcode
== D3DSIO_DEFI
||
2982 insn
->opcode
== D3DSIO_DEFB
)
2983 sm1_parse_immediate(tx
, &tx
->insn
.src
[0]);
/* The dump is indented by the current conditional + loop nesting depth. */
2985 sm1_dump_instruction(insn
, tx
->cond_depth
+ tx
->loop_depth
);
2986 sm1_instruction_check(insn
);
2991 NineTranslateInstruction_Generic(tx
);
2992 tx_apply_dst0_modifiers(tx
);
2994 tx
->num_scratch
= 0; /* reset */
3000 tx_ctor(struct shader_translator
*tx
, struct nine_shader_info
*info
)
3006 tx
->byte_code
= info
->byte_code
;
3007 tx
->parse
= info
->byte_code
;
3009 for (i
= 0; i
< Elements(info
->input_map
); ++i
)
3010 info
->input_map
[i
] = NINE_DECLUSAGE_NONE
;
3011 info
->num_inputs
= 0;
3013 info
->position_t
= FALSE
;
3014 info
->point_size
= FALSE
;
3016 tx
->info
->const_float_slots
= 0;
3017 tx
->info
->const_int_slots
= 0;
3018 tx
->info
->const_bool_slots
= 0;
3020 info
->sampler_mask
= 0x0;
3021 info
->rt_mask
= 0x0;
3023 info
->lconstf
.data
= NULL
;
3024 info
->lconstf
.ranges
= NULL
;
3026 for (i
= 0; i
< Elements(tx
->regs
.rL
); ++i
) {
3027 tx
->regs
.rL
[i
] = ureg_dst_undef();
3029 tx
->regs
.address
= ureg_dst_undef();
3030 tx
->regs
.a0
= ureg_dst_undef();
3031 tx
->regs
.p
= ureg_dst_undef();
3032 tx
->regs
.oDepth
= ureg_dst_undef();
3033 tx
->regs
.vPos
= ureg_src_undef();
3034 tx
->regs
.vFace
= ureg_src_undef();
3035 for (i
= 0; i
< Elements(tx
->regs
.o
); ++i
)
3036 tx
->regs
.o
[i
] = ureg_dst_undef();
3037 for (i
= 0; i
< Elements(tx
->regs
.oCol
); ++i
)
3038 tx
->regs
.oCol
[i
] = ureg_dst_undef();
3039 for (i
= 0; i
< Elements(tx
->regs
.vC
); ++i
)
3040 tx
->regs
.vC
[i
] = ureg_src_undef();
3041 for (i
= 0; i
< Elements(tx
->regs
.vT
); ++i
)
3042 tx
->regs
.vT
[i
] = ureg_src_undef();
3044 for (i
= 0; i
< Elements(tx
->lconsti
); ++i
)
3045 tx
->lconsti
[i
].idx
= -1;
3046 for (i
= 0; i
< Elements(tx
->lconstb
); ++i
)
3047 tx
->lconstb
[i
].idx
= -1;
3049 sm1_read_version(tx
);
3051 info
->version
= (tx
->version
.major
<< 4) | tx
->version
.minor
;
3053 create_op_info_map(tx
);
/* Release resources owned by the translator.
 * The label array is freed only when at least one label was recorded.
 * NOTE(review): any further frees are not visible here - confirm against the
 * full source. */
3057 tx_dtor(struct shader_translator
*tx
)
3059 if (tx
->num_inst_labels
)
3060 FREE(tx
->inst_labels
);
3066 static inline unsigned
3067 tgsi_processor_from_type(unsigned shader_type
)
3069 switch (shader_type
) {
3070 case PIPE_SHADER_VERTEX
: return TGSI_PROCESSOR_VERTEX
;
3071 case PIPE_SHADER_FRAGMENT
: return TGSI_PROCESSOR_FRAGMENT
;
/* Capability queries on the device's screen. Both macros expect a variable
 * named `device` in scope; GET_SHADER_CAP additionally expects `info`, whose
 * type field selects the shader stage being queried. */
#define GET_CAP(n) \
    device->screen->get_param(device->screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) \
    device->screen->get_shader_param(device->screen, info->type, \
                                     PIPE_SHADER_CAP_##n)
3083 nine_translate_shader(struct NineDevice9
*device
, struct nine_shader_info
*info
)
3085 struct shader_translator
*tx
;
3086 HRESULT hr
= D3D_OK
;
3087 const unsigned processor
= tgsi_processor_from_type(info
->type
);
3088 unsigned s
, slot_max
;
3089 unsigned max_const_f
;
3091 user_assert(processor
!= ~0, D3DERR_INVALIDCALL
);
3093 tx
= CALLOC_STRUCT(shader_translator
);
3095 return E_OUTOFMEMORY
;
3098 if (((tx
->version
.major
<< 16) | tx
->version
.minor
) > 0x00030000) {
3099 hr
= D3DERR_INVALIDCALL
;
3100 DBG("Unsupported shader version: %u.%u !\n",
3101 tx
->version
.major
, tx
->version
.minor
);
3104 if (tx
->processor
!= processor
) {
3105 hr
= D3DERR_INVALIDCALL
;
3106 DBG("Shader type mismatch: %u / %u !\n", tx
->processor
, processor
);
3109 DUMP("%s%u.%u\n", processor
== TGSI_PROCESSOR_VERTEX
? "VS" : "PS",
3110 tx
->version
.major
, tx
->version
.minor
);
3112 tx
->ureg
= ureg_create(processor
);
3118 tx
->native_integers
= GET_SHADER_CAP(INTEGERS
);
3119 tx
->inline_subroutines
= !GET_SHADER_CAP(SUBROUTINES
);
3120 tx
->lower_preds
= !GET_SHADER_CAP(MAX_PREDS
);
3121 tx
->want_texcoord
= GET_CAP(TGSI_TEXCOORD
);
3122 tx
->shift_wpos
= !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
3123 tx
->texcoord_sn
= tx
->want_texcoord
?
3124 TGSI_SEMANTIC_TEXCOORD
: TGSI_SEMANTIC_GENERIC
;
3126 /* VS must always write position. Declare it here to make it the 1st output.
3127 * (Some drivers like nv50 are buggy and rely on that.)
3130 tx
->regs
.oPos
= ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0);
3132 ureg_property(tx
->ureg
, TGSI_PROPERTY_FS_COORD_ORIGIN
, TGSI_FS_COORD_ORIGIN_UPPER_LEFT
);
3133 if (!tx
->shift_wpos
)
3134 ureg_property(tx
->ureg
, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
, TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
3137 while (!sm1_parse_eof(tx
) && !tx
->failure
)
3138 sm1_parse_instruction(tx
);
3139 tx
->parse
++; /* for byte_size */
3142 ERR("Encountered buggy shader\n");
3143 ureg_destroy(tx
->ureg
);
3144 hr
= D3DERR_INVALIDCALL
;
3148 if (IS_PS
&& (tx
->version
.major
< 2) && tx
->num_temp
) {
3149 ureg_MOV(tx
->ureg
, ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_COLOR
, 0),
3150 ureg_src(tx
->regs
.r
[0]));
3151 info
->rt_mask
|= 0x1;
3154 if (info
->position_t
)
3155 ureg_property(tx
->ureg
, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION
, TRUE
);
3159 if (IS_VS
&& !ureg_dst_is_undef(tx
->regs
.oPts
))
3160 info
->point_size
= TRUE
;
3162 /* record local constants */
3163 if (tx
->num_lconstf
&& tx
->indirect_const_access
) {
3164 struct nine_range
*ranges
;
3171 data
= MALLOC(tx
->num_lconstf
* 4 * sizeof(float));
3174 info
->lconstf
.data
= data
;
3176 indices
= MALLOC(tx
->num_lconstf
* sizeof(indices
[0]));
3180 /* lazy sort, num_lconstf should be small */
3181 for (n
= 0; n
< tx
->num_lconstf
; ++n
) {
3182 for (k
= 0, i
= 0; i
< tx
->num_lconstf
; ++i
) {
3183 if (tx
->lconstf
[i
].idx
< tx
->lconstf
[k
].idx
)
3186 indices
[n
] = tx
->lconstf
[k
].idx
;
3187 memcpy(&data
[n
* 4], &tx
->lconstf
[k
].imm
.f
[0], 4 * sizeof(float));
3188 tx
->lconstf
[k
].idx
= INT_MAX
;
3192 for (n
= 1, i
= 1; i
< tx
->num_lconstf
; ++i
)
3193 if (indices
[i
] != indices
[i
- 1] + 1)
3195 ranges
= MALLOC(n
* sizeof(ranges
[0]));
3200 info
->lconstf
.ranges
= ranges
;
3203 ranges
[k
].bgn
= indices
[0];
3204 for (i
= 1; i
< tx
->num_lconstf
; ++i
) {
3205 if (indices
[i
] != indices
[i
- 1] + 1) {
3206 ranges
[k
].next
= &ranges
[k
+ 1];
3207 ranges
[k
].end
= indices
[i
- 1] + 1;
3209 ranges
[k
].bgn
= indices
[i
];
3212 ranges
[k
].end
= indices
[i
- 1] + 1;
3213 ranges
[k
].next
= NULL
;
3214 assert(n
== (k
+ 1));
3221 if (info
->const_float_slots
> device
->max_vs_const_f
&&
3222 (info
->const_int_slots
|| info
->const_bool_slots
))
3223 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3226 if (tx
->indirect_const_access
) /* vs only */
3227 info
->const_float_slots
= device
->max_vs_const_f
;
3229 max_const_f
= IS_VS
? device
->max_vs_const_f
: device
->max_ps_const_f
;
3230 slot_max
= info
->const_bool_slots
> 0 ?
3231 max_const_f
+ NINE_MAX_CONST_I
3232 + DIV_ROUND_UP(info
->const_bool_slots
, 4) :
3233 info
->const_int_slots
> 0 ?
3234 max_const_f
+ info
->const_int_slots
:
3235 info
->const_float_slots
;
3236 info
->const_used_size
= sizeof(float[4]) * slot_max
; /* slots start from 1 */
3238 for (s
= 0; s
< slot_max
; s
++)
3239 ureg_DECL_constant(tx
->ureg
, s
);
3241 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE
)) {
3243 const struct tgsi_token
*toks
= ureg_get_tokens(tx
->ureg
, &count
);
3245 ureg_free_tokens(toks
);
3248 info
->cso
= ureg_create_shader_and_destroy(tx
->ureg
, device
->pipe
);
3250 hr
= D3DERR_DRIVERINTERNALERROR
;
3251 FREE(info
->lconstf
.data
);
3252 FREE(info
->lconstf
.ranges
);
3256 info
->byte_size
= (tx
->parse
- tx
->byte_code
) * sizeof(DWORD
);