2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
24 #include "nine_shader.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_ureg.h"
34 #include "tgsi/tgsi_dump.h"
36 #define DBG_CHANNEL DBG_SHADER
39 #define NINE_TGSI_LAZY_DEVS /* don't use TGSI_OPCODE_BREAKC */
41 #define NINE_TGSI_LAZY_R600 /* don't use TGSI_OPCODE_DP2A */
43 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
46 struct shader_translator
;
48 typedef HRESULT (*translate_instruction_func
)(struct shader_translator
*);
50 static INLINE
const char *d3dsio_to_string(unsigned opcode
);
53 #define NINED3D_SM1_VS 0xfffe
54 #define NINED3D_SM1_PS 0xffff
56 #define NINE_MAX_COND_DEPTH 64
57 #define NINE_MAX_LOOP_DEPTH 64
59 #define NINED3DSP_END 0x0000ffff
61 #define NINED3DSPTYPE_FLOAT4 0
62 #define NINED3DSPTYPE_INT4 1
63 #define NINED3DSPTYPE_BOOL 2
65 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
67 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
68 #define NINED3DSP_WRITEMASK_SHIFT 16
70 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
72 #define NINED3DSHADER_REL_OP_GT 1
73 #define NINED3DSHADER_REL_OP_EQ 2
74 #define NINED3DSHADER_REL_OP_GE 3
75 #define NINED3DSHADER_REL_OP_LT 4
76 #define NINED3DSHADER_REL_OP_NE 5
77 #define NINED3DSHADER_REL_OP_LE 6
79 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
80 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
82 #define NINED3DSI_TEXLD_PROJECT 0x1
83 #define NINED3DSI_TEXLD_BIAS 0x2
85 #define NINED3DSP_WRITEMASK_0 0x1
86 #define NINED3DSP_WRITEMASK_1 0x2
87 #define NINED3DSP_WRITEMASK_2 0x4
88 #define NINED3DSP_WRITEMASK_3 0x8
89 #define NINED3DSP_WRITEMASK_ALL 0xf
91 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
93 #define NINE_SWIZZLE4(x,y,z,w) \
94 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
96 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
97 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
98 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
101 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
102 * BIAS <= PS 1.4 (x-0.5)
103 * BIASNEG <= PS 1.4 (-(x-0.5))
104 * SIGN <= PS 1.4 (2(x-0.5))
105 * SIGNNEG <= PS 1.4 (-2(x-0.5))
106 * COMP <= PS 1.4 (1-x)
108 * X2NEG = PS 1.4 (-2x)
109 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
110 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
111 * ABS >= SM 3.0 (abs(x))
112 * ABSNEG >= SM 3.0 (-abs(x))
113 * NOT >= SM 2.0 predication only
115 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
116 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
117 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
118 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
119 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
120 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
121 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
124 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
125 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
126 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
127 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
128 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
130 static const char *sm1_mod_str
[] =
132 [NINED3DSPSM_NONE
] = "",
133 [NINED3DSPSM_NEG
] = "-",
134 [NINED3DSPSM_BIAS
] = "bias",
135 [NINED3DSPSM_BIASNEG
] = "biasneg",
136 [NINED3DSPSM_SIGN
] = "sign",
137 [NINED3DSPSM_SIGNNEG
] = "signneg",
138 [NINED3DSPSM_COMP
] = "comp",
139 [NINED3DSPSM_X2
] = "x2",
140 [NINED3DSPSM_X2NEG
] = "x2neg",
141 [NINED3DSPSM_DZ
] = "dz",
142 [NINED3DSPSM_DW
] = "dw",
143 [NINED3DSPSM_ABS
] = "abs",
144 [NINED3DSPSM_ABSNEG
] = "-abs",
145 [NINED3DSPSM_NOT
] = "not"
149 sm1_dump_writemask(BYTE mask
)
151 if (mask
& 1) DUMP("x"); else DUMP("_");
152 if (mask
& 2) DUMP("y"); else DUMP("_");
153 if (mask
& 4) DUMP("z"); else DUMP("_");
154 if (mask
& 8) DUMP("w"); else DUMP("_");
158 sm1_dump_swizzle(BYTE s
)
160 char c
[4] = { 'x', 'y', 'z', 'w' };
162 c
[(s
>> 0) & 3], c
[(s
>> 2) & 3], c
[(s
>> 4) & 3], c
[(s
>> 6) & 3]);
165 static const char sm1_file_char
[] =
168 [D3DSPR_INPUT
] = 'v',
169 [D3DSPR_CONST
] = 'c',
171 [D3DSPR_RASTOUT
] = 'R',
172 [D3DSPR_ATTROUT
] = 'D',
173 [D3DSPR_OUTPUT
] = 'o',
174 [D3DSPR_CONSTINT
] = 'I',
175 [D3DSPR_COLOROUT
] = 'C',
176 [D3DSPR_DEPTHOUT
] = 'D',
177 [D3DSPR_SAMPLER
] = 's',
178 [D3DSPR_CONST2
] = 'c',
179 [D3DSPR_CONST3
] = 'c',
180 [D3DSPR_CONST4
] = 'c',
181 [D3DSPR_CONSTBOOL
] = 'B',
183 [D3DSPR_TEMPFLOAT16
] = 'h',
184 [D3DSPR_MISCTYPE
] = 'M',
185 [D3DSPR_LABEL
] = 'X',
186 [D3DSPR_PREDICATE
] = 'p'
190 sm1_dump_reg(BYTE file
, INT index
)
196 case D3DSPR_COLOROUT
:
199 case D3DSPR_DEPTHOUT
:
203 DUMP("oRast%i", index
);
205 case D3DSPR_CONSTINT
:
206 DUMP("iconst[%i]", index
);
208 case D3DSPR_CONSTBOOL
:
209 DUMP("bconst[%i]", index
);
212 DUMP("%c%i", sm1_file_char
[file
], index
);
220 struct sm1_src_param
*rel
;
233 sm1_parse_immediate(struct shader_translator
*, struct sm1_src_param
*);
238 struct sm1_src_param
*rel
;
242 BYTE shift
; /* sint4 */
247 assert_replicate_swizzle(const struct ureg_src
*reg
)
249 assert(reg
->SwizzleY
== reg
->SwizzleX
&&
250 reg
->SwizzleZ
== reg
->SwizzleX
&&
251 reg
->SwizzleW
== reg
->SwizzleX
);
255 sm1_dump_immediate(const struct sm1_src_param
*param
)
257 switch (param
->type
) {
258 case NINED3DSPTYPE_FLOAT4
:
259 DUMP("{ %f %f %f %f }",
260 param
->imm
.f
[0], param
->imm
.f
[1],
261 param
->imm
.f
[2], param
->imm
.f
[3]);
263 case NINED3DSPTYPE_INT4
:
264 DUMP("{ %i %i %i %i }",
265 param
->imm
.i
[0], param
->imm
.i
[1],
266 param
->imm
.i
[2], param
->imm
.i
[3]);
268 case NINED3DSPTYPE_BOOL
:
269 DUMP("%s", param
->imm
.b
? "TRUE" : "FALSE");
278 sm1_dump_src_param(const struct sm1_src_param
*param
)
280 if (param
->file
== NINED3DSPR_IMMEDIATE
) {
281 assert(!param
->mod
&&
283 param
->swizzle
== NINED3DSP_NOSWIZZLE
);
284 sm1_dump_immediate(param
);
289 DUMP("%s(", sm1_mod_str
[param
->mod
]);
291 DUMP("%c[", sm1_file_char
[param
->file
]);
292 sm1_dump_src_param(param
->rel
);
293 DUMP("+%i]", param
->idx
);
295 sm1_dump_reg(param
->file
, param
->idx
);
299 if (param
->swizzle
!= NINED3DSP_NOSWIZZLE
) {
301 sm1_dump_swizzle(param
->swizzle
);
306 sm1_dump_dst_param(const struct sm1_dst_param
*param
)
308 if (param
->mod
& NINED3DSPDM_SATURATE
)
310 if (param
->mod
& NINED3DSPDM_PARTIALP
)
312 if (param
->mod
& NINED3DSPDM_CENTROID
)
314 if (param
->shift
< 0)
315 DUMP("/%u ", 1 << -param
->shift
);
316 if (param
->shift
> 0)
317 DUMP("*%u ", 1 << param
->shift
);
320 DUMP("%c[", sm1_file_char
[param
->file
]);
321 sm1_dump_src_param(param
->rel
);
322 DUMP("+%i]", param
->idx
);
324 sm1_dump_reg(param
->file
, param
->idx
);
326 if (param
->mask
!= NINED3DSP_WRITEMASK_ALL
) {
328 sm1_dump_writemask(param
->mask
);
334 struct sm1_dst_param reg
;
342 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
343 * should be ignored completely */
345 unsigned opcode
; /* TGSI_OPCODE_x */
347 /* versions are still set even if handler is set */
351 } vert_version
, frag_version
;
353 /* number of regs parsed outside of special handler */
357 /* some instructions don't map perfectly, so use a special handler */
358 translate_instruction_func handler
;
361 struct sm1_instruction
363 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode
;
369 struct sm1_src_param src
[4];
370 struct sm1_src_param src_rel
[4];
371 struct sm1_src_param pred
;
372 struct sm1_src_param dst_rel
[1];
373 struct sm1_dst_param dst
[1];
375 struct sm1_op_info
*info
;
379 sm1_dump_instruction(struct sm1_instruction
*insn
, unsigned indent
)
383 /* no info stored for these: */
384 if (insn
->opcode
== D3DSIO_DCL
)
386 for (i
= 0; i
< indent
; ++i
)
389 if (insn
->predicated
) {
391 sm1_dump_src_param(&insn
->pred
);
394 DUMP("%s", d3dsio_to_string(insn
->opcode
));
396 switch (insn
->opcode
) {
398 DUMP(insn
->flags
== NINED3DSI_TEXLD_PROJECT
? "p" : "b");
401 DUMP("_%x", insn
->flags
);
409 for (i
= 0; i
< insn
->ndst
&& i
< Elements(insn
->dst
); ++i
) {
410 sm1_dump_dst_param(&insn
->dst
[i
]);
414 for (i
= 0; i
< insn
->nsrc
&& i
< Elements(insn
->src
); ++i
) {
415 sm1_dump_src_param(&insn
->src
[i
]);
418 if (insn
->opcode
== D3DSIO_DEF
||
419 insn
->opcode
== D3DSIO_DEFI
||
420 insn
->opcode
== D3DSIO_DEFB
)
421 sm1_dump_immediate(&insn
->src
[0]);
426 struct sm1_local_const
437 struct shader_translator
439 const DWORD
*byte_code
;
441 const DWORD
*parse_next
;
443 struct ureg_program
*ureg
;
450 unsigned processor
; /* TGSI_PROCESSOR_VERTEX/FRAGMENT */
452 boolean native_integers
;
453 boolean inline_subroutines
;
455 boolean want_texcoord
;
457 unsigned texcoord_sn
;
459 struct sm1_instruction insn
; /* current instruction */
463 struct ureg_dst oPos
;
464 struct ureg_dst oFog
;
465 struct ureg_dst oPts
;
466 struct ureg_dst oCol
[4];
467 struct ureg_dst o
[PIPE_MAX_SHADER_OUTPUTS
];
468 struct ureg_dst oDepth
;
469 struct ureg_src v
[PIPE_MAX_SHADER_INPUTS
];
470 struct ureg_src vPos
;
471 struct ureg_src vFace
;
475 struct ureg_dst tS
[8]; /* texture stage registers */
476 struct ureg_dst tdst
; /* scratch dst if we need extra modifiers */
477 struct ureg_dst t
[5]; /* scratch TEMPs */
478 struct ureg_src vC
[2]; /* PS color in */
479 struct ureg_src vT
[8]; /* PS texcoord in */
480 struct ureg_dst rL
[NINE_MAX_LOOP_DEPTH
]; /* loop ctr */
481 struct ureg_dst aL
[NINE_MAX_LOOP_DEPTH
]; /* loop ctr ADDR register */
483 unsigned num_temp
; /* Elements(regs.r) */
484 unsigned num_scratch
;
486 unsigned loop_depth_max
;
488 unsigned loop_labels
[NINE_MAX_LOOP_DEPTH
];
489 unsigned cond_labels
[NINE_MAX_COND_DEPTH
];
491 unsigned *inst_labels
; /* LABEL op */
492 unsigned num_inst_labels
;
494 unsigned sampler_targets
[NINE_MAX_SAMPLERS
]; /* TGSI_TEXTURE_x */
496 struct sm1_local_const
*lconstf
;
497 unsigned num_lconstf
;
498 struct sm1_local_const lconsti
[NINE_MAX_CONST_I
];
499 struct sm1_local_const lconstb
[NINE_MAX_CONST_B
];
501 boolean indirect_const_access
;
503 struct nine_shader_info
*info
;
505 int16_t op_info_map
[D3DSIO_BREAKP
+ 1];
508 #define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
509 #define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
512 sm1_read_semantic(struct shader_translator
*, struct sm1_semantic
*);
515 sm1_instruction_check(const struct sm1_instruction
*insn
)
517 if (insn
->opcode
== D3DSIO_CRS
)
519 if (insn
->dst
[0].mask
& NINED3DSP_WRITEMASK_3
)
527 tx_lconstf(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
530 assert(index
>= 0 && index
< (NINE_MAX_CONST_F
* 2));
531 for (i
= 0; i
< tx
->num_lconstf
; ++i
) {
532 if (tx
->lconstf
[i
].idx
== index
) {
533 *src
= tx
->lconstf
[i
].reg
;
540 tx_lconsti(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
542 assert(index
>= 0 && index
< NINE_MAX_CONST_I
);
543 if (tx
->lconsti
[index
].idx
== index
)
544 *src
= tx
->lconsti
[index
].reg
;
545 return tx
->lconsti
[index
].idx
== index
;
548 tx_lconstb(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
550 assert(index
>= 0 && index
< NINE_MAX_CONST_B
);
551 if (tx
->lconstb
[index
].idx
== index
)
552 *src
= tx
->lconstb
[index
].reg
;
553 return tx
->lconstb
[index
].idx
== index
;
557 tx_set_lconstf(struct shader_translator
*tx
, INT index
, float f
[4])
561 /* Anno1404 sets out of range constants. */
562 assert(index
>= 0 && index
< (NINE_MAX_CONST_F
* 2));
563 if (index
>= NINE_MAX_CONST_F
)
564 WARN("lconstf index %i too high, indirect access won't work\n", index
);
566 for (n
= 0; n
< tx
->num_lconstf
; ++n
)
567 if (tx
->lconstf
[n
].idx
== index
)
569 if (n
== tx
->num_lconstf
) {
571 tx
->lconstf
= REALLOC(tx
->lconstf
,
572 (n
+ 0) * sizeof(tx
->lconstf
[0]),
573 (n
+ 8) * sizeof(tx
->lconstf
[0]));
578 tx
->lconstf
[n
].idx
= index
;
579 tx
->lconstf
[n
].reg
= ureg_imm4f(tx
->ureg
, f
[0], f
[1], f
[2], f
[3]);
581 memcpy(tx
->lconstf
[n
].imm
.f
, f
, sizeof(tx
->lconstf
[n
].imm
.f
));
584 tx_set_lconsti(struct shader_translator
*tx
, INT index
, int i
[4])
586 assert(index
>= 0 && index
< NINE_MAX_CONST_I
);
587 tx
->lconsti
[index
].idx
= index
;
588 tx
->lconsti
[index
].reg
= tx
->native_integers
?
589 ureg_imm4i(tx
->ureg
, i
[0], i
[1], i
[2], i
[3]) :
590 ureg_imm4f(tx
->ureg
, i
[0], i
[1], i
[2], i
[3]);
593 tx_set_lconstb(struct shader_translator
*tx
, INT index
, BOOL b
)
595 assert(index
>= 0 && index
< NINE_MAX_CONST_B
);
596 tx
->lconstb
[index
].idx
= index
;
597 tx
->lconstb
[index
].reg
= tx
->native_integers
?
598 ureg_imm1u(tx
->ureg
, b
? 0xffffffff : 0) :
599 ureg_imm1f(tx
->ureg
, b
? 1.0f
: 0.0f
);
602 static INLINE
struct ureg_dst
603 tx_scratch(struct shader_translator
*tx
)
605 assert(tx
->num_scratch
< Elements(tx
->regs
.t
));
606 if (ureg_dst_is_undef(tx
->regs
.t
[tx
->num_scratch
]))
607 tx
->regs
.t
[tx
->num_scratch
] = ureg_DECL_local_temporary(tx
->ureg
);
608 return tx
->regs
.t
[tx
->num_scratch
++];
611 static INLINE
struct ureg_dst
612 tx_scratch_scalar(struct shader_translator
*tx
)
614 return ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
617 static INLINE
struct ureg_src
618 tx_src_scalar(struct ureg_dst dst
)
620 struct ureg_src src
= ureg_src(dst
);
621 int c
= ffs(dst
.WriteMask
) - 1;
622 if (dst
.WriteMask
== (1 << c
))
623 src
= ureg_scalar(src
, c
);
627 /* Need to declare all constants if indirect addressing is used,
628 * otherwise we could scan the shader to determine the maximum.
629 * TODO: It doesn't really matter for nv50 so I won't do the scan,
630 * but radeon drivers might care, if they don't infer it from TGSI.
633 tx_decl_constants(struct shader_translator
*tx
)
637 for (i
= 0; i
< NINE_MAX_CONST_F
; ++i
)
638 ureg_DECL_constant(tx
->ureg
, n
++);
639 for (i
= 0; i
< NINE_MAX_CONST_I
; ++i
)
640 ureg_DECL_constant(tx
->ureg
, n
++);
641 for (i
= 0; i
< (NINE_MAX_CONST_B
/ 4); ++i
)
642 ureg_DECL_constant(tx
->ureg
, n
++);
646 tx_temp_alloc(struct shader_translator
*tx
, INT idx
)
649 if (idx
>= tx
->num_temp
) {
650 unsigned k
= tx
->num_temp
;
651 unsigned n
= idx
+ 1;
652 tx
->regs
.r
= REALLOC(tx
->regs
.r
,
653 k
* sizeof(tx
->regs
.r
[0]),
654 n
* sizeof(tx
->regs
.r
[0]));
656 tx
->regs
.r
[k
] = ureg_dst_undef();
659 if (ureg_dst_is_undef(tx
->regs
.r
[idx
]))
660 tx
->regs
.r
[idx
] = ureg_DECL_temporary(tx
->ureg
);
664 tx_addr_alloc(struct shader_translator
*tx
, INT idx
)
667 if (ureg_dst_is_undef(tx
->regs
.a
))
668 tx
->regs
.a
= ureg_DECL_address(tx
->ureg
);
672 tx_pred_alloc(struct shader_translator
*tx
, INT idx
)
675 if (ureg_dst_is_undef(tx
->regs
.p
))
676 tx
->regs
.p
= ureg_DECL_predicate(tx
->ureg
);
680 tx_texcoord_alloc(struct shader_translator
*tx
, INT idx
)
683 assert(idx
>= 0 && idx
< Elements(tx
->regs
.vT
));
684 if (ureg_src_is_undef(tx
->regs
.vT
[idx
]))
685 tx
->regs
.vT
[idx
] = ureg_DECL_fs_input(tx
->ureg
, tx
->texcoord_sn
, idx
,
686 TGSI_INTERPOLATE_PERSPECTIVE
);
689 static INLINE
unsigned *
690 tx_bgnloop(struct shader_translator
*tx
)
693 if (tx
->loop_depth_max
< tx
->loop_depth
)
694 tx
->loop_depth_max
= tx
->loop_depth
;
695 assert(tx
->loop_depth
< NINE_MAX_LOOP_DEPTH
);
696 return &tx
->loop_labels
[tx
->loop_depth
- 1];
699 static INLINE
unsigned *
700 tx_endloop(struct shader_translator
*tx
)
702 assert(tx
->loop_depth
);
704 ureg_fixup_label(tx
->ureg
, tx
->loop_labels
[tx
->loop_depth
],
705 ureg_get_instruction_number(tx
->ureg
));
706 return &tx
->loop_labels
[tx
->loop_depth
];
709 static struct ureg_dst
710 tx_get_loopctr(struct shader_translator
*tx
)
712 const unsigned l
= tx
->loop_depth
- 1;
716 DBG("loop counter requested outside of loop\n");
717 return ureg_dst_undef();
720 if (ureg_dst_is_undef(tx
->regs
.aL
[l
]))
722 struct ureg_dst rreg
= ureg_DECL_local_temporary(tx
->ureg
);
723 struct ureg_dst areg
= ureg_DECL_address(tx
->ureg
);
727 for (c
= l
; c
< (l
+ 4) && c
< Elements(tx
->regs
.aL
); ++c
) {
728 tx
->regs
.rL
[c
] = ureg_writemask(rreg
, 1 << (c
& 3));
729 tx
->regs
.aL
[c
] = ureg_writemask(areg
, 1 << (c
& 3));
732 return tx
->regs
.rL
[l
];
734 static struct ureg_dst
735 tx_get_aL(struct shader_translator
*tx
)
737 if (!ureg_dst_is_undef(tx_get_loopctr(tx
)))
738 return tx
->regs
.aL
[tx
->loop_depth
- 1];
739 return ureg_dst_undef();
742 static INLINE
unsigned *
743 tx_cond(struct shader_translator
*tx
)
745 assert(tx
->cond_depth
<= NINE_MAX_COND_DEPTH
);
747 return &tx
->cond_labels
[tx
->cond_depth
- 1];
750 static INLINE
unsigned *
751 tx_elsecond(struct shader_translator
*tx
)
753 assert(tx
->cond_depth
);
754 return &tx
->cond_labels
[tx
->cond_depth
- 1];
758 tx_endcond(struct shader_translator
*tx
)
760 assert(tx
->cond_depth
);
762 ureg_fixup_label(tx
->ureg
, tx
->cond_labels
[tx
->cond_depth
],
763 ureg_get_instruction_number(tx
->ureg
));
766 static INLINE
struct ureg_dst
767 nine_ureg_dst_register(unsigned file
, int index
)
769 return ureg_dst(ureg_src_register(file
, index
));
772 static struct ureg_src
773 tx_src_param(struct shader_translator
*tx
, const struct sm1_src_param
*param
)
775 struct ureg_program
*ureg
= tx
->ureg
;
783 tx_temp_alloc(tx
, param
->idx
);
784 src
= ureg_src(tx
->regs
.r
[param
->idx
]);
786 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
790 tx_addr_alloc(tx
, param
->idx
);
791 src
= ureg_src(tx
->regs
.a
);
793 if (tx
->version
.major
< 2 && tx
->version
.minor
< 4) {
794 /* no subroutines, so should be defined */
795 src
= ureg_src(tx
->regs
.tS
[param
->idx
]);
797 tx_texcoord_alloc(tx
, param
->idx
);
798 src
= tx
->regs
.vT
[param
->idx
];
804 src
= ureg_src_register(TGSI_FILE_INPUT
, param
->idx
);
806 if (tx
->version
.major
< 3) {
808 src
= ureg_DECL_fs_input(tx
->ureg
, TGSI_SEMANTIC_COLOR
,
810 TGSI_INTERPOLATE_PERSPECTIVE
);
812 assert(!param
->rel
); /* TODO */
813 assert(param
->idx
< Elements(tx
->regs
.v
));
814 src
= tx
->regs
.v
[param
->idx
];
818 case D3DSPR_PREDICATE
:
820 tx_pred_alloc(tx
, param
->idx
);
821 src
= ureg_src(tx
->regs
.p
);
824 assert(param
->mod
== NINED3DSPSM_NONE
);
825 assert(param
->swizzle
== NINED3DSP_NOSWIZZLE
);
827 src
= ureg_src_register(TGSI_FILE_SAMPLER
, param
->idx
);
831 tx
->indirect_const_access
= TRUE
;
832 if (param
->rel
|| !tx_lconstf(tx
, &src
, param
->idx
)) {
834 nine_info_mark_const_f_used(tx
->info
, param
->idx
);
835 src
= ureg_src_register(TGSI_FILE_CONSTANT
, param
->idx
);
841 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
842 assert(!"CONST2/3/4");
843 src
= ureg_imm1f(ureg
, 0.0f
);
845 case D3DSPR_CONSTINT
:
846 if (param
->rel
|| !tx_lconsti(tx
, &src
, param
->idx
)) {
848 nine_info_mark_const_i_used(tx
->info
, param
->idx
);
849 src
= ureg_src_register(TGSI_FILE_CONSTANT
,
850 tx
->info
->const_i_base
+ param
->idx
);
853 case D3DSPR_CONSTBOOL
:
854 if (param
->rel
|| !tx_lconstb(tx
, &src
, param
->idx
)) {
855 char r
= param
->idx
/ 4;
856 char s
= param
->idx
& 3;
858 nine_info_mark_const_b_used(tx
->info
, param
->idx
);
859 src
= ureg_src_register(TGSI_FILE_CONSTANT
,
860 tx
->info
->const_b_base
+ r
);
861 src
= ureg_swizzle(src
, s
, s
, s
, s
);
865 src
= tx_src_scalar(tx_get_aL(tx
));
867 case D3DSPR_MISCTYPE
:
868 switch (param
->idx
) {
869 case D3DSMO_POSITION
:
870 if (ureg_src_is_undef(tx
->regs
.vPos
))
871 tx
->regs
.vPos
= ureg_DECL_fs_input(ureg
,
872 TGSI_SEMANTIC_POSITION
, 0,
873 TGSI_INTERPOLATE_LINEAR
);
874 if (tx
->shift_wpos
) {
875 /* TODO: do this only once */
876 struct ureg_dst wpos
= tx_scratch(tx
);
877 ureg_SUB(ureg
, wpos
, tx
->regs
.vPos
,
878 ureg_imm4f(ureg
, 0.5f
, 0.5f
, 0.0f
, 0.0f
));
879 src
= ureg_src(wpos
);
885 if (ureg_src_is_undef(tx
->regs
.vFace
)) {
886 tx
->regs
.vFace
= ureg_DECL_fs_input(ureg
,
887 TGSI_SEMANTIC_FACE
, 0,
888 TGSI_INTERPOLATE_CONSTANT
);
889 tx
->regs
.vFace
= ureg_scalar(tx
->regs
.vFace
, TGSI_SWIZZLE_X
);
891 src
= tx
->regs
.vFace
;
894 assert(!"invalid src D3DSMO");
899 case D3DSPR_TEMPFLOAT16
:
902 assert(!"invalid src D3DSPR");
905 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
907 if (param
->swizzle
!= NINED3DSP_NOSWIZZLE
)
908 src
= ureg_swizzle(src
,
909 (param
->swizzle
>> 0) & 0x3,
910 (param
->swizzle
>> 2) & 0x3,
911 (param
->swizzle
>> 4) & 0x3,
912 (param
->swizzle
>> 6) & 0x3);
914 switch (param
->mod
) {
915 case NINED3DSPSM_ABS
:
918 case NINED3DSPSM_ABSNEG
:
919 src
= ureg_negate(ureg_abs(src
));
921 case NINED3DSPSM_NEG
:
922 src
= ureg_negate(src
);
924 case NINED3DSPSM_BIAS
:
925 tmp
= tx_scratch(tx
);
926 ureg_SUB(ureg
, tmp
, src
, ureg_imm1f(ureg
, 0.5f
));
929 case NINED3DSPSM_BIASNEG
:
930 tmp
= tx_scratch(tx
);
931 ureg_SUB(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), src
);
934 case NINED3DSPSM_NOT
:
935 if (tx
->native_integers
) {
936 tmp
= tx_scratch(tx
);
937 ureg_NOT(ureg
, tmp
, src
);
942 case NINED3DSPSM_COMP
:
943 tmp
= tx_scratch(tx
);
944 ureg_SUB(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), src
);
949 /* handled in instruction */
951 case NINED3DSPSM_SIGN
:
952 tmp
= tx_scratch(tx
);
953 ureg_MAD(ureg
, tmp
, src
, ureg_imm1f(ureg
, 2.0f
), ureg_imm1f(ureg
, -1.0f
));
956 case NINED3DSPSM_SIGNNEG
:
957 tmp
= tx_scratch(tx
);
958 ureg_MAD(ureg
, tmp
, src
, ureg_imm1f(ureg
, -2.0f
), ureg_imm1f(ureg
, 1.0f
));
962 tmp
= tx_scratch(tx
);
963 ureg_ADD(ureg
, tmp
, src
, src
);
966 case NINED3DSPSM_X2NEG
:
967 tmp
= tx_scratch(tx
);
968 ureg_ADD(ureg
, tmp
, src
, src
);
969 src
= ureg_negate(ureg_src(tmp
));
972 assert(param
->mod
== NINED3DSPSM_NONE
);
979 static struct ureg_dst
980 _tx_dst_param(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
988 tx_temp_alloc(tx
, param
->idx
);
989 dst
= tx
->regs
.r
[param
->idx
];
991 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
994 if (tx
->version
.major
< 2 && !IS_VS
) {
995 if (ureg_dst_is_undef(tx
->regs
.tS
[param
->idx
]))
996 tx
->regs
.tS
[param
->idx
] = ureg_DECL_temporary(tx
->ureg
);
997 dst
= tx
->regs
.tS
[param
->idx
];
999 if (!IS_VS
&& tx
->insn
.opcode
== D3DSIO_TEXKILL
) { /* maybe others, too */
1000 tx_texcoord_alloc(tx
, param
->idx
);
1001 dst
= ureg_dst(tx
->regs
.vT
[param
->idx
]);
1003 tx_addr_alloc(tx
, param
->idx
);
1007 case D3DSPR_RASTOUT
:
1008 assert(!param
->rel
);
1009 switch (param
->idx
) {
1011 if (ureg_dst_is_undef(tx
->regs
.oPos
))
1013 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0);
1014 dst
= tx
->regs
.oPos
;
1017 if (ureg_dst_is_undef(tx
->regs
.oFog
))
1019 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_FOG
, 0);
1020 dst
= tx
->regs
.oFog
;
1023 if (ureg_dst_is_undef(tx
->regs
.oPts
))
1025 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_PSIZE
, 0);
1026 dst
= tx
->regs
.oPts
;
1033 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1035 if (tx
->version
.major
< 3) {
1036 assert(!param
->rel
);
1037 dst
= ureg_DECL_output(tx
->ureg
, tx
->texcoord_sn
, param
->idx
);
1039 assert(!param
->rel
); /* TODO */
1040 assert(param
->idx
< Elements(tx
->regs
.o
));
1041 dst
= tx
->regs
.o
[param
->idx
];
1044 case D3DSPR_ATTROUT
: /* VS */
1045 case D3DSPR_COLOROUT
: /* PS */
1046 assert(param
->idx
>= 0 && param
->idx
< 4);
1047 assert(!param
->rel
);
1048 tx
->info
->rt_mask
|= 1 << param
->idx
;
1049 if (ureg_dst_is_undef(tx
->regs
.oCol
[param
->idx
]))
1050 tx
->regs
.oCol
[param
->idx
] =
1051 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_COLOR
, param
->idx
);
1052 dst
= tx
->regs
.oCol
[param
->idx
];
1053 if (IS_VS
&& tx
->version
.major
< 3)
1054 dst
= ureg_saturate(dst
);
1056 case D3DSPR_DEPTHOUT
:
1057 assert(!param
->rel
);
1058 if (ureg_dst_is_undef(tx
->regs
.oDepth
))
1060 ureg_DECL_output_masked(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0,
1062 dst
= tx
->regs
.oDepth
; /* XXX: must write .z component */
1064 case D3DSPR_PREDICATE
:
1065 assert(!param
->rel
);
1066 tx_pred_alloc(tx
, param
->idx
);
1069 case D3DSPR_TEMPFLOAT16
:
1070 DBG("unhandled D3DSPR: %u\n", param
->file
);
1073 assert(!"invalid dst D3DSPR");
1077 dst
= ureg_dst_indirect(dst
, tx_src_param(tx
, param
->rel
));
1079 if (param
->mask
!= NINED3DSP_WRITEMASK_ALL
)
1080 dst
= ureg_writemask(dst
, param
->mask
);
1081 if (param
->mod
& NINED3DSPDM_SATURATE
)
1082 dst
= ureg_saturate(dst
);
1087 static struct ureg_dst
1088 tx_dst_param(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1091 tx
->regs
.tdst
= ureg_writemask(tx_scratch(tx
), param
->mask
);
1092 return tx
->regs
.tdst
;
1094 return _tx_dst_param(tx
, param
);
1098 tx_apply_dst0_modifiers(struct shader_translator
*tx
)
1100 struct ureg_dst rdst
;
1103 if (!tx
->insn
.ndst
|| !tx
->insn
.dst
[0].shift
|| tx
->insn
.opcode
== D3DSIO_TEXKILL
)
1105 rdst
= _tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1107 assert(rdst
.File
!= TGSI_FILE_ADDRESS
); /* this probably isn't possible */
1109 if (tx
->insn
.dst
[0].shift
< 0)
1110 f
= 1.0f
/ (1 << -tx
->insn
.dst
[0].shift
);
1112 f
= 1 << tx
->insn
.dst
[0].shift
;
1114 ureg_MUL(tx
->ureg
, rdst
, ureg_src(tx
->regs
.tdst
), ureg_imm1f(tx
->ureg
, f
));
1117 static struct ureg_src
1118 tx_dst_param_as_src(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1120 struct ureg_src src
;
1122 assert(!param
->shift
);
1123 assert(!(param
->mod
& NINED3DSPDM_SATURATE
));
1125 switch (param
->file
) {
1128 src
= ureg_src_register(TGSI_FILE_INPUT
, param
->idx
);
1130 assert(!param
->rel
);
1131 assert(param
->idx
< Elements(tx
->regs
.v
));
1132 src
= tx
->regs
.v
[param
->idx
];
1136 src
= ureg_src(tx_dst_param(tx
, param
));
1140 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
1143 WARN("mask is 0, using identity swizzle\n");
1145 if (param
->mask
&& param
->mask
!= NINED3DSP_WRITEMASK_ALL
) {
1149 for (n
= 0, c
= 0; c
< 4; ++c
)
1150 if (param
->mask
& (1 << c
))
1153 for (c
= n
; c
< 4; ++c
)
1155 src
= ureg_swizzle(src
, s
[0], s
[1], s
[2], s
[3]);
1161 NineTranslateInstruction_Mkxn(struct shader_translator
*tx
, const unsigned k
, const unsigned n
)
1163 struct ureg_program
*ureg
= tx
->ureg
;
1164 struct ureg_dst dst
;
1165 struct ureg_src src
[2];
1168 dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1169 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1170 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
1172 for (i
= 0; i
< n
; i
++, src
[1].Index
++)
1174 const unsigned m
= (1 << i
);
1176 if (!(dst
.WriteMask
& m
))
1179 /* XXX: src == dst case ? */
1183 ureg_DP3(ureg
, ureg_writemask(dst
, m
), src
[0], src
[1]);
1186 ureg_DP4(ureg
, ureg_writemask(dst
, m
), src
[0], src
[1]);
1189 DBG("invalid operation: M%ux%u\n", m
, n
);
1197 #define VNOTSUPPORTED 0, 0
1198 #define V(maj, min) (((maj) << 8) | (min))
1200 static INLINE
const char *
1201 d3dsio_to_string( unsigned opcode
)
1203 static const char *names
[] = {
1303 if (opcode
< Elements(names
)) return names
[opcode
];
1306 case D3DSIO_PHASE
: return "PHASE";
1307 case D3DSIO_COMMENT
: return "COMMENT";
1308 case D3DSIO_END
: return "END";
1314 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1315 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1316 (inst).vert_version.max | \
1317 (inst).frag_version.min | \
1318 (inst).frag_version.max)
1320 #define SPECIAL(name) \
1321 NineTranslateInstruction_##name
1323 #define DECL_SPECIAL(name) \
1325 NineTranslateInstruction_##name( struct shader_translator *tx )
1328 NineTranslateInstruction_Generic(struct shader_translator
*);
1332 return NineTranslateInstruction_Mkxn(tx
, 4, 3);
1337 return NineTranslateInstruction_Mkxn(tx
, 4, 3);
1342 return NineTranslateInstruction_Mkxn(tx
, 3, 4);
1347 return NineTranslateInstruction_Mkxn(tx
, 3, 3);
1352 return NineTranslateInstruction_Mkxn(tx
, 3, 2);
1357 ureg_CMP(tx
->ureg
, tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1358 tx_src_param(tx
, &tx
->insn
.src
[0]),
1359 tx_src_param(tx
, &tx
->insn
.src
[2]),
1360 tx_src_param(tx
, &tx
->insn
.src
[1]));
1366 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1367 struct ureg_dst cgt
;
1368 struct ureg_src cnd
;
1370 if (tx
->insn
.coissue
&& tx
->version
.major
== 1 && tx
->version
.minor
< 4) {
1372 dst
, tx_src_param(tx
, &tx
->insn
.src
[1]));
1376 cnd
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1377 #ifdef NINE_TGSI_LAZY_R600
1378 cgt
= tx_scratch(tx
);
1380 if (tx
->version
.major
== 1 && tx
->version
.minor
< 4) {
1381 cgt
.WriteMask
= TGSI_WRITEMASK_W
;
1382 ureg_SGT(tx
->ureg
, cgt
, cnd
, ureg_imm1f(tx
->ureg
, 0.5f
));
1383 cnd
= ureg_scalar(cnd
, TGSI_SWIZZLE_W
);
1385 ureg_SGT(tx
->ureg
, cgt
, cnd
, ureg_imm1f(tx
->ureg
, 0.5f
));
1387 ureg_CMP(tx
->ureg
, dst
,
1388 tx_src_param(tx
, &tx
->insn
.src
[1]),
1389 tx_src_param(tx
, &tx
->insn
.src
[2]), ureg_negate(cnd
));
1391 if (tx
->version
.major
== 1 && tx
->version
.minor
< 4)
1392 cnd
= ureg_scalar(cnd
, TGSI_SWIZZLE_W
);
1393 ureg_CND(tx
->ureg
, dst
,
1394 tx_src_param(tx
, &tx
->insn
.src
[1]),
1395 tx_src_param(tx
, &tx
->insn
.src
[2]), cnd
);
1402 assert(tx
->insn
.src
[0].idx
< tx
->num_inst_labels
);
1403 ureg_CAL(tx
->ureg
, &tx
->inst_labels
[tx
->insn
.src
[0].idx
]);
1407 DECL_SPECIAL(CALLNZ
)
1409 struct ureg_program
*ureg
= tx
->ureg
;
1410 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
1411 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1413 /* NOTE: source should be const bool, so we can use NOT/SUB instead of [U]SNE 0 */
1414 if (!tx
->insn
.flags
) {
1415 if (tx
->native_integers
)
1416 ureg_NOT(ureg
, tmp
, src
);
1418 ureg_SUB(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), src
);
1420 ureg_IF(ureg
, tx
->insn
.flags
? src
: tx_src_scalar(tmp
), tx_cond(tx
));
1421 ureg_CAL(ureg
, &tx
->inst_labels
[tx
->insn
.src
[0].idx
]);
1427 DECL_SPECIAL(MOV_vs1x
)
1429 if (tx
->insn
.dst
[0].file
== D3DSPR_ADDR
) {
1431 tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1432 tx_src_param(tx
, &tx
->insn
.src
[0]));
1435 return NineTranslateInstruction_Generic(tx
);
1440 struct ureg_program
*ureg
= tx
->ureg
;
1442 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1443 struct ureg_src iter
= ureg_scalar(src
, TGSI_SWIZZLE_X
);
1444 struct ureg_src init
= ureg_scalar(src
, TGSI_SWIZZLE_Y
);
1445 struct ureg_src step
= ureg_scalar(src
, TGSI_SWIZZLE_Z
);
1446 struct ureg_dst ctr
;
1447 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
1449 label
= tx_bgnloop(tx
);
1450 ctr
= tx_get_loopctr(tx
);
1452 ureg_MOV(tx
->ureg
, ctr
, init
);
1453 ureg_BGNLOOP(tx
->ureg
, label
);
1454 if (tx
->native_integers
) {
1455 /* we'll let the backend pull up that MAD ... */
1456 ureg_UMAD(ureg
, tmp
, iter
, step
, init
);
1457 ureg_USEQ(ureg
, tmp
, ureg_src(ctr
), tx_src_scalar(tmp
));
1458 #ifdef NINE_TGSI_LAZY_DEVS
1459 ureg_UIF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1462 /* can't simply use SGE for precision because step might be negative */
1463 ureg_MAD(ureg
, tmp
, iter
, step
, init
);
1464 ureg_SEQ(ureg
, tmp
, ureg_src(ctr
), tx_src_scalar(tmp
));
1465 #ifdef NINE_TGSI_LAZY_DEVS
1466 ureg_IF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1469 #ifdef NINE_TGSI_LAZY_DEVS
1474 ureg_BREAKC(ureg
, tx_src_scalar(tmp
));
1476 if (tx
->native_integers
) {
1477 ureg_UARL(ureg
, tx_get_aL(tx
), tx_src_scalar(ctr
));
1478 ureg_UADD(ureg
, ctr
, tx_src_scalar(ctr
), step
);
1480 ureg_ARL(ureg
, tx_get_aL(tx
), tx_src_scalar(ctr
));
1481 ureg_ADD(ureg
, ctr
, tx_src_scalar(ctr
), step
);
1492 DECL_SPECIAL(ENDLOOP
)
1494 ureg_ENDLOOP(tx
->ureg
, tx_endloop(tx
));
1500 unsigned k
= tx
->num_inst_labels
;
1501 unsigned n
= tx
->insn
.src
[0].idx
;
1504 tx
->inst_labels
= REALLOC(tx
->inst_labels
,
1505 k
* sizeof(tx
->inst_labels
[0]),
1506 n
* sizeof(tx
->inst_labels
[0]));
1508 tx
->inst_labels
[n
] = ureg_get_instruction_number(tx
->ureg
);
/* SINCOS: emitted as one TGSI SCS instruction.  Only src[0] is translated
 * and used here; any further source operands of the D3D instruction are
 * not read by this handler. */
1512 DECL_SPECIAL(SINCOS
)
1514 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1515 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
/* 0xc is the z|w pair of write-mask bits: the shader must not ask for them. */
1517 assert(!(dst
.WriteMask
& 0xc));
/* Restrict the write to x/y so SCS cannot touch z or w of the destination. */
1519 dst
.WriteMask
&= TGSI_WRITEMASK_XY
; /* z undefined, w untouched */
1520 ureg_SCS(tx
->ureg
, dst
, src
);
1527 tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1528 tx_src_param(tx
, &tx
->insn
.src
[0]));
1534 struct ureg_program
*ureg
= tx
->ureg
;
1536 struct ureg_src rep
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1537 struct ureg_dst ctr
;
1538 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
1539 struct ureg_src imm
=
1540 tx
->native_integers
? ureg_imm1u(ureg
, 0) : ureg_imm1f(ureg
, 0.0f
);
1542 label
= tx_bgnloop(tx
);
1543 ctr
= tx_get_loopctr(tx
);
1545 /* NOTE: rep must be constant, so we don't have to save the count */
1546 assert(rep
.File
== TGSI_FILE_CONSTANT
|| rep
.File
== TGSI_FILE_IMMEDIATE
);
1548 ureg_MOV(ureg
, ctr
, imm
);
1549 ureg_BGNLOOP(ureg
, label
);
1550 if (tx
->native_integers
)
1552 ureg_USGE(ureg
, tmp
, tx_src_scalar(ctr
), rep
);
1553 #ifdef NINE_TGSI_LAZY_DEVS
1554 ureg_UIF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1559 ureg_SGE(ureg
, tmp
, tx_src_scalar(ctr
), rep
);
1560 #ifdef NINE_TGSI_LAZY_DEVS
1561 ureg_IF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1564 #ifdef NINE_TGSI_LAZY_DEVS
1569 ureg_BREAKC(ureg
, tx_src_scalar(tmp
));
1572 if (tx
->native_integers
) {
1573 ureg_UADD(ureg
, ctr
, tx_src_scalar(ctr
), ureg_imm1u(ureg
, 1));
1575 ureg_ADD(ureg
, ctr
, tx_src_scalar(ctr
), ureg_imm1f(ureg
, 1.0f
));
1581 DECL_SPECIAL(ENDREP
)
1583 ureg_ENDLOOP(tx
->ureg
, tx_endloop(tx
));
1590 ureg_ENDIF(tx
->ureg
);
1596 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1598 if (tx
->native_integers
&& tx
->insn
.src
[0].file
== D3DSPR_CONSTBOOL
)
1599 ureg_UIF(tx
->ureg
, src
, tx_cond(tx
));
1601 ureg_IF(tx
->ureg
, src
, tx_cond(tx
));
1606 static INLINE
unsigned
1607 sm1_insn_flags_to_tgsi_setop(BYTE flags
)
1610 case NINED3DSHADER_REL_OP_GT
: return TGSI_OPCODE_SGT
;
1611 case NINED3DSHADER_REL_OP_EQ
: return TGSI_OPCODE_SEQ
;
1612 case NINED3DSHADER_REL_OP_GE
: return TGSI_OPCODE_SGE
;
1613 case NINED3DSHADER_REL_OP_LT
: return TGSI_OPCODE_SLT
;
1614 case NINED3DSHADER_REL_OP_NE
: return TGSI_OPCODE_SNE
;
1615 case NINED3DSHADER_REL_OP_LE
: return TGSI_OPCODE_SLE
;
1617 assert(!"invalid comparison flags");
1618 return TGSI_OPCODE_SGT
;
1624 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
1625 struct ureg_src src
[2];
1626 struct ureg_dst tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
1627 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1628 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
1629 ureg_insn(tx
->ureg
, cmp_op
, &tmp
, 1, src
, 2);
1630 ureg_IF(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), tx_cond(tx
));
1636 ureg_ELSE(tx
->ureg
, tx_elsecond(tx
));
1640 DECL_SPECIAL(BREAKC
)
1642 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
1643 struct ureg_src src
[2];
1644 struct ureg_dst tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
1645 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1646 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
1647 ureg_insn(tx
->ureg
, cmp_op
, &tmp
, 1, src
, 2);
1648 #ifdef NINE_TGSI_LAZY_DEVS
1649 ureg_IF(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), tx_cond(tx
));
1652 ureg_ENDIF(tx
->ureg
);
1654 ureg_BREAKC(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
1659 static const char *sm1_declusage_names
[] =
1661 [D3DDECLUSAGE_POSITION
] = "POSITION",
1662 [D3DDECLUSAGE_BLENDWEIGHT
] = "BLENDWEIGHT",
1663 [D3DDECLUSAGE_BLENDINDICES
] = "BLENDINDICES",
1664 [D3DDECLUSAGE_NORMAL
] = "NORMAL",
1665 [D3DDECLUSAGE_PSIZE
] = "PSIZE",
1666 [D3DDECLUSAGE_TEXCOORD
] = "TEXCOORD",
1667 [D3DDECLUSAGE_TANGENT
] = "TANGENT",
1668 [D3DDECLUSAGE_BINORMAL
] = "BINORMAL",
1669 [D3DDECLUSAGE_TESSFACTOR
] = "TESSFACTOR",
1670 [D3DDECLUSAGE_POSITIONT
] = "POSITIONT",
1671 [D3DDECLUSAGE_COLOR
] = "COLOR",
1672 [D3DDECLUSAGE_FOG
] = "FOG",
1673 [D3DDECLUSAGE_DEPTH
] = "DEPTH",
1674 [D3DDECLUSAGE_SAMPLE
] = "SAMPLE"
1677 static INLINE
unsigned
1678 sm1_to_nine_declusage(struct sm1_semantic
*dcl
)
1680 return nine_d3d9_to_nine_declusage(dcl
->usage
, dcl
->usage_idx
);
1684 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic
*sem
,
1686 struct sm1_semantic
*dcl
)
1688 const unsigned generic_base
= tc
? 0 : 8; /* TEXCOORD[0..7] */
1690 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1693 /* TGSI_SEMANTIC_GENERIC assignments (+8 if !PIPE_CAP_TGSI_TEXCOORD):
1694 * Try to put frequently used semantics at low GENERIC indices.
1696 * POSITION[1..4]: 17, 27, 28, 29
1697 * COLOR[2..4]: 14, 15, 26
1698 * TEXCOORD[8..15]: 10, 11, 18, 19, 20, 21, 22, 23
1699 * BLENDWEIGHT[0..3]: 0, 4, 8, 12
1700 * BLENDINDICES[0..3]: 1, 5, 9, 13
1701 * NORMAL[0..1]: 2, 6
1703 * BINORMAL[0..1]: 7, 25
1707 switch (dcl
->usage
) {
1708 case D3DDECLUSAGE_POSITION
:
1709 case D3DDECLUSAGE_POSITIONT
:
1710 case D3DDECLUSAGE_DEPTH
:
1711 sem
->Name
= TGSI_SEMANTIC_POSITION
;
1712 assert(dcl
->usage_idx
<= 4);
1713 if (dcl
->usage_idx
== 1) {
1714 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1715 sem
->Index
= generic_base
+ 17;
1717 if (dcl
->usage_idx
>= 2) {
1718 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1719 sem
->Index
= generic_base
+ 27 + (dcl
->usage_idx
- 2);
1722 case D3DDECLUSAGE_COLOR
:
1723 assert(dcl
->usage_idx
<= 4);
1724 if (dcl
->usage_idx
< 2) {
1725 sem
->Name
= TGSI_SEMANTIC_COLOR
;
1726 sem
->Index
= dcl
->usage_idx
;
1728 if (dcl
->usage_idx
< 4) {
1729 sem
->Index
= generic_base
+ 14 + (dcl
->usage_idx
- 2);
1731 sem
->Index
= generic_base
+ 26;
1734 case D3DDECLUSAGE_FOG
:
1735 sem
->Name
= TGSI_SEMANTIC_FOG
;
1736 assert(dcl
->usage_idx
== 0);
1738 case D3DDECLUSAGE_PSIZE
:
1739 sem
->Name
= TGSI_SEMANTIC_PSIZE
;
1740 assert(dcl
->usage_idx
== 0);
1742 case D3DDECLUSAGE_TEXCOORD
:
1743 assert(dcl
->usage_idx
< 16);
1744 if (dcl
->usage_idx
< 8) {
1746 sem
->Name
= TGSI_SEMANTIC_TEXCOORD
;
1747 sem
->Index
= dcl
->usage_idx
;
1749 if (dcl
->usage_idx
< 10) {
1750 sem
->Index
= generic_base
+ 10 + (dcl
->usage_idx
- 8);
1752 sem
->Index
= generic_base
+ 18 + (dcl
->usage_idx
- 10);
1755 case D3DDECLUSAGE_BLENDWEIGHT
: /* 0, 4, 8, 12 */
1756 assert(dcl
->usage_idx
< 4);
1757 sem
->Index
= generic_base
+ dcl
->usage_idx
* 4;
1759 case D3DDECLUSAGE_BLENDINDICES
: /* 1, 5, 9, 13 */
1760 assert(dcl
->usage_idx
< 4);
1761 sem
->Index
= generic_base
+ dcl
->usage_idx
* 4 + 1;
1763 case D3DDECLUSAGE_NORMAL
: /* 2, 6 */
1764 assert(dcl
->usage_idx
< 2);
1765 sem
->Index
= generic_base
+ 2 + dcl
->usage_idx
* 4;
1767 case D3DDECLUSAGE_TANGENT
:
1768 /* Yes these are weird, but we try to fit the more frequently used
1769 * into lower slots. */
1770 assert(dcl
->usage_idx
<= 1);
1771 sem
->Index
= generic_base
+ (dcl
->usage_idx
? 24 : 3);
1773 case D3DDECLUSAGE_BINORMAL
:
1774 assert(dcl
->usage_idx
<= 1);
1775 sem
->Index
= generic_base
+ (dcl
->usage_idx
? 25 : 7);
1777 case D3DDECLUSAGE_TESSFACTOR
:
1778 assert(dcl
->usage_idx
== 0);
1779 sem
->Index
= generic_base
+ 16;
1781 case D3DDECLUSAGE_SAMPLE
:
1782 sem
->Name
= TGSI_SEMANTIC_COUNT
;
1785 assert(!"Invalid DECLUSAGE.");
1790 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
1791 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
1792 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
1793 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
1794 static INLINE
unsigned
1795 d3dstt_to_tgsi_tex(BYTE sampler_type
)
1797 switch (sampler_type
) {
1798 case NINED3DSTT_1D
: return TGSI_TEXTURE_1D
;
1799 case NINED3DSTT_2D
: return TGSI_TEXTURE_2D
;
1800 case NINED3DSTT_VOLUME
: return TGSI_TEXTURE_3D
;
1801 case NINED3DSTT_CUBE
: return TGSI_TEXTURE_CUBE
;
1804 return TGSI_TEXTURE_UNKNOWN
;
1807 static INLINE
unsigned
1808 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type
)
1810 switch (sampler_type
) {
1811 case NINED3DSTT_1D
: return TGSI_TEXTURE_SHADOW1D
;
1812 case NINED3DSTT_2D
: return TGSI_TEXTURE_SHADOW2D
;
1813 case NINED3DSTT_VOLUME
:
1814 case NINED3DSTT_CUBE
:
1817 return TGSI_TEXTURE_UNKNOWN
;
1820 static INLINE
unsigned
1821 ps1x_sampler_type(const struct nine_shader_info
*info
, unsigned stage
)
1823 switch ((info
->sampler_ps1xtypes
>> (stage
* 2)) & 0x3) {
1824 case 1: return TGSI_TEXTURE_1D
;
1825 case 0: return TGSI_TEXTURE_2D
;
1826 case 3: return TGSI_TEXTURE_3D
;
1828 return TGSI_TEXTURE_CUBE
;
1833 sm1_sampler_type_name(BYTE sampler_type
)
1835 switch (sampler_type
) {
1836 case NINED3DSTT_1D
: return "1D";
1837 case NINED3DSTT_2D
: return "2D";
1838 case NINED3DSTT_VOLUME
: return "VOLUME";
1839 case NINED3DSTT_CUBE
: return "CUBE";
1841 return "(D3DSTT_?)";
1845 static INLINE
unsigned
1846 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic
*sem
)
1848 switch (sem
->Name
) {
1849 case TGSI_SEMANTIC_POSITION
:
1850 case TGSI_SEMANTIC_NORMAL
:
1851 return TGSI_INTERPOLATE_LINEAR
;
1852 case TGSI_SEMANTIC_BCOLOR
:
1853 case TGSI_SEMANTIC_COLOR
:
1854 case TGSI_SEMANTIC_FOG
:
1855 case TGSI_SEMANTIC_GENERIC
:
1856 case TGSI_SEMANTIC_TEXCOORD
:
1857 case TGSI_SEMANTIC_CLIPDIST
:
1858 case TGSI_SEMANTIC_CLIPVERTEX
:
1859 return TGSI_INTERPOLATE_PERSPECTIVE
;
1860 case TGSI_SEMANTIC_EDGEFLAG
:
1861 case TGSI_SEMANTIC_FACE
:
1862 case TGSI_SEMANTIC_INSTANCEID
:
1863 case TGSI_SEMANTIC_PCOORD
:
1864 case TGSI_SEMANTIC_PRIMID
:
1865 case TGSI_SEMANTIC_PSIZE
:
1866 case TGSI_SEMANTIC_VERTEXID
:
1867 return TGSI_INTERPOLATE_CONSTANT
;
1870 return TGSI_INTERPOLATE_CONSTANT
;
1876 struct ureg_program
*ureg
= tx
->ureg
;
1879 struct tgsi_declaration_semantic tgsi
;
1880 struct sm1_semantic sem
;
1881 sm1_read_semantic(tx
, &sem
);
1883 is_input
= sem
.reg
.file
== D3DSPR_INPUT
;
1885 sem
.usage
== D3DDECLUSAGE_SAMPLE
|| sem
.reg
.file
== D3DSPR_SAMPLER
;
1888 sm1_dump_dst_param(&sem
.reg
);
1890 DUMP(" %s\n", sm1_sampler_type_name(sem
.sampler_type
));
1892 if (tx
->version
.major
>= 3)
1893 DUMP(" %s%i\n", sm1_declusage_names
[sem
.usage
], sem
.usage_idx
);
1895 if (sem
.usage
| sem
.usage_idx
)
1896 DUMP(" %u[%u]\n", sem
.usage
, sem
.usage_idx
);
1901 const unsigned m
= 1 << sem
.reg
.idx
;
1902 ureg_DECL_sampler(ureg
, sem
.reg
.idx
);
1903 tx
->info
->sampler_mask
|= m
;
1904 tx
->sampler_targets
[sem
.reg
.idx
] = (tx
->info
->sampler_mask_shadow
& m
) ?
1905 d3dstt_to_tgsi_tex_shadow(sem
.sampler_type
) :
1906 d3dstt_to_tgsi_tex(sem
.sampler_type
);
1910 sm1_declusage_to_tgsi(&tgsi
, tx
->want_texcoord
, &sem
);
1913 /* linkage outside of shader with vertex declaration */
1914 ureg_DECL_vs_input(ureg
, sem
.reg
.idx
);
1915 assert(sem
.reg
.idx
< Elements(tx
->info
->input_map
));
1916 tx
->info
->input_map
[sem
.reg
.idx
] = sm1_to_nine_declusage(&sem
);
1917 tx
->info
->num_inputs
= sem
.reg
.idx
+ 1;
1918 /* NOTE: preserving order in case of indirect access */
1920 if (tx
->version
.major
>= 3) {
1921 /* SM2 output semantic determined by file */
1922 assert(sem
.reg
.mask
!= 0);
1923 if (sem
.usage
== D3DDECLUSAGE_POSITIONT
)
1924 tx
->info
->position_t
= TRUE
;
1925 assert(sem
.reg
.idx
< Elements(tx
->regs
.o
));
1926 tx
->regs
.o
[sem
.reg
.idx
] = ureg_DECL_output_masked(
1927 ureg
, tgsi
.Name
, tgsi
.Index
, sem
.reg
.mask
);
1929 if (tgsi
.Name
== TGSI_SEMANTIC_PSIZE
)
1930 tx
->regs
.oPts
= tx
->regs
.o
[sem
.reg
.idx
];
1933 if (is_input
&& tx
->version
.major
>= 3) {
1934 /* SM3 only, SM2 input semantic determined by file */
1935 assert(sem
.reg
.idx
< Elements(tx
->regs
.v
));
1936 tx
->regs
.v
[sem
.reg
.idx
] = ureg_DECL_fs_input_cyl_centroid(
1937 ureg
, tgsi
.Name
, tgsi
.Index
,
1938 nine_tgsi_to_interp_mode(&tgsi
),
1940 sem
.reg
.mod
& NINED3DSPDM_CENTROID
);
1942 if (!is_input
&& 0) { /* declare in COLOROUT/DEPTHOUT case */
1943 /* FragColor or FragDepth */
1944 assert(sem
.reg
.mask
!= 0);
1945 ureg_DECL_output_masked(ureg
, tgsi
.Name
, tgsi
.Index
, sem
.reg
.mask
);
1953 tx_set_lconstf(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.f
);
1959 tx_set_lconstb(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.b
);
1965 tx_set_lconsti(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.i
);
1971 struct ureg_program
*ureg
= tx
->ureg
;
1972 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
1973 struct ureg_src nrm
= tx_src_scalar(tmp
);
1974 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1975 ureg_DP3(ureg
, tmp
, src
, src
);
1976 ureg_RSQ(ureg
, tmp
, nrm
);
1977 ureg_MUL(ureg
, tx_dst_param(tx
, &tx
->insn
.dst
[0]), src
, nrm
);
1981 DECL_SPECIAL(DP2ADD
)
1983 #ifdef NINE_TGSI_LAZY_R600
1984 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
1985 struct ureg_src dp2
= tx_src_scalar(tmp
);
1986 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1987 struct ureg_src src
[3];
1989 for (i
= 0; i
< 3; ++i
)
1990 src
[i
] = tx_src_param(tx
, &tx
->insn
.src
[i
]);
1991 assert_replicate_swizzle(&src
[2]);
1993 ureg_DP2(tx
->ureg
, tmp
, src
[0], src
[1]);
1994 ureg_ADD(tx
->ureg
, dst
, src
[2], dp2
);
1998 return NineTranslateInstruction_Generic(tx
);
2002 DECL_SPECIAL(TEXCOORD
)
2004 struct ureg_program
*ureg
= tx
->ureg
;
2005 const unsigned s
= tx
->insn
.dst
[0].idx
;
2006 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2008 if (ureg_src_is_undef(tx
->regs
.vT
[s
]))
2009 tx
->regs
.vT
[s
] = ureg_DECL_fs_input(ureg
, tx
->texcoord_sn
, s
, TGSI_INTERPOLATE_PERSPECTIVE
);
2010 ureg_MOV(ureg
, dst
, tx
->regs
.vT
[s
]); /* XXX is this sufficient ? */
2015 DECL_SPECIAL(TEXCOORD_ps14
)
2017 struct ureg_program
*ureg
= tx
->ureg
;
2018 const unsigned s
= tx
->insn
.src
[0].idx
;
2019 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2021 if (ureg_src_is_undef(tx
->regs
.vT
[s
]))
2022 tx
->regs
.vT
[s
] = ureg_DECL_fs_input(ureg
, tx
->texcoord_sn
, s
, TGSI_INTERPOLATE_PERSPECTIVE
);
2023 ureg_MOV(ureg
, dst
, tx
->regs
.vT
[s
]); /* XXX is this sufficient ? */
2028 DECL_SPECIAL(TEXKILL
)
2030 struct ureg_src reg
;
2032 if (tx
->version
.major
> 1 || tx
->version
.minor
> 3) {
2033 reg
= tx_dst_param_as_src(tx
, &tx
->insn
.dst
[0]);
2035 tx_texcoord_alloc(tx
, tx
->insn
.dst
[0].idx
);
2036 reg
= tx
->regs
.vT
[tx
->insn
.dst
[0].idx
];
2038 if (tx
->version
.major
< 2)
2039 reg
= ureg_swizzle(reg
, NINE_SWIZZLE4(X
,Y
,Z
,Z
));
2040 ureg_KILL_IF(tx
->ureg
, reg
);
2045 DECL_SPECIAL(TEXBEM
)
2047 STUB(D3DERR_INVALIDCALL
);
2050 DECL_SPECIAL(TEXBEML
)
2052 STUB(D3DERR_INVALIDCALL
);
2055 DECL_SPECIAL(TEXREG2AR
)
2057 STUB(D3DERR_INVALIDCALL
);
2060 DECL_SPECIAL(TEXREG2GB
)
2062 STUB(D3DERR_INVALIDCALL
);
2065 DECL_SPECIAL(TEXM3x2PAD
)
2067 STUB(D3DERR_INVALIDCALL
);
2070 DECL_SPECIAL(TEXM3x2TEX
)
2072 STUB(D3DERR_INVALIDCALL
);
2075 DECL_SPECIAL(TEXM3x3PAD
)
2077 return D3D_OK
; /* this is just padding */
2080 DECL_SPECIAL(TEXM3x3SPEC
)
2082 STUB(D3DERR_INVALIDCALL
);
2085 DECL_SPECIAL(TEXM3x3VSPEC
)
2087 STUB(D3DERR_INVALIDCALL
);
2090 DECL_SPECIAL(TEXREG2RGB
)
2092 STUB(D3DERR_INVALIDCALL
);
2095 DECL_SPECIAL(TEXDP3TEX
)
2097 STUB(D3DERR_INVALIDCALL
);
2100 DECL_SPECIAL(TEXM3x2DEPTH
)
2102 STUB(D3DERR_INVALIDCALL
);
2105 DECL_SPECIAL(TEXDP3
)
2107 STUB(D3DERR_INVALIDCALL
);
2110 DECL_SPECIAL(TEXM3x3
)
2112 struct ureg_program
*ureg
= tx
->ureg
;
2113 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2114 struct ureg_src src
[4];
2116 const int m
= tx
->insn
.dst
[0].idx
- 2;
2117 const int n
= tx
->insn
.src
[0].idx
;
2118 assert(m
>= 0 && m
> n
);
2120 for (s
= m
; s
<= (m
+ 2); ++s
) {
2121 if (ureg_src_is_undef(tx
->regs
.vT
[s
]))
2122 tx
->regs
.vT
[s
] = ureg_DECL_fs_input(ureg
, tx
->texcoord_sn
, s
, TGSI_INTERPOLATE_PERSPECTIVE
);
2123 src
[s
] = tx
->regs
.vT
[s
];
2125 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), src
[0], ureg_src(tx
->regs
.tS
[n
]));
2126 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), src
[1], ureg_src(tx
->regs
.tS
[n
]));
2127 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
), src
[2], ureg_src(tx
->regs
.tS
[n
]));
2129 switch (tx
->insn
.opcode
) {
2130 case D3DSIO_TEXM3x3
:
2131 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
2133 case D3DSIO_TEXM3x3TEX
:
2134 src
[3] = ureg_DECL_sampler(ureg
, m
+ 2);
2135 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2136 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(dst
), src
[3]);
2139 return D3DERR_INVALIDCALL
;
2144 DECL_SPECIAL(TEXDEPTH
)
2146 STUB(D3DERR_INVALIDCALL
);
2151 STUB(D3DERR_INVALIDCALL
);
2156 struct ureg_program
*ureg
= tx
->ureg
;
2158 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2159 struct ureg_src src
[2] = {
2160 tx_src_param(tx
, &tx
->insn
.src
[0]),
2161 tx_src_param(tx
, &tx
->insn
.src
[1])
2163 assert(tx
->insn
.src
[1].idx
>= 0 &&
2164 tx
->insn
.src
[1].idx
< Elements(tx
->sampler_targets
));
2165 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2167 switch (tx
->insn
.flags
) {
2169 ureg_TEX(ureg
, dst
, target
, src
[0], src
[1]);
2171 case NINED3DSI_TEXLD_PROJECT
:
2172 ureg_TXP(ureg
, dst
, target
, src
[0], src
[1]);
2174 case NINED3DSI_TEXLD_BIAS
:
2175 ureg_TXB(ureg
, dst
, target
, src
[0], src
[1]);
2179 return D3DERR_INVALIDCALL
;
2184 DECL_SPECIAL(TEXLD_14
)
2186 struct ureg_program
*ureg
= tx
->ureg
;
2187 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2188 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2189 const unsigned s
= tx
->insn
.dst
[0].idx
;
2190 const unsigned t
= ps1x_sampler_type(tx
->info
, s
);
2192 tx
->info
->sampler_mask
|= 1 << s
;
2193 ureg_TEX(ureg
, dst
, t
, src
, ureg_DECL_sampler(ureg
, s
));
2200 struct ureg_program
*ureg
= tx
->ureg
;
2201 const unsigned s
= tx
->insn
.dst
[0].idx
;
2202 const unsigned t
= ps1x_sampler_type(tx
->info
, s
);
2203 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2204 struct ureg_src src
[2];
2206 if (ureg_src_is_undef(tx
->regs
.vT
[s
]))
2207 tx
->regs
.vT
[s
] = ureg_DECL_fs_input(ureg
, tx
->texcoord_sn
, s
, TGSI_INTERPOLATE_PERSPECTIVE
);
2209 src
[0] = tx
->regs
.vT
[s
];
2210 src
[1] = ureg_DECL_sampler(ureg
, s
);
2211 tx
->info
->sampler_mask
|= 1 << s
;
2213 ureg_TEX(ureg
, dst
, t
, src
[0], src
[1]);
2218 DECL_SPECIAL(TEXLDD
)
2221 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2222 struct ureg_src src
[4] = {
2223 tx_src_param(tx
, &tx
->insn
.src
[0]),
2224 tx_src_param(tx
, &tx
->insn
.src
[1]),
2225 tx_src_param(tx
, &tx
->insn
.src
[2]),
2226 tx_src_param(tx
, &tx
->insn
.src
[3])
2228 assert(tx
->insn
.src
[3].idx
>= 0 &&
2229 tx
->insn
.src
[3].idx
< Elements(tx
->sampler_targets
));
2230 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2232 ureg_TXD(tx
->ureg
, dst
, target
, src
[0], src
[2], src
[3], src
[1]);
2236 DECL_SPECIAL(TEXLDL
)
2239 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2240 struct ureg_src src
[2] = {
2241 tx_src_param(tx
, &tx
->insn
.src
[0]),
2242 tx_src_param(tx
, &tx
->insn
.src
[1])
2244 assert(tx
->insn
.src
[3].idx
>= 0 &&
2245 tx
->insn
.src
[3].idx
< Elements(tx
->sampler_targets
));
2246 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2248 ureg_TXL(tx
->ureg
, dst
, target
, src
[0], src
[1]);
2254 STUB(D3DERR_INVALIDCALL
);
2257 DECL_SPECIAL(BREAKP
)
2259 STUB(D3DERR_INVALIDCALL
);
2264 return D3D_OK
; /* we don't care about phase */
2267 DECL_SPECIAL(COMMENT
)
2269 return D3D_OK
; /* nothing to do */
2273 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
2274 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2276 struct sm1_op_info inst_table
[] =
2278 _OPI(NOP
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL
), /* 0 */
2279 _OPI(MOV
, MOV
, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x
)),
2280 _OPI(MOV
, MOV
, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
),
2281 _OPI(ADD
, ADD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 2 */
2282 _OPI(SUB
, SUB
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 3 */
2283 _OPI(MAD
, MAD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL
), /* 4 */
2284 _OPI(MUL
, MUL
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 5 */
2285 _OPI(RCP
, RCP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 6 */
2286 _OPI(RSQ
, RSQ
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 7 */
2287 _OPI(DP3
, DP3
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 8 */
2288 _OPI(DP4
, DP4
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 9 */
2289 _OPI(MIN
, MIN
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 10 */
2290 _OPI(MAX
, MAX
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 11 */
2291 _OPI(SLT
, SLT
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 12 */
2292 _OPI(SGE
, SGE
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 13 */
2293 _OPI(EXP
, EX2
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 14 */
2294 _OPI(LOG
, LG2
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 15 */
2295 _OPI(LIT
, LIT
, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
), /* 16 */
2296 _OPI(DST
, DST
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 17 */
2297 _OPI(LRP
, LRP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL
), /* 18 */
2298 _OPI(FRC
, FRC
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 19 */
2300 _OPI(M4x4
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4
)),
2301 _OPI(M4x3
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3
)),
2302 _OPI(M3x4
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4
)),
2303 _OPI(M3x3
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3
)),
2304 _OPI(M3x2
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2
)),
2306 _OPI(CALL
, CAL
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALL
)),
2307 _OPI(CALLNZ
, CAL
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALLNZ
)),
2308 _OPI(LOOP
, BGNLOOP
, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP
)),
2309 _OPI(RET
, RET
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET
)),
2310 _OPI(ENDLOOP
, ENDLOOP
, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP
)),
2311 _OPI(LABEL
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(LABEL
)),
2313 _OPI(DCL
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL
)),
2315 _OPI(POW
, POW
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
),
2316 _OPI(CRS
, XPD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* XXX: .w */
2317 _OPI(SGN
, SSG
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN
)), /* ignore src1,2 */
2318 _OPI(ABS
, ABS
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
),
2319 _OPI(NRM
, NRM
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM
)), /* NRM doesn't fit */
2321 _OPI(SINCOS
, SCS
, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS
)),
2322 _OPI(SINCOS
, SCS
, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS
)),
2324 /* More flow control */
2325 _OPI(REP
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP
)),
2326 _OPI(ENDREP
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP
)),
2327 _OPI(IF
, IF
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF
)),
2328 _OPI(IFC
, IF
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC
)),
2329 _OPI(ELSE
, ELSE
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE
)),
2330 _OPI(ENDIF
, ENDIF
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF
)),
2331 _OPI(BREAK
, BRK
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL
),
2332 _OPI(BREAKC
, BREAKC
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC
)),
2334 _OPI(MOVA
, ARR
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
2336 _OPI(DEFB
, NOP
, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB
)),
2337 _OPI(DEFI
, NOP
, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI
)),
2339 _OPI(TEXCOORD
, NOP
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD
)),
2340 _OPI(TEXCOORD
, MOV
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14
)),
2341 _OPI(TEXKILL
, KILL_IF
, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL
)),
2342 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX
)),
2343 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14
)),
2344 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD
)),
2345 _OPI(TEXBEM
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEM
)),
2346 _OPI(TEXBEML
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEML
)),
2347 _OPI(TEXREG2AR
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2AR
)),
2348 _OPI(TEXREG2GB
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2GB
)),
2349 _OPI(TEXM3x2PAD
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2PAD
)),
2350 _OPI(TEXM3x2TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2TEX
)),
2351 _OPI(TEXM3x3PAD
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3PAD
)),
2352 _OPI(TEXM3x3TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3
)),
2353 _OPI(TEXM3x3SPEC
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3SPEC
)),
2354 _OPI(TEXM3x3VSPEC
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3VSPEC
)),
2356 _OPI(EXPP
, EXP
, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL
),
2357 _OPI(EXPP
, EX2
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
2358 _OPI(LOGP
, LG2
, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
2359 _OPI(CND
, CND
, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND
)),
2361 _OPI(DEF
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF
)),
2363 /* More tex stuff */
2364 _OPI(TEXREG2RGB
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXREG2RGB
)),
2365 _OPI(TEXDP3TEX
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3TEX
)),
2366 _OPI(TEXM3x2DEPTH
, TEX
, V(0,0), V(0,0), V(1,3), V(1,3), 0, 0, SPECIAL(TEXM3x2DEPTH
)),
2367 _OPI(TEXDP3
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3
)),
2368 _OPI(TEXM3x3
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXM3x3
)),
2369 _OPI(TEXDEPTH
, TEX
, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(TEXDEPTH
)),
2372 _OPI(CMP
, CMP
, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP
)), /* reversed */
2373 _OPI(BEM
, NOP
, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(BEM
)),
2374 _OPI(DP2ADD
, DP2A
, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD
)), /* for radeons */
2375 _OPI(DSX
, DDX
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL
),
2376 _OPI(DSY
, DDY
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL
),
2377 _OPI(TEXLDD
, TXD
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD
)),
2378 _OPI(SETP
, NOP
, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(SETP
)),
2379 _OPI(TEXLDL
, TXL
, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL
)),
2380 _OPI(BREAKP
, BRK
, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(BREAKP
))
2383 struct sm1_op_info inst_phase
=
2384 _OPI(PHASE
, NOP
, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE
));
2386 struct sm1_op_info inst_comment
=
2387 _OPI(COMMENT
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT
));
2390 create_op_info_map(struct shader_translator
*tx
)
2392 const unsigned version
= (tx
->version
.major
<< 8) | tx
->version
.minor
;
2395 for (i
= 0; i
< Elements(tx
->op_info_map
); ++i
)
2396 tx
->op_info_map
[i
] = -1;
2398 if (tx
->processor
== TGSI_PROCESSOR_VERTEX
) {
2399 for (i
= 0; i
< Elements(inst_table
); ++i
) {
2400 assert(inst_table
[i
].sio
< Elements(tx
->op_info_map
));
2401 if (inst_table
[i
].vert_version
.min
<= version
&&
2402 inst_table
[i
].vert_version
.max
>= version
)
2403 tx
->op_info_map
[inst_table
[i
].sio
] = i
;
2406 for (i
= 0; i
< Elements(inst_table
); ++i
) {
2407 assert(inst_table
[i
].sio
< Elements(tx
->op_info_map
));
2408 if (inst_table
[i
].frag_version
.min
<= version
&&
2409 inst_table
[i
].frag_version
.max
>= version
)
2410 tx
->op_info_map
[inst_table
[i
].sio
] = i
;
2415 static INLINE HRESULT
2416 NineTranslateInstruction_Generic(struct shader_translator
*tx
)
2418 struct ureg_dst dst
[1];
2419 struct ureg_src src
[4];
2422 for (i
= 0; i
< tx
->insn
.ndst
&& i
< Elements(dst
); ++i
)
2423 dst
[i
] = tx_dst_param(tx
, &tx
->insn
.dst
[i
]);
2424 for (i
= 0; i
< tx
->insn
.nsrc
&& i
< Elements(src
); ++i
)
2425 src
[i
] = tx_src_param(tx
, &tx
->insn
.src
[i
]);
2427 ureg_insn(tx
->ureg
, tx
->insn
.info
->opcode
,
2429 src
, tx
->insn
.nsrc
);
2434 TOKEN_PEEK(struct shader_translator
*tx
)
2436 return *(tx
->parse
);
2440 TOKEN_NEXT(struct shader_translator
*tx
)
2442 return *(tx
->parse
)++;
2446 TOKEN_JUMP(struct shader_translator
*tx
)
2448 if (tx
->parse_next
&& tx
->parse
!= tx
->parse_next
) {
2449 WARN("parse(%p) != parse_next(%p) !\n", tx
->parse
, tx
->parse_next
);
2450 tx
->parse
= tx
->parse_next
;
2454 static INLINE boolean
2455 sm1_parse_eof(struct shader_translator
*tx
)
2457 return TOKEN_PEEK(tx
) == NINED3DSP_END
;
2461 sm1_read_version(struct shader_translator
*tx
)
2463 const DWORD tok
= TOKEN_NEXT(tx
);
2465 tx
->version
.major
= D3DSHADER_VERSION_MAJOR(tok
);
2466 tx
->version
.minor
= D3DSHADER_VERSION_MINOR(tok
);
2468 switch (tok
>> 16) {
2469 case NINED3D_SM1_VS
: tx
->processor
= TGSI_PROCESSOR_VERTEX
; break;
2470 case NINED3D_SM1_PS
: tx
->processor
= TGSI_PROCESSOR_FRAGMENT
; break;
2472 DBG("Invalid shader type: %x\n", tok
);
2478 /* This is just to check if we parsed the instruction properly. */
2480 sm1_parse_get_skip(struct shader_translator
*tx
)
2482 const DWORD tok
= TOKEN_PEEK(tx
);
2484 if (tx
->version
.major
>= 2) {
2485 tx
->parse_next
= tx
->parse
+ 1 /* this */ +
2486 ((tok
& D3DSI_INSTLENGTH_MASK
) >> D3DSI_INSTLENGTH_SHIFT
);
2488 tx
->parse_next
= NULL
; /* TODO: determine from param count */
/* Print a shader comment block; called by sm1_parse_comments() when its
 * `print` argument is set.
 *
 * comment: text to print.
 * size:    comment length as taken from the comment token's size field.
 *
 * NOTE(review): only the signature is visible in this excerpt; the body is
 * not shown, so what is actually printed needs to be confirmed there.
 */
2493 sm1_print_comment(const char *comment
, UINT size
)
2501 sm1_parse_comments(struct shader_translator
*tx
, BOOL print
)
2503 DWORD tok
= TOKEN_PEEK(tx
);
2505 while ((tok
& D3DSI_OPCODE_MASK
) == D3DSIO_COMMENT
)
2507 const char *comment
= "";
2508 UINT size
= (tok
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
2509 tx
->parse
+= size
+ 1;
2512 sm1_print_comment(comment
, size
);
2514 tok
= TOKEN_PEEK(tx
);
2519 sm1_parse_get_param(struct shader_translator
*tx
, DWORD
*reg
, DWORD
*rel
)
2521 *reg
= TOKEN_NEXT(tx
);
2523 if (*reg
& D3DSHADER_ADDRMODE_RELATIVE
)
2525 if (tx
->version
.major
< 2)
2527 ((D3DSPR_ADDR
<< D3DSP_REGTYPE_SHIFT2
) & D3DSP_REGTYPE_MASK2
) |
2528 ((D3DSPR_ADDR
<< D3DSP_REGTYPE_SHIFT
) & D3DSP_REGTYPE_MASK
) |
2529 (D3DSP_NOSWIZZLE
<< D3DSP_SWIZZLE_SHIFT
);
2531 *rel
= TOKEN_NEXT(tx
);
2536 sm1_parse_dst_param(struct sm1_dst_param
*dst
, DWORD tok
)
2539 (tok
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
|
2540 (tok
& D3DSP_REGTYPE_MASK2
) >> D3DSP_REGTYPE_SHIFT2
;
2541 dst
->type
= TGSI_RETURN_TYPE_FLOAT
;
2542 dst
->idx
= tok
& D3DSP_REGNUM_MASK
;
2544 dst
->mask
= (tok
& NINED3DSP_WRITEMASK_MASK
) >> NINED3DSP_WRITEMASK_SHIFT
;
2545 dst
->mod
= (tok
& D3DSP_DSTMOD_MASK
) >> D3DSP_DSTMOD_SHIFT
;
2546 dst
->shift
= (tok
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
2550 sm1_parse_src_param(struct sm1_src_param
*src
, DWORD tok
)
2553 ((tok
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) |
2554 ((tok
& D3DSP_REGTYPE_MASK2
) >> D3DSP_REGTYPE_SHIFT2
);
2555 src
->type
= TGSI_RETURN_TYPE_FLOAT
;
2556 src
->idx
= tok
& D3DSP_REGNUM_MASK
;
2558 src
->swizzle
= (tok
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
2559 src
->mod
= (tok
& D3DSP_SRCMOD_MASK
) >> D3DSP_SRCMOD_SHIFT
;
2561 switch (src
->file
) {
2562 case D3DSPR_CONST2
: src
->file
= D3DSPR_CONST
; src
->idx
+= 2048; break;
2563 case D3DSPR_CONST3
: src
->file
= D3DSPR_CONST
; src
->idx
+= 4096; break;
2564 case D3DSPR_CONST4
: src
->file
= D3DSPR_CONST
; src
->idx
+= 6144; break;
2571 sm1_parse_immediate(struct shader_translator
*tx
,
2572 struct sm1_src_param
*imm
)
2574 imm
->file
= NINED3DSPR_IMMEDIATE
;
2577 imm
->swizzle
= NINED3DSP_NOSWIZZLE
;
2579 switch (tx
->insn
.opcode
) {
2581 imm
->type
= NINED3DSPTYPE_FLOAT4
;
2582 memcpy(&imm
->imm
.d
[0], tx
->parse
, 4 * sizeof(DWORD
));
2586 imm
->type
= NINED3DSPTYPE_INT4
;
2587 memcpy(&imm
->imm
.d
[0], tx
->parse
, 4 * sizeof(DWORD
));
2591 imm
->type
= NINED3DSPTYPE_BOOL
;
2592 memcpy(&imm
->imm
.d
[0], tx
->parse
, 1 * sizeof(DWORD
));
2602 sm1_read_dst_param(struct shader_translator
*tx
,
2603 struct sm1_dst_param
*dst
,
2604 struct sm1_src_param
*rel
)
2606 DWORD tok_dst
, tok_rel
= 0;
2608 sm1_parse_get_param(tx
, &tok_dst
, &tok_rel
);
2609 sm1_parse_dst_param(dst
, tok_dst
);
2610 if (tok_dst
& D3DSHADER_ADDRMODE_RELATIVE
) {
2611 sm1_parse_src_param(rel
, tok_rel
);
2617 sm1_read_src_param(struct shader_translator
*tx
,
2618 struct sm1_src_param
*src
,
2619 struct sm1_src_param
*rel
)
2621 DWORD tok_src
, tok_rel
= 0;
2623 sm1_parse_get_param(tx
, &tok_src
, &tok_rel
);
2624 sm1_parse_src_param(src
, tok_src
);
2625 if (tok_src
& D3DSHADER_ADDRMODE_RELATIVE
) {
2627 sm1_parse_src_param(rel
, tok_rel
);
2633 sm1_read_semantic(struct shader_translator
*tx
,
2634 struct sm1_semantic
*sem
)
2636 const DWORD tok_usg
= TOKEN_NEXT(tx
);
2637 const DWORD tok_dst
= TOKEN_NEXT(tx
);
2639 sem
->sampler_type
= (tok_usg
& D3DSP_TEXTURETYPE_MASK
) >> D3DSP_TEXTURETYPE_SHIFT
;
2640 sem
->usage
= (tok_usg
& D3DSP_DCL_USAGE_MASK
) >> D3DSP_DCL_USAGE_SHIFT
;
2641 sem
->usage_idx
= (tok_usg
& D3DSP_DCL_USAGEINDEX_MASK
) >> D3DSP_DCL_USAGEINDEX_SHIFT
;
2643 sm1_parse_dst_param(&sem
->reg
, tok_dst
);
/* Parse the instruction at the current parse position into tx->insn and
 * translate it.
 *
 * Visible steps, in order:
 *  - skip leading comment tokens and record the expected end of the
 *    instruction (sm1_parse_comments, sm1_parse_get_skip);
 *  - consume the opcode token and split it into opcode, flags, co-issue and
 *    predication bits;
 *  - look up the op info: through tx->op_info_map for opcodes inside the map,
 *    with inst_phase / inst_comment matched by opcode value;
 *  - take the destination/source counts from the op info and check that the
 *    op is allowed in this shader version;
 *  - read destination, predicate and source parameters, plus the immediate
 *    of DEF/DEFI/DEFB;
 *  - dump and sanity-check the instruction, emit code, apply the destination
 *    modifiers and reset the scratch register count.
 *
 * NOTE(review): several control-flow lines of this function are not visible
 * in this excerpt (the guards around the op info lookup, the early exits on
 * the two DBG error paths, and the condition that selects
 * NineTranslateInstruction_Generic) - confirm against the full file.
 */
2647 sm1_parse_instruction(struct shader_translator
*tx
)
2649 struct sm1_instruction
*insn
= &tx
->insn
;
2651 struct sm1_op_info
*info
= NULL
;
2654 sm1_parse_comments(tx
, TRUE
);
2655 sm1_parse_get_skip(tx
);
2657 tok
= TOKEN_NEXT(tx
);
/* Decode the fields packed into the opcode token. */
2659 insn
->opcode
= tok
& D3DSI_OPCODE_MASK
;
2660 insn
->flags
= (tok
& NINED3DSIO_OPCODE_FLAGS_MASK
) >> NINED3DSIO_OPCODE_FLAGS_SHIFT
;
2661 insn
->coissue
= !!(tok
& D3DSI_COISSUE
);
2662 insn
->predicated
= !!(tok
& NINED3DSHADER_INST_PREDICATED
);
/* Find the op info describing this opcode. */
2664 if (insn
->opcode
< Elements(tx
->op_info_map
)) {
2665 int k
= tx
->op_info_map
[insn
->opcode
];
2667 assert(k
< Elements(inst_table
));
2668 info
= &inst_table
[k
];
2671 if (insn
->opcode
== D3DSIO_PHASE
) info
= &inst_phase
;
2672 if (insn
->opcode
== D3DSIO_COMMENT
) info
= &inst_comment
;
2675 DBG("illegal or unhandled opcode: %08x\n", insn
->opcode
);
2680 insn
->ndst
= info
->ndst
;
2681 insn
->nsrc
= info
->nsrc
;
2683 assert(!insn
->predicated
&& "TODO: predicated instructions");
/* Versions are compared in the packed form (major << 8) | minor. */
2687 unsigned min
= IS_VS
? info
->vert_version
.min
: info
->frag_version
.min
;
2688 unsigned max
= IS_VS
? info
->vert_version
.max
: info
->frag_version
.max
;
2689 unsigned ver
= (tx
->version
.major
<< 8) | tx
->version
.minor
;
2690 if (ver
< min
|| ver
> max
) {
2691 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
/* Parameters are read in stream order: destinations, predicate, sources. */
2697 for (i
= 0; i
< insn
->ndst
; ++i
)
2698 sm1_read_dst_param(tx
, &insn
->dst
[i
], &insn
->dst_rel
[i
]);
2699 if (insn
->predicated
)
2700 sm1_read_src_param(tx
, &insn
->pred
, NULL
);
2701 for (i
= 0; i
< insn
->nsrc
; ++i
)
2702 sm1_read_src_param(tx
, &insn
->src
[i
], &insn
->src_rel
[i
]);
2704 /* parse here so we can dump them before processing */
2705 if (insn
->opcode
== D3DSIO_DEF
||
2706 insn
->opcode
== D3DSIO_DEFI
||
2707 insn
->opcode
== D3DSIO_DEFB
)
2708 sm1_parse_immediate(tx
, &tx
->insn
.src
[0]);
2710 sm1_dump_instruction(insn
, tx
->cond_depth
+ tx
->loop_depth
);
2711 sm1_instruction_check(insn
);
2716 NineTranslateInstruction_Generic(tx
);
2717 tx_apply_dst0_modifiers(tx
);
2719 tx
->num_scratch
= 0; /* reset */
2725 tx_ctor(struct shader_translator
*tx
, struct nine_shader_info
*info
)
2731 tx
->byte_code
= info
->byte_code
;
2732 tx
->parse
= info
->byte_code
;
2734 for (i
= 0; i
< Elements(info
->input_map
); ++i
)
2735 info
->input_map
[i
] = NINE_DECLUSAGE_NONE
;
2736 info
->num_inputs
= 0;
2738 info
->position_t
= FALSE
;
2739 info
->point_size
= FALSE
;
2741 tx
->info
->const_used_size
= 0;
2743 info
->sampler_mask
= 0x0;
2744 info
->rt_mask
= 0x0;
2746 info
->lconstf
.data
= NULL
;
2747 info
->lconstf
.ranges
= NULL
;
2749 for (i
= 0; i
< Elements(tx
->regs
.aL
); ++i
) {
2750 tx
->regs
.aL
[i
] = ureg_dst_undef();
2751 tx
->regs
.rL
[i
] = ureg_dst_undef();
2753 tx
->regs
.a
= ureg_dst_undef();
2754 tx
->regs
.p
= ureg_dst_undef();
2755 tx
->regs
.oDepth
= ureg_dst_undef();
2756 tx
->regs
.vPos
= ureg_src_undef();
2757 tx
->regs
.vFace
= ureg_src_undef();
2758 for (i
= 0; i
< Elements(tx
->regs
.o
); ++i
)
2759 tx
->regs
.o
[i
] = ureg_dst_undef();
2760 for (i
= 0; i
< Elements(tx
->regs
.oCol
); ++i
)
2761 tx
->regs
.oCol
[i
] = ureg_dst_undef();
2762 for (i
= 0; i
< Elements(tx
->regs
.vC
); ++i
)
2763 tx
->regs
.vC
[i
] = ureg_src_undef();
2764 for (i
= 0; i
< Elements(tx
->regs
.vT
); ++i
)
2765 tx
->regs
.vT
[i
] = ureg_src_undef();
2767 for (i
= 0; i
< Elements(tx
->lconsti
); ++i
)
2768 tx
->lconsti
[i
].idx
= -1;
2769 for (i
= 0; i
< Elements(tx
->lconstb
); ++i
)
2770 tx
->lconstb
[i
].idx
= -1;
2772 sm1_read_version(tx
);
2774 info
->version
= (tx
->version
.major
<< 4) | tx
->version
.minor
;
2776 create_op_info_map(tx
);
/* Release the resources held by a shader translator.
 *
 * The visible part frees the instruction label array when labels were
 * recorded (num_inst_labels != 0).
 * NOTE(review): the remainder of the body is not visible in this excerpt;
 * confirm what else is released there.
 */
2780 tx_dtor(struct shader_translator
*tx
)
2782 if (tx
->num_inst_labels
)
2783 FREE(tx
->inst_labels
);
2791 static INLINE
unsigned
2792 tgsi_processor_from_type(unsigned shader_type
)
2794 switch (shader_type
) {
2795 case PIPE_SHADER_VERTEX
: return TGSI_PROCESSOR_VERTEX
;
2796 case PIPE_SHADER_FRAGMENT
: return TGSI_PROCESSOR_FRAGMENT
;
/* Query screen capability PIPE_CAP_<n>.
 * Expects a variable named `device` in the scope of the expansion.
 */
#define GET_CAP(n) \
    device->screen->get_param(device->screen, PIPE_CAP_##n)

/* Query shader capability PIPE_SHADER_CAP_<n> for the shader type info->type.
 * Expects variables named `device` and `info` in the scope of the expansion.
 */
#define GET_SHADER_CAP(n) \
    device->screen->get_shader_param(device->screen, info->type, \
                                     PIPE_SHADER_CAP_##n)
2808 nine_translate_shader(struct NineDevice9
*device
, struct nine_shader_info
*info
)
2810 struct shader_translator
*tx
;
2811 HRESULT hr
= D3D_OK
;
2812 const unsigned processor
= tgsi_processor_from_type(info
->type
);
2814 user_assert(processor
!= ~0, D3DERR_INVALIDCALL
);
2816 tx
= CALLOC_STRUCT(shader_translator
);
2818 return E_OUTOFMEMORY
;
2821 if (((tx
->version
.major
<< 16) | tx
->version
.minor
) > 0x00030000) {
2822 hr
= D3DERR_INVALIDCALL
;
2823 DBG("Unsupported shader version: %u.%u !\n",
2824 tx
->version
.major
, tx
->version
.minor
);
2827 if (tx
->processor
!= processor
) {
2828 hr
= D3DERR_INVALIDCALL
;
2829 DBG("Shader type mismatch: %u / %u !\n", tx
->processor
, processor
);
2832 DUMP("%s%u.%u\n", processor
== TGSI_PROCESSOR_VERTEX
? "VS" : "PS",
2833 tx
->version
.major
, tx
->version
.minor
);
2835 tx
->ureg
= ureg_create(processor
);
2840 tx_decl_constants(tx
);
2842 tx
->native_integers
= GET_SHADER_CAP(INTEGERS
);
2843 tx
->inline_subroutines
= !GET_SHADER_CAP(SUBROUTINES
);
2844 tx
->lower_preds
= !GET_SHADER_CAP(MAX_PREDS
);
2845 tx
->want_texcoord
= GET_CAP(TGSI_TEXCOORD
);
2846 tx
->shift_wpos
= !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
2847 tx
->texcoord_sn
= tx
->want_texcoord
?
2848 TGSI_SEMANTIC_TEXCOORD
: TGSI_SEMANTIC_GENERIC
;
2850 /* VS must always write position. Declare it here to make it the 1st output.
2851 * (Some drivers like nv50 are buggy and rely on that.)
2854 tx
->regs
.oPos
= ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0);
2856 ureg_property(tx
->ureg
, TGSI_PROPERTY_FS_COORD_ORIGIN
, TGSI_FS_COORD_ORIGIN_UPPER_LEFT
);
2857 if (!tx
->shift_wpos
)
2858 ureg_property(tx
->ureg
, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
, TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
2861 if (!ureg_dst_is_undef(tx
->regs
.oPts
))
2862 info
->point_size
= TRUE
;
2864 while (!sm1_parse_eof(tx
))
2865 sm1_parse_instruction(tx
);
2866 tx
->parse
++; /* for byte_size */
2868 if (IS_PS
&& (tx
->version
.major
< 2) && tx
->num_temp
) {
2869 ureg_MOV(tx
->ureg
, ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_COLOR
, 0),
2870 ureg_src(tx
->regs
.r
[0]));
2871 info
->rt_mask
|= 0x1;
2874 if (info
->position_t
)
2875 ureg_property(tx
->ureg
, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION
, TRUE
);
2879 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE
)) {
2881 const struct tgsi_token
*toks
= ureg_get_tokens(tx
->ureg
, &count
);
2883 ureg_free_tokens(toks
);
2886 /* record local constants */
2887 if (tx
->num_lconstf
&& tx
->indirect_const_access
) {
2888 struct nine_range
*ranges
;
2895 data
= MALLOC(tx
->num_lconstf
* 4 * sizeof(float));
2898 info
->lconstf
.data
= data
;
2900 indices
= MALLOC(tx
->num_lconstf
* sizeof(indices
[0]));
2904 /* lazy sort, num_lconstf should be small */
2905 for (n
= 0; n
< tx
->num_lconstf
; ++n
) {
2906 for (k
= 0, i
= 0; i
< tx
->num_lconstf
; ++i
) {
2907 if (tx
->lconstf
[i
].idx
< tx
->lconstf
[k
].idx
)
2910 indices
[n
] = tx
->lconstf
[k
].idx
;
2911 memcpy(&data
[n
* 4], &tx
->lconstf
[k
].imm
.f
[0], 4 * sizeof(float));
2912 tx
->lconstf
[k
].idx
= INT_MAX
;
2916 for (n
= 1, i
= 1; i
< tx
->num_lconstf
; ++i
)
2917 if (indices
[i
] != indices
[i
- 1] + 1)
2919 ranges
= MALLOC(n
* sizeof(ranges
[0]));
2924 info
->lconstf
.ranges
= ranges
;
2927 ranges
[k
].bgn
= indices
[0];
2928 for (i
= 1; i
< tx
->num_lconstf
; ++i
) {
2929 if (indices
[i
] != indices
[i
- 1] + 1) {
2930 ranges
[k
].next
= &ranges
[k
+ 1];
2931 ranges
[k
].end
= indices
[i
- 1] + 1;
2933 ranges
[k
].bgn
= indices
[i
];
2936 ranges
[k
].end
= indices
[i
- 1] + 1;
2937 ranges
[k
].next
= NULL
;
2938 assert(n
== (k
+ 1));
2944 if (tx
->indirect_const_access
)
2945 info
->const_used_size
= ~0;
2947 info
->cso
= ureg_create_shader_and_destroy(tx
->ureg
, device
->pipe
);
2949 hr
= D3DERR_DRIVERINTERNALERROR
;
2950 FREE(info
->lconstf
.data
);
2951 FREE(info
->lconstf
.ranges
);
2955 info
->byte_size
= (tx
->parse
- tx
->byte_code
) * sizeof(DWORD
);