2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
24 #include "nine_shader.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
38 #define DBG_CHANNEL DBG_SHADER
40 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
43 struct shader_translator
;
45 typedef HRESULT (*translate_instruction_func
)(struct shader_translator
*);
47 static inline const char *d3dsio_to_string(unsigned opcode
);
50 #define NINED3D_SM1_VS 0xfffe
51 #define NINED3D_SM1_PS 0xffff
53 #define NINE_MAX_COND_DEPTH 64
54 #define NINE_MAX_LOOP_DEPTH 64
56 #define NINED3DSP_END 0x0000ffff
58 #define NINED3DSPTYPE_FLOAT4 0
59 #define NINED3DSPTYPE_INT4 1
60 #define NINED3DSPTYPE_BOOL 2
62 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
64 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
65 #define NINED3DSP_WRITEMASK_SHIFT 16
67 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
69 #define NINED3DSHADER_REL_OP_GT 1
70 #define NINED3DSHADER_REL_OP_EQ 2
71 #define NINED3DSHADER_REL_OP_GE 3
72 #define NINED3DSHADER_REL_OP_LT 4
73 #define NINED3DSHADER_REL_OP_NE 5
74 #define NINED3DSHADER_REL_OP_LE 6
76 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
77 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
79 #define NINED3DSI_TEXLD_PROJECT 0x1
80 #define NINED3DSI_TEXLD_BIAS 0x2
82 #define NINED3DSP_WRITEMASK_0 0x1
83 #define NINED3DSP_WRITEMASK_1 0x2
84 #define NINED3DSP_WRITEMASK_2 0x4
85 #define NINED3DSP_WRITEMASK_3 0x8
86 #define NINED3DSP_WRITEMASK_ALL 0xf
88 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
90 #define NINE_SWIZZLE4(x,y,z,w) \
91 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
93 #define NINE_CONSTANT_SRC(index) \
94 ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, index), 0)
96 #define NINE_APPLY_SWIZZLE(src, s) \
97 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
99 #define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
100 NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)
102 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
103 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
104 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
107 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
108 * BIAS <= PS 1.4 (x-0.5)
109 * BIASNEG <= PS 1.4 (-(x-0.5))
110 * SIGN <= PS 1.4 (2(x-0.5))
111 * SIGNNEG <= PS 1.4 (-2(x-0.5))
112 * COMP <= PS 1.4 (1-x)
114 * X2NEG = PS 1.4 (-2x)
115 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
116 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
117 * ABS >= SM 3.0 (abs(x))
118 * ABSNEG >= SM 3.0 (-abs(x))
119 * NOT >= SM 2.0 predication only
121 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
124 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
125 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
126 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
127 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
128 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
129 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
130 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
131 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
132 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
133 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
134 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
136 static const char *sm1_mod_str
[] =
138 [NINED3DSPSM_NONE
] = "",
139 [NINED3DSPSM_NEG
] = "-",
140 [NINED3DSPSM_BIAS
] = "bias",
141 [NINED3DSPSM_BIASNEG
] = "biasneg",
142 [NINED3DSPSM_SIGN
] = "sign",
143 [NINED3DSPSM_SIGNNEG
] = "signneg",
144 [NINED3DSPSM_COMP
] = "comp",
145 [NINED3DSPSM_X2
] = "x2",
146 [NINED3DSPSM_X2NEG
] = "x2neg",
147 [NINED3DSPSM_DZ
] = "dz",
148 [NINED3DSPSM_DW
] = "dw",
149 [NINED3DSPSM_ABS
] = "abs",
150 [NINED3DSPSM_ABSNEG
] = "-abs",
151 [NINED3DSPSM_NOT
] = "not"
155 sm1_dump_writemask(BYTE mask
)
157 if (mask
& 1) DUMP("x"); else DUMP("_");
158 if (mask
& 2) DUMP("y"); else DUMP("_");
159 if (mask
& 4) DUMP("z"); else DUMP("_");
160 if (mask
& 8) DUMP("w"); else DUMP("_");
164 sm1_dump_swizzle(BYTE s
)
166 char c
[4] = { 'x', 'y', 'z', 'w' };
168 c
[(s
>> 0) & 3], c
[(s
>> 2) & 3], c
[(s
>> 4) & 3], c
[(s
>> 6) & 3]);
171 static const char sm1_file_char
[] =
174 [D3DSPR_INPUT
] = 'v',
175 [D3DSPR_CONST
] = 'c',
177 [D3DSPR_RASTOUT
] = 'R',
178 [D3DSPR_ATTROUT
] = 'D',
179 [D3DSPR_OUTPUT
] = 'o',
180 [D3DSPR_CONSTINT
] = 'I',
181 [D3DSPR_COLOROUT
] = 'C',
182 [D3DSPR_DEPTHOUT
] = 'D',
183 [D3DSPR_SAMPLER
] = 's',
184 [D3DSPR_CONST2
] = 'c',
185 [D3DSPR_CONST3
] = 'c',
186 [D3DSPR_CONST4
] = 'c',
187 [D3DSPR_CONSTBOOL
] = 'B',
189 [D3DSPR_TEMPFLOAT16
] = 'h',
190 [D3DSPR_MISCTYPE
] = 'M',
191 [D3DSPR_LABEL
] = 'X',
192 [D3DSPR_PREDICATE
] = 'p'
196 sm1_dump_reg(BYTE file
, INT index
)
202 case D3DSPR_COLOROUT
:
205 case D3DSPR_DEPTHOUT
:
209 DUMP("oRast%i", index
);
211 case D3DSPR_CONSTINT
:
212 DUMP("iconst[%i]", index
);
214 case D3DSPR_CONSTBOOL
:
215 DUMP("bconst[%i]", index
);
218 DUMP("%c%i", sm1_file_char
[file
], index
);
226 struct sm1_src_param
*rel
;
239 sm1_parse_immediate(struct shader_translator
*, struct sm1_src_param
*);
244 struct sm1_src_param
*rel
;
248 int8_t shift
; /* sint4 */
253 assert_replicate_swizzle(const struct ureg_src
*reg
)
255 assert(reg
->SwizzleY
== reg
->SwizzleX
&&
256 reg
->SwizzleZ
== reg
->SwizzleX
&&
257 reg
->SwizzleW
== reg
->SwizzleX
);
261 sm1_dump_immediate(const struct sm1_src_param
*param
)
263 switch (param
->type
) {
264 case NINED3DSPTYPE_FLOAT4
:
265 DUMP("{ %f %f %f %f }",
266 param
->imm
.f
[0], param
->imm
.f
[1],
267 param
->imm
.f
[2], param
->imm
.f
[3]);
269 case NINED3DSPTYPE_INT4
:
270 DUMP("{ %i %i %i %i }",
271 param
->imm
.i
[0], param
->imm
.i
[1],
272 param
->imm
.i
[2], param
->imm
.i
[3]);
274 case NINED3DSPTYPE_BOOL
:
275 DUMP("%s", param
->imm
.b
? "TRUE" : "FALSE");
284 sm1_dump_src_param(const struct sm1_src_param
*param
)
286 if (param
->file
== NINED3DSPR_IMMEDIATE
) {
287 assert(!param
->mod
&&
289 param
->swizzle
== NINED3DSP_NOSWIZZLE
);
290 sm1_dump_immediate(param
);
295 DUMP("%s(", sm1_mod_str
[param
->mod
]);
297 DUMP("%c[", sm1_file_char
[param
->file
]);
298 sm1_dump_src_param(param
->rel
);
299 DUMP("+%i]", param
->idx
);
301 sm1_dump_reg(param
->file
, param
->idx
);
305 if (param
->swizzle
!= NINED3DSP_NOSWIZZLE
) {
307 sm1_dump_swizzle(param
->swizzle
);
312 sm1_dump_dst_param(const struct sm1_dst_param
*param
)
314 if (param
->mod
& NINED3DSPDM_SATURATE
)
316 if (param
->mod
& NINED3DSPDM_PARTIALP
)
318 if (param
->mod
& NINED3DSPDM_CENTROID
)
320 if (param
->shift
< 0)
321 DUMP("/%u ", 1 << -param
->shift
);
322 if (param
->shift
> 0)
323 DUMP("*%u ", 1 << param
->shift
);
326 DUMP("%c[", sm1_file_char
[param
->file
]);
327 sm1_dump_src_param(param
->rel
);
328 DUMP("+%i]", param
->idx
);
330 sm1_dump_reg(param
->file
, param
->idx
);
332 if (param
->mask
!= NINED3DSP_WRITEMASK_ALL
) {
334 sm1_dump_writemask(param
->mask
);
340 struct sm1_dst_param reg
;
348 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
349 * should be ignored completely */
351 unsigned opcode
; /* TGSI_OPCODE_x */
353 /* versions are still set even if handler is set */
357 } vert_version
, frag_version
;
359 /* number of regs parsed outside of special handler */
363 /* some instructions don't map perfectly, so use a special handler */
364 translate_instruction_func handler
;
367 struct sm1_instruction
369 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode
;
375 struct sm1_src_param src
[4];
376 struct sm1_src_param src_rel
[4];
377 struct sm1_src_param pred
;
378 struct sm1_src_param dst_rel
[1];
379 struct sm1_dst_param dst
[1];
381 struct sm1_op_info
*info
;
385 sm1_dump_instruction(struct sm1_instruction
*insn
, unsigned indent
)
389 /* no info stored for these: */
390 if (insn
->opcode
== D3DSIO_DCL
)
392 for (i
= 0; i
< indent
; ++i
)
395 if (insn
->predicated
) {
397 sm1_dump_src_param(&insn
->pred
);
400 DUMP("%s", d3dsio_to_string(insn
->opcode
));
402 switch (insn
->opcode
) {
404 DUMP(insn
->flags
== NINED3DSI_TEXLD_PROJECT
? "p" : "b");
407 DUMP("_%x", insn
->flags
);
415 for (i
= 0; i
< insn
->ndst
&& i
< ARRAY_SIZE(insn
->dst
); ++i
) {
416 sm1_dump_dst_param(&insn
->dst
[i
]);
420 for (i
= 0; i
< insn
->nsrc
&& i
< ARRAY_SIZE(insn
->src
); ++i
) {
421 sm1_dump_src_param(&insn
->src
[i
]);
424 if (insn
->opcode
== D3DSIO_DEF
||
425 insn
->opcode
== D3DSIO_DEFI
||
426 insn
->opcode
== D3DSIO_DEFB
)
427 sm1_dump_immediate(&insn
->src
[0]);
432 struct sm1_local_const
436 float f
[4]; /* for indirect addressing of float constants */
439 struct shader_translator
441 const DWORD
*byte_code
;
443 const DWORD
*parse_next
;
445 struct ureg_program
*ureg
;
452 unsigned processor
; /* PIPE_SHADER_VERTEX/FRAMGENT */
453 unsigned num_constf_allowed
;
454 unsigned num_consti_allowed
;
455 unsigned num_constb_allowed
;
457 boolean native_integers
;
458 boolean inline_subroutines
;
459 boolean want_texcoord
;
461 boolean wpos_is_sysval
;
462 boolean face_is_sysval_integer
;
463 unsigned texcoord_sn
;
465 struct sm1_instruction insn
; /* current instruction */
469 struct ureg_dst oPos
;
470 struct ureg_dst oPos_out
; /* the real output when doing streamout */
471 struct ureg_dst oFog
;
472 struct ureg_dst oPts
;
473 struct ureg_dst oCol
[4];
474 struct ureg_dst o
[PIPE_MAX_SHADER_OUTPUTS
];
475 struct ureg_dst oDepth
;
476 struct ureg_src v
[PIPE_MAX_SHADER_INPUTS
];
477 struct ureg_src v_consecutive
; /* copy in temp array of ps inputs for rel addressing */
478 struct ureg_src vPos
;
479 struct ureg_src vFace
;
482 struct ureg_dst address
;
484 struct ureg_dst tS
[8]; /* texture stage registers */
485 struct ureg_dst tdst
; /* scratch dst if we need extra modifiers */
486 struct ureg_dst t
[5]; /* scratch TEMPs */
487 struct ureg_src vC
[2]; /* PS color in */
488 struct ureg_src vT
[8]; /* PS texcoord in */
489 struct ureg_dst rL
[NINE_MAX_LOOP_DEPTH
]; /* loop ctr */
491 unsigned num_temp
; /* ARRAY_SIZE(regs.r) */
492 unsigned num_scratch
;
494 unsigned loop_depth_max
;
496 unsigned loop_labels
[NINE_MAX_LOOP_DEPTH
];
497 unsigned cond_labels
[NINE_MAX_COND_DEPTH
];
498 boolean loop_or_rep
[NINE_MAX_LOOP_DEPTH
]; /* true: loop, false: rep */
500 unsigned *inst_labels
; /* LABEL op */
501 unsigned num_inst_labels
;
503 unsigned sampler_targets
[NINE_MAX_SAMPLERS
]; /* TGSI_TEXTURE_x */
505 struct sm1_local_const
*lconstf
;
506 unsigned num_lconstf
;
507 struct sm1_local_const
*lconsti
;
508 unsigned num_lconsti
;
509 struct sm1_local_const
*lconstb
;
510 unsigned num_lconstb
;
512 boolean indirect_const_access
;
515 struct nine_vs_output_info output_info
[16];
518 struct nine_shader_info
*info
;
520 int16_t op_info_map
[D3DSIO_BREAKP
+ 1];
523 #define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
524 #define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
526 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
529 sm1_read_semantic(struct shader_translator
*, struct sm1_semantic
*);
532 sm1_instruction_check(const struct sm1_instruction
*insn
)
534 if (insn
->opcode
== D3DSIO_CRS
)
536 if (insn
->dst
[0].mask
& NINED3DSP_WRITEMASK_3
)
544 nine_record_outputs(struct shader_translator
*tx
, BYTE Usage
, BYTE UsageIndex
,
545 int mask
, int output_index
)
547 tx
->output_info
[tx
->num_outputs
].output_semantic
= Usage
;
548 tx
->output_info
[tx
->num_outputs
].output_semantic_index
= UsageIndex
;
549 tx
->output_info
[tx
->num_outputs
].mask
= mask
;
550 tx
->output_info
[tx
->num_outputs
].output_index
= output_index
;
555 tx_lconstf(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
559 if (index
< 0 || index
>= tx
->num_constf_allowed
) {
563 for (i
= 0; i
< tx
->num_lconstf
; ++i
) {
564 if (tx
->lconstf
[i
].idx
== index
) {
565 *src
= tx
->lconstf
[i
].reg
;
572 tx_lconsti(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
576 if (index
< 0 || index
>= tx
->num_consti_allowed
) {
580 for (i
= 0; i
< tx
->num_lconsti
; ++i
) {
581 if (tx
->lconsti
[i
].idx
== index
) {
582 *src
= tx
->lconsti
[i
].reg
;
589 tx_lconstb(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
593 if (index
< 0 || index
>= tx
->num_constb_allowed
) {
597 for (i
= 0; i
< tx
->num_lconstb
; ++i
) {
598 if (tx
->lconstb
[i
].idx
== index
) {
599 *src
= tx
->lconstb
[i
].reg
;
607 tx_set_lconstf(struct shader_translator
*tx
, INT index
, float f
[4])
611 FAILURE_VOID(index
< 0 || index
>= tx
->num_constf_allowed
)
613 for (n
= 0; n
< tx
->num_lconstf
; ++n
)
614 if (tx
->lconstf
[n
].idx
== index
)
616 if (n
== tx
->num_lconstf
) {
618 tx
->lconstf
= REALLOC(tx
->lconstf
,
619 (n
+ 0) * sizeof(tx
->lconstf
[0]),
620 (n
+ 8) * sizeof(tx
->lconstf
[0]));
625 tx
->lconstf
[n
].idx
= index
;
626 tx
->lconstf
[n
].reg
= ureg_imm4f(tx
->ureg
, f
[0], f
[1], f
[2], f
[3]);
628 memcpy(tx
->lconstf
[n
].f
, f
, sizeof(tx
->lconstf
[n
].f
));
631 tx_set_lconsti(struct shader_translator
*tx
, INT index
, int i
[4])
635 FAILURE_VOID(index
< 0 || index
>= tx
->num_consti_allowed
)
637 for (n
= 0; n
< tx
->num_lconsti
; ++n
)
638 if (tx
->lconsti
[n
].idx
== index
)
640 if (n
== tx
->num_lconsti
) {
642 tx
->lconsti
= REALLOC(tx
->lconsti
,
643 (n
+ 0) * sizeof(tx
->lconsti
[0]),
644 (n
+ 8) * sizeof(tx
->lconsti
[0]));
650 tx
->lconsti
[n
].idx
= index
;
651 tx
->lconsti
[n
].reg
= tx
->native_integers
?
652 ureg_imm4i(tx
->ureg
, i
[0], i
[1], i
[2], i
[3]) :
653 ureg_imm4f(tx
->ureg
, i
[0], i
[1], i
[2], i
[3]);
656 tx_set_lconstb(struct shader_translator
*tx
, INT index
, BOOL b
)
660 FAILURE_VOID(index
< 0 || index
>= tx
->num_constb_allowed
)
662 for (n
= 0; n
< tx
->num_lconstb
; ++n
)
663 if (tx
->lconstb
[n
].idx
== index
)
665 if (n
== tx
->num_lconstb
) {
667 tx
->lconstb
= REALLOC(tx
->lconstb
,
668 (n
+ 0) * sizeof(tx
->lconstb
[0]),
669 (n
+ 8) * sizeof(tx
->lconstb
[0]));
675 tx
->lconstb
[n
].idx
= index
;
676 tx
->lconstb
[n
].reg
= tx
->native_integers
?
677 ureg_imm1u(tx
->ureg
, b
? 0xffffffff : 0) :
678 ureg_imm1f(tx
->ureg
, b
? 1.0f
: 0.0f
);
681 static inline struct ureg_dst
682 tx_scratch(struct shader_translator
*tx
)
684 if (tx
->num_scratch
>= ARRAY_SIZE(tx
->regs
.t
)) {
686 return tx
->regs
.t
[0];
688 if (ureg_dst_is_undef(tx
->regs
.t
[tx
->num_scratch
]))
689 tx
->regs
.t
[tx
->num_scratch
] = ureg_DECL_local_temporary(tx
->ureg
);
690 return tx
->regs
.t
[tx
->num_scratch
++];
693 static inline struct ureg_dst
694 tx_scratch_scalar(struct shader_translator
*tx
)
696 return ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
699 static inline struct ureg_src
700 tx_src_scalar(struct ureg_dst dst
)
702 struct ureg_src src
= ureg_src(dst
);
703 int c
= ffs(dst
.WriteMask
) - 1;
704 if (dst
.WriteMask
== (1 << c
))
705 src
= ureg_scalar(src
, c
);
710 tx_temp_alloc(struct shader_translator
*tx
, INT idx
)
713 if (idx
>= tx
->num_temp
) {
714 unsigned k
= tx
->num_temp
;
715 unsigned n
= idx
+ 1;
716 tx
->regs
.r
= REALLOC(tx
->regs
.r
,
717 k
* sizeof(tx
->regs
.r
[0]),
718 n
* sizeof(tx
->regs
.r
[0]));
720 tx
->regs
.r
[k
] = ureg_dst_undef();
723 if (ureg_dst_is_undef(tx
->regs
.r
[idx
]))
724 tx
->regs
.r
[idx
] = ureg_DECL_temporary(tx
->ureg
);
728 tx_addr_alloc(struct shader_translator
*tx
, INT idx
)
731 if (ureg_dst_is_undef(tx
->regs
.address
))
732 tx
->regs
.address
= ureg_DECL_address(tx
->ureg
);
733 if (ureg_dst_is_undef(tx
->regs
.a0
))
734 tx
->regs
.a0
= ureg_DECL_temporary(tx
->ureg
);
737 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
738 * the projection should be applied on the texture. It doesn't
740 * The doc is very imprecise here (it says the projection is done
741 * before rasterization, thus in vs, which seems wrong since ps instructions
742 * are affected differently)
743 * For now we only apply to the ps TEX instruction and TEXBEM.
744 * Perhaps some other instructions would need it */
746 apply_ps1x_projection(struct shader_translator
*tx
, struct ureg_dst dst
,
747 struct ureg_src src
, INT idx
)
750 unsigned dim
= 1 + ((tx
->info
->projected
>> (2 * idx
)) & 3);
754 ureg_MOV(tx
->ureg
, dst
, src
);
756 tmp
= tx_scratch_scalar(tx
);
757 ureg_RCP(tx
->ureg
, tmp
, ureg_scalar(src
, dim
-1));
758 ureg_MUL(tx
->ureg
, dst
, tx_src_scalar(tmp
), src
);
763 TEX_with_ps1x_projection(struct shader_translator
*tx
, struct ureg_dst dst
,
764 unsigned target
, struct ureg_src src0
,
765 struct ureg_src src1
, INT idx
)
767 unsigned dim
= 1 + ((tx
->info
->projected
>> (2 * idx
)) & 3);
770 /* dim == 1: no projection
771 * Looks like must be disabled when it makes no
772 * sense according the texture dimensions
774 if (dim
== 1 || dim
<= target
) {
775 ureg_TEX(tx
->ureg
, dst
, target
, src0
, src1
);
776 } else if (dim
== 4) {
777 ureg_TXP(tx
->ureg
, dst
, target
, src0
, src1
);
779 tmp
= tx_scratch(tx
);
780 apply_ps1x_projection(tx
, tmp
, src0
, idx
);
781 ureg_TEX(tx
->ureg
, dst
, target
, ureg_src(tmp
), src1
);
786 tx_texcoord_alloc(struct shader_translator
*tx
, INT idx
)
789 assert(idx
>= 0 && idx
< ARRAY_SIZE(tx
->regs
.vT
));
790 if (ureg_src_is_undef(tx
->regs
.vT
[idx
]))
791 tx
->regs
.vT
[idx
] = ureg_DECL_fs_input(tx
->ureg
, tx
->texcoord_sn
, idx
,
792 TGSI_INTERPOLATE_PERSPECTIVE
);
795 static inline unsigned *
796 tx_bgnloop(struct shader_translator
*tx
)
799 if (tx
->loop_depth_max
< tx
->loop_depth
)
800 tx
->loop_depth_max
= tx
->loop_depth
;
801 assert(tx
->loop_depth
< NINE_MAX_LOOP_DEPTH
);
802 return &tx
->loop_labels
[tx
->loop_depth
- 1];
805 static inline unsigned *
806 tx_endloop(struct shader_translator
*tx
)
808 assert(tx
->loop_depth
);
810 ureg_fixup_label(tx
->ureg
, tx
->loop_labels
[tx
->loop_depth
],
811 ureg_get_instruction_number(tx
->ureg
));
812 return &tx
->loop_labels
[tx
->loop_depth
];
815 static struct ureg_dst
816 tx_get_loopctr(struct shader_translator
*tx
, boolean loop_or_rep
)
818 const unsigned l
= tx
->loop_depth
- 1;
822 DBG("loop counter requested outside of loop\n");
823 return ureg_dst_undef();
826 if (ureg_dst_is_undef(tx
->regs
.rL
[l
])) {
827 /* loop or rep ctr creation */
828 tx
->regs
.rL
[l
] = ureg_DECL_local_temporary(tx
->ureg
);
829 tx
->loop_or_rep
[l
] = loop_or_rep
;
831 /* loop - rep - endloop - endrep not allowed */
832 assert(tx
->loop_or_rep
[l
] == loop_or_rep
);
834 return tx
->regs
.rL
[l
];
837 static struct ureg_src
838 tx_get_loopal(struct shader_translator
*tx
)
840 int loop_level
= tx
->loop_depth
- 1;
842 while (loop_level
>= 0) {
843 /* handle loop - rep - endrep - endloop case */
844 if (tx
->loop_or_rep
[loop_level
])
845 /* the value is in the loop counter y component (nine implementation) */
846 return ureg_scalar(ureg_src(tx
->regs
.rL
[loop_level
]), TGSI_SWIZZLE_Y
);
850 DBG("aL counter requested outside of loop\n");
851 return ureg_src_undef();
854 static inline unsigned *
855 tx_cond(struct shader_translator
*tx
)
857 assert(tx
->cond_depth
<= NINE_MAX_COND_DEPTH
);
859 return &tx
->cond_labels
[tx
->cond_depth
- 1];
862 static inline unsigned *
863 tx_elsecond(struct shader_translator
*tx
)
865 assert(tx
->cond_depth
);
866 return &tx
->cond_labels
[tx
->cond_depth
- 1];
870 tx_endcond(struct shader_translator
*tx
)
872 assert(tx
->cond_depth
);
874 ureg_fixup_label(tx
->ureg
, tx
->cond_labels
[tx
->cond_depth
],
875 ureg_get_instruction_number(tx
->ureg
));
878 static inline struct ureg_dst
879 nine_ureg_dst_register(unsigned file
, int index
)
881 return ureg_dst(ureg_src_register(file
, index
));
884 static inline struct ureg_src
885 nine_get_position_input(struct shader_translator
*tx
)
887 struct ureg_program
*ureg
= tx
->ureg
;
889 if (tx
->wpos_is_sysval
)
890 return ureg_DECL_system_value(ureg
, TGSI_SEMANTIC_POSITION
, 0);
892 return ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_POSITION
,
893 0, TGSI_INTERPOLATE_LINEAR
);
896 static struct ureg_src
897 tx_src_param(struct shader_translator
*tx
, const struct sm1_src_param
*param
)
899 struct ureg_program
*ureg
= tx
->ureg
;
907 tx_temp_alloc(tx
, param
->idx
);
908 src
= ureg_src(tx
->regs
.r
[param
->idx
]);
910 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
914 assert(param
->idx
== 0);
915 /* the address register (vs only) must be
916 * assigned before use */
917 assert(!ureg_dst_is_undef(tx
->regs
.a0
));
918 /* Round to lowest for vs1.1 (contrary to the doc), else
919 * round to nearest */
920 if (tx
->version
.major
< 2 && tx
->version
.minor
< 2)
921 ureg_ARL(ureg
, tx
->regs
.address
, ureg_src(tx
->regs
.a0
));
923 ureg_ARR(ureg
, tx
->regs
.address
, ureg_src(tx
->regs
.a0
));
924 src
= ureg_src(tx
->regs
.address
);
926 if (tx
->version
.major
< 2 && tx
->version
.minor
< 4) {
927 /* no subroutines, so should be defined */
928 src
= ureg_src(tx
->regs
.tS
[param
->idx
]);
930 tx_texcoord_alloc(tx
, param
->idx
);
931 src
= tx
->regs
.vT
[param
->idx
];
937 src
= ureg_src_register(TGSI_FILE_INPUT
, param
->idx
);
939 if (tx
->version
.major
< 3) {
941 src
= ureg_DECL_fs_input_cyl_centroid(
942 ureg
, TGSI_SEMANTIC_COLOR
, param
->idx
,
943 TGSI_INTERPOLATE_COLOR
, 0,
944 tx
->info
->force_color_in_centroid
?
945 TGSI_INTERPOLATE_LOC_CENTROID
: 0,
949 /* Copy all inputs (non consecutive)
950 * to temp array (consecutive).
951 * This is not good for performance.
952 * A better way would be to have inputs
953 * consecutive (would need implement alternative
954 * way to match vs outputs and ps inputs).
955 * However even with the better way, the temp array
956 * copy would need to be used if some inputs
957 * are not GENERIC or if they have different
958 * interpolation flag. */
959 if (ureg_src_is_undef(tx
->regs
.v_consecutive
)) {
961 tx
->regs
.v_consecutive
= ureg_src(ureg_DECL_array_temporary(ureg
, 10, 0));
962 for (i
= 0; i
< 10; i
++) {
963 if (!ureg_src_is_undef(tx
->regs
.v
[i
]))
964 ureg_MOV(ureg
, ureg_dst_array_offset(ureg_dst(tx
->regs
.v_consecutive
), i
), tx
->regs
.v
[i
]);
966 ureg_MOV(ureg
, ureg_dst_array_offset(ureg_dst(tx
->regs
.v_consecutive
), i
), ureg_imm4f(ureg
, 0.0f
, 0.0f
, 0.0f
, 1.0f
));
969 src
= ureg_src_array_offset(tx
->regs
.v_consecutive
, param
->idx
);
971 assert(param
->idx
< ARRAY_SIZE(tx
->regs
.v
));
972 src
= tx
->regs
.v
[param
->idx
];
977 case D3DSPR_PREDICATE
:
978 assert(!"D3DSPR_PREDICATE");
981 assert(param
->mod
== NINED3DSPSM_NONE
);
982 assert(param
->swizzle
== NINED3DSP_NOSWIZZLE
);
984 src
= ureg_src_register(TGSI_FILE_SAMPLER
, param
->idx
);
987 assert(!param
->rel
|| IS_VS
);
989 tx
->indirect_const_access
= TRUE
;
990 if (param
->rel
|| !tx_lconstf(tx
, &src
, param
->idx
)) {
992 nine_info_mark_const_f_used(tx
->info
, param
->idx
);
993 /* swvp constant handling: we use two buffers
994 * to fit all the float constants. The special handling
995 * doesn't need to be elsewhere, because all the instructions
996 * accessing the constants directly are VS1, and swvp
998 if (IS_VS
&& tx
->info
->swvp_on
) {
1000 if (param
->idx
< 4096) {
1001 src
= ureg_src_register(TGSI_FILE_CONSTANT
, param
->idx
);
1002 src
= ureg_src_dimension(src
, 0);
1004 src
= ureg_src_register(TGSI_FILE_CONSTANT
, param
->idx
- 4096);
1005 src
= ureg_src_dimension(src
, 1);
1008 src
= ureg_src_register(TGSI_FILE_CONSTANT
, param
->idx
); /* TODO: swvp rel > 4096 */
1009 src
= ureg_src_dimension(src
, 0);
1012 src
= NINE_CONSTANT_SRC(param
->idx
);
1014 if (!IS_VS
&& tx
->version
.major
< 2) {
1015 /* ps 1.X clamps constants */
1016 tmp
= tx_scratch(tx
);
1017 ureg_MIN(ureg
, tmp
, src
, ureg_imm1f(ureg
, 1.0f
));
1018 ureg_MAX(ureg
, tmp
, ureg_src(tmp
), ureg_imm1f(ureg
, -1.0f
));
1019 src
= ureg_src(tmp
);
1025 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1026 assert(!"CONST2/3/4");
1027 src
= ureg_imm1f(ureg
, 0.0f
);
1029 case D3DSPR_CONSTINT
:
1030 /* relative addressing only possible for float constants in vs */
1031 assert(!param
->rel
);
1032 if (!tx_lconsti(tx
, &src
, param
->idx
)) {
1033 nine_info_mark_const_i_used(tx
->info
, param
->idx
);
1034 if (IS_VS
&& tx
->info
->swvp_on
) {
1035 src
= ureg_src_register(TGSI_FILE_CONSTANT
, param
->idx
);
1036 src
= ureg_src_dimension(src
, 2);
1038 src
= NINE_CONSTANT_SRC(tx
->info
->const_i_base
+ param
->idx
);
1041 case D3DSPR_CONSTBOOL
:
1042 assert(!param
->rel
);
1043 if (!tx_lconstb(tx
, &src
, param
->idx
)) {
1044 char r
= param
->idx
/ 4;
1045 char s
= param
->idx
& 3;
1046 nine_info_mark_const_b_used(tx
->info
, param
->idx
);
1047 if (IS_VS
&& tx
->info
->swvp_on
) {
1048 src
= ureg_src_register(TGSI_FILE_CONSTANT
, r
);
1049 src
= ureg_src_dimension(src
, 3);
1051 src
= NINE_CONSTANT_SRC(tx
->info
->const_b_base
+ r
);
1052 src
= ureg_swizzle(src
, s
, s
, s
, s
);
1056 if (ureg_dst_is_undef(tx
->regs
.address
))
1057 tx
->regs
.address
= ureg_DECL_address(ureg
);
1058 if (!tx
->native_integers
)
1059 ureg_ARR(ureg
, tx
->regs
.address
, tx_get_loopal(tx
));
1061 ureg_UARL(ureg
, tx
->regs
.address
, tx_get_loopal(tx
));
1062 src
= ureg_src(tx
->regs
.address
);
1064 case D3DSPR_MISCTYPE
:
1065 switch (param
->idx
) {
1066 case D3DSMO_POSITION
:
1067 if (ureg_src_is_undef(tx
->regs
.vPos
))
1068 tx
->regs
.vPos
= nine_get_position_input(tx
);
1069 if (tx
->shift_wpos
) {
1070 /* TODO: do this only once */
1071 struct ureg_dst wpos
= tx_scratch(tx
);
1072 ureg_ADD(ureg
, wpos
, tx
->regs
.vPos
,
1073 ureg_imm4f(ureg
, -0.5f
, -0.5f
, 0.0f
, 0.0f
));
1074 src
= ureg_src(wpos
);
1076 src
= tx
->regs
.vPos
;
1080 if (ureg_src_is_undef(tx
->regs
.vFace
)) {
1081 if (tx
->face_is_sysval_integer
) {
1082 tmp
= tx_scratch(tx
);
1084 ureg_DECL_system_value(ureg
, TGSI_SEMANTIC_FACE
, 0);
1086 /* convert bool to float */
1087 ureg_UCMP(ureg
, tmp
, ureg_scalar(tx
->regs
.vFace
, TGSI_SWIZZLE_X
),
1088 ureg_imm1f(ureg
, 1), ureg_imm1f(ureg
, -1));
1089 tx
->regs
.vFace
= ureg_src(tmp
);
1091 tx
->regs
.vFace
= ureg_DECL_fs_input(ureg
,
1092 TGSI_SEMANTIC_FACE
, 0,
1093 TGSI_INTERPOLATE_CONSTANT
);
1095 tx
->regs
.vFace
= ureg_scalar(tx
->regs
.vFace
, TGSI_SWIZZLE_X
);
1097 src
= tx
->regs
.vFace
;
1100 assert(!"invalid src D3DSMO");
1103 assert(!param
->rel
);
1105 case D3DSPR_TEMPFLOAT16
:
1108 assert(!"invalid src D3DSPR");
1111 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
1113 switch (param
->mod
) {
1114 case NINED3DSPSM_DW
:
1115 tmp
= tx_scratch(tx
);
1116 /* NOTE: app is not allowed to read w with this modifier */
1117 ureg_RCP(ureg
, ureg_writemask(tmp
, NINED3DSP_WRITEMASK_3
), ureg_scalar(src
, TGSI_SWIZZLE_W
));
1118 ureg_MUL(ureg
, tmp
, src
, ureg_swizzle(ureg_src(tmp
), NINE_SWIZZLE4(W
,W
,W
,W
)));
1119 src
= ureg_src(tmp
);
1121 case NINED3DSPSM_DZ
:
1122 tmp
= tx_scratch(tx
);
1123 /* NOTE: app is not allowed to read z with this modifier */
1124 ureg_RCP(ureg
, ureg_writemask(tmp
, NINED3DSP_WRITEMASK_2
), ureg_scalar(src
, TGSI_SWIZZLE_Z
));
1125 ureg_MUL(ureg
, tmp
, src
, ureg_swizzle(ureg_src(tmp
), NINE_SWIZZLE4(Z
,Z
,Z
,Z
)));
1126 src
= ureg_src(tmp
);
1132 if (param
->swizzle
!= NINED3DSP_NOSWIZZLE
)
1133 src
= ureg_swizzle(src
,
1134 (param
->swizzle
>> 0) & 0x3,
1135 (param
->swizzle
>> 2) & 0x3,
1136 (param
->swizzle
>> 4) & 0x3,
1137 (param
->swizzle
>> 6) & 0x3);
1139 switch (param
->mod
) {
1140 case NINED3DSPSM_ABS
:
1141 src
= ureg_abs(src
);
1143 case NINED3DSPSM_ABSNEG
:
1144 src
= ureg_negate(ureg_abs(src
));
1146 case NINED3DSPSM_NEG
:
1147 src
= ureg_negate(src
);
1149 case NINED3DSPSM_BIAS
:
1150 tmp
= tx_scratch(tx
);
1151 ureg_ADD(ureg
, tmp
, src
, ureg_imm1f(ureg
, -0.5f
));
1152 src
= ureg_src(tmp
);
1154 case NINED3DSPSM_BIASNEG
:
1155 tmp
= tx_scratch(tx
);
1156 ureg_ADD(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), ureg_negate(src
));
1157 src
= ureg_src(tmp
);
1159 case NINED3DSPSM_NOT
:
1160 if (tx
->native_integers
) {
1161 tmp
= tx_scratch(tx
);
1162 ureg_NOT(ureg
, tmp
, src
);
1163 src
= ureg_src(tmp
);
1167 case NINED3DSPSM_COMP
:
1168 tmp
= tx_scratch(tx
);
1169 ureg_ADD(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), ureg_negate(src
));
1170 src
= ureg_src(tmp
);
1172 case NINED3DSPSM_DZ
:
1173 case NINED3DSPSM_DW
:
1174 /* Already handled*/
1176 case NINED3DSPSM_SIGN
:
1177 tmp
= tx_scratch(tx
);
1178 ureg_MAD(ureg
, tmp
, src
, ureg_imm1f(ureg
, 2.0f
), ureg_imm1f(ureg
, -1.0f
));
1179 src
= ureg_src(tmp
);
1181 case NINED3DSPSM_SIGNNEG
:
1182 tmp
= tx_scratch(tx
);
1183 ureg_MAD(ureg
, tmp
, src
, ureg_imm1f(ureg
, -2.0f
), ureg_imm1f(ureg
, 1.0f
));
1184 src
= ureg_src(tmp
);
1186 case NINED3DSPSM_X2
:
1187 tmp
= tx_scratch(tx
);
1188 ureg_ADD(ureg
, tmp
, src
, src
);
1189 src
= ureg_src(tmp
);
1191 case NINED3DSPSM_X2NEG
:
1192 tmp
= tx_scratch(tx
);
1193 ureg_ADD(ureg
, tmp
, src
, src
);
1194 src
= ureg_negate(ureg_src(tmp
));
1197 assert(param
->mod
== NINED3DSPSM_NONE
);
1204 static struct ureg_dst
1205 _tx_dst_param(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1207 struct ureg_dst dst
;
1209 switch (param
->file
)
1212 assert(!param
->rel
);
1213 tx_temp_alloc(tx
, param
->idx
);
1214 dst
= tx
->regs
.r
[param
->idx
];
1216 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1218 assert(!param
->rel
);
1219 if (tx
->version
.major
< 2 && !IS_VS
) {
1220 if (ureg_dst_is_undef(tx
->regs
.tS
[param
->idx
]))
1221 tx
->regs
.tS
[param
->idx
] = ureg_DECL_temporary(tx
->ureg
);
1222 dst
= tx
->regs
.tS
[param
->idx
];
1224 if (!IS_VS
&& tx
->insn
.opcode
== D3DSIO_TEXKILL
) { /* maybe others, too */
1225 tx_texcoord_alloc(tx
, param
->idx
);
1226 dst
= ureg_dst(tx
->regs
.vT
[param
->idx
]);
1228 tx_addr_alloc(tx
, param
->idx
);
1232 case D3DSPR_RASTOUT
:
1233 assert(!param
->rel
);
1234 switch (param
->idx
) {
1236 if (ureg_dst_is_undef(tx
->regs
.oPos
))
1238 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0);
1239 dst
= tx
->regs
.oPos
;
1242 if (ureg_dst_is_undef(tx
->regs
.oFog
))
1244 ureg_saturate(ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_FOG
, 0));
1245 dst
= tx
->regs
.oFog
;
1248 if (ureg_dst_is_undef(tx
->regs
.oPts
))
1249 tx
->regs
.oPts
= ureg_DECL_temporary(tx
->ureg
);
1250 dst
= tx
->regs
.oPts
;
1257 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1259 if (tx
->version
.major
< 3) {
1260 assert(!param
->rel
);
1261 dst
= ureg_DECL_output(tx
->ureg
, tx
->texcoord_sn
, param
->idx
);
1263 assert(!param
->rel
); /* TODO */
1264 assert(param
->idx
< ARRAY_SIZE(tx
->regs
.o
));
1265 dst
= tx
->regs
.o
[param
->idx
];
1268 case D3DSPR_ATTROUT
: /* VS */
1269 case D3DSPR_COLOROUT
: /* PS */
1270 assert(param
->idx
>= 0 && param
->idx
< 4);
1271 assert(!param
->rel
);
1272 tx
->info
->rt_mask
|= 1 << param
->idx
;
1273 if (ureg_dst_is_undef(tx
->regs
.oCol
[param
->idx
])) {
1274 /* ps < 3: oCol[0] will have fog blending afterward */
1275 if (!IS_VS
&& tx
->version
.major
< 3 && param
->idx
== 0) {
1276 tx
->regs
.oCol
[0] = ureg_DECL_temporary(tx
->ureg
);
1278 tx
->regs
.oCol
[param
->idx
] =
1279 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_COLOR
, param
->idx
);
1282 dst
= tx
->regs
.oCol
[param
->idx
];
1283 if (IS_VS
&& tx
->version
.major
< 3)
1284 dst
= ureg_saturate(dst
);
1286 case D3DSPR_DEPTHOUT
:
1287 assert(!param
->rel
);
1288 if (ureg_dst_is_undef(tx
->regs
.oDepth
))
1290 ureg_DECL_output_masked(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0,
1291 TGSI_WRITEMASK_Z
, 0, 1);
1292 dst
= tx
->regs
.oDepth
; /* XXX: must write .z component */
1294 case D3DSPR_PREDICATE
:
1295 assert(!"D3DSPR_PREDICATE");
1297 case D3DSPR_TEMPFLOAT16
:
1298 DBG("unhandled D3DSPR: %u\n", param
->file
);
1301 assert(!"invalid dst D3DSPR");
1305 dst
= ureg_dst_indirect(dst
, tx_src_param(tx
, param
->rel
));
1307 if (param
->mask
!= NINED3DSP_WRITEMASK_ALL
)
1308 dst
= ureg_writemask(dst
, param
->mask
);
1309 if (param
->mod
& NINED3DSPDM_SATURATE
)
1310 dst
= ureg_saturate(dst
);
1315 static struct ureg_dst
1316 tx_dst_param(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1319 tx
->regs
.tdst
= ureg_writemask(tx_scratch(tx
), param
->mask
);
1320 return tx
->regs
.tdst
;
1322 return _tx_dst_param(tx
, param
);
1326 tx_apply_dst0_modifiers(struct shader_translator
*tx
)
1328 struct ureg_dst rdst
;
1331 if (!tx
->insn
.ndst
|| !tx
->insn
.dst
[0].shift
|| tx
->insn
.opcode
== D3DSIO_TEXKILL
)
1333 rdst
= _tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1335 assert(rdst
.File
!= TGSI_FILE_ADDRESS
); /* this probably isn't possible */
1337 if (tx
->insn
.dst
[0].shift
< 0)
1338 f
= 1.0f
/ (1 << -tx
->insn
.dst
[0].shift
);
1340 f
= 1 << tx
->insn
.dst
[0].shift
;
1342 ureg_MUL(tx
->ureg
, rdst
, ureg_src(tx
->regs
.tdst
), ureg_imm1f(tx
->ureg
, f
));
1345 static struct ureg_src
1346 tx_dst_param_as_src(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1348 struct ureg_src src
;
1350 assert(!param
->shift
);
1351 assert(!(param
->mod
& NINED3DSPDM_SATURATE
));
1353 switch (param
->file
) {
1356 src
= ureg_src_register(TGSI_FILE_INPUT
, param
->idx
);
1358 assert(!param
->rel
);
1359 assert(param
->idx
< ARRAY_SIZE(tx
->regs
.v
));
1360 src
= tx
->regs
.v
[param
->idx
];
1364 src
= ureg_src(tx_dst_param(tx
, param
));
1368 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
1371 WARN("mask is 0, using identity swizzle\n");
1373 if (param
->mask
&& param
->mask
!= NINED3DSP_WRITEMASK_ALL
) {
1377 for (n
= 0, c
= 0; c
< 4; ++c
)
1378 if (param
->mask
& (1 << c
))
1381 for (c
= n
; c
< 4; ++c
)
1383 src
= ureg_swizzle(src
, s
[0], s
[1], s
[2], s
[3]);
1389 NineTranslateInstruction_Mkxn(struct shader_translator
*tx
, const unsigned k
, const unsigned n
)
1391 struct ureg_program
*ureg
= tx
->ureg
;
1392 struct ureg_dst dst
;
1393 struct ureg_src src
[2];
1394 struct sm1_src_param
*src_mat
= &tx
->insn
.src
[1];
1397 dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1398 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1400 for (i
= 0; i
< n
; i
++)
1402 const unsigned m
= (1 << i
);
1404 src
[1] = tx_src_param(tx
, src_mat
);
1407 if (!(dst
.WriteMask
& m
))
1410 /* XXX: src == dst case ? */
1414 ureg_DP3(ureg
, ureg_writemask(dst
, m
), src
[0], src
[1]);
1417 ureg_DP4(ureg
, ureg_writemask(dst
, m
), src
[0], src
[1]);
1420 DBG("invalid operation: M%ux%u\n", m
, n
);
1428 #define VNOTSUPPORTED 0, 0
1429 #define V(maj, min) (((maj) << 8) | (min))
1431 static inline const char *
1432 d3dsio_to_string( unsigned opcode
)
1434 static const char *names
[] = {
1534 if (opcode
< ARRAY_SIZE(names
)) return names
[opcode
];
1537 case D3DSIO_PHASE
: return "PHASE";
1538 case D3DSIO_COMMENT
: return "COMMENT";
1539 case D3DSIO_END
: return "END";
1545 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1546 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1547 (inst).vert_version.max | \
1548 (inst).frag_version.min | \
1549 (inst).frag_version.max)
1551 #define SPECIAL(name) \
1552 NineTranslateInstruction_##name
1554 #define DECL_SPECIAL(name) \
1556 NineTranslateInstruction_##name( struct shader_translator *tx )
1559 NineTranslateInstruction_Generic(struct shader_translator
*);
1563 /* Nothing to do. NOP was used to avoid hangs
1564 * with very old d3d drivers. */
1570 struct ureg_program
*ureg
= tx
->ureg
;
1571 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1572 struct ureg_src src0
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1573 struct ureg_src src1
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1575 ureg_ADD(ureg
, dst
, src0
, ureg_negate(src1
));
1581 struct ureg_program
*ureg
= tx
->ureg
;
1582 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1583 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1585 ureg_MOV(ureg
, dst
, ureg_abs(src
));
1591 struct ureg_program
*ureg
= tx
->ureg
;
1592 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1593 struct ureg_src src0
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1594 struct ureg_src src1
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1596 ureg_MUL(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XYZ
),
1597 ureg_swizzle(src0
, TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_Z
,
1599 ureg_swizzle(src1
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_X
,
1600 TGSI_SWIZZLE_Y
, 0));
1601 ureg_MAD(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XYZ
),
1602 ureg_swizzle(src0
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_X
,
1604 ureg_negate(ureg_swizzle(src1
, TGSI_SWIZZLE_Y
,
1605 TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_X
, 0)),
1607 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
),
1608 ureg_imm1f(ureg
, 1));
1614 return NineTranslateInstruction_Mkxn(tx
, 4, 4);
1619 return NineTranslateInstruction_Mkxn(tx
, 4, 3);
1624 return NineTranslateInstruction_Mkxn(tx
, 3, 4);
1629 return NineTranslateInstruction_Mkxn(tx
, 3, 3);
1634 return NineTranslateInstruction_Mkxn(tx
, 3, 2);
1639 ureg_CMP(tx
->ureg
, tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1640 tx_src_param(tx
, &tx
->insn
.src
[0]),
1641 tx_src_param(tx
, &tx
->insn
.src
[2]),
1642 tx_src_param(tx
, &tx
->insn
.src
[1]));
1648 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1649 struct ureg_dst cgt
;
1650 struct ureg_src cnd
;
1652 /* the coissue flag was a tip for compilers to advise to
1653 * execute two operations at the same time, in cases
1654 * the two executions had same dst with different channels.
1655 * It has no effect on current hw. However it seems CND
1656 * is affected. The handling of this very specific case
1657 * handled below mimick wine behaviour */
1658 if (tx
->insn
.coissue
&& tx
->version
.major
== 1 && tx
->version
.minor
< 4 && tx
->insn
.dst
[0].mask
!= NINED3DSP_WRITEMASK_3
) {
1660 dst
, tx_src_param(tx
, &tx
->insn
.src
[1]));
1664 cnd
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1665 cgt
= tx_scratch(tx
);
1667 if (tx
->version
.major
== 1 && tx
->version
.minor
< 4)
1668 cnd
= ureg_scalar(cnd
, TGSI_SWIZZLE_W
);
1670 ureg_SGT(tx
->ureg
, cgt
, cnd
, ureg_imm1f(tx
->ureg
, 0.5f
));
1672 ureg_CMP(tx
->ureg
, dst
, ureg_negate(ureg_src(cgt
)),
1673 tx_src_param(tx
, &tx
->insn
.src
[1]),
1674 tx_src_param(tx
, &tx
->insn
.src
[2]));
1680 assert(tx
->insn
.src
[0].idx
< tx
->num_inst_labels
);
1681 ureg_CAL(tx
->ureg
, &tx
->inst_labels
[tx
->insn
.src
[0].idx
]);
1685 DECL_SPECIAL(CALLNZ
)
1687 struct ureg_program
*ureg
= tx
->ureg
;
1688 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1690 if (!tx
->native_integers
)
1691 ureg_IF(ureg
, src
, tx_cond(tx
));
1693 ureg_UIF(ureg
, src
, tx_cond(tx
));
1694 ureg_CAL(ureg
, &tx
->inst_labels
[tx
->insn
.src
[0].idx
]);
1702 struct ureg_program
*ureg
= tx
->ureg
;
1704 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1705 struct ureg_dst ctr
;
1706 struct ureg_dst tmp
;
1707 struct ureg_src ctrx
;
1709 label
= tx_bgnloop(tx
);
1710 ctr
= tx_get_loopctr(tx
, TRUE
);
1711 ctrx
= ureg_scalar(ureg_src(ctr
), TGSI_SWIZZLE_X
);
1713 /* src: num_iterations - start_value of al - step for al - 0 */
1714 ureg_MOV(ureg
, ctr
, src
);
1715 ureg_BGNLOOP(tx
->ureg
, label
);
1716 tmp
= tx_scratch_scalar(tx
);
1717 /* Initially ctr.x contains the number of iterations.
1718 * ctr.y will contain the updated value of al.
1719 * We decrease ctr.x at the end of every iteration,
1720 * and stop when it reaches 0. */
1722 if (!tx
->native_integers
) {
1723 /* case src and ctr contain floats */
1724 /* to avoid precision issue, we stop when ctr <= 0.5 */
1725 ureg_SGE(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), ctrx
);
1726 ureg_IF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1728 /* case src and ctr contain integers */
1729 ureg_ISGE(ureg
, tmp
, ureg_imm1i(ureg
, 0), ctrx
);
1730 ureg_UIF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1744 DECL_SPECIAL(ENDLOOP
)
1746 struct ureg_program
*ureg
= tx
->ureg
;
1747 struct ureg_dst ctr
= tx_get_loopctr(tx
, TRUE
);
1748 struct ureg_dst dst_ctrx
, dst_al
;
1749 struct ureg_src src_ctr
, al_counter
;
1751 dst_ctrx
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_0
);
1752 dst_al
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_1
);
1753 src_ctr
= ureg_src(ctr
);
1754 al_counter
= ureg_scalar(src_ctr
, TGSI_SWIZZLE_Z
);
1757 * ctr.y (aL) += step */
1758 if (!tx
->native_integers
) {
1759 ureg_ADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1f(ureg
, -1.0f
));
1760 ureg_ADD(ureg
, dst_al
, src_ctr
, al_counter
);
1762 ureg_UADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1i(ureg
, -1));
1763 ureg_UADD(ureg
, dst_al
, src_ctr
, al_counter
);
1765 ureg_ENDLOOP(tx
->ureg
, tx_endloop(tx
));
1771 unsigned k
= tx
->num_inst_labels
;
1772 unsigned n
= tx
->insn
.src
[0].idx
;
1775 tx
->inst_labels
= REALLOC(tx
->inst_labels
,
1776 k
* sizeof(tx
->inst_labels
[0]),
1777 n
* sizeof(tx
->inst_labels
[0]));
1779 tx
->inst_labels
[n
] = ureg_get_instruction_number(tx
->ureg
);
1783 DECL_SPECIAL(SINCOS
)
1785 struct ureg_program
*ureg
= tx
->ureg
;
1786 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1787 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1789 assert(!(dst
.WriteMask
& 0xc));
1791 /* z undefined, w untouched */
1792 ureg_COS(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
),
1793 ureg_scalar(src
, TGSI_SWIZZLE_X
));
1794 ureg_SIN(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
),
1795 ureg_scalar(src
, TGSI_SWIZZLE_X
));
1802 tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1803 tx_src_param(tx
, &tx
->insn
.src
[0]));
1809 struct ureg_program
*ureg
= tx
->ureg
;
1811 struct ureg_src rep
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1812 struct ureg_dst ctr
;
1813 struct ureg_dst tmp
;
1814 struct ureg_src ctrx
;
1816 label
= tx_bgnloop(tx
);
1817 ctr
= ureg_writemask(tx_get_loopctr(tx
, FALSE
), NINED3DSP_WRITEMASK_0
);
1818 ctrx
= ureg_scalar(ureg_src(ctr
), TGSI_SWIZZLE_X
);
1820 /* NOTE: rep must be constant, so we don't have to save the count */
1821 assert(rep
.File
== TGSI_FILE_CONSTANT
|| rep
.File
== TGSI_FILE_IMMEDIATE
);
1823 /* rep: num_iterations - 0 - 0 - 0 */
1824 ureg_MOV(ureg
, ctr
, rep
);
1825 ureg_BGNLOOP(ureg
, label
);
1826 tmp
= tx_scratch_scalar(tx
);
1827 /* Initially ctr.x contains the number of iterations.
1828 * We decrease ctr.x at the end of every iteration,
1829 * and stop when it reaches 0. */
1831 if (!tx
->native_integers
) {
1832 /* case src and ctr contain floats */
1833 /* to avoid precision issue, we stop when ctr <= 0.5 */
1834 ureg_SGE(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), ctrx
);
1835 ureg_IF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1837 /* case src and ctr contain integers */
1838 ureg_ISGE(ureg
, tmp
, ureg_imm1i(ureg
, 0), ctrx
);
1839 ureg_UIF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1848 DECL_SPECIAL(ENDREP
)
1850 struct ureg_program
*ureg
= tx
->ureg
;
1851 struct ureg_dst ctr
= tx_get_loopctr(tx
, FALSE
);
1852 struct ureg_dst dst_ctrx
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_0
);
1853 struct ureg_src src_ctr
= ureg_src(ctr
);
1856 if (!tx
->native_integers
)
1857 ureg_ADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1f(ureg
, -1.0f
));
1859 ureg_UADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1i(ureg
, -1));
1861 ureg_ENDLOOP(tx
->ureg
, tx_endloop(tx
));
1868 ureg_ENDIF(tx
->ureg
);
1874 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1876 if (tx
->native_integers
&& tx
->insn
.src
[0].file
== D3DSPR_CONSTBOOL
)
1877 ureg_UIF(tx
->ureg
, src
, tx_cond(tx
));
1879 ureg_IF(tx
->ureg
, src
, tx_cond(tx
));
1884 static inline unsigned
1885 sm1_insn_flags_to_tgsi_setop(BYTE flags
)
1888 case NINED3DSHADER_REL_OP_GT
: return TGSI_OPCODE_SGT
;
1889 case NINED3DSHADER_REL_OP_EQ
: return TGSI_OPCODE_SEQ
;
1890 case NINED3DSHADER_REL_OP_GE
: return TGSI_OPCODE_SGE
;
1891 case NINED3DSHADER_REL_OP_LT
: return TGSI_OPCODE_SLT
;
1892 case NINED3DSHADER_REL_OP_NE
: return TGSI_OPCODE_SNE
;
1893 case NINED3DSHADER_REL_OP_LE
: return TGSI_OPCODE_SLE
;
1895 assert(!"invalid comparison flags");
1896 return TGSI_OPCODE_SGT
;
1902 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
1903 struct ureg_src src
[2];
1904 struct ureg_dst tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
1905 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1906 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
1907 ureg_insn(tx
->ureg
, cmp_op
, &tmp
, 1, src
, 2, 0);
1908 ureg_IF(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), tx_cond(tx
));
1914 ureg_ELSE(tx
->ureg
, tx_elsecond(tx
));
1918 DECL_SPECIAL(BREAKC
)
1920 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
1921 struct ureg_src src
[2];
1922 struct ureg_dst tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
1923 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1924 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
1925 ureg_insn(tx
->ureg
, cmp_op
, &tmp
, 1, src
, 2, 0);
1926 ureg_IF(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), tx_cond(tx
));
1929 ureg_ENDIF(tx
->ureg
);
1933 static const char *sm1_declusage_names
[] =
1935 [D3DDECLUSAGE_POSITION
] = "POSITION",
1936 [D3DDECLUSAGE_BLENDWEIGHT
] = "BLENDWEIGHT",
1937 [D3DDECLUSAGE_BLENDINDICES
] = "BLENDINDICES",
1938 [D3DDECLUSAGE_NORMAL
] = "NORMAL",
1939 [D3DDECLUSAGE_PSIZE
] = "PSIZE",
1940 [D3DDECLUSAGE_TEXCOORD
] = "TEXCOORD",
1941 [D3DDECLUSAGE_TANGENT
] = "TANGENT",
1942 [D3DDECLUSAGE_BINORMAL
] = "BINORMAL",
1943 [D3DDECLUSAGE_TESSFACTOR
] = "TESSFACTOR",
1944 [D3DDECLUSAGE_POSITIONT
] = "POSITIONT",
1945 [D3DDECLUSAGE_COLOR
] = "COLOR",
1946 [D3DDECLUSAGE_FOG
] = "FOG",
1947 [D3DDECLUSAGE_DEPTH
] = "DEPTH",
1948 [D3DDECLUSAGE_SAMPLE
] = "SAMPLE"
1951 static inline unsigned
1952 sm1_to_nine_declusage(struct sm1_semantic
*dcl
)
1954 return nine_d3d9_to_nine_declusage(dcl
->usage
, dcl
->usage_idx
);
1958 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic
*sem
,
1960 struct sm1_semantic
*dcl
)
1962 BYTE index
= dcl
->usage_idx
;
1964 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1965 * we match to a TGSI_SEMANTIC_GENERIC with index.
1967 * The index can be anything UINT16 and usage_idx is BYTE,
1968 * so we can fit everything. It doesn't matter if indices
1969 * are close together or low.
1972 * POSITION >= 1: 10 * index + 6
1973 * COLOR >= 2: 10 * (index-1) + 7
1974 * TEXCOORD[0..15]: index
1975 * BLENDWEIGHT: 10 * index + 18
1976 * BLENDINDICES: 10 * index + 19
1977 * NORMAL: 10 * index + 20
1978 * TANGENT: 10 * index + 21
1979 * BINORMAL: 10 * index + 22
1980 * TESSFACTOR: 10 * index + 23
1983 switch (dcl
->usage
) {
1984 case D3DDECLUSAGE_POSITION
:
1985 case D3DDECLUSAGE_POSITIONT
:
1986 case D3DDECLUSAGE_DEPTH
:
1988 sem
->Name
= TGSI_SEMANTIC_POSITION
;
1991 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1992 sem
->Index
= 10 * index
+ 6;
1995 case D3DDECLUSAGE_COLOR
:
1997 sem
->Name
= TGSI_SEMANTIC_COLOR
;
2000 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2001 sem
->Index
= 10 * (index
-1) + 7;
2004 case D3DDECLUSAGE_FOG
:
2006 sem
->Name
= TGSI_SEMANTIC_FOG
;
2009 case D3DDECLUSAGE_PSIZE
:
2011 sem
->Name
= TGSI_SEMANTIC_PSIZE
;
2014 case D3DDECLUSAGE_TEXCOORD
:
2016 if (index
< 8 && tc
)
2017 sem
->Name
= TGSI_SEMANTIC_TEXCOORD
;
2019 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2022 case D3DDECLUSAGE_BLENDWEIGHT
:
2023 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2024 sem
->Index
= 10 * index
+ 18;
2026 case D3DDECLUSAGE_BLENDINDICES
:
2027 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2028 sem
->Index
= 10 * index
+ 19;
2030 case D3DDECLUSAGE_NORMAL
:
2031 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2032 sem
->Index
= 10 * index
+ 20;
2034 case D3DDECLUSAGE_TANGENT
:
2035 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2036 sem
->Index
= 10 * index
+ 21;
2038 case D3DDECLUSAGE_BINORMAL
:
2039 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2040 sem
->Index
= 10 * index
+ 22;
2042 case D3DDECLUSAGE_TESSFACTOR
:
2043 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
2044 sem
->Index
= 10 * index
+ 23;
2046 case D3DDECLUSAGE_SAMPLE
:
2047 sem
->Name
= TGSI_SEMANTIC_COUNT
;
2051 unreachable("Invalid DECLUSAGE.");
2056 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2057 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2058 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2059 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2060 static inline unsigned
2061 d3dstt_to_tgsi_tex(BYTE sampler_type
)
2063 switch (sampler_type
) {
2064 case NINED3DSTT_1D
: return TGSI_TEXTURE_1D
;
2065 case NINED3DSTT_2D
: return TGSI_TEXTURE_2D
;
2066 case NINED3DSTT_VOLUME
: return TGSI_TEXTURE_3D
;
2067 case NINED3DSTT_CUBE
: return TGSI_TEXTURE_CUBE
;
2070 return TGSI_TEXTURE_UNKNOWN
;
2073 static inline unsigned
2074 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type
)
2076 switch (sampler_type
) {
2077 case NINED3DSTT_1D
: return TGSI_TEXTURE_SHADOW1D
;
2078 case NINED3DSTT_2D
: return TGSI_TEXTURE_SHADOW2D
;
2079 case NINED3DSTT_VOLUME
:
2080 case NINED3DSTT_CUBE
:
2083 return TGSI_TEXTURE_UNKNOWN
;
2086 static inline unsigned
2087 ps1x_sampler_type(const struct nine_shader_info
*info
, unsigned stage
)
2089 switch ((info
->sampler_ps1xtypes
>> (stage
* 2)) & 0x3) {
2090 case 1: return TGSI_TEXTURE_1D
;
2091 case 0: return TGSI_TEXTURE_2D
;
2092 case 3: return TGSI_TEXTURE_3D
;
2094 return TGSI_TEXTURE_CUBE
;
2099 sm1_sampler_type_name(BYTE sampler_type
)
2101 switch (sampler_type
) {
2102 case NINED3DSTT_1D
: return "1D";
2103 case NINED3DSTT_2D
: return "2D";
2104 case NINED3DSTT_VOLUME
: return "VOLUME";
2105 case NINED3DSTT_CUBE
: return "CUBE";
2107 return "(D3DSTT_?)";
2111 static inline unsigned
2112 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic
*sem
)
2114 switch (sem
->Name
) {
2115 case TGSI_SEMANTIC_POSITION
:
2116 case TGSI_SEMANTIC_NORMAL
:
2117 return TGSI_INTERPOLATE_LINEAR
;
2118 case TGSI_SEMANTIC_BCOLOR
:
2119 case TGSI_SEMANTIC_COLOR
:
2120 return TGSI_INTERPOLATE_COLOR
;
2121 case TGSI_SEMANTIC_FOG
:
2122 case TGSI_SEMANTIC_GENERIC
:
2123 case TGSI_SEMANTIC_TEXCOORD
:
2124 case TGSI_SEMANTIC_CLIPDIST
:
2125 case TGSI_SEMANTIC_CLIPVERTEX
:
2126 return TGSI_INTERPOLATE_PERSPECTIVE
;
2127 case TGSI_SEMANTIC_EDGEFLAG
:
2128 case TGSI_SEMANTIC_FACE
:
2129 case TGSI_SEMANTIC_INSTANCEID
:
2130 case TGSI_SEMANTIC_PCOORD
:
2131 case TGSI_SEMANTIC_PRIMID
:
2132 case TGSI_SEMANTIC_PSIZE
:
2133 case TGSI_SEMANTIC_VERTEXID
:
2134 return TGSI_INTERPOLATE_CONSTANT
;
2137 return TGSI_INTERPOLATE_CONSTANT
;
2143 struct ureg_program
*ureg
= tx
->ureg
;
2146 struct tgsi_declaration_semantic tgsi
;
2147 struct sm1_semantic sem
;
2148 sm1_read_semantic(tx
, &sem
);
2150 is_input
= sem
.reg
.file
== D3DSPR_INPUT
;
2152 sem
.usage
== D3DDECLUSAGE_SAMPLE
|| sem
.reg
.file
== D3DSPR_SAMPLER
;
2155 sm1_dump_dst_param(&sem
.reg
);
2157 DUMP(" %s\n", sm1_sampler_type_name(sem
.sampler_type
));
2159 if (tx
->version
.major
>= 3)
2160 DUMP(" %s%i\n", sm1_declusage_names
[sem
.usage
], sem
.usage_idx
);
2162 if (sem
.usage
| sem
.usage_idx
)
2163 DUMP(" %u[%u]\n", sem
.usage
, sem
.usage_idx
);
2168 const unsigned m
= 1 << sem
.reg
.idx
;
2169 ureg_DECL_sampler(ureg
, sem
.reg
.idx
);
2170 tx
->info
->sampler_mask
|= m
;
2171 tx
->sampler_targets
[sem
.reg
.idx
] = (tx
->info
->sampler_mask_shadow
& m
) ?
2172 d3dstt_to_tgsi_tex_shadow(sem
.sampler_type
) :
2173 d3dstt_to_tgsi_tex(sem
.sampler_type
);
2177 sm1_declusage_to_tgsi(&tgsi
, tx
->want_texcoord
, &sem
);
2180 /* linkage outside of shader with vertex declaration */
2181 ureg_DECL_vs_input(ureg
, sem
.reg
.idx
);
2182 assert(sem
.reg
.idx
< ARRAY_SIZE(tx
->info
->input_map
));
2183 tx
->info
->input_map
[sem
.reg
.idx
] = sm1_to_nine_declusage(&sem
);
2184 tx
->info
->num_inputs
= MAX2(tx
->info
->num_inputs
, sem
.reg
.idx
+ 1);
2185 /* NOTE: preserving order in case of indirect access */
2187 if (tx
->version
.major
>= 3) {
2188 /* SM2 output semantic determined by file */
2189 assert(sem
.reg
.mask
!= 0);
2190 if (sem
.usage
== D3DDECLUSAGE_POSITIONT
)
2191 tx
->info
->position_t
= TRUE
;
2192 assert(sem
.reg
.idx
< ARRAY_SIZE(tx
->regs
.o
));
2193 assert(ureg_dst_is_undef(tx
->regs
.o
[sem
.reg
.idx
]) && "Nine doesn't support yet packing");
2194 tx
->regs
.o
[sem
.reg
.idx
] = ureg_DECL_output_masked(
2195 ureg
, tgsi
.Name
, tgsi
.Index
, sem
.reg
.mask
, 0, 1);
2196 nine_record_outputs(tx
, sem
.usage
, sem
.usage_idx
, sem
.reg
.mask
, sem
.reg
.idx
);
2197 if (tx
->info
->process_vertices
&& sem
.usage
== D3DDECLUSAGE_POSITION
&& sem
.usage_idx
== 0) {
2198 tx
->regs
.oPos_out
= tx
->regs
.o
[sem
.reg
.idx
];
2199 tx
->regs
.o
[sem
.reg
.idx
] = ureg_DECL_temporary(ureg
);
2200 tx
->regs
.oPos
= tx
->regs
.o
[sem
.reg
.idx
];
2203 if (tgsi
.Name
== TGSI_SEMANTIC_PSIZE
) {
2204 tx
->regs
.o
[sem
.reg
.idx
] = ureg_DECL_temporary(ureg
);
2205 tx
->regs
.oPts
= tx
->regs
.o
[sem
.reg
.idx
];
2209 if (is_input
&& tx
->version
.major
>= 3) {
2210 unsigned interp_location
= 0;
2211 /* SM3 only, SM2 input semantic determined by file */
2212 assert(sem
.reg
.idx
< ARRAY_SIZE(tx
->regs
.v
));
2213 assert(ureg_src_is_undef(tx
->regs
.v
[sem
.reg
.idx
]) && "Nine doesn't support yet packing");
2214 /* PositionT and tessfactor forbidden */
2215 if (sem
.usage
== D3DDECLUSAGE_POSITIONT
|| sem
.usage
== D3DDECLUSAGE_TESSFACTOR
)
2216 return D3DERR_INVALIDCALL
;
2218 if (tgsi
.Name
== TGSI_SEMANTIC_POSITION
) {
2219 /* Position0 is forbidden (likely because vPos already does that) */
2220 if (sem
.usage
== D3DDECLUSAGE_POSITION
)
2221 return D3DERR_INVALIDCALL
;
2222 /* Following code is for depth */
2223 tx
->regs
.v
[sem
.reg
.idx
] = nine_get_position_input(tx
);
2227 if (sem
.reg
.mod
& NINED3DSPDM_CENTROID
||
2228 (tgsi
.Name
== TGSI_SEMANTIC_COLOR
&& tx
->info
->force_color_in_centroid
))
2229 interp_location
= TGSI_INTERPOLATE_LOC_CENTROID
;
2231 tx
->regs
.v
[sem
.reg
.idx
] = ureg_DECL_fs_input_cyl_centroid(
2232 ureg
, tgsi
.Name
, tgsi
.Index
,
2233 nine_tgsi_to_interp_mode(&tgsi
),
2235 interp_location
, 0, 1);
2237 if (!is_input
&& 0) { /* declare in COLOROUT/DEPTHOUT case */
2238 /* FragColor or FragDepth */
2239 assert(sem
.reg
.mask
!= 0);
2240 ureg_DECL_output_masked(ureg
, tgsi
.Name
, tgsi
.Index
, sem
.reg
.mask
,
2249 tx_set_lconstf(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.f
);
2255 tx_set_lconstb(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.b
);
2261 tx_set_lconsti(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.i
);
2267 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2268 struct ureg_src src
[2] = {
2269 tx_src_param(tx
, &tx
->insn
.src
[0]),
2270 tx_src_param(tx
, &tx
->insn
.src
[1])
2272 ureg_POW(tx
->ureg
, dst
, ureg_abs(src
[0]), src
[1]);
2278 struct ureg_program
*ureg
= tx
->ureg
;
2279 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2280 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2281 struct ureg_dst tmp
= tx_scratch(tx
);
2282 ureg_RSQ(ureg
, tmp
, ureg_abs(src
));
2283 ureg_MIN(ureg
, dst
, ureg_imm1f(ureg
, FLT_MAX
), ureg_src(tmp
));
2289 struct ureg_program
*ureg
= tx
->ureg
;
2290 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2291 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2292 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2293 ureg_LG2(ureg
, tmp
, ureg_abs(src
));
2294 ureg_MAX(ureg
, dst
, ureg_imm1f(ureg
, -FLT_MAX
), tx_src_scalar(tmp
));
2300 struct ureg_program
*ureg
= tx
->ureg
;
2301 struct ureg_dst tmp
= tx_scratch(tx
);
2302 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2303 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2304 ureg_LIT(ureg
, tmp
, src
);
2305 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2306 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2307 * it 0^0 if src.w=0, which value is driver dependent. */
2308 ureg_CMP(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
),
2309 ureg_negate(ureg_scalar(src
, TGSI_SWIZZLE_Y
)),
2310 ureg_src(tmp
), ureg_imm1f(ureg
, 0.0f
));
2311 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XYW
), ureg_src(tmp
));
2317 struct ureg_program
*ureg
= tx
->ureg
;
2318 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2319 struct ureg_src nrm
= tx_src_scalar(tmp
);
2320 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2321 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2322 ureg_DP3(ureg
, tmp
, src
, src
);
2323 ureg_RSQ(ureg
, tmp
, nrm
);
2324 ureg_MIN(ureg
, tmp
, ureg_imm1f(ureg
, FLT_MAX
), nrm
);
2325 ureg_MUL(ureg
, dst
, src
, nrm
);
2329 DECL_SPECIAL(DP2ADD
)
2331 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2332 struct ureg_src dp2
= tx_src_scalar(tmp
);
2333 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2334 struct ureg_src src
[3];
2336 for (i
= 0; i
< 3; ++i
)
2337 src
[i
] = tx_src_param(tx
, &tx
->insn
.src
[i
]);
2338 assert_replicate_swizzle(&src
[2]);
2340 ureg_DP2(tx
->ureg
, tmp
, src
[0], src
[1]);
2341 ureg_ADD(tx
->ureg
, dst
, src
[2], dp2
);
2346 DECL_SPECIAL(TEXCOORD
)
2348 struct ureg_program
*ureg
= tx
->ureg
;
2349 const unsigned s
= tx
->insn
.dst
[0].idx
;
2350 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2352 tx_texcoord_alloc(tx
, s
);
2353 ureg_MOV(ureg
, ureg_writemask(ureg_saturate(dst
), TGSI_WRITEMASK_XYZ
), tx
->regs
.vT
[s
]);
2354 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
), ureg_imm1f(tx
->ureg
, 1.0f
));
2359 DECL_SPECIAL(TEXCOORD_ps14
)
2361 struct ureg_program
*ureg
= tx
->ureg
;
2362 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2363 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2365 assert(tx
->insn
.src
[0].file
== D3DSPR_TEXTURE
);
2367 ureg_MOV(ureg
, dst
, src
);
2372 DECL_SPECIAL(TEXKILL
)
2374 struct ureg_src reg
;
2376 if (tx
->version
.major
> 1 || tx
->version
.minor
> 3) {
2377 reg
= tx_dst_param_as_src(tx
, &tx
->insn
.dst
[0]);
2379 tx_texcoord_alloc(tx
, tx
->insn
.dst
[0].idx
);
2380 reg
= tx
->regs
.vT
[tx
->insn
.dst
[0].idx
];
2382 if (tx
->version
.major
< 2)
2383 reg
= ureg_swizzle(reg
, NINE_SWIZZLE4(X
,Y
,Z
,Z
));
2384 ureg_KILL_IF(tx
->ureg
, reg
);
2389 DECL_SPECIAL(TEXBEM
)
2391 struct ureg_program
*ureg
= tx
->ureg
;
2392 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2393 struct ureg_dst tmp
, tmp2
, texcoord
;
2394 struct ureg_src sample
, m00
, m01
, m10
, m11
;
2395 struct ureg_src bumpenvlscale
, bumpenvloffset
;
2396 const int m
= tx
->insn
.dst
[0].idx
;
2397 const int n
= tx
->insn
.src
[0].idx
;
2399 assert(tx
->version
.major
== 1);
2401 sample
= ureg_DECL_sampler(ureg
, m
);
2402 tx
->info
->sampler_mask
|= 1 << m
;
2404 tx_texcoord_alloc(tx
, m
);
2406 tmp
= tx_scratch(tx
);
2407 tmp2
= tx_scratch(tx
);
2408 texcoord
= tx_scratch(tx
);
2416 nine_info_mark_const_f_used(tx
->info
, 8 + 8 + m
/2);
2417 m00
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, X
);
2418 m01
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, Y
);
2419 m10
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, Z
);
2420 m11
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, W
);
2422 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2424 bumpenvlscale
= NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m
/ 2, X
);
2425 bumpenvloffset
= NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m
/ 2, Y
);
2427 bumpenvlscale
= NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m
/ 2, Z
);
2428 bumpenvloffset
= NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m
/ 2, W
);
2431 apply_ps1x_projection(tx
, texcoord
, tx
->regs
.vT
[m
], m
);
2433 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2434 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m00
,
2435 NINE_APPLY_SWIZZLE(ureg_src(tx
->regs
.tS
[n
]), X
), ureg_src(texcoord
));
2436 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2437 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m10
,
2438 NINE_APPLY_SWIZZLE(ureg_src(tx
->regs
.tS
[n
]), Y
),
2439 NINE_APPLY_SWIZZLE(ureg_src(tmp
), X
));
2441 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2442 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m01
,
2443 NINE_APPLY_SWIZZLE(ureg_src(tx
->regs
.tS
[n
]), X
), ureg_src(texcoord
));
2444 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2445 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m11
,
2446 NINE_APPLY_SWIZZLE(ureg_src(tx
->regs
.tS
[n
]), Y
),
2447 NINE_APPLY_SWIZZLE(ureg_src(tmp
), Y
));
2449 /* Now the texture coordinates are in tmp.xy */
2451 if (tx
->insn
.opcode
== D3DSIO_TEXBEM
) {
2452 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tmp
), sample
);
2453 } else if (tx
->insn
.opcode
== D3DSIO_TEXBEML
) {
2454 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2455 ureg_TEX(ureg
, tmp
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tmp
), sample
);
2456 ureg_MAD(ureg
, tmp2
, NINE_APPLY_SWIZZLE(ureg_src(tx
->regs
.tS
[n
]), Z
),
2457 bumpenvlscale
, bumpenvloffset
);
2458 ureg_MUL(ureg
, dst
, ureg_src(tmp
), ureg_src(tmp2
));
2461 tx
->info
->bumpenvmat_needed
= 1;
2466 DECL_SPECIAL(TEXREG2AR
)
2468 struct ureg_program
*ureg
= tx
->ureg
;
2469 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2470 struct ureg_src sample
;
2471 const int m
= tx
->insn
.dst
[0].idx
;
2472 const int n
= tx
->insn
.src
[0].idx
;
2473 assert(m
>= 0 && m
> n
);
2475 sample
= ureg_DECL_sampler(ureg
, m
);
2476 tx
->info
->sampler_mask
|= 1 << m
;
2477 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_swizzle(ureg_src(tx
->regs
.tS
[n
]), NINE_SWIZZLE4(W
,X
,X
,X
)), sample
);
2482 DECL_SPECIAL(TEXREG2GB
)
2484 struct ureg_program
*ureg
= tx
->ureg
;
2485 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2486 struct ureg_src sample
;
2487 const int m
= tx
->insn
.dst
[0].idx
;
2488 const int n
= tx
->insn
.src
[0].idx
;
2489 assert(m
>= 0 && m
> n
);
2491 sample
= ureg_DECL_sampler(ureg
, m
);
2492 tx
->info
->sampler_mask
|= 1 << m
;
2493 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_swizzle(ureg_src(tx
->regs
.tS
[n
]), NINE_SWIZZLE4(Y
,Z
,Z
,Z
)), sample
);
2498 DECL_SPECIAL(TEXM3x2PAD
)
2500 return D3D_OK
; /* this is just padding */
2503 DECL_SPECIAL(TEXM3x2TEX
)
2505 struct ureg_program
*ureg
= tx
->ureg
;
2506 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2507 struct ureg_src sample
;
2508 const int m
= tx
->insn
.dst
[0].idx
- 1;
2509 const int n
= tx
->insn
.src
[0].idx
;
2510 assert(m
>= 0 && m
> n
);
2512 tx_texcoord_alloc(tx
, m
);
2513 tx_texcoord_alloc(tx
, m
+1);
2515 /* performs the matrix multiplication */
2516 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2517 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2519 sample
= ureg_DECL_sampler(ureg
, m
+ 1);
2520 tx
->info
->sampler_mask
|= 1 << (m
+ 1);
2521 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 1), ureg_src(dst
), sample
);
/* TEXM3x3PAD handler: emits no code and reports success.
 * NOTE(review): presumably the rows are evaluated by the closing TEXM3x3*
 * handler, which reads vT[m], vT[m+1] and vT[m+2] itself -- confirm. */
2526 DECL_SPECIAL(TEXM3x3PAD
)
2528 return D3D_OK
; /* this is just padding */
/* TEXM3x3SPEC handler.
 * With m = destination index - 2 and n = index of source 0, writes
 *   N = (dot3(vT[m], tS[n]), dot3(vT[m+1], tS[n]), dot3(vT[m+2], tS[n]))
 * to dst.xyz, then samples the sampler of stage m+2 at
 *   2 * (N.E / N.N) * N - E
 * where E is the second source parameter. The intermediate vector is built
 * in a scratch register restricted to xyz. */
2531 DECL_SPECIAL(TEXM3x3SPEC
)
2533 struct ureg_program
*ureg
= tx
->ureg
;
2534 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2535 struct ureg_src E
= tx_src_param(tx
, &tx
->insn
.src
[1]);
2536 struct ureg_src sample
;
2537 struct ureg_dst tmp
;
2538 const int m
= tx
->insn
.dst
[0].idx
- 2;
2539 const int n
= tx
->insn
.src
[0].idx
;
2540 assert(m
>= 0 && m
> n
);
2542 tx_texcoord_alloc(tx
, m
);
2543 tx_texcoord_alloc(tx
, m
+1);
2544 tx_texcoord_alloc(tx
, m
+2);
/* 3x3 matrix * vector: one dot product per output component */
2546 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2547 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2548 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
), tx
->regs
.vT
[m
+2], ureg_src(tx
->regs
.tS
[n
]));
2550 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2551 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2552 tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_XYZ
);
2554 /* At this step, dst = N = (u', w', z').
2555 * We want dst to be the texture sampled at (u'', w'', z''), with
2556 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2557 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(dst
), ureg_src(dst
));
2558 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2559 /* at this step tmp.x = 1/N.N */
2560 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(dst
), E
);
2561 /* at this step tmp.y = N.E */
2562 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2563 /* at this step tmp.x = N.E/N.N */
2564 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 2.0f
));
2565 ureg_MUL(ureg
, tmp
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_src(dst
));
2566 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2567 ureg_ADD(ureg
, tmp
, ureg_src(tmp
), ureg_negate(E
));
2568 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(tmp
), sample
);
/* TEXREG2RGB handler.
 * Samples the sampler of stage m (destination index) using tS[n]
 * (n = index of source 0) unswizzled as the coordinate, and records the
 * sampler in info->sampler_mask. */
2573 DECL_SPECIAL(TEXREG2RGB
)
2575 struct ureg_program
*ureg
= tx
->ureg
;
2576 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2577 struct ureg_src sample
;
2578 const int m
= tx
->insn
.dst
[0].idx
;
2579 const int n
= tx
->insn
.src
[0].idx
;
2580 assert(m
>= 0 && m
> n
);
2582 sample
= ureg_DECL_sampler(ureg
, m
);
2583 tx
->info
->sampler_mask
|= 1 << m
;
2584 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tx
->regs
.tS
[n
]), sample
);
/* TEXDP3TEX handler.
 * Builds the coordinate (dot3(vT[m], tS[n]), 0, 0) in a scratch register and
 * samples the sampler of stage m with it; m is the destination index and n
 * the index of source 0. */
2589 DECL_SPECIAL(TEXDP3TEX
)
2591 struct ureg_program
*ureg
= tx
->ureg
;
2592 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2593 struct ureg_dst tmp
;
2594 struct ureg_src sample
;
2595 const int m
= tx
->insn
.dst
[0].idx
;
2596 const int n
= tx
->insn
.src
[0].idx
;
2597 assert(m
>= 0 && m
> n
);
2599 tx_texcoord_alloc(tx
, m
);
2601 tmp
= tx_scratch(tx
);
/* x holds the dot product, y and z are zeroed */
2602 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2603 ureg_MOV(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_YZ
), ureg_imm1f(ureg
, 0.0f
));
2605 sample
= ureg_DECL_sampler(ureg
, m
);
2606 tx
->info
->sampler_mask
|= 1 << m
;
2607 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tmp
), sample
);
/* TEXM3x2DEPTH handler.
 * With m = destination index - 1 and n = index of source 0, computes in a
 * scratch register
 *   z = dot3(vT[m],   tS[n])
 *   w = dot3(vT[m+1], tS[n])
 * and writes (w == 0 ? 1.0 : z / w) to the Z component of the POSITION
 * output, which is declared here and stored in tx->regs.oDepth.
 * Nothing is written to the instruction's destination register. */
2612 DECL_SPECIAL(TEXM3x2DEPTH
)
2614 struct ureg_program
*ureg
= tx
->ureg
;
2615 struct ureg_dst tmp
;
2616 const int m
= tx
->insn
.dst
[0].idx
- 1;
2617 const int n
= tx
->insn
.src
[0].idx
;
2618 assert(m
>= 0 && m
> n
);
2620 tx_texcoord_alloc(tx
, m
);
2621 tx_texcoord_alloc(tx
, m
+1);
2623 tmp
= tx_scratch(tx
);
2625 /* performs the matrix multiplication */
2626 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2627 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2629 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Z
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2630 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2631 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Z
));
2632 /* res = 'w' == 0 ? 1.0 : z/w */
/* the CMP condition is -|w|, which is negative exactly when w is non-zero */
2633 ureg_CMP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
))),
2634 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 1.0f
));
2635 /* replace the depth for depth testing with the result */
2636 tx
->regs
.oDepth
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_POSITION
, 0,
2637 TGSI_WRITEMASK_Z
, 0, 1);
2638 ureg_MOV(ureg
, tx
->regs
.oDepth
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2639 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
/* TEXDP3 handler.
 * Writes dot3(vT[m], tS[n]) to the destination, where m is the destination
 * index and n the index of source 0. No texture is sampled. */
2643 DECL_SPECIAL(TEXDP3
)
2645 struct ureg_program
*ureg
= tx
->ureg
;
2646 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2647 const int m
= tx
->insn
.dst
[0].idx
;
2648 const int n
= tx
->insn
.src
[0].idx
;
2649 assert(m
>= 0 && m
> n
);
2651 tx_texcoord_alloc(tx
, m
);
2653 ureg_DP3(ureg
, dst
, tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
/* Shared handler for TEXM3x3, TEXM3x3TEX and TEXM3x3VSPEC (the opcode table
 * routes all three here).
 * With m = destination index - 2 and n = index of source 0, dst.xyz is first
 * set to the three dot products of vT[m], vT[m+1], vT[m+2] with tS[n]. The
 * switch on the opcode then finishes the instruction:
 *   - TEXM3x3:      dst.w = 1.0
 *   - TEXM3x3TEX:   sample the sampler of stage m+2 at dst
 *   - TEXM3x3VSPEC: build E from the W components of vT[m..m+2] and sample
 *                   the sampler of stage m+2 at 2 * (N.E / N.N) * N - E
 * The statement visible after the switch cases returns D3DERR_INVALIDCALL.
 * NOTE(review): the lines separating the cases (break / default) are not
 * visible in this excerpt -- confirm each case terminates as intended. */
2658 DECL_SPECIAL(TEXM3x3
)
2660 struct ureg_program
*ureg
= tx
->ureg
;
2661 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2662 struct ureg_src sample
;
2663 struct ureg_dst E
, tmp
;
2664 const int m
= tx
->insn
.dst
[0].idx
- 2;
2665 const int n
= tx
->insn
.src
[0].idx
;
2666 assert(m
>= 0 && m
> n
);
2668 tx_texcoord_alloc(tx
, m
);
2669 tx_texcoord_alloc(tx
, m
+1);
2670 tx_texcoord_alloc(tx
, m
+2);
/* 3x3 matrix * vector: one dot product per output component */
2672 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2673 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2674 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
), tx
->regs
.vT
[m
+2], ureg_src(tx
->regs
.tS
[n
]));
2676 switch (tx
->insn
.opcode
) {
2677 case D3DSIO_TEXM3x3
:
2678 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
2680 case D3DSIO_TEXM3x3TEX
:
2681 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2682 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2683 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(dst
), sample
);
2685 case D3DSIO_TEXM3x3VSPEC
:
2686 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2687 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
/* NOTE(review): no assignment to E is visible in this excerpt before it is
 * used as a destination below -- confirm it is initialised (e.g. from a
 * scratch register) at this point. */
2689 tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_XYZ
);
2690 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_X
), ureg_scalar(tx
->regs
.vT
[m
], TGSI_SWIZZLE_W
));
2691 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_Y
), ureg_scalar(tx
->regs
.vT
[m
+1], TGSI_SWIZZLE_W
));
2692 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_Z
), ureg_scalar(tx
->regs
.vT
[m
+2], TGSI_SWIZZLE_W
));
2693 /* At this step, dst = N = (u', w', z').
2694 * We want dst to be the texture sampled at (u'', w'', z''), with
2695 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2696 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(dst
), ureg_src(dst
));
2697 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2698 /* at this step tmp.x = 1/N.N */
2699 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(dst
), ureg_src(E
));
2700 /* at this step tmp.y = N.E */
2701 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2702 /* at this step tmp.x = N.E/N.N */
2703 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 2.0f
));
2704 ureg_MUL(ureg
, tmp
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_src(dst
));
2705 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2706 ureg_ADD(ureg
, tmp
, ureg_src(tmp
), ureg_negate(ureg_src(E
)));
2707 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(tmp
), sample
);
2710 return D3DERR_INVALIDCALL
;
/* TEXDEPTH handler.
 * Asserts that the destination is register 5, then replaces the fragment
 * depth with (r5.g == 0 ? 1.0 : r5.r / r5.g): r5.z receives 1/r5.g, r5.x
 * receives r5.r * r5.z, and the CMP on -|r5.g| keeps that quotient only when
 * r5.g is non-zero. The result is moved to the Z component of the POSITION
 * output, declared here and stored in tx->regs.oDepth.
 * NOTE(review): the declaration and assignment of r5 are not visible in this
 * excerpt -- confirm which register it refers to. */
2715 DECL_SPECIAL(TEXDEPTH
)
2717 struct ureg_program
*ureg
= tx
->ureg
;
2719 struct ureg_src r5r
, r5g
;
2721 assert(tx
->insn
.dst
[0].idx
== 5); /* instruction must get r5 here */
2723 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2724 * r5 won't be used afterward, thus we can use r5.ba */
2726 r5r
= ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_X
);
2727 r5g
= ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_Y
);
2729 ureg_RCP(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_Z
), r5g
);
2730 ureg_MUL(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_X
), r5r
, ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_Z
));
/* r5r now reads the quotient written just above */
2732 ureg_CMP(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_X
), ureg_negate(ureg_abs(r5g
)),
2733 r5r
, ureg_imm1f(ureg
, 1.0f
));
2734 /* replace the depth for depth testing with the result */
2735 tx
->regs
.oDepth
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_POSITION
, 0,
2736 TGSI_WRITEMASK_Z
, 0, 1);
2737 ureg_MOV(ureg
, tx
->regs
.oDepth
, r5r
);
/* Body of a handler whose DECL_SPECIAL line is not visible in this excerpt.
 * NOTE(review): presumably the BEM handler (the opcode table lists
 * SPECIAL(BEM) with one destination and two sources) -- confirm.
 * m is the destination register index. Float constant 8 + m is marked as
 * used and read as a 2x2 matrix (m00, m01, m10, m11 = its X, Y, Z, W).
 * The code computes, in tmp,
 *   x = src0.x + m00 * src1.x + m10 * src1.y
 *   y = src0.y + m01 * src1.x + m11 * src1.y
 * copies tmp.xy to dst.xy and flags info->bumpenvmat_needed.
 * NOTE(review): the assignment of tmp is not visible here -- confirm. */
2744 struct ureg_program
*ureg
= tx
->ureg
;
2745 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2746 struct ureg_src src0
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2747 struct ureg_src src1
= tx_src_param(tx
, &tx
->insn
.src
[1]);
2748 struct ureg_src m00
, m01
, m10
, m11
;
2749 const int m
= tx
->insn
.dst
[0].idx
;
2750 struct ureg_dst tmp
;
2758 nine_info_mark_const_f_used(tx
->info
, 8 + m
);
2759 m00
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, X
);
2760 m01
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, Y
);
2761 m10
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, Z
);
2762 m11
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, W
);
2763 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2764 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m00
,
2765 NINE_APPLY_SWIZZLE(src1
, X
), NINE_APPLY_SWIZZLE(src0
, X
));
2766 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2767 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m10
,
2768 NINE_APPLY_SWIZZLE(src1
, Y
), NINE_APPLY_SWIZZLE(ureg_src(tmp
), X
));
2770 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
/* src0 is passed unswizzled here; with the Y writemask its Y lane is used */
2771 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m01
,
2772 NINE_APPLY_SWIZZLE(src1
, X
), src0
);
2773 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2774 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m11
,
2775 NINE_APPLY_SWIZZLE(src1
, Y
), NINE_APPLY_SWIZZLE(ureg_src(tmp
), Y
));
2776 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XY
), ureg_src(tmp
));
2778 tx
->info
->bumpenvmat_needed
= 1;
/* Body of a handler whose DECL_SPECIAL line is not visible in this excerpt.
 * NOTE(review): presumably the TEXLD handler (the opcode table lists
 * SPECIAL(TEXLD) with two sources) -- confirm.
 * src[0] is the coordinate and src[1] the sampler; the texture target comes
 * from tx->sampler_targets[] indexed by the sampler's register index, which
 * is asserted to be within the array. The switch on insn.flags chooses
 * between ureg_TEX, ureg_TXP (NINED3DSI_TEXLD_PROJECT) and ureg_TXB
 * (NINED3DSI_TEXLD_BIAS); the remaining visible statement returns
 * D3DERR_INVALIDCALL.
 * NOTE(review): the declaration of target and the case label in front of
 * ureg_TEX are not visible here -- confirm. */
2785 struct ureg_program
*ureg
= tx
->ureg
;
2787 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2788 struct ureg_src src
[2] = {
2789 tx_src_param(tx
, &tx
->insn
.src
[0]),
2790 tx_src_param(tx
, &tx
->insn
.src
[1])
2792 assert(tx
->insn
.src
[1].idx
>= 0 &&
2793 tx
->insn
.src
[1].idx
< ARRAY_SIZE(tx
->sampler_targets
));
2794 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2796 switch (tx
->insn
.flags
) {
2798 ureg_TEX(ureg
, dst
, target
, src
[0], src
[1]);
2800 case NINED3DSI_TEXLD_PROJECT
:
2801 ureg_TXP(ureg
, dst
, target
, src
[0], src
[1]);
2803 case NINED3DSI_TEXLD_BIAS
:
2804 ureg_TXB(ureg
, dst
, target
, src
[0], src
[1]);
2808 return D3DERR_INVALIDCALL
;
/* TEXLD_14 handler (the opcode table uses it for TEX in pixel shader 1.4).
 * s is the destination register index; it selects both the sampler and,
 * through ps1x_sampler_type(), the texture target. The coordinate is the
 * instruction's source parameter. Sampler s is recorded in
 * info->sampler_mask. */
2813 DECL_SPECIAL(TEXLD_14
)
2815 struct ureg_program
*ureg
= tx
->ureg
;
2816 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2817 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2818 const unsigned s
= tx
->insn
.dst
[0].idx
;
2819 const unsigned t
= ps1x_sampler_type(tx
->info
, s
);
2821 tx
->info
->sampler_mask
|= 1 << s
;
2822 ureg_TEX(ureg
, dst
, t
, src
, ureg_DECL_sampler(ureg
, s
));
/* Body of a handler whose DECL_SPECIAL line is not visible in this excerpt.
 * NOTE(review): presumably the TEX handler used for pixel shaders up to 1.3
 * (the opcode table lists SPECIAL(TEX) with no source) -- confirm.
 * s is the destination register index. Texture coordinate s is allocated and
 * used as the coordinate (vT[s]); sampler s is declared and recorded in
 * info->sampler_mask; the sample itself is emitted through
 * TEX_with_ps1x_projection(). */
2829 struct ureg_program
*ureg
= tx
->ureg
;
2830 const unsigned s
= tx
->insn
.dst
[0].idx
;
2831 const unsigned t
= ps1x_sampler_type(tx
->info
, s
);
2832 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2833 struct ureg_src src
[2];
2835 tx_texcoord_alloc(tx
, s
);
2837 src
[0] = tx
->regs
.vT
[s
];
2838 src
[1] = ureg_DECL_sampler(ureg
, s
);
2839 tx
->info
->sampler_mask
|= 1 << s
;
2841 TEX_with_ps1x_projection(tx
, dst
, t
, src
[0], src
[1], s
);
/* TEXLDD handler: texture sample with explicit derivatives.
 * src[0] is the coordinate, src[1] the sampler, src[2] and src[3] the two
 * derivative operands; they are passed to ureg_TXD in the order
 * (coordinate, src[2], src[3], sampler). The texture target is looked up in
 * tx->sampler_targets[] by the sampler's register index, which is asserted
 * to be within the array.
 * NOTE(review): the declaration of target is not visible in this excerpt. */
2846 DECL_SPECIAL(TEXLDD
)
2849 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2850 struct ureg_src src
[4] = {
2851 tx_src_param(tx
, &tx
->insn
.src
[0]),
2852 tx_src_param(tx
, &tx
->insn
.src
[1]),
2853 tx_src_param(tx
, &tx
->insn
.src
[2]),
2854 tx_src_param(tx
, &tx
->insn
.src
[3])
2856 assert(tx
->insn
.src
[1].idx
>= 0 &&
2857 tx
->insn
.src
[1].idx
< ARRAY_SIZE(tx
->sampler_targets
));
2858 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2860 ureg_TXD(tx
->ureg
, dst
, target
, src
[0], src
[2], src
[3], src
[1]);
/* TEXLDL handler: emits ureg_TXL with src[0] as the coordinate and src[1] as
 * the sampler. The texture target is looked up in tx->sampler_targets[] by
 * the sampler's register index, which is asserted to be within the array.
 * NOTE(review): the declaration of target is not visible in this excerpt. */
2864 DECL_SPECIAL(TEXLDL
)
2867 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2868 struct ureg_src src
[2] = {
2869 tx_src_param(tx
, &tx
->insn
.src
[0]),
2870 tx_src_param(tx
, &tx
->insn
.src
[1])
2872 assert(tx
->insn
.src
[1].idx
>= 0 &&
2873 tx
->insn
.src
[1].idx
< ARRAY_SIZE(tx
->sampler_targets
));
2874 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2876 ureg_TXL(tx
->ureg
, dst
, target
, src
[0], src
[1]);
/* Unimplemented handler: the body is only the STUB macro invoked with
 * D3DERR_INVALIDCALL.
 * NOTE(review): its DECL_SPECIAL line is not visible in this excerpt;
 * presumably this is the SETP handler referenced by the opcode table --
 * confirm. */
2882 STUB(D3DERR_INVALIDCALL
);
/* BREAKP handler: not implemented; the body is only the STUB macro invoked
 * with D3DERR_INVALIDCALL. */
2885 DECL_SPECIAL(BREAKP
)
2887 STUB(D3DERR_INVALIDCALL
);
/* Handler body that emits nothing and reports success.
 * NOTE(review): its DECL_SPECIAL line is not visible in this excerpt;
 * presumably this is the PHASE handler referenced by inst_phase -- confirm. */
2892 return D3D_OK
; /* we don't care about phase */
/* COMMENT handler: comment tokens produce no code; reports success. */
2895 DECL_SPECIAL(COMMENT
)
2897 return D3D_OK
; /* nothing to do */
/* _OPI builds one struct sm1_op_info initialiser:
 *   o        D3DSIO opcode suffix (expands to D3DSIO_<o>)
 *   t        TGSI opcode suffix (expands to TGSI_OPCODE_<t>)
 *   vv1, vv2 first version pair
 *   pv1, pv2 second version pair
 *   d, s, h  three trailing members
 * NOTE(review): judging by how the entries are read elsewhere in this file,
 * the pairs are presumably the vertex- and fragment-shader min/max versions
 * and d, s, h the destination count, source count and handler -- confirm
 * against the struct sm1_op_info declaration. */
2901 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
2902 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
/* Table of the supported SM1 instructions, one _OPI entry each: D3DSIO
 * opcode, TGSI opcode, two version ranges given as V(a, b) pairs, two
 * operand counts and an optional SPECIAL() handler (NULL where the entry
 * has none).
 * The same D3DSIO opcode can appear more than once with different version
 * ranges (SINCOS, TEXCOORD, TEX, EXPP); create_op_info_map() selects the
 * entry matching the shader being translated.
 * NOTE(review): the V() macro is not visible in this excerpt; presumably it
 * encodes (major, minor) the same way create_op_info_map() does -- confirm. */
2904 struct sm1_op_info inst_table
[] =
2906 _OPI(NOP
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP
)), /* 0 */
2907 _OPI(MOV
, MOV
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
),
2908 _OPI(ADD
, ADD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 2 */
2909 _OPI(SUB
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB
)), /* 3 */
2910 _OPI(MAD
, MAD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL
), /* 4 */
2911 _OPI(MUL
, MUL
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 5 */
2912 _OPI(RCP
, RCP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 6 */
2913 _OPI(RSQ
, RSQ
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ
)), /* 7 */
2914 _OPI(DP3
, DP3
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 8 */
2915 _OPI(DP4
, DP4
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 9 */
2916 _OPI(MIN
, MIN
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 10 */
2917 _OPI(MAX
, MAX
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 11 */
2918 _OPI(SLT
, SLT
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 12 */
2919 _OPI(SGE
, SGE
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 13 */
2920 _OPI(EXP
, EX2
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 14 */
2921 _OPI(LOG
, LG2
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG
)), /* 15 */
2922 _OPI(LIT
, LIT
, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT
)), /* 16 */
2923 _OPI(DST
, DST
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 17 */
2924 _OPI(LRP
, LRP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL
), /* 18 */
2925 _OPI(FRC
, FRC
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 19 */
2927 _OPI(M4x4
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4
)),
2928 _OPI(M4x3
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3
)),
2929 _OPI(M3x4
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4
)),
2930 _OPI(M3x3
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3
)),
2931 _OPI(M3x2
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2
)),
2933 _OPI(CALL
, CAL
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL
)),
2934 _OPI(CALLNZ
, CAL
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ
)),
2935 _OPI(LOOP
, BGNLOOP
, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP
)),
2936 _OPI(RET
, RET
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET
)),
2937 _OPI(ENDLOOP
, ENDLOOP
, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP
)),
2938 _OPI(LABEL
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL
)),
2940 _OPI(DCL
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL
)),
2942 _OPI(POW
, POW
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW
)),
2943 _OPI(CRS
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD
)), /* XXX: .w */
2944 _OPI(SGN
, SSG
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN
)), /* ignore src1,2 */
2945 _OPI(ABS
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS
)),
2946 _OPI(NRM
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM
)), /* NRM doesn't fit */
2948 _OPI(SINCOS
, NOP
, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS
)),
2949 _OPI(SINCOS
, NOP
, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS
)),
2951 /* More flow control */
2952 _OPI(REP
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP
)),
2953 _OPI(ENDREP
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP
)),
2954 _OPI(IF
, IF
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF
)),
2955 _OPI(IFC
, IF
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC
)),
2956 _OPI(ELSE
, ELSE
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE
)),
2957 _OPI(ENDIF
, ENDIF
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF
)),
2958 _OPI(BREAK
, BRK
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL
),
2959 _OPI(BREAKC
, NOP
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC
)),
2960 /* we don't write to the address register, but a normal register (copied
2961 * when needed to the address register), thus we don't use ARR */
2962 _OPI(MOVA
, MOV
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
2964 _OPI(DEFB
, NOP
, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB
)),
2965 _OPI(DEFI
, NOP
, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI
)),
2967 _OPI(TEXCOORD
, NOP
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD
)),
2968 _OPI(TEXCOORD
, MOV
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14
)),
2969 _OPI(TEXKILL
, KILL_IF
, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL
)),
2970 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX
)),
2971 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14
)),
2972 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD
)),
2973 _OPI(TEXBEM
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM
)),
2974 _OPI(TEXBEML
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM
)),
2975 _OPI(TEXREG2AR
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR
)),
2976 _OPI(TEXREG2GB
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB
)),
2977 _OPI(TEXM3x2PAD
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD
)),
2978 _OPI(TEXM3x2TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX
)),
2979 _OPI(TEXM3x3PAD
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD
)),
2980 _OPI(TEXM3x3TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
2981 _OPI(TEXM3x3SPEC
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC
)),
2982 _OPI(TEXM3x3VSPEC
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
2984 _OPI(EXPP
, EXP
, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL
),
2985 _OPI(EXPP
, EX2
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
2986 _OPI(LOGP
, LG2
, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG
)),
2987 _OPI(CND
, NOP
, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND
)),
2989 _OPI(DEF
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF
)),
2991 /* More tex stuff */
2992 _OPI(TEXREG2RGB
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB
)),
2993 _OPI(TEXDP3TEX
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX
)),
2994 _OPI(TEXM3x2DEPTH
, TEX
, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH
)),
2995 _OPI(TEXDP3
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3
)),
2996 _OPI(TEXM3x3
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
2997 _OPI(TEXDEPTH
, TEX
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH
)),
3000 _OPI(CMP
, CMP
, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP
)), /* reversed */
3001 _OPI(BEM
, NOP
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM
)),
3002 _OPI(DP2ADD
, NOP
, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD
)),
3003 _OPI(DSX
, DDX
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL
),
3004 _OPI(DSY
, DDY
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL
),
3005 _OPI(TEXLDD
, TXD
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD
)),
3006 _OPI(SETP
, NOP
, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP
)),
3007 _OPI(TEXLDL
, TXL
, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL
)),
3008 _OPI(BREAKP
, BRK
, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP
))
/* Stand-alone op-info entry for the PHASE opcode. sm1_parse_instruction()
 * selects it by opcode value instead of going through op_info_map. */
3011 struct sm1_op_info inst_phase
=
3012 _OPI(PHASE
, NOP
, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE
));
/* Stand-alone op-info entry for the COMMENT opcode. sm1_parse_instruction()
 * selects it by opcode value instead of going through op_info_map. */
3014 struct sm1_op_info inst_comment
=
3015 _OPI(COMMENT
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT
));
/* Fills tx->op_info_map, which maps a D3DSIO opcode to an index into
 * inst_table.
 * Every slot is first set to -1. Then, using the vertex version range for
 * PIPE_SHADER_VERTEX shaders and the fragment version range in the other
 * loop, each table entry whose [min, max] range contains
 * (major << 8) | minor is stored under its sio value. Entries are visited
 * in table order, so a later matching entry replaces an earlier one with
 * the same opcode. */
3018 create_op_info_map(struct shader_translator
*tx
)
3020 const unsigned version
= (tx
->version
.major
<< 8) | tx
->version
.minor
;
/* start with every opcode unmapped */
3023 for (i
= 0; i
< ARRAY_SIZE(tx
->op_info_map
); ++i
)
3024 tx
->op_info_map
[i
] = -1;
3026 if (tx
->processor
== PIPE_SHADER_VERTEX
) {
3027 for (i
= 0; i
< ARRAY_SIZE(inst_table
); ++i
) {
3028 assert(inst_table
[i
].sio
< ARRAY_SIZE(tx
->op_info_map
));
3029 if (inst_table
[i
].vert_version
.min
<= version
&&
3030 inst_table
[i
].vert_version
.max
>= version
)
3031 tx
->op_info_map
[inst_table
[i
].sio
] = i
;
/* same walk, but matching on the fragment-shader version range */
3034 for (i
= 0; i
< ARRAY_SIZE(inst_table
); ++i
) {
3035 assert(inst_table
[i
].sio
< ARRAY_SIZE(tx
->op_info_map
));
3036 if (inst_table
[i
].frag_version
.min
<= version
&&
3037 inst_table
[i
].frag_version
.max
>= version
)
3038 tx
->op_info_map
[inst_table
[i
].sio
] = i
;
/* Generic translation path for the current instruction.
 * Converts the destination parameters (at most ARRAY_SIZE(dst)) and the
 * source parameters (at most ARRAY_SIZE(src)) and emits one TGSI
 * instruction through ureg_insn(), using the TGSI opcode stored in the
 * instruction's op-info entry.
 * Returns an HRESULT. */
3043 static inline HRESULT
3044 NineTranslateInstruction_Generic(struct shader_translator
*tx
)
3046 struct ureg_dst dst
[1];
3047 struct ureg_src src
[4];
3050 for (i
= 0; i
< tx
->insn
.ndst
&& i
< ARRAY_SIZE(dst
); ++i
)
3051 dst
[i
] = tx_dst_param(tx
, &tx
->insn
.dst
[i
]);
3052 for (i
= 0; i
< tx
->insn
.nsrc
&& i
< ARRAY_SIZE(src
); ++i
)
3053 src
[i
] = tx_src_param(tx
, &tx
->insn
.src
[i
]);
3055 ureg_insn(tx
->ureg
, tx
->insn
.info
->opcode
,
3057 src
, tx
->insn
.nsrc
, 0);
/* Returns the token at the current parse position without advancing. */
3062 TOKEN_PEEK(struct shader_translator
*tx
)
3064 return *(tx
->parse
);
/* Returns the token at the current parse position and advances tx->parse by
 * one token. */
3068 TOKEN_NEXT(struct shader_translator
*tx
)
3070 return *(tx
->parse
)++;
/* Resynchronises the parser with the position computed for the next
 * instruction: when parse_next is set and differs from the current
 * position, a warning with both pointers is logged and tx->parse is moved
 * to parse_next. */
3074 TOKEN_JUMP(struct shader_translator
*tx
)
3076 if (tx
->parse_next
&& tx
->parse
!= tx
->parse_next
) {
3077 WARN("parse(%p) != parse_next(%p) !\n", tx
->parse
, tx
->parse_next
);
3078 tx
->parse
= tx
->parse_next
;
/* Tells whether the next token is the end-of-shader marker
 * (NINED3DSP_END). Does not advance the parse position. */
3082 static inline boolean
3083 sm1_parse_eof(struct shader_translator
*tx
)
3085 return TOKEN_PEEK(tx
) == NINED3DSP_END
;
/* Consumes the version token.
 * Stores the major and minor version in tx->version and derives
 * tx->processor from the upper 16 bits of the token: NINED3D_SM1_VS selects
 * PIPE_SHADER_VERTEX, NINED3D_SM1_PS selects PIPE_SHADER_FRAGMENT. The
 * remaining visible statement logs the token as an invalid shader type. */
3089 sm1_read_version(struct shader_translator
*tx
)
3091 const DWORD tok
= TOKEN_NEXT(tx
);
3093 tx
->version
.major
= D3DSHADER_VERSION_MAJOR(tok
);
3094 tx
->version
.minor
= D3DSHADER_VERSION_MINOR(tok
);
3096 switch (tok
>> 16) {
3097 case NINED3D_SM1_VS
: tx
->processor
= PIPE_SHADER_VERTEX
; break;
3098 case NINED3D_SM1_PS
: tx
->processor
= PIPE_SHADER_FRAGMENT
; break;
3100 DBG("Invalid shader type: %x\n", tok
);
/* Computes where the next instruction should start, for later comparison
 * in TOKEN_JUMP().
 * For shader major version >= 2 the instruction token carries its own
 * length, so parse_next = parse + 1 + length field. The other visible
 * assignment sets parse_next to NULL, which disables the check. */
3106 /* This is just to check if we parsed the instruction properly. */
3108 sm1_parse_get_skip(struct shader_translator
*tx
)
3110 const DWORD tok
= TOKEN_PEEK(tx
);
3112 if (tx
->version
.major
>= 2) {
3113 tx
->parse_next
= tx
->parse
+ 1 /* this */ +
3114 ((tok
& D3DSI_INSTLENGTH_MASK
) >> D3DSI_INSTLENGTH_SHIFT
);
3116 tx
->parse_next
= NULL
; /* TODO: determine from param count */
/* Comment printing hook called by sm1_parse_comments() with the comment
 * pointer and its size taken from the comment token.
 * NOTE(review): only the signature is visible in this excerpt; the body
 * could not be reviewed. */
3121 sm1_print_comment(const char *comment
, UINT size
)
/* Skips every comment token at the current parse position.
 * For each token whose opcode is D3DSIO_COMMENT, the size field is
 * extracted and tx->parse is advanced by size + 1 tokens (the payload plus
 * the comment token itself); sm1_print_comment() is called with that size.
 * NOTE(review): the only value visible for the comment pointer is the empty
 * string, and the check of the print argument is not visible in this
 * excerpt -- confirm both. */
3129 sm1_parse_comments(struct shader_translator
*tx
, BOOL print
)
3131 DWORD tok
= TOKEN_PEEK(tx
);
3133 while ((tok
& D3DSI_OPCODE_MASK
) == D3DSIO_COMMENT
)
3135 const char *comment
= "";
3136 UINT size
= (tok
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
3137 tx
->parse
+= size
+ 1;
3140 sm1_print_comment(comment
, size
);
/* look at the token after the skipped comment */
3142 tok
= TOKEN_PEEK(tx
);
/* Reads one parameter token into *reg and, when the token has the
 * relative-addressing flag (D3DSHADER_ADDRMODE_RELATIVE), also fills *rel.
 * Two ways of producing *rel are visible: for shader major version < 2 a
 * token is synthesised from D3DSPR_ADDR register-type bits, otherwise the
 * next token of the stream is consumed.
 * NOTE(review): parts of the synthesised expression and the branch
 * structure are not visible in this excerpt -- confirm. */
3147 sm1_parse_get_param(struct shader_translator
*tx
, DWORD
*reg
, DWORD
*rel
)
3149 *reg
= TOKEN_NEXT(tx
);
3151 if (*reg
& D3DSHADER_ADDRMODE_RELATIVE
)
3153 if (tx
->version
.major
< 2)
3155 ((D3DSPR_ADDR
<< D3DSP_REGTYPE_SHIFT2
) & D3DSP_REGTYPE_MASK2
) |
3156 ((D3DSPR_ADDR
<< D3DSP_REGTYPE_SHIFT
) & D3DSP_REGTYPE_MASK
) |
3159 *rel
= TOKEN_NEXT(tx
);
/* Decodes a destination parameter token into *dst.
 * The register type is assembled from the two REGTYPE bit fields; type is
 * set to TGSI_RETURN_TYPE_FLOAT; idx, mask and mod come from their masks.
 * The shift field is 4 bits wide and is sign-extended by
 * (shift & 0x7) - (shift & 0x8), giving a value in [-8, 7].
 * NOTE(review): the left-hand side of the register-type expression and the
 * declaration of shift are not visible in this excerpt. */
3164 sm1_parse_dst_param(struct sm1_dst_param
*dst
, DWORD tok
)
3168 (tok
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
|
3169 (tok
& D3DSP_REGTYPE_MASK2
) >> D3DSP_REGTYPE_SHIFT2
;
3170 dst
->type
= TGSI_RETURN_TYPE_FLOAT
;
3171 dst
->idx
= tok
& D3DSP_REGNUM_MASK
;
3173 dst
->mask
= (tok
& NINED3DSP_WRITEMASK_MASK
) >> NINED3DSP_WRITEMASK_SHIFT
;
3174 dst
->mod
= (tok
& D3DSP_DSTMOD_MASK
) >> D3DSP_DSTMOD_SHIFT
;
3175 shift
= (tok
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
3176 dst
->shift
= (shift
& 0x7) - (shift
& 0x8);
/* Decodes a source parameter token into *src.
 * The register type is assembled from the two REGTYPE bit fields; type is
 * set to TGSI_RETURN_TYPE_FLOAT; idx, swizzle and mod come from their
 * masks. The extra constant files are then folded into D3DSPR_CONST with an
 * index offset: CONST2 adds 2048, CONST3 adds 4096, CONST4 adds 6144.
 * NOTE(review): the left-hand side of the register-type expression is not
 * visible in this excerpt. */
3180 sm1_parse_src_param(struct sm1_src_param
*src
, DWORD tok
)
3183 ((tok
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) |
3184 ((tok
& D3DSP_REGTYPE_MASK2
) >> D3DSP_REGTYPE_SHIFT2
);
3185 src
->type
= TGSI_RETURN_TYPE_FLOAT
;
3186 src
->idx
= tok
& D3DSP_REGNUM_MASK
;
3188 src
->swizzle
= (tok
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
3189 src
->mod
= (tok
& D3DSP_SRCMOD_MASK
) >> D3DSP_SRCMOD_SHIFT
;
/* map the additional constant files onto one flat constant index space */
3191 switch (src
->file
) {
3192 case D3DSPR_CONST2
: src
->file
= D3DSPR_CONST
; src
->idx
+= 2048; break;
3193 case D3DSPR_CONST3
: src
->file
= D3DSPR_CONST
; src
->idx
+= 4096; break;
3194 case D3DSPR_CONST4
: src
->file
= D3DSPR_CONST
; src
->idx
+= 6144; break;
/* Fills *imm with the immediate value that follows a constant-definition
 * instruction.
 * The file is set to NINED3DSPR_IMMEDIATE and the swizzle to
 * NINED3DSP_NOSWIZZLE. Depending on the opcode, the value is copied from
 * the token stream as four DWORDs typed FLOAT4, four DWORDs typed INT4, or
 * one DWORD typed BOOL.
 * NOTE(review): the case labels are not visible in this excerpt; presumably
 * they are DEF, DEFI and DEFB in that order (the caller invokes this only
 * for those opcodes) -- confirm, along with how tx->parse is advanced. */
3201 sm1_parse_immediate(struct shader_translator
*tx
,
3202 struct sm1_src_param
*imm
)
3204 imm
->file
= NINED3DSPR_IMMEDIATE
;
3207 imm
->swizzle
= NINED3DSP_NOSWIZZLE
;
3209 switch (tx
->insn
.opcode
) {
3211 imm
->type
= NINED3DSPTYPE_FLOAT4
;
3212 memcpy(&imm
->imm
.d
[0], tx
->parse
, 4 * sizeof(DWORD
));
3216 imm
->type
= NINED3DSPTYPE_INT4
;
3217 memcpy(&imm
->imm
.d
[0], tx
->parse
, 4 * sizeof(DWORD
));
3221 imm
->type
= NINED3DSPTYPE_BOOL
;
3222 memcpy(&imm
->imm
.d
[0], tx
->parse
, 1 * sizeof(DWORD
));
/* Reads one destination parameter from the token stream.
 * The raw token (and, if present, its relative-addressing token) is fetched
 * with sm1_parse_get_param() and decoded into *dst. When the token has the
 * relative-addressing flag, the relative token is decoded into *rel as a
 * source parameter. */
3232 sm1_read_dst_param(struct shader_translator
*tx
,
3233 struct sm1_dst_param
*dst
,
3234 struct sm1_src_param
*rel
)
3236 DWORD tok_dst
, tok_rel
= 0;
3238 sm1_parse_get_param(tx
, &tok_dst
, &tok_rel
);
3239 sm1_parse_dst_param(dst
, tok_dst
);
3240 if (tok_dst
& D3DSHADER_ADDRMODE_RELATIVE
) {
3241 sm1_parse_src_param(rel
, tok_rel
);
/* Reads one source parameter from the token stream.
 * The raw token (and, if present, its relative-addressing token) is fetched
 * with sm1_parse_get_param() and decoded into *src. When the token has the
 * relative-addressing flag, the relative token is decoded into *rel.
 * NOTE(review): sm1_parse_instruction() passes NULL for rel when reading the
 * predicate; a line inside the relative branch is not visible in this
 * excerpt -- confirm that case is guarded. */
3247 sm1_read_src_param(struct shader_translator
*tx
,
3248 struct sm1_src_param
*src
,
3249 struct sm1_src_param
*rel
)
3251 DWORD tok_src
, tok_rel
= 0;
3253 sm1_parse_get_param(tx
, &tok_src
, &tok_rel
);
3254 sm1_parse_src_param(src
, tok_src
);
3255 if (tok_src
& D3DSHADER_ADDRMODE_RELATIVE
) {
3257 sm1_parse_src_param(rel
, tok_rel
);
/* Reads a declaration's two tokens into *sem.
 * The first token supplies the sampler (texture) type, the usage and the
 * usage index; the second is decoded as a destination parameter into
 * sem->reg. */
3263 sm1_read_semantic(struct shader_translator
*tx
,
3264 struct sm1_semantic
*sem
)
3266 const DWORD tok_usg
= TOKEN_NEXT(tx
);
3267 const DWORD tok_dst
= TOKEN_NEXT(tx
);
3269 sem
->sampler_type
= (tok_usg
& D3DSP_TEXTURETYPE_MASK
) >> D3DSP_TEXTURETYPE_SHIFT
;
3270 sem
->usage
= (tok_usg
& D3DSP_DCL_USAGE_MASK
) >> D3DSP_DCL_USAGE_SHIFT
;
3271 sem
->usage_idx
= (tok_usg
& D3DSP_DCL_USAGEINDEX_MASK
) >> D3DSP_DCL_USAGEINDEX_SHIFT
;
3273 sm1_parse_dst_param(&sem
->reg
, tok_dst
);
/* Parses and translates one instruction.
 * Steps visible in this excerpt:
 *   1. skip leading comment tokens and compute the expected position of the
 *      next instruction;
 *   2. read the instruction token and split it into opcode, flags, coissue
 *      and predicated;
 *   3. find the op-info entry: through op_info_map for opcodes inside the
 *      map, or inst_phase / inst_comment for those two opcodes; an unknown
 *      opcode is logged;
 *   4. take ndst / nsrc from the entry and check the shader version against
 *      the entry's min / max for the current shader stage;
 *   5. read the destination parameters, the predicate (if predicated) and
 *      the source parameters; DEF / DEFI / DEFB additionally parse their
 *      immediate into src[0];
 *   6. dump and check the instruction, then translate it either with the
 *      entry's handler or with NineTranslateInstruction_Generic(), and
 *      apply destination modifiers;
 *   7. reset the scratch register counter.
 * NOTE(review): several control-flow lines (conditions, error returns) are
 * not visible in this excerpt, so the exact branching is not documented. */
3277 sm1_parse_instruction(struct shader_translator
*tx
)
3279 struct sm1_instruction
*insn
= &tx
->insn
;
3282 struct sm1_op_info
*info
= NULL
;
3285 sm1_parse_comments(tx
, TRUE
);
3286 sm1_parse_get_skip(tx
);
3288 tok
= TOKEN_NEXT(tx
);
3290 insn
->opcode
= tok
& D3DSI_OPCODE_MASK
;
3291 insn
->flags
= (tok
& NINED3DSIO_OPCODE_FLAGS_MASK
) >> NINED3DSIO_OPCODE_FLAGS_SHIFT
;
3292 insn
->coissue
= !!(tok
& D3DSI_COISSUE
);
3293 insn
->predicated
= !!(tok
& NINED3DSHADER_INST_PREDICATED
);
/* look the opcode up in the per-version map built by create_op_info_map() */
3295 if (insn
->opcode
< ARRAY_SIZE(tx
->op_info_map
)) {
3296 int k
= tx
->op_info_map
[insn
->opcode
];
3298 assert(k
< ARRAY_SIZE(inst_table
));
3299 info
= &inst_table
[k
];
3302 if (insn
->opcode
== D3DSIO_PHASE
) info
= &inst_phase
;
3303 if (insn
->opcode
== D3DSIO_COMMENT
) info
= &inst_comment
;
3306 DBG("illegal or unhandled opcode: %08x\n", insn
->opcode
);
3311 insn
->ndst
= info
->ndst
;
3312 insn
->nsrc
= info
->nsrc
;
3314 assert(!insn
->predicated
&& "TODO: predicated instructions");
3318 unsigned min
= IS_VS
? info
->vert_version
.min
: info
->frag_version
.min
;
3319 unsigned max
= IS_VS
? info
->vert_version
.max
: info
->frag_version
.max
;
3320 unsigned ver
= (tx
->version
.major
<< 8) | tx
->version
.minor
;
3321 if (ver
< min
|| ver
> max
) {
3322 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3328 for (i
= 0; i
< insn
->ndst
; ++i
)
3329 sm1_read_dst_param(tx
, &insn
->dst
[i
], &insn
->dst_rel
[i
]);
3330 if (insn
->predicated
)
3331 sm1_read_src_param(tx
, &insn
->pred
, NULL
);
3332 for (i
= 0; i
< insn
->nsrc
; ++i
)
3333 sm1_read_src_param(tx
, &insn
->src
[i
], &insn
->src_rel
[i
]);
3335 /* parse here so we can dump them before processing */
3336 if (insn
->opcode
== D3DSIO_DEF
||
3337 insn
->opcode
== D3DSIO_DEFI
||
3338 insn
->opcode
== D3DSIO_DEFB
)
3339 sm1_parse_immediate(tx
, &tx
->insn
.src
[0]);
3341 sm1_dump_instruction(insn
, tx
->cond_depth
+ tx
->loop_depth
);
3342 sm1_instruction_check(insn
);
3345 hr
= info
->handler(tx
);
3347 hr
= NineTranslateInstruction_Generic(tx
);
3348 tx_apply_dst0_modifiers(tx
);
3352 tx
->num_scratch
= 0; /* reset */
/* Initialises a shader translator for the byte code described by info.
 * Visible effects:
 *   - tx->byte_code and tx->parse point at info->byte_code;
 *   - the output fields of info are reset (input map filled with
 *     NINE_DECLUSAGE_NONE, counters and masks zeroed, lconstf pointers
 *     cleared, flags set to FALSE / 0);
 *   - the cached registers in tx->regs are set to the undefined dst / src
 *     value;
 *   - the version token is consumed, info->version is set to
 *     (major << 4) | minor, and the opcode map is built.
 * Note that info->version uses a 4-bit shift, unlike the (major << 8)
 * value used for the opcode table's version comparisons. */
3358 tx_ctor(struct shader_translator
*tx
, struct nine_shader_info
*info
)
3364 tx
->byte_code
= info
->byte_code
;
3365 tx
->parse
= info
->byte_code
;
3367 for (i
= 0; i
< ARRAY_SIZE(info
->input_map
); ++i
)
3368 info
->input_map
[i
] = NINE_DECLUSAGE_NONE
;
3369 info
->num_inputs
= 0;
3371 info
->position_t
= FALSE
;
3372 info
->point_size
= FALSE
;
3374 tx
->info
->const_float_slots
= 0;
3375 tx
->info
->const_int_slots
= 0;
3376 tx
->info
->const_bool_slots
= 0;
3378 info
->sampler_mask
= 0x0;
3379 info
->rt_mask
= 0x0;
3381 info
->lconstf
.data
= NULL
;
3382 info
->lconstf
.ranges
= NULL
;
3384 info
->bumpenvmat_needed
= 0;
/* mark every cached register as not yet declared */
3386 for (i
= 0; i
< ARRAY_SIZE(tx
->regs
.rL
); ++i
) {
3387 tx
->regs
.rL
[i
] = ureg_dst_undef();
3389 tx
->regs
.address
= ureg_dst_undef();
3390 tx
->regs
.a0
= ureg_dst_undef();
3391 tx
->regs
.p
= ureg_dst_undef();
3392 tx
->regs
.oDepth
= ureg_dst_undef();
3393 tx
->regs
.vPos
= ureg_src_undef();
3394 tx
->regs
.vFace
= ureg_src_undef();
3395 for (i
= 0; i
< ARRAY_SIZE(tx
->regs
.o
); ++i
)
3396 tx
->regs
.o
[i
] = ureg_dst_undef();
3397 for (i
= 0; i
< ARRAY_SIZE(tx
->regs
.oCol
); ++i
)
3398 tx
->regs
.oCol
[i
] = ureg_dst_undef();
3399 for (i
= 0; i
< ARRAY_SIZE(tx
->regs
.vC
); ++i
)
3400 tx
->regs
.vC
[i
] = ureg_src_undef();
3401 for (i
= 0; i
< ARRAY_SIZE(tx
->regs
.vT
); ++i
)
3402 tx
->regs
.vT
[i
] = ureg_src_undef();
/* the version must be known before the opcode map can be built */
3404 sm1_read_version(tx
);
3406 info
->version
= (tx
->version
.major
<< 4) | tx
->version
.minor
;
3408 tx
->num_outputs
= 0;
3410 create_op_info_map(tx
);
/* Releases translator resources. The only statement visible in this excerpt
 * frees tx->inst_labels when num_inst_labels is non-zero. */
3414 tx_dtor(struct shader_translator
*tx
)
3416 if (tx
->num_inst_labels
)
3417 FREE(tx
->inst_labels
);
/* Vertex-shader epilogue intended to apply the viewport transform.
 * Constants 0 and 1 are referenced with dimension index 4, but the scale
 * and offset instructions that would use them exist only inside the TODO
 * comment; the one instruction emitted copies oPos.xyz to oPos_out.xyz
 * unchanged. */
3423 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3424 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3426 shader_add_vs_viewport_transform(struct shader_translator
*tx
)
3428 struct ureg_program
*ureg
= tx
->ureg
;
3429 struct ureg_src c0
= NINE_CONSTANT_SRC(0);
3430 struct ureg_src c1
= NINE_CONSTANT_SRC(1);
3431 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3433 c0
= ureg_src_dimension(c0
, 4);
3434 c1
= ureg_src_dimension(c1
, 4);
3435 /* TODO: find out when we need to apply the viewport transformation or not.
3436 * Likely will be XYZ vs XYZRHW in vdecl_out
3437 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3438 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3440 ureg_MOV(ureg
, ureg_writemask(tx
->regs
.oPos_out
, TGSI_WRITEMASK_XYZ
), ureg_src(tx
->regs
.oPos
));
/* Appends the pixel-shader fog stage and writes the result to COLOR
 * output 0.
 *
 * tx:      translator; tx->info->fog_enable and tx->info->fog_mode select
 *          the path taken.
 * src_col: colour computed by the shader, before fog is applied.
 *
 * A scalar fog_factor is computed and used to blend src_col with the fog
 * colour for the RGB channels; alpha is copied from src_col unchanged.
 *
 * NOTE(review): several structural lines of this function (branch closers,
 * the last branch keyword) are not visible in this excerpt, so the exact
 * nesting should be confirmed against the complete file.
 */
3444 shader_add_ps_fog_stage(struct shader_translator
*tx
, struct ureg_src src_col
)
3446 struct ureg_program
*ureg
= tx
->ureg
;
3447 struct ureg_dst oCol0
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 0);
3448 struct ureg_src fog_end
, fog_coeff
, fog_density
;
3449 struct ureg_src fog_vs
, depth
, fog_color
;
3450 struct ureg_dst fog_factor
;
/* Fog disabled: forward src_col to the colour output as-is.
 * NOTE(review): the statements that end this branch are not visible here. */
3452 if (!tx
->info
->fog_enable
) {
3453 ureg_MOV(ureg
, oCol0
, src_col
);
/* With a fog mode set, depth is the Z component of the value returned by
 * nine_get_position_input(). */
3457 if (tx
->info
->fog_mode
!= D3DFOG_NONE
) {
3458 depth
= nine_get_position_input(tx
);
3459 depth
= ureg_scalar(depth
, TGSI_SWIZZLE_Z
);
/* Float constant 32 supplies the fog colour; constant 33 supplies the
 * per-mode parameters read in the branches that follow. */
3462 nine_info_mark_const_f_used(tx
->info
, 33);
3463 fog_color
= NINE_CONSTANT_SRC(32);
3464 fog_factor
= tx_scratch_scalar(tx
);
/* LINEAR: fog_factor = saturate((fog_end - depth) * fog_coeff),
 * with fog_end = c33.x and fog_coeff = c33.y. */
3466 if (tx
->info
->fog_mode
== D3DFOG_LINEAR
) {
3467 fog_end
= NINE_CONSTANT_SRC_SWIZZLE(33, X
);
3468 fog_coeff
= NINE_CONSTANT_SRC_SWIZZLE(33, Y
);
3469 ureg_ADD(ureg
, fog_factor
, fog_end
, ureg_negate(depth
));
3470 ureg_MUL(ureg
, ureg_saturate(fog_factor
), tx_src_scalar(fog_factor
), fog_coeff
);
/* EXP: fog_factor = 2^(-1.442695 * depth * density) with density = c33.x;
 * -1.442695 is -log2(e), so this evaluates exp(-depth * density). */
3471 } else if (tx
->info
->fog_mode
== D3DFOG_EXP
) {
3472 fog_density
= NINE_CONSTANT_SRC_SWIZZLE(33, X
);
3473 ureg_MUL(ureg
, fog_factor
, depth
, fog_density
);
3474 ureg_MUL(ureg
, fog_factor
, tx_src_scalar(fog_factor
), ureg_imm1f(ureg
, -1.442695f
));
3475 ureg_EX2(ureg
, fog_factor
, tx_src_scalar(fog_factor
));
/* EXP2: same as EXP but the product depth * density is squared first,
 * giving exp(-(depth * density)^2). */
3476 } else if (tx
->info
->fog_mode
== D3DFOG_EXP2
) {
3477 fog_density
= NINE_CONSTANT_SRC_SWIZZLE(33, X
);
3478 ureg_MUL(ureg
, fog_factor
, depth
, fog_density
);
3479 ureg_MUL(ureg
, fog_factor
, tx_src_scalar(fog_factor
), tx_src_scalar(fog_factor
));
3480 ureg_MUL(ureg
, fog_factor
, tx_src_scalar(fog_factor
), ureg_imm1f(ureg
, -1.442695f
));
3481 ureg_EX2(ureg
, fog_factor
, tx_src_scalar(fog_factor
));
/* Takes the fog factor from the FOG fragment-shader input (perspective
 * interpolated).
 * NOTE(review): the branch keyword introducing this path and the second
 * argument of ureg_scalar() are not visible in this excerpt; presumably
 * this is the fallback when none of the modes above matched - confirm. */
3483 fog_vs
= ureg_scalar(ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_FOG
, 0,
3484 TGSI_INTERPOLATE_PERSPECTIVE
),
3486 ureg_MOV(ureg
, fog_factor
, fog_vs
);
/* RGB: blend src_col with fog_color, weighted by fog_factor.
 * Alpha: copied straight from src_col. */
3489 ureg_LRP(ureg
, ureg_writemask(oCol0
, TGSI_WRITEMASK_XYZ
),
3490 tx_src_scalar(fog_factor
), src_col
, fog_color
);
3491 ureg_MOV(ureg
, ureg_writemask(oCol0
, TGSI_WRITEMASK_W
), src_col
);
/* Capability query shorthands. Both expand against a `screen` variable that
 * must be in scope at the point of use; GET_SHADER_CAP additionally reads
 * `info->type` to pick the shader stage being queried. The argument is the
 * capability name without its PIPE_CAP_ / PIPE_SHADER_CAP_ prefix. */
#define GET_CAP(cap) \
    screen->get_param(screen, PIPE_CAP_##cap)
#define GET_SHADER_CAP(cap) \
    screen->get_shader_param(screen, info->type, PIPE_SHADER_CAP_##cap)
3500 nine_translate_shader(struct NineDevice9
*device
, struct nine_shader_info
*info
, struct pipe_context
*pipe
)
3502 struct shader_translator
*tx
;
3503 HRESULT hr
= D3D_OK
;
3504 const unsigned processor
= info
->type
;
3505 struct pipe_screen
*screen
= info
->process_vertices
? device
->screen_sw
: device
->screen
;
3507 user_assert(processor
!= ~0, D3DERR_INVALIDCALL
);
3509 tx
= CALLOC_STRUCT(shader_translator
);
3511 return E_OUTOFMEMORY
;
3514 if (((tx
->version
.major
<< 16) | tx
->version
.minor
) > 0x00030000) {
3515 hr
= D3DERR_INVALIDCALL
;
3516 DBG("Unsupported shader version: %u.%u !\n",
3517 tx
->version
.major
, tx
->version
.minor
);
3520 if (tx
->processor
!= processor
) {
3521 hr
= D3DERR_INVALIDCALL
;
3522 DBG("Shader type mismatch: %u / %u !\n", tx
->processor
, processor
);
3525 DUMP("%s%u.%u\n", processor
== PIPE_SHADER_VERTEX
? "VS" : "PS",
3526 tx
->version
.major
, tx
->version
.minor
);
3528 tx
->ureg
= ureg_create(processor
);
3534 tx
->native_integers
= GET_SHADER_CAP(INTEGERS
);
3535 tx
->inline_subroutines
= !GET_SHADER_CAP(SUBROUTINES
);
3536 tx
->want_texcoord
= GET_CAP(TGSI_TEXCOORD
);
3537 tx
->shift_wpos
= !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
3538 tx
->texcoord_sn
= tx
->want_texcoord
?
3539 TGSI_SEMANTIC_TEXCOORD
: TGSI_SEMANTIC_GENERIC
;
3540 tx
->wpos_is_sysval
= GET_CAP(TGSI_FS_POSITION_IS_SYSVAL
);
3541 tx
->face_is_sysval_integer
= GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL
);
3544 tx
->num_constf_allowed
= NINE_MAX_CONST_F
;
3545 } else if (tx
->version
.major
< 2) {/* IS_PS v1 */
3546 tx
->num_constf_allowed
= 8;
3547 } else if (tx
->version
.major
== 2) {/* IS_PS v2 */
3548 tx
->num_constf_allowed
= 32;
3549 } else {/* IS_PS v3 */
3550 tx
->num_constf_allowed
= NINE_MAX_CONST_F_PS3
;
3553 if (tx
->version
.major
< 2) {
3554 tx
->num_consti_allowed
= 0;
3555 tx
->num_constb_allowed
= 0;
3557 tx
->num_consti_allowed
= NINE_MAX_CONST_I
;
3558 tx
->num_constb_allowed
= NINE_MAX_CONST_B
;
3561 if (IS_VS
&& tx
->version
.major
>= 2 && info
->swvp_on
) {
3562 tx
->num_constf_allowed
= 8192;
3563 tx
->num_consti_allowed
= 2048;
3564 tx
->num_constb_allowed
= 2048;
3567 /* VS must always write position. Declare it here to make it the 1st output.
3568 * (Some drivers like nv50 are buggy and rely on that.)
3571 tx
->regs
.oPos
= ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0);
3573 ureg_property(tx
->ureg
, TGSI_PROPERTY_FS_COORD_ORIGIN
, TGSI_FS_COORD_ORIGIN_UPPER_LEFT
);
3574 if (!tx
->shift_wpos
)
3575 ureg_property(tx
->ureg
, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
, TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
3578 if (GET_CAP(TGSI_MUL_ZERO_WINS
))
3579 ureg_property(tx
->ureg
, TGSI_PROPERTY_MUL_ZERO_WINS
, 1);
3581 while (!sm1_parse_eof(tx
) && !tx
->failure
)
3582 sm1_parse_instruction(tx
);
3583 tx
->parse
++; /* for byte_size */
3586 /* For VS shaders, we print the warning later,
3587 * we first try with swvp. */
3589 ERR("Encountered buggy shader\n");
3590 ureg_destroy(tx
->ureg
);
3591 hr
= D3DERR_INVALIDCALL
;
3595 if (IS_PS
&& tx
->version
.major
< 3) {
3596 if (tx
->version
.major
< 2) {
3597 assert(tx
->num_temp
); /* there must be color output */
3598 info
->rt_mask
|= 0x1;
3599 shader_add_ps_fog_stage(tx
, ureg_src(tx
->regs
.r
[0]));
3601 shader_add_ps_fog_stage(tx
, ureg_src(tx
->regs
.oCol
[0]));
3605 if (IS_VS
&& tx
->version
.major
< 3 && ureg_dst_is_undef(tx
->regs
.oFog
) && info
->fog_enable
) {
3606 tx
->regs
.oFog
= ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_FOG
, 0);
3607 ureg_MOV(tx
->ureg
, ureg_writemask(tx
->regs
.oFog
, TGSI_WRITEMASK_X
), ureg_imm1f(tx
->ureg
, 0.0f
));
3610 if (info
->position_t
)
3611 ureg_property(tx
->ureg
, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION
, TRUE
);
3613 if (IS_VS
&& !ureg_dst_is_undef(tx
->regs
.oPts
)) {
3614 struct ureg_dst oPts
= ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_PSIZE
, 0);
3615 ureg_MAX(tx
->ureg
, tx
->regs
.oPts
, ureg_src(tx
->regs
.oPts
), ureg_imm1f(tx
->ureg
, info
->point_size_min
));
3616 ureg_MIN(tx
->ureg
, oPts
, ureg_src(tx
->regs
.oPts
), ureg_imm1f(tx
->ureg
, info
->point_size_max
));
3617 info
->point_size
= TRUE
;
3620 if (info
->process_vertices
)
3621 shader_add_vs_viewport_transform(tx
);
3625 /* record local constants */
3626 if (tx
->num_lconstf
&& tx
->indirect_const_access
) {
3627 struct nine_range
*ranges
;
3634 data
= MALLOC(tx
->num_lconstf
* 4 * sizeof(float));
3637 info
->lconstf
.data
= data
;
3639 indices
= MALLOC(tx
->num_lconstf
* sizeof(indices
[0]));
3643 /* lazy sort, num_lconstf should be small */
3644 for (n
= 0; n
< tx
->num_lconstf
; ++n
) {
3645 for (k
= 0, i
= 0; i
< tx
->num_lconstf
; ++i
) {
3646 if (tx
->lconstf
[i
].idx
< tx
->lconstf
[k
].idx
)
3649 indices
[n
] = tx
->lconstf
[k
].idx
;
3650 memcpy(&data
[n
* 4], &tx
->lconstf
[k
].f
[0], 4 * sizeof(float));
3651 tx
->lconstf
[k
].idx
= INT_MAX
;
3655 for (n
= 1, i
= 1; i
< tx
->num_lconstf
; ++i
)
3656 if (indices
[i
] != indices
[i
- 1] + 1)
3658 ranges
= MALLOC(n
* sizeof(ranges
[0]));
3663 info
->lconstf
.ranges
= ranges
;
3666 ranges
[k
].bgn
= indices
[0];
3667 for (i
= 1; i
< tx
->num_lconstf
; ++i
) {
3668 if (indices
[i
] != indices
[i
- 1] + 1) {
3669 ranges
[k
].next
= &ranges
[k
+ 1];
3670 ranges
[k
].end
= indices
[i
- 1] + 1;
3672 ranges
[k
].bgn
= indices
[i
];
3675 ranges
[k
].end
= indices
[i
- 1] + 1;
3676 ranges
[k
].next
= NULL
;
3677 assert(n
== (k
+ 1));
3684 if (info
->const_float_slots
> device
->max_vs_const_f
&&
3685 (info
->const_int_slots
|| info
->const_bool_slots
) &&
3686 (!IS_VS
|| !info
->swvp_on
))
3687 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3690 if (tx
->indirect_const_access
) /* vs only */
3691 info
->const_float_slots
= device
->max_vs_const_f
;
3693 if (!IS_VS
|| !info
->swvp_on
) {
3694 unsigned s
, slot_max
;
3695 unsigned max_const_f
= IS_VS
? device
->max_vs_const_f
: device
->max_ps_const_f
;
3697 slot_max
= info
->const_bool_slots
> 0 ?
3698 max_const_f
+ NINE_MAX_CONST_I
3699 + DIV_ROUND_UP(info
->const_bool_slots
, 4) :
3700 info
->const_int_slots
> 0 ?
3701 max_const_f
+ info
->const_int_slots
:
3702 info
->const_float_slots
;
3704 info
->const_used_size
= sizeof(float[4]) * slot_max
; /* slots start from 1 */
3706 for (s
= 0; s
< slot_max
; s
++)
3707 ureg_DECL_constant(tx
->ureg
, s
);
3709 ureg_DECL_constant2D(tx
->ureg
, 0, 4095, 0);
3710 ureg_DECL_constant2D(tx
->ureg
, 0, 4095, 1);
3711 ureg_DECL_constant2D(tx
->ureg
, 0, 2047, 2);
3712 ureg_DECL_constant2D(tx
->ureg
, 0, 511, 3);
3715 if (info
->process_vertices
)
3716 ureg_DECL_constant2D(tx
->ureg
, 0, 2, 4); /* Viewport data */
3718 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE
)) {
3719 const struct tgsi_token
*toks
= ureg_get_tokens(tx
->ureg
, NULL
);
3721 ureg_free_tokens(toks
);
3724 if (info
->process_vertices
) {
3725 NineVertexDeclaration9_FillStreamOutputInfo(info
->vdecl_out
,
3729 info
->cso
= ureg_create_shader_with_so_and_destroy(tx
->ureg
, pipe
, &(info
->so
));
3731 info
->cso
= ureg_create_shader_and_destroy(tx
->ureg
, pipe
);
3733 hr
= D3DERR_DRIVERINTERNALERROR
;
3734 FREE(info
->lconstf
.data
);
3735 FREE(info
->lconstf
.ranges
);
3739 info
->byte_size
= (tx
->parse
- tx
->byte_code
) * sizeof(DWORD
);