2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
24 #include "nine_shader.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
30 #include "util/macros.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_dump.h"
37 #define DBG_CHANNEL DBG_SHADER
39 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
42 struct shader_translator
;
44 typedef HRESULT (*translate_instruction_func
)(struct shader_translator
*);
46 static inline const char *d3dsio_to_string(unsigned opcode
);
49 #define NINED3D_SM1_VS 0xfffe
50 #define NINED3D_SM1_PS 0xffff
52 #define NINE_MAX_COND_DEPTH 64
53 #define NINE_MAX_LOOP_DEPTH 64
55 #define NINED3DSP_END 0x0000ffff
57 #define NINED3DSPTYPE_FLOAT4 0
58 #define NINED3DSPTYPE_INT4 1
59 #define NINED3DSPTYPE_BOOL 2
61 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
63 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
64 #define NINED3DSP_WRITEMASK_SHIFT 16
66 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
68 #define NINED3DSHADER_REL_OP_GT 1
69 #define NINED3DSHADER_REL_OP_EQ 2
70 #define NINED3DSHADER_REL_OP_GE 3
71 #define NINED3DSHADER_REL_OP_LT 4
72 #define NINED3DSHADER_REL_OP_NE 5
73 #define NINED3DSHADER_REL_OP_LE 6
75 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
76 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
78 #define NINED3DSI_TEXLD_PROJECT 0x1
79 #define NINED3DSI_TEXLD_BIAS 0x2
81 #define NINED3DSP_WRITEMASK_0 0x1
82 #define NINED3DSP_WRITEMASK_1 0x2
83 #define NINED3DSP_WRITEMASK_2 0x4
84 #define NINED3DSP_WRITEMASK_3 0x8
85 #define NINED3DSP_WRITEMASK_ALL 0xf
87 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
89 #define NINE_SWIZZLE4(x,y,z,w) \
90 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
92 #define NINE_CONSTANT_SRC(index) \
93 ureg_src_register(TGSI_FILE_CONSTANT, index)
95 #define NINE_APPLY_SWIZZLE(src, s) \
96 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
98 #define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
99 NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)
101 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
102 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
103 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
106 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
107 * BIAS <= PS 1.4 (x-0.5)
108 * BIASNEG <= PS 1.4 (-(x-0.5))
109 * SIGN <= PS 1.4 (2(x-0.5))
110 * SIGNNEG <= PS 1.4 (-2(x-0.5))
111 * COMP <= PS 1.4 (1-x)
113 * X2NEG = PS 1.4 (-2x)
114 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
115 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
116 * ABS >= SM 3.0 (abs(x))
117 * ABSNEG >= SM 3.0 (-abs(x))
118 * NOT >= SM 2.0 predication only
120 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
121 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
124 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
125 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
126 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
127 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
128 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
129 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
130 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
131 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
132 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
133 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
135 static const char *sm1_mod_str
[] =
137 [NINED3DSPSM_NONE
] = "",
138 [NINED3DSPSM_NEG
] = "-",
139 [NINED3DSPSM_BIAS
] = "bias",
140 [NINED3DSPSM_BIASNEG
] = "biasneg",
141 [NINED3DSPSM_SIGN
] = "sign",
142 [NINED3DSPSM_SIGNNEG
] = "signneg",
143 [NINED3DSPSM_COMP
] = "comp",
144 [NINED3DSPSM_X2
] = "x2",
145 [NINED3DSPSM_X2NEG
] = "x2neg",
146 [NINED3DSPSM_DZ
] = "dz",
147 [NINED3DSPSM_DW
] = "dw",
148 [NINED3DSPSM_ABS
] = "abs",
149 [NINED3DSPSM_ABSNEG
] = "-abs",
150 [NINED3DSPSM_NOT
] = "not"
154 sm1_dump_writemask(BYTE mask
)
156 if (mask
& 1) DUMP("x"); else DUMP("_");
157 if (mask
& 2) DUMP("y"); else DUMP("_");
158 if (mask
& 4) DUMP("z"); else DUMP("_");
159 if (mask
& 8) DUMP("w"); else DUMP("_");
163 sm1_dump_swizzle(BYTE s
)
165 char c
[4] = { 'x', 'y', 'z', 'w' };
167 c
[(s
>> 0) & 3], c
[(s
>> 2) & 3], c
[(s
>> 4) & 3], c
[(s
>> 6) & 3]);
170 static const char sm1_file_char
[] =
173 [D3DSPR_INPUT
] = 'v',
174 [D3DSPR_CONST
] = 'c',
176 [D3DSPR_RASTOUT
] = 'R',
177 [D3DSPR_ATTROUT
] = 'D',
178 [D3DSPR_OUTPUT
] = 'o',
179 [D3DSPR_CONSTINT
] = 'I',
180 [D3DSPR_COLOROUT
] = 'C',
181 [D3DSPR_DEPTHOUT
] = 'D',
182 [D3DSPR_SAMPLER
] = 's',
183 [D3DSPR_CONST2
] = 'c',
184 [D3DSPR_CONST3
] = 'c',
185 [D3DSPR_CONST4
] = 'c',
186 [D3DSPR_CONSTBOOL
] = 'B',
188 [D3DSPR_TEMPFLOAT16
] = 'h',
189 [D3DSPR_MISCTYPE
] = 'M',
190 [D3DSPR_LABEL
] = 'X',
191 [D3DSPR_PREDICATE
] = 'p'
195 sm1_dump_reg(BYTE file
, INT index
)
201 case D3DSPR_COLOROUT
:
204 case D3DSPR_DEPTHOUT
:
208 DUMP("oRast%i", index
);
210 case D3DSPR_CONSTINT
:
211 DUMP("iconst[%i]", index
);
213 case D3DSPR_CONSTBOOL
:
214 DUMP("bconst[%i]", index
);
217 DUMP("%c%i", sm1_file_char
[file
], index
);
225 struct sm1_src_param
*rel
;
238 sm1_parse_immediate(struct shader_translator
*, struct sm1_src_param
*);
243 struct sm1_src_param
*rel
;
247 int8_t shift
; /* sint4 */
252 assert_replicate_swizzle(const struct ureg_src
*reg
)
254 assert(reg
->SwizzleY
== reg
->SwizzleX
&&
255 reg
->SwizzleZ
== reg
->SwizzleX
&&
256 reg
->SwizzleW
== reg
->SwizzleX
);
260 sm1_dump_immediate(const struct sm1_src_param
*param
)
262 switch (param
->type
) {
263 case NINED3DSPTYPE_FLOAT4
:
264 DUMP("{ %f %f %f %f }",
265 param
->imm
.f
[0], param
->imm
.f
[1],
266 param
->imm
.f
[2], param
->imm
.f
[3]);
268 case NINED3DSPTYPE_INT4
:
269 DUMP("{ %i %i %i %i }",
270 param
->imm
.i
[0], param
->imm
.i
[1],
271 param
->imm
.i
[2], param
->imm
.i
[3]);
273 case NINED3DSPTYPE_BOOL
:
274 DUMP("%s", param
->imm
.b
? "TRUE" : "FALSE");
283 sm1_dump_src_param(const struct sm1_src_param
*param
)
285 if (param
->file
== NINED3DSPR_IMMEDIATE
) {
286 assert(!param
->mod
&&
288 param
->swizzle
== NINED3DSP_NOSWIZZLE
);
289 sm1_dump_immediate(param
);
294 DUMP("%s(", sm1_mod_str
[param
->mod
]);
296 DUMP("%c[", sm1_file_char
[param
->file
]);
297 sm1_dump_src_param(param
->rel
);
298 DUMP("+%i]", param
->idx
);
300 sm1_dump_reg(param
->file
, param
->idx
);
304 if (param
->swizzle
!= NINED3DSP_NOSWIZZLE
) {
306 sm1_dump_swizzle(param
->swizzle
);
311 sm1_dump_dst_param(const struct sm1_dst_param
*param
)
313 if (param
->mod
& NINED3DSPDM_SATURATE
)
315 if (param
->mod
& NINED3DSPDM_PARTIALP
)
317 if (param
->mod
& NINED3DSPDM_CENTROID
)
319 if (param
->shift
< 0)
320 DUMP("/%u ", 1 << -param
->shift
);
321 if (param
->shift
> 0)
322 DUMP("*%u ", 1 << param
->shift
);
325 DUMP("%c[", sm1_file_char
[param
->file
]);
326 sm1_dump_src_param(param
->rel
);
327 DUMP("+%i]", param
->idx
);
329 sm1_dump_reg(param
->file
, param
->idx
);
331 if (param
->mask
!= NINED3DSP_WRITEMASK_ALL
) {
333 sm1_dump_writemask(param
->mask
);
339 struct sm1_dst_param reg
;
347 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
348 * should be ignored completely */
350 unsigned opcode
; /* TGSI_OPCODE_x */
352 /* versions are still set even if handler is set */
356 } vert_version
, frag_version
;
358 /* number of regs parsed outside of special handler */
362 /* some instructions don't map perfectly, so use a special handler */
363 translate_instruction_func handler
;
366 struct sm1_instruction
368 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode
;
374 struct sm1_src_param src
[4];
375 struct sm1_src_param src_rel
[4];
376 struct sm1_src_param pred
;
377 struct sm1_src_param dst_rel
[1];
378 struct sm1_dst_param dst
[1];
380 struct sm1_op_info
*info
;
384 sm1_dump_instruction(struct sm1_instruction
*insn
, unsigned indent
)
388 /* no info stored for these: */
389 if (insn
->opcode
== D3DSIO_DCL
)
391 for (i
= 0; i
< indent
; ++i
)
394 if (insn
->predicated
) {
396 sm1_dump_src_param(&insn
->pred
);
399 DUMP("%s", d3dsio_to_string(insn
->opcode
));
401 switch (insn
->opcode
) {
403 DUMP(insn
->flags
== NINED3DSI_TEXLD_PROJECT
? "p" : "b");
406 DUMP("_%x", insn
->flags
);
414 for (i
= 0; i
< insn
->ndst
&& i
< Elements(insn
->dst
); ++i
) {
415 sm1_dump_dst_param(&insn
->dst
[i
]);
419 for (i
= 0; i
< insn
->nsrc
&& i
< Elements(insn
->src
); ++i
) {
420 sm1_dump_src_param(&insn
->src
[i
]);
423 if (insn
->opcode
== D3DSIO_DEF
||
424 insn
->opcode
== D3DSIO_DEFI
||
425 insn
->opcode
== D3DSIO_DEFB
)
426 sm1_dump_immediate(&insn
->src
[0]);
431 struct sm1_local_const
442 struct shader_translator
444 const DWORD
*byte_code
;
446 const DWORD
*parse_next
;
448 struct ureg_program
*ureg
;
455 unsigned processor
; /* TGSI_PROCESSOR_VERTEX/FRAGMENT */
456 unsigned num_constf_allowed
;
457 unsigned num_consti_allowed
;
458 unsigned num_constb_allowed
;
460 boolean native_integers
;
461 boolean inline_subroutines
;
463 boolean want_texcoord
;
465 boolean wpos_is_sysval
;
466 boolean face_is_sysval_integer
;
467 unsigned texcoord_sn
;
469 struct sm1_instruction insn
; /* current instruction */
473 struct ureg_dst oPos
;
474 struct ureg_dst oFog
;
475 struct ureg_dst oPts
;
476 struct ureg_dst oCol
[4];
477 struct ureg_dst o
[PIPE_MAX_SHADER_OUTPUTS
];
478 struct ureg_dst oDepth
;
479 struct ureg_src v
[PIPE_MAX_SHADER_INPUTS
];
480 struct ureg_src vPos
;
481 struct ureg_src vFace
;
484 struct ureg_dst address
;
486 struct ureg_dst tS
[8]; /* texture stage registers */
487 struct ureg_dst tdst
; /* scratch dst if we need extra modifiers */
488 struct ureg_dst t
[5]; /* scratch TEMPs */
489 struct ureg_src vC
[2]; /* PS color in */
490 struct ureg_src vT
[8]; /* PS texcoord in */
491 struct ureg_dst rL
[NINE_MAX_LOOP_DEPTH
]; /* loop ctr */
493 unsigned num_temp
; /* Elements(regs.r) */
494 unsigned num_scratch
;
496 unsigned loop_depth_max
;
498 unsigned loop_labels
[NINE_MAX_LOOP_DEPTH
];
499 unsigned cond_labels
[NINE_MAX_COND_DEPTH
];
500 boolean loop_or_rep
[NINE_MAX_LOOP_DEPTH
]; /* true: loop, false: rep */
502 unsigned *inst_labels
; /* LABEL op */
503 unsigned num_inst_labels
;
505 unsigned sampler_targets
[NINE_MAX_SAMPLERS
]; /* TGSI_TEXTURE_x */
507 struct sm1_local_const
*lconstf
;
508 unsigned num_lconstf
;
509 struct sm1_local_const lconsti
[NINE_MAX_CONST_I
];
510 struct sm1_local_const lconstb
[NINE_MAX_CONST_B
];
512 boolean indirect_const_access
;
515 struct nine_shader_info
*info
;
517 int16_t op_info_map
[D3DSIO_BREAKP
+ 1];
520 #define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
521 #define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
523 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
526 sm1_read_semantic(struct shader_translator
*, struct sm1_semantic
*);
529 sm1_instruction_check(const struct sm1_instruction
*insn
)
531 if (insn
->opcode
== D3DSIO_CRS
)
533 if (insn
->dst
[0].mask
& NINED3DSP_WRITEMASK_3
)
541 tx_lconstf(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
544 if (index
< 0 || index
>= tx
->num_constf_allowed
) {
548 for (i
= 0; i
< tx
->num_lconstf
; ++i
) {
549 if (tx
->lconstf
[i
].idx
== index
) {
550 *src
= tx
->lconstf
[i
].reg
;
557 tx_lconsti(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
559 if (index
< 0 || index
>= tx
->num_consti_allowed
) {
563 if (tx
->lconsti
[index
].idx
== index
)
564 *src
= tx
->lconsti
[index
].reg
;
565 return tx
->lconsti
[index
].idx
== index
;
568 tx_lconstb(struct shader_translator
*tx
, struct ureg_src
*src
, INT index
)
570 if (index
< 0 || index
>= tx
->num_constb_allowed
) {
574 if (tx
->lconstb
[index
].idx
== index
)
575 *src
= tx
->lconstb
[index
].reg
;
576 return tx
->lconstb
[index
].idx
== index
;
580 tx_set_lconstf(struct shader_translator
*tx
, INT index
, float f
[4])
584 FAILURE_VOID(index
< 0 || index
>= tx
->num_constf_allowed
)
586 for (n
= 0; n
< tx
->num_lconstf
; ++n
)
587 if (tx
->lconstf
[n
].idx
== index
)
589 if (n
== tx
->num_lconstf
) {
591 tx
->lconstf
= REALLOC(tx
->lconstf
,
592 (n
+ 0) * sizeof(tx
->lconstf
[0]),
593 (n
+ 8) * sizeof(tx
->lconstf
[0]));
598 tx
->lconstf
[n
].idx
= index
;
599 tx
->lconstf
[n
].reg
= ureg_imm4f(tx
->ureg
, f
[0], f
[1], f
[2], f
[3]);
601 memcpy(tx
->lconstf
[n
].imm
.f
, f
, sizeof(tx
->lconstf
[n
].imm
.f
));
604 tx_set_lconsti(struct shader_translator
*tx
, INT index
, int i
[4])
606 FAILURE_VOID(index
< 0 || index
>= tx
->num_consti_allowed
)
607 tx
->lconsti
[index
].idx
= index
;
608 tx
->lconsti
[index
].reg
= tx
->native_integers
?
609 ureg_imm4i(tx
->ureg
, i
[0], i
[1], i
[2], i
[3]) :
610 ureg_imm4f(tx
->ureg
, i
[0], i
[1], i
[2], i
[3]);
613 tx_set_lconstb(struct shader_translator
*tx
, INT index
, BOOL b
)
615 FAILURE_VOID(index
< 0 || index
>= tx
->num_constb_allowed
)
616 tx
->lconstb
[index
].idx
= index
;
617 tx
->lconstb
[index
].reg
= tx
->native_integers
?
618 ureg_imm1u(tx
->ureg
, b
? 0xffffffff : 0) :
619 ureg_imm1f(tx
->ureg
, b
? 1.0f
: 0.0f
);
622 static inline struct ureg_dst
623 tx_scratch(struct shader_translator
*tx
)
625 if (tx
->num_scratch
>= Elements(tx
->regs
.t
)) {
627 return tx
->regs
.t
[0];
629 if (ureg_dst_is_undef(tx
->regs
.t
[tx
->num_scratch
]))
630 tx
->regs
.t
[tx
->num_scratch
] = ureg_DECL_local_temporary(tx
->ureg
);
631 return tx
->regs
.t
[tx
->num_scratch
++];
634 static inline struct ureg_dst
635 tx_scratch_scalar(struct shader_translator
*tx
)
637 return ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
640 static inline struct ureg_src
641 tx_src_scalar(struct ureg_dst dst
)
643 struct ureg_src src
= ureg_src(dst
);
644 int c
= ffs(dst
.WriteMask
) - 1;
645 if (dst
.WriteMask
== (1 << c
))
646 src
= ureg_scalar(src
, c
);
651 tx_temp_alloc(struct shader_translator
*tx
, INT idx
)
654 if (idx
>= tx
->num_temp
) {
655 unsigned k
= tx
->num_temp
;
656 unsigned n
= idx
+ 1;
657 tx
->regs
.r
= REALLOC(tx
->regs
.r
,
658 k
* sizeof(tx
->regs
.r
[0]),
659 n
* sizeof(tx
->regs
.r
[0]));
661 tx
->regs
.r
[k
] = ureg_dst_undef();
664 if (ureg_dst_is_undef(tx
->regs
.r
[idx
]))
665 tx
->regs
.r
[idx
] = ureg_DECL_temporary(tx
->ureg
);
669 tx_addr_alloc(struct shader_translator
*tx
, INT idx
)
672 if (ureg_dst_is_undef(tx
->regs
.address
))
673 tx
->regs
.address
= ureg_DECL_address(tx
->ureg
);
674 if (ureg_dst_is_undef(tx
->regs
.a0
))
675 tx
->regs
.a0
= ureg_DECL_temporary(tx
->ureg
);
679 tx_pred_alloc(struct shader_translator
*tx
, INT idx
)
682 if (ureg_dst_is_undef(tx
->regs
.p
))
683 tx
->regs
.p
= ureg_DECL_predicate(tx
->ureg
);
686 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
687 * the projection should be applied on the texture. It doesn't
 * apply on texkill.
689 * The doc is very imprecise here (it says the projection is done
690 * before rasterization, thus in vs, which seems wrong since ps instructions
691 * are affected differently)
692 * For now we only apply to the ps TEX instruction and TEXBEM.
693 * Perhaps some other instructions would need it */
695 apply_ps1x_projection(struct shader_translator
*tx
, struct ureg_dst dst
,
696 struct ureg_src src
, INT idx
)
699 unsigned dim
= 1 + ((tx
->info
->projected
>> (2 * idx
)) & 3);
703 ureg_MOV(tx
->ureg
, dst
, src
);
705 tmp
= tx_scratch_scalar(tx
);
706 ureg_RCP(tx
->ureg
, tmp
, ureg_scalar(src
, dim
-1));
707 ureg_MUL(tx
->ureg
, dst
, tx_src_scalar(tmp
), src
);
712 TEX_with_ps1x_projection(struct shader_translator
*tx
, struct ureg_dst dst
,
713 unsigned target
, struct ureg_src src0
,
714 struct ureg_src src1
, INT idx
)
716 unsigned dim
= 1 + ((tx
->info
->projected
>> (2 * idx
)) & 3);
719 /* dim == 1: no projection
720 * Looks like must be disabled when it makes no
721 * sense according the texture dimensions
723 if (dim
== 1 || dim
<= target
) {
724 ureg_TEX(tx
->ureg
, dst
, target
, src0
, src1
);
725 } else if (dim
== 4) {
726 ureg_TXP(tx
->ureg
, dst
, target
, src0
, src1
);
728 tmp
= tx_scratch(tx
);
729 apply_ps1x_projection(tx
, tmp
, src0
, idx
);
730 ureg_TEX(tx
->ureg
, dst
, target
, ureg_src(tmp
), src1
);
735 tx_texcoord_alloc(struct shader_translator
*tx
, INT idx
)
738 assert(idx
>= 0 && idx
< Elements(tx
->regs
.vT
));
739 if (ureg_src_is_undef(tx
->regs
.vT
[idx
]))
740 tx
->regs
.vT
[idx
] = ureg_DECL_fs_input(tx
->ureg
, tx
->texcoord_sn
, idx
,
741 TGSI_INTERPOLATE_PERSPECTIVE
);
744 static inline unsigned *
745 tx_bgnloop(struct shader_translator
*tx
)
748 if (tx
->loop_depth_max
< tx
->loop_depth
)
749 tx
->loop_depth_max
= tx
->loop_depth
;
750 assert(tx
->loop_depth
< NINE_MAX_LOOP_DEPTH
);
751 return &tx
->loop_labels
[tx
->loop_depth
- 1];
754 static inline unsigned *
755 tx_endloop(struct shader_translator
*tx
)
757 assert(tx
->loop_depth
);
759 ureg_fixup_label(tx
->ureg
, tx
->loop_labels
[tx
->loop_depth
],
760 ureg_get_instruction_number(tx
->ureg
));
761 return &tx
->loop_labels
[tx
->loop_depth
];
764 static struct ureg_dst
765 tx_get_loopctr(struct shader_translator
*tx
, boolean loop_or_rep
)
767 const unsigned l
= tx
->loop_depth
- 1;
771 DBG("loop counter requested outside of loop\n");
772 return ureg_dst_undef();
775 if (ureg_dst_is_undef(tx
->regs
.rL
[l
])) {
776 /* loop or rep ctr creation */
777 tx
->regs
.rL
[l
] = ureg_DECL_local_temporary(tx
->ureg
);
778 tx
->loop_or_rep
[l
] = loop_or_rep
;
780 /* loop - rep - endloop - endrep not allowed */
781 assert(tx
->loop_or_rep
[l
] == loop_or_rep
);
783 return tx
->regs
.rL
[l
];
786 static struct ureg_src
787 tx_get_loopal(struct shader_translator
*tx
)
789 int loop_level
= tx
->loop_depth
- 1;
791 while (loop_level
>= 0) {
792 /* handle loop - rep - endrep - endloop case */
793 if (tx
->loop_or_rep
[loop_level
])
794 /* the value is in the loop counter y component (nine implementation) */
795 return ureg_scalar(ureg_src(tx
->regs
.rL
[loop_level
]), TGSI_SWIZZLE_Y
);
799 DBG("aL counter requested outside of loop\n");
800 return ureg_src_undef();
803 static inline unsigned *
804 tx_cond(struct shader_translator
*tx
)
806 assert(tx
->cond_depth
<= NINE_MAX_COND_DEPTH
);
808 return &tx
->cond_labels
[tx
->cond_depth
- 1];
811 static inline unsigned *
812 tx_elsecond(struct shader_translator
*tx
)
814 assert(tx
->cond_depth
);
815 return &tx
->cond_labels
[tx
->cond_depth
- 1];
819 tx_endcond(struct shader_translator
*tx
)
821 assert(tx
->cond_depth
);
823 ureg_fixup_label(tx
->ureg
, tx
->cond_labels
[tx
->cond_depth
],
824 ureg_get_instruction_number(tx
->ureg
));
827 static inline struct ureg_dst
828 nine_ureg_dst_register(unsigned file
, int index
)
830 return ureg_dst(ureg_src_register(file
, index
));
833 static struct ureg_src
834 tx_src_param(struct shader_translator
*tx
, const struct sm1_src_param
*param
)
836 struct ureg_program
*ureg
= tx
->ureg
;
844 tx_temp_alloc(tx
, param
->idx
);
845 src
= ureg_src(tx
->regs
.r
[param
->idx
]);
847 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
851 assert(param
->idx
== 0);
852 /* the address register (vs only) must be
853 * assigned before use */
854 assert(!ureg_dst_is_undef(tx
->regs
.a0
));
855 /* Round to lowest for vs1.1 (contrary to the doc), else
856 * round to nearest */
857 if (tx
->version
.major
< 2 && tx
->version
.minor
< 2)
858 ureg_ARL(ureg
, tx
->regs
.address
, ureg_src(tx
->regs
.a0
));
860 ureg_ARR(ureg
, tx
->regs
.address
, ureg_src(tx
->regs
.a0
));
861 src
= ureg_src(tx
->regs
.address
);
863 if (tx
->version
.major
< 2 && tx
->version
.minor
< 4) {
864 /* no subroutines, so should be defined */
865 src
= ureg_src(tx
->regs
.tS
[param
->idx
]);
867 tx_texcoord_alloc(tx
, param
->idx
);
868 src
= tx
->regs
.vT
[param
->idx
];
874 src
= ureg_src_register(TGSI_FILE_INPUT
, param
->idx
);
876 if (tx
->version
.major
< 3) {
878 src
= ureg_DECL_fs_input_cyl_centroid(
879 ureg
, TGSI_SEMANTIC_COLOR
, param
->idx
,
880 TGSI_INTERPOLATE_COLOR
, 0,
881 tx
->info
->force_color_in_centroid
?
882 TGSI_INTERPOLATE_LOC_CENTROID
: 0,
885 assert(!param
->rel
); /* TODO */
886 assert(param
->idx
< Elements(tx
->regs
.v
));
887 src
= tx
->regs
.v
[param
->idx
];
891 case D3DSPR_PREDICATE
:
893 tx_pred_alloc(tx
, param
->idx
);
894 src
= ureg_src(tx
->regs
.p
);
897 assert(param
->mod
== NINED3DSPSM_NONE
);
898 assert(param
->swizzle
== NINED3DSP_NOSWIZZLE
);
900 src
= ureg_src_register(TGSI_FILE_SAMPLER
, param
->idx
);
903 assert(!param
->rel
|| IS_VS
);
905 tx
->indirect_const_access
= TRUE
;
906 if (param
->rel
|| !tx_lconstf(tx
, &src
, param
->idx
)) {
908 nine_info_mark_const_f_used(tx
->info
, param
->idx
);
909 src
= ureg_src_register(TGSI_FILE_CONSTANT
, param
->idx
);
911 if (!IS_VS
&& tx
->version
.major
< 2) {
912 /* ps 1.X clamps constants */
913 tmp
= tx_scratch(tx
);
914 ureg_MIN(ureg
, tmp
, src
, ureg_imm1f(ureg
, 1.0f
));
915 ureg_MAX(ureg
, tmp
, ureg_src(tmp
), ureg_imm1f(ureg
, -1.0f
));
922 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
923 assert(!"CONST2/3/4");
924 src
= ureg_imm1f(ureg
, 0.0f
);
926 case D3DSPR_CONSTINT
:
927 /* relative addressing only possible for float constants in vs */
929 if (!tx_lconsti(tx
, &src
, param
->idx
)) {
930 nine_info_mark_const_i_used(tx
->info
, param
->idx
);
931 src
= ureg_src_register(TGSI_FILE_CONSTANT
,
932 tx
->info
->const_i_base
+ param
->idx
);
935 case D3DSPR_CONSTBOOL
:
937 if (!tx_lconstb(tx
, &src
, param
->idx
)) {
938 char r
= param
->idx
/ 4;
939 char s
= param
->idx
& 3;
940 nine_info_mark_const_b_used(tx
->info
, param
->idx
);
941 src
= ureg_src_register(TGSI_FILE_CONSTANT
,
942 tx
->info
->const_b_base
+ r
);
943 src
= ureg_swizzle(src
, s
, s
, s
, s
);
947 if (ureg_dst_is_undef(tx
->regs
.address
))
948 tx
->regs
.address
= ureg_DECL_address(ureg
);
949 if (!tx
->native_integers
)
950 ureg_ARR(ureg
, tx
->regs
.address
, tx_get_loopal(tx
));
952 ureg_UARL(ureg
, tx
->regs
.address
, tx_get_loopal(tx
));
953 src
= ureg_src(tx
->regs
.address
);
955 case D3DSPR_MISCTYPE
:
956 switch (param
->idx
) {
957 case D3DSMO_POSITION
:
958 if (ureg_src_is_undef(tx
->regs
.vPos
)) {
959 if (tx
->wpos_is_sysval
) {
961 ureg_DECL_system_value(ureg
, TGSI_SEMANTIC_POSITION
, 0);
964 ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_POSITION
, 0,
965 TGSI_INTERPOLATE_LINEAR
);
968 if (tx
->shift_wpos
) {
969 /* TODO: do this only once */
970 struct ureg_dst wpos
= tx_scratch(tx
);
971 ureg_SUB(ureg
, wpos
, tx
->regs
.vPos
,
972 ureg_imm4f(ureg
, 0.5f
, 0.5f
, 0.0f
, 0.0f
));
973 src
= ureg_src(wpos
);
979 if (ureg_src_is_undef(tx
->regs
.vFace
)) {
980 if (tx
->face_is_sysval_integer
) {
981 tmp
= tx_scratch(tx
);
983 ureg_DECL_system_value(ureg
, TGSI_SEMANTIC_FACE
, 0);
985 /* convert bool to float */
986 ureg_UCMP(ureg
, tmp
, ureg_scalar(tx
->regs
.vFace
, TGSI_SWIZZLE_X
),
987 ureg_imm1f(ureg
, 1), ureg_imm1f(ureg
, -1));
988 tx
->regs
.vFace
= ureg_src(tmp
);
990 tx
->regs
.vFace
= ureg_DECL_fs_input(ureg
,
991 TGSI_SEMANTIC_FACE
, 0,
992 TGSI_INTERPOLATE_CONSTANT
);
994 tx
->regs
.vFace
= ureg_scalar(tx
->regs
.vFace
, TGSI_SWIZZLE_X
);
996 src
= tx
->regs
.vFace
;
999 assert(!"invalid src D3DSMO");
1002 assert(!param
->rel
);
1004 case D3DSPR_TEMPFLOAT16
:
1007 assert(!"invalid src D3DSPR");
1010 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
1012 switch (param
->mod
) {
1013 case NINED3DSPSM_DW
:
1014 tmp
= tx_scratch(tx
);
1015 /* NOTE: app is not allowed to read w with this modifier */
1016 ureg_RCP(ureg
, ureg_writemask(tmp
, NINED3DSP_WRITEMASK_3
), src
);
1017 ureg_MUL(ureg
, tmp
, src
, ureg_swizzle(ureg_src(tmp
), NINE_SWIZZLE4(W
,W
,W
,W
)));
1018 src
= ureg_src(tmp
);
1020 case NINED3DSPSM_DZ
:
1021 tmp
= tx_scratch(tx
);
1022 /* NOTE: app is not allowed to read z with this modifier */
1023 ureg_RCP(ureg
, ureg_writemask(tmp
, NINED3DSP_WRITEMASK_2
), src
);
1024 ureg_MUL(ureg
, tmp
, src
, ureg_swizzle(ureg_src(tmp
), NINE_SWIZZLE4(Z
,Z
,Z
,Z
)));
1025 src
= ureg_src(tmp
);
1031 if (param
->swizzle
!= NINED3DSP_NOSWIZZLE
)
1032 src
= ureg_swizzle(src
,
1033 (param
->swizzle
>> 0) & 0x3,
1034 (param
->swizzle
>> 2) & 0x3,
1035 (param
->swizzle
>> 4) & 0x3,
1036 (param
->swizzle
>> 6) & 0x3);
1038 switch (param
->mod
) {
1039 case NINED3DSPSM_ABS
:
1040 src
= ureg_abs(src
);
1042 case NINED3DSPSM_ABSNEG
:
1043 src
= ureg_negate(ureg_abs(src
));
1045 case NINED3DSPSM_NEG
:
1046 src
= ureg_negate(src
);
1048 case NINED3DSPSM_BIAS
:
1049 tmp
= tx_scratch(tx
);
1050 ureg_SUB(ureg
, tmp
, src
, ureg_imm1f(ureg
, 0.5f
));
1051 src
= ureg_src(tmp
);
1053 case NINED3DSPSM_BIASNEG
:
1054 tmp
= tx_scratch(tx
);
1055 ureg_SUB(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), src
);
1056 src
= ureg_src(tmp
);
1058 case NINED3DSPSM_NOT
:
1059 if (tx
->native_integers
) {
1060 tmp
= tx_scratch(tx
);
1061 ureg_NOT(ureg
, tmp
, src
);
1062 src
= ureg_src(tmp
);
1066 case NINED3DSPSM_COMP
:
1067 tmp
= tx_scratch(tx
);
1068 ureg_SUB(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), src
);
1069 src
= ureg_src(tmp
);
1071 case NINED3DSPSM_DZ
:
1072 case NINED3DSPSM_DW
:
1073 /* Already handled*/
1075 case NINED3DSPSM_SIGN
:
1076 tmp
= tx_scratch(tx
);
1077 ureg_MAD(ureg
, tmp
, src
, ureg_imm1f(ureg
, 2.0f
), ureg_imm1f(ureg
, -1.0f
));
1078 src
= ureg_src(tmp
);
1080 case NINED3DSPSM_SIGNNEG
:
1081 tmp
= tx_scratch(tx
);
1082 ureg_MAD(ureg
, tmp
, src
, ureg_imm1f(ureg
, -2.0f
), ureg_imm1f(ureg
, 1.0f
));
1083 src
= ureg_src(tmp
);
1085 case NINED3DSPSM_X2
:
1086 tmp
= tx_scratch(tx
);
1087 ureg_ADD(ureg
, tmp
, src
, src
);
1088 src
= ureg_src(tmp
);
1090 case NINED3DSPSM_X2NEG
:
1091 tmp
= tx_scratch(tx
);
1092 ureg_ADD(ureg
, tmp
, src
, src
);
1093 src
= ureg_negate(ureg_src(tmp
));
1096 assert(param
->mod
== NINED3DSPSM_NONE
);
1103 static struct ureg_dst
1104 _tx_dst_param(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1106 struct ureg_dst dst
;
1108 switch (param
->file
)
1111 assert(!param
->rel
);
1112 tx_temp_alloc(tx
, param
->idx
);
1113 dst
= tx
->regs
.r
[param
->idx
];
1115 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1117 assert(!param
->rel
);
1118 if (tx
->version
.major
< 2 && !IS_VS
) {
1119 if (ureg_dst_is_undef(tx
->regs
.tS
[param
->idx
]))
1120 tx
->regs
.tS
[param
->idx
] = ureg_DECL_temporary(tx
->ureg
);
1121 dst
= tx
->regs
.tS
[param
->idx
];
1123 if (!IS_VS
&& tx
->insn
.opcode
== D3DSIO_TEXKILL
) { /* maybe others, too */
1124 tx_texcoord_alloc(tx
, param
->idx
);
1125 dst
= ureg_dst(tx
->regs
.vT
[param
->idx
]);
1127 tx_addr_alloc(tx
, param
->idx
);
1131 case D3DSPR_RASTOUT
:
1132 assert(!param
->rel
);
1133 switch (param
->idx
) {
1135 if (ureg_dst_is_undef(tx
->regs
.oPos
))
1137 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0);
1138 dst
= tx
->regs
.oPos
;
1141 if (ureg_dst_is_undef(tx
->regs
.oFog
))
1143 ureg_saturate(ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_FOG
, 0));
1144 dst
= tx
->regs
.oFog
;
1147 if (ureg_dst_is_undef(tx
->regs
.oPts
))
1149 ureg_saturate(ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_PSIZE
, 0));
1150 dst
= tx
->regs
.oPts
;
1157 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1159 if (tx
->version
.major
< 3) {
1160 assert(!param
->rel
);
1161 dst
= ureg_DECL_output(tx
->ureg
, tx
->texcoord_sn
, param
->idx
);
1163 assert(!param
->rel
); /* TODO */
1164 assert(param
->idx
< Elements(tx
->regs
.o
));
1165 dst
= tx
->regs
.o
[param
->idx
];
1168 case D3DSPR_ATTROUT
: /* VS */
1169 case D3DSPR_COLOROUT
: /* PS */
1170 assert(param
->idx
>= 0 && param
->idx
< 4);
1171 assert(!param
->rel
);
1172 tx
->info
->rt_mask
|= 1 << param
->idx
;
1173 if (ureg_dst_is_undef(tx
->regs
.oCol
[param
->idx
])) {
1174 /* ps < 3: oCol[0] will have fog blending afterward */
1175 if (!IS_VS
&& tx
->version
.major
< 3 && param
->idx
== 0) {
1176 tx
->regs
.oCol
[0] = ureg_DECL_temporary(tx
->ureg
);
1178 tx
->regs
.oCol
[param
->idx
] =
1179 ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_COLOR
, param
->idx
);
1182 dst
= tx
->regs
.oCol
[param
->idx
];
1183 if (IS_VS
&& tx
->version
.major
< 3)
1184 dst
= ureg_saturate(dst
);
1186 case D3DSPR_DEPTHOUT
:
1187 assert(!param
->rel
);
1188 if (ureg_dst_is_undef(tx
->regs
.oDepth
))
1190 ureg_DECL_output_masked(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0,
1191 TGSI_WRITEMASK_Z
, 0, 1);
1192 dst
= tx
->regs
.oDepth
; /* XXX: must write .z component */
1194 case D3DSPR_PREDICATE
:
1195 assert(!param
->rel
);
1196 tx_pred_alloc(tx
, param
->idx
);
1199 case D3DSPR_TEMPFLOAT16
:
1200 DBG("unhandled D3DSPR: %u\n", param
->file
);
1203 assert(!"invalid dst D3DSPR");
1207 dst
= ureg_dst_indirect(dst
, tx_src_param(tx
, param
->rel
));
1209 if (param
->mask
!= NINED3DSP_WRITEMASK_ALL
)
1210 dst
= ureg_writemask(dst
, param
->mask
);
1211 if (param
->mod
& NINED3DSPDM_SATURATE
)
1212 dst
= ureg_saturate(dst
);
1217 static struct ureg_dst
1218 tx_dst_param(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1221 tx
->regs
.tdst
= ureg_writemask(tx_scratch(tx
), param
->mask
);
1222 return tx
->regs
.tdst
;
1224 return _tx_dst_param(tx
, param
);
1228 tx_apply_dst0_modifiers(struct shader_translator
*tx
)
1230 struct ureg_dst rdst
;
1233 if (!tx
->insn
.ndst
|| !tx
->insn
.dst
[0].shift
|| tx
->insn
.opcode
== D3DSIO_TEXKILL
)
1235 rdst
= _tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1237 assert(rdst
.File
!= TGSI_FILE_ADDRESS
); /* this probably isn't possible */
1239 if (tx
->insn
.dst
[0].shift
< 0)
1240 f
= 1.0f
/ (1 << -tx
->insn
.dst
[0].shift
);
1242 f
= 1 << tx
->insn
.dst
[0].shift
;
1244 ureg_MUL(tx
->ureg
, rdst
, ureg_src(tx
->regs
.tdst
), ureg_imm1f(tx
->ureg
, f
));
1247 static struct ureg_src
1248 tx_dst_param_as_src(struct shader_translator
*tx
, const struct sm1_dst_param
*param
)
1250 struct ureg_src src
;
1252 assert(!param
->shift
);
1253 assert(!(param
->mod
& NINED3DSPDM_SATURATE
));
1255 switch (param
->file
) {
1258 src
= ureg_src_register(TGSI_FILE_INPUT
, param
->idx
);
1260 assert(!param
->rel
);
1261 assert(param
->idx
< Elements(tx
->regs
.v
));
1262 src
= tx
->regs
.v
[param
->idx
];
1266 src
= ureg_src(tx_dst_param(tx
, param
));
1270 src
= ureg_src_indirect(src
, tx_src_param(tx
, param
->rel
));
1273 WARN("mask is 0, using identity swizzle\n");
1275 if (param
->mask
&& param
->mask
!= NINED3DSP_WRITEMASK_ALL
) {
1279 for (n
= 0, c
= 0; c
< 4; ++c
)
1280 if (param
->mask
& (1 << c
))
1283 for (c
= n
; c
< 4; ++c
)
1285 src
= ureg_swizzle(src
, s
[0], s
[1], s
[2], s
[3]);
1291 NineTranslateInstruction_Mkxn(struct shader_translator
*tx
, const unsigned k
, const unsigned n
)
1293 struct ureg_program
*ureg
= tx
->ureg
;
1294 struct ureg_dst dst
;
1295 struct ureg_src src
[2];
1296 struct sm1_src_param
*src_mat
= &tx
->insn
.src
[1];
1299 dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1300 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1302 for (i
= 0; i
< n
; i
++)
1304 const unsigned m
= (1 << i
);
1306 src
[1] = tx_src_param(tx
, src_mat
);
1309 if (!(dst
.WriteMask
& m
))
1312 /* XXX: src == dst case ? */
1316 ureg_DP3(ureg
, ureg_writemask(dst
, m
), src
[0], src
[1]);
1319 ureg_DP4(ureg
, ureg_writemask(dst
, m
), src
[0], src
[1]);
1322 DBG("invalid operation: M%ux%u\n", m
, n
);
1330 #define VNOTSUPPORTED 0, 0
1331 #define V(maj, min) (((maj) << 8) | (min))
/* Return a printable name for a D3DSIO opcode.
 *
 * Opcodes smaller than Elements(names) are looked up in the names table;
 * PHASE, COMMENT and END are handled by explicit cases. */
1333 static inline const char *
1334 d3dsio_to_string( unsigned opcode
)
1336 static const char *names
[] = {
1436 if (opcode
< Elements(names
)) return names
[opcode
];
1439 case D3DSIO_PHASE
: return "PHASE";
1440 case D3DSIO_COMMENT
: return "COMMENT";
1441 case D3DSIO_END
: return "END";
1447 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1448 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1449 (inst).vert_version.max | \
1450 (inst).frag_version.min | \
1451 (inst).frag_version.max)
1453 #define SPECIAL(name) \
1454 NineTranslateInstruction_##name
1456 #define DECL_SPECIAL(name) \
1458 NineTranslateInstruction_##name( struct shader_translator *tx )
1461 NineTranslateInstruction_Generic(struct shader_translator
*);
1465 return NineTranslateInstruction_Mkxn(tx
, 4, 4);
1470 return NineTranslateInstruction_Mkxn(tx
, 4, 3);
1475 return NineTranslateInstruction_Mkxn(tx
, 3, 4);
1480 return NineTranslateInstruction_Mkxn(tx
, 3, 3);
1485 return NineTranslateInstruction_Mkxn(tx
, 3, 2);
1490 ureg_CMP(tx
->ureg
, tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1491 tx_src_param(tx
, &tx
->insn
.src
[0]),
1492 tx_src_param(tx
, &tx
->insn
.src
[2]),
1493 tx_src_param(tx
, &tx
->insn
.src
[1]));
1499 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1500 struct ureg_dst cgt
;
1501 struct ureg_src cnd
;
1503 /* the coissue flag was a tip for compilers to advise to
1504 * execute two operations at the same time, in cases
1505 * the two executions had same dst with different channels.
1506 * It has no effect on current hw. However it seems CND
1507 * is affected. The handling of this very specific case
1508 * handled below mimick wine behaviour */
1509 if (tx
->insn
.coissue
&& tx
->version
.major
== 1 && tx
->version
.minor
< 4 && tx
->insn
.dst
[0].mask
!= NINED3DSP_WRITEMASK_3
) {
1511 dst
, tx_src_param(tx
, &tx
->insn
.src
[1]));
1515 cnd
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1516 cgt
= tx_scratch(tx
);
1518 if (tx
->version
.major
== 1 && tx
->version
.minor
< 4)
1519 cnd
= ureg_scalar(cnd
, TGSI_SWIZZLE_W
);
1521 ureg_SGT(tx
->ureg
, cgt
, cnd
, ureg_imm1f(tx
->ureg
, 0.5f
));
1523 ureg_CMP(tx
->ureg
, dst
, ureg_negate(ureg_src(cgt
)),
1524 tx_src_param(tx
, &tx
->insn
.src
[1]),
1525 tx_src_param(tx
, &tx
->insn
.src
[2]));
1531 assert(tx
->insn
.src
[0].idx
< tx
->num_inst_labels
);
1532 ureg_CAL(tx
->ureg
, &tx
->inst_labels
[tx
->insn
.src
[0].idx
]);
1536 DECL_SPECIAL(CALLNZ
)
1538 struct ureg_program
*ureg
= tx
->ureg
;
1539 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1541 if (!tx
->native_integers
)
1542 ureg_IF(ureg
, src
, tx_cond(tx
));
1544 ureg_UIF(ureg
, src
, tx_cond(tx
));
1545 ureg_CAL(ureg
, &tx
->inst_labels
[tx
->insn
.src
[0].idx
]);
1553 struct ureg_program
*ureg
= tx
->ureg
;
1555 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[1]);
1556 struct ureg_dst ctr
;
1557 struct ureg_dst tmp
;
1558 struct ureg_src ctrx
;
1560 label
= tx_bgnloop(tx
);
1561 ctr
= tx_get_loopctr(tx
, TRUE
);
1562 ctrx
= ureg_scalar(ureg_src(ctr
), TGSI_SWIZZLE_X
);
1564 /* src: num_iterations - start_value of al - step for al - 0 */
1565 ureg_MOV(ureg
, ctr
, src
);
1566 ureg_BGNLOOP(tx
->ureg
, label
);
1567 tmp
= tx_scratch_scalar(tx
);
1568 /* Initially ctr.x contains the number of iterations.
1569 * ctr.y will contain the updated value of al.
1570 * We decrease ctr.x at the end of every iteration,
1571 * and stop when it reaches 0. */
1573 if (!tx
->native_integers
) {
1574 /* case src and ctr contain floats */
1575 /* to avoid precision issue, we stop when ctr <= 0.5 */
1576 ureg_SGE(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), ctrx
);
1577 ureg_IF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1579 /* case src and ctr contain integers */
1580 ureg_ISGE(ureg
, tmp
, ureg_imm1i(ureg
, 0), ctrx
);
1581 ureg_UIF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1595 DECL_SPECIAL(ENDLOOP
)
1597 struct ureg_program
*ureg
= tx
->ureg
;
1598 struct ureg_dst ctr
= tx_get_loopctr(tx
, TRUE
);
1599 struct ureg_dst dst_ctrx
, dst_al
;
1600 struct ureg_src src_ctr
, al_counter
;
1602 dst_ctrx
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_0
);
1603 dst_al
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_1
);
1604 src_ctr
= ureg_src(ctr
);
1605 al_counter
= ureg_scalar(src_ctr
, TGSI_SWIZZLE_Z
);
1608 * ctr.y (aL) += step */
1609 if (!tx
->native_integers
) {
1610 ureg_ADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1f(ureg
, -1.0f
));
1611 ureg_ADD(ureg
, dst_al
, src_ctr
, al_counter
);
1613 ureg_UADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1i(ureg
, -1));
1614 ureg_UADD(ureg
, dst_al
, src_ctr
, al_counter
);
1616 ureg_ENDLOOP(tx
->ureg
, tx_endloop(tx
));
1622 unsigned k
= tx
->num_inst_labels
;
1623 unsigned n
= tx
->insn
.src
[0].idx
;
1626 tx
->inst_labels
= REALLOC(tx
->inst_labels
,
1627 k
* sizeof(tx
->inst_labels
[0]),
1628 n
* sizeof(tx
->inst_labels
[0]));
1630 tx
->inst_labels
[n
] = ureg_get_instruction_number(tx
->ureg
);
1634 DECL_SPECIAL(SINCOS
)
1636 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
1637 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1639 assert(!(dst
.WriteMask
& 0xc));
1641 dst
.WriteMask
&= TGSI_WRITEMASK_XY
; /* z undefined, w untouched */
1642 ureg_SCS(tx
->ureg
, dst
, src
);
/* Operands of a single emitted instruction: the translated dst[0] followed
 * by the translated src[0].
 * NOTE(review): the call these arguments belong to is not visible here -
 * presumably the sign (SGN) translation; confirm against the full file. */
1649 tx_dst_param(tx
, &tx
->insn
.dst
[0]),
1650 tx_src_param(tx
, &tx
->insn
.src
[0]));
1656 struct ureg_program
*ureg
= tx
->ureg
;
1658 struct ureg_src rep
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1659 struct ureg_dst ctr
;
1660 struct ureg_dst tmp
;
1661 struct ureg_src ctrx
;
1663 label
= tx_bgnloop(tx
);
1664 ctr
= ureg_writemask(tx_get_loopctr(tx
, FALSE
), NINED3DSP_WRITEMASK_0
);
1665 ctrx
= ureg_scalar(ureg_src(ctr
), TGSI_SWIZZLE_X
);
1667 /* NOTE: rep must be constant, so we don't have to save the count */
1668 assert(rep
.File
== TGSI_FILE_CONSTANT
|| rep
.File
== TGSI_FILE_IMMEDIATE
);
1670 /* rep: num_iterations - 0 - 0 - 0 */
1671 ureg_MOV(ureg
, ctr
, rep
);
1672 ureg_BGNLOOP(ureg
, label
);
1673 tmp
= tx_scratch_scalar(tx
);
1674 /* Initially ctr.x contains the number of iterations.
1675 * We decrease ctr.x at the end of every iteration,
1676 * and stop when it reaches 0. */
1678 if (!tx
->native_integers
) {
1679 /* case src and ctr contain floats */
1680 /* to avoid precision issue, we stop when ctr <= 0.5 */
1681 ureg_SGE(ureg
, tmp
, ureg_imm1f(ureg
, 0.5f
), ctrx
);
1682 ureg_IF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1684 /* case src and ctr contain integers */
1685 ureg_ISGE(ureg
, tmp
, ureg_imm1i(ureg
, 0), ctrx
);
1686 ureg_UIF(ureg
, tx_src_scalar(tmp
), tx_cond(tx
));
1695 DECL_SPECIAL(ENDREP
)
1697 struct ureg_program
*ureg
= tx
->ureg
;
1698 struct ureg_dst ctr
= tx_get_loopctr(tx
, FALSE
);
1699 struct ureg_dst dst_ctrx
= ureg_writemask(ctr
, NINED3DSP_WRITEMASK_0
);
1700 struct ureg_src src_ctr
= ureg_src(ctr
);
1703 if (!tx
->native_integers
)
1704 ureg_ADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1f(ureg
, -1.0f
));
1706 ureg_UADD(ureg
, dst_ctrx
, src_ctr
, ureg_imm1i(ureg
, -1));
1708 ureg_ENDLOOP(tx
->ureg
, tx_endloop(tx
));
/* Emit the TGSI ENDIF.
 * NOTE(review): the rest of this definition is not visible here -
 * presumably the ENDIF translation; confirm against the full file. */
1715 ureg_ENDIF(tx
->ureg
);
1721 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
1723 if (tx
->native_integers
&& tx
->insn
.src
[0].file
== D3DSPR_CONSTBOOL
)
1724 ureg_UIF(tx
->ureg
, src
, tx_cond(tx
));
1726 ureg_IF(tx
->ureg
, src
, tx_cond(tx
));
1731 static inline unsigned
1732 sm1_insn_flags_to_tgsi_setop(BYTE flags
)
1735 case NINED3DSHADER_REL_OP_GT
: return TGSI_OPCODE_SGT
;
1736 case NINED3DSHADER_REL_OP_EQ
: return TGSI_OPCODE_SEQ
;
1737 case NINED3DSHADER_REL_OP_GE
: return TGSI_OPCODE_SGE
;
1738 case NINED3DSHADER_REL_OP_LT
: return TGSI_OPCODE_SLT
;
1739 case NINED3DSHADER_REL_OP_NE
: return TGSI_OPCODE_SNE
;
1740 case NINED3DSHADER_REL_OP_LE
: return TGSI_OPCODE_SLE
;
1742 assert(!"invalid comparison flags");
1743 return TGSI_OPCODE_SGT
;
1749 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
1750 struct ureg_src src
[2];
1751 struct ureg_dst tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
1752 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1753 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
1754 ureg_insn(tx
->ureg
, cmp_op
, &tmp
, 1, src
, 2);
1755 ureg_IF(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), tx_cond(tx
));
1761 ureg_ELSE(tx
->ureg
, tx_elsecond(tx
));
1765 DECL_SPECIAL(BREAKC
)
1767 const unsigned cmp_op
= sm1_insn_flags_to_tgsi_setop(tx
->insn
.flags
);
1768 struct ureg_src src
[2];
1769 struct ureg_dst tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_X
);
1770 src
[0] = tx_src_param(tx
, &tx
->insn
.src
[0]);
1771 src
[1] = tx_src_param(tx
, &tx
->insn
.src
[1]);
1772 ureg_insn(tx
->ureg
, cmp_op
, &tmp
, 1, src
, 2);
1773 ureg_IF(tx
->ureg
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), tx_cond(tx
));
1776 ureg_ENDIF(tx
->ureg
);
1780 static const char *sm1_declusage_names
[] =
1782 [D3DDECLUSAGE_POSITION
] = "POSITION",
1783 [D3DDECLUSAGE_BLENDWEIGHT
] = "BLENDWEIGHT",
1784 [D3DDECLUSAGE_BLENDINDICES
] = "BLENDINDICES",
1785 [D3DDECLUSAGE_NORMAL
] = "NORMAL",
1786 [D3DDECLUSAGE_PSIZE
] = "PSIZE",
1787 [D3DDECLUSAGE_TEXCOORD
] = "TEXCOORD",
1788 [D3DDECLUSAGE_TANGENT
] = "TANGENT",
1789 [D3DDECLUSAGE_BINORMAL
] = "BINORMAL",
1790 [D3DDECLUSAGE_TESSFACTOR
] = "TESSFACTOR",
1791 [D3DDECLUSAGE_POSITIONT
] = "POSITIONT",
1792 [D3DDECLUSAGE_COLOR
] = "COLOR",
1793 [D3DDECLUSAGE_FOG
] = "FOG",
1794 [D3DDECLUSAGE_DEPTH
] = "DEPTH",
1795 [D3DDECLUSAGE_SAMPLE
] = "SAMPLE"
1798 static inline unsigned
1799 sm1_to_nine_declusage(struct sm1_semantic
*dcl
)
1801 return nine_d3d9_to_nine_declusage(dcl
->usage
, dcl
->usage_idx
);
1805 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic
*sem
,
1807 struct sm1_semantic
*dcl
)
1809 BYTE index
= dcl
->usage_idx
;
1811 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1812 * we match to a TGSI_SEMANTIC_GENERIC with index.
1814 * The index can be anything UINT16 and usage_idx is BYTE,
1815 * so we can fit everything. It doesn't matter if indices
1816 * are close together or low.
1819 * POSITION >= 1: 10 * index + 6
1820 * COLOR >= 2: 10 * (index-1) + 7
1821 * TEXCOORD[0..15]: index
1822 * BLENDWEIGHT: 10 * index + 18
1823 * BLENDINDICES: 10 * index + 19
1824 * NORMAL: 10 * index + 20
1825 * TANGENT: 10 * index + 21
1826 * BINORMAL: 10 * index + 22
1827 * TESSFACTOR: 10 * index + 23
1830 switch (dcl
->usage
) {
1831 case D3DDECLUSAGE_POSITION
:
1832 case D3DDECLUSAGE_POSITIONT
:
1833 case D3DDECLUSAGE_DEPTH
:
1835 sem
->Name
= TGSI_SEMANTIC_POSITION
;
1838 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1839 sem
->Index
= 10 * index
+ 6;
1842 case D3DDECLUSAGE_COLOR
:
1844 sem
->Name
= TGSI_SEMANTIC_COLOR
;
1847 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1848 sem
->Index
= 10 * (index
-1) + 7;
1851 case D3DDECLUSAGE_FOG
:
1853 sem
->Name
= TGSI_SEMANTIC_FOG
;
1856 case D3DDECLUSAGE_PSIZE
:
1858 sem
->Name
= TGSI_SEMANTIC_PSIZE
;
1861 case D3DDECLUSAGE_TEXCOORD
:
1863 if (index
< 8 && tc
)
1864 sem
->Name
= TGSI_SEMANTIC_TEXCOORD
;
1866 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1869 case D3DDECLUSAGE_BLENDWEIGHT
:
1870 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1871 sem
->Index
= 10 * index
+ 18;
1873 case D3DDECLUSAGE_BLENDINDICES
:
1874 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1875 sem
->Index
= 10 * index
+ 19;
1877 case D3DDECLUSAGE_NORMAL
:
1878 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1879 sem
->Index
= 10 * index
+ 20;
1881 case D3DDECLUSAGE_TANGENT
:
1882 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1883 sem
->Index
= 10 * index
+ 21;
1885 case D3DDECLUSAGE_BINORMAL
:
1886 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1887 sem
->Index
= 10 * index
+ 22;
1889 case D3DDECLUSAGE_TESSFACTOR
:
1890 sem
->Name
= TGSI_SEMANTIC_GENERIC
;
1891 sem
->Index
= 10 * index
+ 23;
1893 case D3DDECLUSAGE_SAMPLE
:
1894 sem
->Name
= TGSI_SEMANTIC_COUNT
;
1898 unreachable(!"Invalid DECLUSAGE.");
1903 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
1904 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
1905 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
1906 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
/* Map a D3D sampler texture type (NINED3DSTT_*) to the TGSI texture
 * target: 1D, 2D, VOLUME -> 3D, CUBE.  The remaining path returns
 * TGSI_TEXTURE_UNKNOWN. */
1907 static inline unsigned
1908 d3dstt_to_tgsi_tex(BYTE sampler_type
)
1910 switch (sampler_type
) {
1911 case NINED3DSTT_1D
: return TGSI_TEXTURE_1D
;
1912 case NINED3DSTT_2D
: return TGSI_TEXTURE_2D
;
1913 case NINED3DSTT_VOLUME
: return TGSI_TEXTURE_3D
;
1914 case NINED3DSTT_CUBE
: return TGSI_TEXTURE_CUBE
;
1917 return TGSI_TEXTURE_UNKNOWN
;
/* Shadow-sampler variant of d3dstt_to_tgsi_tex(): 1D and 2D map to the
 * TGSI shadow targets; VOLUME and CUBE share the path that returns
 * TGSI_TEXTURE_UNKNOWN. */
1920 static inline unsigned
1921 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type
)
1923 switch (sampler_type
) {
1924 case NINED3DSTT_1D
: return TGSI_TEXTURE_SHADOW1D
;
1925 case NINED3DSTT_2D
: return TGSI_TEXTURE_SHADOW2D
;
1926 case NINED3DSTT_VOLUME
:
1927 case NINED3DSTT_CUBE
:
1930 return TGSI_TEXTURE_UNKNOWN
;
1933 static inline unsigned
1934 ps1x_sampler_type(const struct nine_shader_info
*info
, unsigned stage
)
1936 switch ((info
->sampler_ps1xtypes
>> (stage
* 2)) & 0x3) {
1937 case 1: return TGSI_TEXTURE_1D
;
1938 case 0: return TGSI_TEXTURE_2D
;
1939 case 3: return TGSI_TEXTURE_3D
;
1941 return TGSI_TEXTURE_CUBE
;
1946 sm1_sampler_type_name(BYTE sampler_type
)
1948 switch (sampler_type
) {
1949 case NINED3DSTT_1D
: return "1D";
1950 case NINED3DSTT_2D
: return "2D";
1951 case NINED3DSTT_VOLUME
: return "VOLUME";
1952 case NINED3DSTT_CUBE
: return "CUBE";
1954 return "(D3DSTT_?)";
/* Choose the TGSI interpolation mode for a semantic name:
 *   POSITION, NORMAL                                  -> LINEAR
 *   BCOLOR, COLOR                                     -> COLOR
 *   FOG, GENERIC, TEXCOORD, CLIPDIST, CLIPVERTEX      -> PERSPECTIVE
 *   EDGEFLAG, FACE, INSTANCEID, PCOORD, PRIMID,
 *   PSIZE, VERTEXID                                   -> CONSTANT
 * The remaining path also returns CONSTANT. */
1958 static inline unsigned
1959 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic
*sem
)
1961 switch (sem
->Name
) {
1962 case TGSI_SEMANTIC_POSITION
:
1963 case TGSI_SEMANTIC_NORMAL
:
1964 return TGSI_INTERPOLATE_LINEAR
;
1965 case TGSI_SEMANTIC_BCOLOR
:
1966 case TGSI_SEMANTIC_COLOR
:
1967 return TGSI_INTERPOLATE_COLOR
;
1968 case TGSI_SEMANTIC_FOG
:
1969 case TGSI_SEMANTIC_GENERIC
:
1970 case TGSI_SEMANTIC_TEXCOORD
:
1971 case TGSI_SEMANTIC_CLIPDIST
:
1972 case TGSI_SEMANTIC_CLIPVERTEX
:
1973 return TGSI_INTERPOLATE_PERSPECTIVE
;
1974 case TGSI_SEMANTIC_EDGEFLAG
:
1975 case TGSI_SEMANTIC_FACE
:
1976 case TGSI_SEMANTIC_INSTANCEID
:
1977 case TGSI_SEMANTIC_PCOORD
:
1978 case TGSI_SEMANTIC_PRIMID
:
1979 case TGSI_SEMANTIC_PSIZE
:
1980 case TGSI_SEMANTIC_VERTEXID
:
1981 return TGSI_INTERPOLATE_CONSTANT
;
1984 return TGSI_INTERPOLATE_CONSTANT
;
1990 struct ureg_program
*ureg
= tx
->ureg
;
1993 struct tgsi_declaration_semantic tgsi
;
1994 struct sm1_semantic sem
;
1995 sm1_read_semantic(tx
, &sem
);
1997 is_input
= sem
.reg
.file
== D3DSPR_INPUT
;
1999 sem
.usage
== D3DDECLUSAGE_SAMPLE
|| sem
.reg
.file
== D3DSPR_SAMPLER
;
2002 sm1_dump_dst_param(&sem
.reg
);
2004 DUMP(" %s\n", sm1_sampler_type_name(sem
.sampler_type
));
2006 if (tx
->version
.major
>= 3)
2007 DUMP(" %s%i\n", sm1_declusage_names
[sem
.usage
], sem
.usage_idx
);
2009 if (sem
.usage
| sem
.usage_idx
)
2010 DUMP(" %u[%u]\n", sem
.usage
, sem
.usage_idx
);
2015 const unsigned m
= 1 << sem
.reg
.idx
;
2016 ureg_DECL_sampler(ureg
, sem
.reg
.idx
);
2017 tx
->info
->sampler_mask
|= m
;
2018 tx
->sampler_targets
[sem
.reg
.idx
] = (tx
->info
->sampler_mask_shadow
& m
) ?
2019 d3dstt_to_tgsi_tex_shadow(sem
.sampler_type
) :
2020 d3dstt_to_tgsi_tex(sem
.sampler_type
);
2024 sm1_declusage_to_tgsi(&tgsi
, tx
->want_texcoord
, &sem
);
2027 /* linkage outside of shader with vertex declaration */
2028 ureg_DECL_vs_input(ureg
, sem
.reg
.idx
);
2029 assert(sem
.reg
.idx
< Elements(tx
->info
->input_map
));
2030 tx
->info
->input_map
[sem
.reg
.idx
] = sm1_to_nine_declusage(&sem
);
2031 tx
->info
->num_inputs
= sem
.reg
.idx
+ 1;
2032 /* NOTE: preserving order in case of indirect access */
2034 if (tx
->version
.major
>= 3) {
2035 /* SM2 output semantic determined by file */
2036 assert(sem
.reg
.mask
!= 0);
2037 if (sem
.usage
== D3DDECLUSAGE_POSITIONT
)
2038 tx
->info
->position_t
= TRUE
;
2039 assert(sem
.reg
.idx
< Elements(tx
->regs
.o
));
2040 tx
->regs
.o
[sem
.reg
.idx
] = ureg_DECL_output_masked(
2041 ureg
, tgsi
.Name
, tgsi
.Index
, sem
.reg
.mask
, 0, 1);
2043 if (tgsi
.Name
== TGSI_SEMANTIC_PSIZE
)
2044 tx
->regs
.oPts
= tx
->regs
.o
[sem
.reg
.idx
];
2047 if (is_input
&& tx
->version
.major
>= 3) {
2048 unsigned interp_location
= 0;
2049 /* SM3 only, SM2 input semantic determined by file */
2050 assert(sem
.reg
.idx
< Elements(tx
->regs
.v
));
2051 if (sem
.reg
.mod
& NINED3DSPDM_CENTROID
||
2052 (tgsi
.Name
== TGSI_SEMANTIC_COLOR
&& tx
->info
->force_color_in_centroid
))
2053 interp_location
= TGSI_INTERPOLATE_LOC_CENTROID
;
2054 tx
->regs
.v
[sem
.reg
.idx
] = ureg_DECL_fs_input_cyl_centroid(
2055 ureg
, tgsi
.Name
, tgsi
.Index
,
2056 nine_tgsi_to_interp_mode(&tgsi
),
2058 interp_location
, 0, 1);
2060 if (!is_input
&& 0) { /* declare in COLOROUT/DEPTHOUT case */
2061 /* FragColor or FragDepth */
2062 assert(sem
.reg
.mask
!= 0);
2063 ureg_DECL_output_masked(ureg
, tgsi
.Name
, tgsi
.Index
, sem
.reg
.mask
,
2072 tx_set_lconstf(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.f
);
2078 tx_set_lconstb(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.b
);
2084 tx_set_lconsti(tx
, tx
->insn
.dst
[0].idx
, tx
->insn
.src
[0].imm
.i
);
2090 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2091 struct ureg_src src
[2] = {
2092 tx_src_param(tx
, &tx
->insn
.src
[0]),
2093 tx_src_param(tx
, &tx
->insn
.src
[1])
2095 ureg_POW(tx
->ureg
, dst
, ureg_abs(src
[0]), src
[1]);
2101 struct ureg_program
*ureg
= tx
->ureg
;
2102 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2103 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2104 struct ureg_dst tmp
= tx_scratch(tx
);
2105 ureg_RSQ(ureg
, tmp
, ureg_abs(src
));
2106 ureg_MIN(ureg
, dst
, ureg_imm1f(ureg
, FLT_MAX
), ureg_src(tmp
));
2112 struct ureg_program
*ureg
= tx
->ureg
;
2113 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2114 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2115 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2116 ureg_LG2(ureg
, tmp
, ureg_abs(src
));
2117 ureg_MAX(ureg
, dst
, ureg_imm1f(ureg
, -FLT_MAX
), tx_src_scalar(tmp
));
2123 struct ureg_program
*ureg
= tx
->ureg
;
2124 struct ureg_dst tmp
= tx_scratch(tx
);
2125 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2126 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2127 ureg_LIT(ureg
, tmp
, src
);
2128 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2129 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2130 * it 0^0 if src.w=0, which value is driver dependent. */
2131 ureg_CMP(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
),
2132 ureg_negate(ureg_scalar(src
, TGSI_SWIZZLE_Y
)),
2133 ureg_src(tmp
), ureg_imm1f(ureg
, 0.0f
));
2134 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XYW
), ureg_src(tmp
));
2140 struct ureg_program
*ureg
= tx
->ureg
;
2141 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2142 struct ureg_src nrm
= tx_src_scalar(tmp
);
2143 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2144 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2145 ureg_DP3(ureg
, tmp
, src
, src
);
2146 ureg_RSQ(ureg
, tmp
, nrm
);
2147 ureg_MIN(ureg
, tmp
, ureg_imm1f(ureg
, FLT_MAX
), nrm
);
2148 ureg_MUL(ureg
, dst
, src
, nrm
);
2152 DECL_SPECIAL(DP2ADD
)
2154 struct ureg_dst tmp
= tx_scratch_scalar(tx
);
2155 struct ureg_src dp2
= tx_src_scalar(tmp
);
2156 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2157 struct ureg_src src
[3];
2159 for (i
= 0; i
< 3; ++i
)
2160 src
[i
] = tx_src_param(tx
, &tx
->insn
.src
[i
]);
2161 assert_replicate_swizzle(&src
[2]);
2163 ureg_DP2(tx
->ureg
, tmp
, src
[0], src
[1]);
2164 ureg_ADD(tx
->ureg
, dst
, src
[2], dp2
);
2169 DECL_SPECIAL(TEXCOORD
)
2171 struct ureg_program
*ureg
= tx
->ureg
;
2172 const unsigned s
= tx
->insn
.dst
[0].idx
;
2173 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2175 tx_texcoord_alloc(tx
, s
);
2176 ureg_MOV(ureg
, ureg_writemask(ureg_saturate(dst
), TGSI_WRITEMASK_XYZ
), tx
->regs
.vT
[s
]);
2177 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
), ureg_imm1f(tx
->ureg
, 1.0f
));
2182 DECL_SPECIAL(TEXCOORD_ps14
)
2184 struct ureg_program
*ureg
= tx
->ureg
;
2185 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2186 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2188 assert(tx
->insn
.src
[0].file
== D3DSPR_TEXTURE
);
2190 ureg_MOV(ureg
, dst
, src
);
2195 DECL_SPECIAL(TEXKILL
)
2197 struct ureg_src reg
;
2199 if (tx
->version
.major
> 1 || tx
->version
.minor
> 3) {
2200 reg
= tx_dst_param_as_src(tx
, &tx
->insn
.dst
[0]);
2202 tx_texcoord_alloc(tx
, tx
->insn
.dst
[0].idx
);
2203 reg
= tx
->regs
.vT
[tx
->insn
.dst
[0].idx
];
2205 if (tx
->version
.major
< 2)
2206 reg
= ureg_swizzle(reg
, NINE_SWIZZLE4(X
,Y
,Z
,Z
));
2207 ureg_KILL_IF(tx
->ureg
, reg
);
2212 DECL_SPECIAL(TEXBEM
)
2214 struct ureg_program
*ureg
= tx
->ureg
;
2215 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2216 struct ureg_dst tmp
, tmp2
, texcoord
;
2217 struct ureg_src sample
, m00
, m01
, m10
, m11
;
2218 struct ureg_src bumpenvlscale
, bumpenvloffset
;
2219 const int m
= tx
->insn
.dst
[0].idx
;
2220 const int n
= tx
->insn
.src
[0].idx
;
2222 assert(tx
->version
.major
== 1);
2224 sample
= ureg_DECL_sampler(ureg
, m
);
2225 tx
->info
->sampler_mask
|= 1 << m
;
2227 tx_texcoord_alloc(tx
, m
);
2229 tmp
= tx_scratch(tx
);
2230 tmp2
= tx_scratch(tx
);
2231 texcoord
= tx_scratch(tx
);
2239 nine_info_mark_const_f_used(tx
->info
, 8 + 8 + m
/2);
2240 m00
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, X
);
2241 m01
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, Y
);
2242 m10
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, Z
);
2243 m11
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, W
);
2245 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2247 bumpenvlscale
= NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m
/ 2, X
);
2248 bumpenvloffset
= NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m
/ 2, Y
);
2250 bumpenvlscale
= NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m
/ 2, Z
);
2251 bumpenvloffset
= NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m
/ 2, W
);
2254 apply_ps1x_projection(tx
, texcoord
, tx
->regs
.vT
[m
], m
);
2256 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2257 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m00
,
2258 NINE_APPLY_SWIZZLE(ureg_src(tx
->regs
.tS
[n
]), X
), ureg_src(texcoord
));
2259 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2260 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m10
,
2261 NINE_APPLY_SWIZZLE(ureg_src(tx
->regs
.tS
[n
]), Y
),
2262 NINE_APPLY_SWIZZLE(ureg_src(tmp
), X
));
2264 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2265 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m01
,
2266 NINE_APPLY_SWIZZLE(ureg_src(tx
->regs
.tS
[n
]), X
), ureg_src(texcoord
));
2267 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2268 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m11
,
2269 NINE_APPLY_SWIZZLE(ureg_src(tx
->regs
.tS
[n
]), Y
),
2270 NINE_APPLY_SWIZZLE(ureg_src(tmp
), Y
));
2272 /* Now the texture coordinates are in tmp.xy */
2274 if (tx
->insn
.opcode
== D3DSIO_TEXBEM
) {
2275 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tmp
), sample
);
2276 } else if (tx
->insn
.opcode
== D3DSIO_TEXBEML
) {
2277 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2278 ureg_TEX(ureg
, tmp
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tmp
), sample
);
2279 ureg_MAD(ureg
, tmp2
, NINE_APPLY_SWIZZLE(ureg_src(tx
->regs
.tS
[n
]), Z
),
2280 bumpenvlscale
, bumpenvloffset
);
2281 ureg_MUL(ureg
, dst
, ureg_src(tmp
), ureg_src(tmp2
));
2284 tx
->info
->bumpenvmat_needed
= 1;
2289 DECL_SPECIAL(TEXREG2AR
)
2291 struct ureg_program
*ureg
= tx
->ureg
;
2292 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2293 struct ureg_src sample
;
2294 const int m
= tx
->insn
.dst
[0].idx
;
2295 const int n
= tx
->insn
.src
[0].idx
;
2296 assert(m
>= 0 && m
> n
);
2298 sample
= ureg_DECL_sampler(ureg
, m
);
2299 tx
->info
->sampler_mask
|= 1 << m
;
2300 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_swizzle(ureg_src(tx
->regs
.tS
[n
]), NINE_SWIZZLE4(W
,X
,X
,X
)), sample
);
2305 DECL_SPECIAL(TEXREG2GB
)
2307 struct ureg_program
*ureg
= tx
->ureg
;
2308 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2309 struct ureg_src sample
;
2310 const int m
= tx
->insn
.dst
[0].idx
;
2311 const int n
= tx
->insn
.src
[0].idx
;
2312 assert(m
>= 0 && m
> n
);
2314 sample
= ureg_DECL_sampler(ureg
, m
);
2315 tx
->info
->sampler_mask
|= 1 << m
;
2316 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_swizzle(ureg_src(tx
->regs
.tS
[n
]), NINE_SWIZZLE4(Y
,Z
,Z
,Z
)), sample
);
2321 DECL_SPECIAL(TEXM3x2PAD
)
2323 return D3D_OK
; /* this is just padding */
2326 DECL_SPECIAL(TEXM3x2TEX
)
2328 struct ureg_program
*ureg
= tx
->ureg
;
2329 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2330 struct ureg_src sample
;
2331 const int m
= tx
->insn
.dst
[0].idx
- 1;
2332 const int n
= tx
->insn
.src
[0].idx
;
2333 assert(m
>= 0 && m
> n
);
2335 tx_texcoord_alloc(tx
, m
);
2336 tx_texcoord_alloc(tx
, m
+1);
2338 /* performs the matrix multiplication */
2339 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2340 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2342 sample
= ureg_DECL_sampler(ureg
, m
+ 1);
2343 tx
->info
->sampler_mask
|= 1 << (m
+ 1);
2344 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 1), ureg_src(dst
), sample
);
2349 DECL_SPECIAL(TEXM3x3PAD
)
2351 return D3D_OK
; /* this is just padding */
2354 DECL_SPECIAL(TEXM3x3SPEC
)
2356 struct ureg_program
*ureg
= tx
->ureg
;
2357 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2358 struct ureg_src E
= tx_src_param(tx
, &tx
->insn
.src
[1]);
2359 struct ureg_src sample
;
2360 struct ureg_dst tmp
;
2361 const int m
= tx
->insn
.dst
[0].idx
- 2;
2362 const int n
= tx
->insn
.src
[0].idx
;
2363 assert(m
>= 0 && m
> n
);
2365 tx_texcoord_alloc(tx
, m
);
2366 tx_texcoord_alloc(tx
, m
+1);
2367 tx_texcoord_alloc(tx
, m
+2);
2369 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2370 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2371 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
), tx
->regs
.vT
[m
+2], ureg_src(tx
->regs
.tS
[n
]));
2373 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2374 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2375 tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_XYZ
);
2377 /* At this step, dst = N = (u', w', z').
2378 * We want dst to be the texture sampled at (u'', w'', z''), with
2379 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2380 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(dst
), ureg_src(dst
));
2381 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2382 /* at this step tmp.x = 1/N.N */
2383 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(dst
), E
);
2384 /* at this step tmp.y = N.E */
2385 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2386 /* at this step tmp.x = N.E/N.N */
2387 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 2.0f
));
2388 ureg_MUL(ureg
, tmp
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_src(dst
));
2389 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2390 ureg_SUB(ureg
, tmp
, ureg_src(tmp
), E
);
2391 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(tmp
), sample
);
2396 DECL_SPECIAL(TEXREG2RGB
)
2398 struct ureg_program
*ureg
= tx
->ureg
;
2399 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2400 struct ureg_src sample
;
2401 const int m
= tx
->insn
.dst
[0].idx
;
2402 const int n
= tx
->insn
.src
[0].idx
;
2403 assert(m
>= 0 && m
> n
);
2405 sample
= ureg_DECL_sampler(ureg
, m
);
2406 tx
->info
->sampler_mask
|= 1 << m
;
2407 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tx
->regs
.tS
[n
]), sample
);
2412 DECL_SPECIAL(TEXDP3TEX
)
2414 struct ureg_program
*ureg
= tx
->ureg
;
2415 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2416 struct ureg_dst tmp
;
2417 struct ureg_src sample
;
2418 const int m
= tx
->insn
.dst
[0].idx
;
2419 const int n
= tx
->insn
.src
[0].idx
;
2420 assert(m
>= 0 && m
> n
);
2422 tx_texcoord_alloc(tx
, m
);
2424 tmp
= tx_scratch(tx
);
2425 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2426 ureg_MOV(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_YZ
), ureg_imm1f(ureg
, 0.0f
));
2428 sample
= ureg_DECL_sampler(ureg
, m
);
2429 tx
->info
->sampler_mask
|= 1 << m
;
2430 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
), ureg_src(tmp
), sample
);
2435 DECL_SPECIAL(TEXM3x2DEPTH
)
2437 struct ureg_program
*ureg
= tx
->ureg
;
2438 struct ureg_dst tmp
;
2439 const int m
= tx
->insn
.dst
[0].idx
- 1;
2440 const int n
= tx
->insn
.src
[0].idx
;
2441 assert(m
>= 0 && m
> n
);
2443 tx_texcoord_alloc(tx
, m
);
2444 tx_texcoord_alloc(tx
, m
+1);
2446 tmp
= tx_scratch(tx
);
2448 /* performs the matrix multiplication */
2449 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2450 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2452 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Z
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2453 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2454 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Z
));
2455 /* res = 'w' == 0 ? 1.0 : z/w */
2456 ureg_CMP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
))),
2457 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 1.0f
));
2458 /* replace the depth for depth testing with the result */
2459 tx
->regs
.oDepth
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_POSITION
, 0,
2460 TGSI_WRITEMASK_Z
, 0, 1);
2461 ureg_MOV(ureg
, tx
->regs
.oDepth
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2462 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2466 DECL_SPECIAL(TEXDP3
)
2468 struct ureg_program
*ureg
= tx
->ureg
;
2469 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2470 const int m
= tx
->insn
.dst
[0].idx
;
2471 const int n
= tx
->insn
.src
[0].idx
;
2472 assert(m
>= 0 && m
> n
);
2474 tx_texcoord_alloc(tx
, m
);
2476 ureg_DP3(ureg
, dst
, tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2481 DECL_SPECIAL(TEXM3x3
)
2483 struct ureg_program
*ureg
= tx
->ureg
;
2484 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2485 struct ureg_src sample
;
2486 struct ureg_dst E
, tmp
;
2487 const int m
= tx
->insn
.dst
[0].idx
- 2;
2488 const int n
= tx
->insn
.src
[0].idx
;
2489 assert(m
>= 0 && m
> n
);
2491 tx_texcoord_alloc(tx
, m
);
2492 tx_texcoord_alloc(tx
, m
+1);
2493 tx_texcoord_alloc(tx
, m
+2);
2495 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), tx
->regs
.vT
[m
], ureg_src(tx
->regs
.tS
[n
]));
2496 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Y
), tx
->regs
.vT
[m
+1], ureg_src(tx
->regs
.tS
[n
]));
2497 ureg_DP3(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_Z
), tx
->regs
.vT
[m
+2], ureg_src(tx
->regs
.tS
[n
]));
2499 switch (tx
->insn
.opcode
) {
2500 case D3DSIO_TEXM3x3
:
2501 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
2503 case D3DSIO_TEXM3x3TEX
:
2504 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2505 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2506 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(dst
), sample
);
2508 case D3DSIO_TEXM3x3VSPEC
:
2509 sample
= ureg_DECL_sampler(ureg
, m
+ 2);
2510 tx
->info
->sampler_mask
|= 1 << (m
+ 2);
2512 tmp
= ureg_writemask(tx_scratch(tx
), TGSI_WRITEMASK_XYZ
);
2513 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_X
), ureg_scalar(tx
->regs
.vT
[m
], TGSI_SWIZZLE_W
));
2514 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_Y
), ureg_scalar(tx
->regs
.vT
[m
+1], TGSI_SWIZZLE_W
));
2515 ureg_MOV(ureg
, ureg_writemask(E
, TGSI_WRITEMASK_Z
), ureg_scalar(tx
->regs
.vT
[m
+2], TGSI_SWIZZLE_W
));
2516 /* At this step, dst = N = (u', w', z').
2517 * We want dst to be the texture sampled at (u'', w'', z''), with
2518 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2519 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(dst
), ureg_src(dst
));
2520 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
2521 /* at this step tmp.x = 1/N.N */
2522 ureg_DP3(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(dst
), ureg_src(E
));
2523 /* at this step tmp.y = N.E */
2524 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
2525 /* at this step tmp.x = N.E/N.N */
2526 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_imm1f(ureg
, 2.0f
));
2527 ureg_MUL(ureg
, tmp
, ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
), ureg_src(dst
));
2528 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2529 ureg_SUB(ureg
, tmp
, ureg_src(tmp
), ureg_src(E
));
2530 ureg_TEX(ureg
, dst
, ps1x_sampler_type(tx
->info
, m
+ 2), ureg_src(tmp
), sample
);
2533 return D3DERR_INVALIDCALL
;
2538 DECL_SPECIAL(TEXDEPTH
)
2540 struct ureg_program
*ureg
= tx
->ureg
;
2542 struct ureg_src r5r
, r5g
;
2544 assert(tx
->insn
.dst
[0].idx
== 5); /* instruction must get r5 here */
2546 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2547 * r5 won't be used afterward, thus we can use r5.ba */
2549 r5r
= ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_X
);
2550 r5g
= ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_Y
);
2552 ureg_RCP(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_Z
), r5g
);
2553 ureg_MUL(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_X
), r5r
, ureg_scalar(ureg_src(r5
), TGSI_SWIZZLE_Z
));
2555 ureg_CMP(ureg
, ureg_writemask(r5
, TGSI_WRITEMASK_X
), ureg_negate(ureg_abs(r5g
)),
2556 r5r
, ureg_imm1f(ureg
, 1.0f
));
2557 /* replace the depth for depth testing with the result */
2558 tx
->regs
.oDepth
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_POSITION
, 0,
2559 TGSI_WRITEMASK_Z
, 0, 1);
2560 ureg_MOV(ureg
, tx
->regs
.oDepth
, r5r
);
2567 struct ureg_program
*ureg
= tx
->ureg
;
2568 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2569 struct ureg_src src0
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2570 struct ureg_src src1
= tx_src_param(tx
, &tx
->insn
.src
[1]);
2571 struct ureg_src m00
, m01
, m10
, m11
;
2572 const int m
= tx
->insn
.dst
[0].idx
;
2573 struct ureg_dst tmp
;
2581 nine_info_mark_const_f_used(tx
->info
, 8 + m
);
2582 m00
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, X
);
2583 m01
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, Y
);
2584 m10
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, Z
);
2585 m11
= NINE_CONSTANT_SRC_SWIZZLE(8 + m
, W
);
2586 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2587 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m00
,
2588 NINE_APPLY_SWIZZLE(src1
, X
), NINE_APPLY_SWIZZLE(src0
, X
));
2589 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2590 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), m10
,
2591 NINE_APPLY_SWIZZLE(src1
, Y
), NINE_APPLY_SWIZZLE(ureg_src(tmp
), X
));
2593 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2594 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m01
,
2595 NINE_APPLY_SWIZZLE(src1
, X
), src0
);
2596 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2597 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), m11
,
2598 NINE_APPLY_SWIZZLE(src1
, Y
), NINE_APPLY_SWIZZLE(ureg_src(tmp
), Y
));
2599 ureg_MOV(ureg
, ureg_writemask(dst
, TGSI_WRITEMASK_XY
), ureg_src(tmp
));
2601 tx
->info
->bumpenvmat_needed
= 1;
2608 struct ureg_program
*ureg
= tx
->ureg
;
2610 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2611 struct ureg_src src
[2] = {
2612 tx_src_param(tx
, &tx
->insn
.src
[0]),
2613 tx_src_param(tx
, &tx
->insn
.src
[1])
2615 assert(tx
->insn
.src
[1].idx
>= 0 &&
2616 tx
->insn
.src
[1].idx
< Elements(tx
->sampler_targets
));
2617 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2619 switch (tx
->insn
.flags
) {
2621 ureg_TEX(ureg
, dst
, target
, src
[0], src
[1]);
2623 case NINED3DSI_TEXLD_PROJECT
:
2624 ureg_TXP(ureg
, dst
, target
, src
[0], src
[1]);
2626 case NINED3DSI_TEXLD_BIAS
:
2627 ureg_TXB(ureg
, dst
, target
, src
[0], src
[1]);
2631 return D3DERR_INVALIDCALL
;
2636 DECL_SPECIAL(TEXLD_14
)
2638 struct ureg_program
*ureg
= tx
->ureg
;
2639 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2640 struct ureg_src src
= tx_src_param(tx
, &tx
->insn
.src
[0]);
2641 const unsigned s
= tx
->insn
.dst
[0].idx
;
2642 const unsigned t
= ps1x_sampler_type(tx
->info
, s
);
2644 tx
->info
->sampler_mask
|= 1 << s
;
2645 ureg_TEX(ureg
, dst
, t
, src
, ureg_DECL_sampler(ureg
, s
));
2652 struct ureg_program
*ureg
= tx
->ureg
;
2653 const unsigned s
= tx
->insn
.dst
[0].idx
;
2654 const unsigned t
= ps1x_sampler_type(tx
->info
, s
);
2655 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2656 struct ureg_src src
[2];
2658 tx_texcoord_alloc(tx
, s
);
2660 src
[0] = tx
->regs
.vT
[s
];
2661 src
[1] = ureg_DECL_sampler(ureg
, s
);
2662 tx
->info
->sampler_mask
|= 1 << s
;
2664 TEX_with_ps1x_projection(tx
, dst
, t
, src
[0], src
[1], s
);
2669 DECL_SPECIAL(TEXLDD
)
2672 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2673 struct ureg_src src
[4] = {
2674 tx_src_param(tx
, &tx
->insn
.src
[0]),
2675 tx_src_param(tx
, &tx
->insn
.src
[1]),
2676 tx_src_param(tx
, &tx
->insn
.src
[2]),
2677 tx_src_param(tx
, &tx
->insn
.src
[3])
2679 assert(tx
->insn
.src
[1].idx
>= 0 &&
2680 tx
->insn
.src
[1].idx
< Elements(tx
->sampler_targets
));
2681 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2683 ureg_TXD(tx
->ureg
, dst
, target
, src
[0], src
[2], src
[3], src
[1]);
2687 DECL_SPECIAL(TEXLDL
)
2690 struct ureg_dst dst
= tx_dst_param(tx
, &tx
->insn
.dst
[0]);
2691 struct ureg_src src
[2] = {
2692 tx_src_param(tx
, &tx
->insn
.src
[0]),
2693 tx_src_param(tx
, &tx
->insn
.src
[1])
2695 assert(tx
->insn
.src
[1].idx
>= 0 &&
2696 tx
->insn
.src
[1].idx
< Elements(tx
->sampler_targets
));
2697 target
= tx
->sampler_targets
[tx
->insn
.src
[1].idx
];
2699 ureg_TXL(tx
->ureg
, dst
, target
, src
[0], src
[1]);
2705 STUB(D3DERR_INVALIDCALL
);
/* Handler registered for D3DSIO_BREAKP in inst_table.  Not implemented:
 * the whole body is STUB(D3DERR_INVALIDCALL).
 * NOTE(review): STUB is defined elsewhere; presumably it reports the
 * unimplemented opcode and returns the given HRESULT -- confirm. */
2708 DECL_SPECIAL(BREAKP
)
2710 STUB(D3DERR_INVALIDCALL
);
2715 return D3D_OK
; /* we don't care about phase */
/* Handler referenced by the inst_comment entry: a comment token produces no
 * TGSI, so this simply reports success. */
2718 DECL_SPECIAL(COMMENT
)
2720 return D3D_OK
; /* nothing to do */
/* Builds one struct sm1_op_info initializer.
 *   o        D3D opcode name, pasted onto D3DSIO_
 *   t        TGSI opcode name, pasted onto TGSI_OPCODE_
 *   vv1,vv2  first pair of version bounds (see V(major,minor) in the table)
 *   pv1,pv2  second pair of version bounds
 *   d, s, h  remaining three fields, in struct order
 * NOTE(review): judging by how the table is consumed, the pairs look like
 * the vertex- and fragment-shader min/max versions and d/s/h like
 * ndst/nsrc/handler -- confirm against the struct sm1_op_info declaration. */
2724 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
2725 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2727 struct sm1_op_info inst_table
[] =
2729 _OPI(NOP
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL
), /* 0 */
2730 _OPI(MOV
, MOV
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
),
2731 _OPI(ADD
, ADD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 2 */
2732 _OPI(SUB
, SUB
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 3 */
2733 _OPI(MAD
, MAD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL
), /* 4 */
2734 _OPI(MUL
, MUL
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 5 */
2735 _OPI(RCP
, RCP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 6 */
2736 _OPI(RSQ
, RSQ
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ
)), /* 7 */
2737 _OPI(DP3
, DP3
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 8 */
2738 _OPI(DP4
, DP4
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 9 */
2739 _OPI(MIN
, MIN
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 10 */
2740 _OPI(MAX
, MAX
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 11 */
2741 _OPI(SLT
, SLT
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 12 */
2742 _OPI(SGE
, SGE
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 13 */
2743 _OPI(EXP
, EX2
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 14 */
2744 _OPI(LOG
, LG2
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG
)), /* 15 */
2745 _OPI(LIT
, LIT
, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT
)), /* 16 */
2746 _OPI(DST
, DST
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* 17 */
2747 _OPI(LRP
, LRP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL
), /* 18 */
2748 _OPI(FRC
, FRC
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
), /* 19 */
2750 _OPI(M4x4
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4
)),
2751 _OPI(M4x3
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3
)),
2752 _OPI(M3x4
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4
)),
2753 _OPI(M3x3
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3
)),
2754 _OPI(M3x2
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2
)),
2756 _OPI(CALL
, CAL
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL
)),
2757 _OPI(CALLNZ
, CAL
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ
)),
2758 _OPI(LOOP
, BGNLOOP
, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP
)),
2759 _OPI(RET
, RET
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET
)),
2760 _OPI(ENDLOOP
, ENDLOOP
, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP
)),
2761 _OPI(LABEL
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL
)),
2763 _OPI(DCL
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL
)),
2765 _OPI(POW
, POW
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW
)),
2766 _OPI(CRS
, XPD
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL
), /* XXX: .w */
2767 _OPI(SGN
, SSG
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN
)), /* ignore src1,2 */
2768 _OPI(ABS
, ABS
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL
),
2769 _OPI(NRM
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM
)), /* NRM doesn't fit */
2771 _OPI(SINCOS
, SCS
, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS
)),
2772 _OPI(SINCOS
, SCS
, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS
)),
2774 /* More flow control */
2775 _OPI(REP
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP
)),
2776 _OPI(ENDREP
, NOP
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP
)),
2777 _OPI(IF
, IF
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF
)),
2778 _OPI(IFC
, IF
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC
)),
2779 _OPI(ELSE
, ELSE
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE
)),
2780 _OPI(ENDIF
, ENDIF
, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF
)),
2781 _OPI(BREAK
, BRK
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL
),
2782 _OPI(BREAKC
, BREAKC
, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC
)),
2783 /* we don't write to the address register, but a normal register (copied
2784 * when needed to the address register), thus we don't use ARR */
2785 _OPI(MOVA
, MOV
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
2787 _OPI(DEFB
, NOP
, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB
)),
2788 _OPI(DEFI
, NOP
, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI
)),
2790 _OPI(TEXCOORD
, NOP
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD
)),
2791 _OPI(TEXCOORD
, MOV
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14
)),
2792 _OPI(TEXKILL
, KILL_IF
, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL
)),
2793 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX
)),
2794 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14
)),
2795 _OPI(TEX
, TEX
, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD
)),
2796 _OPI(TEXBEM
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM
)),
2797 _OPI(TEXBEML
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM
)),
2798 _OPI(TEXREG2AR
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR
)),
2799 _OPI(TEXREG2GB
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB
)),
2800 _OPI(TEXM3x2PAD
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD
)),
2801 _OPI(TEXM3x2TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX
)),
2802 _OPI(TEXM3x3PAD
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD
)),
2803 _OPI(TEXM3x3TEX
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
2804 _OPI(TEXM3x3SPEC
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC
)),
2805 _OPI(TEXM3x3VSPEC
, TEX
, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
2807 _OPI(EXPP
, EXP
, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL
),
2808 _OPI(EXPP
, EX2
, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL
),
2809 _OPI(LOGP
, LG2
, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG
)),
2810 _OPI(CND
, NOP
, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND
)),
2812 _OPI(DEF
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF
)),
2814 /* More tex stuff */
2815 _OPI(TEXREG2RGB
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB
)),
2816 _OPI(TEXDP3TEX
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX
)),
2817 _OPI(TEXM3x2DEPTH
, TEX
, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH
)),
2818 _OPI(TEXDP3
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3
)),
2819 _OPI(TEXM3x3
, TEX
, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3
)),
2820 _OPI(TEXDEPTH
, TEX
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH
)),
2823 _OPI(CMP
, CMP
, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP
)), /* reversed */
2824 _OPI(BEM
, NOP
, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM
)),
2825 _OPI(DP2ADD
, NOP
, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD
)),
2826 _OPI(DSX
, DDX
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL
),
2827 _OPI(DSY
, DDY
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL
),
2828 _OPI(TEXLDD
, TXD
, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD
)),
2829 _OPI(SETP
, NOP
, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP
)),
2830 _OPI(TEXLDL
, TXL
, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL
)),
2831 _OPI(BREAKP
, BRK
, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP
))
2834 struct sm1_op_info inst_phase
=
2835 _OPI(PHASE
, NOP
, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE
));
2837 struct sm1_op_info inst_comment
=
2838 _OPI(COMMENT
, NOP
, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT
));
2841 create_op_info_map(struct shader_translator
*tx
)
2843 const unsigned version
= (tx
->version
.major
<< 8) | tx
->version
.minor
;
2846 for (i
= 0; i
< Elements(tx
->op_info_map
); ++i
)
2847 tx
->op_info_map
[i
] = -1;
2849 if (tx
->processor
== TGSI_PROCESSOR_VERTEX
) {
2850 for (i
= 0; i
< Elements(inst_table
); ++i
) {
2851 assert(inst_table
[i
].sio
< Elements(tx
->op_info_map
));
2852 if (inst_table
[i
].vert_version
.min
<= version
&&
2853 inst_table
[i
].vert_version
.max
>= version
)
2854 tx
->op_info_map
[inst_table
[i
].sio
] = i
;
2857 for (i
= 0; i
< Elements(inst_table
); ++i
) {
2858 assert(inst_table
[i
].sio
< Elements(tx
->op_info_map
));
2859 if (inst_table
[i
].frag_version
.min
<= version
&&
2860 inst_table
[i
].frag_version
.max
>= version
)
2861 tx
->op_info_map
[inst_table
[i
].sio
] = i
;
2866 static inline HRESULT
2867 NineTranslateInstruction_Generic(struct shader_translator
*tx
)
2869 struct ureg_dst dst
[1];
2870 struct ureg_src src
[4];
2873 for (i
= 0; i
< tx
->insn
.ndst
&& i
< Elements(dst
); ++i
)
2874 dst
[i
] = tx_dst_param(tx
, &tx
->insn
.dst
[i
]);
2875 for (i
= 0; i
< tx
->insn
.nsrc
&& i
< Elements(src
); ++i
)
2876 src
[i
] = tx_src_param(tx
, &tx
->insn
.src
[i
]);
2878 ureg_insn(tx
->ureg
, tx
->insn
.info
->opcode
,
2880 src
, tx
->insn
.nsrc
);
2885 TOKEN_PEEK(struct shader_translator
*tx
)
2887 return *(tx
->parse
);
2891 TOKEN_NEXT(struct shader_translator
*tx
)
2893 return *(tx
->parse
)++;
2897 TOKEN_JUMP(struct shader_translator
*tx
)
2899 if (tx
->parse_next
&& tx
->parse
!= tx
->parse_next
) {
2900 WARN("parse(%p) != parse_next(%p) !\n", tx
->parse
, tx
->parse_next
);
2901 tx
->parse
= tx
->parse_next
;
2905 static inline boolean
2906 sm1_parse_eof(struct shader_translator
*tx
)
2908 return TOKEN_PEEK(tx
) == NINED3DSP_END
;
2912 sm1_read_version(struct shader_translator
*tx
)
2914 const DWORD tok
= TOKEN_NEXT(tx
);
2916 tx
->version
.major
= D3DSHADER_VERSION_MAJOR(tok
);
2917 tx
->version
.minor
= D3DSHADER_VERSION_MINOR(tok
);
2919 switch (tok
>> 16) {
2920 case NINED3D_SM1_VS
: tx
->processor
= TGSI_PROCESSOR_VERTEX
; break;
2921 case NINED3D_SM1_PS
: tx
->processor
= TGSI_PROCESSOR_FRAGMENT
; break;
2923 DBG("Invalid shader type: %x\n", tok
);
2929 /* This is just to check if we parsed the instruction properly. */
2931 sm1_parse_get_skip(struct shader_translator
*tx
)
2933 const DWORD tok
= TOKEN_PEEK(tx
);
2935 if (tx
->version
.major
>= 2) {
2936 tx
->parse_next
= tx
->parse
+ 1 /* this */ +
2937 ((tok
& D3DSI_INSTLENGTH_MASK
) >> D3DSI_INSTLENGTH_SHIFT
);
2939 tx
->parse_next
= NULL
; /* TODO: determine from param count */
2944 sm1_print_comment(const char *comment
, UINT size
)
2952 sm1_parse_comments(struct shader_translator
*tx
, BOOL print
)
2954 DWORD tok
= TOKEN_PEEK(tx
);
2956 while ((tok
& D3DSI_OPCODE_MASK
) == D3DSIO_COMMENT
)
2958 const char *comment
= "";
2959 UINT size
= (tok
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
2960 tx
->parse
+= size
+ 1;
2963 sm1_print_comment(comment
, size
);
2965 tok
= TOKEN_PEEK(tx
);
2970 sm1_parse_get_param(struct shader_translator
*tx
, DWORD
*reg
, DWORD
*rel
)
2972 *reg
= TOKEN_NEXT(tx
);
2974 if (*reg
& D3DSHADER_ADDRMODE_RELATIVE
)
2976 if (tx
->version
.major
< 2)
2978 ((D3DSPR_ADDR
<< D3DSP_REGTYPE_SHIFT2
) & D3DSP_REGTYPE_MASK2
) |
2979 ((D3DSPR_ADDR
<< D3DSP_REGTYPE_SHIFT
) & D3DSP_REGTYPE_MASK
) |
2982 *rel
= TOKEN_NEXT(tx
);
2987 sm1_parse_dst_param(struct sm1_dst_param
*dst
, DWORD tok
)
2991 (tok
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
|
2992 (tok
& D3DSP_REGTYPE_MASK2
) >> D3DSP_REGTYPE_SHIFT2
;
2993 dst
->type
= TGSI_RETURN_TYPE_FLOAT
;
2994 dst
->idx
= tok
& D3DSP_REGNUM_MASK
;
2996 dst
->mask
= (tok
& NINED3DSP_WRITEMASK_MASK
) >> NINED3DSP_WRITEMASK_SHIFT
;
2997 dst
->mod
= (tok
& D3DSP_DSTMOD_MASK
) >> D3DSP_DSTMOD_SHIFT
;
2998 shift
= (tok
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
2999 dst
->shift
= (shift
& 0x8) ? -(shift
& 0x7) : shift
& 0x7;
3003 sm1_parse_src_param(struct sm1_src_param
*src
, DWORD tok
)
3006 ((tok
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) |
3007 ((tok
& D3DSP_REGTYPE_MASK2
) >> D3DSP_REGTYPE_SHIFT2
);
3008 src
->type
= TGSI_RETURN_TYPE_FLOAT
;
3009 src
->idx
= tok
& D3DSP_REGNUM_MASK
;
3011 src
->swizzle
= (tok
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
3012 src
->mod
= (tok
& D3DSP_SRCMOD_MASK
) >> D3DSP_SRCMOD_SHIFT
;
3014 switch (src
->file
) {
3015 case D3DSPR_CONST2
: src
->file
= D3DSPR_CONST
; src
->idx
+= 2048; break;
3016 case D3DSPR_CONST3
: src
->file
= D3DSPR_CONST
; src
->idx
+= 4096; break;
3017 case D3DSPR_CONST4
: src
->file
= D3DSPR_CONST
; src
->idx
+= 6144; break;
3024 sm1_parse_immediate(struct shader_translator
*tx
,
3025 struct sm1_src_param
*imm
)
3027 imm
->file
= NINED3DSPR_IMMEDIATE
;
3030 imm
->swizzle
= NINED3DSP_NOSWIZZLE
;
3032 switch (tx
->insn
.opcode
) {
3034 imm
->type
= NINED3DSPTYPE_FLOAT4
;
3035 memcpy(&imm
->imm
.d
[0], tx
->parse
, 4 * sizeof(DWORD
));
3039 imm
->type
= NINED3DSPTYPE_INT4
;
3040 memcpy(&imm
->imm
.d
[0], tx
->parse
, 4 * sizeof(DWORD
));
3044 imm
->type
= NINED3DSPTYPE_BOOL
;
3045 memcpy(&imm
->imm
.d
[0], tx
->parse
, 1 * sizeof(DWORD
));
3055 sm1_read_dst_param(struct shader_translator
*tx
,
3056 struct sm1_dst_param
*dst
,
3057 struct sm1_src_param
*rel
)
3059 DWORD tok_dst
, tok_rel
= 0;
3061 sm1_parse_get_param(tx
, &tok_dst
, &tok_rel
);
3062 sm1_parse_dst_param(dst
, tok_dst
);
3063 if (tok_dst
& D3DSHADER_ADDRMODE_RELATIVE
) {
3064 sm1_parse_src_param(rel
, tok_rel
);
3070 sm1_read_src_param(struct shader_translator
*tx
,
3071 struct sm1_src_param
*src
,
3072 struct sm1_src_param
*rel
)
3074 DWORD tok_src
, tok_rel
= 0;
3076 sm1_parse_get_param(tx
, &tok_src
, &tok_rel
);
3077 sm1_parse_src_param(src
, tok_src
);
3078 if (tok_src
& D3DSHADER_ADDRMODE_RELATIVE
) {
3080 sm1_parse_src_param(rel
, tok_rel
);
3086 sm1_read_semantic(struct shader_translator
*tx
,
3087 struct sm1_semantic
*sem
)
3089 const DWORD tok_usg
= TOKEN_NEXT(tx
);
3090 const DWORD tok_dst
= TOKEN_NEXT(tx
);
3092 sem
->sampler_type
= (tok_usg
& D3DSP_TEXTURETYPE_MASK
) >> D3DSP_TEXTURETYPE_SHIFT
;
3093 sem
->usage
= (tok_usg
& D3DSP_DCL_USAGE_MASK
) >> D3DSP_DCL_USAGE_SHIFT
;
3094 sem
->usage_idx
= (tok_usg
& D3DSP_DCL_USAGEINDEX_MASK
) >> D3DSP_DCL_USAGEINDEX_SHIFT
;
3096 sm1_parse_dst_param(&sem
->reg
, tok_dst
);
3100 sm1_parse_instruction(struct shader_translator
*tx
)
3102 struct sm1_instruction
*insn
= &tx
->insn
;
3104 struct sm1_op_info
*info
= NULL
;
3107 sm1_parse_comments(tx
, TRUE
);
3108 sm1_parse_get_skip(tx
);
3110 tok
= TOKEN_NEXT(tx
);
3112 insn
->opcode
= tok
& D3DSI_OPCODE_MASK
;
3113 insn
->flags
= (tok
& NINED3DSIO_OPCODE_FLAGS_MASK
) >> NINED3DSIO_OPCODE_FLAGS_SHIFT
;
3114 insn
->coissue
= !!(tok
& D3DSI_COISSUE
);
3115 insn
->predicated
= !!(tok
& NINED3DSHADER_INST_PREDICATED
);
3117 if (insn
->opcode
< Elements(tx
->op_info_map
)) {
3118 int k
= tx
->op_info_map
[insn
->opcode
];
3120 assert(k
< Elements(inst_table
));
3121 info
= &inst_table
[k
];
3124 if (insn
->opcode
== D3DSIO_PHASE
) info
= &inst_phase
;
3125 if (insn
->opcode
== D3DSIO_COMMENT
) info
= &inst_comment
;
3128 DBG("illegal or unhandled opcode: %08x\n", insn
->opcode
);
3133 insn
->ndst
= info
->ndst
;
3134 insn
->nsrc
= info
->nsrc
;
3136 assert(!insn
->predicated
&& "TODO: predicated instructions");
3140 unsigned min
= IS_VS
? info
->vert_version
.min
: info
->frag_version
.min
;
3141 unsigned max
= IS_VS
? info
->vert_version
.max
: info
->frag_version
.max
;
3142 unsigned ver
= (tx
->version
.major
<< 8) | tx
->version
.minor
;
3143 if (ver
< min
|| ver
> max
) {
3144 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3150 for (i
= 0; i
< insn
->ndst
; ++i
)
3151 sm1_read_dst_param(tx
, &insn
->dst
[i
], &insn
->dst_rel
[i
]);
3152 if (insn
->predicated
)
3153 sm1_read_src_param(tx
, &insn
->pred
, NULL
);
3154 for (i
= 0; i
< insn
->nsrc
; ++i
)
3155 sm1_read_src_param(tx
, &insn
->src
[i
], &insn
->src_rel
[i
]);
3157 /* parse here so we can dump them before processing */
3158 if (insn
->opcode
== D3DSIO_DEF
||
3159 insn
->opcode
== D3DSIO_DEFI
||
3160 insn
->opcode
== D3DSIO_DEFB
)
3161 sm1_parse_immediate(tx
, &tx
->insn
.src
[0]);
3163 sm1_dump_instruction(insn
, tx
->cond_depth
+ tx
->loop_depth
);
3164 sm1_instruction_check(insn
);
3169 NineTranslateInstruction_Generic(tx
);
3170 tx_apply_dst0_modifiers(tx
);
3172 tx
->num_scratch
= 0; /* reset */
3178 tx_ctor(struct shader_translator
*tx
, struct nine_shader_info
*info
)
3184 tx
->byte_code
= info
->byte_code
;
3185 tx
->parse
= info
->byte_code
;
3187 for (i
= 0; i
< Elements(info
->input_map
); ++i
)
3188 info
->input_map
[i
] = NINE_DECLUSAGE_NONE
;
3189 info
->num_inputs
= 0;
3191 info
->position_t
= FALSE
;
3192 info
->point_size
= FALSE
;
3194 tx
->info
->const_float_slots
= 0;
3195 tx
->info
->const_int_slots
= 0;
3196 tx
->info
->const_bool_slots
= 0;
3198 info
->sampler_mask
= 0x0;
3199 info
->rt_mask
= 0x0;
3201 info
->lconstf
.data
= NULL
;
3202 info
->lconstf
.ranges
= NULL
;
3204 info
->bumpenvmat_needed
= 0;
3206 for (i
= 0; i
< Elements(tx
->regs
.rL
); ++i
) {
3207 tx
->regs
.rL
[i
] = ureg_dst_undef();
3209 tx
->regs
.address
= ureg_dst_undef();
3210 tx
->regs
.a0
= ureg_dst_undef();
3211 tx
->regs
.p
= ureg_dst_undef();
3212 tx
->regs
.oDepth
= ureg_dst_undef();
3213 tx
->regs
.vPos
= ureg_src_undef();
3214 tx
->regs
.vFace
= ureg_src_undef();
3215 for (i
= 0; i
< Elements(tx
->regs
.o
); ++i
)
3216 tx
->regs
.o
[i
] = ureg_dst_undef();
3217 for (i
= 0; i
< Elements(tx
->regs
.oCol
); ++i
)
3218 tx
->regs
.oCol
[i
] = ureg_dst_undef();
3219 for (i
= 0; i
< Elements(tx
->regs
.vC
); ++i
)
3220 tx
->regs
.vC
[i
] = ureg_src_undef();
3221 for (i
= 0; i
< Elements(tx
->regs
.vT
); ++i
)
3222 tx
->regs
.vT
[i
] = ureg_src_undef();
3224 for (i
= 0; i
< Elements(tx
->lconsti
); ++i
)
3225 tx
->lconsti
[i
].idx
= -1;
3226 for (i
= 0; i
< Elements(tx
->lconstb
); ++i
)
3227 tx
->lconstb
[i
].idx
= -1;
3229 sm1_read_version(tx
);
3231 info
->version
= (tx
->version
.major
<< 4) | tx
->version
.minor
;
3233 create_op_info_map(tx
);
3237 tx_dtor(struct shader_translator
*tx
)
3239 if (tx
->num_inst_labels
)
3240 FREE(tx
->inst_labels
);
3246 static inline unsigned
3247 tgsi_processor_from_type(unsigned shader_type
)
3249 switch (shader_type
) {
3250 case PIPE_SHADER_VERTEX
: return TGSI_PROCESSOR_VERTEX
;
3251 case PIPE_SHADER_FRAGMENT
: return TGSI_PROCESSOR_FRAGMENT
;
3258 shader_add_ps_fog_stage(struct shader_translator
*tx
, struct ureg_src src_col
)
3260 struct ureg_program
*ureg
= tx
->ureg
;
3261 struct ureg_dst oCol0
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 0);
3262 struct ureg_src fog_end
, fog_coeff
, fog_density
;
3263 struct ureg_src fog_vs
, depth
, fog_color
;
3264 struct ureg_dst fog_factor
;
3266 if (!tx
->info
->fog_enable
) {
3267 ureg_MOV(ureg
, oCol0
, src_col
);
3271 if (tx
->info
->fog_mode
!= D3DFOG_NONE
) {
3272 if (tx
->wpos_is_sysval
) {
3273 depth
= ureg_DECL_system_value(ureg
, TGSI_SEMANTIC_POSITION
, 0);
3275 depth
= ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_POSITION
, 0,
3276 TGSI_INTERPOLATE_LINEAR
);
3278 depth
= ureg_scalar(depth
, TGSI_SWIZZLE_Z
);
3281 nine_info_mark_const_f_used(tx
->info
, 33);
3282 fog_color
= NINE_CONSTANT_SRC(32);
3283 fog_factor
= tx_scratch_scalar(tx
);
3285 if (tx
->info
->fog_mode
== D3DFOG_LINEAR
) {
3286 fog_end
= NINE_CONSTANT_SRC_SWIZZLE(33, X
);
3287 fog_coeff
= NINE_CONSTANT_SRC_SWIZZLE(33, Y
);
3288 ureg_SUB(ureg
, fog_factor
, fog_end
, depth
);
3289 ureg_MUL(ureg
, ureg_saturate(fog_factor
), tx_src_scalar(fog_factor
), fog_coeff
);
3290 } else if (tx
->info
->fog_mode
== D3DFOG_EXP
) {
3291 fog_density
= NINE_CONSTANT_SRC_SWIZZLE(33, X
);
3292 ureg_MUL(ureg
, fog_factor
, depth
, fog_density
);
3293 ureg_MUL(ureg
, fog_factor
, tx_src_scalar(fog_factor
), ureg_imm1f(ureg
, -1.442695f
));
3294 ureg_EX2(ureg
, fog_factor
, tx_src_scalar(fog_factor
));
3295 } else if (tx
->info
->fog_mode
== D3DFOG_EXP2
) {
3296 fog_density
= NINE_CONSTANT_SRC_SWIZZLE(33, X
);
3297 ureg_MUL(ureg
, fog_factor
, depth
, fog_density
);
3298 ureg_MUL(ureg
, fog_factor
, tx_src_scalar(fog_factor
), tx_src_scalar(fog_factor
));
3299 ureg_MUL(ureg
, fog_factor
, tx_src_scalar(fog_factor
), ureg_imm1f(ureg
, -1.442695f
));
3300 ureg_EX2(ureg
, fog_factor
, tx_src_scalar(fog_factor
));
3302 fog_vs
= ureg_scalar(ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_FOG
, 0,
3303 TGSI_INTERPOLATE_PERSPECTIVE
),
3305 ureg_MOV(ureg
, fog_factor
, fog_vs
);
3308 ureg_LRP(ureg
, ureg_writemask(oCol0
, TGSI_WRITEMASK_XYZ
),
3309 tx_src_scalar(fog_factor
), src_col
, fog_color
);
3310 ureg_MOV(ureg
, ureg_writemask(oCol0
, TGSI_WRITEMASK_W
), src_col
);
3313 #define GET_CAP(n) device->screen->get_param( \
3314 device->screen, PIPE_CAP_##n)
3315 #define GET_SHADER_CAP(n) device->screen->get_shader_param( \
3316 device->screen, info->type, PIPE_SHADER_CAP_##n)
3319 nine_translate_shader(struct NineDevice9
*device
, struct nine_shader_info
*info
)
3321 struct shader_translator
*tx
;
3322 HRESULT hr
= D3D_OK
;
3323 const unsigned processor
= tgsi_processor_from_type(info
->type
);
3324 unsigned s
, slot_max
;
3325 unsigned max_const_f
;
3327 user_assert(processor
!= ~0, D3DERR_INVALIDCALL
);
3329 tx
= CALLOC_STRUCT(shader_translator
);
3331 return E_OUTOFMEMORY
;
3334 if (((tx
->version
.major
<< 16) | tx
->version
.minor
) > 0x00030000) {
3335 hr
= D3DERR_INVALIDCALL
;
3336 DBG("Unsupported shader version: %u.%u !\n",
3337 tx
->version
.major
, tx
->version
.minor
);
3340 if (tx
->processor
!= processor
) {
3341 hr
= D3DERR_INVALIDCALL
;
3342 DBG("Shader type mismatch: %u / %u !\n", tx
->processor
, processor
);
3345 DUMP("%s%u.%u\n", processor
== TGSI_PROCESSOR_VERTEX
? "VS" : "PS",
3346 tx
->version
.major
, tx
->version
.minor
);
3348 tx
->ureg
= ureg_create(processor
);
3354 tx
->native_integers
= GET_SHADER_CAP(INTEGERS
);
3355 tx
->inline_subroutines
= !GET_SHADER_CAP(SUBROUTINES
);
3356 tx
->lower_preds
= !GET_SHADER_CAP(MAX_PREDS
);
3357 tx
->want_texcoord
= GET_CAP(TGSI_TEXCOORD
);
3358 tx
->shift_wpos
= !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
3359 tx
->texcoord_sn
= tx
->want_texcoord
?
3360 TGSI_SEMANTIC_TEXCOORD
: TGSI_SEMANTIC_GENERIC
;
3361 tx
->wpos_is_sysval
= GET_CAP(TGSI_FS_POSITION_IS_SYSVAL
);
3362 tx
->face_is_sysval_integer
= GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL
);
3365 tx
->num_constf_allowed
= NINE_MAX_CONST_F
;
3366 } else if (tx
->version
.major
< 2) {/* IS_PS v1 */
3367 tx
->num_constf_allowed
= 8;
3368 } else if (tx
->version
.major
== 2) {/* IS_PS v2 */
3369 tx
->num_constf_allowed
= 32;
3370 } else {/* IS_PS v3 */
3371 tx
->num_constf_allowed
= NINE_MAX_CONST_F_PS3
;
3374 if (tx
->version
.major
< 2) {
3375 tx
->num_consti_allowed
= 0;
3376 tx
->num_constb_allowed
= 0;
3378 tx
->num_consti_allowed
= NINE_MAX_CONST_I
;
3379 tx
->num_constb_allowed
= NINE_MAX_CONST_B
;
3382 /* VS must always write position. Declare it here to make it the 1st output.
3383 * (Some drivers like nv50 are buggy and rely on that.)
3386 tx
->regs
.oPos
= ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_POSITION
, 0);
3388 ureg_property(tx
->ureg
, TGSI_PROPERTY_FS_COORD_ORIGIN
, TGSI_FS_COORD_ORIGIN_UPPER_LEFT
);
3389 if (!tx
->shift_wpos
)
3390 ureg_property(tx
->ureg
, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
, TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
3393 while (!sm1_parse_eof(tx
) && !tx
->failure
)
3394 sm1_parse_instruction(tx
);
3395 tx
->parse
++; /* for byte_size */
3398 ERR("Encountered buggy shader\n");
3399 ureg_destroy(tx
->ureg
);
3400 hr
= D3DERR_INVALIDCALL
;
3404 if (IS_PS
&& tx
->version
.major
< 3) {
3405 if (tx
->version
.major
< 2) {
3406 assert(tx
->num_temp
); /* there must be color output */
3407 info
->rt_mask
|= 0x1;
3408 shader_add_ps_fog_stage(tx
, ureg_src(tx
->regs
.r
[0]));
3410 shader_add_ps_fog_stage(tx
, ureg_src(tx
->regs
.oCol
[0]));
3414 if (IS_VS
&& tx
->version
.major
< 3 && ureg_dst_is_undef(tx
->regs
.oFog
) && info
->fog_enable
) {
3415 tx
->regs
.oFog
= ureg_DECL_output(tx
->ureg
, TGSI_SEMANTIC_FOG
, 0);
3416 ureg_MOV(tx
->ureg
, ureg_writemask(tx
->regs
.oFog
, TGSI_WRITEMASK_X
), ureg_imm1f(tx
->ureg
, 0.0f
));
3419 if (info
->position_t
)
3420 ureg_property(tx
->ureg
, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION
, TRUE
);
3424 if (IS_VS
&& !ureg_dst_is_undef(tx
->regs
.oPts
))
3425 info
->point_size
= TRUE
;
3427 /* record local constants */
3428 if (tx
->num_lconstf
&& tx
->indirect_const_access
) {
3429 struct nine_range
*ranges
;
3436 data
= MALLOC(tx
->num_lconstf
* 4 * sizeof(float));
3439 info
->lconstf
.data
= data
;
3441 indices
= MALLOC(tx
->num_lconstf
* sizeof(indices
[0]));
3445 /* lazy sort, num_lconstf should be small */
3446 for (n
= 0; n
< tx
->num_lconstf
; ++n
) {
3447 for (k
= 0, i
= 0; i
< tx
->num_lconstf
; ++i
) {
3448 if (tx
->lconstf
[i
].idx
< tx
->lconstf
[k
].idx
)
3451 indices
[n
] = tx
->lconstf
[k
].idx
;
3452 memcpy(&data
[n
* 4], &tx
->lconstf
[k
].imm
.f
[0], 4 * sizeof(float));
3453 tx
->lconstf
[k
].idx
= INT_MAX
;
3457 for (n
= 1, i
= 1; i
< tx
->num_lconstf
; ++i
)
3458 if (indices
[i
] != indices
[i
- 1] + 1)
3460 ranges
= MALLOC(n
* sizeof(ranges
[0]));
3465 info
->lconstf
.ranges
= ranges
;
3468 ranges
[k
].bgn
= indices
[0];
3469 for (i
= 1; i
< tx
->num_lconstf
; ++i
) {
3470 if (indices
[i
] != indices
[i
- 1] + 1) {
3471 ranges
[k
].next
= &ranges
[k
+ 1];
3472 ranges
[k
].end
= indices
[i
- 1] + 1;
3474 ranges
[k
].bgn
= indices
[i
];
3477 ranges
[k
].end
= indices
[i
- 1] + 1;
3478 ranges
[k
].next
= NULL
;
3479 assert(n
== (k
+ 1));
3486 if (info
->const_float_slots
> device
->max_vs_const_f
&&
3487 (info
->const_int_slots
|| info
->const_bool_slots
))
3488 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3491 if (tx
->indirect_const_access
) /* vs only */
3492 info
->const_float_slots
= device
->max_vs_const_f
;
3494 max_const_f
= IS_VS
? device
->max_vs_const_f
: device
->max_ps_const_f
;
3495 slot_max
= info
->const_bool_slots
> 0 ?
3496 max_const_f
+ NINE_MAX_CONST_I
3497 + DIV_ROUND_UP(info
->const_bool_slots
, 4) :
3498 info
->const_int_slots
> 0 ?
3499 max_const_f
+ info
->const_int_slots
:
3500 info
->const_float_slots
;
3502 info
->const_used_size
= sizeof(float[4]) * slot_max
; /* slots start from 1 */
3504 for (s
= 0; s
< slot_max
; s
++)
3505 ureg_DECL_constant(tx
->ureg
, s
);
3507 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE
)) {
3509 const struct tgsi_token
*toks
= ureg_get_tokens(tx
->ureg
, &count
);
3511 ureg_free_tokens(toks
);
3514 info
->cso
= ureg_create_shader_and_destroy(tx
->ureg
, device
->pipe
);
3516 hr
= D3DERR_DRIVERINTERNALERROR
;
3517 FREE(info
->lconstf
.data
);
3518 FREE(info
->lconstf
.ranges
);
3522 info
->byte_size
= (tx
->parse
- tx
->byte_code
) * sizeof(DWORD
);