#include "nine_debug.h"
#include "nine_state.h"
+#include "util/macros.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "pipe/p_shader_tokens.h"
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
-static INLINE const char *d3dsio_to_string(unsigned opcode);
+static inline const char *d3dsio_to_string(unsigned opcode);
#define NINED3D_SM1_VS 0xfffe
BYTE type;
};
-static INLINE void
+static inline void
assert_replicate_swizzle(const struct ureg_src *reg)
{
assert(reg->SwizzleY == reg->SwizzleX &&
struct sm1_local_const lconstb[NINE_MAX_CONST_B];
boolean indirect_const_access;
+ boolean failure;
struct nine_shader_info *info;
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
/* Exclusive upper bound on float-constant indices for the shader being
 * translated: NINE_MAX_CONST_F for vertex shaders, NINE_MAX_CONST_F_PS3
 * otherwise. The expansion reads `tx`, so it is only usable where a
 * `struct shader_translator *tx` is in scope. */
+#define NINE_MAX_CONST_F_SHADER (tx->processor == TGSI_PROCESSOR_VERTEX ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3)
+
/* Bail out of a void function when `cond` holds: marks the translation as
 * failed (tx->failure) and returns immediately. Requires `tx` in scope.
 * NOTE(review): this expands to a bare `if` statement, not a
 * do { } while (0) block, and the call sites visible in this file invoke it
 * without a trailing semicolon. Do not use it as the unbraced body of an
 * if/else, and do not wrap it without also updating those call sites. */
+#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
{
INT i;
- assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
+ if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) {
+ tx->failure = TRUE;
+ return FALSE;
+ }
for (i = 0; i < tx->num_lconstf; ++i) {
if (tx->lconstf[i].idx == index) {
*src = tx->lconstf[i].reg;
static boolean
tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
{
- assert(index >= 0 && index < NINE_MAX_CONST_I);
+ if (index < 0 || index >= NINE_MAX_CONST_I) {
+ tx->failure = TRUE;
+ return FALSE;
+ }
if (tx->lconsti[index].idx == index)
*src = tx->lconsti[index].reg;
return tx->lconsti[index].idx == index;
static boolean
tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
{
- assert(index >= 0 && index < NINE_MAX_CONST_B);
+ if (index < 0 || index >= NINE_MAX_CONST_B) {
+ tx->failure = TRUE;
+ return FALSE;
+ }
if (tx->lconstb[index].idx == index)
*src = tx->lconstb[index].reg;
return tx->lconstb[index].idx == index;
{
unsigned n;
- /* Anno1404 sets out of range constants. */
- assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
- if (index >= NINE_MAX_CONST_F)
+ FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER)
+ if (IS_VS && index >= NINE_MAX_CONST_F_SHADER)
WARN("lconstf index %i too high, indirect access won't work\n", index);
for (n = 0; n < tx->num_lconstf; ++n)
static void
tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
{
- assert(index >= 0 && index < NINE_MAX_CONST_I);
+ FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I)
tx->lconsti[index].idx = index;
tx->lconsti[index].reg = tx->native_integers ?
ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
/* Record a shader-local boolean constant.
 *
 * tx:    translator state; lconstb[index] is overwritten.
 * index: boolean constant register index, expected in [0, NINE_MAX_CONST_B).
 * b:     value of the constant.
 *
 * An out-of-range index no longer asserts: FAILURE_VOID flags tx->failure
 * and returns without touching the table. */
static void
tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
{
- assert(index >= 0 && index < NINE_MAX_CONST_B);
+ FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B)
/* idx == index marks the slot as defined (tx_lconstb tests for this). */
tx->lconstb[index].idx = index;
/* With native integers the immediate is an all-ones / zero unsigned word;
 * otherwise it is emitted as the float 1.0 / 0.0. */
tx->lconstb[index].reg = tx->native_integers ?
ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
}
/* Hand out the next scratch temporary from tx->regs.t.
 *
 * The slot at tx->num_scratch is declared via ureg_DECL_local_temporary()
 * if it is still undefined, then returned, and num_scratch is advanced.
 *
 * If every slot of tx->regs.t is already in use, the translation is flagged
 * as failed (tx->failure) and slot 0 is returned so that the caller still
 * receives a register to work with instead of hitting an assert. */
-static INLINE struct ureg_dst
+static inline struct ureg_dst
tx_scratch(struct shader_translator *tx)
{
- assert(tx->num_scratch < Elements(tx->regs.t));
+ if (tx->num_scratch >= Elements(tx->regs.t)) {
+ tx->failure = TRUE;
+ return tx->regs.t[0];
+ }
/* Slots are declared only the first time they are handed out. */
if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
return tx->regs.t[tx->num_scratch++];
}
/* Same as tx_scratch(), but the returned destination is write-masked to
 * the X component only. Consumes one scratch slot like tx_scratch(). */
-static INLINE struct ureg_dst
+static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator *tx)
{
return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
}
-static INLINE struct ureg_src
+static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)
{
struct ureg_src src = ureg_src(dst);
return src;
}
-static INLINE void
+static inline void
tx_temp_alloc(struct shader_translator *tx, INT idx)
{
assert(idx >= 0);
tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
}
-static INLINE void
+static inline void
tx_addr_alloc(struct shader_translator *tx, INT idx)
{
assert(idx == 0);
tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
}
-static INLINE void
+static inline void
tx_pred_alloc(struct shader_translator *tx, INT idx)
{
assert(idx == 0);
tx->regs.p = ureg_DECL_predicate(tx->ureg);
}
-static INLINE void
+static inline void
tx_texcoord_alloc(struct shader_translator *tx, INT idx)
{
assert(IS_PS);
TGSI_INTERPOLATE_PERSPECTIVE);
}
-static INLINE unsigned *
+static inline unsigned *
tx_bgnloop(struct shader_translator *tx)
{
tx->loop_depth++;
return &tx->loop_labels[tx->loop_depth - 1];
}
-static INLINE unsigned *
+static inline unsigned *
tx_endloop(struct shader_translator *tx)
{
assert(tx->loop_depth);
return ureg_src_undef();
}
-static INLINE unsigned *
+static inline unsigned *
tx_cond(struct shader_translator *tx)
{
assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
return &tx->cond_labels[tx->cond_depth - 1];
}
-static INLINE unsigned *
+static inline unsigned *
tx_elsecond(struct shader_translator *tx)
{
assert(tx->cond_depth);
return &tx->cond_labels[tx->cond_depth - 1];
}
-static INLINE void
+static inline void
tx_endcond(struct shader_translator *tx)
{
assert(tx->cond_depth);
ureg_get_instruction_number(tx->ureg));
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
nine_ureg_dst_register(unsigned file, int index)
{
return ureg_dst(ureg_src_register(file, index));
if (ureg_dst_is_undef(tx->regs.oDepth))
tx->regs.oDepth =
ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
- TGSI_WRITEMASK_Z);
+ TGSI_WRITEMASK_Z, 0, 1);
dst = tx->regs.oDepth; /* XXX: must write .z component */
break;
case D3DSPR_PREDICATE:
#define VNOTSUPPORTED 0, 0
#define V(maj, min) (((maj) << 8) | (min))
-static INLINE const char *
+static inline const char *
d3dsio_to_string( unsigned opcode )
{
static const char *names[] = {
return D3D_OK;
}
-static INLINE unsigned
+static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)
{
switch (flags) {
[D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
-static INLINE unsigned
+static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic *dcl)
{
return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
-static INLINE unsigned
+static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)
{
switch (sampler_type) {
return TGSI_TEXTURE_UNKNOWN;
}
}
-static INLINE unsigned
+static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
{
switch (sampler_type) {
return TGSI_TEXTURE_UNKNOWN;
}
}
-static INLINE unsigned
+static inline unsigned
ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
{
switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
}
}
-static INLINE unsigned
+static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
{
switch (sem->Name) {
tx->info->position_t = TRUE;
assert(sem.reg.idx < Elements(tx->regs.o));
tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
- ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
+ ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
tx->regs.oPts = tx->regs.o[sem.reg.idx];
ureg, tgsi.Name, tgsi.Index,
nine_tgsi_to_interp_mode(&tgsi),
0, /* cylwrap */
- sem.reg.mod & NINED3DSPDM_CENTROID);
+ sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
} else
if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
/* FragColor or FragDepth */
assert(sem.reg.mask != 0);
- ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
+ ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
+ 0, 1);
}
}
return D3D_OK;
return D3D_OK;
}
/* Translate the D3D9 LIT instruction.
 *
 * The TGSI LIT result is computed into a scratch temporary first; the
 * destination then receives x, y and w unchanged and a corrected z (see
 * the comment in the body). Always returns D3D_OK. */
+DECL_SPECIAL(LIT)
+{
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst tmp = tx_scratch(tx);
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
+ ureg_LIT(ureg, tmp, src);
+ /* d3d9 LIT is the same as gallium LIT. One difference is that d3d9
+ * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
+ * it 0^0 if src.w=0, whose value is driver dependent. */
/* TGSI CMP selects its second operand where the first is negative, so
 * -src.y < 0 (i.e. src.y > 0) keeps tmp.z and anything else yields 0. */
+ ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
+ ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
+ ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
/* The remaining components are taken from the gallium LIT result as is. */
+ ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
+ return D3D_OK;
+}
+
DECL_SPECIAL(NRM)
{
struct ureg_program *ureg = tx->ureg;
ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
/* replace the depth for depth testing with the result */
- tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z);
+ tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
+ TGSI_WRITEMASK_Z, 0, 1);
ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
/* note that we write nothing to the destination, since it's disallowed to use it afterward */
return D3D_OK;
ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
r5r, ureg_imm1f(ureg, 1.0f));
/* replace the depth for depth testing with the result */
- tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z);
+ tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
+ TGSI_WRITEMASK_Z, 0, 1);
ureg_MOV(ureg, tx->regs.oDepth, r5r);
return D3D_OK;
tx_src_param(tx, &tx->insn.src[2]),
tx_src_param(tx, &tx->insn.src[3])
};
- assert(tx->insn.src[3].idx >= 0 &&
- tx->insn.src[3].idx < Elements(tx->sampler_targets));
+ assert(tx->insn.src[1].idx >= 0 &&
+ tx->insn.src[1].idx < Elements(tx->sampler_targets));
target = tx->sampler_targets[tx->insn.src[1].idx];
ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
tx_src_param(tx, &tx->insn.src[0]),
tx_src_param(tx, &tx->insn.src[1])
};
- assert(tx->insn.src[3].idx >= 0 &&
- tx->insn.src[3].idx < Elements(tx->sampler_targets));
+ assert(tx->insn.src[1].idx >= 0 &&
+ tx->insn.src[1].idx < Elements(tx->sampler_targets));
target = tx->sampler_targets[tx->insn.src[1].idx];
ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
_OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
_OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
_OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
- _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
+ _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
_OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
_OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
_OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
}
}
-static INLINE HRESULT
+static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator *tx)
{
struct ureg_dst dst[1];
return D3D_OK;
}
/* Return the token at the current parse position (tx->parse) without
 * advancing it. */
-static INLINE DWORD
+static inline DWORD
TOKEN_PEEK(struct shader_translator *tx)
{
return *(tx->parse);
}
/* Return the token at the current parse position and advance tx->parse to
 * the following token (post-increment of the pointer). */
-static INLINE DWORD
+static inline DWORD
TOKEN_NEXT(struct shader_translator *tx)
{
return *(tx->parse)++;
}
-static INLINE void
+static inline void
TOKEN_JUMP(struct shader_translator *tx)
{
if (tx->parse_next && tx->parse != tx->parse_next) {
}
}
-static INLINE boolean
+static inline boolean
sm1_parse_eof(struct shader_translator *tx)
{
return TOKEN_PEEK(tx) == NINED3DSP_END;
*rel = (1 << 31) |
((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
- (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT);
+ D3DSP_NOSWIZZLE;
else
*rel = TOKEN_NEXT(tx);
}
FREE(tx);
}
-static INLINE unsigned
+static inline unsigned
tgsi_processor_from_type(unsigned shader_type)
{
switch (shader_type) {
HRESULT hr = D3D_OK;
const unsigned processor = tgsi_processor_from_type(info->type);
unsigned s, slot_max;
+ unsigned max_const_f;
user_assert(processor != ~0, D3DERR_INVALIDCALL);
ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
}
- while (!sm1_parse_eof(tx))
+ while (!sm1_parse_eof(tx) && !tx->failure)
sm1_parse_instruction(tx);
tx->parse++; /* for byte_size */
+ if (tx->failure) {
+ ERR("Encountered buggy shader\n");
+ ureg_destroy(tx->ureg);
+ hr = D3DERR_INVALIDCALL;
+ goto out;
+ }
+
if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
ureg_src(tx->regs.r[0]));
hr = D3D_OK;
}
+ /* r500 */
+ if (info->const_float_slots > device->max_vs_const_f &&
+ (info->const_int_slots || info->const_bool_slots))
+ ERR("Overlapping constant slots. The shader is likely to be buggy\n");
+
+
if (tx->indirect_const_access) /* vs only */
info->const_float_slots = device->max_vs_const_f;
+ max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
slot_max = info->const_bool_slots > 0 ?
- device->max_vs_const_f + NINE_MAX_CONST_I
- + info->const_bool_slots :
+ max_const_f + NINE_MAX_CONST_I
+ + DIV_ROUND_UP(info->const_bool_slots, 4) :
info->const_int_slots > 0 ?
- device->max_vs_const_f + info->const_int_slots :
+ max_const_f + info->const_int_slots :
info->const_float_slots;
info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */