#include "nine_debug.h"
#include "nine_state.h"
+#include "util/macros.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "pipe/p_shader_tokens.h"
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
-static INLINE const char *d3dsio_to_string(unsigned opcode);
+static inline const char *d3dsio_to_string(unsigned opcode);
#define NINED3D_SM1_VS 0xfffe
BYTE type;
};
-static INLINE void
+static inline void
assert_replicate_swizzle(const struct ureg_src *reg)
{
assert(reg->SwizzleY == reg->SwizzleX &&
struct ureg_src vFace;
struct ureg_src s;
struct ureg_dst p;
- struct ureg_dst a;
+ struct ureg_dst address;
+ struct ureg_dst a0;
struct ureg_dst tS[8]; /* texture stage registers */
struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
struct ureg_dst t[5]; /* scratch TEMPs */
struct ureg_src vC[2]; /* PS color in */
struct ureg_src vT[8]; /* PS texcoord in */
struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
- struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* loop ctr ADDR register */
} regs;
unsigned num_temp; /* Elements(regs.r) */
unsigned num_scratch;
unsigned cond_depth;
unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
unsigned cond_labels[NINE_MAX_COND_DEPTH];
+ boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
unsigned *inst_labels; /* LABEL op */
unsigned num_inst_labels;
struct sm1_local_const lconstb[NINE_MAX_CONST_B];
boolean indirect_const_access;
+ boolean failure;
struct nine_shader_info *info;
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
+#define NINE_MAX_CONST_F_SHADER (tx->processor == TGSI_PROCESSOR_VERTEX ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3)
+
+#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
{
INT i;
- assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
+ if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) {
+ tx->failure = TRUE;
+ return FALSE;
+ }
for (i = 0; i < tx->num_lconstf; ++i) {
if (tx->lconstf[i].idx == index) {
*src = tx->lconstf[i].reg;
static boolean
tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
{
- assert(index >= 0 && index < NINE_MAX_CONST_I);
+ if (index < 0 || index >= NINE_MAX_CONST_I) {
+ tx->failure = TRUE;
+ return FALSE;
+ }
if (tx->lconsti[index].idx == index)
*src = tx->lconsti[index].reg;
return tx->lconsti[index].idx == index;
static boolean
tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
{
- assert(index >= 0 && index < NINE_MAX_CONST_B);
+ if (index < 0 || index >= NINE_MAX_CONST_B) {
+ tx->failure = TRUE;
+ return FALSE;
+ }
if (tx->lconstb[index].idx == index)
*src = tx->lconstb[index].reg;
return tx->lconstb[index].idx == index;
{
unsigned n;
- /* Anno1404 sets out of range constants. */
- assert(index >= 0 && index < (NINE_MAX_CONST_F * 2));
- if (index >= NINE_MAX_CONST_F)
+ FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER)
+ if (IS_VS && index >= NINE_MAX_CONST_F_SHADER)
WARN("lconstf index %i too high, indirect access won't work\n", index);
for (n = 0; n < tx->num_lconstf; ++n)
static void
tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
{
- assert(index >= 0 && index < NINE_MAX_CONST_I);
+ FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I)
tx->lconsti[index].idx = index;
tx->lconsti[index].reg = tx->native_integers ?
ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
static void
tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
{
- assert(index >= 0 && index < NINE_MAX_CONST_B);
+ FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B)
tx->lconstb[index].idx = index;
tx->lconstb[index].reg = tx->native_integers ?
ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
tx_scratch(struct shader_translator *tx)
{
- assert(tx->num_scratch < Elements(tx->regs.t));
+ if (tx->num_scratch >= Elements(tx->regs.t)) {
+ tx->failure = TRUE;
+ return tx->regs.t[0];
+ }
if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
return tx->regs.t[tx->num_scratch++];
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator *tx)
{
return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
}
-static INLINE struct ureg_src
+static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)
{
struct ureg_src src = ureg_src(dst);
return src;
}
-/* Need to declare all constants if indirect addressing is used,
- * otherwise we could scan the shader to determine the maximum.
- * TODO: It doesn't really matter for nv50 so I won't do the scan,
- * but radeon drivers might care, if they don't infer it from TGSI.
- */
-static void
-tx_decl_constants(struct shader_translator *tx)
-{
- unsigned i, n = 0;
-
- for (i = 0; i < NINE_MAX_CONST_F; ++i)
- ureg_DECL_constant(tx->ureg, n++);
- for (i = 0; i < NINE_MAX_CONST_I; ++i)
- ureg_DECL_constant(tx->ureg, n++);
- for (i = 0; i < (NINE_MAX_CONST_B / 4); ++i)
- ureg_DECL_constant(tx->ureg, n++);
-}
-
-static INLINE void
+static inline void
tx_temp_alloc(struct shader_translator *tx, INT idx)
{
assert(idx >= 0);
tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
}
-static INLINE void
+static inline void
tx_addr_alloc(struct shader_translator *tx, INT idx)
{
assert(idx == 0);
- if (ureg_dst_is_undef(tx->regs.a))
- tx->regs.a = ureg_DECL_address(tx->ureg);
+ if (ureg_dst_is_undef(tx->regs.address))
+ tx->regs.address = ureg_DECL_address(tx->ureg);
+ if (ureg_dst_is_undef(tx->regs.a0))
+ tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
}
-static INLINE void
+static inline void
tx_pred_alloc(struct shader_translator *tx, INT idx)
{
assert(idx == 0);
tx->regs.p = ureg_DECL_predicate(tx->ureg);
}
-static INLINE void
+static inline void
tx_texcoord_alloc(struct shader_translator *tx, INT idx)
{
assert(IS_PS);
TGSI_INTERPOLATE_PERSPECTIVE);
}
-static INLINE unsigned *
+static inline unsigned *
tx_bgnloop(struct shader_translator *tx)
{
tx->loop_depth++;
return &tx->loop_labels[tx->loop_depth - 1];
}
-static INLINE unsigned *
+static inline unsigned *
tx_endloop(struct shader_translator *tx)
{
assert(tx->loop_depth);
}
static struct ureg_dst
-tx_get_loopctr(struct shader_translator *tx)
+tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
{
const unsigned l = tx->loop_depth - 1;
return ureg_dst_undef();
}
- if (ureg_dst_is_undef(tx->regs.aL[l]))
- {
- struct ureg_dst rreg = ureg_DECL_local_temporary(tx->ureg);
- struct ureg_dst areg = ureg_DECL_address(tx->ureg);
- unsigned c;
-
- assert(l % 4 == 0);
- for (c = l; c < (l + 4) && c < Elements(tx->regs.aL); ++c) {
- tx->regs.rL[c] = ureg_writemask(rreg, 1 << (c & 3));
- tx->regs.aL[c] = ureg_writemask(areg, 1 << (c & 3));
- }
+ if (ureg_dst_is_undef(tx->regs.rL[l])) {
+ /* loop or rep ctr creation */
+ tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
+ tx->loop_or_rep[l] = loop_or_rep;
}
+ /* loop - rep - endloop - endrep not allowed */
+ assert(tx->loop_or_rep[l] == loop_or_rep);
+
return tx->regs.rL[l];
}
-static struct ureg_dst
-tx_get_aL(struct shader_translator *tx)
+
+static struct ureg_src
+tx_get_loopal(struct shader_translator *tx)
{
- if (!ureg_dst_is_undef(tx_get_loopctr(tx)))
- return tx->regs.aL[tx->loop_depth - 1];
- return ureg_dst_undef();
+ int loop_level = tx->loop_depth - 1;
+
+ while (loop_level >= 0) {
+ /* handle loop - rep - endrep - endloop case */
+ if (tx->loop_or_rep[loop_level])
+ /* the value is in the loop counter y component (nine implementation) */
+ return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
+ loop_level--;
+ }
+
+ DBG("aL counter requested outside of loop\n");
+ return ureg_src_undef();
}
-static INLINE unsigned *
+static inline unsigned *
tx_cond(struct shader_translator *tx)
{
assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
return &tx->cond_labels[tx->cond_depth - 1];
}
-static INLINE unsigned *
+static inline unsigned *
tx_elsecond(struct shader_translator *tx)
{
assert(tx->cond_depth);
return &tx->cond_labels[tx->cond_depth - 1];
}
-static INLINE void
+static inline void
tx_endcond(struct shader_translator *tx)
{
assert(tx->cond_depth);
ureg_get_instruction_number(tx->ureg));
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
nine_ureg_dst_register(unsigned file, int index)
{
return ureg_dst(ureg_src_register(file, index));
case D3DSPR_ADDR:
assert(!param->rel);
if (IS_VS) {
- tx_addr_alloc(tx, param->idx);
- src = ureg_src(tx->regs.a);
+ assert(param->idx == 0);
+ /* the address register (vs only) must be
+ * assigned before use */
+ assert(!ureg_dst_is_undef(tx->regs.a0));
+ ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
+ src = ureg_src(tx->regs.address);
} else {
if (tx->version.major < 2 && tx->version.minor < 4) {
/* no subroutines, so should be defined */
src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
break;
case D3DSPR_CONST:
+ assert(!param->rel || IS_VS);
if (param->rel)
tx->indirect_const_access = TRUE;
if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
nine_info_mark_const_f_used(tx->info, param->idx);
src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
}
+ if (!IS_VS && tx->version.major < 2) {
+ /* ps 1.X clamps constants */
+ tmp = tx_scratch(tx);
+ ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
+ ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
+ src = ureg_src(tmp);
+ }
break;
case D3DSPR_CONST2:
case D3DSPR_CONST3:
src = ureg_imm1f(ureg, 0.0f);
break;
case D3DSPR_CONSTINT:
- if (param->rel || !tx_lconsti(tx, &src, param->idx)) {
- if (!param->rel)
- nine_info_mark_const_i_used(tx->info, param->idx);
+      /* relative addressing only possible for float constants in vs */
+ assert(!param->rel);
+ if (!tx_lconsti(tx, &src, param->idx)) {
+ nine_info_mark_const_i_used(tx->info, param->idx);
src = ureg_src_register(TGSI_FILE_CONSTANT,
tx->info->const_i_base + param->idx);
}
break;
case D3DSPR_CONSTBOOL:
- if (param->rel || !tx_lconstb(tx, &src, param->idx)) {
+ assert(!param->rel);
+ if (!tx_lconstb(tx, &src, param->idx)) {
char r = param->idx / 4;
char s = param->idx & 3;
- if (!param->rel)
- nine_info_mark_const_b_used(tx->info, param->idx);
+ nine_info_mark_const_b_used(tx->info, param->idx);
src = ureg_src_register(TGSI_FILE_CONSTANT,
tx->info->const_b_base + r);
src = ureg_swizzle(src, s, s, s, s);
}
break;
case D3DSPR_LOOP:
- src = tx_src_scalar(tx_get_aL(tx));
+ if (ureg_dst_is_undef(tx->regs.address))
+ tx->regs.address = ureg_DECL_address(ureg);
+ if (!tx->native_integers)
+ ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
+ else
+ ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
+ src = ureg_src(tx->regs.address);
break;
case D3DSPR_MISCTYPE:
switch (param->idx) {
if (param->rel)
src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
+ switch (param->mod) {
+ case NINED3DSPSM_DW:
+ tmp = tx_scratch(tx);
+ /* NOTE: app is not allowed to read w with this modifier */
+ ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
+ ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
+ src = ureg_src(tmp);
+ break;
+ case NINED3DSPSM_DZ:
+ tmp = tx_scratch(tx);
+ /* NOTE: app is not allowed to read z with this modifier */
+ ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
+ ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
+ src = ureg_src(tmp);
+ break;
+ default:
+ break;
+ }
+
if (param->swizzle != NINED3DSP_NOSWIZZLE)
src = ureg_swizzle(src,
(param->swizzle >> 0) & 0x3,
break;
case NINED3DSPSM_DZ:
case NINED3DSPSM_DW:
- /* handled in instruction */
+      /* Already handled */
break;
case NINED3DSPSM_SIGN:
tmp = tx_scratch(tx);
dst = ureg_dst(tx->regs.vT[param->idx]);
} else {
tx_addr_alloc(tx, param->idx);
- dst = tx->regs.a;
+ dst = tx->regs.a0;
}
break;
case D3DSPR_RASTOUT:
case 1:
if (ureg_dst_is_undef(tx->regs.oFog))
tx->regs.oFog =
- ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
+ ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
dst = tx->regs.oFog;
break;
case 2:
if (ureg_dst_is_undef(tx->regs.oPts))
tx->regs.oPts =
- ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
+ ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
dst = tx->regs.oPts;
break;
default:
if (ureg_dst_is_undef(tx->regs.oDepth))
tx->regs.oDepth =
ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
- TGSI_WRITEMASK_Z);
+ TGSI_WRITEMASK_Z, 0, 1);
dst = tx->regs.oDepth; /* XXX: must write .z component */
break;
case D3DSPR_PREDICATE:
struct ureg_program *ureg = tx->ureg;
struct ureg_dst dst;
struct ureg_src src[2];
+ struct sm1_src_param *src_mat = &tx->insn.src[1];
unsigned i;
dst = tx_dst_param(tx, &tx->insn.dst[0]);
src[0] = tx_src_param(tx, &tx->insn.src[0]);
- src[1] = tx_src_param(tx, &tx->insn.src[1]);
- for (i = 0; i < n; i++, src[1].Index++)
+ for (i = 0; i < n; i++)
{
const unsigned m = (1 << i);
+ src[1] = tx_src_param(tx, src_mat);
+ src_mat->idx++;
+
if (!(dst.WriteMask & m))
continue;
#define VNOTSUPPORTED 0, 0
#define V(maj, min) (((maj) << 8) | (min))
-static INLINE const char *
+static inline const char *
d3dsio_to_string( unsigned opcode )
{
static const char *names[] = {
DECL_SPECIAL(M4x4)
{
- return NineTranslateInstruction_Mkxn(tx, 4, 3);
+ return NineTranslateInstruction_Mkxn(tx, 4, 4);
}
DECL_SPECIAL(M4x3)
struct ureg_dst cgt;
struct ureg_src cnd;
- if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4) {
+ /* the coissue flag was a tip for compilers to advise to
+ * execute two operations at the same time, in cases
+ * the two executions had same dst with different channels.
+ * It has no effect on current hw. However it seems CND
+     * is affected. The handling of this very specific case
+     * below mimics Wine's behaviour */
+ if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
ureg_MOV(tx->ureg,
dst, tx_src_param(tx, &tx->insn.src[1]));
return D3D_OK;
cnd = tx_src_param(tx, &tx->insn.src[0]);
cgt = tx_scratch(tx);
- if (tx->version.major == 1 && tx->version.minor < 4) {
- cgt.WriteMask = TGSI_WRITEMASK_W;
- ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
+ if (tx->version.major == 1 && tx->version.minor < 4)
cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
- } else {
- ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
- }
- ureg_CMP(tx->ureg, dst,
+
+ ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
+
+ ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
tx_src_param(tx, &tx->insn.src[1]),
- tx_src_param(tx, &tx->insn.src[2]), ureg_negate(cnd));
+ tx_src_param(tx, &tx->insn.src[2]));
return D3D_OK;
}
DECL_SPECIAL(CALLNZ)
{
struct ureg_program *ureg = tx->ureg;
- struct ureg_dst tmp = tx_scratch_scalar(tx);
struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
- /* NOTE: source should be const bool, so we can use NOT/SUB instead of [U]SNE 0 */
- if (!tx->insn.flags) {
- if (tx->native_integers)
- ureg_NOT(ureg, tmp, src);
- else
- ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
- }
- ureg_IF(ureg, tx->insn.flags ? src : tx_src_scalar(tmp), tx_cond(tx));
+ if (!tx->native_integers)
+ ureg_IF(ureg, src, tx_cond(tx));
+ else
+ ureg_UIF(ureg, src, tx_cond(tx));
ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
tx_endcond(tx);
ureg_ENDIF(ureg);
DECL_SPECIAL(MOV_vs1x)
{
if (tx->insn.dst[0].file == D3DSPR_ADDR) {
- ureg_ARL(tx->ureg,
+ /* Implementation note: We don't write directly
+ * to the addr register, but to an intermediate
+ * float register.
+ * Contrary to the doc, when writing to ADDR here,
+ * the rounding is not to nearest, but to lowest
+ * (wine test).
+       * Since we use ARR next, subtract 0.5. */
+ ureg_SUB(tx->ureg,
tx_dst_param(tx, &tx->insn.dst[0]),
- tx_src_param(tx, &tx->insn.src[0]));
+ tx_src_param(tx, &tx->insn.src[0]),
+ ureg_imm1f(tx->ureg, 0.5f));
return D3D_OK;
}
return NineTranslateInstruction_Generic(tx);
struct ureg_program *ureg = tx->ureg;
unsigned *label;
struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
- struct ureg_src iter = ureg_scalar(src, TGSI_SWIZZLE_X);
- struct ureg_src init = ureg_scalar(src, TGSI_SWIZZLE_Y);
- struct ureg_src step = ureg_scalar(src, TGSI_SWIZZLE_Z);
struct ureg_dst ctr;
- struct ureg_dst tmp = tx_scratch_scalar(tx);
+ struct ureg_dst tmp;
+ struct ureg_src ctrx;
label = tx_bgnloop(tx);
- ctr = tx_get_loopctr(tx);
+ ctr = tx_get_loopctr(tx, TRUE);
+ ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
- ureg_MOV(tx->ureg, ctr, init);
+ /* src: num_iterations - start_value of al - step for al - 0 */
+ ureg_MOV(ureg, ctr, src);
ureg_BGNLOOP(tx->ureg, label);
- if (tx->native_integers) {
- /* we'll let the backend pull up that MAD ... */
- ureg_UMAD(ureg, tmp, iter, step, init);
- ureg_USEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
-#ifdef NINE_TGSI_LAZY_DEVS
- ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
-#endif
- } else {
- /* can't simply use SGE for precision because step might be negative */
- ureg_MAD(ureg, tmp, iter, step, init);
- ureg_SEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
-#ifdef NINE_TGSI_LAZY_DEVS
+ tmp = tx_scratch_scalar(tx);
+ /* Initially ctr.x contains the number of iterations.
+ * ctr.y will contain the updated value of al.
+ * We decrease ctr.x at the end of every iteration,
+ * and stop when it reaches 0. */
+
+ if (!tx->native_integers) {
+ /* case src and ctr contain floats */
+      /* to avoid precision issues, we stop when ctr <= 0.5 */
+ ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
-#endif
+ } else {
+ /* case src and ctr contain integers */
+ ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
+ ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
}
-#ifdef NINE_TGSI_LAZY_DEVS
ureg_BRK(ureg);
tx_endcond(tx);
ureg_ENDIF(ureg);
-#else
- ureg_BREAKC(ureg, tx_src_scalar(tmp));
-#endif
- if (tx->native_integers) {
- ureg_UARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
- ureg_UADD(ureg, ctr, tx_src_scalar(ctr), step);
- } else {
- ureg_ARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
- ureg_ADD(ureg, ctr, tx_src_scalar(ctr), step);
- }
return D3D_OK;
}
DECL_SPECIAL(ENDLOOP)
{
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
+ struct ureg_dst dst_ctrx, dst_al;
+ struct ureg_src src_ctr, al_counter;
+
+ dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
+ dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
+ src_ctr = ureg_src(ctr);
+ al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
+
+ /* ctr.x -= 1
+ * ctr.y (aL) += step */
+ if (!tx->native_integers) {
+ ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
+ ureg_ADD(ureg, dst_al, src_ctr, al_counter);
+ } else {
+ ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
+ ureg_UADD(ureg, dst_al, src_ctr, al_counter);
+ }
ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
return D3D_OK;
}
unsigned *label;
struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
struct ureg_dst ctr;
- struct ureg_dst tmp = tx_scratch_scalar(tx);
- struct ureg_src imm =
- tx->native_integers ? ureg_imm1u(ureg, 0) : ureg_imm1f(ureg, 0.0f);
+ struct ureg_dst tmp;
+ struct ureg_src ctrx;
label = tx_bgnloop(tx);
- ctr = tx_get_loopctr(tx);
+ ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
+ ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
/* NOTE: rep must be constant, so we don't have to save the count */
assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
- ureg_MOV(ureg, ctr, imm);
+ /* rep: num_iterations - 0 - 0 - 0 */
+ ureg_MOV(ureg, ctr, rep);
ureg_BGNLOOP(ureg, label);
- if (tx->native_integers)
- {
- ureg_USGE(ureg, tmp, tx_src_scalar(ctr), rep);
- ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
- }
- else
- {
- ureg_SGE(ureg, tmp, tx_src_scalar(ctr), rep);
+ tmp = tx_scratch_scalar(tx);
+ /* Initially ctr.x contains the number of iterations.
+ * We decrease ctr.x at the end of every iteration,
+ * and stop when it reaches 0. */
+
+ if (!tx->native_integers) {
+ /* case src and ctr contain floats */
+      /* to avoid precision issues, we stop when ctr <= 0.5 */
+ ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
+ } else {
+ /* case src and ctr contain integers */
+ ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
+ ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
}
ureg_BRK(ureg);
tx_endcond(tx);
ureg_ENDIF(ureg);
- if (tx->native_integers) {
- ureg_UADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1u(ureg, 1));
- } else {
- ureg_ADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1f(ureg, 1.0f));
- }
-
return D3D_OK;
}
DECL_SPECIAL(ENDREP)
{
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
+ struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
+ struct ureg_src src_ctr = ureg_src(ctr);
+
+ /* ctr.x -= 1 */
+ if (!tx->native_integers)
+ ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
+ else
+ ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
+
ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
return D3D_OK;
}
return D3D_OK;
}
-static INLINE unsigned
+static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)
{
switch (flags) {
[D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
-static INLINE unsigned
+static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic *dcl)
{
return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
-static INLINE unsigned
+static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)
{
switch (sampler_type) {
return TGSI_TEXTURE_UNKNOWN;
}
}
-static INLINE unsigned
+static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
{
switch (sampler_type) {
return TGSI_TEXTURE_UNKNOWN;
}
}
-static INLINE unsigned
+static inline unsigned
ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
{
switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
}
}
-static INLINE unsigned
+static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
{
switch (sem->Name) {
tx->info->position_t = TRUE;
assert(sem.reg.idx < Elements(tx->regs.o));
tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
- ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
+ ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
tx->regs.oPts = tx->regs.o[sem.reg.idx];
ureg, tgsi.Name, tgsi.Index,
nine_tgsi_to_interp_mode(&tgsi),
0, /* cylwrap */
- sem.reg.mod & NINED3DSPDM_CENTROID);
+ sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
} else
if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
/* FragColor or FragDepth */
assert(sem.reg.mask != 0);
- ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
+ ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
+ 0, 1);
}
}
return D3D_OK;
return D3D_OK;
}
+DECL_SPECIAL(POW)
+{
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src src[2] = {
+ tx_src_param(tx, &tx->insn.src[0]),
+ tx_src_param(tx, &tx->insn.src[1])
+ };
+ ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
+ return D3D_OK;
+}
+
+DECL_SPECIAL(RSQ)
+{
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
+ struct ureg_dst tmp = tx_scratch(tx);
+ ureg_RSQ(ureg, tmp, ureg_abs(src));
+ ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
+ return D3D_OK;
+}
+
+DECL_SPECIAL(LOG)
+{
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst tmp = tx_scratch_scalar(tx);
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
+ ureg_LG2(ureg, tmp, ureg_abs(src));
+ ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
+ return D3D_OK;
+}
+
+DECL_SPECIAL(LIT)
+{
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst tmp = tx_scratch(tx);
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
+ ureg_LIT(ureg, tmp, src);
+   /* d3d9 LIT is the same as gallium LIT. One difference is that d3d9
+    * states that dst.z is 0 when src.y <= 0. The gallium definition can
+    * assign it 0^0 if src.w=0, whose value is driver dependent. */
+ ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
+ ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
+ ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
+ ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
+ return D3D_OK;
+}
+
DECL_SPECIAL(NRM)
{
struct ureg_program *ureg = tx->ureg;
struct ureg_dst tmp = tx_scratch_scalar(tx);
struct ureg_src nrm = tx_src_scalar(tmp);
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
ureg_DP3(ureg, tmp, src, src);
ureg_RSQ(ureg, tmp, nrm);
- ureg_MUL(ureg, tx_dst_param(tx, &tx->insn.dst[0]), src, nrm);
+ ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
+ ureg_MUL(ureg, dst, src, nrm);
return D3D_OK;
}
const unsigned s = tx->insn.dst[0].idx;
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- if (ureg_src_is_undef(tx->regs.vT[s]))
- tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
- ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
+ tx_texcoord_alloc(tx, s);
+ ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
+ ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
return D3D_OK;
}
DECL_SPECIAL(TEXCOORD_ps14)
{
struct ureg_program *ureg = tx->ureg;
- const unsigned s = tx->insn.src[0].idx;
+ struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- if (ureg_src_is_undef(tx->regs.vT[s]))
- tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
- ureg_MOV(ureg, dst, tx->regs.vT[s]); /* XXX is this sufficient ? */
+ assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
+
+ ureg_MOV(ureg, dst, src);
return D3D_OK;
}
DECL_SPECIAL(TEXREG2AR)
{
- STUB(D3DERR_INVALIDCALL);
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src sample;
+ const int m = tx->insn.dst[0].idx;
+ const int n = tx->insn.src[0].idx;
+ assert(m >= 0 && m > n);
+
+ sample = ureg_DECL_sampler(ureg, m);
+ tx->info->sampler_mask |= 1 << m;
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXREG2GB)
{
- STUB(D3DERR_INVALIDCALL);
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src sample;
+ const int m = tx->insn.dst[0].idx;
+ const int n = tx->insn.src[0].idx;
+ assert(m >= 0 && m > n);
+
+ sample = ureg_DECL_sampler(ureg, m);
+ tx->info->sampler_mask |= 1 << m;
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXM3x2PAD)
{
- STUB(D3DERR_INVALIDCALL);
+ return D3D_OK; /* this is just padding */
}
DECL_SPECIAL(TEXM3x2TEX)
{
- STUB(D3DERR_INVALIDCALL);
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src sample;
+ const int m = tx->insn.dst[0].idx - 1;
+ const int n = tx->insn.src[0].idx;
+ assert(m >= 0 && m > n);
+
+ tx_texcoord_alloc(tx, m);
+ tx_texcoord_alloc(tx, m+1);
+
+ /* performs the matrix multiplication */
+ ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
+ ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
+
+ sample = ureg_DECL_sampler(ureg, m + 1);
+ tx->info->sampler_mask |= 1 << (m + 1);
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXM3x3PAD)
DECL_SPECIAL(TEXM3x3SPEC)
{
- STUB(D3DERR_INVALIDCALL);
-}
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
+ struct ureg_src sample;
+ struct ureg_dst tmp;
+ const int m = tx->insn.dst[0].idx - 2;
+ const int n = tx->insn.src[0].idx;
+ assert(m >= 0 && m > n);
-DECL_SPECIAL(TEXM3x3VSPEC)
-{
- STUB(D3DERR_INVALIDCALL);
+ tx_texcoord_alloc(tx, m);
+ tx_texcoord_alloc(tx, m+1);
+ tx_texcoord_alloc(tx, m+2);
+
+ ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
+ ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
+ ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
+
+ sample = ureg_DECL_sampler(ureg, m + 2);
+ tx->info->sampler_mask |= 1 << (m + 2);
+ tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
+
+ /* At this step, dst = N = (u', w', z').
+ * We want dst to be the texture sampled at (u'', w'', z''), with
+ * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
+ ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
+ ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
+ /* at this step tmp.x = 1/N.N */
+ ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
+ /* at this step tmp.y = N.E */
+ ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
+ /* at this step tmp.x = N.E/N.N */
+ ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
+ ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
+ /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
+ ureg_SUB(ureg, tmp, ureg_src(tmp), E);
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXREG2RGB)
{
- STUB(D3DERR_INVALIDCALL);
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src sample;
+ const int m = tx->insn.dst[0].idx;
+ const int n = tx->insn.src[0].idx;
+ assert(m >= 0 && m > n);
+
+ sample = ureg_DECL_sampler(ureg, m);
+ tx->info->sampler_mask |= 1 << m;
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXDP3TEX)
{
- STUB(D3DERR_INVALIDCALL);
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_dst tmp;
+ struct ureg_src sample;
+ const int m = tx->insn.dst[0].idx;
+ const int n = tx->insn.src[0].idx;
+ assert(m >= 0 && m > n);
+
+ tx_texcoord_alloc(tx, m);
+
+ tmp = tx_scratch(tx);
+ ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
+ ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
+
+ sample = ureg_DECL_sampler(ureg, m);
+ tx->info->sampler_mask |= 1 << m;
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXM3x2DEPTH)
{
- STUB(D3DERR_INVALIDCALL);
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst tmp;
+ const int m = tx->insn.dst[0].idx - 1;
+ const int n = tx->insn.src[0].idx;
+ assert(m >= 0 && m > n);
+
+ tx_texcoord_alloc(tx, m);
+ tx_texcoord_alloc(tx, m+1);
+
+ tmp = tx_scratch(tx);
+
+ /* performs the matrix multiplication */
+ ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
+ ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
+
+ ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
+ /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
+ ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
+ /* res = 'w' == 0 ? 1.0 : z/w */
+ ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
+ ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
+ /* replace the depth for depth testing with the result */
+ tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
+ TGSI_WRITEMASK_Z, 0, 1);
+ ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
+ /* note that we write nothing to the destination, since it's disallowed to use it afterward */
+ return D3D_OK;
}
DECL_SPECIAL(TEXDP3)
{
- STUB(D3DERR_INVALIDCALL);
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ const int m = tx->insn.dst[0].idx;
+ const int n = tx->insn.src[0].idx;
+ assert(m >= 0 && m > n);
+
+ tx_texcoord_alloc(tx, m);
+
+ ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXM3x3)
{
struct ureg_program *ureg = tx->ureg;
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src[4];
- int s;
+ struct ureg_src sample;
+ struct ureg_dst E, tmp;
const int m = tx->insn.dst[0].idx - 2;
const int n = tx->insn.src[0].idx;
assert(m >= 0 && m > n);
- for (s = m; s <= (m + 2); ++s) {
- if (ureg_src_is_undef(tx->regs.vT[s]))
- tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
- src[s] = tx->regs.vT[s];
- }
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), src[0], ureg_src(tx->regs.tS[n]));
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), src[1], ureg_src(tx->regs.tS[n]));
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), src[2], ureg_src(tx->regs.tS[n]));
+ tx_texcoord_alloc(tx, m);
+ tx_texcoord_alloc(tx, m+1);
+ tx_texcoord_alloc(tx, m+2);
+
+ ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
+ ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
+ ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
switch (tx->insn.opcode) {
case D3DSIO_TEXM3x3:
ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
break;
case D3DSIO_TEXM3x3TEX:
- src[3] = ureg_DECL_sampler(ureg, m + 2);
+ sample = ureg_DECL_sampler(ureg, m + 2);
+ tx->info->sampler_mask |= 1 << (m + 2);
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
+ break;
+ case D3DSIO_TEXM3x3VSPEC:
+ sample = ureg_DECL_sampler(ureg, m + 2);
tx->info->sampler_mask |= 1 << (m + 2);
- ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), src[3]);
+ E = tx_scratch(tx);
+ tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
+ ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
+ ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
+ ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
+ /* At this step, dst = N = (u', v', w').
+ * We want dst to be the texture sampled at (u'', v'', w''), with
+ * (u'', v'', w'') = 2 * (N.E / N.N) * N - E */
+ ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
+ ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
+ /* at this step tmp.x = 1/N.N */
+ ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
+ /* at this step tmp.y = N.E */
+ ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
+ /* at this step tmp.x = N.E/N.N */
+ ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
+ ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
+ /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
+ ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
break;
default:
return D3DERR_INVALIDCALL;
DECL_SPECIAL(TEXDEPTH)
{
- STUB(D3DERR_INVALIDCALL);
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst r5;
+ struct ureg_src r5r, r5g;
+
+ assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
+
+ /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
+ * r5 won't be used afterward, thus we can use r5.b as scratch */
+ r5 = tx->regs.r[5];
+ r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
+ r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
+
+ ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
+ ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
+ /* r5.r = r/g */
+ ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
+ r5r, ureg_imm1f(ureg, 1.0f));
+ /* replace the depth for depth testing with the result */
+ tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
+ TGSI_WRITEMASK_Z, 0, 1);
+ ureg_MOV(ureg, tx->regs.oDepth, r5r);
+
+ return D3D_OK;
}
DECL_SPECIAL(BEM)
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
struct ureg_src src[2];
- if (ureg_src_is_undef(tx->regs.vT[s]))
- tx->regs.vT[s] = ureg_DECL_fs_input(ureg, tx->texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
+ tx_texcoord_alloc(tx, s);
src[0] = tx->regs.vT[s];
src[1] = ureg_DECL_sampler(ureg, s);
tx_src_param(tx, &tx->insn.src[2]),
tx_src_param(tx, &tx->insn.src[3])
};
- assert(tx->insn.src[3].idx >= 0 &&
- tx->insn.src[3].idx < Elements(tx->sampler_targets));
+ assert(tx->insn.src[1].idx >= 0 &&
+ tx->insn.src[1].idx < Elements(tx->sampler_targets));
target = tx->sampler_targets[tx->insn.src[1].idx];
ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
tx_src_param(tx, &tx->insn.src[0]),
tx_src_param(tx, &tx->insn.src[1])
};
- assert(tx->insn.src[3].idx >= 0 &&
- tx->insn.src[3].idx < Elements(tx->sampler_targets));
+ assert(tx->insn.src[1].idx >= 0 &&
+ tx->insn.src[1].idx < Elements(tx->sampler_targets));
target = tx->sampler_targets[tx->insn.src[1].idx];
ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
_OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
_OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
_OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
- _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 7 */
+ _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
_OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
_OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
_OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
_OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
_OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
_OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
- _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 15 */
- _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
+ _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
+ _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
_OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
_OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
_OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
_OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
_OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
- _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALL)),
- _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(CALLNZ)),
+ _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
+ _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
_OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
_OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
_OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
- _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(LABEL)),
+ _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
_OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
- _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL),
+ _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
_OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
_OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
_OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
_OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
_OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
_OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
-
- _OPI(MOVA, ARR, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
+ /* we don't write to the address register, but a normal register (copied
+ * when needed to the address register), thus we don't use ARR */
+ _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
_OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
_OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
_OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
_OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
_OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
- _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEM)),
- _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXBEML)),
- _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2AR)),
- _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXREG2GB)),
- _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2PAD)),
- _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x2TEX)),
- _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3PAD)),
- _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
- _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3SPEC)),
- _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 0, 0, SPECIAL(TEXM3x3VSPEC)),
+ _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
+ _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
+ _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
+ _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
+ _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
+ _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
+ _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
+ _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
+ _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
+ _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
_OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
_OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
- _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
+ _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
_OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
_OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
/* More tex stuff */
- _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXREG2RGB)),
- _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3TEX)),
- _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 0, 0, SPECIAL(TEXM3x2DEPTH)),
- _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXDP3)),
- _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 0, 0, SPECIAL(TEXM3x3)),
- _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(TEXDEPTH)),
+ _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
+ _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
+ _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
+ _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
+ _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
+ _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
/* Misc */
_OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
- _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(BEM)),
+ _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
_OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
_OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
_OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
_OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
- _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(SETP)),
+ _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
_OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
- _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(BREAKP))
+ _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
};
struct sm1_op_info inst_phase =
}
}
-static INLINE HRESULT
+static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator *tx)
{
struct ureg_dst dst[1];
return D3D_OK;
}
-static INLINE DWORD
+static inline DWORD
TOKEN_PEEK(struct shader_translator *tx)
{
return *(tx->parse);
}
-static INLINE DWORD
+static inline DWORD
TOKEN_NEXT(struct shader_translator *tx)
{
return *(tx->parse)++;
}
-static INLINE void
+static inline void
TOKEN_JUMP(struct shader_translator *tx)
{
if (tx->parse_next && tx->parse != tx->parse_next) {
}
}
-static INLINE boolean
+static inline boolean
sm1_parse_eof(struct shader_translator *tx)
{
return TOKEN_PEEK(tx) == NINED3DSP_END;
*rel = (1 << 31) |
((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
- (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT);
+ D3DSP_NOSWIZZLE;
else
*rel = TOKEN_NEXT(tx);
}
info->position_t = FALSE;
info->point_size = FALSE;
- tx->info->const_used_size = 0;
+ tx->info->const_float_slots = 0;
+ tx->info->const_int_slots = 0;
+ tx->info->const_bool_slots = 0;
info->sampler_mask = 0x0;
info->rt_mask = 0x0;
info->lconstf.data = NULL;
info->lconstf.ranges = NULL;
- for (i = 0; i < Elements(tx->regs.aL); ++i) {
- tx->regs.aL[i] = ureg_dst_undef();
+ for (i = 0; i < Elements(tx->regs.rL); ++i) {
tx->regs.rL[i] = ureg_dst_undef();
}
- tx->regs.a = ureg_dst_undef();
+ tx->regs.address = ureg_dst_undef();
+ tx->regs.a0 = ureg_dst_undef();
tx->regs.p = ureg_dst_undef();
tx->regs.oDepth = ureg_dst_undef();
tx->regs.vPos = ureg_src_undef();
FREE(tx);
}
-static INLINE unsigned
+static inline unsigned
tgsi_processor_from_type(unsigned shader_type)
{
switch (shader_type) {
struct shader_translator *tx;
HRESULT hr = D3D_OK;
const unsigned processor = tgsi_processor_from_type(info->type);
+ unsigned s, slot_max;
+ unsigned max_const_f;
user_assert(processor != ~0, D3DERR_INVALIDCALL);
hr = E_OUTOFMEMORY;
goto out;
}
- tx_decl_constants(tx);
tx->native_integers = GET_SHADER_CAP(INTEGERS);
tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
}
- while (!sm1_parse_eof(tx))
+ while (!sm1_parse_eof(tx) && !tx->failure)
sm1_parse_instruction(tx);
tx->parse++; /* for byte_size */
+ if (tx->failure) {
+ ERR("Encountered buggy shader\n");
+ ureg_destroy(tx->ureg);
+ hr = D3DERR_INVALIDCALL;
+ goto out;
+ }
+
if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
ureg_src(tx->regs.r[0]));
if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
info->point_size = TRUE;
- if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
- unsigned count;
- const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
- tgsi_dump(toks, 0);
- ureg_free_tokens(toks);
- }
-
/* record local constants */
if (tx->num_lconstf && tx->indirect_const_access) {
struct nine_range *ranges;
hr = D3D_OK;
}
- if (tx->indirect_const_access)
- info->const_used_size = ~0;
+ /* r500 */
+ if (info->const_float_slots > device->max_vs_const_f &&
+ (info->const_int_slots || info->const_bool_slots))
+ ERR("Overlapping constant slots. The shader is likely to be buggy\n");
+
+
+ if (tx->indirect_const_access) /* vs only */
+ info->const_float_slots = device->max_vs_const_f;
+
+ max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
+ slot_max = info->const_bool_slots > 0 ?
+ max_const_f + NINE_MAX_CONST_I
+ + DIV_ROUND_UP(info->const_bool_slots, 4) :
+ info->const_int_slots > 0 ?
+ max_const_f + info->const_int_slots :
+ info->const_float_slots;
+ info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
+
+ for (s = 0; s < slot_max; s++)
+ ureg_DECL_constant(tx->ureg, s);
+
+ if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
+ unsigned count;
+ const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
+ tgsi_dump(toks, 0);
+ ureg_free_tokens(toks);
+ }
info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
if (!info->cso) {