#include "device9.h"
#include "nine_debug.h"
#include "nine_state.h"
+#include "vertexdeclaration9.h"
#include "util/macros.h"
#include "util/u_memory.h"
DUMP("_co");
DUMP(" ");
- for (i = 0; i < insn->ndst && i < Elements(insn->dst); ++i) {
+ for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
sm1_dump_dst_param(&insn->dst[i]);
DUMP(" ");
}
- for (i = 0; i < insn->nsrc && i < Elements(insn->src); ++i) {
+ for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
sm1_dump_src_param(&insn->src[i]);
DUMP(" ");
}
{
INT idx;
struct ureg_src reg;
- union {
- boolean b;
- float f[4];
- int32_t i[4];
- } imm;
+ float f[4]; /* for indirect addressing of float constants */
};
struct shader_translator
BYTE major;
BYTE minor;
} version;
- unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
+ unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
+ unsigned num_constf_allowed;
+ unsigned num_consti_allowed;
+ unsigned num_constb_allowed;
boolean native_integers;
boolean inline_subroutines;
- boolean lower_preds;
boolean want_texcoord;
boolean shift_wpos;
+ boolean wpos_is_sysval;
+ boolean face_is_sysval_integer;
unsigned texcoord_sn;
struct sm1_instruction insn; /* current instruction */
struct {
struct ureg_dst *r;
struct ureg_dst oPos;
+ struct ureg_dst oPos_out; /* the real output when doing streamout */
struct ureg_dst oFog;
struct ureg_dst oPts;
struct ureg_dst oCol[4];
struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
struct ureg_dst oDepth;
struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
+ struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
struct ureg_src vPos;
struct ureg_src vFace;
struct ureg_src s;
struct ureg_src vT[8]; /* PS texcoord in */
struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
} regs;
- unsigned num_temp; /* Elements(regs.r) */
+ unsigned num_temp; /* ARRAY_SIZE(regs.r) */
unsigned num_scratch;
unsigned loop_depth;
unsigned loop_depth_max;
struct sm1_local_const *lconstf;
unsigned num_lconstf;
- struct sm1_local_const lconsti[NINE_MAX_CONST_I];
- struct sm1_local_const lconstb[NINE_MAX_CONST_B];
+ struct sm1_local_const *lconsti;
+ unsigned num_lconsti;
+ struct sm1_local_const *lconstb;
+ unsigned num_lconstb;
boolean indirect_const_access;
boolean failure;
+ struct nine_vs_output_info output_info[16];
+ int num_outputs;
+
struct nine_shader_info *info;
int16_t op_info_map[D3DSIO_BREAKP + 1];
};
-#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
-#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
-#define NINE_MAX_CONST_F_SHADER (tx->processor == TGSI_PROCESSOR_VERTEX ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3)
+#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
+#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
}
}
+/* Append one vertex shader output declaration to tx->output_info.
+ *
+ * Usage / UsageIndex: declaration usage and usage index of the output.
+ * mask:               write mask of the declared register.
+ * output_index:       index of the declared output register.
+ *
+ * tx->output_info is a fixed-size array. When it is already full the
+ * translation is flagged as failed (tx->failure) and nothing is recorded,
+ * instead of writing past the end of the array. */
+static void
+nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
+                    int mask, int output_index)
+{
+    struct nine_vs_output_info *out;
+
+    FAILURE_VOID(tx->num_outputs < 0 ||
+                 tx->num_outputs >= (int)ARRAY_SIZE(tx->output_info))
+
+    out = &tx->output_info[tx->num_outputs];
+    out->output_semantic = Usage;
+    out->output_semantic_index = UsageIndex;
+    out->mask = mask;
+    out->output_index = output_index;
+    tx->num_outputs++;
+}
+
static boolean
tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
{
INT i;
- if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) {
+
+ if (index < 0 || index >= tx->num_constf_allowed) {
tx->failure = TRUE;
return FALSE;
}
+/* Look up a shader-local integer constant.
+ * If index is outside [0, tx->num_consti_allowed) the translation is flagged
+ * as failed and FALSE is returned. Otherwise returns TRUE and stores the
+ * register in *src when a local constant with this index has been recorded,
+ * or FALSE (leaving *src untouched) when none has. */
static boolean
tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
{
- if (index < 0 || index >= NINE_MAX_CONST_I) {
+ int i;
+
+ if (index < 0 || index >= tx->num_consti_allowed) {
tx->failure = TRUE;
return FALSE;
}
- if (tx->lconsti[index].idx == index)
- *src = tx->lconsti[index].reg;
- return tx->lconsti[index].idx == index;
+ /* entries are not addressed by constant index, so search linearly */
+ for (i = 0; i < tx->num_lconsti; ++i) {
+ if (tx->lconsti[i].idx == index) {
+ *src = tx->lconsti[i].reg;
+ return TRUE;
+ }
+ }
+ return FALSE;
}
+/* Look up a shader-local boolean constant.
+ * If index is outside [0, tx->num_constb_allowed) the translation is flagged
+ * as failed and FALSE is returned. Otherwise returns TRUE and stores the
+ * register in *src when a local constant with this index has been recorded,
+ * or FALSE (leaving *src untouched) when none has. */
static boolean
tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
{
- if (index < 0 || index >= NINE_MAX_CONST_B) {
+ int i;
+
+ if (index < 0 || index >= tx->num_constb_allowed) {
tx->failure = TRUE;
return FALSE;
}
- if (tx->lconstb[index].idx == index)
- *src = tx->lconstb[index].reg;
- return tx->lconstb[index].idx == index;
+ /* entries are not addressed by constant index, so search linearly */
+ for (i = 0; i < tx->num_lconstb; ++i) {
+ if (tx->lconstb[i].idx == index) {
+ *src = tx->lconstb[i].reg;
+ return TRUE;
+ }
+ }
+ return FALSE;
}
static void
{
unsigned n;
- FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER)
- if (IS_VS && index >= NINE_MAX_CONST_F_SHADER)
- WARN("lconstf index %i too high, indirect access won't work\n", index);
+ FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
for (n = 0; n < tx->num_lconstf; ++n)
if (tx->lconstf[n].idx == index)
tx->lconstf[n].idx = index;
tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
- memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
+ memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
}
+/* Record (or overwrite) the shader-local integer constant `index`.
+ *
+ * index: constant number; values outside [0, tx->num_consti_allowed) flag
+ *        the translation as failed.
+ * i:     the four integer components of the constant.
+ *
+ * The constant is emitted as an integer immediate when tx->native_integers
+ * is set and as a float immediate otherwise. The backing array grows in
+ * steps of 8 entries; if growing fails the translation is flagged as failed
+ * and the existing array is left untouched. */
static void
tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
{
- FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I)
- tx->lconsti[index].idx = index;
- tx->lconsti[index].reg = tx->native_integers ?
+    struct sm1_local_const *grown;
+    unsigned n;
+
+    FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
+
+    /* reuse the slot if this constant was already defined */
+    for (n = 0; n < tx->num_lconsti; ++n)
+        if (tx->lconsti[n].idx == index)
+            break;
+    if (n == tx->num_lconsti) {
+        if ((n % 8) == 0) {
+            /* Do not assign the result straight to tx->lconsti: on failure
+             * that would lose the only pointer to the old array. */
+            grown = REALLOC(tx->lconsti,
+                            (n + 0) * sizeof(tx->lconsti[0]),
+                            (n + 8) * sizeof(tx->lconsti[0]));
+            FAILURE_VOID(!grown)
+            tx->lconsti = grown;
+        }
+        tx->num_lconsti++;
+    }
+
+    tx->lconsti[n].idx = index;
+    tx->lconsti[n].reg = tx->native_integers ?
ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
}
+/* Record (or overwrite) the shader-local boolean constant `index`.
+ *
+ * index: constant number; values outside [0, tx->num_constb_allowed) flag
+ *        the translation as failed.
+ * b:     value of the constant.
+ *
+ * With tx->native_integers the constant becomes the unsigned immediate
+ * 0xffffffff / 0, otherwise the float immediate 1.0 / 0.0. The backing array
+ * grows in steps of 8 entries; if growing fails the translation is flagged
+ * as failed and the existing array is left untouched. */
static void
tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
{
- FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B)
- tx->lconstb[index].idx = index;
- tx->lconstb[index].reg = tx->native_integers ?
+    struct sm1_local_const *grown;
+    unsigned n;
+
+    FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
+
+    /* reuse the slot if this constant was already defined */
+    for (n = 0; n < tx->num_lconstb; ++n)
+        if (tx->lconstb[n].idx == index)
+            break;
+    if (n == tx->num_lconstb) {
+        if ((n % 8) == 0) {
+            /* Do not assign the result straight to tx->lconstb: on failure
+             * that would lose the only pointer to the old array. */
+            grown = REALLOC(tx->lconstb,
+                            (n + 0) * sizeof(tx->lconstb[0]),
+                            (n + 8) * sizeof(tx->lconstb[0]));
+            FAILURE_VOID(!grown)
+            tx->lconstb = grown;
+        }
+        tx->num_lconstb++;
+    }
+
+    tx->lconstb[n].idx = index;
+    tx->lconstb[n].reg = tx->native_integers ?
ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
}
static inline struct ureg_dst
tx_scratch(struct shader_translator *tx)
{
- if (tx->num_scratch >= Elements(tx->regs.t)) {
+ if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
tx->failure = TRUE;
return tx->regs.t[0];
}
tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
}
+/* NOTE: It is not very clear to which ps1.1-ps1.3 instructions
+ * the texture coordinate projection should be applied. It does not
+ * apply to texkill.
+ * The documentation is very imprecise here (it says the projection is done
+ * before rasterization, thus in the vs, which seems wrong since ps
+ * instructions are affected differently).
+ * For now we only apply it to the ps TEX instruction and to TEXBEM.
+ * Perhaps some other instructions need it as well. */
static inline void
-tx_pred_alloc(struct shader_translator *tx, INT idx)
+apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
+ struct ureg_src src, INT idx)
{
- assert(idx == 0);
- if (ureg_dst_is_undef(tx->regs.p))
- tx->regs.p = ureg_DECL_predicate(tx->ureg);
+ /* Writes src to dst, divided by one of its own components when a
+ * projection is configured for stage idx, and unchanged otherwise. */
+ struct ureg_dst tmp;
+ /* tx->info->projected holds a 2-bit field per stage; dim is field + 1 */
+ unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
+
+ /* no projection */
+ if (dim == 1) {
+ ureg_MOV(tx->ureg, dst, src);
+ } else {
+ /* dst = src * (1 / src[dim - 1]) */
+ tmp = tx_scratch_scalar(tx);
+ ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
+ ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
+ }
+}
+
+/* Emit a texture fetch for stage idx, honouring the projection configured
+ * for that stage in tx->info->projected:
+ *  - no projection (dim == 1) or dim <= target: plain TEX,
+ *  - dim == 4: TXP,
+ *  - otherwise: project the coordinate into a scratch register, then TEX. */
+static inline void
+TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
+                         unsigned target, struct ureg_src src0,
+                         struct ureg_src src1, INT idx)
+{
+    /* 2-bit field per stage; dim is field + 1 */
+    const unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
+    struct ureg_dst projected;
+
+    /* dim == 1 means no projection. Projection also looks like it has to
+     * be disabled when it makes no sense for the texture dimensions. */
+    if (dim == 1 || dim <= target) {
+        ureg_TEX(tx->ureg, dst, target, src0, src1);
+        return;
+    }
+
+    if (dim == 4) {
+        ureg_TXP(tx->ureg, dst, target, src0, src1);
+        return;
+    }
+
+    projected = tx_scratch(tx);
+    apply_ps1x_projection(tx, projected, src0, idx);
+    ureg_TEX(tx->ureg, dst, target, ureg_src(projected), src1);
}
static inline void
tx_texcoord_alloc(struct shader_translator *tx, INT idx)
{
assert(IS_PS);
- assert(idx >= 0 && idx < Elements(tx->regs.vT));
+ assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
if (ureg_src_is_undef(tx->regs.vT[idx]))
tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
TGSI_INTERPOLATE_PERSPECTIVE);
return ureg_dst(ureg_src_register(file, index));
}
+/* Declare and return the fragment position source: a system value when
+ * tx->wpos_is_sysval is set, otherwise a linearly interpolated fs input. */
+static inline struct ureg_src
+nine_get_position_input(struct shader_translator *tx)
+{
+    if (!tx->wpos_is_sysval)
+        return ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_POSITION,
+                                  0, TGSI_INTERPOLATE_LINEAR);
+
+    return ureg_DECL_system_value(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
+}
+
static struct ureg_src
tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
{
/* the address register (vs only) must be
* assigned before use */
assert(!ureg_dst_is_undef(tx->regs.a0));
- ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
+ /* Round to lowest for vs1.1 (contrary to the doc), else
+ * round to nearest */
+ if (tx->version.major < 2 && tx->version.minor < 2)
+ ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
+ else
+ ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
src = ureg_src(tx->regs.address);
} else {
if (tx->version.major < 2 && tx->version.minor < 4) {
} else {
if (tx->version.major < 3) {
assert(!param->rel);
- src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
- param->idx,
- TGSI_INTERPOLATE_PERSPECTIVE);
+ src = ureg_DECL_fs_input_cyl_centroid(
+ ureg, TGSI_SEMANTIC_COLOR, param->idx,
+ TGSI_INTERPOLATE_COLOR, 0,
+ tx->info->force_color_in_centroid ?
+ TGSI_INTERPOLATE_LOC_CENTROID : 0,
+ 0, 1);
} else {
- assert(!param->rel); /* TODO */
- assert(param->idx < Elements(tx->regs.v));
- src = tx->regs.v[param->idx];
+ if(param->rel) {
+ /* Copy all inputs (non-consecutive)
+ * to a temp array (consecutive).
+ * This is not good for performance.
+ * A better way would be to make the inputs
+ * consecutive (this would require implementing an
+ * alternative way to match vs outputs and ps inputs).
+ * However, even with the better way, the temp array
+ * copy would still be needed if some inputs
+ * are not GENERIC or if they have different
+ * interpolation flags. */
+ if (ureg_src_is_undef(tx->regs.v_consecutive)) {
+ int i;
+ tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
+ for (i = 0; i < 10; i++) {
+ if (!ureg_src_is_undef(tx->regs.v[i]))
+ ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
+ else
+ ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
+ }
+ }
+ src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
+ } else {
+ assert(param->idx < ARRAY_SIZE(tx->regs.v));
+ src = tx->regs.v[param->idx];
+ }
}
}
break;
case D3DSPR_PREDICATE:
- assert(!param->rel);
- tx_pred_alloc(tx, param->idx);
- src = ureg_src(tx->regs.p);
+ assert(!"D3DSPR_PREDICATE");
break;
case D3DSPR_SAMPLER:
assert(param->mod == NINED3DSPSM_NONE);
if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
if (!param->rel)
nine_info_mark_const_f_used(tx->info, param->idx);
- src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
+ /* swvp constant handling: we use two buffers
+ * to fit all the float constants. This special handling
+ * is not needed elsewhere, because all the instructions
+ * accessing the constants directly are VS1, and swvp
+ * is VS >= 2 */
+ if (IS_VS && tx->info->swvp_on) {
+ if (!param->rel) {
+ if (param->idx < 4096) {
+ src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
+ src = ureg_src_dimension(src, 0);
+ } else {
+ src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096);
+ src = ureg_src_dimension(src, 1);
+ }
+ } else {
+ src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */
+ src = ureg_src_dimension(src, 0);
+ }
+ } else
+ src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
}
if (!IS_VS && tx->version.major < 2) {
/* ps 1.X clamps constants */
assert(!param->rel);
if (!tx_lconsti(tx, &src, param->idx)) {
nine_info_mark_const_i_used(tx->info, param->idx);
- src = ureg_src_register(TGSI_FILE_CONSTANT,
- tx->info->const_i_base + param->idx);
+ if (IS_VS && tx->info->swvp_on) {
+ src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
+ src = ureg_src_dimension(src, 2);
+ } else
+ src = ureg_src_register(TGSI_FILE_CONSTANT,
+ tx->info->const_i_base + param->idx);
}
break;
case D3DSPR_CONSTBOOL:
char r = param->idx / 4;
char s = param->idx & 3;
nine_info_mark_const_b_used(tx->info, param->idx);
- src = ureg_src_register(TGSI_FILE_CONSTANT,
- tx->info->const_b_base + r);
+ if (IS_VS && tx->info->swvp_on) {
+ src = ureg_src_register(TGSI_FILE_CONSTANT, r);
+ src = ureg_src_dimension(src, 3);
+ } else
+ src = ureg_src_register(TGSI_FILE_CONSTANT,
+ tx->info->const_b_base + r);
src = ureg_swizzle(src, s, s, s, s);
}
break;
switch (param->idx) {
case D3DSMO_POSITION:
if (ureg_src_is_undef(tx->regs.vPos))
- tx->regs.vPos = ureg_DECL_fs_input(ureg,
- TGSI_SEMANTIC_POSITION, 0,
- TGSI_INTERPOLATE_LINEAR);
+ tx->regs.vPos = nine_get_position_input(tx);
if (tx->shift_wpos) {
/* TODO: do this only once */
struct ureg_dst wpos = tx_scratch(tx);
- ureg_SUB(ureg, wpos, tx->regs.vPos,
- ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f));
+ ureg_ADD(ureg, wpos, tx->regs.vPos,
+ ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
src = ureg_src(wpos);
} else {
src = tx->regs.vPos;
break;
case D3DSMO_FACE:
if (ureg_src_is_undef(tx->regs.vFace)) {
- tx->regs.vFace = ureg_DECL_fs_input(ureg,
- TGSI_SEMANTIC_FACE, 0,
- TGSI_INTERPOLATE_CONSTANT);
+ if (tx->face_is_sysval_integer) {
+ tmp = tx_scratch(tx);
+ tx->regs.vFace =
+ ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
+
+ /* convert bool to float */
+ ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
+ ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
+ tx->regs.vFace = ureg_src(tmp);
+ } else {
+ tx->regs.vFace = ureg_DECL_fs_input(ureg,
+ TGSI_SEMANTIC_FACE, 0,
+ TGSI_INTERPOLATE_CONSTANT);
+ }
tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
}
src = tx->regs.vFace;
case NINED3DSPSM_DW:
tmp = tx_scratch(tx);
/* NOTE: app is not allowed to read w with this modifier */
- ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
+ ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
src = ureg_src(tmp);
break;
case NINED3DSPSM_DZ:
tmp = tx_scratch(tx);
/* NOTE: app is not allowed to read z with this modifier */
- ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
+ ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
src = ureg_src(tmp);
break;
break;
case NINED3DSPSM_BIAS:
tmp = tx_scratch(tx);
- ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f));
+ ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
src = ureg_src(tmp);
break;
case NINED3DSPSM_BIASNEG:
tmp = tx_scratch(tx);
- ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src);
+ ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
src = ureg_src(tmp);
break;
case NINED3DSPSM_NOT:
/* fall through */
case NINED3DSPSM_COMP:
tmp = tx_scratch(tx);
- ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src);
+ ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
src = ureg_src(tmp);
break;
case NINED3DSPSM_DZ:
break;
case 2:
if (ureg_dst_is_undef(tx->regs.oPts))
- tx->regs.oPts =
- ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
+ tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
dst = tx->regs.oPts;
break;
default:
dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
} else {
assert(!param->rel); /* TODO */
- assert(param->idx < Elements(tx->regs.o));
+ assert(param->idx < ARRAY_SIZE(tx->regs.o));
dst = tx->regs.o[param->idx];
}
break;
assert(!param->rel);
tx->info->rt_mask |= 1 << param->idx;
if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
- /* ps < 3: oCol[0] will have fog blending afterward
- * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
+ /* ps < 3: oCol[0] will have fog blending afterward */
if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
- } else if (IS_VS && tx->version.major < 3 && param->idx == 1) {
- tx->regs.oCol[1] = ureg_DECL_temporary(tx->ureg);
} else {
tx->regs.oCol[param->idx] =
ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
dst = tx->regs.oDepth; /* XXX: must write .z component */
break;
case D3DSPR_PREDICATE:
- assert(!param->rel);
- tx_pred_alloc(tx, param->idx);
- dst = tx->regs.p;
+ assert(!"D3DSPR_PREDICATE");
break;
case D3DSPR_TEMPFLOAT16:
DBG("unhandled D3DSPR: %u\n", param->file);
src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
} else {
assert(!param->rel);
- assert(param->idx < Elements(tx->regs.v));
+ assert(param->idx < ARRAY_SIZE(tx->regs.v));
src = tx->regs.v[param->idx];
}
break;
"BREAKP"
};
- if (opcode < Elements(names)) return names[opcode];
+ if (opcode < ARRAY_SIZE(names)) return names[opcode];
switch (opcode) {
case D3DSIO_PHASE: return "PHASE";
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
+/* NOP translates to no instruction at all. */
+DECL_SPECIAL(NOP)
+{
+    /* Applications used NOP to avoid hangs with very old
+     * d3d drivers; there is nothing to emit for it. */
+    return D3D_OK;
+}
+
+/* SUB: emitted as an ADD with the second operand negated. */
+DECL_SPECIAL(SUB)
+{
+    struct ureg_dst result = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_src operand[2];
+
+    operand[0] = tx_src_param(tx, &tx->insn.src[0]);
+    operand[1] = tx_src_param(tx, &tx->insn.src[1]);
+
+    /* dst = src0 + (-src1) */
+    ureg_ADD(tx->ureg, result, operand[0], ureg_negate(operand[1]));
+    return D3D_OK;
+}
+
+/* ABS: emitted as a MOV whose source carries the absolute-value modifier. */
+DECL_SPECIAL(ABS)
+{
+    struct ureg_dst result = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_src operand = tx_src_param(tx, &tx->insn.src[0]);
+
+    ureg_MOV(tx->ureg, result, ureg_abs(operand));
+    return D3D_OK;
+}
+
+/* Cross product, emitted as MUL + MAD on .xyz followed by a MOV of 1 to .w:
+ *   dst.xyz = src0.yzx * src1.zxy
+ *   dst.xyz = src0.zxy * -src1.yzx + dst.xyz
+ *   dst.w   = 1
+ * Note that the MAD reads back the value the MUL just wrote to dst. */
+DECL_SPECIAL(XPD)
+{
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
+    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
+    const struct ureg_dst dst_xyz = ureg_writemask(dst, TGSI_WRITEMASK_XYZ);
+    const struct ureg_dst dst_w = ureg_writemask(dst, TGSI_WRITEMASK_W);
+    /* the fourth swizzle component is irrelevant for an .xyz write */
+    const struct ureg_src a_yzx = ureg_swizzle(src0, TGSI_SWIZZLE_Y,
+                                               TGSI_SWIZZLE_Z,
+                                               TGSI_SWIZZLE_X, 0);
+    const struct ureg_src a_zxy = ureg_swizzle(src0, TGSI_SWIZZLE_Z,
+                                               TGSI_SWIZZLE_X,
+                                               TGSI_SWIZZLE_Y, 0);
+    const struct ureg_src b_zxy = ureg_swizzle(src1, TGSI_SWIZZLE_Z,
+                                               TGSI_SWIZZLE_X,
+                                               TGSI_SWIZZLE_Y, 0);
+    const struct ureg_src b_yzx = ureg_swizzle(src1, TGSI_SWIZZLE_Y,
+                                               TGSI_SWIZZLE_Z,
+                                               TGSI_SWIZZLE_X, 0);
+
+    ureg_MUL(ureg, dst_xyz, a_yzx, b_zxy);
+    ureg_MAD(ureg, dst_xyz, a_zxy, ureg_negate(b_yzx), ureg_src(dst));
+    ureg_MOV(ureg, dst_w, ureg_imm1f(ureg, 1));
+    return D3D_OK;
+}
+
DECL_SPECIAL(M4x4)
{
return NineTranslateInstruction_Mkxn(tx, 4, 4);
return D3D_OK;
}
-DECL_SPECIAL(MOV_vs1x)
-{
- if (tx->insn.dst[0].file == D3DSPR_ADDR) {
- /* Implementation note: We don't write directly
- * to the addr register, but to an intermediate
- * float register.
- * Contrary to the doc, when writing to ADDR here,
- * the rounding is not to nearest, but to lowest
- * (wine test).
- * Since we use ARR next, substract 0.5. */
- ureg_SUB(tx->ureg,
- tx_dst_param(tx, &tx->insn.dst[0]),
- tx_src_param(tx, &tx->insn.src[0]),
- ureg_imm1f(tx->ureg, 0.5f));
- return D3D_OK;
- }
- return NineTranslateInstruction_Generic(tx);
-}
-
DECL_SPECIAL(LOOP)
{
struct ureg_program *ureg = tx->ureg;
struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
src[0] = tx_src_param(tx, &tx->insn.src[0]);
src[1] = tx_src_param(tx, &tx->insn.src[1]);
- ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
+ ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
return D3D_OK;
}
struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
src[0] = tx_src_param(tx, &tx->insn.src[0]);
src[1] = tx_src_param(tx, &tx->insn.src[1]);
- ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
+ ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
ureg_BRK(tx->ureg);
tx_endcond(tx);
sem->Index = 0;
break;
default:
- assert(!"Invalid DECLUSAGE.");
+ unreachable("Invalid DECLUSAGE.");
break;
}
}
return TGSI_INTERPOLATE_LINEAR;
case TGSI_SEMANTIC_BCOLOR:
case TGSI_SEMANTIC_COLOR:
+ return TGSI_INTERPOLATE_COLOR;
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_TEXCOORD:
if (is_input) {
/* linkage outside of shader with vertex declaration */
ureg_DECL_vs_input(ureg, sem.reg.idx);
- assert(sem.reg.idx < Elements(tx->info->input_map));
+ assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
- tx->info->num_inputs = sem.reg.idx + 1;
+ tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
/* NOTE: preserving order in case of indirect access */
} else
if (tx->version.major >= 3) {
assert(sem.reg.mask != 0);
if (sem.usage == D3DDECLUSAGE_POSITIONT)
tx->info->position_t = TRUE;
- assert(sem.reg.idx < Elements(tx->regs.o));
+ assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
+ assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
+ nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
+ if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
+ tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
+ tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
+ tx->regs.oPos = tx->regs.o[sem.reg.idx];
+ }
- if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
+ if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
+ tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
tx->regs.oPts = tx->regs.o[sem.reg.idx];
+ }
}
} else {
if (is_input && tx->version.major >= 3) {
+ unsigned interp_location = 0;
/* SM3 only, SM2 input semantic determined by file */
- assert(sem.reg.idx < Elements(tx->regs.v));
+ assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
+ assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
+ /* PositionT and tessfactor forbidden */
+ if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
+ return D3DERR_INVALIDCALL;
+
+ if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
+ /* Position0 is forbidden (likely because vPos already does that) */
+ if (sem.usage == D3DDECLUSAGE_POSITION)
+ return D3DERR_INVALIDCALL;
+ /* Following code is for depth */
+ tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
+ return D3D_OK;
+ }
+
+ if (sem.reg.mod & NINED3DSPDM_CENTROID ||
+ (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
+ interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
+
tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
ureg, tgsi.Name, tgsi.Index,
nine_tgsi_to_interp_mode(&tgsi),
0, /* cylwrap */
- sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
+ interp_location, 0, 1);
} else
if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
/* FragColor or FragDepth */
{
struct ureg_program *ureg = tx->ureg;
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_dst tmp, tmp2;
+ struct ureg_dst tmp, tmp2, texcoord;
struct ureg_src sample, m00, m01, m10, m11;
struct ureg_src bumpenvlscale, bumpenvloffset;
const int m = tx->insn.dst[0].idx;
tmp = tx_scratch(tx);
tmp2 = tx_scratch(tx);
+ texcoord = tx_scratch(tx);
/*
* Bump-env-matrix:
* 00 is X
bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
}
+ apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
+
/* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
- NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), tx->regs.vT[m]);
+ NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
/* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
/* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
- NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), tx->regs.vT[m]);
+ NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
/* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
/* at this step tmp.xyz = 2 * (N.E / N.N) * N */
- ureg_SUB(ureg, tmp, ureg_src(tmp), E);
+ ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
return D3D_OK;
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
/* at this step tmp.xyz = 2 * (N.E / N.N) * N */
- ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E));
+ ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
break;
default:
tx_src_param(tx, &tx->insn.src[1])
};
assert(tx->insn.src[1].idx >= 0 &&
- tx->insn.src[1].idx < Elements(tx->sampler_targets));
+ tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
target = tx->sampler_targets[tx->insn.src[1].idx];
switch (tx->insn.flags) {
src[1] = ureg_DECL_sampler(ureg, s);
tx->info->sampler_mask |= 1 << s;
- ureg_TEX(ureg, dst, t, src[0], src[1]);
+ TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
return D3D_OK;
}
tx_src_param(tx, &tx->insn.src[3])
};
assert(tx->insn.src[1].idx >= 0 &&
- tx->insn.src[1].idx < Elements(tx->sampler_targets));
+ tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
target = tx->sampler_targets[tx->insn.src[1].idx];
ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
tx_src_param(tx, &tx->insn.src[1])
};
assert(tx->insn.src[1].idx >= 0 &&
- tx->insn.src[1].idx < Elements(tx->sampler_targets));
+ tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
target = tx->sampler_targets[tx->insn.src[1].idx];
ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
struct sm1_op_info inst_table[] =
{
- _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
- _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
- _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
+ _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
+ _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
_OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
- _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
+ _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
_OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
_OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
_OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
_OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
_OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
- _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
+ _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
_OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
- _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
+ _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
_OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
_OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
_OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
_OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
_OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
- _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
+ _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
/* we don't write to the address register, but a normal register (copied
* when needed to the address register), thus we don't use ARR */
_OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
const unsigned version = (tx->version.major << 8) | tx->version.minor;
unsigned i;
- for (i = 0; i < Elements(tx->op_info_map); ++i)
+ for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
tx->op_info_map[i] = -1;
- if (tx->processor == TGSI_PROCESSOR_VERTEX) {
- for (i = 0; i < Elements(inst_table); ++i) {
- assert(inst_table[i].sio < Elements(tx->op_info_map));
+ if (tx->processor == PIPE_SHADER_VERTEX) {
+ for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
+ assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
if (inst_table[i].vert_version.min <= version &&
inst_table[i].vert_version.max >= version)
tx->op_info_map[inst_table[i].sio] = i;
}
} else {
- for (i = 0; i < Elements(inst_table); ++i) {
- assert(inst_table[i].sio < Elements(tx->op_info_map));
+ for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
+ assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
if (inst_table[i].frag_version.min <= version &&
inst_table[i].frag_version.max >= version)
tx->op_info_map[inst_table[i].sio] = i;
struct ureg_src src[4];
unsigned i;
- for (i = 0; i < tx->insn.ndst && i < Elements(dst); ++i)
+ for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
- for (i = 0; i < tx->insn.nsrc && i < Elements(src); ++i)
+ for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
src[i] = tx_src_param(tx, &tx->insn.src[i]);
ureg_insn(tx->ureg, tx->insn.info->opcode,
dst, tx->insn.ndst,
- src, tx->insn.nsrc);
+ src, tx->insn.nsrc, 0);
return D3D_OK;
}
tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
switch (tok >> 16) {
- case NINED3D_SM1_VS: tx->processor = TGSI_PROCESSOR_VERTEX; break;
- case NINED3D_SM1_PS: tx->processor = TGSI_PROCESSOR_FRAGMENT; break;
+ case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
+ case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
default:
DBG("Invalid shader type: %x\n", tok);
tx->processor = ~0;
+/* Decode the destination parameter token tok into *dst: register file,
+ * write mask, modifier and shift. */
static void
sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
{
- uint8_t shift;
+ int8_t shift;
dst->file =
(tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
(tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
- dst->shift = (shift & 0x8) ? -(shift & 0x7) : shift & 0x7;
+ /* bits 0-2 keep their usual weight and bit 3 counts as -8, i.e. the low
+ * four bits are read as a two's complement value */
+ dst->shift = (shift & 0x7) - (shift & 0x8);
}
static void
sm1_parse_instruction(struct shader_translator *tx)
{
struct sm1_instruction *insn = &tx->insn;
+ HRESULT hr;
DWORD tok;
struct sm1_op_info *info = NULL;
unsigned i;
insn->coissue = !!(tok & D3DSI_COISSUE);
insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
- if (insn->opcode < Elements(tx->op_info_map)) {
+ if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
int k = tx->op_info_map[insn->opcode];
if (k >= 0) {
- assert(k < Elements(inst_table));
+ assert(k < ARRAY_SIZE(inst_table));
info = &inst_table[k];
}
} else {
sm1_instruction_check(insn);
if (info->handler)
- info->handler(tx);
+ hr = info->handler(tx);
else
- NineTranslateInstruction_Generic(tx);
+ hr = NineTranslateInstruction_Generic(tx);
tx_apply_dst0_modifiers(tx);
+ if (hr != D3D_OK)
+ tx->failure = TRUE;
tx->num_scratch = 0; /* reset */
TOKEN_JUMP(tx);
tx->byte_code = info->byte_code;
tx->parse = info->byte_code;
- for (i = 0; i < Elements(info->input_map); ++i)
+ for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
info->input_map[i] = NINE_DECLUSAGE_NONE;
info->num_inputs = 0;
info->bumpenvmat_needed = 0;
- for (i = 0; i < Elements(tx->regs.rL); ++i) {
+ for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
tx->regs.rL[i] = ureg_dst_undef();
}
tx->regs.address = ureg_dst_undef();
tx->regs.oDepth = ureg_dst_undef();
tx->regs.vPos = ureg_src_undef();
tx->regs.vFace = ureg_src_undef();
- for (i = 0; i < Elements(tx->regs.o); ++i)
+ for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
tx->regs.o[i] = ureg_dst_undef();
- for (i = 0; i < Elements(tx->regs.oCol); ++i)
+ for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
tx->regs.oCol[i] = ureg_dst_undef();
- for (i = 0; i < Elements(tx->regs.vC); ++i)
+ for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
tx->regs.vC[i] = ureg_src_undef();
- for (i = 0; i < Elements(tx->regs.vT); ++i)
+ for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
tx->regs.vT[i] = ureg_src_undef();
- for (i = 0; i < Elements(tx->lconsti); ++i)
- tx->lconsti[i].idx = -1;
- for (i = 0; i < Elements(tx->lconstb); ++i)
- tx->lconstb[i].idx = -1;
-
sm1_read_version(tx);
info->version = (tx->version.major << 4) | tx->version.minor;
+ tx->num_outputs = 0;
+
create_op_info_map(tx);
}
FREE(tx);
}
-static inline unsigned
-tgsi_processor_from_type(unsigned shader_type)
+/* Write the vertex position to regs.oPos_out, the output used on the
+ * info->process_vertices path.
+ *
+ * Intended layout of the two constants set up below:
+ * CONST[0].xyz = width/2, -height/2, zmax-zmin
+ * CONST[1].xyz = x+width/2, y+height/2, zmin
+ *
+ * The scale/offset by these constants is still a TODO (kept commented out in
+ * the body), so for now oPos.xyz is copied unchanged and c0/c1 are prepared
+ * but not read by any emitted instruction. */
+static void
+shader_add_vs_viewport_transform(struct shader_translator *tx)
{
- switch (shader_type) {
- case PIPE_SHADER_VERTEX: return TGSI_PROCESSOR_VERTEX;
- case PIPE_SHADER_FRAGMENT: return TGSI_PROCESSOR_FRAGMENT;
- default:
- return ~0;
- }
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_src c0 = NINE_CONSTANT_SRC(0);
+ struct ureg_src c1 = NINE_CONSTANT_SRC(1);
+ /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
+
+ c0 = ureg_src_dimension(c0, 4); /* index 4: presumably the range declared as "Viewport data" by the caller - confirm */
+ c1 = ureg_src_dimension(c1, 4);
+ /* TODO: find out when we need to apply the viewport transformation or not.
+ * Likely will be XYZ vs XYZRHW in vdecl_out
+ * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
+ * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
+ */
+ ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos)); /* plain xyz copy while the transform above stays disabled */
}
static void
{
struct ureg_program *ureg = tx->ureg;
struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+ struct ureg_src fog_end, fog_coeff, fog_density;
+ struct ureg_src fog_vs, depth, fog_color;
+ struct ureg_dst fog_factor;
- /* TODO: fog computation */
- ureg_MOV(ureg, oCol0, src_col);
+ if (!tx->info->fog_enable) {
+ ureg_MOV(ureg, oCol0, src_col);
+ return;
+ }
+
+ if (tx->info->fog_mode != D3DFOG_NONE) {
+ depth = nine_get_position_input(tx);
+ depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
+ }
+
+ nine_info_mark_const_f_used(tx->info, 33);
+ fog_color = NINE_CONSTANT_SRC(32);
+ fog_factor = tx_scratch_scalar(tx);
+
+ if (tx->info->fog_mode == D3DFOG_LINEAR) {
+ fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
+ fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
+ ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(depth));
+ ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
+ } else if (tx->info->fog_mode == D3DFOG_EXP) {
+ fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
+ ureg_MUL(ureg, fog_factor, depth, fog_density);
+ ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
+ ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
+ } else if (tx->info->fog_mode == D3DFOG_EXP2) {
+ fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
+ ureg_MUL(ureg, fog_factor, depth, fog_density);
+ ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
+ ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
+ ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
+ } else {
+ fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
+ TGSI_INTERPOLATE_PERSPECTIVE),
+ TGSI_SWIZZLE_X);
+ ureg_MOV(ureg, fog_factor, fog_vs);
+ }
+
+ ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
+ tx_src_scalar(fog_factor), src_col, fog_color);
+ ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
}
-#define GET_CAP(n) device->screen->get_param( \
- device->screen, PIPE_CAP_##n)
-#define GET_SHADER_CAP(n) device->screen->get_shader_param( \
- device->screen, info->type, PIPE_SHADER_CAP_##n)
+#define GET_CAP(n) screen->get_param( \
+ screen, PIPE_CAP_##n) /* queries PIPE_CAP_<n>; expects a local named 'screen' at the point of use */
+#define GET_SHADER_CAP(n) screen->get_shader_param( \
+ screen, info->type, PIPE_SHADER_CAP_##n) /* queries PIPE_SHADER_CAP_<n> for info->type; expects locals 'screen' and 'info' */
HRESULT
-nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
+nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
{
struct shader_translator *tx;
HRESULT hr = D3D_OK;
- const unsigned processor = tgsi_processor_from_type(info->type);
- unsigned s, slot_max;
- unsigned max_const_f;
+ const unsigned processor = info->type;
+ struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
user_assert(processor != ~0, D3DERR_INVALIDCALL);
DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
goto out;
}
- DUMP("%s%u.%u\n", processor == TGSI_PROCESSOR_VERTEX ? "VS" : "PS",
+ DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
tx->version.major, tx->version.minor);
tx->ureg = ureg_create(processor);
tx->native_integers = GET_SHADER_CAP(INTEGERS);
tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
- tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
tx->texcoord_sn = tx->want_texcoord ?
TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
+ tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
+ tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
+
+ if (IS_VS) {
+ tx->num_constf_allowed = NINE_MAX_CONST_F;
+ } else if (tx->version.major < 2) {/* IS_PS v1 */
+ tx->num_constf_allowed = 8;
+ } else if (tx->version.major == 2) {/* IS_PS v2 */
+ tx->num_constf_allowed = 32;
+ } else {/* IS_PS v3 */
+ tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
+ }
+
+ if (tx->version.major < 2) {
+ tx->num_consti_allowed = 0;
+ tx->num_constb_allowed = 0;
+ } else {
+ tx->num_consti_allowed = NINE_MAX_CONST_I;
+ tx->num_constb_allowed = NINE_MAX_CONST_B;
+ }
+
+ if (IS_VS && tx->version.major >= 2 && info->swvp_on) {
+ tx->num_constf_allowed = 8192;
+ tx->num_consti_allowed = 2048;
+ tx->num_constb_allowed = 2048;
+ }
/* VS must always write position. Declare it here to make it the 1st output.
* (Some drivers like nv50 are buggy and rely on that.)
ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
}
+ if (GET_CAP(TGSI_MUL_ZERO_WINS))
+ ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);
+
while (!sm1_parse_eof(tx) && !tx->failure)
sm1_parse_instruction(tx);
tx->parse++; /* for byte_size */
if (tx->failure) {
- ERR("Encountered buggy shader\n");
+ /* For VS shaders, we print the warning later,
+ * we first try with swvp. */
+ if (IS_PS)
+ ERR("Encountered buggy shader\n");
ureg_destroy(tx->ureg);
hr = D3DERR_INVALIDCALL;
goto out;
}
}
- if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog)) {
+ if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
}
- /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
- if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) {
- struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1);
- ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1]));
- ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f));
- }
-
if (info->position_t)
ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
- ureg_END(tx->ureg);
-
- if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
+ if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
+ struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
+ ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
+ ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
info->point_size = TRUE;
+ }
+
+ if (info->process_vertices)
+ shader_add_vs_viewport_transform(tx);
+
+ ureg_END(tx->ureg);
/* record local constants */
if (tx->num_lconstf && tx->indirect_const_access) {
k = i;
}
indices[n] = tx->lconstf[k].idx;
- memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
+ memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
tx->lconstf[k].idx = INT_MAX;
}
/* r500 */
if (info->const_float_slots > device->max_vs_const_f &&
- (info->const_int_slots || info->const_bool_slots))
+ (info->const_int_slots || info->const_bool_slots) &&
+ (!IS_VS || !info->swvp_on))
ERR("Overlapping constant slots. The shader is likely to be buggy\n");
if (tx->indirect_const_access) /* vs only */
info->const_float_slots = device->max_vs_const_f;
- max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
- slot_max = info->const_bool_slots > 0 ?
- max_const_f + NINE_MAX_CONST_I
- + DIV_ROUND_UP(info->const_bool_slots, 4) :
- info->const_int_slots > 0 ?
- max_const_f + info->const_int_slots :
- info->const_float_slots;
+ if (!IS_VS || !info->swvp_on) {
+ unsigned s, slot_max;
+ unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
- info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
+ slot_max = info->const_bool_slots > 0 ?
+ max_const_f + NINE_MAX_CONST_I
+ + DIV_ROUND_UP(info->const_bool_slots, 4) :
+ info->const_int_slots > 0 ?
+ max_const_f + info->const_int_slots :
+ info->const_float_slots;
+
+ info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
+
+ for (s = 0; s < slot_max; s++)
+ ureg_DECL_constant(tx->ureg, s);
+ } else {
+ ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
+ ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
+ ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
+ ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
+ }
- for (s = 0; s < slot_max; s++)
- ureg_DECL_constant(tx->ureg, s);
+ if (info->process_vertices)
+ ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
- unsigned count;
- const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
+ const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
tgsi_dump(toks, 0);
ureg_free_tokens(toks);
}
- info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
+ if (info->process_vertices) {
+ NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
+ tx->output_info,
+ tx->num_outputs,
+ &(info->so));
+ info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
+ } else
+ info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
if (!info->cso) {
hr = D3DERR_DRIVERINTERNALERROR;
FREE(info->lconstf.data);