use MUL+MAD+MOV instead.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
emit_data->args[1], emit_data->args[0], "");
}
-/* TGSI_OPCODE_XPD */
-
-static void
-xpd_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- dp_fetch_args(bld_base, emit_data, 3);
-}
-
-/**
- * (a * b) - (c * d)
- */
-static LLVMValueRef
-xpd_helper(
- struct lp_build_tgsi_context * bld_base,
- LLVMValueRef a,
- LLVMValueRef b,
- LLVMValueRef c,
- LLVMValueRef d)
-{
- LLVMValueRef tmp0, tmp1;
-
- tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b);
- tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d);
-
- return lp_build_sub(&bld_base->base, tmp0, tmp1);
-}
-
-static void
-xpd_emit(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base,
- emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */,
- emit_data->args[4] /* src1.y */, emit_data->args[2] /* src0.z */);
-
- emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base,
- emit_data->args[2] /* src0.z */, emit_data->args[3] /* src1.x */,
- emit_data->args[5] /* src1.z */, emit_data->args[0] /* src0.x */);
-
- emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base,
- emit_data->args[0] /* src0.x */, emit_data->args[4] /* src1.y */,
- emit_data->args[3] /* src1.x */, emit_data->args[1] /* src0.y */);
-
- emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
-}
-
-const struct lp_build_tgsi_action xpd_action = {
- xpd_fetch_args, /* fetch_args */
- xpd_emit /* emit */
-};
-
/* TGSI_OPCODE_D2F */
static void
d2f_emit(
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
- bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
break;
- case TGSI_OPCODE_XPD:
- return FALSE;
-
case TGSI_OPCODE_COS:
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
ttn_move_dest(b, dest, nir_slt(b, src[1], src[0]));
}
-static void
-ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
- ttn_move_dest_masked(b, dest,
- nir_fsub(b,
- nir_fmul(b,
- ttn_swizzle(b, src[0], Y, Z, X, X),
- ttn_swizzle(b, src[1], Z, X, Y, X)),
- nir_fmul(b,
- ttn_swizzle(b, src[1], Y, Z, X, X),
- ttn_swizzle(b, src[0], Z, X, Y, X))),
- TGSI_WRITEMASK_XYZ);
- ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
-}
-
static void
ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
[TGSI_OPCODE_EX2] = nir_op_fexp2,
[TGSI_OPCODE_LG2] = nir_op_flog2,
[TGSI_OPCODE_POW] = nir_op_fpow,
- [TGSI_OPCODE_XPD] = 0,
[TGSI_OPCODE_COS] = nir_op_fcos,
[TGSI_OPCODE_DDX] = nir_op_fddx,
[TGSI_OPCODE_DDY] = nir_op_fddy,
ttn_lit(b, op_trans[tgsi_op], dest, src);
break;
- case TGSI_OPCODE_XPD:
- ttn_xpd(b, op_trans[tgsi_op], dest, src);
- break;
-
case TGSI_OPCODE_DP2:
ttn_dp2(b, op_trans[tgsi_op], dest, src);
break;
}
}
-static void
-exec_xpd(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- union tgsi_exec_channel r[6];
- union tgsi_exec_channel d[3];
-
- fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
-
- micro_mul(&r[2], &r[0], &r[1]);
-
- fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
- fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-
- micro_mul(&r[5], &r[3], &r[4] );
- micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
-
- fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-
- micro_mul(&r[3], &r[3], &r[2]);
-
- fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-
- micro_mul(&r[1], &r[1], &r[5]);
- micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
-
- micro_mul(&r[5], &r[5], &r[4]);
- micro_mul(&r[0], &r[0], &r[2]);
- micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
-
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
- store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
- }
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
- store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- }
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
- store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
- }
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
- store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
- }
-}
-
static void
exec_dst(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
- case TGSI_OPCODE_XPD:
- exec_xpd(mach, inst);
- break;
-
case TGSI_OPCODE_COS:
exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
{ 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
{ 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "", 31 }, /* removed */
{ 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
{ 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
}
}
-/* XPD - Cross Product
- * dst.x = src0.y \times src1.z - src1.y \times src0.z
- * dst.y = src0.z \times src1.x - src1.z \times src0.x
- * dst.z = src0.x \times src1.y - src1.x \times src0.y
- * dst.w = 1.0
- *
- * ; needs: 1 tmp, imm{1.0}
- * MUL tmpA.xyz, src1.yzx, src0.zxy
- * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz
- * MOV dst.w, imm{1.0}
- */
-#define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2))
-#define XPD_TMP 1
-static void
-transform_xpd(struct tgsi_transform_context *tctx,
- struct tgsi_full_instruction *inst)
-{
- struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
- struct tgsi_full_dst_register *dst = &inst->Dst[0];
- struct tgsi_full_src_register *src0 = &inst->Src[0];
- struct tgsi_full_src_register *src1 = &inst->Src[1];
- struct tgsi_full_instruction new_inst;
-
- if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
- /* MUL tmpA.xyz, src1.yzx, src0.zxy */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
- new_inst.Instruction.NumSrcRegs = 2;
- reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
- reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
- tctx->emit_instruction(tctx, &new_inst);
-
- /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
- new_inst.Instruction.NumSrcRegs = 3;
- reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
- reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
- reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
- new_inst.Src[2].Register.Negate = true;
- tctx->emit_instruction(tctx, &new_inst);
- }
-
- if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
- /* MOV dst.w, imm{1.0} */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
- new_inst.Instruction.NumSrcRegs = 1;
- reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
- tctx->emit_instruction(tctx, &new_inst);
- }
-}
-
/* SCS - Sine Cosine
* dst.x = \cos{src.x}
* dst.y = \sin{src.x}
goto skip;
transform_dst(tctx, inst);
break;
- case TGSI_OPCODE_XPD:
- if (!ctx->config->lower_XPD)
- goto skip;
- transform_xpd(tctx, inst);
- break;
case TGSI_OPCODE_SCS:
if (!ctx->config->lower_SCS)
goto skip;
#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
/* if there are no instructions to lower, then we are done: */
if (!(OPCS(DST) ||
- OPCS(XPD) ||
OPCS(SCS) ||
OPCS(LRP) ||
OPCS(FRC) ||
newlen += DST_GROW * OPCS(DST);
numtmp = MAX2(numtmp, DST_TMP);
}
- if (OPCS(XPD)) {
- newlen += XPD_GROW * OPCS(XPD);
- numtmp = MAX2(numtmp, XPD_TMP);
- }
if (OPCS(SCS)) {
newlen += SCS_GROW * OPCS(SCS);
numtmp = MAX2(numtmp, SCS_TMP);
* enable lowering of TGSI_OPCODE_<opc>
*/
unsigned lower_DST:1;
- unsigned lower_XPD:1;
unsigned lower_SCS:1;
unsigned lower_LRP:1;
unsigned lower_FRC:1;
OP11(EX2)
OP11(LG2)
OP12(POW)
-OP12(XPD)
OP11(COS)
OP11(DDX)
OP11(DDY)
dst = src0.x^{src1.x}
-.. opcode:: XPD - Cross Product
-
-.. math::
-
- dst.x = src0.y \times src1.z - src1.y \times src0.z
-
- dst.y = src0.z \times src1.x - src1.z \times src0.x
-
- dst.z = src0.x \times src1.y - src1.x \times src0.y
-
- dst.w = 1
-
.. opcode:: COS - Cosine
should be set the same way for an entire pipeline. Note that this
applies not only to the literal MUL TGSI opcode, but all FP32
multiplications implied by other operations, such as MAD, FMA, DP2,
-DP3, DP4, DST, LOG, LRP, XPD, and possibly others. If there is a
+DP3, DP4, DST, LOG, LRP, and possibly others. If there is a
mismatch between shaders, then it is unspecified whether this behavior
will be enabled.
.lower_LOG = true,
.lower_DP2 = true,
.lower_TRUNC = true,
- .lower_XPD = true
};
c = CALLOC_STRUCT(etna_compile);
[ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 },
[ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 },
- [ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 },
};
static boolean op_has_dst(unsigned opcode)
emit_tex(p, inst, T0_TEXLDP, fs);
break;
- case TGSI_OPCODE_XPD:
- /* Cross product:
- * result.x = src0.y * src1.z - src0.z * src1.y;
- * result.y = src0.z * src1.x - src0.x * src1.z;
- * result.z = src0.x * src1.y - src0.y * src1.x;
- * result.w = undef;
- */
- src0 = src_vector(p, &inst->Src[0], fs);
- src1 = src_vector(p, &inst->Src[1], fs);
- tmp = i915_get_utemp(p);
-
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_ALL, 0,
- swizzle(src0, Z, X, Y, ONE),
- swizzle(src1, Y, Z, X, ONE), 0);
-
- i915_emit_arith(p,
- A0_MAD,
- get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
- swizzle(src0, Y, Z, X, ONE),
- swizzle(src1, Z, X, Y, ONE),
- negate(tmp, 1, 1, 1, 0));
- break;
-
default:
i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
p->error = 1;
return mask;
case TGSI_OPCODE_TXQ:
return 1;
- case TGSI_OPCODE_XPD:
- {
- unsigned int x = 0;
- if (mask & 1) x |= 0x6;
- if (mask & 2) x |= 0x5;
- if (mask & 4) x |= 0x3;
- return x;
- }
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
case TGSI_OPCODE_D2F:
case TGSI_OPCODE_LIT:
handleLIT(dst0);
break;
- case TGSI_OPCODE_XPD:
- FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
- if (c < 3) {
- val0 = getSSA();
- src0 = fetchSrc(1, (c + 1) % 3);
- src1 = fetchSrc(0, (c + 2) % 3);
- mkOp2(OP_MUL, TYPE_F32, val0, src0, src1)
- ->dnz = info->io.mul_zero_wins;
- mkOp1(OP_NEG, TYPE_F32, val0, val0);
-
- src0 = fetchSrc(0, (c + 1) % 3);
- src1 = fetchSrc(1, (c + 2) % 3);
- mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0)
- ->dnz = info->io.mul_zero_wins;
- } else {
- loadImm(dst0[c], 1.0f);
- }
- }
- break;
case TGSI_OPCODE_ISSG:
case TGSI_OPCODE_SSG:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
* POW - EX2 + MUL + LG2
* SUB - ADD, second source negated
* SWZ - MOV
- * XPD -
*
* Register access
* - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
case TGSI_OPCODE_TXP:
nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
break;
- case TGSI_OPCODE_XPD:
- tmp = nvfx_src(temp(fpc));
- nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
- nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
- break;
case TGSI_OPCODE_IF:
// MOVRC0 R31 (TR0.xyzw), R<src>:
* RSQ - LG2 + EX2
* POW - LG2 + MUL + EX2
* SCS - COS + SIN
- * XPD
*
* NV40 Looping
* Loops appear to be fairly expensive on NV40 at least, the proprietary
insn.cc_test = NVFX_COND_LT;
nvfx_vp_emit(vpc, insn);
break;
- case TGSI_OPCODE_XPD:
- tmp = nvfx_src(temp(vpc));
- nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
- nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
- break;
case TGSI_OPCODE_IF:
insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none);
insn.cc_update = 1;
case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
case TGSI_OPCODE_POW: return RC_OPCODE_POW;
- case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
case TGSI_OPCODE_COS: return RC_OPCODE_COS;
case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
case TGSI_OPCODE_DDY: return RC_OPCODE_DDY;
return 0;
}
-static int tgsi_xpd(struct r600_shader_ctx *ctx)
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- static const unsigned int src0_swizzle[] = {2, 0, 1};
- static const unsigned int src1_swizzle[] = {1, 2, 0};
- struct r600_bytecode_alu alu;
- uint32_t use_temp = 0;
- int i, r;
-
- if (inst->Dst[0].Register.WriteMask != 0xf)
- use_temp = 1;
-
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MUL;
- if (i < 3) {
- r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
- r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
- } else {
- alu.src[0].sel = V_SQ_ALU_SRC_0;
- alu.src[0].chan = i;
- alu.src[1].sel = V_SQ_ALU_SRC_0;
- alu.src[1].chan = i;
- }
-
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = i;
- alu.dst.write = 1;
-
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP3_MULADD;
-
- if (i < 3) {
- r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
- r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
- } else {
- alu.src[0].sel = V_SQ_ALU_SRC_0;
- alu.src[0].chan = i;
- alu.src[1].sel = V_SQ_ALU_SRC_0;
- alu.src[1].chan = i;
- }
-
- alu.src[2].sel = ctx->temp_reg;
- alu.src[2].neg = 1;
- alu.src[2].chan = i;
-
- if (use_temp)
- alu.dst.sel = ctx->temp_reg;
- else
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- alu.dst.chan = i;
- alu.dst.write = 1;
- alu.is_op3 = 1;
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- if (use_temp)
- return tgsi_helper_copy(ctx, inst);
- return 0;
-}
-
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
- [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
- [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow},
- [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
}
-/**
- * Translate/emit TGSI XPD (vector cross product) instruction.
- */
-static boolean
-emit_xpd(struct svga_shader_emitter *emit,
- const struct tgsi_full_instruction *insn)
-{
- SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
- const struct src_register src0 = translate_src_register(
- emit, &insn->Src[0] );
- const struct src_register src1 = translate_src_register(
- emit, &insn->Src[1] );
- boolean need_dst_tmp = FALSE;
-
- /* XPD can only output to a temporary */
- if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
- need_dst_tmp = TRUE;
-
- /* The dst reg must not be the same as src0 or src1*/
- if (alias_src_dst(src0, dst) ||
- alias_src_dst(src1, dst))
- need_dst_tmp = TRUE;
-
- if (need_dst_tmp) {
- SVGA3dShaderDestToken tmp = get_temp( emit );
-
- /* Obey DX9 restrictions on mask:
- */
- tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
-
- if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
- return FALSE;
-
- if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
- return FALSE;
- }
- else {
- if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
- return FALSE;
- }
-
- /* Need to emit 1.0 to dst.w?
- */
- if (dst.mask & TGSI_WRITEMASK_W) {
- struct src_register one = get_one_immediate( emit );
-
- if (!submit_op1(emit,
- inst_token( SVGA3DOP_MOV ),
- writemask(dst, TGSI_WRITEMASK_W),
- one))
- return FALSE;
- }
-
- return TRUE;
-}
-
-
/**
* Emit a LRP (linear interpolation) instruction.
*/
case TGSI_OPCODE_BRK:
return emit_brk( emit, insn );
- case TGSI_OPCODE_XPD:
- return emit_xpd( emit, insn );
-
case TGSI_OPCODE_KILL:
return emit_kill( emit, insn );
emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
- emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1)
return TRUE;
}
-/*
- * Emit code for TGSI_OPCODE_XPD instruction.
- */
-static boolean
-emit_xpd(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /* dst.x = src0.y * src1.z - src1.y * src0.z
- * dst.y = src0.z * src1.x - src1.z * src0.x
- * dst.z = src0.x * src1.y - src1.x * src0.y
- * dst.w = 1
- */
- struct tgsi_full_src_register s0_xxxx =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
- struct tgsi_full_src_register s0_yyyy =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
- struct tgsi_full_src_register s0_zzzz =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
-
- struct tgsi_full_src_register s1_xxxx =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
- struct tgsi_full_src_register s1_yyyy =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
- struct tgsi_full_src_register s1_zzzz =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
-
- unsigned tmp1 = get_temp_index(emit);
- struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
- struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
-
- unsigned tmp2 = get_temp_index(emit);
- struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
- struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
- struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
-
- unsigned tmp3 = get_temp_index(emit);
- struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
- struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
- struct tgsi_full_dst_register tmp3_dst_x =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
- struct tgsi_full_dst_register tmp3_dst_y =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
- struct tgsi_full_dst_register tmp3_dst_z =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
- struct tgsi_full_dst_register tmp3_dst_w =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
-
- /* Note: we put all the intermediate computations into tmp3 in case
- * the XPD dest register is that same as one of the src regs (in which
- * case we could clobber a src reg before we're done with it) .
- *
- * Note: we could get by with just one temp register instead of three
- * since we're doing scalar operations and there's enough room in one
- * temp for everything.
- */
-
- /* MUL tmp1, src0.y, src1.z */
- /* MUL tmp2, src1.y, src0.z */
- /* ADD tmp3.x, tmp1, -tmp2 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
- &s0_yyyy, &s1_zzzz, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
- &s1_yyyy, &s0_zzzz, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
- &tmp1_src, &neg_tmp2_src, FALSE);
- }
-
- /* MUL tmp1, src0.z, src1.x */
- /* MUL tmp2, src1.z, src0.x */
- /* ADD tmp3.y, tmp1, -tmp2 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
- &s1_xxxx, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
- &s0_xxxx, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
- &tmp1_src, &neg_tmp2_src, FALSE);
- }
-
- /* MUL tmp1, src0.x, src1.y */
- /* MUL tmp2, src1.x, src0.y */
- /* ADD tmp3.z, tmp1, -tmp2 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
- &s1_yyyy, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
- &s0_yyyy, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
- &tmp1_src, &neg_tmp2_src, FALSE);
- }
-
- /* MOV tmp3.w, 1.0 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
- struct tgsi_full_src_register one =
- make_immediate_reg_float(emit, 1.0f);
-
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
- }
-
- /* MOV dst, tmp3 */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
- inst->Instruction.Saturate);
-
-
- free_temp_indexes(emit);
-
- return TRUE;
-}
-
-
/**
* Emit code for TGSI_OPCODE_TXD (explicit derivatives)
*/
return emit_txq(emit, inst);
case TGSI_OPCODE_UIF:
return emit_if(emit, inst);
- case TGSI_OPCODE_XPD:
- return emit_xpd(emit, inst);
case TGSI_OPCODE_UMUL_HI:
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_UDIV:
#define TGSI_OPCODE_EX2 28
#define TGSI_OPCODE_LG2 29
#define TGSI_OPCODE_POW 30
-#define TGSI_OPCODE_XPD 31
+/* gap */
#define TGSI_OPCODE_U2I64 32
#define TGSI_OPCODE_CLOCK 33
#define TGSI_OPCODE_I2I64 34
return D3D_OK;
}
+/*
+ * CRS (cross product), expanded by hand now that TGSI has no XPD opcode:
+ *
+ *    tmp.xyz = src1.yzx * src0.zxy
+ *    dst.xyz = src0.yzx * src1.zxy - tmp.xyz
+ *    dst.w  <- 1.0 (emitted under a W writemask)
+ *
+ * The first product is written to a scratch temporary rather than to dst.
+ * Writing it to dst would clobber a source the MAD still has to read
+ * whenever dst names the same register as src0 or src1, and would force
+ * the MAD to read dst back.  This is the same MUL tmp / MAD dst, -tmp
+ * sequence the generic TGSI lowering used for XPD.
+ */
+DECL_SPECIAL(XPD)
+{
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
+    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
+    struct ureg_dst tmp = ureg_DECL_temporary(ureg);
+
+    /* tmp.xyz = src1.yzx * src0.zxy */
+    ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
+             ureg_swizzle(src1, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                          TGSI_SWIZZLE_X, 0),
+             ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_Y, 0));
+    /* dst.xyz = src0.yzx * src1.zxy - tmp.xyz; only tmp.xyz was written,
+     * so the unused fourth lane is pointed at a written channel. */
+    ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
+             ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                          TGSI_SWIZZLE_X, 0),
+             ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_Y, 0),
+             ureg_negate(ureg_swizzle(ureg_src(tmp), TGSI_SWIZZLE_X,
+                                      TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, 0)));
+    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
+             ureg_imm1f(ureg, 1));
+
+    ureg_release_temporary(ureg, tmp);
+    return D3D_OK;
+}
+
DECL_SPECIAL(M4x4)
{
return NineTranslateInstruction_Mkxn(tx, 4, 4);
_OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
_OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
- _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
+ _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
_OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
_OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
_OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
return TGSI_OPCODE_TXB;
case OPCODE_TXP:
return TGSI_OPCODE_TXP;
- case OPCODE_XPD:
- return TGSI_OPCODE_XPD;
case OPCODE_END:
return TGSI_OPCODE_END;
default:
break;
case OPCODE_XPD:
- dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
- ureg_insn( ureg,
- translate_opcode( inst->Opcode ),
- dst, num_dst,
- src, num_src, 0 );
+      {
+         /* Cross product expanded by hand; TGSI no longer has XPD:
+          *
+          *    tmp.xyz    = src[1].yzx * src[0].zxy
+          *    dst[0].xyz = src[0].yzx * src[1].zxy - tmp.xyz
+          *
+          * The first product goes to a scratch temporary, not dst[0].
+          * Otherwise an instruction whose destination is also one of its
+          * sources (e.g. "XPD R0, R0, R1") would have that source
+          * overwritten before the MAD reads it, and the MAD would have
+          * to read dst[0] back.
+          */
+         struct ureg_dst tmp = ureg_DECL_temporary(ureg);
+
+         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
+                  ureg_swizzle(src[1], TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                               TGSI_SWIZZLE_X, 0),
+                  ureg_swizzle(src[0], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
+                               TGSI_SWIZZLE_Y, 0));
+         /* Only tmp.xyz was written; aim the unused fourth lane at a
+          * written channel. */
+         ureg_MAD(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ),
+                  ureg_swizzle(src[0], TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                               TGSI_SWIZZLE_X, 0),
+                  ureg_swizzle(src[1], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
+                               TGSI_SWIZZLE_Y, 0),
+                  ureg_negate(ureg_swizzle(ureg_src(tmp), TGSI_SWIZZLE_X,
+                                           TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                                           0)));
+
+         ureg_release_temporary(ureg, tmp);
+      }
break;
case OPCODE_RSQ: