TGSI_OPCODE_ABS is redundant with the absolute-value source modifier.
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
case TGSI_TYPE_DOUBLE:
case TGSI_TYPE_UNTYPED:
/* modifiers on movs assume data is float */
- res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, res);
+ res = lp_build_abs(&bld_base->base, res);
break;
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x;
/* abs( src0.x) */
- abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
- emit_data->args[0] /* src0.x */);
+ abs_x = lp_build_abs(&bld_base->base, emit_data->args[0] /* src0.x */);
/* log( abs( src0.x ) ) */
log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2,
* intrinsics.
*/
-/* TGSI_OPCODE_ABS (CPU Only)*/
-
-static void
-abs_emit_cpu(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->base,
- emit_data->args[0]);
-}
-
/* TGSI_OPCODE_ADD (CPU Only) */
static void
add_emit_cpu(
struct lp_build_tgsi_context * bld_base)
{
lp_set_default_actions(bld_base);
- bld_base->op_actions[TGSI_OPCODE_ABS].emit = abs_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_AND].emit = and_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_ARL].emit = arl_emit_cpu;
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
- tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
+ tmp0 = lp_build_abs(&bld->bld_base.base, src0);
dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
break;
[TGSI_OPCODE_LG2] = nir_op_flog2,
[TGSI_OPCODE_POW] = nir_op_fpow,
[TGSI_OPCODE_XPD] = 0,
- [TGSI_OPCODE_ABS] = nir_op_fabs,
[TGSI_OPCODE_DPH] = 0,
[TGSI_OPCODE_COS] = nir_op_fcos,
[TGSI_OPCODE_DDX] = nir_op_fddx,
exec_xpd(mach, inst);
break;
- case TGSI_OPCODE_ABS:
- exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
- break;
-
case TGSI_OPCODE_DPH:
exec_dph(mach, inst);
break;
{ 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "", 33 }, /* removed */
{ 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
OP11(LG2)
OP12(POW)
OP12(XPD)
-OP11(ABS)
OP12(DPH)
OP11(COS)
OP11(DDX)
case TGSI_OPCODE_FLR:
case TGSI_OPCODE_ROUND:
case TGSI_OPCODE_POW:
- case TGSI_OPCODE_ABS:
case TGSI_OPCODE_COS:
case TGSI_OPCODE_SIN:
case TGSI_OPCODE_DDX:
case TGSI_OPCODE_POW:
translate_pow(ctx, inst);
break;
- case TGSI_OPCODE_ABS:
- instr = ir2_instr_create_alu(cf, MAXv, ~0);
- add_regs_vector_1(ctx, inst, instr);
- instr->regs[1]->flags |= IR2_REG_NEGATE; /* src0 */
- break;
case TGSI_OPCODE_COS:
case TGSI_OPCODE_SIN:
translate_trig(ctx, inst, opc);
unsigned num_dst;
unsigned num_src;
} op_table [TGSI_OPCODE_LAST] = {
- [ TGSI_OPCODE_ABS ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_ADD ] = { false, true, TGSI_SWIZZLE_ZERO, 1, 2 },
[ TGSI_OPCODE_CEIL ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_CMP ] = { false, false, 0, 1, 2 },
uint tmp = 0;
switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ABS:
- src0 = src_vector(p, &inst->Src[0], fs);
- i915_emit_arith(p,
- A0_MAX,
- get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
- src0, negate(src0, 1, 1, 1, 1), 0);
- break;
-
case TGSI_OPCODE_ADD:
emit_simple_arith(p, inst, A0_ADD, 2, fs);
break;
[TGSI_OPCODE_EX2] = { TOY_OPCODE_EXP, 1, 1 },
[TGSI_OPCODE_LG2] = { TOY_OPCODE_LOG, 1, 1 },
[TGSI_OPCODE_POW] = { TOY_OPCODE_POW, 1, 2 },
- [TGSI_OPCODE_ABS] = { GEN6_OPCODE_MOV, 1, 1 },
[TGSI_OPCODE_DPH] = { GEN6_OPCODE_DPH, 1, 2 },
[TGSI_OPCODE_COS] = { TOY_OPCODE_COS, 1, 1 },
[TGSI_OPCODE_KILL] = { TOY_OPCODE_KIL, 0, 0 },
case TGSI_OPCODE_SUB:
src[1] = tsrc_negate(src[1]);
break;
- case TGSI_OPCODE_ABS:
case TGSI_OPCODE_IABS:
src[0] = tsrc_absolute(src[0]);
break;
[TGSI_OPCODE_LG2] = aos_simple,
[TGSI_OPCODE_POW] = aos_simple,
[TGSI_OPCODE_XPD] = aos_XPD,
- [TGSI_OPCODE_ABS] = aos_simple,
[TGSI_OPCODE_DPH] = aos_simple,
[TGSI_OPCODE_COS] = aos_simple,
[TGSI_OPCODE_DDX] = aos_unsupported,
[TGSI_OPCODE_LG2] = soa_scalar_replicate,
[TGSI_OPCODE_POW] = soa_scalar_replicate,
[TGSI_OPCODE_XPD] = soa_XPD,
- [TGSI_OPCODE_ABS] = soa_per_channel,
[TGSI_OPCODE_DPH] = soa_dot_product,
[TGSI_OPCODE_COS] = soa_scalar_replicate,
[TGSI_OPCODE_DDX] = soa_partial_derivative,
NV50_IR_OPCODE_CASE(LG2, LG2);
NV50_IR_OPCODE_CASE(POW, POW);
- NV50_IR_OPCODE_CASE(ABS, ABS);
-
NV50_IR_OPCODE_CASE(COS, COS);
NV50_IR_OPCODE_CASE(DDX, DFDX);
NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
}
break;
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_ABS:
case TGSI_OPCODE_CEIL:
case TGSI_OPCODE_FLR:
case TGSI_OPCODE_TRUNC:
sat = finst->Instruction.Saturate;
switch (finst->Instruction.Opcode) {
- case TGSI_OPCODE_ABS:
- nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, abs(src[0]), none, none));
- break;
case TGSI_OPCODE_ADD:
nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
break;
}
switch (finst->Instruction.Opcode) {
- case TGSI_OPCODE_ABS:
- nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, abs(src[0]), none, none));
- break;
case TGSI_OPCODE_ADD:
nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, src[1]));
break;
case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
case TGSI_OPCODE_POW: return RC_OPCODE_POW;
case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
- case TGSI_OPCODE_ABS: return RC_OPCODE_ABS;
case TGSI_OPCODE_DPH: return RC_OPCODE_DPH;
case TGSI_OPCODE_COS: return RC_OPCODE_COS;
case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
case TGSI_OPCODE_SUB:
r600_bytecode_src_toggle_neg(&alu.src[1]);
break;
- case TGSI_OPCODE_ABS:
- r600_bytecode_src_set_abs(&alu.src[0]);
- break;
default:
break;
}
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
[TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2},
+ [33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
[TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2},
+ [33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow},
[TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2},
+ [33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig},
{
lp_set_default_actions(bld_base);
- bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
translate_opcode(uint opcode)
{
switch (opcode) {
- case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
}
-/**
- * Emit code for TGSI_OPCODE_ABS instruction.
- */
-static boolean
-emit_abs(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /* dst = ABS(s0):
- * dst = abs(s0)
- * Translates into:
- * MOV dst, abs(s0)
- */
- struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]);
-
- /* MOV dst, abs(s0) */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
- &abs_src0, inst->Instruction.Saturate);
-
- return TRUE;
-}
-
-
/**
* Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
*/
return emit_vertex(emit, inst);
case TGSI_OPCODE_ENDPRIM:
return emit_endprim(emit, inst);
- case TGSI_OPCODE_ABS:
- return emit_abs(emit, inst);
case TGSI_OPCODE_IABS:
return emit_iabs(emit, inst);
case TGSI_OPCODE_ARL:
#define TGSI_OPCODE_POW 30
#define TGSI_OPCODE_XPD 31
#define TGSI_OPCODE_U2I64 32
-#define TGSI_OPCODE_ABS 33
+/* gap */
#define TGSI_OPCODE_I2I64 34
#define TGSI_OPCODE_DPH 35
#define TGSI_OPCODE_COS 36
this->index = index;
this->swizzle = swizzle_for_type(type, component);
this->negate = 0;
+ this->abs = 0;
this->index2D = 0;
this->type = type ? type->base_type : GLSL_TYPE_ERROR;
this->reladdr = NULL;
this->index2D = 0;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
+ this->abs = 0;
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
this->index2D = index2D;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
+ this->abs = 0;
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
this->index2D = 0;
this->swizzle = 0;
this->negate = 0;
+ this->abs = 0;
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
int16_t index2D;
uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
int negate:4; /**< NEGATE_XYZW mask from mesa */
+ unsigned abs:1;
enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
unsigned has_index2:1;
gl_register_file file:5; /**< PROGRAM_* from Mesa */
/** Register index should be offset by the integer in this reg. */
st_src_reg *reladdr;
st_src_reg *reladdr2;
+
+ st_src_reg get_abs()
+ {
+ st_src_reg reg = *this;
+ reg.negate = 0;
+ reg.abs = 1;
+ return reg;
+ }
};
class st_dst_reg {
this->index = reg.index;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
+ this->abs = 0;
this->reladdr = reg.reladdr;
this->index2D = reg.index2D;
this->reladdr2 = reg.reladdr2;
case2iu(ISHR, USHR);
case3fid(SSG, ISSG, DSSG);
- case3fid(ABS, IABS, DABS);
case2iu(IBFE, UBFE);
case2iu(IMSB, UMSB);
src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
src.reladdr = NULL;
src.negate = 0;
+ src.abs = 0;
if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
if (next_array >= max_num_arrays) {
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
break;
case ir_unop_abs:
- emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
+ if (result_dst.type == GLSL_TYPE_FLOAT)
+ emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
+ else if (result_dst.type == GLSL_TYPE_DOUBLE)
+ emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]);
break;
case ir_unop_sign:
emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
* we want, I choose to use ABS to match DX9 and pre-GLSL RSQ
* behavior.
*/
- emit_scalar(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
- emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, result_src);
+ emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0].get_abs());
emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src);
}
break;
case ir_unop_bitcast_f2i:
case ir_unop_bitcast_f2u:
/* Make sure we don't propagate the negate modifier to integer opcodes. */
- if (op[0].negate)
+ if (op[0].negate || op[0].abs)
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
else
result_src = op[0];
cbuf.index = 0;
cbuf.reladdr = NULL;
cbuf.negate = 0;
+ cbuf.abs = 0;
assert(ir->type->is_vector() || ir->type->is_scalar());
inst->src[0].file != PROGRAM_ARRAY &&
!inst->src[0].reladdr &&
!inst->src[0].reladdr2 &&
- !inst->src[0].negate) {
+ !inst->src[0].negate &&
+ !inst->src[0].abs) {
for (int i = 0; i < 4; i++) {
if (inst->dst[0].writemask & (1 << i)) {
acp[4 * inst->dst[0].index + i] = inst;
GET_SWZ(src_reg->swizzle, 2) & 0x3,
GET_SWZ(src_reg->swizzle, 3) & 0x3);
+ if (src_reg->abs)
+ src = ureg_abs(src);
+
if ((src_reg->negate & 0xf) == NEGATE_XYZW)
src = ureg_negate(src);
switch( op ) {
case OPCODE_ARL:
return TGSI_OPCODE_ARL;
- case OPCODE_ABS:
- return TGSI_OPCODE_ABS;
case OPCODE_ADD:
return TGSI_OPCODE_ADD;
case OPCODE_CMP:
ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) );
break;
+ case OPCODE_ABS:
+ ureg_MOV(ureg, dst[0], ureg_abs(src[0]));
+ break;
+
default:
ureg_insn( ureg,
translate_opcode( inst->Opcode ),