return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDIL_exp:
return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_fabs:
+ return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
case AMDGPUIntrinsic::AMDIL_fraction:
"MASK_WRITE $src",
[]
>;
-}
+
+let isPseudo = 1, usesCustomInserter = 1 in {
+
+class FABS <RegisterClass rc> : AMDGPUShaderInst < // fabs instruction template, parameterized by register class; defined under isPseudo/usesCustomInserter
+ (outs rc:$dst), // single result in register class rc
+ (ins rc:$src0), // single input in the same register class
+ "FABS $dst, $src0", // assembly string
+ [(set rc:$dst, (fabs rc:$src0))] // selection pattern: matches the fabs node on rc
+>;
+
+} // End isPseudo = 1, usesCustomInserter = 1
+
+} // End isCodeGenOnly = 1
/* Generic helper patterns for intrinsics */
/* -------------------------------------- */
// float math instructions start here
//===---------------------------------------------------------------------===//
let mayLoad=0, mayStore=0 in {
-defm ABS : UnaryIntrinsicFloat<IL_OP_ABS, int_AMDIL_fabs>;
defm PIREDUCE : UnaryIntrinsicFloat<IL_OP_PI_REDUCE, int_AMDIL_pireduce>;
defm ROUND_NEGINF : UnaryIntrinsicFloat<IL_OP_ROUND_NEG_INF,
int_AMDIL_round_neginf>;
defm LERP : TernaryIntrinsicFloat<IL_OP_LERP, int_AMDIL_lerp>;
}
defm SUB : BinaryOpMCf32<IL_OP_SUB, fsub>;
-defm FABS : UnaryOpMCf32<IL_OP_ABS, fabs>;
defm NEAR : UnaryOpMCf32<IL_OP_ROUND_NEAR, fnearbyint>;
defm RND_Z : UnaryOpMCf32<IL_OP_ROUND_ZERO, ftrunc>;
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
+#include "AMDGPUUtil.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
lowerImplicitParameter(MI, *BB, MRI, 8);
break;
+ case AMDIL::FABS_R600: // fabs pseudo: rewritten as a MOV whose source operand carries the ABS flag
+ MI->getOperand(1).addTargetFlag(MO_FLAG_ABS); // mark the source operand with MO_FLAG_ABS before it is copied onto the MOV
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV))
+ .addOperand(MI->getOperand(0)) // operand 0 of the pseudo (presumably $dst — confirm against the FABS class)
+ .addOperand(MI->getOperand(1)); // operand 1 of the pseudo, now flagged ABS
+ break; // NOTE(review): unlike the FABS_SI arm, MI is not erased here — presumably done after the switch; confirm
+
case AMDIL::R600_LOAD_CONST:
{
int64_t RegIndex = MI->getOperand(1).getImm();
} // End isCodeGenOnly = 1
-
+def FABS_R600 : FABS<R600_Reg32>;
let isPseudo = 1 in {
.addOperand(MI.getOperand(1));
break;
- /* XXX: We could propagate the ABS flag to all of the uses of Operand0 and
- * remove the ABS instruction.*/
- case AMDIL::FABS_f32:
- case AMDIL::ABS_f32:
- MI.getOperand(1).addTargetFlag(MO_FLAG_ABS);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::MOVE_f32))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1));
- break;
-
case AMDIL::CLAMP_f32:
{
MachineOperand lowOp = MI.getOperand(2);
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+
+ case AMDIL::FABS_SI: // fabs pseudo: rewritten as V_MOV_B32_e64 with the ABS immediate set to 1
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64))
+ .addOperand(MI->getOperand(0)) // operand 0 of the pseudo (presumably $dst — confirm against the FABS class)
+ .addOperand(MI->getOperand(1)) // operand 1 of the pseudo, used as VSRC0
+ /* VSRC1-2 are unused, but we still need to fill all the
+ * operand slots, so we just reuse the VSRC0 operand */
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(1))
+ .addImm(1) // ABS
+ .addImm(0) // CLAMP
+ .addImm(0) // OMOD
+ .addImm(0); // NEG
+ MI->eraseFromParent(); // remove the pseudo now that its replacement has been built
+ break;
+
case AMDIL::SI_INTERP:
LowerSI_INTERP(MI, *BB, I, MRI);
break;
switch (MI.getOpcode()) {
default: break;
- case AMDIL::ABS_f32: return convertABS_f32(MI, MF, DL);
case AMDIL::CLAMP_f32: return convertCLAMP_f32(MI, MF, DL);
}
}
}
-MachineInstr * SIInstrInfo::convertABS_f32(MachineInstr & absInstr,
- MachineFunction &MF, DebugLoc DL) const
-{
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineOperand &dst = absInstr.getOperand(0);
-
- /* Convert the desination register to the VReg_32 class */
- if (TargetRegisterInfo::isVirtualRegister(dst.getReg())) {
- MRI.setRegClass(dst.getReg(), AMDIL::VReg_32RegisterClass);
- }
-
- return BuildMI(MF, DL, get(AMDIL::V_MOV_B32_e64))
- .addOperand(absInstr.getOperand(0))
- .addOperand(absInstr.getOperand(1))
- /* VSRC1-2 are unused, but we still need to fill all the
- * operand slots, so we just reuse the VSRC0 operand */
- .addOperand(absInstr.getOperand(1))
- .addOperand(absInstr.getOperand(1))
- .addImm(1) // ABS
- .addImm(0) // CLAMP
- .addImm(0) // OMOD
- .addImm(0); // NEG
-}
-
MachineInstr * SIInstrInfo::convertCLAMP_f32(MachineInstr & clampInstr,
MachineFunction &MF, DebugLoc DL) const
{
const SIRegisterInfo RI;
AMDGPUTargetMachine &TM;
- MachineInstr * convertABS_f32(MachineInstr & absInstr, MachineFunction &MF,
- DebugLoc DL) const;
-
MachineInstr * convertCLAMP_f32(MachineInstr & clampInstr,
MachineFunction &MF, DebugLoc DL) const;
(S_LOAD_DWORDX4_IMM imm:$sampler_offset, SReg_64:$sampler)) /* Sampler */
>;
+def FABS_SI : FABS<VReg_32>;
def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;