From 17f852892346fdf3b1e9eec56b7a55c470279bc8 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 25 May 2012 10:29:09 -0400 Subject: [PATCH] radeon/llvm: Use a custom inserter to lower FABS --- .../drivers/radeon/AMDGPUISelLowering.cpp | 2 ++ .../drivers/radeon/AMDGPUInstructions.td | 14 ++++++++++- .../drivers/radeon/AMDILInstructions.td | 2 -- .../drivers/radeon/R600ISelLowering.cpp | 8 ++++++ .../drivers/radeon/R600Instructions.td | 2 +- .../drivers/radeon/R600LowerInstructions.cpp | 10 -------- src/gallium/drivers/radeon/SIISelLowering.cpp | 16 ++++++++++++ src/gallium/drivers/radeon/SIInstrInfo.cpp | 25 ------------------- src/gallium/drivers/radeon/SIInstrInfo.h | 3 --- src/gallium/drivers/radeon/SIInstructions.td | 1 + 10 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp index ab036d1df88..9d076bdf568 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp @@ -64,6 +64,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return LowerIntrinsicIABS(Op, DAG); case AMDGPUIntrinsic::AMDIL_exp: return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDIL_fabs: + return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1)); case AMDGPUIntrinsic::AMDGPU_lrp: return LowerIntrinsicLRP(Op, DAG); case AMDGPUIntrinsic::AMDIL_fraction: diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td index 291081fef43..e1ace508e88 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstructions.td +++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td @@ -47,7 +47,19 @@ let isCodeGenOnly = 1 in { "MASK_WRITE $src", [] >; -} + +let isPseudo = 1, usesCustomInserter = 1 in { + +class FABS : AMDGPUShaderInst < + (outs rc:$dst), + (ins rc:$src0), + "FABS $dst, $src0", + [(set rc:$dst, (fabs rc:$src0))] +>; + +} // End 
isPseudo = 1, usesCustomInserter = 1 + +} // End isCodeGenOnly = 1 /* Generic helper patterns for intrinsics */ /* -------------------------------------- */ diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td index 4d9c1637ad9..6f1f4d55ca9 100644 --- a/src/gallium/drivers/radeon/AMDILInstructions.td +++ b/src/gallium/drivers/radeon/AMDILInstructions.td @@ -196,7 +196,6 @@ def LUSHR : TwoInOneOut; defm PIREDUCE : UnaryIntrinsicFloat; defm ROUND_NEGINF : UnaryIntrinsicFloat; @@ -236,7 +235,6 @@ defm FMA : TernaryIntrinsicFloat; defm LERP : TernaryIntrinsicFloat; } defm SUB : BinaryOpMCf32; -defm FABS : UnaryOpMCf32; defm NEAR : UnaryOpMCf32; defm RND_Z : UnaryOpMCf32; diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index 05a31d3ff9b..4d789cb22b6 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "R600ISelLowering.h" +#include "AMDGPUUtil.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -100,6 +101,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( lowerImplicitParameter(MI, *BB, MRI, 8); break; + case AMDIL::FABS_R600: + MI->getOperand(1).addTargetFlag(MO_FLAG_ABS); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)); + break; + case AMDIL::R600_LOAD_CONST: { int64_t RegIndex = MI->getOperand(1).getImm(); diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index b6b9fe002ad..cb9a7bba3d6 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -1078,7 +1078,7 @@ def TXD_SHADOW: AMDGPUShaderInst < } // 
End isCodeGenOnly = 1 - +def FABS_R600 : FABS; let isPseudo = 1 in { diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp index 1946708f681..3a1a12e635f 100644 --- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp +++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp @@ -83,16 +83,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) .addOperand(MI.getOperand(1)); break; - /* XXX: We could propagate the ABS flag to all of the uses of Operand0 and - * remove the ABS instruction.*/ - case AMDIL::FABS_f32: - case AMDIL::ABS_f32: - MI.getOperand(1).addTargetFlag(MO_FLAG_ABS); - BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::MOVE_f32)) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)); - break; - case AMDIL::CLAMP_f32: { MachineOperand lowOp = MI.getOperand(2); diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index 4615db24266..e192af091b6 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -45,6 +45,22 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + + case AMDIL::FABS_SI: + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + /* VSRC1-2 are unused, but we still need to fill all the + * operand slots, so we just reuse the VSRC0 operand */ + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(1)) + .addImm(1) // ABS + .addImm(0) // CLAMP + .addImm(0) // OMOD + .addImm(0); // NEG + MI->eraseFromParent(); + break; + case AMDIL::SI_INTERP: LowerSI_INTERP(MI, *BB, I, MRI); break; diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp index d0b39d0ba01..df2cd9bb023 
100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.cpp +++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp @@ -83,7 +83,6 @@ MachineInstr * SIInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, switch (MI.getOpcode()) { default: break; - case AMDIL::ABS_f32: return convertABS_f32(MI, MF, DL); case AMDIL::CLAMP_f32: return convertCLAMP_f32(MI, MF, DL); } @@ -113,30 +112,6 @@ unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const } } -MachineInstr * SIInstrInfo::convertABS_f32(MachineInstr & absInstr, - MachineFunction &MF, DebugLoc DL) const -{ - MachineRegisterInfo &MRI = MF.getRegInfo(); - MachineOperand &dst = absInstr.getOperand(0); - - /* Convert the desination register to the VReg_32 class */ - if (TargetRegisterInfo::isVirtualRegister(dst.getReg())) { - MRI.setRegClass(dst.getReg(), AMDIL::VReg_32RegisterClass); - } - - return BuildMI(MF, DL, get(AMDIL::V_MOV_B32_e64)) - .addOperand(absInstr.getOperand(0)) - .addOperand(absInstr.getOperand(1)) - /* VSRC1-2 are unused, but we still need to fill all the - * operand slots, so we just reuse the VSRC0 operand */ - .addOperand(absInstr.getOperand(1)) - .addOperand(absInstr.getOperand(1)) - .addImm(1) // ABS - .addImm(0) // CLAMP - .addImm(0) // OMOD - .addImm(0); // NEG -} - MachineInstr * SIInstrInfo::convertCLAMP_f32(MachineInstr & clampInstr, MachineFunction &MF, DebugLoc DL) const { diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h index 24f7a56f638..1d137d4efd4 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.h +++ b/src/gallium/drivers/radeon/SIInstrInfo.h @@ -25,9 +25,6 @@ private: const SIRegisterInfo RI; AMDGPUTargetMachine &TM; - MachineInstr * convertABS_f32(MachineInstr & absInstr, MachineFunction &MF, - DebugLoc DL) const; - MachineInstr * convertCLAMP_f32(MachineInstr & clampInstr, MachineFunction &MF, DebugLoc DL) const; diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td index 
8505df9b0ce..87034684f0b 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -907,6 +907,7 @@ def : Pat < (S_LOAD_DWORDX4_IMM imm:$sampler_offset, SReg_64:$sampler)) /* Sampler */ >; +def FABS_SI : FABS; def : Extract_Element ; def : Extract_Element ; -- 2.30.2