From fb40f88338b6af23faae03ced5906add8507db26 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Thu, 6 Sep 2012 22:45:38 +0200 Subject: [PATCH] radeon/llvm: support for interpolation intrinsics Reviewed-by: Tom Stellard --- .../drivers/radeon/AMDGPUISelLowering.cpp | 2 + .../drivers/radeon/AMDGPUISelLowering.h | 2 + .../radeon/R600ExpandSpecialInstrs.cpp | 129 ++++++++++++++++++ .../drivers/radeon/R600ISelLowering.cpp | 88 +++++++++++- src/gallium/drivers/radeon/R600ISelLowering.h | 1 + .../drivers/radeon/R600Instructions.td | 54 ++++++++ .../drivers/radeon/R600IntrinsicsNoOpenCL.td | 10 ++ .../drivers/radeon/R600IntrinsicsOpenCL.td | 10 ++ .../radeon/R600MachineFunctionInfo.cpp | 19 ++- .../drivers/radeon/R600MachineFunctionInfo.h | 5 + 10 files changed, 318 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp index d6304a2307e..04dadc398d4 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp @@ -346,5 +346,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) NODE_NAME_CASE(URECIP) + NODE_NAME_CASE(INTERP) + NODE_NAME_CASE(INTERP_P0) } } diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h index a6d2a50d11b..2d8ed82c117 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h @@ -119,6 +119,8 @@ enum SMIN, UMIN, URECIP, + INTERP, + INTERP_P0, LAST_AMDGPU_ISD_NUMBER }; diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp index 69ab0ffee8c..d6184e55302 100644 --- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp +++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp @@ -15,6 +15,7 @@ #include "R600Defines.h" #include "R600InstrInfo.h" #include 
"R600RegisterInfo.h" +#include "R600MachineFunctionInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -29,6 +30,9 @@ private: static char ID; const R600InstrInfo *TII; + bool ExpandInputPerspective(MachineInstr& MI); + bool ExpandInputConstant(MachineInstr& MI); + public: R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), TII (static_cast(tm.getInstrInfo())) { } @@ -48,6 +52,126 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { return new R600ExpandSpecialInstrsPass(TM); } +bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) +{ + const R600RegisterInfo &TRI = TII->getRegisterInfo(); + if (MI.getOpcode() != AMDGPU::input_perspective) + return false; + + MachineBasicBlock::iterator I = &MI; + unsigned DstReg = MI.getOperand(0).getReg(); + R600MachineFunctionInfo *MFI = MI.getParent()->getParent() + ->getInfo(); + unsigned IJIndexBase; + + // In Evergreen ISA doc section 8.3.2 : + // We need to interpolate XY and ZW in two different instruction groups. + // An INTERP_* must occupy all 4 slots of an instruction group. 
+ // Output of INTERP_XY is written in X,Y slots + // Output of INTERP_ZW is written in Z,W slots + // + // Thus interpolation requires the following sequences : + // + // AnyGPR.x = INTERP_ZW; (Write Masked Out) + // AnyGPR.y = INTERP_ZW; (Write Masked Out) + // DstGPR.z = INTERP_ZW; + // DstGPR.w = INTERP_ZW; (End of first IG) + // DstGPR.x = INTERP_XY; + // DstGPR.y = INTERP_XY; + // AnyGPR.z = INTERP_XY; (Write Masked Out) + // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG) + // + switch (MI.getOperand(1).getImm()) { + case 0: + IJIndexBase = MFI->GetIJPerspectiveIndex(); + break; + case 1: + IJIndexBase = MFI->GetIJLinearIndex(); + break; + default: + assert(0 && "Unknow ij index"); + } + + for (unsigned i = 0; i < 8; i++) { + unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister( + 2 * IJIndexBase + ((i + 1) % 2)); + unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister( + 4 * MI.getOperand(2).getImm()); + + unsigned Sel; + switch (i % 4) { + case 0:Sel = AMDGPU::sel_x;break; + case 1:Sel = AMDGPU::sel_y;break; + case 2:Sel = AMDGPU::sel_z;break; + case 3:Sel = AMDGPU::sel_w;break; + default:break; + } + + unsigned Res = TRI.getSubReg(DstReg, Sel); + + const MCInstrDesc &Opcode = (i < 4)? 
+ TII->get(AMDGPU::INTERP_ZW): + TII->get(AMDGPU::INTERP_XY); + + MachineInstr *NewMI = BuildMI(*(MI.getParent()), + I, MI.getParent()->findDebugLoc(I), + Opcode, Res) + .addReg(IJIndex) + .addReg(ReadReg) + .addImm(0); + + if (!(i> 1 && i < 6)) { + TII->addFlag(NewMI, 0, MO_FLAG_MASK); + } + + if (i % 4 != 3) + TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); + } + + MI.eraseFromParent(); + + return true; +} + +bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) +{ + const R600RegisterInfo &TRI = TII->getRegisterInfo(); + if (MI.getOpcode() != AMDGPU::input_constant) + return false; + + MachineBasicBlock::iterator I = &MI; + unsigned DstReg = MI.getOperand(0).getReg(); + + for (unsigned i = 0; i < 4; i++) { + unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister( + 4 * MI.getOperand(1).getImm() + i); + + unsigned Sel; + switch (i % 4) { + case 0:Sel = AMDGPU::sel_x;break; + case 1:Sel = AMDGPU::sel_y;break; + case 2:Sel = AMDGPU::sel_z;break; + case 3:Sel = AMDGPU::sel_w;break; + default:break; + } + + unsigned Res = TRI.getSubReg(DstReg, Sel); + + MachineInstr *NewMI = BuildMI(*(MI.getParent()), + I, MI.getParent()->findDebugLoc(I), + TII->get(AMDGPU::INTERP_LOAD_P0), Res) + .addReg(ReadReg) + .addImm(0); + + if (i % 4 != 3) + TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); + } + + MI.eraseFromParent(); + + return true; +} + bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { const R600RegisterInfo &TRI = TII->getRegisterInfo(); @@ -59,6 +183,11 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { while (I != MBB.end()) { MachineInstr &MI = *I; I = llvm::next(I); + + if (ExpandInputPerspective(MI)) + continue; + if (ExpandInputConstant(MI)) + continue; bool IsReduction = TII->isReductionOp(MI.getOpcode()); bool IsVector = TII->isVector(MI); diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index 6dded2fec37..2fc9c6708ef 100644 --- 
a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -44,6 +44,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); setOperationAction(ISD::ROTL, MVT::i32, Custom); @@ -240,6 +241,29 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); break; } + case AMDGPU::input_perspective: + { + R600MachineFunctionInfo *MFI = MF->getInfo(); + + // XXX Be more fine about register reservation + for (unsigned i = 0; i < 4; i ++) { + unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i); + MFI->ReservedRegs.push_back(ReservedReg); + } + + switch (MI->getOperand(1).getImm()) { + case 0:// Perspective + MFI->HasPerspectiveInterpolation = true; + break; + case 1:// Linear + MFI->HasLinearInterpolation = true; + break; + default: + assert(0 && "Unknow ij index"); + } + + return BB; + } } MI->eraseFromParent(); @@ -294,7 +318,48 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT); } - + case AMDGPUIntrinsic::R600_load_input_perspective: { + unsigned slot = cast(Op.getOperand(1))->getZExtValue(); + SDValue FullVector = DAG.getNode( + AMDGPUISD::INTERP, + DL, MVT::v4f32, + DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); + } + case AMDGPUIntrinsic::R600_load_input_linear: { + unsigned slot = cast(Op.getOperand(1))->getZExtValue(); + SDValue FullVector = DAG.getNode( + AMDGPUISD::INTERP, + DL, MVT::v4f32, + DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , 
MVT::i32)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); + } + case AMDGPUIntrinsic::R600_load_input_constant: { + unsigned slot = cast(Op.getOperand(1))->getZExtValue(); + SDValue FullVector = DAG.getNode( + AMDGPUISD::INTERP_P0, + DL, MVT::v4f32, + DAG.getConstant(slot / 4 , MVT::i32)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); + } + case AMDGPUIntrinsic::R600_load_input_position: { + unsigned slot = cast(Op.getOperand(1))->getZExtValue(); + unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot); + SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + RegIndex, MVT::f32); + if ((slot % 4) == 3) { + return DAG.getNode(ISD::FDIV, + DL, VT, + DAG.getConstantFP(1.0f, MVT::f32), + Reg); + } else { + return Reg; + } + } + case r600_read_ngroups_x: return LowerImplicitParameter(DAG, VT, DL, 0); case r600_read_ngroups_y: @@ -347,9 +412,30 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: return; case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + case ISD::INTRINSIC_WO_CHAIN: + { + unsigned IntrinsicID = + cast(N->getOperand(0))->getZExtValue(); + if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) { + Results.push_back(LowerInputFace(N, DAG)); + } else { + return; + } + } } } +SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const +{ + unsigned slot = cast(Op->getOperand(1))->getZExtValue(); + unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot); + SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + RegIndex, MVT::f32); + return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1, + Reg, DAG.getConstantFP(0.0f, MVT::f32), + DAG.getCondCode(ISD::SETUGT)); +} + SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( diff --git 
//===----------------------------------------------------------------------===//
// Interpolation Instructions
//===----------------------------------------------------------------------===//

// DAG node with one floating-point result and two integer operands.
def INTERP: SDNode<"AMDGPUISD::INTERP",
  SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
  >;

// DAG node with one floating-point result and one integer operand.
def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
  >;

// Pseudo instruction selected for INTERP. It goes through the custom
// inserter and is later rewritten into INTERP_ZW / INTERP_XY.
let usesCustomInserter = 1 in {
def input_perspective :  AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src0, i32imm:$src1),
  "input_perspective $src0 $src1 : dst",
  [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
}  // End usesCustomInserter = 1

// Pseudo instruction selected for INTERP_P0; later rewritten into
// INTERP_LOAD_P0. The asm string names this instruction (it previously
// repeated "input_perspective", a copy/paste slip).
def input_constant :  AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src),
  "input_constant $src : dst",
  [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;



// For the three definitions below FlagOperandIdx is the position of $flags
// when outs and ins are counted together starting at 0.
def INTERP_XY : InstR600 <0xD6,
  (outs R600_Reg32:$dst),
  (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
  "INTERP_XY dst",
  [], AnyALU>
{
  let FlagOperandIdx = 3;
}

def INTERP_ZW : InstR600 <0xD7,
  (outs R600_Reg32:$dst),
  (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
  "INTERP_ZW dst",
  [], AnyALU>
{
  let FlagOperandIdx = 3;
}

def INTERP_LOAD_P0 : InstR600 <0xE0,
  (outs R600_Reg32:$dst),
  (ins R600_Reg32:$src, i32imm:$flags),
  "INTERP_LOAD_P0 dst",
  [], AnyALU>
{
  let FlagOperandIdx = 2;
}
: + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; + def int_R600_load_input_position : + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; + def int_R600_load_input_face : + Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>; } diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp index 48443fb57d8..a31848efc99 100644 --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp @@ -12,5 +12,22 @@ using namespace llvm; R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) - : MachineFunctionInfo() + : MachineFunctionInfo(), + HasLinearInterpolation(false), + HasPerspectiveInterpolation(false) { } + +unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const +{ + assert(HasPerspectiveInterpolation); + return 0; +} + +unsigned R600MachineFunctionInfo::GetIJLinearIndex() const +{ + assert(HasLinearInterpolation); + if (HasPerspectiveInterpolation) + return 1; + else + return 0; +} diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h index 948e1924272..68211b25813 100644 --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h @@ -25,6 +25,11 @@ class R600MachineFunctionInfo : public MachineFunctionInfo { public: R600MachineFunctionInfo(const MachineFunction &MF); std::vector ReservedRegs; + bool HasLinearInterpolation; + bool HasPerspectiveInterpolation; + + unsigned GetIJLinearIndex() const; + unsigned GetIJPerspectiveIndex() const; }; -- 2.30.2