From: Tom Stellard Date: Thu, 24 May 2012 16:17:58 +0000 (-0400) Subject: radeon/llvm: Lower UDIV using the Selection DAG X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=33e7db9a1dafdcf5c7c745180831403e0485544d;p=mesa.git radeon/llvm: Lower UDIV using the Selection DAG --- diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp index ea7046102e8..ab036d1df88 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp @@ -33,6 +33,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FEXP2, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::UDIV, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); } SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) @@ -42,6 +44,10 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) default: return AMDILTargetLowering::LowerOperation(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::UDIV: + return DAG.getNode(ISD::UDIVREM, Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(0), Op.getOperand(1)).getValue(0); + case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); } } @@ -227,6 +233,114 @@ SDValue AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op, return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); } + +SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, + SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + SDValue Num = Op.getOperand(0); + SDValue Den = Op.getOperand(1); + + SmallVector Results; + + // RCP = URECIP(Den) = 2^32 / Den + e + // e is rounding error. 
+ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); + + // RCP_LO = umulo(RCP, Den) + SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); + + // RCP_HI = mulhu(RCP, Den) + SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); + + // NEG_RCP_LO = -RCP_LO + SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), + RCP_LO); + + // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) + SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), + NEG_RCP_LO, RCP_LO, + ISD::SETEQ); + // Calculate the rounding error from the URECIP instruction + // E = mulhu(ABS_RCP_LO, RCP) + SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); + + // RCP_A_E = RCP + E + SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); + + // RCP_S_E = RCP - E + SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); + + // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E) + SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), + RCP_A_E, RCP_S_E, + ISD::SETEQ); + // Quotient = mulhu(Tmp0, Num) + SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); + + // Num_S_Remainder = Quotient * Den + SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); + + // Remainder = Num - Num_S_Remainder + SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); + + // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) + SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, + DAG.getConstant(-1, VT), + DAG.getConstant(0, VT), + ISD::SETGE); + // Remainder_GE_Zero = (Remainder >= 0 ? 
-1 : 0) + SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, + DAG.getConstant(0, VT), + DAG.getConstant(-1, VT), + DAG.getConstant(0, VT), + ISD::SETGE); + // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero + SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, + Remainder_GE_Zero); + + // Calculate Division result: + + // Quotient_A_One = Quotient + 1 + SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, + DAG.getConstant(1, VT)); + + // Quotient_S_One = Quotient - 1 + SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, + DAG.getConstant(1, VT)); + + // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) + SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), + Quotient, Quotient_A_One, ISD::SETEQ); + + // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) + Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), + Quotient_S_One, Div, ISD::SETEQ); + + // Calculate Rem result: + + // Remainder_S_Den = Remainder - Den + SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); + + // Remainder_A_Den = Remainder + Den + SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); + + // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) + SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), + Remainder, Remainder_S_Den, ISD::SETEQ); + + // Rem = (Remainder_GE_Zero == 0 ? 
Remainder_A_Den : Rem) + Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), + Remainder_A_Den, Rem, ISD::SETEQ); + + DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div); + DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem); + + return Op; +} + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// @@ -274,5 +388,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const NODE_NAME_CASE(FMIN) NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) + NODE_NAME_CASE(URECIP) } } diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h index 81bc608cebf..9c61cc7f0a6 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h @@ -24,6 +24,7 @@ class AMDGPUTargetLowering : public AMDILTargetLowering private: SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; protected: @@ -63,6 +64,7 @@ enum FMIN, SMIN, UMIN, + URECIP, LAST_AMDGPU_ISD_NUMBER }; diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.td b/src/gallium/drivers/radeon/AMDGPUInstrInfo.td index f511d3b9b57..5e44ef9c40b 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.td +++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.td @@ -47,3 +47,9 @@ def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp, def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, [SDNPCommutative, SDNPAssociative] >; + +// urecip - This operation is a helper for integer division, it returns the +// result of 1 / a as a fractional unsigned integer. 
+// out = (2^32 / a) + e +// e is rounding error +def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp index dae63fa2264..96d7e518ba4 100644 --- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp @@ -650,7 +650,6 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const setOperationAction(ISD::UMUL_LOHI, VT, Expand); if (VT != MVT::i64 && VT != MVT::v2i64) { setOperationAction(ISD::SDIV, VT, Custom); - setOperationAction(ISD::UDIV, VT, Custom); } setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -730,7 +729,6 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const setOperationAction(ISD::ADD, MVT::v2i64, Expand); setOperationAction(ISD::SREM, MVT::v2i64, Expand); setOperationAction(ISD::Constant , MVT::i64 , Legal); - setOperationAction(ISD::UDIV, MVT::v2i64, Expand); setOperationAction(ISD::SDIV, MVT::v2i64, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand); @@ -1505,7 +1503,6 @@ AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const LOWER(FDIV); LOWER(SDIV); LOWER(SREM); - LOWER(UDIV); LOWER(UREM); LOWER(BUILD_VECTOR); LOWER(INSERT_VECTOR_ELT); @@ -2811,24 +2808,6 @@ AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const return DST; } -SDValue -AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const -{ - EVT OVT = Op.getValueType(); - SDValue DST; - if (OVT.getScalarType() == MVT::i64) { - DST = LowerUDIV64(Op, DAG); - } else if (OVT.getScalarType() == MVT::i32) { - DST = LowerUDIV32(Op, DAG); - } else if (OVT.getScalarType() == MVT::i16 - || OVT.getScalarType() == MVT::i8) { - DST = LowerUDIV24(Op, DAG); - } else { - DST = SDValue(Op.getNode(), 0); - } - return DST; -} - SDValue 
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const { @@ -3960,17 +3939,6 @@ AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const } -SDValue -AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const -{ - return SDValue(Op.getNode(), 0); -} - -SDValue -AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const -{ - return SDValue(Op.getNode(), 0); -} SDValue AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const { diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp index 684c845e076..99153574675 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp @@ -100,38 +100,3 @@ unsigned R600InstrInfo::getLSHRop() const return AMDIL::LSHR_eg; } } - -unsigned R600InstrInfo::getMULHI_UINT() const -{ - unsigned gen = TM.getSubtarget().device()->getGeneration(); - - if (gen < AMDILDeviceInfo::HD5XXX) { - return AMDIL::MULHI_UINT_r600; - } else { - return AMDIL::MULHI_UINT_eg; - } -} - -unsigned R600InstrInfo::getMULLO_UINT() const -{ - unsigned gen = TM.getSubtarget().device()->getGeneration(); - - if (gen < AMDILDeviceInfo::HD5XXX) { - return AMDIL::MULLO_UINT_r600; - } else { - return AMDIL::MULLO_UINT_eg; - } -} - -unsigned R600InstrInfo::getRECIP_UINT() const -{ - const AMDILDevice * dev = TM.getSubtarget().device(); - - if (dev->getGeneration() < AMDILDeviceInfo::HD5XXX) { - return AMDIL::RECIP_UINT_r600; - } else if (dev->getDeviceFlag() != OCL_DEVICE_CAYMAN) { - return AMDIL::RECIP_UINT_eg; - } else { - return AMDIL::RECIP_UINT_cm; - } -} diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h index a499b83b5ef..9dca4839090 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.h +++ b/src/gallium/drivers/radeon/R600InstrInfo.h @@ -46,9 +46,6 @@ namespace llvm { unsigned getLSHRop() const; unsigned getASHRop() const; - unsigned getMULHI_UINT() const; - 
unsigned getMULLO_UINT() const; - unsigned getRECIP_UINT() const; }; diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 3d93d33b46c..b56d03cae4f 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -649,8 +649,8 @@ class MULHI_INT_Common inst> : R600_2OP < >; class MULHI_UINT_Common inst> : R600_2OP < - inst, "MULHI $dst, $src0, $src1", - [] + inst, "MULHI $dst, $src0, $src1", + [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))] >; class MULLO_INT_Common inst> : R600_2OP < @@ -675,7 +675,7 @@ class RECIP_IEEE_Common inst> : R600_1OP < class RECIP_UINT_Common inst> : R600_1OP < inst, "RECIP_INT $dst, $src", - [] + [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))] >; class RECIPSQRT_CLAMPED_Common inst> : R600_1OP < diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp index 75113683032..1946708f681 100644 --- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp +++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp @@ -45,11 +45,6 @@ namespace { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void divMod(MachineInstr &MI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - bool div = true) const; - public: R600LowerInstructionsPass(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm), @@ -115,10 +110,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) break; } - case AMDIL::UDIV_i32: - divMod(MI, MBB, I); - break; - /* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */ /* case AMDIL::DIV_INF_f32: { @@ -322,133 +313,3 @@ void R600LowerInstructionsPass::calcAddress(const MachineOperand &ptrOp, .addOperand(ptrOp); } } - -/* Mostly copied from tgsi_divmod() in r600_shader.c */ -void R600LowerInstructionsPass::divMod(MachineInstr &MI, - MachineBasicBlock &MBB, - 
MachineBasicBlock::iterator I, - bool div) const -{ - unsigned dst = MI.getOperand(0).getReg(); - MachineOperand &numerator = MI.getOperand(1); - MachineOperand &denominator = MI.getOperand(2); - /* rcp = RECIP(denominator) = 2^32 / denominator + e - * e is rounding error */ - unsigned rcp = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getRECIP_UINT()), rcp) - .addOperand(denominator); - - /* rcp_lo = lo(rcp * denominator) */ - unsigned rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()), rcp_lo) - .addReg(rcp) - .addOperand(denominator); - - /* rcp_hi = HI (rcp * denominator) */ - unsigned rcp_hi = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), rcp_hi) - .addReg(rcp) - .addOperand(denominator); - - unsigned neg_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), neg_rcp_lo) - .addReg(AMDIL::ZERO) - .addReg(rcp_lo); - - unsigned abs_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), abs_rcp_lo) - .addReg(rcp_hi) - .addReg(neg_rcp_lo) - .addReg(rcp_lo); - - unsigned e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), e) - .addReg(abs_rcp_lo) - .addReg(rcp); - - unsigned rcp_plus_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), rcp_plus_e) - .addReg(rcp) - .addReg(e); - - unsigned rcp_sub_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), rcp_sub_e) - .addReg(rcp) - .addReg(e); - - /* tmp0 = rcp_hi == 0 ? 
rcp_plus_e : rcp_sub_e */ - unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), tmp0) - .addReg(rcp_hi) - .addReg(rcp_plus_e) - .addReg(rcp_sub_e); - - unsigned q = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), q) - .addReg(tmp0) - .addOperand(numerator); - - /* num_sub_r = q * denominator */ - unsigned num_sub_r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()), - num_sub_r) - .addReg(q) - .addOperand(denominator); - - unsigned r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), r) - .addOperand(numerator) - .addReg(num_sub_r); - - unsigned r_ge_den = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_den) - .addReg(r) - .addOperand(denominator); - - unsigned r_ge_zero = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_zero) - .addOperand(numerator) - .addReg(num_sub_r); - - unsigned tmp1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::AND_INT), tmp1) - .addReg(r_ge_den) - .addReg(r_ge_zero); - - unsigned val0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - unsigned val1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - unsigned result = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass); - if (div) { - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val0) - .addReg(q) - .addReg(AMDIL::ONE_INT); - - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val1) - .addReg(q) - .addReg(AMDIL::ONE_INT); - - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result) - 
.addReg(tmp1) - .addReg(q) - .addReg(val0); - } else { - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val0) - .addReg(r) - .addOperand(denominator); - - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val1) - .addReg(r) - .addOperand(denominator); - - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result) - .addReg(tmp1) - .addReg(r) - .addReg(val0); - } - - /* XXX: Do we need to set to MAX_INT if denominator is 0? */ - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), dst) - .addReg(r_ge_zero) - .addReg(val1) - .addReg(result); -}