1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This is the parent TargetLowering class for hardware code gen targets.
12 //===----------------------------------------------------------------------===//
14 #include "AMDGPUISelLowering.h"
15 #include "AMDILIntrinsicInfo.h"
16 #include "llvm/CodeGen/MachineFunction.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
// Constructor for the common AMDGPU lowering class.
//
// Forwards TM and a newly allocated TargetLoweringObjectFileELF to the
// TargetLowering base class, then registers how individual DAG opcodes are
// legalized:
//   - ISD::INTRINSIC_WO_CHAIN (MVT::Other)            -> Custom
//   - f32 FCEIL, FEXP2, FPOW, FLOG2, FABS, FFLOOR, FRINT -> Legal
//   - i32 UDIV and UREM                                -> Expand
//   - i32 UDIVREM                                      -> Custom
23 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine
&TM
) :
24 TargetLowering(TM
, new TargetLoweringObjectFileELF())
27 // Initialize target lowering borrowed from AMDIL
30 // We need to custom lower some of the intrinsics
31 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
33 // Library functions. These default to Expand, but we have instructions
// for them, so the f32 forms below are marked Legal.
35 setOperationAction(ISD::FCEIL
, MVT::f32
, Legal
);
36 setOperationAction(ISD::FEXP2
, MVT::f32
, Legal
);
37 setOperationAction(ISD::FPOW
, MVT::f32
, Legal
);
38 setOperationAction(ISD::FLOG2
, MVT::f32
, Legal
);
39 setOperationAction(ISD::FABS
, MVT::f32
, Legal
);
40 setOperationAction(ISD::FFLOOR
, MVT::f32
, Legal
);
41 setOperationAction(ISD::FRINT
, MVT::f32
, Legal
);
// 32-bit unsigned division: plain UDIV and UREM are expanded, while the
// combined UDIVREM node is custom lowered.
43 setOperationAction(ISD::UDIV
, MVT::i32
, Expand
);
44 setOperationAction(ISD::UDIVREM
, MVT::i32
, Custom
);
45 setOperationAction(ISD::UREM
, MVT::i32
, Expand
);
48 //===---------------------------------------------------------------------===//
49 // TargetLowering Callbacks
50 //===---------------------------------------------------------------------===//
// TargetLowering callback for incoming function arguments.
//
// The visible body appends one default-constructed (empty) SDValue to InVals
// for every entry in Ins; it does not inspect the individual InputArg records.
// CallConv, DL and DAG are not referenced in the visible code.
// NOTE(review): the return statement is not visible in this excerpt.
52 SDValue
AMDGPUTargetLowering::LowerFormalArguments(
54 CallingConv::ID CallConv
,
56 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
57 DebugLoc DL
, SelectionDAG
&DAG
,
58 SmallVectorImpl
<SDValue
> &InVals
) const
// One placeholder value per incoming argument.
60 for (unsigned i
= 0, e
= Ins
.size(); i
< e
; ++i
) {
61 InVals
.push_back(SDValue());
// TargetLowering callback for function returns.
//
// Returns a new AMDGPUISD::RET_FLAG node of type MVT::Other whose only operand
// is Chain. Outs and OutVals are not referenced in the visible body.
// NOTE(review): Chain is presumably a parameter declared on a line that is not
// visible in this excerpt -- confirm against the header.
66 SDValue
AMDGPUTargetLowering::LowerReturn(
68 CallingConv::ID CallConv
,
70 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
71 const SmallVectorImpl
<SDValue
> &OutVals
,
72 DebugLoc DL
, SelectionDAG
&DAG
) const
74 return DAG
.getNode(AMDGPUISD::RET_FLAG
, DL
, MVT::Other
, Chain
);
77 //===---------------------------------------------------------------------===//
78 // Target specific lowering
79 //===---------------------------------------------------------------------===//
// Entry point for operations registered as Custom.
//
// Switches on Op.getOpcode() and forwards to the matching helper:
//   SDIV -> LowerSDIV, SREM -> LowerSREM,
//   SIGN_EXTEND_INREG -> LowerSIGN_EXTEND_INREG, BRCOND -> LowerBRCOND,
//   INTRINSIC_WO_CHAIN -> LowerINTRINSIC_WO_CHAIN, UDIVREM -> LowerUDIVREM.
// The assert below fires with a diagnostic when custom lowering for an
// opcode has not been implemented.
81 SDValue
AMDGPUTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
)
84 switch (Op
.getOpcode()) {
// The two adjacent string literals are concatenated by the compiler, so the
// first one must end with a space to keep "this instruction" as two words.
87 assert(0 && "Custom lowering code for this "
88 "instruction is not implemented yet!");
91 case ISD::SDIV
: return LowerSDIV(Op
, DAG
);
92 case ISD::SREM
: return LowerSREM(Op
, DAG
);
93 case ISD::SIGN_EXTEND_INREG
: return LowerSIGN_EXTEND_INREG(Op
, DAG
);
94 case ISD::BRCOND
: return LowerBRCOND(Op
, DAG
);
95 // AMDGPU DAG lowering
96 case ISD::INTRINSIC_WO_CHAIN
: return LowerINTRINSIC_WO_CHAIN(Op
, DAG
);
97 case ISD::UDIVREM
: return LowerUDIVREM(Op
, DAG
);
// Custom lowering for ISD::INTRINSIC_WO_CHAIN.
//
// Operand 0 is read as a ConstantSDNode holding the intrinsic ID; the
// intrinsic's own arguments start at operand 1. Each recognised ID is
// rewritten into a helper call or a single DAG node of Op's value type:
//   AMDIL_abs           -> LowerIntrinsicIABS
//   AMDIL_exp           -> ISD::FEXP2
//   AMDGPU_lrp          -> LowerIntrinsicLRP
//   AMDIL_fraction      -> AMDGPUISD::FRACT
//   AMDIL_mad           -> AMDGPUISD::MAD (operands 1, 2, 3)
//   AMDIL_max / AMDGPU_imax / AMDGPU_umax -> AMDGPUISD::FMAX / SMAX / UMAX
//   AMDIL_min / AMDGPU_imin / AMDGPU_umin -> AMDGPUISD::FMIN / SMIN / UMIN
//   AMDIL_round_nearest -> ISD::FRINT
// NOTE(review): for the min/max cases only the first operand of the getNode
// call is visible in this excerpt.
102 SDValue
AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op
,
103 SelectionDAG
&DAG
) const
105 unsigned IntrinsicID
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
106 DebugLoc DL
= Op
.getDebugLoc();
107 EVT VT
= Op
.getValueType();
109 switch (IntrinsicID
) {
111 case AMDGPUIntrinsic::AMDIL_abs
:
112 return LowerIntrinsicIABS(Op
, DAG
);
// Note that AMDIL_exp is emitted as the base-2 exponential node.
113 case AMDGPUIntrinsic::AMDIL_exp
:
114 return DAG
.getNode(ISD::FEXP2
, DL
, VT
, Op
.getOperand(1));
115 case AMDGPUIntrinsic::AMDGPU_lrp
:
116 return LowerIntrinsicLRP(Op
, DAG
);
117 case AMDGPUIntrinsic::AMDIL_fraction
:
118 return DAG
.getNode(AMDGPUISD::FRACT
, DL
, VT
, Op
.getOperand(1));
119 case AMDGPUIntrinsic::AMDIL_mad
:
120 return DAG
.getNode(AMDGPUISD::MAD
, DL
, VT
, Op
.getOperand(1),
121 Op
.getOperand(2), Op
.getOperand(3));
// Floating-point, signed and unsigned maximum.
122 case AMDGPUIntrinsic::AMDIL_max
:
123 return DAG
.getNode(AMDGPUISD::FMAX
, DL
, VT
, Op
.getOperand(1),
125 case AMDGPUIntrinsic::AMDGPU_imax
:
126 return DAG
.getNode(AMDGPUISD::SMAX
, DL
, VT
, Op
.getOperand(1),
128 case AMDGPUIntrinsic::AMDGPU_umax
:
129 return DAG
.getNode(AMDGPUISD::UMAX
, DL
, VT
, Op
.getOperand(1),
// Floating-point, signed and unsigned minimum.
131 case AMDGPUIntrinsic::AMDIL_min
:
132 return DAG
.getNode(AMDGPUISD::FMIN
, DL
, VT
, Op
.getOperand(1),
134 case AMDGPUIntrinsic::AMDGPU_imin
:
135 return DAG
.getNode(AMDGPUISD::SMIN
, DL
, VT
, Op
.getOperand(1),
137 case AMDGPUIntrinsic::AMDGPU_umin
:
138 return DAG
.getNode(AMDGPUISD::UMIN
, DL
, VT
, Op
.getOperand(1),
140 case AMDGPUIntrinsic::AMDIL_round_nearest
:
141 return DAG
.getNode(ISD::FRINT
, DL
, VT
, Op
.getOperand(1));
145 /// IABS(a) = SMAX(sub(0, a), a)
///
/// Lowers the integer absolute-value intrinsic. The argument `a` is operand 1
/// of Op (operand 0 carries the intrinsic ID). A negated copy is built with
/// ISD::SUB from the constant 0, and the result is the AMDGPUISD::SMAX of that
/// negated copy and the original argument, both in Op's value type.
146 SDValue
AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op
,
147 SelectionDAG
&DAG
) const
150 DebugLoc DL
= Op
.getDebugLoc();
151 EVT VT
= Op
.getValueType();
// Neg = 0 - a (the second SUB operand is not visible in this excerpt).
152 SDValue Neg
= DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, VT
),
155 return DAG
.getNode(AMDGPUISD::SMAX
, DL
, VT
, Neg
, Op
.getOperand(1));
158 /// Linear Interpolation
159 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
///
/// Builds the interpolation out of three nodes: an ISD::FSUB starting from the
/// constant 1.0 (OneSubA), an ISD::FMUL of OneSubA with another operand
/// (OneSubAC), and a final AMDGPUISD::MAD whose first operand is operand 1 of
/// Op. Only part of each argument list is visible in this excerpt.
160 SDValue
AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op
,
161 SelectionDAG
&DAG
) const
163 DebugLoc DL
= Op
.getDebugLoc();
164 EVT VT
= Op
.getValueType();
// NOTE(review): the constant 1.0 is created as MVT::f32 while the FSUB itself
// uses VT -- confirm that VT is always f32 for this intrinsic.
165 SDValue OneSubA
= DAG
.getNode(ISD::FSUB
, DL
, VT
,
166 DAG
.getConstantFP(1.0f
, MVT::f32
),
168 SDValue OneSubAC
= DAG
.getNode(ISD::FMUL
, DL
, VT
, OneSubA
,
170 return DAG
.getNode(AMDGPUISD::MAD
, DL
, VT
, Op
.getOperand(1),
// Custom lowering for ISD::UDIVREM (registered as Custom for i32 in the
// constructor): builds a DAG computing the unsigned quotient and remainder of
// Num (operand 0) by Den (operand 1) from a hardware reciprocal estimate.
//
// Outline of the visible steps:
//   1. RCP = URECIP(Den), an approximation of 2^32 / Den with error e.
//   2. E = mulhu(ABS_RCP_LO, RCP) estimates that error; Tmp0 is RCP + E or
//      RCP - E, selected on RCP_HI == 0.
//   3. Quotient = mulhu(Tmp0, Num); Remainder = Num - Quotient * Den.
//   4. Quotient is adjusted by +1 or -1 and Remainder by -Den or +Den,
//      selected by Tmp1 and Remainder_GE_Zero.
//   5. Uses of the two results of Op are replaced with Div and Rem.
// NOTE(review): several trailing arguments (including the condition codes of
// the first select nodes) and the return statement are not visible in this
// excerpt.
177 SDValue
AMDGPUTargetLowering::LowerUDIVREM(SDValue Op
,
178 SelectionDAG
&DAG
) const
180 DebugLoc DL
= Op
.getDebugLoc();
181 EVT VT
= Op
.getValueType();
183 SDValue Num
= Op
.getOperand(0);
184 SDValue Den
= Op
.getOperand(1);
// NOTE(review): Results is not referenced again in the visible code.
186 SmallVector
<SDValue
, 8> Results
;
188 // RCP = URECIP(Den) = 2^32 / Den + e
189 // e is rounding error.
190 SDValue RCP
= DAG
.getNode(AMDGPUISD::URECIP
, DL
, VT
, Den
);
192 // RCP_LO = umulo(RCP, Den)
// NOTE(review): ISD::UMULO is documented as producing two results (product
// and overflow flag) but is created here with the single type VT and used as
// a plain low-half product -- confirm ISD::MUL was not intended.
193 SDValue RCP_LO
= DAG
.getNode(ISD::UMULO
, DL
, VT
, RCP
, Den
);
195 // RCP_HI = mulhu (RCP, Den)
196 SDValue RCP_HI
= DAG
.getNode(ISD::MULHU
, DL
, VT
, RCP
, Den
);
198 // NEG_RCP_LO = -RCP_LO
199 SDValue NEG_RCP_LO
= DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, VT
),
202 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
203 SDValue ABS_RCP_LO
= DAG
.getSelectCC(DL
, RCP_HI
, DAG
.getConstant(0, VT
),
206 // Calculate the rounding error from the URECIP instruction
207 // E = mulhu(ABS_RCP_LO, RCP)
208 SDValue E
= DAG
.getNode(ISD::MULHU
, DL
, VT
, ABS_RCP_LO
, RCP
);
// RCP_A_E = RCP + E
211 SDValue RCP_A_E
= DAG
.getNode(ISD::ADD
, DL
, VT
, RCP
, E
);
// RCP_S_E = RCP - E
214 SDValue RCP_S_E
= DAG
.getNode(ISD::SUB
, DL
, VT
, RCP
, E
);
216 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
217 SDValue Tmp0
= DAG
.getSelectCC(DL
, RCP_HI
, DAG
.getConstant(0, VT
),
220 // Quotient = mulhu(Tmp0, Num)
221 SDValue Quotient
= DAG
.getNode(ISD::MULHU
, DL
, VT
, Tmp0
, Num
);
223 // Num_S_Remainder = Quotient * Den
// NOTE(review): same ISD::UMULO concern as for RCP_LO above.
224 SDValue Num_S_Remainder
= DAG
.getNode(ISD::UMULO
, DL
, VT
, Quotient
, Den
);
226 // Remainder = Num - Num_S_Remainder
227 SDValue Remainder
= DAG
.getNode(ISD::SUB
, DL
, VT
, Num
, Num_S_Remainder
);
229 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
230 SDValue Remainder_GE_Den
= DAG
.getSelectCC(DL
, Remainder
, Den
,
231 DAG
.getConstant(-1, VT
),
232 DAG
.getConstant(0, VT
),
234 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
235 SDValue Remainder_GE_Zero
= DAG
.getSelectCC(DL
, Remainder
,
236 DAG
.getConstant(0, VT
),
237 DAG
.getConstant(-1, VT
),
238 DAG
.getConstant(0, VT
),
240 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
241 SDValue Tmp1
= DAG
.getNode(ISD::AND
, DL
, VT
, Remainder_GE_Den
,
244 // Calculate Division result:
246 // Quotient_A_One = Quotient + 1
247 SDValue Quotient_A_One
= DAG
.getNode(ISD::ADD
, DL
, VT
, Quotient
,
248 DAG
.getConstant(1, VT
));
250 // Quotient_S_One = Quotient - 1
251 SDValue Quotient_S_One
= DAG
.getNode(ISD::SUB
, DL
, VT
, Quotient
,
252 DAG
.getConstant(1, VT
));
254 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
255 SDValue Div
= DAG
.getSelectCC(DL
, Tmp1
, DAG
.getConstant(0, VT
),
256 Quotient
, Quotient_A_One
, ISD::SETEQ
);
258 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
259 Div
= DAG
.getSelectCC(DL
, Remainder_GE_Zero
, DAG
.getConstant(0, VT
),
260 Quotient_S_One
, Div
, ISD::SETEQ
);
262 // Calculate Rem result:
264 // Remainder_S_Den = Remainder - Den
265 SDValue Remainder_S_Den
= DAG
.getNode(ISD::SUB
, DL
, VT
, Remainder
, Den
);
267 // Remainder_A_Den = Remainder + Den
268 SDValue Remainder_A_Den
= DAG
.getNode(ISD::ADD
, DL
, VT
, Remainder
, Den
);
270 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
271 SDValue Rem
= DAG
.getSelectCC(DL
, Tmp1
, DAG
.getConstant(0, VT
),
272 Remainder
, Remainder_S_Den
, ISD::SETEQ
);
274 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
275 Rem
= DAG
.getSelectCC(DL
, Remainder_GE_Zero
, DAG
.getConstant(0, VT
),
276 Remainder_A_Den
, Rem
, ISD::SETEQ
);
// Redirect users of the quotient (result 0) and remainder (result 1).
// NOTE(review): Op.getValue(0) and Op.getValue(1) are two results of the same
// node, so both getNode() calls name the same SDNode. The SDValue-pointer
// overload of ReplaceAllUsesWith is documented as taking an array with one
// entry per result of that node; passing the address of a single SDValue
// (&Div, &Rem) looks suspect -- confirm against the SelectionDAG API.
278 DAG
.ReplaceAllUsesWith(Op
.getValue(0).getNode(), &Div
);
279 DAG
.ReplaceAllUsesWith(Op
.getValue(1).getNode(), &Rem
);
284 //===----------------------------------------------------------------------===//
286 //===----------------------------------------------------------------------===//
// Tests whether Op is a constant that encodes "true" for the hardware:
//   - a floating-point constant that is exactly 1.0, or
//   - an integer constant with all bits set.
// NOTE(review): the result for non-constant operands is not visible in this
// excerpt.
288 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op
) const
// Floating-point constant: true is 1.0.
290 if (ConstantFPSDNode
* CFP
= dyn_cast
<ConstantFPSDNode
>(Op
)) {
291 return CFP
->isExactlyValue(1.0);
// Integer constant: true is all ones.
293 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
)) {
294 return C
->isAllOnesValue();
// Tests whether Op is a constant that encodes "false" for the hardware:
//   - a floating-point constant whose value is zero, or
//   - an integer constant equal to zero.
// NOTE(review): the result for non-constant operands is not visible in this
// excerpt.
299 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op
) const
// Floating-point constant: false is zero.
301 if (ConstantFPSDNode
* CFP
= dyn_cast
<ConstantFPSDNode
>(Op
)) {
302 return CFP
->getValueAPF().isZero();
// Integer constant: false is the null value.
304 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
)) {
305 return C
->isNullValue();
// Returns a register node of type VT for the virtual register associated with
// the live-in register Reg.
//
// If Reg is not yet recorded as a live-in of the current machine function, a
// new virtual register of class RC is created and the pair (Reg, virtual
// register) is added to the function's live-ins. The virtual register is then
// looked up through getLiveInVirtReg(Reg) and wrapped with DAG.getRegister.
310 SDValue
AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG
&DAG
,
311 const TargetRegisterClass
*RC
,
312 unsigned Reg
, EVT VT
) const {
313 MachineFunction
&MF
= DAG
.getMachineFunction();
314 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
315 unsigned VirtualRegister
;
// First request for this register: create and register a fresh vreg.
316 if (!MRI
.isLiveIn(Reg
)) {
317 VirtualRegister
= MRI
.createVirtualRegister(RC
);
318 MRI
.addLiveIn(Reg
, VirtualRegister
);
// Look up the virtual register recorded for Reg.
320 VirtualRegister
= MRI
.getLiveInVirtReg(Reg
);
322 return DAG
.getRegister(VirtualRegister
, VT
);
// Expands to a switch case for AMDGPUISD::<node> that returns the node's
// identifier as a string literal (via the preprocessor stringize operator).
// The expansion already ends in ';', so uses may omit a trailing semicolon.
325 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
327 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode
) const
333 NODE_NAME_CASE(CALL
);
334 NODE_NAME_CASE(UMUL
);
335 NODE_NAME_CASE(DIV_INF
);
336 NODE_NAME_CASE(RET_FLAG
);
337 NODE_NAME_CASE(BRANCH_COND
);
340 NODE_NAME_CASE(FRACT
)
347 NODE_NAME_CASE(URECIP
)
348 NODE_NAME_CASE(INTERP
)
349 NODE_NAME_CASE(INTERP_P0
)