//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is the parent TargetLowering class for hardware code gen targets.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPUUtil.h"
#include "AMDILIntrinsicInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

21 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine
&TM
) :
22 AMDILTargetLowering(TM
)
24 // We need to custom lower some of the intrinsics
25 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
27 setOperationAction(ISD::SELECT_CC
, MVT::f32
, Custom
);
28 setOperationAction(ISD::SELECT_CC
, MVT::i32
, Custom
);
30 // Library functions. These default to Expand, but we have instructions
32 setOperationAction(ISD::FCEIL
, MVT::f32
, Legal
);
33 setOperationAction(ISD::FEXP2
, MVT::f32
, Legal
);
34 setOperationAction(ISD::FRINT
, MVT::f32
, Legal
);
36 setOperationAction(ISD::LOAD
, MVT::f32
, Custom
);
38 setOperationAction(ISD::UDIV
, MVT::i32
, Expand
);
39 setOperationAction(ISD::UDIVREM
, MVT::i32
, Custom
);
40 setOperationAction(ISD::UREM
, MVT::i32
, Expand
);
43 SDValue
AMDGPUTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
)
46 switch (Op
.getOpcode()) {
47 default: return AMDILTargetLowering::LowerOperation(Op
, DAG
);
48 case ISD::INTRINSIC_WO_CHAIN
: return LowerINTRINSIC_WO_CHAIN(Op
, DAG
);
49 case ISD::LOAD
: return BitcastLOAD(Op
, DAG
);
50 case ISD::SELECT_CC
: return LowerSELECT_CC(Op
, DAG
);
51 case ISD::UDIVREM
: return LowerUDIVREM(Op
, DAG
);
55 SDValue
AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op
,
56 SelectionDAG
&DAG
) const
58 unsigned IntrinsicID
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
59 DebugLoc DL
= Op
.getDebugLoc();
60 EVT VT
= Op
.getValueType();
62 switch (IntrinsicID
) {
64 case AMDGPUIntrinsic::AMDIL_abs
:
65 return LowerIntrinsicIABS(Op
, DAG
);
66 case AMDGPUIntrinsic::AMDIL_exp
:
67 return DAG
.getNode(ISD::FEXP2
, DL
, VT
, Op
.getOperand(1));
68 case AMDGPUIntrinsic::AMDIL_fabs
:
69 return DAG
.getNode(ISD::FABS
, DL
, VT
, Op
.getOperand(1));
70 case AMDGPUIntrinsic::AMDGPU_lrp
:
71 return LowerIntrinsicLRP(Op
, DAG
);
72 case AMDGPUIntrinsic::AMDIL_fraction
:
73 return DAG
.getNode(AMDGPUISD::FRACT
, DL
, VT
, Op
.getOperand(1));
74 case AMDGPUIntrinsic::AMDIL_mad
:
75 return DAG
.getNode(AMDILISD::MAD
, DL
, VT
, Op
.getOperand(1),
76 Op
.getOperand(2), Op
.getOperand(3));
77 case AMDGPUIntrinsic::AMDIL_max
:
78 return DAG
.getNode(AMDGPUISD::FMAX
, DL
, VT
, Op
.getOperand(1),
80 case AMDGPUIntrinsic::AMDGPU_imax
:
81 return DAG
.getNode(AMDGPUISD::SMAX
, DL
, VT
, Op
.getOperand(1),
83 case AMDGPUIntrinsic::AMDGPU_umax
:
84 return DAG
.getNode(AMDGPUISD::UMAX
, DL
, VT
, Op
.getOperand(1),
86 case AMDGPUIntrinsic::AMDIL_min
:
87 return DAG
.getNode(AMDGPUISD::FMIN
, DL
, VT
, Op
.getOperand(1),
89 case AMDGPUIntrinsic::AMDGPU_imin
:
90 return DAG
.getNode(AMDGPUISD::SMIN
, DL
, VT
, Op
.getOperand(1),
92 case AMDGPUIntrinsic::AMDGPU_umin
:
93 return DAG
.getNode(AMDGPUISD::UMIN
, DL
, VT
, Op
.getOperand(1),
95 case AMDGPUIntrinsic::AMDIL_round_nearest
:
96 return DAG
.getNode(ISD::FRINT
, DL
, VT
, Op
.getOperand(1));
97 case AMDGPUIntrinsic::AMDIL_round_posinf
:
98 return DAG
.getNode(ISD::FCEIL
, DL
, VT
, Op
.getOperand(1));
102 ///IABS(a) = SMAX(sub(0, a), a)
103 SDValue
AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op
,
104 SelectionDAG
&DAG
) const
107 DebugLoc DL
= Op
.getDebugLoc();
108 EVT VT
= Op
.getValueType();
109 SDValue Neg
= DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, VT
),
112 return DAG
.getNode(AMDGPUISD::SMAX
, DL
, VT
, Neg
, Op
.getOperand(1));
115 /// Linear Interpolation
116 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
117 SDValue
AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op
,
118 SelectionDAG
&DAG
) const
120 DebugLoc DL
= Op
.getDebugLoc();
121 EVT VT
= Op
.getValueType();
122 SDValue OneSubA
= DAG
.getNode(ISD::FSUB
, DL
, VT
,
123 DAG
.getConstantFP(1.0f
, MVT::f32
),
125 SDValue OneSubAC
= DAG
.getNode(ISD::FMUL
, DL
, VT
, OneSubA
,
127 return DAG
.getNode(AMDILISD::MAD
, DL
, VT
, Op
.getOperand(1),
132 /// BitcastLoad - Convert floating point loads to integer loads of the same
133 /// type width and the bitcast the result back to a floating point type.
134 SDValue
AMDGPUTargetLowering::BitcastLOAD(SDValue Op
, SelectionDAG
&DAG
) const
136 DebugLoc DL
= Op
.getDebugLoc();
137 EVT VT
= Op
.getValueType();
140 if (VT
== MVT::f32
) {
145 LoadSDNode
* LD
= dyn_cast
<LoadSDNode
>(Op
);
148 SDValue NewLoad
= DAG
.getLoad (LD
->getAddressingMode(),
149 LD
->getExtensionType(), IntVT
, DL
,
150 LD
->getChain(), LD
->getBasePtr(),
151 LD
->getOffset(), IntVT
,
152 LD
->getMemOperand());
154 SDValue Bitcast
= DAG
.getNode(ISD::BITCAST
, DL
, VT
, NewLoad
);
155 DAG
.ReplaceAllUsesWith(Op
.getValue(0).getNode(), &Bitcast
);
160 SDValue
AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op
,
161 SelectionDAG
&DAG
) const
163 DebugLoc DL
= Op
.getDebugLoc();
164 EVT VT
= Op
.getValueType();
166 SDValue LHS
= Op
.getOperand(0);
167 SDValue RHS
= Op
.getOperand(1);
168 SDValue True
= Op
.getOperand(2);
169 SDValue False
= Op
.getOperand(3);
170 SDValue CC
= Op
.getOperand(4);
171 ISD::CondCode CCOpcode
= cast
<CondCodeSDNode
>(CC
)->get();
174 // LHS and RHS are guaranteed to be the same value type
175 EVT CompareVT
= LHS
.getValueType();
177 // We need all the operands of SELECT_CC to have the same value type, so if
178 // necessary we need to convert LHS and RHS to be the same type True and
179 // False. True and False are guaranteed to have the same type as this
182 if (CompareVT
!= VT
) {
183 ISD::NodeType ConversionOp
= ISD::DELETED_NODE
;
184 if (VT
== MVT::f32
&& CompareVT
== MVT::i32
) {
185 if (isUnsignedIntSetCC(CCOpcode
)) {
186 ConversionOp
= ISD::UINT_TO_FP
;
188 ConversionOp
= ISD::SINT_TO_FP
;
190 } else if (VT
== MVT::i32
&& CompareVT
== MVT::f32
) {
191 ConversionOp
= ISD::FP_TO_SINT
;
193 // I don't think there will be any other type pairings.
194 assert(!"Unhandled operand type parings in SELECT_CC");
196 // XXX Check the value of LHS and RHS and avoid creating sequences like
198 LHS
= DAG
.getNode(ConversionOp
, DL
, VT
, LHS
);
199 RHS
= DAG
.getNode(ConversionOp
, DL
, VT
, RHS
);
202 // If True is a hardware TRUE value and False is a hardware FALSE value or
203 // vice-versa we can handle this with a native instruction (SET* instructions).
204 if ((isHWTrueValue(True
) && isHWFalseValue(False
))) {
205 return DAG
.getNode(ISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, True
, False
, CC
);
208 // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
209 // we can handle this with a native instruction, but we need to swap true
210 // and false and change the conditional.
211 if (isHWTrueValue(False
) && isHWFalseValue(True
)) {
214 // XXX Check if we can lower this to a SELECT or if it is supported by a native
215 // operation. (The code below does this but we don't have the Instruction
216 // selection patterns to do this yet.
218 if (isZero(LHS
) || isZero(RHS
)) {
219 SDValue Cond
= (isZero(LHS
) ? RHS
: LHS
);
230 // We can lower to select
237 return DAG
.getNode(ISD::SELECT
, DL
, VT
, Cond
, True
, False
);
239 // Supported by a native operation (CNDGE, CNDGT)
240 return DAG
.getNode(ISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, True
, False
, CC
);
245 // If we make it this for it means we have no native instructions to handle
246 // this SELECT_CC, so we must lower it.
247 SDValue HWTrue
, HWFalse
;
249 if (VT
== MVT::f32
) {
250 HWTrue
= DAG
.getConstantFP(1.0f
, VT
);
251 HWFalse
= DAG
.getConstantFP(0.0f
, VT
);
252 } else if (VT
== MVT::i32
) {
253 HWTrue
= DAG
.getConstant(-1, VT
);
254 HWFalse
= DAG
.getConstant(0, VT
);
257 assert(!"Unhandled value type in LowerSELECT_CC");
260 // Lower this unsupported SELECT_CC into a combination of two supported
261 // SELECT_CC operations.
262 SDValue Cond
= DAG
.getNode(ISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, HWTrue
, HWFalse
, CC
);
264 return DAG
.getNode(ISD::SELECT
, DL
, VT
, Cond
, True
, False
);
268 SDValue
AMDGPUTargetLowering::LowerUDIVREM(SDValue Op
,
269 SelectionDAG
&DAG
) const
271 DebugLoc DL
= Op
.getDebugLoc();
272 EVT VT
= Op
.getValueType();
274 SDValue Num
= Op
.getOperand(0);
275 SDValue Den
= Op
.getOperand(1);
277 SmallVector
<SDValue
, 8> Results
;
279 // RCP = URECIP(Den) = 2^32 / Den + e
280 // e is rounding error.
281 SDValue RCP
= DAG
.getNode(AMDGPUISD::URECIP
, DL
, VT
, Den
);
283 // RCP_LO = umulo(RCP, Den) */
284 SDValue RCP_LO
= DAG
.getNode(ISD::UMULO
, DL
, VT
, RCP
, Den
);
286 // RCP_HI = mulhu (RCP, Den) */
287 SDValue RCP_HI
= DAG
.getNode(ISD::MULHU
, DL
, VT
, RCP
, Den
);
289 // NEG_RCP_LO = -RCP_LO
290 SDValue NEG_RCP_LO
= DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, VT
),
293 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
294 SDValue ABS_RCP_LO
= DAG
.getSelectCC(DL
, RCP_HI
, DAG
.getConstant(0, VT
),
297 // Calculate the rounding error from the URECIP instruction
298 // E = mulhu(ABS_RCP_LO, RCP)
299 SDValue E
= DAG
.getNode(ISD::MULHU
, DL
, VT
, ABS_RCP_LO
, RCP
);
302 SDValue RCP_A_E
= DAG
.getNode(ISD::ADD
, DL
, VT
, RCP
, E
);
305 SDValue RCP_S_E
= DAG
.getNode(ISD::SUB
, DL
, VT
, RCP
, E
);
307 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
308 SDValue Tmp0
= DAG
.getSelectCC(DL
, RCP_HI
, DAG
.getConstant(0, VT
),
311 // Quotient = mulhu(Tmp0, Num)
312 SDValue Quotient
= DAG
.getNode(ISD::MULHU
, DL
, VT
, Tmp0
, Num
);
314 // Num_S_Remainder = Quotient * Den
315 SDValue Num_S_Remainder
= DAG
.getNode(ISD::UMULO
, DL
, VT
, Quotient
, Den
);
317 // Remainder = Num - Num_S_Remainder
318 SDValue Remainder
= DAG
.getNode(ISD::SUB
, DL
, VT
, Num
, Num_S_Remainder
);
320 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
321 SDValue Remainder_GE_Den
= DAG
.getSelectCC(DL
, Remainder
, Den
,
322 DAG
.getConstant(-1, VT
),
323 DAG
.getConstant(0, VT
),
325 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
326 SDValue Remainder_GE_Zero
= DAG
.getSelectCC(DL
, Remainder
,
327 DAG
.getConstant(0, VT
),
328 DAG
.getConstant(-1, VT
),
329 DAG
.getConstant(0, VT
),
331 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
332 SDValue Tmp1
= DAG
.getNode(ISD::AND
, DL
, VT
, Remainder_GE_Den
,
335 // Calculate Division result:
337 // Quotient_A_One = Quotient + 1
338 SDValue Quotient_A_One
= DAG
.getNode(ISD::ADD
, DL
, VT
, Quotient
,
339 DAG
.getConstant(1, VT
));
341 // Quotient_S_One = Quotient - 1
342 SDValue Quotient_S_One
= DAG
.getNode(ISD::SUB
, DL
, VT
, Quotient
,
343 DAG
.getConstant(1, VT
));
345 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
346 SDValue Div
= DAG
.getSelectCC(DL
, Tmp1
, DAG
.getConstant(0, VT
),
347 Quotient
, Quotient_A_One
, ISD::SETEQ
);
349 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
350 Div
= DAG
.getSelectCC(DL
, Remainder_GE_Zero
, DAG
.getConstant(0, VT
),
351 Quotient_S_One
, Div
, ISD::SETEQ
);
353 // Calculate Rem result:
355 // Remainder_S_Den = Remainder - Den
356 SDValue Remainder_S_Den
= DAG
.getNode(ISD::SUB
, DL
, VT
, Remainder
, Den
);
358 // Remainder_A_Den = Remainder + Den
359 SDValue Remainder_A_Den
= DAG
.getNode(ISD::ADD
, DL
, VT
, Remainder
, Den
);
361 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
362 SDValue Rem
= DAG
.getSelectCC(DL
, Tmp1
, DAG
.getConstant(0, VT
),
363 Remainder
, Remainder_S_Den
, ISD::SETEQ
);
365 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
366 Rem
= DAG
.getSelectCC(DL
, Remainder_GE_Zero
, DAG
.getConstant(0, VT
),
367 Remainder_A_Den
, Rem
, ISD::SETEQ
);
369 DAG
.ReplaceAllUsesWith(Op
.getValue(0).getNode(), &Div
);
370 DAG
.ReplaceAllUsesWith(Op
.getValue(1).getNode(), &Rem
);
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//

379 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op
) const
381 if (ConstantFPSDNode
* CFP
= dyn_cast
<ConstantFPSDNode
>(Op
)) {
382 return CFP
->isExactlyValue(1.0);
384 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
)) {
385 return C
->isAllOnesValue();
390 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op
) const
392 if (ConstantFPSDNode
* CFP
= dyn_cast
<ConstantFPSDNode
>(Op
)) {
393 return CFP
->getValueAPF().isZero();
395 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
)) {
396 return C
->isNullValue();
401 void AMDGPUTargetLowering::addLiveIn(MachineInstr
* MI
,
402 MachineFunction
* MF
, MachineRegisterInfo
& MRI
,
403 const TargetInstrInfo
* TII
, unsigned reg
) const
405 AMDGPU::utilAddLiveIn(MF
, MRI
, TII
, reg
, MI
->getOperand(0).getReg());
408 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
410 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode
) const
413 default: return AMDILTargetLowering::getTargetNodeName(Opcode
);
415 NODE_NAME_CASE(FRACT
)
422 NODE_NAME_CASE(URECIP
)