1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This is the parent TargetLowering class for hardware code gen targets.
12 //===----------------------------------------------------------------------===//
14 #include "AMDGPUISelLowering.h"
15 #include "AMDILIntrinsicInfo.h"
16 #include "AMDGPUUtil.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
// Constructor: hands TM to the AMDILTargetLowering base class and then
// registers, per opcode and value type, how each operation should be treated
// (Custom, Legal or Expand) through setOperationAction().
// NOTE(review): the embedded line numbers skip (22 -> 24, 41 -> 44) and no
// braces are shown, so the lines delimiting the body appear to be missing
// from this copy of the file.
21 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine
&TM
) :
22 AMDILTargetLowering(TM
)
24 // We need to custom lower some of the intrinsics
25 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
// SELECT_CC on f32 and i32 is custom lowered; LowerOperation() routes it to
// LowerSELECT_CC().
27 setOperationAction(ISD::SELECT_CC
, MVT::f32
, Custom
);
28 setOperationAction(ISD::SELECT_CC
, MVT::i32
, Custom
);
30 // Library functions. These default to Expand, but we have instructions
// (the continuation of the comment above is not present in this copy)
32 setOperationAction(ISD::FCEIL
, MVT::f32
, Legal
);
33 setOperationAction(ISD::FEXP2
, MVT::f32
, Legal
);
34 setOperationAction(ISD::FRINT
, MVT::f32
, Legal
);
// f32 and v4f32 loads are custom lowered; LowerOperation() routes ISD::LOAD
// to BitcastLOAD().
36 setOperationAction(ISD::LOAD
, MVT::f32
, Custom
);
37 setOperationAction(ISD::LOAD
, MVT::v4f32
, Custom
);
// 32-bit unsigned division: UDIV and UREM are marked Expand while the
// combined UDIVREM is custom lowered (LowerOperation() routes it to
// LowerUDIVREM()). Presumably the expansion of UDIV/UREM ends up in that
// custom UDIVREM lowering -- confirm against the legalizer.
39 setOperationAction(ISD::UDIV
, MVT::i32
, Expand
);
40 setOperationAction(ISD::UDIVREM
, MVT::i32
, Custom
);
41 setOperationAction(ISD::UREM
, MVT::i32
, Expand
);
// Entry point for operations registered as Custom in the constructor.
// Dispatches on Op's opcode to the matching lowering helper; any opcode not
// listed here is forwarded to AMDILTargetLowering::LowerOperation().
// NOTE(review): the lines closing the switch and the function are not present
// in this copy of the file.
44 SDValue
AMDGPUTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
)
47 switch (Op
.getOpcode()) {
// Everything this class does not handle itself goes to the base class.
48 default: return AMDILTargetLowering::LowerOperation(Op
, DAG
);
49 case ISD::INTRINSIC_WO_CHAIN
: return LowerINTRINSIC_WO_CHAIN(Op
, DAG
);
50 case ISD::LOAD
: return BitcastLOAD(Op
, DAG
);
51 case ISD::SELECT_CC
: return LowerSELECT_CC(Op
, DAG
);
52 case ISD::UDIVREM
: return LowerUDIVREM(Op
, DAG
);
// Lowers an ISD::INTRINSIC_WO_CHAIN node. Operand 0 is read as a constant
// holding the intrinsic ID; the intrinsic's own arguments start at operand 1.
// Each visible case either calls a dedicated helper (LowerIntrinsicIABS,
// LowerIntrinsicLRP) or builds a single DAG node of the result type VT.
// NOTE(review): in the two-operand cases (max, imax, umax, min, imin, umin)
// the getNode() call ends on a trailing comma and the embedded line number
// skips by one, so the line supplying the second operand appears to be
// missing from this copy. The default case and closing braces are likewise
// not visible.
56 SDValue
AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op
,
57 SelectionDAG
&DAG
) const
// cast<> (not dyn_cast<>) is used, so operand 0 is expected to always be a
// ConstantSDNode here.
59 unsigned IntrinsicID
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
60 DebugLoc DL
= Op
.getDebugLoc();
61 EVT VT
= Op
.getValueType();
63 switch (IntrinsicID
) {
65 case AMDGPUIntrinsic::AMDIL_abs
:
66 return LowerIntrinsicIABS(Op
, DAG
);
// AMDIL_exp is emitted as ISD::FEXP2.
67 case AMDGPUIntrinsic::AMDIL_exp
:
68 return DAG
.getNode(ISD::FEXP2
, DL
, VT
, Op
.getOperand(1));
69 case AMDGPUIntrinsic::AMDIL_fabs
:
70 return DAG
.getNode(ISD::FABS
, DL
, VT
, Op
.getOperand(1));
71 case AMDGPUIntrinsic::AMDGPU_lrp
:
72 return LowerIntrinsicLRP(Op
, DAG
);
73 case AMDGPUIntrinsic::AMDIL_fraction
:
74 return DAG
.getNode(AMDGPUISD::FRACT
, DL
, VT
, Op
.getOperand(1));
// Three-operand multiply-add: operands 1..3 are passed through unchanged.
75 case AMDGPUIntrinsic::AMDIL_mad
:
76 return DAG
.getNode(AMDILISD::MAD
, DL
, VT
, Op
.getOperand(1),
77 Op
.getOperand(2), Op
.getOperand(3));
// max/min family: FMAX/FMIN for AMDIL_max/AMDIL_min, SMAX/SMIN for the
// AMDGPU_i* variants and UMAX/UMIN for the AMDGPU_u* variants.
78 case AMDGPUIntrinsic::AMDIL_max
:
79 return DAG
.getNode(AMDGPUISD::FMAX
, DL
, VT
, Op
.getOperand(1),
81 case AMDGPUIntrinsic::AMDGPU_imax
:
82 return DAG
.getNode(AMDGPUISD::SMAX
, DL
, VT
, Op
.getOperand(1),
84 case AMDGPUIntrinsic::AMDGPU_umax
:
85 return DAG
.getNode(AMDGPUISD::UMAX
, DL
, VT
, Op
.getOperand(1),
87 case AMDGPUIntrinsic::AMDIL_min
:
88 return DAG
.getNode(AMDGPUISD::FMIN
, DL
, VT
, Op
.getOperand(1),
90 case AMDGPUIntrinsic::AMDGPU_imin
:
91 return DAG
.getNode(AMDGPUISD::SMIN
, DL
, VT
, Op
.getOperand(1),
93 case AMDGPUIntrinsic::AMDGPU_umin
:
94 return DAG
.getNode(AMDGPUISD::UMIN
, DL
, VT
, Op
.getOperand(1),
// Rounding intrinsics map onto the generic nodes that the constructor marks
// Legal for f32: round_nearest -> FRINT, round_posinf -> FCEIL.
96 case AMDGPUIntrinsic::AMDIL_round_nearest
:
97 return DAG
.getNode(ISD::FRINT
, DL
, VT
, Op
.getOperand(1));
98 case AMDGPUIntrinsic::AMDIL_round_posinf
:
99 return DAG
.getNode(ISD::FCEIL
, DL
, VT
, Op
.getOperand(1));
103 /// IABS(a) = SMAX(sub(0, a), a)
/// Lowers the integer abs intrinsic; the intrinsic's argument is operand 1
/// of Op (operand 0 is the intrinsic ID, see LowerINTRINSIC_WO_CHAIN).
// NOTE(review): the line carrying the second operand of the SUB node is not
// present in this copy; per the formula above it is presumably Op's operand 1.
104 SDValue
AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op
,
105 SelectionDAG
&DAG
) const
108 DebugLoc DL
= Op
.getDebugLoc();
109 EVT VT
= Op
.getValueType();
// Neg = 0 - <operand not visible>
110 SDValue Neg
= DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, VT
),
// The result is the signed maximum of the negated value and the original.
113 return DAG
.getNode(AMDGPUISD::SMAX
, DL
, VT
, Neg
, Op
.getOperand(1));
116 /// Linear Interpolation
117 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
/// The result is built from three nodes: an FSUB (1 - a), an FMUL
/// ((1 - a) * c) and a final AMDILISD::MAD whose first operand is Op's
/// operand 1.
// NOTE(review): the trailing operands of all three getNode() calls are on
// lines that are not present in this copy (each call ends on a comma); the
// roles of a, b and c are taken from the formula above, not from visible code.
118 SDValue
AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op
,
119 SelectionDAG
&DAG
) const
121 DebugLoc DL
= Op
.getDebugLoc();
122 EVT VT
= Op
.getValueType();
// The constant 1.0 is created as MVT::f32 explicitly, while the FSUB node
// itself uses VT.
// NOTE(review): confirm VT is always f32 for this intrinsic.
123 SDValue OneSubA
= DAG
.getNode(ISD::FSUB
, DL
, VT
,
124 DAG
.getConstantFP(1.0f
, MVT::f32
),
126 SDValue OneSubAC
= DAG
.getNode(ISD::FMUL
, DL
, VT
, OneSubA
,
128 return DAG
.getNode(AMDILISD::MAD
, DL
, VT
, Op
.getOperand(1),
133 /// BitcastLoad - Convert floating point loads to integer loads of the same
134 /// type width and then bitcast the result back to a floating point type.
/// Handles the two value types registered as Custom for ISD::LOAD in the
/// constructor (f32 and v4f32). A new load of type IntVT is created from the
/// original load's addressing mode, extension type, chain, base pointer,
/// offset and memory operand, and an ISD::BITCAST back to VT is substituted
/// for the original value.
// NOTE(review): the lines assigning IntVT inside the two branches, the
// declaration of IntVT and the function's return statement are not present
// in this copy of the file.
135 SDValue
AMDGPUTargetLowering::BitcastLOAD(SDValue Op
, SelectionDAG
&DAG
) const
137 DebugLoc DL
= Op
.getDebugLoc();
138 EVT VT
= Op
.getValueType();
// Choose the integer type matching VT (branch bodies not visible here).
141 if (VT
== MVT::f32
) {
143 } else if (VT
== MVT::v4f32
) {
// NOTE(review): dyn_cast<> can yield a null pointer, yet LD is dereferenced
// below without a visible check. If Op is always a load here, cast<> would
// state that intent and assert on violation -- confirm.
148 LoadSDNode
* LD
= dyn_cast
<LoadSDNode
>(Op
);
// IntVT is passed twice to getLoad(); presumably once as the result type
// and once as the in-memory type -- verify against the getLoad() signature.
151 SDValue NewLoad
= DAG
.getLoad (LD
->getAddressingMode(),
152 LD
->getExtensionType(), IntVT
, DL
,
153 LD
->getChain(), LD
->getBasePtr(),
154 LD
->getOffset(), IntVT
,
155 LD
->getMemOperand());
// Reinterpret the integer load result as the original floating point type
// and redirect users of the original node to it.
157 SDValue Bitcast
= DAG
.getNode(ISD::BITCAST
, DL
, VT
, NewLoad
);
158 DAG
.ReplaceAllUsesWith(Op
.getValue(0).getNode(), &Bitcast
);
// Custom lowering for ISD::SELECT_CC (registered for f32 and i32 in the
// constructor). Operands, as read below: 0 = LHS, 1 = RHS, 2 = True,
// 3 = False, 4 = condition code. The visible steps are:
//   1. If the compare type differs from the result type VT, convert LHS and
//      RHS to VT.
//   2. If True/False are the hardware TRUE/FALSE constants (isHWTrueValue /
//      isHWFalseValue), return a SELECT_CC built from the (possibly
//      converted) operands.
//   3. If LHS or RHS is zero, return either a SELECT or a SELECT_CC.
//   4. Otherwise build a SELECT_CC that yields a hardware boolean and use it
//      as the condition of a SELECT between True and False.
// NOTE(review): many lines are not present in this copy (closing braces,
// else branches, the body of the swapped true/false case and the conditions
// that separate the two returns in step 3); the comments below describe only
// what is visible.
163 SDValue
AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op
,
164 SelectionDAG
&DAG
) const
166 DebugLoc DL
= Op
.getDebugLoc();
167 EVT VT
= Op
.getValueType();
169 SDValue LHS
= Op
.getOperand(0);
170 SDValue RHS
= Op
.getOperand(1);
171 SDValue True
= Op
.getOperand(2);
172 SDValue False
= Op
.getOperand(3);
173 SDValue CC
= Op
.getOperand(4);
174 ISD::CondCode CCOpcode
= cast
<CondCodeSDNode
>(CC
)->get();
177 // LHS and RHS are guaranteed to be the same value type
178 EVT CompareVT
= LHS
.getValueType();
180 // We need all the operands of SELECT_CC to have the same value type, so if
181 // necessary we need to convert LHS and RHS to be the same type as True and
182 // False. True and False are guaranteed to have the same type as this
// (the continuation of the comment above is not present in this copy)
185 if (CompareVT
!= VT
) {
// DELETED_NODE serves as a "not chosen yet" placeholder.
186 ISD::NodeType ConversionOp
= ISD::DELETED_NODE
;
// i32 compare feeding an f32 select: pick the unsigned conversion when the
// condition code is an unsigned integer compare, the signed one otherwise
// (the else line itself is not visible).
187 if (VT
== MVT::f32
&& CompareVT
== MVT::i32
) {
188 if (isUnsignedIntSetCC(CCOpcode
)) {
189 ConversionOp
= ISD::UINT_TO_FP
;
191 ConversionOp
= ISD::SINT_TO_FP
;
// f32 compare feeding an i32 select.
193 } else if (VT
== MVT::i32
&& CompareVT
== MVT::f32
) {
194 ConversionOp
= ISD::FP_TO_SINT
;
196 // I don't think there will be any other type pairings.
// NOTE(review): !"literal" is always false, so this assert fires whenever it
// is reached; with asserts compiled out ConversionOp would presumably still
// be DELETED_NODE when used below -- confirm this path is unreachable.
197 assert(!"Unhandled operand type parings in SELECT_CC");
199 // XXX Check the value of LHS and RHS and avoid creating sequences like
// (the continuation of the comment above is not present in this copy)
201 LHS
= DAG
.getNode(ConversionOp
, DL
, VT
, LHS
);
202 RHS
= DAG
.getNode(ConversionOp
, DL
, VT
, RHS
);
205 // If True is a hardware TRUE value and False is a hardware FALSE value or
206 // vice-versa we can handle this with a native instruction (SET* instructions).
207 if ((isHWTrueValue(True
) && isHWFalseValue(False
))) {
208 return DAG
.getNode(ISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, True
, False
, CC
);
211 // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
212 // we can handle this with a native instruction, but we need to swap true
213 // and false and change the conditional.
214 if (isHWTrueValue(False
) && isHWFalseValue(True
)) {
217 // XXX Check if we can lower this to a SELECT or if it is supported by a native
218 // operation. (The code below does this but we don't have the Instruction
219 // selection patterns to do this yet.)
// When one side of the compare is zero, the other side becomes Cond.
221 if (isZero(LHS
) || isZero(RHS
)) {
222 SDValue Cond
= (isZero(LHS
) ? RHS
: LHS
);
233 // We can lower to select
240 return DAG
.getNode(ISD::SELECT
, DL
, VT
, Cond
, True
, False
);
242 // Supported by a native operation (CNDGE, CNDGT)
243 return DAG
.getNode(ISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, True
, False
, CC
);
248 // If we make it this far it means we have no native instructions to handle
249 // this SELECT_CC, so we must lower it.
250 SDValue HWTrue
, HWFalse
;
// Hardware booleans as encoded here: 1.0f / 0.0f for f32, -1 / 0 for i32
// (the same values isHWTrueValue / isHWFalseValue recognise).
252 if (VT
== MVT::f32
) {
253 HWTrue
= DAG
.getConstantFP(1.0f
, VT
);
254 HWFalse
= DAG
.getConstantFP(0.0f
, VT
);
255 } else if (VT
== MVT::i32
) {
256 HWTrue
= DAG
.getConstant(-1, VT
);
257 HWFalse
= DAG
.getConstant(0, VT
);
260 assert(!"Unhandled value type in LowerSELECT_CC");
263 // Lower this unsupported SELECT_CC into a combination of two supported
264 // SELECT_CC operations.
// Cond evaluates the comparison to a hardware boolean; the final SELECT then
// chooses between the caller's True and False values.
265 SDValue Cond
= DAG
.getNode(ISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, HWTrue
, HWFalse
, CC
);
267 return DAG
.getNode(ISD::SELECT
, DL
, VT
, Cond
, True
, False
);
// Custom lowering for ISD::UDIVREM (registered for i32 in the constructor).
// Computes an unsigned quotient and remainder of operand 0 (Num) by operand 1
// (Den) from an approximate reciprocal (AMDGPUISD::URECIP): the reciprocal is
// first adjusted by its estimated rounding error, a quotient and remainder
// are derived from it, and both are then corrected by one step in either
// direction (Quotient +/- 1, Remainder -/+ Den). The two ReplaceAllUsesWith
// calls at the end hand Div and Rem back to the DAG.
// NOTE(review): several operand / condition-code lines and the return
// statement are not present in this copy (calls ending on a trailing comma
// followed by a skipped embedded line number); the comments below describe
// only what is visible.
271 SDValue
AMDGPUTargetLowering::LowerUDIVREM(SDValue Op
,
272 SelectionDAG
&DAG
) const
274 DebugLoc DL
= Op
.getDebugLoc();
275 EVT VT
= Op
.getValueType();
277 SDValue Num
= Op
.getOperand(0);
278 SDValue Den
= Op
.getOperand(1);
// NOTE(review): no use of Results is visible in this copy.
280 SmallVector
<SDValue
, 8> Results
;
282 // RCP = URECIP(Den) = 2^32 / Den + e
283 // e is rounding error.
284 SDValue RCP
= DAG
.getNode(AMDGPUISD::URECIP
, DL
, VT
, Den
);
286 // RCP_LO = umulo(RCP, Den)
// NOTE(review): ISD::UMULO is built with the single result type VT here and
// used as the low half of the product -- confirm this matches the result
// list the node is expected to have.
287 SDValue RCP_LO
= DAG
.getNode(ISD::UMULO
, DL
, VT
, RCP
, Den
);
289 // RCP_HI = mulhu (RCP, Den)
290 SDValue RCP_HI
= DAG
.getNode(ISD::MULHU
, DL
, VT
, RCP
, Den
);
292 // NEG_RCP_LO = -RCP_LO
293 SDValue NEG_RCP_LO
= DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, VT
),
296 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
297 SDValue ABS_RCP_LO
= DAG
.getSelectCC(DL
, RCP_HI
, DAG
.getConstant(0, VT
),
300 // Calculate the rounding error from the URECIP instruction
301 // E = mulhu(ABS_RCP_LO, RCP)
302 SDValue E
= DAG
.getNode(ISD::MULHU
, DL
, VT
, ABS_RCP_LO
, RCP
);
// RCP_A_E = RCP + E
305 SDValue RCP_A_E
= DAG
.getNode(ISD::ADD
, DL
, VT
, RCP
, E
);
// RCP_S_E = RCP - E
308 SDValue RCP_S_E
= DAG
.getNode(ISD::SUB
, DL
, VT
, RCP
, E
);
310 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
311 SDValue Tmp0
= DAG
.getSelectCC(DL
, RCP_HI
, DAG
.getConstant(0, VT
),
314 // Quotient = mulhu(Tmp0, Num)
315 SDValue Quotient
= DAG
.getNode(ISD::MULHU
, DL
, VT
, Tmp0
, Num
);
317 // Num_S_Remainder = Quotient * Den
318 SDValue Num_S_Remainder
= DAG
.getNode(ISD::UMULO
, DL
, VT
, Quotient
, Den
);
320 // Remainder = Num - Num_S_Remainder
321 SDValue Remainder
= DAG
.getNode(ISD::SUB
, DL
, VT
, Num
, Num_S_Remainder
);
323 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
324 SDValue Remainder_GE_Den
= DAG
.getSelectCC(DL
, Remainder
, Den
,
325 DAG
.getConstant(-1, VT
),
326 DAG
.getConstant(0, VT
),
328 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
329 SDValue Remainder_GE_Zero
= DAG
.getSelectCC(DL
, Remainder
,
330 DAG
.getConstant(0, VT
),
331 DAG
.getConstant(-1, VT
),
332 DAG
.getConstant(0, VT
),
334 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
335 SDValue Tmp1
= DAG
.getNode(ISD::AND
, DL
, VT
, Remainder_GE_Den
,
338 // Calculate Division result:
340 // Quotient_A_One = Quotient + 1
341 SDValue Quotient_A_One
= DAG
.getNode(ISD::ADD
, DL
, VT
, Quotient
,
342 DAG
.getConstant(1, VT
));
344 // Quotient_S_One = Quotient - 1
345 SDValue Quotient_S_One
= DAG
.getNode(ISD::SUB
, DL
, VT
, Quotient
,
346 DAG
.getConstant(1, VT
));
348 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
349 SDValue Div
= DAG
.getSelectCC(DL
, Tmp1
, DAG
.getConstant(0, VT
),
350 Quotient
, Quotient_A_One
, ISD::SETEQ
);
352 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
353 Div
= DAG
.getSelectCC(DL
, Remainder_GE_Zero
, DAG
.getConstant(0, VT
),
354 Quotient_S_One
, Div
, ISD::SETEQ
);
356 // Calculate Rem result:
358 // Remainder_S_Den = Remainder - Den
359 SDValue Remainder_S_Den
= DAG
.getNode(ISD::SUB
, DL
, VT
, Remainder
, Den
);
361 // Remainder_A_Den = Remainder + Den
362 SDValue Remainder_A_Den
= DAG
.getNode(ISD::ADD
, DL
, VT
, Remainder
, Den
);
364 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
365 SDValue Rem
= DAG
.getSelectCC(DL
, Tmp1
, DAG
.getConstant(0, VT
),
366 Remainder
, Remainder_S_Den
, ISD::SETEQ
);
368 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
369 Rem
= DAG
.getSelectCC(DL
, Remainder_GE_Zero
, DAG
.getConstant(0, VT
),
370 Remainder_A_Den
, Rem
, ISD::SETEQ
);
// Hand the results back: the first call passes Div, the second Rem.
// NOTE(review): both calls are given a node pointer plus the address of a
// single SDValue; presumably getValue(0) and getValue(1) refer to the same
// node, so confirm this ReplaceAllUsesWith overload is correct for a node
// with two results.
372 DAG
.ReplaceAllUsesWith(Op
.getValue(0).getNode(), &Div
);
373 DAG
.ReplaceAllUsesWith(Op
.getValue(1).getNode(), &Rem
);
378 //===----------------------------------------------------------------------===//
380 //===----------------------------------------------------------------------===//
// Returns whether Op is a constant the hardware treats as boolean TRUE:
// a floating point constant exactly equal to 1.0, or an integer constant
// with all bits set. These are the same values LowerSELECT_CC materialises
// as HWTrue (1.0f for f32, -1 for i32).
// NOTE(review): the fall-through return for non-constant operands and the
// closing braces are not present in this copy of the file.
382 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op
) const
384 if (ConstantFPSDNode
* CFP
= dyn_cast
<ConstantFPSDNode
>(Op
)) {
385 return CFP
->isExactlyValue(1.0);
387 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
)) {
388 return C
->isAllOnesValue();
// Returns whether Op is a constant the hardware treats as boolean FALSE:
// a floating point constant whose APFloat value reports isZero(), or an
// integer constant equal to zero. These match the HWFalse values that
// LowerSELECT_CC materialises (0.0f for f32, 0 for i32).
// NOTE(review): the fall-through return for non-constant operands and the
// closing braces are not present in this copy of the file.
393 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op
) const
395 if (ConstantFPSDNode
* CFP
= dyn_cast
<ConstantFPSDNode
>(Op
)) {
396 return CFP
->getValueAPF().isZero();
398 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
)) {
399 return C
->isNullValue();
// Thin forwarder to AMDGPU::utilAddLiveIn(). Passes MF, MRI, TII and reg
// through unchanged and supplies the register of MI's operand 0 as the final
// argument. What utilAddLiveIn does with these is defined elsewhere
// (AMDGPUUtil.h is included at the top of the file).
404 void AMDGPUTargetLowering::addLiveIn(MachineInstr
* MI
,
405 MachineFunction
* MF
, MachineRegisterInfo
& MRI
,
406 const TargetInstrInfo
* TII
, unsigned reg
) const
408 AMDGPU::utilAddLiveIn(MF
, MRI
, TII
, reg
, MI
->getOperand(0).getReg());
// NODE_NAME_CASE(X) expands to a switch case for AMDGPUISD::X that returns
// the stringified enumerator name "X".
411 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
// Returns a printable name for a target-specific DAG opcode. Opcodes handled
// here come from the AMDGPUISD namespace; anything else is forwarded to
// AMDILTargetLowering::getTargetNodeName().
// NOTE(review): only the FRACT and URECIP cases are visible; the embedded
// line numbers skip from 418 to 425, so the cases in between, the switch
// header and the closing braces appear to be missing from this copy.
413 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode
) const
416 default: return AMDILTargetLowering::getTargetNodeName(Opcode
);
418 NODE_NAME_CASE(FRACT
)
425 NODE_NAME_CASE(URECIP
)