//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file contains TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//
14 #include "AMDGPUISelLowering.h"
15 #include "AMDGPURegisterInfo.h"
16 #include "AMDILDevices.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "AMDILSubtarget.h"
19 #include "AMDILUtilityFunctions.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/PseudoSourceValue.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SelectionDAGNodes.h"
26 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
27 #include "llvm/DerivedTypes.h"
28 #include "llvm/Instructions.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetOptions.h"
35 //===----------------------------------------------------------------------===//
36 // Calling Convention Implementation
37 //===----------------------------------------------------------------------===//
38 #include "AMDGPUGenCallingConv.inc"
40 //===----------------------------------------------------------------------===//
41 // TargetLowering Implementation Help Functions Begin
42 //===----------------------------------------------------------------------===//
46 getConversionNode(SelectionDAG
&DAG
, SDValue
& Src
, SDValue
& Dst
, bool asType
)
48 DebugLoc DL
= Src
.getDebugLoc();
49 EVT svt
= Src
.getValueType().getScalarType();
50 EVT dvt
= Dst
.getValueType().getScalarType();
51 if (svt
.isFloatingPoint() && dvt
.isFloatingPoint()) {
52 if (dvt
.bitsGT(svt
)) {
53 Src
= DAG
.getNode(ISD::FP_EXTEND
, DL
, dvt
, Src
);
54 } else if (svt
.bitsLT(svt
)) {
55 Src
= DAG
.getNode(ISD::FP_ROUND
, DL
, dvt
, Src
,
56 DAG
.getConstant(1, MVT::i32
));
58 } else if (svt
.isInteger() && dvt
.isInteger()) {
59 if (!svt
.bitsEq(dvt
)) {
60 Src
= DAG
.getSExtOrTrunc(Src
, DL
, dvt
);
62 } else if (svt
.isInteger()) {
63 unsigned opcode
= (asType
) ? ISD::BITCAST
: ISD::SINT_TO_FP
;
64 if (!svt
.bitsEq(dvt
)) {
65 if (dvt
.getSimpleVT().SimpleTy
== MVT::f32
) {
66 Src
= DAG
.getSExtOrTrunc(Src
, DL
, MVT::i32
);
67 } else if (dvt
.getSimpleVT().SimpleTy
== MVT::f64
) {
68 Src
= DAG
.getSExtOrTrunc(Src
, DL
, MVT::i64
);
70 assert(0 && "We only support 32 and 64bit fp types");
73 Src
= DAG
.getNode(opcode
, DL
, dvt
, Src
);
74 } else if (dvt
.isInteger()) {
75 unsigned opcode
= (asType
) ? ISD::BITCAST
: ISD::FP_TO_SINT
;
76 if (svt
.getSimpleVT().SimpleTy
== MVT::f32
) {
77 Src
= DAG
.getNode(opcode
, DL
, MVT::i32
, Src
);
78 } else if (svt
.getSimpleVT().SimpleTy
== MVT::f64
) {
79 Src
= DAG
.getNode(opcode
, DL
, MVT::i64
, Src
);
81 assert(0 && "We only support 32 and 64bit fp types");
83 Src
= DAG
.getSExtOrTrunc(Src
, DL
, dvt
);
88 } // End namespace AMDPGU
89 } // End namespace llvm
91 //===----------------------------------------------------------------------===//
92 // TargetLowering Implementation Help Functions End
93 //===----------------------------------------------------------------------===//
95 //===----------------------------------------------------------------------===//
96 // TargetLowering Class Implementation Begins
97 //===----------------------------------------------------------------------===//
98 void AMDGPUTargetLowering::InitAMDILLowering()
147 size_t numTypes
= sizeof(types
) / sizeof(*types
);
148 size_t numFloatTypes
= sizeof(FloatTypes
) / sizeof(*FloatTypes
);
149 size_t numIntTypes
= sizeof(IntTypes
) / sizeof(*IntTypes
);
150 size_t numVectorTypes
= sizeof(VectorTypes
) / sizeof(*VectorTypes
);
152 const AMDILSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDILSubtarget
>();
153 // These are the current register classes that are
156 for (unsigned int x
= 0; x
< numTypes
; ++x
) {
157 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)types
[x
];
159 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
160 // We cannot sextinreg, expand to shifts
161 setOperationAction(ISD::SIGN_EXTEND_INREG
, VT
, Custom
);
162 setOperationAction(ISD::SUBE
, VT
, Expand
);
163 setOperationAction(ISD::SUBC
, VT
, Expand
);
164 setOperationAction(ISD::ADDE
, VT
, Expand
);
165 setOperationAction(ISD::ADDC
, VT
, Expand
);
166 setOperationAction(ISD::BRCOND
, VT
, Custom
);
167 setOperationAction(ISD::BR_JT
, VT
, Expand
);
168 setOperationAction(ISD::BRIND
, VT
, Expand
);
169 // TODO: Implement custom UREM/SREM routines
170 setOperationAction(ISD::SREM
, VT
, Expand
);
171 setOperationAction(ISD::SELECT
, VT
, Custom
);
172 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
173 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
174 if (VT
!= MVT::i64
&& VT
!= MVT::v2i64
) {
175 setOperationAction(ISD::SDIV
, VT
, Custom
);
178 for (unsigned int x
= 0; x
< numFloatTypes
; ++x
) {
179 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)FloatTypes
[x
];
181 // IL does not have these operations for floating point types
182 setOperationAction(ISD::FP_ROUND_INREG
, VT
, Expand
);
183 setOperationAction(ISD::SETOLT
, VT
, Expand
);
184 setOperationAction(ISD::SETOGE
, VT
, Expand
);
185 setOperationAction(ISD::SETOGT
, VT
, Expand
);
186 setOperationAction(ISD::SETOLE
, VT
, Expand
);
187 setOperationAction(ISD::SETULT
, VT
, Expand
);
188 setOperationAction(ISD::SETUGE
, VT
, Expand
);
189 setOperationAction(ISD::SETUGT
, VT
, Expand
);
190 setOperationAction(ISD::SETULE
, VT
, Expand
);
193 for (unsigned int x
= 0; x
< numIntTypes
; ++x
) {
194 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)IntTypes
[x
];
196 // GPU also does not have divrem function for signed or unsigned
197 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
199 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
200 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
201 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
203 // GPU doesn't have a rotl, rotr, or byteswap instruction
204 setOperationAction(ISD::ROTR
, VT
, Expand
);
205 setOperationAction(ISD::BSWAP
, VT
, Expand
);
207 // GPU doesn't have any counting operators
208 setOperationAction(ISD::CTPOP
, VT
, Expand
);
209 setOperationAction(ISD::CTTZ
, VT
, Expand
);
210 setOperationAction(ISD::CTLZ
, VT
, Expand
);
213 for ( unsigned int ii
= 0; ii
< numVectorTypes
; ++ii
)
215 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)VectorTypes
[ii
];
217 setOperationAction(ISD::BUILD_VECTOR
, VT
, Custom
);
218 setOperationAction(ISD::VECTOR_SHUFFLE
, VT
, Expand
);
219 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
220 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
221 // setOperationAction(ISD::VSETCC, VT, Expand);
222 setOperationAction(ISD::SELECT_CC
, VT
, Expand
);
223 setOperationAction(ISD::SELECT
, VT
, Expand
);
226 if (STM
.device()->isSupported(AMDILDeviceInfo::LongOps
)) {
227 setOperationAction(ISD::MULHU
, MVT::i64
, Expand
);
228 setOperationAction(ISD::MULHU
, MVT::v2i64
, Expand
);
229 setOperationAction(ISD::MULHS
, MVT::i64
, Expand
);
230 setOperationAction(ISD::MULHS
, MVT::v2i64
, Expand
);
231 setOperationAction(ISD::ADD
, MVT::v2i64
, Expand
);
232 setOperationAction(ISD::SREM
, MVT::v2i64
, Expand
);
233 setOperationAction(ISD::Constant
, MVT::i64
, Legal
);
234 setOperationAction(ISD::SDIV
, MVT::v2i64
, Expand
);
235 setOperationAction(ISD::TRUNCATE
, MVT::v2i64
, Expand
);
236 setOperationAction(ISD::SIGN_EXTEND
, MVT::v2i64
, Expand
);
237 setOperationAction(ISD::ZERO_EXTEND
, MVT::v2i64
, Expand
);
238 setOperationAction(ISD::ANY_EXTEND
, MVT::v2i64
, Expand
);
240 if (STM
.device()->isSupported(AMDILDeviceInfo::DoubleOps
)) {
241 // we support loading/storing v2f64 but not operations on the type
242 setOperationAction(ISD::FADD
, MVT::v2f64
, Expand
);
243 setOperationAction(ISD::FSUB
, MVT::v2f64
, Expand
);
244 setOperationAction(ISD::FMUL
, MVT::v2f64
, Expand
);
245 setOperationAction(ISD::FP_ROUND_INREG
, MVT::v2f64
, Expand
);
246 setOperationAction(ISD::FP_EXTEND
, MVT::v2f64
, Expand
);
247 setOperationAction(ISD::ConstantFP
, MVT::f64
, Legal
);
248 // We want to expand vector conversions into their scalar
250 setOperationAction(ISD::TRUNCATE
, MVT::v2f64
, Expand
);
251 setOperationAction(ISD::SIGN_EXTEND
, MVT::v2f64
, Expand
);
252 setOperationAction(ISD::ZERO_EXTEND
, MVT::v2f64
, Expand
);
253 setOperationAction(ISD::ANY_EXTEND
, MVT::v2f64
, Expand
);
254 setOperationAction(ISD::FABS
, MVT::f64
, Expand
);
255 setOperationAction(ISD::FABS
, MVT::v2f64
, Expand
);
257 // TODO: Fix the UDIV24 algorithm so it works for these
258 // types correctly. This needs vector comparisons
259 // for this to work correctly.
260 setOperationAction(ISD::UDIV
, MVT::v2i8
, Expand
);
261 setOperationAction(ISD::UDIV
, MVT::v4i8
, Expand
);
262 setOperationAction(ISD::UDIV
, MVT::v2i16
, Expand
);
263 setOperationAction(ISD::UDIV
, MVT::v4i16
, Expand
);
264 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Custom
);
265 setOperationAction(ISD::SUBC
, MVT::Other
, Expand
);
266 setOperationAction(ISD::ADDE
, MVT::Other
, Expand
);
267 setOperationAction(ISD::ADDC
, MVT::Other
, Expand
);
268 setOperationAction(ISD::BRCOND
, MVT::Other
, Custom
);
269 setOperationAction(ISD::BR_JT
, MVT::Other
, Expand
);
270 setOperationAction(ISD::BRIND
, MVT::Other
, Expand
);
271 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::Other
, Expand
);
273 setOperationAction(ISD::BUILD_VECTOR
, MVT::Other
, Custom
);
275 // Use the default implementation.
276 setOperationAction(ISD::ConstantFP
, MVT::f32
, Legal
);
277 setOperationAction(ISD::Constant
, MVT::i32
, Legal
);
279 setSchedulingPreference(Sched::RegPressure
);
280 setPow2DivIsCheap(false);
281 setPrefLoopAlignment(16);
282 setSelectIsExpensive(true);
283 setJumpIsExpensive(true);
285 maxStoresPerMemcpy
= 4096;
286 maxStoresPerMemmove
= 4096;
287 maxStoresPerMemset
= 4096;
291 #undef numVectorTypes
296 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo
&Info
,
297 const CallInst
&I
, unsigned Intrinsic
) const
301 // The backend supports 32 and 64 bit floating point immediates
303 AMDGPUTargetLowering::isFPImmLegal(const APFloat
&Imm
, EVT VT
) const
305 if (VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f32
306 || VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f64
) {
314 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT
) const
316 if (VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f32
317 || VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f64
) {
325 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
326 // be zero. Op is expected to be a target specific node. Used by DAG
330 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
334 const SelectionDAG
&DAG
,
335 unsigned Depth
) const
339 KnownZero
= KnownOne
= APInt(KnownOne
.getBitWidth(), 0); // Don't know anything
340 switch (Op
.getOpcode()) {
343 DAG
.ComputeMaskedBits(
349 DAG
.ComputeMaskedBits(
354 assert((KnownZero
& KnownOne
) == 0
355 && "Bits known to be one AND zero?");
356 assert((KnownZero2
& KnownOne2
) == 0
357 && "Bits known to be one AND zero?");
358 // Only known if known in both the LHS and RHS
359 KnownOne
&= KnownOne2
;
360 KnownZero
&= KnownZero2
;
365 //===----------------------------------------------------------------------===//
366 // Other Lowering Hooks
367 //===----------------------------------------------------------------------===//
370 AMDGPUTargetLowering::LowerSDIV(SDValue Op
, SelectionDAG
&DAG
) const
372 EVT OVT
= Op
.getValueType();
374 if (OVT
.getScalarType() == MVT::i64
) {
375 DST
= LowerSDIV64(Op
, DAG
);
376 } else if (OVT
.getScalarType() == MVT::i32
) {
377 DST
= LowerSDIV32(Op
, DAG
);
378 } else if (OVT
.getScalarType() == MVT::i16
379 || OVT
.getScalarType() == MVT::i8
) {
380 DST
= LowerSDIV24(Op
, DAG
);
382 DST
= SDValue(Op
.getNode(), 0);
388 AMDGPUTargetLowering::LowerSREM(SDValue Op
, SelectionDAG
&DAG
) const
390 EVT OVT
= Op
.getValueType();
392 if (OVT
.getScalarType() == MVT::i64
) {
393 DST
= LowerSREM64(Op
, DAG
);
394 } else if (OVT
.getScalarType() == MVT::i32
) {
395 DST
= LowerSREM32(Op
, DAG
);
396 } else if (OVT
.getScalarType() == MVT::i16
) {
397 DST
= LowerSREM16(Op
, DAG
);
398 } else if (OVT
.getScalarType() == MVT::i8
) {
399 DST
= LowerSREM8(Op
, DAG
);
401 DST
= SDValue(Op
.getNode(), 0);
407 AMDGPUTargetLowering::LowerBUILD_VECTOR( SDValue Op
, SelectionDAG
&DAG
) const
409 EVT VT
= Op
.getValueType();
414 DebugLoc DL
= Op
.getDebugLoc();
415 Nodes1
= DAG
.getNode(AMDGPUISD::VBUILD
,
417 VT
, Op
.getOperand(0));
419 bool allEqual
= true;
420 for (unsigned x
= 1, y
= Op
.getNumOperands(); x
< y
; ++x
) {
421 if (Op
.getOperand(0) != Op
.getOperand(x
)) {
430 switch(Op
.getNumOperands()) {
435 fourth
= Op
.getOperand(3);
436 if (fourth
.getOpcode() != ISD::UNDEF
) {
437 Nodes1
= DAG
.getNode(
438 ISD::INSERT_VECTOR_ELT
,
443 DAG
.getConstant(7, MVT::i32
));
446 third
= Op
.getOperand(2);
447 if (third
.getOpcode() != ISD::UNDEF
) {
448 Nodes1
= DAG
.getNode(
449 ISD::INSERT_VECTOR_ELT
,
454 DAG
.getConstant(6, MVT::i32
));
457 second
= Op
.getOperand(1);
458 if (second
.getOpcode() != ISD::UNDEF
) {
459 Nodes1
= DAG
.getNode(
460 ISD::INSERT_VECTOR_ELT
,
465 DAG
.getConstant(5, MVT::i32
));
473 AMDGPUTargetLowering::LowerSELECT(SDValue Op
, SelectionDAG
&DAG
) const
475 SDValue Cond
= Op
.getOperand(0);
476 SDValue LHS
= Op
.getOperand(1);
477 SDValue RHS
= Op
.getOperand(2);
478 DebugLoc DL
= Op
.getDebugLoc();
479 Cond
= AMDGPU::getConversionNode(DAG
, Cond
, Op
, true);
480 Cond
= DAG
.getNode(AMDGPUISD::CMOVLOG
,
482 Op
.getValueType(), Cond
, LHS
, RHS
);
487 AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op
, SelectionDAG
&DAG
) const
489 SDValue Data
= Op
.getOperand(0);
490 VTSDNode
*BaseType
= cast
<VTSDNode
>(Op
.getOperand(1));
491 DebugLoc DL
= Op
.getDebugLoc();
492 EVT DVT
= Data
.getValueType();
493 EVT BVT
= BaseType
->getVT();
494 unsigned baseBits
= BVT
.getScalarType().getSizeInBits();
495 unsigned srcBits
= DVT
.isSimple() ? DVT
.getScalarType().getSizeInBits() : 1;
496 unsigned shiftBits
= srcBits
- baseBits
;
498 // If the op is less than 32 bits, then it needs to extend to 32bits
499 // so it can properly keep the upper bits valid.
500 EVT IVT
= genIntType(32, DVT
.isVector() ? DVT
.getVectorNumElements() : 1);
501 Data
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, IVT
, Data
);
502 shiftBits
= 32 - baseBits
;
505 SDValue Shift
= DAG
.getConstant(shiftBits
, DVT
);
506 // Shift left by 'Shift' bits.
507 Data
= DAG
.getNode(ISD::SHL
, DL
, DVT
, Data
, Shift
);
508 // Signed shift Right by 'Shift' bits.
509 Data
= DAG
.getNode(ISD::SRA
, DL
, DVT
, Data
, Shift
);
511 // Once the sign extension is done, the op needs to be converted to
512 // its original type.
513 Data
= DAG
.getSExtOrTrunc(Data
, DL
, Op
.getOperand(0).getValueType());
518 AMDGPUTargetLowering::genIntType(uint32_t size
, uint32_t numEle
) const
520 int iSize
= (size
* numEle
);
521 int vEle
= (iSize
>> ((size
== 64) ? 6 : 5));
527 return EVT(MVT::i64
);
529 return EVT(MVT::getVectorVT(MVT::i64
, vEle
));
533 return EVT(MVT::i32
);
535 return EVT(MVT::getVectorVT(MVT::i32
, vEle
));
541 AMDGPUTargetLowering::LowerBRCOND(SDValue Op
, SelectionDAG
&DAG
) const
543 SDValue Chain
= Op
.getOperand(0);
544 SDValue Cond
= Op
.getOperand(1);
545 SDValue Jump
= Op
.getOperand(2);
547 Result
= DAG
.getNode(
548 AMDGPUISD::BRANCH_COND
,
556 AMDGPUTargetLowering::LowerSDIV24(SDValue Op
, SelectionDAG
&DAG
) const
558 DebugLoc DL
= Op
.getDebugLoc();
559 EVT OVT
= Op
.getValueType();
560 SDValue LHS
= Op
.getOperand(0);
561 SDValue RHS
= Op
.getOperand(1);
564 if (!OVT
.isVector()) {
567 } else if (OVT
.getVectorNumElements() == 2) {
570 } else if (OVT
.getVectorNumElements() == 4) {
574 unsigned bitsize
= OVT
.getScalarType().getSizeInBits();
575 // char|short jq = ia ^ ib;
576 SDValue jq
= DAG
.getNode(ISD::XOR
, DL
, OVT
, LHS
, RHS
);
578 // jq = jq >> (bitsize - 2)
579 jq
= DAG
.getNode(ISD::SRA
, DL
, OVT
, jq
, DAG
.getConstant(bitsize
- 2, OVT
));
582 jq
= DAG
.getNode(ISD::OR
, DL
, OVT
, jq
, DAG
.getConstant(1, OVT
));
585 jq
= DAG
.getSExtOrTrunc(jq
, DL
, INTTY
);
587 // int ia = (int)LHS;
588 SDValue ia
= DAG
.getSExtOrTrunc(LHS
, DL
, INTTY
);
591 SDValue ib
= DAG
.getSExtOrTrunc(RHS
, DL
, INTTY
);
593 // float fa = (float)ia;
594 SDValue fa
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ia
);
596 // float fb = (float)ib;
597 SDValue fb
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ib
);
599 // float fq = native_divide(fa, fb);
600 SDValue fq
= DAG
.getNode(AMDGPUISD::DIV_INF
, DL
, FLTTY
, fa
, fb
);
603 fq
= DAG
.getNode(ISD::FTRUNC
, DL
, FLTTY
, fq
);
605 // float fqneg = -fq;
606 SDValue fqneg
= DAG
.getNode(ISD::FNEG
, DL
, FLTTY
, fq
);
608 // float fr = mad(fqneg, fb, fa);
609 SDValue fr
= DAG
.getNode(AMDGPUISD::MAD
, DL
, FLTTY
, fqneg
, fb
, fa
);
612 SDValue iq
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, INTTY
, fq
);
615 fr
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fr
);
618 fb
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fb
);
620 // int cv = fr >= fb;
622 if (INTTY
== MVT::i32
) {
623 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
625 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
627 // jq = (cv ? jq : 0);
628 jq
= DAG
.getNode(AMDGPUISD::CMOVLOG
, DL
, OVT
, cv
, jq
,
629 DAG
.getConstant(0, OVT
));
631 iq
= DAG
.getSExtOrTrunc(iq
, DL
, OVT
);
632 iq
= DAG
.getNode(ISD::ADD
, DL
, OVT
, iq
, jq
);
637 AMDGPUTargetLowering::LowerSDIV32(SDValue Op
, SelectionDAG
&DAG
) const
639 DebugLoc DL
= Op
.getDebugLoc();
640 EVT OVT
= Op
.getValueType();
641 SDValue LHS
= Op
.getOperand(0);
642 SDValue RHS
= Op
.getOperand(1);
643 // The LowerSDIV32 function generates equivalent to the following IL.
653 // ixor r10, r10, r11
664 SDValue r10
= DAG
.getSelectCC(DL
,
665 r0
, DAG
.getConstant(0, OVT
),
666 DAG
.getConstant(-1, MVT::i32
),
667 DAG
.getConstant(0, MVT::i32
),
671 SDValue r11
= DAG
.getSelectCC(DL
,
672 r1
, DAG
.getConstant(0, OVT
),
673 DAG
.getConstant(-1, MVT::i32
),
674 DAG
.getConstant(0, MVT::i32
),
678 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
681 r1
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r1
, r11
);
684 r0
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
687 r1
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r1
, r11
);
690 r0
= DAG
.getNode(ISD::UDIV
, DL
, OVT
, r0
, r1
);
692 // ixor r10, r10, r11
693 r10
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r10
, r11
);
696 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
699 SDValue DST
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
704 AMDGPUTargetLowering::LowerSDIV64(SDValue Op
, SelectionDAG
&DAG
) const
706 return SDValue(Op
.getNode(), 0);
710 AMDGPUTargetLowering::LowerSREM8(SDValue Op
, SelectionDAG
&DAG
) const
712 DebugLoc DL
= Op
.getDebugLoc();
713 EVT OVT
= Op
.getValueType();
714 MVT INTTY
= MVT::i32
;
715 if (OVT
== MVT::v2i8
) {
717 } else if (OVT
== MVT::v4i8
) {
720 SDValue LHS
= DAG
.getSExtOrTrunc(Op
.getOperand(0), DL
, INTTY
);
721 SDValue RHS
= DAG
.getSExtOrTrunc(Op
.getOperand(1), DL
, INTTY
);
722 LHS
= DAG
.getNode(ISD::SREM
, DL
, INTTY
, LHS
, RHS
);
723 LHS
= DAG
.getSExtOrTrunc(LHS
, DL
, OVT
);
728 AMDGPUTargetLowering::LowerSREM16(SDValue Op
, SelectionDAG
&DAG
) const
730 DebugLoc DL
= Op
.getDebugLoc();
731 EVT OVT
= Op
.getValueType();
732 MVT INTTY
= MVT::i32
;
733 if (OVT
== MVT::v2i16
) {
735 } else if (OVT
== MVT::v4i16
) {
738 SDValue LHS
= DAG
.getSExtOrTrunc(Op
.getOperand(0), DL
, INTTY
);
739 SDValue RHS
= DAG
.getSExtOrTrunc(Op
.getOperand(1), DL
, INTTY
);
740 LHS
= DAG
.getNode(ISD::SREM
, DL
, INTTY
, LHS
, RHS
);
741 LHS
= DAG
.getSExtOrTrunc(LHS
, DL
, OVT
);
746 AMDGPUTargetLowering::LowerSREM32(SDValue Op
, SelectionDAG
&DAG
) const
748 DebugLoc DL
= Op
.getDebugLoc();
749 EVT OVT
= Op
.getValueType();
750 SDValue LHS
= Op
.getOperand(0);
751 SDValue RHS
= Op
.getOperand(1);
752 // The LowerSREM32 function generates equivalent to the following IL.
774 SDValue r10
= DAG
.getSetCC(DL
, OVT
, r0
, DAG
.getConstant(0, OVT
), ISD::SETLT
);
777 SDValue r11
= DAG
.getSetCC(DL
, OVT
, r1
, DAG
.getConstant(0, OVT
), ISD::SETLT
);
780 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
783 r1
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r1
, r11
);
786 r0
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
789 r1
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r1
, r11
);
792 SDValue r20
= DAG
.getNode(ISD::UREM
, DL
, OVT
, r0
, r1
);
795 r20
= DAG
.getNode(AMDGPUISD::UMUL
, DL
, OVT
, r20
, r1
);
798 r0
= DAG
.getNode(ISD::SUB
, DL
, OVT
, r0
, r20
);
801 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
804 SDValue DST
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
809 AMDGPUTargetLowering::LowerSREM64(SDValue Op
, SelectionDAG
&DAG
) const
811 return SDValue(Op
.getNode(), 0);