1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
10 // This file contains TargetLowering functions borrowed from AMDIL.
12 //===----------------------------------------------------------------------===//
14 #include "AMDGPUISelLowering.h"
15 #include "AMDGPURegisterInfo.h"
16 #include "AMDILDevices.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "AMDILUtilityFunctions.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/PseudoSourceValue.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SelectionDAGNodes.h"
26 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
27 #include "llvm/DerivedTypes.h"
28 #include "llvm/Instructions.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetOptions.h"
35 //===----------------------------------------------------------------------===//
36 // Calling Convention Implementation
37 //===----------------------------------------------------------------------===//
38 #include "AMDGPUGenCallingConv.inc"
40 //===----------------------------------------------------------------------===//
41 // TargetLowering Implementation Help Functions End
42 //===----------------------------------------------------------------------===//
44 //===----------------------------------------------------------------------===//
45 // TargetLowering Class Implementation Begins
46 //===----------------------------------------------------------------------===//
47 void AMDGPUTargetLowering::InitAMDILLowering()
96 size_t numTypes
= sizeof(types
) / sizeof(*types
);
97 size_t numFloatTypes
= sizeof(FloatTypes
) / sizeof(*FloatTypes
);
98 size_t numIntTypes
= sizeof(IntTypes
) / sizeof(*IntTypes
);
99 size_t numVectorTypes
= sizeof(VectorTypes
) / sizeof(*VectorTypes
);
101 const AMDGPUSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDGPUSubtarget
>();
102 // These are the current register classes that are
105 for (unsigned int x
= 0; x
< numTypes
; ++x
) {
106 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)types
[x
];
108 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
109 // We cannot sextinreg, expand to shifts
110 setOperationAction(ISD::SIGN_EXTEND_INREG
, VT
, Custom
);
111 setOperationAction(ISD::SUBE
, VT
, Expand
);
112 setOperationAction(ISD::SUBC
, VT
, Expand
);
113 setOperationAction(ISD::ADDE
, VT
, Expand
);
114 setOperationAction(ISD::ADDC
, VT
, Expand
);
115 setOperationAction(ISD::BRCOND
, VT
, Custom
);
116 setOperationAction(ISD::BR_JT
, VT
, Expand
);
117 setOperationAction(ISD::BRIND
, VT
, Expand
);
118 // TODO: Implement custom UREM/SREM routines
119 setOperationAction(ISD::SREM
, VT
, Expand
);
120 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
121 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
122 if (VT
!= MVT::i64
&& VT
!= MVT::v2i64
) {
123 setOperationAction(ISD::SDIV
, VT
, Custom
);
126 for (unsigned int x
= 0; x
< numFloatTypes
; ++x
) {
127 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)FloatTypes
[x
];
129 // IL does not have these operations for floating point types
130 setOperationAction(ISD::FP_ROUND_INREG
, VT
, Expand
);
131 setOperationAction(ISD::SETOLT
, VT
, Expand
);
132 setOperationAction(ISD::SETOGE
, VT
, Expand
);
133 setOperationAction(ISD::SETOGT
, VT
, Expand
);
134 setOperationAction(ISD::SETOLE
, VT
, Expand
);
135 setOperationAction(ISD::SETULT
, VT
, Expand
);
136 setOperationAction(ISD::SETUGE
, VT
, Expand
);
137 setOperationAction(ISD::SETUGT
, VT
, Expand
);
138 setOperationAction(ISD::SETULE
, VT
, Expand
);
141 for (unsigned int x
= 0; x
< numIntTypes
; ++x
) {
142 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)IntTypes
[x
];
144 // GPU also does not have divrem function for signed or unsigned
145 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
147 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
148 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
149 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
151 // GPU doesn't have a rotl, rotr, or byteswap instruction
152 setOperationAction(ISD::ROTR
, VT
, Expand
);
153 setOperationAction(ISD::BSWAP
, VT
, Expand
);
155 // GPU doesn't have any counting operators
156 setOperationAction(ISD::CTPOP
, VT
, Expand
);
157 setOperationAction(ISD::CTTZ
, VT
, Expand
);
158 setOperationAction(ISD::CTLZ
, VT
, Expand
);
161 for ( unsigned int ii
= 0; ii
< numVectorTypes
; ++ii
)
163 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)VectorTypes
[ii
];
165 setOperationAction(ISD::BUILD_VECTOR
, VT
, Custom
);
166 setOperationAction(ISD::VECTOR_SHUFFLE
, VT
, Expand
);
167 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
168 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
169 // setOperationAction(ISD::VSETCC, VT, Expand);
170 setOperationAction(ISD::SELECT_CC
, VT
, Expand
);
173 if (STM
.device()->isSupported(AMDGPUDeviceInfo::LongOps
)) {
174 setOperationAction(ISD::MULHU
, MVT::i64
, Expand
);
175 setOperationAction(ISD::MULHU
, MVT::v2i64
, Expand
);
176 setOperationAction(ISD::MULHS
, MVT::i64
, Expand
);
177 setOperationAction(ISD::MULHS
, MVT::v2i64
, Expand
);
178 setOperationAction(ISD::ADD
, MVT::v2i64
, Expand
);
179 setOperationAction(ISD::SREM
, MVT::v2i64
, Expand
);
180 setOperationAction(ISD::Constant
, MVT::i64
, Legal
);
181 setOperationAction(ISD::SDIV
, MVT::v2i64
, Expand
);
182 setOperationAction(ISD::TRUNCATE
, MVT::v2i64
, Expand
);
183 setOperationAction(ISD::SIGN_EXTEND
, MVT::v2i64
, Expand
);
184 setOperationAction(ISD::ZERO_EXTEND
, MVT::v2i64
, Expand
);
185 setOperationAction(ISD::ANY_EXTEND
, MVT::v2i64
, Expand
);
187 if (STM
.device()->isSupported(AMDGPUDeviceInfo::DoubleOps
)) {
188 // we support loading/storing v2f64 but not operations on the type
189 setOperationAction(ISD::FADD
, MVT::v2f64
, Expand
);
190 setOperationAction(ISD::FSUB
, MVT::v2f64
, Expand
);
191 setOperationAction(ISD::FMUL
, MVT::v2f64
, Expand
);
192 setOperationAction(ISD::FP_ROUND_INREG
, MVT::v2f64
, Expand
);
193 setOperationAction(ISD::FP_EXTEND
, MVT::v2f64
, Expand
);
194 setOperationAction(ISD::ConstantFP
, MVT::f64
, Legal
);
195 // We want to expand vector conversions into their scalar
197 setOperationAction(ISD::TRUNCATE
, MVT::v2f64
, Expand
);
198 setOperationAction(ISD::SIGN_EXTEND
, MVT::v2f64
, Expand
);
199 setOperationAction(ISD::ZERO_EXTEND
, MVT::v2f64
, Expand
);
200 setOperationAction(ISD::ANY_EXTEND
, MVT::v2f64
, Expand
);
201 setOperationAction(ISD::FABS
, MVT::f64
, Expand
);
202 setOperationAction(ISD::FABS
, MVT::v2f64
, Expand
);
204 // TODO: Fix the UDIV24 algorithm so it works for these
205 // types correctly. This needs vector comparisons
206 // for this to work correctly.
207 setOperationAction(ISD::UDIV
, MVT::v2i8
, Expand
);
208 setOperationAction(ISD::UDIV
, MVT::v4i8
, Expand
);
209 setOperationAction(ISD::UDIV
, MVT::v2i16
, Expand
);
210 setOperationAction(ISD::UDIV
, MVT::v4i16
, Expand
);
211 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Custom
);
212 setOperationAction(ISD::SUBC
, MVT::Other
, Expand
);
213 setOperationAction(ISD::ADDE
, MVT::Other
, Expand
);
214 setOperationAction(ISD::ADDC
, MVT::Other
, Expand
);
215 setOperationAction(ISD::BRCOND
, MVT::Other
, Custom
);
216 setOperationAction(ISD::BR_JT
, MVT::Other
, Expand
);
217 setOperationAction(ISD::BRIND
, MVT::Other
, Expand
);
218 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::Other
, Expand
);
220 setOperationAction(ISD::BUILD_VECTOR
, MVT::Other
, Custom
);
222 // Use the default implementation.
223 setOperationAction(ISD::ConstantFP
, MVT::f32
, Legal
);
224 setOperationAction(ISD::Constant
, MVT::i32
, Legal
);
226 setSchedulingPreference(Sched::RegPressure
);
227 setPow2DivIsCheap(false);
228 setPrefLoopAlignment(16);
229 setSelectIsExpensive(true);
230 setJumpIsExpensive(true);
232 maxStoresPerMemcpy
= 4096;
233 maxStoresPerMemmove
= 4096;
234 maxStoresPerMemset
= 4096;
238 #undef numVectorTypes
243 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo
&Info
,
244 const CallInst
&I
, unsigned Intrinsic
) const
248 // The backend supports 32 and 64 bit floating point immediates
250 AMDGPUTargetLowering::isFPImmLegal(const APFloat
&Imm
, EVT VT
) const
252 if (VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f32
253 || VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f64
) {
261 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT
) const
263 if (VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f32
264 || VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f64
) {
272 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
273 // be zero. Op is expected to be a target specific node. Used by DAG
277 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
281 const SelectionDAG
&DAG
,
282 unsigned Depth
) const
286 KnownZero
= KnownOne
= APInt(KnownOne
.getBitWidth(), 0); // Don't know anything
287 switch (Op
.getOpcode()) {
290 DAG
.ComputeMaskedBits(
296 DAG
.ComputeMaskedBits(
301 assert((KnownZero
& KnownOne
) == 0
302 && "Bits known to be one AND zero?");
303 assert((KnownZero2
& KnownOne2
) == 0
304 && "Bits known to be one AND zero?");
305 // Only known if known in both the LHS and RHS
306 KnownOne
&= KnownOne2
;
307 KnownZero
&= KnownZero2
;
312 //===----------------------------------------------------------------------===//
313 // Other Lowering Hooks
314 //===----------------------------------------------------------------------===//
317 AMDGPUTargetLowering::LowerSDIV(SDValue Op
, SelectionDAG
&DAG
) const
319 EVT OVT
= Op
.getValueType();
321 if (OVT
.getScalarType() == MVT::i64
) {
322 DST
= LowerSDIV64(Op
, DAG
);
323 } else if (OVT
.getScalarType() == MVT::i32
) {
324 DST
= LowerSDIV32(Op
, DAG
);
325 } else if (OVT
.getScalarType() == MVT::i16
326 || OVT
.getScalarType() == MVT::i8
) {
327 DST
= LowerSDIV24(Op
, DAG
);
329 DST
= SDValue(Op
.getNode(), 0);
335 AMDGPUTargetLowering::LowerSREM(SDValue Op
, SelectionDAG
&DAG
) const
337 EVT OVT
= Op
.getValueType();
339 if (OVT
.getScalarType() == MVT::i64
) {
340 DST
= LowerSREM64(Op
, DAG
);
341 } else if (OVT
.getScalarType() == MVT::i32
) {
342 DST
= LowerSREM32(Op
, DAG
);
343 } else if (OVT
.getScalarType() == MVT::i16
) {
344 DST
= LowerSREM16(Op
, DAG
);
345 } else if (OVT
.getScalarType() == MVT::i8
) {
346 DST
= LowerSREM8(Op
, DAG
);
348 DST
= SDValue(Op
.getNode(), 0);
354 AMDGPUTargetLowering::LowerBUILD_VECTOR( SDValue Op
, SelectionDAG
&DAG
) const
356 EVT VT
= Op
.getValueType();
361 DebugLoc DL
= Op
.getDebugLoc();
362 Nodes1
= DAG
.getNode(AMDGPUISD::VBUILD
,
364 VT
, Op
.getOperand(0));
366 bool allEqual
= true;
367 for (unsigned x
= 1, y
= Op
.getNumOperands(); x
< y
; ++x
) {
368 if (Op
.getOperand(0) != Op
.getOperand(x
)) {
377 switch(Op
.getNumOperands()) {
382 fourth
= Op
.getOperand(3);
383 if (fourth
.getOpcode() != ISD::UNDEF
) {
384 Nodes1
= DAG
.getNode(
385 ISD::INSERT_VECTOR_ELT
,
390 DAG
.getConstant(7, MVT::i32
));
393 third
= Op
.getOperand(2);
394 if (third
.getOpcode() != ISD::UNDEF
) {
395 Nodes1
= DAG
.getNode(
396 ISD::INSERT_VECTOR_ELT
,
401 DAG
.getConstant(6, MVT::i32
));
404 second
= Op
.getOperand(1);
405 if (second
.getOpcode() != ISD::UNDEF
) {
406 Nodes1
= DAG
.getNode(
407 ISD::INSERT_VECTOR_ELT
,
412 DAG
.getConstant(5, MVT::i32
));
420 AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op
, SelectionDAG
&DAG
) const
422 SDValue Data
= Op
.getOperand(0);
423 VTSDNode
*BaseType
= cast
<VTSDNode
>(Op
.getOperand(1));
424 DebugLoc DL
= Op
.getDebugLoc();
425 EVT DVT
= Data
.getValueType();
426 EVT BVT
= BaseType
->getVT();
427 unsigned baseBits
= BVT
.getScalarType().getSizeInBits();
428 unsigned srcBits
= DVT
.isSimple() ? DVT
.getScalarType().getSizeInBits() : 1;
429 unsigned shiftBits
= srcBits
- baseBits
;
431 // If the op is less than 32 bits, then it needs to extend to 32bits
432 // so it can properly keep the upper bits valid.
433 EVT IVT
= genIntType(32, DVT
.isVector() ? DVT
.getVectorNumElements() : 1);
434 Data
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, IVT
, Data
);
435 shiftBits
= 32 - baseBits
;
438 SDValue Shift
= DAG
.getConstant(shiftBits
, DVT
);
439 // Shift left by 'Shift' bits.
440 Data
= DAG
.getNode(ISD::SHL
, DL
, DVT
, Data
, Shift
);
441 // Signed shift Right by 'Shift' bits.
442 Data
= DAG
.getNode(ISD::SRA
, DL
, DVT
, Data
, Shift
);
444 // Once the sign extension is done, the op needs to be converted to
445 // its original type.
446 Data
= DAG
.getSExtOrTrunc(Data
, DL
, Op
.getOperand(0).getValueType());
451 AMDGPUTargetLowering::genIntType(uint32_t size
, uint32_t numEle
) const
453 int iSize
= (size
* numEle
);
454 int vEle
= (iSize
>> ((size
== 64) ? 6 : 5));
460 return EVT(MVT::i64
);
462 return EVT(MVT::getVectorVT(MVT::i64
, vEle
));
466 return EVT(MVT::i32
);
468 return EVT(MVT::getVectorVT(MVT::i32
, vEle
));
474 AMDGPUTargetLowering::LowerBRCOND(SDValue Op
, SelectionDAG
&DAG
) const
476 SDValue Chain
= Op
.getOperand(0);
477 SDValue Cond
= Op
.getOperand(1);
478 SDValue Jump
= Op
.getOperand(2);
480 Result
= DAG
.getNode(
481 AMDGPUISD::BRANCH_COND
,
489 AMDGPUTargetLowering::LowerSDIV24(SDValue Op
, SelectionDAG
&DAG
) const
491 DebugLoc DL
= Op
.getDebugLoc();
492 EVT OVT
= Op
.getValueType();
493 SDValue LHS
= Op
.getOperand(0);
494 SDValue RHS
= Op
.getOperand(1);
497 if (!OVT
.isVector()) {
500 } else if (OVT
.getVectorNumElements() == 2) {
503 } else if (OVT
.getVectorNumElements() == 4) {
507 unsigned bitsize
= OVT
.getScalarType().getSizeInBits();
508 // char|short jq = ia ^ ib;
509 SDValue jq
= DAG
.getNode(ISD::XOR
, DL
, OVT
, LHS
, RHS
);
511 // jq = jq >> (bitsize - 2)
512 jq
= DAG
.getNode(ISD::SRA
, DL
, OVT
, jq
, DAG
.getConstant(bitsize
- 2, OVT
));
515 jq
= DAG
.getNode(ISD::OR
, DL
, OVT
, jq
, DAG
.getConstant(1, OVT
));
518 jq
= DAG
.getSExtOrTrunc(jq
, DL
, INTTY
);
520 // int ia = (int)LHS;
521 SDValue ia
= DAG
.getSExtOrTrunc(LHS
, DL
, INTTY
);
524 SDValue ib
= DAG
.getSExtOrTrunc(RHS
, DL
, INTTY
);
526 // float fa = (float)ia;
527 SDValue fa
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ia
);
529 // float fb = (float)ib;
530 SDValue fb
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ib
);
532 // float fq = native_divide(fa, fb);
533 SDValue fq
= DAG
.getNode(AMDGPUISD::DIV_INF
, DL
, FLTTY
, fa
, fb
);
536 fq
= DAG
.getNode(ISD::FTRUNC
, DL
, FLTTY
, fq
);
538 // float fqneg = -fq;
539 SDValue fqneg
= DAG
.getNode(ISD::FNEG
, DL
, FLTTY
, fq
);
541 // float fr = mad(fqneg, fb, fa);
542 SDValue fr
= DAG
.getNode(AMDGPUISD::MAD
, DL
, FLTTY
, fqneg
, fb
, fa
);
545 SDValue iq
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, INTTY
, fq
);
548 fr
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fr
);
551 fb
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fb
);
553 // int cv = fr >= fb;
555 if (INTTY
== MVT::i32
) {
556 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
558 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
560 // jq = (cv ? jq : 0);
561 jq
= DAG
.getNode(ISD::SELECT
, DL
, OVT
, cv
, jq
,
562 DAG
.getConstant(0, OVT
));
564 iq
= DAG
.getSExtOrTrunc(iq
, DL
, OVT
);
565 iq
= DAG
.getNode(ISD::ADD
, DL
, OVT
, iq
, jq
);
570 AMDGPUTargetLowering::LowerSDIV32(SDValue Op
, SelectionDAG
&DAG
) const
572 DebugLoc DL
= Op
.getDebugLoc();
573 EVT OVT
= Op
.getValueType();
574 SDValue LHS
= Op
.getOperand(0);
575 SDValue RHS
= Op
.getOperand(1);
576 // The LowerSDIV32 function generates equivalent to the following IL.
586 // ixor r10, r10, r11
597 SDValue r10
= DAG
.getSelectCC(DL
,
598 r0
, DAG
.getConstant(0, OVT
),
599 DAG
.getConstant(-1, MVT::i32
),
600 DAG
.getConstant(0, MVT::i32
),
604 SDValue r11
= DAG
.getSelectCC(DL
,
605 r1
, DAG
.getConstant(0, OVT
),
606 DAG
.getConstant(-1, MVT::i32
),
607 DAG
.getConstant(0, MVT::i32
),
611 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
614 r1
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r1
, r11
);
617 r0
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
620 r1
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r1
, r11
);
623 r0
= DAG
.getNode(ISD::UDIV
, DL
, OVT
, r0
, r1
);
625 // ixor r10, r10, r11
626 r10
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r10
, r11
);
629 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
632 SDValue DST
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
637 AMDGPUTargetLowering::LowerSDIV64(SDValue Op
, SelectionDAG
&DAG
) const
639 return SDValue(Op
.getNode(), 0);
643 AMDGPUTargetLowering::LowerSREM8(SDValue Op
, SelectionDAG
&DAG
) const
645 DebugLoc DL
= Op
.getDebugLoc();
646 EVT OVT
= Op
.getValueType();
647 MVT INTTY
= MVT::i32
;
648 if (OVT
== MVT::v2i8
) {
650 } else if (OVT
== MVT::v4i8
) {
653 SDValue LHS
= DAG
.getSExtOrTrunc(Op
.getOperand(0), DL
, INTTY
);
654 SDValue RHS
= DAG
.getSExtOrTrunc(Op
.getOperand(1), DL
, INTTY
);
655 LHS
= DAG
.getNode(ISD::SREM
, DL
, INTTY
, LHS
, RHS
);
656 LHS
= DAG
.getSExtOrTrunc(LHS
, DL
, OVT
);
661 AMDGPUTargetLowering::LowerSREM16(SDValue Op
, SelectionDAG
&DAG
) const
663 DebugLoc DL
= Op
.getDebugLoc();
664 EVT OVT
= Op
.getValueType();
665 MVT INTTY
= MVT::i32
;
666 if (OVT
== MVT::v2i16
) {
668 } else if (OVT
== MVT::v4i16
) {
671 SDValue LHS
= DAG
.getSExtOrTrunc(Op
.getOperand(0), DL
, INTTY
);
672 SDValue RHS
= DAG
.getSExtOrTrunc(Op
.getOperand(1), DL
, INTTY
);
673 LHS
= DAG
.getNode(ISD::SREM
, DL
, INTTY
, LHS
, RHS
);
674 LHS
= DAG
.getSExtOrTrunc(LHS
, DL
, OVT
);
679 AMDGPUTargetLowering::LowerSREM32(SDValue Op
, SelectionDAG
&DAG
) const
681 DebugLoc DL
= Op
.getDebugLoc();
682 EVT OVT
= Op
.getValueType();
683 SDValue LHS
= Op
.getOperand(0);
684 SDValue RHS
= Op
.getOperand(1);
685 // The LowerSREM32 function generates equivalent to the following IL.
707 SDValue r10
= DAG
.getSetCC(DL
, OVT
, r0
, DAG
.getConstant(0, OVT
), ISD::SETLT
);
710 SDValue r11
= DAG
.getSetCC(DL
, OVT
, r1
, DAG
.getConstant(0, OVT
), ISD::SETLT
);
713 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
716 r1
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r1
, r11
);
719 r0
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
722 r1
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r1
, r11
);
725 SDValue r20
= DAG
.getNode(ISD::UREM
, DL
, OVT
, r0
, r1
);
728 r20
= DAG
.getNode(AMDGPUISD::UMUL
, DL
, OVT
, r20
, r1
);
731 r0
= DAG
.getNode(ISD::SUB
, DL
, OVT
, r0
, r20
);
734 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
737 SDValue DST
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
742 AMDGPUTargetLowering::LowerSREM64(SDValue Op
, SelectionDAG
&DAG
) const
744 return SDValue(Op
.getNode(), 0);