radeon/llvm: Merge AMDILTargetLowering class into AMDGPUTargetLowering
[mesa.git] / src/gallium/drivers/radeon/AMDILISelLowering.cpp
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file contains TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
//===----------------------------------------------------------------------===//
namespace llvm {
namespace AMDGPU {
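// Coerce Src to the scalar type of Dst. Same-kind conversions are widened,
// rounded, or truncated as needed; for mixed int/float pairs, asType picks a
// bit reinterpretation (BITCAST) over a signed value conversion.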
static SDValue
getConversionNode(SelectionDAG &DAG, SDValue &Src, SDValue &Dst, bool asType)
{
  DebugLoc DL = Src.getDebugLoc();
  EVT svt = Src.getValueType().getScalarType();
  EVT dvt = Dst.getValueType().getScalarType();
  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
    if (dvt.bitsGT(svt)) {
      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
    } else if (dvt.bitsLT(svt)) {
      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
          DAG.getConstant(1, MVT::i32));
    }
  } else if (svt.isInteger() && dvt.isInteger()) {
    if (!svt.bitsEq(dvt)) {
      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
    }
  } else if (svt.isInteger()) {
    unsigned opcode = (asType) ? ISD::BITCAST : ISD::SINT_TO_FP;
    if (!svt.bitsEq(dvt)) {
      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
      } else {
        assert(0 && "We only support 32- and 64-bit fp types");
      }
    }
    Src = DAG.getNode(opcode, DL, dvt, Src);
  } else if (dvt.isInteger()) {
    unsigned opcode = (asType) ? ISD::BITCAST : ISD::FP_TO_SINT;
    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
    } else {
      assert(0 && "We only support 32- and 64-bit fp types");
    }
    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  }
  return Src;
}

} // End namespace AMDGPU
} // End namespace llvm

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::InitAMDILLowering()
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // Set the operation actions for all the types the backend
  // currently supports.

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sextinreg directly, so lower it to shifts
    // (see LowerSIGN_EXTEND_INREG).
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU also has no divrem function for signed or unsigned division.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU has no [S|U]MUL_LOHI as a single instruction.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU has no rotl, rotr, or byteswap instruction.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU has no counting operators.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64 but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  // Allow very large inline expansions of memcpy/memmove/memset.
  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;
}

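// No intrinsics are lowered to target-specific memory intrinsics, so this
// hook always declines.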
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}
// The backend supports 32- and 64-bit floating point immediates.
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  return VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64;
}

bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  // f32 and f64 immediates are both legal, so never shrink a constant.
  return VT.getScalarType().getSimpleVT().SimpleTy != MVT::f32
      && VT.getScalarType().getSimpleVT().SimpleTy != MVT::f64;
}


// computeMaskedBitsForTargetNode - Determine which bits of 'Op' are known to
// be zero or one. Op is expected to be a target-specific node. Used by the
// DAG combiner.

void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
    default: break;
    case ISD::SELECT_CC:
      // The result is one of the two value operands (2 and 3), so a bit is
      // known only if it is known in both of them.
      DAG.ComputeMaskedBits(
          Op.getOperand(2),
          KnownZero,
          KnownOne,
          Depth + 1
          );
      DAG.ComputeMaskedBits(
          Op.getOperand(3),
          KnownZero2,
          KnownOne2,
          Depth + 1
          );
      assert((KnownZero & KnownOne) == 0
          && "Bits known to be one AND zero?");
      assert((KnownZero2 & KnownOne2) == 0
          && "Bits known to be one AND zero?");
      // Only known if known in both the LHS and RHS
      KnownOne &= KnownOne2;
      KnownZero &= KnownZero2;
      break;
  }
}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDGPUISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch (Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // Fall through to insert the remaining elements.
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // Fall through.
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  }
  return Nodes1;
}

SDValue
AMDGPUTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = AMDGPU::getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDGPUISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}

SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
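  // For example, sign-extending the low 8 bits of an i32 value shifts left
  // by 24 (0x000000AB -> 0xAB000000), then arithmetic-shifts right by 24,
  // replicating bit 7 into the upper bits (0xAB000000 -> 0xFFFFFFAB).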
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
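// Build the integer type, scalar or vector of i32/i64, whose total width
// matches size * numEle bits; e.g. genIntType(16, 4) covers 64 bits and
// yields v2i32, while anything narrower than one element becomes i32/i64.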
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  // Note that BRANCH_COND takes the jump target before the condition.
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  // Only scalar, two-element, and four-element types are expected here.
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
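  // The quotient is computed in floating point, where the operands (at most
  // 16 bits) fit exactly in the 24-bit significand of f32. Because the
  // truncated float quotient may still be off by one in magnitude, jq holds
  // the quotient's sign (+/-1) and is added back below whenever the absolute
  // remainder |fr| reaches |fb|.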
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  // The scalar and vector cases take the same path, so no branch on INTTY
  // is needed.
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(AMDGPUISD::CMOVLOG, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));

  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSDIV32 generates code equivalent to the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10
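  // The iadd/ixor pairs take absolute values: for negative x with mask
  // m = -1, (x + m) ^ m == ~(x - 1) == -x. After the unsigned divide, the
  // same identity with the combined sign mask r10 ^ r11 restores the sign
  // of the quotient. For example, -6 / 2: masks are -1 and 0, 6 udiv 2
  // gives 3, and (3 + -1) ^ -1 == -3.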

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

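// 64-bit signed division is not lowered here; the node is returned unchanged
// and must be handled (or further expanded) elsewhere.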
SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}

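// 8- and 16-bit signed remainders are promoted to 32 bits, computed with a
// 32-bit SREM, and truncated back to the original type.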
SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSREM32 generates code equivalent to the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10
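  // As in LowerSDIV32, the iadd/ixor pairs take absolute values up front,
  // and the remainder inherits the sign of the dividend (mask r10 alone).
  // For example, -7 % 2: |r0| = 7, 7 - (7 / 2) * 2 = 1, and
  // (1 + -1) ^ -1 == -1.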

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  // (ISD::UDIV, not UREM: the remainder is reconstructed below as
  // r0 - (r0 / r1) * r1, matching the IL listing above.)
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}