radeonsi: Fix sampler views for depth textures.
[mesa.git] / src / gallium / drivers / radeon / AMDILISelLowering.cpp
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::InitAMDILLowering()
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are supported.

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sextinreg, expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
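    // Editorial note: ISD::SETOLT and the other entries below are
    // ISD::CondCode values rather than opcodes; feeding them to
    // setOperationAction is a quirk inherited from the original AMDIL code.
    // setCondCodeAction would be the conventional hook for condition codes.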
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU also does not have a divrem instruction for signed or
    // unsigned division.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU does not have [S|U]MUL_LOHI as single instructions.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU doesn't have a rotl, rotr, or byteswap instruction.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU doesn't have any bit-counting instructions.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64 but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works correctly for these types;
  // it needs vector comparisons to do so.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;

}

// No target memory intrinsics need special handling here.
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}
// The backend supports 32-bit and 64-bit floating point immediates.
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
}

bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  // f32 and f64 immediates are legal, so there is no benefit to shrinking
  // their constants.
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
}

// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
// be zero or one. Op is expected to be a target-specific node. Used by the
// DAG combiner.
void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
  switch (Op.getOpcode()) {
  default: break;
  case ISD::SELECT_CC:
    // The result of a SELECT_CC is one of its two value operands (operands
    // 2 and 3), so a bit is known only if it is known in both of them.
    DAG.ComputeMaskedBits(
        Op.getOperand(2),
        KnownZero,
        KnownOne,
        Depth + 1
        );
    DAG.ComputeMaskedBits(
        Op.getOperand(3),
        KnownZero2,
        KnownOne2,
        Depth + 1
        );
    assert((KnownZero & KnownOne) == 0
        && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0
        && "Bits known to be one AND zero?");
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  }
}
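
// Illustrative example (not from the original source): for
//   t = select_cc(a, b, x & 0xFF, y & 0x3F, setlt)
// both value operands have bits [31:8] known to be zero, so t has bits
// [31:8] known zero as well, whichever way the comparison resolves.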

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
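  // Worked example (illustrative, not in the original): sign-extending an
  // i8 held in an i32 register gives shiftBits = 32 - 8 = 24, so the shifts
  // below compute (Data << 24) >> 24, where the right shift is arithmetic
  // and replicates bit 7 into the upper 24 bits.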
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
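
// genIntType builds an i32- or i64-element type (scalar or vector) whose
// total width covers size * numEle bits. Illustrative examples (not from
// the original source): genIntType(32, 2) yields v2i32 and genIntType(64, 1)
// yields i64.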
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
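  // Overview (editorial note, not from the original source): 8- and 16-bit
  // signed division is evaluated in f32, which represents such operands
  // exactly. fq = fa / fb is truncated toward zero to get the candidate
  // quotient iq; fr = mad(-fq, fb, fa) is the corresponding remainder. If
  // the approximate divide rounded fq down by one whole divisor, then
  // |fr| >= |fb|, and jq (+1 or -1, matching the sign of ia ^ ib) corrects
  // the result.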
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // Default to the scalar types and widen them for the vector cases; this
  // also keeps INTTY/FLTTY initialized for unexpected vector widths.
  MVT INTTY = MVT::i32;
  MVT FLTTY = MVT::f32;
  if (OVT.isVector() && OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.isVector() && OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, INTTY, cv, jq,
      DAG.getConstant(0, INTTY));
  jq = DAG.getSExtOrTrunc(jq, DL, OVT);

  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSDIV32 generates the equivalent of the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10
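  //
  // Editorial note: with mask m = (x < 0 ? -1 : 0), the identity
  // (x + m) ^ m computes |x|; applying the same form with the combined
  // mask r10 ^ r11 restores the quotient's sign, which is negative exactly
  // when the operand signs differ. For example, -7 / 2: r10 = -1, r11 = 0,
  // so |LHS| = (-7 + -1) ^ -1 = 7, then 7 u/ 2 = 3, and (3 + -1) ^ -1 = -3.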

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, OVT),
      DAG.getConstant(0, OVT),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, OVT),
      DAG.getConstant(0, OVT),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  // There is no custom lowering for 64-bit signed division yet; return the
  // node unchanged.
  return SDValue(Op.getNode(), 0);
}

SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  // Promote the operands to 32 bits, take the 32-bit remainder, and
  // truncate the result back to 8 bits.
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  // Promote the operands to 32 bits, take the 32-bit remainder, and
  // truncate the result back to 16 bits.
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSREM32 generates the equivalent of the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10
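  //
  // Editorial note: the remainder takes the sign of the dividend only, so
  // the final correction reuses r10 (the sign mask of LHS) rather than
  // r10 ^ r11. For example, -7 % 2: |-7| = 7, 7 - (7 u/ 2) * 2 = 1, and
  // (1 + -1) ^ -1 = -1, matching C's truncated-division semantics.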

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  // There is no custom lowering for the 64-bit signed remainder yet; return
  // the node unchanged.
  return SDValue(Op.getNode(), 0);
}