//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file implements the interfaces that AMDIL uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "AMDILISelLowering.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILRegisterInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;
#define ISDBITCAST  ISD::BITCAST
#define MVTGLUE     MVT::Glue
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
//===----------------------------------------------------------------------===//
static SDValue
getConversionNode(SelectionDAG &DAG, SDValue &Src, SDValue &Dst, bool asType)
{
  DebugLoc DL = Src.getDebugLoc();
  EVT svt = Src.getValueType().getScalarType();
  EVT dvt = Dst.getValueType().getScalarType();
  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
    if (dvt.bitsGT(svt)) {
      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
    } else if (svt.bitsGT(dvt)) {
      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
          DAG.getConstant(1, MVT::i32));
    }
  } else if (svt.isInteger() && dvt.isInteger()) {
    if (!svt.bitsEq(dvt)) {
      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
    }
  } else if (svt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
    if (!svt.bitsEq(dvt)) {
      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
      } else {
        assert(0 && "We only support 32- and 64-bit fp types");
      }
    }
    Src = DAG.getNode(opcode, DL, dvt, Src);
  } else if (dvt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
    } else {
      assert(0 && "We only support 32- and 64-bit fp types");
    }
    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  }
  return Src;
}
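
// For illustration: with asType == false, an i16 -> f32 conversion first
// sign-extends the i16 to i32 (the only integer width fed to SINT_TO_FP
// for f32 here) and then emits SINT_TO_FP; f32 -> i16 goes the other way,
// FP_TO_SINT to i32 followed by getSExtOrTrunc down to i16.
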
// CondCCodeToCC - Convert a DAG condition code to an AMDIL CC
// condition.
static AMDILCC::CondCodes
CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
{
  switch (CC) {
  default:
    {
      errs() << "Condition Code: " << (unsigned int)CC << "\n";
      assert(0 && "Unknown condition code!");
    }
  case ISD::SETO:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_O;
    case MVT::f64:
      return AMDILCC::IL_CC_D_O;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUO:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_UO;
    case MVT::f64:
      return AMDILCC::IL_CC_D_UO;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETGT:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_GT;
    case MVT::f32:
      return AMDILCC::IL_CC_F_GT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_GT;
    case MVT::i64:
      return AMDILCC::IL_CC_L_GT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETGE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_GE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_GE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_GE;
    case MVT::i64:
      return AMDILCC::IL_CC_L_GE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETLT:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_LT;
    case MVT::f32:
      return AMDILCC::IL_CC_F_LT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_LT;
    case MVT::i64:
      return AMDILCC::IL_CC_L_LT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETLE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_LE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_LE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_LE;
    case MVT::i64:
      return AMDILCC::IL_CC_L_LE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETNE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_NE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_NE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_NE;
    case MVT::i64:
      return AMDILCC::IL_CC_L_NE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETEQ:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_I_EQ;
    case MVT::f32:
      return AMDILCC::IL_CC_F_EQ;
    case MVT::f64:
      return AMDILCC::IL_CC_D_EQ;
    case MVT::i64:
      return AMDILCC::IL_CC_L_EQ;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUGT:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_GT;
    case MVT::f32:
      return AMDILCC::IL_CC_F_UGT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_UGT;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_GT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUGE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_GE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_UGE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_UGE;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_GE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETULT:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_LT;
    case MVT::f32:
      return AMDILCC::IL_CC_F_ULT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_ULT;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_LT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETULE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_LE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_ULE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_ULE;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_LE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUNE:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_NE;
    case MVT::f32:
      return AMDILCC::IL_CC_F_UNE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_UNE;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_NE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUEQ:
    switch (type) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return AMDILCC::IL_CC_U_EQ;
    case MVT::f32:
      return AMDILCC::IL_CC_F_UEQ;
    case MVT::f64:
      return AMDILCC::IL_CC_D_UEQ;
    case MVT::i64:
      return AMDILCC::IL_CC_UL_EQ;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOGT:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_OGT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_OGT;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOGE:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_OGE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_OGE;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOLT:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_OLT;
    case MVT::f64:
      return AMDILCC::IL_CC_D_OLT;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOLE:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_OLE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_OLE;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETONE:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_ONE;
    case MVT::f64:
      return AMDILCC::IL_CC_D_ONE;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOEQ:
    switch (type) {
    case MVT::f32:
      return AMDILCC::IL_CC_F_OEQ;
    case MVT::f64:
      return AMDILCC::IL_CC_D_OEQ;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  }
}

SDValue
AMDILTargetLowering::LowerMemArgument(
    SDValue Chain,
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    MachineFrameInfo *MFI,
    unsigned i) const
{
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;

  bool AlwaysUseMutable = (CallConv == CallingConv::Fast) &&
    getTargetMachine().Options.GuaranteedTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since
  // they could be overwritten by the lowering of the arguments of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
      VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(FI),
      false, false, false, 0);
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // These are the current register classes that are
  // supported

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU also does not have a divrem instruction for
    // signed or unsigned operands
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setStackPointerRegisterToSaveRestore(AMDGPU::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;

}
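
// For illustration: the three action kinds configured above behave as
// follows during legalization. Legal nodes are selected directly (e.g.
// ISD::Constant on i32); Custom nodes are routed to LowerOperation()
// below (e.g. ISD::SDIV on i32, which lands in LowerSDIV32); Expand
// nodes are rewritten in terms of other operations by the legalizer
// (e.g. ISD::BSWAP, which has no IL equivalent here).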

const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
  default: return 0;
  case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
  case AMDILISD::MAD: return "AMDILISD::MAD";
  case AMDILISD::CALL: return "AMDILISD::CALL";
  case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
  case AMDILISD::UMUL: return "AMDILISD::UMUL";
  case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
  case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
  case AMDILISD::CMP: return "AMDILISD::CMP";
  case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
  case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
  case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
  case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
  case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
  case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
  case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
  case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
  }
}
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}
// The backend supports 32- and 64-bit floating-point immediates
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return true;
  } else {
    return false;
  }
}

bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return false;
  } else {
    return true;
  }
}


// computeMaskedBitsForTargetNode - Determine which bits of Op are known
// to be zero or one. Op is expected to be a target-specific node. Used
// by the DAG combiner.

void
AMDILTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
  default: break;
  case AMDILISD::SELECT_CC:
    DAG.ComputeMaskedBits(
        Op.getOperand(1),
        KnownZero,
        KnownOne,
        Depth + 1);
    DAG.ComputeMaskedBits(
        Op.getOperand(0),
        KnownZero2,
        KnownOne2);
    assert((KnownZero & KnownOne) == 0
        && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0
        && "Bits known to be one AND zero?");
    // Only known if known in both the LHS and RHS
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  }
}
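
// For illustration: for a select_cc the known bits are the intersection
// of what is known about the two selectable operands - if one operand
// has its upper 24 bits known zero and the other only its upper 16, the
// merged KnownZero guarantees just the upper 16 bits, since either value
// may be chosen at run time.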

// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention.
CCAssignFn*
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
{
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  return CC_AMDIL32;
}

// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of appropriate physical registers. This assumes
// that Chain/InFlag are the input chain/flag to use, and that TheCall is
// the call being lowered. It returns an SDNode with the same number of
// values as the ISD::CALL.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag).getValue(1);
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes. This is
// necessary to maintain source ordering of instructions under -O0 to
// avoid odd-looking "skipping around" issues.
static const SDValue
Ordered(SelectionDAG &DAG, unsigned order, const SDValue New)
{
  if (order != 0 && DAG.GetOrdering(New.getNode()) == 0) {
    DAG.AssignOrdering(New.getNode(), order);
    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
      Ordered(DAG, order, New.getOperand(i));
  }
  return New;
}

#define LOWER(A) \
  case ISD:: A: \
    return Ordered(DAG, DAG.GetOrdering(Op.getNode()), Lower##A(Op, DAG))

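// For illustration, LOWER(SDIV) expands to:
//   case ISD::SDIV:
//     return Ordered(DAG, DAG.GetOrdering(Op.getNode()), LowerSDIV(Op, DAG));
// so each custom-lowered opcode dispatches to its Lower* routine and the
// replacement nodes inherit the original node's ordering.
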
SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default:
    Op.getNode()->dump();
    assert(0 && "Custom lowering code for this "
           "instruction is not implemented yet!");
    break;
  LOWER(GlobalAddress);
  LOWER(JumpTable);
  LOWER(ConstantPool);
  LOWER(ExternalSymbol);
  LOWER(SDIV);
  LOWER(SREM);
  LOWER(BUILD_VECTOR);
  LOWER(SELECT);
  LOWER(SETCC);
  LOWER(SIGN_EXTEND_INREG);
  LOWER(DYNAMIC_STACKALLOC);
  LOWER(BRCOND);
  LOWER(BR_CC);
  }
  return Op;
}

#undef LOWER

SDValue
AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = Op;
  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *G = GADN->getGlobal();
  DebugLoc DL = Op.getDebugLoc();
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  if (!GV) {
    DST = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
  } else {
    if (GV->hasInitializer()) {
      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
        DST = DAG.getConstantFP(CF->getValueAPF(),
            Op.getValueType());
      } else if (dyn_cast<ConstantAggregateZero>(C)) {
        EVT VT = Op.getValueType();
        if (VT.isInteger()) {
          DST = DAG.getConstant(0, VT);
        } else {
          DST = DAG.getConstantFP(0, VT);
        }
      } else {
        assert(!"lowering this type of Global Address "
            "not implemented yet!");
        C->dump();
        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
      }
    } else {
      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
    }
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
  return Result;
}
SDValue
AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  EVT PtrVT = Op.getValueType();
  SDValue Result;
  if (CP->isMachineConstantPoolEntry()) {
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  } else {
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  }
  return Result;
}

SDValue
AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
{
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
  return Result;
}

/// LowerFORMAL_ARGUMENTS - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFor(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if (VA.isMemLoc()) {
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
                                        dl, DAG, VA, MFI, i));
    } else {
1015 assert(0 && "found a Value Assign that is "
1016 "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
    // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCall - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame); CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool &isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  isTailCall = false;
  MachineFunction &MF = DAG.getMachineFunction();
  // FIXME: Do we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands; this needs to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the first 5 operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value if needed
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND,
          dl,
          VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND,
          dl,
          VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND,
          dl,
          VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());

      // Emit an ISD::STORE which stores the
      // parameter value to a stack location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
          MachinePointerInfo::getFixedStack(FI),
          false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
        RegsToPass[i].first,
        RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
                         InVals);
}

SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch (Op.getNumOperands()) {
  default:
  case 1:
    break;
  case 4:
    fourth = Op.getOperand(3);
    if (fourth.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          Op.getValueType(),
          Nodes1,
          fourth,
          DAG.getConstant(7, MVT::i32));
    }
  case 3:
    third = Op.getOperand(2);
    if (third.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          Op.getValueType(),
          Nodes1,
          third,
          DAG.getConstant(6, MVT::i32));
    }
  case 2:
    second = Op.getOperand(1);
    if (second.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          Op.getValueType(),
          Nodes1,
          second,
          DAG.getConstant(5, MVT::i32));
    }
    break;
  }
  return Nodes1;
}

SDValue
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDILISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}
SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}
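
// For illustration: setcc is lowered as a select_cc that produces an
// all-ones (-1) or all-zeros mask, getConversionNode then moves that
// mask to the result type, and the final AND with 1 normalizes it to
// the 0/1 boolean value that ISD::SETCC is defined to produce.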

SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
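
// For illustration: sign-extending an i8 value held in an i32 register
// uses shiftBits = 32 - 8 = 24, so 0x000000AB becomes 0xAB000000 after
// the SHL and 0xFFFFFFAB after the arithmetic SRA - the usual
// shift-left/arithmetic-shift-right idiom for in-register sign extension.
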
EVT
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}
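
// For illustration: genIntType(32, 4) yields v4i32 and genIntType(64, 2)
// yields v2i64, while genIntType(8, 1) rounds up to a single i32 (the
// narrowest integer register width modeled here) because iSize >> 5 == 0
// forces vEle back to 1.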

SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDGPU::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
      DL,
      SPReg, MVT::i32);
  SDValue NewSP = DAG.getNode(ISD::ADD,
      DL,
      MVT::i32, SP, Size);
  Chain = DAG.getCopyToReg(SP.getValue(1),
      DL,
      SPReg, NewSP);
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2, DL);
  return Chain;
}
SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

SDValue
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

// LowerRET - Lower an ISD::RET node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG) const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::RET => ret chain, (regnum1, val1), ...
    // Copy each result value into its assigned physical register.
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // Guarantee that all emitted copies are stuck together,
    // avoiding something bad.
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
    // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}

unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}

SDValue
AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
  // jq = (cv ? jq : 0);
  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));
  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}
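
// For illustration (scalar i8 case, exact float math assumed): LHS = 7,
// RHS = -2 gives jq = ((7 ^ -2) >> 6) | 1 = -1, fq = trunc(-3.5) = -3,
// and fr = |mad(3, -2, 7)| = 1 < |fb| = 2, so no correction is applied
// and the result is iq = -3, matching C truncating division. The jq
// term only kicks in when the approximate reciprocal leaves fq one unit
// short of the true quotient.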

SDValue
AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSDIV32 function generates equivalent to the following IL.
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
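
// For illustration: LHS = -7, RHS = 2 gives r10 = -1 and r11 = 0;
// (-7 + -1) ^ -1 = 7 recovers the magnitude, 7 udiv 2 = 3, and since
// r10 ^ r11 = -1 the final (3 + -1) ^ -1 = -3 restores the sign,
// matching C truncating division. (x + m) ^ m with m = x >> 31 is the
// branch-free absolute-value idiom; reapplying it with m = -1 negates.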

SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}

SDValue
AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}
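
// For illustration: an i8 srem is handled by sign-extending both
// operands to i32 (or v2i8/v4i8 to the matching i32 vector), doing the
// 32-bit SREM that LowerSREM32 knows how to lower, and truncating the
// result back down - safe because the remainder of two 8-bit values
// always fits in 8 bits.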

SDValue
AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSREM32 function generates equivalent to the following IL.
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r0, DAG.getConstant(0, OVT));

  // ilt r11, r1, 0
  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r1, DAG.getConstant(0, OVT));

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
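
// For illustration: LHS = -7, RHS = 3 gives r10 = -1 and r11 = 0; the
// magnitudes become 7 and 3, r20 = (7 / 3) * 3 = 6, 7 - 6 = 1, and
// (1 + -1) ^ -1 = -1, matching C's remainder, whose sign follows the
// dividend (-7 % 3 == -1).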

SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}