radeonsi: Handle TGSI CONST registers
[mesa.git] / src / gallium / drivers / radeon / AMDILISelLowering.cpp
1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 // This file implements the interfaces that AMDIL uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDILISelLowering.h"
16 #include "AMDILDevices.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "AMDILRegisterInfo.h"
19 #include "AMDILSubtarget.h"
20 #include "AMDILUtilityFunctions.h"
21 #include "llvm/CallingConv.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/DerivedTypes.h"
29 #include "llvm/Instructions.h"
30 #include "llvm/Intrinsics.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Target/TargetInstrInfo.h"
33 #include "llvm/Target/TargetOptions.h"
34
35 using namespace llvm;
36 #define ISDBITCAST ISD::BITCAST
37 #define MVTGLUE MVT::Glue
38 //===----------------------------------------------------------------------===//
39 // Calling Convention Implementation
40 //===----------------------------------------------------------------------===//
41 #include "AMDILGenCallingConv.inc"
42
43 //===----------------------------------------------------------------------===//
44 // TargetLowering Implementation Help Functions Begin
45 //===----------------------------------------------------------------------===//
46 static SDValue
47 getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
48 {
49 DebugLoc DL = Src.getDebugLoc();
50 EVT svt = Src.getValueType().getScalarType();
51 EVT dvt = Dst.getValueType().getScalarType();
52 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
53 if (dvt.bitsGT(svt)) {
54 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
55 } else if (svt.bitsLT(svt)) {
56 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
57 DAG.getConstant(1, MVT::i32));
58 }
59 } else if (svt.isInteger() && dvt.isInteger()) {
60 if (!svt.bitsEq(dvt)) {
61 Src = DAG.getSExtOrTrunc(Src, DL, dvt);
62 } else {
63 Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
64 }
65 } else if (svt.isInteger()) {
66 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
67 if (!svt.bitsEq(dvt)) {
68 if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
69 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
70 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
71 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
72 } else {
73 assert(0 && "We only support 32 and 64bit fp types");
74 }
75 }
76 Src = DAG.getNode(opcode, DL, dvt, Src);
77 } else if (dvt.isInteger()) {
78 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
79 if (svt.getSimpleVT().SimpleTy == MVT::f32) {
80 Src = DAG.getNode(opcode, DL, MVT::i32, Src);
81 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
82 Src = DAG.getNode(opcode, DL, MVT::i64, Src);
83 } else {
84 assert(0 && "We only support 32 and 64bit fp types");
85 }
86 Src = DAG.getSExtOrTrunc(Src, DL, dvt);
87 }
88 return Src;
89 }
90 // CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
91 // condition.
92 static AMDILCC::CondCodes
93 CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
94 {
95 switch (CC) {
96 default:
97 {
98 errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
99 assert(0 && "Unknown condition code!");
100 }
101 case ISD::SETO:
102 switch(type) {
103 case MVT::f32:
104 return AMDILCC::IL_CC_F_O;
105 case MVT::f64:
106 return AMDILCC::IL_CC_D_O;
107 default:
108 assert(0 && "Opcode combination not generated correctly!");
109 return AMDILCC::COND_ERROR;
110 };
111 case ISD::SETUO:
112 switch(type) {
113 case MVT::f32:
114 return AMDILCC::IL_CC_F_UO;
115 case MVT::f64:
116 return AMDILCC::IL_CC_D_UO;
117 default:
118 assert(0 && "Opcode combination not generated correctly!");
119 return AMDILCC::COND_ERROR;
120 };
121 case ISD::SETGT:
122 switch (type) {
123 case MVT::i1:
124 case MVT::i8:
125 case MVT::i16:
126 case MVT::i32:
127 return AMDILCC::IL_CC_I_GT;
128 case MVT::f32:
129 return AMDILCC::IL_CC_F_GT;
130 case MVT::f64:
131 return AMDILCC::IL_CC_D_GT;
132 case MVT::i64:
133 return AMDILCC::IL_CC_L_GT;
134 default:
135 assert(0 && "Opcode combination not generated correctly!");
136 return AMDILCC::COND_ERROR;
137 };
138 case ISD::SETGE:
139 switch (type) {
140 case MVT::i1:
141 case MVT::i8:
142 case MVT::i16:
143 case MVT::i32:
144 return AMDILCC::IL_CC_I_GE;
145 case MVT::f32:
146 return AMDILCC::IL_CC_F_GE;
147 case MVT::f64:
148 return AMDILCC::IL_CC_D_GE;
149 case MVT::i64:
150 return AMDILCC::IL_CC_L_GE;
151 default:
152 assert(0 && "Opcode combination not generated correctly!");
153 return AMDILCC::COND_ERROR;
154 };
155 case ISD::SETLT:
156 switch (type) {
157 case MVT::i1:
158 case MVT::i8:
159 case MVT::i16:
160 case MVT::i32:
161 return AMDILCC::IL_CC_I_LT;
162 case MVT::f32:
163 return AMDILCC::IL_CC_F_LT;
164 case MVT::f64:
165 return AMDILCC::IL_CC_D_LT;
166 case MVT::i64:
167 return AMDILCC::IL_CC_L_LT;
168 default:
169 assert(0 && "Opcode combination not generated correctly!");
170 return AMDILCC::COND_ERROR;
171 };
172 case ISD::SETLE:
173 switch (type) {
174 case MVT::i1:
175 case MVT::i8:
176 case MVT::i16:
177 case MVT::i32:
178 return AMDILCC::IL_CC_I_LE;
179 case MVT::f32:
180 return AMDILCC::IL_CC_F_LE;
181 case MVT::f64:
182 return AMDILCC::IL_CC_D_LE;
183 case MVT::i64:
184 return AMDILCC::IL_CC_L_LE;
185 default:
186 assert(0 && "Opcode combination not generated correctly!");
187 return AMDILCC::COND_ERROR;
188 };
189 case ISD::SETNE:
190 switch (type) {
191 case MVT::i1:
192 case MVT::i8:
193 case MVT::i16:
194 case MVT::i32:
195 return AMDILCC::IL_CC_I_NE;
196 case MVT::f32:
197 return AMDILCC::IL_CC_F_NE;
198 case MVT::f64:
199 return AMDILCC::IL_CC_D_NE;
200 case MVT::i64:
201 return AMDILCC::IL_CC_L_NE;
202 default:
203 assert(0 && "Opcode combination not generated correctly!");
204 return AMDILCC::COND_ERROR;
205 };
206 case ISD::SETEQ:
207 switch (type) {
208 case MVT::i1:
209 case MVT::i8:
210 case MVT::i16:
211 case MVT::i32:
212 return AMDILCC::IL_CC_I_EQ;
213 case MVT::f32:
214 return AMDILCC::IL_CC_F_EQ;
215 case MVT::f64:
216 return AMDILCC::IL_CC_D_EQ;
217 case MVT::i64:
218 return AMDILCC::IL_CC_L_EQ;
219 default:
220 assert(0 && "Opcode combination not generated correctly!");
221 return AMDILCC::COND_ERROR;
222 };
223 case ISD::SETUGT:
224 switch (type) {
225 case MVT::i1:
226 case MVT::i8:
227 case MVT::i16:
228 case MVT::i32:
229 return AMDILCC::IL_CC_U_GT;
230 case MVT::f32:
231 return AMDILCC::IL_CC_F_UGT;
232 case MVT::f64:
233 return AMDILCC::IL_CC_D_UGT;
234 case MVT::i64:
235 return AMDILCC::IL_CC_UL_GT;
236 default:
237 assert(0 && "Opcode combination not generated correctly!");
238 return AMDILCC::COND_ERROR;
239 };
240 case ISD::SETUGE:
241 switch (type) {
242 case MVT::i1:
243 case MVT::i8:
244 case MVT::i16:
245 case MVT::i32:
246 return AMDILCC::IL_CC_U_GE;
247 case MVT::f32:
248 return AMDILCC::IL_CC_F_UGE;
249 case MVT::f64:
250 return AMDILCC::IL_CC_D_UGE;
251 case MVT::i64:
252 return AMDILCC::IL_CC_UL_GE;
253 default:
254 assert(0 && "Opcode combination not generated correctly!");
255 return AMDILCC::COND_ERROR;
256 };
257 case ISD::SETULT:
258 switch (type) {
259 case MVT::i1:
260 case MVT::i8:
261 case MVT::i16:
262 case MVT::i32:
263 return AMDILCC::IL_CC_U_LT;
264 case MVT::f32:
265 return AMDILCC::IL_CC_F_ULT;
266 case MVT::f64:
267 return AMDILCC::IL_CC_D_ULT;
268 case MVT::i64:
269 return AMDILCC::IL_CC_UL_LT;
270 default:
271 assert(0 && "Opcode combination not generated correctly!");
272 return AMDILCC::COND_ERROR;
273 };
274 case ISD::SETULE:
275 switch (type) {
276 case MVT::i1:
277 case MVT::i8:
278 case MVT::i16:
279 case MVT::i32:
280 return AMDILCC::IL_CC_U_LE;
281 case MVT::f32:
282 return AMDILCC::IL_CC_F_ULE;
283 case MVT::f64:
284 return AMDILCC::IL_CC_D_ULE;
285 case MVT::i64:
286 return AMDILCC::IL_CC_UL_LE;
287 default:
288 assert(0 && "Opcode combination not generated correctly!");
289 return AMDILCC::COND_ERROR;
290 };
291 case ISD::SETUNE:
292 switch (type) {
293 case MVT::i1:
294 case MVT::i8:
295 case MVT::i16:
296 case MVT::i32:
297 return AMDILCC::IL_CC_U_NE;
298 case MVT::f32:
299 return AMDILCC::IL_CC_F_UNE;
300 case MVT::f64:
301 return AMDILCC::IL_CC_D_UNE;
302 case MVT::i64:
303 return AMDILCC::IL_CC_UL_NE;
304 default:
305 assert(0 && "Opcode combination not generated correctly!");
306 return AMDILCC::COND_ERROR;
307 };
308 case ISD::SETUEQ:
309 switch (type) {
310 case MVT::i1:
311 case MVT::i8:
312 case MVT::i16:
313 case MVT::i32:
314 return AMDILCC::IL_CC_U_EQ;
315 case MVT::f32:
316 return AMDILCC::IL_CC_F_UEQ;
317 case MVT::f64:
318 return AMDILCC::IL_CC_D_UEQ;
319 case MVT::i64:
320 return AMDILCC::IL_CC_UL_EQ;
321 default:
322 assert(0 && "Opcode combination not generated correctly!");
323 return AMDILCC::COND_ERROR;
324 };
325 case ISD::SETOGT:
326 switch (type) {
327 case MVT::f32:
328 return AMDILCC::IL_CC_F_OGT;
329 case MVT::f64:
330 return AMDILCC::IL_CC_D_OGT;
331 case MVT::i1:
332 case MVT::i8:
333 case MVT::i16:
334 case MVT::i32:
335 case MVT::i64:
336 default:
337 assert(0 && "Opcode combination not generated correctly!");
338 return AMDILCC::COND_ERROR;
339 };
340 case ISD::SETOGE:
341 switch (type) {
342 case MVT::f32:
343 return AMDILCC::IL_CC_F_OGE;
344 case MVT::f64:
345 return AMDILCC::IL_CC_D_OGE;
346 case MVT::i1:
347 case MVT::i8:
348 case MVT::i16:
349 case MVT::i32:
350 case MVT::i64:
351 default:
352 assert(0 && "Opcode combination not generated correctly!");
353 return AMDILCC::COND_ERROR;
354 };
355 case ISD::SETOLT:
356 switch (type) {
357 case MVT::f32:
358 return AMDILCC::IL_CC_F_OLT;
359 case MVT::f64:
360 return AMDILCC::IL_CC_D_OLT;
361 case MVT::i1:
362 case MVT::i8:
363 case MVT::i16:
364 case MVT::i32:
365 case MVT::i64:
366 default:
367 assert(0 && "Opcode combination not generated correctly!");
368 return AMDILCC::COND_ERROR;
369 };
370 case ISD::SETOLE:
371 switch (type) {
372 case MVT::f32:
373 return AMDILCC::IL_CC_F_OLE;
374 case MVT::f64:
375 return AMDILCC::IL_CC_D_OLE;
376 case MVT::i1:
377 case MVT::i8:
378 case MVT::i16:
379 case MVT::i32:
380 case MVT::i64:
381 default:
382 assert(0 && "Opcode combination not generated correctly!");
383 return AMDILCC::COND_ERROR;
384 };
385 case ISD::SETONE:
386 switch (type) {
387 case MVT::f32:
388 return AMDILCC::IL_CC_F_ONE;
389 case MVT::f64:
390 return AMDILCC::IL_CC_D_ONE;
391 case MVT::i1:
392 case MVT::i8:
393 case MVT::i16:
394 case MVT::i32:
395 case MVT::i64:
396 default:
397 assert(0 && "Opcode combination not generated correctly!");
398 return AMDILCC::COND_ERROR;
399 };
400 case ISD::SETOEQ:
401 switch (type) {
402 case MVT::f32:
403 return AMDILCC::IL_CC_F_OEQ;
404 case MVT::f64:
405 return AMDILCC::IL_CC_D_OEQ;
406 case MVT::i1:
407 case MVT::i8:
408 case MVT::i16:
409 case MVT::i32:
410 case MVT::i64:
411 default:
412 assert(0 && "Opcode combination not generated correctly!");
413 return AMDILCC::COND_ERROR;
414 };
415 };
416 }
417
418 /// Helper function used by LowerFormalArguments
/// Helper function used by LowerFormalArguments
/// Maps an MVT::SimpleValueType value to the AMDIL register class that
/// holds it.  Covers scalar i8..i64/f32/f64 and the 2- and 4-element
/// vector types the target supports.
static const TargetRegisterClass*
getRegClassFromType(unsigned int type) {
  switch (type) {
  default:
    assert(0 && "Passed in type does not match any register classes.");
    // NOTE(review): with asserts disabled, control deliberately falls
    // through and an unknown type is treated as i8.
  case MVT::i8:
    return &AMDIL::GPRI8RegClass;
  case MVT::i16:
    return &AMDIL::GPRI16RegClass;
  case MVT::i32:
    return &AMDIL::GPRI32RegClass;
  case MVT::f32:
    return &AMDIL::GPRF32RegClass;
  case MVT::i64:
    return &AMDIL::GPRI64RegClass;
  case MVT::f64:
    return &AMDIL::GPRF64RegClass;
  case MVT::v4f32:
    return &AMDIL::GPRV4F32RegClass;
  case MVT::v4i8:
    return &AMDIL::GPRV4I8RegClass;
  case MVT::v4i16:
    return &AMDIL::GPRV4I16RegClass;
  case MVT::v4i32:
    return &AMDIL::GPRV4I32RegClass;
  case MVT::v2f32:
    return &AMDIL::GPRV2F32RegClass;
  case MVT::v2i8:
    return &AMDIL::GPRV2I8RegClass;
  case MVT::v2i16:
    return &AMDIL::GPRV2I16RegClass;
  case MVT::v2i32:
    return &AMDIL::GPRV2I32RegClass;
  case MVT::v2f64:
    return &AMDIL::GPRV2F64RegClass;
  case MVT::v2i64:
    return &AMDIL::GPRV2I64RegClass;
  }
}
458
459 SDValue
460 AMDILTargetLowering::LowerMemArgument(
461 SDValue Chain,
462 CallingConv::ID CallConv,
463 const SmallVectorImpl<ISD::InputArg> &Ins,
464 DebugLoc dl, SelectionDAG &DAG,
465 const CCValAssign &VA,
466 MachineFrameInfo *MFI,
467 unsigned i) const
468 {
469 // Create the nodes corresponding to a load from this parameter slot.
470 ISD::ArgFlagsTy Flags = Ins[i].Flags;
471
472 bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
473 getTargetMachine().Options.GuaranteedTailCallOpt;
474 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
475
476 // FIXME: For now, all byval parameter objects are marked mutable. This can
477 // be changed with more analysis.
478 // In case of tail call optimization mark all arguments mutable. Since they
479 // could be overwritten by lowering of arguments in case of a tail call.
480 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
481 VA.getLocMemOffset(), isImmutable);
482 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
483
484 if (Flags.isByVal())
485 return FIN;
486 return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
487 MachinePointerInfo::getFixedStack(FI),
488 false, false, false, 0);
489 }
490 //===----------------------------------------------------------------------===//
491 // TargetLowering Implementation Help Functions End
492 //===----------------------------------------------------------------------===//
493 //===----------------------------------------------------------------------===//
494 // Instruction generation functions
495 //===----------------------------------------------------------------------===//
496 MachineOperand
497 AMDILTargetLowering::convertToReg(MachineOperand op) const
498 {
499 if (op.isReg()) {
500 return op;
501 } else if (op.isImm()) {
502 uint32_t loadReg
503 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
504 generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
505 .addImm(op.getImm());
506 op.ChangeToRegister(loadReg, false);
507 } else if (op.isFPImm()) {
508 uint32_t loadReg
509 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
510 generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
511 .addFPImm(op.getFPImm());
512 op.ChangeToRegister(loadReg, false);
513 } else if (op.isMBB()) {
514 op.ChangeToRegister(0, false);
515 } else if (op.isFI()) {
516 op.ChangeToRegister(0, false);
517 } else if (op.isCPI()) {
518 op.ChangeToRegister(0, false);
519 } else if (op.isJTI()) {
520 op.ChangeToRegister(0, false);
521 } else if (op.isGlobal()) {
522 op.ChangeToRegister(0, false);
523 } else if (op.isSymbol()) {
524 op.ChangeToRegister(0, false);
525 }/* else if (op.isMetadata()) {
526 op.ChangeToRegister(0, false);
527 }*/
528 return op;
529 }
530
531 //===----------------------------------------------------------------------===//
532 // TargetLowering Class Implementation Begins
533 //===----------------------------------------------------------------------===//
534 AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
535 : TargetLowering(TM, new TargetLoweringObjectFileELF())
536 {
537 int types[] =
538 {
539 (int)MVT::i8,
540 (int)MVT::i16,
541 (int)MVT::i32,
542 (int)MVT::f32,
543 (int)MVT::f64,
544 (int)MVT::i64,
545 (int)MVT::v2i8,
546 (int)MVT::v4i8,
547 (int)MVT::v2i16,
548 (int)MVT::v4i16,
549 (int)MVT::v4f32,
550 (int)MVT::v4i32,
551 (int)MVT::v2f32,
552 (int)MVT::v2i32,
553 (int)MVT::v2f64,
554 (int)MVT::v2i64
555 };
556
557 int IntTypes[] =
558 {
559 (int)MVT::i8,
560 (int)MVT::i16,
561 (int)MVT::i32,
562 (int)MVT::i64
563 };
564
565 int FloatTypes[] =
566 {
567 (int)MVT::f32,
568 (int)MVT::f64
569 };
570
571 int VectorTypes[] =
572 {
573 (int)MVT::v2i8,
574 (int)MVT::v4i8,
575 (int)MVT::v2i16,
576 (int)MVT::v4i16,
577 (int)MVT::v4f32,
578 (int)MVT::v4i32,
579 (int)MVT::v2f32,
580 (int)MVT::v2i32,
581 (int)MVT::v2f64,
582 (int)MVT::v2i64
583 };
584 size_t numTypes = sizeof(types) / sizeof(*types);
585 size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
586 size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
587 size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
588
589 const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
590 // These are the current register classes that are
591 // supported
592
593 addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
594 addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
595
596 if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
597 addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
598 addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
599 }
600 if (STM.device()->isSupported(AMDILDeviceInfo::ByteOps)) {
601 addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
602 addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
603 addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
604 setOperationAction(ISD::Constant , MVT::i8 , Legal);
605 }
606 if (STM.device()->isSupported(AMDILDeviceInfo::ShortOps)) {
607 addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
608 addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
609 addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
610 setOperationAction(ISD::Constant , MVT::i16 , Legal);
611 }
612 addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
613 addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
614 addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
615 addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
616 if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
617 addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
618 addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
619 }
620
621 for (unsigned int x = 0; x < numTypes; ++x) {
622 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
623
624 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
625 // We cannot sextinreg, expand to shifts
626 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
627 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
628 setOperationAction(ISD::FP_ROUND, VT, Expand);
629 setOperationAction(ISD::SUBE, VT, Expand);
630 setOperationAction(ISD::SUBC, VT, Expand);
631 setOperationAction(ISD::ADDE, VT, Expand);
632 setOperationAction(ISD::ADDC, VT, Expand);
633 setOperationAction(ISD::SETCC, VT, Custom);
634 setOperationAction(ISD::BRCOND, VT, Custom);
635 setOperationAction(ISD::BR_CC, VT, Custom);
636 setOperationAction(ISD::BR_JT, VT, Expand);
637 setOperationAction(ISD::BRIND, VT, Expand);
638 // TODO: Implement custom UREM/SREM routines
639 setOperationAction(ISD::UREM, VT, Expand);
640 setOperationAction(ISD::SREM, VT, Expand);
641 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
642 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
643 setOperationAction(ISD::GlobalAddress, VT, Custom);
644 setOperationAction(ISD::JumpTable, VT, Custom);
645 setOperationAction(ISD::ConstantPool, VT, Custom);
646 setOperationAction(ISD::SELECT_CC, VT, Custom);
647 setOperationAction(ISD::SELECT, VT, Custom);
648 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
649 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
650 if (VT != MVT::i64 && VT != MVT::v2i64) {
651 setOperationAction(ISD::SDIV, VT, Custom);
652 }
653 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
654 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
655 }
656 for (unsigned int x = 0; x < numFloatTypes; ++x) {
657 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
658
659 // IL does not have these operations for floating point types
660 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
661 setOperationAction(ISD::FP_ROUND, VT, Custom);
662 setOperationAction(ISD::SETOLT, VT, Expand);
663 setOperationAction(ISD::SETOGE, VT, Expand);
664 setOperationAction(ISD::SETOGT, VT, Expand);
665 setOperationAction(ISD::SETOLE, VT, Expand);
666 setOperationAction(ISD::SETULT, VT, Expand);
667 setOperationAction(ISD::SETUGE, VT, Expand);
668 setOperationAction(ISD::SETUGT, VT, Expand);
669 setOperationAction(ISD::SETULE, VT, Expand);
670 }
671
672 for (unsigned int x = 0; x < numIntTypes; ++x) {
673 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
674
675 // GPU also does not have divrem function for signed or unsigned
676 setOperationAction(ISD::SDIVREM, VT, Expand);
677 setOperationAction(ISD::UDIVREM, VT, Expand);
678 setOperationAction(ISD::FP_ROUND, VT, Expand);
679
680 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
681 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
682 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
683
684 // GPU doesn't have a rotl, rotr, or byteswap instruction
685 setOperationAction(ISD::ROTR, VT, Expand);
686 setOperationAction(ISD::ROTL, VT, Expand);
687 setOperationAction(ISD::BSWAP, VT, Expand);
688
689 // GPU doesn't have any counting operators
690 setOperationAction(ISD::CTPOP, VT, Expand);
691 setOperationAction(ISD::CTTZ, VT, Expand);
692 setOperationAction(ISD::CTLZ, VT, Expand);
693 }
694
695 for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
696 {
697 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
698
699 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
700 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
701 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
702 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
703 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
704 setOperationAction(ISD::FP_ROUND, VT, Expand);
705 setOperationAction(ISD::SDIVREM, VT, Expand);
706 setOperationAction(ISD::UDIVREM, VT, Expand);
707 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
708 // setOperationAction(ISD::VSETCC, VT, Expand);
709 setOperationAction(ISD::SETCC, VT, Expand);
710 setOperationAction(ISD::SELECT_CC, VT, Expand);
711 setOperationAction(ISD::SELECT, VT, Expand);
712
713 }
714 setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
715 if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
716 if (STM.calVersion() < CAL_VERSION_SC_139
717 || STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
718 setOperationAction(ISD::MUL, MVT::i64, Custom);
719 }
720 setOperationAction(ISD::SUB, MVT::i64, Custom);
721 setOperationAction(ISD::ADD, MVT::i64, Custom);
722 setOperationAction(ISD::MULHU, MVT::i64, Expand);
723 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
724 setOperationAction(ISD::MULHS, MVT::i64, Expand);
725 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
726 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
727 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
728 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
729 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
730 setOperationAction(ISD::Constant , MVT::i64 , Legal);
731 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
732 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
733 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
734 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
735 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
736 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
737 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
738 }
739 if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
740 // we support loading/storing v2f64 but not operations on the type
741 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
742 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
743 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
744 setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
745 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
746 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
747 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
748 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
749 // We want to expand vector conversions into their scalar
750 // counterparts.
751 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
752 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
753 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
754 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
755 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
756 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
757 setOperationAction(ISD::FABS, MVT::f64, Expand);
758 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
759 }
760 // TODO: Fix the UDIV24 algorithm so it works for these
761 // types correctly. This needs vector comparisons
762 // for this to work correctly.
763 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
764 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
765 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
766 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
767 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
768 setOperationAction(ISD::SUBC, MVT::Other, Expand);
769 setOperationAction(ISD::ADDE, MVT::Other, Expand);
770 setOperationAction(ISD::ADDC, MVT::Other, Expand);
771 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
772 setOperationAction(ISD::BR_CC, MVT::Other, Custom);
773 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
774 setOperationAction(ISD::BRIND, MVT::Other, Expand);
775 setOperationAction(ISD::SETCC, MVT::Other, Custom);
776 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
777 setOperationAction(ISD::FDIV, MVT::f32, Custom);
778 setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
779 setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
780
781 setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
782 // Use the default implementation.
783 setOperationAction(ISD::VAARG , MVT::Other, Expand);
784 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
785 setOperationAction(ISD::VAEND , MVT::Other, Expand);
786 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
787 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
788 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
789 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
790 setOperationAction(ISD::Constant , MVT::i32 , Legal);
791 setOperationAction(ISD::TRAP , MVT::Other , Legal);
792
793 setStackPointerRegisterToSaveRestore(AMDIL::SP);
794 setSchedulingPreference(Sched::RegPressure);
795 setPow2DivIsCheap(false);
796 setPrefLoopAlignment(16);
797 setSelectIsExpensive(true);
798 setJumpIsExpensive(true);
799
800 maxStoresPerMemcpy = 4096;
801 maxStoresPerMemmove = 4096;
802 maxStoresPerMemset = 4096;
803
804 #undef numTypes
805 #undef numIntTypes
806 #undef numVectorTypes
807 #undef numFloatTypes
808 }
809
810 const char *
811 AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
812 {
813 switch (Opcode) {
814 default: return 0;
815 case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
816 case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
817 case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
818 case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
819 case AMDILISD::CMOV: return "AMDILISD::CMOV";
820 case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
821 case AMDILISD::INEGATE: return "AMDILISD::INEGATE";
822 case AMDILISD::MAD: return "AMDILISD::MAD";
823 case AMDILISD::UMAD: return "AMDILISD::UMAD";
824 case AMDILISD::CALL: return "AMDILISD::CALL";
825 case AMDILISD::RET: return "AMDILISD::RET";
826 case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
827 case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
828 case AMDILISD::ADD: return "AMDILISD::ADD";
829 case AMDILISD::UMUL: return "AMDILISD::UMUL";
830 case AMDILISD::AND: return "AMDILISD::AND";
831 case AMDILISD::OR: return "AMDILISD::OR";
832 case AMDILISD::NOT: return "AMDILISD::NOT";
833 case AMDILISD::XOR: return "AMDILISD::XOR";
834 case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
835 case AMDILISD::SMAX: return "AMDILISD::SMAX";
836 case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
837 case AMDILISD::MOVE: return "AMDILISD::MOVE";
838 case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
839 case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
840 case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
841 case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
842 case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
843 case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
844 case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
845 case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
846 case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
847 case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
848 case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
849 case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
850 case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
851 case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
852 case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
853 case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
854 case AMDILISD::CMP: return "AMDILISD::CMP";
855 case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
856 case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
857 case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
858 case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
859 case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
860 case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
861 case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
862 case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
863 case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
864 case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
865 case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
866 case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
867 case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
868 case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
869 case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
870 case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
871 case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
872 case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
873 case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
874 case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
875 case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
876 case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
877 case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
878 case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
879 case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
880 case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
881 case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
882 case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
883 case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
884 case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
885 case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
886 case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
887 case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
888 case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
889 case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
890 case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
891 case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
892 case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
893 case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
894 case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
895 case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
896 case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
897 case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
898 case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
899 case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
900 case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
901 case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
902 case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
903 case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
904 case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
905 case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
906 case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
907 case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
908 case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
909 case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
910 case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
911 case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
912 case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
913 case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
914 case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
915 case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
916 case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
917 case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
918 case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
919 case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
920 case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
921 case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
922 case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
923 case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
924 case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
925 case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
926 case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
927 case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
928 case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
929 case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
930 case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
931 case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
932 case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
933 case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
934 case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
935 case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
936 case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
937 case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
938 case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
939 case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
940 case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
941 case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
942 case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
943 case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
944 case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
945 case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
946 case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
947 case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
948 case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
949 case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
950 case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
951 case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
952 case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
953 case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
954 case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
955 case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
956 case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
957 case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
958 case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
959 case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
960 case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
961 case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
962 case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
963 case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
964
965 };
966 }
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  // Map an AMDIL atomic/append intrinsic call onto the IntrinsicInfo record
  // used by the DAG builder to create a memory intrinsic node.  Returns false
  // for anything that is not one of the handled intrinsics, so generic
  // lowering proceeds unchanged.
  //
  // Naming scheme seen below: the g/l/r infix in the intrinsic names matches
  // the ATOM_G_* / ATOM_L_* / ATOM_R_* opcode families (presumably the
  // global/local/region address spaces — confirm against the ISD defs).
  // The *_noret forms select a *_NORET opcode and clear isRet.
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  bool bitCastToInt = false; // set by the f32 exchange forms, which share the
                             // integer xchg opcode via intentional fallthrough
  unsigned IntNo;            // target ISD opcode selected by the switch
  bool isRet = true;         // cleared for every *_noret variant
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  switch (Intrinsic) {
    default: return false; // Don't custom lower most intrinsics.
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
      IntNo = AMDILISD::ATOM_G_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
      IntNo = AMDILISD::ATOM_L_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
      IntNo = AMDILISD::ATOM_R_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
      IntNo = AMDILISD::ATOM_G_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
      IntNo = AMDILISD::ATOM_L_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
      IntNo = AMDILISD::ATOM_R_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
      IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
      IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
      IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
    // Atomic dec/inc: on CAL versions older than SC_136 there is no native
    // dec/inc opcode selected, so the code falls back to sub/add instead.
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_DEC;
      } else {
        IntNo = AMDILISD::ATOM_G_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_G_SUB_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_DEC;
      } else {
        IntNo = AMDILISD::ATOM_L_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_L_SUB_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_DEC;
      } else {
        IntNo = AMDILISD::ATOM_R_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_R_SUB_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_INC;
      } else {
        IntNo = AMDILISD::ATOM_G_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_G_ADD_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_INC;
      } else {
        IntNo = AMDILISD::ATOM_L_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_L_ADD_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_INC;
      } else {
        IntNo = AMDILISD::ATOM_R_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_R_ADD_NORET;
      }
      break;
    // Max/min distinguish signed (MAX/MIN) from unsigned (UMAX/UMIN) forms,
    // so the i32/u32 variants cannot share a case label here.
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
      IntNo = AMDILISD::ATOM_G_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
      IntNo = AMDILISD::ATOM_G_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
      IntNo = AMDILISD::ATOM_L_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
      IntNo = AMDILISD::ATOM_L_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
      IntNo = AMDILISD::ATOM_R_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
      IntNo = AMDILISD::ATOM_R_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
      IntNo = AMDILISD::ATOM_G_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
      IntNo = AMDILISD::ATOM_G_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
      IntNo = AMDILISD::ATOM_L_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
      IntNo = AMDILISD::ATOM_L_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
      IntNo = AMDILISD::ATOM_R_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
      IntNo = AMDILISD::ATOM_R_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
      IntNo = AMDILISD::ATOM_G_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
      IntNo = AMDILISD::ATOM_L_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
      IntNo = AMDILISD::ATOM_R_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
      IntNo = AMDILISD::ATOM_G_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
      IntNo = AMDILISD::ATOM_L_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
      IntNo = AMDILISD::ATOM_R_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
      IntNo = AMDILISD::ATOM_G_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
      IntNo = AMDILISD::ATOM_L_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
      IntNo = AMDILISD::ATOM_R_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
      bitCastToInt = true;
      // intentional fallthrough: f32 xchg shares the integer xchg opcode
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
      IntNo = AMDILISD::ATOM_G_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
      bitCastToInt = true;
      // intentional fallthrough
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
      bitCastToInt = true;
      // intentional fallthrough
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
      IntNo = AMDILISD::ATOM_L_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
      bitCastToInt = true;
      // intentional fallthrough
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
      bitCastToInt = true;
      // intentional fallthrough
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
      IntNo = AMDILISD::ATOM_R_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
      bitCastToInt = true;
      // intentional fallthrough
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
      IntNo = AMDILISD::ATOM_G_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
      IntNo = AMDILISD::ATOM_L_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
      IntNo = AMDILISD::ATOM_R_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
      IntNo = AMDILISD::APPEND_ALLOC; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
      isRet = false;
      IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
      IntNo = AMDILISD::APPEND_CONSUME; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
      isRet = false;
      IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  Info.opc = IntNo;
  // NOTE(review): this selection looks inverted — when the f32 operand is
  // bitcast to int the in-memory type would presumably be i32, yet f32 is
  // chosen when bitCastToInt is set. Confirm against how memVT is consumed
  // before changing.
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0); // assumes operand 0 is the address argument — TODO confirm
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true;               // atomics must not be reordered or folded
  Info.readMem = isRet;          // *_noret forms never read the old value back
  Info.writeMem = true;
  return true;
}
1346 // The backend supports 32 and 64 bit floating point immediates
1347 bool
1348 AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
1349 {
1350 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1351 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1352 return true;
1353 } else {
1354 return false;
1355 }
1356 }
1357
1358 bool
1359 AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
1360 {
1361 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1362 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1363 return false;
1364 } else {
1365 return true;
1366 }
1367 }
1368
1369
1370 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
1371 // be zero. Op is expected to be a target specific node. Used by DAG
1372 // combiner.
1373
1374 void
1375 AMDILTargetLowering::computeMaskedBitsForTargetNode(
1376 const SDValue Op,
1377 APInt &KnownZero,
1378 APInt &KnownOne,
1379 const SelectionDAG &DAG,
1380 unsigned Depth) const
1381 {
1382 APInt KnownZero2;
1383 APInt KnownOne2;
1384 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
1385 switch (Op.getOpcode()) {
1386 default: break;
1387 case AMDILISD::SELECT_CC:
1388 DAG.ComputeMaskedBits(
1389 Op.getOperand(1),
1390 KnownZero,
1391 KnownOne,
1392 Depth + 1
1393 );
1394 DAG.ComputeMaskedBits(
1395 Op.getOperand(0),
1396 KnownZero2,
1397 KnownOne2
1398 );
1399 assert((KnownZero & KnownOne) == 0
1400 && "Bits known to be one AND zero?");
1401 assert((KnownZero2 & KnownOne2) == 0
1402 && "Bits known to be one AND zero?");
1403 // Only known if known in both the LHS and RHS
1404 KnownOne &= KnownOne2;
1405 KnownZero &= KnownZero2;
1406 break;
1407 };
1408 }
1409
1410 // This is the function that determines which calling convention should
1411 // be used. Currently there is only one calling convention
1412 CCAssignFn*
1413 AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
1414 {
1415 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1416 return CC_AMDIL32;
1417 }
1418
// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of appropriate physical registers. This assumes that
// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
// being lowered. This returns an SDNode with the same number of values as the
// ISD::CALL.
1424 SDValue
1425 AMDILTargetLowering::LowerCallResult(
1426 SDValue Chain,
1427 SDValue InFlag,
1428 CallingConv::ID CallConv,
1429 bool isVarArg,
1430 const SmallVectorImpl<ISD::InputArg> &Ins,
1431 DebugLoc dl,
1432 SelectionDAG &DAG,
1433 SmallVectorImpl<SDValue> &InVals) const
1434 {
1435 // Assign locations to each value returned by this call
1436 SmallVector<CCValAssign, 16> RVLocs;
1437 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1438 getTargetMachine(), RVLocs, *DAG.getContext());
1439 CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
1440
1441 // Copy all of the result registers out of their specified physreg.
1442 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1443 EVT CopyVT = RVLocs[i].getValVT();
1444 if (RVLocs[i].isRegLoc()) {
1445 Chain = DAG.getCopyFromReg(
1446 Chain,
1447 dl,
1448 RVLocs[i].getLocReg(),
1449 CopyVT,
1450 InFlag
1451 ).getValue(1);
1452 SDValue Val = Chain.getValue(0);
1453 InFlag = Chain.getValue(2);
1454 InVals.push_back(Val);
1455 }
1456 }
1457
1458 return Chain;
1459
1460 }
1461
1462 //===----------------------------------------------------------------------===//
1463 // Other Lowering Hooks
1464 //===----------------------------------------------------------------------===//
1465
1466 // Recursively assign SDNodeOrdering to any unordered nodes
1467 // This is necessary to maintain source ordering of instructions
1468 // under -O0 to avoid odd-looking "skipping around" issues.
1469 static const SDValue
1470 Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
1471 {
1472 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
1473 DAG.AssignOrdering( New.getNode(), order );
1474 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
1475 Ordered( DAG, order, New.getOperand(i) );
1476 }
1477 return New;
1478 }
1479
// Expand to a case that dispatches opcode A to its Lower##A handler, copying
// the original node's SDNodeOrdering onto the replacement via Ordered().
#define LOWER(A) \
  case ISD:: A: \
return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )

// Dispatch every operation marked Custom to its Lower* routine.  Any opcode
// that reaches the default case was marked Custom without a handler; the node
// is dumped to make the missing entry easy to identify before the assert.
SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
    default:
      Op.getNode()->dump();
      assert(0 && "Custom lowering code for this"
          "instruction is not implemented yet!");
      break;
    LOWER(GlobalAddress);
    LOWER(JumpTable);
    LOWER(ConstantPool);
    LOWER(ExternalSymbol);
    LOWER(FP_TO_UINT);
    LOWER(UINT_TO_FP);
    LOWER(MUL);
    LOWER(SUB);
    LOWER(FDIV);
    LOWER(SDIV);
    LOWER(SREM);
    LOWER(UREM);
    LOWER(BUILD_VECTOR);
    LOWER(INSERT_VECTOR_ELT);
    LOWER(EXTRACT_VECTOR_ELT);
    LOWER(EXTRACT_SUBVECTOR);
    LOWER(SCALAR_TO_VECTOR);
    LOWER(CONCAT_VECTORS);
    LOWER(SELECT);
    LOWER(SETCC);
    LOWER(SIGN_EXTEND_INREG);
    LOWER(DYNAMIC_STACKALLOC);
    LOWER(BRCOND);
    LOWER(BR_CC);
    LOWER(FP_ROUND);
  }
  // Unreached for handled opcodes (each LOWER case returns); only the
  // asserting default path falls through to here.
  return Op;
}
1521
1522 int
1523 AMDILTargetLowering::getVarArgsFrameOffset() const
1524 {
1525 return VarArgsFrameOffset;
1526 }
1527 #undef LOWER
1528
1529 SDValue
1530 AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
1531 {
1532 SDValue DST = Op;
1533 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
1534 const GlobalValue *G = GADN->getGlobal();
1535 DebugLoc DL = Op.getDebugLoc();
1536 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
1537 if (!GV) {
1538 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1539 } else {
1540 if (GV->hasInitializer()) {
1541 const Constant *C = dyn_cast<Constant>(GV->getInitializer());
1542 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
1543 DST = DAG.getConstant(CI->getValue(), Op.getValueType());
1544 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
1545 DST = DAG.getConstantFP(CF->getValueAPF(),
1546 Op.getValueType());
1547 } else if (dyn_cast<ConstantAggregateZero>(C)) {
1548 EVT VT = Op.getValueType();
1549 if (VT.isInteger()) {
1550 DST = DAG.getConstant(0, VT);
1551 } else {
1552 DST = DAG.getConstantFP(0, VT);
1553 }
1554 } else {
1555 assert(!"lowering this type of Global Address "
1556 "not implemented yet!");
1557 C->dump();
1558 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1559 }
1560 } else {
1561 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1562 }
1563 }
1564 return DST;
1565 }
1566
1567 SDValue
1568 AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
1569 {
1570 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1571 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
1572 return Result;
1573 }
1574 SDValue
1575 AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
1576 {
1577 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1578 EVT PtrVT = Op.getValueType();
1579 SDValue Result;
1580 if (CP->isMachineConstantPoolEntry()) {
1581 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1582 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
1583 } else {
1584 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1585 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
1586 }
1587 return Result;
1588 }
1589
1590 SDValue
1591 AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
1592 {
1593 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
1594 SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
1595 return Result;
1596 }
1597
/// LowerFormalArguments - Transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Lower each incoming formal argument: register-passed values become
  // live-ins copied into virtual registers; stack-passed values are loaded
  // via LowerMemArgument.  Varargs and struct-return are unimplemented and
  // assert.  Returns the (possibly updated) chain.

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // Register argument: mark the physreg live-in and copy it into a
      // fresh virtual register of the matching class.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack argument: emit the load from the caller's frame.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
1698 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1699 /// by "Src" to address "Dst" with size and alignment information specified by
1700 /// the specific parameter attribute. The copy will be passed as a byval
1701 /// function parameter.
1702 static SDValue
1703 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1704 ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
1705 assert(0 && "MemCopy does not exist yet");
1706 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1707
1708 return DAG.getMemcpy(Chain,
1709 Src.getDebugLoc(),
1710 Dst, Src, SizeNode, Flags.getByValAlign(),
1711 /*IsVol=*/false, /*AlwaysInline=*/true,
1712 MachinePointerInfo(), MachinePointerInfo());
1713 }
1714
1715 SDValue
1716 AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
1717 SDValue StackPtr, SDValue Arg,
1718 DebugLoc dl, SelectionDAG &DAG,
1719 const CCValAssign &VA,
1720 ISD::ArgFlagsTy Flags) const
1721 {
1722 unsigned int LocMemOffset = VA.getLocMemOffset();
1723 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1724 PtrOff = DAG.getNode(ISD::ADD,
1725 dl,
1726 getPointerTy(), StackPtr, PtrOff);
1727 if (Flags.isByVal()) {
1728 PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
1729 } else {
1730 PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
1731 MachinePointerInfo::getStack(LocMemOffset),
1732 false, false, 0);
1733 }
1734 return PtrOff;
1735 }
/// LowerCall - Lower an outgoing call: argument values are copied from
/// virtual regs to (physical regs)/(stack frame) as dictated by the
/// calling convention, and the CALLSEQ_START / AMDILISD::CALL /
/// CALLSEQ_END node sequence is emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are never performed; report that back through the out-param
  // so SelectionDAG does not expect tail-call semantics.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  // Total stack space the convention assigned to memory-passed arguments.
  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value to its assigned location type if the convention
    // requires an extension.
    switch(VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND,
                        dl,
                        VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND,
                        dl,
                        VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND,
                        dl,
                        VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      // Register-passed: record the pair; copies are emitted below so the
      // glue chain stays contiguous.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
                                      VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE whichs stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                         MachinePointerInfo::getFixedStack(FI),
                                         false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Tie all argument stores together so they are ordered before the call.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
                        dl,
                        MVT::Other,
                        &MemOpChains[0],
                        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    // Copy register arguments into their physical registers, glueing each
    // copy to the next so nothing can be scheduled in between.
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
                               dl,
                               RegsToPass[i].first,
                               RegsToPass[i].second,
                               InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  // The call produces a chain and glue so the result copies below can be
  // glued to it.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
        RegsToPass[i].first,
        RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
                      dl,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
                         InVals);
}
1914
1915 SDValue
1916 AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
1917 uint32_t bits) const
1918 {
1919 DebugLoc DL = Op.getDebugLoc();
1920 EVT INTTY = Op.getValueType();
1921 EVT FPTY;
1922 if (INTTY.isVector()) {
1923 FPTY = EVT(MVT::getVectorVT(MVT::f32,
1924 INTTY.getVectorNumElements()));
1925 } else {
1926 FPTY = EVT(MVT::f32);
1927 }
1928 /* static inline uint
1929 __clz_Nbit(uint x)
1930 {
1931 int xor = 0x3f800000U | x;
1932 float tp = as_float(xor);
1933 float t = tp + -1.0f;
1934 uint tint = as_uint(t);
1935 int cmp = (x != 0);
1936 uint tsrc = tint >> 23;
1937 uint tmask = tsrc & 0xffU;
1938 uint cst = (103 + N)U - tmask;
1939 return cmp ? cst : N;
1940 }
1941 */
1942 assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
1943 && "genCLZu16 only works on 32bit types");
1944 // uint x = Op
1945 SDValue x = Op;
1946 // xornode = 0x3f800000 | x
1947 SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
1948 DAG.getConstant(0x3f800000, INTTY), x);
1949 // float tp = as_float(xornode)
1950 SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
1951 // float t = tp + -1.0f
1952 SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
1953 DAG.getConstantFP(-1.0f, FPTY));
1954 // uint tint = as_uint(t)
1955 SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
1956 // int cmp = (x != 0)
1957 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
1958 DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
1959 DAG.getConstant(0, INTTY));
1960 // uint tsrc = tint >> 23
1961 SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
1962 DAG.getConstant(23, INTTY));
1963 // uint tmask = tsrc & 0xFF
1964 SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
1965 DAG.getConstant(0xFFU, INTTY));
1966 // uint cst = (103 + bits) - tmask
1967 SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
1968 DAG.getConstant((103U + bits), INTTY), tmask);
1969 // return cmp ? cst : N
1970 cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
1971 DAG.getConstant(bits, INTTY));
1972 return cst;
1973 }
1974
// genCLZu32 - Emit a count-leading-zeros sequence for i32 (scalar or
// vector-of-i32) values.  HD5XXX-and-newer parts use the native
// find-first-bit-high instruction; HD4XXX parts synthesize the count from
// two 16-bit half counts via genCLZuN.
SDValue
AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    //__clz_32bit(uint u)
    //{
    // int z = __amdil_ffb_hi(u) ;
    // return z < 0 ? 32 : z;
    // }
    // uint u = op
    SDValue u = Op;
    // int z = __amdil_ffb_hi(u)
    // IFFB_HI returns a negative value when no bit is set.
    SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
    // int cmp = z < 0
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
        z, DAG.getConstant(0, INTTY));
    // return cmp ? 32 : z
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
        DAG.getConstant(32, INTTY), z);
  } else if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // static inline uint
    //__clz_32bit(uint x)
    //{
    // uint zh = __clz_16bit(x >> 16);
    // uint zl = __clz_16bit(x & 0xffffU);
    // return zh == 16U ? 16U + zl : zh;
    //}
    // uint x = Op
    SDValue x = Op;
    // uint xs16 = x >> 16
    SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
        DAG.getConstant(16, INTTY));
    // uint zh = __clz_16bit(xs16)
    SDValue zh = genCLZuN(xs16, DAG, 16);
    // uint xa16 = x & 0xFFFF
    SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
        DAG.getConstant(0xFFFFU, INTTY));
    // uint zl = __clz_16bit(xa16)
    SDValue zl = genCLZuN(xa16, DAG, 16);
    // uint cmp = zh == 16U
    // zh == 16 means the high half was all zero, so the count continues
    // into the low half.
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zh, DAG.getConstant(16U, INTTY));
    // uint zl16 = zl + 16
    SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
        DAG.getConstant(16, INTTY), zl);
    // return cmp ? zl16 : zh
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp, zl16, zh);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
// genCLZu64 - Emit a count-leading-zeros sequence for i64 (scalar or
// vector-of-i64) values.  HD5XXX+ combines two 32-bit counts; HD4XXX
// splits the value into three 23-bit-or-less pieces (the widest genCLZuN
// can handle exactly) and stitches the counts together.
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
          .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //uint zhi = __clz_32bit((uint)(x >> 32));
    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = x & 0xFFFFFFFF (low 32-bit component)
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = x >> 32 (high 32-bit component)
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? zlop32: zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX:
    // static inline uint
    //__clz_64bit(ulong x)
    //{
    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //uint r = zh == 18U ? 18U + zm : zh;
    //return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5  (implemented as zh + (unsigned)-5)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = (zhm5 == 18)  -- top 18 bits all zero
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = (zhm5zm == 41)  -- top 41 bits all zero
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ? zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
// genf64toi64 - Lower an f64 -> i64 conversion (FP_TO_UINT when
// includeSign is false, FP_TO_SINT when true) for scalar or vector types.
// HD7XXX+ composes the result from two hardware f64->u32 conversions;
// older parts extract and shift the IEEE-754 mantissa manually.
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
          .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
          .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // 0x2f800000 is the f32 bit pattern of 0x1.0p-32; getConstantFP
    // promotes it to the f64 constant 2^-32.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    // 0xcf800000 is the f32 bit pattern of -0x1.0p+32.
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      // Negate the magnitude when the input was negative (d == |RHS|).
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
       // Convert d in to 32-bit components
       long x = as_long(d);
       xhi = LCOMPHI(x);
       xlo = LCOMPLO(x);

       // Generate 'normalized' mantissa
       mhi = xhi | 0x00100000; // hidden bit
       mhi <<= 11;
       temp = xlo >> (32 - 11);
       mhi |= temp
       mlo = xlo << 11;

       // Compute shift right count from exponent
       e = (xhi >> (52-32)) & 0x7ff;
       sr = 1023 + 63 - e;
       srge64 = sr >= 64;
       srge32 = sr >= 32;

       // Compute result for 0 <= sr < 32
       rhi0 = mhi >> (sr &31);
       rlo0 = mlo >> (sr &31);
       temp = mhi << (32 - sr);
       temp |= rlo0;
       rlo0 = sr ? temp : rlo0;

       // Compute result for 32 <= sr
       rhi1 = 0;
       rlo1 = srge64 ? 0 : rhi0;

       // Pick between the 2 results
       rhi = srge32 ? rhi1 : rhi0;
       rlo = srge32 ? rlo1 : rlo0;

       // Optional saturate on overflow
       srlt0 = sr < 0;
       rhi = srlt0 ? MAXVALUE : rhi;
       rlo = srlt0 ? MAXVALUE : rlo;

       // Create long
       res = LCREATE( rlo, rhi );

       // Deal with sign bit (ignoring whether result is signed or unsigned value)
       if (includeSign) {
       sign = ((signed int) xhi) >> 31; fill with sign bit
       sign = LCREATE( sign, sign );
       res += sign;
       res ^= sign;
       }

       return res;
       }
     */
    // NOTE(review): the "Optional saturate on overflow" step from the
    // pseudocode above is NOT emitted below, so overflowing inputs wrap
    // rather than saturate — presumably relying on the conversion being
    // undefined for out-of-range values; confirm against callers.
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa (restore the implicit hidden bit)
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
    temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );

    // Compute result for 32 <= sr
    SDValue rhi1 = DAG.getConstant( 0, INTVT );
    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge64, rhi1, rhi0 );

    // Pick between the 2 results
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rhi1, rhi0 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rlo1, rlo0 );

    // Create long
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );

    // Deal with sign bit: res = (res + sign) ^ sign implements conditional
    // two's-complement negation (sign is all-ones iff the input was
    // negative).
    if (includeSign) {
      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
          xhi, DAG.getConstant( 31, INTVT ) );
      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}
// genf64toi32 - Lower an f64 -> i32 conversion (FP_TO_UINT when
// includeSign is false, FP_TO_SINT when true) for scalar or vector types
// by extracting and shifting the IEEE-754 mantissa manually.
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
     // Convert d in to 32-bit components
     long x = as_long(d);
     xhi = LCOMPHI(x);
     xlo = LCOMPLO(x);

     // Generate 'normalized' mantissa
     mhi = xhi | 0x00100000; // hidden bit
     mhi <<= 11;
     temp = xlo >> (32 - 11);
     mhi |= temp

     // Compute shift right count from exponent
     e = (xhi >> (52-32)) & 0x7ff;
     sr = 1023 + 31 - e;
     srge32 = sr >= 32;

     // Compute result for 0 <= sr < 32
     res = mhi >> (sr &31);
     res = srge32 ? 0 : res;

     // Optional saturate on overflow
     srlt0 = sr < 0;
     res = srlt0 ? MAXVALUE : res;

     // Deal with sign bit (ignoring whether result is signed or unsigned value)
     if (includeSign) {
     sign = ((signed int) xhi) >> 31; fill with sign bit
     res += sign;
     res ^= sign;
     }

     return res;
     }
   */
  // NOTE(review): the "Optional saturate on overflow" step from the
  // pseudocode above is NOT emitted below; out-of-range inputs wrap —
  // presumably acceptable because the conversion is undefined for them.
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa (restore the implicit hidden bit)
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit: (res + sign) ^ sign conditionally negates when
  // sign (the input's sign bit smeared across 32 bits) is all-ones.
  if (includeSign) {
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}
2416
2417 SDValue
2418 AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
2419 {
2420 SDValue DST;
2421 SDValue RHS = Op.getOperand(0);
2422 EVT RHSVT = RHS.getValueType();
2423 MVT RST = RHSVT.getScalarType().getSimpleVT();
2424 EVT LHSVT = Op.getValueType();
2425 MVT LST = LHSVT.getScalarType().getSimpleVT();
2426 DebugLoc DL = Op.getDebugLoc();
2427 const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
2428 if (RST == MVT::f64 && RHSVT.isVector()
2429 && STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2430 // We dont support vector 64bit floating point convertions.
2431 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
2432 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2433 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
2434 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
2435 if (!x) {
2436 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
2437 } else {
2438 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
2439 DST, op, DAG.getTargetConstant(x, MVT::i32));
2440 }
2441
2442 }
2443 } else {
2444 if (RST == MVT::f64
2445 && LST == MVT::i32) {
2446 if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2447 DST = SDValue(Op.getNode(), 0);
2448 } else {
2449 DST = genf64toi32(RHS, DAG, false);
2450 }
2451 } else if (RST == MVT::f64
2452 && LST == MVT::i64) {
2453 DST = genf64toi64(RHS, DAG, false);
2454 } else if (RST == MVT::f64
2455 && (LST == MVT::i8 || LST == MVT::i16)) {
2456 if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2457 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
2458 } else {
2459 SDValue ToInt = genf64toi32(RHS, DAG, false);
2460 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
2461 }
2462
2463 } else {
2464 DST = SDValue(Op.getNode(), 0);
2465 }
2466 }
2467 return DST;
2468 }
// genu32tof64 - Lower a u32 -> f64 conversion (UINT_TO_FP) for scalar or
// vector types.  Newer compiler backends (CAL >= SC_135) use the classic
// exponent-bias trick (OR the integer into the mantissa of 2^52 and
// subtract 2^52); older ones build the double's bit pattern by hand from
// a leading-zero count.
SDValue
AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  SDValue x = RHS;
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.calVersion() >= CAL_VERSION_SC_135) {
    // unsigned x = RHS;
    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
    // double d = as_double( xd );
    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
        DAG.getConstant( 0x43300000, INTVT ) );
    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
  } else {
    SDValue clz = genCLZu32(x, DAG);

    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
    // Except for an input 0... which requires a 0 exponent
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+31), INTVT), clz );
    // exp = (x != 0) ? exp : x — selects 0 when the input is 0.
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );

    // Normalize frac: shift the most significant set bit to bit 31.
    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );

    // Eliminate hidden bit (implicit in the IEEE-754 encoding)
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Pack exponent and frac: split the 31 mantissa bits across the low
    // word (rlo) and the top of the high word (rhi), then OR in the
    // exponent at bit 20 of the high word.
    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Convert 2 x 32 in to 1 x 64, then to double precision float type
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
2530 SDValue
2531 AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
2532 SelectionDAG &DAG) const
2533 {
2534 EVT RHSVT = RHS.getValueType();
2535 DebugLoc DL = RHS.getDebugLoc();
2536 EVT INTVT;
2537 EVT LONGVT;
2538 bool isVec = RHSVT.isVector();
2539 if (isVec) {
2540 INTVT = EVT(MVT::getVectorVT(MVT::i32,
2541 RHSVT.getVectorNumElements()));
2542 } else {
2543 INTVT = EVT(MVT::i32);
2544 }
2545 LONGVT = RHSVT;
2546 SDValue x = RHS;
2547 const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
2548 if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2549 // double dhi = (double)(as_uint2(x).y);
2550 // double dlo = (double)(as_uint2(x).x);
2551 // return mad(dhi, 0x1.0p+32, dlo)
2552 SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
2553 dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
2554 SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
2555 dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
2556 return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
2557 DAG.getConstantFP(0x4f800000, LHSVT), dlo);
2558 } else if (STM.calVersion() >= CAL_VERSION_SC_135) {
2559 // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
2560 // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
2561 // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
2562 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
2563 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
2564 SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
2565 SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
2566 SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
2567 SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
2568 SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
2569 DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
2570 hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
2571 return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
2572
2573 } else {
2574 SDValue clz = genCLZu64(x, DAG);
2575 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
2576 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
2577
2578 // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
2579 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
2580 DAG.getConstant( (1023+63), INTVT), clz );
2581 SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
2582 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2583 mash, exp, mash ); // exp = exp, or 0 if input was 0
2584
2585 // Normalize frac
2586 SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
2587 clz, DAG.getConstant( 31, INTVT ) );
2588 SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
2589 DAG.getConstant( 32, INTVT ), clz31 );
2590 SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
2591 SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
2592 t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
2593 SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
2594 SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
2595 SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
2596 SDValue rlo2 = DAG.getConstant( 0, INTVT );
2597 SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
2598 clz, DAG.getConstant( 32, INTVT ) );
2599 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2600 clz32, rhi2, rhi1 );
2601 SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2602 clz32, rlo2, rlo1 );
2603
2604 // Eliminate hidden bit
2605 rhi = DAG.getNode( ISD::AND, DL, INTVT,
2606 rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
2607
2608 // Save bits needed to round properly
2609 SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
2610 rlo, DAG.getConstant( 0x7ff, INTVT ) );
2611
2612 // Pack exponent and frac
2613 rlo = DAG.getNode( ISD::SRL, DL, INTVT,
2614 rlo, DAG.getConstant( 11, INTVT ) );
2615 SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
2616 rhi, DAG.getConstant( (32 - 11), INTVT ) );
2617 rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
2618 rhi = DAG.getNode( ISD::SRL, DL, INTVT,
2619 rhi, DAG.getConstant( 11, INTVT ) );
2620 exp = DAG.getNode( ISD::SHL, DL, INTVT,
2621 exp, DAG.getConstant( 20, INTVT ) );
2622 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
2623
2624 // Compute rounding bit
2625 SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
2626 rlo, DAG.getConstant( 1, INTVT ) );
2627 SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
2628 round, DAG.getConstant( 0x3ff, INTVT ) );
2629 grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2630 DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
2631 grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
2632 grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
2633 round = DAG.getNode( ISD::SRL, DL, INTVT,
2634 round, DAG.getConstant( 10, INTVT ) );
2635 round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
2636
2637 // Add rounding bit
2638 SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
2639 round, DAG.getConstant( 0, INTVT ) );
2640 SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
2641 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
2642 return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
2643 }
2644 }
2645 SDValue
2646 AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
2647 {
2648 SDValue RHS = Op.getOperand(0);
2649 EVT RHSVT = RHS.getValueType();
2650 MVT RST = RHSVT.getScalarType().getSimpleVT();
2651 EVT LHSVT = Op.getValueType();
2652 MVT LST = LHSVT.getScalarType().getSimpleVT();
2653 DebugLoc DL = Op.getDebugLoc();
2654 SDValue DST;
2655 EVT INTVT;
2656 EVT LONGVT;
2657 const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
2658 if (LST == MVT::f64 && LHSVT.isVector()
2659 && STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2660 // We dont support vector 64bit floating point convertions.
2661 DST = Op;
2662 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
2663 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2664 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
2665 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
2666 if (!x) {
2667 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
2668 } else {
2669 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
2670 op, DAG.getTargetConstant(x, MVT::i32));
2671 }
2672
2673 }
2674 } else {
2675
2676 if (RST == MVT::i32
2677 && LST == MVT::f64) {
2678 if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2679 DST = SDValue(Op.getNode(), 0);
2680 } else {
2681 DST = genu32tof64(RHS, LHSVT, DAG);
2682 }
2683 } else if (RST == MVT::i64
2684 && LST == MVT::f64) {
2685 DST = genu64tof64(RHS, LHSVT, DAG);
2686 } else {
2687 DST = SDValue(Op.getNode(), 0);
2688 }
2689 }
2690 return DST;
2691 }
2692
// Lower ISD::SUB for i64/v2i64 by splitting each operand into 32-bit
// low/high halves, subtracting the halves, and propagating the borrow
// from the low half into the high half.  Non-64-bit types are returned
// unchanged for default handling.
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = RHS.getValueType().isVector();
  if (OVT.getScalarType() == MVT::i64) {
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
    RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    // The borrow is computed as (LHSLO u< RHSLO), yielding an all-ones
    // (-1) mask when a borrow occurred, which is then ADDed to the high
    // half (i.e. subtracts one).
    SDValue cmp;
    if (OVT == MVT::i64) {
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // Vector case: compare each lane of the low halves individually and
      // rebuild a v2i32 borrow mask.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    // Reassemble the 64-bit result from the low/high 32-bit halves.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
2750 SDValue
2751 AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
2752 {
2753 EVT OVT = Op.getValueType();
2754 SDValue DST;
2755 if (OVT.getScalarType() == MVT::f64) {
2756 DST = LowerFDIV64(Op, DAG);
2757 } else if (OVT.getScalarType() == MVT::f32) {
2758 DST = LowerFDIV32(Op, DAG);
2759 } else {
2760 DST = SDValue(Op.getNode(), 0);
2761 }
2762 return DST;
2763 }
2764
2765 SDValue
2766 AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
2767 {
2768 EVT OVT = Op.getValueType();
2769 SDValue DST;
2770 if (OVT.getScalarType() == MVT::i64) {
2771 DST = LowerSDIV64(Op, DAG);
2772 } else if (OVT.getScalarType() == MVT::i32) {
2773 DST = LowerSDIV32(Op, DAG);
2774 } else if (OVT.getScalarType() == MVT::i16
2775 || OVT.getScalarType() == MVT::i8) {
2776 DST = LowerSDIV24(Op, DAG);
2777 } else {
2778 DST = SDValue(Op.getNode(), 0);
2779 }
2780 return DST;
2781 }
2782
2783 SDValue
2784 AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
2785 {
2786 EVT OVT = Op.getValueType();
2787 SDValue DST;
2788 if (OVT.getScalarType() == MVT::i64) {
2789 DST = LowerSREM64(Op, DAG);
2790 } else if (OVT.getScalarType() == MVT::i32) {
2791 DST = LowerSREM32(Op, DAG);
2792 } else if (OVT.getScalarType() == MVT::i16) {
2793 DST = LowerSREM16(Op, DAG);
2794 } else if (OVT.getScalarType() == MVT::i8) {
2795 DST = LowerSREM8(Op, DAG);
2796 } else {
2797 DST = SDValue(Op.getNode(), 0);
2798 }
2799 return DST;
2800 }
2801
2802 SDValue
2803 AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
2804 {
2805 EVT OVT = Op.getValueType();
2806 SDValue DST;
2807 if (OVT.getScalarType() == MVT::i64) {
2808 DST = LowerUREM64(Op, DAG);
2809 } else if (OVT.getScalarType() == MVT::i32) {
2810 DST = LowerUREM32(Op, DAG);
2811 } else if (OVT.getScalarType() == MVT::i16) {
2812 DST = LowerUREM16(Op, DAG);
2813 } else if (OVT.getScalarType() == MVT::i8) {
2814 DST = LowerUREM8(Op, DAG);
2815 } else {
2816 DST = SDValue(Op.getNode(), 0);
2817 }
2818 return DST;
2819 }
2820
// Lower ISD::MUL for i64/v2i64 using the schoolbook split:
//   (h1:l1) * (h0:l0) -> low  = l1*l0 (low 32 bits)
//                        high = h0*l1 + h1*l0 + mulhi(l1, l0)
// Cross terms of h1*h0 overflow past 64 bits and are dropped.
// Non-64-bit types are returned unchanged for default handling.
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() != MVT::i64)
  {
    DST = SDValue(Op.getNode(), 0);
  } else {
    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
    // TODO: This needs to be turned into a tablegen pattern
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // mul64(h1, l1, h0, l0)
    // Split both operands into 32-bit low/high components.
    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, LHS);
    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, LHS);
    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, RHS);
    SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, RHS);
    // MULLO_UINT_1 r1, h0, l1
    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSHI, LHSLO);
    // MULLO_UINT_1 r2, h1, l0
    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSLO, LHSHI);
    // ADD_INT hr, r1, r2
    SDValue ADDHI = DAG.getNode(ISD::ADD,
        DL,
        INTTY, RHILLO, RLOHHI);
    // MULHI_UINT_1 r3, l1, l0 -- carry out of the low 32x32 product.
    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
        DL,
        INTTY, RHSLO, LHSLO);
    // ADD_INT hr, hr, r3
    SDValue HIGH = DAG.getNode(ISD::ADD,
        DL,
        INTTY, ADDHI, RLOLLO);
    // MULLO_UINT_1 l3, l1, l0
    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, LHSLO, RHSLO);
    // Reassemble the 64-bit result from the low/high halves.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
        DL,
        OVT, LOW, HIGH);
  }
  return DST;
}
// Lower ISD::BUILD_VECTOR: broadcast operand 0 with VBUILD, then insert
// the remaining (non-undef) operands one by one.
// NOTE(review): the insert indices 5/6/7 for elements 1/2/3 look like an
// AMDIL-specific swizzle encoding (element + 4) rather than plain element
// numbers -- confirm against the INSERT_VECTOR_ELT/VINSERT lowering.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Seed the result by splatting the first operand into all lanes.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  // Intentional fallthrough: case 4 inserts element 3, then falls into
  // case 3 (element 2), then case 2 (element 1).  Undef operands are
  // skipped, leaving the splatted value in those lanes.
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // fall through
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // fall through
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
2949
// Lower ISD::INSERT_VECTOR_ELT to the AMDIL VINSERT node.  mask2/mask3
// are per-byte lane-select masks derived from the element index (one byte
// per lane).  For a constant index a single VINSERT suffices; for a
// dynamic index a VINSERT is emitted for every possible lane and the
// right one is chosen with a CMOVLOG select chain.
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  if (!VT.isVector()) {
    // Inserting into a scalar is the identity on operand 0.
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Use the inserted value unless it is undef, in which case reuse the
  // vector itself as the source.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Static insertion at a known lane.
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic insertion: start with the lane-0 insert (swizzleNum == 0),
    // then for every other lane build the corresponding insert and select
    // it when the runtime index matches.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      // c = (index == x) as a lane mask, broadcast to the vector width.
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
    }
    DST = res;
  }
  return DST;
}
3005
// Lower ISD::EXTRACT_VECTOR_ELT to the AMDIL VEXTRACT node.  VEXTRACT's
// lane operand is 1-based here (swizzleNum = index + 1).  A constant
// index becomes a single VEXTRACT; a dynamic index extracts every lane
// and picks the matching one with a CMOVLOG select chain.
SDValue
AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  uint64_t swizzleNum = 0;
  DebugLoc DL = Op.getDebugLoc();
  SDValue Res;
  if (!Op.getOperand(0).getValueType().isVector()) {
    // Extracting from a scalar is the identity on operand 0.
    Res = Op.getOperand(0);
    return Res;
  }
  if (CSDN) {
    // Static vector extraction
    swizzleNum = CSDN->getZExtValue() + 1;
    Res = DAG.getNode(AMDILISD::VEXTRACT,
        DL, VT,
        Op.getOperand(0),
        DAG.getTargetConstant(swizzleNum, MVT::i32));
  } else {
    // Dynamic extraction: default to lane 1 (1-based), then override with
    // lane x whenever the runtime index equals x.
    // NOTE(review): the comparison is against x (2..vecSize, 1-based)
    // while Op1 is a 0-based element index -- confirm the intended
    // index encoding against VEXTRACT's definition.
    SDValue Op1 = Op.getOperand(1);
    uint32_t vecSize = 4;
    SDValue Op0 = Op.getOperand(0);
    SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
        DL, VT, Op0,
        DAG.getTargetConstant(1, MVT::i32));
    if (Op0.getValueType().isVector()) {
      vecSize = Op0.getValueType().getVectorNumElements();
    }
    for (uint32_t x = 2; x <= vecSize; ++x) {
      SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
          DL, VT, Op0,
          DAG.getTargetConstant(x, MVT::i32));
      SDValue c = DAG.getNode(AMDILISD::CMP,
          DL, Op1.getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op1, DAG.getConstant(x, MVT::i32));
      res = DAG.getNode(AMDILISD::CMOVLOG, DL,
          VT, c, t, res);

    }
    Res = res;
  }
  return Res;
}
3052
3053 SDValue
3054 AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
3055 SelectionDAG &DAG) const
3056 {
3057 uint32_t vecSize = Op.getValueType().getVectorNumElements();
3058 SDValue src = Op.getOperand(0);
3059 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3060 uint64_t offset = 0;
3061 EVT vecType = Op.getValueType().getVectorElementType();
3062 DebugLoc DL = Op.getDebugLoc();
3063 SDValue Result;
3064 if (CSDN) {
3065 offset = CSDN->getZExtValue();
3066 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3067 DL,vecType, src, DAG.getConstant(offset, MVT::i32));
3068 Result = DAG.getNode(AMDILISD::VBUILD, DL,
3069 Op.getValueType(), Result);
3070 for (uint32_t x = 1; x < vecSize; ++x) {
3071 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
3072 src, DAG.getConstant(offset + x, MVT::i32));
3073 if (elt.getOpcode() != ISD::UNDEF) {
3074 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3075 Op.getValueType(), Result, elt,
3076 DAG.getConstant(x, MVT::i32));
3077 }
3078 }
3079 } else {
3080 SDValue idx = Op.getOperand(1);
3081 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3082 DL, vecType, src, idx);
3083 Result = DAG.getNode(AMDILISD::VBUILD, DL,
3084 Op.getValueType(), Result);
3085 for (uint32_t x = 1; x < vecSize; ++x) {
3086 idx = DAG.getNode(ISD::ADD, DL, vecType,
3087 idx, DAG.getConstant(1, MVT::i32));
3088 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
3089 src, idx);
3090 if (elt.getOpcode() != ISD::UNDEF) {
3091 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3092 Op.getValueType(), Result, elt, idx);
3093 }
3094 }
3095 }
3096 return Result;
3097 }
3098 SDValue
3099 AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
3100 SelectionDAG &DAG) const
3101 {
3102 SDValue Res = DAG.getNode(AMDILISD::VBUILD,
3103 Op.getDebugLoc(),
3104 Op.getValueType(),
3105 Op.getOperand(0));
3106 return Res;
3107 }
3108 SDValue
3109 AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
3110 {
3111 SDValue Cond = Op.getOperand(0);
3112 SDValue LHS = Op.getOperand(1);
3113 SDValue RHS = Op.getOperand(2);
3114 DebugLoc DL = Op.getDebugLoc();
3115 Cond = getConversionNode(DAG, Cond, Op, true);
3116 Cond = DAG.getNode(AMDILISD::CMOVLOG,
3117 DL,
3118 Op.getValueType(), Cond, LHS, RHS);
3119 return Cond;
3120 }
// Lower ISD::SETCC: materialize the comparison as an all-ones/all-zeros
// mask via SELECT_CC, convert it to the result type, and AND with 1 so
// the final value is a canonical boolean 0/1.
SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  // Only used to validate that the condition code maps to an AMDIL one.
  // NOTE: the local name shadows the AMDILCC namespace for unqualified
  // lookup; the qualified AMDILCC::COND_ERROR below still resolves.
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  // (LHS cc RHS) ? -1 : 0
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      LHS.getValueType(),
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = getConversionNode(DAG, Cond, Op, true);
  // Reduce the all-ones mask to a 0/1 boolean.
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}
3151
// Lower ISD::SIGN_EXTEND_INREG with the classic shift pair: shift the
// value left so the sign bit of the inner type lands in the top bit,
// then arithmetic-shift right by the same amount.  Sub-32-bit values are
// first widened to 32 bits so the shifts operate on a full register.
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
3183 EVT
3184 AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
3185 {
3186 int iSize = (size * numEle);
3187 int vEle = (iSize >> ((size == 64) ? 6 : 5));
3188 if (!vEle) {
3189 vEle = 1;
3190 }
3191 if (size == 64) {
3192 if (vEle == 1) {
3193 return EVT(MVT::i64);
3194 } else {
3195 return EVT(MVT::getVectorVT(MVT::i64, vEle));
3196 }
3197 } else {
3198 if (vEle == 1) {
3199 return EVT(MVT::i32);
3200 } else {
3201 return EVT(MVT::getVectorVT(MVT::i32, vEle));
3202 }
3203 }
3204 }
3205
3206 SDValue
3207 AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
3208 SelectionDAG &DAG) const
3209 {
3210 SDValue Chain = Op.getOperand(0);
3211 SDValue Size = Op.getOperand(1);
3212 unsigned int SPReg = AMDIL::SP;
3213 DebugLoc DL = Op.getDebugLoc();
3214 SDValue SP = DAG.getCopyFromReg(Chain,
3215 DL,
3216 SPReg, MVT::i32);
3217 SDValue NewSP = DAG.getNode(ISD::ADD,
3218 DL,
3219 MVT::i32, SP, Size);
3220 Chain = DAG.getCopyToReg(SP.getValue(1),
3221 DL,
3222 SPReg, NewSP);
3223 SDValue Ops[2] = {NewSP, Chain};
3224 Chain = DAG.getMergeValues(Ops, 2 ,DL);
3225 return Chain;
3226 }
3227 SDValue
3228 AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
3229 {
3230 SDValue Chain = Op.getOperand(0);
3231 SDValue Cond = Op.getOperand(1);
3232 SDValue Jump = Op.getOperand(2);
3233 SDValue Result;
3234 Result = DAG.getNode(
3235 AMDILISD::BRANCH_COND,
3236 Op.getDebugLoc(),
3237 Op.getValueType(),
3238 Chain, Jump, Cond);
3239 return Result;
3240 }
3241
3242 SDValue
3243 AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
3244 {
3245 SDValue Chain = Op.getOperand(0);
3246 SDValue CC = Op.getOperand(1);
3247 SDValue LHS = Op.getOperand(2);
3248 SDValue RHS = Op.getOperand(3);
3249 SDValue JumpT = Op.getOperand(4);
3250 SDValue CmpValue;
3251 SDValue Result;
3252 CmpValue = DAG.getNode(
3253 ISD::SELECT_CC,
3254 Op.getDebugLoc(),
3255 LHS.getValueType(),
3256 LHS, RHS,
3257 DAG.getConstant(-1, MVT::i32),
3258 DAG.getConstant(0, MVT::i32),
3259 CC);
3260 Result = DAG.getNode(
3261 AMDILISD::BRANCH_COND,
3262 CmpValue.getDebugLoc(),
3263 MVT::Other, Chain,
3264 JumpT, CmpValue);
3265 return Result;
3266 }
3267
3268 SDValue
3269 AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
3270 {
3271 SDValue Result = DAG.getNode(
3272 AMDILISD::DP_TO_FP,
3273 Op.getDebugLoc(),
3274 Op.getValueType(),
3275 Op.getOperand(0),
3276 Op.getOperand(1));
3277 return Result;
3278 }
3279
3280 SDValue
3281 AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
3282 {
3283 SDValue Result = DAG.getNode(
3284 AMDILISD::VCONCAT,
3285 Op.getDebugLoc(),
3286 Op.getValueType(),
3287 Op.getOperand(0),
3288 Op.getOperand(1));
3289 return Result;
3290 }
// LowerRET - Lower an ISD::RET node.
// Assigns each return value to a physical register per the AMDIL calling
// convention, marks those registers live-out, emits glued CopyToReg nodes
// for each value, and terminates with the target RET_FLAG node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  // Thread the glue value through so the copies stay adjacent to the ret.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
3356
3357 unsigned int
3358 AMDILTargetLowering::getFunctionAlignment(const Function *) const
3359 {
3360 return 0;
3361 }
3362
3363 void
3364 AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
3365 MachineBasicBlock::iterator &BBI,
3366 DebugLoc *DL, const TargetInstrInfo *TII) const
3367 {
3368 mBB = BB;
3369 mBBI = BBI;
3370 mDL = DL;
3371 mTII = TII;
3372 }
3373 uint32_t
3374 AMDILTargetLowering::genVReg(uint32_t regType) const
3375 {
3376 return mBB->getParent()->getRegInfo().createVirtualRegister(
3377 getTargetMachine().getRegisterInfo()->getRegClass(regType));
3378 }
3379
// Create a machine instruction with the given opcode and destination
// register at the insertion point previously cached by setPrivateData().
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
{
  return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
}
3385
3386 MachineInstrBuilder
3387 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
3388 uint32_t src1) const
3389 {
3390 return generateMachineInst(opcode, dst).addReg(src1);
3391 }
3392
3393 MachineInstrBuilder
3394 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
3395 uint32_t src1, uint32_t src2) const
3396 {
3397 return generateMachineInst(opcode, dst, src1).addReg(src2);
3398 }
3399
3400 MachineInstrBuilder
3401 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
3402 uint32_t src1, uint32_t src2, uint32_t src3) const
3403 {
3404 return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
3405 }
3406
3407
3408 SDValue
3409 AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
3410 {
3411 DebugLoc DL = Op.getDebugLoc();
3412 EVT OVT = Op.getValueType();
3413 SDValue LHS = Op.getOperand(0);
3414 SDValue RHS = Op.getOperand(1);
3415 MVT INTTY;
3416 MVT FLTTY;
3417 if (!OVT.isVector()) {
3418 INTTY = MVT::i32;
3419 FLTTY = MVT::f32;
3420 } else if (OVT.getVectorNumElements() == 2) {
3421 INTTY = MVT::v2i32;
3422 FLTTY = MVT::v2f32;
3423 } else if (OVT.getVectorNumElements() == 4) {
3424 INTTY = MVT::v4i32;
3425 FLTTY = MVT::v4f32;
3426 }
3427 unsigned bitsize = OVT.getScalarType().getSizeInBits();
3428 // char|short jq = ia ^ ib;
3429 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
3430
3431 // jq = jq >> (bitsize - 2)
3432 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
3433
3434 // jq = jq | 0x1
3435 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
3436
3437 // jq = (int)jq
3438 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
3439
3440 // int ia = (int)LHS;
3441 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
3442
3443 // int ib, (int)RHS;
3444 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
3445
3446 // float fa = (float)ia;
3447 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
3448
3449 // float fb = (float)ib;
3450 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
3451
3452 // float fq = native_divide(fa, fb);
3453 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
3454
3455 // fq = trunc(fq);
3456 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
3457
3458 // float fqneg = -fq;
3459 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
3460
3461 // float fr = mad(fqneg, fb, fa);
3462 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
3463
3464 // int iq = (int)fq;
3465 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
3466
3467 // fr = fabs(fr);
3468 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
3469
3470 // fb = fabs(fb);
3471 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
3472
3473 // int cv = fr >= fb;
3474 SDValue cv;
3475 if (INTTY == MVT::i32) {
3476 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
3477 } else {
3478 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
3479 }
3480 // jq = (cv ? jq : 0);
3481 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
3482 DAG.getConstant(0, OVT));
3483 // dst = iq + jq;
3484 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
3485 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
3486 return iq;
3487 }
3488
3489 SDValue
3490 AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
3491 {
3492 DebugLoc DL = Op.getDebugLoc();
3493 EVT OVT = Op.getValueType();
3494 SDValue LHS = Op.getOperand(0);
3495 SDValue RHS = Op.getOperand(1);
3496 // The LowerSDIV32 function generates equivalent to the following IL.
3497 // mov r0, LHS
3498 // mov r1, RHS
3499 // ilt r10, r0, 0
3500 // ilt r11, r1, 0
3501 // iadd r0, r0, r10
3502 // iadd r1, r1, r11
3503 // ixor r0, r0, r10
3504 // ixor r1, r1, r11
3505 // udiv r0, r0, r1
3506 // ixor r10, r10, r11
3507 // iadd r0, r0, r10
3508 // ixor DST, r0, r10
3509
3510 // mov r0, LHS
3511 SDValue r0 = LHS;
3512
3513 // mov r1, RHS
3514 SDValue r1 = RHS;
3515
3516 // ilt r10, r0, 0
3517 SDValue r10 = DAG.getSelectCC(DL,
3518 r0, DAG.getConstant(0, OVT),
3519 DAG.getConstant(-1, MVT::i32),
3520 DAG.getConstant(0, MVT::i32),
3521 ISD::SETLT);
3522
3523 // ilt r11, r1, 0
3524 SDValue r11 = DAG.getSelectCC(DL,
3525 r1, DAG.getConstant(0, OVT),
3526 DAG.getConstant(-1, MVT::i32),
3527 DAG.getConstant(0, MVT::i32),
3528 ISD::SETLT);
3529
3530 // iadd r0, r0, r10
3531 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
3532
3533 // iadd r1, r1, r11
3534 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
3535
3536 // ixor r0, r0, r10
3537 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
3538
3539 // ixor r1, r1, r11
3540 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
3541
3542 // udiv r0, r0, r1
3543 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
3544
3545 // ixor r10, r10, r11
3546 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
3547
3548 // iadd r0, r0, r10
3549 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
3550
3551 // ixor DST, r0, r10
3552 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
3553 return DST;
3554 }
3555
3556 SDValue
3557 AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
3558 {
3559 return SDValue(Op.getNode(), 0);
3560 }
3561
3562 SDValue
3563 AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
3564 {
3565 DebugLoc DL = Op.getDebugLoc();
3566 EVT OVT = Op.getValueType();
3567 SDValue LHS = Op.getOperand(0);
3568 SDValue RHS = Op.getOperand(1);
3569 MVT INTTY;
3570 MVT FLTTY;
3571 if (!OVT.isVector()) {
3572 INTTY = MVT::i32;
3573 FLTTY = MVT::f32;
3574 } else if (OVT.getVectorNumElements() == 2) {
3575 INTTY = MVT::v2i32;
3576 FLTTY = MVT::v2f32;
3577 } else if (OVT.getVectorNumElements() == 4) {
3578 INTTY = MVT::v4i32;
3579 FLTTY = MVT::v4f32;
3580 }
3581
3582 // The LowerUDIV24 function implements the following CL.
3583 // int ia = (int)LHS
3584 // float fa = (float)ia
3585 // int ib = (int)RHS
3586 // float fb = (float)ib
3587 // float fq = native_divide(fa, fb)
3588 // fq = trunc(fq)
3589 // float t = mad(fq, fb, fb)
3590 // int iq = (int)fq - (t <= fa)
3591 // return (type)iq
3592
3593 // int ia = (int)LHS
3594 SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
3595
3596 // float fa = (float)ia
3597 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
3598
3599 // int ib = (int)RHS
3600 SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
3601
3602 // float fb = (float)ib
3603 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
3604
3605 // float fq = native_divide(fa, fb)
3606 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
3607
3608 // fq = trunc(fq)
3609 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
3610
3611 // float t = mad(fq, fb, fb)
3612 SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
3613
3614 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
3615 SDValue iq;
3616 fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
3617 if (INTTY == MVT::i32) {
3618 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
3619 } else {
3620 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
3621 }
3622 iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
3623
3624
3625 // return (type)iq
3626 iq = DAG.getZExtOrTrunc(iq, DL, OVT);
3627 return iq;
3628
3629 }
3630
3631 SDValue
3632 AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
3633 {
3634 DebugLoc DL = Op.getDebugLoc();
3635 EVT OVT = Op.getValueType();
3636 MVT INTTY = MVT::i32;
3637 if (OVT == MVT::v2i8) {
3638 INTTY = MVT::v2i32;
3639 } else if (OVT == MVT::v4i8) {
3640 INTTY = MVT::v4i32;
3641 }
3642 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
3643 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
3644 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
3645 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
3646 return LHS;
3647 }
3648
3649 SDValue
3650 AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
3651 {
3652 DebugLoc DL = Op.getDebugLoc();
3653 EVT OVT = Op.getValueType();
3654 MVT INTTY = MVT::i32;
3655 if (OVT == MVT::v2i16) {
3656 INTTY = MVT::v2i32;
3657 } else if (OVT == MVT::v4i16) {
3658 INTTY = MVT::v4i32;
3659 }
3660 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
3661 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
3662 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
3663 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
3664 return LHS;
3665 }
3666
3667 SDValue
3668 AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
3669 {
3670 DebugLoc DL = Op.getDebugLoc();
3671 EVT OVT = Op.getValueType();
3672 SDValue LHS = Op.getOperand(0);
3673 SDValue RHS = Op.getOperand(1);
3674 // The LowerSREM32 function generates equivalent to the following IL.
3675 // mov r0, LHS
3676 // mov r1, RHS
3677 // ilt r10, r0, 0
3678 // ilt r11, r1, 0
3679 // iadd r0, r0, r10
3680 // iadd r1, r1, r11
3681 // ixor r0, r0, r10
3682 // ixor r1, r1, r11
3683 // udiv r20, r0, r1
3684 // umul r20, r20, r1
3685 // sub r0, r0, r20
3686 // iadd r0, r0, r10
3687 // ixor DST, r0, r10
3688
3689 // mov r0, LHS
3690 SDValue r0 = LHS;
3691
3692 // mov r1, RHS
3693 SDValue r1 = RHS;
3694
3695 // ilt r10, r0, 0
3696 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
3697 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
3698 r0, DAG.getConstant(0, OVT));
3699
3700 // ilt r11, r1, 0
3701 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
3702 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
3703 r1, DAG.getConstant(0, OVT));
3704
3705 // iadd r0, r0, r10
3706 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
3707
3708 // iadd r1, r1, r11
3709 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
3710
3711 // ixor r0, r0, r10
3712 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
3713
3714 // ixor r1, r1, r11
3715 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
3716
3717 // udiv r20, r0, r1
3718 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
3719
3720 // umul r20, r20, r1
3721 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
3722
3723 // sub r0, r0, r20
3724 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
3725
3726 // iadd r0, r0, r10
3727 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
3728
3729 // ixor DST, r0, r10
3730 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
3731 return DST;
3732 }
3733
3734 SDValue
3735 AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
3736 {
3737 return SDValue(Op.getNode(), 0);
3738 }
3739
3740 SDValue
3741 AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
3742 {
3743 DebugLoc DL = Op.getDebugLoc();
3744 EVT OVT = Op.getValueType();
3745 MVT INTTY = MVT::i32;
3746 if (OVT == MVT::v2i8) {
3747 INTTY = MVT::v2i32;
3748 } else if (OVT == MVT::v4i8) {
3749 INTTY = MVT::v4i32;
3750 }
3751 SDValue LHS = Op.getOperand(0);
3752 SDValue RHS = Op.getOperand(1);
3753 // The LowerUREM8 function generates equivalent to the following IL.
3754 // mov r0, as_u32(LHS)
3755 // mov r1, as_u32(RHS)
3756 // and r10, r0, 0xFF
3757 // and r11, r1, 0xFF
3758 // cmov_logical r3, r11, r11, 0x1
3759 // udiv r3, r10, r3
3760 // cmov_logical r3, r11, r3, 0
3761 // umul r3, r3, r11
3762 // sub r3, r10, r3
3763 // and as_u8(DST), r3, 0xFF
3764
3765 // mov r0, as_u32(LHS)
3766 SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
3767
3768 // mov r1, as_u32(RHS)
3769 SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
3770
3771 // and r10, r0, 0xFF
3772 SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
3773 DAG.getConstant(0xFF, INTTY));
3774
3775 // and r11, r1, 0xFF
3776 SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
3777 DAG.getConstant(0xFF, INTTY));
3778
3779 // cmov_logical r3, r11, r11, 0x1
3780 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
3781 DAG.getConstant(0x01, INTTY));
3782
3783 // udiv r3, r10, r3
3784 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
3785
3786 // cmov_logical r3, r11, r3, 0
3787 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
3788 DAG.getConstant(0, INTTY));
3789
3790 // umul r3, r3, r11
3791 r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
3792
3793 // sub r3, r10, r3
3794 r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
3795
3796 // and as_u8(DST), r3, 0xFF
3797 SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
3798 DAG.getConstant(0xFF, INTTY));
3799 DST = DAG.getZExtOrTrunc(DST, DL, OVT);
3800 return DST;
3801 }
3802
3803 SDValue
3804 AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
3805 {
3806 DebugLoc DL = Op.getDebugLoc();
3807 EVT OVT = Op.getValueType();
3808 MVT INTTY = MVT::i32;
3809 if (OVT == MVT::v2i16) {
3810 INTTY = MVT::v2i32;
3811 } else if (OVT == MVT::v4i16) {
3812 INTTY = MVT::v4i32;
3813 }
3814 SDValue LHS = Op.getOperand(0);
3815 SDValue RHS = Op.getOperand(1);
3816 // The LowerUREM16 function generatest equivalent to the following IL.
3817 // mov r0, LHS
3818 // mov r1, RHS
3819 // DIV = LowerUDIV16(LHS, RHS)
3820 // and r10, r0, 0xFFFF
3821 // and r11, r1, 0xFFFF
3822 // cmov_logical r3, r11, r11, 0x1
3823 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
3824 // and r3, r3, 0xFFFF
3825 // cmov_logical r3, r11, r3, 0
3826 // umul r3, r3, r11
3827 // sub r3, r10, r3
3828 // and DST, r3, 0xFFFF
3829
3830 // mov r0, LHS
3831 SDValue r0 = LHS;
3832
3833 // mov r1, RHS
3834 SDValue r1 = RHS;
3835
3836 // and r10, r0, 0xFFFF
3837 SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
3838 DAG.getConstant(0xFFFF, OVT));
3839
3840 // and r11, r1, 0xFFFF
3841 SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
3842 DAG.getConstant(0xFFFF, OVT));
3843
3844 // cmov_logical r3, r11, r11, 0x1
3845 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
3846 DAG.getConstant(0x01, OVT));
3847
3848 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
3849 r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
3850 r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
3851 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
3852 r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
3853 r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
3854
3855 // and r3, r3, 0xFFFF
3856 r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
3857 DAG.getConstant(0xFFFF, OVT));
3858
3859 // cmov_logical r3, r11, r3, 0
3860 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
3861 DAG.getConstant(0, OVT));
3862 // umul r3, r3, r11
3863 r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
3864
3865 // sub r3, r10, r3
3866 r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
3867
3868 // and DST, r3, 0xFFFF
3869 SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
3870 DAG.getConstant(0xFFFF, OVT));
3871 return DST;
3872 }
3873
3874 SDValue
3875 AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
3876 {
3877 DebugLoc DL = Op.getDebugLoc();
3878 EVT OVT = Op.getValueType();
3879 SDValue LHS = Op.getOperand(0);
3880 SDValue RHS = Op.getOperand(1);
3881 // The LowerUREM32 function generates equivalent to the following IL.
3882 // udiv r20, LHS, RHS
3883 // umul r20, r20, RHS
3884 // sub DST, LHS, r20
3885
3886 // udiv r20, LHS, RHS
3887 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
3888
3889 // umul r20, r20, RHS
3890 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
3891
3892 // sub DST, LHS, r20
3893 SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
3894 return DST;
3895 }
3896
3897 SDValue
3898 AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
3899 {
3900 return SDValue(Op.getNode(), 0);
3901 }
3902
3903
3904 SDValue
3905 AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
3906 {
3907 DebugLoc DL = Op.getDebugLoc();
3908 EVT OVT = Op.getValueType();
3909 MVT INTTY = MVT::i32;
3910 if (OVT == MVT::v2f32) {
3911 INTTY = MVT::v2i32;
3912 } else if (OVT == MVT::v4f32) {
3913 INTTY = MVT::v4i32;
3914 }
3915 SDValue LHS = Op.getOperand(0);
3916 SDValue RHS = Op.getOperand(1);
3917 SDValue DST;
3918 const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
3919 if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
3920 // TODO: This doesn't work for vector types yet
3921 // The LowerFDIV32 function generates equivalent to the following
3922 // IL:
3923 // mov r20, as_int(LHS)
3924 // mov r21, as_int(RHS)
3925 // and r30, r20, 0x7f800000
3926 // and r31, r20, 0x807FFFFF
3927 // and r32, r21, 0x7f800000
3928 // and r33, r21, 0x807FFFFF
3929 // ieq r40, r30, 0x7F800000
3930 // ieq r41, r31, 0x7F800000
3931 // ieq r42, r32, 0
3932 // ieq r43, r33, 0
3933 // and r50, r20, 0x80000000
3934 // and r51, r21, 0x80000000
3935 // ior r32, r32, 0x3f800000
3936 // ior r33, r33, 0x3f800000
3937 // cmov_logical r32, r42, r50, r32
3938 // cmov_logical r33, r43, r51, r33
3939 // cmov_logical r32, r40, r20, r32
3940 // cmov_logical r33, r41, r21, r33
3941 // ior r50, r40, r41
3942 // ior r51, r42, r43
3943 // ior r50, r50, r51
3944 // inegate r52, r31
3945 // iadd r30, r30, r52
3946 // cmov_logical r30, r50, 0, r30
3947 // div_zeroop(infinity) r21, 1.0, r33
3948 // mul_ieee r20, r32, r21
3949 // and r22, r20, 0x7FFFFFFF
3950 // and r23, r20, 0x80000000
3951 // ishr r60, r22, 0x00000017
3952 // ishr r61, r30, 0x00000017
3953 // iadd r20, r20, r30
3954 // iadd r21, r22, r30
3955 // iadd r60, r60, r61
3956 // ige r42, 0, R60
3957 // ior r41, r23, 0x7F800000
3958 // ige r40, r60, 0x000000FF
3959 // cmov_logical r40, r50, 0, r40
3960 // cmov_logical r20, r42, r23, r20
3961 // cmov_logical DST, r40, r41, r20
3962 // as_float(DST)
3963
3964 // mov r20, as_int(LHS)
3965 SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
3966
3967 // mov r21, as_int(RHS)
3968 SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
3969
3970 // and r30, r20, 0x7f800000
3971 SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
3972 DAG.getConstant(0x7F800000, INTTY));
3973
3974 // and r31, r21, 0x7f800000
3975 SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
3976 DAG.getConstant(0x7f800000, INTTY));
3977
3978 // and r32, r20, 0x807FFFFF
3979 SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
3980 DAG.getConstant(0x807FFFFF, INTTY));
3981
3982 // and r33, r21, 0x807FFFFF
3983 SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
3984 DAG.getConstant(0x807FFFFF, INTTY));
3985
3986 // ieq r40, r30, 0x7F800000
3987 SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
3988 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
3989 R30, DAG.getConstant(0x7F800000, INTTY));
3990
3991 // ieq r41, r31, 0x7F800000
3992 SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
3993 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
3994 R31, DAG.getConstant(0x7F800000, INTTY));
3995
3996 // ieq r42, r30, 0
3997 SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
3998 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
3999 R30, DAG.getConstant(0, INTTY));
4000
4001 // ieq r43, r31, 0
4002 SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4003 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4004 R31, DAG.getConstant(0, INTTY));
4005
4006 // and r50, r20, 0x80000000
4007 SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4008 DAG.getConstant(0x80000000, INTTY));
4009
4010 // and r51, r21, 0x80000000
4011 SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4012 DAG.getConstant(0x80000000, INTTY));
4013
4014 // ior r32, r32, 0x3f800000
4015 R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
4016 DAG.getConstant(0x3F800000, INTTY));
4017
4018 // ior r33, r33, 0x3f800000
4019 R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
4020 DAG.getConstant(0x3F800000, INTTY));
4021
4022 // cmov_logical r32, r42, r50, r32
4023 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
4024
4025 // cmov_logical r33, r43, r51, r33
4026 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
4027
4028 // cmov_logical r32, r40, r20, r32
4029 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
4030
4031 // cmov_logical r33, r41, r21, r33
4032 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
4033
4034 // ior r50, r40, r41
4035 R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
4036
4037 // ior r51, r42, r43
4038 R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
4039
4040 // ior r50, r50, r51
4041 R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
4042
4043 // inegate r52, r31
4044 SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
4045
4046 // iadd r30, r30, r52
4047 R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
4048
4049 // cmov_logical r30, r50, 0, r30
4050 R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
4051 DAG.getConstant(0, INTTY), R30);
4052
4053 // div_zeroop(infinity) r21, 1.0, as_float(r33)
4054 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
4055 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
4056 DAG.getConstantFP(1.0f, OVT), R33);
4057
4058 // mul_ieee as_int(r20), as_float(r32), r21
4059 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
4060 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
4061 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
4062
4063 // div_zeroop(infinity) r21, 1.0, as_float(r33)
4064 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
4065 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
4066 DAG.getConstantFP(1.0f, OVT), R33);
4067
4068 // mul_ieee as_int(r20), as_float(r32), r21
4069 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
4070 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
4071 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
4072
4073 // and r22, r20, 0x7FFFFFFF
4074 SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4075 DAG.getConstant(0x7FFFFFFF, INTTY));
4076
4077 // and r23, r20, 0x80000000
4078 SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4079 DAG.getConstant(0x80000000, INTTY));
4080
4081 // ishr r60, r22, 0x00000017
4082 SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
4083 DAG.getConstant(0x00000017, INTTY));
4084
4085 // ishr r61, r30, 0x00000017
4086 SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
4087 DAG.getConstant(0x00000017, INTTY));
4088
4089 // iadd r20, r20, r30
4090 R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
4091
4092 // iadd r21, r22, r30
4093 R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
4094
4095 // iadd r60, r60, r61
4096 R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
4097
4098 // ige r42, 0, R60
4099 R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4100 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
4101 DAG.getConstant(0, INTTY),
4102 R60);
4103
4104 // ior r41, r23, 0x7F800000
4105 R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
4106 DAG.getConstant(0x7F800000, INTTY));
4107
4108 // ige r40, r60, 0x000000FF
4109 R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4110 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
4111 R60,
4112 DAG.getConstant(0x0000000FF, INTTY));
4113
4114 // cmov_logical r40, r50, 0, r40
4115 R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
4116 DAG.getConstant(0, INTTY),
4117 R40);
4118
4119 // cmov_logical r20, r42, r23, r20
4120 R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
4121
4122 // cmov_logical DST, r40, r41, r20
4123 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
4124
4125 // as_float(DST)
4126 DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
4127 } else {
4128 // The following sequence of DAG nodes produce the following IL:
4129 // fabs r1, RHS
4130 // lt r2, 0x1.0p+96f, r1
4131 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
4132 // mul_ieee r1, RHS, r3
4133 // div_zeroop(infinity) r0, LHS, r1
4134 // mul_ieee DST, r0, r3
4135
4136 // fabs r1, RHS
4137 SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
4138 // lt r2, 0x1.0p+96f, r1
4139 SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4140 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
4141 DAG.getConstant(0x6f800000, INTTY), r1);
4142 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
4143 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
4144 DAG.getConstant(0x2f800000, INTTY),
4145 DAG.getConstant(0x3f800000, INTTY));
4146 // mul_ieee r1, RHS, r3
4147 r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
4148 // div_zeroop(infinity) r0, LHS, r1
4149 SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
4150 // mul_ieee DST, r0, r3
4151 DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
4152 }
4153 return DST;
4154 }
4155
4156 SDValue
4157 AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
4158 {
4159 return SDValue(Op.getNode(), 0);
4160 }