radeon/llvm: Remove AMDIL bitwise instructions (OR, AND, XOR, NOT)
[mesa.git] / src / gallium / drivers / radeon / AMDILISelLowering.cpp
1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 // This file implements the interfaces that AMDIL uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDILISelLowering.h"
16 #include "AMDILDevices.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "AMDILSubtarget.h"
19 #include "AMDILTargetMachine.h"
20 #include "AMDILUtilityFunctions.h"
21 #include "llvm/CallingConv.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/DerivedTypes.h"
29 #include "llvm/Instructions.h"
30 #include "llvm/Intrinsics.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Target/TargetOptions.h"
33
34 using namespace llvm;
35 #define ISDBITCAST ISD::BITCAST
36 #define MVTGLUE MVT::Glue
37 //===----------------------------------------------------------------------===//
38 // Calling Convention Implementation
39 //===----------------------------------------------------------------------===//
40 #include "AMDILGenCallingConv.inc"
41
42 //===----------------------------------------------------------------------===//
43 // TargetLowering Implementation Help Functions Begin
44 //===----------------------------------------------------------------------===//
45 static SDValue
46 getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
47 {
48 DebugLoc DL = Src.getDebugLoc();
49 EVT svt = Src.getValueType().getScalarType();
50 EVT dvt = Dst.getValueType().getScalarType();
51 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
52 if (dvt.bitsGT(svt)) {
53 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
54 } else if (dvt.bitsLT(svt)) {
55 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
56 DAG.getConstant(1, MVT::i32));
57 }
58 } else if (svt.isInteger() && dvt.isInteger()) {
59 if (!svt.bitsEq(dvt)) {
60 Src = DAG.getSExtOrTrunc(Src, DL, dvt);
61 } else {
62 Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
63 }
64 } else if (svt.isInteger()) {
65 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
66 if (!svt.bitsEq(dvt)) {
67 if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
68 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
69 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
70 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
71 } else {
72 assert(0 && "We only support 32 and 64-bit fp types");
73 }
74 }
75 Src = DAG.getNode(opcode, DL, dvt, Src);
76 } else if (dvt.isInteger()) {
77 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
78 if (svt.getSimpleVT().SimpleTy == MVT::f32) {
79 Src = DAG.getNode(opcode, DL, MVT::i32, Src);
80 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
81 Src = DAG.getNode(opcode, DL, MVT::i64, Src);
82 } else {
83 assert(0 && "We only support 32 and 64-bit fp types");
84 }
85 Src = DAG.getSExtOrTrunc(Src, DL, dvt);
86 }
87 return Src;
88 }
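// Illustrative example (not from the original source): coercing an i16 value
// to f32 with getConversionNode() takes the integer->float branch, first
// widening the source to the matching 32-bit integer, then emitting either a
// value conversion or a bit-pattern conversion depending on 'asType':
//
//   SDValue Conv = getConversionNode(DAG, Src16, DstF32, /*asType=*/false);
//   // Conv == (f32 (sint_to_fp (i32 (sign_extend Src16))))
//   // with asType == true the final node would be a BITCAST instead.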
89 // CondCCodeToCC - Convert a DAG condition code to an AMDIL CC
90 // condition.
91 static AMDILCC::CondCodes
92 CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
93 {
94 switch (CC) {
95 default:
96 {
97 errs() << "Condition Code: " << (unsigned int)CC << "\n";
98 assert(0 && "Unknown condition code!");
99 }
100 case ISD::SETO:
101 switch(type) {
102 case MVT::f32:
103 return AMDILCC::IL_CC_F_O;
104 case MVT::f64:
105 return AMDILCC::IL_CC_D_O;
106 default:
107 assert(0 && "Opcode combination not generated correctly!");
108 return AMDILCC::COND_ERROR;
109 };
110 case ISD::SETUO:
111 switch(type) {
112 case MVT::f32:
113 return AMDILCC::IL_CC_F_UO;
114 case MVT::f64:
115 return AMDILCC::IL_CC_D_UO;
116 default:
117 assert(0 && "Opcode combination not generated correctly!");
118 return AMDILCC::COND_ERROR;
119 };
120 case ISD::SETGT:
121 switch (type) {
122 case MVT::i1:
123 case MVT::i8:
124 case MVT::i16:
125 case MVT::i32:
126 return AMDILCC::IL_CC_I_GT;
127 case MVT::f32:
128 return AMDILCC::IL_CC_F_GT;
129 case MVT::f64:
130 return AMDILCC::IL_CC_D_GT;
131 case MVT::i64:
132 return AMDILCC::IL_CC_L_GT;
133 default:
134 assert(0 && "Opcode combination not generated correctly!");
135 return AMDILCC::COND_ERROR;
136 };
137 case ISD::SETGE:
138 switch (type) {
139 case MVT::i1:
140 case MVT::i8:
141 case MVT::i16:
142 case MVT::i32:
143 return AMDILCC::IL_CC_I_GE;
144 case MVT::f32:
145 return AMDILCC::IL_CC_F_GE;
146 case MVT::f64:
147 return AMDILCC::IL_CC_D_GE;
148 case MVT::i64:
149 return AMDILCC::IL_CC_L_GE;
150 default:
151 assert(0 && "Opcode combination not generated correctly!");
152 return AMDILCC::COND_ERROR;
153 };
154 case ISD::SETLT:
155 switch (type) {
156 case MVT::i1:
157 case MVT::i8:
158 case MVT::i16:
159 case MVT::i32:
160 return AMDILCC::IL_CC_I_LT;
161 case MVT::f32:
162 return AMDILCC::IL_CC_F_LT;
163 case MVT::f64:
164 return AMDILCC::IL_CC_D_LT;
165 case MVT::i64:
166 return AMDILCC::IL_CC_L_LT;
167 default:
168 assert(0 && "Opcode combination not generated correctly!");
169 return AMDILCC::COND_ERROR;
170 };
171 case ISD::SETLE:
172 switch (type) {
173 case MVT::i1:
174 case MVT::i8:
175 case MVT::i16:
176 case MVT::i32:
177 return AMDILCC::IL_CC_I_LE;
178 case MVT::f32:
179 return AMDILCC::IL_CC_F_LE;
180 case MVT::f64:
181 return AMDILCC::IL_CC_D_LE;
182 case MVT::i64:
183 return AMDILCC::IL_CC_L_LE;
184 default:
185 assert(0 && "Opcode combination not generated correctly!");
186 return AMDILCC::COND_ERROR;
187 };
188 case ISD::SETNE:
189 switch (type) {
190 case MVT::i1:
191 case MVT::i8:
192 case MVT::i16:
193 case MVT::i32:
194 return AMDILCC::IL_CC_I_NE;
195 case MVT::f32:
196 return AMDILCC::IL_CC_F_NE;
197 case MVT::f64:
198 return AMDILCC::IL_CC_D_NE;
199 case MVT::i64:
200 return AMDILCC::IL_CC_L_NE;
201 default:
202 assert(0 && "Opcode combination not generated correctly!");
203 return AMDILCC::COND_ERROR;
204 };
205 case ISD::SETEQ:
206 switch (type) {
207 case MVT::i1:
208 case MVT::i8:
209 case MVT::i16:
210 case MVT::i32:
211 return AMDILCC::IL_CC_I_EQ;
212 case MVT::f32:
213 return AMDILCC::IL_CC_F_EQ;
214 case MVT::f64:
215 return AMDILCC::IL_CC_D_EQ;
216 case MVT::i64:
217 return AMDILCC::IL_CC_L_EQ;
218 default:
219 assert(0 && "Opcode combination not generated correctly!");
220 return AMDILCC::COND_ERROR;
221 };
222 case ISD::SETUGT:
223 switch (type) {
224 case MVT::i1:
225 case MVT::i8:
226 case MVT::i16:
227 case MVT::i32:
228 return AMDILCC::IL_CC_U_GT;
229 case MVT::f32:
230 return AMDILCC::IL_CC_F_UGT;
231 case MVT::f64:
232 return AMDILCC::IL_CC_D_UGT;
233 case MVT::i64:
234 return AMDILCC::IL_CC_UL_GT;
235 default:
236 assert(0 && "Opcode combination not generated correctly!");
237 return AMDILCC::COND_ERROR;
238 };
239 case ISD::SETUGE:
240 switch (type) {
241 case MVT::i1:
242 case MVT::i8:
243 case MVT::i16:
244 case MVT::i32:
245 return AMDILCC::IL_CC_U_GE;
246 case MVT::f32:
247 return AMDILCC::IL_CC_F_UGE;
248 case MVT::f64:
249 return AMDILCC::IL_CC_D_UGE;
250 case MVT::i64:
251 return AMDILCC::IL_CC_UL_GE;
252 default:
253 assert(0 && "Opcode combination not generated correctly!");
254 return AMDILCC::COND_ERROR;
255 };
256 case ISD::SETULT:
257 switch (type) {
258 case MVT::i1:
259 case MVT::i8:
260 case MVT::i16:
261 case MVT::i32:
262 return AMDILCC::IL_CC_U_LT;
263 case MVT::f32:
264 return AMDILCC::IL_CC_F_ULT;
265 case MVT::f64:
266 return AMDILCC::IL_CC_D_ULT;
267 case MVT::i64:
268 return AMDILCC::IL_CC_UL_LT;
269 default:
270 assert(0 && "Opcode combination not generated correctly!");
271 return AMDILCC::COND_ERROR;
272 };
273 case ISD::SETULE:
274 switch (type) {
275 case MVT::i1:
276 case MVT::i8:
277 case MVT::i16:
278 case MVT::i32:
279 return AMDILCC::IL_CC_U_LE;
280 case MVT::f32:
281 return AMDILCC::IL_CC_F_ULE;
282 case MVT::f64:
283 return AMDILCC::IL_CC_D_ULE;
284 case MVT::i64:
285 return AMDILCC::IL_CC_UL_LE;
286 default:
287 assert(0 && "Opcode combination not generated correctly!");
288 return AMDILCC::COND_ERROR;
289 };
290 case ISD::SETUNE:
291 switch (type) {
292 case MVT::i1:
293 case MVT::i8:
294 case MVT::i16:
295 case MVT::i32:
296 return AMDILCC::IL_CC_U_NE;
297 case MVT::f32:
298 return AMDILCC::IL_CC_F_UNE;
299 case MVT::f64:
300 return AMDILCC::IL_CC_D_UNE;
301 case MVT::i64:
302 return AMDILCC::IL_CC_UL_NE;
303 default:
304 assert(0 && "Opcode combination not generated correctly!");
305 return AMDILCC::COND_ERROR;
306 };
307 case ISD::SETUEQ:
308 switch (type) {
309 case MVT::i1:
310 case MVT::i8:
311 case MVT::i16:
312 case MVT::i32:
313 return AMDILCC::IL_CC_U_EQ;
314 case MVT::f32:
315 return AMDILCC::IL_CC_F_UEQ;
316 case MVT::f64:
317 return AMDILCC::IL_CC_D_UEQ;
318 case MVT::i64:
319 return AMDILCC::IL_CC_UL_EQ;
320 default:
321 assert(0 && "Opcode combination not generated correctly!");
322 return AMDILCC::COND_ERROR;
323 };
324 case ISD::SETOGT:
325 switch (type) {
326 case MVT::f32:
327 return AMDILCC::IL_CC_F_OGT;
328 case MVT::f64:
329 return AMDILCC::IL_CC_D_OGT;
330 case MVT::i1:
331 case MVT::i8:
332 case MVT::i16:
333 case MVT::i32:
334 case MVT::i64:
335 default:
336 assert(0 && "Opcode combination not generated correctly!");
337 return AMDILCC::COND_ERROR;
338 };
339 case ISD::SETOGE:
340 switch (type) {
341 case MVT::f32:
342 return AMDILCC::IL_CC_F_OGE;
343 case MVT::f64:
344 return AMDILCC::IL_CC_D_OGE;
345 case MVT::i1:
346 case MVT::i8:
347 case MVT::i16:
348 case MVT::i32:
349 case MVT::i64:
350 default:
351 assert(0 && "Opcode combination not generated correctly!");
352 return AMDILCC::COND_ERROR;
353 };
354 case ISD::SETOLT:
355 switch (type) {
356 case MVT::f32:
357 return AMDILCC::IL_CC_F_OLT;
358 case MVT::f64:
359 return AMDILCC::IL_CC_D_OLT;
360 case MVT::i1:
361 case MVT::i8:
362 case MVT::i16:
363 case MVT::i32:
364 case MVT::i64:
365 default:
366 assert(0 && "Opcode combination not generated correctly!");
367 return AMDILCC::COND_ERROR;
368 };
369 case ISD::SETOLE:
370 switch (type) {
371 case MVT::f32:
372 return AMDILCC::IL_CC_F_OLE;
373 case MVT::f64:
374 return AMDILCC::IL_CC_D_OLE;
375 case MVT::i1:
376 case MVT::i8:
377 case MVT::i16:
378 case MVT::i32:
379 case MVT::i64:
380 default:
381 assert(0 && "Opcode combination not generated correctly!");
382 return AMDILCC::COND_ERROR;
383 };
384 case ISD::SETONE:
385 switch (type) {
386 case MVT::f32:
387 return AMDILCC::IL_CC_F_ONE;
388 case MVT::f64:
389 return AMDILCC::IL_CC_D_ONE;
390 case MVT::i1:
391 case MVT::i8:
392 case MVT::i16:
393 case MVT::i32:
394 case MVT::i64:
395 default:
396 assert(0 && "Opcode combination not generated correctly!");
397 return AMDILCC::COND_ERROR;
398 };
399 case ISD::SETOEQ:
400 switch (type) {
401 case MVT::f32:
402 return AMDILCC::IL_CC_F_OEQ;
403 case MVT::f64:
404 return AMDILCC::IL_CC_D_OEQ;
405 case MVT::i1:
406 case MVT::i8:
407 case MVT::i16:
408 case MVT::i32:
409 case MVT::i64:
410 default:
411 assert(0 && "Opcode combination not generated correctly!");
412 return AMDILCC::COND_ERROR;
413 };
414 };
415 }
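// Illustrative mapping (derived from the switch above): the same DAG
// condition code selects a different AMDIL compare depending on the type,
// e.g.
//
//   CondCCodeToCC(ISD::SETUGT, MVT::f32) == AMDILCC::IL_CC_F_UGT // unordered fp >
//   CondCCodeToCC(ISD::SETUGT, MVT::i32) == AMDILCC::IL_CC_U_GT  // unsigned int >
//   CondCCodeToCC(ISD::SETUGT, MVT::i64) == AMDILCC::IL_CC_UL_GT // unsigned long >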
416
417 /// Helper function used by LowerFormalArguments
418 static const TargetRegisterClass*
419 getRegClassFromType(unsigned int type) {
420 switch (type) {
421 default:
422 assert(0 && "Passed in type does not match any register classes.");
423 case MVT::i8:
424 return &AMDIL::GPRI8RegClass;
425 case MVT::i16:
426 return &AMDIL::GPRI16RegClass;
427 case MVT::i32:
428 return &AMDIL::GPRI32RegClass;
429 case MVT::f32:
430 return &AMDIL::GPRF32RegClass;
431 case MVT::i64:
432 return &AMDIL::GPRI64RegClass;
433 case MVT::f64:
434 return &AMDIL::GPRF64RegClass;
435 case MVT::v4f32:
436 return &AMDIL::GPRV4F32RegClass;
437 case MVT::v4i8:
438 return &AMDIL::GPRV4I8RegClass;
439 case MVT::v4i16:
440 return &AMDIL::GPRV4I16RegClass;
441 case MVT::v4i32:
442 return &AMDIL::GPRV4I32RegClass;
443 case MVT::v2f32:
444 return &AMDIL::GPRV2F32RegClass;
445 case MVT::v2i8:
446 return &AMDIL::GPRV2I8RegClass;
447 case MVT::v2i16:
448 return &AMDIL::GPRV2I16RegClass;
449 case MVT::v2i32:
450 return &AMDIL::GPRV2I32RegClass;
451 case MVT::v2f64:
452 return &AMDIL::GPRV2F64RegClass;
453 case MVT::v2i64:
454 return &AMDIL::GPRV2I64RegClass;
455 }
456 }
457
458 SDValue
459 AMDILTargetLowering::LowerMemArgument(
460 SDValue Chain,
461 CallingConv::ID CallConv,
462 const SmallVectorImpl<ISD::InputArg> &Ins,
463 DebugLoc dl, SelectionDAG &DAG,
464 const CCValAssign &VA,
465 MachineFrameInfo *MFI,
466 unsigned i) const
467 {
468 // Create the nodes corresponding to a load from this parameter slot.
469 ISD::ArgFlagsTy Flags = Ins[i].Flags;
470
471 bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
472 getTargetMachine().Options.GuaranteedTailCallOpt;
473 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
474
475 // FIXME: For now, all byval parameter objects are marked mutable. This can
476 // be changed with more analysis.
477 // In the case of tail call optimization, mark all arguments mutable, since
478 // they could be overwritten when the arguments are lowered for a tail call.
479 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
480 VA.getLocMemOffset(), isImmutable);
481 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
482
483 if (Flags.isByVal())
484 return FIN;
485 return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
486 MachinePointerInfo::getFixedStack(FI),
487 false, false, false, 0);
488 }
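// Illustrative walk-through (offsets are hypothetical): for a non-byval i32
// argument assigned to stack offset 16, the code above produces
//
//   int FI = MFI->CreateFixedObject(/*Size=*/4, /*Offset=*/16, isImmutable);
//   SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
//   return DAG.getLoad(MVT::i32, dl, Chain, FIN,
//                      MachinePointerInfo::getFixedStack(FI),
//                      false, false, false, 0);
//
// whereas a byval argument skips the load and hands back the frame-index
// address itself.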
489 //===----------------------------------------------------------------------===//
490 // TargetLowering Implementation Help Functions End
491 //===----------------------------------------------------------------------===//
492 //===----------------------------------------------------------------------===//
493 // Instruction generation functions
494 //===----------------------------------------------------------------------===//
495 uint32_t
496 AMDILTargetLowering::addExtensionInstructions(
497 uint32_t reg, bool signedShift,
498 unsigned int simpleVT) const
499 {
500 int shiftSize = 0;
501 uint32_t LShift, RShift;
502 switch(simpleVT)
503 {
504 default:
505 return reg;
506 case AMDIL::GPRI8RegClassID:
507 shiftSize = 24;
508 LShift = AMDIL::SHL_i8;
509 if (signedShift) {
510 RShift = AMDIL::SHR_i8;
511 } else {
512 RShift = AMDIL::USHR_i8;
513 }
514 break;
515 case AMDIL::GPRV2I8RegClassID:
516 shiftSize = 24;
517 LShift = AMDIL::SHL_v2i8;
518 if (signedShift) {
519 RShift = AMDIL::SHR_v2i8;
520 } else {
521 RShift = AMDIL::USHR_v2i8;
522 }
523 break;
524 case AMDIL::GPRV4I8RegClassID:
525 shiftSize = 24;
526 LShift = AMDIL::SHL_v4i8;
527 if (signedShift) {
528 RShift = AMDIL::SHR_v4i8;
529 } else {
530 RShift = AMDIL::USHR_v4i8;
531 }
532 break;
533 case AMDIL::GPRI16RegClassID:
534 shiftSize = 16;
535 LShift = AMDIL::SHL_i16;
536 if (signedShift) {
537 RShift = AMDIL::SHR_i16;
538 } else {
539 RShift = AMDIL::USHR_i16;
540 }
541 break;
542 case AMDIL::GPRV2I16RegClassID:
543 shiftSize = 16;
544 LShift = AMDIL::SHL_v2i16;
545 if (signedShift) {
546 RShift = AMDIL::SHR_v2i16;
547 } else {
548 RShift = AMDIL::USHR_v2i16;
549 }
550 break;
551 case AMDIL::GPRV4I16RegClassID:
552 shiftSize = 16;
553 LShift = AMDIL::SHL_v4i16;
554 if (signedShift) {
555 RShift = AMDIL::SHR_v4i16;
556 } else {
557 RShift = AMDIL::USHR_v4i16;
558 }
559 break;
560 };
561 uint32_t LoadReg = genVReg(simpleVT);
562 uint32_t tmp1 = genVReg(simpleVT);
563 uint32_t tmp2 = genVReg(simpleVT);
564 generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
565 generateMachineInst(LShift, tmp1, reg, LoadReg);
566 generateMachineInst(RShift, tmp2, tmp1, LoadReg);
567 return tmp2;
568 }
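// Worked example (hypothetical registers): sign-extending an i8 value held
// in a 32-bit GPR uses shiftSize == 24 to move the i8 sign bit up to bit 31
// and arithmetically back down, replicating it across the top 24 bits:
//
//   LOADCONST_i32 %c, 24
//   SHL_i8        %t1, %reg, %c   ; 0x000000AB -> 0xAB000000
//   SHR_i8        %t2, %t1,  %c   ; 0xAB000000 -> 0xFFFFFFAB
//
// With signedShift == false the USHR_i8 form zero-fills instead, giving
// 0x000000AB back.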
569
570 MachineOperand
571 AMDILTargetLowering::convertToReg(MachineOperand op) const
572 {
573 if (op.isReg()) {
574 return op;
575 } else if (op.isImm()) {
576 uint32_t loadReg
577 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
578 generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
579 .addImm(op.getImm());
580 op.ChangeToRegister(loadReg, false);
581 } else if (op.isFPImm()) {
582 uint32_t loadReg
583 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
584 generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
585 .addFPImm(op.getFPImm());
586 op.ChangeToRegister(loadReg, false);
587 } else if (op.isMBB()) {
588 op.ChangeToRegister(0, false);
589 } else if (op.isFI()) {
590 op.ChangeToRegister(0, false);
591 } else if (op.isCPI()) {
592 op.ChangeToRegister(0, false);
593 } else if (op.isJTI()) {
594 op.ChangeToRegister(0, false);
595 } else if (op.isGlobal()) {
596 op.ChangeToRegister(0, false);
597 } else if (op.isSymbol()) {
598 op.ChangeToRegister(0, false);
599 }/* else if (op.isMetadata()) {
600 op.ChangeToRegister(0, false);
601 }*/
602 return op;
603 }
604
605 //===----------------------------------------------------------------------===//
606 // TargetLowering Class Implementation Begins
607 //===----------------------------------------------------------------------===//
608 AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
609 : TargetLowering(TM, new TargetLoweringObjectFileELF())
610 {
611 int types[] =
612 {
613 (int)MVT::i8,
614 (int)MVT::i16,
615 (int)MVT::i32,
616 (int)MVT::f32,
617 (int)MVT::f64,
618 (int)MVT::i64,
619 (int)MVT::v2i8,
620 (int)MVT::v4i8,
621 (int)MVT::v2i16,
622 (int)MVT::v4i16,
623 (int)MVT::v4f32,
624 (int)MVT::v4i32,
625 (int)MVT::v2f32,
626 (int)MVT::v2i32,
627 (int)MVT::v2f64,
628 (int)MVT::v2i64
629 };
630
631 int IntTypes[] =
632 {
633 (int)MVT::i8,
634 (int)MVT::i16,
635 (int)MVT::i32,
636 (int)MVT::i64
637 };
638
639 int FloatTypes[] =
640 {
641 (int)MVT::f32,
642 (int)MVT::f64
643 };
644
645 int VectorTypes[] =
646 {
647 (int)MVT::v2i8,
648 (int)MVT::v4i8,
649 (int)MVT::v2i16,
650 (int)MVT::v4i16,
651 (int)MVT::v4f32,
652 (int)MVT::v4i32,
653 (int)MVT::v2f32,
654 (int)MVT::v2i32,
655 (int)MVT::v2f64,
656 (int)MVT::v2i64
657 };
658 size_t numTypes = sizeof(types) / sizeof(*types);
659 size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
660 size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
661 size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
662
663 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
664 &this->getTargetMachine())->getSubtargetImpl();
665 // These are the register classes that are
666 // currently supported.
667
668 addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
669 addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
670
671 if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
672 addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
673 addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
674 }
675 if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
676 addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
677 addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
678 addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
679 setOperationAction(ISD::Constant , MVT::i8 , Legal);
680 }
681 if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
682 addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
683 addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
684 addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
685 setOperationAction(ISD::Constant , MVT::i16 , Legal);
686 }
687 addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
688 addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
689 addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
690 addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
691 if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
692 addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
693 addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
694 }
695
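// Reminder of the standard LLVM operation-action semantics relied on below:
//   Legal  - the node is natively selectable for that value type
//   Custom - the node is routed through AMDILTargetLowering::LowerOperation()
//   Expand - the legalizer rewrites the node in terms of other, legal nodes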
696 for (unsigned int x = 0; x < numTypes; ++x) {
697 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
698
699 // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
700 // We cannot sextinreg directly, so it is custom-lowered to a pair of shifts.
701 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
702 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
703 setOperationAction(ISD::FP_ROUND, VT, Expand);
704 setOperationAction(ISD::SUBE, VT, Expand);
705 setOperationAction(ISD::SUBC, VT, Expand);
706 setOperationAction(ISD::ADD, VT, Custom);
707 setOperationAction(ISD::ADDE, VT, Expand);
708 setOperationAction(ISD::ADDC, VT, Expand);
709 setOperationAction(ISD::SETCC, VT, Custom);
710 setOperationAction(ISD::BRCOND, VT, Custom);
711 setOperationAction(ISD::BR_CC, VT, Custom);
712 setOperationAction(ISD::BR_JT, VT, Expand);
713 setOperationAction(ISD::BRIND, VT, Expand);
714 // TODO: Implement custom UREM/SREM routines
715 setOperationAction(ISD::UREM, VT, Expand);
716 setOperationAction(ISD::SREM, VT, Expand);
717 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
718 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
719 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
720 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
721 setOperationAction(ISDBITCAST, VT, Custom);
722 setOperationAction(ISD::GlobalAddress, VT, Custom);
723 setOperationAction(ISD::JumpTable, VT, Custom);
724 setOperationAction(ISD::ConstantPool, VT, Custom);
725 setOperationAction(ISD::SELECT_CC, VT, Custom);
726 setOperationAction(ISD::SELECT, VT, Custom);
727 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
728 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
729 if (VT != MVT::i64 && VT != MVT::v2i64) {
730 setOperationAction(ISD::SDIV, VT, Custom);
731 setOperationAction(ISD::UDIV, VT, Custom);
732 }
733 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
734 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
735 }
736 for (unsigned int x = 0; x < numFloatTypes; ++x) {
737 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
738
739 // IL does not have these operations for floating point types
740 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
741 setOperationAction(ISD::FP_ROUND, VT, Custom);
742 setOperationAction(ISD::SETOLT, VT, Expand);
743 setOperationAction(ISD::SETOGE, VT, Expand);
744 setOperationAction(ISD::SETOGT, VT, Expand);
745 setOperationAction(ISD::SETOLE, VT, Expand);
746 setOperationAction(ISD::SETULT, VT, Expand);
747 setOperationAction(ISD::SETUGE, VT, Expand);
748 setOperationAction(ISD::SETUGT, VT, Expand);
749 setOperationAction(ISD::SETULE, VT, Expand);
750 }
751
752 for (unsigned int x = 0; x < numIntTypes; ++x) {
753 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
754
755 // The GPU also has no divrem instruction, signed or unsigned
756 setOperationAction(ISD::SDIVREM, VT, Expand);
757 setOperationAction(ISD::UDIVREM, VT, Expand);
758 setOperationAction(ISD::FP_ROUND, VT, Expand);
759
760 // The GPU has no single-instruction [S|U]MUL_LOHI
761 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
762 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
763
764 // GPU doesn't have a rotl, rotr, or byteswap instruction
765 setOperationAction(ISD::ROTR, VT, Expand);
766 setOperationAction(ISD::ROTL, VT, Expand);
767 setOperationAction(ISD::BSWAP, VT, Expand);
768
769 // GPU doesn't have any bit-counting instructions (CTPOP/CTTZ/CTLZ)
770 setOperationAction(ISD::CTPOP, VT, Expand);
771 setOperationAction(ISD::CTTZ, VT, Expand);
772 setOperationAction(ISD::CTLZ, VT, Expand);
773 }
774
775 for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
776 {
777 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
778
779 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
780 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
781 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
782 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
783 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
784 setOperationAction(ISD::FP_ROUND, VT, Expand);
785 setOperationAction(ISD::SDIVREM, VT, Expand);
786 setOperationAction(ISD::UDIVREM, VT, Expand);
787 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
788 // setOperationAction(ISD::VSETCC, VT, Expand);
789 setOperationAction(ISD::SETCC, VT, Expand);
790 setOperationAction(ISD::SELECT_CC, VT, Expand);
791 setOperationAction(ISD::SELECT, VT, Expand);
792
793 }
794 setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
795 if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
796 if (stm->calVersion() < CAL_VERSION_SC_139
797 || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
798 setOperationAction(ISD::MUL, MVT::i64, Custom);
799 }
800 setOperationAction(ISD::SUB, MVT::i64, Custom);
801 setOperationAction(ISD::ADD, MVT::i64, Custom);
802 setOperationAction(ISD::MULHU, MVT::i64, Expand);
803 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
804 setOperationAction(ISD::MULHS, MVT::i64, Expand);
805 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
806 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
807 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
808 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
809 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
810 setOperationAction(ISD::Constant , MVT::i64 , Legal);
811 setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
812 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
813 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
814 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
815 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
816 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
817 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
818 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
819 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
820 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
821 }
822 if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
823 // we support loading/storing v2f64 but not operations on the type
824 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
825 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
826 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
827 setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
828 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
829 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
830 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
831 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
832 // We want to expand vector conversions into their scalar
833 // counterparts.
834 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
835 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
836 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
837 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
838 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
839 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
840 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
841 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
842 setOperationAction(ISD::FABS, MVT::f64, Expand);
843 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
844 }
845 // TODO: Fix the UDIV24 algorithm so it handles these vector
846 // types correctly; that requires working vector comparisons.
847 // Until then, these divisions are expanded.
848 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
849 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
850 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
851 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
852 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
853 setOperationAction(ISD::SUBC, MVT::Other, Expand);
854 setOperationAction(ISD::ADDE, MVT::Other, Expand);
855 setOperationAction(ISD::ADDC, MVT::Other, Expand);
856 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
857 setOperationAction(ISD::BR_CC, MVT::Other, Custom);
858 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
859 setOperationAction(ISD::BRIND, MVT::Other, Expand);
860 setOperationAction(ISD::SETCC, MVT::Other, Custom);
861 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
862 setOperationAction(ISD::FDIV, MVT::f32, Custom);
863 setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
864 setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
865
866 setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
867 // Use the default implementation.
868 setOperationAction(ISD::VAARG , MVT::Other, Expand);
869 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
870 setOperationAction(ISD::VAEND , MVT::Other, Expand);
871 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
872 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
873 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
874 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
875 setOperationAction(ISD::Constant , MVT::i32 , Legal);
876 setOperationAction(ISD::TRAP , MVT::Other , Legal);
877
878 setStackPointerRegisterToSaveRestore(AMDIL::SP);
879 setSchedulingPreference(Sched::RegPressure);
880 setPow2DivIsCheap(false);
881 setPrefLoopAlignment(16);
882 setSelectIsExpensive(true);
883 setJumpIsExpensive(true);
884 computeRegisterProperties();
885
886 maxStoresPerMemcpy = 4096;
887 maxStoresPerMemmove = 4096;
888 maxStoresPerMemset = 4096;
889
894 }
895
896 const char *
897 AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
898 {
899 switch (Opcode) {
900 default: return 0;
901 case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
902 case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
903 case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
904 case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
905 case AMDILISD::CMOV: return "AMDILISD::CMOV";
906 case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
907 case AMDILISD::INEGATE: return "AMDILISD::INEGATE";
908 case AMDILISD::MAD: return "AMDILISD::MAD";
909 case AMDILISD::UMAD: return "AMDILISD::UMAD";
910 case AMDILISD::CALL: return "AMDILISD::CALL";
911 case AMDILISD::RET: return "AMDILISD::RET";
912 case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
913 case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
914 case AMDILISD::ADD: return "AMDILISD::ADD";
915 case AMDILISD::UMUL: return "AMDILISD::UMUL";
916 case AMDILISD::AND: return "AMDILISD::AND";
917 case AMDILISD::OR: return "AMDILISD::OR";
918 case AMDILISD::NOT: return "AMDILISD::NOT";
919 case AMDILISD::XOR: return "AMDILISD::XOR";
920 case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
921 case AMDILISD::SMAX: return "AMDILISD::SMAX";
922 case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
923 case AMDILISD::MOVE: return "AMDILISD::MOVE";
924 case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
925 case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
926 case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
927 case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
928 case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
929 case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
930 case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
931 case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
932 case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
933 case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
934 case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
935 case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
936 case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
937 case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
938 case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
939 case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
940 case AMDILISD::CMP: return "AMDILISD::CMP";
941 case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
942 case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
943 case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
944 case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
945 case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
946 case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
947 case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
948 case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
949 case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
950 case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
951 case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
952 case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
953 case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
954 case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
955 case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
956 case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
957 case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
958 case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
959 case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
960 case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
961 case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
962 case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
963 case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
964 case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
965 case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
966 case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
967 case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
968 case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
969 case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
970 case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
971 case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
972 case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
973 case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
974 case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
975 case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
976 case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
977 case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
978 case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
979 case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
980 case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
981 case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
982 case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
983 case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
984 case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
985 case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
986 case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
987 case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
988 case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
989 case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
990 case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
991 case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
992 case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
993 case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
994 case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
995 case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
996 case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
997 case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
998 case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
999 case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
1000 case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
1001 case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
1002 case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
1003 case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
1004 case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
1005 case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
1006 case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
1007 case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
1008 case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
1009 case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
1010 case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
1011 case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
1012 case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
1013 case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
1014 case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
1015 case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
1016 case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
1017 case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
1018 case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
1019 case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
1020 case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
1021 case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
1022 case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
1023 case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
1024 case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
1025 case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
1026 case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
1027 case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
1028 case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
1029 case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
1030 case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
1031 case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
1032 case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
1033 case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
1034 case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
1035 case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
1036 case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
1037 case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
1038 case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
1039 case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
1040 case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
1041 case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
1042 case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
1043 case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
1044 case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
1045 case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
1046 case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
1047 case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
1048 case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
1049 case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
1050
1051 };
1052 }
1053 bool
1054 AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1055 const CallInst &I, unsigned Intrinsic) const
1056 {
1057 if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
1058 || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
1059 return false;
1060 }
1061 bool bitCastToInt = false;
1062 unsigned IntNo;
1063 bool isRet = true;
1064 const AMDILSubtarget *STM = &this->getTargetMachine()
1065 .getSubtarget<AMDILSubtarget>();
1066 switch (Intrinsic) {
1067 default: return false; // Don't custom lower most intrinsics.
1068 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
1069 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
1070 IntNo = AMDILISD::ATOM_G_ADD; break;
1071 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
1072 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
1073 isRet = false;
1074 IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
1075 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
1076 case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
1077 IntNo = AMDILISD::ATOM_L_ADD; break;
1078 case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
1079 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
1080 isRet = false;
1081 IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
1082 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
1083 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
1084 IntNo = AMDILISD::ATOM_R_ADD; break;
1085 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
1086 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
1087 isRet = false;
1088 IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
1089 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
1090 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
1091 IntNo = AMDILISD::ATOM_G_AND; break;
1092 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
1093 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
1094 isRet = false;
1095 IntNo = AMDILISD::ATOM_G_AND_NORET; break;
1096 case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
1097 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
1098 IntNo = AMDILISD::ATOM_L_AND; break;
1099 case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
1100 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
1101 isRet = false;
1102 IntNo = AMDILISD::ATOM_L_AND_NORET; break;
1103 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
1104 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
1105 IntNo = AMDILISD::ATOM_R_AND; break;
1106 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
1107 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
1108 isRet = false;
1109 IntNo = AMDILISD::ATOM_R_AND_NORET; break;
1110 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
1111 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
1112 IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
1113 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
1114 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
1115 isRet = false;
1116 IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
1117 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
1118 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
1119 IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
1120 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
1121 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
1122 isRet = false;
1123 IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
1124 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
1125 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
1126 IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
1127 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
1128 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
1129 isRet = false;
1130 IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
1131 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
1132 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
1133 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1134 IntNo = AMDILISD::ATOM_G_DEC;
1135 } else {
1136 IntNo = AMDILISD::ATOM_G_SUB;
1137 }
1138 break;
1139 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
1140 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
1141 isRet = false;
1142 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1143 IntNo = AMDILISD::ATOM_G_DEC_NORET;
1144 } else {
1145 IntNo = AMDILISD::ATOM_G_SUB_NORET;
1146 }
1147 break;
1148 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
1149 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
1150 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1151 IntNo = AMDILISD::ATOM_L_DEC;
1152 } else {
1153 IntNo = AMDILISD::ATOM_L_SUB;
1154 }
1155 break;
1156 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
1157 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
1158 isRet = false;
1159 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1160 IntNo = AMDILISD::ATOM_L_DEC_NORET;
1161 } else {
1162 IntNo = AMDILISD::ATOM_L_SUB_NORET;
1163 }
1164 break;
1165 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
1166 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
1167 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1168 IntNo = AMDILISD::ATOM_R_DEC;
1169 } else {
1170 IntNo = AMDILISD::ATOM_R_SUB;
1171 }
1172 break;
1173 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
1174 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
1175 isRet = false;
1176 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1177 IntNo = AMDILISD::ATOM_R_DEC_NORET;
1178 } else {
1179 IntNo = AMDILISD::ATOM_R_SUB_NORET;
1180 }
1181 break;
1182 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
1183 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
1184 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1185 IntNo = AMDILISD::ATOM_G_INC;
1186 } else {
1187 IntNo = AMDILISD::ATOM_G_ADD;
1188 }
1189 break;
1190 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
1191 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
1192 isRet = false;
1193 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1194 IntNo = AMDILISD::ATOM_G_INC_NORET;
1195 } else {
1196 IntNo = AMDILISD::ATOM_G_ADD_NORET;
1197 }
1198 break;
1199 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
1200 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
1201 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1202 IntNo = AMDILISD::ATOM_L_INC;
1203 } else {
1204 IntNo = AMDILISD::ATOM_L_ADD;
1205 }
1206 break;
1207 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
1208 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
1209 isRet = false;
1210 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1211 IntNo = AMDILISD::ATOM_L_INC_NORET;
1212 } else {
1213 IntNo = AMDILISD::ATOM_L_ADD_NORET;
1214 }
1215 break;
1216 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
1217 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
1218 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1219 IntNo = AMDILISD::ATOM_R_INC;
1220 } else {
1221 IntNo = AMDILISD::ATOM_R_ADD;
1222 }
1223 break;
1224 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
1225 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
1226 isRet = false;
1227 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1228 IntNo = AMDILISD::ATOM_R_INC_NORET;
1229 } else {
1230 IntNo = AMDILISD::ATOM_R_ADD_NORET;
1231 }
1232 break;
1233 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
1234 IntNo = AMDILISD::ATOM_G_MAX; break;
1235 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
1236 IntNo = AMDILISD::ATOM_G_UMAX; break;
1237 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
1238 isRet = false;
1239 IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
1240 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
1241 isRet = false;
1242 IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
1243 case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
1244 IntNo = AMDILISD::ATOM_L_MAX; break;
1245 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
1246 IntNo = AMDILISD::ATOM_L_UMAX; break;
1247 case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
1248 isRet = false;
1249 IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
1250 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
1251 isRet = false;
1252 IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
1253 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
1254 IntNo = AMDILISD::ATOM_R_MAX; break;
1255 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
1256 IntNo = AMDILISD::ATOM_R_UMAX; break;
1257 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
1258 isRet = false;
1259 IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
1260 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
1261 isRet = false;
1262 IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
1263 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
1264 IntNo = AMDILISD::ATOM_G_MIN; break;
1265 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
1266 IntNo = AMDILISD::ATOM_G_UMIN; break;
1267 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
1268 isRet = false;
1269 IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
1270 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
1271 isRet = false;
1272 IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
1273 case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
1274 IntNo = AMDILISD::ATOM_L_MIN; break;
1275 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
1276 IntNo = AMDILISD::ATOM_L_UMIN; break;
1277 case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
1278 isRet = false;
1279 IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
1280 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
1281 isRet = false;
1282 IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
1283 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
1284 IntNo = AMDILISD::ATOM_R_MIN; break;
1285 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
1286 IntNo = AMDILISD::ATOM_R_UMIN; break;
1287 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
1288 isRet = false;
1289 IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
1290 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
1291 isRet = false;
1292 IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
1293 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
1294 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
1295 IntNo = AMDILISD::ATOM_G_OR; break;
1296 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
1297 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
1298 isRet = false;
1299 IntNo = AMDILISD::ATOM_G_OR_NORET; break;
1300 case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
1301 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
1302 IntNo = AMDILISD::ATOM_L_OR; break;
1303 case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
1304 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
1305 isRet = false;
1306 IntNo = AMDILISD::ATOM_L_OR_NORET; break;
1307 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
1308 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
1309 IntNo = AMDILISD::ATOM_R_OR; break;
1310 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
1311 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
1312 isRet = false;
1313 IntNo = AMDILISD::ATOM_R_OR_NORET; break;
1314 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
1315 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
1316 IntNo = AMDILISD::ATOM_G_SUB; break;
1317 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
1318 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
1319 isRet = false;
1320 IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
1321 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
1322 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
1323 IntNo = AMDILISD::ATOM_L_SUB; break;
1324 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
1325 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
1326 isRet = false;
1327 IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
1328 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
1329 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
1330 IntNo = AMDILISD::ATOM_R_SUB; break;
1331 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
1332 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
1333 isRet = false;
1334 IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
1335 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
1336 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
1337 IntNo = AMDILISD::ATOM_G_RSUB; break;
1338 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
1339 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
1340 isRet = false;
1341 IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
1342 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
1343 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
1344 IntNo = AMDILISD::ATOM_L_RSUB; break;
1345 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
1346 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
1347 isRet = false;
1348 IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
1349 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
1350 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
1351 IntNo = AMDILISD::ATOM_R_RSUB; break;
1352 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
1353 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
1354 isRet = false;
1355 IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
1356 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
1357 bitCastToInt = true;
1358 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
1359 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
1360 IntNo = AMDILISD::ATOM_G_XCHG; break;
1361 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
1362 bitCastToInt = true;
1363 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
1364 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
1365 isRet = false;
1366 IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
1367 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
1368 bitCastToInt = true;
1369 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
1370 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
1371 IntNo = AMDILISD::ATOM_L_XCHG; break;
1372 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
1373 bitCastToInt = true;
1374 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
1375 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
1376 isRet = false;
1377 IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
1378 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
1379 bitCastToInt = true;
1380 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
1381 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
1382 IntNo = AMDILISD::ATOM_R_XCHG; break;
1383 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
1384 bitCastToInt = true;
1385 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
1386 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
1387 isRet = false;
1388 IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
1389 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
1390 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
1391 IntNo = AMDILISD::ATOM_G_XOR; break;
1392 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
1393 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
1394 isRet = false;
1395 IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
1396 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
1397 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
1398 IntNo = AMDILISD::ATOM_L_XOR; break;
1399 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
1400 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
1401 isRet = false;
1402 IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
1403 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
1404 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
1405 IntNo = AMDILISD::ATOM_R_XOR; break;
1406 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
1407 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
1408 isRet = false;
1409 IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
1410 case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
1411 IntNo = AMDILISD::APPEND_ALLOC; break;
1412 case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
1413 isRet = false;
1414 IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
1415 case AMDGPUIntrinsic::AMDIL_append_consume_i32:
1416 IntNo = AMDILISD::APPEND_CONSUME; break;
1417 case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
1418 isRet = false;
1419 IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
1420 };
1421
1422 Info.opc = IntNo;
1423 Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
1424 Info.ptrVal = I.getOperand(0);
1425 Info.offset = 0;
1426 Info.align = 4;
1427 Info.vol = true;
1428 Info.readMem = isRet;
1429 Info.writeMem = true;
1430 return true;
1431 }
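// Illustrative result (derived from the code above): for a call to
// AMDIL_atomic_add_gi32 the selector receives
//
//   Info.opc      = AMDILISD::ATOM_G_ADD;
//   Info.memVT    = MVT::i32;      // f32 only for the bit-cast xchg forms
//   Info.ptrVal   = I.getOperand(0);
//   Info.align    = 4;  Info.vol = true;
//   Info.readMem  = true;          // false for the *_noret variants
//   Info.writeMem = true;
//
// so the intrinsic is treated as a volatile 32-bit read-modify-write access.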
1432 // The backend supports 32- and 64-bit floating point immediates
1433 bool
1434 AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
1435 {
1436 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1437 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1438 return true;
1439 } else {
1440 return false;
1441 }
1442 }
1443
1444 bool
1445 AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
1446 {
1447 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1448 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1449 return false;
1450 } else {
1451 return true;
1452 }
1453 }
1454
1455
1456 // computeMaskedBitsForTargetNode - Determine which bits of 'Op' are known
1457 // to be zero or one. Op is expected to be a target-specific node. Used by
1458 // the DAG combiner.
1459
1460 void
1461 AMDILTargetLowering::computeMaskedBitsForTargetNode(
1462 const SDValue Op,
1463 APInt &KnownZero,
1464 APInt &KnownOne,
1465 const SelectionDAG &DAG,
1466 unsigned Depth) const
1467 {
1468 APInt KnownZero2;
1469 APInt KnownOne2;
1470 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
1471 switch (Op.getOpcode()) {
1472 default: break;
1473 case AMDILISD::SELECT_CC:
1474 DAG.ComputeMaskedBits(
1475 Op.getOperand(1),
1476 KnownZero,
1477 KnownOne,
1478 Depth + 1
1479 );
1480 DAG.ComputeMaskedBits(
1481 Op.getOperand(0),
1482 KnownZero2,
1483 KnownOne2,
1484 Depth + 1);
1485 assert((KnownZero & KnownOne) == 0
1486 && "Bits known to be one AND zero?");
1487 assert((KnownZero2 & KnownOne2) == 0
1488 && "Bits known to be one AND zero?");
1489 // Only known if known in both the LHS and RHS
1490 KnownOne &= KnownOne2;
1491 KnownZero &= KnownZero2;
1492 break;
1493 };
1494 }
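// Example of the intersection rule above (hypothetical masks): if one arm of
// the SELECT_CC is known to have its top 16 bits zero (KnownZero =
// 0xFFFF0000) and the other arm only its top 8 (KnownZero2 = 0xFF000000),
// the result is known zero only in the common bits:
//   KnownZero &= KnownZero2;   // == 0xFF000000
// A bit must be known on both arms before it is known for the select.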
1495
1496 // This is the function that determines which calling convention should
1497 // be used. Currently there is only one calling convention.
1498 CCAssignFn*
1499 AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
1500 {
1501 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1502 return CC_AMDIL32;
1503 }
1504
// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of the appropriate physical registers. This assumes
// that Chain/InFlag are the input chain/flag to use, and that TheCall is the
// call being lowered. It returns an SDNode with the same number of values as
// the ISD::CALL.
1510 SDValue
1511 AMDILTargetLowering::LowerCallResult(
1512 SDValue Chain,
1513 SDValue InFlag,
1514 CallingConv::ID CallConv,
1515 bool isVarArg,
1516 const SmallVectorImpl<ISD::InputArg> &Ins,
1517 DebugLoc dl,
1518 SelectionDAG &DAG,
1519 SmallVectorImpl<SDValue> &InVals) const
1520 {
1521 // Assign locations to each value returned by this call
1522 SmallVector<CCValAssign, 16> RVLocs;
1523 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1524 getTargetMachine(), RVLocs, *DAG.getContext());
1525 CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
1526
1527 // Copy all of the result registers out of their specified physreg.
1528 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1529 EVT CopyVT = RVLocs[i].getValVT();
1530 if (RVLocs[i].isRegLoc()) {
1531 Chain = DAG.getCopyFromReg(
1532 Chain,
1533 dl,
1534 RVLocs[i].getLocReg(),
1535 CopyVT,
1536 InFlag
1537 ).getValue(1);
1538 SDValue Val = Chain.getValue(0);
1539 InFlag = Chain.getValue(2);
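      // getCopyFromReg produces (value, chain, glue): the chain (result 1)
      // was kept above, and the copied value (result 0) and glue (result 2)
      // are read off the same node here.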
1540 InVals.push_back(Val);
1541 }
1542 }
1543
1544 return Chain;
1545
1546 }
1547
1548 //===----------------------------------------------------------------------===//
1549 // Other Lowering Hooks
1550 //===----------------------------------------------------------------------===//
1551
1552 // Recursively assign SDNodeOrdering to any unordered nodes
1553 // This is necessary to maintain source ordering of instructions
1554 // under -O0 to avoid odd-looking "skipping around" issues.
1555 static const SDValue
1556 Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
1557 {
1558 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
1559 DAG.AssignOrdering( New.getNode(), order );
1560 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
1561 Ordered( DAG, order, New.getOperand(i) );
1562 }
1563 return New;
1564 }
1565
1566 #define LOWER(A) \
1567 case ISD:: A: \
1568 return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
1569
1570 SDValue
1571 AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
1572 {
1573 switch (Op.getOpcode()) {
1574 default:
1575 Op.getNode()->dump();
      assert(0 && "Custom lowering code for this "
          "instruction is not implemented yet!");
1578 break;
1579 LOWER(GlobalAddress);
1580 LOWER(JumpTable);
1581 LOWER(ConstantPool);
1582 LOWER(ExternalSymbol);
1583 LOWER(FP_TO_SINT);
1584 LOWER(FP_TO_UINT);
1585 LOWER(SINT_TO_FP);
1586 LOWER(UINT_TO_FP);
1587 LOWER(ADD);
1588 LOWER(MUL);
1589 LOWER(SUB);
1590 LOWER(FDIV);
1591 LOWER(SDIV);
1592 LOWER(SREM);
1593 LOWER(UDIV);
1594 LOWER(UREM);
1595 LOWER(BUILD_VECTOR);
1596 LOWER(INSERT_VECTOR_ELT);
1597 LOWER(EXTRACT_VECTOR_ELT);
1598 LOWER(EXTRACT_SUBVECTOR);
1599 LOWER(SCALAR_TO_VECTOR);
1600 LOWER(CONCAT_VECTORS);
1601 LOWER(SELECT);
1602 LOWER(SETCC);
1603 LOWER(SIGN_EXTEND_INREG);
1604 LOWER(BITCAST);
1605 LOWER(DYNAMIC_STACKALLOC);
1606 LOWER(BRCOND);
1607 LOWER(BR_CC);
1608 LOWER(FP_ROUND);
1609 }
1610 return Op;
1611 }
1612
1613 int
1614 AMDILTargetLowering::getVarArgsFrameOffset() const
1615 {
1616 return VarArgsFrameOffset;
1617 }
1618 #undef LOWER
1619
1620 SDValue
1621 AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
1622 {
1623 SDValue DST = Op;
1624 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
1625 const GlobalValue *G = GADN->getGlobal();
1626 DebugLoc DL = Op.getDebugLoc();
1627 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  if (!GV) {
    DST = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
1630 } else {
1631 if (GV->hasInitializer()) {
1632 const Constant *C = dyn_cast<Constant>(GV->getInitializer());
1633 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
1634 DST = DAG.getConstant(CI->getValue(), Op.getValueType());
1635 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
1636 DST = DAG.getConstantFP(CF->getValueAPF(),
1637 Op.getValueType());
      } else if (isa<ConstantAggregateZero>(C)) {
1639 EVT VT = Op.getValueType();
1640 if (VT.isInteger()) {
1641 DST = DAG.getConstant(0, VT);
1642 } else {
1643 DST = DAG.getConstantFP(0, VT);
1644 }
1645 } else {
1646 assert(!"lowering this type of Global Address "
1647 "not implemented yet!");
1648 C->dump();
1649 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1650 }
1651 } else {
1652 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1653 }
1654 }
1655 return DST;
1656 }
1657
1658 SDValue
1659 AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
1660 {
1661 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1662 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
1663 return Result;
1664 }
1665 SDValue
1666 AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
1667 {
1668 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1669 EVT PtrVT = Op.getValueType();
1670 SDValue Result;
1671 if (CP->isMachineConstantPoolEntry()) {
1672 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1673 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
1674 } else {
1675 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1676 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
1677 }
1678 return Result;
1679 }
1680
1681 SDValue
1682 AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
1683 {
1684 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
1685 SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
1686 return Result;
1687 }
1688
/// LowerFormalArguments - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
1693 SDValue
1694 AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
1695 CallingConv::ID CallConv,
1696 bool isVarArg,
1697 const SmallVectorImpl<ISD::InputArg> &Ins,
1698 DebugLoc dl,
1699 SelectionDAG &DAG,
1700 SmallVectorImpl<SDValue> &InVals)
1701 const
1702 {
1703
1704 MachineFunction &MF = DAG.getMachineFunction();
1705 MachineFrameInfo *MFI = MF.getFrameInfo();
1706 //const Function *Fn = MF.getFunction();
1707 //MachineRegisterInfo &RegInfo = MF.getRegInfo();
1708
1709 SmallVector<CCValAssign, 16> ArgLocs;
1710 CallingConv::ID CC = MF.getFunction()->getCallingConv();
1711 //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
1712
1713 CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
1714 getTargetMachine(), ArgLocs, *DAG.getContext());
1715
1716 // When more calling conventions are added, they need to be chosen here
1717 CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
1718 SDValue StackPtr;
1719
1720 //unsigned int FirstStackArgLoc = 0;
1721
1722 for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
1723 CCValAssign &VA = ArgLocs[i];
1724 if (VA.isRegLoc()) {
1725 EVT RegVT = VA.getLocVT();
1726 const TargetRegisterClass *RC = getRegClassFromType(
1727 RegVT.getSimpleVT().SimpleTy);
1728
1729 unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
1730 SDValue ArgValue = DAG.getCopyFromReg(
1731 Chain,
1732 dl,
1733 Reg,
1734 RegVT);
1735 // If this is an 8 or 16-bit value, it is really passed
1736 // promoted to 32 bits. Insert an assert[sz]ext to capture
1737 // this, then truncate to the right size.
1738
1739 if (VA.getLocInfo() == CCValAssign::SExt) {
1740 ArgValue = DAG.getNode(
1741 ISD::AssertSext,
1742 dl,
1743 RegVT,
1744 ArgValue,
1745 DAG.getValueType(VA.getValVT()));
1746 } else if (VA.getLocInfo() == CCValAssign::ZExt) {
1747 ArgValue = DAG.getNode(
1748 ISD::AssertZext,
1749 dl,
1750 RegVT,
1751 ArgValue,
1752 DAG.getValueType(VA.getValVT()));
1753 }
1754 if (VA.getLocInfo() != CCValAssign::Full) {
1755 ArgValue = DAG.getNode(
1756 ISD::TRUNCATE,
1757 dl,
1758 VA.getValVT(),
1759 ArgValue);
1760 }
1761 // Add the value to the list of arguments
1762 // to be passed in registers
1763 InVals.push_back(ArgValue);
1764 if (isVarArg) {
1765 assert(0 && "Variable arguments are not yet supported");
1766 // See MipsISelLowering.cpp for ideas on how to implement
1767 }
1768 } else if(VA.isMemLoc()) {
1769 InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
1770 dl, DAG, VA, MFI, i));
1771 } else {
      assert(0 && "found a Value Assign that is "
          "neither a register nor a memory location");
1774 }
1775 }
1776 /*if (hasStructRet) {
1777 assert(0 && "Has struct return is not yet implemented");
1778 // See MipsISelLowering.cpp for ideas on how to implement
1779 }*/
1780
1781 if (isVarArg) {
1782 assert(0 && "Variable arguments are not yet supported");
1783 // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
1784 }
  // This needs to be changed to non-zero if the callee needs to pop bytes
  // off the stack on return
1787 return Chain;
1788 }
1789 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1790 /// by "Src" to address "Dst" with size and alignment information specified by
1791 /// the specific parameter attribute. The copy will be passed as a byval
1792 /// function parameter.
1793 static SDValue
1794 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1795 ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
1796 assert(0 && "MemCopy does not exist yet");
1797 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1798
1799 return DAG.getMemcpy(Chain,
1800 Src.getDebugLoc(),
1801 Dst, Src, SizeNode, Flags.getByValAlign(),
1802 /*IsVol=*/false, /*AlwaysInline=*/true,
1803 MachinePointerInfo(), MachinePointerInfo());
1804 }
1805
1806 SDValue
1807 AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
1808 SDValue StackPtr, SDValue Arg,
1809 DebugLoc dl, SelectionDAG &DAG,
1810 const CCValAssign &VA,
1811 ISD::ArgFlagsTy Flags) const
1812 {
1813 unsigned int LocMemOffset = VA.getLocMemOffset();
1814 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1815 PtrOff = DAG.getNode(ISD::ADD,
1816 dl,
1817 getPointerTy(), StackPtr, PtrOff);
1818 if (Flags.isByVal()) {
1819 PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
1820 } else {
1821 PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
1822 MachinePointerInfo::getStack(LocMemOffset),
1823 false, false, 0);
1824 }
1825 return PtrOff;
1826 }
/// LowerCall - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame); CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
1831 SDValue
1832 AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1833 CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
1834 bool& isTailCall,
1835 const SmallVectorImpl<ISD::OutputArg> &Outs,
1836 const SmallVectorImpl<SDValue> &OutVals,
1837 const SmallVectorImpl<ISD::InputArg> &Ins,
1838 DebugLoc dl, SelectionDAG &DAG,
1839 SmallVectorImpl<SDValue> &InVals)
1840 const
1841 {
1842 isTailCall = false;
1843 MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: Do we need to handle fast calling conventions and tail call
  // optimizations? See X86/PPC ISelLowering.
1846 /*bool hasStructRet = (TheCall->getNumArgs())
1847 ? TheCall->getArgFlags(0).device()->isSRet()
1848 : false;*/
1849
1850 MachineFrameInfo *MFI = MF.getFrameInfo();
1851
1852 // Analyze operands of the call, assigning locations to each operand
1853 SmallVector<CCValAssign, 16> ArgLocs;
1854 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1855 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands; this needs to change
  // if we have more than one calling convention
1858 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
1859
1860 unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(0 && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }
1865
1866 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1867
1868 SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
1869 SmallVector<SDValue, 8> MemOpChains;
1870 SDValue StackPtr;
1871 //unsigned int FirstStacArgLoc = 0;
1872 //int LastArgStackLoc = 0;
1873
1874 // Walk the register/memloc assignments, insert copies/loads
1875 for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
1876 CCValAssign &VA = ArgLocs[i];
1877 //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the first 5 operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value if needed
1881 switch(VA.getLocInfo()) {
1882 default: assert(0 && "Unknown loc info!");
1883 case CCValAssign::Full:
1884 break;
1885 case CCValAssign::SExt:
1886 Arg = DAG.getNode(ISD::SIGN_EXTEND,
1887 dl,
1888 VA.getLocVT(), Arg);
1889 break;
1890 case CCValAssign::ZExt:
1891 Arg = DAG.getNode(ISD::ZERO_EXTEND,
1892 dl,
1893 VA.getLocVT(), Arg);
1894 break;
1895 case CCValAssign::AExt:
1896 Arg = DAG.getNode(ISD::ANY_EXTEND,
1897 dl,
1898 VA.getLocVT(), Arg);
1899 break;
1900 }
1901
1902 if (VA.isRegLoc()) {
1903 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1904 } else if (VA.isMemLoc()) {
      // Create the frame index object for this outgoing parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());

      // emit an ISD::STORE which stores the
      // parameter value to a stack location
1912 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
1913 MachinePointerInfo::getFixedStack(FI),
1914 false, false, 0));
1915 } else {
1916 assert(0 && "Not a Reg/Mem Loc, major error!");
1917 }
1918 }
1919 if (!MemOpChains.empty()) {
1920 Chain = DAG.getNode(ISD::TokenFactor,
1921 dl,
1922 MVT::Other,
1923 &MemOpChains[0],
1924 MemOpChains.size());
1925 }
1926 SDValue InFlag;
1927 if (!isTailCall) {
1928 for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
1929 Chain = DAG.getCopyToReg(Chain,
1930 dl,
1931 RegsToPass[i].first,
1932 RegsToPass[i].second,
1933 InFlag);
1934 InFlag = Chain.getValue(1);
1935 }
1936 }
1937
1938 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
1939 // every direct call is) turn it into a TargetGlobalAddress/
1940 // TargetExternalSymbol
1941 // node so that legalize doesn't hack it.
1942 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1943 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
1944 }
1945 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1946 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1947 }
1948 else if (isTailCall) {
1949 assert(0 && "Tail calls are not handled yet");
1950 // see X86 ISelLowering for ideas on implementation: 1708
1951 }
1952
1953 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
1954 SmallVector<SDValue, 8> Ops;
1955
1956 if (isTailCall) {
1957 assert(0 && "Tail calls are not handled yet");
1958 // see X86 ISelLowering for ideas on implementation: 1721
1959 }
1960 // If this is a direct call, pass the chain and the callee
1961 if (Callee.getNode()) {
1962 Ops.push_back(Chain);
1963 Ops.push_back(Callee);
1964 }
1965
1966 if (isTailCall) {
1967 assert(0 && "Tail calls are not handled yet");
1968 // see X86 ISelLowering for ideas on implementation: 1739
1969 }
1970
1971 // Add argument registers to the end of the list so that they are known
1972 // live into the call
1973 for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
1974 Ops.push_back(DAG.getRegister(
1975 RegsToPass[i].first,
1976 RegsToPass[i].second.getValueType()));
1977 }
1978 if (InFlag.getNode()) {
1979 Ops.push_back(InFlag);
1980 }
1981
1982 // Emit Tail Call
1983 if (isTailCall) {
1984 assert(0 && "Tail calls are not handled yet");
1985 // see X86 ISelLowering for ideas on implementation: 1762
1986 }
1987
1988 Chain = DAG.getNode(AMDILISD::CALL,
1989 dl,
1990 NodeTys, &Ops[0], Ops.size());
1991 InFlag = Chain.getValue(1);
1992
1993 // Create the CALLSEQ_END node
1994 Chain = DAG.getCALLSEQ_END(
1995 Chain,
1996 DAG.getIntPtrConstant(NumBytes, true),
1997 DAG.getIntPtrConstant(0, true),
1998 InFlag);
1999 InFlag = Chain.getValue(1);
2000 // Handle result values, copying them out of physregs into vregs that
2001 // we return
2002 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2003 InVals);
2004 }
2005 static void checkMADType(
2006 SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
2007 {
2008 bool globalLoadStore = false;
2009 is24bitMAD = false;
2010 is32bitMAD = false;
  // NOTE: this early return disables the MAD detection below, so both flags
  // are always reported as false.
  return;
  assert(Op.getOpcode() == ISD::ADD && "The opcode must be an add in order for "
      "this to work correctly!");
2014 if (Op.getNode()->use_empty()) {
2015 return;
2016 }
2017 for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
2018 nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
2019 SDNode *ptr = *nBegin;
2020 const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
2021 // If we are not a LSBaseSDNode then we don't do this
2022 // optimization.
2023 // If we are a LSBaseSDNode, but the op is not the offset
2024 // or base pointer, then we don't do this optimization
2025 // (i.e. we are the value being stored)
2026 if (!lsNode ||
2027 (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
2028 return;
2029 }
2030 const PointerType *PT =
2031 dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
2032 unsigned as = PT->getAddressSpace();
2033 switch(as) {
2034 default:
2035 globalLoadStore = true;
2036 case AMDILAS::PRIVATE_ADDRESS:
2037 if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
2038 globalLoadStore = true;
2039 }
2040 break;
2041 case AMDILAS::CONSTANT_ADDRESS:
2042 if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
2043 globalLoadStore = true;
2044 }
2045 break;
2046 case AMDILAS::LOCAL_ADDRESS:
2047 if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
2048 globalLoadStore = true;
2049 }
2050 break;
2051 case AMDILAS::REGION_ADDRESS:
2052 if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
2053 globalLoadStore = true;
2054 }
2055 break;
2056 }
2057 }
2058 if (globalLoadStore) {
2059 is32bitMAD = true;
2060 } else {
2061 is24bitMAD = true;
2062 }
2063 }
2064
2065 SDValue
2066 AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
2067 {
2068 SDValue LHS = Op.getOperand(0);
2069 SDValue RHS = Op.getOperand(1);
2070 DebugLoc DL = Op.getDebugLoc();
2071 EVT OVT = Op.getValueType();
2072 SDValue DST;
2073 const AMDILSubtarget *stm = &this->getTargetMachine()
2074 .getSubtarget<AMDILSubtarget>();
2075 bool isVec = OVT.isVector();
2076 if (OVT.getScalarType() == MVT::i64) {
2077 MVT INTTY = MVT::i32;
2078 if (OVT == MVT::v2i64) {
2079 INTTY = MVT::v2i32;
2080 }
2081 if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
2082 && INTTY == MVT::i32) {
2083 DST = DAG.getNode(AMDILISD::ADD,
2084 DL,
2085 OVT,
2086 LHS, RHS);
2087 } else {
2088 SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
2089 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
2090 LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
2091 RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
2092 LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
2093 RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
2094 INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
2095 INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
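      // Propagate the carry: after the low add, INTLO <u RHSLO exactly when
      // the 32-bit add wrapped. AMDILISD::CMP yields an all-ones mask on
      // true, so negating it produces the +1 to add into the high half.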
2096 SDValue cmp;
2097 cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2098 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
2099 INTLO, RHSLO);
2100 cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
2101 INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
2102 DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
2103 INTLO, INTHI);
2104 }
2105 } else {
2106 if (LHS.getOpcode() == ISD::FrameIndex ||
2107 RHS.getOpcode() == ISD::FrameIndex) {
2108 DST = DAG.getNode(AMDILISD::ADDADDR,
2109 DL,
2110 OVT,
2111 LHS, RHS);
2112 } else {
2113 if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
2114 && LHS.getNumOperands()
2115 && RHS.getNumOperands()) {
2116 bool is24bitMAD = false;
2117 bool is32bitMAD = false;
2118 const ConstantSDNode *LHSConstOpCode =
2119 dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
2120 const ConstantSDNode *RHSConstOpCode =
2121 dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
2122 if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
2123 || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
2124 || LHS.getOpcode() == ISD::MUL
2125 || RHS.getOpcode() == ISD::MUL) {
2126 SDValue Op1, Op2, Op3;
2127 // FIXME: Fix this so that it works for unsigned 24bit ops.
2128 if (LHS.getOpcode() == ISD::MUL) {
2129 Op1 = LHS.getOperand(0);
2130 Op2 = LHS.getOperand(1);
2131 Op3 = RHS;
2132 } else if (RHS.getOpcode() == ISD::MUL) {
2133 Op1 = RHS.getOperand(0);
2134 Op2 = RHS.getOperand(1);
2135 Op3 = LHS;
2136 } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
2137 Op1 = LHS.getOperand(0);
2138 Op2 = DAG.getConstant(
2139 1 << LHSConstOpCode->getZExtValue(), MVT::i32);
2140 Op3 = RHS;
2141 } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
2142 Op1 = RHS.getOperand(0);
2143 Op2 = DAG.getConstant(
2144 1 << RHSConstOpCode->getZExtValue(), MVT::i32);
2145 Op3 = LHS;
2146 }
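          // At this point the add has been rewritten as Op1 * Op2 + Op3:
          // a shift-by-constant on either side was strength-reduced to a
          // multiply by (1 << c) so both patterns can feed a single MAD.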
2147 checkMADType(Op, stm, is24bitMAD, is32bitMAD);
2148 // We can possibly do a MAD transform!
2149 if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
2150 uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
2151 SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
2152 DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
2153 DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
                Op1, Op2, Op3);
            // The MAD fully replaces the ADD; don't fall through to the
            // unconditional ADD below, which would overwrite DST.
            return DST;
2155 } else if(is32bitMAD) {
2156 SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
2157 DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
2158 DL, Tys, DAG.getEntryNode(),
2159 DAG.getConstant(
2160 AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
                Op1, Op2, Op3);
            return DST;
2162 }
2163 }
2164 }
2165 DST = DAG.getNode(AMDILISD::ADD,
2166 DL,
2167 OVT,
2168 LHS, RHS);
2169 }
2170 }
2171 return DST;
2172 }
2173 SDValue
2174 AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
2175 uint32_t bits) const
2176 {
2177 DebugLoc DL = Op.getDebugLoc();
2178 EVT INTTY = Op.getValueType();
2179 EVT FPTY;
2180 if (INTTY.isVector()) {
2181 FPTY = EVT(MVT::getVectorVT(MVT::f32,
2182 INTTY.getVectorNumElements()));
2183 } else {
2184 FPTY = EVT(MVT::f32);
2185 }
  /* static inline uint
     __clz_Nbit(uint x)
     {
       uint ix = 0x3f800000U | x;
       float tp = as_float(ix);
       float t = tp + -1.0f;
       uint tint = as_uint(t);
       int cmp = (x != 0);
       uint tsrc = tint >> 23;
       uint tmask = tsrc & 0xffU;
       uint cst = (103 + N) - tmask;
       return cmp ? cst : N;
     }
  */
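  // Illustrative walk-through (not in the original source): for x = 1 and
  // N = 32, 0x3f800000 | 1 reinterprets as 1.0f + 2^-23; subtracting 1.0f
  // leaves 2^-23, whose biased exponent field is 127 - 23 = 104, so
  // cst = (103 + 32) - 104 = 31 == clz(1).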
  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
      && "genCLZuN only works on 32-bit types");
2202 // uint x = Op
2203 SDValue x = Op;
2204 // xornode = 0x3f800000 | x
2205 SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
2206 DAG.getConstant(0x3f800000, INTTY), x);
2207 // float tp = as_float(xornode)
2208 SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
2209 // float t = tp + -1.0f
2210 SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
2211 DAG.getConstantFP(-1.0f, FPTY));
2212 // uint tint = as_uint(t)
2213 SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
2214 // int cmp = (x != 0)
2215 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2216 DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
2217 DAG.getConstant(0, INTTY));
2218 // uint tsrc = tint >> 23
2219 SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
2220 DAG.getConstant(23, INTTY));
2221 // uint tmask = tsrc & 0xFF
2222 SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
2223 DAG.getConstant(0xFFU, INTTY));
2224 // uint cst = (103 + bits) - tmask
2225 SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
2226 DAG.getConstant((103U + bits), INTTY), tmask);
2227 // return cmp ? cst : N
2228 cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
2229 DAG.getConstant(bits, INTTY));
2230 return cst;
2231 }
2232
2233 SDValue
2234 AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
2235 {
2236 SDValue DST = SDValue();
2237 DebugLoc DL = Op.getDebugLoc();
2238 EVT INTTY = Op.getValueType();
2239 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2240 &this->getTargetMachine())->getSubtargetImpl();
2241 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
2242 //__clz_32bit(uint u)
2243 //{
2244 // int z = __amdil_ffb_hi(u) ;
2245 // return z < 0 ? 32 : z;
2246 // }
2247 // uint u = op
2248 SDValue u = Op;
2249 // int z = __amdil_ffb_hi(u)
2250 SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
2251 // int cmp = z < 0
2252 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2253 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
2254 z, DAG.getConstant(0, INTTY));
2255 // return cmp ? 32 : z
2256 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
2257 DAG.getConstant(32, INTTY), z);
2258 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
2259 // static inline uint
2260 //__clz_32bit(uint x)
2261 //{
2262 // uint zh = __clz_16bit(x >> 16);
2263 // uint zl = __clz_16bit(x & 0xffffU);
2264 // return zh == 16U ? 16U + zl : zh;
2265 //}
2266 // uint x = Op
2267 SDValue x = Op;
2268 // uint xs16 = x >> 16
2269 SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
2270 DAG.getConstant(16, INTTY));
2271 // uint zh = __clz_16bit(xs16)
2272 SDValue zh = genCLZuN(xs16, DAG, 16);
2273 // uint xa16 = x & 0xFFFF
2274 SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
2275 DAG.getConstant(0xFFFFU, INTTY));
2276 // uint zl = __clz_16bit(xa16)
2277 SDValue zl = genCLZuN(xa16, DAG, 16);
2278 // uint cmp = zh == 16U
2279 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2280 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
2281 zh, DAG.getConstant(16U, INTTY));
2282 // uint zl16 = zl + 16
2283 SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
2284 DAG.getConstant(16, INTTY), zl);
2285 // return cmp ? zl16 : zh
2286 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
2287 cmp, zl16, zh);
2288 } else {
2289 assert(0 && "Attempting to generate a CLZ function with an"
2290 " unknown graphics card");
2291 }
2292 return DST;
2293 }
2294 SDValue
2295 AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
2296 {
2297 SDValue DST = SDValue();
2298 DebugLoc DL = Op.getDebugLoc();
2299 EVT INTTY;
2300 EVT LONGTY = Op.getValueType();
2301 bool isVec = LONGTY.isVector();
2302 if (isVec) {
2303 INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
2304 .getVectorNumElements()));
2305 } else {
2306 INTTY = EVT(MVT::i32);
2307 }
2308 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2309 &this->getTargetMachine())->getSubtargetImpl();
2310 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
2311 // Evergreen:
2312 // static inline uint
2313 // __clz_u64(ulong x)
2314 // {
2315 //uint zhi = __clz_32bit((uint)(x >> 32));
2316 //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
2317 //return zhi == 32U ? 32U + zlo : zhi;
2318 //}
2319 //ulong x = op
2320 SDValue x = Op;
    // uint xlo = x & 0xFFFFFFFF
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = x >> 32
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
2325 // uint zhi = __clz_32bit(xhi)
2326 SDValue zhi = genCLZu32(xhi, DAG);
2327 // uint zlo = __clz_32bit(xlo)
2328 SDValue zlo = genCLZu32(xlo, DAG);
2329 // uint cmp = zhi == 32
2330 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2331 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
2332 zhi, DAG.getConstant(32U, INTTY));
2333 // uint zlop32 = 32 + zlo
2334 SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
2335 DAG.getConstant(32U, INTTY), zlo);
2336 // return cmp ? zlop32: zhi
2337 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
2338 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
2339 // HD4XXX:
2340 // static inline uint
2341 //__clz_64bit(ulong x)
2342 //{
2343 //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
2344 //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
2345 //uint zl = __clz_23bit((uint)x & 0x7fffffU);
2346 //uint r = zh == 18U ? 18U + zm : zh;
2347 //return zh + zm == 41U ? 41U + zl : r;
2348 //}
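    // The operand is split into 18 + 23 + 23 bit chunks because the
    // float-normalization trick in genCLZuN can only handle inputs that fit
    // in the 23-bit single-precision mantissa field.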
2349 //ulong x = Op
2350 SDValue x = Op;
2351 // ulong xs46 = x >> 46
2352 SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
2353 DAG.getConstant(46, LONGTY));
2354 // uint ixs46 = (uint)xs46
2355 SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
2356 // ulong xs23 = x >> 23
2357 SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
2358 DAG.getConstant(23, LONGTY));
2359 // uint ixs23 = (uint)xs23
2360 SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
2361 // uint xs23m23 = ixs23 & 0x7FFFFF
2362 SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
2363 DAG.getConstant(0x7fffffU, INTTY));
2364 // uint ix = (uint)x
2365 SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
2366 // uint xm23 = ix & 0x7FFFFF
2367 SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
2368 DAG.getConstant(0x7fffffU, INTTY));
2369 // uint zh = __clz_23bit(ixs46)
2370 SDValue zh = genCLZuN(ixs46, DAG, 23);
2371 // uint zm = __clz_23bit(xs23m23)
2372 SDValue zm = genCLZuN(xs23m23, DAG, 23);
2373 // uint zl = __clz_23bit(xm23)
2374 SDValue zl = genCLZuN(xm23, DAG, 23);
2375 // uint zhm5 = zh - 5
2376 SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
2377 DAG.getConstant(-5U, INTTY));
2378 SDValue const18 = DAG.getConstant(18, INTTY);
2379 SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = zhm5 == 18
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = zhm5zm == 41
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
2397 // return cmp2 ? zlp41 : r
2398 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
2399 } else {
2400 assert(0 && "Attempting to generate a CLZ function with an"
2401 " unknown graphics card");
2402 }
2403 return DST;
2404 }
2405 SDValue
2406 AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
2407 bool includeSign) const
2408 {
2409 EVT INTVT;
2410 EVT LONGVT;
2411 SDValue DST;
2412 DebugLoc DL = RHS.getDebugLoc();
2413 EVT RHSVT = RHS.getValueType();
2414 bool isVec = RHSVT.isVector();
2415 if (isVec) {
2416 LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
2417 .getVectorNumElements()));
2418 INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
2419 .getVectorNumElements()));
2420 } else {
2421 LONGVT = EVT(MVT::i64);
2422 INTVT = EVT(MVT::i32);
2423 }
2424 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2425 &this->getTargetMachine())->getSubtargetImpl();
2426 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2427 // unsigned version:
2428 // uint uhi = (uint)(d * 0x1.0p-32);
2429 // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
2430 // return as_ulong2((uint2)(ulo, uhi));
2431 //
2432 // signed version:
2433 // double ad = fabs(d);
2434 // long l = unsigned_version(ad);
2435 // long nl = -l;
2436 // return d == ad ? l : nl;
2437 SDValue d = RHS;
2438 if (includeSign) {
2439 d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
2440 }
2441 SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
2442 DAG.getConstantFP(0x2f800000, RHSVT));
2443 SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
2444 SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
2445 ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
2446 DAG.getConstantFP(0xcf800000, RHSVT), d);
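    // 0x2f800000 and 0xcf800000 are the IEEE-754 single-precision bit
    // patterns of 0x1.0p-32f and -0x1.0p+32f, the scale factors from the
    // pseudocode above.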
2447 SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
2448 SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
2449 if (includeSign) {
2450 SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
2451 SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
2452 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
2453 RHS, d);
2454 l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
2455 }
2456 DST = l;
2457 } else {
2458 /*
2459 __attribute__((always_inline)) long
2460 cast_f64_to_i64(double d)
2461 {
2462 // Convert d in to 32-bit components
2463 long x = as_long(d);
2464 xhi = LCOMPHI(x);
2465 xlo = LCOMPLO(x);
2466
2467 // Generate 'normalized' mantissa
2468 mhi = xhi | 0x00100000; // hidden bit
2469 mhi <<= 11;
2470 temp = xlo >> (32 - 11);
       mhi |= temp;
2472 mlo = xlo << 11;
2473
2474 // Compute shift right count from exponent
2475 e = (xhi >> (52-32)) & 0x7ff;
2476 sr = 1023 + 63 - e;
2477 srge64 = sr >= 64;
2478 srge32 = sr >= 32;
2479
2480 // Compute result for 0 <= sr < 32
2481 rhi0 = mhi >> (sr &31);
2482 rlo0 = mlo >> (sr &31);
2483 temp = mhi << (32 - sr);
2484 temp |= rlo0;
2485 rlo0 = sr ? temp : rlo0;
2486
2487 // Compute result for 32 <= sr
2488 rhi1 = 0;
2489 rlo1 = srge64 ? 0 : rhi0;
2490
2491 // Pick between the 2 results
2492 rhi = srge32 ? rhi1 : rhi0;
2493 rlo = srge32 ? rlo1 : rlo0;
2494
2495 // Optional saturate on overflow
2496 srlt0 = sr < 0;
2497 rhi = srlt0 ? MAXVALUE : rhi;
2498 rlo = srlt0 ? MAXVALUE : rlo;
2499
2500 // Create long
2501 res = LCREATE( rlo, rhi );
2502
2503 // Deal with sign bit (ignoring whether result is signed or unsigned value)
2504 if (includeSign) {
         sign = ((signed int) xhi) >> 31;  // fill with sign bit
2506 sign = LCREATE( sign, sign );
2507 res += sign;
2508 res ^= sign;
2509 }
2510
2511 return res;
2512 }
2513 */
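    // Illustrative check (not in the original source): d = 2.0 has
    // xhi = 0x40000000, so e = 0x400 = 1024 and sr = 1023 + 63 - 1024 = 62.
    // The normalized mantissa is mhi = 0x80000000, and since 32 <= sr < 64
    // the result collapses to mhi >> (sr & 31) = 2, as expected.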
2514 SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
2515 SDValue c32 = DAG.getConstant( 32, INTVT );
2516
2517 // Convert d in to 32-bit components
2518 SDValue d = RHS;
2519 SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
2520 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
2521 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
2522
2523 // Generate 'normalized' mantissa
2524 SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
2525 xhi, DAG.getConstant( 0x00100000, INTVT ) );
2526 mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
2527 SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
2528 xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
2529 mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
2530 SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );
2531
2532 // Compute shift right count from exponent
2533 SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
2534 xhi, DAG.getConstant( 52-32, INTVT ) );
2535 e = DAG.getNode( ISD::AND, DL, INTVT,
2536 e, DAG.getConstant( 0x7ff, INTVT ) );
2537 SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
2538 DAG.getConstant( 1023 + 63, INTVT ), e );
2539 SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2540 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
2541 sr, DAG.getConstant(64, INTVT));
2542 SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2543 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
2544 sr, DAG.getConstant(32, INTVT));
2545
2546 // Compute result for 0 <= sr < 32
2547 SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
2548 SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
2549 temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
2550 temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
2551 temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
2552 rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );
2553
2554 // Compute result for 32 <= sr
2555 SDValue rhi1 = DAG.getConstant( 0, INTVT );
2556 SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2557 srge64, rhi1, rhi0 );
2558
2559 // Pick between the 2 results
2560 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2561 srge32, rhi1, rhi0 );
2562 SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2563 srge32, rlo1, rlo0 );
2564
2565 // Create long
2566 SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
2567
2568 // Deal with sign bit
2569 if (includeSign) {
2570 SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
2571 xhi, DAG.getConstant( 31, INTVT ) );
2572 sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
2573 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
2574 res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
2575 }
2576 DST = res;
2577 }
2578 return DST;
2579 }
2580 SDValue
2581 AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
2582 bool includeSign) const
2583 {
2584 EVT INTVT;
2585 EVT LONGVT;
2586 DebugLoc DL = RHS.getDebugLoc();
2587 EVT RHSVT = RHS.getValueType();
2588 bool isVec = RHSVT.isVector();
2589 if (isVec) {
2590 LONGVT = EVT(MVT::getVectorVT(MVT::i64,
2591 RHSVT.getVectorNumElements()));
2592 INTVT = EVT(MVT::getVectorVT(MVT::i32,
2593 RHSVT.getVectorNumElements()));
2594 } else {
2595 LONGVT = EVT(MVT::i64);
2596 INTVT = EVT(MVT::i32);
2597 }
2598 /*
2599 __attribute__((always_inline)) int
2600 cast_f64_to_[u|i]32(double d)
2601 {
2602 // Convert d in to 32-bit components
2603 long x = as_long(d);
2604 xhi = LCOMPHI(x);
2605 xlo = LCOMPLO(x);
2606
2607 // Generate 'normalized' mantissa
2608 mhi = xhi | 0x00100000; // hidden bit
2609 mhi <<= 11;
2610 temp = xlo >> (32 - 11);
      mhi |= temp;
2612
2613 // Compute shift right count from exponent
2614 e = (xhi >> (52-32)) & 0x7ff;
2615 sr = 1023 + 31 - e;
2616 srge32 = sr >= 32;
2617
2618 // Compute result for 0 <= sr < 32
2619 res = mhi >> (sr &31);
2620 res = srge32 ? 0 : res;
2621
2622 // Optional saturate on overflow
2623 srlt0 = sr < 0;
2624 res = srlt0 ? MAXVALUE : res;
2625
2626 // Deal with sign bit (ignoring whether result is signed or unsigned value)
2627 if (includeSign) {
        sign = ((signed int) xhi) >> 31;  // fill with sign bit
2629 res += sign;
2630 res ^= sign;
2631 }
2632
2633 return res;
2634 }
2635 */
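  // Illustrative check (not in the original source): d = 2.0 gives
  // e = 1024, sr = 1023 + 31 - 1024 = 30, and a normalized mantissa of
  // mhi = 0x80000000, so res = mhi >> 30 = 2.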
2636 SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
2637
2638 // Convert d in to 32-bit components
2639 SDValue d = RHS;
2640 SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
2641 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
2642 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
2643
2644 // Generate 'normalized' mantissa
2645 SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
2646 xhi, DAG.getConstant( 0x00100000, INTVT ) );
2647 mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
2648 SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
2649 xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
2650 mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
2651
2652 // Compute shift right count from exponent
2653 SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
2654 xhi, DAG.getConstant( 52-32, INTVT ) );
2655 e = DAG.getNode( ISD::AND, DL, INTVT,
2656 e, DAG.getConstant( 0x7ff, INTVT ) );
2657 SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
2658 DAG.getConstant( 1023 + 31, INTVT ), e );
2659 SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2660 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
2661 sr, DAG.getConstant(32, INTVT));
2662
2663 // Compute result for 0 <= sr < 32
2664 SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
2665 res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2666 srge32, DAG.getConstant(0,INTVT), res );
2667
2668 // Deal with sign bit
2669 if (includeSign) {
2670 SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
2671 xhi, DAG.getConstant( 31, INTVT ) );
2672 res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
2673 res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
2674 }
2675 return res;
2676 }
2677 SDValue
2678 AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
2679 {
2680 SDValue RHS = Op.getOperand(0);
2681 EVT RHSVT = RHS.getValueType();
2682 MVT RST = RHSVT.getScalarType().getSimpleVT();
2683 EVT LHSVT = Op.getValueType();
2684 MVT LST = LHSVT.getScalarType().getSimpleVT();
2685 DebugLoc DL = Op.getDebugLoc();
2686 SDValue DST;
2687 const AMDILTargetMachine*
2688 amdtm = reinterpret_cast<const AMDILTargetMachine*>
2689 (&this->getTargetMachine());
2690 const AMDILSubtarget*
2691 stm = static_cast<const AMDILSubtarget*>(
2692 amdtm->getSubtargetImpl());
2693 if (RST == MVT::f64 && RHSVT.isVector()
2694 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We don't support vector 64-bit floating point conversions.
2696 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
2697 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2698 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
2699 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
2700 if (!x) {
2701 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
2702 } else {
2703 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
2704 DST, op, DAG.getTargetConstant(x, MVT::i32));
2705 }
2706 }
2707 } else {
2708 if (RST == MVT::f64
2709 && LST == MVT::i32) {
2710 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2711 DST = SDValue(Op.getNode(), 0);
2712 } else {
2713 DST = genf64toi32(RHS, DAG, true);
2714 }
2715 } else if (RST == MVT::f64
2716 && LST == MVT::i64) {
2717 DST = genf64toi64(RHS, DAG, true);
2718 } else if (RST == MVT::f64
2719 && (LST == MVT::i8 || LST == MVT::i16)) {
2720 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2721 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
2722 } else {
2723 SDValue ToInt = genf64toi32(RHS, DAG, true);
2724 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
2725 }
2726
2727 } else {
2728 DST = SDValue(Op.getNode(), 0);
2729 }
2730 }
2731 return DST;
2732 }
2733
2734 SDValue
2735 AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
2736 {
2737 SDValue DST;
2738 SDValue RHS = Op.getOperand(0);
2739 EVT RHSVT = RHS.getValueType();
2740 MVT RST = RHSVT.getScalarType().getSimpleVT();
2741 EVT LHSVT = Op.getValueType();
2742 MVT LST = LHSVT.getScalarType().getSimpleVT();
2743 DebugLoc DL = Op.getDebugLoc();
2744 const AMDILTargetMachine*
2745 amdtm = reinterpret_cast<const AMDILTargetMachine*>
2746 (&this->getTargetMachine());
2747 const AMDILSubtarget*
2748 stm = static_cast<const AMDILSubtarget*>(
2749 amdtm->getSubtargetImpl());
2750 if (RST == MVT::f64 && RHSVT.isVector()
2751 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We don't support vector 64-bit floating point conversions.
2753 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
2754 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2755 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
      op = DAG.getNode(ISD::FP_TO_UINT, DL, LST, op);
2757 if (!x) {
2758 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
2759 } else {
2760 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
2761 DST, op, DAG.getTargetConstant(x, MVT::i32));
2762 }
2763
2764 }
2765 } else {
2766 if (RST == MVT::f64
2767 && LST == MVT::i32) {
2768 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2769 DST = SDValue(Op.getNode(), 0);
2770 } else {
2771 DST = genf64toi32(RHS, DAG, false);
2772 }
2773 } else if (RST == MVT::f64
2774 && LST == MVT::i64) {
2775 DST = genf64toi64(RHS, DAG, false);
2776 } else if (RST == MVT::f64
2777 && (LST == MVT::i8 || LST == MVT::i16)) {
2778 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2779 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
2780 } else {
2781 SDValue ToInt = genf64toi32(RHS, DAG, false);
2782 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
2783 }
2784
2785 } else {
2786 DST = SDValue(Op.getNode(), 0);
2787 }
2788 }
2789 return DST;
2790 }
2791 SDValue
2792 AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
2793 SelectionDAG &DAG) const
2794 {
2795 EVT RHSVT = RHS.getValueType();
2796 DebugLoc DL = RHS.getDebugLoc();
2797 EVT INTVT;
2798 EVT LONGVT;
2799 bool isVec = RHSVT.isVector();
2800 if (isVec) {
2801 LONGVT = EVT(MVT::getVectorVT(MVT::i64,
2802 RHSVT.getVectorNumElements()));
2803 INTVT = EVT(MVT::getVectorVT(MVT::i32,
2804 RHSVT.getVectorNumElements()));
2805 } else {
2806 LONGVT = EVT(MVT::i64);
2807 INTVT = EVT(MVT::i32);
2808 }
2809 SDValue x = RHS;
2810 const AMDILTargetMachine*
2811 amdtm = reinterpret_cast<const AMDILTargetMachine*>
2812 (&this->getTargetMachine());
2813 const AMDILSubtarget*
2814 stm = static_cast<const AMDILSubtarget*>(
2815 amdtm->getSubtargetImpl());
2816 if (stm->calVersion() >= CAL_VERSION_SC_135) {
2817 // unsigned x = RHS;
    // ulong xd = ((ulong)0x4330_0000 << 32) | x;
2819 // double d = as_double( xd );
2820 // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
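    // This works because any 32-bit x fits in the 52-bit mantissa: OR-ing x
    // into the low word of the double 0x1.0p+52 yields exactly 2^52 + x, so
    // subtracting 2^52 recovers x converted to double.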
2821 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
2822 DAG.getConstant( 0x43300000, INTVT ) );
2823 SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
2824 SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
2825 DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
2826 return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
2827 } else {
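    // No fast path: assemble the IEEE-754 double by hand from exponent and
    // fraction. Illustrative check (not in the original source): x = 1 has
    // clz = 31, so exp = 1023; the shifted fraction loses its hidden bit
    // and becomes 0, giving the bit pattern 0x3FF00000_00000000 == 1.0.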
2828 SDValue clz = genCLZu32(x, DAG);
2829
2830 // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
2831 // Except for an input 0... which requires a 0 exponent
2832 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
2833 DAG.getConstant( (1023+31), INTVT), clz );
2834 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );
2835
2836 // Normalize frac
2837 SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );
2838
2839 // Eliminate hidden bit
2840 rhi = DAG.getNode( ISD::AND, DL, INTVT,
2841 rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
2842
2843 // Pack exponent and frac
2844 SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
2845 rhi, DAG.getConstant( (32 - 11), INTVT ) );
2846 rhi = DAG.getNode( ISD::SRL, DL, INTVT,
2847 rhi, DAG.getConstant( 11, INTVT ) );
2848 exp = DAG.getNode( ISD::SHL, DL, INTVT,
2849 exp, DAG.getConstant( 20, INTVT ) );
2850 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
2851
2852 // Convert 2 x 32 in to 1 x 64, then to double precision float type
2853 SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
2854 return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
2855 }
2856 }
2857 SDValue
2858 AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
2859 SelectionDAG &DAG) const
2860 {
2861 EVT RHSVT = RHS.getValueType();
2862 DebugLoc DL = RHS.getDebugLoc();
2863 EVT INTVT;
2864 EVT LONGVT;
2865 bool isVec = RHSVT.isVector();
2866 if (isVec) {
2867 INTVT = EVT(MVT::getVectorVT(MVT::i32,
2868 RHSVT.getVectorNumElements()));
2869 } else {
2870 INTVT = EVT(MVT::i32);
2871 }
2872 LONGVT = RHSVT;
2873 SDValue x = RHS;
2874 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2875 &this->getTargetMachine())->getSubtargetImpl();
2876 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2877 // double dhi = (double)(as_uint2(x).y);
2878 // double dlo = (double)(as_uint2(x).x);
2879 // return mad(dhi, 0x1.0p+32, dlo)
2880 SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
2881 dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
2882 SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
2883 dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
2884 return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
2885 DAG.getConstantFP(0x4f800000, LHSVT), dlo);
2886 } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
2887 // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
2888 // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
2889 // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
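    // Here lo == 2^52 + (u & 0xffffffff) and hi == 2^84 + (u >> 32) * 2^32,
    // so hi - (0x1.0p+84 + 0x1.0p+52) + lo ==
    // (u >> 32) * 2^32 + (u & 0xffffffff) == u, correctly rounded by the
    // final add.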
2890 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
2891 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
2892 SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
2893 SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
2894 SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
2895 SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
2896 SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
2897 DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
2898 hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
2899 return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
2900
2901 } else {
2902 SDValue clz = genCLZu64(x, DAG);
2903 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
2904 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
2905
2906 // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
2907 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
2908 DAG.getConstant( (1023+63), INTVT), clz );
2909 SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
2910 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2911 mash, exp, mash ); // exp = exp, or 0 if input was 0
2912
2913 // Normalize frac
2914 SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
2915 clz, DAG.getConstant( 31, INTVT ) );
2916 SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
2917 DAG.getConstant( 32, INTVT ), clz31 );
2918 SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
2919 SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
2920 t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
2921 SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
2922 SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
2923 SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
2924 SDValue rlo2 = DAG.getConstant( 0, INTVT );
2925 SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
2926 clz, DAG.getConstant( 32, INTVT ) );
2927 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2928 clz32, rhi2, rhi1 );
2929 SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2930 clz32, rlo2, rlo1 );
2931
2932 // Eliminate hidden bit
2933 rhi = DAG.getNode( ISD::AND, DL, INTVT,
2934 rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
2935
2936 // Save bits needed to round properly
2937 SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
2938 rlo, DAG.getConstant( 0x7ff, INTVT ) );
2939
2940 // Pack exponent and frac
2941 rlo = DAG.getNode( ISD::SRL, DL, INTVT,
2942 rlo, DAG.getConstant( 11, INTVT ) );
2943 SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
2944 rhi, DAG.getConstant( (32 - 11), INTVT ) );
2945 rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
2946 rhi = DAG.getNode( ISD::SRL, DL, INTVT,
2947 rhi, DAG.getConstant( 11, INTVT ) );
2948 exp = DAG.getNode( ISD::SHL, DL, INTVT,
2949 exp, DAG.getConstant( 20, INTVT ) );
2950 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
2951
2952 // Compute rounding bit
2953 SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
2954 rlo, DAG.getConstant( 1, INTVT ) );
2955 SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
2956 round, DAG.getConstant( 0x3ff, INTVT ) );
2957 grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2958 DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
2959 grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
2960 grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
2961 round = DAG.getNode( ISD::SRL, DL, INTVT,
2962 round, DAG.getConstant( 10, INTVT ) );
2963 round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
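    // Round-to-nearest-even: 'round' now holds the guard bit (bit 10 of the
    // 11 discarded bits) masked by 'grs', which is nonzero when any sticky
    // bit is set or the kept LSB is odd; adding it below rounds up exactly
    // in those cases.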
2964
2965 // Add rounding bit
2966 SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
2967 round, DAG.getConstant( 0, INTVT ) );
2968 SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
2969 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
2970 return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
2971 }
2972 }
2973 SDValue
2974 AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
2975 {
2976 SDValue RHS = Op.getOperand(0);
2977 EVT RHSVT = RHS.getValueType();
2978 MVT RST = RHSVT.getScalarType().getSimpleVT();
2979 EVT LHSVT = Op.getValueType();
2980 MVT LST = LHSVT.getScalarType().getSimpleVT();
2981 DebugLoc DL = Op.getDebugLoc();
2982 SDValue DST;
2983 EVT INTVT;
2984 EVT LONGVT;
2985 const AMDILTargetMachine*
2986 amdtm = reinterpret_cast<const AMDILTargetMachine*>
2987 (&this->getTargetMachine());
2988 const AMDILSubtarget*
2989 stm = static_cast<const AMDILSubtarget*>(
2990 amdtm->getSubtargetImpl());
2991 if (LST == MVT::f64 && LHSVT.isVector()
2992 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We don't support vector 64-bit floating point conversions.
2994 DST = Op;
2995 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
2996 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2997 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
2998 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
2999 if (!x) {
3000 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3001 } else {
3002 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3003 op, DAG.getTargetConstant(x, MVT::i32));
3004 }
3005
3006 }
3007 } else {
3008
3009 if (RST == MVT::i32
3010 && LST == MVT::f64) {
3011 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3012 DST = SDValue(Op.getNode(), 0);
3013 } else {
3014 DST = genu32tof64(RHS, LHSVT, DAG);
3015 }
3016 } else if (RST == MVT::i64
3017 && LST == MVT::f64) {
3018 DST = genu64tof64(RHS, LHSVT, DAG);
3019 } else {
3020 DST = SDValue(Op.getNode(), 0);
3021 }
3022 }
3023 return DST;
3024 }
3025
3026 SDValue
3027 AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
3028 {
3029 SDValue RHS = Op.getOperand(0);
3030 EVT RHSVT = RHS.getValueType();
3031 MVT RST = RHSVT.getScalarType().getSimpleVT();
3032 EVT INTVT;
3033 EVT LONGVT;
3034 SDValue DST;
3035 bool isVec = RHSVT.isVector();
3036 DebugLoc DL = Op.getDebugLoc();
3037 EVT LHSVT = Op.getValueType();
3038 MVT LST = LHSVT.getScalarType().getSimpleVT();
3039 const AMDILTargetMachine*
3040 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3041 (&this->getTargetMachine());
3042 const AMDILSubtarget*
3043 stm = static_cast<const AMDILSubtarget*>(
3044 amdtm->getSubtargetImpl());
3045 if (LST == MVT::f64 && LHSVT.isVector()
3046 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We don't support vector 64-bit floating point conversions.
3048 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
3049 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3050 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
      op = DAG.getNode(ISD::SINT_TO_FP, DL, LST, op);
3052 if (!x) {
3053 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3054 } else {
3055 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3056 op, DAG.getTargetConstant(x, MVT::i32));
3057 }
3058
3059 }
3060 } else {
3061
3062 if (isVec) {
3063 LONGVT = EVT(MVT::getVectorVT(MVT::i64,
3064 RHSVT.getVectorNumElements()));
3065 INTVT = EVT(MVT::getVectorVT(MVT::i32,
3066 RHSVT.getVectorNumElements()));
3067 } else {
3068 LONGVT = EVT(MVT::i64);
3069 INTVT = EVT(MVT::i32);
3070 }
3071 MVT RST = RHSVT.getScalarType().getSimpleVT();
3072 if ((RST == MVT::i32 || RST == MVT::i64)
3073 && LST == MVT::f64) {
3074 if (RST == MVT::i32) {
3075 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3076 DST = SDValue(Op.getNode(), 0);
3077 return DST;
3078 }
3079 }
3080 SDValue c31 = DAG.getConstant( 31, INTVT );
3081 SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
3082
3083 SDValue S; // Sign, as 0 or -1
3084 SDValue Sbit; // Sign bit, as one bit, MSB only.
3085 if (RST == MVT::i32) {
3086 Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
3087 S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
3088 } else { // 64-bit case... SRA of 64-bit values is slow
3089 SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
3090 Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
3091 SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
3092 S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
3093 }
3094
3095 // get abs() of input value, given sign as S (0 or -1)
3096 // SpI = RHS + S
3097 SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
3098 // SpIxS = SpI ^ S
3099 SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
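      // A worked example of the abs() trick above, for 32-bit x = -5:
      //   S     = x >> 31   = -1 (all ones)
      //   SpI   = x + S     = -6
      //   SpIxS = SpI ^ S   = ~(-6) = 5 = |x|
      // For x >= 0, S is 0 and both steps are identities.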
3100
3101 // Convert unsigned value to double precision
3102 SDValue R;
3103 if (RST == MVT::i32) {
3104 // r = cast_u32_to_f64(SpIxS)
3105 R = genu32tof64(SpIxS, LHSVT, DAG);
3106 } else {
3107 // r = cast_u64_to_f64(SpIxS)
3108 R = genu64tof64(SpIxS, LHSVT, DAG);
3109 }
3110
3111 // drop in the sign bit
3112 SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
3113 SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
3114 SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
3115 thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
3116 t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
3117 DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
3118 } else {
3119 DST = SDValue(Op.getNode(), 0);
3120 }
3121 }
3122 return DST;
3123 }
3124 SDValue
3125 AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
3126 {
3127 SDValue LHS = Op.getOperand(0);
3128 SDValue RHS = Op.getOperand(1);
3129 DebugLoc DL = Op.getDebugLoc();
3130 EVT OVT = Op.getValueType();
3131 SDValue DST;
3132 bool isVec = RHS.getValueType().isVector();
3133 if (OVT.getScalarType() == MVT::i64) {
3134 /*const AMDILTargetMachine*
3135 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3136 (&this->getTargetMachine());
3137 const AMDILSubtarget*
3138 stm = dynamic_cast<const AMDILSubtarget*>(
3139 amdtm->getSubtargetImpl());*/
3140 MVT INTTY = MVT::i32;
3141 if (OVT == MVT::v2i64) {
3142 INTTY = MVT::v2i32;
3143 }
3144 SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
3145 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
3146 LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
3147 RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
3148 LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
3149 RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
3150 INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
3151 INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
3152 //TODO: need to use IBORROW on HD5XXX and later hardware
3153 SDValue cmp;
3154 if (OVT == MVT::i64) {
3155 cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
3156 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
3157 LHSLO, RHSLO);
3158 } else {
3159 SDValue cmplo;
3160 SDValue cmphi;
3161 SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3162 DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
3163 SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3164 DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
3165 SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3166 DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
3167 SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3168 DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
3169 cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
3170 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
3171 LHSRLO, RHSRLO);
3172 cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
3173 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
3174 LHSRHI, RHSRHI);
3175 cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
3176 cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
3177 cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
3178 }
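    // AMDILISD::CMP returns all ones (-1) when LHSLO < RHSLO, i.e. when the
    // low-dword subtraction borrows, so adding cmp to INTHI below subtracts
    // the borrow from the high dword.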
3179 INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
3180 DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
3181 INTLO, INTHI);
3182 } else {
3183 DST = SDValue(Op.getNode(), 0);
3184 }
3185 return DST;
3186 }
3187 SDValue
3188 AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
3189 {
3190 EVT OVT = Op.getValueType();
3191 SDValue DST;
3192 if (OVT.getScalarType() == MVT::f64) {
3193 DST = LowerFDIV64(Op, DAG);
3194 } else if (OVT.getScalarType() == MVT::f32) {
3195 DST = LowerFDIV32(Op, DAG);
3196 } else {
3197 DST = SDValue(Op.getNode(), 0);
3198 }
3199 return DST;
3200 }
3201
3202 SDValue
3203 AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
3204 {
3205 EVT OVT = Op.getValueType();
3206 SDValue DST;
3207 if (OVT.getScalarType() == MVT::i64) {
3208 DST = LowerSDIV64(Op, DAG);
3209 } else if (OVT.getScalarType() == MVT::i32) {
3210 DST = LowerSDIV32(Op, DAG);
3211 } else if (OVT.getScalarType() == MVT::i16
3212 || OVT.getScalarType() == MVT::i8) {
3213 DST = LowerSDIV24(Op, DAG);
3214 } else {
3215 DST = SDValue(Op.getNode(), 0);
3216 }
3217 return DST;
3218 }
3219
3220 SDValue
3221 AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
3222 {
3223 EVT OVT = Op.getValueType();
3224 SDValue DST;
3225 if (OVT.getScalarType() == MVT::i64) {
3226 DST = LowerUDIV64(Op, DAG);
3227 } else if (OVT.getScalarType() == MVT::i32) {
3228 DST = LowerUDIV32(Op, DAG);
3229 } else if (OVT.getScalarType() == MVT::i16
3230 || OVT.getScalarType() == MVT::i8) {
3231 DST = LowerUDIV24(Op, DAG);
3232 } else {
3233 DST = SDValue(Op.getNode(), 0);
3234 }
3235 return DST;
3236 }
3237
3238 SDValue
3239 AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
3240 {
3241 EVT OVT = Op.getValueType();
3242 SDValue DST;
3243 if (OVT.getScalarType() == MVT::i64) {
3244 DST = LowerSREM64(Op, DAG);
3245 } else if (OVT.getScalarType() == MVT::i32) {
3246 DST = LowerSREM32(Op, DAG);
3247 } else if (OVT.getScalarType() == MVT::i16) {
3248 DST = LowerSREM16(Op, DAG);
3249 } else if (OVT.getScalarType() == MVT::i8) {
3250 DST = LowerSREM8(Op, DAG);
3251 } else {
3252 DST = SDValue(Op.getNode(), 0);
3253 }
3254 return DST;
3255 }
3256
3257 SDValue
3258 AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
3259 {
3260 EVT OVT = Op.getValueType();
3261 SDValue DST;
3262 if (OVT.getScalarType() == MVT::i64) {
3263 DST = LowerUREM64(Op, DAG);
3264 } else if (OVT.getScalarType() == MVT::i32) {
3265 DST = LowerUREM32(Op, DAG);
3266 } else if (OVT.getScalarType() == MVT::i16) {
3267 DST = LowerUREM16(Op, DAG);
3268 } else if (OVT.getScalarType() == MVT::i8) {
3269 DST = LowerUREM8(Op, DAG);
3270 } else {
3271 DST = SDValue(Op.getNode(), 0);
3272 }
3273 return DST;
3274 }
3275
3276 SDValue
3277 AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
3278 {
3279 DebugLoc DL = Op.getDebugLoc();
3280 EVT OVT = Op.getValueType();
3281 SDValue DST;
3282 bool isVec = OVT.isVector();
3283 if (OVT.getScalarType() != MVT::i64)
3284 {
3285 DST = SDValue(Op.getNode(), 0);
3286 } else {
3287 assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
3288 // TODO: This needs to be turned into a tablegen pattern
3289 SDValue LHS = Op.getOperand(0);
3290 SDValue RHS = Op.getOperand(1);
3291
3292 MVT INTTY = MVT::i32;
3293 if (OVT == MVT::v2i64) {
3294 INTTY = MVT::v2i32;
3295 }
3296 // mul64(h1, l1, h0, l0)
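    // A short sketch of the identity used below (all arithmetic mod 2^64):
    //   (h1*2^32 + l1) * (h0*2^32 + l0)
    //       = mullo(l1, l0)
    //       + 2^32 * (h0*l1 + h1*l0 + mulhi(l1, l0))
    // The h1*h0 term overflows out of the low 64 bits and is dropped, so the
    // low dword is mullo(l1, l0) and the high dword is the sum of the two
    // cross products plus the high half of the low product.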
3297 SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
3298 DL,
3299 INTTY, LHS);
3300 SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
3301 DL,
3302 INTTY, LHS);
3303 SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
3304 DL,
3305 INTTY, RHS);
3306 SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
3307 DL,
3308 INTTY, RHS);
3309 // MULLO_UINT_1 r1, h0, l1
3310 SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
3311 DL,
3312 INTTY, RHSHI, LHSLO);
3313 // MULLO_UINT_1 r2, h1, l0
3314 SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
3315 DL,
3316 INTTY, RHSLO, LHSHI);
3317 // ADD_INT hr, r1, r2
3318 SDValue ADDHI = DAG.getNode(ISD::ADD,
3319 DL,
3320 INTTY, RHILLO, RLOHHI);
3321 // MULHI_UINT_1 r3, l1, l0
3322 SDValue RLOLLO = DAG.getNode(ISD::MULHU,
3323 DL,
3324 INTTY, RHSLO, LHSLO);
3325 // ADD_INT hr, hr, r3
3326 SDValue HIGH = DAG.getNode(ISD::ADD,
3327 DL,
3328 INTTY, ADDHI, RLOLLO);
3329 // MULLO_UINT_1 l3, l1, l0
3330 SDValue LOW = DAG.getNode(AMDILISD::UMUL,
3331 DL,
3332 INTTY, LHSLO, RHSLO);
3333 DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
3334 DL,
3335 OVT, LOW, HIGH);
3336 }
3337 return DST;
3338 }
3339 SDValue
3340 AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
3341 {
3342 EVT VT = Op.getValueType();
3343 SDValue Nodes1;
3344 SDValue second;
3345 SDValue third;
3346 SDValue fourth;
3347 DebugLoc DL = Op.getDebugLoc();
3348 Nodes1 = DAG.getNode(AMDILISD::VBUILD,
3349 DL,
3350 VT, Op.getOperand(0));
3351 #if 0
3352 bool allEqual = true;
3353 for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
3354 if (Op.getOperand(0) != Op.getOperand(x)) {
3355 allEqual = false;
3356 break;
3357 }
3358 }
3359 if (allEqual) {
3360 return Nodes1;
3361 }
3362 #endif
3363 switch(Op.getNumOperands()) {
3364 default:
3365 case 1:
3366 break;
3367 case 4:
3368 fourth = Op.getOperand(3);
3369 if (fourth.getOpcode() != ISD::UNDEF) {
3370 Nodes1 = DAG.getNode(
3371 ISD::INSERT_VECTOR_ELT,
3372 DL,
3373 Op.getValueType(),
3374 Nodes1,
3375 fourth,
3376 DAG.getConstant(7, MVT::i32));
3377 }
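    // fall through to insert the remaining elements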
3378 case 3:
3379 third = Op.getOperand(2);
3380 if (third.getOpcode() != ISD::UNDEF) {
3381 Nodes1 = DAG.getNode(
3382 ISD::INSERT_VECTOR_ELT,
3383 DL,
3384 Op.getValueType(),
3385 Nodes1,
3386 third,
3387 DAG.getConstant(6, MVT::i32));
3388 }
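    // fall through to insert the remaining elements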
3389 case 2:
3390 second = Op.getOperand(1);
3391 if (second.getOpcode() != ISD::UNDEF) {
3392 Nodes1 = DAG.getNode(
3393 ISD::INSERT_VECTOR_ELT,
3394 DL,
3395 Op.getValueType(),
3396 Nodes1,
3397 second,
3398 DAG.getConstant(5, MVT::i32));
3399 }
3400 break;
3401 };
3402 return Nodes1;
3403 }
3404
3405 SDValue
3406 AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
3407 SelectionDAG &DAG) const
3408 {
3409 DebugLoc DL = Op.getDebugLoc();
3410 EVT VT = Op.getValueType();
3411 const SDValue *ptr = NULL;
3412 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3413 uint32_t swizzleNum = 0;
3414 SDValue DST;
3415 if (!VT.isVector()) {
3416 SDValue Res = Op.getOperand(0);
3417 return Res;
3418 }
3419
3420 if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
3421 ptr = &Op.getOperand(1);
3422 } else {
3423 ptr = &Op.getOperand(0);
3424 }
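  // A hedged note on the VINSERT masks built below: 0x04030201 packs the
  // per-element source lanes 4..1 one byte each, so clearing byte N of
  // mask2 and setting the low bit of byte N in mask3 appears to mark
  // element N as the lane that receives the inserted scalar. This is an
  // inference from the usage here; the encoding is not documented in this
  // file.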
3425 if (CSDN) {
3426 swizzleNum = (uint32_t)CSDN->getZExtValue();
3427 uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
3428 uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
3429 DST = DAG.getNode(AMDILISD::VINSERT,
3430 DL,
3431 VT,
3432 Op.getOperand(0),
3433 *ptr,
3434 DAG.getTargetConstant(mask2, MVT::i32),
3435 DAG.getTargetConstant(mask3, MVT::i32));
3436 } else {
3437 uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
3438 uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
3439 SDValue res = DAG.getNode(AMDILISD::VINSERT,
3440 DL, VT, Op.getOperand(0), *ptr,
3441 DAG.getTargetConstant(mask2, MVT::i32),
3442 DAG.getTargetConstant(mask3, MVT::i32));
3443 for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
3444 mask2 = 0x04030201 & ~(0xFF << (x * 8));
3445 mask3 = 0x01010101 & (0xFF << (x * 8));
3446 SDValue t = DAG.getNode(AMDILISD::VINSERT,
3447 DL, VT, Op.getOperand(0), *ptr,
3448 DAG.getTargetConstant(mask2, MVT::i32),
3449 DAG.getTargetConstant(mask3, MVT::i32));
3450 SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
3451 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
3452 Op.getOperand(2), DAG.getConstant(x, MVT::i32));
3453 c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
3454 res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
3455 }
3456 DST = res;
3457 }
3458 return DST;
3459 }
3460
3461 SDValue
3462 AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
3463 SelectionDAG &DAG) const
3464 {
3465 EVT VT = Op.getValueType();
3466 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3467 uint64_t swizzleNum = 0;
3468 DebugLoc DL = Op.getDebugLoc();
3469 SDValue Res;
3470 if (!Op.getOperand(0).getValueType().isVector()) {
3471 Res = Op.getOperand(0);
3472 return Res;
3473 }
3474 if (CSDN) {
3475 // Static vector extraction
3476 swizzleNum = CSDN->getZExtValue() + 1;
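    // VEXTRACT component indices appear to be 1-based, hence the +1 here
    // and the 1..vecSize probing loop in the dynamic case below.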
3477 Res = DAG.getNode(AMDILISD::VEXTRACT,
3478 DL, VT,
3479 Op.getOperand(0),
3480 DAG.getTargetConstant(swizzleNum, MVT::i32));
3481 } else {
3482 SDValue Op1 = Op.getOperand(1);
3483 uint32_t vecSize = 4;
3484 SDValue Op0 = Op.getOperand(0);
3485 SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
3486 DL, VT, Op0,
3487 DAG.getTargetConstant(1, MVT::i32));
3488 if (Op0.getValueType().isVector()) {
3489 vecSize = Op0.getValueType().getVectorNumElements();
3490 }
3491 for (uint32_t x = 2; x <= vecSize; ++x) {
3492 SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
3493 DL, VT, Op0,
3494 DAG.getTargetConstant(x, MVT::i32));
3495 SDValue c = DAG.getNode(AMDILISD::CMP,
3496 DL, Op1.getValueType(),
3497 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
3498 Op1, DAG.getConstant(x, MVT::i32));
3499 res = DAG.getNode(AMDILISD::CMOVLOG, DL,
3500 VT, c, t, res);
3501
3502 }
3503 Res = res;
3504 }
3505 return Res;
3506 }
3507
3508 SDValue
3509 AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
3510 SelectionDAG &DAG) const
3511 {
3512 uint32_t vecSize = Op.getValueType().getVectorNumElements();
3513 SDValue src = Op.getOperand(0);
3514 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3515 uint64_t offset = 0;
3516 EVT vecType = Op.getValueType().getVectorElementType();
3517 DebugLoc DL = Op.getDebugLoc();
3518 SDValue Result;
3519 if (CSDN) {
3520 offset = CSDN->getZExtValue();
3521 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3522 DL,vecType, src, DAG.getConstant(offset, MVT::i32));
3523 Result = DAG.getNode(AMDILISD::VBUILD, DL,
3524 Op.getValueType(), Result);
3525 for (uint32_t x = 1; x < vecSize; ++x) {
3526 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
3527 src, DAG.getConstant(offset + x, MVT::i32));
3528 if (elt.getOpcode() != ISD::UNDEF) {
3529 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3530 Op.getValueType(), Result, elt,
3531 DAG.getConstant(x, MVT::i32));
3532 }
3533 }
3534 } else {
3535 SDValue idx = Op.getOperand(1);
3536 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3537 DL, vecType, src, idx);
3538 Result = DAG.getNode(AMDILISD::VBUILD, DL,
3539 Op.getValueType(), Result);
3540 for (uint32_t x = 1; x < vecSize; ++x) {
3541       idx = DAG.getNode(ISD::ADD, DL, idx.getValueType(),
3542           idx, DAG.getConstant(1, MVT::i32));
3543 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
3544 src, idx);
3545 if (elt.getOpcode() != ISD::UNDEF) {
3546 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3547 Op.getValueType(), Result, elt, idx);
3548 }
3549 }
3550 }
3551 return Result;
3552 }
3553 SDValue
3554 AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
3555 SelectionDAG &DAG) const
3556 {
3557 SDValue Res = DAG.getNode(AMDILISD::VBUILD,
3558 Op.getDebugLoc(),
3559 Op.getValueType(),
3560 Op.getOperand(0));
3561 return Res;
3562 }
3563 SDValue
3564 AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
3565 {
3566 SDValue Cond = Op.getOperand(0);
3567 SDValue LHS = Op.getOperand(1);
3568 SDValue RHS = Op.getOperand(2);
3569 DebugLoc DL = Op.getDebugLoc();
3570 Cond = getConversionNode(DAG, Cond, Op, true);
3571 Cond = DAG.getNode(AMDILISD::CMOVLOG,
3572 DL,
3573 Op.getValueType(), Cond, LHS, RHS);
3574 return Cond;
3575 }
3576 SDValue
3577 AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
3578 {
3579 SDValue Cond;
3580 SDValue LHS = Op.getOperand(0);
3581 SDValue RHS = Op.getOperand(1);
3582 SDValue CC = Op.getOperand(2);
3583 DebugLoc DL = Op.getDebugLoc();
3584 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3585 unsigned int AMDILCC = CondCCodeToCC(
3586 SetCCOpcode,
3587 LHS.getValueType().getSimpleVT().SimpleTy);
3588 assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
3589 Cond = DAG.getNode(
3590 ISD::SELECT_CC,
3591 Op.getDebugLoc(),
3592 LHS.getValueType(),
3593 LHS, RHS,
3594 DAG.getConstant(-1, MVT::i32),
3595 DAG.getConstant(0, MVT::i32),
3596 CC);
3597 Cond = getConversionNode(DAG, Cond, Op, true);
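  // The SELECT_CC above produces all ones (-1) for true, so AND'ing with
  // 1 normalizes the result to the 0/1 value setcc consumers expect.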
3598 Cond = DAG.getNode(
3599 ISD::AND,
3600 DL,
3601 Cond.getValueType(),
3602 DAG.getConstant(1, Cond.getValueType()),
3603 Cond);
3604 return Cond;
3605 }
3606
3607 SDValue
3608 AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
3609 {
3610 SDValue Data = Op.getOperand(0);
3611 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
3612 DebugLoc DL = Op.getDebugLoc();
3613 EVT DVT = Data.getValueType();
3614 EVT BVT = BaseType->getVT();
3615 unsigned baseBits = BVT.getScalarType().getSizeInBits();
3616 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
3617 unsigned shiftBits = srcBits - baseBits;
3618 if (srcBits < 32) {
3619 // If the op is less than 32 bits, then it needs to extend to 32bits
3620 // so it can properly keep the upper bits valid.
3621 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
3622 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
3623 shiftBits = 32 - baseBits;
3624 DVT = IVT;
3625 }
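  // Example: for an i8 base type held in 32 bits, shiftBits is 24, so
  // (Data << 24) >> 24 (arithmetic) replicates bit 7 into bits 31..8.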
3626 SDValue Shift = DAG.getConstant(shiftBits, DVT);
3627 // Shift left by 'Shift' bits.
3628 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
3629 // Signed shift Right by 'Shift' bits.
3630 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
3631 if (srcBits < 32) {
3632 // Once the sign extension is done, the op needs to be converted to
3633 // its original type.
3634 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
3635 }
3636 return Data;
3637 }
3638 EVT
3639 AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
3640 {
3641 int iSize = (size * numEle);
3642 int vEle = (iSize >> ((size == 64) ? 6 : 5));
3643 if (!vEle) {
3644 vEle = 1;
3645 }
3646 if (size == 64) {
3647 if (vEle == 1) {
3648 return EVT(MVT::i64);
3649 } else {
3650 return EVT(MVT::getVectorVT(MVT::i64, vEle));
3651 }
3652 } else {
3653 if (vEle == 1) {
3654 return EVT(MVT::i32);
3655 } else {
3656 return EVT(MVT::getVectorVT(MVT::i32, vEle));
3657 }
3658 }
3659 }
3660
3661 SDValue
3662 AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
3663 {
3664 SDValue Src = Op.getOperand(0);
3665 SDValue Dst = Op;
3666 SDValue Res;
3667 DebugLoc DL = Op.getDebugLoc();
3668 EVT SrcVT = Src.getValueType();
3669 EVT DstVT = Dst.getValueType();
3670   // Let's bitcast the floating point types to an
3671   // equivalent integer type before converting to vectors.
3672 if (SrcVT.getScalarType().isFloatingPoint()) {
3673 Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
3674 SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
3675 SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
3676 Src);
3677 SrcVT = Src.getValueType();
3678 }
3679 uint32_t ScalarSrcSize = SrcVT.getScalarType()
3680 .getSimpleVT().getSizeInBits();
3681 uint32_t ScalarDstSize = DstVT.getScalarType()
3682 .getSimpleVT().getSizeInBits();
3683 uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
3684 uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
3685 bool isVec = SrcVT.isVector();
3686 if (DstVT.getScalarType().isInteger() &&
3687 (SrcVT.getScalarType().isInteger()
3688 || SrcVT.getScalarType().isFloatingPoint())) {
3689 if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
3690 || (ScalarSrcSize == 64
3691 && DstNumEle == 4
3692 && ScalarDstSize == 16)) {
3693 // This is the problematic case when bitcasting i64 <-> <4 x i16>
3694 // This approach is a little different as we cannot generate a
3695 // <4 x i64> vector
3696 // as that is illegal in our backend and we are already past
3697 // the DAG legalizer.
3698 // So, in this case, we will do the following conversion.
3699 // Case 1:
3700 // %dst = <4 x i16> %src bitconvert i64 ==>
3701 // %tmp = <4 x i16> %src convert <4 x i32>
3702 // %tmp = <4 x i32> %tmp and 0xFFFF
3703 // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
3704 // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
3705 // %dst = <2 x i32> %tmp bitcast i64
3706 // case 2:
3707 // %dst = i64 %src bitconvert <4 x i16> ==>
3708 // %tmp = i64 %src bitcast <2 x i32>
3709 // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
3710 // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
3711 // %tmp = <4 x i32> %tmp and 0xFFFF
3712 // %dst = <4 x i16> %tmp bitcast <4 x i32>
3713 SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
3714 DAG.getConstant(0xFFFF, MVT::i32));
3715 SDValue const16 = DAG.getConstant(16, MVT::i32);
3716 if (ScalarDstSize == 64) {
3717 // case 1
3718 Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
3719 Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
3720 SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3721 Op, DAG.getConstant(0, MVT::i32));
3722 SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3723 Op, DAG.getConstant(1, MVT::i32));
3724 y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
3725 SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3726 Op, DAG.getConstant(2, MVT::i32));
3727 SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3728 Op, DAG.getConstant(3, MVT::i32));
3729 w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
3730 x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
3731 y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
3732 Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
3733 return Res;
3734 } else {
3735 // case 2
3736 SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
3737 SDValue lor16
3738 = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
3739 SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
3740 SDValue hir16
3741 = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
3742 SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
3743 MVT::v4i32, lo);
3744 SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
3745 getPointerTy(), DAG.getConstant(1, MVT::i32));
3746 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
3747 resVec, lor16, idxVal);
3748 idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
3749 getPointerTy(), DAG.getConstant(2, MVT::i32));
3750 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
3751 resVec, hi, idxVal);
3752 idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
3753 getPointerTy(), DAG.getConstant(3, MVT::i32));
3754 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
3755 resVec, hir16, idxVal);
3756 resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
3757 Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
3758 return Res;
3759 }
3760 } else {
3761       // There are four cases we need to worry about for bitcasts
3762       // where the sizes of the source, intermediates, and result
3763       // are all <= 128 bits, unlike the case above:
3765 // 1) Sub32bit bitcast 32bitAlign
3766 // %dst = <4 x i8> bitcast i32
3767 // (also <[2|4] x i16> to <[2|4] x i32>)
3768 // 2) 32bitAlign bitcast Sub32bit
3769 // %dst = i32 bitcast <4 x i8>
3770 // 3) Sub32bit bitcast LargerSub32bit
3771 // %dst = <2 x i8> bitcast i16
3772 // (also <4 x i8> to <2 x i16>)
3773 // 4) Sub32bit bitcast SmallerSub32bit
3774 // %dst = i16 bitcast <2 x i8>
3775 // (also <2 x i16> to <4 x i8>)
3776 // This also only handles types that are powers of two
3777 if ((ScalarDstSize & (ScalarDstSize - 1))
3778 || (ScalarSrcSize & (ScalarSrcSize - 1))) {
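        // Non-power-of-two scalar sizes fall through to the generic
        // BITCONV at the bottom of this function.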
3779 } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
3780 // case 1:
3781 EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
3782 #if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
3783 SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
3784 #else
3785 SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
3786 DAG.getUNDEF(IntTy.getScalarType()));
3787 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3788 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3789 getPointerTy(), DAG.getConstant(x, MVT::i32));
3790 SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3791 SrcVT.getScalarType(), Src,
3792 DAG.getConstant(x, MVT::i32));
3793 temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
3794 res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
3795 res, temp, idx);
3796 }
3797 #endif
3798 SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
3799 DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
3800 SDValue *newEle = new SDValue[SrcNumEle];
3801 res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
3802 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3803 newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3804 IntTy.getScalarType(), res,
3805 DAG.getConstant(x, MVT::i32));
3806 }
3807 uint32_t Ratio = SrcNumEle / DstNumEle;
3808 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3809 if (x % Ratio) {
3810 newEle[x] = DAG.getNode(ISD::SHL, DL,
3811 IntTy.getScalarType(), newEle[x],
3812 DAG.getConstant(ScalarSrcSize * (x % Ratio),
3813 MVT::i32));
3814 }
3815 }
3816 for (uint32_t x = 0; x < SrcNumEle; x += 2) {
3817 newEle[x] = DAG.getNode(ISD::OR, DL,
3818 IntTy.getScalarType(), newEle[x], newEle[x + 1]);
3819 }
3820 if (ScalarSrcSize == 8) {
3821 for (uint32_t x = 0; x < SrcNumEle; x += 4) {
3822 newEle[x] = DAG.getNode(ISD::OR, DL,
3823 IntTy.getScalarType(), newEle[x], newEle[x + 2]);
3824 }
3825 if (DstNumEle == 1) {
3826 Dst = newEle[0];
3827 } else {
3828 Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
3829 newEle[0]);
3830 for (uint32_t x = 1; x < DstNumEle; ++x) {
3831 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3832 getPointerTy(), DAG.getConstant(x, MVT::i32));
3833 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3834 DstVT, Dst, newEle[x * 4], idx);
3835 }
3836 }
3837 } else {
3838 if (DstNumEle == 1) {
3839 Dst = newEle[0];
3840 } else {
3841 Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
3842 newEle[0]);
3843 for (uint32_t x = 1; x < DstNumEle; ++x) {
3844 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3845 getPointerTy(), DAG.getConstant(x, MVT::i32));
3846 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3847 DstVT, Dst, newEle[x * 2], idx);
3848 }
3849 }
3850 }
3851 delete [] newEle;
3852 return Dst;
3853 } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
3854 // case 2:
3855 EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
3856 SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
3857 DAG.getUNDEF(IntTy.getScalarType()));
3858 uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
3859 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3860 for (uint32_t y = 0; y < mult; ++y) {
3861 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3862 getPointerTy(),
3863 DAG.getConstant(x * mult + y, MVT::i32));
3864 SDValue t;
3865 if (SrcNumEle > 1) {
3866 t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3867 DL, SrcVT.getScalarType(), Src,
3868 DAG.getConstant(x, MVT::i32));
3869 } else {
3870 t = Src;
3871 }
3872 if (y != 0) {
3873 t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
3874 t, DAG.getConstant(y * ScalarDstSize,
3875 MVT::i32));
3876 }
3877 vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
3878 DL, IntTy, vec, t, idx);
3879 }
3880 }
3881 Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
3882 return Dst;
3883 } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
3884 // case 3:
3885 SDValue *numEle = new SDValue[SrcNumEle];
3886 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3887 numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3888 MVT::i8, Src, DAG.getConstant(x, MVT::i32));
3889 numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
3890 numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
3891 DAG.getConstant(0xFF, MVT::i16));
3892 }
3893 for (uint32_t x = 1; x < SrcNumEle; x += 2) {
3894 numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
3895 DAG.getConstant(8, MVT::i16));
3896 numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
3897 numEle[x-1], numEle[x]);
3898 }
3899 if (DstNumEle > 1) {
3900         // If the result is not a scalar i16, the only other case is
3901         // v2i16: v8i8 cannot reach this point, so v4i16 is never
3902         // generated.
3903 Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
3904 numEle[0]);
3905 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3906 getPointerTy(), DAG.getConstant(1, MVT::i32));
3907 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
3908 Dst, numEle[2], idx);
3909 } else {
3910 Dst = numEle[0];
3911 }
3912 delete [] numEle;
3913 return Dst;
3914 } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
3915 // case 4:
3916 SDValue *numEle = new SDValue[DstNumEle];
3917 for (uint32_t x = 0; x < SrcNumEle; ++x) {
3918 numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3919 MVT::i16, Src, DAG.getConstant(x, MVT::i32));
3920 numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
3921 numEle[x * 2], DAG.getConstant(8, MVT::i16));
3922 }
3923 MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
3924 Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
3925 for (uint32_t x = 1; x < DstNumEle; ++x) {
3926 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3927 getPointerTy(), DAG.getConstant(x, MVT::i32));
3928 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
3929 Dst, numEle[x], idx);
3930 }
3931 delete [] numEle;
3932 ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
3933 Res = DAG.getSExtOrTrunc(Dst, DL, ty);
3934 return Res;
3935 }
3936 }
3937 }
3938 Res = DAG.getNode(AMDILISD::BITCONV,
3939 Dst.getDebugLoc(),
3940 Dst.getValueType(), Src);
3941 return Res;
3942 }
3943
3944 SDValue
3945 AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
3946 SelectionDAG &DAG) const
3947 {
3948 SDValue Chain = Op.getOperand(0);
3949 SDValue Size = Op.getOperand(1);
3950 unsigned int SPReg = AMDIL::SP;
3951 DebugLoc DL = Op.getDebugLoc();
3952 SDValue SP = DAG.getCopyFromReg(Chain,
3953 DL,
3954 SPReg, MVT::i32);
3955 SDValue NewSP = DAG.getNode(ISD::ADD,
3956 DL,
3957 MVT::i32, SP, Size);
3958 Chain = DAG.getCopyToReg(SP.getValue(1),
3959 DL,
3960 SPReg, NewSP);
3961 SDValue Ops[2] = {NewSP, Chain};
3962   Chain = DAG.getMergeValues(Ops, 2, DL);
3963 return Chain;
3964 }
3965 SDValue
3966 AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
3967 {
3968 SDValue Chain = Op.getOperand(0);
3969 SDValue Cond = Op.getOperand(1);
3970 SDValue Jump = Op.getOperand(2);
3971 SDValue Result;
3972 Result = DAG.getNode(
3973 AMDILISD::BRANCH_COND,
3974 Op.getDebugLoc(),
3975 Op.getValueType(),
3976 Chain, Jump, Cond);
3977 return Result;
3978 }
3979
3980 SDValue
3981 AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
3982 {
3983 SDValue Chain = Op.getOperand(0);
3984 SDValue CC = Op.getOperand(1);
3985 SDValue LHS = Op.getOperand(2);
3986 SDValue RHS = Op.getOperand(3);
3987 SDValue JumpT = Op.getOperand(4);
3988 SDValue CmpValue;
3989 SDValue Result;
3990 CmpValue = DAG.getNode(
3991 ISD::SELECT_CC,
3992 Op.getDebugLoc(),
3993 LHS.getValueType(),
3994 LHS, RHS,
3995 DAG.getConstant(-1, MVT::i32),
3996 DAG.getConstant(0, MVT::i32),
3997 CC);
3998 Result = DAG.getNode(
3999 AMDILISD::BRANCH_COND,
4000 CmpValue.getDebugLoc(),
4001 MVT::Other, Chain,
4002 JumpT, CmpValue);
4003 return Result;
4004 }
4005
4006 SDValue
4007 AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
4008 {
4009 SDValue Result = DAG.getNode(
4010 AMDILISD::DP_TO_FP,
4011 Op.getDebugLoc(),
4012 Op.getValueType(),
4013 Op.getOperand(0),
4014 Op.getOperand(1));
4015 return Result;
4016 }
4017
4018 SDValue
4019 AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
4020 {
4021 SDValue Result = DAG.getNode(
4022 AMDILISD::VCONCAT,
4023 Op.getDebugLoc(),
4024 Op.getValueType(),
4025 Op.getOperand(0),
4026 Op.getOperand(1));
4027 return Result;
4028 }
4029 // LowerReturn - Lower an ISD::RET node.
4030 SDValue
4031 AMDILTargetLowering::LowerReturn(SDValue Chain,
4032 CallingConv::ID CallConv, bool isVarArg,
4033 const SmallVectorImpl<ISD::OutputArg> &Outs,
4034 const SmallVectorImpl<SDValue> &OutVals,
4035 DebugLoc dl, SelectionDAG &DAG)
4036 const
4037 {
4038 //MachineFunction& MF = DAG.getMachineFunction();
4039 // CCValAssign - represent the assignment of the return value
4040 // to a location
4041 SmallVector<CCValAssign, 16> RVLocs;
4042
4043 // CCState - Info about the registers and stack slot
4044 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4045 getTargetMachine(), RVLocs, *DAG.getContext());
4046
4047 // Analyze return values of ISD::RET
4048 CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
4049 // If this is the first return lowered for this function, add
4050 // the regs to the liveout set for the function
4051 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
4052 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
4053 if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
4054 MRI.addLiveOut(RVLocs[i].getLocReg());
4055 }
4056 }
4057 // FIXME: implement this when tail call is implemented
4058 // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
4059 // both x86 and ppc implement this in ISelLowering
4060
4061 // Regular return here
4062 SDValue Flag;
4063 SmallVector<SDValue, 6> RetOps;
4064 RetOps.push_back(Chain);
4065 RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
4066 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
4067 CCValAssign &VA = RVLocs[i];
4068 SDValue ValToCopy = OutVals[i];
4069 assert(VA.isRegLoc() && "Can only return in registers!");
4070 // ISD::Ret => ret chain, (regnum1, val1), ...
4071     // So i * 2 + 1 indexes only the regnums.
4072 Chain = DAG.getCopyToReg(Chain,
4073 dl,
4074 VA.getLocReg(),
4075 ValToCopy,
4076 Flag);
4077     // Chain the copies through the glue value so that all emitted
4078     // copies stay together and cannot be scheduled apart.
4079 Flag = Chain.getValue(1);
4080 }
4081 /*if (MF.getFunction()->hasStructRetAttr()) {
4082 assert(0 && "Struct returns are not yet implemented!");
4083 // Both MIPS and X86 have this
4084 }*/
4085 RetOps[0] = Chain;
4086 if (Flag.getNode())
4087 RetOps.push_back(Flag);
4088
4089 Flag = DAG.getNode(AMDILISD::RET_FLAG,
4090 dl,
4091 MVT::Other, &RetOps[0], RetOps.size());
4092 return Flag;
4093 }
4094
4095 unsigned int
4096 AMDILTargetLowering::getFunctionAlignment(const Function *) const
4097 {
4098 return 0;
4099 }
4100
4101 void
4102 AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
4103 MachineBasicBlock::iterator &BBI,
4104 DebugLoc *DL, const TargetInstrInfo *TII) const
4105 {
4106 mBB = BB;
4107 mBBI = BBI;
4108 mDL = DL;
4109 mTII = TII;
4110 }
4111 uint32_t
4112 AMDILTargetLowering::genVReg(uint32_t regType) const
4113 {
4114 return mBB->getParent()->getRegInfo().createVirtualRegister(
4115 getTargetMachine().getRegisterInfo()->getRegClass(regType));
4116 }
4117
4118 MachineInstrBuilder
4119 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
4120 {
4121 return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
4122 }
4123
4124 MachineInstrBuilder
4125 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4126 uint32_t src1) const
4127 {
4128 return generateMachineInst(opcode, dst).addReg(src1);
4129 }
4130
4131 MachineInstrBuilder
4132 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4133 uint32_t src1, uint32_t src2) const
4134 {
4135 return generateMachineInst(opcode, dst, src1).addReg(src2);
4136 }
4137
4138 MachineInstrBuilder
4139 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4140 uint32_t src1, uint32_t src2, uint32_t src3) const
4141 {
4142 return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
4143 }
4144
4145
4146 SDValue
4147 AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
4148 {
4149 DebugLoc DL = Op.getDebugLoc();
4150 EVT OVT = Op.getValueType();
4151 SDValue LHS = Op.getOperand(0);
4152 SDValue RHS = Op.getOperand(1);
4153 MVT INTTY;
4154 MVT FLTTY;
4155 if (!OVT.isVector()) {
4156 INTTY = MVT::i32;
4157 FLTTY = MVT::f32;
4158 } else if (OVT.getVectorNumElements() == 2) {
4159 INTTY = MVT::v2i32;
4160 FLTTY = MVT::v2f32;
4161 } else if (OVT.getVectorNumElements() == 4) {
4162 INTTY = MVT::v4i32;
4163 FLTTY = MVT::v4f32;
4164 }
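  // A sketch of the idea, as implemented below: 8/16-bit operands fit
  // exactly in an f32 mantissa, so trunc(fa / fb) is the quotient up to a
  // possible off-by-one. jq (+1 or -1, the sign of ia ^ ib) is added back
  // only when the remainder test fabs(mad(-fq, fb, fa)) >= fabs(fb) shows
  // the truncated quotient fell short.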
4165 unsigned bitsize = OVT.getScalarType().getSizeInBits();
4166 // char|short jq = ia ^ ib;
4167 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
4168
4169 // jq = jq >> (bitsize - 2)
4170 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
4171
4172 // jq = jq | 0x1
4173 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
4174
4175 // jq = (int)jq
4176 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
4177
4178 // int ia = (int)LHS;
4179 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
4180
4181   // int ib = (int)RHS;
4182 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
4183
4184 // float fa = (float)ia;
4185 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
4186
4187 // float fb = (float)ib;
4188 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
4189
4190 // float fq = native_divide(fa, fb);
4191 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
4192
4193 // fq = trunc(fq);
4194 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
4195
4196 // float fqneg = -fq;
4197 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
4198
4199 // float fr = mad(fqneg, fb, fa);
4200 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
4201
4202 // int iq = (int)fq;
4203 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
4204
4205 // fr = fabs(fr);
4206 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
4207
4208 // fb = fabs(fb);
4209 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
4210
4211 // int cv = fr >= fb;
4212   SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4218 // jq = (cv ? jq : 0);
4219 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
4220 DAG.getConstant(0, OVT));
4221 // dst = iq + jq;
4222 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
4223 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
4224 return iq;
4225 }
4226
4227 SDValue
4228 AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
4229 {
4230 DebugLoc DL = Op.getDebugLoc();
4231 EVT OVT = Op.getValueType();
4232 SDValue LHS = Op.getOperand(0);
4233 SDValue RHS = Op.getOperand(1);
4234   // The LowerSDIV32 function generates the equivalent of the following IL.
4235 // mov r0, LHS
4236 // mov r1, RHS
4237 // ilt r10, r0, 0
4238 // ilt r11, r1, 0
4239 // iadd r0, r0, r10
4240 // iadd r1, r1, r11
4241 // ixor r0, r0, r10
4242 // ixor r1, r1, r11
4243 // udiv r0, r0, r1
4244 // ixor r10, r10, r11
4245 // iadd r0, r0, r10
4246 // ixor DST, r0, r10
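  // r10 and r11 below are 0 or -1 sign masks. For s = x >> 31,
  // (x + s) ^ s yields |x|; the udiv then runs on magnitudes, and the
  // same add/xor pattern with r10 ^ r11 (the sign of the quotient)
  // restores the signed result.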
4247
4248 // mov r0, LHS
4249 SDValue r0 = LHS;
4250
4251 // mov r1, RHS
4252 SDValue r1 = RHS;
4253
4254 // ilt r10, r0, 0
4255 SDValue r10 = DAG.getSelectCC(DL,
4256 r0, DAG.getConstant(0, OVT),
4257 DAG.getConstant(-1, MVT::i32),
4258 DAG.getConstant(0, MVT::i32),
4259 ISD::SETLT);
4260
4261 // ilt r11, r1, 0
4262 SDValue r11 = DAG.getSelectCC(DL,
4263 r1, DAG.getConstant(0, OVT),
4264 DAG.getConstant(-1, MVT::i32),
4265 DAG.getConstant(0, MVT::i32),
4266 ISD::SETLT);
4267
4268 // iadd r0, r0, r10
4269 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4270
4271 // iadd r1, r1, r11
4272 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
4273
4274 // ixor r0, r0, r10
4275 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4276
4277 // ixor r1, r1, r11
4278 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
4279
4280 // udiv r0, r0, r1
4281 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
4282
4283 // ixor r10, r10, r11
4284 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
4285
4286 // iadd r0, r0, r10
4287 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4288
4289 // ixor DST, r0, r10
4290 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4291 return DST;
4292 }
4293
4294 SDValue
4295 AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
4296 {
4297 return SDValue(Op.getNode(), 0);
4298 }
4299
4300 SDValue
4301 AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
4302 {
4303 DebugLoc DL = Op.getDebugLoc();
4304 EVT OVT = Op.getValueType();
4305 SDValue LHS = Op.getOperand(0);
4306 SDValue RHS = Op.getOperand(1);
4307 MVT INTTY;
4308 MVT FLTTY;
4309 if (!OVT.isVector()) {
4310 INTTY = MVT::i32;
4311 FLTTY = MVT::f32;
4312 } else if (OVT.getVectorNumElements() == 2) {
4313 INTTY = MVT::v2i32;
4314 FLTTY = MVT::v2f32;
4315 } else if (OVT.getVectorNumElements() == 4) {
4316 INTTY = MVT::v4i32;
4317 FLTTY = MVT::v4f32;
4318 }
4319
4320 // The LowerUDIV24 function implements the following CL.
4321 // int ia = (int)LHS
4322 // float fa = (float)ia
4323 // int ib = (int)RHS
4324 // float fb = (float)ib
4325 // float fq = native_divide(fa, fb)
4326 // fq = trunc(fq)
4327 // float t = mad(fq, fb, fb)
4328 // int iq = (int)fq - (t <= fa)
4329 // return (type)iq
4330
4331 // int ia = (int)LHS
4332 SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
4333
4334 // float fa = (float)ia
4335 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
4336
4337 // int ib = (int)RHS
4338 SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
4339
4340 // float fb = (float)ib
4341 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
4342
4343 // float fq = native_divide(fa, fb)
4344 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
4345
4346 // fq = trunc(fq)
4347 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
4348
4349 // float t = mad(fq, fb, fb)
4350 SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
4351
4352   // int iq = (int)fq - (t <= fa); the setcc below returns 0 or -1
4352   // rather than 0 or 1, so an add implements the subtraction.
4353 SDValue iq;
4354 fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
4355   iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
4360 iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
4361
4362
4363 // return (type)iq
4364 iq = DAG.getZExtOrTrunc(iq, DL, OVT);
4365 return iq;
4366
4367 }
4368
4369 SDValue
4370 AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
4371 {
4372 return SDValue(Op.getNode(), 0);
4373 }
4374
4375 SDValue
4376 AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
4377 {
4378 return SDValue(Op.getNode(), 0);
4379 }
4380 SDValue
4381 AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
4382 {
4383 DebugLoc DL = Op.getDebugLoc();
4384 EVT OVT = Op.getValueType();
4385 MVT INTTY = MVT::i32;
4386 if (OVT == MVT::v2i8) {
4387 INTTY = MVT::v2i32;
4388 } else if (OVT == MVT::v4i8) {
4389 INTTY = MVT::v4i32;
4390 }
4391 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
4392 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
4393 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
4394 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
4395 return LHS;
4396 }
4397
4398 SDValue
4399 AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
4400 {
4401 DebugLoc DL = Op.getDebugLoc();
4402 EVT OVT = Op.getValueType();
4403 MVT INTTY = MVT::i32;
4404 if (OVT == MVT::v2i16) {
4405 INTTY = MVT::v2i32;
4406 } else if (OVT == MVT::v4i16) {
4407 INTTY = MVT::v4i32;
4408 }
4409 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
4410 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
4411 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
4412 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
4413 return LHS;
4414 }
4415
4416 SDValue
4417 AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
4418 {
4419 DebugLoc DL = Op.getDebugLoc();
4420 EVT OVT = Op.getValueType();
4421 SDValue LHS = Op.getOperand(0);
4422 SDValue RHS = Op.getOperand(1);
4423   // The LowerSREM32 function generates the equivalent of the following IL.
4424 // mov r0, LHS
4425 // mov r1, RHS
4426 // ilt r10, r0, 0
4427 // ilt r11, r1, 0
4428 // iadd r0, r0, r10
4429 // iadd r1, r1, r11
4430 // ixor r0, r0, r10
4431 // ixor r1, r1, r11
4432 // udiv r20, r0, r1
4433 // umul r20, r20, r1
4434 // sub r0, r0, r20
4435 // iadd r0, r0, r10
4436 // ixor DST, r0, r10
4437
4438 // mov r0, LHS
4439 SDValue r0 = LHS;
4440
4441 // mov r1, RHS
4442 SDValue r1 = RHS;
4443
4444 // ilt r10, r0, 0
4445 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4446 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4447 r0, DAG.getConstant(0, OVT));
4448
4449 // ilt r11, r1, 0
4450 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4451 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4452 r1, DAG.getConstant(0, OVT));
4453
4454 // iadd r0, r0, r10
4455 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4456
4457 // iadd r1, r1, r11
4458 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
4459
4460 // ixor r0, r0, r10
4461 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4462
4463 // ixor r1, r1, r11
4464 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
4465
4466 // udiv r20, r0, r1
4467   SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
4468
4469 // umul r20, r20, r1
4470 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
4471
4472 // sub r0, r0, r20
4473 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
4474
4475 // iadd r0, r0, r10
4476 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4477
4478 // ixor DST, r0, r10
4479 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4480 return DST;
4481 }
4482
4483 SDValue
4484 AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
4485 {
4486 return SDValue(Op.getNode(), 0);
4487 }
4488
4489 SDValue
4490 AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
4491 {
4492 DebugLoc DL = Op.getDebugLoc();
4493 EVT OVT = Op.getValueType();
4494 MVT INTTY = MVT::i32;
4495 if (OVT == MVT::v2i8) {
4496 INTTY = MVT::v2i32;
4497 } else if (OVT == MVT::v4i8) {
4498 INTTY = MVT::v4i32;
4499 }
4500 SDValue LHS = Op.getOperand(0);
4501 SDValue RHS = Op.getOperand(1);
4502   // The LowerUREM8 function generates the equivalent of the following IL.
4503 // mov r0, as_u32(LHS)
4504 // mov r1, as_u32(RHS)
4505 // and r10, r0, 0xFF
4506 // and r11, r1, 0xFF
4507 // cmov_logical r3, r11, r11, 0x1
4508 // udiv r3, r10, r3
4509 // cmov_logical r3, r11, r3, 0
4510 // umul r3, r3, r11
4511 // sub r3, r10, r3
4512 // and as_u8(DST), r3, 0xFF
4513
4514 // mov r0, as_u32(LHS)
4515 SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
4516
4517 // mov r1, as_u32(RHS)
4518 SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
4519
4520 // and r10, r0, 0xFF
4521 SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
4522 DAG.getConstant(0xFF, INTTY));
4523
4524 // and r11, r1, 0xFF
4525 SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
4526 DAG.getConstant(0xFF, INTTY));
4527
4528 // cmov_logical r3, r11, r11, 0x1
4529 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
4530 DAG.getConstant(0x01, INTTY));
4531
4532 // udiv r3, r10, r3
4533   r3 = DAG.getNode(ISD::UDIV, DL, INTTY, r10, r3);
4534
4535 // cmov_logical r3, r11, r3, 0
4536 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
4537 DAG.getConstant(0, INTTY));
4538
4539 // umul r3, r3, r11
4540 r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
4541
4542 // sub r3, r10, r3
4543 r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
4544
4545 // and as_u8(DST), r3, 0xFF
4546 SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
4547 DAG.getConstant(0xFF, INTTY));
4548 DST = DAG.getZExtOrTrunc(DST, DL, OVT);
4549 return DST;
4550 }
4551
4552 SDValue
4553 AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
4554 {
4555 DebugLoc DL = Op.getDebugLoc();
4556 EVT OVT = Op.getValueType();
4557 MVT INTTY = MVT::i32;
4558 if (OVT == MVT::v2i16) {
4559 INTTY = MVT::v2i32;
4560 } else if (OVT == MVT::v4i16) {
4561 INTTY = MVT::v4i32;
4562 }
4563 SDValue LHS = Op.getOperand(0);
4564 SDValue RHS = Op.getOperand(1);
4565   // The LowerUREM16 function generates the equivalent of the following IL.
4566 // mov r0, LHS
4567 // mov r1, RHS
4568 // DIV = LowerUDIV16(LHS, RHS)
4569 // and r10, r0, 0xFFFF
4570 // and r11, r1, 0xFFFF
4571 // cmov_logical r3, r11, r11, 0x1
4572 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
4573 // and r3, r3, 0xFFFF
4574 // cmov_logical r3, r11, r3, 0
4575 // umul r3, r3, r11
4576 // sub r3, r10, r3
4577 // and DST, r3, 0xFFFF
4578
4579 // mov r0, LHS
4580 SDValue r0 = LHS;
4581
4582 // mov r1, RHS
4583 SDValue r1 = RHS;
4584
4585 // and r10, r0, 0xFFFF
4586 SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
4587 DAG.getConstant(0xFFFF, OVT));
4588
4589 // and r11, r1, 0xFFFF
4590 SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
4591 DAG.getConstant(0xFFFF, OVT));
4592
4593 // cmov_logical r3, r11, r11, 0x1
4594 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
4595 DAG.getConstant(0x01, OVT));
4596
4597 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
4598 r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
4599 r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
4600   r3 = DAG.getNode(ISD::UDIV, DL, INTTY, r10, r3);
4601 r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
4602 r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
4603
4604 // and r3, r3, 0xFFFF
4605 r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
4606 DAG.getConstant(0xFFFF, OVT));
4607
4608 // cmov_logical r3, r11, r3, 0
4609 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
4610 DAG.getConstant(0, OVT));
4611 // umul r3, r3, r11
4612 r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
4613
4614 // sub r3, r10, r3
4615 r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
4616
4617 // and DST, r3, 0xFFFF
4618 SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
4619 DAG.getConstant(0xFFFF, OVT));
4620 return DST;
4621 }
4622
4623 SDValue
4624 AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
4625 {
4626 DebugLoc DL = Op.getDebugLoc();
4627 EVT OVT = Op.getValueType();
4628 SDValue LHS = Op.getOperand(0);
4629 SDValue RHS = Op.getOperand(1);
4630   // The LowerUREM32 function generates the equivalent of the following IL.
4631 // udiv r20, LHS, RHS
4632 // umul r20, r20, RHS
4633 // sub DST, LHS, r20
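  // i.e. the standard identity a % b == a - (a / b) * b.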
4634
4635 // udiv r20, LHS, RHS
4636 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
4637
4638 // umul r20, r20, RHS
4639 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
4640
4641 // sub DST, LHS, r20
4642 SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
4643 return DST;
4644 }
4645
4646 SDValue
4647 AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
4648 {
4649 return SDValue(Op.getNode(), 0);
4650 }
4651
4652
4653 SDValue
4654 AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
4655 {
4656 DebugLoc DL = Op.getDebugLoc();
4657 EVT OVT = Op.getValueType();
4658 MVT INTTY = MVT::i32;
4659 if (OVT == MVT::v2f32) {
4660 INTTY = MVT::v2i32;
4661 } else if (OVT == MVT::v4f32) {
4662 INTTY = MVT::v4i32;
4663 }
4664 SDValue LHS = Op.getOperand(0);
4665 SDValue RHS = Op.getOperand(1);
4666 SDValue DST;
4667 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
4668 &this->getTargetMachine())->getSubtargetImpl();
4669 if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
4670 // TODO: This doesn't work for vector types yet
4671     // The LowerFDIV32 function generates the equivalent of the following
4672     // IL:
4673 // mov r20, as_int(LHS)
4674 // mov r21, as_int(RHS)
4675 // and r30, r20, 0x7f800000
4676     // and r31, r21, 0x7f800000
4677     // and r32, r20, 0x807FFFFF
4678 // and r33, r21, 0x807FFFFF
4679 // ieq r40, r30, 0x7F800000
4680 // ieq r41, r31, 0x7F800000
4681     // ieq r42, r30, 0
4682     // ieq r43, r31, 0
4683 // and r50, r20, 0x80000000
4684 // and r51, r21, 0x80000000
4685 // ior r32, r32, 0x3f800000
4686 // ior r33, r33, 0x3f800000
4687 // cmov_logical r32, r42, r50, r32
4688 // cmov_logical r33, r43, r51, r33
4689 // cmov_logical r32, r40, r20, r32
4690 // cmov_logical r33, r41, r21, r33
4691 // ior r50, r40, r41
4692 // ior r51, r42, r43
4693 // ior r50, r50, r51
4694 // inegate r52, r31
4695 // iadd r30, r30, r52
4696 // cmov_logical r30, r50, 0, r30
4697 // div_zeroop(infinity) r21, 1.0, r33
4698 // mul_ieee r20, r32, r21
4699 // and r22, r20, 0x7FFFFFFF
4700 // and r23, r20, 0x80000000
4701 // ishr r60, r22, 0x00000017
4702 // ishr r61, r30, 0x00000017
4703 // iadd r20, r20, r30
4704 // iadd r21, r22, r30
4705 // iadd r60, r60, r61
4706 // ige r42, 0, R60
4707 // ior r41, r23, 0x7F800000
4708 // ige r40, r60, 0x000000FF
4709 // cmov_logical r40, r50, 0, r40
4710 // cmov_logical r20, r42, r23, r20
4711 // cmov_logical DST, r40, r41, r20
4712 // as_float(DST)
4713
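    // Roughly: both operands are split into exponent (r30/r31) and
    // sign+mantissa (r32/r33) fields, the mantissa parts are divided as
    // values near 1.0, the exponent difference (r30 - r31) is added back
    // into the result, and the cmov_logical chains patch in the special
    // cases (zeros, infinities, and exponent overflow).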
4714 // mov r20, as_int(LHS)
4715 SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
4716
4717 // mov r21, as_int(RHS)
4718 SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
4719
4720 // and r30, r20, 0x7f800000
4721 SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4722 DAG.getConstant(0x7F800000, INTTY));
4723
4724 // and r31, r21, 0x7f800000
4725 SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4726 DAG.getConstant(0x7f800000, INTTY));
4727
4728 // and r32, r20, 0x807FFFFF
4729 SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4730 DAG.getConstant(0x807FFFFF, INTTY));
4731
4732 // and r33, r21, 0x807FFFFF
4733 SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4734 DAG.getConstant(0x807FFFFF, INTTY));
4735
4736 // ieq r40, r30, 0x7F800000
4737 SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4738 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4739 R30, DAG.getConstant(0x7F800000, INTTY));
4740
4741 // ieq r41, r31, 0x7F800000
4742 SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4743 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4744 R31, DAG.getConstant(0x7F800000, INTTY));
4745
4746 // ieq r42, r30, 0
4747 SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4748 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4749 R30, DAG.getConstant(0, INTTY));
4750
4751 // ieq r43, r31, 0
4752 SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4753 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4754 R31, DAG.getConstant(0, INTTY));
4755
4756 // and r50, r20, 0x80000000
4757 SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4758 DAG.getConstant(0x80000000, INTTY));
4759
4760 // and r51, r21, 0x80000000
4761 SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4762 DAG.getConstant(0x80000000, INTTY));
4763
4764 // ior r32, r32, 0x3f800000
4765 R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
4766 DAG.getConstant(0x3F800000, INTTY));
4767
4768 // ior r33, r33, 0x3f800000
4769 R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
4770 DAG.getConstant(0x3F800000, INTTY));
4771
4772 // cmov_logical r32, r42, r50, r32
4773 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
4774
4775 // cmov_logical r33, r43, r51, r33
4776 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
4777
4778 // cmov_logical r32, r40, r20, r32
4779 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
4780
4781 // cmov_logical r33, r41, r21, r33
4782 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
4783
4784 // ior r50, r40, r41
4785 R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
4786
4787 // ior r51, r42, r43
4788 R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
4789
4790 // ior r50, r50, r51
4791 R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
4792
4793 // inegate r52, r31
4794 SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
4795
4796 // iadd r30, r30, r52
4797 R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
4798
4799 // cmov_logical r30, r50, 0, r30
4800 R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
4801 DAG.getConstant(0, INTTY), R30);
4802
4803 // div_zeroop(infinity) r21, 1.0, as_float(r33)
4804 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
4805 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
4806 DAG.getConstantFP(1.0f, OVT), R33);
4807
4808 // mul_ieee as_int(r20), as_float(r32), r21
4809 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
4810 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
4811 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
4812
4823 // and r22, r20, 0x7FFFFFFF
4824 SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4825 DAG.getConstant(0x7FFFFFFF, INTTY));
4826
4827 // and r23, r20, 0x80000000
4828 SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4829 DAG.getConstant(0x80000000, INTTY));
4830
4831 // ishr r60, r22, 0x00000017
4832 SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
4833 DAG.getConstant(0x00000017, INTTY));
4834
4835 // ishr r61, r30, 0x00000017
4836 SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
4837 DAG.getConstant(0x00000017, INTTY));
4838
4839 // iadd r20, r20, r30
4840 R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
4841
4842 // iadd r21, r22, r30
4843 R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
4844
4845 // iadd r60, r60, r61
4846 R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
4847
4848 // ige r42, 0, R60
4849 R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4850 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
4851 DAG.getConstant(0, INTTY),
4852 R60);
4853
4854 // ior r41, r23, 0x7F800000
4855 R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
4856 DAG.getConstant(0x7F800000, INTTY));
4857
4858 // ige r40, r60, 0x000000FF
4859 R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4860 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
4861 R60,
4862         DAG.getConstant(0x000000FF, INTTY));
4863
4864 // cmov_logical r40, r50, 0, r40
4865 R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
4866 DAG.getConstant(0, INTTY),
4867 R40);
4868
4869 // cmov_logical r20, r42, r23, r20
4870 R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
4871
4872 // cmov_logical DST, r40, r41, r20
4873 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
4874
4875 // as_float(DST)
4876 DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
4877 } else {
4878     // The following sequence of DAG nodes produces the following IL:
4879 // fabs r1, RHS
4880 // lt r2, 0x1.0p+96f, r1
4881     // cmov_logical r3, r2, 0x1.0p-32f, 1.0f
4882 // mul_ieee r1, RHS, r3
4883 // div_zeroop(infinity) r0, LHS, r1
4884 // mul_ieee DST, r0, r3
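    // The integer constants below are f32 bit patterns: 0x6f800000 is
    // 0x1.0p+96f, 0x2f800000 is 0x1.0p-32f, and 0x3f800000 is 1.0f. When
    // |RHS| is huge, RHS is pre-scaled down so the reciprocal cannot flush
    // to zero; multiplying the result by the same factor cancels the
    // scale, since (LHS / (RHS * s)) * s == LHS / RHS.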
4885
4886 // fabs r1, RHS
4887 SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
4888 // lt r2, 0x1.0p+96f, r1
4889 SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4890 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
4891 DAG.getConstant(0x6f800000, INTTY), r1);
4892     // cmov_logical r3, r2, 0x1.0p-32f, 1.0f
4893 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
4894 DAG.getConstant(0x2f800000, INTTY),
4895 DAG.getConstant(0x3f800000, INTTY));
4896 // mul_ieee r1, RHS, r3
4897 r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
4898 // div_zeroop(infinity) r0, LHS, r1
4899 SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
4900 // mul_ieee DST, r0, r3
4901 DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
4902 }
4903 return DST;
4904 }
4905
4906 SDValue
4907 AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
4908 {
4909 return SDValue(Op.getNode(), 0);
4910 }