radeon/llvm: use correct intrinsic for CEIL
[mesa.git] / src / gallium / drivers / radeon / AMDILISelLowering.cpp
1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 // This file implements the interfaces that AMDIL uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDILISelLowering.h"
16 #include "AMDILDevices.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "AMDILSubtarget.h"
19 #include "AMDILTargetMachine.h"
20 #include "AMDILUtilityFunctions.h"
21 #include "llvm/CallingConv.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/DerivedTypes.h"
29 #include "llvm/Instructions.h"
30 #include "llvm/Intrinsics.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Target/TargetOptions.h"
33
34 using namespace llvm;
35 #define ISDBITCAST ISD::BITCAST
36 #define MVTGLUE MVT::Glue
37 //===----------------------------------------------------------------------===//
38 // Calling Convention Implementation
39 //===----------------------------------------------------------------------===//
40 #include "AMDILGenCallingConv.inc"
41
42 //===----------------------------------------------------------------------===//
43 // TargetLowering Implementation Help Functions Begin
44 //===----------------------------------------------------------------------===//
45 static SDValue
46 getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
47 {
48 DebugLoc DL = Src.getDebugLoc();
49 EVT svt = Src.getValueType().getScalarType();
50 EVT dvt = Dst.getValueType().getScalarType();
51 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
52 if (dvt.bitsGT(svt)) {
53 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
54 } else if (svt.bitsLT(svt)) {
55 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
56 DAG.getConstant(1, MVT::i32));
57 }
58 } else if (svt.isInteger() && dvt.isInteger()) {
59 if (!svt.bitsEq(dvt)) {
60 Src = DAG.getSExtOrTrunc(Src, DL, dvt);
61 } else {
62 Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
63 }
64 } else if (svt.isInteger()) {
65 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
66 if (!svt.bitsEq(dvt)) {
67 if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
68 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
69 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
70 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
71 } else {
72 assert(0 && "We only support 32 and 64bit fp types");
73 }
74 }
75 Src = DAG.getNode(opcode, DL, dvt, Src);
76 } else if (dvt.isInteger()) {
77 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
78 if (svt.getSimpleVT().SimpleTy == MVT::f32) {
79 Src = DAG.getNode(opcode, DL, MVT::i32, Src);
80 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
81 Src = DAG.getNode(opcode, DL, MVT::i64, Src);
82 } else {
83 assert(0 && "We only support 32 and 64bit fp types");
84 }
85 Src = DAG.getSExtOrTrunc(Src, DL, dvt);
86 }
87 return Src;
88 }
89 // CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
90 // condition.
91 static AMDILCC::CondCodes
92 CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
93 {
94 switch (CC) {
95 default:
96 {
97 errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
98 assert(0 && "Unknown condition code!");
99 }
100 case ISD::SETO:
101 switch(type) {
102 case MVT::f32:
103 return AMDILCC::IL_CC_F_O;
104 case MVT::f64:
105 return AMDILCC::IL_CC_D_O;
106 default:
107 assert(0 && "Opcode combination not generated correctly!");
108 return AMDILCC::COND_ERROR;
109 };
110 case ISD::SETUO:
111 switch(type) {
112 case MVT::f32:
113 return AMDILCC::IL_CC_F_UO;
114 case MVT::f64:
115 return AMDILCC::IL_CC_D_UO;
116 default:
117 assert(0 && "Opcode combination not generated correctly!");
118 return AMDILCC::COND_ERROR;
119 };
120 case ISD::SETGT:
121 switch (type) {
122 case MVT::i1:
123 case MVT::i8:
124 case MVT::i16:
125 case MVT::i32:
126 return AMDILCC::IL_CC_I_GT;
127 case MVT::f32:
128 return AMDILCC::IL_CC_F_GT;
129 case MVT::f64:
130 return AMDILCC::IL_CC_D_GT;
131 case MVT::i64:
132 return AMDILCC::IL_CC_L_GT;
133 default:
134 assert(0 && "Opcode combination not generated correctly!");
135 return AMDILCC::COND_ERROR;
136 };
137 case ISD::SETGE:
138 switch (type) {
139 case MVT::i1:
140 case MVT::i8:
141 case MVT::i16:
142 case MVT::i32:
143 return AMDILCC::IL_CC_I_GE;
144 case MVT::f32:
145 return AMDILCC::IL_CC_F_GE;
146 case MVT::f64:
147 return AMDILCC::IL_CC_D_GE;
148 case MVT::i64:
149 return AMDILCC::IL_CC_L_GE;
150 default:
151 assert(0 && "Opcode combination not generated correctly!");
152 return AMDILCC::COND_ERROR;
153 };
154 case ISD::SETLT:
155 switch (type) {
156 case MVT::i1:
157 case MVT::i8:
158 case MVT::i16:
159 case MVT::i32:
160 return AMDILCC::IL_CC_I_LT;
161 case MVT::f32:
162 return AMDILCC::IL_CC_F_LT;
163 case MVT::f64:
164 return AMDILCC::IL_CC_D_LT;
165 case MVT::i64:
166 return AMDILCC::IL_CC_L_LT;
167 default:
168 assert(0 && "Opcode combination not generated correctly!");
169 return AMDILCC::COND_ERROR;
170 };
171 case ISD::SETLE:
172 switch (type) {
173 case MVT::i1:
174 case MVT::i8:
175 case MVT::i16:
176 case MVT::i32:
177 return AMDILCC::IL_CC_I_LE;
178 case MVT::f32:
179 return AMDILCC::IL_CC_F_LE;
180 case MVT::f64:
181 return AMDILCC::IL_CC_D_LE;
182 case MVT::i64:
183 return AMDILCC::IL_CC_L_LE;
184 default:
185 assert(0 && "Opcode combination not generated correctly!");
186 return AMDILCC::COND_ERROR;
187 };
188 case ISD::SETNE:
189 switch (type) {
190 case MVT::i1:
191 case MVT::i8:
192 case MVT::i16:
193 case MVT::i32:
194 return AMDILCC::IL_CC_I_NE;
195 case MVT::f32:
196 return AMDILCC::IL_CC_F_NE;
197 case MVT::f64:
198 return AMDILCC::IL_CC_D_NE;
199 case MVT::i64:
200 return AMDILCC::IL_CC_L_NE;
201 default:
202 assert(0 && "Opcode combination not generated correctly!");
203 return AMDILCC::COND_ERROR;
204 };
205 case ISD::SETEQ:
206 switch (type) {
207 case MVT::i1:
208 case MVT::i8:
209 case MVT::i16:
210 case MVT::i32:
211 return AMDILCC::IL_CC_I_EQ;
212 case MVT::f32:
213 return AMDILCC::IL_CC_F_EQ;
214 case MVT::f64:
215 return AMDILCC::IL_CC_D_EQ;
216 case MVT::i64:
217 return AMDILCC::IL_CC_L_EQ;
218 default:
219 assert(0 && "Opcode combination not generated correctly!");
220 return AMDILCC::COND_ERROR;
221 };
222 case ISD::SETUGT:
223 switch (type) {
224 case MVT::i1:
225 case MVT::i8:
226 case MVT::i16:
227 case MVT::i32:
228 return AMDILCC::IL_CC_U_GT;
229 case MVT::f32:
230 return AMDILCC::IL_CC_F_UGT;
231 case MVT::f64:
232 return AMDILCC::IL_CC_D_UGT;
233 case MVT::i64:
234 return AMDILCC::IL_CC_UL_GT;
235 default:
236 assert(0 && "Opcode combination not generated correctly!");
237 return AMDILCC::COND_ERROR;
238 };
239 case ISD::SETUGE:
240 switch (type) {
241 case MVT::i1:
242 case MVT::i8:
243 case MVT::i16:
244 case MVT::i32:
245 return AMDILCC::IL_CC_U_GE;
246 case MVT::f32:
247 return AMDILCC::IL_CC_F_UGE;
248 case MVT::f64:
249 return AMDILCC::IL_CC_D_UGE;
250 case MVT::i64:
251 return AMDILCC::IL_CC_UL_GE;
252 default:
253 assert(0 && "Opcode combination not generated correctly!");
254 return AMDILCC::COND_ERROR;
255 };
256 case ISD::SETULT:
257 switch (type) {
258 case MVT::i1:
259 case MVT::i8:
260 case MVT::i16:
261 case MVT::i32:
262 return AMDILCC::IL_CC_U_LT;
263 case MVT::f32:
264 return AMDILCC::IL_CC_F_ULT;
265 case MVT::f64:
266 return AMDILCC::IL_CC_D_ULT;
267 case MVT::i64:
268 return AMDILCC::IL_CC_UL_LT;
269 default:
270 assert(0 && "Opcode combination not generated correctly!");
271 return AMDILCC::COND_ERROR;
272 };
273 case ISD::SETULE:
274 switch (type) {
275 case MVT::i1:
276 case MVT::i8:
277 case MVT::i16:
278 case MVT::i32:
279 return AMDILCC::IL_CC_U_LE;
280 case MVT::f32:
281 return AMDILCC::IL_CC_F_ULE;
282 case MVT::f64:
283 return AMDILCC::IL_CC_D_ULE;
284 case MVT::i64:
285 return AMDILCC::IL_CC_UL_LE;
286 default:
287 assert(0 && "Opcode combination not generated correctly!");
288 return AMDILCC::COND_ERROR;
289 };
290 case ISD::SETUNE:
291 switch (type) {
292 case MVT::i1:
293 case MVT::i8:
294 case MVT::i16:
295 case MVT::i32:
296 return AMDILCC::IL_CC_U_NE;
297 case MVT::f32:
298 return AMDILCC::IL_CC_F_UNE;
299 case MVT::f64:
300 return AMDILCC::IL_CC_D_UNE;
301 case MVT::i64:
302 return AMDILCC::IL_CC_UL_NE;
303 default:
304 assert(0 && "Opcode combination not generated correctly!");
305 return AMDILCC::COND_ERROR;
306 };
307 case ISD::SETUEQ:
308 switch (type) {
309 case MVT::i1:
310 case MVT::i8:
311 case MVT::i16:
312 case MVT::i32:
313 return AMDILCC::IL_CC_U_EQ;
314 case MVT::f32:
315 return AMDILCC::IL_CC_F_UEQ;
316 case MVT::f64:
317 return AMDILCC::IL_CC_D_UEQ;
318 case MVT::i64:
319 return AMDILCC::IL_CC_UL_EQ;
320 default:
321 assert(0 && "Opcode combination not generated correctly!");
322 return AMDILCC::COND_ERROR;
323 };
324 case ISD::SETOGT:
325 switch (type) {
326 case MVT::f32:
327 return AMDILCC::IL_CC_F_OGT;
328 case MVT::f64:
329 return AMDILCC::IL_CC_D_OGT;
330 case MVT::i1:
331 case MVT::i8:
332 case MVT::i16:
333 case MVT::i32:
334 case MVT::i64:
335 default:
336 assert(0 && "Opcode combination not generated correctly!");
337 return AMDILCC::COND_ERROR;
338 };
339 case ISD::SETOGE:
340 switch (type) {
341 case MVT::f32:
342 return AMDILCC::IL_CC_F_OGE;
343 case MVT::f64:
344 return AMDILCC::IL_CC_D_OGE;
345 case MVT::i1:
346 case MVT::i8:
347 case MVT::i16:
348 case MVT::i32:
349 case MVT::i64:
350 default:
351 assert(0 && "Opcode combination not generated correctly!");
352 return AMDILCC::COND_ERROR;
353 };
354 case ISD::SETOLT:
355 switch (type) {
356 case MVT::f32:
357 return AMDILCC::IL_CC_F_OLT;
358 case MVT::f64:
359 return AMDILCC::IL_CC_D_OLT;
360 case MVT::i1:
361 case MVT::i8:
362 case MVT::i16:
363 case MVT::i32:
364 case MVT::i64:
365 default:
366 assert(0 && "Opcode combination not generated correctly!");
367 return AMDILCC::COND_ERROR;
368 };
369 case ISD::SETOLE:
370 switch (type) {
371 case MVT::f32:
372 return AMDILCC::IL_CC_F_OLE;
373 case MVT::f64:
374 return AMDILCC::IL_CC_D_OLE;
375 case MVT::i1:
376 case MVT::i8:
377 case MVT::i16:
378 case MVT::i32:
379 case MVT::i64:
380 default:
381 assert(0 && "Opcode combination not generated correctly!");
382 return AMDILCC::COND_ERROR;
383 };
384 case ISD::SETONE:
385 switch (type) {
386 case MVT::f32:
387 return AMDILCC::IL_CC_F_ONE;
388 case MVT::f64:
389 return AMDILCC::IL_CC_D_ONE;
390 case MVT::i1:
391 case MVT::i8:
392 case MVT::i16:
393 case MVT::i32:
394 case MVT::i64:
395 default:
396 assert(0 && "Opcode combination not generated correctly!");
397 return AMDILCC::COND_ERROR;
398 };
399 case ISD::SETOEQ:
400 switch (type) {
401 case MVT::f32:
402 return AMDILCC::IL_CC_F_OEQ;
403 case MVT::f64:
404 return AMDILCC::IL_CC_D_OEQ;
405 case MVT::i1:
406 case MVT::i8:
407 case MVT::i16:
408 case MVT::i32:
409 case MVT::i64:
410 default:
411 assert(0 && "Opcode combination not generated correctly!");
412 return AMDILCC::COND_ERROR;
413 };
414 };
415 }
416
417 static unsigned int
418 translateToOpcode(uint64_t CCCode, unsigned int regClass)
419 {
420 switch (CCCode) {
421 case AMDILCC::IL_CC_D_EQ:
422 case AMDILCC::IL_CC_D_OEQ:
423 if (regClass == AMDIL::GPRV2F64RegClassID) {
424 return (unsigned int)AMDIL::DEQ_v2f64;
425 } else {
426 return (unsigned int)AMDIL::DEQ;
427 }
428 case AMDILCC::IL_CC_D_LE:
429 case AMDILCC::IL_CC_D_OLE:
430 case AMDILCC::IL_CC_D_ULE:
431 case AMDILCC::IL_CC_D_GE:
432 case AMDILCC::IL_CC_D_OGE:
433 case AMDILCC::IL_CC_D_UGE:
434 return (unsigned int)AMDIL::DGE;
435 case AMDILCC::IL_CC_D_LT:
436 case AMDILCC::IL_CC_D_OLT:
437 case AMDILCC::IL_CC_D_ULT:
438 case AMDILCC::IL_CC_D_GT:
439 case AMDILCC::IL_CC_D_OGT:
440 case AMDILCC::IL_CC_D_UGT:
441 return (unsigned int)AMDIL::DLT;
442 case AMDILCC::IL_CC_D_NE:
443 case AMDILCC::IL_CC_D_UNE:
444 return (unsigned int)AMDIL::DNE;
445 case AMDILCC::IL_CC_F_EQ:
446 case AMDILCC::IL_CC_F_OEQ:
447 return (unsigned int)AMDIL::FEQ;
448 case AMDILCC::IL_CC_F_LE:
449 case AMDILCC::IL_CC_F_ULE:
450 case AMDILCC::IL_CC_F_OLE:
451 case AMDILCC::IL_CC_F_GE:
452 case AMDILCC::IL_CC_F_UGE:
453 case AMDILCC::IL_CC_F_OGE:
454 return (unsigned int)AMDIL::FGE;
455 case AMDILCC::IL_CC_F_LT:
456 case AMDILCC::IL_CC_F_OLT:
457 case AMDILCC::IL_CC_F_ULT:
458 case AMDILCC::IL_CC_F_GT:
459 case AMDILCC::IL_CC_F_OGT:
460 case AMDILCC::IL_CC_F_UGT:
461 if (regClass == AMDIL::GPRV2F32RegClassID) {
462 return (unsigned int)AMDIL::FLT_v2f32;
463 } else if (regClass == AMDIL::GPRV4F32RegClassID) {
464 return (unsigned int)AMDIL::FLT_v4f32;
465 } else {
466 return (unsigned int)AMDIL::FLT;
467 }
468 case AMDILCC::IL_CC_F_NE:
469 case AMDILCC::IL_CC_F_UNE:
470 return (unsigned int)AMDIL::FNE;
471 case AMDILCC::IL_CC_I_EQ:
472 case AMDILCC::IL_CC_U_EQ:
473 if (regClass == AMDIL::GPRI32RegClassID
474 || regClass == AMDIL::GPRI8RegClassID
475 || regClass == AMDIL::GPRI16RegClassID) {
476 return (unsigned int)AMDIL::IEQ;
477 } else if (regClass == AMDIL::GPRV2I32RegClassID
478 || regClass == AMDIL::GPRV2I8RegClassID
479 || regClass == AMDIL::GPRV2I16RegClassID) {
480 return (unsigned int)AMDIL::IEQ_v2i32;
481 } else if (regClass == AMDIL::GPRV4I32RegClassID
482 || regClass == AMDIL::GPRV4I8RegClassID
483 || regClass == AMDIL::GPRV4I16RegClassID) {
484 return (unsigned int)AMDIL::IEQ_v4i32;
485 } else {
486 assert(!"Unknown reg class!");
487 }
488 case AMDILCC::IL_CC_L_EQ:
489 case AMDILCC::IL_CC_UL_EQ:
490 return (unsigned int)AMDIL::LEQ;
491 case AMDILCC::IL_CC_I_GE:
492 case AMDILCC::IL_CC_I_LE:
493 if (regClass == AMDIL::GPRI32RegClassID
494 || regClass == AMDIL::GPRI8RegClassID
495 || regClass == AMDIL::GPRI16RegClassID) {
496 return (unsigned int)AMDIL::IGE;
497 } else if (regClass == AMDIL::GPRV2I32RegClassID
498 || regClass == AMDIL::GPRI8RegClassID
499 || regClass == AMDIL::GPRI16RegClassID) {
500 return (unsigned int)AMDIL::IGE_v2i32;
501 } else if (regClass == AMDIL::GPRV4I32RegClassID
502 || regClass == AMDIL::GPRI8RegClassID
503 || regClass == AMDIL::GPRI16RegClassID) {
504 return (unsigned int)AMDIL::IGE_v4i32;
505 } else {
506 assert(!"Unknown reg class!");
507 }
508 case AMDILCC::IL_CC_I_LT:
509 case AMDILCC::IL_CC_I_GT:
510 if (regClass == AMDIL::GPRI32RegClassID
511 || regClass == AMDIL::GPRI8RegClassID
512 || regClass == AMDIL::GPRI16RegClassID) {
513 return (unsigned int)AMDIL::ILT;
514 } else if (regClass == AMDIL::GPRV2I32RegClassID
515 || regClass == AMDIL::GPRI8RegClassID
516 || regClass == AMDIL::GPRI16RegClassID) {
517 return (unsigned int)AMDIL::ILT_v2i32;
518 } else if (regClass == AMDIL::GPRV4I32RegClassID
519 || regClass == AMDIL::GPRI8RegClassID
520 || regClass == AMDIL::GPRI16RegClassID) {
521 return (unsigned int)AMDIL::ILT_v4i32;
522 } else {
523 assert(!"Unknown reg class!");
524 }
525 case AMDILCC::IL_CC_L_GE:
526 return (unsigned int)AMDIL::LGE;
527 case AMDILCC::IL_CC_L_LE:
528 return (unsigned int)AMDIL::LLE;
529 case AMDILCC::IL_CC_L_LT:
530 return (unsigned int)AMDIL::LLT;
531 case AMDILCC::IL_CC_L_GT:
532 return (unsigned int)AMDIL::LGT;
533 case AMDILCC::IL_CC_I_NE:
534 case AMDILCC::IL_CC_U_NE:
535 if (regClass == AMDIL::GPRI32RegClassID
536 || regClass == AMDIL::GPRI8RegClassID
537 || regClass == AMDIL::GPRI16RegClassID) {
538 return (unsigned int)AMDIL::INE;
539 } else if (regClass == AMDIL::GPRV2I32RegClassID
540 || regClass == AMDIL::GPRI8RegClassID
541 || regClass == AMDIL::GPRI16RegClassID) {
542 return (unsigned int)AMDIL::INE_v2i32;
543 } else if (regClass == AMDIL::GPRV4I32RegClassID
544 || regClass == AMDIL::GPRI8RegClassID
545 || regClass == AMDIL::GPRI16RegClassID) {
546 return (unsigned int)AMDIL::INE_v4i32;
547 } else {
548 assert(!"Unknown reg class!");
549 }
550 case AMDILCC::IL_CC_U_GE:
551 case AMDILCC::IL_CC_U_LE:
552 if (regClass == AMDIL::GPRI32RegClassID
553 || regClass == AMDIL::GPRI8RegClassID
554 || regClass == AMDIL::GPRI16RegClassID) {
555 return (unsigned int)AMDIL::UGE;
556 } else if (regClass == AMDIL::GPRV2I32RegClassID
557 || regClass == AMDIL::GPRI8RegClassID
558 || regClass == AMDIL::GPRI16RegClassID) {
559 return (unsigned int)AMDIL::UGE_v2i32;
560 } else if (regClass == AMDIL::GPRV4I32RegClassID
561 || regClass == AMDIL::GPRI8RegClassID
562 || regClass == AMDIL::GPRI16RegClassID) {
563 return (unsigned int)AMDIL::UGE_v4i32;
564 } else {
565 assert(!"Unknown reg class!");
566 }
567 case AMDILCC::IL_CC_L_NE:
568 case AMDILCC::IL_CC_UL_NE:
569 return (unsigned int)AMDIL::LNE;
570 case AMDILCC::IL_CC_UL_GE:
571 return (unsigned int)AMDIL::ULGE;
572 case AMDILCC::IL_CC_UL_LE:
573 return (unsigned int)AMDIL::ULLE;
574 case AMDILCC::IL_CC_U_LT:
575 if (regClass == AMDIL::GPRI32RegClassID
576 || regClass == AMDIL::GPRI8RegClassID
577 || regClass == AMDIL::GPRI16RegClassID) {
578 return (unsigned int)AMDIL::ULT;
579 } else if (regClass == AMDIL::GPRV2I32RegClassID
580 || regClass == AMDIL::GPRI8RegClassID
581 || regClass == AMDIL::GPRI16RegClassID) {
582 return (unsigned int)AMDIL::ULT_v2i32;
583 } else if (regClass == AMDIL::GPRV4I32RegClassID
584 || regClass == AMDIL::GPRI8RegClassID
585 || regClass == AMDIL::GPRI16RegClassID) {
586 return (unsigned int)AMDIL::ULT_v4i32;
587 } else {
588 assert(!"Unknown reg class!");
589 }
590 case AMDILCC::IL_CC_U_GT:
591 if (regClass == AMDIL::GPRI32RegClassID
592 || regClass == AMDIL::GPRI8RegClassID
593 || regClass == AMDIL::GPRI16RegClassID) {
594 return (unsigned int)AMDIL::UGT;
595 } else if (regClass == AMDIL::GPRV2I32RegClassID
596 || regClass == AMDIL::GPRI8RegClassID
597 || regClass == AMDIL::GPRI16RegClassID) {
598 return (unsigned int)AMDIL::UGT_v2i32;
599 } else if (regClass == AMDIL::GPRV4I32RegClassID
600 || regClass == AMDIL::GPRI8RegClassID
601 || regClass == AMDIL::GPRI16RegClassID) {
602 return (unsigned int)AMDIL::UGT_v4i32;
603 } else {
604 assert(!"Unknown reg class!");
605 }
606 case AMDILCC::IL_CC_UL_LT:
607 return (unsigned int)AMDIL::ULLT;
608 case AMDILCC::IL_CC_UL_GT:
609 return (unsigned int)AMDIL::ULGT;
610 case AMDILCC::IL_CC_F_UEQ:
611 case AMDILCC::IL_CC_D_UEQ:
612 case AMDILCC::IL_CC_F_ONE:
613 case AMDILCC::IL_CC_D_ONE:
614 case AMDILCC::IL_CC_F_O:
615 case AMDILCC::IL_CC_F_UO:
616 case AMDILCC::IL_CC_D_O:
617 case AMDILCC::IL_CC_D_UO:
618 // we don't care
619 return 0;
620
621 }
622 errs()<<"Opcode: "<<CCCode<<"\n";
623 assert(0 && "Unknown opcode retrieved");
624 return 0;
625 }
626
627 /// Helper function used by LowerFormalArguments
628 static const TargetRegisterClass*
629 getRegClassFromType(unsigned int type) {
630 switch (type) {
631 default:
632 assert(0 && "Passed in type does not match any register classes.");
633 case MVT::i8:
634 return &AMDIL::GPRI8RegClass;
635 case MVT::i16:
636 return &AMDIL::GPRI16RegClass;
637 case MVT::i32:
638 return &AMDIL::GPRI32RegClass;
639 case MVT::f32:
640 return &AMDIL::GPRF32RegClass;
641 case MVT::i64:
642 return &AMDIL::GPRI64RegClass;
643 case MVT::f64:
644 return &AMDIL::GPRF64RegClass;
645 case MVT::v4f32:
646 return &AMDIL::GPRV4F32RegClass;
647 case MVT::v4i8:
648 return &AMDIL::GPRV4I8RegClass;
649 case MVT::v4i16:
650 return &AMDIL::GPRV4I16RegClass;
651 case MVT::v4i32:
652 return &AMDIL::GPRV4I32RegClass;
653 case MVT::v2f32:
654 return &AMDIL::GPRV2F32RegClass;
655 case MVT::v2i8:
656 return &AMDIL::GPRV2I8RegClass;
657 case MVT::v2i16:
658 return &AMDIL::GPRV2I16RegClass;
659 case MVT::v2i32:
660 return &AMDIL::GPRV2I32RegClass;
661 case MVT::v2f64:
662 return &AMDIL::GPRV2F64RegClass;
663 case MVT::v2i64:
664 return &AMDIL::GPRV2I64RegClass;
665 }
666 }
667
668 SDValue
669 AMDILTargetLowering::LowerMemArgument(
670 SDValue Chain,
671 CallingConv::ID CallConv,
672 const SmallVectorImpl<ISD::InputArg> &Ins,
673 DebugLoc dl, SelectionDAG &DAG,
674 const CCValAssign &VA,
675 MachineFrameInfo *MFI,
676 unsigned i) const
677 {
678 // Create the nodes corresponding to a load from this parameter slot.
679 ISD::ArgFlagsTy Flags = Ins[i].Flags;
680
681 bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
682 getTargetMachine().Options.GuaranteedTailCallOpt;
683 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
684
685 // FIXME: For now, all byval parameter objects are marked mutable. This can
686 // be changed with more analysis.
687 // In case of tail call optimization mark all arguments mutable. Since they
688 // could be overwritten by lowering of arguments in case of a tail call.
689 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
690 VA.getLocMemOffset(), isImmutable);
691 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
692
693 if (Flags.isByVal())
694 return FIN;
695 return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
696 MachinePointerInfo::getFixedStack(FI),
697 false, false, false, 0);
698 }
699 //===----------------------------------------------------------------------===//
700 // TargetLowering Implementation Help Functions End
701 //===----------------------------------------------------------------------===//
702 //===----------------------------------------------------------------------===//
703 // Instruction generation functions
704 //===----------------------------------------------------------------------===//
705 uint32_t
706 AMDILTargetLowering::addExtensionInstructions(
707 uint32_t reg, bool signedShift,
708 unsigned int simpleVT) const
709 {
710 int shiftSize = 0;
711 uint32_t LShift, RShift;
712 switch(simpleVT)
713 {
714 default:
715 return reg;
716 case AMDIL::GPRI8RegClassID:
717 shiftSize = 24;
718 LShift = AMDIL::SHL_i8;
719 if (signedShift) {
720 RShift = AMDIL::SHR_i8;
721 } else {
722 RShift = AMDIL::USHR_i8;
723 }
724 break;
725 case AMDIL::GPRV2I8RegClassID:
726 shiftSize = 24;
727 LShift = AMDIL::SHL_v2i8;
728 if (signedShift) {
729 RShift = AMDIL::SHR_v2i8;
730 } else {
731 RShift = AMDIL::USHR_v2i8;
732 }
733 break;
734 case AMDIL::GPRV4I8RegClassID:
735 shiftSize = 24;
736 LShift = AMDIL::SHL_v4i8;
737 if (signedShift) {
738 RShift = AMDIL::SHR_v4i8;
739 } else {
740 RShift = AMDIL::USHR_v4i8;
741 }
742 break;
743 case AMDIL::GPRI16RegClassID:
744 shiftSize = 16;
745 LShift = AMDIL::SHL_i16;
746 if (signedShift) {
747 RShift = AMDIL::SHR_i16;
748 } else {
749 RShift = AMDIL::USHR_i16;
750 }
751 break;
752 case AMDIL::GPRV2I16RegClassID:
753 shiftSize = 16;
754 LShift = AMDIL::SHL_v2i16;
755 if (signedShift) {
756 RShift = AMDIL::SHR_v2i16;
757 } else {
758 RShift = AMDIL::USHR_v2i16;
759 }
760 break;
761 case AMDIL::GPRV4I16RegClassID:
762 shiftSize = 16;
763 LShift = AMDIL::SHL_v4i16;
764 if (signedShift) {
765 RShift = AMDIL::SHR_v4i16;
766 } else {
767 RShift = AMDIL::USHR_v4i16;
768 }
769 break;
770 };
771 uint32_t LoadReg = genVReg(simpleVT);
772 uint32_t tmp1 = genVReg(simpleVT);
773 uint32_t tmp2 = genVReg(simpleVT);
774 generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
775 generateMachineInst(LShift, tmp1, reg, LoadReg);
776 generateMachineInst(RShift, tmp2, tmp1, LoadReg);
777 return tmp2;
778 }
779
780 MachineOperand
781 AMDILTargetLowering::convertToReg(MachineOperand op) const
782 {
783 if (op.isReg()) {
784 return op;
785 } else if (op.isImm()) {
786 uint32_t loadReg
787 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
788 generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
789 .addImm(op.getImm());
790 op.ChangeToRegister(loadReg, false);
791 } else if (op.isFPImm()) {
792 uint32_t loadReg
793 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
794 generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
795 .addFPImm(op.getFPImm());
796 op.ChangeToRegister(loadReg, false);
797 } else if (op.isMBB()) {
798 op.ChangeToRegister(0, false);
799 } else if (op.isFI()) {
800 op.ChangeToRegister(0, false);
801 } else if (op.isCPI()) {
802 op.ChangeToRegister(0, false);
803 } else if (op.isJTI()) {
804 op.ChangeToRegister(0, false);
805 } else if (op.isGlobal()) {
806 op.ChangeToRegister(0, false);
807 } else if (op.isSymbol()) {
808 op.ChangeToRegister(0, false);
809 }/* else if (op.isMetadata()) {
810 op.ChangeToRegister(0, false);
811 }*/
812 return op;
813 }
814
815 void
816 AMDILTargetLowering::generateCMPInstr(
817 MachineInstr *MI,
818 MachineBasicBlock *BB,
819 const TargetInstrInfo& TII)
820 const
821 {
822 MachineOperand DST = MI->getOperand(0);
823 MachineOperand CC = MI->getOperand(1);
824 MachineOperand LHS = MI->getOperand(2);
825 MachineOperand RHS = MI->getOperand(3);
826 int64_t ccCode = CC.getImm();
827 unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
828 unsigned int opCode = translateToOpcode(ccCode, simpleVT);
829 DebugLoc DL = MI->getDebugLoc();
830 MachineBasicBlock::iterator BBI = MI;
831 setPrivateData(BB, BBI, &DL, &TII);
832 if (!LHS.isReg()) {
833 LHS = convertToReg(LHS);
834 }
835 if (!RHS.isReg()) {
836 RHS = convertToReg(RHS);
837 }
838 switch (ccCode) {
839 case AMDILCC::IL_CC_I_EQ:
840 case AMDILCC::IL_CC_I_NE:
841 case AMDILCC::IL_CC_I_GE:
842 case AMDILCC::IL_CC_I_LT:
843 {
844 uint32_t lhsreg = addExtensionInstructions(
845 LHS.getReg(), true, simpleVT);
846 uint32_t rhsreg = addExtensionInstructions(
847 RHS.getReg(), true, simpleVT);
848 generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
849 }
850 break;
851 case AMDILCC::IL_CC_U_EQ:
852 case AMDILCC::IL_CC_U_NE:
853 case AMDILCC::IL_CC_U_GE:
854 case AMDILCC::IL_CC_U_LT:
855 case AMDILCC::IL_CC_D_EQ:
856 case AMDILCC::IL_CC_F_EQ:
857 case AMDILCC::IL_CC_F_OEQ:
858 case AMDILCC::IL_CC_D_OEQ:
859 case AMDILCC::IL_CC_D_NE:
860 case AMDILCC::IL_CC_F_NE:
861 case AMDILCC::IL_CC_F_UNE:
862 case AMDILCC::IL_CC_D_UNE:
863 case AMDILCC::IL_CC_D_GE:
864 case AMDILCC::IL_CC_F_GE:
865 case AMDILCC::IL_CC_D_OGE:
866 case AMDILCC::IL_CC_F_OGE:
867 case AMDILCC::IL_CC_D_LT:
868 case AMDILCC::IL_CC_F_LT:
869 case AMDILCC::IL_CC_F_OLT:
870 case AMDILCC::IL_CC_D_OLT:
871 generateMachineInst(opCode, DST.getReg(),
872 LHS.getReg(), RHS.getReg());
873 break;
874 case AMDILCC::IL_CC_I_GT:
875 case AMDILCC::IL_CC_I_LE:
876 {
877 uint32_t lhsreg = addExtensionInstructions(
878 LHS.getReg(), true, simpleVT);
879 uint32_t rhsreg = addExtensionInstructions(
880 RHS.getReg(), true, simpleVT);
881 generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
882 }
883 break;
884 case AMDILCC::IL_CC_U_GT:
885 case AMDILCC::IL_CC_U_LE:
886 case AMDILCC::IL_CC_F_GT:
887 case AMDILCC::IL_CC_D_GT:
888 case AMDILCC::IL_CC_F_OGT:
889 case AMDILCC::IL_CC_D_OGT:
890 case AMDILCC::IL_CC_F_LE:
891 case AMDILCC::IL_CC_D_LE:
892 case AMDILCC::IL_CC_D_OLE:
893 case AMDILCC::IL_CC_F_OLE:
894 generateMachineInst(opCode, DST.getReg(),
895 RHS.getReg(), LHS.getReg());
896 break;
897 case AMDILCC::IL_CC_F_UGT:
898 case AMDILCC::IL_CC_F_ULE:
899 {
900 uint32_t VReg[4] = {
901 genVReg(simpleVT), genVReg(simpleVT),
902 genVReg(simpleVT), genVReg(simpleVT)
903 };
904 generateMachineInst(opCode, VReg[0],
905 RHS.getReg(), LHS.getReg());
906 generateMachineInst(AMDIL::FNE, VReg[1],
907 RHS.getReg(), RHS.getReg());
908 generateMachineInst(AMDIL::FNE, VReg[2],
909 LHS.getReg(), LHS.getReg());
910 generateMachineInst(AMDIL::BINARY_OR_f32,
911 VReg[3], VReg[0], VReg[1]);
912 generateMachineInst(AMDIL::BINARY_OR_f32,
913 DST.getReg(), VReg[2], VReg[3]);
914 }
915 break;
916 case AMDILCC::IL_CC_F_ULT:
917 case AMDILCC::IL_CC_F_UGE:
918 {
919 uint32_t VReg[4] = {
920 genVReg(simpleVT), genVReg(simpleVT),
921 genVReg(simpleVT), genVReg(simpleVT)
922 };
923 generateMachineInst(opCode, VReg[0],
924 LHS.getReg(), RHS.getReg());
925 generateMachineInst(AMDIL::FNE, VReg[1],
926 RHS.getReg(), RHS.getReg());
927 generateMachineInst(AMDIL::FNE, VReg[2],
928 LHS.getReg(), LHS.getReg());
929 generateMachineInst(AMDIL::BINARY_OR_f32,
930 VReg[3], VReg[0], VReg[1]);
931 generateMachineInst(AMDIL::BINARY_OR_f32,
932 DST.getReg(), VReg[2], VReg[3]);
933 }
934 break;
935 case AMDILCC::IL_CC_D_UGT:
936 case AMDILCC::IL_CC_D_ULE:
937 {
938 uint32_t regID = AMDIL::GPRF64RegClassID;
939 uint32_t VReg[4] = {
940 genVReg(regID), genVReg(regID),
941 genVReg(regID), genVReg(regID)
942 };
943 // The result of a double comparison is a 32bit result
944 generateMachineInst(opCode, VReg[0],
945 RHS.getReg(), LHS.getReg());
946 generateMachineInst(AMDIL::DNE, VReg[1],
947 RHS.getReg(), RHS.getReg());
948 generateMachineInst(AMDIL::DNE, VReg[2],
949 LHS.getReg(), LHS.getReg());
950 generateMachineInst(AMDIL::BINARY_OR_f32,
951 VReg[3], VReg[0], VReg[1]);
952 generateMachineInst(AMDIL::BINARY_OR_f32,
953 DST.getReg(), VReg[2], VReg[3]);
954 }
955 break;
956 case AMDILCC::IL_CC_D_UGE:
957 case AMDILCC::IL_CC_D_ULT:
958 {
959 uint32_t regID = AMDIL::GPRF64RegClassID;
960 uint32_t VReg[4] = {
961 genVReg(regID), genVReg(regID),
962 genVReg(regID), genVReg(regID)
963 };
964 // The result of a double comparison is a 32bit result
965 generateMachineInst(opCode, VReg[0],
966 LHS.getReg(), RHS.getReg());
967 generateMachineInst(AMDIL::DNE, VReg[1],
968 RHS.getReg(), RHS.getReg());
969 generateMachineInst(AMDIL::DNE, VReg[2],
970 LHS.getReg(), LHS.getReg());
971 generateMachineInst(AMDIL::BINARY_OR_f32,
972 VReg[3], VReg[0], VReg[1]);
973 generateMachineInst(AMDIL::BINARY_OR_f32,
974 DST.getReg(), VReg[2], VReg[3]);
975 }
976 break;
977 case AMDILCC::IL_CC_F_UEQ:
978 {
979 uint32_t VReg[4] = {
980 genVReg(simpleVT), genVReg(simpleVT),
981 genVReg(simpleVT), genVReg(simpleVT)
982 };
983 generateMachineInst(AMDIL::FEQ, VReg[0],
984 LHS.getReg(), RHS.getReg());
985 generateMachineInst(AMDIL::FNE, VReg[1],
986 LHS.getReg(), LHS.getReg());
987 generateMachineInst(AMDIL::FNE, VReg[2],
988 RHS.getReg(), RHS.getReg());
989 generateMachineInst(AMDIL::BINARY_OR_f32,
990 VReg[3], VReg[0], VReg[1]);
991 generateMachineInst(AMDIL::BINARY_OR_f32,
992 DST.getReg(), VReg[2], VReg[3]);
993 }
994 break;
995 case AMDILCC::IL_CC_F_ONE:
996 {
997 uint32_t VReg[4] = {
998 genVReg(simpleVT), genVReg(simpleVT),
999 genVReg(simpleVT), genVReg(simpleVT)
1000 };
1001 generateMachineInst(AMDIL::FNE, VReg[0],
1002 LHS.getReg(), RHS.getReg());
1003 generateMachineInst(AMDIL::FEQ, VReg[1],
1004 LHS.getReg(), LHS.getReg());
1005 generateMachineInst(AMDIL::FEQ, VReg[2],
1006 RHS.getReg(), RHS.getReg());
1007 generateMachineInst(AMDIL::BINARY_AND_f32,
1008 VReg[3], VReg[0], VReg[1]);
1009 generateMachineInst(AMDIL::BINARY_AND_f32,
1010 DST.getReg(), VReg[2], VReg[3]);
1011 }
1012 break;
1013 case AMDILCC::IL_CC_D_UEQ:
1014 {
1015 uint32_t regID = AMDIL::GPRF64RegClassID;
1016 uint32_t VReg[4] = {
1017 genVReg(regID), genVReg(regID),
1018 genVReg(regID), genVReg(regID)
1019 };
1020 // The result of a double comparison is a 32bit result
1021 generateMachineInst(AMDIL::DEQ, VReg[0],
1022 LHS.getReg(), RHS.getReg());
1023 generateMachineInst(AMDIL::DNE, VReg[1],
1024 LHS.getReg(), LHS.getReg());
1025 generateMachineInst(AMDIL::DNE, VReg[2],
1026 RHS.getReg(), RHS.getReg());
1027 generateMachineInst(AMDIL::BINARY_OR_f32,
1028 VReg[3], VReg[0], VReg[1]);
1029 generateMachineInst(AMDIL::BINARY_OR_f32,
1030 DST.getReg(), VReg[2], VReg[3]);
1031
1032 }
1033 break;
1034 case AMDILCC::IL_CC_D_ONE:
1035 {
1036 uint32_t regID = AMDIL::GPRF64RegClassID;
1037 uint32_t VReg[4] = {
1038 genVReg(regID), genVReg(regID),
1039 genVReg(regID), genVReg(regID)
1040 };
1041 // The result of a double comparison is a 32bit result
1042 generateMachineInst(AMDIL::DNE, VReg[0],
1043 LHS.getReg(), RHS.getReg());
1044 generateMachineInst(AMDIL::DEQ, VReg[1],
1045 LHS.getReg(), LHS.getReg());
1046 generateMachineInst(AMDIL::DEQ, VReg[2],
1047 RHS.getReg(), RHS.getReg());
1048 generateMachineInst(AMDIL::BINARY_AND_f32,
1049 VReg[3], VReg[0], VReg[1]);
1050 generateMachineInst(AMDIL::BINARY_AND_f32,
1051 DST.getReg(), VReg[2], VReg[3]);
1052
1053 }
1054 break;
1055 case AMDILCC::IL_CC_F_O:
1056 {
1057 uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
1058 generateMachineInst(AMDIL::FEQ, VReg[0],
1059 RHS.getReg(), RHS.getReg());
1060 generateMachineInst(AMDIL::FEQ, VReg[1],
1061 LHS.getReg(), LHS.getReg());
1062 generateMachineInst(AMDIL::BINARY_AND_f32,
1063 DST.getReg(), VReg[0], VReg[1]);
1064 }
1065 break;
1066 case AMDILCC::IL_CC_D_O:
1067 {
1068 uint32_t regID = AMDIL::GPRF64RegClassID;
1069 uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
1070 // The result of a double comparison is a 32bit result
1071 generateMachineInst(AMDIL::DEQ, VReg[0],
1072 RHS.getReg(), RHS.getReg());
1073 generateMachineInst(AMDIL::DEQ, VReg[1],
1074 LHS.getReg(), LHS.getReg());
1075 generateMachineInst(AMDIL::BINARY_AND_f32,
1076 DST.getReg(), VReg[0], VReg[1]);
1077 }
1078 break;
1079 case AMDILCC::IL_CC_F_UO:
1080 {
1081 uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
1082 generateMachineInst(AMDIL::FNE, VReg[0],
1083 RHS.getReg(), RHS.getReg());
1084 generateMachineInst(AMDIL::FNE, VReg[1],
1085 LHS.getReg(), LHS.getReg());
1086 generateMachineInst(AMDIL::BINARY_OR_f32,
1087 DST.getReg(), VReg[0], VReg[1]);
1088 }
1089 break;
1090 case AMDILCC::IL_CC_D_UO:
1091 {
1092 uint32_t regID = AMDIL::GPRF64RegClassID;
1093 uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
1094 // The result of a double comparison is a 32bit result
1095 generateMachineInst(AMDIL::DNE, VReg[0],
1096 RHS.getReg(), RHS.getReg());
1097 generateMachineInst(AMDIL::DNE, VReg[1],
1098 LHS.getReg(), LHS.getReg());
1099 generateMachineInst(AMDIL::BINARY_OR_f32,
1100 DST.getReg(), VReg[0], VReg[1]);
1101 }
1102 break;
1103 case AMDILCC::IL_CC_L_LE:
1104 case AMDILCC::IL_CC_L_GE:
1105 case AMDILCC::IL_CC_L_EQ:
1106 case AMDILCC::IL_CC_L_NE:
1107 case AMDILCC::IL_CC_L_LT:
1108 case AMDILCC::IL_CC_L_GT:
1109 case AMDILCC::IL_CC_UL_LE:
1110 case AMDILCC::IL_CC_UL_GE:
1111 case AMDILCC::IL_CC_UL_EQ:
1112 case AMDILCC::IL_CC_UL_NE:
1113 case AMDILCC::IL_CC_UL_LT:
1114 case AMDILCC::IL_CC_UL_GT:
1115 {
1116 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
1117 &this->getTargetMachine())->getSubtargetImpl();
1118 if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
1119 generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg());
1120 } else {
1121 generateLongRelational(MI, opCode);
1122 }
1123 }
1124 break;
1125 case AMDILCC::COND_ERROR:
1126 assert(0 && "Invalid CC code");
1127 break;
1128 };
1129 }
1130
1131 //===----------------------------------------------------------------------===//
1132 // TargetLowering Class Implementation Begins
1133 //===----------------------------------------------------------------------===//
1134 AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
1135 : TargetLowering(TM, new TargetLoweringObjectFileELF())
1136 {
1137 int types[] =
1138 {
1139 (int)MVT::i8,
1140 (int)MVT::i16,
1141 (int)MVT::i32,
1142 (int)MVT::f32,
1143 (int)MVT::f64,
1144 (int)MVT::i64,
1145 (int)MVT::v2i8,
1146 (int)MVT::v4i8,
1147 (int)MVT::v2i16,
1148 (int)MVT::v4i16,
1149 (int)MVT::v4f32,
1150 (int)MVT::v4i32,
1151 (int)MVT::v2f32,
1152 (int)MVT::v2i32,
1153 (int)MVT::v2f64,
1154 (int)MVT::v2i64
1155 };
1156
1157 int IntTypes[] =
1158 {
1159 (int)MVT::i8,
1160 (int)MVT::i16,
1161 (int)MVT::i32,
1162 (int)MVT::i64
1163 };
1164
1165 int FloatTypes[] =
1166 {
1167 (int)MVT::f32,
1168 (int)MVT::f64
1169 };
1170
1171 int VectorTypes[] =
1172 {
1173 (int)MVT::v2i8,
1174 (int)MVT::v4i8,
1175 (int)MVT::v2i16,
1176 (int)MVT::v4i16,
1177 (int)MVT::v4f32,
1178 (int)MVT::v4i32,
1179 (int)MVT::v2f32,
1180 (int)MVT::v2i32,
1181 (int)MVT::v2f64,
1182 (int)MVT::v2i64
1183 };
1184 size_t numTypes = sizeof(types) / sizeof(*types);
1185 size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
1186 size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
1187 size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
1188
1189 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
1190 &this->getTargetMachine())->getSubtargetImpl();
1191 // These are the current register classes that are
1192 // supported
1193
1194 addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
1195 addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
1196
1197 if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
1198 addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
1199 addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
1200 }
1201 if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
1202 addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
1203 addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
1204 addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
1205 setOperationAction(ISD::Constant , MVT::i8 , Legal);
1206 }
1207 if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
1208 addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
1209 addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
1210 addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
1211 setOperationAction(ISD::Constant , MVT::i16 , Legal);
1212 }
1213 addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
1214 addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
1215 addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
1216 addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
1217 if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
1218 addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
1219 addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
1220 }
1221
1222 for (unsigned int x = 0; x < numTypes; ++x) {
1223 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
1224
1225 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
1226 // We cannot sextinreg, expand to shifts
1227 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1228 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1229 setOperationAction(ISD::FP_ROUND, VT, Expand);
1230 setOperationAction(ISD::OR, VT, Custom);
1231 setOperationAction(ISD::SUBE, VT, Expand);
1232 setOperationAction(ISD::SUBC, VT, Expand);
1233 setOperationAction(ISD::ADD, VT, Custom);
1234 setOperationAction(ISD::ADDE, VT, Expand);
1235 setOperationAction(ISD::ADDC, VT, Expand);
1236 setOperationAction(ISD::SETCC, VT, Custom);
1237 setOperationAction(ISD::BRCOND, VT, Custom);
1238 setOperationAction(ISD::BR_CC, VT, Custom);
1239 setOperationAction(ISD::BR_JT, VT, Expand);
1240 setOperationAction(ISD::BRIND, VT, Expand);
1241 // TODO: Implement custom UREM/SREM routines
1242 setOperationAction(ISD::UREM, VT, Expand);
1243 setOperationAction(ISD::SREM, VT, Expand);
1244 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1245 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1246 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1247 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1248 setOperationAction(ISDBITCAST, VT, Custom);
1249 setOperationAction(ISD::GlobalAddress, VT, Custom);
1250 setOperationAction(ISD::JumpTable, VT, Custom);
1251 setOperationAction(ISD::ConstantPool, VT, Custom);
1252 setOperationAction(ISD::SELECT_CC, VT, Custom);
1253 setOperationAction(ISD::SELECT, VT, Custom);
1254 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1255 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1256 if (VT != MVT::i64 && VT != MVT::v2i64) {
1257 setOperationAction(ISD::SDIV, VT, Custom);
1258 setOperationAction(ISD::UDIV, VT, Custom);
1259 }
1260 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1261 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1262 }
1263 for (unsigned int x = 0; x < numFloatTypes; ++x) {
1264 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
1265
1266 // IL does not have these operations for floating point types
1267 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
1268 setOperationAction(ISD::FP_ROUND, VT, Custom);
1269 setOperationAction(ISD::SETOLT, VT, Expand);
1270 setOperationAction(ISD::SETOGE, VT, Expand);
1271 setOperationAction(ISD::SETOGT, VT, Expand);
1272 setOperationAction(ISD::SETOLE, VT, Expand);
1273 setOperationAction(ISD::SETULT, VT, Expand);
1274 setOperationAction(ISD::SETUGE, VT, Expand);
1275 setOperationAction(ISD::SETUGT, VT, Expand);
1276 setOperationAction(ISD::SETULE, VT, Expand);
1277 }
1278
1279 for (unsigned int x = 0; x < numIntTypes; ++x) {
1280 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
1281
1282 // GPU also does not have divrem function for signed or unsigned
1283 setOperationAction(ISD::SDIVREM, VT, Expand);
1284 setOperationAction(ISD::UDIVREM, VT, Expand);
1285 setOperationAction(ISD::FP_ROUND, VT, Expand);
1286
1287 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
1288 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1289 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1290
1291 // GPU doesn't have a rotl, rotr, or byteswap instruction
1292 setOperationAction(ISD::ROTR, VT, Expand);
1293 setOperationAction(ISD::ROTL, VT, Expand);
1294 setOperationAction(ISD::BSWAP, VT, Expand);
1295
1296 // GPU doesn't have any counting operators
1297 setOperationAction(ISD::CTPOP, VT, Expand);
1298 setOperationAction(ISD::CTTZ, VT, Expand);
1299 setOperationAction(ISD::CTLZ, VT, Expand);
1300 }
1301
1302 for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
1303 {
1304 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
1305
1306 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1307 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1308 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1309 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
1310 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1311 setOperationAction(ISD::FP_ROUND, VT, Expand);
1312 setOperationAction(ISD::SDIVREM, VT, Expand);
1313 setOperationAction(ISD::UDIVREM, VT, Expand);
1314 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1315 // setOperationAction(ISD::VSETCC, VT, Expand);
1316 setOperationAction(ISD::SETCC, VT, Expand);
1317 setOperationAction(ISD::SELECT_CC, VT, Expand);
1318 setOperationAction(ISD::SELECT, VT, Expand);
1319
1320 }
1321 setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
1322 if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
1323 if (stm->calVersion() < CAL_VERSION_SC_139
1324 || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
1325 setOperationAction(ISD::MUL, MVT::i64, Custom);
1326 }
1327 setOperationAction(ISD::SUB, MVT::i64, Custom);
1328 setOperationAction(ISD::ADD, MVT::i64, Custom);
1329 setOperationAction(ISD::MULHU, MVT::i64, Expand);
1330 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
1331 setOperationAction(ISD::MULHS, MVT::i64, Expand);
1332 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
1333 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1334 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1335 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1336 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
1337 setOperationAction(ISD::Constant , MVT::i64 , Legal);
1338 setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
1339 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
1340 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
1341 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
1342 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
1343 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
1344 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
1345 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
1346 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
1347 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
1348 }
1349 if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
1350 // we support loading/storing v2f64 but not operations on the type
1351 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
1352 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
1353 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
1354 setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
1355 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
1356 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
1357 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
1358 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
1359 // We want to expand vector conversions into their scalar
1360 // counterparts.
1361 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
1362 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
1363 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
1364 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
1365 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
1366 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
1367 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
1368 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
1369 setOperationAction(ISD::FABS, MVT::f64, Expand);
1370 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
1371 }
1372 // TODO: Fix the UDIV24 algorithm so it works for these
1373 // types correctly. This needs vector comparisons
1374 // for this to work correctly.
1375 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
1376 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
1377 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
1378 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
1379 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
1380 setOperationAction(ISD::SUBC, MVT::Other, Expand);
1381 setOperationAction(ISD::ADDE, MVT::Other, Expand);
1382 setOperationAction(ISD::ADDC, MVT::Other, Expand);
1383 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1384 setOperationAction(ISD::BR_CC, MVT::Other, Custom);
1385 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1386 setOperationAction(ISD::BRIND, MVT::Other, Expand);
1387 setOperationAction(ISD::SETCC, MVT::Other, Custom);
1388 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
1389 setOperationAction(ISD::FDIV, MVT::f32, Custom);
1390 setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
1391 setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
1392
1393 setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
1394 // Use the default implementation.
1395 setOperationAction(ISD::VAARG , MVT::Other, Expand);
1396 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
1397 setOperationAction(ISD::VAEND , MVT::Other, Expand);
1398 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
1399 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
1400 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
1401 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
1402 setOperationAction(ISD::Constant , MVT::i32 , Legal);
1403 setOperationAction(ISD::TRAP , MVT::Other , Legal);
1404
1405 setStackPointerRegisterToSaveRestore(AMDIL::SP);
1406 setSchedulingPreference(Sched::RegPressure);
1407 setPow2DivIsCheap(false);
1408 setPrefLoopAlignment(16);
1409 setSelectIsExpensive(true);
1410 setJumpIsExpensive(true);
1411 computeRegisterProperties();
1412
1413 maxStoresPerMemcpy = 4096;
1414 maxStoresPerMemmove = 4096;
1415 maxStoresPerMemset = 4096;
1416
1417 #undef numTypes
1418 #undef numIntTypes
1419 #undef numVectorTypes
1420 #undef numFloatTypes
1421 }
1422
/// getTargetNodeName - Map an AMDIL-specific SelectionDAG node opcode to a
/// human-readable name for SDNode dumps and DAG-viewer output.  Each entry
/// simply echoes the enumerator's qualified name.  Returns 0 for opcodes not
/// listed here, which makes generic SelectionDAG printing fall back to its
/// default "Unknown node" formatting.
const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
    default: return 0;
    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
    case AMDILISD::DP_TO_FP:  return "AMDILISD::DP_TO_FP";
    case AMDILISD::FP_TO_DP:  return "AMDILISD::FP_TO_DP";
    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
    case AMDILISD::CMOV:  return "AMDILISD::CMOV";
    case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
    case AMDILISD::INEGATE:  return "AMDILISD::INEGATE";
    case AMDILISD::MAD:  return "AMDILISD::MAD";
    case AMDILISD::UMAD:  return "AMDILISD::UMAD";
    case AMDILISD::CALL:  return "AMDILISD::CALL";
    case AMDILISD::RET:   return "AMDILISD::RET";
    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
    case AMDILISD::ADD: return "AMDILISD::ADD";
    case AMDILISD::UMUL: return "AMDILISD::UMUL";
    case AMDILISD::AND: return "AMDILISD::AND";
    case AMDILISD::OR: return "AMDILISD::OR";
    case AMDILISD::NOT: return "AMDILISD::NOT";
    case AMDILISD::XOR: return "AMDILISD::XOR";
    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
    case AMDILISD::SMAX: return "AMDILISD::SMAX";
    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
    case AMDILISD::MOVE: return "AMDILISD::MOVE";
    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
    // 64-bit long / double value construction and component extraction.
    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
    // Comparison and control-flow nodes.
    case AMDILISD::CMP: return "AMDILISD::CMP";
    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
    // Atomics: _G_ = global memory, _L_ = local memory, _R_ = region
    // memory; the _NORET variants discard the fetched value.
    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
    case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
    // NOTE(review): ATOM_L_XOR_NORET has no entry in this table, unlike its
    // _G_ and _R_ counterparts -- presumably an omission; confirm against
    // the AMDILISD opcode enum before relying on its printed name.
    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
    // Append/consume buffer and image intrinsic nodes.
    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";

  };
}
1580 bool
1581 AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1582 const CallInst &I, unsigned Intrinsic) const
1583 {
1584 if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
1585 || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
1586 return false;
1587 }
1588 bool bitCastToInt = false;
1589 unsigned IntNo;
1590 bool isRet = true;
1591 const AMDILSubtarget *STM = &this->getTargetMachine()
1592 .getSubtarget<AMDILSubtarget>();
1593 switch (Intrinsic) {
1594 default: return false; // Don't custom lower most intrinsics.
1595 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
1596 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
1597 IntNo = AMDILISD::ATOM_G_ADD; break;
1598 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
1599 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
1600 isRet = false;
1601 IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
1602 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
1603 case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
1604 IntNo = AMDILISD::ATOM_L_ADD; break;
1605 case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
1606 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
1607 isRet = false;
1608 IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
1609 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
1610 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
1611 IntNo = AMDILISD::ATOM_R_ADD; break;
1612 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
1613 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
1614 isRet = false;
1615 IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
1616 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
1617 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
1618 IntNo = AMDILISD::ATOM_G_AND; break;
1619 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
1620 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
1621 isRet = false;
1622 IntNo = AMDILISD::ATOM_G_AND_NORET; break;
1623 case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
1624 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
1625 IntNo = AMDILISD::ATOM_L_AND; break;
1626 case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
1627 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
1628 isRet = false;
1629 IntNo = AMDILISD::ATOM_L_AND_NORET; break;
1630 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
1631 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
1632 IntNo = AMDILISD::ATOM_R_AND; break;
1633 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
1634 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
1635 isRet = false;
1636 IntNo = AMDILISD::ATOM_R_AND_NORET; break;
1637 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
1638 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
1639 IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
1640 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
1641 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
1642 isRet = false;
1643 IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
1644 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
1645 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
1646 IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
1647 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
1648 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
1649 isRet = false;
1650 IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
1651 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
1652 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
1653 IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
1654 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
1655 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
1656 isRet = false;
1657 IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
1658 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
1659 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
1660 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1661 IntNo = AMDILISD::ATOM_G_DEC;
1662 } else {
1663 IntNo = AMDILISD::ATOM_G_SUB;
1664 }
1665 break;
1666 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
1667 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
1668 isRet = false;
1669 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1670 IntNo = AMDILISD::ATOM_G_DEC_NORET;
1671 } else {
1672 IntNo = AMDILISD::ATOM_G_SUB_NORET;
1673 }
1674 break;
1675 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
1676 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
1677 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1678 IntNo = AMDILISD::ATOM_L_DEC;
1679 } else {
1680 IntNo = AMDILISD::ATOM_L_SUB;
1681 }
1682 break;
1683 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
1684 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
1685 isRet = false;
1686 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1687 IntNo = AMDILISD::ATOM_L_DEC_NORET;
1688 } else {
1689 IntNo = AMDILISD::ATOM_L_SUB_NORET;
1690 }
1691 break;
1692 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
1693 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
1694 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1695 IntNo = AMDILISD::ATOM_R_DEC;
1696 } else {
1697 IntNo = AMDILISD::ATOM_R_SUB;
1698 }
1699 break;
1700 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
1701 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
1702 isRet = false;
1703 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1704 IntNo = AMDILISD::ATOM_R_DEC_NORET;
1705 } else {
1706 IntNo = AMDILISD::ATOM_R_SUB_NORET;
1707 }
1708 break;
1709 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
1710 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
1711 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1712 IntNo = AMDILISD::ATOM_G_INC;
1713 } else {
1714 IntNo = AMDILISD::ATOM_G_ADD;
1715 }
1716 break;
1717 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
1718 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
1719 isRet = false;
1720 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1721 IntNo = AMDILISD::ATOM_G_INC_NORET;
1722 } else {
1723 IntNo = AMDILISD::ATOM_G_ADD_NORET;
1724 }
1725 break;
1726 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
1727 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
1728 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1729 IntNo = AMDILISD::ATOM_L_INC;
1730 } else {
1731 IntNo = AMDILISD::ATOM_L_ADD;
1732 }
1733 break;
1734 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
1735 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
1736 isRet = false;
1737 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1738 IntNo = AMDILISD::ATOM_L_INC_NORET;
1739 } else {
1740 IntNo = AMDILISD::ATOM_L_ADD_NORET;
1741 }
1742 break;
1743 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
1744 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
1745 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1746 IntNo = AMDILISD::ATOM_R_INC;
1747 } else {
1748 IntNo = AMDILISD::ATOM_R_ADD;
1749 }
1750 break;
1751 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
1752 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
1753 isRet = false;
1754 if (STM->calVersion() >= CAL_VERSION_SC_136) {
1755 IntNo = AMDILISD::ATOM_R_INC_NORET;
1756 } else {
1757 IntNo = AMDILISD::ATOM_R_ADD_NORET;
1758 }
1759 break;
1760 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
1761 IntNo = AMDILISD::ATOM_G_MAX; break;
1762 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
1763 IntNo = AMDILISD::ATOM_G_UMAX; break;
1764 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
1765 isRet = false;
1766 IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
1767 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
1768 isRet = false;
1769 IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
1770 case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
1771 IntNo = AMDILISD::ATOM_L_MAX; break;
1772 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
1773 IntNo = AMDILISD::ATOM_L_UMAX; break;
1774 case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
1775 isRet = false;
1776 IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
1777 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
1778 isRet = false;
1779 IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
1780 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
1781 IntNo = AMDILISD::ATOM_R_MAX; break;
1782 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
1783 IntNo = AMDILISD::ATOM_R_UMAX; break;
1784 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
1785 isRet = false;
1786 IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
1787 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
1788 isRet = false;
1789 IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
1790 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
1791 IntNo = AMDILISD::ATOM_G_MIN; break;
1792 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
1793 IntNo = AMDILISD::ATOM_G_UMIN; break;
1794 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
1795 isRet = false;
1796 IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
1797 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
1798 isRet = false;
1799 IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
1800 case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
1801 IntNo = AMDILISD::ATOM_L_MIN; break;
1802 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
1803 IntNo = AMDILISD::ATOM_L_UMIN; break;
1804 case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
1805 isRet = false;
1806 IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
1807 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
1808 isRet = false;
1809 IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
1810 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
1811 IntNo = AMDILISD::ATOM_R_MIN; break;
1812 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
1813 IntNo = AMDILISD::ATOM_R_UMIN; break;
1814 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
1815 isRet = false;
1816 IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
1817 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
1818 isRet = false;
1819 IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
1820 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
1821 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
1822 IntNo = AMDILISD::ATOM_G_OR; break;
1823 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
1824 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
1825 isRet = false;
1826 IntNo = AMDILISD::ATOM_G_OR_NORET; break;
1827 case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
1828 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
1829 IntNo = AMDILISD::ATOM_L_OR; break;
1830 case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
1831 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
1832 isRet = false;
1833 IntNo = AMDILISD::ATOM_L_OR_NORET; break;
1834 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
1835 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
1836 IntNo = AMDILISD::ATOM_R_OR; break;
1837 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
1838 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
1839 isRet = false;
1840 IntNo = AMDILISD::ATOM_R_OR_NORET; break;
1841 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
1842 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
1843 IntNo = AMDILISD::ATOM_G_SUB; break;
1844 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
1845 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
1846 isRet = false;
1847 IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
1848 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
1849 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
1850 IntNo = AMDILISD::ATOM_L_SUB; break;
1851 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
1852 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
1853 isRet = false;
1854 IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
1855 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
1856 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
1857 IntNo = AMDILISD::ATOM_R_SUB; break;
1858 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
1859 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
1860 isRet = false;
1861 IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
1862 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
1863 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
1864 IntNo = AMDILISD::ATOM_G_RSUB; break;
1865 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
1866 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
1867 isRet = false;
1868 IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
1869 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
1870 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
1871 IntNo = AMDILISD::ATOM_L_RSUB; break;
1872 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
1873 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
1874 isRet = false;
1875 IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
1876 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
1877 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
1878 IntNo = AMDILISD::ATOM_R_RSUB; break;
1879 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
1880 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
1881 isRet = false;
1882 IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
1883 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
1884 bitCastToInt = true;
1885 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
1886 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
1887 IntNo = AMDILISD::ATOM_G_XCHG; break;
1888 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
1889 bitCastToInt = true;
1890 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
1891 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
1892 isRet = false;
1893 IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
1894 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
1895 bitCastToInt = true;
1896 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
1897 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
1898 IntNo = AMDILISD::ATOM_L_XCHG; break;
1899 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
1900 bitCastToInt = true;
1901 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
1902 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
1903 isRet = false;
1904 IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
1905 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
1906 bitCastToInt = true;
1907 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
1908 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
1909 IntNo = AMDILISD::ATOM_R_XCHG; break;
1910 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
1911 bitCastToInt = true;
1912 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
1913 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
1914 isRet = false;
1915 IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
1916 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
1917 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
1918 IntNo = AMDILISD::ATOM_G_XOR; break;
1919 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
1920 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
1921 isRet = false;
1922 IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
1923 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
1924 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
1925 IntNo = AMDILISD::ATOM_L_XOR; break;
1926 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
1927 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
1928 isRet = false;
1929 IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
1930 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
1931 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
1932 IntNo = AMDILISD::ATOM_R_XOR; break;
1933 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
1934 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
1935 isRet = false;
1936 IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
1937 case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
1938 IntNo = AMDILISD::APPEND_ALLOC; break;
1939 case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
1940 isRet = false;
1941 IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
1942 case AMDGPUIntrinsic::AMDIL_append_consume_i32:
1943 IntNo = AMDILISD::APPEND_CONSUME; break;
1944 case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
1945 isRet = false;
1946 IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
1947 };
1948
1949 Info.opc = IntNo;
1950 Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
1951 Info.ptrVal = I.getOperand(0);
1952 Info.offset = 0;
1953 Info.align = 4;
1954 Info.vol = true;
1955 Info.readMem = isRet;
1956 Info.writeMem = true;
1957 return true;
1958 }
1959 // The backend supports 32 and 64 bit floating point immediates
1960 bool
1961 AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
1962 {
1963 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1964 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1965 return true;
1966 } else {
1967 return false;
1968 }
1969 }
1970
1971 bool
1972 AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
1973 {
1974 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1975 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1976 return false;
1977 } else {
1978 return true;
1979 }
1980 }
1981
1982
1983 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
1984 // be zero. Op is expected to be a target specific node. Used by DAG
1985 // combiner.
1986
1987 void
1988 AMDILTargetLowering::computeMaskedBitsForTargetNode(
1989 const SDValue Op,
1990 APInt &KnownZero,
1991 APInt &KnownOne,
1992 const SelectionDAG &DAG,
1993 unsigned Depth) const
1994 {
1995 APInt KnownZero2;
1996 APInt KnownOne2;
1997 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
1998 switch (Op.getOpcode()) {
1999 default: break;
2000 case AMDILISD::SELECT_CC:
2001 DAG.ComputeMaskedBits(
2002 Op.getOperand(1),
2003 KnownZero,
2004 KnownOne,
2005 Depth + 1
2006 );
2007 DAG.ComputeMaskedBits(
2008 Op.getOperand(0),
2009 KnownZero2,
2010 KnownOne2
2011 );
2012 assert((KnownZero & KnownOne) == 0
2013 && "Bits known to be one AND zero?");
2014 assert((KnownZero2 & KnownOne2) == 0
2015 && "Bits known to be one AND zero?");
2016 // Only known if known in both the LHS and RHS
2017 KnownOne &= KnownOne2;
2018 KnownZero &= KnownZero2;
2019 break;
2020 };
2021 }
2022
2023 // This is the function that determines which calling convention should
2024 // be used. Currently there is only one calling convention
2025 CCAssignFn*
2026 AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
2027 {
2028 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2029 return CC_AMDIL32;
2030 }
2031
2032 // LowerCallResult - Lower the result values of an ISD::CALL into the
2033 // appropriate copies out of appropriate physical registers. This assumes that
2034 // Chain/InFlag are the input chain/flag to use, and that TheCall is the call
2035 // being lowered. The returns a SDNode with the same number of values as the
2036 // ISD::CALL.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      // getCopyFromReg yields (value:0, chain:1, glue:2). Keep the
      // chain result here, then peel the value and glue off the same
      // node below so successive copies are glued in order.
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0);  // the copied result value
      InFlag = Chain.getValue(2);       // glue feeding the next copy
      InVals.push_back(Val);
    }
    // NOTE(review): a non-register location is silently skipped,
    // leaving InVals shorter than Ins — presumably all AMDIL results
    // are returned in registers; confirm against RetCC_AMDIL32.
  }

  return Chain;

}
2074
2075 //===----------------------------------------------------------------------===//
2076 // Other Lowering Hooks
2077 //===----------------------------------------------------------------------===//
2078
// Custom-insert pseudo instructions during scheduling. Only the CMP
// pseudo (in every typed variant) is handled; everything else is left
// in place.
MachineBasicBlock *
AMDILTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const
{
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
    // ExpandCaseToAllTypes emits one case label per typed CMP variant.
    ExpandCaseToAllTypes(AMDIL::CMP);
    generateCMPInstr(MI, BB, TII);
    // The pseudo has been replaced by real compare instructions above.
    MI->eraseFromParent();
    break;
    default:
    break;
  }
  return BB;
}
2094
2095 // Recursively assign SDNodeOrdering to any unordered nodes
2096 // This is necessary to maintain source ordering of instructions
2097 // under -O0 to avoid odd-looking "skipping around" issues.
2098 static const SDValue
2099 Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
2100 {
2101 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
2102 DAG.AssignOrdering( New.getNode(), order );
2103 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
2104 Ordered( DAG, order, New.getOperand(i) );
2105 }
2106 return New;
2107 }
2108
2109 #define LOWER(A) \
2110 case ISD:: A: \
2111 return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
2112
2113 SDValue
2114 AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2115 {
2116 switch (Op.getOpcode()) {
2117 default:
2118 Op.getNode()->dump();
2119 assert(0 && "Custom lowering code for this"
2120 "instruction is not implemented yet!");
2121 break;
2122 LOWER(GlobalAddress);
2123 LOWER(JumpTable);
2124 LOWER(ConstantPool);
2125 LOWER(ExternalSymbol);
2126 LOWER(FP_TO_SINT);
2127 LOWER(FP_TO_UINT);
2128 LOWER(SINT_TO_FP);
2129 LOWER(UINT_TO_FP);
2130 LOWER(ADD);
2131 LOWER(MUL);
2132 LOWER(SUB);
2133 LOWER(FDIV);
2134 LOWER(SDIV);
2135 LOWER(SREM);
2136 LOWER(UDIV);
2137 LOWER(UREM);
2138 LOWER(BUILD_VECTOR);
2139 LOWER(INSERT_VECTOR_ELT);
2140 LOWER(EXTRACT_VECTOR_ELT);
2141 LOWER(EXTRACT_SUBVECTOR);
2142 LOWER(SCALAR_TO_VECTOR);
2143 LOWER(CONCAT_VECTORS);
2144 LOWER(AND);
2145 LOWER(OR);
2146 LOWER(SELECT);
2147 LOWER(SELECT_CC);
2148 LOWER(SETCC);
2149 LOWER(SIGN_EXTEND_INREG);
2150 LOWER(BITCAST);
2151 LOWER(DYNAMIC_STACKALLOC);
2152 LOWER(BRCOND);
2153 LOWER(BR_CC);
2154 LOWER(FP_ROUND);
2155 }
2156 return Op;
2157 }
2158
2159 int
2160 AMDILTargetLowering::getVarArgsFrameOffset() const
2161 {
2162 return VarArgsFrameOffset;
2163 }
2164 #undef LOWER
2165
2166 SDValue
2167 AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
2168 {
2169 SDValue DST = Op;
2170 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
2171 const GlobalValue *G = GADN->getGlobal();
2172 DebugLoc DL = Op.getDebugLoc();
2173 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
2174 if (!GV) {
2175 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2176 } else {
2177 if (GV->hasInitializer()) {
2178 const Constant *C = dyn_cast<Constant>(GV->getInitializer());
2179 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
2180 DST = DAG.getConstant(CI->getValue(), Op.getValueType());
2181 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
2182 DST = DAG.getConstantFP(CF->getValueAPF(),
2183 Op.getValueType());
2184 } else if (dyn_cast<ConstantAggregateZero>(C)) {
2185 EVT VT = Op.getValueType();
2186 if (VT.isInteger()) {
2187 DST = DAG.getConstant(0, VT);
2188 } else {
2189 DST = DAG.getConstantFP(0, VT);
2190 }
2191 } else {
2192 assert(!"lowering this type of Global Address "
2193 "not implemented yet!");
2194 C->dump();
2195 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2196 }
2197 } else {
2198 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2199 }
2200 }
2201 return DST;
2202 }
2203
2204 SDValue
2205 AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
2206 {
2207 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2208 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
2209 return Result;
2210 }
2211 SDValue
2212 AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
2213 {
2214 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2215 EVT PtrVT = Op.getValueType();
2216 SDValue Result;
2217 if (CP->isMachineConstantPoolEntry()) {
2218 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2219 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
2220 } else {
2221 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2222 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
2223 }
2224 return Result;
2225 }
2226
2227 SDValue
2228 AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
2229 {
2230 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
2231 SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
2232 return Result;
2233 }
2234
2235 /// LowerFORMAL_ARGUMENTS - transform physical registers into
2236 /// virtual registers and generate load operations for
2237 /// arguments places on the stack.
2238 /// TODO: isVarArg, hasStructRet, isMemReg
SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  // NOTE(review): the convention is read from the Function rather than
  // from the CallConv parameter — presumably they always agree; verify.
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // Register argument: mark the physreg live-in and copy it into a
      // fresh virtual register of the matching register class.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        // Narrow the promoted value back down to its declared type.
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack argument: load it from its fixed frame slot.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
2335 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
2336 /// by "Src" to address "Dst" with size and alignment information specified by
2337 /// the specific parameter attribute. The copy will be passed as a byval
2338 /// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  // Deliberately unfinished: aborts in asserts builds. In builds where
  // assert compiles away, control falls through and a memcpy node is
  // emitted with the byval size/alignment from the argument flags.
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}
2351
2352 SDValue
2353 AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
2354 SDValue StackPtr, SDValue Arg,
2355 DebugLoc dl, SelectionDAG &DAG,
2356 const CCValAssign &VA,
2357 ISD::ArgFlagsTy Flags) const
2358 {
2359 unsigned int LocMemOffset = VA.getLocMemOffset();
2360 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
2361 PtrOff = DAG.getNode(ISD::ADD,
2362 dl,
2363 getPointerTy(), StackPtr, PtrOff);
2364 if (Flags.isByVal()) {
2365 PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
2366 } else {
2367 PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
2368 MachinePointerInfo::getStack(LocMemOffset),
2369 false, false, 0);
2370 }
2371 return PtrOff;
2372 }
2373 /// LowerCAL - functions arguments are copied from virtual
2374 /// regs to (physical regs)/(stack frame), CALLSEQ_START and
2375 /// CALLSEQ_END are emitted.
2376 /// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are never generated; report that back through the
  // in/out flag so the caller does not expect one.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyize the calling operands, but need to change
  // if we have more than one calling convetion
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  // Open the call sequence, reserving NumBytes of outgoing-arg stack.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    //Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
    }

    if (VA.isRegLoc()) {
      // Register argument: defer the physreg copy until after all
      // stack stores so the copies can be glued to the call.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE whichs stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Join all of the independent argument stores into one chain token.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    // Copy register arguments into their physregs, glueing each copy
    // to the previous one so they stay adjacent to the call node.
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  // The call produces a chain and a glue result.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  // Emit the call node itself.
  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
// checkMADType - Classify whether an ADD whose result feeds load/store
// addressing could be converted into a 24-bit or 32-bit MAD, based on
// the address spaces of its users. Both out-flags default to false.
static void checkMADType(
    SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
{
  bool globalLoadStore = false;
  is24bitMAD = false;
  is32bitMAD = false;
  // The early return below deliberately disables this analysis:
  // callers always observe false/false and everything after this
  // point is dead code, kept for a future re-enable.
  return;
  assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
      "this to work correctly!");
  if (Op.getNode()->use_empty()) {
    return;
  }
  for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
      nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
    SDNode *ptr = *nBegin;
    const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
    // If we are not a LSBaseSDNode then we don't do this
    // optimization.
    // If we are a LSBaseSDNode, but the op is not the offset
    // or base pointer, then we don't do this optimization
    // (i.e. we are the value being stored)
    if (!lsNode ||
        (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
      return;
    }
    const PointerType *PT =
      dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
    unsigned as = PT->getAddressSpace();
    switch(as) {
      default:
        // Unknown address space counts as global.
        // NOTE(review): no break — this falls through into the
        // PRIVATE_ADDRESS case; looks intentional but confirm.
        globalLoadStore = true;
      case AMDILAS::PRIVATE_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::CONSTANT_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::LOCAL_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::REGION_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
          globalLoadStore = true;
        }
        break;
    }
  }
  // Any emulated-memory user forces the wider 32-bit MAD form.
  if (globalLoadStore) {
    is32bitMAD = true;
  } else {
    is24bitMAD = true;
  }
}
2610
// LowerADD - Custom-lower ISD::ADD. 64-bit adds either pass through
// (when the device has hardware long ops, scalar only) or are split
// into a lo/hi 32-bit add pair with manual carry; 32-bit adds involving
// a FrameIndex become ADDADDR, otherwise a plain AMDILISD::ADD.
SDValue
AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  const AMDILSubtarget *stm = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() == MVT::i64) {
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
        && INTTY == MVT::i32) {
      // Hardware 64-bit integer add (scalar i64 only).
      DST = DAG.getNode(AMDILISD::ADD,
          DL,
          OVT,
          LHS, RHS);
    } else {
      SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
      // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
      LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
      RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
      LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
      RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
      INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
      // Carry detection: the low sum being unsigned-less-than one of
      // its addends means the add wrapped; the CMP mask is negated to
      // produce +1, which is folded into the high word.
      SDValue cmp;
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          INTLO, RHSLO);
      cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
      // Reassemble the 64-bit result from the lo/hi halves.
      DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
          INTLO, INTHI);
    }
  } else {
    if (LHS.getOpcode() == ISD::FrameIndex ||
        RHS.getOpcode() == ISD::FrameIndex) {
      // Frame-index arithmetic uses the dedicated ADDADDR node.
      DST = DAG.getNode(AMDILISD::ADDADDR,
          DL,
          OVT,
          LHS, RHS);
    } else {
      if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
          && LHS.getNumOperands()
          && RHS.getNumOperands()) {
        bool is24bitMAD = false;
        bool is32bitMAD = false;
        const ConstantSDNode *LHSConstOpCode =
          dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
        const ConstantSDNode *RHSConstOpCode =
          dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
        // (x << c) + y and (x * y) + z are MAD candidates.
        if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
            || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
            || LHS.getOpcode() == ISD::MUL
            || RHS.getOpcode() == ISD::MUL) {
          SDValue Op1, Op2, Op3;
          // FIXME: Fix this so that it works for unsigned 24bit ops.
          if (LHS.getOpcode() == ISD::MUL) {
            Op1 = LHS.getOperand(0);
            Op2 = LHS.getOperand(1);
            Op3 = RHS;
          } else if (RHS.getOpcode() == ISD::MUL) {
            Op1 = RHS.getOperand(0);
            Op2 = RHS.getOperand(1);
            Op3 = LHS;
          } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
            // shl x, c is rewritten as mul x, (1 << c).
            Op1 = LHS.getOperand(0);
            Op2 = DAG.getConstant(
                1 << LHSConstOpCode->getZExtValue(), MVT::i32);
            Op3 = RHS;
          } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
            Op1 = RHS.getOperand(0);
            Op2 = DAG.getConstant(
                1 << RHSConstOpCode->getZExtValue(), MVT::i32);
            Op3 = LHS;
          }
          // NOTE(review): checkMADType currently always reports
          // false/false (it returns early), so both MAD branches below
          // are dead. Even if re-enabled, DST is unconditionally
          // overwritten by the plain ADD after this block — confirm
          // whether a return/else is missing before re-enabling.
          checkMADType(Op, stm, is24bitMAD, is32bitMAD);
          // We can possibly do a MAD transform!
          if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
            uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
                DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
                Op1, Op2, Op3);
          } else if(is32bitMAD) {
            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
                DL, Tys, DAG.getEntryNode(),
                DAG.getConstant(
                  AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
                Op1, Op2, Op3);
          }
        }
      }
      DST = DAG.getNode(AMDILISD::ADD,
          DL,
          OVT,
          LHS, RHS);
    }
  }
  return DST;
}
2719 SDValue
2720 AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
2721 uint32_t bits) const
2722 {
2723 DebugLoc DL = Op.getDebugLoc();
2724 EVT INTTY = Op.getValueType();
2725 EVT FPTY;
2726 if (INTTY.isVector()) {
2727 FPTY = EVT(MVT::getVectorVT(MVT::f32,
2728 INTTY.getVectorNumElements()));
2729 } else {
2730 FPTY = EVT(MVT::f32);
2731 }
2732 /* static inline uint
2733 __clz_Nbit(uint x)
2734 {
2735 int xor = 0x3f800000U | x;
2736 float tp = as_float(xor);
2737 float t = tp + -1.0f;
2738 uint tint = as_uint(t);
2739 int cmp = (x != 0);
2740 uint tsrc = tint >> 23;
2741 uint tmask = tsrc & 0xffU;
2742 uint cst = (103 + N)U - tmask;
2743 return cmp ? cst : N;
2744 }
2745 */
2746 assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
2747 && "genCLZu16 only works on 32bit types");
2748 // uint x = Op
2749 SDValue x = Op;
2750 // xornode = 0x3f800000 | x
2751 SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
2752 DAG.getConstant(0x3f800000, INTTY), x);
2753 // float tp = as_float(xornode)
2754 SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
2755 // float t = tp + -1.0f
2756 SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
2757 DAG.getConstantFP(-1.0f, FPTY));
2758 // uint tint = as_uint(t)
2759 SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
2760 // int cmp = (x != 0)
2761 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2762 DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
2763 DAG.getConstant(0, INTTY));
2764 // uint tsrc = tint >> 23
2765 SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
2766 DAG.getConstant(23, INTTY));
2767 // uint tmask = tsrc & 0xFF
2768 SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
2769 DAG.getConstant(0xFFU, INTTY));
2770 // uint cst = (103 + bits) - tmask
2771 SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
2772 DAG.getConstant((103U + bits), INTTY), tmask);
2773 // return cmp ? cst : N
2774 cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
2775 DAG.getConstant(bits, INTTY));
2776 return cst;
2777 }
2778
2779 SDValue
2780 AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
2781 {
2782 SDValue DST = SDValue();
2783 DebugLoc DL = Op.getDebugLoc();
2784 EVT INTTY = Op.getValueType();
2785 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2786 &this->getTargetMachine())->getSubtargetImpl();
2787 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
2788 //__clz_32bit(uint u)
2789 //{
2790 // int z = __amdil_ffb_hi(u) ;
2791 // return z < 0 ? 32 : z;
2792 // }
2793 // uint u = op
2794 SDValue u = Op;
2795 // int z = __amdil_ffb_hi(u)
2796 SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
2797 // int cmp = z < 0
2798 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2799 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
2800 z, DAG.getConstant(0, INTTY));
2801 // return cmp ? 32 : z
2802 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
2803 DAG.getConstant(32, INTTY), z);
2804 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
2805 // static inline uint
2806 //__clz_32bit(uint x)
2807 //{
2808 // uint zh = __clz_16bit(x >> 16);
2809 // uint zl = __clz_16bit(x & 0xffffU);
2810 // return zh == 16U ? 16U + zl : zh;
2811 //}
2812 // uint x = Op
2813 SDValue x = Op;
2814 // uint xs16 = x >> 16
2815 SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
2816 DAG.getConstant(16, INTTY));
2817 // uint zh = __clz_16bit(xs16)
2818 SDValue zh = genCLZuN(xs16, DAG, 16);
2819 // uint xa16 = x & 0xFFFF
2820 SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
2821 DAG.getConstant(0xFFFFU, INTTY));
2822 // uint zl = __clz_16bit(xa16)
2823 SDValue zl = genCLZuN(xa16, DAG, 16);
2824 // uint cmp = zh == 16U
2825 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2826 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
2827 zh, DAG.getConstant(16U, INTTY));
2828 // uint zl16 = zl + 16
2829 SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
2830 DAG.getConstant(16, INTTY), zl);
2831 // return cmp ? zl16 : zh
2832 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
2833 cmp, zl16, zh);
2834 } else {
2835 assert(0 && "Attempting to generate a CLZ function with an"
2836 " unknown graphics card");
2837 }
2838 return DST;
2839 }
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  // Count the leading zeros of a 64-bit scalar or i64-element vector
  // value, returning an i32 (or i32-vector) count in [0, 64].
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
          .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //uint zhi = __clz_32bit((uint)(x >> 32));
    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = x & 0xFFFFFFFF (low 32 bits)
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = x >> 32 (high 32 bits)
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32 (high word was all zeros)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? zlop32: zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX: split the 64-bit value into three 23-bit chunks, since the
    // float-trick clz primitive (genCLZuN) handles at most 23 bits.
    // static inline uint
    //__clz_64bit(ulong x)
    //{
    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //uint r = zh == 18U ? 18U + zm : zh;
    //return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5  (top chunk holds only 64-46 = 18 real bits)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = (zhm5 == 18)  (top 18 bits all zero)
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = zhm5zm == 41  (top 41 bits all zero)
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ? zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  // Software expansion of f64 -> i64 (includeSign) or f64 -> u64
  // (!includeSign) conversion, scalar or per-lane for vectors.
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
          .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
          .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // Newer hardware can do the 32-bit conversions natively, so split
    // the double into two 32-bit halves:
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // NOTE(review): 0x2f800000 / 0xcf800000 are the IEEE-754 single
    // precision bit patterns of 0x1.0p-32 / -0x1.0p+32, yet they are
    // passed to getConstantFP as plain numeric values - confirm the
    // backend materializes the intended powers of two here.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      // Restore the sign: pick between l and -l by comparing the
      // original input against its absolute value.
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    // Older hardware: decode the double manually from its bit pattern.
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
       // Convert d in to 32-bit components
       long x = as_long(d);
       xhi = LCOMPHI(x);
       xlo = LCOMPLO(x);

       // Generate 'normalized' mantissa
       mhi = xhi | 0x00100000; // hidden bit
       mhi <<= 11;
       temp = xlo >> (32 - 11);
       mhi |= temp
       mlo = xlo << 11;

       // Compute shift right count from exponent
       e = (xhi >> (52-32)) & 0x7ff;
       sr = 1023 + 63 - e;
       srge64 = sr >= 64;
       srge32 = sr >= 32;

       // Compute result for 0 <= sr < 32
       rhi0 = mhi >> (sr &31);
       rlo0 = mlo >> (sr &31);
       temp = mhi << (32 - sr);
       temp |= rlo0;
       rlo0 = sr ? temp : rlo0;

       // Compute result for 32 <= sr
       rhi1 = 0;
       rlo1 = srge64 ? 0 : rhi0;

       // Pick between the 2 results
       rhi = srge32 ? rhi1 : rhi0;
       rlo = srge32 ? rlo1 : rlo0;

       // Optional saturate on overflow
       srlt0 = sr < 0;
       rhi = srlt0 ? MAXVALUE : rhi;
       rlo = srlt0 ? MAXVALUE : rlo;

       // Create long
       res = LCREATE( rlo, rhi );

       // Deal with sign bit (ignoring whether result is signed or unsigned value)
       if (includeSign) {
       sign = ((signed int) xhi) >> 31; fill with sign bit
       sign = LCREATE( sign, sign );
       res += sign;
       res ^= sign;
       }

       return res;
       }
     */
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa: set the hidden bit and left-align
    // the 53-bit mantissa in a 64-bit (hi:lo) pair.
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
    temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
    // When sr == 0 the (mhi << 32) term is invalid, so keep plain rlo0.
    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );

    // Compute result for 32 <= sr
    SDValue rhi1 = DAG.getConstant( 0, INTVT );
    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge64, rhi1, rhi0 );

    // Pick between the 2 results
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rhi1, rhi0 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rlo1, rlo0 );

    // Create long
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );

    // Deal with sign bit: two's-complement negate via add/xor with a
    // 64-bit value made of the replicated sign bit.
    if (includeSign) {
      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
          xhi, DAG.getConstant( 31, INTVT ) );
      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  // Software expansion of f64 -> i32 (includeSign) or f64 -> u32
  // (!includeSign) conversion by decoding the double's bit pattern,
  // scalar or per-lane for vectors.
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
     // Convert d in to 32-bit components
     long x = as_long(d);
     xhi = LCOMPHI(x);
     xlo = LCOMPLO(x);

     // Generate 'normalized' mantissa
     mhi = xhi | 0x00100000; // hidden bit
     mhi <<= 11;
     temp = xlo >> (32 - 11);
     mhi |= temp

     // Compute shift right count from exponent
     e = (xhi >> (52-32)) & 0x7ff;
     sr = 1023 + 31 - e;
     srge32 = sr >= 32;

     // Compute result for 0 <= sr < 32
     res = mhi >> (sr &31);
     res = srge32 ? 0 : res;

     // Optional saturate on overflow
     srlt0 = sr < 0;
     res = srlt0 ? MAXVALUE : res;

     // Deal with sign bit (ignoring whether result is signed or unsigned value)
     if (includeSign) {
     sign = ((signed int) xhi) >> 31; fill with sign bit
     res += sign;
     res ^= sign;
     }

     return res;
     }
   */
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa: set the hidden bit and left-align
  // the top of the mantissa in a single 32-bit word.
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32; values too small (sr >= 32)
  // collapse to 0.
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit: two's-complement negate via add/xor with the
  // replicated sign bit.
  if (includeSign) {
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}
3223 SDValue
3224 AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
3225 {
3226 SDValue RHS = Op.getOperand(0);
3227 EVT RHSVT = RHS.getValueType();
3228 MVT RST = RHSVT.getScalarType().getSimpleVT();
3229 EVT LHSVT = Op.getValueType();
3230 MVT LST = LHSVT.getScalarType().getSimpleVT();
3231 DebugLoc DL = Op.getDebugLoc();
3232 SDValue DST;
3233 const AMDILTargetMachine*
3234 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3235 (&this->getTargetMachine());
3236 const AMDILSubtarget*
3237 stm = static_cast<const AMDILSubtarget*>(
3238 amdtm->getSubtargetImpl());
3239 if (RST == MVT::f64 && RHSVT.isVector()
3240 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3241 // We dont support vector 64bit floating point convertions.
3242 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
3243 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3244 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3245 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
3246 if (!x) {
3247 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3248 } else {
3249 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
3250 DST, op, DAG.getTargetConstant(x, MVT::i32));
3251 }
3252 }
3253 } else {
3254 if (RST == MVT::f64
3255 && LST == MVT::i32) {
3256 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3257 DST = SDValue(Op.getNode(), 0);
3258 } else {
3259 DST = genf64toi32(RHS, DAG, true);
3260 }
3261 } else if (RST == MVT::f64
3262 && LST == MVT::i64) {
3263 DST = genf64toi64(RHS, DAG, true);
3264 } else if (RST == MVT::f64
3265 && (LST == MVT::i8 || LST == MVT::i16)) {
3266 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3267 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
3268 } else {
3269 SDValue ToInt = genf64toi32(RHS, DAG, true);
3270 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
3271 }
3272
3273 } else {
3274 DST = SDValue(Op.getNode(), 0);
3275 }
3276 }
3277 return DST;
3278 }
3279
3280 SDValue
3281 AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
3282 {
3283 SDValue DST;
3284 SDValue RHS = Op.getOperand(0);
3285 EVT RHSVT = RHS.getValueType();
3286 MVT RST = RHSVT.getScalarType().getSimpleVT();
3287 EVT LHSVT = Op.getValueType();
3288 MVT LST = LHSVT.getScalarType().getSimpleVT();
3289 DebugLoc DL = Op.getDebugLoc();
3290 const AMDILTargetMachine*
3291 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3292 (&this->getTargetMachine());
3293 const AMDILSubtarget*
3294 stm = static_cast<const AMDILSubtarget*>(
3295 amdtm->getSubtargetImpl());
3296 if (RST == MVT::f64 && RHSVT.isVector()
3297 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3298 // We dont support vector 64bit floating point convertions.
3299 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
3300 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3301 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3302 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
3303 if (!x) {
3304 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3305 } else {
3306 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
3307 DST, op, DAG.getTargetConstant(x, MVT::i32));
3308 }
3309
3310 }
3311 } else {
3312 if (RST == MVT::f64
3313 && LST == MVT::i32) {
3314 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3315 DST = SDValue(Op.getNode(), 0);
3316 } else {
3317 DST = genf64toi32(RHS, DAG, false);
3318 }
3319 } else if (RST == MVT::f64
3320 && LST == MVT::i64) {
3321 DST = genf64toi64(RHS, DAG, false);
3322 } else if (RST == MVT::f64
3323 && (LST == MVT::i8 || LST == MVT::i16)) {
3324 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3325 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
3326 } else {
3327 SDValue ToInt = genf64toi32(RHS, DAG, false);
3328 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
3329 }
3330
3331 } else {
3332 DST = SDValue(Op.getNode(), 0);
3333 }
3334 }
3335 return DST;
3336 }
SDValue
AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  // Software expansion of u32 -> f64 conversion, scalar or per-lane
  // for vectors.
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  SDValue x = RHS;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = static_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // Build the bit pattern of (2^52 + x) directly and subtract 2^52;
    // the mantissa then holds exactly x.
    // unsigned x = RHS;
    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
    // double d = as_double( xd );
    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
        DAG.getConstant( 0x43300000, INTVT ) );
    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
  } else {
    // Older runtimes: construct the double manually from a computed
    // exponent and a normalized mantissa.
    SDValue clz = genCLZu32(x, DAG);

    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
    // Except for an input 0... which requires a 0 exponent
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+31), INTVT), clz );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );

    // Normalize frac
    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Pack exponent and frac
    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Convert 2 x 32 in to 1 x 64, then to double precision float type
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
SDValue
AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  // Software expansion of u64 -> f64 conversion, scalar or per-lane for
  // vectors.  Three strategies, selected newest hardware first.
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  if (isVec) {
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    INTVT = EVT(MVT::i32);
  }
  LONGVT = RHSVT;
  SDValue x = RHS;
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // Newer hardware converts each 32-bit half natively and recombines:
    // double dhi = (double)(as_uint2(x).y);
    // double dlo = (double)(as_uint2(x).x);
    // return mad(dhi, 0x1.0p+32, dlo)
    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
    // NOTE(review): 0x4f800000 is the IEEE-754 single-precision bit
    // pattern of 0x1.0p+32, yet it is passed to getConstantFP as a
    // plain numeric value - confirm the backend materializes the
    // intended 2^32 constant.
    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
        DAG.getConstantFP(0x4f800000, LHSVT), dlo);
  } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // Exponent-offset trick on each half:
    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );

  } else {
    // Oldest path: construct the double manually with rounding from a
    // computed exponent and normalized 64-bit mantissa.
    SDValue clz = genCLZu64(x, DAG);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+63), INTVT), clz );
    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        mash, exp, mash );  // exp = exp, or 0 if input was 0

    // Normalize frac: shift the 64-bit value left by clz, computed from
    // 32-bit pieces (clz31 = clz mod 32, clz32 = the >=32 case flag).
    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
        clz, DAG.getConstant( 31, INTVT ) );
    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 32, INTVT ), clz31 );
    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
    // When clz31 == 0, (xlo >> 32) is invalid; keep t1 instead so the
    // subsequent OR is a no-op.
    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
    // clz >= 32: the high result word comes entirely from xlo.
    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
    SDValue rlo2 = DAG.getConstant( 0, INTVT );
    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
        clz, DAG.getConstant( 32, INTVT ) );
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        clz32, rhi2, rhi1 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        clz32, rlo2, rlo1 );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Save bits needed to round properly
    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
        rlo, DAG.getConstant( 0x7ff, INTVT ) );

    // Pack exponent and frac
    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
        rlo, DAG.getConstant( 11, INTVT ) );
    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Compute rounding bit (round-to-nearest-even on the 11 dropped bits)
    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
        rlo, DAG.getConstant( 1, INTVT ) );
    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
        round, DAG.getConstant( 0x3ff, INTVT ) );
    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
    round = DAG.getNode( ISD::SRL, DL, INTVT,
        round, DAG.getConstant( 10, INTVT ) );
    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1

    // Add rounding bit
    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
        round, DAG.getConstant( 0, INTVT ) );
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
// Custom lowering for ISD::UINT_TO_FP.
//
// Vector u* -> f64 conversions are always scalarized on HD7XXX+ hardware
// ("vector 64bit floating point convertions" are unsupported).  Scalar
// u32/u64 -> f64 go through the genu32tof64/genu64tof64 software expansions,
// except u32 -> f64 on HD7XXX+ which the hardware handles natively.
// Everything else is returned unchanged for default handling.
SDValue
AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
{
  SDValue RHS = Op.getOperand(0);                    // integer source
  EVT RHSVT = RHS.getValueType();
  MVT RST = RHSVT.getScalarType().getSimpleVT();     // source scalar type
  EVT LHSVT = Op.getValueType();
  MVT LST = LHSVT.getScalarType().getSimpleVT();     // result scalar type
  DebugLoc DL = Op.getDebugLoc();
  SDValue DST;
  // NOTE(review): INTVT/LONGVT are declared but never assigned or read in
  // this routine (unlike the sibling LowerSINT_TO_FP).
  EVT INTVT;
  EVT LONGVT;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = static_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (LST == MVT::f64 && LHSVT.isVector()
      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We dont support vector 64bit floating point convertions.
    // Scalarize: convert each element and rebuild the result vector.
    DST = Op;
    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
      if (!x) {
        // First element seeds the vector via the target VBUILD node.
        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
      } else {
        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
            op, DAG.getTargetConstant(x, MVT::i32));
      }

    }
  } else {

    if (RST == MVT::i32
        && LST == MVT::f64) {
      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
        // HD7XXX+ handles u32 -> f64 natively; leave the node as-is.
        DST = SDValue(Op.getNode(), 0);
      } else {
        DST = genu32tof64(RHS, LHSVT, DAG);
      }
    } else if (RST == MVT::i64
        && LST == MVT::f64) {
      DST = genu64tof64(RHS, LHSVT, DAG);
    } else {
      // No custom lowering for this type combination.
      DST = SDValue(Op.getNode(), 0);
    }
  }
  return DST;
}
3571
3572 SDValue
3573 AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
3574 {
3575 SDValue RHS = Op.getOperand(0);
3576 EVT RHSVT = RHS.getValueType();
3577 MVT RST = RHSVT.getScalarType().getSimpleVT();
3578 EVT INTVT;
3579 EVT LONGVT;
3580 SDValue DST;
3581 bool isVec = RHSVT.isVector();
3582 DebugLoc DL = Op.getDebugLoc();
3583 EVT LHSVT = Op.getValueType();
3584 MVT LST = LHSVT.getScalarType().getSimpleVT();
3585 const AMDILTargetMachine*
3586 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3587 (&this->getTargetMachine());
3588 const AMDILSubtarget*
3589 stm = static_cast<const AMDILSubtarget*>(
3590 amdtm->getSubtargetImpl());
3591 if (LST == MVT::f64 && LHSVT.isVector()
3592 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3593 // We dont support vector 64bit floating point convertions.
3594 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
3595 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3596 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3597 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
3598 if (!x) {
3599 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3600 } else {
3601 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3602 op, DAG.getTargetConstant(x, MVT::i32));
3603 }
3604
3605 }
3606 } else {
3607
3608 if (isVec) {
3609 LONGVT = EVT(MVT::getVectorVT(MVT::i64,
3610 RHSVT.getVectorNumElements()));
3611 INTVT = EVT(MVT::getVectorVT(MVT::i32,
3612 RHSVT.getVectorNumElements()));
3613 } else {
3614 LONGVT = EVT(MVT::i64);
3615 INTVT = EVT(MVT::i32);
3616 }
3617 MVT RST = RHSVT.getScalarType().getSimpleVT();
3618 if ((RST == MVT::i32 || RST == MVT::i64)
3619 && LST == MVT::f64) {
3620 if (RST == MVT::i32) {
3621 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3622 DST = SDValue(Op.getNode(), 0);
3623 return DST;
3624 }
3625 }
3626 SDValue c31 = DAG.getConstant( 31, INTVT );
3627 SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
3628
3629 SDValue S; // Sign, as 0 or -1
3630 SDValue Sbit; // Sign bit, as one bit, MSB only.
3631 if (RST == MVT::i32) {
3632 Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
3633 S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
3634 } else { // 64-bit case... SRA of 64-bit values is slow
3635 SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
3636 Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
3637 SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
3638 S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
3639 }
3640
3641 // get abs() of input value, given sign as S (0 or -1)
3642 // SpI = RHS + S
3643 SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
3644 // SpIxS = SpI ^ S
3645 SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
3646
3647 // Convert unsigned value to double precision
3648 SDValue R;
3649 if (RST == MVT::i32) {
3650 // r = cast_u32_to_f64(SpIxS)
3651 R = genu32tof64(SpIxS, LHSVT, DAG);
3652 } else {
3653 // r = cast_u64_to_f64(SpIxS)
3654 R = genu64tof64(SpIxS, LHSVT, DAG);
3655 }
3656
3657 // drop in the sign bit
3658 SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
3659 SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
3660 SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
3661 thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
3662 t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
3663 DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
3664 } else {
3665 DST = SDValue(Op.getNode(), 0);
3666 }
3667 }
3668 return DST;
3669 }
// Custom lowering for ISD::SUB.  Only i64/v2i64 needs expansion: the
// subtraction runs on 32-bit halves, and the borrow out of the low half is
// recovered with an unsigned-less-than compare and folded into the high
// half.  All other types pass through unchanged.
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = RHS.getValueType().isVector();
  if (OVT.getScalarType() == MVT::i64) {
    /*const AMDILTargetMachine*
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
      const AMDILSubtarget*
      stm = dynamic_cast<const AMDILSubtarget*>(
          amdtm->getSubtargetImpl());*/
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
    // Split both operands into 32-bit low/high components.
    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
    RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    // Borrow: AMDILISD::CMP produces an all-ones mask (-1) when the
    // condition holds (see the GRS comment in genu64tof64), so
    // (LHSLO u< RHSLO) yields exactly the -1 that must be ADDed to the
    // high half to account for the borrow.
    SDValue cmp;
    if (OVT == MVT::i64) {
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // v2i64: compare the low halves of each lane individually, then
      // rebuild the v2i32 borrow mask.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    // Reassemble the 64-bit (or v2i64) result from the two halves.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
3733 SDValue
3734 AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
3735 {
3736 EVT OVT = Op.getValueType();
3737 SDValue DST;
3738 if (OVT.getScalarType() == MVT::f64) {
3739 DST = LowerFDIV64(Op, DAG);
3740 } else if (OVT.getScalarType() == MVT::f32) {
3741 DST = LowerFDIV32(Op, DAG);
3742 } else {
3743 DST = SDValue(Op.getNode(), 0);
3744 }
3745 return DST;
3746 }
3747
3748 SDValue
3749 AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
3750 {
3751 EVT OVT = Op.getValueType();
3752 SDValue DST;
3753 if (OVT.getScalarType() == MVT::i64) {
3754 DST = LowerSDIV64(Op, DAG);
3755 } else if (OVT.getScalarType() == MVT::i32) {
3756 DST = LowerSDIV32(Op, DAG);
3757 } else if (OVT.getScalarType() == MVT::i16
3758 || OVT.getScalarType() == MVT::i8) {
3759 DST = LowerSDIV24(Op, DAG);
3760 } else {
3761 DST = SDValue(Op.getNode(), 0);
3762 }
3763 return DST;
3764 }
3765
3766 SDValue
3767 AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
3768 {
3769 EVT OVT = Op.getValueType();
3770 SDValue DST;
3771 if (OVT.getScalarType() == MVT::i64) {
3772 DST = LowerUDIV64(Op, DAG);
3773 } else if (OVT.getScalarType() == MVT::i32) {
3774 DST = LowerUDIV32(Op, DAG);
3775 } else if (OVT.getScalarType() == MVT::i16
3776 || OVT.getScalarType() == MVT::i8) {
3777 DST = LowerUDIV24(Op, DAG);
3778 } else {
3779 DST = SDValue(Op.getNode(), 0);
3780 }
3781 return DST;
3782 }
3783
3784 SDValue
3785 AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
3786 {
3787 EVT OVT = Op.getValueType();
3788 SDValue DST;
3789 if (OVT.getScalarType() == MVT::i64) {
3790 DST = LowerSREM64(Op, DAG);
3791 } else if (OVT.getScalarType() == MVT::i32) {
3792 DST = LowerSREM32(Op, DAG);
3793 } else if (OVT.getScalarType() == MVT::i16) {
3794 DST = LowerSREM16(Op, DAG);
3795 } else if (OVT.getScalarType() == MVT::i8) {
3796 DST = LowerSREM8(Op, DAG);
3797 } else {
3798 DST = SDValue(Op.getNode(), 0);
3799 }
3800 return DST;
3801 }
3802
3803 SDValue
3804 AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
3805 {
3806 EVT OVT = Op.getValueType();
3807 SDValue DST;
3808 if (OVT.getScalarType() == MVT::i64) {
3809 DST = LowerUREM64(Op, DAG);
3810 } else if (OVT.getScalarType() == MVT::i32) {
3811 DST = LowerUREM32(Op, DAG);
3812 } else if (OVT.getScalarType() == MVT::i16) {
3813 DST = LowerUREM16(Op, DAG);
3814 } else if (OVT.getScalarType() == MVT::i8) {
3815 DST = LowerUREM8(Op, DAG);
3816 } else {
3817 DST = SDValue(Op.getNode(), 0);
3818 }
3819 return DST;
3820 }
3821
// Custom lowering for ISD::MUL.  Only i64/v2i64 is expanded here; the
// 64-bit product is assembled from 32-bit multiplies:
//   low  = mullo_u(l1, l0)
//   high = h0*l1 + h1*l0 + mulhi_u(l1, l0)
// Other types pass through unchanged.
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() != MVT::i64)
  {
    // Non-64-bit multiplies need no custom lowering.
    DST = SDValue(Op.getNode(), 0);
  } else {
    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
    // TODO: This needs to be turned into a tablegen pattern
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // mul64(h1, l1, h0, l0)
    // Split both operands into 32-bit low/high components.
    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, LHS);
    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, LHS);
    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, RHS);
    SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, RHS);
    // MULLO_UINT_1 r1, h0, l1
    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSHI, LHSLO);
    // MULLO_UINT_1 r2, h1, l0
    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSLO, LHSHI);
    // ADD_INT hr, r1, r2
    SDValue ADDHI = DAG.getNode(ISD::ADD,
        DL,
        INTTY, RHILLO, RLOHHI);
    // MULHI_UINT_1 r3, l1, l0
    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
        DL,
        INTTY, RHSLO, LHSLO);
    // ADD_INT hr, hr, r3
    SDValue HIGH = DAG.getNode(ISD::ADD,
        DL,
        INTTY, ADDHI, RLOLLO);
    // MULLO_UINT_1 l3, l1, l0
    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, LHSLO, RHSLO);
    // Reassemble the 64-bit (or v2i64) result from the two halves.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
        DL,
        OVT, LOW, HIGH);
  }
  return DST;
}
// Custom lowering for ISD::BUILD_VECTOR.  Element 0 seeds a target VBUILD
// node; the remaining defined (non-undef) elements are inserted one at a
// time.  The switch intentionally falls through (case 4 -> 3 -> 2), so a
// four-operand build inserts operands 3, 2, then 1.
// NOTE(review): the insert positions 7/6/5 appear to be an offset encoding
// of element indices 3/2/1 used by the target's INSERT_VECTOR_ELT
// handling — confirm against the ISel patterns before changing.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Seed the result vector from operand 0.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // intentional fallthrough
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // intentional fallthrough
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
3950
// Custom lowering for ISD::INSERT_VECTOR_ELT via the target VINSERT node.
// VINSERT takes two immediate masks derived from the insert position.
// With a constant index a single VINSERT suffices; with a dynamic index
// every possible position is materialized and the matching one is picked
// with compare + cmov.
// NOTE(review): mask2/mask3 are per-byte lane selectors built from the
// patterns 0x04030201 / 0x01010101; their exact VINSERT semantics are
// defined by the target's ISel/printer — confirm there before editing.
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  if (!VT.isVector()) {
    // Inserting into a scalar is the identity on operand 0.
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Insert the new element unless it is undef, in which case reuse the
  // destination vector's value instead.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Static insertion at a known element index.
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic index: build the insertion result for position 0
    // (swizzleNum is still 0 here), then for every other position, and
    // select the one whose position equals the runtime index operand.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      // c = (index == x), splatted across the vector for the cmov.
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
    }
    DST = res;
  }
  return DST;
}
4006
4007 SDValue
4008 AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
4009 SelectionDAG &DAG) const
4010 {
4011 EVT VT = Op.getValueType();
4012 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4013 uint64_t swizzleNum = 0;
4014 DebugLoc DL = Op.getDebugLoc();
4015 SDValue Res;
4016 if (!Op.getOperand(0).getValueType().isVector()) {
4017 Res = Op.getOperand(0);
4018 return Res;
4019 }
4020 if (CSDN) {
4021 // Static vector extraction
4022 swizzleNum = CSDN->getZExtValue() + 1;
4023 Res = DAG.getNode(AMDILISD::VEXTRACT,
4024 DL, VT,
4025 Op.getOperand(0),
4026 DAG.getTargetConstant(swizzleNum, MVT::i32));
4027 } else {
4028 SDValue Op1 = Op.getOperand(1);
4029 uint32_t vecSize = 4;
4030 SDValue Op0 = Op.getOperand(0);
4031 SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
4032 DL, VT, Op0,
4033 DAG.getTargetConstant(1, MVT::i32));
4034 if (Op0.getValueType().isVector()) {
4035 vecSize = Op0.getValueType().getVectorNumElements();
4036 }
4037 for (uint32_t x = 2; x <= vecSize; ++x) {
4038 SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
4039 DL, VT, Op0,
4040 DAG.getTargetConstant(x, MVT::i32));
4041 SDValue c = DAG.getNode(AMDILISD::CMP,
4042 DL, Op1.getValueType(),
4043 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
4044 Op1, DAG.getConstant(x, MVT::i32));
4045 res = DAG.getNode(AMDILISD::CMOVLOG, DL,
4046 VT, c, t, res);
4047
4048 }
4049 Res = res;
4050 }
4051 return Res;
4052 }
4053
// Custom lowering for ISD::EXTRACT_SUBVECTOR: extract vecSize consecutive
// elements starting at the (constant or dynamic) offset and rebuild them
// into the result vector one element at a time.
SDValue
AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
    SelectionDAG &DAG) const
{
  uint32_t vecSize = Op.getValueType().getVectorNumElements();
  SDValue src = Op.getOperand(0);
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  uint64_t offset = 0;
  EVT vecType = Op.getValueType().getVectorElementType();
  DebugLoc DL = Op.getDebugLoc();
  SDValue Result;
  if (CSDN) {
    // Static offset: extract elements offset .. offset + vecSize - 1.
    offset = CSDN->getZExtValue();
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
        DL,vecType, src, DAG.getConstant(offset, MVT::i32));
    Result = DAG.getNode(AMDILISD::VBUILD, DL,
        Op.getValueType(), Result);
    for (uint32_t x = 1; x < vecSize; ++x) {
      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
          src, DAG.getConstant(offset + x, MVT::i32));
      // Undef elements are skipped; VBUILD's lanes remain as seeded.
      if (elt.getOpcode() != ISD::UNDEF) {
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
            Op.getValueType(), Result, elt,
            DAG.getConstant(x, MVT::i32));
      }
    }
  } else {
    // Dynamic offset: advance the runtime index for each element.
    // NOTE(review): the ADD advancing `idx` uses `vecType` (the *element*
    // type) as its result type, which looks wrong for floating-point
    // element types — confirm intended behavior before relying on it.
    SDValue idx = Op.getOperand(1);
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
        DL, vecType, src, idx);
    Result = DAG.getNode(AMDILISD::VBUILD, DL,
        Op.getValueType(), Result);
    for (uint32_t x = 1; x < vecSize; ++x) {
      idx = DAG.getNode(ISD::ADD, DL, vecType,
          idx, DAG.getConstant(1, MVT::i32));
      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
          src, idx);
      if (elt.getOpcode() != ISD::UNDEF) {
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
            Op.getValueType(), Result, elt, idx);
      }
    }
  }
  return Result;
}
4099 SDValue
4100 AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
4101 SelectionDAG &DAG) const
4102 {
4103 SDValue Res = DAG.getNode(AMDILISD::VBUILD,
4104 Op.getDebugLoc(),
4105 Op.getValueType(),
4106 Op.getOperand(0));
4107 return Res;
4108 }
4109 SDValue
4110 AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
4111 {
4112 SDValue andOp;
4113 andOp = DAG.getNode(
4114 AMDILISD::AND,
4115 Op.getDebugLoc(),
4116 Op.getValueType(),
4117 Op.getOperand(0),
4118 Op.getOperand(1));
4119 return andOp;
4120 }
4121 SDValue
4122 AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
4123 {
4124 SDValue orOp;
4125 orOp = DAG.getNode(AMDILISD::OR,
4126 Op.getDebugLoc(),
4127 Op.getValueType(),
4128 Op.getOperand(0),
4129 Op.getOperand(1));
4130 return orOp;
4131 }
4132 SDValue
4133 AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
4134 {
4135 SDValue Cond = Op.getOperand(0);
4136 SDValue LHS = Op.getOperand(1);
4137 SDValue RHS = Op.getOperand(2);
4138 DebugLoc DL = Op.getDebugLoc();
4139 Cond = getConversionNode(DAG, Cond, Op, true);
4140 Cond = DAG.getNode(AMDILISD::CMOVLOG,
4141 DL,
4142 Op.getValueType(), Cond, LHS, RHS);
4143 return Cond;
4144 }
// Custom lowering for ISD::SELECT_CC: compare LHS/RHS with the target CMP
// node, then select TRUE/FALSE with a conditional move.  When both select
// results are the i32 constants (-1, 0) the compare mask itself is the
// answer and the cmov is elided; for (0, -1) a NOT of the mask suffices.
SDValue
AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TRUE = Op.getOperand(2);
  SDValue FALSE = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  DebugLoc DL = Op.getDebugLoc();
  bool skipCMov = false;  // true when the cmov can be elided entirely
  bool genINot = false;   // true when the compare mask must be inverted
  EVT OVT = Op.getValueType();

  // Check for possible elimination of cmov
  if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
    const ConstantSDNode *trueConst
      = dyn_cast<ConstantSDNode>( TRUE.getNode() );
    const ConstantSDNode *falseConst
      = dyn_cast<ConstantSDNode>( FALSE.getNode() );
    if (trueConst && falseConst) {
      // both possible result values are constants
      if (trueConst->isAllOnesValue()
          && falseConst->isNullValue()) { // and convenient constants
        skipCMov = true;
      }
      else if (trueConst->isNullValue()
          && falseConst->isAllOnesValue()) { // less convenient
        skipCMov = true;
        genINot = true;
      }
    }
  }
  // Translate the ISD condition code into the AMDIL encoding for the
  // compared operand type.
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      AMDILISD::CMP,
      DL,
      LHS.getValueType(),
      DAG.getConstant(AMDILCC, MVT::i32),
      LHS,
      RHS);
  // Reshape the compare result to the select's result type.
  Cond = getConversionNode(DAG, Cond, Op, true);
  if (genINot) {
    Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
  }
  if (!skipCMov) {
    Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
  }
  return Cond;
}
4199 SDValue
4200 AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
4201 {
4202 SDValue Cond;
4203 SDValue LHS = Op.getOperand(0);
4204 SDValue RHS = Op.getOperand(1);
4205 SDValue CC = Op.getOperand(2);
4206 DebugLoc DL = Op.getDebugLoc();
4207 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
4208 unsigned int AMDILCC = CondCCodeToCC(
4209 SetCCOpcode,
4210 LHS.getValueType().getSimpleVT().SimpleTy);
4211 assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
4212 Cond = DAG.getNode(
4213 AMDILISD::CMP,
4214 DL,
4215 LHS.getValueType(),
4216 DAG.getConstant(AMDILCC, MVT::i32),
4217 LHS,
4218 RHS);
4219 Cond = getConversionNode(DAG, Cond, Op, true);
4220 Cond = DAG.getNode(
4221 ISD::AND,
4222 DL,
4223 Cond.getValueType(),
4224 DAG.getConstant(1, Cond.getValueType()),
4225 Cond);
4226 return Cond;
4227 }
4228
4229 SDValue
4230 AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
4231 {
4232 SDValue Data = Op.getOperand(0);
4233 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
4234 DebugLoc DL = Op.getDebugLoc();
4235 EVT DVT = Data.getValueType();
4236 EVT BVT = BaseType->getVT();
4237 unsigned baseBits = BVT.getScalarType().getSizeInBits();
4238 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
4239 unsigned shiftBits = srcBits - baseBits;
4240 if (srcBits < 32) {
4241 // If the op is less than 32 bits, then it needs to extend to 32bits
4242 // so it can properly keep the upper bits valid.
4243 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
4244 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
4245 shiftBits = 32 - baseBits;
4246 DVT = IVT;
4247 }
4248 SDValue Shift = DAG.getConstant(shiftBits, DVT);
4249 // Shift left by 'Shift' bits.
4250 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
4251 // Signed shift Right by 'Shift' bits.
4252 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
4253 if (srcBits < 32) {
4254 // Once the sign extension is done, the op needs to be converted to
4255 // its original type.
4256 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
4257 }
4258 return Data;
4259 }
4260 EVT
4261 AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
4262 {
4263 int iSize = (size * numEle);
4264 int vEle = (iSize >> ((size == 64) ? 6 : 5));
4265 if (!vEle) {
4266 vEle = 1;
4267 }
4268 if (size == 64) {
4269 if (vEle == 1) {
4270 return EVT(MVT::i64);
4271 } else {
4272 return EVT(MVT::getVectorVT(MVT::i64, vEle));
4273 }
4274 } else {
4275 if (vEle == 1) {
4276 return EVT(MVT::i32);
4277 } else {
4278 return EVT(MVT::getVectorVT(MVT::i32, vEle));
4279 }
4280 }
4281 }
4282
4283 SDValue
4284 AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
4285 {
4286 SDValue Src = Op.getOperand(0);
4287 SDValue Dst = Op;
4288 SDValue Res;
4289 DebugLoc DL = Op.getDebugLoc();
4290 EVT SrcVT = Src.getValueType();
4291 EVT DstVT = Dst.getValueType();
4292 // Lets bitcast the floating point types to an
4293 // equivalent integer type before converting to vectors.
4294 if (SrcVT.getScalarType().isFloatingPoint()) {
4295 Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
4296 SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
4297 SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
4298 Src);
4299 SrcVT = Src.getValueType();
4300 }
4301 uint32_t ScalarSrcSize = SrcVT.getScalarType()
4302 .getSimpleVT().getSizeInBits();
4303 uint32_t ScalarDstSize = DstVT.getScalarType()
4304 .getSimpleVT().getSizeInBits();
4305 uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
4306 uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
4307 bool isVec = SrcVT.isVector();
4308 if (DstVT.getScalarType().isInteger() &&
4309 (SrcVT.getScalarType().isInteger()
4310 || SrcVT.getScalarType().isFloatingPoint())) {
4311 if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
4312 || (ScalarSrcSize == 64
4313 && DstNumEle == 4
4314 && ScalarDstSize == 16)) {
4315 // This is the problematic case when bitcasting i64 <-> <4 x i16>
4316 // This approach is a little different as we cannot generate a
4317 // <4 x i64> vector
4318 // as that is illegal in our backend and we are already past
4319 // the DAG legalizer.
4320 // So, in this case, we will do the following conversion.
4321 // Case 1:
4322 // %dst = <4 x i16> %src bitconvert i64 ==>
4323 // %tmp = <4 x i16> %src convert <4 x i32>
4324 // %tmp = <4 x i32> %tmp and 0xFFFF
4325 // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
4326 // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
4327 // %dst = <2 x i32> %tmp bitcast i64
4328 // case 2:
4329 // %dst = i64 %src bitconvert <4 x i16> ==>
4330 // %tmp = i64 %src bitcast <2 x i32>
4331 // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
4332 // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
4333 // %tmp = <4 x i32> %tmp and 0xFFFF
4334 // %dst = <4 x i16> %tmp bitcast <4 x i32>
4335 SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
4336 DAG.getConstant(0xFFFF, MVT::i32));
4337 SDValue const16 = DAG.getConstant(16, MVT::i32);
4338 if (ScalarDstSize == 64) {
4339 // case 1
4340 Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
4341 Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
4342 SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4343 Op, DAG.getConstant(0, MVT::i32));
4344 SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4345 Op, DAG.getConstant(1, MVT::i32));
4346 y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
4347 SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4348 Op, DAG.getConstant(2, MVT::i32));
4349 SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4350 Op, DAG.getConstant(3, MVT::i32));
4351 w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
4352 x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
4353 y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
4354 Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
4355 return Res;
4356 } else {
4357 // case 2
4358 SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
4359 SDValue lor16
4360 = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
4361 SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
4362 SDValue hir16
4363 = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
4364 SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
4365 MVT::v4i32, lo);
4366 SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
4367 getPointerTy(), DAG.getConstant(1, MVT::i32));
4368 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
4369 resVec, lor16, idxVal);
4370 idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
4371 getPointerTy(), DAG.getConstant(2, MVT::i32));
4372 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
4373 resVec, hi, idxVal);
4374 idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
4375 getPointerTy(), DAG.getConstant(3, MVT::i32));
4376 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
4377 resVec, hir16, idxVal);
4378 resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
4379 Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
4380 return Res;
4381 }
4382 } else {
4383 // There are four cases we need to worry about for bitcasts
4384 // where the size of all
4385 // source, intermediates and result is <= 128 bits, unlike
4386 // the above case
4387 // 1) Sub32bit bitcast 32bitAlign
4388 // %dst = <4 x i8> bitcast i32
4389 // (also <[2|4] x i16> to <[2|4] x i32>)
4390 // 2) 32bitAlign bitcast Sub32bit
4391 // %dst = i32 bitcast <4 x i8>
4392 // 3) Sub32bit bitcast LargerSub32bit
4393 // %dst = <2 x i8> bitcast i16
4394 // (also <4 x i8> to <2 x i16>)
4395 // 4) Sub32bit bitcast SmallerSub32bit
4396 // %dst = i16 bitcast <2 x i8>
4397 // (also <2 x i16> to <4 x i8>)
4398 // This also only handles types that are powers of two
4399 if ((ScalarDstSize & (ScalarDstSize - 1))
4400 || (ScalarSrcSize & (ScalarSrcSize - 1))) {
4401 } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
4402 // case 1:
4403 EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
4404 #if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
4405 SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
4406 #else
4407 SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
4408 DAG.getUNDEF(IntTy.getScalarType()));
4409 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4410 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4411 getPointerTy(), DAG.getConstant(x, MVT::i32));
4412 SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
4413 SrcVT.getScalarType(), Src,
4414 DAG.getConstant(x, MVT::i32));
4415 temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
4416 res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
4417 res, temp, idx);
4418 }
4419 #endif
4420 SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
4421 DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
4422 SDValue *newEle = new SDValue[SrcNumEle];
4423 res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
4424 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4425 newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
4426 IntTy.getScalarType(), res,
4427 DAG.getConstant(x, MVT::i32));
4428 }
4429 uint32_t Ratio = SrcNumEle / DstNumEle;
4430 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4431 if (x % Ratio) {
4432 newEle[x] = DAG.getNode(ISD::SHL, DL,
4433 IntTy.getScalarType(), newEle[x],
4434 DAG.getConstant(ScalarSrcSize * (x % Ratio),
4435 MVT::i32));
4436 }
4437 }
4438 for (uint32_t x = 0; x < SrcNumEle; x += 2) {
4439 newEle[x] = DAG.getNode(ISD::OR, DL,
4440 IntTy.getScalarType(), newEle[x], newEle[x + 1]);
4441 }
4442 if (ScalarSrcSize == 8) {
4443 for (uint32_t x = 0; x < SrcNumEle; x += 4) {
4444 newEle[x] = DAG.getNode(ISD::OR, DL,
4445 IntTy.getScalarType(), newEle[x], newEle[x + 2]);
4446 }
4447 if (DstNumEle == 1) {
4448 Dst = newEle[0];
4449 } else {
4450 Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
4451 newEle[0]);
4452 for (uint32_t x = 1; x < DstNumEle; ++x) {
4453 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4454 getPointerTy(), DAG.getConstant(x, MVT::i32));
4455 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4456 DstVT, Dst, newEle[x * 4], idx);
4457 }
4458 }
4459 } else {
4460 if (DstNumEle == 1) {
4461 Dst = newEle[0];
4462 } else {
4463 Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
4464 newEle[0]);
4465 for (uint32_t x = 1; x < DstNumEle; ++x) {
4466 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4467 getPointerTy(), DAG.getConstant(x, MVT::i32));
4468 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4469 DstVT, Dst, newEle[x * 2], idx);
4470 }
4471 }
4472 }
4473 delete [] newEle;
4474 return Dst;
4475 } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
4476 // case 2:
4477 EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
4478 SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
4479 DAG.getUNDEF(IntTy.getScalarType()));
4480 uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
4481 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4482 for (uint32_t y = 0; y < mult; ++y) {
4483 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4484 getPointerTy(),
4485 DAG.getConstant(x * mult + y, MVT::i32));
4486 SDValue t;
4487 if (SrcNumEle > 1) {
4488 t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
4489 DL, SrcVT.getScalarType(), Src,
4490 DAG.getConstant(x, MVT::i32));
4491 } else {
4492 t = Src;
4493 }
4494 if (y != 0) {
4495 t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
4496 t, DAG.getConstant(y * ScalarDstSize,
4497 MVT::i32));
4498 }
4499 vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
4500 DL, IntTy, vec, t, idx);
4501 }
4502 }
4503 Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
4504 return Dst;
4505 } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
4506 // case 3:
4507 SDValue *numEle = new SDValue[SrcNumEle];
4508 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4509 numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
4510 MVT::i8, Src, DAG.getConstant(x, MVT::i32));
4511 numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
4512 numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
4513 DAG.getConstant(0xFF, MVT::i16));
4514 }
4515 for (uint32_t x = 1; x < SrcNumEle; x += 2) {
4516 numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
4517 DAG.getConstant(8, MVT::i16));
4518 numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
4519 numEle[x-1], numEle[x]);
4520 }
4521 if (DstNumEle > 1) {
4522 // If we are not a scalar i16, the only other case is a
4523 // v2i16 since we can't have v8i8 at this point, v4i16
4524 // cannot be generated
4525 Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
4526 numEle[0]);
4527 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4528 getPointerTy(), DAG.getConstant(1, MVT::i32));
4529 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
4530 Dst, numEle[2], idx);
4531 } else {
4532 Dst = numEle[0];
4533 }
4534 delete [] numEle;
4535 return Dst;
4536 } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
4537 // case 4:
4538 SDValue *numEle = new SDValue[DstNumEle];
4539 for (uint32_t x = 0; x < SrcNumEle; ++x) {
4540 numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
4541 MVT::i16, Src, DAG.getConstant(x, MVT::i32));
4542 numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
4543 numEle[x * 2], DAG.getConstant(8, MVT::i16));
4544 }
4545 MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
4546 Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
4547 for (uint32_t x = 1; x < DstNumEle; ++x) {
4548 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
4549 getPointerTy(), DAG.getConstant(x, MVT::i32));
4550 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
4551 Dst, numEle[x], idx);
4552 }
4553 delete [] numEle;
4554 ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
4555 Res = DAG.getSExtOrTrunc(Dst, DL, ty);
4556 return Res;
4557 }
4558 }
4559 }
4560 Res = DAG.getNode(AMDILISD::BITCONV,
4561 Dst.getDebugLoc(),
4562 Dst.getValueType(), Src);
4563 return Res;
4564 }
4565
4566 SDValue
4567 AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4568 SelectionDAG &DAG) const
4569 {
4570 SDValue Chain = Op.getOperand(0);
4571 SDValue Size = Op.getOperand(1);
4572 unsigned int SPReg = AMDIL::SP;
4573 DebugLoc DL = Op.getDebugLoc();
4574 SDValue SP = DAG.getCopyFromReg(Chain,
4575 DL,
4576 SPReg, MVT::i32);
4577 SDValue NewSP = DAG.getNode(ISD::ADD,
4578 DL,
4579 MVT::i32, SP, Size);
4580 Chain = DAG.getCopyToReg(SP.getValue(1),
4581 DL,
4582 SPReg, NewSP);
4583 SDValue Ops[2] = {NewSP, Chain};
4584 Chain = DAG.getMergeValues(Ops, 2 ,DL);
4585 return Chain;
4586 }
4587 SDValue
4588 AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
4589 {
4590 SDValue Chain = Op.getOperand(0);
4591 SDValue Cond = Op.getOperand(1);
4592 SDValue Jump = Op.getOperand(2);
4593 SDValue Result;
4594 Result = DAG.getNode(
4595 AMDILISD::BRANCH_COND,
4596 Op.getDebugLoc(),
4597 Op.getValueType(),
4598 Chain, Jump, Cond);
4599 return Result;
4600 }
4601
4602 SDValue
4603 AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
4604 {
4605 SDValue Chain = Op.getOperand(0);
4606 CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
4607 SDValue LHS = Op.getOperand(2);
4608 SDValue RHS = Op.getOperand(3);
4609 SDValue JumpT = Op.getOperand(4);
4610 SDValue CmpValue;
4611 ISD::CondCode CC = CCNode->get();
4612 SDValue Result;
4613 unsigned int cmpOpcode = CondCCodeToCC(
4614 CC,
4615 LHS.getValueType().getSimpleVT().SimpleTy);
4616 CmpValue = DAG.getNode(
4617 AMDILISD::CMP,
4618 Op.getDebugLoc(),
4619 LHS.getValueType(),
4620 DAG.getConstant(cmpOpcode, MVT::i32),
4621 LHS, RHS);
4622 Result = DAG.getNode(
4623 AMDILISD::BRANCH_COND,
4624 CmpValue.getDebugLoc(),
4625 MVT::Other, Chain,
4626 JumpT, CmpValue);
4627 return Result;
4628 }
4629
4630 SDValue
4631 AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
4632 {
4633 SDValue Result = DAG.getNode(
4634 AMDILISD::DP_TO_FP,
4635 Op.getDebugLoc(),
4636 Op.getValueType(),
4637 Op.getOperand(0),
4638 Op.getOperand(1));
4639 return Result;
4640 }
4641
4642 SDValue
4643 AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
4644 {
4645 SDValue Result = DAG.getNode(
4646 AMDILISD::VCONCAT,
4647 Op.getDebugLoc(),
4648 Op.getValueType(),
4649 Op.getOperand(0),
4650 Op.getOperand(1));
4651 return Result;
4652 }
// LowerRET - Lower an ISD::RET node: assign each return value to a
// register per the AMDIL calling convention, emit glued CopyToReg nodes
// for them, and terminate with a RET_FLAG node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  // Copy every return value into its assigned register, threading the
  // chain and glue through each copy.
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
                             dl,
                             VA.getLocReg(),
                             ValToCopy,
                             Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
    // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  // Emit the return, glued to the register copies above.
  Flag = DAG.getNode(AMDILISD::RET_FLAG,
                     dl,
                     MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
// Expand a 64-bit integer relational (eq/ne/lt/le/gt/ge, signed and
// unsigned variants) into 32-bit compares on the high and low halves of
// each operand, then combine the partial results into the destination.
void
AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
    unsigned int opCode) const
{
  MachineOperand DST = MI->getOperand(0);
  MachineOperand LHS = MI->getOperand(2);
  MachineOperand RHS = MI->getOperand(3);
  // opi32Code compares the high halves, si32Code the low halves.
  unsigned int opi32Code = 0, si32Code = 0;
  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
  uint32_t REGS[12];
  // All the relationals can be generated with with 6 temp registers
  for (int x = 0; x < 12; ++x) {
    REGS[x] = genVReg(simpleVT);
  }
  // Pull out the high and low components of each 64 bit register
  generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
  generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
  // Determine the correct opcode that we should use
  switch(opCode) {
  default:
    assert(!"comparison case not handled!");
    break;
  case AMDIL::LEQ:
    si32Code = opi32Code = AMDIL::IEQ;
    break;
  case AMDIL::LNE:
    si32Code = opi32Code = AMDIL::INE;
    break;
  case AMDIL::LLE:
  case AMDIL::ULLE:
  case AMDIL::LGE:
  case AMDIL::ULGE:
    // For >= the high halves are swapped; for <= the low halves are
    // swapped, reducing all four cases to one compare pattern.
    if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
      std::swap(REGS[0], REGS[2]);
    } else {
      std::swap(REGS[1], REGS[3]);
    }
    // High halves compare signed or unsigned per the original opcode.
    if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
      opi32Code = AMDIL::ILT;
    } else {
      opi32Code = AMDIL::ULT;
    }
    si32Code = AMDIL::UGE;
    break;
  case AMDIL::LGT:
  case AMDIL::ULGT:
    // a > b is handled as b < a: swap both halves, then FALL THROUGH to
    // the less-than cases below (intentional fallthrough).
    std::swap(REGS[0], REGS[2]);
    std::swap(REGS[1], REGS[3]);
  case AMDIL::LLT:
  case AMDIL::ULLT:
    if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
      opi32Code = AMDIL::ILT;
    } else {
      opi32Code = AMDIL::ULT;
    }
    si32Code = AMDIL::ULT;
    break;
  };
  // Do the initial opcode on the high and low components.
  // This leaves the following:
  // REGS[4] = L_HI OP R_HI
  // REGS[5] = L_LO OP R_LO
  generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
  generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
  switch(opi32Code) {
  case AMDIL::IEQ:
  case AMDIL::INE:
    {
      // combine the results with an and or or depending on if
      // we are eq or ne
      uint32_t combineOp = (opi32Code == AMDIL::IEQ)
        ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
      generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
    }
    break;
  default:
    // this finishes codegen for the following pattern
    // REGS[4] || (REGS[5] && (L_HI == R_HI))
    generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
    generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
        REGS[9]);
    generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
        REGS[10]);
    break;
  }
  // Pack the 32-bit result into both halves of the 64-bit destination.
  generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
}
4807
// Return the function alignment override; 0 requests no extra alignment
// beyond the target default.
unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}
4813
4814 void
4815 AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
4816 MachineBasicBlock::iterator &BBI,
4817 DebugLoc *DL, const TargetInstrInfo *TII) const
4818 {
4819 mBB = BB;
4820 mBBI = BBI;
4821 mDL = DL;
4822 mTII = TII;
4823 }
4824 uint32_t
4825 AMDILTargetLowering::genVReg(uint32_t regType) const
4826 {
4827 return mBB->getParent()->getRegInfo().createVirtualRegister(
4828 getTargetMachine().getRegisterInfo()->getRegClass(regType));
4829 }
4830
// Emit a machine instruction with a single destination register at the
// cached insertion point (see setPrivateData()).
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
{
  return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
}
4836
4837 MachineInstrBuilder
4838 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4839 uint32_t src1) const
4840 {
4841 return generateMachineInst(opcode, dst).addReg(src1);
4842 }
4843
4844 MachineInstrBuilder
4845 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4846 uint32_t src1, uint32_t src2) const
4847 {
4848 return generateMachineInst(opcode, dst, src1).addReg(src2);
4849 }
4850
4851 MachineInstrBuilder
4852 AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4853 uint32_t src1, uint32_t src2, uint32_t src3) const
4854 {
4855 return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
4856 }
4857
4858
4859 SDValue
4860 AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
4861 {
4862 DebugLoc DL = Op.getDebugLoc();
4863 EVT OVT = Op.getValueType();
4864 SDValue LHS = Op.getOperand(0);
4865 SDValue RHS = Op.getOperand(1);
4866 MVT INTTY;
4867 MVT FLTTY;
4868 if (!OVT.isVector()) {
4869 INTTY = MVT::i32;
4870 FLTTY = MVT::f32;
4871 } else if (OVT.getVectorNumElements() == 2) {
4872 INTTY = MVT::v2i32;
4873 FLTTY = MVT::v2f32;
4874 } else if (OVT.getVectorNumElements() == 4) {
4875 INTTY = MVT::v4i32;
4876 FLTTY = MVT::v4f32;
4877 }
4878 unsigned bitsize = OVT.getScalarType().getSizeInBits();
4879 // char|short jq = ia ^ ib;
4880 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
4881
4882 // jq = jq >> (bitsize - 2)
4883 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
4884
4885 // jq = jq | 0x1
4886 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
4887
4888 // jq = (int)jq
4889 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
4890
4891 // int ia = (int)LHS;
4892 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
4893
4894 // int ib, (int)RHS;
4895 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
4896
4897 // float fa = (float)ia;
4898 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
4899
4900 // float fb = (float)ib;
4901 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
4902
4903 // float fq = native_divide(fa, fb);
4904 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
4905
4906 // fq = trunc(fq);
4907 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
4908
4909 // float fqneg = -fq;
4910 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
4911
4912 // float fr = mad(fqneg, fb, fa);
4913 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
4914
4915 // int iq = (int)fq;
4916 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
4917
4918 // fr = fabs(fr);
4919 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
4920
4921 // fb = fabs(fb);
4922 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
4923
4924 // int cv = fr >= fb;
4925 SDValue cv;
4926 if (INTTY == MVT::i32) {
4927 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4928 } else {
4929 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4930 }
4931 // jq = (cv ? jq : 0);
4932 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
4933 DAG.getConstant(0, OVT));
4934 // dst = iq + jq;
4935 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
4936 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
4937 return iq;
4938 }
4939
4940 SDValue
4941 AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
4942 {
4943 DebugLoc DL = Op.getDebugLoc();
4944 EVT OVT = Op.getValueType();
4945 SDValue LHS = Op.getOperand(0);
4946 SDValue RHS = Op.getOperand(1);
4947 // The LowerSDIV32 function generates equivalent to the following IL.
4948 // mov r0, LHS
4949 // mov r1, RHS
4950 // ilt r10, r0, 0
4951 // ilt r11, r1, 0
4952 // iadd r0, r0, r10
4953 // iadd r1, r1, r11
4954 // ixor r0, r0, r10
4955 // ixor r1, r1, r11
4956 // udiv r0, r0, r1
4957 // ixor r10, r10, r11
4958 // iadd r0, r0, r10
4959 // ixor DST, r0, r10
4960
4961 // mov r0, LHS
4962 SDValue r0 = LHS;
4963
4964 // mov r1, RHS
4965 SDValue r1 = RHS;
4966
4967 // ilt r10, r0, 0
4968 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4969 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4970 r0, DAG.getConstant(0, OVT));
4971
4972 // ilt r11, r1, 0
4973 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4974 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4975 r1, DAG.getConstant(0, OVT));
4976
4977 // iadd r0, r0, r10
4978 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4979
4980 // iadd r1, r1, r11
4981 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
4982
4983 // ixor r0, r0, r10
4984 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4985
4986 // ixor r1, r1, r11
4987 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
4988
4989 // udiv r0, r0, r1
4990 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
4991
4992 // ixor r10, r10, r11
4993 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
4994
4995 // iadd r0, r0, r10
4996 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4997
4998 // ixor DST, r0, r10
4999 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5000 return DST;
5001 }
5002
// 64-bit signed division: no custom expansion is provided here; the node
// is returned unchanged.
SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
5008
5009 SDValue
5010 AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
5011 {
5012 DebugLoc DL = Op.getDebugLoc();
5013 EVT OVT = Op.getValueType();
5014 SDValue LHS = Op.getOperand(0);
5015 SDValue RHS = Op.getOperand(1);
5016 MVT INTTY;
5017 MVT FLTTY;
5018 if (!OVT.isVector()) {
5019 INTTY = MVT::i32;
5020 FLTTY = MVT::f32;
5021 } else if (OVT.getVectorNumElements() == 2) {
5022 INTTY = MVT::v2i32;
5023 FLTTY = MVT::v2f32;
5024 } else if (OVT.getVectorNumElements() == 4) {
5025 INTTY = MVT::v4i32;
5026 FLTTY = MVT::v4f32;
5027 }
5028
5029 // The LowerUDIV24 function implements the following CL.
5030 // int ia = (int)LHS
5031 // float fa = (float)ia
5032 // int ib = (int)RHS
5033 // float fb = (float)ib
5034 // float fq = native_divide(fa, fb)
5035 // fq = trunc(fq)
5036 // float t = mad(fq, fb, fb)
5037 // int iq = (int)fq - (t <= fa)
5038 // return (type)iq
5039
5040 // int ia = (int)LHS
5041 SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
5042
5043 // float fa = (float)ia
5044 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
5045
5046 // int ib = (int)RHS
5047 SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
5048
5049 // float fb = (float)ib
5050 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
5051
5052 // float fq = native_divide(fa, fb)
5053 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
5054
5055 // fq = trunc(fq)
5056 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
5057
5058 // float t = mad(fq, fb, fb)
5059 SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
5060
5061 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
5062 SDValue iq;
5063 fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
5064 if (INTTY == MVT::i32) {
5065 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
5066 } else {
5067 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
5068 }
5069 iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
5070
5071
5072 // return (type)iq
5073 iq = DAG.getZExtOrTrunc(iq, DL, OVT);
5074 return iq;
5075
5076 }
5077
// 32-bit unsigned division: no custom expansion is provided here; the
// node is returned unchanged.
SDValue
AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
5083
// 64-bit unsigned division: no custom expansion is provided here; the
// node is returned unchanged.
SDValue
AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
5089 SDValue
5090 AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
5091 {
5092 DebugLoc DL = Op.getDebugLoc();
5093 EVT OVT = Op.getValueType();
5094 MVT INTTY = MVT::i32;
5095 if (OVT == MVT::v2i8) {
5096 INTTY = MVT::v2i32;
5097 } else if (OVT == MVT::v4i8) {
5098 INTTY = MVT::v4i32;
5099 }
5100 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
5101 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
5102 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
5103 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
5104 return LHS;
5105 }
5106
5107 SDValue
5108 AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
5109 {
5110 DebugLoc DL = Op.getDebugLoc();
5111 EVT OVT = Op.getValueType();
5112 MVT INTTY = MVT::i32;
5113 if (OVT == MVT::v2i16) {
5114 INTTY = MVT::v2i32;
5115 } else if (OVT == MVT::v4i16) {
5116 INTTY = MVT::v4i32;
5117 }
5118 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
5119 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
5120 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
5121 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
5122 return LHS;
5123 }
5124
5125 SDValue
5126 AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
5127 {
5128 DebugLoc DL = Op.getDebugLoc();
5129 EVT OVT = Op.getValueType();
5130 SDValue LHS = Op.getOperand(0);
5131 SDValue RHS = Op.getOperand(1);
5132 // The LowerSREM32 function generates equivalent to the following IL.
5133 // mov r0, LHS
5134 // mov r1, RHS
5135 // ilt r10, r0, 0
5136 // ilt r11, r1, 0
5137 // iadd r0, r0, r10
5138 // iadd r1, r1, r11
5139 // ixor r0, r0, r10
5140 // ixor r1, r1, r11
5141 // udiv r20, r0, r1
5142 // umul r20, r20, r1
5143 // sub r0, r0, r20
5144 // iadd r0, r0, r10
5145 // ixor DST, r0, r10
5146
5147 // mov r0, LHS
5148 SDValue r0 = LHS;
5149
5150 // mov r1, RHS
5151 SDValue r1 = RHS;
5152
5153 // ilt r10, r0, 0
5154 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5155 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
5156 r0, DAG.getConstant(0, OVT));
5157
5158 // ilt r11, r1, 0
5159 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5160 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
5161 r1, DAG.getConstant(0, OVT));
5162
5163 // iadd r0, r0, r10
5164 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
5165
5166 // iadd r1, r1, r11
5167 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
5168
5169 // ixor r0, r0, r10
5170 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5171
5172 // ixor r1, r1, r11
5173 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
5174
5175 // udiv r20, r0, r1
5176 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
5177
5178 // umul r20, r20, r1
5179 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
5180
5181 // sub r0, r0, r20
5182 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
5183
5184 // iadd r0, r0, r10
5185 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
5186
5187 // ixor DST, r0, r10
5188 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5189 return DST;
5190 }
5191
// 64-bit signed remainder: no custom expansion is provided here; the
// node is returned unchanged.
SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
5197
5198 SDValue
5199 AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
5200 {
5201 DebugLoc DL = Op.getDebugLoc();
5202 EVT OVT = Op.getValueType();
5203 MVT INTTY = MVT::i32;
5204 if (OVT == MVT::v2i8) {
5205 INTTY = MVT::v2i32;
5206 } else if (OVT == MVT::v4i8) {
5207 INTTY = MVT::v4i32;
5208 }
5209 SDValue LHS = Op.getOperand(0);
5210 SDValue RHS = Op.getOperand(1);
5211 // The LowerUREM8 function generates equivalent to the following IL.
5212 // mov r0, as_u32(LHS)
5213 // mov r1, as_u32(RHS)
5214 // and r10, r0, 0xFF
5215 // and r11, r1, 0xFF
5216 // cmov_logical r3, r11, r11, 0x1
5217 // udiv r3, r10, r3
5218 // cmov_logical r3, r11, r3, 0
5219 // umul r3, r3, r11
5220 // sub r3, r10, r3
5221 // and as_u8(DST), r3, 0xFF
5222
5223 // mov r0, as_u32(LHS)
5224 SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
5225
5226 // mov r1, as_u32(RHS)
5227 SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
5228
5229 // and r10, r0, 0xFF
5230 SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
5231 DAG.getConstant(0xFF, INTTY));
5232
5233 // and r11, r1, 0xFF
5234 SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
5235 DAG.getConstant(0xFF, INTTY));
5236
5237 // cmov_logical r3, r11, r11, 0x1
5238 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
5239 DAG.getConstant(0x01, INTTY));
5240
5241 // udiv r3, r10, r3
5242 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
5243
5244 // cmov_logical r3, r11, r3, 0
5245 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
5246 DAG.getConstant(0, INTTY));
5247
5248 // umul r3, r3, r11
5249 r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
5250
5251 // sub r3, r10, r3
5252 r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
5253
5254 // and as_u8(DST), r3, 0xFF
5255 SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
5256 DAG.getConstant(0xFF, INTTY));
5257 DST = DAG.getZExtOrTrunc(DST, DL, OVT);
5258 return DST;
5259 }
5260
5261 SDValue
5262 AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
5263 {
5264 DebugLoc DL = Op.getDebugLoc();
5265 EVT OVT = Op.getValueType();
5266 MVT INTTY = MVT::i32;
5267 if (OVT == MVT::v2i16) {
5268 INTTY = MVT::v2i32;
5269 } else if (OVT == MVT::v4i16) {
5270 INTTY = MVT::v4i32;
5271 }
5272 SDValue LHS = Op.getOperand(0);
5273 SDValue RHS = Op.getOperand(1);
5274 // The LowerUREM16 function generatest equivalent to the following IL.
5275 // mov r0, LHS
5276 // mov r1, RHS
5277 // DIV = LowerUDIV16(LHS, RHS)
5278 // and r10, r0, 0xFFFF
5279 // and r11, r1, 0xFFFF
5280 // cmov_logical r3, r11, r11, 0x1
5281 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5282 // and r3, r3, 0xFFFF
5283 // cmov_logical r3, r11, r3, 0
5284 // umul r3, r3, r11
5285 // sub r3, r10, r3
5286 // and DST, r3, 0xFFFF
5287
5288 // mov r0, LHS
5289 SDValue r0 = LHS;
5290
5291 // mov r1, RHS
5292 SDValue r1 = RHS;
5293
5294 // and r10, r0, 0xFFFF
5295 SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
5296 DAG.getConstant(0xFFFF, OVT));
5297
5298 // and r11, r1, 0xFFFF
5299 SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
5300 DAG.getConstant(0xFFFF, OVT));
5301
5302 // cmov_logical r3, r11, r11, 0x1
5303 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
5304 DAG.getConstant(0x01, OVT));
5305
5306 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5307 r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
5308 r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
5309 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
5310 r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
5311 r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
5312
5313 // and r3, r3, 0xFFFF
5314 r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
5315 DAG.getConstant(0xFFFF, OVT));
5316
5317 // cmov_logical r3, r11, r3, 0
5318 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
5319 DAG.getConstant(0, OVT));
5320 // umul r3, r3, r11
5321 r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
5322
5323 // sub r3, r10, r3
5324 r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
5325
5326 // and DST, r3, 0xFFFF
5327 SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
5328 DAG.getConstant(0xFFFF, OVT));
5329 return DST;
5330 }
5331
5332 SDValue
5333 AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
5334 {
5335 DebugLoc DL = Op.getDebugLoc();
5336 EVT OVT = Op.getValueType();
5337 SDValue LHS = Op.getOperand(0);
5338 SDValue RHS = Op.getOperand(1);
5339 // The LowerUREM32 function generates equivalent to the following IL.
5340 // udiv r20, LHS, RHS
5341 // umul r20, r20, RHS
5342 // sub DST, LHS, r20
5343
5344 // udiv r20, LHS, RHS
5345 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
5346
5347 // umul r20, r20, RHS
5348 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
5349
5350 // sub DST, LHS, r20
5351 SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
5352 return DST;
5353 }
5354
// 64-bit unsigned remainder: no custom expansion is provided here; the
// node is returned unchanged.
SDValue
AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
5360
5361
5362 SDValue
5363 AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
5364 {
5365 DebugLoc DL = Op.getDebugLoc();
5366 EVT OVT = Op.getValueType();
5367 MVT INTTY = MVT::i32;
5368 if (OVT == MVT::v2f32) {
5369 INTTY = MVT::v2i32;
5370 } else if (OVT == MVT::v4f32) {
5371 INTTY = MVT::v4i32;
5372 }
5373 SDValue LHS = Op.getOperand(0);
5374 SDValue RHS = Op.getOperand(1);
5375 SDValue DST;
5376 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
5377 &this->getTargetMachine())->getSubtargetImpl();
5378 if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
5379 // TODO: This doesn't work for vector types yet
5380 // The LowerFDIV32 function generates equivalent to the following
5381 // IL:
5382 // mov r20, as_int(LHS)
5383 // mov r21, as_int(RHS)
5384 // and r30, r20, 0x7f800000
5385 // and r31, r20, 0x807FFFFF
5386 // and r32, r21, 0x7f800000
5387 // and r33, r21, 0x807FFFFF
5388 // ieq r40, r30, 0x7F800000
5389 // ieq r41, r31, 0x7F800000
5390 // ieq r42, r32, 0
5391 // ieq r43, r33, 0
5392 // and r50, r20, 0x80000000
5393 // and r51, r21, 0x80000000
5394 // ior r32, r32, 0x3f800000
5395 // ior r33, r33, 0x3f800000
5396 // cmov_logical r32, r42, r50, r32
5397 // cmov_logical r33, r43, r51, r33
5398 // cmov_logical r32, r40, r20, r32
5399 // cmov_logical r33, r41, r21, r33
5400 // ior r50, r40, r41
5401 // ior r51, r42, r43
5402 // ior r50, r50, r51
5403 // inegate r52, r31
5404 // iadd r30, r30, r52
5405 // cmov_logical r30, r50, 0, r30
5406 // div_zeroop(infinity) r21, 1.0, r33
5407 // mul_ieee r20, r32, r21
5408 // and r22, r20, 0x7FFFFFFF
5409 // and r23, r20, 0x80000000
5410 // ishr r60, r22, 0x00000017
5411 // ishr r61, r30, 0x00000017
5412 // iadd r20, r20, r30
5413 // iadd r21, r22, r30
5414 // iadd r60, r60, r61
5415 // ige r42, 0, R60
5416 // ior r41, r23, 0x7F800000
5417 // ige r40, r60, 0x000000FF
5418 // cmov_logical r40, r50, 0, r40
5419 // cmov_logical r20, r42, r23, r20
5420 // cmov_logical DST, r40, r41, r20
5421 // as_float(DST)
5422
5423 // mov r20, as_int(LHS)
5424 SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
5425
5426 // mov r21, as_int(RHS)
5427 SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
5428
5429 // and r30, r20, 0x7f800000
5430 SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5431 DAG.getConstant(0x7F800000, INTTY));
5432
5433 // and r31, r21, 0x7f800000
5434 SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5435 DAG.getConstant(0x7f800000, INTTY));
5436
5437 // and r32, r20, 0x807FFFFF
5438 SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5439 DAG.getConstant(0x807FFFFF, INTTY));
5440
5441 // and r33, r21, 0x807FFFFF
5442 SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5443 DAG.getConstant(0x807FFFFF, INTTY));
5444
5445 // ieq r40, r30, 0x7F800000
5446 SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5447 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5448 R30, DAG.getConstant(0x7F800000, INTTY));
5449
5450 // ieq r41, r31, 0x7F800000
5451 SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5452 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5453 R31, DAG.getConstant(0x7F800000, INTTY));
5454
5455 // ieq r42, r30, 0
5456 SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5457 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5458 R30, DAG.getConstant(0, INTTY));
5459
5460 // ieq r43, r31, 0
5461 SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5462 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5463 R31, DAG.getConstant(0, INTTY));
5464
5465 // and r50, r20, 0x80000000
5466 SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5467 DAG.getConstant(0x80000000, INTTY));
5468
5469 // and r51, r21, 0x80000000
5470 SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5471 DAG.getConstant(0x80000000, INTTY));
5472
5473 // ior r32, r32, 0x3f800000
5474 R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
5475 DAG.getConstant(0x3F800000, INTTY));
5476
5477 // ior r33, r33, 0x3f800000
5478 R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
5479 DAG.getConstant(0x3F800000, INTTY));
5480
5481 // cmov_logical r32, r42, r50, r32
5482 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
5483
5484 // cmov_logical r33, r43, r51, r33
5485 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
5486
5487 // cmov_logical r32, r40, r20, r32
5488 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
5489
5490 // cmov_logical r33, r41, r21, r33
5491 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
5492
5493 // ior r50, r40, r41
5494 R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
5495
5496 // ior r51, r42, r43
5497 R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
5498
5499 // ior r50, r50, r51
5500 R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
5501
5502 // inegate r52, r31
5503 SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
5504
5505 // iadd r30, r30, r52
5506 R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
5507
5508 // cmov_logical r30, r50, 0, r30
5509 R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
5510 DAG.getConstant(0, INTTY), R30);
5511
5512 // div_zeroop(infinity) r21, 1.0, as_float(r33)
5513 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
5514 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
5515 DAG.getConstantFP(1.0f, OVT), R33);
5516
5517 // mul_ieee as_int(r20), as_float(r32), r21
5518 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
5519 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
5520 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
5521
5522 // div_zeroop(infinity) r21, 1.0, as_float(r33)
5523 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
5524 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
5525 DAG.getConstantFP(1.0f, OVT), R33);
5526
5527 // mul_ieee as_int(r20), as_float(r32), r21
5528 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
5529 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
5530 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
5531
5532 // and r22, r20, 0x7FFFFFFF
5533 SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5534 DAG.getConstant(0x7FFFFFFF, INTTY));
5535
5536 // and r23, r20, 0x80000000
5537 SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5538 DAG.getConstant(0x80000000, INTTY));
5539
5540 // ishr r60, r22, 0x00000017
5541 SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
5542 DAG.getConstant(0x00000017, INTTY));
5543
5544 // ishr r61, r30, 0x00000017
5545 SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
5546 DAG.getConstant(0x00000017, INTTY));
5547
5548 // iadd r20, r20, r30
5549 R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
5550
5551 // iadd r21, r22, r30
5552 R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
5553
5554 // iadd r60, r60, r61
5555 R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
5556
5557 // ige r42, 0, R60
5558 R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5559 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
5560 DAG.getConstant(0, INTTY),
5561 R60);
5562
5563 // ior r41, r23, 0x7F800000
5564 R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
5565 DAG.getConstant(0x7F800000, INTTY));
5566
5567 // ige r40, r60, 0x000000FF
5568 R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5569 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
5570 R60,
5571 DAG.getConstant(0x0000000FF, INTTY));
5572
5573 // cmov_logical r40, r50, 0, r40
5574 R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
5575 DAG.getConstant(0, INTTY),
5576 R40);
5577
5578 // cmov_logical r20, r42, r23, r20
5579 R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
5580
5581 // cmov_logical DST, r40, r41, r20
5582 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
5583
5584 // as_float(DST)
5585 DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
5586 } else {
5587 // The following sequence of DAG nodes produce the following IL:
5588 // fabs r1, RHS
5589 // lt r2, 0x1.0p+96f, r1
5590 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5591 // mul_ieee r1, RHS, r3
5592 // div_zeroop(infinity) r0, LHS, r1
5593 // mul_ieee DST, r0, r3
5594
5595 // fabs r1, RHS
5596 SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
5597 // lt r2, 0x1.0p+96f, r1
5598 SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5599 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
5600 DAG.getConstant(0x6f800000, INTTY), r1);
5601 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5602 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
5603 DAG.getConstant(0x2f800000, INTTY),
5604 DAG.getConstant(0x3f800000, INTTY));
5605 // mul_ieee r1, RHS, r3
5606 r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
5607 // div_zeroop(infinity) r0, LHS, r1
5608 SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
5609 // mul_ieee DST, r0, r3
5610 DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
5611 }
5612 return DST;
5613 }
5614
5615 SDValue
5616 AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
5617 {
5618 return SDValue(Op.getNode(), 0);
5619 }