1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
// This file implements the interfaces that AMDIL uses to lower LLVM code into a
// selection DAG.
13 //===----------------------------------------------------------------------===//
15 #include "AMDILISelLowering.h"
16 #include "AMDILDevices.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "AMDILSubtarget.h"
19 #include "AMDILTargetMachine.h"
20 #include "AMDILUtilityFunctions.h"
21 #include "llvm/CallingConv.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/DerivedTypes.h"
29 #include "llvm/Instructions.h"
30 #include "llvm/Intrinsics.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Target/TargetOptions.h"
35 #define ISDBITCAST ISD::BITCAST
36 #define MVTGLUE MVT::Glue
37 //===----------------------------------------------------------------------===//
38 // Calling Convention Implementation
39 //===----------------------------------------------------------------------===//
40 #include "AMDILGenCallingConv.inc"
42 //===----------------------------------------------------------------------===//
43 // TargetLowering Implementation Help Functions Begin
44 //===----------------------------------------------------------------------===//
46 getConversionNode(SelectionDAG
&DAG
, SDValue
& Src
, SDValue
& Dst
, bool asType
)
48 DebugLoc DL
= Src
.getDebugLoc();
49 EVT svt
= Src
.getValueType().getScalarType();
50 EVT dvt
= Dst
.getValueType().getScalarType();
51 if (svt
.isFloatingPoint() && dvt
.isFloatingPoint()) {
52 if (dvt
.bitsGT(svt
)) {
53 Src
= DAG
.getNode(ISD::FP_EXTEND
, DL
, dvt
, Src
);
54 } else if (svt
.bitsLT(svt
)) {
55 Src
= DAG
.getNode(ISD::FP_ROUND
, DL
, dvt
, Src
,
56 DAG
.getConstant(1, MVT::i32
));
58 } else if (svt
.isInteger() && dvt
.isInteger()) {
59 if (!svt
.bitsEq(dvt
)) {
60 Src
= DAG
.getSExtOrTrunc(Src
, DL
, dvt
);
62 Src
= DAG
.getNode(AMDILISD::MOVE
, DL
, dvt
, Src
);
64 } else if (svt
.isInteger()) {
65 unsigned opcode
= (asType
) ? ISDBITCAST
: ISD::SINT_TO_FP
;
66 if (!svt
.bitsEq(dvt
)) {
67 if (dvt
.getSimpleVT().SimpleTy
== MVT::f32
) {
68 Src
= DAG
.getSExtOrTrunc(Src
, DL
, MVT::i32
);
69 } else if (dvt
.getSimpleVT().SimpleTy
== MVT::f64
) {
70 Src
= DAG
.getSExtOrTrunc(Src
, DL
, MVT::i64
);
72 assert(0 && "We only support 32 and 64bit fp types");
75 Src
= DAG
.getNode(opcode
, DL
, dvt
, Src
);
76 } else if (dvt
.isInteger()) {
77 unsigned opcode
= (asType
) ? ISDBITCAST
: ISD::FP_TO_SINT
;
78 if (svt
.getSimpleVT().SimpleTy
== MVT::f32
) {
79 Src
= DAG
.getNode(opcode
, DL
, MVT::i32
, Src
);
80 } else if (svt
.getSimpleVT().SimpleTy
== MVT::f64
) {
81 Src
= DAG
.getNode(opcode
, DL
, MVT::i64
, Src
);
83 assert(0 && "We only support 32 and 64bit fp types");
85 Src
= DAG
.getSExtOrTrunc(Src
, DL
, dvt
);
89 // CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
91 static AMDILCC::CondCodes
92 CondCCodeToCC(ISD::CondCode CC
, const MVT::SimpleValueType
& type
)
97 errs()<<"Condition Code: "<< (unsigned int)CC
<<"\n";
98 assert(0 && "Unknown condition code!");
103 return AMDILCC::IL_CC_F_O
;
105 return AMDILCC::IL_CC_D_O
;
107 assert(0 && "Opcode combination not generated correctly!");
108 return AMDILCC::COND_ERROR
;
113 return AMDILCC::IL_CC_F_UO
;
115 return AMDILCC::IL_CC_D_UO
;
117 assert(0 && "Opcode combination not generated correctly!");
118 return AMDILCC::COND_ERROR
;
126 return AMDILCC::IL_CC_I_GT
;
128 return AMDILCC::IL_CC_F_GT
;
130 return AMDILCC::IL_CC_D_GT
;
132 return AMDILCC::IL_CC_L_GT
;
134 assert(0 && "Opcode combination not generated correctly!");
135 return AMDILCC::COND_ERROR
;
143 return AMDILCC::IL_CC_I_GE
;
145 return AMDILCC::IL_CC_F_GE
;
147 return AMDILCC::IL_CC_D_GE
;
149 return AMDILCC::IL_CC_L_GE
;
151 assert(0 && "Opcode combination not generated correctly!");
152 return AMDILCC::COND_ERROR
;
160 return AMDILCC::IL_CC_I_LT
;
162 return AMDILCC::IL_CC_F_LT
;
164 return AMDILCC::IL_CC_D_LT
;
166 return AMDILCC::IL_CC_L_LT
;
168 assert(0 && "Opcode combination not generated correctly!");
169 return AMDILCC::COND_ERROR
;
177 return AMDILCC::IL_CC_I_LE
;
179 return AMDILCC::IL_CC_F_LE
;
181 return AMDILCC::IL_CC_D_LE
;
183 return AMDILCC::IL_CC_L_LE
;
185 assert(0 && "Opcode combination not generated correctly!");
186 return AMDILCC::COND_ERROR
;
194 return AMDILCC::IL_CC_I_NE
;
196 return AMDILCC::IL_CC_F_NE
;
198 return AMDILCC::IL_CC_D_NE
;
200 return AMDILCC::IL_CC_L_NE
;
202 assert(0 && "Opcode combination not generated correctly!");
203 return AMDILCC::COND_ERROR
;
211 return AMDILCC::IL_CC_I_EQ
;
213 return AMDILCC::IL_CC_F_EQ
;
215 return AMDILCC::IL_CC_D_EQ
;
217 return AMDILCC::IL_CC_L_EQ
;
219 assert(0 && "Opcode combination not generated correctly!");
220 return AMDILCC::COND_ERROR
;
228 return AMDILCC::IL_CC_U_GT
;
230 return AMDILCC::IL_CC_F_UGT
;
232 return AMDILCC::IL_CC_D_UGT
;
234 return AMDILCC::IL_CC_UL_GT
;
236 assert(0 && "Opcode combination not generated correctly!");
237 return AMDILCC::COND_ERROR
;
245 return AMDILCC::IL_CC_U_GE
;
247 return AMDILCC::IL_CC_F_UGE
;
249 return AMDILCC::IL_CC_D_UGE
;
251 return AMDILCC::IL_CC_UL_GE
;
253 assert(0 && "Opcode combination not generated correctly!");
254 return AMDILCC::COND_ERROR
;
262 return AMDILCC::IL_CC_U_LT
;
264 return AMDILCC::IL_CC_F_ULT
;
266 return AMDILCC::IL_CC_D_ULT
;
268 return AMDILCC::IL_CC_UL_LT
;
270 assert(0 && "Opcode combination not generated correctly!");
271 return AMDILCC::COND_ERROR
;
279 return AMDILCC::IL_CC_U_LE
;
281 return AMDILCC::IL_CC_F_ULE
;
283 return AMDILCC::IL_CC_D_ULE
;
285 return AMDILCC::IL_CC_UL_LE
;
287 assert(0 && "Opcode combination not generated correctly!");
288 return AMDILCC::COND_ERROR
;
296 return AMDILCC::IL_CC_U_NE
;
298 return AMDILCC::IL_CC_F_UNE
;
300 return AMDILCC::IL_CC_D_UNE
;
302 return AMDILCC::IL_CC_UL_NE
;
304 assert(0 && "Opcode combination not generated correctly!");
305 return AMDILCC::COND_ERROR
;
313 return AMDILCC::IL_CC_U_EQ
;
315 return AMDILCC::IL_CC_F_UEQ
;
317 return AMDILCC::IL_CC_D_UEQ
;
319 return AMDILCC::IL_CC_UL_EQ
;
321 assert(0 && "Opcode combination not generated correctly!");
322 return AMDILCC::COND_ERROR
;
327 return AMDILCC::IL_CC_F_OGT
;
329 return AMDILCC::IL_CC_D_OGT
;
336 assert(0 && "Opcode combination not generated correctly!");
337 return AMDILCC::COND_ERROR
;
342 return AMDILCC::IL_CC_F_OGE
;
344 return AMDILCC::IL_CC_D_OGE
;
351 assert(0 && "Opcode combination not generated correctly!");
352 return AMDILCC::COND_ERROR
;
357 return AMDILCC::IL_CC_F_OLT
;
359 return AMDILCC::IL_CC_D_OLT
;
366 assert(0 && "Opcode combination not generated correctly!");
367 return AMDILCC::COND_ERROR
;
372 return AMDILCC::IL_CC_F_OLE
;
374 return AMDILCC::IL_CC_D_OLE
;
381 assert(0 && "Opcode combination not generated correctly!");
382 return AMDILCC::COND_ERROR
;
387 return AMDILCC::IL_CC_F_ONE
;
389 return AMDILCC::IL_CC_D_ONE
;
396 assert(0 && "Opcode combination not generated correctly!");
397 return AMDILCC::COND_ERROR
;
402 return AMDILCC::IL_CC_F_OEQ
;
404 return AMDILCC::IL_CC_D_OEQ
;
411 assert(0 && "Opcode combination not generated correctly!");
412 return AMDILCC::COND_ERROR
;
418 translateToOpcode(uint64_t CCCode
, unsigned int regClass
)
421 case AMDILCC::IL_CC_D_EQ
:
422 case AMDILCC::IL_CC_D_OEQ
:
423 if (regClass
== AMDIL::GPRV2F64RegClassID
) {
424 return (unsigned int)AMDIL::DEQ_v2f64
;
426 return (unsigned int)AMDIL::DEQ
;
428 case AMDILCC::IL_CC_D_LE
:
429 case AMDILCC::IL_CC_D_OLE
:
430 case AMDILCC::IL_CC_D_ULE
:
431 case AMDILCC::IL_CC_D_GE
:
432 case AMDILCC::IL_CC_D_OGE
:
433 case AMDILCC::IL_CC_D_UGE
:
434 return (unsigned int)AMDIL::DGE
;
435 case AMDILCC::IL_CC_D_LT
:
436 case AMDILCC::IL_CC_D_OLT
:
437 case AMDILCC::IL_CC_D_ULT
:
438 case AMDILCC::IL_CC_D_GT
:
439 case AMDILCC::IL_CC_D_OGT
:
440 case AMDILCC::IL_CC_D_UGT
:
441 return (unsigned int)AMDIL::DLT
;
442 case AMDILCC::IL_CC_D_NE
:
443 case AMDILCC::IL_CC_D_UNE
:
444 return (unsigned int)AMDIL::DNE
;
445 case AMDILCC::IL_CC_F_EQ
:
446 case AMDILCC::IL_CC_F_OEQ
:
447 return (unsigned int)AMDIL::FEQ
;
448 case AMDILCC::IL_CC_F_LE
:
449 case AMDILCC::IL_CC_F_ULE
:
450 case AMDILCC::IL_CC_F_OLE
:
451 case AMDILCC::IL_CC_F_GE
:
452 case AMDILCC::IL_CC_F_UGE
:
453 case AMDILCC::IL_CC_F_OGE
:
454 return (unsigned int)AMDIL::FGE
;
455 case AMDILCC::IL_CC_F_LT
:
456 case AMDILCC::IL_CC_F_OLT
:
457 case AMDILCC::IL_CC_F_ULT
:
458 case AMDILCC::IL_CC_F_GT
:
459 case AMDILCC::IL_CC_F_OGT
:
460 case AMDILCC::IL_CC_F_UGT
:
461 if (regClass
== AMDIL::GPRV2F32RegClassID
) {
462 return (unsigned int)AMDIL::FLT_v2f32
;
463 } else if (regClass
== AMDIL::GPRV4F32RegClassID
) {
464 return (unsigned int)AMDIL::FLT_v4f32
;
466 return (unsigned int)AMDIL::FLT
;
468 case AMDILCC::IL_CC_F_NE
:
469 case AMDILCC::IL_CC_F_UNE
:
470 return (unsigned int)AMDIL::FNE
;
471 case AMDILCC::IL_CC_I_EQ
:
472 case AMDILCC::IL_CC_U_EQ
:
473 if (regClass
== AMDIL::GPRI32RegClassID
474 || regClass
== AMDIL::GPRI8RegClassID
475 || regClass
== AMDIL::GPRI16RegClassID
) {
476 return (unsigned int)AMDIL::IEQ
;
477 } else if (regClass
== AMDIL::GPRV2I32RegClassID
478 || regClass
== AMDIL::GPRV2I8RegClassID
479 || regClass
== AMDIL::GPRV2I16RegClassID
) {
480 return (unsigned int)AMDIL::IEQ_v2i32
;
481 } else if (regClass
== AMDIL::GPRV4I32RegClassID
482 || regClass
== AMDIL::GPRV4I8RegClassID
483 || regClass
== AMDIL::GPRV4I16RegClassID
) {
484 return (unsigned int)AMDIL::IEQ_v4i32
;
486 assert(!"Unknown reg class!");
488 case AMDILCC::IL_CC_L_EQ
:
489 case AMDILCC::IL_CC_UL_EQ
:
490 return (unsigned int)AMDIL::LEQ
;
491 case AMDILCC::IL_CC_I_GE
:
492 case AMDILCC::IL_CC_I_LE
:
493 if (regClass
== AMDIL::GPRI32RegClassID
494 || regClass
== AMDIL::GPRI8RegClassID
495 || regClass
== AMDIL::GPRI16RegClassID
) {
496 return (unsigned int)AMDIL::IGE
;
497 } else if (regClass
== AMDIL::GPRV2I32RegClassID
498 || regClass
== AMDIL::GPRI8RegClassID
499 || regClass
== AMDIL::GPRI16RegClassID
) {
500 return (unsigned int)AMDIL::IGE_v2i32
;
501 } else if (regClass
== AMDIL::GPRV4I32RegClassID
502 || regClass
== AMDIL::GPRI8RegClassID
503 || regClass
== AMDIL::GPRI16RegClassID
) {
504 return (unsigned int)AMDIL::IGE_v4i32
;
506 assert(!"Unknown reg class!");
508 case AMDILCC::IL_CC_I_LT
:
509 case AMDILCC::IL_CC_I_GT
:
510 if (regClass
== AMDIL::GPRI32RegClassID
511 || regClass
== AMDIL::GPRI8RegClassID
512 || regClass
== AMDIL::GPRI16RegClassID
) {
513 return (unsigned int)AMDIL::ILT
;
514 } else if (regClass
== AMDIL::GPRV2I32RegClassID
515 || regClass
== AMDIL::GPRI8RegClassID
516 || regClass
== AMDIL::GPRI16RegClassID
) {
517 return (unsigned int)AMDIL::ILT_v2i32
;
518 } else if (regClass
== AMDIL::GPRV4I32RegClassID
519 || regClass
== AMDIL::GPRI8RegClassID
520 || regClass
== AMDIL::GPRI16RegClassID
) {
521 return (unsigned int)AMDIL::ILT_v4i32
;
523 assert(!"Unknown reg class!");
525 case AMDILCC::IL_CC_L_GE
:
526 return (unsigned int)AMDIL::LGE
;
527 case AMDILCC::IL_CC_L_LE
:
528 return (unsigned int)AMDIL::LLE
;
529 case AMDILCC::IL_CC_L_LT
:
530 return (unsigned int)AMDIL::LLT
;
531 case AMDILCC::IL_CC_L_GT
:
532 return (unsigned int)AMDIL::LGT
;
533 case AMDILCC::IL_CC_I_NE
:
534 case AMDILCC::IL_CC_U_NE
:
535 if (regClass
== AMDIL::GPRI32RegClassID
536 || regClass
== AMDIL::GPRI8RegClassID
537 || regClass
== AMDIL::GPRI16RegClassID
) {
538 return (unsigned int)AMDIL::INE
;
539 } else if (regClass
== AMDIL::GPRV2I32RegClassID
540 || regClass
== AMDIL::GPRI8RegClassID
541 || regClass
== AMDIL::GPRI16RegClassID
) {
542 return (unsigned int)AMDIL::INE_v2i32
;
543 } else if (regClass
== AMDIL::GPRV4I32RegClassID
544 || regClass
== AMDIL::GPRI8RegClassID
545 || regClass
== AMDIL::GPRI16RegClassID
) {
546 return (unsigned int)AMDIL::INE_v4i32
;
548 assert(!"Unknown reg class!");
550 case AMDILCC::IL_CC_U_GE
:
551 case AMDILCC::IL_CC_U_LE
:
552 if (regClass
== AMDIL::GPRI32RegClassID
553 || regClass
== AMDIL::GPRI8RegClassID
554 || regClass
== AMDIL::GPRI16RegClassID
) {
555 return (unsigned int)AMDIL::UGE
;
556 } else if (regClass
== AMDIL::GPRV2I32RegClassID
557 || regClass
== AMDIL::GPRI8RegClassID
558 || regClass
== AMDIL::GPRI16RegClassID
) {
559 return (unsigned int)AMDIL::UGE_v2i32
;
560 } else if (regClass
== AMDIL::GPRV4I32RegClassID
561 || regClass
== AMDIL::GPRI8RegClassID
562 || regClass
== AMDIL::GPRI16RegClassID
) {
563 return (unsigned int)AMDIL::UGE_v4i32
;
565 assert(!"Unknown reg class!");
567 case AMDILCC::IL_CC_L_NE
:
568 case AMDILCC::IL_CC_UL_NE
:
569 return (unsigned int)AMDIL::LNE
;
570 case AMDILCC::IL_CC_UL_GE
:
571 return (unsigned int)AMDIL::ULGE
;
572 case AMDILCC::IL_CC_UL_LE
:
573 return (unsigned int)AMDIL::ULLE
;
574 case AMDILCC::IL_CC_U_LT
:
575 if (regClass
== AMDIL::GPRI32RegClassID
576 || regClass
== AMDIL::GPRI8RegClassID
577 || regClass
== AMDIL::GPRI16RegClassID
) {
578 return (unsigned int)AMDIL::ULT
;
579 } else if (regClass
== AMDIL::GPRV2I32RegClassID
580 || regClass
== AMDIL::GPRI8RegClassID
581 || regClass
== AMDIL::GPRI16RegClassID
) {
582 return (unsigned int)AMDIL::ULT_v2i32
;
583 } else if (regClass
== AMDIL::GPRV4I32RegClassID
584 || regClass
== AMDIL::GPRI8RegClassID
585 || regClass
== AMDIL::GPRI16RegClassID
) {
586 return (unsigned int)AMDIL::ULT_v4i32
;
588 assert(!"Unknown reg class!");
590 case AMDILCC::IL_CC_U_GT
:
591 if (regClass
== AMDIL::GPRI32RegClassID
592 || regClass
== AMDIL::GPRI8RegClassID
593 || regClass
== AMDIL::GPRI16RegClassID
) {
594 return (unsigned int)AMDIL::UGT
;
595 } else if (regClass
== AMDIL::GPRV2I32RegClassID
596 || regClass
== AMDIL::GPRI8RegClassID
597 || regClass
== AMDIL::GPRI16RegClassID
) {
598 return (unsigned int)AMDIL::UGT_v2i32
;
599 } else if (regClass
== AMDIL::GPRV4I32RegClassID
600 || regClass
== AMDIL::GPRI8RegClassID
601 || regClass
== AMDIL::GPRI16RegClassID
) {
602 return (unsigned int)AMDIL::UGT_v4i32
;
604 assert(!"Unknown reg class!");
606 case AMDILCC::IL_CC_UL_LT
:
607 return (unsigned int)AMDIL::ULLT
;
608 case AMDILCC::IL_CC_UL_GT
:
609 return (unsigned int)AMDIL::ULGT
;
610 case AMDILCC::IL_CC_F_UEQ
:
611 case AMDILCC::IL_CC_D_UEQ
:
612 case AMDILCC::IL_CC_F_ONE
:
613 case AMDILCC::IL_CC_D_ONE
:
614 case AMDILCC::IL_CC_F_O
:
615 case AMDILCC::IL_CC_F_UO
:
616 case AMDILCC::IL_CC_D_O
:
617 case AMDILCC::IL_CC_D_UO
:
622 errs()<<"Opcode: "<<CCCode
<<"\n";
623 assert(0 && "Unknown opcode retrieved");
627 /// Helper function used by LowerFormalArguments
628 static const TargetRegisterClass
*
629 getRegClassFromType(unsigned int type
) {
632 assert(0 && "Passed in type does not match any register classes.");
634 return &AMDIL::GPRI8RegClass
;
636 return &AMDIL::GPRI16RegClass
;
638 return &AMDIL::GPRI32RegClass
;
640 return &AMDIL::GPRF32RegClass
;
642 return &AMDIL::GPRI64RegClass
;
644 return &AMDIL::GPRF64RegClass
;
646 return &AMDIL::GPRV4F32RegClass
;
648 return &AMDIL::GPRV4I8RegClass
;
650 return &AMDIL::GPRV4I16RegClass
;
652 return &AMDIL::GPRV4I32RegClass
;
654 return &AMDIL::GPRV2F32RegClass
;
656 return &AMDIL::GPRV2I8RegClass
;
658 return &AMDIL::GPRV2I16RegClass
;
660 return &AMDIL::GPRV2I32RegClass
;
662 return &AMDIL::GPRV2F64RegClass
;
664 return &AMDIL::GPRV2I64RegClass
;
669 AMDILTargetLowering::LowerMemArgument(
671 CallingConv::ID CallConv
,
672 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
673 DebugLoc dl
, SelectionDAG
&DAG
,
674 const CCValAssign
&VA
,
675 MachineFrameInfo
*MFI
,
678 // Create the nodes corresponding to a load from this parameter slot.
679 ISD::ArgFlagsTy Flags
= Ins
[i
].Flags
;
681 bool AlwaysUseMutable
= (CallConv
==CallingConv::Fast
) &&
682 getTargetMachine().Options
.GuaranteedTailCallOpt
;
683 bool isImmutable
= !AlwaysUseMutable
&& !Flags
.isByVal();
685 // FIXME: For now, all byval parameter objects are marked mutable. This can
686 // be changed with more analysis.
687 // In case of tail call optimization mark all arguments mutable. Since they
688 // could be overwritten by lowering of arguments in case of a tail call.
689 int FI
= MFI
->CreateFixedObject(VA
.getValVT().getSizeInBits()/8,
690 VA
.getLocMemOffset(), isImmutable
);
691 SDValue FIN
= DAG
.getFrameIndex(FI
, getPointerTy());
695 return DAG
.getLoad(VA
.getValVT(), dl
, Chain
, FIN
,
696 MachinePointerInfo::getFixedStack(FI
),
697 false, false, false, 0);
699 //===----------------------------------------------------------------------===//
700 // TargetLowering Implementation Help Functions End
701 //===----------------------------------------------------------------------===//
702 //===----------------------------------------------------------------------===//
703 // Instruction generation functions
704 //===----------------------------------------------------------------------===//
706 AMDILTargetLowering::addExtensionInstructions(
707 uint32_t reg
, bool signedShift
,
708 unsigned int simpleVT
) const
711 uint32_t LShift
, RShift
;
716 case AMDIL::GPRI8RegClassID
:
718 LShift
= AMDIL::SHL_i8
;
720 RShift
= AMDIL::SHR_i8
;
722 RShift
= AMDIL::USHR_i8
;
725 case AMDIL::GPRV2I8RegClassID
:
727 LShift
= AMDIL::SHL_v2i8
;
729 RShift
= AMDIL::SHR_v2i8
;
731 RShift
= AMDIL::USHR_v2i8
;
734 case AMDIL::GPRV4I8RegClassID
:
736 LShift
= AMDIL::SHL_v4i8
;
738 RShift
= AMDIL::SHR_v4i8
;
740 RShift
= AMDIL::USHR_v4i8
;
743 case AMDIL::GPRI16RegClassID
:
745 LShift
= AMDIL::SHL_i16
;
747 RShift
= AMDIL::SHR_i16
;
749 RShift
= AMDIL::USHR_i16
;
752 case AMDIL::GPRV2I16RegClassID
:
754 LShift
= AMDIL::SHL_v2i16
;
756 RShift
= AMDIL::SHR_v2i16
;
758 RShift
= AMDIL::USHR_v2i16
;
761 case AMDIL::GPRV4I16RegClassID
:
763 LShift
= AMDIL::SHL_v4i16
;
765 RShift
= AMDIL::SHR_v4i16
;
767 RShift
= AMDIL::USHR_v4i16
;
771 uint32_t LoadReg
= genVReg(simpleVT
);
772 uint32_t tmp1
= genVReg(simpleVT
);
773 uint32_t tmp2
= genVReg(simpleVT
);
774 generateMachineInst(AMDIL::LOADCONST_i32
, LoadReg
).addImm(shiftSize
);
775 generateMachineInst(LShift
, tmp1
, reg
, LoadReg
);
776 generateMachineInst(RShift
, tmp2
, tmp1
, LoadReg
);
781 AMDILTargetLowering::convertToReg(MachineOperand op
) const
785 } else if (op
.isImm()) {
787 = genVReg(op
.getParent()->getDesc().OpInfo
[0].RegClass
);
788 generateMachineInst(AMDIL::LOADCONST_i32
, loadReg
)
789 .addImm(op
.getImm());
790 op
.ChangeToRegister(loadReg
, false);
791 } else if (op
.isFPImm()) {
793 = genVReg(op
.getParent()->getDesc().OpInfo
[0].RegClass
);
794 generateMachineInst(AMDIL::LOADCONST_f32
, loadReg
)
795 .addFPImm(op
.getFPImm());
796 op
.ChangeToRegister(loadReg
, false);
797 } else if (op
.isMBB()) {
798 op
.ChangeToRegister(0, false);
799 } else if (op
.isFI()) {
800 op
.ChangeToRegister(0, false);
801 } else if (op
.isCPI()) {
802 op
.ChangeToRegister(0, false);
803 } else if (op
.isJTI()) {
804 op
.ChangeToRegister(0, false);
805 } else if (op
.isGlobal()) {
806 op
.ChangeToRegister(0, false);
807 } else if (op
.isSymbol()) {
808 op
.ChangeToRegister(0, false);
809 }/* else if (op.isMetadata()) {
810 op.ChangeToRegister(0, false);
816 AMDILTargetLowering::generateCMPInstr(
818 MachineBasicBlock
*BB
,
819 const TargetInstrInfo
& TII
)
822 MachineOperand DST
= MI
->getOperand(0);
823 MachineOperand CC
= MI
->getOperand(1);
824 MachineOperand LHS
= MI
->getOperand(2);
825 MachineOperand RHS
= MI
->getOperand(3);
826 int64_t ccCode
= CC
.getImm();
827 unsigned int simpleVT
= MI
->getDesc().OpInfo
[0].RegClass
;
828 unsigned int opCode
= translateToOpcode(ccCode
, simpleVT
);
829 DebugLoc DL
= MI
->getDebugLoc();
830 MachineBasicBlock::iterator BBI
= MI
;
831 setPrivateData(BB
, BBI
, &DL
, &TII
);
833 LHS
= convertToReg(LHS
);
836 RHS
= convertToReg(RHS
);
839 case AMDILCC::IL_CC_I_EQ
:
840 case AMDILCC::IL_CC_I_NE
:
841 case AMDILCC::IL_CC_I_GE
:
842 case AMDILCC::IL_CC_I_LT
:
844 uint32_t lhsreg
= addExtensionInstructions(
845 LHS
.getReg(), true, simpleVT
);
846 uint32_t rhsreg
= addExtensionInstructions(
847 RHS
.getReg(), true, simpleVT
);
848 generateMachineInst(opCode
, DST
.getReg(), lhsreg
, rhsreg
);
851 case AMDILCC::IL_CC_U_EQ
:
852 case AMDILCC::IL_CC_U_NE
:
853 case AMDILCC::IL_CC_U_GE
:
854 case AMDILCC::IL_CC_U_LT
:
855 case AMDILCC::IL_CC_D_EQ
:
856 case AMDILCC::IL_CC_F_EQ
:
857 case AMDILCC::IL_CC_F_OEQ
:
858 case AMDILCC::IL_CC_D_OEQ
:
859 case AMDILCC::IL_CC_D_NE
:
860 case AMDILCC::IL_CC_F_NE
:
861 case AMDILCC::IL_CC_F_UNE
:
862 case AMDILCC::IL_CC_D_UNE
:
863 case AMDILCC::IL_CC_D_GE
:
864 case AMDILCC::IL_CC_F_GE
:
865 case AMDILCC::IL_CC_D_OGE
:
866 case AMDILCC::IL_CC_F_OGE
:
867 case AMDILCC::IL_CC_D_LT
:
868 case AMDILCC::IL_CC_F_LT
:
869 case AMDILCC::IL_CC_F_OLT
:
870 case AMDILCC::IL_CC_D_OLT
:
871 generateMachineInst(opCode
, DST
.getReg(),
872 LHS
.getReg(), RHS
.getReg());
874 case AMDILCC::IL_CC_I_GT
:
875 case AMDILCC::IL_CC_I_LE
:
877 uint32_t lhsreg
= addExtensionInstructions(
878 LHS
.getReg(), true, simpleVT
);
879 uint32_t rhsreg
= addExtensionInstructions(
880 RHS
.getReg(), true, simpleVT
);
881 generateMachineInst(opCode
, DST
.getReg(), rhsreg
, lhsreg
);
884 case AMDILCC::IL_CC_U_GT
:
885 case AMDILCC::IL_CC_U_LE
:
886 case AMDILCC::IL_CC_F_GT
:
887 case AMDILCC::IL_CC_D_GT
:
888 case AMDILCC::IL_CC_F_OGT
:
889 case AMDILCC::IL_CC_D_OGT
:
890 case AMDILCC::IL_CC_F_LE
:
891 case AMDILCC::IL_CC_D_LE
:
892 case AMDILCC::IL_CC_D_OLE
:
893 case AMDILCC::IL_CC_F_OLE
:
894 generateMachineInst(opCode
, DST
.getReg(),
895 RHS
.getReg(), LHS
.getReg());
897 case AMDILCC::IL_CC_F_UGT
:
898 case AMDILCC::IL_CC_F_ULE
:
901 genVReg(simpleVT
), genVReg(simpleVT
),
902 genVReg(simpleVT
), genVReg(simpleVT
)
904 generateMachineInst(opCode
, VReg
[0],
905 RHS
.getReg(), LHS
.getReg());
906 generateMachineInst(AMDIL::FNE
, VReg
[1],
907 RHS
.getReg(), RHS
.getReg());
908 generateMachineInst(AMDIL::FNE
, VReg
[2],
909 LHS
.getReg(), LHS
.getReg());
910 generateMachineInst(AMDIL::BINARY_OR_f32
,
911 VReg
[3], VReg
[0], VReg
[1]);
912 generateMachineInst(AMDIL::BINARY_OR_f32
,
913 DST
.getReg(), VReg
[2], VReg
[3]);
916 case AMDILCC::IL_CC_F_ULT
:
917 case AMDILCC::IL_CC_F_UGE
:
920 genVReg(simpleVT
), genVReg(simpleVT
),
921 genVReg(simpleVT
), genVReg(simpleVT
)
923 generateMachineInst(opCode
, VReg
[0],
924 LHS
.getReg(), RHS
.getReg());
925 generateMachineInst(AMDIL::FNE
, VReg
[1],
926 RHS
.getReg(), RHS
.getReg());
927 generateMachineInst(AMDIL::FNE
, VReg
[2],
928 LHS
.getReg(), LHS
.getReg());
929 generateMachineInst(AMDIL::BINARY_OR_f32
,
930 VReg
[3], VReg
[0], VReg
[1]);
931 generateMachineInst(AMDIL::BINARY_OR_f32
,
932 DST
.getReg(), VReg
[2], VReg
[3]);
935 case AMDILCC::IL_CC_D_UGT
:
936 case AMDILCC::IL_CC_D_ULE
:
938 uint32_t regID
= AMDIL::GPRF64RegClassID
;
940 genVReg(regID
), genVReg(regID
),
941 genVReg(regID
), genVReg(regID
)
943 // The result of a double comparison is a 32bit result
944 generateMachineInst(opCode
, VReg
[0],
945 RHS
.getReg(), LHS
.getReg());
946 generateMachineInst(AMDIL::DNE
, VReg
[1],
947 RHS
.getReg(), RHS
.getReg());
948 generateMachineInst(AMDIL::DNE
, VReg
[2],
949 LHS
.getReg(), LHS
.getReg());
950 generateMachineInst(AMDIL::BINARY_OR_f32
,
951 VReg
[3], VReg
[0], VReg
[1]);
952 generateMachineInst(AMDIL::BINARY_OR_f32
,
953 DST
.getReg(), VReg
[2], VReg
[3]);
956 case AMDILCC::IL_CC_D_UGE
:
957 case AMDILCC::IL_CC_D_ULT
:
959 uint32_t regID
= AMDIL::GPRF64RegClassID
;
961 genVReg(regID
), genVReg(regID
),
962 genVReg(regID
), genVReg(regID
)
964 // The result of a double comparison is a 32bit result
965 generateMachineInst(opCode
, VReg
[0],
966 LHS
.getReg(), RHS
.getReg());
967 generateMachineInst(AMDIL::DNE
, VReg
[1],
968 RHS
.getReg(), RHS
.getReg());
969 generateMachineInst(AMDIL::DNE
, VReg
[2],
970 LHS
.getReg(), LHS
.getReg());
971 generateMachineInst(AMDIL::BINARY_OR_f32
,
972 VReg
[3], VReg
[0], VReg
[1]);
973 generateMachineInst(AMDIL::BINARY_OR_f32
,
974 DST
.getReg(), VReg
[2], VReg
[3]);
977 case AMDILCC::IL_CC_F_UEQ
:
980 genVReg(simpleVT
), genVReg(simpleVT
),
981 genVReg(simpleVT
), genVReg(simpleVT
)
983 generateMachineInst(AMDIL::FEQ
, VReg
[0],
984 LHS
.getReg(), RHS
.getReg());
985 generateMachineInst(AMDIL::FNE
, VReg
[1],
986 LHS
.getReg(), LHS
.getReg());
987 generateMachineInst(AMDIL::FNE
, VReg
[2],
988 RHS
.getReg(), RHS
.getReg());
989 generateMachineInst(AMDIL::BINARY_OR_f32
,
990 VReg
[3], VReg
[0], VReg
[1]);
991 generateMachineInst(AMDIL::BINARY_OR_f32
,
992 DST
.getReg(), VReg
[2], VReg
[3]);
995 case AMDILCC::IL_CC_F_ONE
:
998 genVReg(simpleVT
), genVReg(simpleVT
),
999 genVReg(simpleVT
), genVReg(simpleVT
)
1001 generateMachineInst(AMDIL::FNE
, VReg
[0],
1002 LHS
.getReg(), RHS
.getReg());
1003 generateMachineInst(AMDIL::FEQ
, VReg
[1],
1004 LHS
.getReg(), LHS
.getReg());
1005 generateMachineInst(AMDIL::FEQ
, VReg
[2],
1006 RHS
.getReg(), RHS
.getReg());
1007 generateMachineInst(AMDIL::BINARY_AND_f32
,
1008 VReg
[3], VReg
[0], VReg
[1]);
1009 generateMachineInst(AMDIL::BINARY_AND_f32
,
1010 DST
.getReg(), VReg
[2], VReg
[3]);
1013 case AMDILCC::IL_CC_D_UEQ
:
1015 uint32_t regID
= AMDIL::GPRF64RegClassID
;
1016 uint32_t VReg
[4] = {
1017 genVReg(regID
), genVReg(regID
),
1018 genVReg(regID
), genVReg(regID
)
1020 // The result of a double comparison is a 32bit result
1021 generateMachineInst(AMDIL::DEQ
, VReg
[0],
1022 LHS
.getReg(), RHS
.getReg());
1023 generateMachineInst(AMDIL::DNE
, VReg
[1],
1024 LHS
.getReg(), LHS
.getReg());
1025 generateMachineInst(AMDIL::DNE
, VReg
[2],
1026 RHS
.getReg(), RHS
.getReg());
1027 generateMachineInst(AMDIL::BINARY_OR_f32
,
1028 VReg
[3], VReg
[0], VReg
[1]);
1029 generateMachineInst(AMDIL::BINARY_OR_f32
,
1030 DST
.getReg(), VReg
[2], VReg
[3]);
1034 case AMDILCC::IL_CC_D_ONE
:
1036 uint32_t regID
= AMDIL::GPRF64RegClassID
;
1037 uint32_t VReg
[4] = {
1038 genVReg(regID
), genVReg(regID
),
1039 genVReg(regID
), genVReg(regID
)
1041 // The result of a double comparison is a 32bit result
1042 generateMachineInst(AMDIL::DNE
, VReg
[0],
1043 LHS
.getReg(), RHS
.getReg());
1044 generateMachineInst(AMDIL::DEQ
, VReg
[1],
1045 LHS
.getReg(), LHS
.getReg());
1046 generateMachineInst(AMDIL::DEQ
, VReg
[2],
1047 RHS
.getReg(), RHS
.getReg());
1048 generateMachineInst(AMDIL::BINARY_AND_f32
,
1049 VReg
[3], VReg
[0], VReg
[1]);
1050 generateMachineInst(AMDIL::BINARY_AND_f32
,
1051 DST
.getReg(), VReg
[2], VReg
[3]);
1055 case AMDILCC::IL_CC_F_O
:
1057 uint32_t VReg
[2] = { genVReg(simpleVT
), genVReg(simpleVT
) };
1058 generateMachineInst(AMDIL::FEQ
, VReg
[0],
1059 RHS
.getReg(), RHS
.getReg());
1060 generateMachineInst(AMDIL::FEQ
, VReg
[1],
1061 LHS
.getReg(), LHS
.getReg());
1062 generateMachineInst(AMDIL::BINARY_AND_f32
,
1063 DST
.getReg(), VReg
[0], VReg
[1]);
1066 case AMDILCC::IL_CC_D_O
:
1068 uint32_t regID
= AMDIL::GPRF64RegClassID
;
1069 uint32_t VReg
[2] = { genVReg(regID
), genVReg(regID
) };
1070 // The result of a double comparison is a 32bit result
1071 generateMachineInst(AMDIL::DEQ
, VReg
[0],
1072 RHS
.getReg(), RHS
.getReg());
1073 generateMachineInst(AMDIL::DEQ
, VReg
[1],
1074 LHS
.getReg(), LHS
.getReg());
1075 generateMachineInst(AMDIL::BINARY_AND_f32
,
1076 DST
.getReg(), VReg
[0], VReg
[1]);
1079 case AMDILCC::IL_CC_F_UO
:
1081 uint32_t VReg
[2] = { genVReg(simpleVT
), genVReg(simpleVT
) };
1082 generateMachineInst(AMDIL::FNE
, VReg
[0],
1083 RHS
.getReg(), RHS
.getReg());
1084 generateMachineInst(AMDIL::FNE
, VReg
[1],
1085 LHS
.getReg(), LHS
.getReg());
1086 generateMachineInst(AMDIL::BINARY_OR_f32
,
1087 DST
.getReg(), VReg
[0], VReg
[1]);
1090 case AMDILCC::IL_CC_D_UO
:
1092 uint32_t regID
= AMDIL::GPRF64RegClassID
;
1093 uint32_t VReg
[2] = { genVReg(regID
), genVReg(regID
) };
1094 // The result of a double comparison is a 32bit result
1095 generateMachineInst(AMDIL::DNE
, VReg
[0],
1096 RHS
.getReg(), RHS
.getReg());
1097 generateMachineInst(AMDIL::DNE
, VReg
[1],
1098 LHS
.getReg(), LHS
.getReg());
1099 generateMachineInst(AMDIL::BINARY_OR_f32
,
1100 DST
.getReg(), VReg
[0], VReg
[1]);
1103 case AMDILCC::IL_CC_L_LE
:
1104 case AMDILCC::IL_CC_L_GE
:
1105 case AMDILCC::IL_CC_L_EQ
:
1106 case AMDILCC::IL_CC_L_NE
:
1107 case AMDILCC::IL_CC_L_LT
:
1108 case AMDILCC::IL_CC_L_GT
:
1109 case AMDILCC::IL_CC_UL_LE
:
1110 case AMDILCC::IL_CC_UL_GE
:
1111 case AMDILCC::IL_CC_UL_EQ
:
1112 case AMDILCC::IL_CC_UL_NE
:
1113 case AMDILCC::IL_CC_UL_LT
:
1114 case AMDILCC::IL_CC_UL_GT
:
1116 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
1117 &this->getTargetMachine())->getSubtargetImpl();
1118 if (stm
->device()->usesHardware(AMDILDeviceInfo::LongOps
)) {
1119 generateMachineInst(opCode
, DST
.getReg(), LHS
.getReg(), RHS
.getReg());
1121 generateLongRelational(MI
, opCode
);
1125 case AMDILCC::COND_ERROR
:
1126 assert(0 && "Invalid CC code");
1131 //===----------------------------------------------------------------------===//
1132 // TargetLowering Class Implementation Begins
1133 //===----------------------------------------------------------------------===//
1134 AMDILTargetLowering::AMDILTargetLowering(TargetMachine
&TM
)
1135 : TargetLowering(TM
, new TargetLoweringObjectFileELF())
1184 size_t numTypes
= sizeof(types
) / sizeof(*types
);
1185 size_t numFloatTypes
= sizeof(FloatTypes
) / sizeof(*FloatTypes
);
1186 size_t numIntTypes
= sizeof(IntTypes
) / sizeof(*IntTypes
);
1187 size_t numVectorTypes
= sizeof(VectorTypes
) / sizeof(*VectorTypes
);
1189 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
1190 &this->getTargetMachine())->getSubtargetImpl();
1191 // These are the current register classes that are
1194 addRegisterClass(MVT::i32
, AMDIL::GPRI32RegisterClass
);
1195 addRegisterClass(MVT::f32
, AMDIL::GPRF32RegisterClass
);
1197 if (stm
->device()->isSupported(AMDILDeviceInfo::DoubleOps
)) {
1198 addRegisterClass(MVT::f64
, AMDIL::GPRF64RegisterClass
);
1199 addRegisterClass(MVT::v2f64
, AMDIL::GPRV2F64RegisterClass
);
1201 if (stm
->device()->isSupported(AMDILDeviceInfo::ByteOps
)) {
1202 addRegisterClass(MVT::i8
, AMDIL::GPRI8RegisterClass
);
1203 addRegisterClass(MVT::v2i8
, AMDIL::GPRV2I8RegisterClass
);
1204 addRegisterClass(MVT::v4i8
, AMDIL::GPRV4I8RegisterClass
);
1205 setOperationAction(ISD::Constant
, MVT::i8
, Legal
);
1207 if (stm
->device()->isSupported(AMDILDeviceInfo::ShortOps
)) {
1208 addRegisterClass(MVT::i16
, AMDIL::GPRI16RegisterClass
);
1209 addRegisterClass(MVT::v2i16
, AMDIL::GPRV2I16RegisterClass
);
1210 addRegisterClass(MVT::v4i16
, AMDIL::GPRV4I16RegisterClass
);
1211 setOperationAction(ISD::Constant
, MVT::i16
, Legal
);
1213 addRegisterClass(MVT::v2f32
, AMDIL::GPRV2F32RegisterClass
);
1214 addRegisterClass(MVT::v4f32
, AMDIL::GPRV4F32RegisterClass
);
1215 addRegisterClass(MVT::v2i32
, AMDIL::GPRV2I32RegisterClass
);
1216 addRegisterClass(MVT::v4i32
, AMDIL::GPRV4I32RegisterClass
);
1217 if (stm
->device()->isSupported(AMDILDeviceInfo::LongOps
)) {
1218 addRegisterClass(MVT::i64
, AMDIL::GPRI64RegisterClass
);
1219 addRegisterClass(MVT::v2i64
, AMDIL::GPRV2I64RegisterClass
);
1222 for (unsigned int x
= 0; x
< numTypes
; ++x
) {
1223 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)types
[x
];
1225 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
1226 // We cannot sextinreg, expand to shifts
1227 setOperationAction(ISD::SIGN_EXTEND_INREG
, VT
, Custom
);
1228 setOperationAction(ISD::EXTRACT_SUBVECTOR
, VT
, Custom
);
1229 setOperationAction(ISD::FP_ROUND
, VT
, Expand
);
1230 setOperationAction(ISD::OR
, VT
, Custom
);
1231 setOperationAction(ISD::SUBE
, VT
, Expand
);
1232 setOperationAction(ISD::SUBC
, VT
, Expand
);
1233 setOperationAction(ISD::ADD
, VT
, Custom
);
1234 setOperationAction(ISD::ADDE
, VT
, Expand
);
1235 setOperationAction(ISD::ADDC
, VT
, Expand
);
1236 setOperationAction(ISD::SETCC
, VT
, Custom
);
1237 setOperationAction(ISD::BRCOND
, VT
, Custom
);
1238 setOperationAction(ISD::BR_CC
, VT
, Custom
);
1239 setOperationAction(ISD::BR_JT
, VT
, Expand
);
1240 setOperationAction(ISD::BRIND
, VT
, Expand
);
1241 // TODO: Implement custom UREM/SREM routines
1242 setOperationAction(ISD::UREM
, VT
, Expand
);
1243 setOperationAction(ISD::SREM
, VT
, Expand
);
1244 setOperationAction(ISD::SINT_TO_FP
, VT
, Custom
);
1245 setOperationAction(ISD::UINT_TO_FP
, VT
, Custom
);
1246 setOperationAction(ISD::FP_TO_SINT
, VT
, Custom
);
1247 setOperationAction(ISD::FP_TO_UINT
, VT
, Custom
);
1248 setOperationAction(ISDBITCAST
, VT
, Custom
);
1249 setOperationAction(ISD::GlobalAddress
, VT
, Custom
);
1250 setOperationAction(ISD::JumpTable
, VT
, Custom
);
1251 setOperationAction(ISD::ConstantPool
, VT
, Custom
);
1252 setOperationAction(ISD::SELECT_CC
, VT
, Custom
);
1253 setOperationAction(ISD::SELECT
, VT
, Custom
);
1254 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
1255 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
1256 if (VT
!= MVT::i64
&& VT
!= MVT::v2i64
) {
1257 setOperationAction(ISD::SDIV
, VT
, Custom
);
1258 setOperationAction(ISD::UDIV
, VT
, Custom
);
1260 setOperationAction(ISD::INSERT_VECTOR_ELT
, VT
, Custom
);
1261 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, VT
, Custom
);
1263 for (unsigned int x
= 0; x
< numFloatTypes
; ++x
) {
1264 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)FloatTypes
[x
];
1266 // IL does not have these operations for floating point types
1267 setOperationAction(ISD::FP_ROUND_INREG
, VT
, Expand
);
1268 setOperationAction(ISD::FP_ROUND
, VT
, Custom
);
1269 setOperationAction(ISD::SETOLT
, VT
, Expand
);
1270 setOperationAction(ISD::SETOGE
, VT
, Expand
);
1271 setOperationAction(ISD::SETOGT
, VT
, Expand
);
1272 setOperationAction(ISD::SETOLE
, VT
, Expand
);
1273 setOperationAction(ISD::SETULT
, VT
, Expand
);
1274 setOperationAction(ISD::SETUGE
, VT
, Expand
);
1275 setOperationAction(ISD::SETUGT
, VT
, Expand
);
1276 setOperationAction(ISD::SETULE
, VT
, Expand
);
1279 for (unsigned int x
= 0; x
< numIntTypes
; ++x
) {
1280 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)IntTypes
[x
];
1282 // GPU also does not have divrem function for signed or unsigned
1283 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
1284 setOperationAction(ISD::UDIVREM
, VT
, Expand
);
1285 setOperationAction(ISD::FP_ROUND
, VT
, Expand
);
1287 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
1288 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
1289 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
1291 // GPU doesn't have a rotl, rotr, or byteswap instruction
1292 setOperationAction(ISD::ROTR
, VT
, Expand
);
1293 setOperationAction(ISD::ROTL
, VT
, Expand
);
1294 setOperationAction(ISD::BSWAP
, VT
, Expand
);
1296 // GPU doesn't have any counting operators
1297 setOperationAction(ISD::CTPOP
, VT
, Expand
);
1298 setOperationAction(ISD::CTTZ
, VT
, Expand
);
1299 setOperationAction(ISD::CTLZ
, VT
, Expand
);
1302 for ( unsigned int ii
= 0; ii
< numVectorTypes
; ++ii
)
1304 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)VectorTypes
[ii
];
1306 setOperationAction(ISD::BUILD_VECTOR
, VT
, Custom
);
1307 setOperationAction(ISD::EXTRACT_SUBVECTOR
, VT
, Custom
);
1308 setOperationAction(ISD::SCALAR_TO_VECTOR
, VT
, Custom
);
1309 setOperationAction(ISD::VECTOR_SHUFFLE
, VT
, Expand
);
1310 setOperationAction(ISD::CONCAT_VECTORS
, VT
, Custom
);
1311 setOperationAction(ISD::FP_ROUND
, VT
, Expand
);
1312 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
1313 setOperationAction(ISD::UDIVREM
, VT
, Expand
);
1314 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
1315 // setOperationAction(ISD::VSETCC, VT, Expand);
1316 setOperationAction(ISD::SETCC
, VT
, Expand
);
1317 setOperationAction(ISD::SELECT_CC
, VT
, Expand
);
1318 setOperationAction(ISD::SELECT
, VT
, Expand
);
1321 setOperationAction(ISD::FP_ROUND
, MVT::Other
, Expand
);
1322 if (stm
->device()->isSupported(AMDILDeviceInfo::LongOps
)) {
1323 if (stm
->calVersion() < CAL_VERSION_SC_139
1324 || stm
->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
1325 setOperationAction(ISD::MUL
, MVT::i64
, Custom
);
1327 setOperationAction(ISD::SUB
, MVT::i64
, Custom
);
1328 setOperationAction(ISD::ADD
, MVT::i64
, Custom
);
1329 setOperationAction(ISD::MULHU
, MVT::i64
, Expand
);
1330 setOperationAction(ISD::MULHU
, MVT::v2i64
, Expand
);
1331 setOperationAction(ISD::MULHS
, MVT::i64
, Expand
);
1332 setOperationAction(ISD::MULHS
, MVT::v2i64
, Expand
);
1333 setOperationAction(ISD::MUL
, MVT::v2i64
, Expand
);
1334 setOperationAction(ISD::SUB
, MVT::v2i64
, Expand
);
1335 setOperationAction(ISD::ADD
, MVT::v2i64
, Expand
);
1336 setOperationAction(ISD::SREM
, MVT::v2i64
, Expand
);
1337 setOperationAction(ISD::Constant
, MVT::i64
, Legal
);
1338 setOperationAction(ISD::UDIV
, MVT::v2i64
, Expand
);
1339 setOperationAction(ISD::SDIV
, MVT::v2i64
, Expand
);
1340 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i64
, Expand
);
1341 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i64
, Expand
);
1342 setOperationAction(ISD::FP_TO_SINT
, MVT::v2i64
, Expand
);
1343 setOperationAction(ISD::FP_TO_UINT
, MVT::v2i64
, Expand
);
1344 setOperationAction(ISD::TRUNCATE
, MVT::v2i64
, Expand
);
1345 setOperationAction(ISD::SIGN_EXTEND
, MVT::v2i64
, Expand
);
1346 setOperationAction(ISD::ZERO_EXTEND
, MVT::v2i64
, Expand
);
1347 setOperationAction(ISD::ANY_EXTEND
, MVT::v2i64
, Expand
);
1349 if (stm
->device()->isSupported(AMDILDeviceInfo::DoubleOps
)) {
1350 // we support loading/storing v2f64 but not operations on the type
1351 setOperationAction(ISD::FADD
, MVT::v2f64
, Expand
);
1352 setOperationAction(ISD::FSUB
, MVT::v2f64
, Expand
);
1353 setOperationAction(ISD::FMUL
, MVT::v2f64
, Expand
);
1354 setOperationAction(ISD::FP_ROUND
, MVT::v2f64
, Expand
);
1355 setOperationAction(ISD::FP_ROUND_INREG
, MVT::v2f64
, Expand
);
1356 setOperationAction(ISD::FP_EXTEND
, MVT::v2f64
, Expand
);
1357 setOperationAction(ISD::ConstantFP
, MVT::f64
, Legal
);
1358 setOperationAction(ISD::FDIV
, MVT::v2f64
, Expand
);
1359 // We want to expand vector conversions into their scalar counterparts.
1361 setOperationAction(ISD::SINT_TO_FP
, MVT::v2f64
, Expand
);
1362 setOperationAction(ISD::UINT_TO_FP
, MVT::v2f64
, Expand
);
1363 setOperationAction(ISD::FP_TO_SINT
, MVT::v2f64
, Expand
);
1364 setOperationAction(ISD::FP_TO_UINT
, MVT::v2f64
, Expand
);
1365 setOperationAction(ISD::TRUNCATE
, MVT::v2f64
, Expand
);
1366 setOperationAction(ISD::SIGN_EXTEND
, MVT::v2f64
, Expand
);
1367 setOperationAction(ISD::ZERO_EXTEND
, MVT::v2f64
, Expand
);
1368 setOperationAction(ISD::ANY_EXTEND
, MVT::v2f64
, Expand
);
1369 setOperationAction(ISD::FABS
, MVT::f64
, Expand
);
1370 setOperationAction(ISD::FABS
, MVT::v2f64
, Expand
);
1372 // TODO: Fix the UDIV24 algorithm so it works for these
1373 // types correctly. This needs vector comparisons
1374 // for this to work correctly.
1375 setOperationAction(ISD::UDIV
, MVT::v2i8
, Expand
);
1376 setOperationAction(ISD::UDIV
, MVT::v4i8
, Expand
);
1377 setOperationAction(ISD::UDIV
, MVT::v2i16
, Expand
);
1378 setOperationAction(ISD::UDIV
, MVT::v4i16
, Expand
);
1379 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Custom
);
1380 setOperationAction(ISD::SUBC
, MVT::Other
, Expand
);
1381 setOperationAction(ISD::ADDE
, MVT::Other
, Expand
);
1382 setOperationAction(ISD::ADDC
, MVT::Other
, Expand
);
1383 setOperationAction(ISD::BRCOND
, MVT::Other
, Custom
);
1384 setOperationAction(ISD::BR_CC
, MVT::Other
, Custom
);
1385 setOperationAction(ISD::BR_JT
, MVT::Other
, Expand
);
1386 setOperationAction(ISD::BRIND
, MVT::Other
, Expand
);
1387 setOperationAction(ISD::SETCC
, MVT::Other
, Custom
);
1388 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::Other
, Expand
);
1389 setOperationAction(ISD::FDIV
, MVT::f32
, Custom
);
1390 setOperationAction(ISD::FDIV
, MVT::v2f32
, Custom
);
1391 setOperationAction(ISD::FDIV
, MVT::v4f32
, Custom
);
1393 setOperationAction(ISD::BUILD_VECTOR
, MVT::Other
, Custom
);
1394 // Use the default implementation.
1395 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
1396 setOperationAction(ISD::VACOPY
, MVT::Other
, Expand
);
1397 setOperationAction(ISD::VAEND
, MVT::Other
, Expand
);
1398 setOperationAction(ISD::STACKSAVE
, MVT::Other
, Expand
);
1399 setOperationAction(ISD::STACKRESTORE
, MVT::Other
, Expand
);
1400 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVT::i32
, Custom
);
1401 setOperationAction(ISD::ConstantFP
, MVT::f32
, Legal
);
1402 setOperationAction(ISD::Constant
, MVT::i32
, Legal
);
1403 setOperationAction(ISD::TRAP
, MVT::Other
, Legal
);
1405 setStackPointerRegisterToSaveRestore(AMDIL::SP
);
1406 setSchedulingPreference(Sched::RegPressure
);
1407 setPow2DivIsCheap(false);
1408 setPrefLoopAlignment(16);
1409 setSelectIsExpensive(true);
1410 setJumpIsExpensive(true);
1411 computeRegisterProperties();
1413 maxStoresPerMemcpy
= 4096;
1414 maxStoresPerMemmove
= 4096;
1415 maxStoresPerMemset
= 4096;
1419 #undef numVectorTypes
1420 #undef numFloatTypes
// Returns the printable name of an AMDIL-specific SelectionDAG opcode.
//   Opcode - an AMDILISD node opcode.
// Every case visible here returns a string literal that spells the
// enumerator itself ("AMDILISD::<NAME>").
// NOTE(review): the return type, the switch header and the handling of
// opcodes that have no case are not visible in this listing -- confirm
// against the full file.
1424 AMDILTargetLowering::getTargetNodeName(unsigned Opcode
) const
1428 case AMDILISD::INTTOANY
: return "AMDILISD::INTTOANY";
1429 case AMDILISD::DP_TO_FP
: return "AMDILISD::DP_TO_FP";
1430 case AMDILISD::FP_TO_DP
: return "AMDILISD::FP_TO_DP";
1431 case AMDILISD::BITCONV
: return "AMDILISD::BITCONV";
1432 case AMDILISD::CMOV
: return "AMDILISD::CMOV";
1433 case AMDILISD::CMOVLOG
: return "AMDILISD::CMOVLOG";
1434 case AMDILISD::INEGATE
: return "AMDILISD::INEGATE";
1435 case AMDILISD::MAD
: return "AMDILISD::MAD";
1436 case AMDILISD::UMAD
: return "AMDILISD::UMAD";
1437 case AMDILISD::CALL
: return "AMDILISD::CALL";
1438 case AMDILISD::RET
: return "AMDILISD::RET";
1439 case AMDILISD::IFFB_HI
: return "AMDILISD::IFFB_HI";
1440 case AMDILISD::IFFB_LO
: return "AMDILISD::IFFB_LO";
1441 case AMDILISD::ADD
: return "AMDILISD::ADD";
1442 case AMDILISD::UMUL
: return "AMDILISD::UMUL";
1443 case AMDILISD::AND
: return "AMDILISD::AND";
1444 case AMDILISD::OR
: return "AMDILISD::OR";
1445 case AMDILISD::NOT
: return "AMDILISD::NOT";
1446 case AMDILISD::XOR
: return "AMDILISD::XOR";
1447 case AMDILISD::DIV_INF
: return "AMDILISD::DIV_INF";
1448 case AMDILISD::SMAX
: return "AMDILISD::SMAX";
1449 case AMDILISD::PHIMOVE
: return "AMDILISD::PHIMOVE";
1450 case AMDILISD::MOVE
: return "AMDILISD::MOVE";
1451 case AMDILISD::VBUILD
: return "AMDILISD::VBUILD";
1452 case AMDILISD::VEXTRACT
: return "AMDILISD::VEXTRACT";
1453 case AMDILISD::VINSERT
: return "AMDILISD::VINSERT";
1454 case AMDILISD::VCONCAT
: return "AMDILISD::VCONCAT";
1455 case AMDILISD::LCREATE
: return "AMDILISD::LCREATE";
1456 case AMDILISD::LCOMPHI
: return "AMDILISD::LCOMPHI";
1457 case AMDILISD::LCOMPLO
: return "AMDILISD::LCOMPLO";
1458 case AMDILISD::DCREATE
: return "AMDILISD::DCREATE";
1459 case AMDILISD::DCOMPHI
: return "AMDILISD::DCOMPHI";
1460 case AMDILISD::DCOMPLO
: return "AMDILISD::DCOMPLO";
1461 case AMDILISD::LCREATE2
: return "AMDILISD::LCREATE2";
1462 case AMDILISD::LCOMPHI2
: return "AMDILISD::LCOMPHI2";
1463 case AMDILISD::LCOMPLO2
: return "AMDILISD::LCOMPLO2";
1464 case AMDILISD::DCREATE2
: return "AMDILISD::DCREATE2";
1465 case AMDILISD::DCOMPHI2
: return "AMDILISD::DCOMPHI2";
1466 case AMDILISD::DCOMPLO2
: return "AMDILISD::DCOMPLO2";
1467 case AMDILISD::CMP
: return "AMDILISD::CMP";
1468 case AMDILISD::IL_CC_I_LT
: return "AMDILISD::IL_CC_I_LT";
1469 case AMDILISD::IL_CC_I_LE
: return "AMDILISD::IL_CC_I_LE";
1470 case AMDILISD::IL_CC_I_GT
: return "AMDILISD::IL_CC_I_GT";
1471 case AMDILISD::IL_CC_I_GE
: return "AMDILISD::IL_CC_I_GE";
1472 case AMDILISD::IL_CC_I_EQ
: return "AMDILISD::IL_CC_I_EQ";
1473 case AMDILISD::IL_CC_I_NE
: return "AMDILISD::IL_CC_I_NE";
1474 case AMDILISD::RET_FLAG
: return "AMDILISD::RET_FLAG";
1475 case AMDILISD::BRANCH_COND
: return "AMDILISD::BRANCH_COND";
1476 case AMDILISD::LOOP_NZERO
: return "AMDILISD::LOOP_NZERO";
1477 case AMDILISD::LOOP_ZERO
: return "AMDILISD::LOOP_ZERO";
1478 case AMDILISD::LOOP_CMP
: return "AMDILISD::LOOP_CMP";
1479 case AMDILISD::ADDADDR
: return "AMDILISD::ADDADDR";
1480 case AMDILISD::ATOM_G_ADD
: return "AMDILISD::ATOM_G_ADD";
1481 case AMDILISD::ATOM_G_AND
: return "AMDILISD::ATOM_G_AND";
1482 case AMDILISD::ATOM_G_CMPXCHG
: return "AMDILISD::ATOM_G_CMPXCHG";
1483 case AMDILISD::ATOM_G_DEC
: return "AMDILISD::ATOM_G_DEC";
1484 case AMDILISD::ATOM_G_INC
: return "AMDILISD::ATOM_G_INC";
1485 case AMDILISD::ATOM_G_MAX
: return "AMDILISD::ATOM_G_MAX";
1486 case AMDILISD::ATOM_G_UMAX
: return "AMDILISD::ATOM_G_UMAX";
1487 case AMDILISD::ATOM_G_MIN
: return "AMDILISD::ATOM_G_MIN";
1488 case AMDILISD::ATOM_G_UMIN
: return "AMDILISD::ATOM_G_UMIN";
1489 case AMDILISD::ATOM_G_OR
: return "AMDILISD::ATOM_G_OR";
1490 case AMDILISD::ATOM_G_SUB
: return "AMDILISD::ATOM_G_SUB";
1491 case AMDILISD::ATOM_G_RSUB
: return "AMDILISD::ATOM_G_RSUB";
1492 case AMDILISD::ATOM_G_XCHG
: return "AMDILISD::ATOM_G_XCHG";
1493 case AMDILISD::ATOM_G_XOR
: return "AMDILISD::ATOM_G_XOR";
1494 case AMDILISD::ATOM_G_ADD_NORET
: return "AMDILISD::ATOM_G_ADD_NORET";
1495 case AMDILISD::ATOM_G_AND_NORET
: return "AMDILISD::ATOM_G_AND_NORET";
1496 case AMDILISD::ATOM_G_CMPXCHG_NORET
: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
1497 case AMDILISD::ATOM_G_DEC_NORET
: return "AMDILISD::ATOM_G_DEC_NORET";
1498 case AMDILISD::ATOM_G_INC_NORET
: return "AMDILISD::ATOM_G_INC_NORET";
1499 case AMDILISD::ATOM_G_MAX_NORET
: return "AMDILISD::ATOM_G_MAX_NORET";
1500 case AMDILISD::ATOM_G_UMAX_NORET
: return "AMDILISD::ATOM_G_UMAX_NORET";
1501 case AMDILISD::ATOM_G_MIN_NORET
: return "AMDILISD::ATOM_G_MIN_NORET";
1502 case AMDILISD::ATOM_G_UMIN_NORET
: return "AMDILISD::ATOM_G_UMIN_NORET";
1503 case AMDILISD::ATOM_G_OR_NORET
: return "AMDILISD::ATOM_G_OR_NORET";
1504 case AMDILISD::ATOM_G_SUB_NORET
: return "AMDILISD::ATOM_G_SUB_NORET";
1505 case AMDILISD::ATOM_G_RSUB_NORET
: return "AMDILISD::ATOM_G_RSUB_NORET";
1506 case AMDILISD::ATOM_G_XCHG_NORET
: return "AMDILISD::ATOM_G_XCHG_NORET";
1507 case AMDILISD::ATOM_G_XOR_NORET
: return "AMDILISD::ATOM_G_XOR_NORET";
1508 case AMDILISD::ATOM_L_ADD
: return "AMDILISD::ATOM_L_ADD";
1509 case AMDILISD::ATOM_L_AND
: return "AMDILISD::ATOM_L_AND";
1510 case AMDILISD::ATOM_L_CMPXCHG
: return "AMDILISD::ATOM_L_CMPXCHG";
1511 case AMDILISD::ATOM_L_DEC
: return "AMDILISD::ATOM_L_DEC";
1512 case AMDILISD::ATOM_L_INC
: return "AMDILISD::ATOM_L_INC";
1513 case AMDILISD::ATOM_L_MAX
: return "AMDILISD::ATOM_L_MAX";
1514 case AMDILISD::ATOM_L_UMAX
: return "AMDILISD::ATOM_L_UMAX";
1515 case AMDILISD::ATOM_L_MIN
: return "AMDILISD::ATOM_L_MIN";
1516 case AMDILISD::ATOM_L_UMIN
: return "AMDILISD::ATOM_L_UMIN";
1517 case AMDILISD::ATOM_L_OR
: return "AMDILISD::ATOM_L_OR";
1518 case AMDILISD::ATOM_L_SUB
: return "AMDILISD::ATOM_L_SUB";
1519 case AMDILISD::ATOM_L_RSUB
: return "AMDILISD::ATOM_L_RSUB";
1520 case AMDILISD::ATOM_L_XCHG
: return "AMDILISD::ATOM_L_XCHG";
1521 case AMDILISD::ATOM_L_XOR
: return "AMDILISD::ATOM_L_XOR";
1522 case AMDILISD::ATOM_L_ADD_NORET
: return "AMDILISD::ATOM_L_ADD_NORET";
1523 case AMDILISD::ATOM_L_AND_NORET
: return "AMDILISD::ATOM_L_AND_NORET";
1524 case AMDILISD::ATOM_L_CMPXCHG_NORET
: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
1525 case AMDILISD::ATOM_L_DEC_NORET
: return "AMDILISD::ATOM_L_DEC_NORET";
1526 case AMDILISD::ATOM_L_INC_NORET
: return "AMDILISD::ATOM_L_INC_NORET";
1527 case AMDILISD::ATOM_L_MAX_NORET
: return "AMDILISD::ATOM_L_MAX_NORET";
1528 case AMDILISD::ATOM_L_UMAX_NORET
: return "AMDILISD::ATOM_L_UMAX_NORET";
1529 case AMDILISD::ATOM_L_MIN_NORET
: return "AMDILISD::ATOM_L_MIN_NORET";
1530 case AMDILISD::ATOM_L_UMIN_NORET
: return "AMDILISD::ATOM_L_UMIN_NORET";
1531 case AMDILISD::ATOM_L_OR_NORET
: return "AMDILISD::ATOM_L_OR_NORET";
1532 case AMDILISD::ATOM_L_SUB_NORET
: return "AMDILISD::ATOM_L_SUB_NORET";
1533 case AMDILISD::ATOM_L_RSUB_NORET
: return "AMDILISD::ATOM_L_RSUB_NORET";
1534 case AMDILISD::ATOM_L_XCHG_NORET
: return "AMDILISD::ATOM_L_XCHG_NORET";
// ATOM_L_XOR_NORET is assigned by getTgtMemIntrinsic (the *_xor_l*32_noret
// intrinsics) but previously had no entry here, unlike its ATOM_G_ and
// ATOM_R_ siblings; name it so it is reported like every other opcode.
case AMDILISD::ATOM_L_XOR_NORET: return "AMDILISD::ATOM_L_XOR_NORET";
1535 case AMDILISD::ATOM_R_ADD
: return "AMDILISD::ATOM_R_ADD";
1536 case AMDILISD::ATOM_R_AND
: return "AMDILISD::ATOM_R_AND";
1537 case AMDILISD::ATOM_R_CMPXCHG
: return "AMDILISD::ATOM_R_CMPXCHG";
1538 case AMDILISD::ATOM_R_DEC
: return "AMDILISD::ATOM_R_DEC";
1539 case AMDILISD::ATOM_R_INC
: return "AMDILISD::ATOM_R_INC";
1540 case AMDILISD::ATOM_R_MAX
: return "AMDILISD::ATOM_R_MAX";
1541 case AMDILISD::ATOM_R_UMAX
: return "AMDILISD::ATOM_R_UMAX";
1542 case AMDILISD::ATOM_R_MIN
: return "AMDILISD::ATOM_R_MIN";
1543 case AMDILISD::ATOM_R_UMIN
: return "AMDILISD::ATOM_R_UMIN";
1544 case AMDILISD::ATOM_R_OR
: return "AMDILISD::ATOM_R_OR";
1545 case AMDILISD::ATOM_R_MSKOR
: return "AMDILISD::ATOM_R_MSKOR";
1546 case AMDILISD::ATOM_R_SUB
: return "AMDILISD::ATOM_R_SUB";
1547 case AMDILISD::ATOM_R_RSUB
: return "AMDILISD::ATOM_R_RSUB";
1548 case AMDILISD::ATOM_R_XCHG
: return "AMDILISD::ATOM_R_XCHG";
1549 case AMDILISD::ATOM_R_XOR
: return "AMDILISD::ATOM_R_XOR";
1550 case AMDILISD::ATOM_R_ADD_NORET
: return "AMDILISD::ATOM_R_ADD_NORET";
1551 case AMDILISD::ATOM_R_AND_NORET
: return "AMDILISD::ATOM_R_AND_NORET";
1552 case AMDILISD::ATOM_R_CMPXCHG_NORET
: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
1553 case AMDILISD::ATOM_R_DEC_NORET
: return "AMDILISD::ATOM_R_DEC_NORET";
1554 case AMDILISD::ATOM_R_INC_NORET
: return "AMDILISD::ATOM_R_INC_NORET";
1555 case AMDILISD::ATOM_R_MAX_NORET
: return "AMDILISD::ATOM_R_MAX_NORET";
1556 case AMDILISD::ATOM_R_UMAX_NORET
: return "AMDILISD::ATOM_R_UMAX_NORET";
1557 case AMDILISD::ATOM_R_MIN_NORET
: return "AMDILISD::ATOM_R_MIN_NORET";
1558 case AMDILISD::ATOM_R_UMIN_NORET
: return "AMDILISD::ATOM_R_UMIN_NORET";
1559 case AMDILISD::ATOM_R_OR_NORET
: return "AMDILISD::ATOM_R_OR_NORET";
1560 case AMDILISD::ATOM_R_MSKOR_NORET
: return "AMDILISD::ATOM_R_MSKOR_NORET";
1561 case AMDILISD::ATOM_R_SUB_NORET
: return "AMDILISD::ATOM_R_SUB_NORET";
1562 case AMDILISD::ATOM_R_RSUB_NORET
: return "AMDILISD::ATOM_R_RSUB_NORET";
1563 case AMDILISD::ATOM_R_XCHG_NORET
: return "AMDILISD::ATOM_R_XCHG_NORET";
1564 case AMDILISD::ATOM_R_XOR_NORET
: return "AMDILISD::ATOM_R_XOR_NORET";
1565 case AMDILISD::APPEND_ALLOC
: return "AMDILISD::APPEND_ALLOC";
1566 case AMDILISD::APPEND_ALLOC_NORET
: return "AMDILISD::APPEND_ALLOC_NORET";
1567 case AMDILISD::APPEND_CONSUME
: return "AMDILISD::APPEND_CONSUME";
1568 case AMDILISD::APPEND_CONSUME_NORET
: return "AMDILISD::APPEND_CONSUME_NORET";
1569 case AMDILISD::IMAGE2D_READ
: return "AMDILISD::IMAGE2D_READ";
1570 case AMDILISD::IMAGE2D_WRITE
: return "AMDILISD::IMAGE2D_WRITE";
1571 case AMDILISD::IMAGE2D_INFO0
: return "AMDILISD::IMAGE2D_INFO0";
1572 case AMDILISD::IMAGE2D_INFO1
: return "AMDILISD::IMAGE2D_INFO1";
1573 case AMDILISD::IMAGE3D_READ
: return "AMDILISD::IMAGE3D_READ";
1574 case AMDILISD::IMAGE3D_WRITE
: return "AMDILISD::IMAGE3D_WRITE";
1575 case AMDILISD::IMAGE3D_INFO0
: return "AMDILISD::IMAGE3D_INFO0";
1576 case AMDILISD::IMAGE3D_INFO1
: return "AMDILISD::IMAGE3D_INFO1";
// Fills in Info for a call to an AMDIL atomic or append intrinsic.
//   Info      - out: memVT, ptrVal, readMem and writeMem are written in the
//               visible lines (other fields may be set on lines not shown).
//   I         - the call instruction; its operand 0 becomes Info.ptrVal.
//   Intrinsic - intrinsic ID; IDs without a case hit 'default: return false'.
// The switch records the AMDILISD opcode matching the intrinsic in IntNo.
// NOTE(review): the declarations of IntNo and isRet, any statements in the
// *_noret arms other than the IntNo assignment, and the final return are not
// visible in this listing -- confirm against the full file.
1581 AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo
&Info
,
1582 const CallInst
&I
, unsigned Intrinsic
) const
// Range check on the intrinsic ID. The body of this branch is not visible
// here; presumably it rejects non-AMDIL IDs early -- confirm.
1584 if (Intrinsic
<= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
1585 || Intrinsic
> AMDGPUIntrinsic::num_AMDIL_intrinsics
) {
// Set to true by the f32 xchg cases below; selects Info.memVT at the end.
1588 bool bitCastToInt
= false;
// Subtarget handle; queried via calVersion() by the inc/dec cases.
1591 const AMDILSubtarget
*STM
= &this->getTargetMachine()
1592 .getSubtarget
<AMDILSubtarget
>();
1593 switch (Intrinsic
) {
1594 default: return false; // Don't custom lower most intrinsics.
1595 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32
:
1596 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32
:
1597 IntNo
= AMDILISD::ATOM_G_ADD
; break;
1598 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret
:
1599 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret
:
1601 IntNo
= AMDILISD::ATOM_G_ADD_NORET
; break;
1602 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32
:
1603 case AMDGPUIntrinsic::AMDIL_atomic_add_li32
:
1604 IntNo
= AMDILISD::ATOM_L_ADD
; break;
1605 case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret
:
1606 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret
:
1608 IntNo
= AMDILISD::ATOM_L_ADD_NORET
; break;
1609 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32
:
1610 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32
:
1611 IntNo
= AMDILISD::ATOM_R_ADD
; break;
1612 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret
:
1613 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret
:
1615 IntNo
= AMDILISD::ATOM_R_ADD_NORET
; break;
1616 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32
:
1617 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32
:
1618 IntNo
= AMDILISD::ATOM_G_AND
; break;
1619 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret
:
1620 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret
:
1622 IntNo
= AMDILISD::ATOM_G_AND_NORET
; break;
1623 case AMDGPUIntrinsic::AMDIL_atomic_and_li32
:
1624 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32
:
1625 IntNo
= AMDILISD::ATOM_L_AND
; break;
1626 case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret
:
1627 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret
:
1629 IntNo
= AMDILISD::ATOM_L_AND_NORET
; break;
1630 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32
:
1631 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32
:
1632 IntNo
= AMDILISD::ATOM_R_AND
; break;
1633 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret
:
1634 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret
:
1636 IntNo
= AMDILISD::ATOM_R_AND_NORET
; break;
1637 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32
:
1638 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32
:
1639 IntNo
= AMDILISD::ATOM_G_CMPXCHG
; break;
1640 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret
:
1641 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret
:
1643 IntNo
= AMDILISD::ATOM_G_CMPXCHG_NORET
; break;
1644 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32
:
1645 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32
:
1646 IntNo
= AMDILISD::ATOM_L_CMPXCHG
; break;
1647 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret
:
1648 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret
:
1650 IntNo
= AMDILISD::ATOM_L_CMPXCHG_NORET
; break;
1651 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32
:
1652 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32
:
1653 IntNo
= AMDILISD::ATOM_R_CMPXCHG
; break;
1654 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret
:
1655 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret
:
1657 IntNo
= AMDILISD::ATOM_R_CMPXCHG_NORET
; break;
// Atomic decrement: the ATOM_*_DEC opcode is assigned under the
// calVersion() >= CAL_VERSION_SC_136 test; the ATOM_*_SUB assignment that
// follows is presumably the else arm (the else line is not visible) -- confirm.
1658 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32
:
1659 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32
:
1660 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1661 IntNo
= AMDILISD::ATOM_G_DEC
;
1663 IntNo
= AMDILISD::ATOM_G_SUB
;
1666 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret
:
1667 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret
:
1669 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1670 IntNo
= AMDILISD::ATOM_G_DEC_NORET
;
1672 IntNo
= AMDILISD::ATOM_G_SUB_NORET
;
1675 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32
:
1676 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32
:
1677 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1678 IntNo
= AMDILISD::ATOM_L_DEC
;
1680 IntNo
= AMDILISD::ATOM_L_SUB
;
1683 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret
:
1684 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret
:
1686 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1687 IntNo
= AMDILISD::ATOM_L_DEC_NORET
;
1689 IntNo
= AMDILISD::ATOM_L_SUB_NORET
;
1692 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32
:
1693 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32
:
1694 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1695 IntNo
= AMDILISD::ATOM_R_DEC
;
1697 IntNo
= AMDILISD::ATOM_R_SUB
;
1700 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret
:
1701 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret
:
1703 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1704 IntNo
= AMDILISD::ATOM_R_DEC_NORET
;
1706 IntNo
= AMDILISD::ATOM_R_SUB_NORET
;
// Atomic increment: same shape as decrement, with ATOM_*_INC under the
// calVersion() test and ATOM_*_ADD as the (presumed) alternative -- confirm.
1709 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32
:
1710 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32
:
1711 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1712 IntNo
= AMDILISD::ATOM_G_INC
;
1714 IntNo
= AMDILISD::ATOM_G_ADD
;
1717 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret
:
1718 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret
:
1720 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1721 IntNo
= AMDILISD::ATOM_G_INC_NORET
;
1723 IntNo
= AMDILISD::ATOM_G_ADD_NORET
;
1726 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32
:
1727 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32
:
1728 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1729 IntNo
= AMDILISD::ATOM_L_INC
;
1731 IntNo
= AMDILISD::ATOM_L_ADD
;
1734 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret
:
1735 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret
:
1737 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1738 IntNo
= AMDILISD::ATOM_L_INC_NORET
;
1740 IntNo
= AMDILISD::ATOM_L_ADD_NORET
;
1743 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32
:
1744 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32
:
1745 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1746 IntNo
= AMDILISD::ATOM_R_INC
;
1748 IntNo
= AMDILISD::ATOM_R_ADD
;
1751 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret
:
1752 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret
:
1754 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1755 IntNo
= AMDILISD::ATOM_R_INC_NORET
;
1757 IntNo
= AMDILISD::ATOM_R_ADD_NORET
;
// max/min: the i32 and u32 intrinsics map to distinct opcodes
// (ATOM_*_MAX vs ATOM_*_UMAX, ATOM_*_MIN vs ATOM_*_UMIN).
1760 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32
:
1761 IntNo
= AMDILISD::ATOM_G_MAX
; break;
1762 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32
:
1763 IntNo
= AMDILISD::ATOM_G_UMAX
; break;
1764 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret
:
1766 IntNo
= AMDILISD::ATOM_G_MAX_NORET
; break;
1767 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret
:
1769 IntNo
= AMDILISD::ATOM_G_UMAX_NORET
; break;
1770 case AMDGPUIntrinsic::AMDIL_atomic_max_li32
:
1771 IntNo
= AMDILISD::ATOM_L_MAX
; break;
1772 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32
:
1773 IntNo
= AMDILISD::ATOM_L_UMAX
; break;
1774 case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret
:
1776 IntNo
= AMDILISD::ATOM_L_MAX_NORET
; break;
1777 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret
:
1779 IntNo
= AMDILISD::ATOM_L_UMAX_NORET
; break;
1780 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32
:
1781 IntNo
= AMDILISD::ATOM_R_MAX
; break;
1782 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32
:
1783 IntNo
= AMDILISD::ATOM_R_UMAX
; break;
1784 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret
:
1786 IntNo
= AMDILISD::ATOM_R_MAX_NORET
; break;
1787 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret
:
1789 IntNo
= AMDILISD::ATOM_R_UMAX_NORET
; break;
1790 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32
:
1791 IntNo
= AMDILISD::ATOM_G_MIN
; break;
1792 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32
:
1793 IntNo
= AMDILISD::ATOM_G_UMIN
; break;
1794 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret
:
1796 IntNo
= AMDILISD::ATOM_G_MIN_NORET
; break;
1797 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret
:
1799 IntNo
= AMDILISD::ATOM_G_UMIN_NORET
; break;
1800 case AMDGPUIntrinsic::AMDIL_atomic_min_li32
:
1801 IntNo
= AMDILISD::ATOM_L_MIN
; break;
1802 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32
:
1803 IntNo
= AMDILISD::ATOM_L_UMIN
; break;
1804 case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret
:
1806 IntNo
= AMDILISD::ATOM_L_MIN_NORET
; break;
1807 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret
:
1809 IntNo
= AMDILISD::ATOM_L_UMIN_NORET
; break;
1810 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32
:
1811 IntNo
= AMDILISD::ATOM_R_MIN
; break;
1812 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32
:
1813 IntNo
= AMDILISD::ATOM_R_UMIN
; break;
1814 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret
:
1816 IntNo
= AMDILISD::ATOM_R_MIN_NORET
; break;
1817 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret
:
1819 IntNo
= AMDILISD::ATOM_R_UMIN_NORET
; break;
1820 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32
:
1821 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32
:
1822 IntNo
= AMDILISD::ATOM_G_OR
; break;
1823 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret
:
1824 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret
:
1826 IntNo
= AMDILISD::ATOM_G_OR_NORET
; break;
1827 case AMDGPUIntrinsic::AMDIL_atomic_or_li32
:
1828 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32
:
1829 IntNo
= AMDILISD::ATOM_L_OR
; break;
1830 case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret
:
1831 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret
:
1833 IntNo
= AMDILISD::ATOM_L_OR_NORET
; break;
1834 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32
:
1835 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32
:
1836 IntNo
= AMDILISD::ATOM_R_OR
; break;
1837 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret
:
1838 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret
:
1840 IntNo
= AMDILISD::ATOM_R_OR_NORET
; break;
1841 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32
:
1842 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32
:
1843 IntNo
= AMDILISD::ATOM_G_SUB
; break;
1844 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret
:
1845 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret
:
1847 IntNo
= AMDILISD::ATOM_G_SUB_NORET
; break;
1848 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32
:
1849 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32
:
1850 IntNo
= AMDILISD::ATOM_L_SUB
; break;
1851 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret
:
1852 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret
:
1854 IntNo
= AMDILISD::ATOM_L_SUB_NORET
; break;
1855 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32
:
1856 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32
:
1857 IntNo
= AMDILISD::ATOM_R_SUB
; break;
1858 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret
:
1859 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret
:
1861 IntNo
= AMDILISD::ATOM_R_SUB_NORET
; break;
1862 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32
:
1863 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32
:
1864 IntNo
= AMDILISD::ATOM_G_RSUB
; break;
1865 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret
:
1866 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret
:
1868 IntNo
= AMDILISD::ATOM_G_RSUB_NORET
; break;
1869 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32
:
1870 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32
:
1871 IntNo
= AMDILISD::ATOM_L_RSUB
; break;
1872 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret
:
1873 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret
:
1875 IntNo
= AMDILISD::ATOM_L_RSUB_NORET
; break;
1876 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32
:
1877 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32
:
1878 IntNo
= AMDILISD::ATOM_R_RSUB
; break;
1879 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret
:
1880 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret
:
1882 IntNo
= AMDILISD::ATOM_R_RSUB_NORET
; break;
// Exchange: each f32 label sets bitCastToInt and is immediately followed by
// the i32/u32 labels, so it shares their IntNo assignment. No statement is
// visible between the flag assignment and the next label -- presumably an
// intentional fallthrough; confirm.
1883 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32
:
1884 bitCastToInt
= true;
1885 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32
:
1886 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32
:
1887 IntNo
= AMDILISD::ATOM_G_XCHG
; break;
1888 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret
:
1889 bitCastToInt
= true;
1890 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret
:
1891 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret
:
1893 IntNo
= AMDILISD::ATOM_G_XCHG_NORET
; break;
1894 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32
:
1895 bitCastToInt
= true;
1896 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32
:
1897 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32
:
1898 IntNo
= AMDILISD::ATOM_L_XCHG
; break;
1899 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret
:
1900 bitCastToInt
= true;
1901 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret
:
1902 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret
:
1904 IntNo
= AMDILISD::ATOM_L_XCHG_NORET
; break;
1905 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32
:
1906 bitCastToInt
= true;
1907 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32
:
1908 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32
:
1909 IntNo
= AMDILISD::ATOM_R_XCHG
; break;
1910 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret
:
1911 bitCastToInt
= true;
1912 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret
:
1913 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret
:
1915 IntNo
= AMDILISD::ATOM_R_XCHG_NORET
; break;
1916 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32
:
1917 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32
:
1918 IntNo
= AMDILISD::ATOM_G_XOR
; break;
1919 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret
:
1920 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret
:
1922 IntNo
= AMDILISD::ATOM_G_XOR_NORET
; break;
1923 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32
:
1924 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32
:
1925 IntNo
= AMDILISD::ATOM_L_XOR
; break;
1926 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret
:
1927 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret
:
1929 IntNo
= AMDILISD::ATOM_L_XOR_NORET
; break;
1930 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32
:
1931 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32
:
1932 IntNo
= AMDILISD::ATOM_R_XOR
; break;
1933 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret
:
1934 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret
:
1936 IntNo
= AMDILISD::ATOM_R_XOR_NORET
; break;
1937 case AMDGPUIntrinsic::AMDIL_append_alloc_i32
:
1938 IntNo
= AMDILISD::APPEND_ALLOC
; break;
1939 case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret
:
1941 IntNo
= AMDILISD::APPEND_ALLOC_NORET
; break;
1942 case AMDGPUIntrinsic::AMDIL_append_consume_i32
:
1943 IntNo
= AMDILISD::APPEND_CONSUME
; break;
1944 case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret
:
1946 IntNo
= AMDILISD::APPEND_CONSUME_NORET
; break;
// Memory value type: f32 when one of the float xchg cases set bitCastToInt,
// i32 otherwise.
1950 Info
.memVT
= (bitCastToInt
) ? MVT::f32
: MVT::i32
;
// The accessed pointer is the call's first operand.
1951 Info
.ptrVal
= I
.getOperand(0);
// readMem follows isRet, which is declared and updated on lines not visible
// here. NOTE(review): presumably cleared by the *_noret cases -- confirm.
1955 Info
.readMem
= isRet
;
// Every intrinsic that reaches this point is marked as writing memory.
1956 Info
.writeMem
= true;
1959 // The backend supports 32 and 64 bit floating point immediates
1961 AMDILTargetLowering::isFPImmLegal(const APFloat
&Imm
, EVT VT
) const
1963 if (VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f32
1964 || VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f64
) {
1972 AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT
) const
1974 if (VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f32
1975 || VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f64
) {
1983 // computeMaskedBitsForTargetNode - Determine which bits of Op are known to be
1984 // zero or one. Op is expected to be a target specific node. Used by DAG
1988 AMDILTargetLowering::computeMaskedBitsForTargetNode(
1992 const SelectionDAG
&DAG
,
1993 unsigned Depth
) const
1997 KnownZero
= KnownOne
= APInt(KnownOne
.getBitWidth(), 0); // Don't know anything
1998 switch (Op
.getOpcode()) {
2000 case AMDILISD::SELECT_CC
:
2001 DAG
.ComputeMaskedBits(
2007 DAG
.ComputeMaskedBits(
2012 assert((KnownZero
& KnownOne
) == 0
2013 && "Bits known to be one AND zero?");
2014 assert((KnownZero2
& KnownOne2
) == 0
2015 && "Bits known to be one AND zero?");
2016 // Only known if known in both the LHS and RHS
2017 KnownOne
&= KnownOne2
;
2018 KnownZero
&= KnownZero2
;
2023 // This is the function that determines which calling convention should
2024 // be used. Currently there is only one calling convention
2026 AMDILTargetLowering::CCAssignFnForNode(unsigned int Op
) const
2028 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2032 // LowerCallResult - Lower the result values of an ISD::CALL into the
2033 // appropriate copies out of appropriate physical registers. This assumes that
2034 // Chain/InFlag are the input chain/flag to use, and that TheCall is the call
2035 // being lowered. This returns an SDNode with the same number of values as the
2038 AMDILTargetLowering::LowerCallResult(
2041 CallingConv::ID CallConv
,
2043 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
2046 SmallVectorImpl
<SDValue
> &InVals
) const
2048 // Assign locations to each value returned by this call
2049 SmallVector
<CCValAssign
, 16> RVLocs
;
2050 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
2051 getTargetMachine(), RVLocs
, *DAG
.getContext());
2052 CCInfo
.AnalyzeCallResult(Ins
, RetCC_AMDIL32
);
2054 // Copy all of the result registers out of their specified physreg.
2055 for (unsigned i
= 0; i
!= RVLocs
.size(); ++i
) {
2056 EVT CopyVT
= RVLocs
[i
].getValVT();
2057 if (RVLocs
[i
].isRegLoc()) {
2058 Chain
= DAG
.getCopyFromReg(
2061 RVLocs
[i
].getLocReg(),
2065 SDValue Val
= Chain
.getValue(0);
2066 InFlag
= Chain
.getValue(2);
2067 InVals
.push_back(Val
);
2075 //===----------------------------------------------------------------------===//
2076 // Other Lowering Hooks
2077 //===----------------------------------------------------------------------===//
2080 AMDILTargetLowering::EmitInstrWithCustomInserter(
2081 MachineInstr
*MI
, MachineBasicBlock
*BB
) const
2083 const TargetInstrInfo
&TII
= *getTargetMachine().getInstrInfo();
2084 switch (MI
->getOpcode()) {
2085 ExpandCaseToAllTypes(AMDIL::CMP
);
2086 generateCMPInstr(MI
, BB
, TII
);
2087 MI
->eraseFromParent();
2095 // Recursively assign SDNodeOrdering to any unordered nodes
2096 // This is necessary to maintain source ordering of instructions
2097 // under -O0 to avoid odd-looking "skipping around" issues.
2098 static const SDValue
2099 Ordered( SelectionDAG
&DAG
, unsigned order
, const SDValue New
)
2101 if (order
!= 0 && DAG
.GetOrdering( New
.getNode() ) == 0) {
2102 DAG
.AssignOrdering( New
.getNode(), order
);
2103 for (unsigned i
= 0, e
= New
.getNumOperands(); i
< e
; ++i
)
2104 Ordered( DAG
, order
, New
.getOperand(i
) );
2111 return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
2114 AMDILTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const
2116 switch (Op
.getOpcode()) {
2118 Op
.getNode()->dump();
2119 assert(0 && "Custom lowering code for this"
2120 "instruction is not implemented yet!");
2122 LOWER(GlobalAddress
);
2124 LOWER(ConstantPool
);
2125 LOWER(ExternalSymbol
);
2138 LOWER(BUILD_VECTOR
);
2139 LOWER(INSERT_VECTOR_ELT
);
2140 LOWER(EXTRACT_VECTOR_ELT
);
2141 LOWER(EXTRACT_SUBVECTOR
);
2142 LOWER(SCALAR_TO_VECTOR
);
2143 LOWER(CONCAT_VECTORS
);
2149 LOWER(SIGN_EXTEND_INREG
);
2151 LOWER(DYNAMIC_STACKALLOC
);
2160 AMDILTargetLowering::getVarArgsFrameOffset() const
2162 return VarArgsFrameOffset
;
2167 AMDILTargetLowering::LowerGlobalAddress(SDValue Op
, SelectionDAG
&DAG
) const
2170 const GlobalAddressSDNode
*GADN
= cast
<GlobalAddressSDNode
>(Op
);
2171 const GlobalValue
*G
= GADN
->getGlobal();
2172 DebugLoc DL
= Op
.getDebugLoc();
2173 const GlobalVariable
*GV
= dyn_cast
<GlobalVariable
>(G
);
2175 DST
= DAG
.getTargetGlobalAddress(GV
, DL
, MVT::i32
);
2177 if (GV
->hasInitializer()) {
2178 const Constant
*C
= dyn_cast
<Constant
>(GV
->getInitializer());
2179 if (const ConstantInt
*CI
= dyn_cast
<ConstantInt
>(C
)) {
2180 DST
= DAG
.getConstant(CI
->getValue(), Op
.getValueType());
2181 } else if (const ConstantFP
*CF
= dyn_cast
<ConstantFP
>(C
)) {
2182 DST
= DAG
.getConstantFP(CF
->getValueAPF(),
2184 } else if (dyn_cast
<ConstantAggregateZero
>(C
)) {
2185 EVT VT
= Op
.getValueType();
2186 if (VT
.isInteger()) {
2187 DST
= DAG
.getConstant(0, VT
);
2189 DST
= DAG
.getConstantFP(0, VT
);
2192 assert(!"lowering this type of Global Address "
2193 "not implemented yet!");
2195 DST
= DAG
.getTargetGlobalAddress(GV
, DL
, MVT::i32
);
2198 DST
= DAG
.getTargetGlobalAddress(GV
, DL
, MVT::i32
);
2205 AMDILTargetLowering::LowerJumpTable(SDValue Op
, SelectionDAG
&DAG
) const
2207 JumpTableSDNode
*JT
= cast
<JumpTableSDNode
>(Op
);
2208 SDValue Result
= DAG
.getTargetJumpTable(JT
->getIndex(), MVT::i32
);
2212 AMDILTargetLowering::LowerConstantPool(SDValue Op
, SelectionDAG
&DAG
) const
2214 ConstantPoolSDNode
*CP
= cast
<ConstantPoolSDNode
>(Op
);
2215 EVT PtrVT
= Op
.getValueType();
2217 if (CP
->isMachineConstantPoolEntry()) {
2218 Result
= DAG
.getTargetConstantPool(CP
->getMachineCPVal(), PtrVT
,
2219 CP
->getAlignment(), CP
->getOffset(), CP
->getTargetFlags());
2221 Result
= DAG
.getTargetConstantPool(CP
->getConstVal(), PtrVT
,
2222 CP
->getAlignment(), CP
->getOffset(), CP
->getTargetFlags());
2228 AMDILTargetLowering::LowerExternalSymbol(SDValue Op
, SelectionDAG
&DAG
) const
2230 const char *Sym
= cast
<ExternalSymbolSDNode
>(Op
)->getSymbol();
2231 SDValue Result
= DAG
.getTargetExternalSymbol(Sym
, MVT::i32
);
2235 /// LowerFormalArguments - transform physical registers into
2236 /// virtual registers and generate load operations for
2237 /// arguments placed on the stack.
2238 /// TODO: isVarArg, hasStructRet, isMemReg
2240 AMDILTargetLowering::LowerFormalArguments(SDValue Chain
,
2241 CallingConv::ID CallConv
,
2243 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
2246 SmallVectorImpl
<SDValue
> &InVals
)
2250 MachineFunction
&MF
= DAG
.getMachineFunction();
2251 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
2252 //const Function *Fn = MF.getFunction();
2253 //MachineRegisterInfo &RegInfo = MF.getRegInfo();
2255 SmallVector
<CCValAssign
, 16> ArgLocs
;
2256 CallingConv::ID CC
= MF
.getFunction()->getCallingConv();
2257 //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
2259 CCState
CCInfo(CC
, isVarArg
, DAG
.getMachineFunction(),
2260 getTargetMachine(), ArgLocs
, *DAG
.getContext());
2262 // When more calling conventions are added, they need to be chosen here
2263 CCInfo
.AnalyzeFormalArguments(Ins
, CC_AMDIL32
);
2266 //unsigned int FirstStackArgLoc = 0;
2268 for (unsigned int i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
2269 CCValAssign
&VA
= ArgLocs
[i
];
2270 if (VA
.isRegLoc()) {
2271 EVT RegVT
= VA
.getLocVT();
2272 const TargetRegisterClass
*RC
= getRegClassFromType(
2273 RegVT
.getSimpleVT().SimpleTy
);
2275 unsigned int Reg
= MF
.addLiveIn(VA
.getLocReg(), RC
);
2276 SDValue ArgValue
= DAG
.getCopyFromReg(
2281 // If this is an 8 or 16-bit value, it is really passed
2282 // promoted to 32 bits. Insert an assert[sz]ext to capture
2283 // this, then truncate to the right size.
2285 if (VA
.getLocInfo() == CCValAssign::SExt
) {
2286 ArgValue
= DAG
.getNode(
2291 DAG
.getValueType(VA
.getValVT()));
2292 } else if (VA
.getLocInfo() == CCValAssign::ZExt
) {
2293 ArgValue
= DAG
.getNode(
2298 DAG
.getValueType(VA
.getValVT()));
2300 if (VA
.getLocInfo() != CCValAssign::Full
) {
2301 ArgValue
= DAG
.getNode(
2307 // Add the value to the list of arguments
2308 // to be passed in registers
2309 InVals
.push_back(ArgValue
);
2311 assert(0 && "Variable arguments are not yet supported");
2312 // See MipsISelLowering.cpp for ideas on how to implement
2314 } else if(VA
.isMemLoc()) {
2315 InVals
.push_back(LowerMemArgument(Chain
, CallConv
, Ins
,
2316 dl
, DAG
, VA
, MFI
, i
));
2318 assert(0 && "found a Value Assign that is "
2319 "neither a register or a memory location");
2322 /*if (hasStructRet) {
2323 assert(0 && "Has struct return is not yet implemented");
2324 // See MipsISelLowering.cpp for ideas on how to implement
2328 assert(0 && "Variable arguments are not yet supported");
2329 // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
2331 // This needs to be changed to non-zero if the return function needs
2335 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
2336 /// by "Src" to address "Dst" with size and alignment information specified by
2337 /// the specific parameter attribute. The copy will be passed as a byval
2338 /// function parameter.
2340 CreateCopyOfByValArgument(SDValue Src
, SDValue Dst
, SDValue Chain
,
2341 ISD::ArgFlagsTy Flags
, SelectionDAG
&DAG
) {
2342 assert(0 && "MemCopy does not exist yet");
2343 SDValue SizeNode
= DAG
.getConstant(Flags
.getByValSize(), MVT::i32
);
2345 return DAG
.getMemcpy(Chain
,
2347 Dst
, Src
, SizeNode
, Flags
.getByValAlign(),
2348 /*IsVol=*/false, /*AlwaysInline=*/true,
2349 MachinePointerInfo(), MachinePointerInfo());
2353 AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain
,
2354 SDValue StackPtr
, SDValue Arg
,
2355 DebugLoc dl
, SelectionDAG
&DAG
,
2356 const CCValAssign
&VA
,
2357 ISD::ArgFlagsTy Flags
) const
2359 unsigned int LocMemOffset
= VA
.getLocMemOffset();
2360 SDValue PtrOff
= DAG
.getIntPtrConstant(LocMemOffset
);
2361 PtrOff
= DAG
.getNode(ISD::ADD
,
2363 getPointerTy(), StackPtr
, PtrOff
);
2364 if (Flags
.isByVal()) {
2365 PtrOff
= CreateCopyOfByValArgument(Arg
, PtrOff
, Chain
, Flags
, DAG
);
2367 PtrOff
= DAG
.getStore(Chain
, dl
, Arg
, PtrOff
,
2368 MachinePointerInfo::getStack(LocMemOffset
),
2373 /// LowerCall - function arguments are copied from virtual
2374 /// regs to (physical regs)/(stack frame), CALLSEQ_START and
2375 /// CALLSEQ_END are emitted.
2376 /// TODO: isVarArg, isTailCall, hasStructRet
2378 AMDILTargetLowering::LowerCall(SDValue Chain
, SDValue Callee
,
2379 CallingConv::ID CallConv
, bool isVarArg
, bool doesNotRet
,
2381 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
2382 const SmallVectorImpl
<SDValue
> &OutVals
,
2383 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
2384 DebugLoc dl
, SelectionDAG
&DAG
,
2385 SmallVectorImpl
<SDValue
> &InVals
)
2389 MachineFunction
& MF
= DAG
.getMachineFunction();
2390 // FIXME: Do we need to handle fast calling conventions and tail call
2391 // optimizations?? X86/PPC ISelLowering
2392 /*bool hasStructRet = (TheCall->getNumArgs())
2393 ? TheCall->getArgFlags(0).device()->isSRet()
2396 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
2398 // Analyze operands of the call, assigning locations to each operand
2399 SmallVector
<CCValAssign
, 16> ArgLocs
;
2400 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
2401 getTargetMachine(), ArgLocs
, *DAG
.getContext());
2402 // Analyze the calling operands, but need to change
2403 // if we have more than one calling convention
2404 CCInfo
.AnalyzeCallOperands(Outs
, CCAssignFnForNode(CallConv
));
2406 unsigned int NumBytes
= CCInfo
.getNextStackOffset();
2408 assert(isTailCall
&& "Tail Call not handled yet!");
2409 // See X86/PPC ISelLowering
2412 Chain
= DAG
.getCALLSEQ_START(Chain
, DAG
.getIntPtrConstant(NumBytes
, true));
2414 SmallVector
<std::pair
<unsigned int, SDValue
>, 8> RegsToPass
;
2415 SmallVector
<SDValue
, 8> MemOpChains
;
2417 //unsigned int FirstStacArgLoc = 0;
2418 //int LastArgStackLoc = 0;
2420 // Walk the register/memloc assignments, insert copies/loads
2421 for (unsigned int i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
2422 CCValAssign
&VA
= ArgLocs
[i
];
2423 //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
2424 // Arguments start after the 5 first operands of ISD::CALL
2425 SDValue Arg
= OutVals
[i
];
2426 //Promote the value if needed
2427 switch(VA
.getLocInfo()) {
2428 default: assert(0 && "Unknown loc info!");
2429 case CCValAssign::Full
:
2431 case CCValAssign::SExt
:
2432 Arg
= DAG
.getNode(ISD::SIGN_EXTEND
,
2434 VA
.getLocVT(), Arg
);
2436 case CCValAssign::ZExt
:
2437 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
,
2439 VA
.getLocVT(), Arg
);
2441 case CCValAssign::AExt
:
2442 Arg
= DAG
.getNode(ISD::ANY_EXTEND
,
2444 VA
.getLocVT(), Arg
);
2448 if (VA
.isRegLoc()) {
2449 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), Arg
));
2450 } else if (VA
.isMemLoc()) {
2451 // Create the frame index object for this incoming parameter
2452 int FI
= MFI
->CreateFixedObject(VA
.getValVT().getSizeInBits()/8,
2453 VA
.getLocMemOffset(), true);
2454 SDValue PtrOff
= DAG
.getFrameIndex(FI
,getPointerTy());
2456 // Emit an ISD::STORE which stores the
2457 // parameter value to a stack location
2458 MemOpChains
.push_back(DAG
.getStore(Chain
, dl
, Arg
, PtrOff
,
2459 MachinePointerInfo::getFixedStack(FI
),
2462 assert(0 && "Not a Reg/Mem Loc, major error!");
2465 if (!MemOpChains
.empty()) {
2466 Chain
= DAG
.getNode(ISD::TokenFactor
,
2470 MemOpChains
.size());
2474 for (unsigned int i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
2475 Chain
= DAG
.getCopyToReg(Chain
,
2477 RegsToPass
[i
].first
,
2478 RegsToPass
[i
].second
,
2480 InFlag
= Chain
.getValue(1);
2484 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
2485 // every direct call is) turn it into a TargetGlobalAddress/
2486 // TargetExternalSymbol
2487 // node so that legalize doesn't hack it.
2488 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
)) {
2489 Callee
= DAG
.getTargetGlobalAddress(G
->getGlobal(), dl
, getPointerTy());
2491 else if (ExternalSymbolSDNode
*S
= dyn_cast
<ExternalSymbolSDNode
>(Callee
)) {
2492 Callee
= DAG
.getTargetExternalSymbol(S
->getSymbol(), getPointerTy());
2494 else if (isTailCall
) {
2495 assert(0 && "Tail calls are not handled yet");
2496 // see X86 ISelLowering for ideas on implementation: 1708
2499 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVTGLUE
);
2500 SmallVector
<SDValue
, 8> Ops
;
2503 assert(0 && "Tail calls are not handled yet");
2504 // see X86 ISelLowering for ideas on implementation: 1721
2506 // If this is a direct call, pass the chain and the callee
2507 if (Callee
.getNode()) {
2508 Ops
.push_back(Chain
);
2509 Ops
.push_back(Callee
);
2513 assert(0 && "Tail calls are not handled yet");
2514 // see X86 ISelLowering for ideas on implementation: 1739
2517 // Add argument registers to the end of the list so that they are known
2518 // live into the call
2519 for (unsigned int i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
2520 Ops
.push_back(DAG
.getRegister(
2521 RegsToPass
[i
].first
,
2522 RegsToPass
[i
].second
.getValueType()));
2524 if (InFlag
.getNode()) {
2525 Ops
.push_back(InFlag
);
2530 assert(0 && "Tail calls are not handled yet");
2531 // see X86 ISelLowering for ideas on implementation: 1762
2534 Chain
= DAG
.getNode(AMDILISD::CALL
,
2536 NodeTys
, &Ops
[0], Ops
.size());
2537 InFlag
= Chain
.getValue(1);
2539 // Create the CALLSEQ_END node
2540 Chain
= DAG
.getCALLSEQ_END(
2542 DAG
.getIntPtrConstant(NumBytes
, true),
2543 DAG
.getIntPtrConstant(0, true),
2545 InFlag
= Chain
.getValue(1);
2546 // Handle result values, copying them out of physregs into vregs that
2548 return LowerCallResult(Chain
, InFlag
, CallConv
, isVarArg
, Ins
, dl
, DAG
,
2551 static void checkMADType(
2552 SDValue Op
, const AMDILSubtarget
*STM
, bool& is24bitMAD
, bool& is32bitMAD
)
2554 bool globalLoadStore
= false;
2558 assert(Op
.getOpcode() == ISD::ADD
&& "The opcode must be a add in order for "
2559 "this to work correctly!");
2560 if (Op
.getNode()->use_empty()) {
2563 for (SDNode::use_iterator nBegin
= Op
.getNode()->use_begin(),
2564 nEnd
= Op
.getNode()->use_end(); nBegin
!= nEnd
; ++nBegin
) {
2565 SDNode
*ptr
= *nBegin
;
2566 const LSBaseSDNode
*lsNode
= dyn_cast
<LSBaseSDNode
>(ptr
);
2567 // If we are not a LSBaseSDNode then we don't do this
2569 // If we are a LSBaseSDNode, but the op is not the offset
2570 // or base pointer, then we don't do this optimization
2571 // (i.e. we are the value being stored)
2573 (lsNode
->writeMem() && lsNode
->getOperand(1) == Op
)) {
2576 const PointerType
*PT
=
2577 dyn_cast
<PointerType
>(lsNode
->getSrcValue()->getType());
2578 unsigned as
= PT
->getAddressSpace();
2581 globalLoadStore
= true;
2582 case AMDILAS::PRIVATE_ADDRESS
:
2583 if (!STM
->device()->usesHardware(AMDILDeviceInfo::PrivateMem
)) {
2584 globalLoadStore
= true;
2587 case AMDILAS::CONSTANT_ADDRESS
:
2588 if (!STM
->device()->usesHardware(AMDILDeviceInfo::ConstantMem
)) {
2589 globalLoadStore
= true;
2592 case AMDILAS::LOCAL_ADDRESS
:
2593 if (!STM
->device()->usesHardware(AMDILDeviceInfo::LocalMem
)) {
2594 globalLoadStore
= true;
2597 case AMDILAS::REGION_ADDRESS
:
2598 if (!STM
->device()->usesHardware(AMDILDeviceInfo::RegionMem
)) {
2599 globalLoadStore
= true;
2604 if (globalLoadStore
) {
2612 AMDILTargetLowering::LowerADD(SDValue Op
, SelectionDAG
&DAG
) const
2614 SDValue LHS
= Op
.getOperand(0);
2615 SDValue RHS
= Op
.getOperand(1);
2616 DebugLoc DL
= Op
.getDebugLoc();
2617 EVT OVT
= Op
.getValueType();
2619 const AMDILSubtarget
*stm
= &this->getTargetMachine()
2620 .getSubtarget
<AMDILSubtarget
>();
2621 bool isVec
= OVT
.isVector();
2622 if (OVT
.getScalarType() == MVT::i64
) {
2623 MVT INTTY
= MVT::i32
;
2624 if (OVT
== MVT::v2i64
) {
2627 if (stm
->device()->usesHardware(AMDILDeviceInfo::LongOps
)
2628 && INTTY
== MVT::i32
) {
2629 DST
= DAG
.getNode(AMDILISD::ADD
,
2634 SDValue LHSLO
, LHSHI
, RHSLO
, RHSHI
, INTLO
, INTHI
;
2635 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
2636 LHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, LHS
);
2637 RHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, RHS
);
2638 LHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, LHS
);
2639 RHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, RHS
);
2640 INTLO
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, LHSLO
, RHSLO
);
2641 INTHI
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, LHSHI
, RHSHI
);
2643 cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2644 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
2646 cmp
= DAG
.getNode(AMDILISD::INEGATE
, DL
, INTTY
, cmp
);
2647 INTHI
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, INTHI
, cmp
);
2648 DST
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, OVT
,
2652 if (LHS
.getOpcode() == ISD::FrameIndex
||
2653 RHS
.getOpcode() == ISD::FrameIndex
) {
2654 DST
= DAG
.getNode(AMDILISD::ADDADDR
,
2659 if (stm
->device()->usesHardware(AMDILDeviceInfo::LocalMem
)
2660 && LHS
.getNumOperands()
2661 && RHS
.getNumOperands()) {
2662 bool is24bitMAD
= false;
2663 bool is32bitMAD
= false;
2664 const ConstantSDNode
*LHSConstOpCode
=
2665 dyn_cast
<ConstantSDNode
>(LHS
.getOperand(LHS
.getNumOperands()-1));
2666 const ConstantSDNode
*RHSConstOpCode
=
2667 dyn_cast
<ConstantSDNode
>(RHS
.getOperand(RHS
.getNumOperands()-1));
2668 if ((LHS
.getOpcode() == ISD::SHL
&& LHSConstOpCode
)
2669 || (RHS
.getOpcode() == ISD::SHL
&& RHSConstOpCode
)
2670 || LHS
.getOpcode() == ISD::MUL
2671 || RHS
.getOpcode() == ISD::MUL
) {
2672 SDValue Op1
, Op2
, Op3
;
2673 // FIXME: Fix this so that it works for unsigned 24bit ops.
2674 if (LHS
.getOpcode() == ISD::MUL
) {
2675 Op1
= LHS
.getOperand(0);
2676 Op2
= LHS
.getOperand(1);
2678 } else if (RHS
.getOpcode() == ISD::MUL
) {
2679 Op1
= RHS
.getOperand(0);
2680 Op2
= RHS
.getOperand(1);
2682 } else if (LHS
.getOpcode() == ISD::SHL
&& LHSConstOpCode
) {
2683 Op1
= LHS
.getOperand(0);
2684 Op2
= DAG
.getConstant(
2685 1 << LHSConstOpCode
->getZExtValue(), MVT::i32
);
2687 } else if (RHS
.getOpcode() == ISD::SHL
&& RHSConstOpCode
) {
2688 Op1
= RHS
.getOperand(0);
2689 Op2
= DAG
.getConstant(
2690 1 << RHSConstOpCode
->getZExtValue(), MVT::i32
);
2693 checkMADType(Op
, stm
, is24bitMAD
, is32bitMAD
);
2694 // We can possibly do a MAD transform!
2695 if (is24bitMAD
&& stm
->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps
)) {
2696 uint32_t opcode
= AMDGPUIntrinsic::AMDIL_mad24_i32
;
2697 SDVTList Tys
= DAG
.getVTList(OVT
/*, MVT::Other*/);
2698 DST
= DAG
.getNode(ISD::INTRINSIC_W_CHAIN
,
2699 DL
, Tys
, DAG
.getEntryNode(), DAG
.getConstant(opcode
, MVT::i32
),
2701 } else if(is32bitMAD
) {
2702 SDVTList Tys
= DAG
.getVTList(OVT
/*, MVT::Other*/);
2703 DST
= DAG
.getNode(ISD::INTRINSIC_W_CHAIN
,
2704 DL
, Tys
, DAG
.getEntryNode(),
2706 AMDGPUIntrinsic::AMDIL_mad_i32
, MVT::i32
),
2711 DST
= DAG
.getNode(AMDILISD::ADD
,
2720 AMDILTargetLowering::genCLZuN(SDValue Op
, SelectionDAG
&DAG
,
2721 uint32_t bits
) const
2723 DebugLoc DL
= Op
.getDebugLoc();
2724 EVT INTTY
= Op
.getValueType();
2726 if (INTTY
.isVector()) {
2727 FPTY
= EVT(MVT::getVectorVT(MVT::f32
,
2728 INTTY
.getVectorNumElements()));
2730 FPTY
= EVT(MVT::f32
);
2732 /* static inline uint
2735 int xor = 0x3f800000U | x;
2736 float tp = as_float(xor);
2737 float t = tp + -1.0f;
2738 uint tint = as_uint(t);
2740 uint tsrc = tint >> 23;
2741 uint tmask = tsrc & 0xffU;
2742 uint cst = (103 + N)U - tmask;
2743 return cmp ? cst : N;
2746 assert(INTTY
.getScalarType().getSimpleVT().SimpleTy
== MVT::i32
2747 && "genCLZu16 only works on 32bit types");
2750 // xornode = 0x3f800000 | x
2751 SDValue xornode
= DAG
.getNode(ISD::OR
, DL
, INTTY
,
2752 DAG
.getConstant(0x3f800000, INTTY
), x
);
2753 // float tp = as_float(xornode)
2754 SDValue tp
= DAG
.getNode(ISDBITCAST
, DL
, FPTY
, xornode
);
2755 // float t = tp + -1.0f
2756 SDValue t
= DAG
.getNode(ISD::FADD
, DL
, FPTY
, tp
,
2757 DAG
.getConstantFP(-1.0f
, FPTY
));
2758 // uint tint = as_uint(t)
2759 SDValue tint
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, t
);
2760 // int cmp = (x != 0)
2761 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2762 DAG
.getConstant(CondCCodeToCC(ISD::SETNE
, MVT::i32
), MVT::i32
), x
,
2763 DAG
.getConstant(0, INTTY
));
2764 // uint tsrc = tint >> 23
2765 SDValue tsrc
= DAG
.getNode(ISD::SRL
, DL
, INTTY
, tint
,
2766 DAG
.getConstant(23, INTTY
));
2767 // uint tmask = tsrc & 0xFF
2768 SDValue tmask
= DAG
.getNode(ISD::AND
, DL
, INTTY
, tsrc
,
2769 DAG
.getConstant(0xFFU
, INTTY
));
2770 // uint cst = (103 + bits) - tmask
2771 SDValue cst
= DAG
.getNode(ISD::SUB
, DL
, INTTY
,
2772 DAG
.getConstant((103U + bits
), INTTY
), tmask
);
2773 // return cmp ? cst : N
2774 cst
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp
, cst
,
2775 DAG
.getConstant(bits
, INTTY
));
2780 AMDILTargetLowering::genCLZu32(SDValue Op
, SelectionDAG
&DAG
) const
2782 SDValue DST
= SDValue();
2783 DebugLoc DL
= Op
.getDebugLoc();
2784 EVT INTTY
= Op
.getValueType();
2785 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
2786 &this->getTargetMachine())->getSubtargetImpl();
2787 if (stm
->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX
) {
2788 //__clz_32bit(uint u)
2790 // int z = __amdil_ffb_hi(u) ;
2791 // return z < 0 ? 32 : z;
2795 // int z = __amdil_ffb_hi(u)
2796 SDValue z
= DAG
.getNode(AMDILISD::IFFB_HI
, DL
, INTTY
, u
);
2798 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2799 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
2800 z
, DAG
.getConstant(0, INTTY
));
2801 // return cmp ? 32 : z
2802 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp
,
2803 DAG
.getConstant(32, INTTY
), z
);
2804 } else if (stm
->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
2805 // static inline uint
2806 //__clz_32bit(uint x)
2808 // uint zh = __clz_16bit(x >> 16);
2809 // uint zl = __clz_16bit(x & 0xffffU);
2810 // return zh == 16U ? 16U + zl : zh;
2814 // uint xs16 = x >> 16
2815 SDValue xs16
= DAG
.getNode(ISD::SRL
, DL
, INTTY
, x
,
2816 DAG
.getConstant(16, INTTY
));
2817 // uint zh = __clz_16bit(xs16)
2818 SDValue zh
= genCLZuN(xs16
, DAG
, 16);
2819 // uint xa16 = x & 0xFFFF
2820 SDValue xa16
= DAG
.getNode(ISD::AND
, DL
, INTTY
, x
,
2821 DAG
.getConstant(0xFFFFU
, INTTY
));
2822 // uint zl = __clz_16bit(xa16)
2823 SDValue zl
= genCLZuN(xa16
, DAG
, 16);
2824 // uint cmp = zh == 16U
2825 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2826 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2827 zh
, DAG
.getConstant(16U, INTTY
));
2828 // uint zl16 = zl + 16
2829 SDValue zl16
= DAG
.getNode(ISD::ADD
, DL
, INTTY
,
2830 DAG
.getConstant(16, INTTY
), zl
);
2831 // return cmp ? zl16 : zh
2832 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
,
2835 assert(0 && "Attempting to generate a CLZ function with an"
2836 " unknown graphics card");
2841 AMDILTargetLowering::genCLZu64(SDValue Op
, SelectionDAG
&DAG
) const
2843 SDValue DST
= SDValue();
2844 DebugLoc DL
= Op
.getDebugLoc();
2846 EVT LONGTY
= Op
.getValueType();
2847 bool isVec
= LONGTY
.isVector();
2849 INTTY
= EVT(MVT::getVectorVT(MVT::i32
, Op
.getValueType()
2850 .getVectorNumElements()));
2852 INTTY
= EVT(MVT::i32
);
2854 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
2855 &this->getTargetMachine())->getSubtargetImpl();
2856 if (stm
->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX
) {
2858 // static inline uint
2859 // __clz_u64(ulong x)
2861 //uint zhi = __clz_32bit((uint)(x >> 32));
2862 //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
2863 //return zhi == 32U ? 32U + zlo : zhi;
2867 // uint xlo = x & 0xFFFFFFFF
2868 SDValue xlo
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, x
);
2869 // uint xhi = x >> 32
2870 SDValue xhi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, x
);
2871 // uint zhi = __clz_32bit(xhi)
2872 SDValue zhi
= genCLZu32(xhi
, DAG
);
2873 // uint zlo = __clz_32bit(xlo)
2874 SDValue zlo
= genCLZu32(xlo
, DAG
);
2875 // uint cmp = zhi == 32
2876 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2877 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2878 zhi
, DAG
.getConstant(32U, INTTY
));
2879 // uint zlop32 = 32 + zlo
2880 SDValue zlop32
= DAG
.getNode(AMDILISD::ADD
, DL
, INTTY
,
2881 DAG
.getConstant(32U, INTTY
), zlo
);
2882 // return cmp ? zlop32: zhi
2883 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp
, zlop32
, zhi
);
2884 } else if (stm
->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
2886 // static inline uint
2887 //__clz_64bit(ulong x)
2889 //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
2890 //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
2891 //uint zl = __clz_23bit((uint)x & 0x7fffffU);
2892 //uint r = zh == 18U ? 18U + zm : zh;
2893 //return zh + zm == 41U ? 41U + zl : r;
2897 // ulong xs46 = x >> 46
2898 SDValue xs46
= DAG
.getNode(ISD::SRL
, DL
, LONGTY
, x
,
2899 DAG
.getConstant(46, LONGTY
));
2900 // uint ixs46 = (uint)xs46
2901 SDValue ixs46
= DAG
.getNode(ISD::TRUNCATE
, DL
, INTTY
, xs46
);
2902 // ulong xs23 = x >> 23
2903 SDValue xs23
= DAG
.getNode(ISD::SRL
, DL
, LONGTY
, x
,
2904 DAG
.getConstant(23, LONGTY
));
2905 // uint ixs23 = (uint)xs23
2906 SDValue ixs23
= DAG
.getNode(ISD::TRUNCATE
, DL
, INTTY
, xs23
);
2907 // uint xs23m23 = ixs23 & 0x7FFFFF
2908 SDValue xs23m23
= DAG
.getNode(ISD::AND
, DL
, INTTY
, ixs23
,
2909 DAG
.getConstant(0x7fffffU
, INTTY
));
2910 // uint ix = (uint)x
2911 SDValue ix
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, x
);
2912 // uint xm23 = ix & 0x7FFFFF
2913 SDValue xm23
= DAG
.getNode(ISD::AND
, DL
, INTTY
, ix
,
2914 DAG
.getConstant(0x7fffffU
, INTTY
));
2915 // uint zh = __clz_23bit(ixs46)
2916 SDValue zh
= genCLZuN(ixs46
, DAG
, 23);
2917 // uint zm = __clz_23bit(xs23m23)
2918 SDValue zm
= genCLZuN(xs23m23
, DAG
, 23);
2919 // uint zl = __clz_23bit(xm23)
2920 SDValue zl
= genCLZuN(xm23
, DAG
, 23);
2921 // uint zhm5 = zh - 5
2922 SDValue zhm5
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zh
,
2923 DAG
.getConstant(-5U, INTTY
));
2924 SDValue const18
= DAG
.getConstant(18, INTTY
);
2925 SDValue const41
= DAG
.getConstant(41, INTTY
);
2926 // uint cmp1 = zh == 18
2927 SDValue cmp1
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2928 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2930 // uint zhm5zm = zhm5 + zm
2931 SDValue zhm5zm
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zhm5
, zm
);
2932 // uint cmp2 = zhm5zm == 41
2933 SDValue cmp2
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2934 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2936 // uint zmp18 = zm + 18
2937 SDValue zmp18
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zm
, const18
);
2938 // uint zlp41 = zl + 41
2939 SDValue zlp41
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zl
, const41
);
2940 // uint r = cmp1 ? zmp18 : zh
2941 SDValue r
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
,
2943 // return cmp2 ? zlp41 : r
2944 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp2
, zlp41
, r
);
2946 assert(0 && "Attempting to generate a CLZ function with an"
2947 " unknown graphics card");
2952 AMDILTargetLowering::genf64toi64(SDValue RHS
, SelectionDAG
&DAG
,
2953 bool includeSign
) const
2958 DebugLoc DL
= RHS
.getDebugLoc();
2959 EVT RHSVT
= RHS
.getValueType();
2960 bool isVec
= RHSVT
.isVector();
2962 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
, RHSVT
2963 .getVectorNumElements()));
2964 INTVT
= EVT(MVT::getVectorVT(MVT::i32
, RHSVT
2965 .getVectorNumElements()));
2967 LONGVT
= EVT(MVT::i64
);
2968 INTVT
= EVT(MVT::i32
);
2970 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
2971 &this->getTargetMachine())->getSubtargetImpl();
2972 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2973 // unsigned version:
2974 // uint uhi = (uint)(d * 0x1.0p-32);
2975 // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
2976 // return as_ulong2((uint2)(ulo, uhi));
2979 // double ad = fabs(d);
2980 // long l = unsigned_version(ad);
2982 // return d == ad ? l : nl;
2985 d
= DAG
.getNode(ISD::FABS
, DL
, RHSVT
, d
);
2987 SDValue uhid
= DAG
.getNode(ISD::FMUL
, DL
, RHSVT
, d
,
2988 DAG
.getConstantFP(0x2f800000, RHSVT
));
2989 SDValue uhi
= DAG
.getNode(ISD::FP_TO_UINT
, DL
, INTVT
, uhid
);
2990 SDValue ulod
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, RHSVT
, uhi
);
2991 ulod
= DAG
.getNode(AMDILISD::MAD
, DL
, RHSVT
, ulod
,
2992 DAG
.getConstantFP(0xcf800000, RHSVT
), d
);
2993 SDValue ulo
= DAG
.getNode(ISD::FP_TO_UINT
, DL
, INTVT
, ulod
);
2994 SDValue l
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, ulo
, uhi
);
2996 SDValue nl
= DAG
.getNode(AMDILISD::INEGATE
, DL
, LONGVT
, l
);
2997 SDValue c
= DAG
.getNode(AMDILISD::CMP
, DL
, RHSVT
,
2998 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::f64
), MVT::i32
),
3000 l
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, LONGVT
, c
, l
, nl
);
3005 __attribute__((always_inline)) long
3006 cast_f64_to_i64(double d)
3008 // Convert d in to 32-bit components
3009 long x = as_long(d);
3013 // Generate 'normalized' mantissa
3014 mhi = xhi | 0x00100000; // hidden bit
3016 temp = xlo >> (32 - 11);
3020 // Compute shift right count from exponent
3021 e = (xhi >> (52-32)) & 0x7ff;
3026 // Compute result for 0 <= sr < 32
3027 rhi0 = mhi >> (sr &31);
3028 rlo0 = mlo >> (sr &31);
3029 temp = mhi << (32 - sr);
3031 rlo0 = sr ? temp : rlo0;
3033 // Compute result for 32 <= sr
3035 rlo1 = srge64 ? 0 : rhi0;
3037 // Pick between the 2 results
3038 rhi = srge32 ? rhi1 : rhi0;
3039 rlo = srge32 ? rlo1 : rlo0;
3041 // Optional saturate on overflow
3043 rhi = srlt0 ? MAXVALUE : rhi;
3044 rlo = srlt0 ? MAXVALUE : rlo;
3047 res = LCREATE( rlo, rhi );
3049 // Deal with sign bit (ignoring whether result is signed or unsigned value)
3051 sign = ((signed int) xhi) >> 31; fill with sign bit
3052 sign = LCREATE( sign, sign );
3060 SDValue c11
= DAG
.getConstant( 63 - 52, INTVT
);
3061 SDValue c32
= DAG
.getConstant( 32, INTVT
);
3063 // Convert d in to 32-bit components
3065 SDValue x
= DAG
.getNode(ISDBITCAST
, DL
, LONGVT
, d
);
3066 SDValue xhi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
3067 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
3069 // Generate 'normalized' mantissa
3070 SDValue mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
,
3071 xhi
, DAG
.getConstant( 0x00100000, INTVT
) );
3072 mhi
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, mhi
, c11
);
3073 SDValue temp
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
3074 xlo
, DAG
.getConstant( 32 - (63 - 52), INTVT
) );
3075 mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, mhi
, temp
);
3076 SDValue mlo
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xlo
, c11
);
3078 // Compute shift right count from exponent
3079 SDValue e
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
3080 xhi
, DAG
.getConstant( 52-32, INTVT
) );
3081 e
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
3082 e
, DAG
.getConstant( 0x7ff, INTVT
) );
3083 SDValue sr
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
3084 DAG
.getConstant( 1023 + 63, INTVT
), e
);
3085 SDValue srge64
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
3086 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
3087 sr
, DAG
.getConstant(64, INTVT
));
3088 SDValue srge32
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
3089 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
3090 sr
, DAG
.getConstant(32, INTVT
));
3092 // Compute result for 0 <= sr < 32
3093 SDValue rhi0
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, mhi
, sr
);
3094 SDValue rlo0
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, mlo
, sr
);
3095 temp
= DAG
.getNode( ISD::SUB
, DL
, INTVT
, c32
, sr
);
3096 temp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, mhi
, temp
);
3097 temp
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rlo0
, temp
);
3098 rlo0
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
, sr
, temp
, rlo0
);
3100 // Compute result for 32 <= sr
3101 SDValue rhi1
= DAG
.getConstant( 0, INTVT
);
3102 SDValue rlo1
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
3103 srge64
, rhi1
, rhi0
);
3105 // Pick between the 2 results
3106 SDValue rhi
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
3107 srge32
, rhi1
, rhi0
);
3108 SDValue rlo
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
3109 srge32
, rlo1
, rlo0
);
3112 SDValue res
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, rlo
, rhi
);
3114 // Deal with sign bit
3116 SDValue sign
= DAG
.getNode( ISD::SRA
, DL
, INTVT
,
3117 xhi
, DAG
.getConstant( 31, INTVT
) );
3118 sign
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, sign
, sign
);
3119 res
= DAG
.getNode( ISD::ADD
, DL
, LONGVT
, res
, sign
);
3120 res
= DAG
.getNode( ISD::XOR
, DL
, LONGVT
, res
, sign
);
3127 AMDILTargetLowering::genf64toi32(SDValue RHS
, SelectionDAG
&DAG
,
3128 bool includeSign
) const
3132 DebugLoc DL
= RHS
.getDebugLoc();
3133 EVT RHSVT
= RHS
.getValueType();
3134 bool isVec
= RHSVT
.isVector();
3136 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
,
3137 RHSVT
.getVectorNumElements()));
3138 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
3139 RHSVT
.getVectorNumElements()));
3141 LONGVT
= EVT(MVT::i64
);
3142 INTVT
= EVT(MVT::i32
);
3145 __attribute__((always_inline)) int
3146 cast_f64_to_[u|i]32(double d)
3148 // Convert d in to 32-bit components
3149 long x = as_long(d);
3153 // Generate 'normalized' mantissa
3154 mhi = xhi | 0x00100000; // hidden bit
3156 temp = xlo >> (32 - 11);
3159 // Compute shift right count from exponent
3160 e = (xhi >> (52-32)) & 0x7ff;
3164 // Compute result for 0 <= sr < 32
3165 res = mhi >> (sr &31);
3166 res = srge32 ? 0 : res;
3168 // Optional saturate on overflow
3170 res = srlt0 ? MAXVALUE : res;
3172 // Deal with sign bit (ignoring whether result is signed or unsigned value)
3174 sign = ((signed int) xhi) >> 31; fill with sign bit
3182 SDValue c11
= DAG
.getConstant( 63 - 52, INTVT
);
3184 // Convert d in to 32-bit components
3186 SDValue x
= DAG
.getNode(ISDBITCAST
, DL
, LONGVT
, d
);
3187 SDValue xhi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
3188 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
3190 // Generate 'normalized' mantissa
3191 SDValue mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
,
3192 xhi
, DAG
.getConstant( 0x00100000, INTVT
) );
3193 mhi
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, mhi
, c11
);
3194 SDValue temp
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
3195 xlo
, DAG
.getConstant( 32 - (63 - 52), INTVT
) );
3196 mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, mhi
, temp
);
3198 // Compute shift right count from exponent
3199 SDValue e
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
3200 xhi
, DAG
.getConstant( 52-32, INTVT
) );
3201 e
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
3202 e
, DAG
.getConstant( 0x7ff, INTVT
) );
3203 SDValue sr
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
3204 DAG
.getConstant( 1023 + 31, INTVT
), e
);
3205 SDValue srge32
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
3206 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
3207 sr
, DAG
.getConstant(32, INTVT
));
3209 // Compute result for 0 <= sr < 32
3210 SDValue res
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, mhi
, sr
);
3211 res
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
3212 srge32
, DAG
.getConstant(0,INTVT
), res
);
3214 // Deal with sign bit
3216 SDValue sign
= DAG
.getNode( ISD::SRA
, DL
, INTVT
,
3217 xhi
, DAG
.getConstant( 31, INTVT
) );
3218 res
= DAG
.getNode( ISD::ADD
, DL
, INTVT
, res
, sign
);
3219 res
= DAG
.getNode( ISD::XOR
, DL
, INTVT
, res
, sign
);
3224 AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op
, SelectionDAG
&DAG
) const
3226 SDValue RHS
= Op
.getOperand(0);
3227 EVT RHSVT
= RHS
.getValueType();
3228 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
3229 EVT LHSVT
= Op
.getValueType();
3230 MVT LST
= LHSVT
.getScalarType().getSimpleVT();
3231 DebugLoc DL
= Op
.getDebugLoc();
3233 const AMDILTargetMachine
*
3234 amdtm
= reinterpret_cast<const AMDILTargetMachine
*>
3235 (&this->getTargetMachine());
3236 const AMDILSubtarget
*
3237 stm
= static_cast<const AMDILSubtarget
*>(
3238 amdtm
->getSubtargetImpl());
3239 if (RST
== MVT::f64
&& RHSVT
.isVector()
3240 && stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3241 // We dont support vector 64bit floating point convertions.
3242 for (unsigned x
= 0, y
= RHSVT
.getVectorNumElements(); x
< y
; ++x
) {
3243 SDValue op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3244 DL
, RST
, RHS
, DAG
.getTargetConstant(x
, MVT::i32
));
3245 op
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, LST
, op
);
3247 DST
= DAG
.getNode(AMDILISD::VBUILD
, DL
, LHSVT
, op
);
3249 DST
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, LHSVT
,
3250 DST
, op
, DAG
.getTargetConstant(x
, MVT::i32
));
3255 && LST
== MVT::i32
) {
3256 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3257 DST
= SDValue(Op
.getNode(), 0);
3259 DST
= genf64toi32(RHS
, DAG
, true);
3261 } else if (RST
== MVT::f64
3262 && LST
== MVT::i64
) {
3263 DST
= genf64toi64(RHS
, DAG
, true);
3264 } else if (RST
== MVT::f64
3265 && (LST
== MVT::i8
|| LST
== MVT::i16
)) {
3266 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3267 DST
= DAG
.getNode(ISD::TRUNCATE
, DL
, LHSVT
, SDValue(Op
.getNode(), 0));
3269 SDValue ToInt
= genf64toi32(RHS
, DAG
, true);
3270 DST
= DAG
.getNode(ISD::TRUNCATE
, DL
, LHSVT
, ToInt
);
3274 DST
= SDValue(Op
.getNode(), 0);
3281 AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op
, SelectionDAG
&DAG
) const
3284 SDValue RHS
= Op
.getOperand(0);
3285 EVT RHSVT
= RHS
.getValueType();
3286 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
3287 EVT LHSVT
= Op
.getValueType();
3288 MVT LST
= LHSVT
.getScalarType().getSimpleVT();
3289 DebugLoc DL
= Op
.getDebugLoc();
3290 const AMDILTargetMachine
*
3291 amdtm
= reinterpret_cast<const AMDILTargetMachine
*>
3292 (&this->getTargetMachine());
3293 const AMDILSubtarget
*
3294 stm
= static_cast<const AMDILSubtarget
*>(
3295 amdtm
->getSubtargetImpl());
3296 if (RST
== MVT::f64
&& RHSVT
.isVector()
3297 && stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3298 // We dont support vector 64bit floating point convertions.
3299 for (unsigned x
= 0, y
= RHSVT
.getVectorNumElements(); x
< y
; ++x
) {
3300 SDValue op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3301 DL
, RST
, RHS
, DAG
.getTargetConstant(x
, MVT::i32
));
3302 op
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, LST
, op
);
3304 DST
= DAG
.getNode(AMDILISD::VBUILD
, DL
, LHSVT
, op
);
3306 DST
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, LHSVT
,
3307 DST
, op
, DAG
.getTargetConstant(x
, MVT::i32
));
3313 && LST
== MVT::i32
) {
3314 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3315 DST
= SDValue(Op
.getNode(), 0);
3317 DST
= genf64toi32(RHS
, DAG
, false);
3319 } else if (RST
== MVT::f64
3320 && LST
== MVT::i64
) {
3321 DST
= genf64toi64(RHS
, DAG
, false);
3322 } else if (RST
== MVT::f64
3323 && (LST
== MVT::i8
|| LST
== MVT::i16
)) {
3324 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3325 DST
= DAG
.getNode(ISD::TRUNCATE
, DL
, LHSVT
, SDValue(Op
.getNode(), 0));
3327 SDValue ToInt
= genf64toi32(RHS
, DAG
, false);
3328 DST
= DAG
.getNode(ISD::TRUNCATE
, DL
, LHSVT
, ToInt
);
3332 DST
= SDValue(Op
.getNode(), 0);
3338 AMDILTargetLowering::genu32tof64(SDValue RHS
, EVT LHSVT
,
3339 SelectionDAG
&DAG
) const
3341 EVT RHSVT
= RHS
.getValueType();
3342 DebugLoc DL
= RHS
.getDebugLoc();
3345 bool isVec
= RHSVT
.isVector();
3347 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
,
3348 RHSVT
.getVectorNumElements()));
3349 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
3350 RHSVT
.getVectorNumElements()));
3352 LONGVT
= EVT(MVT::i64
);
3353 INTVT
= EVT(MVT::i32
);
3356 const AMDILTargetMachine
*
3357 amdtm
= reinterpret_cast<const AMDILTargetMachine
*>
3358 (&this->getTargetMachine());
3359 const AMDILSubtarget
*
3360 stm
= static_cast<const AMDILSubtarget
*>(
3361 amdtm
->getSubtargetImpl());
3362 if (stm
->calVersion() >= CAL_VERSION_SC_135
) {
3363 // unsigned x = RHS;
3364 // ulong xd = (ulong)(0x4330_0000 << 32) | x;
3365 // double d = as_double( xd );
3366 // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
3367 SDValue xd
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, x
,
3368 DAG
.getConstant( 0x43300000, INTVT
) );
3369 SDValue d
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
, xd
);
3370 SDValue offsetd
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
,
3371 DAG
.getConstant( 0x4330000000000000ULL
, LONGVT
) );
3372 return DAG
.getNode( ISD::FSUB
, DL
, LHSVT
, d
, offsetd
);
3374 SDValue clz
= genCLZu32(x
, DAG
);
3376 // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
3377 // Except for an input 0... which requires a 0 exponent
3378 SDValue exp
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
3379 DAG
.getConstant( (1023+31), INTVT
), clz
);
3380 exp
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
, x
, exp
, x
);
3383 SDValue rhi
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, x
, clz
);
3385 // Eliminate hidden bit
3386 rhi
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
3387 rhi
, DAG
.getConstant( 0x7fffffff, INTVT
) );
3389 // Pack exponent and frac
3390 SDValue rlo
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
3391 rhi
, DAG
.getConstant( (32 - 11), INTVT
) );
3392 rhi
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
3393 rhi
, DAG
.getConstant( 11, INTVT
) );
3394 exp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
3395 exp
, DAG
.getConstant( 20, INTVT
) );
3396 rhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rhi
, exp
);
3398 // Convert 2 x 32 in to 1 x 64, then to double precision float type
3399 SDValue res
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, rlo
, rhi
);
3400 return DAG
.getNode(ISDBITCAST
, DL
, LHSVT
, res
);
3404 AMDILTargetLowering::genu64tof64(SDValue RHS
, EVT LHSVT
,
3405 SelectionDAG
&DAG
) const
3407 EVT RHSVT
= RHS
.getValueType();
3408 DebugLoc DL
= RHS
.getDebugLoc();
3411 bool isVec
= RHSVT
.isVector();
3413 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
3414 RHSVT
.getVectorNumElements()));
3416 INTVT
= EVT(MVT::i32
);
3420 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
3421 &this->getTargetMachine())->getSubtargetImpl();
3422 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3423 // double dhi = (double)(as_uint2(x).y);
3424 // double dlo = (double)(as_uint2(x).x);
3425 // return mad(dhi, 0x1.0p+32, dlo)
3426 SDValue dhi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
3427 dhi
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LHSVT
, dhi
);
3428 SDValue dlo
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
3429 dlo
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LHSVT
, dlo
);
3430 return DAG
.getNode(AMDILISD::MAD
, DL
, LHSVT
, dhi
,
3431 DAG
.getConstantFP(0x4f800000, LHSVT
), dlo
);
3432 } else if (stm
->calVersion() >= CAL_VERSION_SC_135
) {
3433 // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
3434 // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
3435 // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
3436 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
); // x & 0xffff_ffffUL
3437 SDValue xd
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, xlo
, DAG
.getConstant( 0x43300000, INTVT
) );
3438 SDValue lo
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
, xd
);
3439 SDValue xhi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
); // x >> 32
3440 SDValue xe
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, xhi
, DAG
.getConstant( 0x45300000, INTVT
) );
3441 SDValue hi
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
, xe
);
3442 SDValue c
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
,
3443 DAG
.getConstant( 0x4530000000100000ULL
, LONGVT
) );
3444 hi
= DAG
.getNode( ISD::FSUB
, DL
, LHSVT
, hi
, c
);
3445 return DAG
.getNode( ISD::FADD
, DL
, LHSVT
, hi
, lo
);
3448 SDValue clz
= genCLZu64(x
, DAG
);
3449 SDValue xhi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
3450 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
3452 // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
3453 SDValue exp
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
3454 DAG
.getConstant( (1023+63), INTVT
), clz
);
3455 SDValue mash
= DAG
.getNode( ISD::OR
, DL
, INTVT
, xhi
, xlo
);
3456 exp
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
3457 mash
, exp
, mash
); // exp = exp, or 0 if input was 0
3460 SDValue clz31
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
3461 clz
, DAG
.getConstant( 31, INTVT
) );
3462 SDValue rshift
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
3463 DAG
.getConstant( 32, INTVT
), clz31
);
3464 SDValue t1
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xhi
, clz31
);
3465 SDValue t2
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, xlo
, rshift
);
3466 t2
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
, clz31
, t2
, t1
);
3467 SDValue rhi1
= DAG
.getNode( ISD::OR
, DL
, INTVT
, t1
, t2
);
3468 SDValue rlo1
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xlo
, clz31
);
3469 SDValue rhi2
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xlo
, clz31
);
3470 SDValue rlo2
= DAG
.getConstant( 0, INTVT
);
3471 SDValue clz32
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
3472 clz
, DAG
.getConstant( 32, INTVT
) );
3473 SDValue rhi
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
3474 clz32
, rhi2
, rhi1
);
3475 SDValue rlo
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
3476 clz32
, rlo2
, rlo1
);
3478 // Eliminate hidden bit
3479 rhi
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
3480 rhi
, DAG
.getConstant( 0x7fffffff, INTVT
) );
3482 // Save bits needed to round properly
3483 SDValue round
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
3484 rlo
, DAG
.getConstant( 0x7ff, INTVT
) );
3486 // Pack exponent and frac
3487 rlo
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
3488 rlo
, DAG
.getConstant( 11, INTVT
) );
3489 SDValue temp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
3490 rhi
, DAG
.getConstant( (32 - 11), INTVT
) );
3491 rlo
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rlo
, temp
);
3492 rhi
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
3493 rhi
, DAG
.getConstant( 11, INTVT
) );
3494 exp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
3495 exp
, DAG
.getConstant( 20, INTVT
) );
3496 rhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rhi
, exp
);
3498 // Compute rounding bit
3499 SDValue even
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
3500 rlo
, DAG
.getConstant( 1, INTVT
) );
3501 SDValue grs
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
3502 round
, DAG
.getConstant( 0x3ff, INTVT
) );
3503 grs
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
3504 DAG
.getConstant( CondCCodeToCC( ISD::SETNE
, MVT::i32
), MVT::i32
),
3505 grs
, DAG
.getConstant( 0, INTVT
) ); // -1 if any GRS set, 0 if none
3506 grs
= DAG
.getNode( ISD::OR
, DL
, INTVT
, grs
, even
);
3507 round
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
3508 round
, DAG
.getConstant( 10, INTVT
) );
3509 round
= DAG
.getNode( ISD::AND
, DL
, INTVT
, round
, grs
); // 0 or 1
3512 SDValue lround
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
,
3513 round
, DAG
.getConstant( 0, INTVT
) );
3514 SDValue res
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, rlo
, rhi
);
3515 res
= DAG
.getNode( ISD::ADD
, DL
, LONGVT
, res
, lround
);
3516 return DAG
.getNode(ISDBITCAST
, DL
, LHSVT
, res
);
3520 AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op
, SelectionDAG
&DAG
) const
3522 SDValue RHS
= Op
.getOperand(0);
3523 EVT RHSVT
= RHS
.getValueType();
3524 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
3525 EVT LHSVT
= Op
.getValueType();
3526 MVT LST
= LHSVT
.getScalarType().getSimpleVT();
3527 DebugLoc DL
= Op
.getDebugLoc();
3531 const AMDILTargetMachine
*
3532 amdtm
= reinterpret_cast<const AMDILTargetMachine
*>
3533 (&this->getTargetMachine());
3534 const AMDILSubtarget
*
3535 stm
= static_cast<const AMDILSubtarget
*>(
3536 amdtm
->getSubtargetImpl());
3537 if (LST
== MVT::f64
&& LHSVT
.isVector()
3538 && stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3539 // We dont support vector 64bit floating point convertions.
3541 for (unsigned x
= 0, y
= LHSVT
.getVectorNumElements(); x
< y
; ++x
) {
3542 SDValue op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3543 DL
, RST
, RHS
, DAG
.getTargetConstant(x
, MVT::i32
));
3544 op
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LST
, op
);
3546 DST
= DAG
.getNode(AMDILISD::VBUILD
, DL
, LHSVT
, op
);
3548 DST
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, LHSVT
, DST
,
3549 op
, DAG
.getTargetConstant(x
, MVT::i32
));
3556 && LST
== MVT::f64
) {
3557 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3558 DST
= SDValue(Op
.getNode(), 0);
3560 DST
= genu32tof64(RHS
, LHSVT
, DAG
);
3562 } else if (RST
== MVT::i64
3563 && LST
== MVT::f64
) {
3564 DST
= genu64tof64(RHS
, LHSVT
, DAG
);
3566 DST
= SDValue(Op
.getNode(), 0);
3573 AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op
, SelectionDAG
&DAG
) const
3575 SDValue RHS
= Op
.getOperand(0);
3576 EVT RHSVT
= RHS
.getValueType();
3577 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
3581 bool isVec
= RHSVT
.isVector();
3582 DebugLoc DL
= Op
.getDebugLoc();
3583 EVT LHSVT
= Op
.getValueType();
3584 MVT LST
= LHSVT
.getScalarType().getSimpleVT();
3585 const AMDILTargetMachine
*
3586 amdtm
= reinterpret_cast<const AMDILTargetMachine
*>
3587 (&this->getTargetMachine());
3588 const AMDILSubtarget
*
3589 stm
= static_cast<const AMDILSubtarget
*>(
3590 amdtm
->getSubtargetImpl());
3591 if (LST
== MVT::f64
&& LHSVT
.isVector()
3592 && stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3593 // We dont support vector 64bit floating point convertions.
3594 for (unsigned x
= 0, y
= LHSVT
.getVectorNumElements(); x
< y
; ++x
) {
3595 SDValue op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3596 DL
, RST
, RHS
, DAG
.getTargetConstant(x
, MVT::i32
));
3597 op
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LST
, op
);
3599 DST
= DAG
.getNode(AMDILISD::VBUILD
, DL
, LHSVT
, op
);
3601 DST
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, LHSVT
, DST
,
3602 op
, DAG
.getTargetConstant(x
, MVT::i32
));
3609 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
,
3610 RHSVT
.getVectorNumElements()));
3611 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
3612 RHSVT
.getVectorNumElements()));
3614 LONGVT
= EVT(MVT::i64
);
3615 INTVT
= EVT(MVT::i32
);
3617 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
3618 if ((RST
== MVT::i32
|| RST
== MVT::i64
)
3619 && LST
== MVT::f64
) {
3620 if (RST
== MVT::i32
) {
3621 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3622 DST
= SDValue(Op
.getNode(), 0);
3626 SDValue c31
= DAG
.getConstant( 31, INTVT
);
3627 SDValue cSbit
= DAG
.getConstant( 0x80000000, INTVT
);
3629 SDValue S
; // Sign, as 0 or -1
3630 SDValue Sbit
; // Sign bit, as one bit, MSB only.
3631 if (RST
== MVT::i32
) {
3632 Sbit
= DAG
.getNode( ISD::AND
, DL
, INTVT
, RHS
, cSbit
);
3633 S
= DAG
.getNode(ISD::SRA
, DL
, RHSVT
, RHS
, c31
);
3634 } else { // 64-bit case... SRA of 64-bit values is slow
3635 SDValue hi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, RHS
);
3636 Sbit
= DAG
.getNode( ISD::AND
, DL
, INTVT
, hi
, cSbit
);
3637 SDValue temp
= DAG
.getNode( ISD::SRA
, DL
, INTVT
, hi
, c31
);
3638 S
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, RHSVT
, temp
, temp
);
3641 // get abs() of input value, given sign as S (0 or -1)
3643 SDValue SpI
= DAG
.getNode(ISD::ADD
, DL
, RHSVT
, RHS
, S
);
3645 SDValue SpIxS
= DAG
.getNode(ISD::XOR
, DL
, RHSVT
, SpI
, S
);
3647 // Convert unsigned value to double precision
3649 if (RST
== MVT::i32
) {
3650 // r = cast_u32_to_f64(SpIxS)
3651 R
= genu32tof64(SpIxS
, LHSVT
, DAG
);
3653 // r = cast_u64_to_f64(SpIxS)
3654 R
= genu64tof64(SpIxS
, LHSVT
, DAG
);
3657 // drop in the sign bit
3658 SDValue t
= DAG
.getNode( AMDILISD::BITCONV
, DL
, LONGVT
, R
);
3659 SDValue thi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, t
);
3660 SDValue tlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, t
);
3661 thi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, thi
, Sbit
);
3662 t
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, tlo
, thi
);
3663 DST
= DAG
.getNode( AMDILISD::BITCONV
, DL
, LHSVT
, t
);
3665 DST
= SDValue(Op
.getNode(), 0);
3671 AMDILTargetLowering::LowerSUB(SDValue Op
, SelectionDAG
&DAG
) const
3673 SDValue LHS
= Op
.getOperand(0);
3674 SDValue RHS
= Op
.getOperand(1);
3675 DebugLoc DL
= Op
.getDebugLoc();
3676 EVT OVT
= Op
.getValueType();
3678 bool isVec
= RHS
.getValueType().isVector();
3679 if (OVT
.getScalarType() == MVT::i64
) {
3680 /*const AMDILTargetMachine*
3681 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3682 (&this->getTargetMachine());
3683 const AMDILSubtarget*
3684 stm = dynamic_cast<const AMDILSubtarget*>(
3685 amdtm->getSubtargetImpl());*/
3686 MVT INTTY
= MVT::i32
;
3687 if (OVT
== MVT::v2i64
) {
3690 SDValue LHSLO
, LHSHI
, RHSLO
, RHSHI
, INTLO
, INTHI
;
3691 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
3692 LHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, LHS
);
3693 RHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, RHS
);
3694 LHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, LHS
);
3695 RHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, RHS
);
3696 INTLO
= DAG
.getNode(ISD::SUB
, DL
, INTTY
, LHSLO
, RHSLO
);
3697 INTHI
= DAG
.getNode(ISD::SUB
, DL
, INTTY
, LHSHI
, RHSHI
);
3698 //TODO: need to use IBORROW on HD5XXX and later hardware
3700 if (OVT
== MVT::i64
) {
3701 cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
3702 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
3707 SDValue LHSRLO
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3708 DL
, MVT::i32
, LHSLO
, DAG
.getTargetConstant(0, MVT::i32
));
3709 SDValue LHSRHI
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3710 DL
, MVT::i32
, LHSLO
, DAG
.getTargetConstant(1, MVT::i32
));
3711 SDValue RHSRLO
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3712 DL
, MVT::i32
, RHSLO
, DAG
.getTargetConstant(0, MVT::i32
));
3713 SDValue RHSRHI
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3714 DL
, MVT::i32
, RHSLO
, DAG
.getTargetConstant(1, MVT::i32
));
3715 cmplo
= DAG
.getNode(AMDILISD::CMP
, DL
, MVT::i32
,
3716 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
3718 cmphi
= DAG
.getNode(AMDILISD::CMP
, DL
, MVT::i32
,
3719 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
3721 cmp
= DAG
.getNode(AMDILISD::VBUILD
, DL
, MVT::v2i32
, cmplo
);
3722 cmp
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v2i32
,
3723 cmp
, cmphi
, DAG
.getTargetConstant(1, MVT::i32
));
3725 INTHI
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, INTHI
, cmp
);
3726 DST
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, OVT
,
3729 DST
= SDValue(Op
.getNode(), 0);
3734 AMDILTargetLowering::LowerFDIV(SDValue Op
, SelectionDAG
&DAG
) const
3736 EVT OVT
= Op
.getValueType();
3738 if (OVT
.getScalarType() == MVT::f64
) {
3739 DST
= LowerFDIV64(Op
, DAG
);
3740 } else if (OVT
.getScalarType() == MVT::f32
) {
3741 DST
= LowerFDIV32(Op
, DAG
);
3743 DST
= SDValue(Op
.getNode(), 0);
3749 AMDILTargetLowering::LowerSDIV(SDValue Op
, SelectionDAG
&DAG
) const
3751 EVT OVT
= Op
.getValueType();
3753 if (OVT
.getScalarType() == MVT::i64
) {
3754 DST
= LowerSDIV64(Op
, DAG
);
3755 } else if (OVT
.getScalarType() == MVT::i32
) {
3756 DST
= LowerSDIV32(Op
, DAG
);
3757 } else if (OVT
.getScalarType() == MVT::i16
3758 || OVT
.getScalarType() == MVT::i8
) {
3759 DST
= LowerSDIV24(Op
, DAG
);
3761 DST
= SDValue(Op
.getNode(), 0);
3767 AMDILTargetLowering::LowerUDIV(SDValue Op
, SelectionDAG
&DAG
) const
3769 EVT OVT
= Op
.getValueType();
3771 if (OVT
.getScalarType() == MVT::i64
) {
3772 DST
= LowerUDIV64(Op
, DAG
);
3773 } else if (OVT
.getScalarType() == MVT::i32
) {
3774 DST
= LowerUDIV32(Op
, DAG
);
3775 } else if (OVT
.getScalarType() == MVT::i16
3776 || OVT
.getScalarType() == MVT::i8
) {
3777 DST
= LowerUDIV24(Op
, DAG
);
3779 DST
= SDValue(Op
.getNode(), 0);
3785 AMDILTargetLowering::LowerSREM(SDValue Op
, SelectionDAG
&DAG
) const
3787 EVT OVT
= Op
.getValueType();
3789 if (OVT
.getScalarType() == MVT::i64
) {
3790 DST
= LowerSREM64(Op
, DAG
);
3791 } else if (OVT
.getScalarType() == MVT::i32
) {
3792 DST
= LowerSREM32(Op
, DAG
);
3793 } else if (OVT
.getScalarType() == MVT::i16
) {
3794 DST
= LowerSREM16(Op
, DAG
);
3795 } else if (OVT
.getScalarType() == MVT::i8
) {
3796 DST
= LowerSREM8(Op
, DAG
);
3798 DST
= SDValue(Op
.getNode(), 0);
3804 AMDILTargetLowering::LowerUREM(SDValue Op
, SelectionDAG
&DAG
) const
3806 EVT OVT
= Op
.getValueType();
3808 if (OVT
.getScalarType() == MVT::i64
) {
3809 DST
= LowerUREM64(Op
, DAG
);
3810 } else if (OVT
.getScalarType() == MVT::i32
) {
3811 DST
= LowerUREM32(Op
, DAG
);
3812 } else if (OVT
.getScalarType() == MVT::i16
) {
3813 DST
= LowerUREM16(Op
, DAG
);
3814 } else if (OVT
.getScalarType() == MVT::i8
) {
3815 DST
= LowerUREM8(Op
, DAG
);
3817 DST
= SDValue(Op
.getNode(), 0);
3823 AMDILTargetLowering::LowerMUL(SDValue Op
, SelectionDAG
&DAG
) const
3825 DebugLoc DL
= Op
.getDebugLoc();
3826 EVT OVT
= Op
.getValueType();
3828 bool isVec
= OVT
.isVector();
3829 if (OVT
.getScalarType() != MVT::i64
)
3831 DST
= SDValue(Op
.getNode(), 0);
3833 assert(OVT
.getScalarType() == MVT::i64
&& "Only 64 bit mul should be lowered!");
3834 // TODO: This needs to be turned into a tablegen pattern
3835 SDValue LHS
= Op
.getOperand(0);
3836 SDValue RHS
= Op
.getOperand(1);
3838 MVT INTTY
= MVT::i32
;
3839 if (OVT
== MVT::v2i64
) {
3842 // mul64(h1, l1, h0, l0)
3843 SDValue LHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
,
3846 SDValue LHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
,
3849 SDValue RHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
,
3852 SDValue RHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
,
3855 // MULLO_UINT_1 r1, h0, l1
3856 SDValue RHILLO
= DAG
.getNode(AMDILISD::UMUL
,
3858 INTTY
, RHSHI
, LHSLO
);
3859 // MULLO_UINT_1 r2, h1, l0
3860 SDValue RLOHHI
= DAG
.getNode(AMDILISD::UMUL
,
3862 INTTY
, RHSLO
, LHSHI
);
3863 // ADD_INT hr, r1, r2
3864 SDValue ADDHI
= DAG
.getNode(ISD::ADD
,
3866 INTTY
, RHILLO
, RLOHHI
);
3867 // MULHI_UINT_1 r3, l1, l0
3868 SDValue RLOLLO
= DAG
.getNode(ISD::MULHU
,
3870 INTTY
, RHSLO
, LHSLO
);
3871 // ADD_INT hr, hr, r3
3872 SDValue HIGH
= DAG
.getNode(ISD::ADD
,
3874 INTTY
, ADDHI
, RLOLLO
);
3875 // MULLO_UINT_1 l3, l1, l0
3876 SDValue LOW
= DAG
.getNode(AMDILISD::UMUL
,
3878 INTTY
, LHSLO
, RHSLO
);
3879 DST
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
,
// Custom lowering for BUILD_VECTOR.
//
// The visible code seeds the result (Nodes1) with an AMDILISD::VBUILD of
// operand 0 and then, inside a switch on the operand count, adds operands 3,
// 2 and 1 with ISD::INSERT_VECTOR_ELT, skipping any operand whose opcode is
// ISD::UNDEF.
//
// NOTE(review): the insert indices are the constants 7, 6 and 5 rather than
// 3, 2 and 1 -- presumably a target-specific lane encoding; confirm against
// the INSERT_VECTOR_ELT lowering before changing them.
3886 AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op
, SelectionDAG
&DAG
) const
3888 EVT VT
= Op
.getValueType();
3893 DebugLoc DL
= Op
.getDebugLoc();
// Start from a vector built out of the first operand only.
3894 Nodes1
= DAG
.getNode(AMDILISD::VBUILD
,
3896 VT
, Op
.getOperand(0));
// Compares operand 0 against every other operand.
// NOTE(review): how allEqual is consumed is not visible here -- verify
// whether this check is active before relying on it.
3898 bool allEqual
= true;
3899 for (unsigned x
= 1, y
= Op
.getNumOperands(); x
< y
; ++x
) {
3900 if (Op
.getOperand(0) != Op
.getOperand(x
)) {
3909 switch(Op
.getNumOperands()) {
// Fourth operand (operand index 3), inserted with index constant 7.
3914 fourth
= Op
.getOperand(3);
3915 if (fourth
.getOpcode() != ISD::UNDEF
) {
3916 Nodes1
= DAG
.getNode(
3917 ISD::INSERT_VECTOR_ELT
,
3922 DAG
.getConstant(7, MVT::i32
));
// Third operand (operand index 2), inserted with index constant 6.
3925 third
= Op
.getOperand(2);
3926 if (third
.getOpcode() != ISD::UNDEF
) {
3927 Nodes1
= DAG
.getNode(
3928 ISD::INSERT_VECTOR_ELT
,
3933 DAG
.getConstant(6, MVT::i32
));
// Second operand (operand index 1), inserted with index constant 5.
3936 second
= Op
.getOperand(1);
3937 if (second
.getOpcode() != ISD::UNDEF
) {
3938 Nodes1
= DAG
.getNode(
3939 ISD::INSERT_VECTOR_ELT
,
3944 DAG
.getConstant(5, MVT::i32
));
3952 AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op
,
3953 SelectionDAG
&DAG
) const
3955 DebugLoc DL
= Op
.getDebugLoc();
3956 EVT VT
= Op
.getValueType();
3957 const SDValue
*ptr
= NULL
;
3958 const ConstantSDNode
*CSDN
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(2));
3959 uint32_t swizzleNum
= 0;
3961 if (!VT
.isVector()) {
3962 SDValue Res
= Op
.getOperand(0);
3966 if (Op
.getOperand(1).getOpcode() != ISD::UNDEF
) {
3967 ptr
= &Op
.getOperand(1);
3969 ptr
= &Op
.getOperand(0);
3972 swizzleNum
= (uint32_t)CSDN
->getZExtValue();
3973 uint32_t mask2
= 0x04030201 & ~(0xFF << (swizzleNum
* 8));
3974 uint32_t mask3
= 0x01010101 & (0xFF << (swizzleNum
* 8));
3975 DST
= DAG
.getNode(AMDILISD::VINSERT
,
3980 DAG
.getTargetConstant(mask2
, MVT::i32
),
3981 DAG
.getTargetConstant(mask3
, MVT::i32
));
3983 uint32_t mask2
= 0x04030201 & ~(0xFF << (swizzleNum
* 8));
3984 uint32_t mask3
= 0x01010101 & (0xFF << (swizzleNum
* 8));
3985 SDValue res
= DAG
.getNode(AMDILISD::VINSERT
,
3986 DL
, VT
, Op
.getOperand(0), *ptr
,
3987 DAG
.getTargetConstant(mask2
, MVT::i32
),
3988 DAG
.getTargetConstant(mask3
, MVT::i32
));
3989 for (uint32_t x
= 1; x
< VT
.getVectorNumElements(); ++x
) {
3990 mask2
= 0x04030201 & ~(0xFF << (x
* 8));
3991 mask3
= 0x01010101 & (0xFF << (x
* 8));
3992 SDValue t
= DAG
.getNode(AMDILISD::VINSERT
,
3993 DL
, VT
, Op
.getOperand(0), *ptr
,
3994 DAG
.getTargetConstant(mask2
, MVT::i32
),
3995 DAG
.getTargetConstant(mask3
, MVT::i32
));
3996 SDValue c
= DAG
.getNode(AMDILISD::CMP
, DL
, ptr
->getValueType(),
3997 DAG
.getConstant(AMDILCC::IL_CC_I_EQ
, MVT::i32
),
3998 Op
.getOperand(2), DAG
.getConstant(x
, MVT::i32
));
3999 c
= DAG
.getNode(AMDILISD::VBUILD
, DL
, Op
.getValueType(), c
);
4000 res
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, VT
, c
, t
, res
);
4008 AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op
,
4009 SelectionDAG
&DAG
) const
4011 EVT VT
= Op
.getValueType();
4012 const ConstantSDNode
*CSDN
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
4013 uint64_t swizzleNum
= 0;
4014 DebugLoc DL
= Op
.getDebugLoc();
4016 if (!Op
.getOperand(0).getValueType().isVector()) {
4017 Res
= Op
.getOperand(0);
4021 // Static vector extraction
4022 swizzleNum
= CSDN
->getZExtValue() + 1;
4023 Res
= DAG
.getNode(AMDILISD::VEXTRACT
,
4026 DAG
.getTargetConstant(swizzleNum
, MVT::i32
));
4028 SDValue Op1
= Op
.getOperand(1);
4029 uint32_t vecSize
= 4;
4030 SDValue Op0
= Op
.getOperand(0);
4031 SDValue res
= DAG
.getNode(AMDILISD::VEXTRACT
,
4033 DAG
.getTargetConstant(1, MVT::i32
));
4034 if (Op0
.getValueType().isVector()) {
4035 vecSize
= Op0
.getValueType().getVectorNumElements();
4037 for (uint32_t x
= 2; x
<= vecSize
; ++x
) {
4038 SDValue t
= DAG
.getNode(AMDILISD::VEXTRACT
,
4040 DAG
.getTargetConstant(x
, MVT::i32
));
4041 SDValue c
= DAG
.getNode(AMDILISD::CMP
,
4042 DL
, Op1
.getValueType(),
4043 DAG
.getConstant(AMDILCC::IL_CC_I_EQ
, MVT::i32
),
4044 Op1
, DAG
.getConstant(x
, MVT::i32
));
4045 res
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
,
4055 AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op
,
4056 SelectionDAG
&DAG
) const
4058 uint32_t vecSize
= Op
.getValueType().getVectorNumElements();
4059 SDValue src
= Op
.getOperand(0);
4060 const ConstantSDNode
*CSDN
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
4061 uint64_t offset
= 0;
4062 EVT vecType
= Op
.getValueType().getVectorElementType();
4063 DebugLoc DL
= Op
.getDebugLoc();
4066 offset
= CSDN
->getZExtValue();
4067 Result
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
4068 DL
,vecType
, src
, DAG
.getConstant(offset
, MVT::i32
));
4069 Result
= DAG
.getNode(AMDILISD::VBUILD
, DL
,
4070 Op
.getValueType(), Result
);
4071 for (uint32_t x
= 1; x
< vecSize
; ++x
) {
4072 SDValue elt
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, vecType
,
4073 src
, DAG
.getConstant(offset
+ x
, MVT::i32
));
4074 if (elt
.getOpcode() != ISD::UNDEF
) {
4075 Result
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
,
4076 Op
.getValueType(), Result
, elt
,
4077 DAG
.getConstant(x
, MVT::i32
));
4081 SDValue idx
= Op
.getOperand(1);
4082 Result
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
4083 DL
, vecType
, src
, idx
);
4084 Result
= DAG
.getNode(AMDILISD::VBUILD
, DL
,
4085 Op
.getValueType(), Result
);
4086 for (uint32_t x
= 1; x
< vecSize
; ++x
) {
4087 idx
= DAG
.getNode(ISD::ADD
, DL
, vecType
,
4088 idx
, DAG
.getConstant(1, MVT::i32
));
4089 SDValue elt
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, vecType
,
4091 if (elt
.getOpcode() != ISD::UNDEF
) {
4092 Result
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
,
4093 Op
.getValueType(), Result
, elt
, idx
);
4100 AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op
,
4101 SelectionDAG
&DAG
) const
4103 SDValue Res
= DAG
.getNode(AMDILISD::VBUILD
,
4110 AMDILTargetLowering::LowerAND(SDValue Op
, SelectionDAG
&DAG
) const
4113 andOp
= DAG
.getNode(
4122 AMDILTargetLowering::LowerOR(SDValue Op
, SelectionDAG
&DAG
) const
4125 orOp
= DAG
.getNode(AMDILISD::OR
,
4133 AMDILTargetLowering::LowerSELECT(SDValue Op
, SelectionDAG
&DAG
) const
4135 SDValue Cond
= Op
.getOperand(0);
4136 SDValue LHS
= Op
.getOperand(1);
4137 SDValue RHS
= Op
.getOperand(2);
4138 DebugLoc DL
= Op
.getDebugLoc();
4139 Cond
= getConversionNode(DAG
, Cond
, Op
, true);
4140 Cond
= DAG
.getNode(AMDILISD::CMOVLOG
,
4142 Op
.getValueType(), Cond
, LHS
, RHS
);
4146 AMDILTargetLowering::LowerSELECT_CC(SDValue Op
, SelectionDAG
&DAG
) const
4149 SDValue LHS
= Op
.getOperand(0);
4150 SDValue RHS
= Op
.getOperand(1);
4151 SDValue TRUE
= Op
.getOperand(2);
4152 SDValue FALSE
= Op
.getOperand(3);
4153 SDValue CC
= Op
.getOperand(4);
4154 DebugLoc DL
= Op
.getDebugLoc();
4155 bool skipCMov
= false;
4156 bool genINot
= false;
4157 EVT OVT
= Op
.getValueType();
4159 // Check for possible elimination of cmov
4160 if (TRUE
.getValueType().getSimpleVT().SimpleTy
== MVT::i32
) {
4161 const ConstantSDNode
*trueConst
4162 = dyn_cast
<ConstantSDNode
>( TRUE
.getNode() );
4163 const ConstantSDNode
*falseConst
4164 = dyn_cast
<ConstantSDNode
>( FALSE
.getNode() );
4165 if (trueConst
&& falseConst
) {
4166 // both possible result values are constants
4167 if (trueConst
->isAllOnesValue()
4168 && falseConst
->isNullValue()) { // and convenient constants
4171 else if (trueConst
->isNullValue()
4172 && falseConst
->isAllOnesValue()) { // less convenient
4178 ISD::CondCode SetCCOpcode
= cast
<CondCodeSDNode
>(CC
)->get();
4179 unsigned int AMDILCC
= CondCCodeToCC(
4181 LHS
.getValueType().getSimpleVT().SimpleTy
);
4182 assert((AMDILCC
!= AMDILCC::COND_ERROR
) && "Invalid SetCC!");
4187 DAG
.getConstant(AMDILCC
, MVT::i32
),
4190 Cond
= getConversionNode(DAG
, Cond
, Op
, true);
4192 Cond
= DAG
.getNode(AMDILISD::NOT
, DL
, OVT
, Cond
);
4195 Cond
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, Cond
, TRUE
, FALSE
);
4200 AMDILTargetLowering::LowerSETCC(SDValue Op
, SelectionDAG
&DAG
) const
4203 SDValue LHS
= Op
.getOperand(0);
4204 SDValue RHS
= Op
.getOperand(1);
4205 SDValue CC
= Op
.getOperand(2);
4206 DebugLoc DL
= Op
.getDebugLoc();
4207 ISD::CondCode SetCCOpcode
= cast
<CondCodeSDNode
>(CC
)->get();
4208 unsigned int AMDILCC
= CondCCodeToCC(
4210 LHS
.getValueType().getSimpleVT().SimpleTy
);
4211 assert((AMDILCC
!= AMDILCC::COND_ERROR
) && "Invalid SetCC!");
4216 DAG
.getConstant(AMDILCC
, MVT::i32
),
4219 Cond
= getConversionNode(DAG
, Cond
, Op
, true);
4223 Cond
.getValueType(),
4224 DAG
.getConstant(1, Cond
.getValueType()),
4230 AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op
, SelectionDAG
&DAG
) const
4232 SDValue Data
= Op
.getOperand(0);
4233 VTSDNode
*BaseType
= cast
<VTSDNode
>(Op
.getOperand(1));
4234 DebugLoc DL
= Op
.getDebugLoc();
4235 EVT DVT
= Data
.getValueType();
4236 EVT BVT
= BaseType
->getVT();
4237 unsigned baseBits
= BVT
.getScalarType().getSizeInBits();
4238 unsigned srcBits
= DVT
.isSimple() ? DVT
.getScalarType().getSizeInBits() : 1;
4239 unsigned shiftBits
= srcBits
- baseBits
;
4241 // If the op is less than 32 bits, then it needs to extend to 32bits
4242 // so it can properly keep the upper bits valid.
4243 EVT IVT
= genIntType(32, DVT
.isVector() ? DVT
.getVectorNumElements() : 1);
4244 Data
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, IVT
, Data
);
4245 shiftBits
= 32 - baseBits
;
4248 SDValue Shift
= DAG
.getConstant(shiftBits
, DVT
);
4249 // Shift left by 'Shift' bits.
4250 Data
= DAG
.getNode(ISD::SHL
, DL
, DVT
, Data
, Shift
);
4251 // Signed shift Right by 'Shift' bits.
4252 Data
= DAG
.getNode(ISD::SRA
, DL
, DVT
, Data
, Shift
);
4254 // Once the sign extension is done, the op needs to be converted to
4255 // its original type.
4256 Data
= DAG
.getSExtOrTrunc(Data
, DL
, Op
.getOperand(0).getValueType());
4261 AMDILTargetLowering::genIntType(uint32_t size
, uint32_t numEle
) const
4263 int iSize
= (size
* numEle
);
4264 int vEle
= (iSize
>> ((size
== 64) ? 6 : 5));
4270 return EVT(MVT::i64
);
4272 return EVT(MVT::getVectorVT(MVT::i64
, vEle
));
4276 return EVT(MVT::i32
);
4278 return EVT(MVT::getVectorVT(MVT::i32
, vEle
));
4284 AMDILTargetLowering::LowerBITCAST(SDValue Op
, SelectionDAG
&DAG
) const
4286 SDValue Src
= Op
.getOperand(0);
4289 DebugLoc DL
= Op
.getDebugLoc();
4290 EVT SrcVT
= Src
.getValueType();
4291 EVT DstVT
= Dst
.getValueType();
4292 // Let's bitcast the floating point types to an
4293 // equivalent integer type before converting to vectors.
4294 if (SrcVT
.getScalarType().isFloatingPoint()) {
4295 Src
= DAG
.getNode(AMDILISD::BITCONV
, DL
, genIntType(
4296 SrcVT
.getScalarType().getSimpleVT().getSizeInBits(),
4297 SrcVT
.isVector() ? SrcVT
.getVectorNumElements() : 1),
4299 SrcVT
= Src
.getValueType();
4301 uint32_t ScalarSrcSize
= SrcVT
.getScalarType()
4302 .getSimpleVT().getSizeInBits();
4303 uint32_t ScalarDstSize
= DstVT
.getScalarType()
4304 .getSimpleVT().getSizeInBits();
4305 uint32_t SrcNumEle
= SrcVT
.isVector() ? SrcVT
.getVectorNumElements() : 1;
4306 uint32_t DstNumEle
= DstVT
.isVector() ? DstVT
.getVectorNumElements() : 1;
4307 bool isVec
= SrcVT
.isVector();
4308 if (DstVT
.getScalarType().isInteger() &&
4309 (SrcVT
.getScalarType().isInteger()
4310 || SrcVT
.getScalarType().isFloatingPoint())) {
4311 if ((ScalarDstSize
== 64 && SrcNumEle
== 4 && ScalarSrcSize
== 16)
4312 || (ScalarSrcSize
== 64
4314 && ScalarDstSize
== 16)) {
4315 // This is the problematic case when bitcasting i64 <-> <4 x i16>
4316 // This approach is a little different as we cannot generate a
4318 // as that is illegal in our backend and we are already past
4319 // the DAG legalizer.
4320 // So, in this case, we will do the following conversion.
4322 // %dst = <4 x i16> %src bitconvert i64 ==>
4323 // %tmp = <4 x i16> %src convert <4 x i32>
4324 // %tmp = <4 x i32> %tmp and 0xFFFF
4325 // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
4326 // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
4327 // %dst = <2 x i32> %tmp bitcast i64
4329 // %dst = i64 %src bitconvert <4 x i16> ==>
4330 // %tmp = i64 %src bitcast <2 x i32>
4331 // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
4332 // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
4333 // %tmp = <4 x i32> %tmp and 0xFFFF
4334 // %dst = <4 x i16> %tmp bitcast <4 x i32>
4335 SDValue mask
= DAG
.getNode(AMDILISD::VBUILD
, DL
, MVT::v4i32
,
4336 DAG
.getConstant(0xFFFF, MVT::i32
));
4337 SDValue const16
= DAG
.getConstant(16, MVT::i32
);
4338 if (ScalarDstSize
== 64) {
4340 Op
= DAG
.getSExtOrTrunc(Src
, DL
, MVT::v4i32
);
4341 Op
= DAG
.getNode(ISD::AND
, DL
, Op
.getValueType(), Op
, mask
);
4342 SDValue x
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, MVT::i32
,
4343 Op
, DAG
.getConstant(0, MVT::i32
));
4344 SDValue y
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, MVT::i32
,
4345 Op
, DAG
.getConstant(1, MVT::i32
));
4346 y
= DAG
.getNode(ISD::SHL
, DL
, MVT::i32
, y
, const16
);
4347 SDValue z
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, MVT::i32
,
4348 Op
, DAG
.getConstant(2, MVT::i32
));
4349 SDValue w
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, MVT::i32
,
4350 Op
, DAG
.getConstant(3, MVT::i32
));
4351 w
= DAG
.getNode(ISD::SHL
, DL
, MVT::i32
, w
, const16
);
4352 x
= DAG
.getNode(ISD::OR
, DL
, MVT::i32
, x
, y
);
4353 y
= DAG
.getNode(ISD::OR
, DL
, MVT::i32
, z
, w
);
4354 Res
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, MVT::i64
, x
, y
);
4358 SDValue lo
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, MVT::i32
, Src
);
4360 = DAG
.getNode(ISD::SRL
, DL
, MVT::i32
, lo
, const16
);
4361 SDValue hi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, MVT::i32
, Src
);
4363 = DAG
.getNode(ISD::SRL
, DL
, MVT::i32
, hi
, const16
);
4364 SDValue resVec
= DAG
.getNode(AMDILISD::VBUILD
, DL
,
4366 SDValue idxVal
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
4367 getPointerTy(), DAG
.getConstant(1, MVT::i32
));
4368 resVec
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v4i32
,
4369 resVec
, lor16
, idxVal
);
4370 idxVal
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
4371 getPointerTy(), DAG
.getConstant(2, MVT::i32
));
4372 resVec
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v4i32
,
4373 resVec
, hi
, idxVal
);
4374 idxVal
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
4375 getPointerTy(), DAG
.getConstant(3, MVT::i32
));
4376 resVec
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v4i32
,
4377 resVec
, hir16
, idxVal
);
4378 resVec
= DAG
.getNode(ISD::AND
, DL
, MVT::v4i32
, resVec
, mask
);
4379 Res
= DAG
.getSExtOrTrunc(resVec
, DL
, MVT::v4i16
);
4383 // There are four cases we need to worry about for bitcasts
4384 // where the size of all
4385 // source, intermediates and result is <= 128 bits, unlike
4387 // 1) Sub32bit bitcast 32bitAlign
4388 // %dst = <4 x i8> bitcast i32
4389 // (also <[2|4] x i16> to <[2|4] x i32>)
4390 // 2) 32bitAlign bitcast Sub32bit
4391 // %dst = i32 bitcast <4 x i8>
4392 // 3) Sub32bit bitcast LargerSub32bit
4393 // %dst = <2 x i8> bitcast i16
4394 // (also <4 x i8> to <2 x i16>)
4395 // 4) Sub32bit bitcast SmallerSub32bit
4396 // %dst = i16 bitcast <2 x i8>
4397 // (also <2 x i16> to <4 x i8>)
4398 // This also only handles types that are powers of two
4399 if ((ScalarDstSize
& (ScalarDstSize
- 1))
4400 || (ScalarSrcSize
& (ScalarSrcSize
- 1))) {
4401 } else if (ScalarDstSize
>= 32 && ScalarSrcSize
< 32) {
4403 EVT IntTy
= genIntType(ScalarDstSize
, SrcNumEle
);
4404 #if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
4405 SDValue res
= DAG
.getSExtOrTrunc(Src
, DL
, IntTy
);
4407 SDValue res
= DAG
.getNode(AMDILISD::VBUILD
, DL
, IntTy
,
4408 DAG
.getUNDEF(IntTy
.getScalarType()));
4409 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
4410 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
4411 getPointerTy(), DAG
.getConstant(x
, MVT::i32
));
4412 SDValue temp
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
,
4413 SrcVT
.getScalarType(), Src
,
4414 DAG
.getConstant(x
, MVT::i32
));
4415 temp
= DAG
.getSExtOrTrunc(temp
, DL
, IntTy
.getScalarType());
4416 res
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, IntTy
,
4420 SDValue mask
= DAG
.getNode(AMDILISD::VBUILD
, DL
, IntTy
,
4421 DAG
.getConstant((1 << ScalarSrcSize
) - 1, MVT::i32
));
4422 SDValue
*newEle
= new SDValue
[SrcNumEle
];
4423 res
= DAG
.getNode(ISD::AND
, DL
, IntTy
, res
, mask
);
4424 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
4425 newEle
[x
] = DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
,
4426 IntTy
.getScalarType(), res
,
4427 DAG
.getConstant(x
, MVT::i32
));
4429 uint32_t Ratio
= SrcNumEle
/ DstNumEle
;
4430 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
4432 newEle
[x
] = DAG
.getNode(ISD::SHL
, DL
,
4433 IntTy
.getScalarType(), newEle
[x
],
4434 DAG
.getConstant(ScalarSrcSize
* (x
% Ratio
),
4438 for (uint32_t x
= 0; x
< SrcNumEle
; x
+= 2) {
4439 newEle
[x
] = DAG
.getNode(ISD::OR
, DL
,
4440 IntTy
.getScalarType(), newEle
[x
], newEle
[x
+ 1]);
4442 if (ScalarSrcSize
== 8) {
4443 for (uint32_t x
= 0; x
< SrcNumEle
; x
+= 4) {
4444 newEle
[x
] = DAG
.getNode(ISD::OR
, DL
,
4445 IntTy
.getScalarType(), newEle
[x
], newEle
[x
+ 2]);
4447 if (DstNumEle
== 1) {
4450 Dst
= DAG
.getNode(AMDILISD::VBUILD
, DL
, DstVT
,
4452 for (uint32_t x
= 1; x
< DstNumEle
; ++x
) {
4453 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
4454 getPointerTy(), DAG
.getConstant(x
, MVT::i32
));
4455 Dst
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
,
4456 DstVT
, Dst
, newEle
[x
* 4], idx
);
4460 if (DstNumEle
== 1) {
4463 Dst
= DAG
.getNode(AMDILISD::VBUILD
, DL
, DstVT
,
4465 for (uint32_t x
= 1; x
< DstNumEle
; ++x
) {
4466 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
4467 getPointerTy(), DAG
.getConstant(x
, MVT::i32
));
4468 Dst
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
,
4469 DstVT
, Dst
, newEle
[x
* 2], idx
);
4475 } else if (ScalarDstSize
< 32 && ScalarSrcSize
>= 32) {
4477 EVT IntTy
= genIntType(ScalarSrcSize
, DstNumEle
);
4478 SDValue vec
= DAG
.getNode(AMDILISD::VBUILD
, DL
, IntTy
,
4479 DAG
.getUNDEF(IntTy
.getScalarType()));
4480 uint32_t mult
= (ScalarDstSize
== 8) ? 4 : 2;
4481 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
4482 for (uint32_t y
= 0; y
< mult
; ++y
) {
4483 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
4485 DAG
.getConstant(x
* mult
+ y
, MVT::i32
));
4487 if (SrcNumEle
> 1) {
4488 t
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
4489 DL
, SrcVT
.getScalarType(), Src
,
4490 DAG
.getConstant(x
, MVT::i32
));
4495 t
= DAG
.getNode(ISD::SRL
, DL
, t
.getValueType(),
4496 t
, DAG
.getConstant(y
* ScalarDstSize
,
4499 vec
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
,
4500 DL
, IntTy
, vec
, t
, idx
);
4503 Dst
= DAG
.getSExtOrTrunc(vec
, DL
, DstVT
);
4505 } else if (ScalarDstSize
== 16 && ScalarSrcSize
== 8) {
4507 SDValue
*numEle
= new SDValue
[SrcNumEle
];
4508 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
4509 numEle
[x
] = DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
,
4510 MVT::i8
, Src
, DAG
.getConstant(x
, MVT::i32
));
4511 numEle
[x
] = DAG
.getSExtOrTrunc(numEle
[x
], DL
, MVT::i16
);
4512 numEle
[x
] = DAG
.getNode(ISD::AND
, DL
, MVT::i16
, numEle
[x
],
4513 DAG
.getConstant(0xFF, MVT::i16
));
4515 for (uint32_t x
= 1; x
< SrcNumEle
; x
+= 2) {
4516 numEle
[x
] = DAG
.getNode(ISD::SHL
, DL
, MVT::i16
, numEle
[x
],
4517 DAG
.getConstant(8, MVT::i16
));
4518 numEle
[x
- 1] = DAG
.getNode(ISD::OR
, DL
, MVT::i16
,
4519 numEle
[x
-1], numEle
[x
]);
4521 if (DstNumEle
> 1) {
4522 // If we are not a scalar i16, the only other case is a
4523 // v2i16 since we can't have v8i8 at this point, v4i16
4524 // cannot be generated
4525 Dst
= DAG
.getNode(AMDILISD::VBUILD
, DL
, MVT::v2i16
,
4527 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
4528 getPointerTy(), DAG
.getConstant(1, MVT::i32
));
4529 Dst
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v2i16
,
4530 Dst
, numEle
[2], idx
);
4536 } else if (ScalarDstSize
== 8 && ScalarSrcSize
== 16) {
4538 SDValue
*numEle
= new SDValue
[DstNumEle
];
4539 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
4540 numEle
[x
* 2] = DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
,
4541 MVT::i16
, Src
, DAG
.getConstant(x
, MVT::i32
));
4542 numEle
[x
* 2 + 1] = DAG
.getNode(ISD::SRL
, DL
, MVT::i16
,
4543 numEle
[x
* 2], DAG
.getConstant(8, MVT::i16
));
4545 MVT ty
= (SrcNumEle
== 1) ? MVT::v2i16
: MVT::v4i16
;
4546 Dst
= DAG
.getNode(AMDILISD::VBUILD
, DL
, ty
, numEle
[0]);
4547 for (uint32_t x
= 1; x
< DstNumEle
; ++x
) {
4548 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
4549 getPointerTy(), DAG
.getConstant(x
, MVT::i32
));
4550 Dst
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, ty
,
4551 Dst
, numEle
[x
], idx
);
4554 ty
= (SrcNumEle
== 1) ? MVT::v2i8
: MVT::v4i8
;
4555 Res
= DAG
.getSExtOrTrunc(Dst
, DL
, ty
);
4560 Res
= DAG
.getNode(AMDILISD::BITCONV
,
4562 Dst
.getValueType(), Src
);
4567 AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op
,
4568 SelectionDAG
&DAG
) const
4570 SDValue Chain
= Op
.getOperand(0);
4571 SDValue Size
= Op
.getOperand(1);
4572 unsigned int SPReg
= AMDIL::SP
;
4573 DebugLoc DL
= Op
.getDebugLoc();
4574 SDValue SP
= DAG
.getCopyFromReg(Chain
,
4577 SDValue NewSP
= DAG
.getNode(ISD::ADD
,
4579 MVT::i32
, SP
, Size
);
4580 Chain
= DAG
.getCopyToReg(SP
.getValue(1),
4583 SDValue Ops
[2] = {NewSP
, Chain
};
4584 Chain
= DAG
.getMergeValues(Ops
, 2 ,DL
);
4588 AMDILTargetLowering::LowerBRCOND(SDValue Op
, SelectionDAG
&DAG
) const
4590 SDValue Chain
= Op
.getOperand(0);
4591 SDValue Cond
= Op
.getOperand(1);
4592 SDValue Jump
= Op
.getOperand(2);
4594 Result
= DAG
.getNode(
4595 AMDILISD::BRANCH_COND
,
4603 AMDILTargetLowering::LowerBR_CC(SDValue Op
, SelectionDAG
&DAG
) const
4605 SDValue Chain
= Op
.getOperand(0);
4606 CondCodeSDNode
*CCNode
= cast
<CondCodeSDNode
>(Op
.getOperand(1));
4607 SDValue LHS
= Op
.getOperand(2);
4608 SDValue RHS
= Op
.getOperand(3);
4609 SDValue JumpT
= Op
.getOperand(4);
4611 ISD::CondCode CC
= CCNode
->get();
4613 unsigned int cmpOpcode
= CondCCodeToCC(
4615 LHS
.getValueType().getSimpleVT().SimpleTy
);
4616 CmpValue
= DAG
.getNode(
4620 DAG
.getConstant(cmpOpcode
, MVT::i32
),
4622 Result
= DAG
.getNode(
4623 AMDILISD::BRANCH_COND
,
4624 CmpValue
.getDebugLoc(),
4631 AMDILTargetLowering::LowerFP_ROUND(SDValue Op
, SelectionDAG
&DAG
) const
4633 SDValue Result
= DAG
.getNode(
4643 AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op
, SelectionDAG
&DAG
) const
4645 SDValue Result
= DAG
.getNode(
4653 // LowerReturn - Lower an ISD::RET node.
4655 AMDILTargetLowering::LowerReturn(SDValue Chain
,
4656 CallingConv::ID CallConv
, bool isVarArg
,
4657 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
4658 const SmallVectorImpl
<SDValue
> &OutVals
,
4659 DebugLoc dl
, SelectionDAG
&DAG
)
4662 //MachineFunction& MF = DAG.getMachineFunction();
4663 // CCValAssign - represent the assignment of the return value
4665 SmallVector
<CCValAssign
, 16> RVLocs
;
4667 // CCState - Info about the registers and stack slot
4668 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
4669 getTargetMachine(), RVLocs
, *DAG
.getContext());
4671 // Analyze return values of ISD::RET
4672 CCInfo
.AnalyzeReturn(Outs
, RetCC_AMDIL32
);
4673 // If this is the first return lowered for this function, add
4674 // the regs to the liveout set for the function
4675 MachineRegisterInfo
&MRI
= DAG
.getMachineFunction().getRegInfo();
4676 for (unsigned int i
= 0, e
= RVLocs
.size(); i
!= e
; ++i
) {
4677 if (RVLocs
[i
].isRegLoc() && !MRI
.isLiveOut(RVLocs
[i
].getLocReg())) {
4678 MRI
.addLiveOut(RVLocs
[i
].getLocReg());
4681 // FIXME: implement this when tail call is implemented
4682 // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
4683 // both x86 and ppc implement this in ISelLowering
4685 // Regular return here
4687 SmallVector
<SDValue
, 6> RetOps
;
4688 RetOps
.push_back(Chain
);
4689 RetOps
.push_back(DAG
.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32
));
4690 for (unsigned int i
= 0, e
= RVLocs
.size(); i
!= e
; ++i
) {
4691 CCValAssign
&VA
= RVLocs
[i
];
4692 SDValue ValToCopy
= OutVals
[i
];
4693 assert(VA
.isRegLoc() && "Can only return in registers!");
4694 // ISD::Ret => ret chain, (regnum1, val1), ...
4695 // So i * 2 + 1 index only the regnums
4696 Chain
= DAG
.getCopyToReg(Chain
,
4701 // guarantee that all emitted copies are stuck together
4702 // avoiding something bad
4703 Flag
= Chain
.getValue(1);
4705 /*if (MF.getFunction()->hasStructRetAttr()) {
4706 assert(0 && "Struct returns are not yet implemented!");
4707 // Both MIPS and X86 have this
4711 RetOps
.push_back(Flag
);
4713 Flag
= DAG
.getNode(AMDILISD::RET_FLAG
,
4715 MVT::Other
, &RetOps
[0], RetOps
.size());
4719 AMDILTargetLowering::generateLongRelational(MachineInstr
*MI
,
4720 unsigned int opCode
) const
4722 MachineOperand DST
= MI
->getOperand(0);
4723 MachineOperand LHS
= MI
->getOperand(2);
4724 MachineOperand RHS
= MI
->getOperand(3);
4725 unsigned int opi32Code
= 0, si32Code
= 0;
4726 unsigned int simpleVT
= MI
->getDesc().OpInfo
[0].RegClass
;
4728 // All the relationals can be generated with a few temp registers (12 are created here)
4729 for (int x
= 0; x
< 12; ++x
) {
4730 REGS
[x
] = genVReg(simpleVT
);
4732 // Pull out the high and low components of each 64 bit register
4733 generateMachineInst(AMDIL::LHI
, REGS
[0], LHS
.getReg());
4734 generateMachineInst(AMDIL::LLO
, REGS
[1], LHS
.getReg());
4735 generateMachineInst(AMDIL::LHI
, REGS
[2], RHS
.getReg());
4736 generateMachineInst(AMDIL::LLO
, REGS
[3], RHS
.getReg());
4737 // Determine the correct opcode that we should use
4740 assert(!"comparison case not handled!");
4743 si32Code
= opi32Code
= AMDIL::IEQ
;
4746 si32Code
= opi32Code
= AMDIL::INE
;
4752 if (opCode
== AMDIL::LGE
|| opCode
== AMDIL::ULGE
) {
4753 std::swap(REGS
[0], REGS
[2]);
4755 std::swap(REGS
[1], REGS
[3]);
4757 if (opCode
== AMDIL::LLE
|| opCode
== AMDIL::LGE
) {
4758 opi32Code
= AMDIL::ILT
;
4760 opi32Code
= AMDIL::ULT
;
4762 si32Code
= AMDIL::UGE
;
4766 std::swap(REGS
[0], REGS
[2]);
4767 std::swap(REGS
[1], REGS
[3]);
4770 if (opCode
== AMDIL::LGT
|| opCode
== AMDIL::LLT
) {
4771 opi32Code
= AMDIL::ILT
;
4773 opi32Code
= AMDIL::ULT
;
4775 si32Code
= AMDIL::ULT
;
4778 // Do the initial opcode on the high and low components.
4779 // This leaves the following:
4780 // REGS[4] = L_HI OP R_HI
4781 // REGS[5] = L_LO OP R_LO
4782 generateMachineInst(opi32Code
, REGS
[4], REGS
[0], REGS
[2]);
4783 generateMachineInst(si32Code
, REGS
[5], REGS
[1], REGS
[3]);
4788 // combine the results with an AND or an OR depending on if
4790 uint32_t combineOp
= (opi32Code
== AMDIL::IEQ
)
4791 ? AMDIL::BINARY_AND_i32
: AMDIL::BINARY_OR_i32
;
4792 generateMachineInst(combineOp
, REGS
[11], REGS
[4], REGS
[5]);
4796 // this finishes codegen for the following pattern
4797 // REGS[4] || (REGS[5] && (L_HI == R_HI))
4798 generateMachineInst(AMDIL::IEQ
, REGS
[9], REGS
[0], REGS
[2]);
4799 generateMachineInst(AMDIL::BINARY_AND_i32
, REGS
[10], REGS
[5],
4801 generateMachineInst(AMDIL::BINARY_OR_i32
, REGS
[11], REGS
[4],
4805 generateMachineInst(AMDIL::LCREATE
, DST
.getReg(), REGS
[11], REGS
[11]);
4809 AMDILTargetLowering::getFunctionAlignment(const Function
*) const
4815 AMDILTargetLowering::setPrivateData(MachineBasicBlock
*BB
,
4816 MachineBasicBlock::iterator
&BBI
,
4817 DebugLoc
*DL
, const TargetInstrInfo
*TII
) const
// genVReg: creates and returns a new virtual register through the register
// info of mBB's parent. The register class is obtained from the target
// machine's register info by passing regType to getRegClass().
// NOTE(review): regType is presumably a register-class ID - confirm with callers.
4825 AMDILTargetLowering::genVReg(uint32_t regType
) const
4827 return mBB
->getParent()->getRegInfo().createVirtualRegister(
4828 getTargetMachine().getRegisterInfo()->getRegClass(regType
));
// generateMachineInst (opcode, dst): calls BuildMI on *mBB at position mBBI
// with debug location *mDL and the instruction description mTII->get(opcode),
// passing dst through, and returns the BuildMI result. The wider overloads
// chain addReg() onto that result.
// NOTE(review): dst is presumably the destination register - confirm.
4832 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
) const
4834 return BuildMI(*mBB
, mBBI
, *mDL
, mTII
->get(opcode
), dst
);
// generateMachineInst (opcode, dst, src1): delegates to the (opcode, dst)
// overload and then appends src1 to the instruction with addReg().
4838 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
,
4839 uint32_t src1
) const
4841 return generateMachineInst(opcode
, dst
).addReg(src1
);
// generateMachineInst (opcode, dst, src1, src2): delegates to the
// (opcode, dst, src1) overload and then appends src2 with addReg(), so the
// register operands are added in the order src1, src2.
4845 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
,
4846 uint32_t src1
, uint32_t src2
) const
4848 return generateMachineInst(opcode
, dst
, src1
).addReg(src2
);
// generateMachineInst (opcode, dst, src1, src2, src3): delegates to the
// (opcode, dst, src1, src2) overload and then appends src3 with addReg(), so
// the register operands are added in the order src1, src2, src3.
4852 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
,
4853 uint32_t src1
, uint32_t src2
, uint32_t src3
) const
4855 return generateMachineInst(opcode
, dst
, src1
, src2
).addReg(src3
);
4860 AMDILTargetLowering::LowerSDIV24(SDValue Op
, SelectionDAG
&DAG
) const
4862 DebugLoc DL
= Op
.getDebugLoc();
4863 EVT OVT
= Op
.getValueType();
4864 SDValue LHS
= Op
.getOperand(0);
4865 SDValue RHS
= Op
.getOperand(1);
4868 if (!OVT
.isVector()) {
4871 } else if (OVT
.getVectorNumElements() == 2) {
4874 } else if (OVT
.getVectorNumElements() == 4) {
4878 unsigned bitsize
= OVT
.getScalarType().getSizeInBits();
4879 // char|short jq = ia ^ ib;
4880 SDValue jq
= DAG
.getNode(ISD::XOR
, DL
, OVT
, LHS
, RHS
);
4882 // jq = jq >> (bitsize - 2)
4883 jq
= DAG
.getNode(ISD::SRA
, DL
, OVT
, jq
, DAG
.getConstant(bitsize
- 2, OVT
));
4886 jq
= DAG
.getNode(ISD::OR
, DL
, OVT
, jq
, DAG
.getConstant(1, OVT
));
4889 jq
= DAG
.getSExtOrTrunc(jq
, DL
, INTTY
);
4891 // int ia = (int)LHS;
4892 SDValue ia
= DAG
.getSExtOrTrunc(LHS
, DL
, INTTY
);
4894 // int ib = (int)RHS;
4895 SDValue ib
= DAG
.getSExtOrTrunc(RHS
, DL
, INTTY
);
4897 // float fa = (float)ia;
4898 SDValue fa
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ia
);
4900 // float fb = (float)ib;
4901 SDValue fb
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ib
);
4903 // float fq = native_divide(fa, fb);
4904 SDValue fq
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, FLTTY
, fa
, fb
);
4907 fq
= DAG
.getNode(ISD::FTRUNC
, DL
, FLTTY
, fq
);
4909 // float fqneg = -fq;
4910 SDValue fqneg
= DAG
.getNode(ISD::FNEG
, DL
, FLTTY
, fq
);
4912 // float fr = mad(fqneg, fb, fa);
4913 SDValue fr
= DAG
.getNode(AMDILISD::MAD
, DL
, FLTTY
, fqneg
, fb
, fa
);
4915 // int iq = (int)fq;
4916 SDValue iq
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, INTTY
, fq
);
4919 fr
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fr
);
4922 fb
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fb
);
4924 // int cv = fr >= fb;
4926 if (INTTY
== MVT::i32
) {
4927 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
4929 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
4931 // jq = (cv ? jq : 0);
4932 jq
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, cv
, jq
,
4933 DAG
.getConstant(0, OVT
));
4935 iq
= DAG
.getSExtOrTrunc(iq
, DL
, OVT
);
4936 iq
= DAG
.getNode(ISD::ADD
, DL
, OVT
, iq
, jq
);
4941 AMDILTargetLowering::LowerSDIV32(SDValue Op
, SelectionDAG
&DAG
) const
4943 DebugLoc DL
= Op
.getDebugLoc();
4944 EVT OVT
= Op
.getValueType();
4945 SDValue LHS
= Op
.getOperand(0);
4946 SDValue RHS
= Op
.getOperand(1);
4947 // The LowerSDIV32 function generates equivalent to the following IL.
4957 // ixor r10, r10, r11
4959 // ixor DST, r0, r10
4968 SDValue r10
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
4969 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
4970 r0
, DAG
.getConstant(0, OVT
));
4973 SDValue r11
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
4974 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
4975 r1
, DAG
.getConstant(0, OVT
));
4978 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
4981 r1
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r1
, r11
);
4984 r0
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
4987 r1
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r1
, r11
);
4990 r0
= DAG
.getNode(ISD::UDIV
, DL
, OVT
, r0
, r1
);
4992 // ixor r10, r10, r11
4993 r10
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r10
, r11
);
4996 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
4998 // ixor DST, r0, r10
4999 SDValue DST
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
// LowerSDIV64: builds no new DAG nodes; it returns result value 0 of Op's own
// node, and the DAG argument is not used by the visible statement.
// NOTE(review): presumably 64-bit signed division is handled elsewhere - confirm.
5004 AMDILTargetLowering::LowerSDIV64(SDValue Op
, SelectionDAG
&DAG
) const
5006 return SDValue(Op
.getNode(), 0);
5010 AMDILTargetLowering::LowerUDIV24(SDValue Op
, SelectionDAG
&DAG
) const
5012 DebugLoc DL
= Op
.getDebugLoc();
5013 EVT OVT
= Op
.getValueType();
5014 SDValue LHS
= Op
.getOperand(0);
5015 SDValue RHS
= Op
.getOperand(1);
5018 if (!OVT
.isVector()) {
5021 } else if (OVT
.getVectorNumElements() == 2) {
5024 } else if (OVT
.getVectorNumElements() == 4) {
5029 // The LowerUDIV24 function implements the following CL.
5030 // int ia = (int)LHS
5031 // float fa = (float)ia
5032 // int ib = (int)RHS
5033 // float fb = (float)ib
5034 // float fq = native_divide(fa, fb)
5036 // float t = mad(fq, fb, fb)
5037 // int iq = (int)fq - (t <= fa)
5040 // int ia = (int)LHS
5041 SDValue ia
= DAG
.getZExtOrTrunc(LHS
, DL
, INTTY
);
5043 // float fa = (float)ia
5044 SDValue fa
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ia
);
5046 // int ib = (int)RHS
5047 SDValue ib
= DAG
.getZExtOrTrunc(RHS
, DL
, INTTY
);
5049 // float fb = (float)ib
5050 SDValue fb
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ib
);
5052 // float fq = native_divide(fa, fb)
5053 SDValue fq
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, FLTTY
, fa
, fb
);
5056 fq
= DAG
.getNode(ISD::FTRUNC
, DL
, FLTTY
, fq
);
5058 // float t = mad(fq, fb, fb)
5059 SDValue t
= DAG
.getNode(AMDILISD::MAD
, DL
, FLTTY
, fq
, fb
, fb
);
5061 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
5063 fq
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, INTTY
, fq
);
5064 if (INTTY
== MVT::i32
) {
5065 iq
= DAG
.getSetCC(DL
, INTTY
, t
, fa
, ISD::SETOLE
);
5067 iq
= DAG
.getSetCC(DL
, INTTY
, t
, fa
, ISD::SETOLE
);
5069 iq
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, fq
, iq
);
5073 iq
= DAG
.getZExtOrTrunc(iq
, DL
, OVT
);
5079 AMDILTargetLowering::LowerUDIV32(SDValue Op
, SelectionDAG
&DAG
) const
5081 return SDValue(Op
.getNode(), 0);
5085 AMDILTargetLowering::LowerUDIV64(SDValue Op
, SelectionDAG
&DAG
) const
5087 return SDValue(Op
.getNode(), 0);
5090 AMDILTargetLowering::LowerSREM8(SDValue Op
, SelectionDAG
&DAG
) const
5092 DebugLoc DL
= Op
.getDebugLoc();
5093 EVT OVT
= Op
.getValueType();
5094 MVT INTTY
= MVT::i32
;
5095 if (OVT
== MVT::v2i8
) {
5097 } else if (OVT
== MVT::v4i8
) {
5100 SDValue LHS
= DAG
.getSExtOrTrunc(Op
.getOperand(0), DL
, INTTY
);
5101 SDValue RHS
= DAG
.getSExtOrTrunc(Op
.getOperand(1), DL
, INTTY
);
5102 LHS
= DAG
.getNode(ISD::SREM
, DL
, INTTY
, LHS
, RHS
);
5103 LHS
= DAG
.getSExtOrTrunc(LHS
, DL
, OVT
);
5108 AMDILTargetLowering::LowerSREM16(SDValue Op
, SelectionDAG
&DAG
) const
5110 DebugLoc DL
= Op
.getDebugLoc();
5111 EVT OVT
= Op
.getValueType();
5112 MVT INTTY
= MVT::i32
;
5113 if (OVT
== MVT::v2i16
) {
5115 } else if (OVT
== MVT::v4i16
) {
5118 SDValue LHS
= DAG
.getSExtOrTrunc(Op
.getOperand(0), DL
, INTTY
);
5119 SDValue RHS
= DAG
.getSExtOrTrunc(Op
.getOperand(1), DL
, INTTY
);
5120 LHS
= DAG
.getNode(ISD::SREM
, DL
, INTTY
, LHS
, RHS
);
5121 LHS
= DAG
.getSExtOrTrunc(LHS
, DL
, OVT
);
5126 AMDILTargetLowering::LowerSREM32(SDValue Op
, SelectionDAG
&DAG
) const
5128 DebugLoc DL
= Op
.getDebugLoc();
5129 EVT OVT
= Op
.getValueType();
5130 SDValue LHS
= Op
.getOperand(0);
5131 SDValue RHS
= Op
.getOperand(1);
5132 // The LowerSREM32 function generates equivalent to the following IL.
5142 // umul r20, r20, r1
5145 // ixor DST, r0, r10
5154 SDValue r10
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
5155 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
5156 r0
, DAG
.getConstant(0, OVT
));
5159 SDValue r11
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
5160 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
5161 r1
, DAG
.getConstant(0, OVT
));
5164 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
5167 r1
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r1
, r11
);
5170 r0
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
5173 r1
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r1
, r11
);
5176 SDValue r20
= DAG
.getNode(ISD::UREM
, DL
, OVT
, r0
, r1
);
5178 // umul r20, r20, r1
5179 r20
= DAG
.getNode(AMDILISD::UMUL
, DL
, OVT
, r20
, r1
);
5182 r0
= DAG
.getNode(ISD::SUB
, DL
, OVT
, r0
, r20
);
5185 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
5187 // ixor DST, r0, r10
5188 SDValue DST
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
5193 AMDILTargetLowering::LowerSREM64(SDValue Op
, SelectionDAG
&DAG
) const
5195 return SDValue(Op
.getNode(), 0);
5199 AMDILTargetLowering::LowerUREM8(SDValue Op
, SelectionDAG
&DAG
) const
5201 DebugLoc DL
= Op
.getDebugLoc();
5202 EVT OVT
= Op
.getValueType();
5203 MVT INTTY
= MVT::i32
;
5204 if (OVT
== MVT::v2i8
) {
5206 } else if (OVT
== MVT::v4i8
) {
5209 SDValue LHS
= Op
.getOperand(0);
5210 SDValue RHS
= Op
.getOperand(1);
5211 // The LowerUREM8 function generates equivalent to the following IL.
5212 // mov r0, as_u32(LHS)
5213 // mov r1, as_u32(RHS)
5214 // and r10, r0, 0xFF
5215 // and r11, r1, 0xFF
5216 // cmov_logical r3, r11, r11, 0x1
5218 // cmov_logical r3, r11, r3, 0
5221 // and as_u8(DST), r3, 0xFF
5223 // mov r0, as_u32(LHS)
5224 SDValue r0
= DAG
.getSExtOrTrunc(LHS
, DL
, INTTY
);
5226 // mov r1, as_u32(RHS)
5227 SDValue r1
= DAG
.getSExtOrTrunc(RHS
, DL
, INTTY
);
5229 // and r10, r0, 0xFF
5230 SDValue r10
= DAG
.getNode(ISD::AND
, DL
, INTTY
, r0
,
5231 DAG
.getConstant(0xFF, INTTY
));
5233 // and r11, r1, 0xFF
5234 SDValue r11
= DAG
.getNode(ISD::AND
, DL
, INTTY
, r1
,
5235 DAG
.getConstant(0xFF, INTTY
));
5237 // cmov_logical r3, r11, r11, 0x1
5238 SDValue r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, r11
, r11
,
5239 DAG
.getConstant(0x01, INTTY
));
5242 r3
= DAG
.getNode(ISD::UREM
, DL
, INTTY
, r10
, r3
);
5244 // cmov_logical r3, r11, r3, 0
5245 r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, r11
, r3
,
5246 DAG
.getConstant(0, INTTY
));
5249 r3
= DAG
.getNode(AMDILISD::UMUL
, DL
, INTTY
, r3
, r11
);
5252 r3
= DAG
.getNode(ISD::SUB
, DL
, INTTY
, r10
, r3
);
5254 // and as_u8(DST), r3, 0xFF
5255 SDValue DST
= DAG
.getNode(ISD::AND
, DL
, INTTY
, r3
,
5256 DAG
.getConstant(0xFF, INTTY
));
5257 DST
= DAG
.getZExtOrTrunc(DST
, DL
, OVT
);
5262 AMDILTargetLowering::LowerUREM16(SDValue Op
, SelectionDAG
&DAG
) const
5264 DebugLoc DL
= Op
.getDebugLoc();
5265 EVT OVT
= Op
.getValueType();
5266 MVT INTTY
= MVT::i32
;
5267 if (OVT
== MVT::v2i16
) {
5269 } else if (OVT
== MVT::v4i16
) {
5272 SDValue LHS
= Op
.getOperand(0);
5273 SDValue RHS
= Op
.getOperand(1);
5274 // The LowerUREM16 function generatest equivalent to the following IL.
5277 // DIV = LowerUDIV16(LHS, RHS)
5278 // and r10, r0, 0xFFFF
5279 // and r11, r1, 0xFFFF
5280 // cmov_logical r3, r11, r11, 0x1
5281 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5282 // and r3, r3, 0xFFFF
5283 // cmov_logical r3, r11, r3, 0
5286 // and DST, r3, 0xFFFF
5294 // and r10, r0, 0xFFFF
5295 SDValue r10
= DAG
.getNode(ISD::AND
, DL
, OVT
, r0
,
5296 DAG
.getConstant(0xFFFF, OVT
));
5298 // and r11, r1, 0xFFFF
5299 SDValue r11
= DAG
.getNode(ISD::AND
, DL
, OVT
, r1
,
5300 DAG
.getConstant(0xFFFF, OVT
));
5302 // cmov_logical r3, r11, r11, 0x1
5303 SDValue r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, r11
, r11
,
5304 DAG
.getConstant(0x01, OVT
));
5306 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5307 r10
= DAG
.getZExtOrTrunc(r10
, DL
, INTTY
);
5308 r3
= DAG
.getZExtOrTrunc(r3
, DL
, INTTY
);
5309 r3
= DAG
.getNode(ISD::UREM
, DL
, INTTY
, r10
, r3
);
5310 r3
= DAG
.getZExtOrTrunc(r3
, DL
, OVT
);
5311 r10
= DAG
.getZExtOrTrunc(r10
, DL
, OVT
);
5313 // and r3, r3, 0xFFFF
5314 r3
= DAG
.getNode(ISD::AND
, DL
, OVT
, r3
,
5315 DAG
.getConstant(0xFFFF, OVT
));
5317 // cmov_logical r3, r11, r3, 0
5318 r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, r11
, r3
,
5319 DAG
.getConstant(0, OVT
));
5321 r3
= DAG
.getNode(AMDILISD::UMUL
, DL
, OVT
, r3
, r11
);
5324 r3
= DAG
.getNode(ISD::SUB
, DL
, OVT
, r10
, r3
);
5326 // and DST, r3, 0xFFFF
5327 SDValue DST
= DAG
.getNode(ISD::AND
, DL
, OVT
, r3
,
5328 DAG
.getConstant(0xFFFF, OVT
));
5333 AMDILTargetLowering::LowerUREM32(SDValue Op
, SelectionDAG
&DAG
) const
5335 DebugLoc DL
= Op
.getDebugLoc();
5336 EVT OVT
= Op
.getValueType();
5337 SDValue LHS
= Op
.getOperand(0);
5338 SDValue RHS
= Op
.getOperand(1);
5339 // The LowerUREM32 function generates equivalent to the following IL.
5340 // udiv r20, LHS, RHS
5341 // umul r20, r20, RHS
5342 // sub DST, LHS, r20
5344 // udiv r20, LHS, RHS
5345 SDValue r20
= DAG
.getNode(ISD::UDIV
, DL
, OVT
, LHS
, RHS
);
5347 // umul r20, r20, RHS
5348 r20
= DAG
.getNode(AMDILISD::UMUL
, DL
, OVT
, r20
, RHS
);
5350 // sub DST, LHS, r20
5351 SDValue DST
= DAG
.getNode(ISD::SUB
, DL
, OVT
, LHS
, r20
);
5356 AMDILTargetLowering::LowerUREM64(SDValue Op
, SelectionDAG
&DAG
) const
5358 return SDValue(Op
.getNode(), 0);
5363 AMDILTargetLowering::LowerFDIV32(SDValue Op
, SelectionDAG
&DAG
) const
5365 DebugLoc DL
= Op
.getDebugLoc();
5366 EVT OVT
= Op
.getValueType();
5367 MVT INTTY
= MVT::i32
;
5368 if (OVT
== MVT::v2f32
) {
5370 } else if (OVT
== MVT::v4f32
) {
5373 SDValue LHS
= Op
.getOperand(0);
5374 SDValue RHS
= Op
.getOperand(1);
5376 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
5377 &this->getTargetMachine())->getSubtargetImpl();
5378 if (stm
->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
5379 // TODO: This doesn't work for vector types yet
5380 // The LowerFDIV32 function generates equivalent to the following
5382 // mov r20, as_int(LHS)
5383 // mov r21, as_int(RHS)
5384 // and r30, r20, 0x7f800000
5385 // and r31, r20, 0x807FFFFF
5386 // and r32, r21, 0x7f800000
5387 // and r33, r21, 0x807FFFFF
5388 // ieq r40, r30, 0x7F800000
5389 // ieq r41, r31, 0x7F800000
5392 // and r50, r20, 0x80000000
5393 // and r51, r21, 0x80000000
5394 // ior r32, r32, 0x3f800000
5395 // ior r33, r33, 0x3f800000
5396 // cmov_logical r32, r42, r50, r32
5397 // cmov_logical r33, r43, r51, r33
5398 // cmov_logical r32, r40, r20, r32
5399 // cmov_logical r33, r41, r21, r33
5400 // ior r50, r40, r41
5401 // ior r51, r42, r43
5402 // ior r50, r50, r51
5404 // iadd r30, r30, r52
5405 // cmov_logical r30, r50, 0, r30
5406 // div_zeroop(infinity) r21, 1.0, r33
5407 // mul_ieee r20, r32, r21
5408 // and r22, r20, 0x7FFFFFFF
5409 // and r23, r20, 0x80000000
5410 // ishr r60, r22, 0x00000017
5411 // ishr r61, r30, 0x00000017
5412 // iadd r20, r20, r30
5413 // iadd r21, r22, r30
5414 // iadd r60, r60, r61
5416 // ior r41, r23, 0x7F800000
5417 // ige r40, r60, 0x000000FF
5418 // cmov_logical r40, r50, 0, r40
5419 // cmov_logical r20, r42, r23, r20
5420 // cmov_logical DST, r40, r41, r20
5423 // mov r20, as_int(LHS)
5424 SDValue R20
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, LHS
);
5426 // mov r21, as_int(RHS)
5427 SDValue R21
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, RHS
);
5429 // and r30, r20, 0x7f800000
5430 SDValue R30
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
5431 DAG
.getConstant(0x7F800000, INTTY
));
5433 // and r31, r21, 0x7f800000
5434 SDValue R31
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R21
,
5435 DAG
.getConstant(0x7f800000, INTTY
));
5437 // and r32, r20, 0x807FFFFF
5438 SDValue R32
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
5439 DAG
.getConstant(0x807FFFFF, INTTY
));
5441 // and r33, r21, 0x807FFFFF
5442 SDValue R33
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R21
,
5443 DAG
.getConstant(0x807FFFFF, INTTY
));
5445 // ieq r40, r30, 0x7F800000
5446 SDValue R40
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
5447 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
5448 R30
, DAG
.getConstant(0x7F800000, INTTY
));
5450 // ieq r41, r31, 0x7F800000
5451 SDValue R41
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
5452 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
5453 R31
, DAG
.getConstant(0x7F800000, INTTY
));
5456 SDValue R42
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
5457 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
5458 R30
, DAG
.getConstant(0, INTTY
));
5461 SDValue R43
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
5462 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
5463 R31
, DAG
.getConstant(0, INTTY
));
5465 // and r50, r20, 0x80000000
5466 SDValue R50
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
5467 DAG
.getConstant(0x80000000, INTTY
));
5469 // and r51, r21, 0x80000000
5470 SDValue R51
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R21
,
5471 DAG
.getConstant(0x80000000, INTTY
));
5473 // ior r32, r32, 0x3f800000
5474 R32
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R32
,
5475 DAG
.getConstant(0x3F800000, INTTY
));
5477 // ior r33, r33, 0x3f800000
5478 R33
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R33
,
5479 DAG
.getConstant(0x3F800000, INTTY
));
5481 // cmov_logical r32, r42, r50, r32
5482 R32
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R42
, R50
, R32
);
5484 // cmov_logical r33, r43, r51, r33
5485 R33
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R43
, R51
, R33
);
5487 // cmov_logical r32, r40, r20, r32
5488 R32
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R40
, R20
, R32
);
5490 // cmov_logical r33, r41, r21, r33
5491 R33
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R41
, R21
, R33
);
5493 // ior r50, r40, r41
5494 R50
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R40
, R41
);
5496 // ior r51, r42, r43
5497 R51
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R42
, R43
);
5499 // ior r50, r50, r51
5500 R50
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R50
, R51
);
5503 SDValue R52
= DAG
.getNode(AMDILISD::INEGATE
, DL
, INTTY
, R31
);
5505 // iadd r30, r30, r52
5506 R30
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R30
, R52
);
5508 // cmov_logical r30, r50, 0, r30
5509 R30
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R50
,
5510 DAG
.getConstant(0, INTTY
), R30
);
5512 // div_zeroop(infinity) r21, 1.0, as_float(r33)
5513 R33
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R33
);
5514 R21
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, OVT
,
5515 DAG
.getConstantFP(1.0f
, OVT
), R33
);
5517 // mul_ieee as_int(r20), as_float(r32), r21
5518 R32
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R32
);
5519 R20
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, R32
, R21
);
5520 R20
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, R20
);
5522 // div_zeroop(infinity) r21, 1.0, as_float(r33)
5523 R33
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R33
);
5524 R21
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, OVT
,
5525 DAG
.getConstantFP(1.0f
, OVT
), R33
);
5527 // mul_ieee as_int(r20), as_float(r32), r21
5528 R32
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R32
);
5529 R20
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, R32
, R21
);
5530 R20
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, R20
);
5532 // and r22, r20, 0x7FFFFFFF
5533 SDValue R22
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
5534 DAG
.getConstant(0x7FFFFFFF, INTTY
));
5536 // and r23, r20, 0x80000000
5537 SDValue R23
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
5538 DAG
.getConstant(0x80000000, INTTY
));
5540 // ishr r60, r22, 0x00000017
5541 SDValue R60
= DAG
.getNode(ISD::SRA
, DL
, INTTY
, R22
,
5542 DAG
.getConstant(0x00000017, INTTY
));
5544 // ishr r61, r30, 0x00000017
5545 SDValue R61
= DAG
.getNode(ISD::SRA
, DL
, INTTY
, R30
,
5546 DAG
.getConstant(0x00000017, INTTY
));
5548 // iadd r20, r20, r30
5549 R20
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R20
, R30
);
5551 // iadd r21, r22, r30
5552 R21
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R22
, R30
);
5554 // iadd r60, r60, r61
5555 R60
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R60
, R61
);
5558 R42
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
5559 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
5560 DAG
.getConstant(0, INTTY
),
5563 // ior r41, r23, 0x7F800000
5564 R41
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R23
,
5565 DAG
.getConstant(0x7F800000, INTTY
));
5567 // ige r40, r60, 0x000000FF
5568 R40
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
5569 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
5571 DAG
.getConstant(0x0000000FF, INTTY
));
5573 // cmov_logical r40, r50, 0, r40
5574 R40
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R50
,
5575 DAG
.getConstant(0, INTTY
),
5578 // cmov_logical r20, r42, r23, r20
5579 R20
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R42
, R23
, R20
);
5581 // cmov_logical DST, r40, r41, r20
5582 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R40
, R41
, R20
);
5585 DST
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, DST
);
5587 // The following sequence of DAG nodes produce the following IL:
5589 // lt r2, 0x1.0p+96f, r1
5590 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5591 // mul_ieee r1, RHS, r3
5592 // div_zeroop(infinity) r0, LHS, r1
5593 // mul_ieee DST, r0, r3
5596 SDValue r1
= DAG
.getNode(ISD::FABS
, DL
, OVT
, RHS
);
5597 // lt r2, 0x1.0p+96f, r1
5598 SDValue r2
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
5599 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::f32
), MVT::i32
),
5600 DAG
.getConstant(0x6f800000, INTTY
), r1
);
5601 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5602 SDValue r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, r2
,
5603 DAG
.getConstant(0x2f800000, INTTY
),
5604 DAG
.getConstant(0x3f800000, INTTY
));
5605 // mul_ieee r1, RHS, r3
5606 r1
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, RHS
, r3
);
5607 // div_zeroop(infinity) r0, LHS, r1
5608 SDValue r0
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, OVT
, LHS
, r1
);
5609 // mul_ieee DST, r0, r3
5610 DST
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, r0
, r3
);
5616 AMDILTargetLowering::LowerFDIV64(SDValue Op
, SelectionDAG
&DAG
) const
5618 return SDValue(Op
.getNode(), 0);