//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file implements the interfaces that AMDIL uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
15 #include "AMDILISelLowering.h"
16 #include "AMDILDevices.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "AMDILSubtarget.h"
19 #include "AMDILTargetMachine.h"
20 #include "AMDILUtilityFunctions.h"
21 #include "llvm/CallingConv.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/DerivedTypes.h"
29 #include "llvm/Instructions.h"
30 #include "llvm/Intrinsics.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Target/TargetOptions.h"
35 #define ISDBITCAST ISD::BITCAST
36 #define MVTGLUE MVT::Glue
37 //===----------------------------------------------------------------------===//
38 // Calling Convention Implementation
39 //===----------------------------------------------------------------------===//
40 #include "AMDILGenCallingConv.inc"
42 //===----------------------------------------------------------------------===//
43 // TargetLowering Implementation Help Functions Begin
44 //===----------------------------------------------------------------------===//
46 getConversionNode(SelectionDAG
&DAG
, SDValue
& Src
, SDValue
& Dst
, bool asType
)
48 DebugLoc DL
= Src
.getDebugLoc();
49 EVT svt
= Src
.getValueType().getScalarType();
50 EVT dvt
= Dst
.getValueType().getScalarType();
51 if (svt
.isFloatingPoint() && dvt
.isFloatingPoint()) {
52 if (dvt
.bitsGT(svt
)) {
53 Src
= DAG
.getNode(ISD::FP_EXTEND
, DL
, dvt
, Src
);
54 } else if (svt
.bitsLT(svt
)) {
55 Src
= DAG
.getNode(ISD::FP_ROUND
, DL
, dvt
, Src
,
56 DAG
.getConstant(1, MVT::i32
));
58 } else if (svt
.isInteger() && dvt
.isInteger()) {
59 if (!svt
.bitsEq(dvt
)) {
60 Src
= DAG
.getSExtOrTrunc(Src
, DL
, dvt
);
62 Src
= DAG
.getNode(AMDILISD::MOVE
, DL
, dvt
, Src
);
64 } else if (svt
.isInteger()) {
65 unsigned opcode
= (asType
) ? ISDBITCAST
: ISD::SINT_TO_FP
;
66 if (!svt
.bitsEq(dvt
)) {
67 if (dvt
.getSimpleVT().SimpleTy
== MVT::f32
) {
68 Src
= DAG
.getSExtOrTrunc(Src
, DL
, MVT::i32
);
69 } else if (dvt
.getSimpleVT().SimpleTy
== MVT::f64
) {
70 Src
= DAG
.getSExtOrTrunc(Src
, DL
, MVT::i64
);
72 assert(0 && "We only support 32 and 64bit fp types");
75 Src
= DAG
.getNode(opcode
, DL
, dvt
, Src
);
76 } else if (dvt
.isInteger()) {
77 unsigned opcode
= (asType
) ? ISDBITCAST
: ISD::FP_TO_SINT
;
78 if (svt
.getSimpleVT().SimpleTy
== MVT::f32
) {
79 Src
= DAG
.getNode(opcode
, DL
, MVT::i32
, Src
);
80 } else if (svt
.getSimpleVT().SimpleTy
== MVT::f64
) {
81 Src
= DAG
.getNode(opcode
, DL
, MVT::i64
, Src
);
83 assert(0 && "We only support 32 and 64bit fp types");
85 Src
= DAG
.getSExtOrTrunc(Src
, DL
, dvt
);
89 // CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
91 static AMDILCC::CondCodes
92 CondCCodeToCC(ISD::CondCode CC
, const MVT::SimpleValueType
& type
)
97 errs()<<"Condition Code: "<< (unsigned int)CC
<<"\n";
98 assert(0 && "Unknown condition code!");
103 return AMDILCC::IL_CC_F_O
;
105 return AMDILCC::IL_CC_D_O
;
107 assert(0 && "Opcode combination not generated correctly!");
108 return AMDILCC::COND_ERROR
;
113 return AMDILCC::IL_CC_F_UO
;
115 return AMDILCC::IL_CC_D_UO
;
117 assert(0 && "Opcode combination not generated correctly!");
118 return AMDILCC::COND_ERROR
;
126 return AMDILCC::IL_CC_I_GT
;
128 return AMDILCC::IL_CC_F_GT
;
130 return AMDILCC::IL_CC_D_GT
;
132 return AMDILCC::IL_CC_L_GT
;
134 assert(0 && "Opcode combination not generated correctly!");
135 return AMDILCC::COND_ERROR
;
143 return AMDILCC::IL_CC_I_GE
;
145 return AMDILCC::IL_CC_F_GE
;
147 return AMDILCC::IL_CC_D_GE
;
149 return AMDILCC::IL_CC_L_GE
;
151 assert(0 && "Opcode combination not generated correctly!");
152 return AMDILCC::COND_ERROR
;
160 return AMDILCC::IL_CC_I_LT
;
162 return AMDILCC::IL_CC_F_LT
;
164 return AMDILCC::IL_CC_D_LT
;
166 return AMDILCC::IL_CC_L_LT
;
168 assert(0 && "Opcode combination not generated correctly!");
169 return AMDILCC::COND_ERROR
;
177 return AMDILCC::IL_CC_I_LE
;
179 return AMDILCC::IL_CC_F_LE
;
181 return AMDILCC::IL_CC_D_LE
;
183 return AMDILCC::IL_CC_L_LE
;
185 assert(0 && "Opcode combination not generated correctly!");
186 return AMDILCC::COND_ERROR
;
194 return AMDILCC::IL_CC_I_NE
;
196 return AMDILCC::IL_CC_F_NE
;
198 return AMDILCC::IL_CC_D_NE
;
200 return AMDILCC::IL_CC_L_NE
;
202 assert(0 && "Opcode combination not generated correctly!");
203 return AMDILCC::COND_ERROR
;
211 return AMDILCC::IL_CC_I_EQ
;
213 return AMDILCC::IL_CC_F_EQ
;
215 return AMDILCC::IL_CC_D_EQ
;
217 return AMDILCC::IL_CC_L_EQ
;
219 assert(0 && "Opcode combination not generated correctly!");
220 return AMDILCC::COND_ERROR
;
228 return AMDILCC::IL_CC_U_GT
;
230 return AMDILCC::IL_CC_F_UGT
;
232 return AMDILCC::IL_CC_D_UGT
;
234 return AMDILCC::IL_CC_UL_GT
;
236 assert(0 && "Opcode combination not generated correctly!");
237 return AMDILCC::COND_ERROR
;
245 return AMDILCC::IL_CC_U_GE
;
247 return AMDILCC::IL_CC_F_UGE
;
249 return AMDILCC::IL_CC_D_UGE
;
251 return AMDILCC::IL_CC_UL_GE
;
253 assert(0 && "Opcode combination not generated correctly!");
254 return AMDILCC::COND_ERROR
;
262 return AMDILCC::IL_CC_U_LT
;
264 return AMDILCC::IL_CC_F_ULT
;
266 return AMDILCC::IL_CC_D_ULT
;
268 return AMDILCC::IL_CC_UL_LT
;
270 assert(0 && "Opcode combination not generated correctly!");
271 return AMDILCC::COND_ERROR
;
279 return AMDILCC::IL_CC_U_LE
;
281 return AMDILCC::IL_CC_F_ULE
;
283 return AMDILCC::IL_CC_D_ULE
;
285 return AMDILCC::IL_CC_UL_LE
;
287 assert(0 && "Opcode combination not generated correctly!");
288 return AMDILCC::COND_ERROR
;
296 return AMDILCC::IL_CC_U_NE
;
298 return AMDILCC::IL_CC_F_UNE
;
300 return AMDILCC::IL_CC_D_UNE
;
302 return AMDILCC::IL_CC_UL_NE
;
304 assert(0 && "Opcode combination not generated correctly!");
305 return AMDILCC::COND_ERROR
;
313 return AMDILCC::IL_CC_U_EQ
;
315 return AMDILCC::IL_CC_F_UEQ
;
317 return AMDILCC::IL_CC_D_UEQ
;
319 return AMDILCC::IL_CC_UL_EQ
;
321 assert(0 && "Opcode combination not generated correctly!");
322 return AMDILCC::COND_ERROR
;
327 return AMDILCC::IL_CC_F_OGT
;
329 return AMDILCC::IL_CC_D_OGT
;
336 assert(0 && "Opcode combination not generated correctly!");
337 return AMDILCC::COND_ERROR
;
342 return AMDILCC::IL_CC_F_OGE
;
344 return AMDILCC::IL_CC_D_OGE
;
351 assert(0 && "Opcode combination not generated correctly!");
352 return AMDILCC::COND_ERROR
;
357 return AMDILCC::IL_CC_F_OLT
;
359 return AMDILCC::IL_CC_D_OLT
;
366 assert(0 && "Opcode combination not generated correctly!");
367 return AMDILCC::COND_ERROR
;
372 return AMDILCC::IL_CC_F_OLE
;
374 return AMDILCC::IL_CC_D_OLE
;
381 assert(0 && "Opcode combination not generated correctly!");
382 return AMDILCC::COND_ERROR
;
387 return AMDILCC::IL_CC_F_ONE
;
389 return AMDILCC::IL_CC_D_ONE
;
396 assert(0 && "Opcode combination not generated correctly!");
397 return AMDILCC::COND_ERROR
;
402 return AMDILCC::IL_CC_F_OEQ
;
404 return AMDILCC::IL_CC_D_OEQ
;
411 assert(0 && "Opcode combination not generated correctly!");
412 return AMDILCC::COND_ERROR
;
417 /// Helper function used by LowerFormalArguments
418 static const TargetRegisterClass
*
419 getRegClassFromType(unsigned int type
) {
422 assert(0 && "Passed in type does not match any register classes.");
424 return &AMDIL::GPRI8RegClass
;
426 return &AMDIL::GPRI16RegClass
;
428 return &AMDIL::GPRI32RegClass
;
430 return &AMDIL::GPRF32RegClass
;
432 return &AMDIL::GPRI64RegClass
;
434 return &AMDIL::GPRF64RegClass
;
436 return &AMDIL::GPRV4F32RegClass
;
438 return &AMDIL::GPRV4I8RegClass
;
440 return &AMDIL::GPRV4I16RegClass
;
442 return &AMDIL::GPRV4I32RegClass
;
444 return &AMDIL::GPRV2F32RegClass
;
446 return &AMDIL::GPRV2I8RegClass
;
448 return &AMDIL::GPRV2I16RegClass
;
450 return &AMDIL::GPRV2I32RegClass
;
452 return &AMDIL::GPRV2F64RegClass
;
454 return &AMDIL::GPRV2I64RegClass
;
459 AMDILTargetLowering::LowerMemArgument(
461 CallingConv::ID CallConv
,
462 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
463 DebugLoc dl
, SelectionDAG
&DAG
,
464 const CCValAssign
&VA
,
465 MachineFrameInfo
*MFI
,
468 // Create the nodes corresponding to a load from this parameter slot.
469 ISD::ArgFlagsTy Flags
= Ins
[i
].Flags
;
471 bool AlwaysUseMutable
= (CallConv
==CallingConv::Fast
) &&
472 getTargetMachine().Options
.GuaranteedTailCallOpt
;
473 bool isImmutable
= !AlwaysUseMutable
&& !Flags
.isByVal();
475 // FIXME: For now, all byval parameter objects are marked mutable. This can
476 // be changed with more analysis.
477 // In case of tail call optimization mark all arguments mutable. Since they
478 // could be overwritten by lowering of arguments in case of a tail call.
479 int FI
= MFI
->CreateFixedObject(VA
.getValVT().getSizeInBits()/8,
480 VA
.getLocMemOffset(), isImmutable
);
481 SDValue FIN
= DAG
.getFrameIndex(FI
, getPointerTy());
485 return DAG
.getLoad(VA
.getValVT(), dl
, Chain
, FIN
,
486 MachinePointerInfo::getFixedStack(FI
),
487 false, false, false, 0);
489 //===----------------------------------------------------------------------===//
490 // TargetLowering Implementation Help Functions End
491 //===----------------------------------------------------------------------===//
492 //===----------------------------------------------------------------------===//
493 // Instruction generation functions
494 //===----------------------------------------------------------------------===//
496 AMDILTargetLowering::addExtensionInstructions(
497 uint32_t reg
, bool signedShift
,
498 unsigned int simpleVT
) const
501 uint32_t LShift
, RShift
;
506 case AMDIL::GPRI8RegClassID
:
508 LShift
= AMDIL::SHL_i8
;
510 RShift
= AMDIL::SHR_i8
;
512 RShift
= AMDIL::USHR_i8
;
515 case AMDIL::GPRV2I8RegClassID
:
517 LShift
= AMDIL::SHL_v2i8
;
519 RShift
= AMDIL::SHR_v2i8
;
521 RShift
= AMDIL::USHR_v2i8
;
524 case AMDIL::GPRV4I8RegClassID
:
526 LShift
= AMDIL::SHL_v4i8
;
528 RShift
= AMDIL::SHR_v4i8
;
530 RShift
= AMDIL::USHR_v4i8
;
533 case AMDIL::GPRI16RegClassID
:
535 LShift
= AMDIL::SHL_i16
;
537 RShift
= AMDIL::SHR_i16
;
539 RShift
= AMDIL::USHR_i16
;
542 case AMDIL::GPRV2I16RegClassID
:
544 LShift
= AMDIL::SHL_v2i16
;
546 RShift
= AMDIL::SHR_v2i16
;
548 RShift
= AMDIL::USHR_v2i16
;
551 case AMDIL::GPRV4I16RegClassID
:
553 LShift
= AMDIL::SHL_v4i16
;
555 RShift
= AMDIL::SHR_v4i16
;
557 RShift
= AMDIL::USHR_v4i16
;
561 uint32_t LoadReg
= genVReg(simpleVT
);
562 uint32_t tmp1
= genVReg(simpleVT
);
563 uint32_t tmp2
= genVReg(simpleVT
);
564 generateMachineInst(AMDIL::LOADCONST_i32
, LoadReg
).addImm(shiftSize
);
565 generateMachineInst(LShift
, tmp1
, reg
, LoadReg
);
566 generateMachineInst(RShift
, tmp2
, tmp1
, LoadReg
);
571 AMDILTargetLowering::convertToReg(MachineOperand op
) const
575 } else if (op
.isImm()) {
577 = genVReg(op
.getParent()->getDesc().OpInfo
[0].RegClass
);
578 generateMachineInst(AMDIL::LOADCONST_i32
, loadReg
)
579 .addImm(op
.getImm());
580 op
.ChangeToRegister(loadReg
, false);
581 } else if (op
.isFPImm()) {
583 = genVReg(op
.getParent()->getDesc().OpInfo
[0].RegClass
);
584 generateMachineInst(AMDIL::LOADCONST_f32
, loadReg
)
585 .addFPImm(op
.getFPImm());
586 op
.ChangeToRegister(loadReg
, false);
587 } else if (op
.isMBB()) {
588 op
.ChangeToRegister(0, false);
589 } else if (op
.isFI()) {
590 op
.ChangeToRegister(0, false);
591 } else if (op
.isCPI()) {
592 op
.ChangeToRegister(0, false);
593 } else if (op
.isJTI()) {
594 op
.ChangeToRegister(0, false);
595 } else if (op
.isGlobal()) {
596 op
.ChangeToRegister(0, false);
597 } else if (op
.isSymbol()) {
598 op
.ChangeToRegister(0, false);
599 }/* else if (op.isMetadata()) {
600 op.ChangeToRegister(0, false);
605 //===----------------------------------------------------------------------===//
606 // TargetLowering Class Implementation Begins
607 //===----------------------------------------------------------------------===//
608 AMDILTargetLowering::AMDILTargetLowering(TargetMachine
&TM
)
609 : TargetLowering(TM
, new TargetLoweringObjectFileELF())
658 size_t numTypes
= sizeof(types
) / sizeof(*types
);
659 size_t numFloatTypes
= sizeof(FloatTypes
) / sizeof(*FloatTypes
);
660 size_t numIntTypes
= sizeof(IntTypes
) / sizeof(*IntTypes
);
661 size_t numVectorTypes
= sizeof(VectorTypes
) / sizeof(*VectorTypes
);
663 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
664 &this->getTargetMachine())->getSubtargetImpl();
665 // These are the current register classes that are
668 addRegisterClass(MVT::i32
, AMDIL::GPRI32RegisterClass
);
669 addRegisterClass(MVT::f32
, AMDIL::GPRF32RegisterClass
);
671 if (stm
->device()->isSupported(AMDILDeviceInfo::DoubleOps
)) {
672 addRegisterClass(MVT::f64
, AMDIL::GPRF64RegisterClass
);
673 addRegisterClass(MVT::v2f64
, AMDIL::GPRV2F64RegisterClass
);
675 if (stm
->device()->isSupported(AMDILDeviceInfo::ByteOps
)) {
676 addRegisterClass(MVT::i8
, AMDIL::GPRI8RegisterClass
);
677 addRegisterClass(MVT::v2i8
, AMDIL::GPRV2I8RegisterClass
);
678 addRegisterClass(MVT::v4i8
, AMDIL::GPRV4I8RegisterClass
);
679 setOperationAction(ISD::Constant
, MVT::i8
, Legal
);
681 if (stm
->device()->isSupported(AMDILDeviceInfo::ShortOps
)) {
682 addRegisterClass(MVT::i16
, AMDIL::GPRI16RegisterClass
);
683 addRegisterClass(MVT::v2i16
, AMDIL::GPRV2I16RegisterClass
);
684 addRegisterClass(MVT::v4i16
, AMDIL::GPRV4I16RegisterClass
);
685 setOperationAction(ISD::Constant
, MVT::i16
, Legal
);
687 addRegisterClass(MVT::v2f32
, AMDIL::GPRV2F32RegisterClass
);
688 addRegisterClass(MVT::v4f32
, AMDIL::GPRV4F32RegisterClass
);
689 addRegisterClass(MVT::v2i32
, AMDIL::GPRV2I32RegisterClass
);
690 addRegisterClass(MVT::v4i32
, AMDIL::GPRV4I32RegisterClass
);
691 if (stm
->device()->isSupported(AMDILDeviceInfo::LongOps
)) {
692 addRegisterClass(MVT::i64
, AMDIL::GPRI64RegisterClass
);
693 addRegisterClass(MVT::v2i64
, AMDIL::GPRV2I64RegisterClass
);
696 for (unsigned int x
= 0; x
< numTypes
; ++x
) {
697 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)types
[x
];
699 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
700 // We cannot sextinreg, expand to shifts
701 setOperationAction(ISD::SIGN_EXTEND_INREG
, VT
, Custom
);
702 setOperationAction(ISD::EXTRACT_SUBVECTOR
, VT
, Custom
);
703 setOperationAction(ISD::FP_ROUND
, VT
, Expand
);
704 setOperationAction(ISD::SUBE
, VT
, Expand
);
705 setOperationAction(ISD::SUBC
, VT
, Expand
);
706 setOperationAction(ISD::ADD
, VT
, Custom
);
707 setOperationAction(ISD::ADDE
, VT
, Expand
);
708 setOperationAction(ISD::ADDC
, VT
, Expand
);
709 setOperationAction(ISD::SETCC
, VT
, Custom
);
710 setOperationAction(ISD::BRCOND
, VT
, Custom
);
711 setOperationAction(ISD::BR_CC
, VT
, Custom
);
712 setOperationAction(ISD::BR_JT
, VT
, Expand
);
713 setOperationAction(ISD::BRIND
, VT
, Expand
);
714 // TODO: Implement custom UREM/SREM routines
715 setOperationAction(ISD::UREM
, VT
, Expand
);
716 setOperationAction(ISD::SREM
, VT
, Expand
);
717 setOperationAction(ISD::SINT_TO_FP
, VT
, Custom
);
718 setOperationAction(ISD::UINT_TO_FP
, VT
, Custom
);
719 setOperationAction(ISD::FP_TO_SINT
, VT
, Custom
);
720 setOperationAction(ISD::FP_TO_UINT
, VT
, Custom
);
721 setOperationAction(ISDBITCAST
, VT
, Custom
);
722 setOperationAction(ISD::GlobalAddress
, VT
, Custom
);
723 setOperationAction(ISD::JumpTable
, VT
, Custom
);
724 setOperationAction(ISD::ConstantPool
, VT
, Custom
);
725 setOperationAction(ISD::SELECT_CC
, VT
, Custom
);
726 setOperationAction(ISD::SELECT
, VT
, Custom
);
727 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
728 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
729 if (VT
!= MVT::i64
&& VT
!= MVT::v2i64
) {
730 setOperationAction(ISD::SDIV
, VT
, Custom
);
731 setOperationAction(ISD::UDIV
, VT
, Custom
);
733 setOperationAction(ISD::INSERT_VECTOR_ELT
, VT
, Custom
);
734 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, VT
, Custom
);
736 for (unsigned int x
= 0; x
< numFloatTypes
; ++x
) {
737 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)FloatTypes
[x
];
739 // IL does not have these operations for floating point types
740 setOperationAction(ISD::FP_ROUND_INREG
, VT
, Expand
);
741 setOperationAction(ISD::FP_ROUND
, VT
, Custom
);
742 setOperationAction(ISD::SETOLT
, VT
, Expand
);
743 setOperationAction(ISD::SETOGE
, VT
, Expand
);
744 setOperationAction(ISD::SETOGT
, VT
, Expand
);
745 setOperationAction(ISD::SETOLE
, VT
, Expand
);
746 setOperationAction(ISD::SETULT
, VT
, Expand
);
747 setOperationAction(ISD::SETUGE
, VT
, Expand
);
748 setOperationAction(ISD::SETUGT
, VT
, Expand
);
749 setOperationAction(ISD::SETULE
, VT
, Expand
);
752 for (unsigned int x
= 0; x
< numIntTypes
; ++x
) {
753 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)IntTypes
[x
];
755 // GPU also does not have divrem function for signed or unsigned
756 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
757 setOperationAction(ISD::UDIVREM
, VT
, Expand
);
758 setOperationAction(ISD::FP_ROUND
, VT
, Expand
);
760 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
761 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
762 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
764 // GPU doesn't have a rotl, rotr, or byteswap instruction
765 setOperationAction(ISD::ROTR
, VT
, Expand
);
766 setOperationAction(ISD::ROTL
, VT
, Expand
);
767 setOperationAction(ISD::BSWAP
, VT
, Expand
);
769 // GPU doesn't have any counting operators
770 setOperationAction(ISD::CTPOP
, VT
, Expand
);
771 setOperationAction(ISD::CTTZ
, VT
, Expand
);
772 setOperationAction(ISD::CTLZ
, VT
, Expand
);
775 for ( unsigned int ii
= 0; ii
< numVectorTypes
; ++ii
)
777 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)VectorTypes
[ii
];
779 setOperationAction(ISD::BUILD_VECTOR
, VT
, Custom
);
780 setOperationAction(ISD::EXTRACT_SUBVECTOR
, VT
, Custom
);
781 setOperationAction(ISD::SCALAR_TO_VECTOR
, VT
, Custom
);
782 setOperationAction(ISD::VECTOR_SHUFFLE
, VT
, Expand
);
783 setOperationAction(ISD::CONCAT_VECTORS
, VT
, Custom
);
784 setOperationAction(ISD::FP_ROUND
, VT
, Expand
);
785 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
786 setOperationAction(ISD::UDIVREM
, VT
, Expand
);
787 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
788 // setOperationAction(ISD::VSETCC, VT, Expand);
789 setOperationAction(ISD::SETCC
, VT
, Expand
);
790 setOperationAction(ISD::SELECT_CC
, VT
, Expand
);
791 setOperationAction(ISD::SELECT
, VT
, Expand
);
794 setOperationAction(ISD::FP_ROUND
, MVT::Other
, Expand
);
795 if (stm
->device()->isSupported(AMDILDeviceInfo::LongOps
)) {
796 if (stm
->calVersion() < CAL_VERSION_SC_139
797 || stm
->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
798 setOperationAction(ISD::MUL
, MVT::i64
, Custom
);
800 setOperationAction(ISD::SUB
, MVT::i64
, Custom
);
801 setOperationAction(ISD::ADD
, MVT::i64
, Custom
);
802 setOperationAction(ISD::MULHU
, MVT::i64
, Expand
);
803 setOperationAction(ISD::MULHU
, MVT::v2i64
, Expand
);
804 setOperationAction(ISD::MULHS
, MVT::i64
, Expand
);
805 setOperationAction(ISD::MULHS
, MVT::v2i64
, Expand
);
806 setOperationAction(ISD::MUL
, MVT::v2i64
, Expand
);
807 setOperationAction(ISD::SUB
, MVT::v2i64
, Expand
);
808 setOperationAction(ISD::ADD
, MVT::v2i64
, Expand
);
809 setOperationAction(ISD::SREM
, MVT::v2i64
, Expand
);
810 setOperationAction(ISD::Constant
, MVT::i64
, Legal
);
811 setOperationAction(ISD::UDIV
, MVT::v2i64
, Expand
);
812 setOperationAction(ISD::SDIV
, MVT::v2i64
, Expand
);
813 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i64
, Expand
);
814 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i64
, Expand
);
815 setOperationAction(ISD::FP_TO_SINT
, MVT::v2i64
, Expand
);
816 setOperationAction(ISD::FP_TO_UINT
, MVT::v2i64
, Expand
);
817 setOperationAction(ISD::TRUNCATE
, MVT::v2i64
, Expand
);
818 setOperationAction(ISD::SIGN_EXTEND
, MVT::v2i64
, Expand
);
819 setOperationAction(ISD::ZERO_EXTEND
, MVT::v2i64
, Expand
);
820 setOperationAction(ISD::ANY_EXTEND
, MVT::v2i64
, Expand
);
822 if (stm
->device()->isSupported(AMDILDeviceInfo::DoubleOps
)) {
823 // we support loading/storing v2f64 but not operations on the type
824 setOperationAction(ISD::FADD
, MVT::v2f64
, Expand
);
825 setOperationAction(ISD::FSUB
, MVT::v2f64
, Expand
);
826 setOperationAction(ISD::FMUL
, MVT::v2f64
, Expand
);
827 setOperationAction(ISD::FP_ROUND
, MVT::v2f64
, Expand
);
828 setOperationAction(ISD::FP_ROUND_INREG
, MVT::v2f64
, Expand
);
829 setOperationAction(ISD::FP_EXTEND
, MVT::v2f64
, Expand
);
830 setOperationAction(ISD::ConstantFP
, MVT::f64
, Legal
);
831 setOperationAction(ISD::FDIV
, MVT::v2f64
, Expand
);
832 // We want to expand vector conversions into their scalar
834 setOperationAction(ISD::SINT_TO_FP
, MVT::v2f64
, Expand
);
835 setOperationAction(ISD::UINT_TO_FP
, MVT::v2f64
, Expand
);
836 setOperationAction(ISD::FP_TO_SINT
, MVT::v2f64
, Expand
);
837 setOperationAction(ISD::FP_TO_UINT
, MVT::v2f64
, Expand
);
838 setOperationAction(ISD::TRUNCATE
, MVT::v2f64
, Expand
);
839 setOperationAction(ISD::SIGN_EXTEND
, MVT::v2f64
, Expand
);
840 setOperationAction(ISD::ZERO_EXTEND
, MVT::v2f64
, Expand
);
841 setOperationAction(ISD::ANY_EXTEND
, MVT::v2f64
, Expand
);
842 setOperationAction(ISD::FABS
, MVT::f64
, Expand
);
843 setOperationAction(ISD::FABS
, MVT::v2f64
, Expand
);
845 // TODO: Fix the UDIV24 algorithm so it works for these
846 // types correctly. This needs vector comparisons
847 // for this to work correctly.
848 setOperationAction(ISD::UDIV
, MVT::v2i8
, Expand
);
849 setOperationAction(ISD::UDIV
, MVT::v4i8
, Expand
);
850 setOperationAction(ISD::UDIV
, MVT::v2i16
, Expand
);
851 setOperationAction(ISD::UDIV
, MVT::v4i16
, Expand
);
852 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Custom
);
853 setOperationAction(ISD::SUBC
, MVT::Other
, Expand
);
854 setOperationAction(ISD::ADDE
, MVT::Other
, Expand
);
855 setOperationAction(ISD::ADDC
, MVT::Other
, Expand
);
856 setOperationAction(ISD::BRCOND
, MVT::Other
, Custom
);
857 setOperationAction(ISD::BR_CC
, MVT::Other
, Custom
);
858 setOperationAction(ISD::BR_JT
, MVT::Other
, Expand
);
859 setOperationAction(ISD::BRIND
, MVT::Other
, Expand
);
860 setOperationAction(ISD::SETCC
, MVT::Other
, Custom
);
861 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::Other
, Expand
);
862 setOperationAction(ISD::FDIV
, MVT::f32
, Custom
);
863 setOperationAction(ISD::FDIV
, MVT::v2f32
, Custom
);
864 setOperationAction(ISD::FDIV
, MVT::v4f32
, Custom
);
866 setOperationAction(ISD::BUILD_VECTOR
, MVT::Other
, Custom
);
867 // Use the default implementation.
868 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
869 setOperationAction(ISD::VACOPY
, MVT::Other
, Expand
);
870 setOperationAction(ISD::VAEND
, MVT::Other
, Expand
);
871 setOperationAction(ISD::STACKSAVE
, MVT::Other
, Expand
);
872 setOperationAction(ISD::STACKRESTORE
, MVT::Other
, Expand
);
873 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVT::i32
, Custom
);
874 setOperationAction(ISD::ConstantFP
, MVT::f32
, Legal
);
875 setOperationAction(ISD::Constant
, MVT::i32
, Legal
);
876 setOperationAction(ISD::TRAP
, MVT::Other
, Legal
);
878 setStackPointerRegisterToSaveRestore(AMDIL::SP
);
879 setSchedulingPreference(Sched::RegPressure
);
880 setPow2DivIsCheap(false);
881 setPrefLoopAlignment(16);
882 setSelectIsExpensive(true);
883 setJumpIsExpensive(true);
884 computeRegisterProperties();
886 maxStoresPerMemcpy
= 4096;
887 maxStoresPerMemmove
= 4096;
888 maxStoresPerMemset
= 4096;
892 #undef numVectorTypes
897 AMDILTargetLowering::getTargetNodeName(unsigned Opcode
) const
901 case AMDILISD::INTTOANY
: return "AMDILISD::INTTOANY";
902 case AMDILISD::DP_TO_FP
: return "AMDILISD::DP_TO_FP";
903 case AMDILISD::FP_TO_DP
: return "AMDILISD::FP_TO_DP";
904 case AMDILISD::BITCONV
: return "AMDILISD::BITCONV";
905 case AMDILISD::CMOV
: return "AMDILISD::CMOV";
906 case AMDILISD::CMOVLOG
: return "AMDILISD::CMOVLOG";
907 case AMDILISD::INEGATE
: return "AMDILISD::INEGATE";
908 case AMDILISD::MAD
: return "AMDILISD::MAD";
909 case AMDILISD::UMAD
: return "AMDILISD::UMAD";
910 case AMDILISD::CALL
: return "AMDILISD::CALL";
911 case AMDILISD::RET
: return "AMDILISD::RET";
912 case AMDILISD::IFFB_HI
: return "AMDILISD::IFFB_HI";
913 case AMDILISD::IFFB_LO
: return "AMDILISD::IFFB_LO";
914 case AMDILISD::ADD
: return "AMDILISD::ADD";
915 case AMDILISD::UMUL
: return "AMDILISD::UMUL";
916 case AMDILISD::AND
: return "AMDILISD::AND";
917 case AMDILISD::OR
: return "AMDILISD::OR";
918 case AMDILISD::NOT
: return "AMDILISD::NOT";
919 case AMDILISD::XOR
: return "AMDILISD::XOR";
920 case AMDILISD::DIV_INF
: return "AMDILISD::DIV_INF";
921 case AMDILISD::SMAX
: return "AMDILISD::SMAX";
922 case AMDILISD::PHIMOVE
: return "AMDILISD::PHIMOVE";
923 case AMDILISD::MOVE
: return "AMDILISD::MOVE";
924 case AMDILISD::VBUILD
: return "AMDILISD::VBUILD";
925 case AMDILISD::VEXTRACT
: return "AMDILISD::VEXTRACT";
926 case AMDILISD::VINSERT
: return "AMDILISD::VINSERT";
927 case AMDILISD::VCONCAT
: return "AMDILISD::VCONCAT";
928 case AMDILISD::LCREATE
: return "AMDILISD::LCREATE";
929 case AMDILISD::LCOMPHI
: return "AMDILISD::LCOMPHI";
930 case AMDILISD::LCOMPLO
: return "AMDILISD::LCOMPLO";
931 case AMDILISD::DCREATE
: return "AMDILISD::DCREATE";
932 case AMDILISD::DCOMPHI
: return "AMDILISD::DCOMPHI";
933 case AMDILISD::DCOMPLO
: return "AMDILISD::DCOMPLO";
934 case AMDILISD::LCREATE2
: return "AMDILISD::LCREATE2";
935 case AMDILISD::LCOMPHI2
: return "AMDILISD::LCOMPHI2";
936 case AMDILISD::LCOMPLO2
: return "AMDILISD::LCOMPLO2";
937 case AMDILISD::DCREATE2
: return "AMDILISD::DCREATE2";
938 case AMDILISD::DCOMPHI2
: return "AMDILISD::DCOMPHI2";
939 case AMDILISD::DCOMPLO2
: return "AMDILISD::DCOMPLO2";
940 case AMDILISD::CMP
: return "AMDILISD::CMP";
941 case AMDILISD::IL_CC_I_LT
: return "AMDILISD::IL_CC_I_LT";
942 case AMDILISD::IL_CC_I_LE
: return "AMDILISD::IL_CC_I_LE";
943 case AMDILISD::IL_CC_I_GT
: return "AMDILISD::IL_CC_I_GT";
944 case AMDILISD::IL_CC_I_GE
: return "AMDILISD::IL_CC_I_GE";
945 case AMDILISD::IL_CC_I_EQ
: return "AMDILISD::IL_CC_I_EQ";
946 case AMDILISD::IL_CC_I_NE
: return "AMDILISD::IL_CC_I_NE";
947 case AMDILISD::RET_FLAG
: return "AMDILISD::RET_FLAG";
948 case AMDILISD::BRANCH_COND
: return "AMDILISD::BRANCH_COND";
949 case AMDILISD::LOOP_NZERO
: return "AMDILISD::LOOP_NZERO";
950 case AMDILISD::LOOP_ZERO
: return "AMDILISD::LOOP_ZERO";
951 case AMDILISD::LOOP_CMP
: return "AMDILISD::LOOP_CMP";
952 case AMDILISD::ADDADDR
: return "AMDILISD::ADDADDR";
953 case AMDILISD::ATOM_G_ADD
: return "AMDILISD::ATOM_G_ADD";
954 case AMDILISD::ATOM_G_AND
: return "AMDILISD::ATOM_G_AND";
955 case AMDILISD::ATOM_G_CMPXCHG
: return "AMDILISD::ATOM_G_CMPXCHG";
956 case AMDILISD::ATOM_G_DEC
: return "AMDILISD::ATOM_G_DEC";
957 case AMDILISD::ATOM_G_INC
: return "AMDILISD::ATOM_G_INC";
958 case AMDILISD::ATOM_G_MAX
: return "AMDILISD::ATOM_G_MAX";
959 case AMDILISD::ATOM_G_UMAX
: return "AMDILISD::ATOM_G_UMAX";
960 case AMDILISD::ATOM_G_MIN
: return "AMDILISD::ATOM_G_MIN";
961 case AMDILISD::ATOM_G_UMIN
: return "AMDILISD::ATOM_G_UMIN";
962 case AMDILISD::ATOM_G_OR
: return "AMDILISD::ATOM_G_OR";
963 case AMDILISD::ATOM_G_SUB
: return "AMDILISD::ATOM_G_SUB";
964 case AMDILISD::ATOM_G_RSUB
: return "AMDILISD::ATOM_G_RSUB";
965 case AMDILISD::ATOM_G_XCHG
: return "AMDILISD::ATOM_G_XCHG";
966 case AMDILISD::ATOM_G_XOR
: return "AMDILISD::ATOM_G_XOR";
967 case AMDILISD::ATOM_G_ADD_NORET
: return "AMDILISD::ATOM_G_ADD_NORET";
968 case AMDILISD::ATOM_G_AND_NORET
: return "AMDILISD::ATOM_G_AND_NORET";
969 case AMDILISD::ATOM_G_CMPXCHG_NORET
: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
970 case AMDILISD::ATOM_G_DEC_NORET
: return "AMDILISD::ATOM_G_DEC_NORET";
971 case AMDILISD::ATOM_G_INC_NORET
: return "AMDILISD::ATOM_G_INC_NORET";
972 case AMDILISD::ATOM_G_MAX_NORET
: return "AMDILISD::ATOM_G_MAX_NORET";
973 case AMDILISD::ATOM_G_UMAX_NORET
: return "AMDILISD::ATOM_G_UMAX_NORET";
974 case AMDILISD::ATOM_G_MIN_NORET
: return "AMDILISD::ATOM_G_MIN_NORET";
975 case AMDILISD::ATOM_G_UMIN_NORET
: return "AMDILISD::ATOM_G_UMIN_NORET";
976 case AMDILISD::ATOM_G_OR_NORET
: return "AMDILISD::ATOM_G_OR_NORET";
977 case AMDILISD::ATOM_G_SUB_NORET
: return "AMDILISD::ATOM_G_SUB_NORET";
978 case AMDILISD::ATOM_G_RSUB_NORET
: return "AMDILISD::ATOM_G_RSUB_NORET";
979 case AMDILISD::ATOM_G_XCHG_NORET
: return "AMDILISD::ATOM_G_XCHG_NORET";
980 case AMDILISD::ATOM_G_XOR_NORET
: return "AMDILISD::ATOM_G_XOR_NORET";
981 case AMDILISD::ATOM_L_ADD
: return "AMDILISD::ATOM_L_ADD";
982 case AMDILISD::ATOM_L_AND
: return "AMDILISD::ATOM_L_AND";
983 case AMDILISD::ATOM_L_CMPXCHG
: return "AMDILISD::ATOM_L_CMPXCHG";
984 case AMDILISD::ATOM_L_DEC
: return "AMDILISD::ATOM_L_DEC";
985 case AMDILISD::ATOM_L_INC
: return "AMDILISD::ATOM_L_INC";
986 case AMDILISD::ATOM_L_MAX
: return "AMDILISD::ATOM_L_MAX";
987 case AMDILISD::ATOM_L_UMAX
: return "AMDILISD::ATOM_L_UMAX";
988 case AMDILISD::ATOM_L_MIN
: return "AMDILISD::ATOM_L_MIN";
989 case AMDILISD::ATOM_L_UMIN
: return "AMDILISD::ATOM_L_UMIN";
990 case AMDILISD::ATOM_L_OR
: return "AMDILISD::ATOM_L_OR";
991 case AMDILISD::ATOM_L_SUB
: return "AMDILISD::ATOM_L_SUB";
992 case AMDILISD::ATOM_L_RSUB
: return "AMDILISD::ATOM_L_RSUB";
993 case AMDILISD::ATOM_L_XCHG
: return "AMDILISD::ATOM_L_XCHG";
994 case AMDILISD::ATOM_L_XOR
: return "AMDILISD::ATOM_L_XOR";
995 case AMDILISD::ATOM_L_ADD_NORET
: return "AMDILISD::ATOM_L_ADD_NORET";
996 case AMDILISD::ATOM_L_AND_NORET
: return "AMDILISD::ATOM_L_AND_NORET";
997 case AMDILISD::ATOM_L_CMPXCHG_NORET
: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
998 case AMDILISD::ATOM_L_DEC_NORET
: return "AMDILISD::ATOM_L_DEC_NORET";
999 case AMDILISD::ATOM_L_INC_NORET
: return "AMDILISD::ATOM_L_INC_NORET";
1000 case AMDILISD::ATOM_L_MAX_NORET
: return "AMDILISD::ATOM_L_MAX_NORET";
1001 case AMDILISD::ATOM_L_UMAX_NORET
: return "AMDILISD::ATOM_L_UMAX_NORET";
1002 case AMDILISD::ATOM_L_MIN_NORET
: return "AMDILISD::ATOM_L_MIN_NORET";
1003 case AMDILISD::ATOM_L_UMIN_NORET
: return "AMDILISD::ATOM_L_UMIN_NORET";
1004 case AMDILISD::ATOM_L_OR_NORET
: return "AMDILISD::ATOM_L_OR_NORET";
1005 case AMDILISD::ATOM_L_SUB_NORET
: return "AMDILISD::ATOM_L_SUB_NORET";
1006 case AMDILISD::ATOM_L_RSUB_NORET
: return "AMDILISD::ATOM_L_RSUB_NORET";
1007 case AMDILISD::ATOM_L_XCHG_NORET
: return "AMDILISD::ATOM_L_XCHG_NORET";
1008 case AMDILISD::ATOM_R_ADD
: return "AMDILISD::ATOM_R_ADD";
1009 case AMDILISD::ATOM_R_AND
: return "AMDILISD::ATOM_R_AND";
1010 case AMDILISD::ATOM_R_CMPXCHG
: return "AMDILISD::ATOM_R_CMPXCHG";
1011 case AMDILISD::ATOM_R_DEC
: return "AMDILISD::ATOM_R_DEC";
1012 case AMDILISD::ATOM_R_INC
: return "AMDILISD::ATOM_R_INC";
1013 case AMDILISD::ATOM_R_MAX
: return "AMDILISD::ATOM_R_MAX";
1014 case AMDILISD::ATOM_R_UMAX
: return "AMDILISD::ATOM_R_UMAX";
1015 case AMDILISD::ATOM_R_MIN
: return "AMDILISD::ATOM_R_MIN";
1016 case AMDILISD::ATOM_R_UMIN
: return "AMDILISD::ATOM_R_UMIN";
1017 case AMDILISD::ATOM_R_OR
: return "AMDILISD::ATOM_R_OR";
1018 case AMDILISD::ATOM_R_MSKOR
: return "AMDILISD::ATOM_R_MSKOR";
1019 case AMDILISD::ATOM_R_SUB
: return "AMDILISD::ATOM_R_SUB";
1020 case AMDILISD::ATOM_R_RSUB
: return "AMDILISD::ATOM_R_RSUB";
1021 case AMDILISD::ATOM_R_XCHG
: return "AMDILISD::ATOM_R_XCHG";
1022 case AMDILISD::ATOM_R_XOR
: return "AMDILISD::ATOM_R_XOR";
1023 case AMDILISD::ATOM_R_ADD_NORET
: return "AMDILISD::ATOM_R_ADD_NORET";
1024 case AMDILISD::ATOM_R_AND_NORET
: return "AMDILISD::ATOM_R_AND_NORET";
1025 case AMDILISD::ATOM_R_CMPXCHG_NORET
: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
1026 case AMDILISD::ATOM_R_DEC_NORET
: return "AMDILISD::ATOM_R_DEC_NORET";
1027 case AMDILISD::ATOM_R_INC_NORET
: return "AMDILISD::ATOM_R_INC_NORET";
1028 case AMDILISD::ATOM_R_MAX_NORET
: return "AMDILISD::ATOM_R_MAX_NORET";
1029 case AMDILISD::ATOM_R_UMAX_NORET
: return "AMDILISD::ATOM_R_UMAX_NORET";
1030 case AMDILISD::ATOM_R_MIN_NORET
: return "AMDILISD::ATOM_R_MIN_NORET";
1031 case AMDILISD::ATOM_R_UMIN_NORET
: return "AMDILISD::ATOM_R_UMIN_NORET";
1032 case AMDILISD::ATOM_R_OR_NORET
: return "AMDILISD::ATOM_R_OR_NORET";
1033 case AMDILISD::ATOM_R_MSKOR_NORET
: return "AMDILISD::ATOM_R_MSKOR_NORET";
1034 case AMDILISD::ATOM_R_SUB_NORET
: return "AMDILISD::ATOM_R_SUB_NORET";
1035 case AMDILISD::ATOM_R_RSUB_NORET
: return "AMDILISD::ATOM_R_RSUB_NORET";
1036 case AMDILISD::ATOM_R_XCHG_NORET
: return "AMDILISD::ATOM_R_XCHG_NORET";
1037 case AMDILISD::ATOM_R_XOR_NORET
: return "AMDILISD::ATOM_R_XOR_NORET";
1038 case AMDILISD::APPEND_ALLOC
: return "AMDILISD::APPEND_ALLOC";
1039 case AMDILISD::APPEND_ALLOC_NORET
: return "AMDILISD::APPEND_ALLOC_NORET";
1040 case AMDILISD::APPEND_CONSUME
: return "AMDILISD::APPEND_CONSUME";
1041 case AMDILISD::APPEND_CONSUME_NORET
: return "AMDILISD::APPEND_CONSUME_NORET";
1042 case AMDILISD::IMAGE2D_READ
: return "AMDILISD::IMAGE2D_READ";
1043 case AMDILISD::IMAGE2D_WRITE
: return "AMDILISD::IMAGE2D_WRITE";
1044 case AMDILISD::IMAGE2D_INFO0
: return "AMDILISD::IMAGE2D_INFO0";
1045 case AMDILISD::IMAGE2D_INFO1
: return "AMDILISD::IMAGE2D_INFO1";
1046 case AMDILISD::IMAGE3D_READ
: return "AMDILISD::IMAGE3D_READ";
1047 case AMDILISD::IMAGE3D_WRITE
: return "AMDILISD::IMAGE3D_WRITE";
1048 case AMDILISD::IMAGE3D_INFO0
: return "AMDILISD::IMAGE3D_INFO0";
1049 case AMDILISD::IMAGE3D_INFO1
: return "AMDILISD::IMAGE3D_INFO1";
1054 AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo
&Info
,
1055 const CallInst
&I
, unsigned Intrinsic
) const
1057 if (Intrinsic
<= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
1058 || Intrinsic
> AMDGPUIntrinsic::num_AMDIL_intrinsics
) {
1061 bool bitCastToInt
= false;
1064 const AMDILSubtarget
*STM
= &this->getTargetMachine()
1065 .getSubtarget
<AMDILSubtarget
>();
1066 switch (Intrinsic
) {
1067 default: return false; // Don't custom lower most intrinsics.
1068 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32
:
1069 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32
:
1070 IntNo
= AMDILISD::ATOM_G_ADD
; break;
1071 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret
:
1072 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret
:
1074 IntNo
= AMDILISD::ATOM_G_ADD_NORET
; break;
1075 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32
:
1076 case AMDGPUIntrinsic::AMDIL_atomic_add_li32
:
1077 IntNo
= AMDILISD::ATOM_L_ADD
; break;
1078 case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret
:
1079 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret
:
1081 IntNo
= AMDILISD::ATOM_L_ADD_NORET
; break;
1082 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32
:
1083 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32
:
1084 IntNo
= AMDILISD::ATOM_R_ADD
; break;
1085 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret
:
1086 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret
:
1088 IntNo
= AMDILISD::ATOM_R_ADD_NORET
; break;
1089 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32
:
1090 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32
:
1091 IntNo
= AMDILISD::ATOM_G_AND
; break;
1092 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret
:
1093 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret
:
1095 IntNo
= AMDILISD::ATOM_G_AND_NORET
; break;
1096 case AMDGPUIntrinsic::AMDIL_atomic_and_li32
:
1097 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32
:
1098 IntNo
= AMDILISD::ATOM_L_AND
; break;
1099 case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret
:
1100 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret
:
1102 IntNo
= AMDILISD::ATOM_L_AND_NORET
; break;
1103 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32
:
1104 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32
:
1105 IntNo
= AMDILISD::ATOM_R_AND
; break;
1106 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret
:
1107 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret
:
1109 IntNo
= AMDILISD::ATOM_R_AND_NORET
; break;
1110 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32
:
1111 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32
:
1112 IntNo
= AMDILISD::ATOM_G_CMPXCHG
; break;
1113 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret
:
1114 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret
:
1116 IntNo
= AMDILISD::ATOM_G_CMPXCHG_NORET
; break;
1117 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32
:
1118 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32
:
1119 IntNo
= AMDILISD::ATOM_L_CMPXCHG
; break;
1120 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret
:
1121 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret
:
1123 IntNo
= AMDILISD::ATOM_L_CMPXCHG_NORET
; break;
1124 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32
:
1125 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32
:
1126 IntNo
= AMDILISD::ATOM_R_CMPXCHG
; break;
1127 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret
:
1128 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret
:
1130 IntNo
= AMDILISD::ATOM_R_CMPXCHG_NORET
; break;
1131 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32
:
1132 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32
:
1133 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1134 IntNo
= AMDILISD::ATOM_G_DEC
;
1136 IntNo
= AMDILISD::ATOM_G_SUB
;
1139 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret
:
1140 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret
:
1142 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1143 IntNo
= AMDILISD::ATOM_G_DEC_NORET
;
1145 IntNo
= AMDILISD::ATOM_G_SUB_NORET
;
1148 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32
:
1149 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32
:
1150 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1151 IntNo
= AMDILISD::ATOM_L_DEC
;
1153 IntNo
= AMDILISD::ATOM_L_SUB
;
1156 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret
:
1157 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret
:
1159 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1160 IntNo
= AMDILISD::ATOM_L_DEC_NORET
;
1162 IntNo
= AMDILISD::ATOM_L_SUB_NORET
;
1165 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32
:
1166 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32
:
1167 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1168 IntNo
= AMDILISD::ATOM_R_DEC
;
1170 IntNo
= AMDILISD::ATOM_R_SUB
;
1173 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret
:
1174 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret
:
1176 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1177 IntNo
= AMDILISD::ATOM_R_DEC_NORET
;
1179 IntNo
= AMDILISD::ATOM_R_SUB_NORET
;
1182 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32
:
1183 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32
:
1184 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1185 IntNo
= AMDILISD::ATOM_G_INC
;
1187 IntNo
= AMDILISD::ATOM_G_ADD
;
1190 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret
:
1191 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret
:
1193 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1194 IntNo
= AMDILISD::ATOM_G_INC_NORET
;
1196 IntNo
= AMDILISD::ATOM_G_ADD_NORET
;
1199 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32
:
1200 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32
:
1201 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1202 IntNo
= AMDILISD::ATOM_L_INC
;
1204 IntNo
= AMDILISD::ATOM_L_ADD
;
1207 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret
:
1208 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret
:
1210 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1211 IntNo
= AMDILISD::ATOM_L_INC_NORET
;
1213 IntNo
= AMDILISD::ATOM_L_ADD_NORET
;
1216 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32
:
1217 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32
:
1218 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1219 IntNo
= AMDILISD::ATOM_R_INC
;
1221 IntNo
= AMDILISD::ATOM_R_ADD
;
1224 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret
:
1225 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret
:
1227 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1228 IntNo
= AMDILISD::ATOM_R_INC_NORET
;
1230 IntNo
= AMDILISD::ATOM_R_ADD_NORET
;
1233 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32
:
1234 IntNo
= AMDILISD::ATOM_G_MAX
; break;
1235 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32
:
1236 IntNo
= AMDILISD::ATOM_G_UMAX
; break;
1237 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret
:
1239 IntNo
= AMDILISD::ATOM_G_MAX_NORET
; break;
1240 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret
:
1242 IntNo
= AMDILISD::ATOM_G_UMAX_NORET
; break;
1243 case AMDGPUIntrinsic::AMDIL_atomic_max_li32
:
1244 IntNo
= AMDILISD::ATOM_L_MAX
; break;
1245 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32
:
1246 IntNo
= AMDILISD::ATOM_L_UMAX
; break;
1247 case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret
:
1249 IntNo
= AMDILISD::ATOM_L_MAX_NORET
; break;
1250 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret
:
1252 IntNo
= AMDILISD::ATOM_L_UMAX_NORET
; break;
1253 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32
:
1254 IntNo
= AMDILISD::ATOM_R_MAX
; break;
1255 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32
:
1256 IntNo
= AMDILISD::ATOM_R_UMAX
; break;
1257 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret
:
1259 IntNo
= AMDILISD::ATOM_R_MAX_NORET
; break;
1260 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret
:
1262 IntNo
= AMDILISD::ATOM_R_UMAX_NORET
; break;
1263 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32
:
1264 IntNo
= AMDILISD::ATOM_G_MIN
; break;
1265 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32
:
1266 IntNo
= AMDILISD::ATOM_G_UMIN
; break;
1267 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret
:
1269 IntNo
= AMDILISD::ATOM_G_MIN_NORET
; break;
1270 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret
:
1272 IntNo
= AMDILISD::ATOM_G_UMIN_NORET
; break;
1273 case AMDGPUIntrinsic::AMDIL_atomic_min_li32
:
1274 IntNo
= AMDILISD::ATOM_L_MIN
; break;
1275 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32
:
1276 IntNo
= AMDILISD::ATOM_L_UMIN
; break;
1277 case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret
:
1279 IntNo
= AMDILISD::ATOM_L_MIN_NORET
; break;
1280 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret
:
1282 IntNo
= AMDILISD::ATOM_L_UMIN_NORET
; break;
1283 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32
:
1284 IntNo
= AMDILISD::ATOM_R_MIN
; break;
1285 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32
:
1286 IntNo
= AMDILISD::ATOM_R_UMIN
; break;
1287 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret
:
1289 IntNo
= AMDILISD::ATOM_R_MIN_NORET
; break;
1290 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret
:
1292 IntNo
= AMDILISD::ATOM_R_UMIN_NORET
; break;
1293 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32
:
1294 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32
:
1295 IntNo
= AMDILISD::ATOM_G_OR
; break;
1296 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret
:
1297 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret
:
1299 IntNo
= AMDILISD::ATOM_G_OR_NORET
; break;
1300 case AMDGPUIntrinsic::AMDIL_atomic_or_li32
:
1301 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32
:
1302 IntNo
= AMDILISD::ATOM_L_OR
; break;
1303 case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret
:
1304 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret
:
1306 IntNo
= AMDILISD::ATOM_L_OR_NORET
; break;
1307 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32
:
1308 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32
:
1309 IntNo
= AMDILISD::ATOM_R_OR
; break;
1310 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret
:
1311 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret
:
1313 IntNo
= AMDILISD::ATOM_R_OR_NORET
; break;
1314 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32
:
1315 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32
:
1316 IntNo
= AMDILISD::ATOM_G_SUB
; break;
1317 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret
:
1318 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret
:
1320 IntNo
= AMDILISD::ATOM_G_SUB_NORET
; break;
1321 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32
:
1322 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32
:
1323 IntNo
= AMDILISD::ATOM_L_SUB
; break;
1324 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret
:
1325 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret
:
1327 IntNo
= AMDILISD::ATOM_L_SUB_NORET
; break;
1328 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32
:
1329 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32
:
1330 IntNo
= AMDILISD::ATOM_R_SUB
; break;
1331 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret
:
1332 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret
:
1334 IntNo
= AMDILISD::ATOM_R_SUB_NORET
; break;
1335 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32
:
1336 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32
:
1337 IntNo
= AMDILISD::ATOM_G_RSUB
; break;
1338 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret
:
1339 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret
:
1341 IntNo
= AMDILISD::ATOM_G_RSUB_NORET
; break;
1342 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32
:
1343 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32
:
1344 IntNo
= AMDILISD::ATOM_L_RSUB
; break;
1345 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret
:
1346 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret
:
1348 IntNo
= AMDILISD::ATOM_L_RSUB_NORET
; break;
1349 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32
:
1350 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32
:
1351 IntNo
= AMDILISD::ATOM_R_RSUB
; break;
1352 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret
:
1353 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret
:
1355 IntNo
= AMDILISD::ATOM_R_RSUB_NORET
; break;
1356 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32
:
1357 bitCastToInt
= true;
1358 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32
:
1359 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32
:
1360 IntNo
= AMDILISD::ATOM_G_XCHG
; break;
1361 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret
:
1362 bitCastToInt
= true;
1363 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret
:
1364 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret
:
1366 IntNo
= AMDILISD::ATOM_G_XCHG_NORET
; break;
1367 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32
:
1368 bitCastToInt
= true;
1369 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32
:
1370 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32
:
1371 IntNo
= AMDILISD::ATOM_L_XCHG
; break;
1372 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret
:
1373 bitCastToInt
= true;
1374 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret
:
1375 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret
:
1377 IntNo
= AMDILISD::ATOM_L_XCHG_NORET
; break;
1378 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32
:
1379 bitCastToInt
= true;
1380 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32
:
1381 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32
:
1382 IntNo
= AMDILISD::ATOM_R_XCHG
; break;
1383 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret
:
1384 bitCastToInt
= true;
1385 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret
:
1386 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret
:
1388 IntNo
= AMDILISD::ATOM_R_XCHG_NORET
; break;
1389 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32
:
1390 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32
:
1391 IntNo
= AMDILISD::ATOM_G_XOR
; break;
1392 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret
:
1393 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret
:
1395 IntNo
= AMDILISD::ATOM_G_XOR_NORET
; break;
1396 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32
:
1397 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32
:
1398 IntNo
= AMDILISD::ATOM_L_XOR
; break;
1399 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret
:
1400 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret
:
1402 IntNo
= AMDILISD::ATOM_L_XOR_NORET
; break;
1403 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32
:
1404 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32
:
1405 IntNo
= AMDILISD::ATOM_R_XOR
; break;
1406 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret
:
1407 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret
:
1409 IntNo
= AMDILISD::ATOM_R_XOR_NORET
; break;
1410 case AMDGPUIntrinsic::AMDIL_append_alloc_i32
:
1411 IntNo
= AMDILISD::APPEND_ALLOC
; break;
1412 case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret
:
1414 IntNo
= AMDILISD::APPEND_ALLOC_NORET
; break;
1415 case AMDGPUIntrinsic::AMDIL_append_consume_i32
:
1416 IntNo
= AMDILISD::APPEND_CONSUME
; break;
1417 case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret
:
1419 IntNo
= AMDILISD::APPEND_CONSUME_NORET
; break;
1423 Info
.memVT
= (bitCastToInt
) ? MVT::f32
: MVT::i32
;
1424 Info
.ptrVal
= I
.getOperand(0);
1428 Info
.readMem
= isRet
;
1429 Info
.writeMem
= true;
1432 // The backend supports 32 and 64 bit floating point immediates
1434 AMDILTargetLowering::isFPImmLegal(const APFloat
&Imm
, EVT VT
) const
1436 if (VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f32
1437 || VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f64
) {
1445 AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT
) const
1447 if (VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f32
1448 || VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f64
) {
1456 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
1457 // be zero. Op is expected to be a target specific node. Used by DAG
1461 AMDILTargetLowering::computeMaskedBitsForTargetNode(
1465 const SelectionDAG
&DAG
,
1466 unsigned Depth
) const
1470 KnownZero
= KnownOne
= APInt(KnownOne
.getBitWidth(), 0); // Don't know anything
1471 switch (Op
.getOpcode()) {
1473 case AMDILISD::SELECT_CC
:
1474 DAG
.ComputeMaskedBits(
1480 DAG
.ComputeMaskedBits(
1485 assert((KnownZero
& KnownOne
) == 0
1486 && "Bits known to be one AND zero?");
1487 assert((KnownZero2
& KnownOne2
) == 0
1488 && "Bits known to be one AND zero?");
1489 // Only known if known in both the LHS and RHS
1490 KnownOne
&= KnownOne2
;
1491 KnownZero
&= KnownZero2
;
1496 // This is the function that determines which calling convention should
1497 // be used. Currently there is only one calling convention
1499 AMDILTargetLowering::CCAssignFnForNode(unsigned int Op
) const
1501 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1505 // LowerCallResult - Lower the result values of an ISD::CALL into the
1506 // appropriate copies out of appropriate physical registers. This assumes that
1507 // Chain/InFlag are the input chain/flag to use, and that TheCall is the call
1508 // being lowered. The returns a SDNode with the same number of values as the
1511 AMDILTargetLowering::LowerCallResult(
1514 CallingConv::ID CallConv
,
1516 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
1519 SmallVectorImpl
<SDValue
> &InVals
) const
1521 // Assign locations to each value returned by this call
1522 SmallVector
<CCValAssign
, 16> RVLocs
;
1523 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
1524 getTargetMachine(), RVLocs
, *DAG
.getContext());
1525 CCInfo
.AnalyzeCallResult(Ins
, RetCC_AMDIL32
);
1527 // Copy all of the result registers out of their specified physreg.
1528 for (unsigned i
= 0; i
!= RVLocs
.size(); ++i
) {
1529 EVT CopyVT
= RVLocs
[i
].getValVT();
1530 if (RVLocs
[i
].isRegLoc()) {
1531 Chain
= DAG
.getCopyFromReg(
1534 RVLocs
[i
].getLocReg(),
1538 SDValue Val
= Chain
.getValue(0);
1539 InFlag
= Chain
.getValue(2);
1540 InVals
.push_back(Val
);
1548 //===----------------------------------------------------------------------===//
1549 // Other Lowering Hooks
1550 //===----------------------------------------------------------------------===//
1552 // Recursively assign SDNodeOrdering to any unordered nodes
1553 // This is necessary to maintain source ordering of instructions
1554 // under -O0 to avoid odd-looking "skipping around" issues.
1555 static const SDValue
1556 Ordered( SelectionDAG
&DAG
, unsigned order
, const SDValue New
)
1558 if (order
!= 0 && DAG
.GetOrdering( New
.getNode() ) == 0) {
1559 DAG
.AssignOrdering( New
.getNode(), order
);
1560 for (unsigned i
= 0, e
= New
.getNumOperands(); i
< e
; ++i
)
1561 Ordered( DAG
, order
, New
.getOperand(i
) );
1568 return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
1571 AMDILTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const
1573 switch (Op
.getOpcode()) {
1575 Op
.getNode()->dump();
1576 assert(0 && "Custom lowering code for this"
1577 "instruction is not implemented yet!");
1579 LOWER(GlobalAddress
);
1581 LOWER(ConstantPool
);
1582 LOWER(ExternalSymbol
);
1595 LOWER(BUILD_VECTOR
);
1596 LOWER(INSERT_VECTOR_ELT
);
1597 LOWER(EXTRACT_VECTOR_ELT
);
1598 LOWER(EXTRACT_SUBVECTOR
);
1599 LOWER(SCALAR_TO_VECTOR
);
1600 LOWER(CONCAT_VECTORS
);
1603 LOWER(SIGN_EXTEND_INREG
);
1605 LOWER(DYNAMIC_STACKALLOC
);
1614 AMDILTargetLowering::getVarArgsFrameOffset() const
1616 return VarArgsFrameOffset
;
1621 AMDILTargetLowering::LowerGlobalAddress(SDValue Op
, SelectionDAG
&DAG
) const
1624 const GlobalAddressSDNode
*GADN
= cast
<GlobalAddressSDNode
>(Op
);
1625 const GlobalValue
*G
= GADN
->getGlobal();
1626 DebugLoc DL
= Op
.getDebugLoc();
1627 const GlobalVariable
*GV
= dyn_cast
<GlobalVariable
>(G
);
1629 DST
= DAG
.getTargetGlobalAddress(GV
, DL
, MVT::i32
);
1631 if (GV
->hasInitializer()) {
1632 const Constant
*C
= dyn_cast
<Constant
>(GV
->getInitializer());
1633 if (const ConstantInt
*CI
= dyn_cast
<ConstantInt
>(C
)) {
1634 DST
= DAG
.getConstant(CI
->getValue(), Op
.getValueType());
1635 } else if (const ConstantFP
*CF
= dyn_cast
<ConstantFP
>(C
)) {
1636 DST
= DAG
.getConstantFP(CF
->getValueAPF(),
1638 } else if (dyn_cast
<ConstantAggregateZero
>(C
)) {
1639 EVT VT
= Op
.getValueType();
1640 if (VT
.isInteger()) {
1641 DST
= DAG
.getConstant(0, VT
);
1643 DST
= DAG
.getConstantFP(0, VT
);
1646 assert(!"lowering this type of Global Address "
1647 "not implemented yet!");
1649 DST
= DAG
.getTargetGlobalAddress(GV
, DL
, MVT::i32
);
1652 DST
= DAG
.getTargetGlobalAddress(GV
, DL
, MVT::i32
);
1659 AMDILTargetLowering::LowerJumpTable(SDValue Op
, SelectionDAG
&DAG
) const
1661 JumpTableSDNode
*JT
= cast
<JumpTableSDNode
>(Op
);
1662 SDValue Result
= DAG
.getTargetJumpTable(JT
->getIndex(), MVT::i32
);
1666 AMDILTargetLowering::LowerConstantPool(SDValue Op
, SelectionDAG
&DAG
) const
1668 ConstantPoolSDNode
*CP
= cast
<ConstantPoolSDNode
>(Op
);
1669 EVT PtrVT
= Op
.getValueType();
1671 if (CP
->isMachineConstantPoolEntry()) {
1672 Result
= DAG
.getTargetConstantPool(CP
->getMachineCPVal(), PtrVT
,
1673 CP
->getAlignment(), CP
->getOffset(), CP
->getTargetFlags());
1675 Result
= DAG
.getTargetConstantPool(CP
->getConstVal(), PtrVT
,
1676 CP
->getAlignment(), CP
->getOffset(), CP
->getTargetFlags());
1682 AMDILTargetLowering::LowerExternalSymbol(SDValue Op
, SelectionDAG
&DAG
) const
1684 const char *Sym
= cast
<ExternalSymbolSDNode
>(Op
)->getSymbol();
1685 SDValue Result
= DAG
.getTargetExternalSymbol(Sym
, MVT::i32
);
1689 /// LowerFORMAL_ARGUMENTS - transform physical registers into
1690 /// virtual registers and generate load operations for
1691 /// arguments places on the stack.
1692 /// TODO: isVarArg, hasStructRet, isMemReg
1694 AMDILTargetLowering::LowerFormalArguments(SDValue Chain
,
1695 CallingConv::ID CallConv
,
1697 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
1700 SmallVectorImpl
<SDValue
> &InVals
)
1704 MachineFunction
&MF
= DAG
.getMachineFunction();
1705 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1706 //const Function *Fn = MF.getFunction();
1707 //MachineRegisterInfo &RegInfo = MF.getRegInfo();
1709 SmallVector
<CCValAssign
, 16> ArgLocs
;
1710 CallingConv::ID CC
= MF
.getFunction()->getCallingConv();
1711 //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
1713 CCState
CCInfo(CC
, isVarArg
, DAG
.getMachineFunction(),
1714 getTargetMachine(), ArgLocs
, *DAG
.getContext());
1716 // When more calling conventions are added, they need to be chosen here
1717 CCInfo
.AnalyzeFormalArguments(Ins
, CC_AMDIL32
);
1720 //unsigned int FirstStackArgLoc = 0;
1722 for (unsigned int i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
1723 CCValAssign
&VA
= ArgLocs
[i
];
1724 if (VA
.isRegLoc()) {
1725 EVT RegVT
= VA
.getLocVT();
1726 const TargetRegisterClass
*RC
= getRegClassFromType(
1727 RegVT
.getSimpleVT().SimpleTy
);
1729 unsigned int Reg
= MF
.addLiveIn(VA
.getLocReg(), RC
);
1730 SDValue ArgValue
= DAG
.getCopyFromReg(
1735 // If this is an 8 or 16-bit value, it is really passed
1736 // promoted to 32 bits. Insert an assert[sz]ext to capture
1737 // this, then truncate to the right size.
1739 if (VA
.getLocInfo() == CCValAssign::SExt
) {
1740 ArgValue
= DAG
.getNode(
1745 DAG
.getValueType(VA
.getValVT()));
1746 } else if (VA
.getLocInfo() == CCValAssign::ZExt
) {
1747 ArgValue
= DAG
.getNode(
1752 DAG
.getValueType(VA
.getValVT()));
1754 if (VA
.getLocInfo() != CCValAssign::Full
) {
1755 ArgValue
= DAG
.getNode(
1761 // Add the value to the list of arguments
1762 // to be passed in registers
1763 InVals
.push_back(ArgValue
);
1765 assert(0 && "Variable arguments are not yet supported");
1766 // See MipsISelLowering.cpp for ideas on how to implement
1768 } else if(VA
.isMemLoc()) {
1769 InVals
.push_back(LowerMemArgument(Chain
, CallConv
, Ins
,
1770 dl
, DAG
, VA
, MFI
, i
));
1772 assert(0 && "found a Value Assign that is "
1773 "neither a register or a memory location");
1776 /*if (hasStructRet) {
1777 assert(0 && "Has struct return is not yet implemented");
1778 // See MipsISelLowering.cpp for ideas on how to implement
1782 assert(0 && "Variable arguments are not yet supported");
1783 // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
1785 // This needs to be changed to non-zero if the return function needs
1789 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1790 /// by "Src" to address "Dst" with size and alignment information specified by
1791 /// the specific parameter attribute. The copy will be passed as a byval
1792 /// function parameter.
1794 CreateCopyOfByValArgument(SDValue Src
, SDValue Dst
, SDValue Chain
,
1795 ISD::ArgFlagsTy Flags
, SelectionDAG
&DAG
) {
1796 assert(0 && "MemCopy does not exist yet");
1797 SDValue SizeNode
= DAG
.getConstant(Flags
.getByValSize(), MVT::i32
);
1799 return DAG
.getMemcpy(Chain
,
1801 Dst
, Src
, SizeNode
, Flags
.getByValAlign(),
1802 /*IsVol=*/false, /*AlwaysInline=*/true,
1803 MachinePointerInfo(), MachinePointerInfo());
1807 AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain
,
1808 SDValue StackPtr
, SDValue Arg
,
1809 DebugLoc dl
, SelectionDAG
&DAG
,
1810 const CCValAssign
&VA
,
1811 ISD::ArgFlagsTy Flags
) const
1813 unsigned int LocMemOffset
= VA
.getLocMemOffset();
1814 SDValue PtrOff
= DAG
.getIntPtrConstant(LocMemOffset
);
1815 PtrOff
= DAG
.getNode(ISD::ADD
,
1817 getPointerTy(), StackPtr
, PtrOff
);
1818 if (Flags
.isByVal()) {
1819 PtrOff
= CreateCopyOfByValArgument(Arg
, PtrOff
, Chain
, Flags
, DAG
);
1821 PtrOff
= DAG
.getStore(Chain
, dl
, Arg
, PtrOff
,
1822 MachinePointerInfo::getStack(LocMemOffset
),
1827 /// LowerCall - function arguments are copied from virtual
1828 /// regs to (physical regs)/(stack frame), CALLSEQ_START and
1829 /// CALLSEQ_END are emitted.
1830 /// TODO: isVarArg, isTailCall, hasStructRet
1832 AMDILTargetLowering::LowerCall(SDValue Chain
, SDValue Callee
,
1833 CallingConv::ID CallConv
, bool isVarArg
, bool doesNotRet
,
1835 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
1836 const SmallVectorImpl
<SDValue
> &OutVals
,
1837 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
1838 DebugLoc dl
, SelectionDAG
&DAG
,
1839 SmallVectorImpl
<SDValue
> &InVals
)
1843 MachineFunction
& MF
= DAG
.getMachineFunction();
1844 // FIXME: DO we need to handle fast calling conventions and tail call
1845 // optimizations?? X86/PPC ISelLowering
1846 /*bool hasStructRet = (TheCall->getNumArgs())
1847 ? TheCall->getArgFlags(0).device()->isSRet()
1850 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1852 // Analyze operands of the call, assigning locations to each operand
1853 SmallVector
<CCValAssign
, 16> ArgLocs
;
1854 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
1855 getTargetMachine(), ArgLocs
, *DAG
.getContext());
1856 // Analyze the calling operands, but need to change
1857 // if we have more than one calling convention
1858 CCInfo
.AnalyzeCallOperands(Outs
, CCAssignFnForNode(CallConv
));
1860 unsigned int NumBytes
= CCInfo
.getNextStackOffset();
1862 assert(isTailCall
&& "Tail Call not handled yet!");
1863 // See X86/PPC ISelLowering
1866 Chain
= DAG
.getCALLSEQ_START(Chain
, DAG
.getIntPtrConstant(NumBytes
, true));
1868 SmallVector
<std::pair
<unsigned int, SDValue
>, 8> RegsToPass
;
1869 SmallVector
<SDValue
, 8> MemOpChains
;
1871 //unsigned int FirstStackArgLoc = 0;
1872 //int LastArgStackLoc = 0;
1874 // Walk the register/memloc assignments, insert copies/loads
1875 for (unsigned int i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
1876 CCValAssign
&VA
= ArgLocs
[i
];
1877 //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
1878 // Arguments start after the 5 first operands of ISD::CALL
1879 SDValue Arg
= OutVals
[i
];
1880 //Promote the value if needed
1881 switch(VA
.getLocInfo()) {
1882 default: assert(0 && "Unknown loc info!");
1883 case CCValAssign::Full
:
1885 case CCValAssign::SExt
:
1886 Arg
= DAG
.getNode(ISD::SIGN_EXTEND
,
1888 VA
.getLocVT(), Arg
);
1890 case CCValAssign::ZExt
:
1891 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
,
1893 VA
.getLocVT(), Arg
);
1895 case CCValAssign::AExt
:
1896 Arg
= DAG
.getNode(ISD::ANY_EXTEND
,
1898 VA
.getLocVT(), Arg
);
1902 if (VA
.isRegLoc()) {
1903 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), Arg
));
1904 } else if (VA
.isMemLoc()) {
1905 // Create the frame index object for this incoming parameter
1906 int FI
= MFI
->CreateFixedObject(VA
.getValVT().getSizeInBits()/8,
1907 VA
.getLocMemOffset(), true);
1908 SDValue PtrOff
= DAG
.getFrameIndex(FI
,getPointerTy());
1910 // emit ISD::STORE which stores the
1911 // parameter value to a stack Location
1912 MemOpChains
.push_back(DAG
.getStore(Chain
, dl
, Arg
, PtrOff
,
1913 MachinePointerInfo::getFixedStack(FI
),
1916 assert(0 && "Not a Reg/Mem Loc, major error!");
1919 if (!MemOpChains
.empty()) {
1920 Chain
= DAG
.getNode(ISD::TokenFactor
,
1924 MemOpChains
.size());
1928 for (unsigned int i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
1929 Chain
= DAG
.getCopyToReg(Chain
,
1931 RegsToPass
[i
].first
,
1932 RegsToPass
[i
].second
,
1934 InFlag
= Chain
.getValue(1);
1938 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
1939 // every direct call is) turn it into a TargetGlobalAddress/
1940 // TargetExternalSymbol
1941 // node so that legalize doesn't hack it.
1942 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
)) {
1943 Callee
= DAG
.getTargetGlobalAddress(G
->getGlobal(), dl
, getPointerTy());
1945 else if (ExternalSymbolSDNode
*S
= dyn_cast
<ExternalSymbolSDNode
>(Callee
)) {
1946 Callee
= DAG
.getTargetExternalSymbol(S
->getSymbol(), getPointerTy());
1948 else if (isTailCall
) {
1949 assert(0 && "Tail calls are not handled yet");
1950 // see X86 ISelLowering for ideas on implementation: 1708
1953 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVTGLUE
);
1954 SmallVector
<SDValue
, 8> Ops
;
1957 assert(0 && "Tail calls are not handled yet");
1958 // see X86 ISelLowering for ideas on implementation: 1721
1960 // If this is a direct call, pass the chain and the callee
1961 if (Callee
.getNode()) {
1962 Ops
.push_back(Chain
);
1963 Ops
.push_back(Callee
);
1967 assert(0 && "Tail calls are not handled yet");
1968 // see X86 ISelLowering for ideas on implementation: 1739
1971 // Add argument registers to the end of the list so that they are known
1972 // live into the call
1973 for (unsigned int i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
1974 Ops
.push_back(DAG
.getRegister(
1975 RegsToPass
[i
].first
,
1976 RegsToPass
[i
].second
.getValueType()));
1978 if (InFlag
.getNode()) {
1979 Ops
.push_back(InFlag
);
1984 assert(0 && "Tail calls are not handled yet");
1985 // see X86 ISelLowering for ideas on implementation: 1762
1988 Chain
= DAG
.getNode(AMDILISD::CALL
,
1990 NodeTys
, &Ops
[0], Ops
.size());
1991 InFlag
= Chain
.getValue(1);
1993 // Create the CALLSEQ_END node
1994 Chain
= DAG
.getCALLSEQ_END(
1996 DAG
.getIntPtrConstant(NumBytes
, true),
1997 DAG
.getIntPtrConstant(0, true),
1999 InFlag
= Chain
.getValue(1);
2000 // Handle result values, copying them out of physregs into vregs that
2002 return LowerCallResult(Chain
, InFlag
, CallConv
, isVarArg
, Ins
, dl
, DAG
,
2005 static void checkMADType(
2006 SDValue Op
, const AMDILSubtarget
*STM
, bool& is24bitMAD
, bool& is32bitMAD
)
2008 bool globalLoadStore
= false;
2012 assert(Op
.getOpcode() == ISD::ADD
&& "The opcode must be a add in order for "
2013 "this to work correctly!");
2014 if (Op
.getNode()->use_empty()) {
2017 for (SDNode::use_iterator nBegin
= Op
.getNode()->use_begin(),
2018 nEnd
= Op
.getNode()->use_end(); nBegin
!= nEnd
; ++nBegin
) {
2019 SDNode
*ptr
= *nBegin
;
2020 const LSBaseSDNode
*lsNode
= dyn_cast
<LSBaseSDNode
>(ptr
);
2021 // If we are not a LSBaseSDNode then we don't do this
2023 // If we are a LSBaseSDNode, but the op is not the offset
2024 // or base pointer, then we don't do this optimization
2025 // (i.e. we are the value being stored)
2027 (lsNode
->writeMem() && lsNode
->getOperand(1) == Op
)) {
2030 const PointerType
*PT
=
2031 dyn_cast
<PointerType
>(lsNode
->getSrcValue()->getType());
2032 unsigned as
= PT
->getAddressSpace();
2035 globalLoadStore
= true;
2036 case AMDILAS::PRIVATE_ADDRESS
:
2037 if (!STM
->device()->usesHardware(AMDILDeviceInfo::PrivateMem
)) {
2038 globalLoadStore
= true;
2041 case AMDILAS::CONSTANT_ADDRESS
:
2042 if (!STM
->device()->usesHardware(AMDILDeviceInfo::ConstantMem
)) {
2043 globalLoadStore
= true;
2046 case AMDILAS::LOCAL_ADDRESS
:
2047 if (!STM
->device()->usesHardware(AMDILDeviceInfo::LocalMem
)) {
2048 globalLoadStore
= true;
2051 case AMDILAS::REGION_ADDRESS
:
2052 if (!STM
->device()->usesHardware(AMDILDeviceInfo::RegionMem
)) {
2053 globalLoadStore
= true;
2058 if (globalLoadStore
) {
2066 AMDILTargetLowering::LowerADD(SDValue Op
, SelectionDAG
&DAG
) const
2068 SDValue LHS
= Op
.getOperand(0);
2069 SDValue RHS
= Op
.getOperand(1);
2070 DebugLoc DL
= Op
.getDebugLoc();
2071 EVT OVT
= Op
.getValueType();
2073 const AMDILSubtarget
*stm
= &this->getTargetMachine()
2074 .getSubtarget
<AMDILSubtarget
>();
2075 bool isVec
= OVT
.isVector();
2076 if (OVT
.getScalarType() == MVT::i64
) {
2077 MVT INTTY
= MVT::i32
;
2078 if (OVT
== MVT::v2i64
) {
2081 if (stm
->device()->usesHardware(AMDILDeviceInfo::LongOps
)
2082 && INTTY
== MVT::i32
) {
2083 DST
= DAG
.getNode(AMDILISD::ADD
,
2088 SDValue LHSLO
, LHSHI
, RHSLO
, RHSHI
, INTLO
, INTHI
;
2089 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
2090 LHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, LHS
);
2091 RHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, RHS
);
2092 LHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, LHS
);
2093 RHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, RHS
);
2094 INTLO
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, LHSLO
, RHSLO
);
2095 INTHI
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, LHSHI
, RHSHI
);
2097 cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2098 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
2100 cmp
= DAG
.getNode(AMDILISD::INEGATE
, DL
, INTTY
, cmp
);
2101 INTHI
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, INTHI
, cmp
);
2102 DST
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, OVT
,
2106 if (LHS
.getOpcode() == ISD::FrameIndex
||
2107 RHS
.getOpcode() == ISD::FrameIndex
) {
2108 DST
= DAG
.getNode(AMDILISD::ADDADDR
,
2113 if (stm
->device()->usesHardware(AMDILDeviceInfo::LocalMem
)
2114 && LHS
.getNumOperands()
2115 && RHS
.getNumOperands()) {
2116 bool is24bitMAD
= false;
2117 bool is32bitMAD
= false;
2118 const ConstantSDNode
*LHSConstOpCode
=
2119 dyn_cast
<ConstantSDNode
>(LHS
.getOperand(LHS
.getNumOperands()-1));
2120 const ConstantSDNode
*RHSConstOpCode
=
2121 dyn_cast
<ConstantSDNode
>(RHS
.getOperand(RHS
.getNumOperands()-1));
2122 if ((LHS
.getOpcode() == ISD::SHL
&& LHSConstOpCode
)
2123 || (RHS
.getOpcode() == ISD::SHL
&& RHSConstOpCode
)
2124 || LHS
.getOpcode() == ISD::MUL
2125 || RHS
.getOpcode() == ISD::MUL
) {
2126 SDValue Op1
, Op2
, Op3
;
2127 // FIXME: Fix this so that it works for unsigned 24bit ops.
2128 if (LHS
.getOpcode() == ISD::MUL
) {
2129 Op1
= LHS
.getOperand(0);
2130 Op2
= LHS
.getOperand(1);
2132 } else if (RHS
.getOpcode() == ISD::MUL
) {
2133 Op1
= RHS
.getOperand(0);
2134 Op2
= RHS
.getOperand(1);
2136 } else if (LHS
.getOpcode() == ISD::SHL
&& LHSConstOpCode
) {
2137 Op1
= LHS
.getOperand(0);
2138 Op2
= DAG
.getConstant(
2139 1 << LHSConstOpCode
->getZExtValue(), MVT::i32
);
2141 } else if (RHS
.getOpcode() == ISD::SHL
&& RHSConstOpCode
) {
2142 Op1
= RHS
.getOperand(0);
2143 Op2
= DAG
.getConstant(
2144 1 << RHSConstOpCode
->getZExtValue(), MVT::i32
);
2147 checkMADType(Op
, stm
, is24bitMAD
, is32bitMAD
);
2148 // We can possibly do a MAD transform!
2149 if (is24bitMAD
&& stm
->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps
)) {
2150 uint32_t opcode
= AMDGPUIntrinsic::AMDIL_mad24_i32
;
2151 SDVTList Tys
= DAG
.getVTList(OVT
/*, MVT::Other*/);
2152 DST
= DAG
.getNode(ISD::INTRINSIC_W_CHAIN
,
2153 DL
, Tys
, DAG
.getEntryNode(), DAG
.getConstant(opcode
, MVT::i32
),
2155 } else if(is32bitMAD
) {
2156 SDVTList Tys
= DAG
.getVTList(OVT
/*, MVT::Other*/);
2157 DST
= DAG
.getNode(ISD::INTRINSIC_W_CHAIN
,
2158 DL
, Tys
, DAG
.getEntryNode(),
2160 AMDGPUIntrinsic::AMDIL_mad_i32
, MVT::i32
),
2165 DST
= DAG
.getNode(AMDILISD::ADD
,
2174 AMDILTargetLowering::genCLZuN(SDValue Op
, SelectionDAG
&DAG
,
2175 uint32_t bits
) const
2177 DebugLoc DL
= Op
.getDebugLoc();
2178 EVT INTTY
= Op
.getValueType();
2180 if (INTTY
.isVector()) {
2181 FPTY
= EVT(MVT::getVectorVT(MVT::f32
,
2182 INTTY
.getVectorNumElements()));
2184 FPTY
= EVT(MVT::f32
);
2186 /* static inline uint
2189 int xor = 0x3f800000U | x;
2190 float tp = as_float(xor);
2191 float t = tp + -1.0f;
2192 uint tint = as_uint(t);
2194 uint tsrc = tint >> 23;
2195 uint tmask = tsrc & 0xffU;
2196 uint cst = (103 + N)U - tmask;
2197 return cmp ? cst : N;
2200 assert(INTTY
.getScalarType().getSimpleVT().SimpleTy
== MVT::i32
2201 && "genCLZu16 only works on 32bit types");
2204 // xornode = 0x3f800000 | x
2205 SDValue xornode
= DAG
.getNode(ISD::OR
, DL
, INTTY
,
2206 DAG
.getConstant(0x3f800000, INTTY
), x
);
2207 // float tp = as_float(xornode)
2208 SDValue tp
= DAG
.getNode(ISDBITCAST
, DL
, FPTY
, xornode
);
2209 // float t = tp + -1.0f
2210 SDValue t
= DAG
.getNode(ISD::FADD
, DL
, FPTY
, tp
,
2211 DAG
.getConstantFP(-1.0f
, FPTY
));
2212 // uint tint = as_uint(t)
2213 SDValue tint
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, t
);
2214 // int cmp = (x != 0)
2215 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2216 DAG
.getConstant(CondCCodeToCC(ISD::SETNE
, MVT::i32
), MVT::i32
), x
,
2217 DAG
.getConstant(0, INTTY
));
2218 // uint tsrc = tint >> 23
2219 SDValue tsrc
= DAG
.getNode(ISD::SRL
, DL
, INTTY
, tint
,
2220 DAG
.getConstant(23, INTTY
));
2221 // uint tmask = tsrc & 0xFF
2222 SDValue tmask
= DAG
.getNode(ISD::AND
, DL
, INTTY
, tsrc
,
2223 DAG
.getConstant(0xFFU
, INTTY
));
2224 // uint cst = (103 + bits) - tmask
2225 SDValue cst
= DAG
.getNode(ISD::SUB
, DL
, INTTY
,
2226 DAG
.getConstant((103U + bits
), INTTY
), tmask
);
2227 // return cmp ? cst : N
2228 cst
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp
, cst
,
2229 DAG
.getConstant(bits
, INTTY
));
2234 AMDILTargetLowering::genCLZu32(SDValue Op
, SelectionDAG
&DAG
) const
2236 SDValue DST
= SDValue();
2237 DebugLoc DL
= Op
.getDebugLoc();
2238 EVT INTTY
= Op
.getValueType();
2239 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
2240 &this->getTargetMachine())->getSubtargetImpl();
2241 if (stm
->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX
) {
2242 //__clz_32bit(uint u)
2244 // int z = __amdil_ffb_hi(u) ;
2245 // return z < 0 ? 32 : z;
2249 // int z = __amdil_ffb_hi(u)
2250 SDValue z
= DAG
.getNode(AMDILISD::IFFB_HI
, DL
, INTTY
, u
);
2252 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2253 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
2254 z
, DAG
.getConstant(0, INTTY
));
2255 // return cmp ? 32 : z
2256 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp
,
2257 DAG
.getConstant(32, INTTY
), z
);
2258 } else if (stm
->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
2259 // static inline uint
2260 //__clz_32bit(uint x)
2262 // uint zh = __clz_16bit(x >> 16);
2263 // uint zl = __clz_16bit(x & 0xffffU);
2264 // return zh == 16U ? 16U + zl : zh;
2268 // uint xs16 = x >> 16
2269 SDValue xs16
= DAG
.getNode(ISD::SRL
, DL
, INTTY
, x
,
2270 DAG
.getConstant(16, INTTY
));
2271 // uint zh = __clz_16bit(xs16)
2272 SDValue zh
= genCLZuN(xs16
, DAG
, 16);
2273 // uint xa16 = x & 0xFFFF
2274 SDValue xa16
= DAG
.getNode(ISD::AND
, DL
, INTTY
, x
,
2275 DAG
.getConstant(0xFFFFU
, INTTY
));
2276 // uint zl = __clz_16bit(xa16)
2277 SDValue zl
= genCLZuN(xa16
, DAG
, 16);
2278 // uint cmp = zh == 16U
2279 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2280 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2281 zh
, DAG
.getConstant(16U, INTTY
));
2282 // uint zl16 = zl + 16
2283 SDValue zl16
= DAG
.getNode(ISD::ADD
, DL
, INTTY
,
2284 DAG
.getConstant(16, INTTY
), zl
);
2285 // return cmp ? zl16 : zh
2286 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
,
2289 assert(0 && "Attempting to generate a CLZ function with an"
2290 " unknown graphics card");
2295 AMDILTargetLowering::genCLZu64(SDValue Op
, SelectionDAG
&DAG
) const
2297 SDValue DST
= SDValue();
2298 DebugLoc DL
= Op
.getDebugLoc();
2300 EVT LONGTY
= Op
.getValueType();
2301 bool isVec
= LONGTY
.isVector();
2303 INTTY
= EVT(MVT::getVectorVT(MVT::i32
, Op
.getValueType()
2304 .getVectorNumElements()));
2306 INTTY
= EVT(MVT::i32
);
2308 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
2309 &this->getTargetMachine())->getSubtargetImpl();
2310 if (stm
->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX
) {
2312 // static inline uint
2313 // __clz_u64(ulong x)
2315 //uint zhi = __clz_32bit((uint)(x >> 32));
2316 //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
2317 //return zhi == 32U ? 32U + zlo : zhi;
2321 // uint xhi = x >> 32
2322 SDValue xlo
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, x
);
2323 // uint xlo = x & 0xFFFFFFFF
2324 SDValue xhi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, x
);
2325 // uint zhi = __clz_32bit(xhi)
2326 SDValue zhi
= genCLZu32(xhi
, DAG
);
2327 // uint zlo = __clz_32bit(xlo)
2328 SDValue zlo
= genCLZu32(xlo
, DAG
);
2329 // uint cmp = zhi == 32
2330 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2331 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2332 zhi
, DAG
.getConstant(32U, INTTY
));
2333 // uint zlop32 = 32 + zlo
2334 SDValue zlop32
= DAG
.getNode(AMDILISD::ADD
, DL
, INTTY
,
2335 DAG
.getConstant(32U, INTTY
), zlo
);
2336 // return cmp ? zlop32: zhi
2337 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp
, zlop32
, zhi
);
2338 } else if (stm
->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
2340 // static inline uint
2341 //__clz_64bit(ulong x)
2343 //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
2344 //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
2345 //uint zl = __clz_23bit((uint)x & 0x7fffffU);
2346 //uint r = zh == 18U ? 18U + zm : zh;
2347 //return zh + zm == 41U ? 41U + zl : r;
2351 // ulong xs46 = x >> 46
2352 SDValue xs46
= DAG
.getNode(ISD::SRL
, DL
, LONGTY
, x
,
2353 DAG
.getConstant(46, LONGTY
));
2354 // uint ixs46 = (uint)xs46
2355 SDValue ixs46
= DAG
.getNode(ISD::TRUNCATE
, DL
, INTTY
, xs46
);
2356 // ulong xs23 = x >> 23
2357 SDValue xs23
= DAG
.getNode(ISD::SRL
, DL
, LONGTY
, x
,
2358 DAG
.getConstant(23, LONGTY
));
2359 // uint ixs23 = (uint)xs23
2360 SDValue ixs23
= DAG
.getNode(ISD::TRUNCATE
, DL
, INTTY
, xs23
);
2361 // uint xs23m23 = ixs23 & 0x7FFFFF
2362 SDValue xs23m23
= DAG
.getNode(ISD::AND
, DL
, INTTY
, ixs23
,
2363 DAG
.getConstant(0x7fffffU
, INTTY
));
2364 // uint ix = (uint)x
2365 SDValue ix
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, x
);
2366 // uint xm23 = ix & 0x7FFFFF
2367 SDValue xm23
= DAG
.getNode(ISD::AND
, DL
, INTTY
, ix
,
2368 DAG
.getConstant(0x7fffffU
, INTTY
));
2369 // uint zh = __clz_23bit(ixs46)
2370 SDValue zh
= genCLZuN(ixs46
, DAG
, 23);
2371 // uint zm = __clz_23bit(xs23m23)
2372 SDValue zm
= genCLZuN(xs23m23
, DAG
, 23);
2373 // uint zl = __clz_23bit(xm23)
2374 SDValue zl
= genCLZuN(xm23
, DAG
, 23);
2375 // uint zhm5 = zh - 5
2376 SDValue zhm5
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zh
,
2377 DAG
.getConstant(-5U, INTTY
));
2378 SDValue const18
= DAG
.getConstant(18, INTTY
);
2379 SDValue const41
= DAG
.getConstant(41, INTTY
);
2380 // uint cmp1 = zh = 18
2381 SDValue cmp1
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2382 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2384 // uint zhm5zm = zhm5 + zh
2385 SDValue zhm5zm
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zhm5
, zm
);
2386 // uint cmp2 = zhm5zm == 41
2387 SDValue cmp2
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2388 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2390 // uint zmp18 = zhm5 + 18
2391 SDValue zmp18
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zm
, const18
);
2392 // uint zlp41 = zl + 41
2393 SDValue zlp41
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zl
, const41
);
2394 // uint r = cmp1 ? zmp18 : zh
2395 SDValue r
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
,
2397 // return cmp2 ? zlp41 : r
2398 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp2
, zlp41
, r
);
2400 assert(0 && "Attempting to generate a CLZ function with an"
2401 " unknown graphics card");
2406 AMDILTargetLowering::genf64toi64(SDValue RHS
, SelectionDAG
&DAG
,
2407 bool includeSign
) const
2412 DebugLoc DL
= RHS
.getDebugLoc();
2413 EVT RHSVT
= RHS
.getValueType();
2414 bool isVec
= RHSVT
.isVector();
2416 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
, RHSVT
2417 .getVectorNumElements()));
2418 INTVT
= EVT(MVT::getVectorVT(MVT::i32
, RHSVT
2419 .getVectorNumElements()));
2421 LONGVT
= EVT(MVT::i64
);
2422 INTVT
= EVT(MVT::i32
);
2424 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
2425 &this->getTargetMachine())->getSubtargetImpl();
2426 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2427 // unsigned version:
2428 // uint uhi = (uint)(d * 0x1.0p-32);
2429 // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
2430 // return as_ulong2((uint2)(ulo, uhi));
2433 // double ad = fabs(d);
2434 // long l = unsigned_version(ad);
2436 // return d == ad ? l : nl;
2439 d
= DAG
.getNode(ISD::FABS
, DL
, RHSVT
, d
);
2441 SDValue uhid
= DAG
.getNode(ISD::FMUL
, DL
, RHSVT
, d
,
2442 DAG
.getConstantFP(0x2f800000, RHSVT
));
2443 SDValue uhi
= DAG
.getNode(ISD::FP_TO_UINT
, DL
, INTVT
, uhid
);
2444 SDValue ulod
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, RHSVT
, uhi
);
2445 ulod
= DAG
.getNode(AMDILISD::MAD
, DL
, RHSVT
, ulod
,
2446 DAG
.getConstantFP(0xcf800000, RHSVT
), d
);
2447 SDValue ulo
= DAG
.getNode(ISD::FP_TO_UINT
, DL
, INTVT
, ulod
);
2448 SDValue l
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, ulo
, uhi
);
2450 SDValue nl
= DAG
.getNode(AMDILISD::INEGATE
, DL
, LONGVT
, l
);
2451 SDValue c
= DAG
.getNode(AMDILISD::CMP
, DL
, RHSVT
,
2452 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::f64
), MVT::i32
),
2454 l
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, LONGVT
, c
, l
, nl
);
2459 __attribute__((always_inline)) long
2460 cast_f64_to_i64(double d)
2462 // Convert d in to 32-bit components
2463 long x = as_long(d);
2467 // Generate 'normalized' mantissa
2468 mhi = xhi | 0x00100000; // hidden bit
2470 temp = xlo >> (32 - 11);
2474 // Compute shift right count from exponent
2475 e = (xhi >> (52-32)) & 0x7ff;
2480 // Compute result for 0 <= sr < 32
2481 rhi0 = mhi >> (sr &31);
2482 rlo0 = mlo >> (sr &31);
2483 temp = mhi << (32 - sr);
2485 rlo0 = sr ? temp : rlo0;
2487 // Compute result for 32 <= sr
2489 rlo1 = srge64 ? 0 : rhi0;
2491 // Pick between the 2 results
2492 rhi = srge32 ? rhi1 : rhi0;
2493 rlo = srge32 ? rlo1 : rlo0;
2495 // Optional saturate on overflow
2497 rhi = srlt0 ? MAXVALUE : rhi;
2498 rlo = srlt0 ? MAXVALUE : rlo;
2501 res = LCREATE( rlo, rhi );
2503 // Deal with sign bit (ignoring whether result is signed or unsigned value)
2505 sign = ((signed int) xhi) >> 31; fill with sign bit
2506 sign = LCREATE( sign, sign );
2514 SDValue c11
= DAG
.getConstant( 63 - 52, INTVT
);
2515 SDValue c32
= DAG
.getConstant( 32, INTVT
);
2517 // Convert d in to 32-bit components
2519 SDValue x
= DAG
.getNode(ISDBITCAST
, DL
, LONGVT
, d
);
2520 SDValue xhi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
2521 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
2523 // Generate 'normalized' mantissa
2524 SDValue mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
,
2525 xhi
, DAG
.getConstant( 0x00100000, INTVT
) );
2526 mhi
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, mhi
, c11
);
2527 SDValue temp
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2528 xlo
, DAG
.getConstant( 32 - (63 - 52), INTVT
) );
2529 mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, mhi
, temp
);
2530 SDValue mlo
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xlo
, c11
);
2532 // Compute shift right count from exponent
2533 SDValue e
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2534 xhi
, DAG
.getConstant( 52-32, INTVT
) );
2535 e
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2536 e
, DAG
.getConstant( 0x7ff, INTVT
) );
2537 SDValue sr
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
2538 DAG
.getConstant( 1023 + 63, INTVT
), e
);
2539 SDValue srge64
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
2540 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
2541 sr
, DAG
.getConstant(64, INTVT
));
2542 SDValue srge32
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
2543 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
2544 sr
, DAG
.getConstant(32, INTVT
));
2546 // Compute result for 0 <= sr < 32
2547 SDValue rhi0
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, mhi
, sr
);
2548 SDValue rlo0
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, mlo
, sr
);
2549 temp
= DAG
.getNode( ISD::SUB
, DL
, INTVT
, c32
, sr
);
2550 temp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, mhi
, temp
);
2551 temp
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rlo0
, temp
);
2552 rlo0
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
, sr
, temp
, rlo0
);
2554 // Compute result for 32 <= sr
2555 SDValue rhi1
= DAG
.getConstant( 0, INTVT
);
2556 SDValue rlo1
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2557 srge64
, rhi1
, rhi0
);
2559 // Pick between the 2 results
2560 SDValue rhi
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2561 srge32
, rhi1
, rhi0
);
2562 SDValue rlo
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2563 srge32
, rlo1
, rlo0
);
2566 SDValue res
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, rlo
, rhi
);
2568 // Deal with sign bit
2570 SDValue sign
= DAG
.getNode( ISD::SRA
, DL
, INTVT
,
2571 xhi
, DAG
.getConstant( 31, INTVT
) );
2572 sign
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, sign
, sign
);
2573 res
= DAG
.getNode( ISD::ADD
, DL
, LONGVT
, res
, sign
);
2574 res
= DAG
.getNode( ISD::XOR
, DL
, LONGVT
, res
, sign
);
2581 AMDILTargetLowering::genf64toi32(SDValue RHS
, SelectionDAG
&DAG
,
2582 bool includeSign
) const
2586 DebugLoc DL
= RHS
.getDebugLoc();
2587 EVT RHSVT
= RHS
.getValueType();
2588 bool isVec
= RHSVT
.isVector();
2590 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
,
2591 RHSVT
.getVectorNumElements()));
2592 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
2593 RHSVT
.getVectorNumElements()));
2595 LONGVT
= EVT(MVT::i64
);
2596 INTVT
= EVT(MVT::i32
);
2599 __attribute__((always_inline)) int
2600 cast_f64_to_[u|i]32(double d)
2602 // Convert d in to 32-bit components
2603 long x = as_long(d);
2607 // Generate 'normalized' mantissa
2608 mhi = xhi | 0x00100000; // hidden bit
2610 temp = xlo >> (32 - 11);
2613 // Compute shift right count from exponent
2614 e = (xhi >> (52-32)) & 0x7ff;
2618 // Compute result for 0 <= sr < 32
2619 res = mhi >> (sr &31);
2620 res = srge32 ? 0 : res;
2622 // Optional saturate on overflow
2624 res = srlt0 ? MAXVALUE : res;
2626 // Deal with sign bit (ignoring whether result is signed or unsigned value)
2628 sign = ((signed int) xhi) >> 31; fill with sign bit
2636 SDValue c11
= DAG
.getConstant( 63 - 52, INTVT
);
2638 // Convert d in to 32-bit components
2640 SDValue x
= DAG
.getNode(ISDBITCAST
, DL
, LONGVT
, d
);
2641 SDValue xhi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
2642 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
2644 // Generate 'normalized' mantissa
2645 SDValue mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
,
2646 xhi
, DAG
.getConstant( 0x00100000, INTVT
) );
2647 mhi
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, mhi
, c11
);
2648 SDValue temp
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2649 xlo
, DAG
.getConstant( 32 - (63 - 52), INTVT
) );
2650 mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, mhi
, temp
);
2652 // Compute shift right count from exponent
2653 SDValue e
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2654 xhi
, DAG
.getConstant( 52-32, INTVT
) );
2655 e
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2656 e
, DAG
.getConstant( 0x7ff, INTVT
) );
2657 SDValue sr
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
2658 DAG
.getConstant( 1023 + 31, INTVT
), e
);
2659 SDValue srge32
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
2660 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
2661 sr
, DAG
.getConstant(32, INTVT
));
2663 // Compute result for 0 <= sr < 32
2664 SDValue res
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, mhi
, sr
);
2665 res
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2666 srge32
, DAG
.getConstant(0,INTVT
), res
);
2668 // Deal with sign bit
2670 SDValue sign
= DAG
.getNode( ISD::SRA
, DL
, INTVT
,
2671 xhi
, DAG
.getConstant( 31, INTVT
) );
2672 res
= DAG
.getNode( ISD::ADD
, DL
, INTVT
, res
, sign
);
2673 res
= DAG
.getNode( ISD::XOR
, DL
, INTVT
, res
, sign
);
2678 AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op
, SelectionDAG
&DAG
) const
2680 SDValue RHS
= Op
.getOperand(0);
2681 EVT RHSVT
= RHS
.getValueType();
2682 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
2683 EVT LHSVT
= Op
.getValueType();
2684 MVT LST
= LHSVT
.getScalarType().getSimpleVT();
2685 DebugLoc DL
= Op
.getDebugLoc();
2687 const AMDILTargetMachine
*
2688 amdtm
= reinterpret_cast<const AMDILTargetMachine
*>
2689 (&this->getTargetMachine());
2690 const AMDILSubtarget
*
2691 stm
= static_cast<const AMDILSubtarget
*>(
2692 amdtm
->getSubtargetImpl());
2693 if (RST
== MVT::f64
&& RHSVT
.isVector()
2694 && stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2695 // We dont support vector 64bit floating point convertions.
2696 for (unsigned x
= 0, y
= RHSVT
.getVectorNumElements(); x
< y
; ++x
) {
2697 SDValue op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
2698 DL
, RST
, RHS
, DAG
.getTargetConstant(x
, MVT::i32
));
2699 op
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, LST
, op
);
2701 DST
= DAG
.getNode(AMDILISD::VBUILD
, DL
, LHSVT
, op
);
2703 DST
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, LHSVT
,
2704 DST
, op
, DAG
.getTargetConstant(x
, MVT::i32
));
2709 && LST
== MVT::i32
) {
2710 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2711 DST
= SDValue(Op
.getNode(), 0);
2713 DST
= genf64toi32(RHS
, DAG
, true);
2715 } else if (RST
== MVT::f64
2716 && LST
== MVT::i64
) {
2717 DST
= genf64toi64(RHS
, DAG
, true);
2718 } else if (RST
== MVT::f64
2719 && (LST
== MVT::i8
|| LST
== MVT::i16
)) {
2720 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2721 DST
= DAG
.getNode(ISD::TRUNCATE
, DL
, LHSVT
, SDValue(Op
.getNode(), 0));
2723 SDValue ToInt
= genf64toi32(RHS
, DAG
, true);
2724 DST
= DAG
.getNode(ISD::TRUNCATE
, DL
, LHSVT
, ToInt
);
2728 DST
= SDValue(Op
.getNode(), 0);
2735 AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op
, SelectionDAG
&DAG
) const
2738 SDValue RHS
= Op
.getOperand(0);
2739 EVT RHSVT
= RHS
.getValueType();
2740 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
2741 EVT LHSVT
= Op
.getValueType();
2742 MVT LST
= LHSVT
.getScalarType().getSimpleVT();
2743 DebugLoc DL
= Op
.getDebugLoc();
2744 const AMDILTargetMachine
*
2745 amdtm
= reinterpret_cast<const AMDILTargetMachine
*>
2746 (&this->getTargetMachine());
2747 const AMDILSubtarget
*
2748 stm
= static_cast<const AMDILSubtarget
*>(
2749 amdtm
->getSubtargetImpl());
2750 if (RST
== MVT::f64
&& RHSVT
.isVector()
2751 && stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2752 // We dont support vector 64bit floating point convertions.
2753 for (unsigned x
= 0, y
= RHSVT
.getVectorNumElements(); x
< y
; ++x
) {
2754 SDValue op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
2755 DL
, RST
, RHS
, DAG
.getTargetConstant(x
, MVT::i32
));
2756 op
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, LST
, op
);
2758 DST
= DAG
.getNode(AMDILISD::VBUILD
, DL
, LHSVT
, op
);
2760 DST
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, LHSVT
,
2761 DST
, op
, DAG
.getTargetConstant(x
, MVT::i32
));
2767 && LST
== MVT::i32
) {
2768 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2769 DST
= SDValue(Op
.getNode(), 0);
2771 DST
= genf64toi32(RHS
, DAG
, false);
2773 } else if (RST
== MVT::f64
2774 && LST
== MVT::i64
) {
2775 DST
= genf64toi64(RHS
, DAG
, false);
2776 } else if (RST
== MVT::f64
2777 && (LST
== MVT::i8
|| LST
== MVT::i16
)) {
2778 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2779 DST
= DAG
.getNode(ISD::TRUNCATE
, DL
, LHSVT
, SDValue(Op
.getNode(), 0));
2781 SDValue ToInt
= genf64toi32(RHS
, DAG
, false);
2782 DST
= DAG
.getNode(ISD::TRUNCATE
, DL
, LHSVT
, ToInt
);
2786 DST
= SDValue(Op
.getNode(), 0);
2792 AMDILTargetLowering::genu32tof64(SDValue RHS
, EVT LHSVT
,
2793 SelectionDAG
&DAG
) const
2795 EVT RHSVT
= RHS
.getValueType();
2796 DebugLoc DL
= RHS
.getDebugLoc();
2799 bool isVec
= RHSVT
.isVector();
2801 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
,
2802 RHSVT
.getVectorNumElements()));
2803 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
2804 RHSVT
.getVectorNumElements()));
2806 LONGVT
= EVT(MVT::i64
);
2807 INTVT
= EVT(MVT::i32
);
2810 const AMDILTargetMachine
*
2811 amdtm
= reinterpret_cast<const AMDILTargetMachine
*>
2812 (&this->getTargetMachine());
2813 const AMDILSubtarget
*
2814 stm
= static_cast<const AMDILSubtarget
*>(
2815 amdtm
->getSubtargetImpl());
2816 if (stm
->calVersion() >= CAL_VERSION_SC_135
) {
2817 // unsigned x = RHS;
2818 // ulong xd = (ulong)(0x4330_0000 << 32) | x;
2819 // double d = as_double( xd );
2820 // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
2821 SDValue xd
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, x
,
2822 DAG
.getConstant( 0x43300000, INTVT
) );
2823 SDValue d
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
, xd
);
2824 SDValue offsetd
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
,
2825 DAG
.getConstant( 0x4330000000000000ULL
, LONGVT
) );
2826 return DAG
.getNode( ISD::FSUB
, DL
, LHSVT
, d
, offsetd
);
2828 SDValue clz
= genCLZu32(x
, DAG
);
2830 // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
2831 // Except for an input 0... which requires a 0 exponent
2832 SDValue exp
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
2833 DAG
.getConstant( (1023+31), INTVT
), clz
);
2834 exp
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
, x
, exp
, x
);
2837 SDValue rhi
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, x
, clz
);
2839 // Eliminate hidden bit
2840 rhi
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2841 rhi
, DAG
.getConstant( 0x7fffffff, INTVT
) );
2843 // Pack exponent and frac
2844 SDValue rlo
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
2845 rhi
, DAG
.getConstant( (32 - 11), INTVT
) );
2846 rhi
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2847 rhi
, DAG
.getConstant( 11, INTVT
) );
2848 exp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
2849 exp
, DAG
.getConstant( 20, INTVT
) );
2850 rhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rhi
, exp
);
2852 // Convert 2 x 32 in to 1 x 64, then to double precision float type
2853 SDValue res
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, rlo
, rhi
);
2854 return DAG
.getNode(ISDBITCAST
, DL
, LHSVT
, res
);
2858 AMDILTargetLowering::genu64tof64(SDValue RHS
, EVT LHSVT
,
2859 SelectionDAG
&DAG
) const
2861 EVT RHSVT
= RHS
.getValueType();
2862 DebugLoc DL
= RHS
.getDebugLoc();
2865 bool isVec
= RHSVT
.isVector();
2867 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
2868 RHSVT
.getVectorNumElements()));
2870 INTVT
= EVT(MVT::i32
);
2874 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
2875 &this->getTargetMachine())->getSubtargetImpl();
2876 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2877 // double dhi = (double)(as_uint2(x).y);
2878 // double dlo = (double)(as_uint2(x).x);
2879 // return mad(dhi, 0x1.0p+32, dlo)
2880 SDValue dhi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
2881 dhi
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LHSVT
, dhi
);
2882 SDValue dlo
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
2883 dlo
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LHSVT
, dlo
);
2884 return DAG
.getNode(AMDILISD::MAD
, DL
, LHSVT
, dhi
,
2885 DAG
.getConstantFP(0x4f800000, LHSVT
), dlo
);
2886 } else if (stm
->calVersion() >= CAL_VERSION_SC_135
) {
2887 // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
2888 // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
2889 // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
2890 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
); // x & 0xffff_ffffUL
2891 SDValue xd
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, xlo
, DAG
.getConstant( 0x43300000, INTVT
) );
2892 SDValue lo
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
, xd
);
2893 SDValue xhi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
); // x >> 32
2894 SDValue xe
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, xhi
, DAG
.getConstant( 0x45300000, INTVT
) );
2895 SDValue hi
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
, xe
);
2896 SDValue c
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
,
2897 DAG
.getConstant( 0x4530000000100000ULL
, LONGVT
) );
2898 hi
= DAG
.getNode( ISD::FSUB
, DL
, LHSVT
, hi
, c
);
2899 return DAG
.getNode( ISD::FADD
, DL
, LHSVT
, hi
, lo
);
2902 SDValue clz
= genCLZu64(x
, DAG
);
2903 SDValue xhi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
2904 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
2906 // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
2907 SDValue exp
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
2908 DAG
.getConstant( (1023+63), INTVT
), clz
);
2909 SDValue mash
= DAG
.getNode( ISD::OR
, DL
, INTVT
, xhi
, xlo
);
2910 exp
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2911 mash
, exp
, mash
); // exp = exp, or 0 if input was 0
2914 SDValue clz31
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2915 clz
, DAG
.getConstant( 31, INTVT
) );
2916 SDValue rshift
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
2917 DAG
.getConstant( 32, INTVT
), clz31
);
2918 SDValue t1
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xhi
, clz31
);
2919 SDValue t2
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, xlo
, rshift
);
2920 t2
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
, clz31
, t2
, t1
);
2921 SDValue rhi1
= DAG
.getNode( ISD::OR
, DL
, INTVT
, t1
, t2
);
2922 SDValue rlo1
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xlo
, clz31
);
2923 SDValue rhi2
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xlo
, clz31
);
2924 SDValue rlo2
= DAG
.getConstant( 0, INTVT
);
2925 SDValue clz32
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2926 clz
, DAG
.getConstant( 32, INTVT
) );
2927 SDValue rhi
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2928 clz32
, rhi2
, rhi1
);
2929 SDValue rlo
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2930 clz32
, rlo2
, rlo1
);
2932 // Eliminate hidden bit
2933 rhi
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2934 rhi
, DAG
.getConstant( 0x7fffffff, INTVT
) );
2936 // Save bits needed to round properly
2937 SDValue round
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2938 rlo
, DAG
.getConstant( 0x7ff, INTVT
) );
2940 // Pack exponent and frac
2941 rlo
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2942 rlo
, DAG
.getConstant( 11, INTVT
) );
2943 SDValue temp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
2944 rhi
, DAG
.getConstant( (32 - 11), INTVT
) );
2945 rlo
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rlo
, temp
);
2946 rhi
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2947 rhi
, DAG
.getConstant( 11, INTVT
) );
2948 exp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
2949 exp
, DAG
.getConstant( 20, INTVT
) );
2950 rhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rhi
, exp
);
2952 // Compute rounding bit
2953 SDValue even
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2954 rlo
, DAG
.getConstant( 1, INTVT
) );
2955 SDValue grs
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2956 round
, DAG
.getConstant( 0x3ff, INTVT
) );
2957 grs
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
2958 DAG
.getConstant( CondCCodeToCC( ISD::SETNE
, MVT::i32
), MVT::i32
),
2959 grs
, DAG
.getConstant( 0, INTVT
) ); // -1 if any GRS set, 0 if none
2960 grs
= DAG
.getNode( ISD::OR
, DL
, INTVT
, grs
, even
);
2961 round
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2962 round
, DAG
.getConstant( 10, INTVT
) );
2963 round
= DAG
.getNode( ISD::AND
, DL
, INTVT
, round
, grs
); // 0 or 1
2966 SDValue lround
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
,
2967 round
, DAG
.getConstant( 0, INTVT
) );
2968 SDValue res
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, rlo
, rhi
);
2969 res
= DAG
.getNode( ISD::ADD
, DL
, LONGVT
, res
, lround
);
2970 return DAG
.getNode(ISDBITCAST
, DL
, LHSVT
, res
);
2974 AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op
, SelectionDAG
&DAG
) const
2976 SDValue RHS
= Op
.getOperand(0);
2977 EVT RHSVT
= RHS
.getValueType();
2978 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
2979 EVT LHSVT
= Op
.getValueType();
2980 MVT LST
= LHSVT
.getScalarType().getSimpleVT();
2981 DebugLoc DL
= Op
.getDebugLoc();
2985 const AMDILTargetMachine
*
2986 amdtm
= reinterpret_cast<const AMDILTargetMachine
*>
2987 (&this->getTargetMachine());
2988 const AMDILSubtarget
*
2989 stm
= static_cast<const AMDILSubtarget
*>(
2990 amdtm
->getSubtargetImpl());
2991 if (LST
== MVT::f64
&& LHSVT
.isVector()
2992 && stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2993 // We dont support vector 64bit floating point convertions.
2995 for (unsigned x
= 0, y
= LHSVT
.getVectorNumElements(); x
< y
; ++x
) {
2996 SDValue op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
2997 DL
, RST
, RHS
, DAG
.getTargetConstant(x
, MVT::i32
));
2998 op
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LST
, op
);
3000 DST
= DAG
.getNode(AMDILISD::VBUILD
, DL
, LHSVT
, op
);
3002 DST
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, LHSVT
, DST
,
3003 op
, DAG
.getTargetConstant(x
, MVT::i32
));
3010 && LST
== MVT::f64
) {
3011 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3012 DST
= SDValue(Op
.getNode(), 0);
3014 DST
= genu32tof64(RHS
, LHSVT
, DAG
);
3016 } else if (RST
== MVT::i64
3017 && LST
== MVT::f64
) {
3018 DST
= genu64tof64(RHS
, LHSVT
, DAG
);
3020 DST
= SDValue(Op
.getNode(), 0);
3027 AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op
, SelectionDAG
&DAG
) const
3029 SDValue RHS
= Op
.getOperand(0);
3030 EVT RHSVT
= RHS
.getValueType();
3031 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
3035 bool isVec
= RHSVT
.isVector();
3036 DebugLoc DL
= Op
.getDebugLoc();
3037 EVT LHSVT
= Op
.getValueType();
3038 MVT LST
= LHSVT
.getScalarType().getSimpleVT();
3039 const AMDILTargetMachine
*
3040 amdtm
= reinterpret_cast<const AMDILTargetMachine
*>
3041 (&this->getTargetMachine());
3042 const AMDILSubtarget
*
3043 stm
= static_cast<const AMDILSubtarget
*>(
3044 amdtm
->getSubtargetImpl());
3045 if (LST
== MVT::f64
&& LHSVT
.isVector()
3046 && stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3047 // We dont support vector 64bit floating point convertions.
3048 for (unsigned x
= 0, y
= LHSVT
.getVectorNumElements(); x
< y
; ++x
) {
3049 SDValue op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3050 DL
, RST
, RHS
, DAG
.getTargetConstant(x
, MVT::i32
));
3051 op
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LST
, op
);
3053 DST
= DAG
.getNode(AMDILISD::VBUILD
, DL
, LHSVT
, op
);
3055 DST
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, LHSVT
, DST
,
3056 op
, DAG
.getTargetConstant(x
, MVT::i32
));
3063 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
,
3064 RHSVT
.getVectorNumElements()));
3065 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
3066 RHSVT
.getVectorNumElements()));
3068 LONGVT
= EVT(MVT::i64
);
3069 INTVT
= EVT(MVT::i32
);
3071 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
3072 if ((RST
== MVT::i32
|| RST
== MVT::i64
)
3073 && LST
== MVT::f64
) {
3074 if (RST
== MVT::i32
) {
3075 if (stm
->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
3076 DST
= SDValue(Op
.getNode(), 0);
3080 SDValue c31
= DAG
.getConstant( 31, INTVT
);
3081 SDValue cSbit
= DAG
.getConstant( 0x80000000, INTVT
);
3083 SDValue S
; // Sign, as 0 or -1
3084 SDValue Sbit
; // Sign bit, as one bit, MSB only.
3085 if (RST
== MVT::i32
) {
3086 Sbit
= DAG
.getNode( ISD::AND
, DL
, INTVT
, RHS
, cSbit
);
3087 S
= DAG
.getNode(ISD::SRA
, DL
, RHSVT
, RHS
, c31
);
3088 } else { // 64-bit case... SRA of 64-bit values is slow
3089 SDValue hi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, RHS
);
3090 Sbit
= DAG
.getNode( ISD::AND
, DL
, INTVT
, hi
, cSbit
);
3091 SDValue temp
= DAG
.getNode( ISD::SRA
, DL
, INTVT
, hi
, c31
);
3092 S
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, RHSVT
, temp
, temp
);
3095 // get abs() of input value, given sign as S (0 or -1)
3097 SDValue SpI
= DAG
.getNode(ISD::ADD
, DL
, RHSVT
, RHS
, S
);
3099 SDValue SpIxS
= DAG
.getNode(ISD::XOR
, DL
, RHSVT
, SpI
, S
);
3101 // Convert unsigned value to double precision
3103 if (RST
== MVT::i32
) {
3104 // r = cast_u32_to_f64(SpIxS)
3105 R
= genu32tof64(SpIxS
, LHSVT
, DAG
);
3107 // r = cast_u64_to_f64(SpIxS)
3108 R
= genu64tof64(SpIxS
, LHSVT
, DAG
);
3111 // drop in the sign bit
3112 SDValue t
= DAG
.getNode( AMDILISD::BITCONV
, DL
, LONGVT
, R
);
3113 SDValue thi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, t
);
3114 SDValue tlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, t
);
3115 thi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, thi
, Sbit
);
3116 t
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, tlo
, thi
);
3117 DST
= DAG
.getNode( AMDILISD::BITCONV
, DL
, LHSVT
, t
);
3119 DST
= SDValue(Op
.getNode(), 0);
3125 AMDILTargetLowering::LowerSUB(SDValue Op
, SelectionDAG
&DAG
) const
3127 SDValue LHS
= Op
.getOperand(0);
3128 SDValue RHS
= Op
.getOperand(1);
3129 DebugLoc DL
= Op
.getDebugLoc();
3130 EVT OVT
= Op
.getValueType();
3132 bool isVec
= RHS
.getValueType().isVector();
3133 if (OVT
.getScalarType() == MVT::i64
) {
3134 /*const AMDILTargetMachine*
3135 amdtm = reinterpret_cast<const AMDILTargetMachine*>
3136 (&this->getTargetMachine());
3137 const AMDILSubtarget*
3138 stm = dynamic_cast<const AMDILSubtarget*>(
3139 amdtm->getSubtargetImpl());*/
3140 MVT INTTY
= MVT::i32
;
3141 if (OVT
== MVT::v2i64
) {
3144 SDValue LHSLO
, LHSHI
, RHSLO
, RHSHI
, INTLO
, INTHI
;
3145 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
3146 LHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, LHS
);
3147 RHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, RHS
);
3148 LHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, LHS
);
3149 RHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, RHS
);
3150 INTLO
= DAG
.getNode(ISD::SUB
, DL
, INTTY
, LHSLO
, RHSLO
);
3151 INTHI
= DAG
.getNode(ISD::SUB
, DL
, INTTY
, LHSHI
, RHSHI
);
3152 //TODO: need to use IBORROW on HD5XXX and later hardware
3154 if (OVT
== MVT::i64
) {
3155 cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
3156 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
3161 SDValue LHSRLO
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3162 DL
, MVT::i32
, LHSLO
, DAG
.getTargetConstant(0, MVT::i32
));
3163 SDValue LHSRHI
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3164 DL
, MVT::i32
, LHSLO
, DAG
.getTargetConstant(1, MVT::i32
));
3165 SDValue RHSRLO
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3166 DL
, MVT::i32
, RHSLO
, DAG
.getTargetConstant(0, MVT::i32
));
3167 SDValue RHSRHI
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3168 DL
, MVT::i32
, RHSLO
, DAG
.getTargetConstant(1, MVT::i32
));
3169 cmplo
= DAG
.getNode(AMDILISD::CMP
, DL
, MVT::i32
,
3170 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
3172 cmphi
= DAG
.getNode(AMDILISD::CMP
, DL
, MVT::i32
,
3173 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
3175 cmp
= DAG
.getNode(AMDILISD::VBUILD
, DL
, MVT::v2i32
, cmplo
);
3176 cmp
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v2i32
,
3177 cmp
, cmphi
, DAG
.getTargetConstant(1, MVT::i32
));
3179 INTHI
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, INTHI
, cmp
);
3180 DST
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, OVT
,
3183 DST
= SDValue(Op
.getNode(), 0);
3188 AMDILTargetLowering::LowerFDIV(SDValue Op
, SelectionDAG
&DAG
) const
3190 EVT OVT
= Op
.getValueType();
3192 if (OVT
.getScalarType() == MVT::f64
) {
3193 DST
= LowerFDIV64(Op
, DAG
);
3194 } else if (OVT
.getScalarType() == MVT::f32
) {
3195 DST
= LowerFDIV32(Op
, DAG
);
3197 DST
= SDValue(Op
.getNode(), 0);
3203 AMDILTargetLowering::LowerSDIV(SDValue Op
, SelectionDAG
&DAG
) const
3205 EVT OVT
= Op
.getValueType();
3207 if (OVT
.getScalarType() == MVT::i64
) {
3208 DST
= LowerSDIV64(Op
, DAG
);
3209 } else if (OVT
.getScalarType() == MVT::i32
) {
3210 DST
= LowerSDIV32(Op
, DAG
);
3211 } else if (OVT
.getScalarType() == MVT::i16
3212 || OVT
.getScalarType() == MVT::i8
) {
3213 DST
= LowerSDIV24(Op
, DAG
);
3215 DST
= SDValue(Op
.getNode(), 0);
3221 AMDILTargetLowering::LowerUDIV(SDValue Op
, SelectionDAG
&DAG
) const
3223 EVT OVT
= Op
.getValueType();
3225 if (OVT
.getScalarType() == MVT::i64
) {
3226 DST
= LowerUDIV64(Op
, DAG
);
3227 } else if (OVT
.getScalarType() == MVT::i32
) {
3228 DST
= LowerUDIV32(Op
, DAG
);
3229 } else if (OVT
.getScalarType() == MVT::i16
3230 || OVT
.getScalarType() == MVT::i8
) {
3231 DST
= LowerUDIV24(Op
, DAG
);
3233 DST
= SDValue(Op
.getNode(), 0);
3239 AMDILTargetLowering::LowerSREM(SDValue Op
, SelectionDAG
&DAG
) const
3241 EVT OVT
= Op
.getValueType();
3243 if (OVT
.getScalarType() == MVT::i64
) {
3244 DST
= LowerSREM64(Op
, DAG
);
3245 } else if (OVT
.getScalarType() == MVT::i32
) {
3246 DST
= LowerSREM32(Op
, DAG
);
3247 } else if (OVT
.getScalarType() == MVT::i16
) {
3248 DST
= LowerSREM16(Op
, DAG
);
3249 } else if (OVT
.getScalarType() == MVT::i8
) {
3250 DST
= LowerSREM8(Op
, DAG
);
3252 DST
= SDValue(Op
.getNode(), 0);
3258 AMDILTargetLowering::LowerUREM(SDValue Op
, SelectionDAG
&DAG
) const
3260 EVT OVT
= Op
.getValueType();
3262 if (OVT
.getScalarType() == MVT::i64
) {
3263 DST
= LowerUREM64(Op
, DAG
);
3264 } else if (OVT
.getScalarType() == MVT::i32
) {
3265 DST
= LowerUREM32(Op
, DAG
);
3266 } else if (OVT
.getScalarType() == MVT::i16
) {
3267 DST
= LowerUREM16(Op
, DAG
);
3268 } else if (OVT
.getScalarType() == MVT::i8
) {
3269 DST
= LowerUREM8(Op
, DAG
);
3271 DST
= SDValue(Op
.getNode(), 0);
3277 AMDILTargetLowering::LowerMUL(SDValue Op
, SelectionDAG
&DAG
) const
3279 DebugLoc DL
= Op
.getDebugLoc();
3280 EVT OVT
= Op
.getValueType();
3282 bool isVec
= OVT
.isVector();
3283 if (OVT
.getScalarType() != MVT::i64
)
3285 DST
= SDValue(Op
.getNode(), 0);
3287 assert(OVT
.getScalarType() == MVT::i64
&& "Only 64 bit mul should be lowered!");
3288 // TODO: This needs to be turned into a tablegen pattern
3289 SDValue LHS
= Op
.getOperand(0);
3290 SDValue RHS
= Op
.getOperand(1);
3292 MVT INTTY
= MVT::i32
;
3293 if (OVT
== MVT::v2i64
) {
3296 // mul64(h1, l1, h0, l0)
3297 SDValue LHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
,
3300 SDValue LHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
,
3303 SDValue RHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
,
3306 SDValue RHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
,
3309 // MULLO_UINT_1 r1, h0, l1
3310 SDValue RHILLO
= DAG
.getNode(AMDILISD::UMUL
,
3312 INTTY
, RHSHI
, LHSLO
);
3313 // MULLO_UINT_1 r2, h1, l0
3314 SDValue RLOHHI
= DAG
.getNode(AMDILISD::UMUL
,
3316 INTTY
, RHSLO
, LHSHI
);
3317 // ADD_INT hr, r1, r2
3318 SDValue ADDHI
= DAG
.getNode(ISD::ADD
,
3320 INTTY
, RHILLO
, RLOHHI
);
3321 // MULHI_UINT_1 r3, l1, l0
3322 SDValue RLOLLO
= DAG
.getNode(ISD::MULHU
,
3324 INTTY
, RHSLO
, LHSLO
);
3325 // ADD_INT hr, hr, r3
3326 SDValue HIGH
= DAG
.getNode(ISD::ADD
,
3328 INTTY
, ADDHI
, RLOLLO
);
3329 // MULLO_UINT_1 l3, l1, l0
3330 SDValue LOW
= DAG
.getNode(AMDILISD::UMUL
,
3332 INTTY
, LHSLO
, RHSLO
);
3333 DST
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
,
3340 AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op
, SelectionDAG
&DAG
) const
3342 EVT VT
= Op
.getValueType();
3347 DebugLoc DL
= Op
.getDebugLoc();
3348 Nodes1
= DAG
.getNode(AMDILISD::VBUILD
,
3350 VT
, Op
.getOperand(0));
3352 bool allEqual
= true;
3353 for (unsigned x
= 1, y
= Op
.getNumOperands(); x
< y
; ++x
) {
3354 if (Op
.getOperand(0) != Op
.getOperand(x
)) {
3363 switch(Op
.getNumOperands()) {
3368 fourth
= Op
.getOperand(3);
3369 if (fourth
.getOpcode() != ISD::UNDEF
) {
3370 Nodes1
= DAG
.getNode(
3371 ISD::INSERT_VECTOR_ELT
,
3376 DAG
.getConstant(7, MVT::i32
));
3379 third
= Op
.getOperand(2);
3380 if (third
.getOpcode() != ISD::UNDEF
) {
3381 Nodes1
= DAG
.getNode(
3382 ISD::INSERT_VECTOR_ELT
,
3387 DAG
.getConstant(6, MVT::i32
));
3390 second
= Op
.getOperand(1);
3391 if (second
.getOpcode() != ISD::UNDEF
) {
3392 Nodes1
= DAG
.getNode(
3393 ISD::INSERT_VECTOR_ELT
,
3398 DAG
.getConstant(5, MVT::i32
));
3406 AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op
,
3407 SelectionDAG
&DAG
) const
3409 DebugLoc DL
= Op
.getDebugLoc();
3410 EVT VT
= Op
.getValueType();
3411 const SDValue
*ptr
= NULL
;
3412 const ConstantSDNode
*CSDN
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(2));
3413 uint32_t swizzleNum
= 0;
3415 if (!VT
.isVector()) {
3416 SDValue Res
= Op
.getOperand(0);
3420 if (Op
.getOperand(1).getOpcode() != ISD::UNDEF
) {
3421 ptr
= &Op
.getOperand(1);
3423 ptr
= &Op
.getOperand(0);
3426 swizzleNum
= (uint32_t)CSDN
->getZExtValue();
3427 uint32_t mask2
= 0x04030201 & ~(0xFF << (swizzleNum
* 8));
3428 uint32_t mask3
= 0x01010101 & (0xFF << (swizzleNum
* 8));
3429 DST
= DAG
.getNode(AMDILISD::VINSERT
,
3434 DAG
.getTargetConstant(mask2
, MVT::i32
),
3435 DAG
.getTargetConstant(mask3
, MVT::i32
));
3437 uint32_t mask2
= 0x04030201 & ~(0xFF << (swizzleNum
* 8));
3438 uint32_t mask3
= 0x01010101 & (0xFF << (swizzleNum
* 8));
3439 SDValue res
= DAG
.getNode(AMDILISD::VINSERT
,
3440 DL
, VT
, Op
.getOperand(0), *ptr
,
3441 DAG
.getTargetConstant(mask2
, MVT::i32
),
3442 DAG
.getTargetConstant(mask3
, MVT::i32
));
3443 for (uint32_t x
= 1; x
< VT
.getVectorNumElements(); ++x
) {
3444 mask2
= 0x04030201 & ~(0xFF << (x
* 8));
3445 mask3
= 0x01010101 & (0xFF << (x
* 8));
3446 SDValue t
= DAG
.getNode(AMDILISD::VINSERT
,
3447 DL
, VT
, Op
.getOperand(0), *ptr
,
3448 DAG
.getTargetConstant(mask2
, MVT::i32
),
3449 DAG
.getTargetConstant(mask3
, MVT::i32
));
3450 SDValue c
= DAG
.getNode(AMDILISD::CMP
, DL
, ptr
->getValueType(),
3451 DAG
.getConstant(AMDILCC::IL_CC_I_EQ
, MVT::i32
),
3452 Op
.getOperand(2), DAG
.getConstant(x
, MVT::i32
));
3453 c
= DAG
.getNode(AMDILISD::VBUILD
, DL
, Op
.getValueType(), c
);
3454 res
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, VT
, c
, t
, res
);
3462 AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op
,
3463 SelectionDAG
&DAG
) const
3465 EVT VT
= Op
.getValueType();
3466 const ConstantSDNode
*CSDN
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
3467 uint64_t swizzleNum
= 0;
3468 DebugLoc DL
= Op
.getDebugLoc();
3470 if (!Op
.getOperand(0).getValueType().isVector()) {
3471 Res
= Op
.getOperand(0);
3475 // Static vector extraction
3476 swizzleNum
= CSDN
->getZExtValue() + 1;
3477 Res
= DAG
.getNode(AMDILISD::VEXTRACT
,
3480 DAG
.getTargetConstant(swizzleNum
, MVT::i32
));
3482 SDValue Op1
= Op
.getOperand(1);
3483 uint32_t vecSize
= 4;
3484 SDValue Op0
= Op
.getOperand(0);
3485 SDValue res
= DAG
.getNode(AMDILISD::VEXTRACT
,
3487 DAG
.getTargetConstant(1, MVT::i32
));
3488 if (Op0
.getValueType().isVector()) {
3489 vecSize
= Op0
.getValueType().getVectorNumElements();
3491 for (uint32_t x
= 2; x
<= vecSize
; ++x
) {
3492 SDValue t
= DAG
.getNode(AMDILISD::VEXTRACT
,
3494 DAG
.getTargetConstant(x
, MVT::i32
));
3495 SDValue c
= DAG
.getNode(AMDILISD::CMP
,
3496 DL
, Op1
.getValueType(),
3497 DAG
.getConstant(AMDILCC::IL_CC_I_EQ
, MVT::i32
),
3498 Op1
, DAG
.getConstant(x
, MVT::i32
));
3499 res
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
,
3509 AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op
,
3510 SelectionDAG
&DAG
) const
3512 uint32_t vecSize
= Op
.getValueType().getVectorNumElements();
3513 SDValue src
= Op
.getOperand(0);
3514 const ConstantSDNode
*CSDN
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
3515 uint64_t offset
= 0;
3516 EVT vecType
= Op
.getValueType().getVectorElementType();
3517 DebugLoc DL
= Op
.getDebugLoc();
3520 offset
= CSDN
->getZExtValue();
3521 Result
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3522 DL
,vecType
, src
, DAG
.getConstant(offset
, MVT::i32
));
3523 Result
= DAG
.getNode(AMDILISD::VBUILD
, DL
,
3524 Op
.getValueType(), Result
);
3525 for (uint32_t x
= 1; x
< vecSize
; ++x
) {
3526 SDValue elt
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, vecType
,
3527 src
, DAG
.getConstant(offset
+ x
, MVT::i32
));
3528 if (elt
.getOpcode() != ISD::UNDEF
) {
3529 Result
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
,
3530 Op
.getValueType(), Result
, elt
,
3531 DAG
.getConstant(x
, MVT::i32
));
3535 SDValue idx
= Op
.getOperand(1);
3536 Result
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3537 DL
, vecType
, src
, idx
);
3538 Result
= DAG
.getNode(AMDILISD::VBUILD
, DL
,
3539 Op
.getValueType(), Result
);
3540 for (uint32_t x
= 1; x
< vecSize
; ++x
) {
3541 idx
= DAG
.getNode(ISD::ADD
, DL
, vecType
,
3542 idx
, DAG
.getConstant(1, MVT::i32
));
3543 SDValue elt
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, vecType
,
3545 if (elt
.getOpcode() != ISD::UNDEF
) {
3546 Result
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
,
3547 Op
.getValueType(), Result
, elt
, idx
);
3554 AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op
,
3555 SelectionDAG
&DAG
) const
3557 SDValue Res
= DAG
.getNode(AMDILISD::VBUILD
,
3564 AMDILTargetLowering::LowerSELECT(SDValue Op
, SelectionDAG
&DAG
) const
3566 SDValue Cond
= Op
.getOperand(0);
3567 SDValue LHS
= Op
.getOperand(1);
3568 SDValue RHS
= Op
.getOperand(2);
3569 DebugLoc DL
= Op
.getDebugLoc();
3570 Cond
= getConversionNode(DAG
, Cond
, Op
, true);
3571 Cond
= DAG
.getNode(AMDILISD::CMOVLOG
,
3573 Op
.getValueType(), Cond
, LHS
, RHS
);
3577 AMDILTargetLowering::LowerSETCC(SDValue Op
, SelectionDAG
&DAG
) const
3580 SDValue LHS
= Op
.getOperand(0);
3581 SDValue RHS
= Op
.getOperand(1);
3582 SDValue CC
= Op
.getOperand(2);
3583 DebugLoc DL
= Op
.getDebugLoc();
3584 ISD::CondCode SetCCOpcode
= cast
<CondCodeSDNode
>(CC
)->get();
3585 unsigned int AMDILCC
= CondCCodeToCC(
3587 LHS
.getValueType().getSimpleVT().SimpleTy
);
3588 assert((AMDILCC
!= AMDILCC::COND_ERROR
) && "Invalid SetCC!");
3594 DAG
.getConstant(-1, MVT::i32
),
3595 DAG
.getConstant(0, MVT::i32
),
3597 Cond
= getConversionNode(DAG
, Cond
, Op
, true);
3601 Cond
.getValueType(),
3602 DAG
.getConstant(1, Cond
.getValueType()),
3608 AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op
, SelectionDAG
&DAG
) const
3610 SDValue Data
= Op
.getOperand(0);
3611 VTSDNode
*BaseType
= cast
<VTSDNode
>(Op
.getOperand(1));
3612 DebugLoc DL
= Op
.getDebugLoc();
3613 EVT DVT
= Data
.getValueType();
3614 EVT BVT
= BaseType
->getVT();
3615 unsigned baseBits
= BVT
.getScalarType().getSizeInBits();
3616 unsigned srcBits
= DVT
.isSimple() ? DVT
.getScalarType().getSizeInBits() : 1;
3617 unsigned shiftBits
= srcBits
- baseBits
;
3619 // If the op is less than 32 bits, then it needs to extend to 32bits
3620 // so it can properly keep the upper bits valid.
3621 EVT IVT
= genIntType(32, DVT
.isVector() ? DVT
.getVectorNumElements() : 1);
3622 Data
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, IVT
, Data
);
3623 shiftBits
= 32 - baseBits
;
3626 SDValue Shift
= DAG
.getConstant(shiftBits
, DVT
);
3627 // Shift left by 'Shift' bits.
3628 Data
= DAG
.getNode(ISD::SHL
, DL
, DVT
, Data
, Shift
);
3629 // Signed shift Right by 'Shift' bits.
3630 Data
= DAG
.getNode(ISD::SRA
, DL
, DVT
, Data
, Shift
);
3632 // Once the sign extension is done, the op needs to be converted to
3633 // its original type.
3634 Data
= DAG
.getSExtOrTrunc(Data
, DL
, Op
.getOperand(0).getValueType());
3639 AMDILTargetLowering::genIntType(uint32_t size
, uint32_t numEle
) const
3641 int iSize
= (size
* numEle
);
3642 int vEle
= (iSize
>> ((size
== 64) ? 6 : 5));
3648 return EVT(MVT::i64
);
3650 return EVT(MVT::getVectorVT(MVT::i64
, vEle
));
3654 return EVT(MVT::i32
);
3656 return EVT(MVT::getVectorVT(MVT::i32
, vEle
));
3662 AMDILTargetLowering::LowerBITCAST(SDValue Op
, SelectionDAG
&DAG
) const
3664 SDValue Src
= Op
.getOperand(0);
3667 DebugLoc DL
= Op
.getDebugLoc();
3668 EVT SrcVT
= Src
.getValueType();
3669 EVT DstVT
= Dst
.getValueType();
3670 // Lets bitcast the floating point types to an
3671 // equivalent integer type before converting to vectors.
3672 if (SrcVT
.getScalarType().isFloatingPoint()) {
3673 Src
= DAG
.getNode(AMDILISD::BITCONV
, DL
, genIntType(
3674 SrcVT
.getScalarType().getSimpleVT().getSizeInBits(),
3675 SrcVT
.isVector() ? SrcVT
.getVectorNumElements() : 1),
3677 SrcVT
= Src
.getValueType();
3679 uint32_t ScalarSrcSize
= SrcVT
.getScalarType()
3680 .getSimpleVT().getSizeInBits();
3681 uint32_t ScalarDstSize
= DstVT
.getScalarType()
3682 .getSimpleVT().getSizeInBits();
3683 uint32_t SrcNumEle
= SrcVT
.isVector() ? SrcVT
.getVectorNumElements() : 1;
3684 uint32_t DstNumEle
= DstVT
.isVector() ? DstVT
.getVectorNumElements() : 1;
3685 bool isVec
= SrcVT
.isVector();
3686 if (DstVT
.getScalarType().isInteger() &&
3687 (SrcVT
.getScalarType().isInteger()
3688 || SrcVT
.getScalarType().isFloatingPoint())) {
3689 if ((ScalarDstSize
== 64 && SrcNumEle
== 4 && ScalarSrcSize
== 16)
3690 || (ScalarSrcSize
== 64
3692 && ScalarDstSize
== 16)) {
3693 // This is the problematic case when bitcasting i64 <-> <4 x i16>
3694 // This approach is a little different as we cannot generate a
3696 // as that is illegal in our backend and we are already past
3697 // the DAG legalizer.
3698 // So, in this case, we will do the following conversion.
3700 // %dst = <4 x i16> %src bitconvert i64 ==>
3701 // %tmp = <4 x i16> %src convert <4 x i32>
3702 // %tmp = <4 x i32> %tmp and 0xFFFF
3703 // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
3704 // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
3705 // %dst = <2 x i32> %tmp bitcast i64
3707 // %dst = i64 %src bitconvert <4 x i16> ==>
3708 // %tmp = i64 %src bitcast <2 x i32>
3709 // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
3710 // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
3711 // %tmp = <4 x i32> %tmp and 0xFFFF
3712 // %dst = <4 x i16> %tmp bitcast <4 x i32>
3713 SDValue mask
= DAG
.getNode(AMDILISD::VBUILD
, DL
, MVT::v4i32
,
3714 DAG
.getConstant(0xFFFF, MVT::i32
));
3715 SDValue const16
= DAG
.getConstant(16, MVT::i32
);
3716 if (ScalarDstSize
== 64) {
3718 Op
= DAG
.getSExtOrTrunc(Src
, DL
, MVT::v4i32
);
3719 Op
= DAG
.getNode(ISD::AND
, DL
, Op
.getValueType(), Op
, mask
);
3720 SDValue x
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, MVT::i32
,
3721 Op
, DAG
.getConstant(0, MVT::i32
));
3722 SDValue y
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, MVT::i32
,
3723 Op
, DAG
.getConstant(1, MVT::i32
));
3724 y
= DAG
.getNode(ISD::SHL
, DL
, MVT::i32
, y
, const16
);
3725 SDValue z
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, MVT::i32
,
3726 Op
, DAG
.getConstant(2, MVT::i32
));
3727 SDValue w
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, MVT::i32
,
3728 Op
, DAG
.getConstant(3, MVT::i32
));
3729 w
= DAG
.getNode(ISD::SHL
, DL
, MVT::i32
, w
, const16
);
3730 x
= DAG
.getNode(ISD::OR
, DL
, MVT::i32
, x
, y
);
3731 y
= DAG
.getNode(ISD::OR
, DL
, MVT::i32
, z
, w
);
3732 Res
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, MVT::i64
, x
, y
);
3736 SDValue lo
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, MVT::i32
, Src
);
3738 = DAG
.getNode(ISD::SRL
, DL
, MVT::i32
, lo
, const16
);
3739 SDValue hi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, MVT::i32
, Src
);
3741 = DAG
.getNode(ISD::SRL
, DL
, MVT::i32
, hi
, const16
);
3742 SDValue resVec
= DAG
.getNode(AMDILISD::VBUILD
, DL
,
3744 SDValue idxVal
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
3745 getPointerTy(), DAG
.getConstant(1, MVT::i32
));
3746 resVec
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v4i32
,
3747 resVec
, lor16
, idxVal
);
3748 idxVal
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
3749 getPointerTy(), DAG
.getConstant(2, MVT::i32
));
3750 resVec
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v4i32
,
3751 resVec
, hi
, idxVal
);
3752 idxVal
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
3753 getPointerTy(), DAG
.getConstant(3, MVT::i32
));
3754 resVec
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v4i32
,
3755 resVec
, hir16
, idxVal
);
3756 resVec
= DAG
.getNode(ISD::AND
, DL
, MVT::v4i32
, resVec
, mask
);
3757 Res
= DAG
.getSExtOrTrunc(resVec
, DL
, MVT::v4i16
);
3761 // There are four cases we need to worry about for bitcasts
3762 // where the size of all
3763 // source, intermediates and result is <= 128 bits, unlike
3765 // 1) Sub32bit bitcast 32bitAlign
3766 // %dst = <4 x i8> bitcast i32
3767 // (also <[2|4] x i16> to <[2|4] x i32>)
3768 // 2) 32bitAlign bitcast Sub32bit
3769 // %dst = i32 bitcast <4 x i8>
3770 // 3) Sub32bit bitcast LargerSub32bit
3771 // %dst = <2 x i8> bitcast i16
3772 // (also <4 x i8> to <2 x i16>)
3773 // 4) Sub32bit bitcast SmallerSub32bit
3774 // %dst = i16 bitcast <2 x i8>
3775 // (also <2 x i16> to <4 x i8>)
3776 // This also only handles types that are powers of two
3777 if ((ScalarDstSize
& (ScalarDstSize
- 1))
3778 || (ScalarSrcSize
& (ScalarSrcSize
- 1))) {
3779 } else if (ScalarDstSize
>= 32 && ScalarSrcSize
< 32) {
3781 EVT IntTy
= genIntType(ScalarDstSize
, SrcNumEle
);
3782 #if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
3783 SDValue res
= DAG
.getSExtOrTrunc(Src
, DL
, IntTy
);
3785 SDValue res
= DAG
.getNode(AMDILISD::VBUILD
, DL
, IntTy
,
3786 DAG
.getUNDEF(IntTy
.getScalarType()));
3787 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
3788 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
3789 getPointerTy(), DAG
.getConstant(x
, MVT::i32
));
3790 SDValue temp
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
,
3791 SrcVT
.getScalarType(), Src
,
3792 DAG
.getConstant(x
, MVT::i32
));
3793 temp
= DAG
.getSExtOrTrunc(temp
, DL
, IntTy
.getScalarType());
3794 res
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, IntTy
,
3798 SDValue mask
= DAG
.getNode(AMDILISD::VBUILD
, DL
, IntTy
,
3799 DAG
.getConstant((1 << ScalarSrcSize
) - 1, MVT::i32
));
3800 SDValue
*newEle
= new SDValue
[SrcNumEle
];
3801 res
= DAG
.getNode(ISD::AND
, DL
, IntTy
, res
, mask
);
3802 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
3803 newEle
[x
] = DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
,
3804 IntTy
.getScalarType(), res
,
3805 DAG
.getConstant(x
, MVT::i32
));
3807 uint32_t Ratio
= SrcNumEle
/ DstNumEle
;
3808 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
3810 newEle
[x
] = DAG
.getNode(ISD::SHL
, DL
,
3811 IntTy
.getScalarType(), newEle
[x
],
3812 DAG
.getConstant(ScalarSrcSize
* (x
% Ratio
),
3816 for (uint32_t x
= 0; x
< SrcNumEle
; x
+= 2) {
3817 newEle
[x
] = DAG
.getNode(ISD::OR
, DL
,
3818 IntTy
.getScalarType(), newEle
[x
], newEle
[x
+ 1]);
3820 if (ScalarSrcSize
== 8) {
3821 for (uint32_t x
= 0; x
< SrcNumEle
; x
+= 4) {
3822 newEle
[x
] = DAG
.getNode(ISD::OR
, DL
,
3823 IntTy
.getScalarType(), newEle
[x
], newEle
[x
+ 2]);
3825 if (DstNumEle
== 1) {
3828 Dst
= DAG
.getNode(AMDILISD::VBUILD
, DL
, DstVT
,
3830 for (uint32_t x
= 1; x
< DstNumEle
; ++x
) {
3831 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
3832 getPointerTy(), DAG
.getConstant(x
, MVT::i32
));
3833 Dst
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
,
3834 DstVT
, Dst
, newEle
[x
* 4], idx
);
3838 if (DstNumEle
== 1) {
3841 Dst
= DAG
.getNode(AMDILISD::VBUILD
, DL
, DstVT
,
3843 for (uint32_t x
= 1; x
< DstNumEle
; ++x
) {
3844 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
3845 getPointerTy(), DAG
.getConstant(x
, MVT::i32
));
3846 Dst
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
,
3847 DstVT
, Dst
, newEle
[x
* 2], idx
);
3853 } else if (ScalarDstSize
< 32 && ScalarSrcSize
>= 32) {
3855 EVT IntTy
= genIntType(ScalarSrcSize
, DstNumEle
);
3856 SDValue vec
= DAG
.getNode(AMDILISD::VBUILD
, DL
, IntTy
,
3857 DAG
.getUNDEF(IntTy
.getScalarType()));
3858 uint32_t mult
= (ScalarDstSize
== 8) ? 4 : 2;
3859 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
3860 for (uint32_t y
= 0; y
< mult
; ++y
) {
3861 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
3863 DAG
.getConstant(x
* mult
+ y
, MVT::i32
));
3865 if (SrcNumEle
> 1) {
3866 t
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3867 DL
, SrcVT
.getScalarType(), Src
,
3868 DAG
.getConstant(x
, MVT::i32
));
3873 t
= DAG
.getNode(ISD::SRL
, DL
, t
.getValueType(),
3874 t
, DAG
.getConstant(y
* ScalarDstSize
,
3877 vec
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
,
3878 DL
, IntTy
, vec
, t
, idx
);
3881 Dst
= DAG
.getSExtOrTrunc(vec
, DL
, DstVT
);
3883 } else if (ScalarDstSize
== 16 && ScalarSrcSize
== 8) {
3885 SDValue
*numEle
= new SDValue
[SrcNumEle
];
3886 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
3887 numEle
[x
] = DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
,
3888 MVT::i8
, Src
, DAG
.getConstant(x
, MVT::i32
));
3889 numEle
[x
] = DAG
.getSExtOrTrunc(numEle
[x
], DL
, MVT::i16
);
3890 numEle
[x
] = DAG
.getNode(ISD::AND
, DL
, MVT::i16
, numEle
[x
],
3891 DAG
.getConstant(0xFF, MVT::i16
));
3893 for (uint32_t x
= 1; x
< SrcNumEle
; x
+= 2) {
3894 numEle
[x
] = DAG
.getNode(ISD::SHL
, DL
, MVT::i16
, numEle
[x
],
3895 DAG
.getConstant(8, MVT::i16
));
3896 numEle
[x
- 1] = DAG
.getNode(ISD::OR
, DL
, MVT::i16
,
3897 numEle
[x
-1], numEle
[x
]);
3899 if (DstNumEle
> 1) {
3900 // If we are not a scalar i16, the only other case is a
3901 // v2i16 since we can't have v8i8 at this point, v4i16
3902 // cannot be generated
3903 Dst
= DAG
.getNode(AMDILISD::VBUILD
, DL
, MVT::v2i16
,
3905 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
3906 getPointerTy(), DAG
.getConstant(1, MVT::i32
));
3907 Dst
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v2i16
,
3908 Dst
, numEle
[2], idx
);
3914 } else if (ScalarDstSize
== 8 && ScalarSrcSize
== 16) {
3916 SDValue
*numEle
= new SDValue
[DstNumEle
];
3917 for (uint32_t x
= 0; x
< SrcNumEle
; ++x
) {
3918 numEle
[x
* 2] = DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
,
3919 MVT::i16
, Src
, DAG
.getConstant(x
, MVT::i32
));
3920 numEle
[x
* 2 + 1] = DAG
.getNode(ISD::SRL
, DL
, MVT::i16
,
3921 numEle
[x
* 2], DAG
.getConstant(8, MVT::i16
));
3923 MVT ty
= (SrcNumEle
== 1) ? MVT::v2i16
: MVT::v4i16
;
3924 Dst
= DAG
.getNode(AMDILISD::VBUILD
, DL
, ty
, numEle
[0]);
3925 for (uint32_t x
= 1; x
< DstNumEle
; ++x
) {
3926 SDValue idx
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
,
3927 getPointerTy(), DAG
.getConstant(x
, MVT::i32
));
3928 Dst
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, ty
,
3929 Dst
, numEle
[x
], idx
);
3932 ty
= (SrcNumEle
== 1) ? MVT::v2i8
: MVT::v4i8
;
3933 Res
= DAG
.getSExtOrTrunc(Dst
, DL
, ty
);
3938 Res
= DAG
.getNode(AMDILISD::BITCONV
,
3940 Dst
.getValueType(), Src
);
3945 AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op
,
3946 SelectionDAG
&DAG
) const
3948 SDValue Chain
= Op
.getOperand(0);
3949 SDValue Size
= Op
.getOperand(1);
3950 unsigned int SPReg
= AMDIL::SP
;
3951 DebugLoc DL
= Op
.getDebugLoc();
3952 SDValue SP
= DAG
.getCopyFromReg(Chain
,
3955 SDValue NewSP
= DAG
.getNode(ISD::ADD
,
3957 MVT::i32
, SP
, Size
);
3958 Chain
= DAG
.getCopyToReg(SP
.getValue(1),
3961 SDValue Ops
[2] = {NewSP
, Chain
};
3962 Chain
= DAG
.getMergeValues(Ops
, 2 ,DL
);
3966 AMDILTargetLowering::LowerBRCOND(SDValue Op
, SelectionDAG
&DAG
) const
3968 SDValue Chain
= Op
.getOperand(0);
3969 SDValue Cond
= Op
.getOperand(1);
3970 SDValue Jump
= Op
.getOperand(2);
3972 Result
= DAG
.getNode(
3973 AMDILISD::BRANCH_COND
,
3981 AMDILTargetLowering::LowerBR_CC(SDValue Op
, SelectionDAG
&DAG
) const
3983 SDValue Chain
= Op
.getOperand(0);
3984 SDValue CC
= Op
.getOperand(1);
3985 SDValue LHS
= Op
.getOperand(2);
3986 SDValue RHS
= Op
.getOperand(3);
3987 SDValue JumpT
= Op
.getOperand(4);
3990 CmpValue
= DAG
.getNode(
3995 DAG
.getConstant(-1, MVT::i32
),
3996 DAG
.getConstant(0, MVT::i32
),
3998 Result
= DAG
.getNode(
3999 AMDILISD::BRANCH_COND
,
4000 CmpValue
.getDebugLoc(),
4007 AMDILTargetLowering::LowerFP_ROUND(SDValue Op
, SelectionDAG
&DAG
) const
4009 SDValue Result
= DAG
.getNode(
4019 AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op
, SelectionDAG
&DAG
) const
4021 SDValue Result
= DAG
.getNode(
4029 // LowerRET - Lower an ISD::RET node.
4031 AMDILTargetLowering::LowerReturn(SDValue Chain
,
4032 CallingConv::ID CallConv
, bool isVarArg
,
4033 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
4034 const SmallVectorImpl
<SDValue
> &OutVals
,
4035 DebugLoc dl
, SelectionDAG
&DAG
)
4038 //MachineFunction& MF = DAG.getMachineFunction();
4039 // CCValAssign - represent the assignment of the return value
4041 SmallVector
<CCValAssign
, 16> RVLocs
;
4043 // CCState - Info about the registers and stack slot
4044 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
4045 getTargetMachine(), RVLocs
, *DAG
.getContext());
4047 // Analyze return values of ISD::RET
4048 CCInfo
.AnalyzeReturn(Outs
, RetCC_AMDIL32
);
4049 // If this is the first return lowered for this function, add
4050 // the regs to the liveout set for the function
4051 MachineRegisterInfo
&MRI
= DAG
.getMachineFunction().getRegInfo();
4052 for (unsigned int i
= 0, e
= RVLocs
.size(); i
!= e
; ++i
) {
4053 if (RVLocs
[i
].isRegLoc() && !MRI
.isLiveOut(RVLocs
[i
].getLocReg())) {
4054 MRI
.addLiveOut(RVLocs
[i
].getLocReg());
4057 // FIXME: implement this when tail call is implemented
4058 // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
4059 // both x86 and ppc implement this in ISelLowering
4061 // Regular return here
4063 SmallVector
<SDValue
, 6> RetOps
;
4064 RetOps
.push_back(Chain
);
4065 RetOps
.push_back(DAG
.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32
));
4066 for (unsigned int i
= 0, e
= RVLocs
.size(); i
!= e
; ++i
) {
4067 CCValAssign
&VA
= RVLocs
[i
];
4068 SDValue ValToCopy
= OutVals
[i
];
4069 assert(VA
.isRegLoc() && "Can only return in registers!");
4070 // ISD::Ret => ret chain, (regnum1, val1), ...
4071 // So i * 2 + 1 index only the regnums
4072 Chain
= DAG
.getCopyToReg(Chain
,
4077 // guarantee that all emitted copies are stuck together
4078 // avoiding something bad
4079 Flag
= Chain
.getValue(1);
4081 /*if (MF.getFunction()->hasStructRetAttr()) {
4082 assert(0 && "Struct returns are not yet implemented!");
4083 // Both MIPS and X86 have this
4087 RetOps
.push_back(Flag
);
4089 Flag
= DAG
.getNode(AMDILISD::RET_FLAG
,
4091 MVT::Other
, &RetOps
[0], RetOps
.size());
4096 AMDILTargetLowering::getFunctionAlignment(const Function
*) const
4102 AMDILTargetLowering::setPrivateData(MachineBasicBlock
*BB
,
4103 MachineBasicBlock::iterator
&BBI
,
4104 DebugLoc
*DL
, const TargetInstrInfo
*TII
) const
4112 AMDILTargetLowering::genVReg(uint32_t regType
) const
4114 return mBB
->getParent()->getRegInfo().createVirtualRegister(
4115 getTargetMachine().getRegisterInfo()->getRegClass(regType
));
4119 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
) const
4121 return BuildMI(*mBB
, mBBI
, *mDL
, mTII
->get(opcode
), dst
);
4125 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
,
4126 uint32_t src1
) const
4128 return generateMachineInst(opcode
, dst
).addReg(src1
);
4132 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
,
4133 uint32_t src1
, uint32_t src2
) const
4135 return generateMachineInst(opcode
, dst
, src1
).addReg(src2
);
4139 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
,
4140 uint32_t src1
, uint32_t src2
, uint32_t src3
) const
4142 return generateMachineInst(opcode
, dst
, src1
, src2
).addReg(src3
);
4147 AMDILTargetLowering::LowerSDIV24(SDValue Op
, SelectionDAG
&DAG
) const
4149 DebugLoc DL
= Op
.getDebugLoc();
4150 EVT OVT
= Op
.getValueType();
4151 SDValue LHS
= Op
.getOperand(0);
4152 SDValue RHS
= Op
.getOperand(1);
4155 if (!OVT
.isVector()) {
4158 } else if (OVT
.getVectorNumElements() == 2) {
4161 } else if (OVT
.getVectorNumElements() == 4) {
4165 unsigned bitsize
= OVT
.getScalarType().getSizeInBits();
4166 // char|short jq = ia ^ ib;
4167 SDValue jq
= DAG
.getNode(ISD::XOR
, DL
, OVT
, LHS
, RHS
);
4169 // jq = jq >> (bitsize - 2)
4170 jq
= DAG
.getNode(ISD::SRA
, DL
, OVT
, jq
, DAG
.getConstant(bitsize
- 2, OVT
));
4173 jq
= DAG
.getNode(ISD::OR
, DL
, OVT
, jq
, DAG
.getConstant(1, OVT
));
4176 jq
= DAG
.getSExtOrTrunc(jq
, DL
, INTTY
);
4178 // int ia = (int)LHS;
4179 SDValue ia
= DAG
.getSExtOrTrunc(LHS
, DL
, INTTY
);
4181 // int ib, (int)RHS;
4182 SDValue ib
= DAG
.getSExtOrTrunc(RHS
, DL
, INTTY
);
4184 // float fa = (float)ia;
4185 SDValue fa
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ia
);
4187 // float fb = (float)ib;
4188 SDValue fb
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ib
);
4190 // float fq = native_divide(fa, fb);
4191 SDValue fq
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, FLTTY
, fa
, fb
);
4194 fq
= DAG
.getNode(ISD::FTRUNC
, DL
, FLTTY
, fq
);
4196 // float fqneg = -fq;
4197 SDValue fqneg
= DAG
.getNode(ISD::FNEG
, DL
, FLTTY
, fq
);
4199 // float fr = mad(fqneg, fb, fa);
4200 SDValue fr
= DAG
.getNode(AMDILISD::MAD
, DL
, FLTTY
, fqneg
, fb
, fa
);
4202 // int iq = (int)fq;
4203 SDValue iq
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, INTTY
, fq
);
4206 fr
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fr
);
4209 fb
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fb
);
4211 // int cv = fr >= fb;
4213 if (INTTY
== MVT::i32
) {
4214 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
4216 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
4218 // jq = (cv ? jq : 0);
4219 jq
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, cv
, jq
,
4220 DAG
.getConstant(0, OVT
));
4222 iq
= DAG
.getSExtOrTrunc(iq
, DL
, OVT
);
4223 iq
= DAG
.getNode(ISD::ADD
, DL
, OVT
, iq
, jq
);
4228 AMDILTargetLowering::LowerSDIV32(SDValue Op
, SelectionDAG
&DAG
) const
4230 DebugLoc DL
= Op
.getDebugLoc();
4231 EVT OVT
= Op
.getValueType();
4232 SDValue LHS
= Op
.getOperand(0);
4233 SDValue RHS
= Op
.getOperand(1);
4234 // The LowerSDIV32 function generates equivalent to the following IL.
4244 // ixor r10, r10, r11
4246 // ixor DST, r0, r10
4255 SDValue r10
= DAG
.getSelectCC(DL
,
4256 r0
, DAG
.getConstant(0, OVT
),
4257 DAG
.getConstant(-1, MVT::i32
),
4258 DAG
.getConstant(0, MVT::i32
),
4262 SDValue r11
= DAG
.getSelectCC(DL
,
4263 r1
, DAG
.getConstant(0, OVT
),
4264 DAG
.getConstant(-1, MVT::i32
),
4265 DAG
.getConstant(0, MVT::i32
),
4269 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
4272 r1
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r1
, r11
);
4275 r0
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
4278 r1
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r1
, r11
);
4281 r0
= DAG
.getNode(ISD::UDIV
, DL
, OVT
, r0
, r1
);
4283 // ixor r10, r10, r11
4284 r10
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r10
, r11
);
4287 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
4289 // ixor DST, r0, r10
4290 SDValue DST
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
4295 AMDILTargetLowering::LowerSDIV64(SDValue Op
, SelectionDAG
&DAG
) const
4297 return SDValue(Op
.getNode(), 0);
4301 AMDILTargetLowering::LowerUDIV24(SDValue Op
, SelectionDAG
&DAG
) const
4303 DebugLoc DL
= Op
.getDebugLoc();
4304 EVT OVT
= Op
.getValueType();
4305 SDValue LHS
= Op
.getOperand(0);
4306 SDValue RHS
= Op
.getOperand(1);
4309 if (!OVT
.isVector()) {
4312 } else if (OVT
.getVectorNumElements() == 2) {
4315 } else if (OVT
.getVectorNumElements() == 4) {
4320 // The LowerUDIV24 function implements the following CL.
4321 // int ia = (int)LHS
4322 // float fa = (float)ia
4323 // int ib = (int)RHS
4324 // float fb = (float)ib
4325 // float fq = native_divide(fa, fb)
4327 // float t = mad(fq, fb, fb)
4328 // int iq = (int)fq - (t <= fa)
4331 // int ia = (int)LHS
4332 SDValue ia
= DAG
.getZExtOrTrunc(LHS
, DL
, INTTY
);
4334 // float fa = (float)ia
4335 SDValue fa
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ia
);
4337 // int ib = (int)RHS
4338 SDValue ib
= DAG
.getZExtOrTrunc(RHS
, DL
, INTTY
);
4340 // float fb = (float)ib
4341 SDValue fb
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ib
);
4343 // float fq = native_divide(fa, fb)
4344 SDValue fq
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, FLTTY
, fa
, fb
);
4347 fq
= DAG
.getNode(ISD::FTRUNC
, DL
, FLTTY
, fq
);
4349 // float t = mad(fq, fb, fb)
4350 SDValue t
= DAG
.getNode(AMDILISD::MAD
, DL
, FLTTY
, fq
, fb
, fb
);
4352 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
4354 fq
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, INTTY
, fq
);
4355 if (INTTY
== MVT::i32
) {
4356 iq
= DAG
.getSetCC(DL
, INTTY
, t
, fa
, ISD::SETOLE
);
4358 iq
= DAG
.getSetCC(DL
, INTTY
, t
, fa
, ISD::SETOLE
);
4360 iq
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, fq
, iq
);
4364 iq
= DAG
.getZExtOrTrunc(iq
, DL
, OVT
);
4370 AMDILTargetLowering::LowerUDIV32(SDValue Op
, SelectionDAG
&DAG
) const
4372 return SDValue(Op
.getNode(), 0);
4376 AMDILTargetLowering::LowerUDIV64(SDValue Op
, SelectionDAG
&DAG
) const
4378 return SDValue(Op
.getNode(), 0);
4381 AMDILTargetLowering::LowerSREM8(SDValue Op
, SelectionDAG
&DAG
) const
4383 DebugLoc DL
= Op
.getDebugLoc();
4384 EVT OVT
= Op
.getValueType();
4385 MVT INTTY
= MVT::i32
;
4386 if (OVT
== MVT::v2i8
) {
4388 } else if (OVT
== MVT::v4i8
) {
4391 SDValue LHS
= DAG
.getSExtOrTrunc(Op
.getOperand(0), DL
, INTTY
);
4392 SDValue RHS
= DAG
.getSExtOrTrunc(Op
.getOperand(1), DL
, INTTY
);
4393 LHS
= DAG
.getNode(ISD::SREM
, DL
, INTTY
, LHS
, RHS
);
4394 LHS
= DAG
.getSExtOrTrunc(LHS
, DL
, OVT
);
4399 AMDILTargetLowering::LowerSREM16(SDValue Op
, SelectionDAG
&DAG
) const
4401 DebugLoc DL
= Op
.getDebugLoc();
4402 EVT OVT
= Op
.getValueType();
4403 MVT INTTY
= MVT::i32
;
4404 if (OVT
== MVT::v2i16
) {
4406 } else if (OVT
== MVT::v4i16
) {
4409 SDValue LHS
= DAG
.getSExtOrTrunc(Op
.getOperand(0), DL
, INTTY
);
4410 SDValue RHS
= DAG
.getSExtOrTrunc(Op
.getOperand(1), DL
, INTTY
);
4411 LHS
= DAG
.getNode(ISD::SREM
, DL
, INTTY
, LHS
, RHS
);
4412 LHS
= DAG
.getSExtOrTrunc(LHS
, DL
, OVT
);
4417 AMDILTargetLowering::LowerSREM32(SDValue Op
, SelectionDAG
&DAG
) const
4419 DebugLoc DL
= Op
.getDebugLoc();
4420 EVT OVT
= Op
.getValueType();
4421 SDValue LHS
= Op
.getOperand(0);
4422 SDValue RHS
= Op
.getOperand(1);
4423 // The LowerSREM32 function generates equivalent to the following IL.
4433 // umul r20, r20, r1
4436 // ixor DST, r0, r10
4445 SDValue r10
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
4446 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
4447 r0
, DAG
.getConstant(0, OVT
));
4450 SDValue r11
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
4451 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
4452 r1
, DAG
.getConstant(0, OVT
));
4455 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
4458 r1
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r1
, r11
);
4461 r0
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
4464 r1
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r1
, r11
);
4467 SDValue r20
= DAG
.getNode(ISD::UREM
, DL
, OVT
, r0
, r1
);
4469 // umul r20, r20, r1
4470 r20
= DAG
.getNode(AMDILISD::UMUL
, DL
, OVT
, r20
, r1
);
4473 r0
= DAG
.getNode(ISD::SUB
, DL
, OVT
, r0
, r20
);
4476 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
4478 // ixor DST, r0, r10
4479 SDValue DST
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
4484 AMDILTargetLowering::LowerSREM64(SDValue Op
, SelectionDAG
&DAG
) const
4486 return SDValue(Op
.getNode(), 0);
4490 AMDILTargetLowering::LowerUREM8(SDValue Op
, SelectionDAG
&DAG
) const
4492 DebugLoc DL
= Op
.getDebugLoc();
4493 EVT OVT
= Op
.getValueType();
4494 MVT INTTY
= MVT::i32
;
4495 if (OVT
== MVT::v2i8
) {
4497 } else if (OVT
== MVT::v4i8
) {
4500 SDValue LHS
= Op
.getOperand(0);
4501 SDValue RHS
= Op
.getOperand(1);
4502 // The LowerUREM8 function generates equivalent to the following IL.
4503 // mov r0, as_u32(LHS)
4504 // mov r1, as_u32(RHS)
4505 // and r10, r0, 0xFF
4506 // and r11, r1, 0xFF
4507 // cmov_logical r3, r11, r11, 0x1
4509 // cmov_logical r3, r11, r3, 0
4512 // and as_u8(DST), r3, 0xFF
4514 // mov r0, as_u32(LHS)
4515 SDValue r0
= DAG
.getSExtOrTrunc(LHS
, DL
, INTTY
);
4517 // mov r1, as_u32(RHS)
4518 SDValue r1
= DAG
.getSExtOrTrunc(RHS
, DL
, INTTY
);
4520 // and r10, r0, 0xFF
4521 SDValue r10
= DAG
.getNode(ISD::AND
, DL
, INTTY
, r0
,
4522 DAG
.getConstant(0xFF, INTTY
));
4524 // and r11, r1, 0xFF
4525 SDValue r11
= DAG
.getNode(ISD::AND
, DL
, INTTY
, r1
,
4526 DAG
.getConstant(0xFF, INTTY
));
4528 // cmov_logical r3, r11, r11, 0x1
4529 SDValue r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, r11
, r11
,
4530 DAG
.getConstant(0x01, INTTY
));
4533 r3
= DAG
.getNode(ISD::UREM
, DL
, INTTY
, r10
, r3
);
4535 // cmov_logical r3, r11, r3, 0
4536 r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, r11
, r3
,
4537 DAG
.getConstant(0, INTTY
));
4540 r3
= DAG
.getNode(AMDILISD::UMUL
, DL
, INTTY
, r3
, r11
);
4543 r3
= DAG
.getNode(ISD::SUB
, DL
, INTTY
, r10
, r3
);
4545 // and as_u8(DST), r3, 0xFF
4546 SDValue DST
= DAG
.getNode(ISD::AND
, DL
, INTTY
, r3
,
4547 DAG
.getConstant(0xFF, INTTY
));
4548 DST
= DAG
.getZExtOrTrunc(DST
, DL
, OVT
);
4553 AMDILTargetLowering::LowerUREM16(SDValue Op
, SelectionDAG
&DAG
) const
4555 DebugLoc DL
= Op
.getDebugLoc();
4556 EVT OVT
= Op
.getValueType();
4557 MVT INTTY
= MVT::i32
;
4558 if (OVT
== MVT::v2i16
) {
4560 } else if (OVT
== MVT::v4i16
) {
4563 SDValue LHS
= Op
.getOperand(0);
4564 SDValue RHS
= Op
.getOperand(1);
4565 // The LowerUREM16 function generatest equivalent to the following IL.
4568 // DIV = LowerUDIV16(LHS, RHS)
4569 // and r10, r0, 0xFFFF
4570 // and r11, r1, 0xFFFF
4571 // cmov_logical r3, r11, r11, 0x1
4572 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
4573 // and r3, r3, 0xFFFF
4574 // cmov_logical r3, r11, r3, 0
4577 // and DST, r3, 0xFFFF
4585 // and r10, r0, 0xFFFF
4586 SDValue r10
= DAG
.getNode(ISD::AND
, DL
, OVT
, r0
,
4587 DAG
.getConstant(0xFFFF, OVT
));
4589 // and r11, r1, 0xFFFF
4590 SDValue r11
= DAG
.getNode(ISD::AND
, DL
, OVT
, r1
,
4591 DAG
.getConstant(0xFFFF, OVT
));
4593 // cmov_logical r3, r11, r11, 0x1
4594 SDValue r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, r11
, r11
,
4595 DAG
.getConstant(0x01, OVT
));
4597 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
4598 r10
= DAG
.getZExtOrTrunc(r10
, DL
, INTTY
);
4599 r3
= DAG
.getZExtOrTrunc(r3
, DL
, INTTY
);
4600 r3
= DAG
.getNode(ISD::UREM
, DL
, INTTY
, r10
, r3
);
4601 r3
= DAG
.getZExtOrTrunc(r3
, DL
, OVT
);
4602 r10
= DAG
.getZExtOrTrunc(r10
, DL
, OVT
);
4604 // and r3, r3, 0xFFFF
4605 r3
= DAG
.getNode(ISD::AND
, DL
, OVT
, r3
,
4606 DAG
.getConstant(0xFFFF, OVT
));
4608 // cmov_logical r3, r11, r3, 0
4609 r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, r11
, r3
,
4610 DAG
.getConstant(0, OVT
));
4612 r3
= DAG
.getNode(AMDILISD::UMUL
, DL
, OVT
, r3
, r11
);
4615 r3
= DAG
.getNode(ISD::SUB
, DL
, OVT
, r10
, r3
);
4617 // and DST, r3, 0xFFFF
4618 SDValue DST
= DAG
.getNode(ISD::AND
, DL
, OVT
, r3
,
4619 DAG
.getConstant(0xFFFF, OVT
));
4624 AMDILTargetLowering::LowerUREM32(SDValue Op
, SelectionDAG
&DAG
) const
4626 DebugLoc DL
= Op
.getDebugLoc();
4627 EVT OVT
= Op
.getValueType();
4628 SDValue LHS
= Op
.getOperand(0);
4629 SDValue RHS
= Op
.getOperand(1);
4630 // The LowerUREM32 function generates equivalent to the following IL.
4631 // udiv r20, LHS, RHS
4632 // umul r20, r20, RHS
4633 // sub DST, LHS, r20
4635 // udiv r20, LHS, RHS
4636 SDValue r20
= DAG
.getNode(ISD::UDIV
, DL
, OVT
, LHS
, RHS
);
4638 // umul r20, r20, RHS
4639 r20
= DAG
.getNode(AMDILISD::UMUL
, DL
, OVT
, r20
, RHS
);
4641 // sub DST, LHS, r20
4642 SDValue DST
= DAG
.getNode(ISD::SUB
, DL
, OVT
, LHS
, r20
);
4647 AMDILTargetLowering::LowerUREM64(SDValue Op
, SelectionDAG
&DAG
) const
4649 return SDValue(Op
.getNode(), 0);
4654 AMDILTargetLowering::LowerFDIV32(SDValue Op
, SelectionDAG
&DAG
) const
4656 DebugLoc DL
= Op
.getDebugLoc();
4657 EVT OVT
= Op
.getValueType();
4658 MVT INTTY
= MVT::i32
;
4659 if (OVT
== MVT::v2f32
) {
4661 } else if (OVT
== MVT::v4f32
) {
4664 SDValue LHS
= Op
.getOperand(0);
4665 SDValue RHS
= Op
.getOperand(1);
4667 const AMDILSubtarget
*stm
= reinterpret_cast<const AMDILTargetMachine
*>(
4668 &this->getTargetMachine())->getSubtargetImpl();
4669 if (stm
->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
4670 // TODO: This doesn't work for vector types yet
4671 // The LowerFDIV32 function generates equivalent to the following
4673 // mov r20, as_int(LHS)
4674 // mov r21, as_int(RHS)
4675 // and r30, r20, 0x7f800000
4676 // and r31, r20, 0x807FFFFF
4677 // and r32, r21, 0x7f800000
4678 // and r33, r21, 0x807FFFFF
4679 // ieq r40, r30, 0x7F800000
4680 // ieq r41, r31, 0x7F800000
4683 // and r50, r20, 0x80000000
4684 // and r51, r21, 0x80000000
4685 // ior r32, r32, 0x3f800000
4686 // ior r33, r33, 0x3f800000
4687 // cmov_logical r32, r42, r50, r32
4688 // cmov_logical r33, r43, r51, r33
4689 // cmov_logical r32, r40, r20, r32
4690 // cmov_logical r33, r41, r21, r33
4691 // ior r50, r40, r41
4692 // ior r51, r42, r43
4693 // ior r50, r50, r51
4695 // iadd r30, r30, r52
4696 // cmov_logical r30, r50, 0, r30
4697 // div_zeroop(infinity) r21, 1.0, r33
4698 // mul_ieee r20, r32, r21
4699 // and r22, r20, 0x7FFFFFFF
4700 // and r23, r20, 0x80000000
4701 // ishr r60, r22, 0x00000017
4702 // ishr r61, r30, 0x00000017
4703 // iadd r20, r20, r30
4704 // iadd r21, r22, r30
4705 // iadd r60, r60, r61
4707 // ior r41, r23, 0x7F800000
4708 // ige r40, r60, 0x000000FF
4709 // cmov_logical r40, r50, 0, r40
4710 // cmov_logical r20, r42, r23, r20
4711 // cmov_logical DST, r40, r41, r20
4714 // mov r20, as_int(LHS)
4715 SDValue R20
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, LHS
);
4717 // mov r21, as_int(RHS)
4718 SDValue R21
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, RHS
);
4720 // and r30, r20, 0x7f800000
4721 SDValue R30
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
4722 DAG
.getConstant(0x7F800000, INTTY
));
4724 // and r31, r21, 0x7f800000
4725 SDValue R31
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R21
,
4726 DAG
.getConstant(0x7f800000, INTTY
));
4728 // and r32, r20, 0x807FFFFF
4729 SDValue R32
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
4730 DAG
.getConstant(0x807FFFFF, INTTY
));
4732 // and r33, r21, 0x807FFFFF
4733 SDValue R33
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R21
,
4734 DAG
.getConstant(0x807FFFFF, INTTY
));
4736 // ieq r40, r30, 0x7F800000
4737 SDValue R40
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
4738 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
4739 R30
, DAG
.getConstant(0x7F800000, INTTY
));
4741 // ieq r41, r31, 0x7F800000
4742 SDValue R41
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
4743 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
4744 R31
, DAG
.getConstant(0x7F800000, INTTY
));
4747 SDValue R42
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
4748 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
4749 R30
, DAG
.getConstant(0, INTTY
));
4752 SDValue R43
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
4753 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
4754 R31
, DAG
.getConstant(0, INTTY
));
4756 // and r50, r20, 0x80000000
4757 SDValue R50
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
4758 DAG
.getConstant(0x80000000, INTTY
));
4760 // and r51, r21, 0x80000000
4761 SDValue R51
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R21
,
4762 DAG
.getConstant(0x80000000, INTTY
));
4764 // ior r32, r32, 0x3f800000
4765 R32
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R32
,
4766 DAG
.getConstant(0x3F800000, INTTY
));
4768 // ior r33, r33, 0x3f800000
4769 R33
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R33
,
4770 DAG
.getConstant(0x3F800000, INTTY
));
4772 // cmov_logical r32, r42, r50, r32
4773 R32
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R42
, R50
, R32
);
4775 // cmov_logical r33, r43, r51, r33
4776 R33
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R43
, R51
, R33
);
4778 // cmov_logical r32, r40, r20, r32
4779 R32
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R40
, R20
, R32
);
4781 // cmov_logical r33, r41, r21, r33
4782 R33
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R41
, R21
, R33
);
4784 // ior r50, r40, r41
4785 R50
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R40
, R41
);
4787 // ior r51, r42, r43
4788 R51
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R42
, R43
);
4790 // ior r50, r50, r51
4791 R50
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R50
, R51
);
4794 SDValue R52
= DAG
.getNode(AMDILISD::INEGATE
, DL
, INTTY
, R31
);
4796 // iadd r30, r30, r52
4797 R30
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R30
, R52
);
4799 // cmov_logical r30, r50, 0, r30
4800 R30
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R50
,
4801 DAG
.getConstant(0, INTTY
), R30
);
4803 // div_zeroop(infinity) r21, 1.0, as_float(r33)
4804 R33
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R33
);
4805 R21
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, OVT
,
4806 DAG
.getConstantFP(1.0f
, OVT
), R33
);
4808 // mul_ieee as_int(r20), as_float(r32), r21
4809 R32
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R32
);
4810 R20
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, R32
, R21
);
4811 R20
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, R20
);
4813 // div_zeroop(infinity) r21, 1.0, as_float(r33)
4814 R33
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R33
);
4815 R21
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, OVT
,
4816 DAG
.getConstantFP(1.0f
, OVT
), R33
);
4818 // mul_ieee as_int(r20), as_float(r32), r21
4819 R32
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R32
);
4820 R20
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, R32
, R21
);
4821 R20
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, R20
);
4823 // and r22, r20, 0x7FFFFFFF
4824 SDValue R22
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
4825 DAG
.getConstant(0x7FFFFFFF, INTTY
));
4827 // and r23, r20, 0x80000000
4828 SDValue R23
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
4829 DAG
.getConstant(0x80000000, INTTY
));
4831 // ishr r60, r22, 0x00000017
4832 SDValue R60
= DAG
.getNode(ISD::SRA
, DL
, INTTY
, R22
,
4833 DAG
.getConstant(0x00000017, INTTY
));
4835 // ishr r61, r30, 0x00000017
4836 SDValue R61
= DAG
.getNode(ISD::SRA
, DL
, INTTY
, R30
,
4837 DAG
.getConstant(0x00000017, INTTY
));
4839 // iadd r20, r20, r30
4840 R20
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R20
, R30
);
4842 // iadd r21, r22, r30
4843 R21
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R22
, R30
);
4845 // iadd r60, r60, r61
4846 R60
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R60
, R61
);
4849 R42
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
4850 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
4851 DAG
.getConstant(0, INTTY
),
4854 // ior r41, r23, 0x7F800000
4855 R41
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R23
,
4856 DAG
.getConstant(0x7F800000, INTTY
));
4858 // ige r40, r60, 0x000000FF
4859 R40
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
4860 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
4862 DAG
.getConstant(0x0000000FF, INTTY
));
4864 // cmov_logical r40, r50, 0, r40
4865 R40
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R50
,
4866 DAG
.getConstant(0, INTTY
),
4869 // cmov_logical r20, r42, r23, r20
4870 R20
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R42
, R23
, R20
);
4872 // cmov_logical DST, r40, r41, r20
4873 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R40
, R41
, R20
);
4876 DST
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, DST
);
4878 // The following sequence of DAG nodes produce the following IL:
4880 // lt r2, 0x1.0p+96f, r1
4881 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
4882 // mul_ieee r1, RHS, r3
4883 // div_zeroop(infinity) r0, LHS, r1
4884 // mul_ieee DST, r0, r3
4887 SDValue r1
= DAG
.getNode(ISD::FABS
, DL
, OVT
, RHS
);
4888 // lt r2, 0x1.0p+96f, r1
4889 SDValue r2
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
4890 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::f32
), MVT::i32
),
4891 DAG
.getConstant(0x6f800000, INTTY
), r1
);
4892 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
4893 SDValue r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, r2
,
4894 DAG
.getConstant(0x2f800000, INTTY
),
4895 DAG
.getConstant(0x3f800000, INTTY
));
4896 // mul_ieee r1, RHS, r3
4897 r1
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, RHS
, r3
);
4898 // div_zeroop(infinity) r0, LHS, r1
4899 SDValue r0
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, OVT
, LHS
, r1
);
4900 // mul_ieee DST, r0, r3
4901 DST
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, r0
, r3
);
4907 AMDILTargetLowering::LowerFDIV64(SDValue Op
, SelectionDAG
&DAG
) const
4909 return SDValue(Op
.getNode(), 0);