//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
// The LLVM Compiler Infrastructure
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//==-----------------------------------------------------------------------===//
// This file implements the interfaces that AMDIL uses to lower LLVM code into a
//===----------------------------------------------------------------------===//
#include "AMDILISelLowering.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILRegisterInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

#define ISDBITCAST ISD::BITCAST
#define MVTGLUE MVT::Glue
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
//===----------------------------------------------------------------------===//
getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
  DebugLoc DL = Src.getDebugLoc();
  EVT svt = Src.getValueType().getScalarType();
  EVT dvt = Dst.getValueType().getScalarType();
  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
    if (dvt.bitsGT(svt)) {
      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
    } else if (svt.bitsLT(svt)) {
      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
          DAG.getConstant(1, MVT::i32));
  } else if (svt.isInteger() && dvt.isInteger()) {
    if (!svt.bitsEq(dvt)) {
      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  } else if (svt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
    if (!svt.bitsEq(dvt)) {
      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
        assert(0 && "We only support 32 and 64bit fp types");
    Src = DAG.getNode(opcode, DL, dvt, Src);
  } else if (dvt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
      assert(0 && "We only support 32 and 64bit fp types");
    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
// CondCCodeToCC - Convert a DAG condition code to an AMDIL CC
static AMDILCC::CondCodes
CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
  errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
  assert(0 && "Unknown condition code!");
    return AMDILCC::IL_CC_F_O;
    return AMDILCC::IL_CC_D_O;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_F_UO;
    return AMDILCC::IL_CC_D_UO;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_I_GT;
    return AMDILCC::IL_CC_F_GT;
    return AMDILCC::IL_CC_D_GT;
    return AMDILCC::IL_CC_L_GT;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_I_GE;
    return AMDILCC::IL_CC_F_GE;
    return AMDILCC::IL_CC_D_GE;
    return AMDILCC::IL_CC_L_GE;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_I_LT;
    return AMDILCC::IL_CC_F_LT;
    return AMDILCC::IL_CC_D_LT;
    return AMDILCC::IL_CC_L_LT;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_I_LE;
    return AMDILCC::IL_CC_F_LE;
    return AMDILCC::IL_CC_D_LE;
    return AMDILCC::IL_CC_L_LE;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_I_NE;
    return AMDILCC::IL_CC_F_NE;
    return AMDILCC::IL_CC_D_NE;
    return AMDILCC::IL_CC_L_NE;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_I_EQ;
    return AMDILCC::IL_CC_F_EQ;
    return AMDILCC::IL_CC_D_EQ;
    return AMDILCC::IL_CC_L_EQ;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_U_GT;
    return AMDILCC::IL_CC_F_UGT;
    return AMDILCC::IL_CC_D_UGT;
    return AMDILCC::IL_CC_UL_GT;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_U_GE;
    return AMDILCC::IL_CC_F_UGE;
    return AMDILCC::IL_CC_D_UGE;
    return AMDILCC::IL_CC_UL_GE;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_U_LT;
    return AMDILCC::IL_CC_F_ULT;
    return AMDILCC::IL_CC_D_ULT;
    return AMDILCC::IL_CC_UL_LT;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_U_LE;
    return AMDILCC::IL_CC_F_ULE;
    return AMDILCC::IL_CC_D_ULE;
    return AMDILCC::IL_CC_UL_LE;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_U_NE;
    return AMDILCC::IL_CC_F_UNE;
    return AMDILCC::IL_CC_D_UNE;
    return AMDILCC::IL_CC_UL_NE;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_U_EQ;
    return AMDILCC::IL_CC_F_UEQ;
    return AMDILCC::IL_CC_D_UEQ;
    return AMDILCC::IL_CC_UL_EQ;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_F_OGT;
    return AMDILCC::IL_CC_D_OGT;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_F_OGE;
    return AMDILCC::IL_CC_D_OGE;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_F_OLT;
    return AMDILCC::IL_CC_D_OLT;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_F_OLE;
    return AMDILCC::IL_CC_D_OLE;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_F_ONE;
    return AMDILCC::IL_CC_D_ONE;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
    return AMDILCC::IL_CC_F_OEQ;
    return AMDILCC::IL_CC_D_OEQ;
  assert(0 && "Opcode combination not generated correctly!");
  return AMDILCC::COND_ERROR;
AMDILTargetLowering::LowerMemArgument(
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    MachineFrameInfo *MFI,
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;
  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
    getTargetMachine().Options.GuaranteedTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization mark all arguments mutable. Since they
  // could be overwritten by lowering of arguments in case of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
      VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(FI),
      false, false, false, 0);
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF())
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // These are the current register classes that are
  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);
    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);
    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setStackPointerRegisterToSaveRestore(AMDGPU::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);
  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;
#undef numVectorTypes
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
  case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
  case AMDILISD::MAD: return "AMDILISD::MAD";
  case AMDILISD::CALL: return "AMDILISD::CALL";
  case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
  case AMDILISD::UMUL: return "AMDILISD::UMUL";
  case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
  case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
  case AMDILISD::CMP: return "AMDILISD::CMP";
  case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
  case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
  case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
  case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
  case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
  case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
  case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
  case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
// The backend supports 32 and 64 bit floating point immediates
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
// be zero. Op is expected to be a target specific node. Used by DAG
AMDILTargetLowering::computeMaskedBitsForTargetNode(
    const SelectionDAG &DAG,
    unsigned Depth) const
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
    case AMDILISD::SELECT_CC:
      DAG.ComputeMaskedBits(
      DAG.ComputeMaskedBits(
      assert((KnownZero & KnownOne) == 0
          && "Bits known to be one AND zero?");
      assert((KnownZero2 & KnownOne2) == 0
          && "Bits known to be one AND zero?");
      // Only known if known in both the LHS and RHS
      KnownOne &= KnownOne2;
      KnownZero &= KnownZero2;
// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();

// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of appropriate physical registers. This assumes that
// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
// being lowered. This returns a SDNode with the same number of values as the
AMDILTargetLowering::LowerCallResult(
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals) const
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      Chain = DAG.getCopyFromReg(
          RVLocs[i].getLocReg(),
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
    DAG.AssignOrdering( New.getNode(), order );
    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
      Ordered( DAG, order, New.getOperand(i) );

  return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
  switch (Op.getOpcode()) {
      Op.getNode()->dump();
      assert(0 && "Custom lowering code for this"
          "instruction is not implemented yet!");
    LOWER(GlobalAddress);
    LOWER(ExternalSymbol);
    LOWER(SIGN_EXTEND_INREG);
    LOWER(DYNAMIC_STACKALLOC);
AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *G = GADN->getGlobal();
  DebugLoc DL = Op.getDebugLoc();
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
    DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
    if (GV->hasInitializer()) {
      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
        DST = DAG.getConstantFP(CF->getValueAPF(),
      } else if (dyn_cast<ConstantAggregateZero>(C)) {
        EVT VT = Op.getValueType();
        if (VT.isInteger()) {
          DST = DAG.getConstant(0, VT);
          DST = DAG.getConstantFP(0, VT);
        assert(!"lowering this type of Global Address "
            "not implemented yet!");
      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
    DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  EVT PtrVT = Op.getValueType();
  if (CP->isMachineConstantPoolEntry()) {
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
/// LowerFORMAL_ARGUMENTS - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals)
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *DAG.getContext());
  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  //unsigned int FirstStackArgLoc = 0;
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFor(
          RegVT.getSimpleVT().SimpleTy);
      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.
      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            DAG.getValueType(VA.getValVT()));
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
    } else if(VA.isMemLoc()) {
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
          dl, DAG, VA, MFI, i));
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
    // See MipsISelLowering.cpp for ideas on how to implement
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  // This needs to be changed to non-zero if the return function needs
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain,
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
/// LowerCALL - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: Do we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
  MachineFrameInfo *MFI = MF.getFrameInfo();
  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
  unsigned int NumBytes = CCInfo.getNextStackOffset();
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;
  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    //Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND,
            VA.getLocVT(), Arg);
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND,
            VA.getLocVT(), Arg);
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND,
            VA.getLocVT(), Arg);
    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
      // emit ISD::STORE which stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
          MachinePointerInfo::getFixedStack(FI),
      assert(0 && "Not a Reg/Mem Loc, major error!");
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        MemOpChains.size());
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain,
        RegsToPass[i].first,
        RegsToPass[i].second,
    InFlag = Chain.getValue(1);
  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
        RegsToPass[i].first,
        RegsToPass[i].second.getValueType()));
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  Chain = DAG.getNode(AMDILISD::CALL,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);
  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
  EVT OVT = Op.getValueType();
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
    DST = SDValue(Op.getNode(), 0);
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
  EVT OVT = Op.getValueType();
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
    DST = SDValue(Op.getNode(), 0);
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
  EVT VT = Op.getValueType();
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      VT, Op.getOperand(0));
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
  switch(Op.getNumOperands()) {
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DAG.getConstant(7, MVT::i32));
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DAG.getConstant(6, MVT::i32));
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DAG.getConstant(5, MVT::i32));
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDILISD::CMOVLOG,
      Op.getValueType(), Cond, LHS, RHS);
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
  Cond = getConversionNode(DAG, Cond, Op, true);
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  // If the op is less than 32 bits, then it needs to extend to 32bits
  // so it can properly keep the upper bits valid.
  EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
  Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
  shiftBits = 32 - baseBits;
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  // Once the sign extension is done, the op needs to be converted to
  // its original type.
  Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
      return EVT(MVT::i64);
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
      return EVT(MVT::i32);
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDGPU::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
  SDValue NewSP = DAG.getNode(ISD::ADD,
      MVT::i32, SP, Size);
  Chain = DAG.getCopyToReg(SP.getValue(1),
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2, DL);
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  CmpValue = DAG.getNode(
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
// LowerRET - Lower an ISD::RET node.
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  SmallVector<CCValAssign, 16> RVLocs;
  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), RVLocs, *DAG.getContext());
  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    Flag = Chain.getValue(1);
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
    // Both MIPS and X86 have this
  RetOps.push_back(Flag);
  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      MVT::Other, &RetOps[0], RetOps.size());
*) const
1585 AMDILTargetLowering::LowerSDIV24(SDValue Op
, SelectionDAG
&DAG
) const
1587 DebugLoc DL
= Op
.getDebugLoc();
1588 EVT OVT
= Op
.getValueType();
1589 SDValue LHS
= Op
.getOperand(0);
1590 SDValue RHS
= Op
.getOperand(1);
1593 if (!OVT
.isVector()) {
1596 } else if (OVT
.getVectorNumElements() == 2) {
1599 } else if (OVT
.getVectorNumElements() == 4) {
1603 unsigned bitsize
= OVT
.getScalarType().getSizeInBits();
1604 // char|short jq = ia ^ ib;
1605 SDValue jq
= DAG
.getNode(ISD::XOR
, DL
, OVT
, LHS
, RHS
);
1607 // jq = jq >> (bitsize - 2)
1608 jq
= DAG
.getNode(ISD::SRA
, DL
, OVT
, jq
, DAG
.getConstant(bitsize
- 2, OVT
));
1611 jq
= DAG
.getNode(ISD::OR
, DL
, OVT
, jq
, DAG
.getConstant(1, OVT
));
1614 jq
= DAG
.getSExtOrTrunc(jq
, DL
, INTTY
);
1616 // int ia = (int)LHS;
1617 SDValue ia
= DAG
.getSExtOrTrunc(LHS
, DL
, INTTY
);
1619 // int ib, (int)RHS;
1620 SDValue ib
= DAG
.getSExtOrTrunc(RHS
, DL
, INTTY
);
1622 // float fa = (float)ia;
1623 SDValue fa
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ia
);
1625 // float fb = (float)ib;
1626 SDValue fb
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ib
);
1628 // float fq = native_divide(fa, fb);
1629 SDValue fq
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, FLTTY
, fa
, fb
);
1632 fq
= DAG
.getNode(ISD::FTRUNC
, DL
, FLTTY
, fq
);
1634 // float fqneg = -fq;
1635 SDValue fqneg
= DAG
.getNode(ISD::FNEG
, DL
, FLTTY
, fq
);
1637 // float fr = mad(fqneg, fb, fa);
1638 SDValue fr
= DAG
.getNode(AMDILISD::MAD
, DL
, FLTTY
, fqneg
, fb
, fa
);
1640 // int iq = (int)fq;
1641 SDValue iq
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, INTTY
, fq
);
1644 fr
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fr
);
1647 fb
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fb
);
1649 // int cv = fr >= fb;
1651 if (INTTY
== MVT::i32
) {
1652 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
1654 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
1656 // jq = (cv ? jq : 0);
1657 jq
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, cv
, jq
,
1658 DAG
.getConstant(0, OVT
));
1660 iq
= DAG
.getSExtOrTrunc(iq
, DL
, OVT
);
1661 iq
= DAG
.getNode(ISD::ADD
, DL
, OVT
, iq
, jq
);
AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSDIV32 function generates equivalent to the following IL.
  // ixor r10, r10, r11
  // ixor DST, r0, r10
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
  return SDValue(Op.getNode(), 0);
AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
  } else if (OVT == MVT::v4i8) {
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
  } else if (OVT == MVT::v4i16) {
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSREM32 function generates equivalent to the following IL.
  // umul r20, r20, r1
  // ixor DST, r0, r10
  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r0, DAG.getConstant(0, OVT));
  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r1, DAG.getConstant(0, OVT));
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
  // umul r20, r20, r1
  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
  return SDValue(Op.getNode(), 0);