1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
10 // This file implements the interfaces that AMDIL uses to lower LLVM code into a
13 //===----------------------------------------------------------------------===//
15 #include "AMDILISelLowering.h"
16 #include "AMDILDevices.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "AMDILRegisterInfo.h"
19 #include "AMDILSubtarget.h"
20 #include "AMDILUtilityFunctions.h"
21 #include "llvm/CallingConv.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/DerivedTypes.h"
29 #include "llvm/Instructions.h"
30 #include "llvm/Intrinsics.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Target/TargetInstrInfo.h"
33 #include "llvm/Target/TargetOptions.h"
36 #define ISDBITCAST ISD::BITCAST
37 #define MVTGLUE MVT::Glue
38 //===----------------------------------------------------------------------===//
39 // Calling Convention Implementation
40 //===----------------------------------------------------------------------===//
41 #include "AMDILGenCallingConv.inc"
43 //===----------------------------------------------------------------------===//
44 // TargetLowering Implementation Help Functions Begin
45 //===----------------------------------------------------------------------===//
47 getConversionNode(SelectionDAG
&DAG
, SDValue
& Src
, SDValue
& Dst
, bool asType
)
49 DebugLoc DL
= Src
.getDebugLoc();
50 EVT svt
= Src
.getValueType().getScalarType();
51 EVT dvt
= Dst
.getValueType().getScalarType();
52 if (svt
.isFloatingPoint() && dvt
.isFloatingPoint()) {
53 if (dvt
.bitsGT(svt
)) {
54 Src
= DAG
.getNode(ISD::FP_EXTEND
, DL
, dvt
, Src
);
55 } else if (svt
.bitsLT(svt
)) {
56 Src
= DAG
.getNode(ISD::FP_ROUND
, DL
, dvt
, Src
,
57 DAG
.getConstant(1, MVT::i32
));
59 } else if (svt
.isInteger() && dvt
.isInteger()) {
60 if (!svt
.bitsEq(dvt
)) {
61 Src
= DAG
.getSExtOrTrunc(Src
, DL
, dvt
);
63 Src
= DAG
.getNode(AMDILISD::MOVE
, DL
, dvt
, Src
);
65 } else if (svt
.isInteger()) {
66 unsigned opcode
= (asType
) ? ISDBITCAST
: ISD::SINT_TO_FP
;
67 if (!svt
.bitsEq(dvt
)) {
68 if (dvt
.getSimpleVT().SimpleTy
== MVT::f32
) {
69 Src
= DAG
.getSExtOrTrunc(Src
, DL
, MVT::i32
);
70 } else if (dvt
.getSimpleVT().SimpleTy
== MVT::f64
) {
71 Src
= DAG
.getSExtOrTrunc(Src
, DL
, MVT::i64
);
73 assert(0 && "We only support 32 and 64bit fp types");
76 Src
= DAG
.getNode(opcode
, DL
, dvt
, Src
);
77 } else if (dvt
.isInteger()) {
78 unsigned opcode
= (asType
) ? ISDBITCAST
: ISD::FP_TO_SINT
;
79 if (svt
.getSimpleVT().SimpleTy
== MVT::f32
) {
80 Src
= DAG
.getNode(opcode
, DL
, MVT::i32
, Src
);
81 } else if (svt
.getSimpleVT().SimpleTy
== MVT::f64
) {
82 Src
= DAG
.getNode(opcode
, DL
, MVT::i64
, Src
);
84 assert(0 && "We only support 32 and 64bit fp types");
86 Src
= DAG
.getSExtOrTrunc(Src
, DL
, dvt
);
90 // CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
92 static AMDILCC::CondCodes
93 CondCCodeToCC(ISD::CondCode CC
, const MVT::SimpleValueType
& type
)
98 errs()<<"Condition Code: "<< (unsigned int)CC
<<"\n";
99 assert(0 && "Unknown condition code!");
104 return AMDILCC::IL_CC_F_O
;
106 return AMDILCC::IL_CC_D_O
;
108 assert(0 && "Opcode combination not generated correctly!");
109 return AMDILCC::COND_ERROR
;
114 return AMDILCC::IL_CC_F_UO
;
116 return AMDILCC::IL_CC_D_UO
;
118 assert(0 && "Opcode combination not generated correctly!");
119 return AMDILCC::COND_ERROR
;
127 return AMDILCC::IL_CC_I_GT
;
129 return AMDILCC::IL_CC_F_GT
;
131 return AMDILCC::IL_CC_D_GT
;
133 return AMDILCC::IL_CC_L_GT
;
135 assert(0 && "Opcode combination not generated correctly!");
136 return AMDILCC::COND_ERROR
;
144 return AMDILCC::IL_CC_I_GE
;
146 return AMDILCC::IL_CC_F_GE
;
148 return AMDILCC::IL_CC_D_GE
;
150 return AMDILCC::IL_CC_L_GE
;
152 assert(0 && "Opcode combination not generated correctly!");
153 return AMDILCC::COND_ERROR
;
161 return AMDILCC::IL_CC_I_LT
;
163 return AMDILCC::IL_CC_F_LT
;
165 return AMDILCC::IL_CC_D_LT
;
167 return AMDILCC::IL_CC_L_LT
;
169 assert(0 && "Opcode combination not generated correctly!");
170 return AMDILCC::COND_ERROR
;
178 return AMDILCC::IL_CC_I_LE
;
180 return AMDILCC::IL_CC_F_LE
;
182 return AMDILCC::IL_CC_D_LE
;
184 return AMDILCC::IL_CC_L_LE
;
186 assert(0 && "Opcode combination not generated correctly!");
187 return AMDILCC::COND_ERROR
;
195 return AMDILCC::IL_CC_I_NE
;
197 return AMDILCC::IL_CC_F_NE
;
199 return AMDILCC::IL_CC_D_NE
;
201 return AMDILCC::IL_CC_L_NE
;
203 assert(0 && "Opcode combination not generated correctly!");
204 return AMDILCC::COND_ERROR
;
212 return AMDILCC::IL_CC_I_EQ
;
214 return AMDILCC::IL_CC_F_EQ
;
216 return AMDILCC::IL_CC_D_EQ
;
218 return AMDILCC::IL_CC_L_EQ
;
220 assert(0 && "Opcode combination not generated correctly!");
221 return AMDILCC::COND_ERROR
;
229 return AMDILCC::IL_CC_U_GT
;
231 return AMDILCC::IL_CC_F_UGT
;
233 return AMDILCC::IL_CC_D_UGT
;
235 return AMDILCC::IL_CC_UL_GT
;
237 assert(0 && "Opcode combination not generated correctly!");
238 return AMDILCC::COND_ERROR
;
246 return AMDILCC::IL_CC_U_GE
;
248 return AMDILCC::IL_CC_F_UGE
;
250 return AMDILCC::IL_CC_D_UGE
;
252 return AMDILCC::IL_CC_UL_GE
;
254 assert(0 && "Opcode combination not generated correctly!");
255 return AMDILCC::COND_ERROR
;
263 return AMDILCC::IL_CC_U_LT
;
265 return AMDILCC::IL_CC_F_ULT
;
267 return AMDILCC::IL_CC_D_ULT
;
269 return AMDILCC::IL_CC_UL_LT
;
271 assert(0 && "Opcode combination not generated correctly!");
272 return AMDILCC::COND_ERROR
;
280 return AMDILCC::IL_CC_U_LE
;
282 return AMDILCC::IL_CC_F_ULE
;
284 return AMDILCC::IL_CC_D_ULE
;
286 return AMDILCC::IL_CC_UL_LE
;
288 assert(0 && "Opcode combination not generated correctly!");
289 return AMDILCC::COND_ERROR
;
297 return AMDILCC::IL_CC_U_NE
;
299 return AMDILCC::IL_CC_F_UNE
;
301 return AMDILCC::IL_CC_D_UNE
;
303 return AMDILCC::IL_CC_UL_NE
;
305 assert(0 && "Opcode combination not generated correctly!");
306 return AMDILCC::COND_ERROR
;
314 return AMDILCC::IL_CC_U_EQ
;
316 return AMDILCC::IL_CC_F_UEQ
;
318 return AMDILCC::IL_CC_D_UEQ
;
320 return AMDILCC::IL_CC_UL_EQ
;
322 assert(0 && "Opcode combination not generated correctly!");
323 return AMDILCC::COND_ERROR
;
328 return AMDILCC::IL_CC_F_OGT
;
330 return AMDILCC::IL_CC_D_OGT
;
337 assert(0 && "Opcode combination not generated correctly!");
338 return AMDILCC::COND_ERROR
;
343 return AMDILCC::IL_CC_F_OGE
;
345 return AMDILCC::IL_CC_D_OGE
;
352 assert(0 && "Opcode combination not generated correctly!");
353 return AMDILCC::COND_ERROR
;
358 return AMDILCC::IL_CC_F_OLT
;
360 return AMDILCC::IL_CC_D_OLT
;
367 assert(0 && "Opcode combination not generated correctly!");
368 return AMDILCC::COND_ERROR
;
373 return AMDILCC::IL_CC_F_OLE
;
375 return AMDILCC::IL_CC_D_OLE
;
382 assert(0 && "Opcode combination not generated correctly!");
383 return AMDILCC::COND_ERROR
;
388 return AMDILCC::IL_CC_F_ONE
;
390 return AMDILCC::IL_CC_D_ONE
;
397 assert(0 && "Opcode combination not generated correctly!");
398 return AMDILCC::COND_ERROR
;
403 return AMDILCC::IL_CC_F_OEQ
;
405 return AMDILCC::IL_CC_D_OEQ
;
412 assert(0 && "Opcode combination not generated correctly!");
413 return AMDILCC::COND_ERROR
;
418 /// Helper function used by LowerFormalArguments
419 static const TargetRegisterClass
*
420 getRegClassFromType(unsigned int type
) {
423 assert(0 && "Passed in type does not match any register classes.");
425 return &AMDIL::GPRI8RegClass
;
427 return &AMDIL::GPRI16RegClass
;
429 return &AMDIL::GPRI32RegClass
;
431 return &AMDIL::GPRF32RegClass
;
433 return &AMDIL::GPRI64RegClass
;
435 return &AMDIL::GPRF64RegClass
;
437 return &AMDIL::GPRV4F32RegClass
;
439 return &AMDIL::GPRV4I8RegClass
;
441 return &AMDIL::GPRV4I16RegClass
;
443 return &AMDIL::GPRV4I32RegClass
;
445 return &AMDIL::GPRV2F32RegClass
;
447 return &AMDIL::GPRV2I8RegClass
;
449 return &AMDIL::GPRV2I16RegClass
;
451 return &AMDIL::GPRV2I32RegClass
;
453 return &AMDIL::GPRV2F64RegClass
;
455 return &AMDIL::GPRV2I64RegClass
;
460 AMDILTargetLowering::LowerMemArgument(
462 CallingConv::ID CallConv
,
463 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
464 DebugLoc dl
, SelectionDAG
&DAG
,
465 const CCValAssign
&VA
,
466 MachineFrameInfo
*MFI
,
469 // Create the nodes corresponding to a load from this parameter slot.
470 ISD::ArgFlagsTy Flags
= Ins
[i
].Flags
;
472 bool AlwaysUseMutable
= (CallConv
==CallingConv::Fast
) &&
473 getTargetMachine().Options
.GuaranteedTailCallOpt
;
474 bool isImmutable
= !AlwaysUseMutable
&& !Flags
.isByVal();
476 // FIXME: For now, all byval parameter objects are marked mutable. This can
477 // be changed with more analysis.
478 // In case of tail call optimization mark all arguments mutable. Since they
479 // could be overwritten by lowering of arguments in case of a tail call.
480 int FI
= MFI
->CreateFixedObject(VA
.getValVT().getSizeInBits()/8,
481 VA
.getLocMemOffset(), isImmutable
);
482 SDValue FIN
= DAG
.getFrameIndex(FI
, getPointerTy());
486 return DAG
.getLoad(VA
.getValVT(), dl
, Chain
, FIN
,
487 MachinePointerInfo::getFixedStack(FI
),
488 false, false, false, 0);
490 //===----------------------------------------------------------------------===//
491 // TargetLowering Implementation Help Functions End
492 //===----------------------------------------------------------------------===//
493 //===----------------------------------------------------------------------===//
494 // Instruction generation functions
495 //===----------------------------------------------------------------------===//
497 AMDILTargetLowering::convertToReg(MachineOperand op
) const
501 } else if (op
.isImm()) {
503 = genVReg(op
.getParent()->getDesc().OpInfo
[0].RegClass
);
504 generateMachineInst(AMDIL::LOADCONST_i32
, loadReg
)
505 .addImm(op
.getImm());
506 op
.ChangeToRegister(loadReg
, false);
507 } else if (op
.isFPImm()) {
509 = genVReg(op
.getParent()->getDesc().OpInfo
[0].RegClass
);
510 generateMachineInst(AMDIL::LOADCONST_f32
, loadReg
)
511 .addFPImm(op
.getFPImm());
512 op
.ChangeToRegister(loadReg
, false);
513 } else if (op
.isMBB()) {
514 op
.ChangeToRegister(0, false);
515 } else if (op
.isFI()) {
516 op
.ChangeToRegister(0, false);
517 } else if (op
.isCPI()) {
518 op
.ChangeToRegister(0, false);
519 } else if (op
.isJTI()) {
520 op
.ChangeToRegister(0, false);
521 } else if (op
.isGlobal()) {
522 op
.ChangeToRegister(0, false);
523 } else if (op
.isSymbol()) {
524 op
.ChangeToRegister(0, false);
525 }/* else if (op.isMetadata()) {
526 op.ChangeToRegister(0, false);
531 //===----------------------------------------------------------------------===//
532 // TargetLowering Class Implementation Begins
533 //===----------------------------------------------------------------------===//
534 AMDILTargetLowering::AMDILTargetLowering(TargetMachine
&TM
)
535 : TargetLowering(TM
, new TargetLoweringObjectFileELF())
584 size_t numTypes
= sizeof(types
) / sizeof(*types
);
585 size_t numFloatTypes
= sizeof(FloatTypes
) / sizeof(*FloatTypes
);
586 size_t numIntTypes
= sizeof(IntTypes
) / sizeof(*IntTypes
);
587 size_t numVectorTypes
= sizeof(VectorTypes
) / sizeof(*VectorTypes
);
589 const AMDILSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDILSubtarget
>();
590 // These are the current register classes that are
593 addRegisterClass(MVT::i32
, AMDIL::GPRI32RegisterClass
);
594 addRegisterClass(MVT::f32
, AMDIL::GPRF32RegisterClass
);
596 if (STM
.device()->isSupported(AMDILDeviceInfo::DoubleOps
)) {
597 addRegisterClass(MVT::f64
, AMDIL::GPRF64RegisterClass
);
598 addRegisterClass(MVT::v2f64
, AMDIL::GPRV2F64RegisterClass
);
600 if (STM
.device()->isSupported(AMDILDeviceInfo::ByteOps
)) {
601 addRegisterClass(MVT::i8
, AMDIL::GPRI8RegisterClass
);
602 addRegisterClass(MVT::v2i8
, AMDIL::GPRV2I8RegisterClass
);
603 addRegisterClass(MVT::v4i8
, AMDIL::GPRV4I8RegisterClass
);
604 setOperationAction(ISD::Constant
, MVT::i8
, Legal
);
606 if (STM
.device()->isSupported(AMDILDeviceInfo::ShortOps
)) {
607 addRegisterClass(MVT::i16
, AMDIL::GPRI16RegisterClass
);
608 addRegisterClass(MVT::v2i16
, AMDIL::GPRV2I16RegisterClass
);
609 addRegisterClass(MVT::v4i16
, AMDIL::GPRV4I16RegisterClass
);
610 setOperationAction(ISD::Constant
, MVT::i16
, Legal
);
612 addRegisterClass(MVT::v2f32
, AMDIL::GPRV2F32RegisterClass
);
613 addRegisterClass(MVT::v4f32
, AMDIL::GPRV4F32RegisterClass
);
614 addRegisterClass(MVT::v2i32
, AMDIL::GPRV2I32RegisterClass
);
615 addRegisterClass(MVT::v4i32
, AMDIL::GPRV4I32RegisterClass
);
616 if (STM
.device()->isSupported(AMDILDeviceInfo::LongOps
)) {
617 addRegisterClass(MVT::i64
, AMDIL::GPRI64RegisterClass
);
618 addRegisterClass(MVT::v2i64
, AMDIL::GPRV2I64RegisterClass
);
621 for (unsigned int x
= 0; x
< numTypes
; ++x
) {
622 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)types
[x
];
624 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
625 // We cannot sextinreg, expand to shifts
626 setOperationAction(ISD::SIGN_EXTEND_INREG
, VT
, Custom
);
627 setOperationAction(ISD::EXTRACT_SUBVECTOR
, VT
, Custom
);
628 setOperationAction(ISD::FP_ROUND
, VT
, Expand
);
629 setOperationAction(ISD::SUBE
, VT
, Expand
);
630 setOperationAction(ISD::SUBC
, VT
, Expand
);
631 setOperationAction(ISD::ADDE
, VT
, Expand
);
632 setOperationAction(ISD::ADDC
, VT
, Expand
);
633 setOperationAction(ISD::SETCC
, VT
, Custom
);
634 setOperationAction(ISD::BRCOND
, VT
, Custom
);
635 setOperationAction(ISD::BR_CC
, VT
, Custom
);
636 setOperationAction(ISD::BR_JT
, VT
, Expand
);
637 setOperationAction(ISD::BRIND
, VT
, Expand
);
638 // TODO: Implement custom UREM/SREM routines
639 setOperationAction(ISD::UREM
, VT
, Expand
);
640 setOperationAction(ISD::SREM
, VT
, Expand
);
641 setOperationAction(ISD::UINT_TO_FP
, VT
, Custom
);
642 setOperationAction(ISD::FP_TO_UINT
, VT
, Custom
);
643 setOperationAction(ISD::GlobalAddress
, VT
, Custom
);
644 setOperationAction(ISD::JumpTable
, VT
, Custom
);
645 setOperationAction(ISD::ConstantPool
, VT
, Custom
);
646 setOperationAction(ISD::SELECT_CC
, VT
, Custom
);
647 setOperationAction(ISD::SELECT
, VT
, Custom
);
648 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
649 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
650 if (VT
!= MVT::i64
&& VT
!= MVT::v2i64
) {
651 setOperationAction(ISD::SDIV
, VT
, Custom
);
653 setOperationAction(ISD::INSERT_VECTOR_ELT
, VT
, Custom
);
654 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, VT
, Custom
);
656 for (unsigned int x
= 0; x
< numFloatTypes
; ++x
) {
657 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)FloatTypes
[x
];
659 // IL does not have these operations for floating point types
660 setOperationAction(ISD::FP_ROUND_INREG
, VT
, Expand
);
661 setOperationAction(ISD::FP_ROUND
, VT
, Custom
);
662 setOperationAction(ISD::SETOLT
, VT
, Expand
);
663 setOperationAction(ISD::SETOGE
, VT
, Expand
);
664 setOperationAction(ISD::SETOGT
, VT
, Expand
);
665 setOperationAction(ISD::SETOLE
, VT
, Expand
);
666 setOperationAction(ISD::SETULT
, VT
, Expand
);
667 setOperationAction(ISD::SETUGE
, VT
, Expand
);
668 setOperationAction(ISD::SETUGT
, VT
, Expand
);
669 setOperationAction(ISD::SETULE
, VT
, Expand
);
672 for (unsigned int x
= 0; x
< numIntTypes
; ++x
) {
673 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)IntTypes
[x
];
675 // GPU also does not have divrem function for signed or unsigned
676 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
677 setOperationAction(ISD::UDIVREM
, VT
, Expand
);
678 setOperationAction(ISD::FP_ROUND
, VT
, Expand
);
680 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
681 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
682 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
684 // GPU doesn't have a rotl, rotr, or byteswap instruction
685 setOperationAction(ISD::ROTR
, VT
, Expand
);
686 setOperationAction(ISD::ROTL
, VT
, Expand
);
687 setOperationAction(ISD::BSWAP
, VT
, Expand
);
689 // GPU doesn't have any counting operators
690 setOperationAction(ISD::CTPOP
, VT
, Expand
);
691 setOperationAction(ISD::CTTZ
, VT
, Expand
);
692 setOperationAction(ISD::CTLZ
, VT
, Expand
);
695 for ( unsigned int ii
= 0; ii
< numVectorTypes
; ++ii
)
697 MVT::SimpleValueType VT
= (MVT::SimpleValueType
)VectorTypes
[ii
];
699 setOperationAction(ISD::BUILD_VECTOR
, VT
, Custom
);
700 setOperationAction(ISD::EXTRACT_SUBVECTOR
, VT
, Custom
);
701 setOperationAction(ISD::SCALAR_TO_VECTOR
, VT
, Custom
);
702 setOperationAction(ISD::VECTOR_SHUFFLE
, VT
, Expand
);
703 setOperationAction(ISD::CONCAT_VECTORS
, VT
, Custom
);
704 setOperationAction(ISD::FP_ROUND
, VT
, Expand
);
705 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
706 setOperationAction(ISD::UDIVREM
, VT
, Expand
);
707 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
708 // setOperationAction(ISD::VSETCC, VT, Expand);
709 setOperationAction(ISD::SETCC
, VT
, Expand
);
710 setOperationAction(ISD::SELECT_CC
, VT
, Expand
);
711 setOperationAction(ISD::SELECT
, VT
, Expand
);
714 setOperationAction(ISD::FP_ROUND
, MVT::Other
, Expand
);
715 if (STM
.device()->isSupported(AMDILDeviceInfo::LongOps
)) {
716 if (STM
.calVersion() < CAL_VERSION_SC_139
717 || STM
.device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
718 setOperationAction(ISD::MUL
, MVT::i64
, Custom
);
720 setOperationAction(ISD::SUB
, MVT::i64
, Custom
);
721 setOperationAction(ISD::ADD
, MVT::i64
, Custom
);
722 setOperationAction(ISD::MULHU
, MVT::i64
, Expand
);
723 setOperationAction(ISD::MULHU
, MVT::v2i64
, Expand
);
724 setOperationAction(ISD::MULHS
, MVT::i64
, Expand
);
725 setOperationAction(ISD::MULHS
, MVT::v2i64
, Expand
);
726 setOperationAction(ISD::MUL
, MVT::v2i64
, Expand
);
727 setOperationAction(ISD::SUB
, MVT::v2i64
, Expand
);
728 setOperationAction(ISD::ADD
, MVT::v2i64
, Expand
);
729 setOperationAction(ISD::SREM
, MVT::v2i64
, Expand
);
730 setOperationAction(ISD::Constant
, MVT::i64
, Legal
);
731 setOperationAction(ISD::SDIV
, MVT::v2i64
, Expand
);
732 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i64
, Expand
);
733 setOperationAction(ISD::FP_TO_UINT
, MVT::v2i64
, Expand
);
734 setOperationAction(ISD::TRUNCATE
, MVT::v2i64
, Expand
);
735 setOperationAction(ISD::SIGN_EXTEND
, MVT::v2i64
, Expand
);
736 setOperationAction(ISD::ZERO_EXTEND
, MVT::v2i64
, Expand
);
737 setOperationAction(ISD::ANY_EXTEND
, MVT::v2i64
, Expand
);
739 if (STM
.device()->isSupported(AMDILDeviceInfo::DoubleOps
)) {
740 // we support loading/storing v2f64 but not operations on the type
741 setOperationAction(ISD::FADD
, MVT::v2f64
, Expand
);
742 setOperationAction(ISD::FSUB
, MVT::v2f64
, Expand
);
743 setOperationAction(ISD::FMUL
, MVT::v2f64
, Expand
);
744 setOperationAction(ISD::FP_ROUND
, MVT::v2f64
, Expand
);
745 setOperationAction(ISD::FP_ROUND_INREG
, MVT::v2f64
, Expand
);
746 setOperationAction(ISD::FP_EXTEND
, MVT::v2f64
, Expand
);
747 setOperationAction(ISD::ConstantFP
, MVT::f64
, Legal
);
748 setOperationAction(ISD::FDIV
, MVT::v2f64
, Expand
);
749 // We want to expand vector conversions into their scalar
751 setOperationAction(ISD::UINT_TO_FP
, MVT::v2f64
, Expand
);
752 setOperationAction(ISD::FP_TO_UINT
, MVT::v2f64
, Expand
);
753 setOperationAction(ISD::TRUNCATE
, MVT::v2f64
, Expand
);
754 setOperationAction(ISD::SIGN_EXTEND
, MVT::v2f64
, Expand
);
755 setOperationAction(ISD::ZERO_EXTEND
, MVT::v2f64
, Expand
);
756 setOperationAction(ISD::ANY_EXTEND
, MVT::v2f64
, Expand
);
757 setOperationAction(ISD::FABS
, MVT::f64
, Expand
);
758 setOperationAction(ISD::FABS
, MVT::v2f64
, Expand
);
760 // TODO: Fix the UDIV24 algorithm so it works for these
761 // types correctly. This needs vector comparisons
762 // for this to work correctly.
763 setOperationAction(ISD::UDIV
, MVT::v2i8
, Expand
);
764 setOperationAction(ISD::UDIV
, MVT::v4i8
, Expand
);
765 setOperationAction(ISD::UDIV
, MVT::v2i16
, Expand
);
766 setOperationAction(ISD::UDIV
, MVT::v4i16
, Expand
);
767 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Custom
);
768 setOperationAction(ISD::SUBC
, MVT::Other
, Expand
);
769 setOperationAction(ISD::ADDE
, MVT::Other
, Expand
);
770 setOperationAction(ISD::ADDC
, MVT::Other
, Expand
);
771 setOperationAction(ISD::BRCOND
, MVT::Other
, Custom
);
772 setOperationAction(ISD::BR_CC
, MVT::Other
, Custom
);
773 setOperationAction(ISD::BR_JT
, MVT::Other
, Expand
);
774 setOperationAction(ISD::BRIND
, MVT::Other
, Expand
);
775 setOperationAction(ISD::SETCC
, MVT::Other
, Custom
);
776 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::Other
, Expand
);
777 setOperationAction(ISD::FDIV
, MVT::f32
, Custom
);
778 setOperationAction(ISD::FDIV
, MVT::v2f32
, Custom
);
779 setOperationAction(ISD::FDIV
, MVT::v4f32
, Custom
);
781 setOperationAction(ISD::BUILD_VECTOR
, MVT::Other
, Custom
);
782 // Use the default implementation.
783 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
784 setOperationAction(ISD::VACOPY
, MVT::Other
, Expand
);
785 setOperationAction(ISD::VAEND
, MVT::Other
, Expand
);
786 setOperationAction(ISD::STACKSAVE
, MVT::Other
, Expand
);
787 setOperationAction(ISD::STACKRESTORE
, MVT::Other
, Expand
);
788 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVT::i32
, Custom
);
789 setOperationAction(ISD::ConstantFP
, MVT::f32
, Legal
);
790 setOperationAction(ISD::Constant
, MVT::i32
, Legal
);
791 setOperationAction(ISD::TRAP
, MVT::Other
, Legal
);
793 setStackPointerRegisterToSaveRestore(AMDIL::SP
);
794 setSchedulingPreference(Sched::RegPressure
);
795 setPow2DivIsCheap(false);
796 setPrefLoopAlignment(16);
797 setSelectIsExpensive(true);
798 setJumpIsExpensive(true);
800 maxStoresPerMemcpy
= 4096;
801 maxStoresPerMemmove
= 4096;
802 maxStoresPerMemset
= 4096;
806 #undef numVectorTypes
811 AMDILTargetLowering::getTargetNodeName(unsigned Opcode
) const
815 case AMDILISD::INTTOANY
: return "AMDILISD::INTTOANY";
816 case AMDILISD::DP_TO_FP
: return "AMDILISD::DP_TO_FP";
817 case AMDILISD::FP_TO_DP
: return "AMDILISD::FP_TO_DP";
818 case AMDILISD::BITCONV
: return "AMDILISD::BITCONV";
819 case AMDILISD::CMOV
: return "AMDILISD::CMOV";
820 case AMDILISD::CMOVLOG
: return "AMDILISD::CMOVLOG";
821 case AMDILISD::INEGATE
: return "AMDILISD::INEGATE";
822 case AMDILISD::MAD
: return "AMDILISD::MAD";
823 case AMDILISD::UMAD
: return "AMDILISD::UMAD";
824 case AMDILISD::CALL
: return "AMDILISD::CALL";
825 case AMDILISD::RET
: return "AMDILISD::RET";
826 case AMDILISD::IFFB_HI
: return "AMDILISD::IFFB_HI";
827 case AMDILISD::IFFB_LO
: return "AMDILISD::IFFB_LO";
828 case AMDILISD::ADD
: return "AMDILISD::ADD";
829 case AMDILISD::UMUL
: return "AMDILISD::UMUL";
830 case AMDILISD::AND
: return "AMDILISD::AND";
831 case AMDILISD::OR
: return "AMDILISD::OR";
832 case AMDILISD::NOT
: return "AMDILISD::NOT";
833 case AMDILISD::XOR
: return "AMDILISD::XOR";
834 case AMDILISD::DIV_INF
: return "AMDILISD::DIV_INF";
835 case AMDILISD::SMAX
: return "AMDILISD::SMAX";
836 case AMDILISD::PHIMOVE
: return "AMDILISD::PHIMOVE";
837 case AMDILISD::MOVE
: return "AMDILISD::MOVE";
838 case AMDILISD::VBUILD
: return "AMDILISD::VBUILD";
839 case AMDILISD::VEXTRACT
: return "AMDILISD::VEXTRACT";
840 case AMDILISD::VINSERT
: return "AMDILISD::VINSERT";
841 case AMDILISD::VCONCAT
: return "AMDILISD::VCONCAT";
842 case AMDILISD::LCREATE
: return "AMDILISD::LCREATE";
843 case AMDILISD::LCOMPHI
: return "AMDILISD::LCOMPHI";
844 case AMDILISD::LCOMPLO
: return "AMDILISD::LCOMPLO";
845 case AMDILISD::DCREATE
: return "AMDILISD::DCREATE";
846 case AMDILISD::DCOMPHI
: return "AMDILISD::DCOMPHI";
847 case AMDILISD::DCOMPLO
: return "AMDILISD::DCOMPLO";
848 case AMDILISD::LCREATE2
: return "AMDILISD::LCREATE2";
849 case AMDILISD::LCOMPHI2
: return "AMDILISD::LCOMPHI2";
850 case AMDILISD::LCOMPLO2
: return "AMDILISD::LCOMPLO2";
851 case AMDILISD::DCREATE2
: return "AMDILISD::DCREATE2";
852 case AMDILISD::DCOMPHI2
: return "AMDILISD::DCOMPHI2";
853 case AMDILISD::DCOMPLO2
: return "AMDILISD::DCOMPLO2";
854 case AMDILISD::CMP
: return "AMDILISD::CMP";
855 case AMDILISD::IL_CC_I_LT
: return "AMDILISD::IL_CC_I_LT";
856 case AMDILISD::IL_CC_I_LE
: return "AMDILISD::IL_CC_I_LE";
857 case AMDILISD::IL_CC_I_GT
: return "AMDILISD::IL_CC_I_GT";
858 case AMDILISD::IL_CC_I_GE
: return "AMDILISD::IL_CC_I_GE";
859 case AMDILISD::IL_CC_I_EQ
: return "AMDILISD::IL_CC_I_EQ";
860 case AMDILISD::IL_CC_I_NE
: return "AMDILISD::IL_CC_I_NE";
861 case AMDILISD::RET_FLAG
: return "AMDILISD::RET_FLAG";
862 case AMDILISD::BRANCH_COND
: return "AMDILISD::BRANCH_COND";
863 case AMDILISD::LOOP_NZERO
: return "AMDILISD::LOOP_NZERO";
864 case AMDILISD::LOOP_ZERO
: return "AMDILISD::LOOP_ZERO";
865 case AMDILISD::LOOP_CMP
: return "AMDILISD::LOOP_CMP";
866 case AMDILISD::ADDADDR
: return "AMDILISD::ADDADDR";
867 case AMDILISD::ATOM_G_ADD
: return "AMDILISD::ATOM_G_ADD";
868 case AMDILISD::ATOM_G_AND
: return "AMDILISD::ATOM_G_AND";
869 case AMDILISD::ATOM_G_CMPXCHG
: return "AMDILISD::ATOM_G_CMPXCHG";
870 case AMDILISD::ATOM_G_DEC
: return "AMDILISD::ATOM_G_DEC";
871 case AMDILISD::ATOM_G_INC
: return "AMDILISD::ATOM_G_INC";
872 case AMDILISD::ATOM_G_MAX
: return "AMDILISD::ATOM_G_MAX";
873 case AMDILISD::ATOM_G_UMAX
: return "AMDILISD::ATOM_G_UMAX";
874 case AMDILISD::ATOM_G_MIN
: return "AMDILISD::ATOM_G_MIN";
875 case AMDILISD::ATOM_G_UMIN
: return "AMDILISD::ATOM_G_UMIN";
876 case AMDILISD::ATOM_G_OR
: return "AMDILISD::ATOM_G_OR";
877 case AMDILISD::ATOM_G_SUB
: return "AMDILISD::ATOM_G_SUB";
878 case AMDILISD::ATOM_G_RSUB
: return "AMDILISD::ATOM_G_RSUB";
879 case AMDILISD::ATOM_G_XCHG
: return "AMDILISD::ATOM_G_XCHG";
880 case AMDILISD::ATOM_G_XOR
: return "AMDILISD::ATOM_G_XOR";
881 case AMDILISD::ATOM_G_ADD_NORET
: return "AMDILISD::ATOM_G_ADD_NORET";
882 case AMDILISD::ATOM_G_AND_NORET
: return "AMDILISD::ATOM_G_AND_NORET";
883 case AMDILISD::ATOM_G_CMPXCHG_NORET
: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
884 case AMDILISD::ATOM_G_DEC_NORET
: return "AMDILISD::ATOM_G_DEC_NORET";
885 case AMDILISD::ATOM_G_INC_NORET
: return "AMDILISD::ATOM_G_INC_NORET";
886 case AMDILISD::ATOM_G_MAX_NORET
: return "AMDILISD::ATOM_G_MAX_NORET";
887 case AMDILISD::ATOM_G_UMAX_NORET
: return "AMDILISD::ATOM_G_UMAX_NORET";
888 case AMDILISD::ATOM_G_MIN_NORET
: return "AMDILISD::ATOM_G_MIN_NORET";
889 case AMDILISD::ATOM_G_UMIN_NORET
: return "AMDILISD::ATOM_G_UMIN_NORET";
890 case AMDILISD::ATOM_G_OR_NORET
: return "AMDILISD::ATOM_G_OR_NORET";
891 case AMDILISD::ATOM_G_SUB_NORET
: return "AMDILISD::ATOM_G_SUB_NORET";
892 case AMDILISD::ATOM_G_RSUB_NORET
: return "AMDILISD::ATOM_G_RSUB_NORET";
893 case AMDILISD::ATOM_G_XCHG_NORET
: return "AMDILISD::ATOM_G_XCHG_NORET";
894 case AMDILISD::ATOM_G_XOR_NORET
: return "AMDILISD::ATOM_G_XOR_NORET";
895 case AMDILISD::ATOM_L_ADD
: return "AMDILISD::ATOM_L_ADD";
896 case AMDILISD::ATOM_L_AND
: return "AMDILISD::ATOM_L_AND";
897 case AMDILISD::ATOM_L_CMPXCHG
: return "AMDILISD::ATOM_L_CMPXCHG";
898 case AMDILISD::ATOM_L_DEC
: return "AMDILISD::ATOM_L_DEC";
899 case AMDILISD::ATOM_L_INC
: return "AMDILISD::ATOM_L_INC";
900 case AMDILISD::ATOM_L_MAX
: return "AMDILISD::ATOM_L_MAX";
901 case AMDILISD::ATOM_L_UMAX
: return "AMDILISD::ATOM_L_UMAX";
902 case AMDILISD::ATOM_L_MIN
: return "AMDILISD::ATOM_L_MIN";
903 case AMDILISD::ATOM_L_UMIN
: return "AMDILISD::ATOM_L_UMIN";
904 case AMDILISD::ATOM_L_OR
: return "AMDILISD::ATOM_L_OR";
905 case AMDILISD::ATOM_L_SUB
: return "AMDILISD::ATOM_L_SUB";
906 case AMDILISD::ATOM_L_RSUB
: return "AMDILISD::ATOM_L_RSUB";
907 case AMDILISD::ATOM_L_XCHG
: return "AMDILISD::ATOM_L_XCHG";
908 case AMDILISD::ATOM_L_XOR
: return "AMDILISD::ATOM_L_XOR";
909 case AMDILISD::ATOM_L_ADD_NORET
: return "AMDILISD::ATOM_L_ADD_NORET";
910 case AMDILISD::ATOM_L_AND_NORET
: return "AMDILISD::ATOM_L_AND_NORET";
911 case AMDILISD::ATOM_L_CMPXCHG_NORET
: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
912 case AMDILISD::ATOM_L_DEC_NORET
: return "AMDILISD::ATOM_L_DEC_NORET";
913 case AMDILISD::ATOM_L_INC_NORET
: return "AMDILISD::ATOM_L_INC_NORET";
914 case AMDILISD::ATOM_L_MAX_NORET
: return "AMDILISD::ATOM_L_MAX_NORET";
915 case AMDILISD::ATOM_L_UMAX_NORET
: return "AMDILISD::ATOM_L_UMAX_NORET";
916 case AMDILISD::ATOM_L_MIN_NORET
: return "AMDILISD::ATOM_L_MIN_NORET";
917 case AMDILISD::ATOM_L_UMIN_NORET
: return "AMDILISD::ATOM_L_UMIN_NORET";
918 case AMDILISD::ATOM_L_OR_NORET
: return "AMDILISD::ATOM_L_OR_NORET";
919 case AMDILISD::ATOM_L_SUB_NORET
: return "AMDILISD::ATOM_L_SUB_NORET";
920 case AMDILISD::ATOM_L_RSUB_NORET
: return "AMDILISD::ATOM_L_RSUB_NORET";
921 case AMDILISD::ATOM_L_XCHG_NORET
: return "AMDILISD::ATOM_L_XCHG_NORET";
922 case AMDILISD::ATOM_R_ADD
: return "AMDILISD::ATOM_R_ADD";
923 case AMDILISD::ATOM_R_AND
: return "AMDILISD::ATOM_R_AND";
924 case AMDILISD::ATOM_R_CMPXCHG
: return "AMDILISD::ATOM_R_CMPXCHG";
925 case AMDILISD::ATOM_R_DEC
: return "AMDILISD::ATOM_R_DEC";
926 case AMDILISD::ATOM_R_INC
: return "AMDILISD::ATOM_R_INC";
927 case AMDILISD::ATOM_R_MAX
: return "AMDILISD::ATOM_R_MAX";
928 case AMDILISD::ATOM_R_UMAX
: return "AMDILISD::ATOM_R_UMAX";
929 case AMDILISD::ATOM_R_MIN
: return "AMDILISD::ATOM_R_MIN";
930 case AMDILISD::ATOM_R_UMIN
: return "AMDILISD::ATOM_R_UMIN";
931 case AMDILISD::ATOM_R_OR
: return "AMDILISD::ATOM_R_OR";
932 case AMDILISD::ATOM_R_MSKOR
: return "AMDILISD::ATOM_R_MSKOR";
933 case AMDILISD::ATOM_R_SUB
: return "AMDILISD::ATOM_R_SUB";
934 case AMDILISD::ATOM_R_RSUB
: return "AMDILISD::ATOM_R_RSUB";
935 case AMDILISD::ATOM_R_XCHG
: return "AMDILISD::ATOM_R_XCHG";
936 case AMDILISD::ATOM_R_XOR
: return "AMDILISD::ATOM_R_XOR";
937 case AMDILISD::ATOM_R_ADD_NORET
: return "AMDILISD::ATOM_R_ADD_NORET";
938 case AMDILISD::ATOM_R_AND_NORET
: return "AMDILISD::ATOM_R_AND_NORET";
939 case AMDILISD::ATOM_R_CMPXCHG_NORET
: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
940 case AMDILISD::ATOM_R_DEC_NORET
: return "AMDILISD::ATOM_R_DEC_NORET";
941 case AMDILISD::ATOM_R_INC_NORET
: return "AMDILISD::ATOM_R_INC_NORET";
942 case AMDILISD::ATOM_R_MAX_NORET
: return "AMDILISD::ATOM_R_MAX_NORET";
943 case AMDILISD::ATOM_R_UMAX_NORET
: return "AMDILISD::ATOM_R_UMAX_NORET";
944 case AMDILISD::ATOM_R_MIN_NORET
: return "AMDILISD::ATOM_R_MIN_NORET";
945 case AMDILISD::ATOM_R_UMIN_NORET
: return "AMDILISD::ATOM_R_UMIN_NORET";
946 case AMDILISD::ATOM_R_OR_NORET
: return "AMDILISD::ATOM_R_OR_NORET";
947 case AMDILISD::ATOM_R_MSKOR_NORET
: return "AMDILISD::ATOM_R_MSKOR_NORET";
948 case AMDILISD::ATOM_R_SUB_NORET
: return "AMDILISD::ATOM_R_SUB_NORET";
949 case AMDILISD::ATOM_R_RSUB_NORET
: return "AMDILISD::ATOM_R_RSUB_NORET";
950 case AMDILISD::ATOM_R_XCHG_NORET
: return "AMDILISD::ATOM_R_XCHG_NORET";
951 case AMDILISD::ATOM_R_XOR_NORET
: return "AMDILISD::ATOM_R_XOR_NORET";
952 case AMDILISD::APPEND_ALLOC
: return "AMDILISD::APPEND_ALLOC";
953 case AMDILISD::APPEND_ALLOC_NORET
: return "AMDILISD::APPEND_ALLOC_NORET";
954 case AMDILISD::APPEND_CONSUME
: return "AMDILISD::APPEND_CONSUME";
955 case AMDILISD::APPEND_CONSUME_NORET
: return "AMDILISD::APPEND_CONSUME_NORET";
956 case AMDILISD::IMAGE2D_READ
: return "AMDILISD::IMAGE2D_READ";
957 case AMDILISD::IMAGE2D_WRITE
: return "AMDILISD::IMAGE2D_WRITE";
958 case AMDILISD::IMAGE2D_INFO0
: return "AMDILISD::IMAGE2D_INFO0";
959 case AMDILISD::IMAGE2D_INFO1
: return "AMDILISD::IMAGE2D_INFO1";
960 case AMDILISD::IMAGE3D_READ
: return "AMDILISD::IMAGE3D_READ";
961 case AMDILISD::IMAGE3D_WRITE
: return "AMDILISD::IMAGE3D_WRITE";
962 case AMDILISD::IMAGE3D_INFO0
: return "AMDILISD::IMAGE3D_INFO0";
963 case AMDILISD::IMAGE3D_INFO1
: return "AMDILISD::IMAGE3D_INFO1";
968 AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo
&Info
,
969 const CallInst
&I
, unsigned Intrinsic
) const
971 if (Intrinsic
<= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
972 || Intrinsic
> AMDGPUIntrinsic::num_AMDIL_intrinsics
) {
975 bool bitCastToInt
= false;
978 const AMDILSubtarget
*STM
= &this->getTargetMachine()
979 .getSubtarget
<AMDILSubtarget
>();
981 default: return false; // Don't custom lower most intrinsics.
982 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32
:
983 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32
:
984 IntNo
= AMDILISD::ATOM_G_ADD
; break;
985 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret
:
986 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret
:
988 IntNo
= AMDILISD::ATOM_G_ADD_NORET
; break;
989 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32
:
990 case AMDGPUIntrinsic::AMDIL_atomic_add_li32
:
991 IntNo
= AMDILISD::ATOM_L_ADD
; break;
992 case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret
:
993 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret
:
995 IntNo
= AMDILISD::ATOM_L_ADD_NORET
; break;
996 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32
:
997 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32
:
998 IntNo
= AMDILISD::ATOM_R_ADD
; break;
999 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret
:
1000 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret
:
1002 IntNo
= AMDILISD::ATOM_R_ADD_NORET
; break;
1003 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32
:
1004 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32
:
1005 IntNo
= AMDILISD::ATOM_G_AND
; break;
1006 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret
:
1007 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret
:
1009 IntNo
= AMDILISD::ATOM_G_AND_NORET
; break;
1010 case AMDGPUIntrinsic::AMDIL_atomic_and_li32
:
1011 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32
:
1012 IntNo
= AMDILISD::ATOM_L_AND
; break;
1013 case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret
:
1014 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret
:
1016 IntNo
= AMDILISD::ATOM_L_AND_NORET
; break;
1017 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32
:
1018 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32
:
1019 IntNo
= AMDILISD::ATOM_R_AND
; break;
1020 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret
:
1021 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret
:
1023 IntNo
= AMDILISD::ATOM_R_AND_NORET
; break;
1024 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32
:
1025 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32
:
1026 IntNo
= AMDILISD::ATOM_G_CMPXCHG
; break;
1027 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret
:
1028 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret
:
1030 IntNo
= AMDILISD::ATOM_G_CMPXCHG_NORET
; break;
1031 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32
:
1032 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32
:
1033 IntNo
= AMDILISD::ATOM_L_CMPXCHG
; break;
1034 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret
:
1035 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret
:
1037 IntNo
= AMDILISD::ATOM_L_CMPXCHG_NORET
; break;
1038 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32
:
1039 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32
:
1040 IntNo
= AMDILISD::ATOM_R_CMPXCHG
; break;
1041 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret
:
1042 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret
:
1044 IntNo
= AMDILISD::ATOM_R_CMPXCHG_NORET
; break;
1045 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32
:
1046 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32
:
1047 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1048 IntNo
= AMDILISD::ATOM_G_DEC
;
1050 IntNo
= AMDILISD::ATOM_G_SUB
;
1053 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret
:
1054 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret
:
1056 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1057 IntNo
= AMDILISD::ATOM_G_DEC_NORET
;
1059 IntNo
= AMDILISD::ATOM_G_SUB_NORET
;
1062 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32
:
1063 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32
:
1064 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1065 IntNo
= AMDILISD::ATOM_L_DEC
;
1067 IntNo
= AMDILISD::ATOM_L_SUB
;
1070 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret
:
1071 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret
:
1073 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1074 IntNo
= AMDILISD::ATOM_L_DEC_NORET
;
1076 IntNo
= AMDILISD::ATOM_L_SUB_NORET
;
1079 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32
:
1080 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32
:
1081 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1082 IntNo
= AMDILISD::ATOM_R_DEC
;
1084 IntNo
= AMDILISD::ATOM_R_SUB
;
1087 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret
:
1088 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret
:
1090 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1091 IntNo
= AMDILISD::ATOM_R_DEC_NORET
;
1093 IntNo
= AMDILISD::ATOM_R_SUB_NORET
;
1096 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32
:
1097 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32
:
1098 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1099 IntNo
= AMDILISD::ATOM_G_INC
;
1101 IntNo
= AMDILISD::ATOM_G_ADD
;
1104 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret
:
1105 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret
:
1107 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1108 IntNo
= AMDILISD::ATOM_G_INC_NORET
;
1110 IntNo
= AMDILISD::ATOM_G_ADD_NORET
;
1113 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32
:
1114 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32
:
1115 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1116 IntNo
= AMDILISD::ATOM_L_INC
;
1118 IntNo
= AMDILISD::ATOM_L_ADD
;
1121 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret
:
1122 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret
:
1124 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1125 IntNo
= AMDILISD::ATOM_L_INC_NORET
;
1127 IntNo
= AMDILISD::ATOM_L_ADD_NORET
;
1130 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32
:
1131 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32
:
1132 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1133 IntNo
= AMDILISD::ATOM_R_INC
;
1135 IntNo
= AMDILISD::ATOM_R_ADD
;
1138 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret
:
1139 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret
:
1141 if (STM
->calVersion() >= CAL_VERSION_SC_136
) {
1142 IntNo
= AMDILISD::ATOM_R_INC_NORET
;
1144 IntNo
= AMDILISD::ATOM_R_ADD_NORET
;
1147 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32
:
1148 IntNo
= AMDILISD::ATOM_G_MAX
; break;
1149 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32
:
1150 IntNo
= AMDILISD::ATOM_G_UMAX
; break;
1151 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret
:
1153 IntNo
= AMDILISD::ATOM_G_MAX_NORET
; break;
1154 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret
:
1156 IntNo
= AMDILISD::ATOM_G_UMAX_NORET
; break;
1157 case AMDGPUIntrinsic::AMDIL_atomic_max_li32
:
1158 IntNo
= AMDILISD::ATOM_L_MAX
; break;
1159 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32
:
1160 IntNo
= AMDILISD::ATOM_L_UMAX
; break;
1161 case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret
:
1163 IntNo
= AMDILISD::ATOM_L_MAX_NORET
; break;
1164 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret
:
1166 IntNo
= AMDILISD::ATOM_L_UMAX_NORET
; break;
1167 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32
:
1168 IntNo
= AMDILISD::ATOM_R_MAX
; break;
1169 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32
:
1170 IntNo
= AMDILISD::ATOM_R_UMAX
; break;
1171 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret
:
1173 IntNo
= AMDILISD::ATOM_R_MAX_NORET
; break;
1174 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret
:
1176 IntNo
= AMDILISD::ATOM_R_UMAX_NORET
; break;
1177 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32
:
1178 IntNo
= AMDILISD::ATOM_G_MIN
; break;
1179 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32
:
1180 IntNo
= AMDILISD::ATOM_G_UMIN
; break;
1181 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret
:
1183 IntNo
= AMDILISD::ATOM_G_MIN_NORET
; break;
1184 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret
:
1186 IntNo
= AMDILISD::ATOM_G_UMIN_NORET
; break;
1187 case AMDGPUIntrinsic::AMDIL_atomic_min_li32
:
1188 IntNo
= AMDILISD::ATOM_L_MIN
; break;
1189 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32
:
1190 IntNo
= AMDILISD::ATOM_L_UMIN
; break;
1191 case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret
:
1193 IntNo
= AMDILISD::ATOM_L_MIN_NORET
; break;
1194 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret
:
1196 IntNo
= AMDILISD::ATOM_L_UMIN_NORET
; break;
1197 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32
:
1198 IntNo
= AMDILISD::ATOM_R_MIN
; break;
1199 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32
:
1200 IntNo
= AMDILISD::ATOM_R_UMIN
; break;
1201 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret
:
1203 IntNo
= AMDILISD::ATOM_R_MIN_NORET
; break;
1204 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret
:
1206 IntNo
= AMDILISD::ATOM_R_UMIN_NORET
; break;
1207 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32
:
1208 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32
:
1209 IntNo
= AMDILISD::ATOM_G_OR
; break;
1210 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret
:
1211 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret
:
1213 IntNo
= AMDILISD::ATOM_G_OR_NORET
; break;
1214 case AMDGPUIntrinsic::AMDIL_atomic_or_li32
:
1215 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32
:
1216 IntNo
= AMDILISD::ATOM_L_OR
; break;
1217 case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret
:
1218 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret
:
1220 IntNo
= AMDILISD::ATOM_L_OR_NORET
; break;
1221 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32
:
1222 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32
:
1223 IntNo
= AMDILISD::ATOM_R_OR
; break;
1224 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret
:
1225 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret
:
1227 IntNo
= AMDILISD::ATOM_R_OR_NORET
; break;
1228 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32
:
1229 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32
:
1230 IntNo
= AMDILISD::ATOM_G_SUB
; break;
1231 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret
:
1232 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret
:
1234 IntNo
= AMDILISD::ATOM_G_SUB_NORET
; break;
1235 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32
:
1236 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32
:
1237 IntNo
= AMDILISD::ATOM_L_SUB
; break;
1238 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret
:
1239 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret
:
1241 IntNo
= AMDILISD::ATOM_L_SUB_NORET
; break;
1242 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32
:
1243 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32
:
1244 IntNo
= AMDILISD::ATOM_R_SUB
; break;
1245 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret
:
1246 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret
:
1248 IntNo
= AMDILISD::ATOM_R_SUB_NORET
; break;
1249 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32
:
1250 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32
:
1251 IntNo
= AMDILISD::ATOM_G_RSUB
; break;
1252 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret
:
1253 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret
:
1255 IntNo
= AMDILISD::ATOM_G_RSUB_NORET
; break;
1256 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32
:
1257 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32
:
1258 IntNo
= AMDILISD::ATOM_L_RSUB
; break;
1259 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret
:
1260 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret
:
1262 IntNo
= AMDILISD::ATOM_L_RSUB_NORET
; break;
1263 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32
:
1264 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32
:
1265 IntNo
= AMDILISD::ATOM_R_RSUB
; break;
1266 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret
:
1267 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret
:
1269 IntNo
= AMDILISD::ATOM_R_RSUB_NORET
; break;
1270 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32
:
1271 bitCastToInt
= true;
1272 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32
:
1273 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32
:
1274 IntNo
= AMDILISD::ATOM_G_XCHG
; break;
1275 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret
:
1276 bitCastToInt
= true;
1277 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret
:
1278 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret
:
1280 IntNo
= AMDILISD::ATOM_G_XCHG_NORET
; break;
1281 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32
:
1282 bitCastToInt
= true;
1283 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32
:
1284 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32
:
1285 IntNo
= AMDILISD::ATOM_L_XCHG
; break;
1286 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret
:
1287 bitCastToInt
= true;
1288 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret
:
1289 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret
:
1291 IntNo
= AMDILISD::ATOM_L_XCHG_NORET
; break;
1292 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32
:
1293 bitCastToInt
= true;
1294 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32
:
1295 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32
:
1296 IntNo
= AMDILISD::ATOM_R_XCHG
; break;
1297 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret
:
1298 bitCastToInt
= true;
1299 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret
:
1300 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret
:
1302 IntNo
= AMDILISD::ATOM_R_XCHG_NORET
; break;
1303 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32
:
1304 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32
:
1305 IntNo
= AMDILISD::ATOM_G_XOR
; break;
1306 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret
:
1307 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret
:
1309 IntNo
= AMDILISD::ATOM_G_XOR_NORET
; break;
1310 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32
:
1311 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32
:
1312 IntNo
= AMDILISD::ATOM_L_XOR
; break;
1313 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret
:
1314 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret
:
1316 IntNo
= AMDILISD::ATOM_L_XOR_NORET
; break;
1317 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32
:
1318 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32
:
1319 IntNo
= AMDILISD::ATOM_R_XOR
; break;
1320 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret
:
1321 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret
:
1323 IntNo
= AMDILISD::ATOM_R_XOR_NORET
; break;
1324 case AMDGPUIntrinsic::AMDIL_append_alloc_i32
:
1325 IntNo
= AMDILISD::APPEND_ALLOC
; break;
1326 case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret
:
1328 IntNo
= AMDILISD::APPEND_ALLOC_NORET
; break;
1329 case AMDGPUIntrinsic::AMDIL_append_consume_i32
:
1330 IntNo
= AMDILISD::APPEND_CONSUME
; break;
1331 case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret
:
1333 IntNo
= AMDILISD::APPEND_CONSUME_NORET
; break;
1337 Info
.memVT
= (bitCastToInt
) ? MVT::f32
: MVT::i32
;
1338 Info
.ptrVal
= I
.getOperand(0);
1342 Info
.readMem
= isRet
;
1343 Info
.writeMem
= true;
1346 // The backend supports 32 and 64 bit floating point immediates
1348 AMDILTargetLowering::isFPImmLegal(const APFloat
&Imm
, EVT VT
) const
1350 if (VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f32
1351 || VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f64
) {
1359 AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT
) const
1361 if (VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f32
1362 || VT
.getScalarType().getSimpleVT().SimpleTy
== MVT::f64
) {
1370 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
1371 // be zero. Op is expected to be a target specific node. Used by DAG
1375 AMDILTargetLowering::computeMaskedBitsForTargetNode(
1379 const SelectionDAG
&DAG
,
1380 unsigned Depth
) const
1384 KnownZero
= KnownOne
= APInt(KnownOne
.getBitWidth(), 0); // Don't know anything
1385 switch (Op
.getOpcode()) {
1387 case AMDILISD::SELECT_CC
:
1388 DAG
.ComputeMaskedBits(
1394 DAG
.ComputeMaskedBits(
1399 assert((KnownZero
& KnownOne
) == 0
1400 && "Bits known to be one AND zero?");
1401 assert((KnownZero2
& KnownOne2
) == 0
1402 && "Bits known to be one AND zero?");
1403 // Only known if known in both the LHS and RHS
1404 KnownOne
&= KnownOne2
;
1405 KnownZero
&= KnownZero2
;
1410 // This is the function that determines which calling convention should
1411 // be used. Currently there is only one calling convention
1413 AMDILTargetLowering::CCAssignFnForNode(unsigned int Op
) const
1415 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1419 // LowerCallResult - Lower the result values of an ISD::CALL into the
1420 // appropriate copies out of appropriate physical registers. This assumes that
1421 // Chain/InFlag are the input chain/flag to use, and that TheCall is the call
1422 // being lowered. The returns a SDNode with the same number of values as the
1425 AMDILTargetLowering::LowerCallResult(
1428 CallingConv::ID CallConv
,
1430 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
1433 SmallVectorImpl
<SDValue
> &InVals
) const
1435 // Assign locations to each value returned by this call
1436 SmallVector
<CCValAssign
, 16> RVLocs
;
1437 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
1438 getTargetMachine(), RVLocs
, *DAG
.getContext());
1439 CCInfo
.AnalyzeCallResult(Ins
, RetCC_AMDIL32
);
1441 // Copy all of the result registers out of their specified physreg.
1442 for (unsigned i
= 0; i
!= RVLocs
.size(); ++i
) {
1443 EVT CopyVT
= RVLocs
[i
].getValVT();
1444 if (RVLocs
[i
].isRegLoc()) {
1445 Chain
= DAG
.getCopyFromReg(
1448 RVLocs
[i
].getLocReg(),
1452 SDValue Val
= Chain
.getValue(0);
1453 InFlag
= Chain
.getValue(2);
1454 InVals
.push_back(Val
);
1462 //===----------------------------------------------------------------------===//
1463 // Other Lowering Hooks
1464 //===----------------------------------------------------------------------===//
1466 // Recursively assign SDNodeOrdering to any unordered nodes
1467 // This is necessary to maintain source ordering of instructions
1468 // under -O0 to avoid odd-looking "skipping around" issues.
1469 static const SDValue
1470 Ordered( SelectionDAG
&DAG
, unsigned order
, const SDValue New
)
1472 if (order
!= 0 && DAG
.GetOrdering( New
.getNode() ) == 0) {
1473 DAG
.AssignOrdering( New
.getNode(), order
);
1474 for (unsigned i
= 0, e
= New
.getNumOperands(); i
< e
; ++i
)
1475 Ordered( DAG
, order
, New
.getOperand(i
) );
// LOWER(A) macro body: dispatch ISD::A to LowerA() and propagate the
// node ordering of the original operation onto the lowered result.
1482     return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
// LowerOperation - central custom-lowering dispatch for this target.
// NOTE(review): this text is an extraction-mangled dump; several LOWER(...)
// entries (original lines 1494, 1497-1504, 1511-1512, 1515-1521) were lost
// and cannot be reconstructed from what is visible — recover them from the
// upstream AMDILISelLowering.cpp before compiling.
1485 AMDILTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const
1487   switch (Op
.getOpcode()) {
// Unhandled opcodes dump the node and abort: custom lowering was
// requested for them but never implemented.
1489       Op
.getNode()->dump();
1490       assert(0 && "Custom lowering code for this"
1491           "instruction is not implemented yet!");
1493       LOWER(GlobalAddress
);
1495       LOWER(ConstantPool
);
1496       LOWER(ExternalSymbol
);
1505       LOWER(BUILD_VECTOR
);
1506       LOWER(INSERT_VECTOR_ELT
);
1507       LOWER(EXTRACT_VECTOR_ELT
);
1508       LOWER(EXTRACT_SUBVECTOR
);
1509       LOWER(SCALAR_TO_VECTOR
);
1510       LOWER(CONCAT_VECTORS
);
1513       LOWER(SIGN_EXTEND_INREG
);
1514       LOWER(DYNAMIC_STACKALLOC
);
1523 AMDILTargetLowering::getVarArgsFrameOffset() const
1525 return VarArgsFrameOffset
;
1530 AMDILTargetLowering::LowerGlobalAddress(SDValue Op
, SelectionDAG
&DAG
) const
1533 const GlobalAddressSDNode
*GADN
= cast
<GlobalAddressSDNode
>(Op
);
1534 const GlobalValue
*G
= GADN
->getGlobal();
1535 DebugLoc DL
= Op
.getDebugLoc();
1536 const GlobalVariable
*GV
= dyn_cast
<GlobalVariable
>(G
);
1538 DST
= DAG
.getTargetGlobalAddress(GV
, DL
, MVT::i32
);
1540 if (GV
->hasInitializer()) {
1541 const Constant
*C
= dyn_cast
<Constant
>(GV
->getInitializer());
1542 if (const ConstantInt
*CI
= dyn_cast
<ConstantInt
>(C
)) {
1543 DST
= DAG
.getConstant(CI
->getValue(), Op
.getValueType());
1544 } else if (const ConstantFP
*CF
= dyn_cast
<ConstantFP
>(C
)) {
1545 DST
= DAG
.getConstantFP(CF
->getValueAPF(),
1547 } else if (dyn_cast
<ConstantAggregateZero
>(C
)) {
1548 EVT VT
= Op
.getValueType();
1549 if (VT
.isInteger()) {
1550 DST
= DAG
.getConstant(0, VT
);
1552 DST
= DAG
.getConstantFP(0, VT
);
1555 assert(!"lowering this type of Global Address "
1556 "not implemented yet!");
1558 DST
= DAG
.getTargetGlobalAddress(GV
, DL
, MVT::i32
);
1561 DST
= DAG
.getTargetGlobalAddress(GV
, DL
, MVT::i32
);
1568 AMDILTargetLowering::LowerJumpTable(SDValue Op
, SelectionDAG
&DAG
) const
1570 JumpTableSDNode
*JT
= cast
<JumpTableSDNode
>(Op
);
1571 SDValue Result
= DAG
.getTargetJumpTable(JT
->getIndex(), MVT::i32
);
1575 AMDILTargetLowering::LowerConstantPool(SDValue Op
, SelectionDAG
&DAG
) const
1577 ConstantPoolSDNode
*CP
= cast
<ConstantPoolSDNode
>(Op
);
1578 EVT PtrVT
= Op
.getValueType();
1580 if (CP
->isMachineConstantPoolEntry()) {
1581 Result
= DAG
.getTargetConstantPool(CP
->getMachineCPVal(), PtrVT
,
1582 CP
->getAlignment(), CP
->getOffset(), CP
->getTargetFlags());
1584 Result
= DAG
.getTargetConstantPool(CP
->getConstVal(), PtrVT
,
1585 CP
->getAlignment(), CP
->getOffset(), CP
->getTargetFlags());
1591 AMDILTargetLowering::LowerExternalSymbol(SDValue Op
, SelectionDAG
&DAG
) const
1593 const char *Sym
= cast
<ExternalSymbolSDNode
>(Op
)->getSymbol();
1594 SDValue Result
= DAG
.getTargetExternalSymbol(Sym
, MVT::i32
);
1598 /// LowerFORMAL_ARGUMENTS - transform physical registers into
1599 /// virtual registers and generate load operations for
1600 /// arguments places on the stack.
1601 /// TODO: isVarArg, hasStructRet, isMemReg
1603 AMDILTargetLowering::LowerFormalArguments(SDValue Chain
,
1604 CallingConv::ID CallConv
,
1606 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
1609 SmallVectorImpl
<SDValue
> &InVals
)
1613 MachineFunction
&MF
= DAG
.getMachineFunction();
1614 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1615 //const Function *Fn = MF.getFunction();
1616 //MachineRegisterInfo &RegInfo = MF.getRegInfo();
1618 SmallVector
<CCValAssign
, 16> ArgLocs
;
1619 CallingConv::ID CC
= MF
.getFunction()->getCallingConv();
1620 //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
1622 CCState
CCInfo(CC
, isVarArg
, DAG
.getMachineFunction(),
1623 getTargetMachine(), ArgLocs
, *DAG
.getContext());
1625 // When more calling conventions are added, they need to be chosen here
1626 CCInfo
.AnalyzeFormalArguments(Ins
, CC_AMDIL32
);
1629 //unsigned int FirstStackArgLoc = 0;
1631 for (unsigned int i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
1632 CCValAssign
&VA
= ArgLocs
[i
];
1633 if (VA
.isRegLoc()) {
1634 EVT RegVT
= VA
.getLocVT();
1635 const TargetRegisterClass
*RC
= getRegClassFromType(
1636 RegVT
.getSimpleVT().SimpleTy
);
1638 unsigned int Reg
= MF
.addLiveIn(VA
.getLocReg(), RC
);
1639 SDValue ArgValue
= DAG
.getCopyFromReg(
1644 // If this is an 8 or 16-bit value, it is really passed
1645 // promoted to 32 bits. Insert an assert[sz]ext to capture
1646 // this, then truncate to the right size.
1648 if (VA
.getLocInfo() == CCValAssign::SExt
) {
1649 ArgValue
= DAG
.getNode(
1654 DAG
.getValueType(VA
.getValVT()));
1655 } else if (VA
.getLocInfo() == CCValAssign::ZExt
) {
1656 ArgValue
= DAG
.getNode(
1661 DAG
.getValueType(VA
.getValVT()));
1663 if (VA
.getLocInfo() != CCValAssign::Full
) {
1664 ArgValue
= DAG
.getNode(
1670 // Add the value to the list of arguments
1671 // to be passed in registers
1672 InVals
.push_back(ArgValue
);
1674 assert(0 && "Variable arguments are not yet supported");
1675 // See MipsISelLowering.cpp for ideas on how to implement
1677 } else if(VA
.isMemLoc()) {
1678 InVals
.push_back(LowerMemArgument(Chain
, CallConv
, Ins
,
1679 dl
, DAG
, VA
, MFI
, i
));
1681 assert(0 && "found a Value Assign that is "
1682 "neither a register or a memory location");
1685 /*if (hasStructRet) {
1686 assert(0 && "Has struct return is not yet implemented");
1687 // See MipsISelLowering.cpp for ideas on how to implement
1691 assert(0 && "Variable arguments are not yet supported");
1692 // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
1694 // This needs to be changed to non-zero if the return function needs
1698 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1699 /// by "Src" to address "Dst" with size and alignment information specified by
1700 /// the specific parameter attribute. The copy will be passed as a byval
1701 /// function parameter.
1703 CreateCopyOfByValArgument(SDValue Src
, SDValue Dst
, SDValue Chain
,
1704 ISD::ArgFlagsTy Flags
, SelectionDAG
&DAG
) {
1705 assert(0 && "MemCopy does not exist yet");
1706 SDValue SizeNode
= DAG
.getConstant(Flags
.getByValSize(), MVT::i32
);
1708 return DAG
.getMemcpy(Chain
,
1710 Dst
, Src
, SizeNode
, Flags
.getByValAlign(),
1711 /*IsVol=*/false, /*AlwaysInline=*/true,
1712 MachinePointerInfo(), MachinePointerInfo());
1716 AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain
,
1717 SDValue StackPtr
, SDValue Arg
,
1718 DebugLoc dl
, SelectionDAG
&DAG
,
1719 const CCValAssign
&VA
,
1720 ISD::ArgFlagsTy Flags
) const
1722 unsigned int LocMemOffset
= VA
.getLocMemOffset();
1723 SDValue PtrOff
= DAG
.getIntPtrConstant(LocMemOffset
);
1724 PtrOff
= DAG
.getNode(ISD::ADD
,
1726 getPointerTy(), StackPtr
, PtrOff
);
1727 if (Flags
.isByVal()) {
1728 PtrOff
= CreateCopyOfByValArgument(Arg
, PtrOff
, Chain
, Flags
, DAG
);
1730 PtrOff
= DAG
.getStore(Chain
, dl
, Arg
, PtrOff
,
1731 MachinePointerInfo::getStack(LocMemOffset
),
1736 /// LowerCAL - functions arguments are copied from virtual
1737 /// regs to (physical regs)/(stack frame), CALLSEQ_START and
1738 /// CALLSEQ_END are emitted.
1739 /// TODO: isVarArg, isTailCall, hasStructRet
1741 AMDILTargetLowering::LowerCall(SDValue Chain
, SDValue Callee
,
1742 CallingConv::ID CallConv
, bool isVarArg
, bool doesNotRet
,
1744 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
1745 const SmallVectorImpl
<SDValue
> &OutVals
,
1746 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
1747 DebugLoc dl
, SelectionDAG
&DAG
,
1748 SmallVectorImpl
<SDValue
> &InVals
)
1752 MachineFunction
& MF
= DAG
.getMachineFunction();
1753 // FIXME: DO we need to handle fast calling conventions and tail call
1754 // optimizations?? X86/PPC ISelLowering
1755 /*bool hasStructRet = (TheCall->getNumArgs())
1756 ? TheCall->getArgFlags(0).device()->isSRet()
1759 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1761 // Analyze operands of the call, assigning locations to each operand
1762 SmallVector
<CCValAssign
, 16> ArgLocs
;
1763 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
1764 getTargetMachine(), ArgLocs
, *DAG
.getContext());
1765 // Analyize the calling operands, but need to change
1766 // if we have more than one calling convetion
1767 CCInfo
.AnalyzeCallOperands(Outs
, CCAssignFnForNode(CallConv
));
1769 unsigned int NumBytes
= CCInfo
.getNextStackOffset();
1771 assert(isTailCall
&& "Tail Call not handled yet!");
1772 // See X86/PPC ISelLowering
1775 Chain
= DAG
.getCALLSEQ_START(Chain
, DAG
.getIntPtrConstant(NumBytes
, true));
1777 SmallVector
<std::pair
<unsigned int, SDValue
>, 8> RegsToPass
;
1778 SmallVector
<SDValue
, 8> MemOpChains
;
1780 //unsigned int FirstStacArgLoc = 0;
1781 //int LastArgStackLoc = 0;
1783 // Walk the register/memloc assignments, insert copies/loads
1784 for (unsigned int i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
1785 CCValAssign
&VA
= ArgLocs
[i
];
1786 //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
1787 // Arguments start after the 5 first operands of ISD::CALL
1788 SDValue Arg
= OutVals
[i
];
1789 //Promote the value if needed
1790 switch(VA
.getLocInfo()) {
1791 default: assert(0 && "Unknown loc info!");
1792 case CCValAssign::Full
:
1794 case CCValAssign::SExt
:
1795 Arg
= DAG
.getNode(ISD::SIGN_EXTEND
,
1797 VA
.getLocVT(), Arg
);
1799 case CCValAssign::ZExt
:
1800 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
,
1802 VA
.getLocVT(), Arg
);
1804 case CCValAssign::AExt
:
1805 Arg
= DAG
.getNode(ISD::ANY_EXTEND
,
1807 VA
.getLocVT(), Arg
);
1811 if (VA
.isRegLoc()) {
1812 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), Arg
));
1813 } else if (VA
.isMemLoc()) {
1814 // Create the frame index object for this incoming parameter
1815 int FI
= MFI
->CreateFixedObject(VA
.getValVT().getSizeInBits()/8,
1816 VA
.getLocMemOffset(), true);
1817 SDValue PtrOff
= DAG
.getFrameIndex(FI
,getPointerTy());
1819 // emit ISD::STORE whichs stores the
1820 // parameter value to a stack Location
1821 MemOpChains
.push_back(DAG
.getStore(Chain
, dl
, Arg
, PtrOff
,
1822 MachinePointerInfo::getFixedStack(FI
),
1825 assert(0 && "Not a Reg/Mem Loc, major error!");
1828 if (!MemOpChains
.empty()) {
1829 Chain
= DAG
.getNode(ISD::TokenFactor
,
1833 MemOpChains
.size());
1837 for (unsigned int i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
1838 Chain
= DAG
.getCopyToReg(Chain
,
1840 RegsToPass
[i
].first
,
1841 RegsToPass
[i
].second
,
1843 InFlag
= Chain
.getValue(1);
1847 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
1848 // every direct call is) turn it into a TargetGlobalAddress/
1849 // TargetExternalSymbol
1850 // node so that legalize doesn't hack it.
1851 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
)) {
1852 Callee
= DAG
.getTargetGlobalAddress(G
->getGlobal(), dl
, getPointerTy());
1854 else if (ExternalSymbolSDNode
*S
= dyn_cast
<ExternalSymbolSDNode
>(Callee
)) {
1855 Callee
= DAG
.getTargetExternalSymbol(S
->getSymbol(), getPointerTy());
1857 else if (isTailCall
) {
1858 assert(0 && "Tail calls are not handled yet");
1859 // see X86 ISelLowering for ideas on implementation: 1708
1862 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVTGLUE
);
1863 SmallVector
<SDValue
, 8> Ops
;
1866 assert(0 && "Tail calls are not handled yet");
1867 // see X86 ISelLowering for ideas on implementation: 1721
1869 // If this is a direct call, pass the chain and the callee
1870 if (Callee
.getNode()) {
1871 Ops
.push_back(Chain
);
1872 Ops
.push_back(Callee
);
1876 assert(0 && "Tail calls are not handled yet");
1877 // see X86 ISelLowering for ideas on implementation: 1739
1880 // Add argument registers to the end of the list so that they are known
1881 // live into the call
1882 for (unsigned int i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
1883 Ops
.push_back(DAG
.getRegister(
1884 RegsToPass
[i
].first
,
1885 RegsToPass
[i
].second
.getValueType()));
1887 if (InFlag
.getNode()) {
1888 Ops
.push_back(InFlag
);
1893 assert(0 && "Tail calls are not handled yet");
1894 // see X86 ISelLowering for ideas on implementation: 1762
1897 Chain
= DAG
.getNode(AMDILISD::CALL
,
1899 NodeTys
, &Ops
[0], Ops
.size());
1900 InFlag
= Chain
.getValue(1);
1902 // Create the CALLSEQ_END node
1903 Chain
= DAG
.getCALLSEQ_END(
1905 DAG
.getIntPtrConstant(NumBytes
, true),
1906 DAG
.getIntPtrConstant(0, true),
1908 InFlag
= Chain
.getValue(1);
1909 // Handle result values, copying them out of physregs into vregs that
1911 return LowerCallResult(Chain
, InFlag
, CallConv
, isVarArg
, Ins
, dl
, DAG
,
1916 AMDILTargetLowering::genCLZuN(SDValue Op
, SelectionDAG
&DAG
,
1917 uint32_t bits
) const
1919 DebugLoc DL
= Op
.getDebugLoc();
1920 EVT INTTY
= Op
.getValueType();
1922 if (INTTY
.isVector()) {
1923 FPTY
= EVT(MVT::getVectorVT(MVT::f32
,
1924 INTTY
.getVectorNumElements()));
1926 FPTY
= EVT(MVT::f32
);
1928 /* static inline uint
1931 int xor = 0x3f800000U | x;
1932 float tp = as_float(xor);
1933 float t = tp + -1.0f;
1934 uint tint = as_uint(t);
1936 uint tsrc = tint >> 23;
1937 uint tmask = tsrc & 0xffU;
1938 uint cst = (103 + N)U - tmask;
1939 return cmp ? cst : N;
1942 assert(INTTY
.getScalarType().getSimpleVT().SimpleTy
== MVT::i32
1943 && "genCLZu16 only works on 32bit types");
1946 // xornode = 0x3f800000 | x
1947 SDValue xornode
= DAG
.getNode(ISD::OR
, DL
, INTTY
,
1948 DAG
.getConstant(0x3f800000, INTTY
), x
);
1949 // float tp = as_float(xornode)
1950 SDValue tp
= DAG
.getNode(ISDBITCAST
, DL
, FPTY
, xornode
);
1951 // float t = tp + -1.0f
1952 SDValue t
= DAG
.getNode(ISD::FADD
, DL
, FPTY
, tp
,
1953 DAG
.getConstantFP(-1.0f
, FPTY
));
1954 // uint tint = as_uint(t)
1955 SDValue tint
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, t
);
1956 // int cmp = (x != 0)
1957 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
1958 DAG
.getConstant(CondCCodeToCC(ISD::SETNE
, MVT::i32
), MVT::i32
), x
,
1959 DAG
.getConstant(0, INTTY
));
1960 // uint tsrc = tint >> 23
1961 SDValue tsrc
= DAG
.getNode(ISD::SRL
, DL
, INTTY
, tint
,
1962 DAG
.getConstant(23, INTTY
));
1963 // uint tmask = tsrc & 0xFF
1964 SDValue tmask
= DAG
.getNode(ISD::AND
, DL
, INTTY
, tsrc
,
1965 DAG
.getConstant(0xFFU
, INTTY
));
1966 // uint cst = (103 + bits) - tmask
1967 SDValue cst
= DAG
.getNode(ISD::SUB
, DL
, INTTY
,
1968 DAG
.getConstant((103U + bits
), INTTY
), tmask
);
1969 // return cmp ? cst : N
1970 cst
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp
, cst
,
1971 DAG
.getConstant(bits
, INTTY
));
1976 AMDILTargetLowering::genCLZu32(SDValue Op
, SelectionDAG
&DAG
) const
1978 SDValue DST
= SDValue();
1979 DebugLoc DL
= Op
.getDebugLoc();
1980 EVT INTTY
= Op
.getValueType();
1981 const AMDILSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDILSubtarget
>();
1982 if (STM
.device()->getGeneration() >= AMDILDeviceInfo::HD5XXX
) {
1983 //__clz_32bit(uint u)
1985 // int z = __amdil_ffb_hi(u) ;
1986 // return z < 0 ? 32 : z;
1990 // int z = __amdil_ffb_hi(u)
1991 SDValue z
= DAG
.getNode(AMDILISD::IFFB_HI
, DL
, INTTY
, u
);
1993 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
1994 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
1995 z
, DAG
.getConstant(0, INTTY
));
1996 // return cmp ? 32 : z
1997 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp
,
1998 DAG
.getConstant(32, INTTY
), z
);
1999 } else if (STM
.device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
2000 // static inline uint
2001 //__clz_32bit(uint x)
2003 // uint zh = __clz_16bit(x >> 16);
2004 // uint zl = __clz_16bit(x & 0xffffU);
2005 // return zh == 16U ? 16U + zl : zh;
2009 // uint xs16 = x >> 16
2010 SDValue xs16
= DAG
.getNode(ISD::SRL
, DL
, INTTY
, x
,
2011 DAG
.getConstant(16, INTTY
));
2012 // uint zh = __clz_16bit(xs16)
2013 SDValue zh
= genCLZuN(xs16
, DAG
, 16);
2014 // uint xa16 = x & 0xFFFF
2015 SDValue xa16
= DAG
.getNode(ISD::AND
, DL
, INTTY
, x
,
2016 DAG
.getConstant(0xFFFFU
, INTTY
));
2017 // uint zl = __clz_16bit(xa16)
2018 SDValue zl
= genCLZuN(xa16
, DAG
, 16);
2019 // uint cmp = zh == 16U
2020 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2021 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2022 zh
, DAG
.getConstant(16U, INTTY
));
2023 // uint zl16 = zl + 16
2024 SDValue zl16
= DAG
.getNode(ISD::ADD
, DL
, INTTY
,
2025 DAG
.getConstant(16, INTTY
), zl
);
2026 // return cmp ? zl16 : zh
2027 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
,
2030 assert(0 && "Attempting to generate a CLZ function with an"
2031 " unknown graphics card");
2036 AMDILTargetLowering::genCLZu64(SDValue Op
, SelectionDAG
&DAG
) const
2038 SDValue DST
= SDValue();
2039 DebugLoc DL
= Op
.getDebugLoc();
2041 EVT LONGTY
= Op
.getValueType();
2042 bool isVec
= LONGTY
.isVector();
2044 INTTY
= EVT(MVT::getVectorVT(MVT::i32
, Op
.getValueType()
2045 .getVectorNumElements()));
2047 INTTY
= EVT(MVT::i32
);
2049 const AMDILSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDILSubtarget
>();
2050 if (STM
.device()->getGeneration() >= AMDILDeviceInfo::HD5XXX
) {
2052 // static inline uint
2053 // __clz_u64(ulong x)
2055 //uint zhi = __clz_32bit((uint)(x >> 32));
2056 //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
2057 //return zhi == 32U ? 32U + zlo : zhi;
2061 // uint xhi = x >> 32
2062 SDValue xlo
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, x
);
2063 // uint xlo = x & 0xFFFFFFFF
2064 SDValue xhi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, x
);
2065 // uint zhi = __clz_32bit(xhi)
2066 SDValue zhi
= genCLZu32(xhi
, DAG
);
2067 // uint zlo = __clz_32bit(xlo)
2068 SDValue zlo
= genCLZu32(xlo
, DAG
);
2069 // uint cmp = zhi == 32
2070 SDValue cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2071 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2072 zhi
, DAG
.getConstant(32U, INTTY
));
2073 // uint zlop32 = 32 + zlo
2074 SDValue zlop32
= DAG
.getNode(AMDILISD::ADD
, DL
, INTTY
,
2075 DAG
.getConstant(32U, INTTY
), zlo
);
2076 // return cmp ? zlop32: zhi
2077 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp
, zlop32
, zhi
);
2078 } else if (STM
.device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
2080 // static inline uint
2081 //__clz_64bit(ulong x)
2083 //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
2084 //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
2085 //uint zl = __clz_23bit((uint)x & 0x7fffffU);
2086 //uint r = zh == 18U ? 18U + zm : zh;
2087 //return zh + zm == 41U ? 41U + zl : r;
2091 // ulong xs46 = x >> 46
2092 SDValue xs46
= DAG
.getNode(ISD::SRL
, DL
, LONGTY
, x
,
2093 DAG
.getConstant(46, LONGTY
));
2094 // uint ixs46 = (uint)xs46
2095 SDValue ixs46
= DAG
.getNode(ISD::TRUNCATE
, DL
, INTTY
, xs46
);
2096 // ulong xs23 = x >> 23
2097 SDValue xs23
= DAG
.getNode(ISD::SRL
, DL
, LONGTY
, x
,
2098 DAG
.getConstant(23, LONGTY
));
2099 // uint ixs23 = (uint)xs23
2100 SDValue ixs23
= DAG
.getNode(ISD::TRUNCATE
, DL
, INTTY
, xs23
);
2101 // uint xs23m23 = ixs23 & 0x7FFFFF
2102 SDValue xs23m23
= DAG
.getNode(ISD::AND
, DL
, INTTY
, ixs23
,
2103 DAG
.getConstant(0x7fffffU
, INTTY
));
2104 // uint ix = (uint)x
2105 SDValue ix
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, x
);
2106 // uint xm23 = ix & 0x7FFFFF
2107 SDValue xm23
= DAG
.getNode(ISD::AND
, DL
, INTTY
, ix
,
2108 DAG
.getConstant(0x7fffffU
, INTTY
));
2109 // uint zh = __clz_23bit(ixs46)
2110 SDValue zh
= genCLZuN(ixs46
, DAG
, 23);
2111 // uint zm = __clz_23bit(xs23m23)
2112 SDValue zm
= genCLZuN(xs23m23
, DAG
, 23);
2113 // uint zl = __clz_23bit(xm23)
2114 SDValue zl
= genCLZuN(xm23
, DAG
, 23);
2115 // uint zhm5 = zh - 5
2116 SDValue zhm5
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zh
,
2117 DAG
.getConstant(-5U, INTTY
));
2118 SDValue const18
= DAG
.getConstant(18, INTTY
);
2119 SDValue const41
= DAG
.getConstant(41, INTTY
);
2120 // uint cmp1 = zh = 18
2121 SDValue cmp1
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2122 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2124 // uint zhm5zm = zhm5 + zh
2125 SDValue zhm5zm
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zhm5
, zm
);
2126 // uint cmp2 = zhm5zm == 41
2127 SDValue cmp2
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2128 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
2130 // uint zmp18 = zhm5 + 18
2131 SDValue zmp18
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zm
, const18
);
2132 // uint zlp41 = zl + 41
2133 SDValue zlp41
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, zl
, const41
);
2134 // uint r = cmp1 ? zmp18 : zh
2135 SDValue r
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
,
2137 // return cmp2 ? zlp41 : r
2138 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, cmp2
, zlp41
, r
);
2140 assert(0 && "Attempting to generate a CLZ function with an"
2141 " unknown graphics card");
2146 AMDILTargetLowering::genf64toi64(SDValue RHS
, SelectionDAG
&DAG
,
2147 bool includeSign
) const
2152 DebugLoc DL
= RHS
.getDebugLoc();
2153 EVT RHSVT
= RHS
.getValueType();
2154 bool isVec
= RHSVT
.isVector();
2156 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
, RHSVT
2157 .getVectorNumElements()));
2158 INTVT
= EVT(MVT::getVectorVT(MVT::i32
, RHSVT
2159 .getVectorNumElements()));
2161 LONGVT
= EVT(MVT::i64
);
2162 INTVT
= EVT(MVT::i32
);
2164 const AMDILSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDILSubtarget
>();
2165 if (STM
.device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2166 // unsigned version:
2167 // uint uhi = (uint)(d * 0x1.0p-32);
2168 // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
2169 // return as_ulong2((uint2)(ulo, uhi));
2172 // double ad = fabs(d);
2173 // long l = unsigned_version(ad);
2175 // return d == ad ? l : nl;
2178 d
= DAG
.getNode(ISD::FABS
, DL
, RHSVT
, d
);
2180 SDValue uhid
= DAG
.getNode(ISD::FMUL
, DL
, RHSVT
, d
,
2181 DAG
.getConstantFP(0x2f800000, RHSVT
));
2182 SDValue uhi
= DAG
.getNode(ISD::FP_TO_UINT
, DL
, INTVT
, uhid
);
2183 SDValue ulod
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, RHSVT
, uhi
);
2184 ulod
= DAG
.getNode(AMDILISD::MAD
, DL
, RHSVT
, ulod
,
2185 DAG
.getConstantFP(0xcf800000, RHSVT
), d
);
2186 SDValue ulo
= DAG
.getNode(ISD::FP_TO_UINT
, DL
, INTVT
, ulod
);
2187 SDValue l
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, ulo
, uhi
);
2189 SDValue nl
= DAG
.getNode(AMDILISD::INEGATE
, DL
, LONGVT
, l
);
2190 SDValue c
= DAG
.getNode(AMDILISD::CMP
, DL
, RHSVT
,
2191 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::f64
), MVT::i32
),
2193 l
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, LONGVT
, c
, l
, nl
);
2198 __attribute__((always_inline)) long
2199 cast_f64_to_i64(double d)
2201 // Convert d in to 32-bit components
2202 long x = as_long(d);
2206 // Generate 'normalized' mantissa
2207 mhi = xhi | 0x00100000; // hidden bit
2209 temp = xlo >> (32 - 11);
2213 // Compute shift right count from exponent
2214 e = (xhi >> (52-32)) & 0x7ff;
2219 // Compute result for 0 <= sr < 32
2220 rhi0 = mhi >> (sr &31);
2221 rlo0 = mlo >> (sr &31);
2222 temp = mhi << (32 - sr);
2224 rlo0 = sr ? temp : rlo0;
2226 // Compute result for 32 <= sr
2228 rlo1 = srge64 ? 0 : rhi0;
2230 // Pick between the 2 results
2231 rhi = srge32 ? rhi1 : rhi0;
2232 rlo = srge32 ? rlo1 : rlo0;
2234 // Optional saturate on overflow
2236 rhi = srlt0 ? MAXVALUE : rhi;
2237 rlo = srlt0 ? MAXVALUE : rlo;
2240 res = LCREATE( rlo, rhi );
2242 // Deal with sign bit (ignoring whether result is signed or unsigned value)
2244 sign = ((signed int) xhi) >> 31; fill with sign bit
2245 sign = LCREATE( sign, sign );
2253 SDValue c11
= DAG
.getConstant( 63 - 52, INTVT
);
2254 SDValue c32
= DAG
.getConstant( 32, INTVT
);
2256 // Convert d in to 32-bit components
2258 SDValue x
= DAG
.getNode(ISDBITCAST
, DL
, LONGVT
, d
);
2259 SDValue xhi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
2260 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
2262 // Generate 'normalized' mantissa
2263 SDValue mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
,
2264 xhi
, DAG
.getConstant( 0x00100000, INTVT
) );
2265 mhi
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, mhi
, c11
);
2266 SDValue temp
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2267 xlo
, DAG
.getConstant( 32 - (63 - 52), INTVT
) );
2268 mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, mhi
, temp
);
2269 SDValue mlo
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xlo
, c11
);
2271 // Compute shift right count from exponent
2272 SDValue e
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2273 xhi
, DAG
.getConstant( 52-32, INTVT
) );
2274 e
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2275 e
, DAG
.getConstant( 0x7ff, INTVT
) );
2276 SDValue sr
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
2277 DAG
.getConstant( 1023 + 63, INTVT
), e
);
2278 SDValue srge64
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
2279 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
2280 sr
, DAG
.getConstant(64, INTVT
));
2281 SDValue srge32
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
2282 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
2283 sr
, DAG
.getConstant(32, INTVT
));
2285 // Compute result for 0 <= sr < 32
2286 SDValue rhi0
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, mhi
, sr
);
2287 SDValue rlo0
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, mlo
, sr
);
2288 temp
= DAG
.getNode( ISD::SUB
, DL
, INTVT
, c32
, sr
);
2289 temp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, mhi
, temp
);
2290 temp
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rlo0
, temp
);
2291 rlo0
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
, sr
, temp
, rlo0
);
2293 // Compute result for 32 <= sr
2294 SDValue rhi1
= DAG
.getConstant( 0, INTVT
);
2295 SDValue rlo1
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2296 srge64
, rhi1
, rhi0
);
2298 // Pick between the 2 results
2299 SDValue rhi
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2300 srge32
, rhi1
, rhi0
);
2301 SDValue rlo
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2302 srge32
, rlo1
, rlo0
);
2305 SDValue res
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, rlo
, rhi
);
2307 // Deal with sign bit
2309 SDValue sign
= DAG
.getNode( ISD::SRA
, DL
, INTVT
,
2310 xhi
, DAG
.getConstant( 31, INTVT
) );
2311 sign
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, sign
, sign
);
2312 res
= DAG
.getNode( ISD::ADD
, DL
, LONGVT
, res
, sign
);
2313 res
= DAG
.getNode( ISD::XOR
, DL
, LONGVT
, res
, sign
);
2320 AMDILTargetLowering::genf64toi32(SDValue RHS
, SelectionDAG
&DAG
,
2321 bool includeSign
) const
2325 DebugLoc DL
= RHS
.getDebugLoc();
2326 EVT RHSVT
= RHS
.getValueType();
2327 bool isVec
= RHSVT
.isVector();
2329 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
,
2330 RHSVT
.getVectorNumElements()));
2331 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
2332 RHSVT
.getVectorNumElements()));
2334 LONGVT
= EVT(MVT::i64
);
2335 INTVT
= EVT(MVT::i32
);
2338 __attribute__((always_inline)) int
2339 cast_f64_to_[u|i]32(double d)
2341 // Convert d in to 32-bit components
2342 long x = as_long(d);
2346 // Generate 'normalized' mantissa
2347 mhi = xhi | 0x00100000; // hidden bit
2349 temp = xlo >> (32 - 11);
2352 // Compute shift right count from exponent
2353 e = (xhi >> (52-32)) & 0x7ff;
2357 // Compute result for 0 <= sr < 32
2358 res = mhi >> (sr &31);
2359 res = srge32 ? 0 : res;
2361 // Optional saturate on overflow
2363 res = srlt0 ? MAXVALUE : res;
2365 // Deal with sign bit (ignoring whether result is signed or unsigned value)
2367 sign = ((signed int) xhi) >> 31; fill with sign bit
2375 SDValue c11
= DAG
.getConstant( 63 - 52, INTVT
);
2377 // Convert d in to 32-bit components
2379 SDValue x
= DAG
.getNode(ISDBITCAST
, DL
, LONGVT
, d
);
2380 SDValue xhi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
2381 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
2383 // Generate 'normalized' mantissa
2384 SDValue mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
,
2385 xhi
, DAG
.getConstant( 0x00100000, INTVT
) );
2386 mhi
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, mhi
, c11
);
2387 SDValue temp
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2388 xlo
, DAG
.getConstant( 32 - (63 - 52), INTVT
) );
2389 mhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, mhi
, temp
);
2391 // Compute shift right count from exponent
2392 SDValue e
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2393 xhi
, DAG
.getConstant( 52-32, INTVT
) );
2394 e
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2395 e
, DAG
.getConstant( 0x7ff, INTVT
) );
2396 SDValue sr
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
2397 DAG
.getConstant( 1023 + 31, INTVT
), e
);
2398 SDValue srge32
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
2399 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
2400 sr
, DAG
.getConstant(32, INTVT
));
2402 // Compute result for 0 <= sr < 32
2403 SDValue res
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, mhi
, sr
);
2404 res
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2405 srge32
, DAG
.getConstant(0,INTVT
), res
);
2407 // Deal with sign bit
2409 SDValue sign
= DAG
.getNode( ISD::SRA
, DL
, INTVT
,
2410 xhi
, DAG
.getConstant( 31, INTVT
) );
2411 res
= DAG
.getNode( ISD::ADD
, DL
, INTVT
, res
, sign
);
2412 res
= DAG
.getNode( ISD::XOR
, DL
, INTVT
, res
, sign
);
2418 AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op
, SelectionDAG
&DAG
) const
2421 SDValue RHS
= Op
.getOperand(0);
2422 EVT RHSVT
= RHS
.getValueType();
2423 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
2424 EVT LHSVT
= Op
.getValueType();
2425 MVT LST
= LHSVT
.getScalarType().getSimpleVT();
2426 DebugLoc DL
= Op
.getDebugLoc();
2427 const AMDILSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDILSubtarget
>();
2428 if (RST
== MVT::f64
&& RHSVT
.isVector()
2429 && STM
.device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2430 // We dont support vector 64bit floating point convertions.
2431 for (unsigned x
= 0, y
= RHSVT
.getVectorNumElements(); x
< y
; ++x
) {
2432 SDValue op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
2433 DL
, RST
, RHS
, DAG
.getTargetConstant(x
, MVT::i32
));
2434 op
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, LST
, op
);
2436 DST
= DAG
.getNode(AMDILISD::VBUILD
, DL
, LHSVT
, op
);
2438 DST
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, LHSVT
,
2439 DST
, op
, DAG
.getTargetConstant(x
, MVT::i32
));
2445 && LST
== MVT::i32
) {
2446 if (STM
.device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2447 DST
= SDValue(Op
.getNode(), 0);
2449 DST
= genf64toi32(RHS
, DAG
, false);
2451 } else if (RST
== MVT::f64
2452 && LST
== MVT::i64
) {
2453 DST
= genf64toi64(RHS
, DAG
, false);
2454 } else if (RST
== MVT::f64
2455 && (LST
== MVT::i8
|| LST
== MVT::i16
)) {
2456 if (STM
.device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2457 DST
= DAG
.getNode(ISD::TRUNCATE
, DL
, LHSVT
, SDValue(Op
.getNode(), 0));
2459 SDValue ToInt
= genf64toi32(RHS
, DAG
, false);
2460 DST
= DAG
.getNode(ISD::TRUNCATE
, DL
, LHSVT
, ToInt
);
2464 DST
= SDValue(Op
.getNode(), 0);
2470 AMDILTargetLowering::genu32tof64(SDValue RHS
, EVT LHSVT
,
2471 SelectionDAG
&DAG
) const
2473 EVT RHSVT
= RHS
.getValueType();
2474 DebugLoc DL
= RHS
.getDebugLoc();
2477 bool isVec
= RHSVT
.isVector();
2479 LONGVT
= EVT(MVT::getVectorVT(MVT::i64
,
2480 RHSVT
.getVectorNumElements()));
2481 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
2482 RHSVT
.getVectorNumElements()));
2484 LONGVT
= EVT(MVT::i64
);
2485 INTVT
= EVT(MVT::i32
);
2488 const AMDILSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDILSubtarget
>();
2489 if (STM
.calVersion() >= CAL_VERSION_SC_135
) {
2490 // unsigned x = RHS;
2491 // ulong xd = (ulong)(0x4330_0000 << 32) | x;
2492 // double d = as_double( xd );
2493 // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
2494 SDValue xd
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, x
,
2495 DAG
.getConstant( 0x43300000, INTVT
) );
2496 SDValue d
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
, xd
);
2497 SDValue offsetd
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
,
2498 DAG
.getConstant( 0x4330000000000000ULL
, LONGVT
) );
2499 return DAG
.getNode( ISD::FSUB
, DL
, LHSVT
, d
, offsetd
);
2501 SDValue clz
= genCLZu32(x
, DAG
);
2503 // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
2504 // Except for an input 0... which requires a 0 exponent
2505 SDValue exp
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
2506 DAG
.getConstant( (1023+31), INTVT
), clz
);
2507 exp
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
, x
, exp
, x
);
2510 SDValue rhi
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, x
, clz
);
2512 // Eliminate hidden bit
2513 rhi
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2514 rhi
, DAG
.getConstant( 0x7fffffff, INTVT
) );
2516 // Pack exponent and frac
2517 SDValue rlo
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
2518 rhi
, DAG
.getConstant( (32 - 11), INTVT
) );
2519 rhi
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2520 rhi
, DAG
.getConstant( 11, INTVT
) );
2521 exp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
2522 exp
, DAG
.getConstant( 20, INTVT
) );
2523 rhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rhi
, exp
);
2525 // Convert 2 x 32 in to 1 x 64, then to double precision float type
2526 SDValue res
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, rlo
, rhi
);
2527 return DAG
.getNode(ISDBITCAST
, DL
, LHSVT
, res
);
2531 AMDILTargetLowering::genu64tof64(SDValue RHS
, EVT LHSVT
,
2532 SelectionDAG
&DAG
) const
2534 EVT RHSVT
= RHS
.getValueType();
2535 DebugLoc DL
= RHS
.getDebugLoc();
2538 bool isVec
= RHSVT
.isVector();
2540 INTVT
= EVT(MVT::getVectorVT(MVT::i32
,
2541 RHSVT
.getVectorNumElements()));
2543 INTVT
= EVT(MVT::i32
);
2547 const AMDILSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDILSubtarget
>();
2548 if (STM
.device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2549 // double dhi = (double)(as_uint2(x).y);
2550 // double dlo = (double)(as_uint2(x).x);
2551 // return mad(dhi, 0x1.0p+32, dlo)
2552 SDValue dhi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
2553 dhi
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LHSVT
, dhi
);
2554 SDValue dlo
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
2555 dlo
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LHSVT
, dlo
);
2556 return DAG
.getNode(AMDILISD::MAD
, DL
, LHSVT
, dhi
,
2557 DAG
.getConstantFP(0x4f800000, LHSVT
), dlo
);
2558 } else if (STM
.calVersion() >= CAL_VERSION_SC_135
) {
2559 // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
2560 // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
2561 // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
2562 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
); // x & 0xffff_ffffUL
2563 SDValue xd
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, xlo
, DAG
.getConstant( 0x43300000, INTVT
) );
2564 SDValue lo
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
, xd
);
2565 SDValue xhi
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
); // x >> 32
2566 SDValue xe
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, xhi
, DAG
.getConstant( 0x45300000, INTVT
) );
2567 SDValue hi
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
, xe
);
2568 SDValue c
= DAG
.getNode( ISDBITCAST
, DL
, LHSVT
,
2569 DAG
.getConstant( 0x4530000000100000ULL
, LONGVT
) );
2570 hi
= DAG
.getNode( ISD::FSUB
, DL
, LHSVT
, hi
, c
);
2571 return DAG
.getNode( ISD::FADD
, DL
, LHSVT
, hi
, lo
);
2574 SDValue clz
= genCLZu64(x
, DAG
);
2575 SDValue xhi
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTVT
, x
);
2576 SDValue xlo
= DAG
.getNode( (isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTVT
, x
);
2578 // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
2579 SDValue exp
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
2580 DAG
.getConstant( (1023+63), INTVT
), clz
);
2581 SDValue mash
= DAG
.getNode( ISD::OR
, DL
, INTVT
, xhi
, xlo
);
2582 exp
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2583 mash
, exp
, mash
); // exp = exp, or 0 if input was 0
2586 SDValue clz31
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2587 clz
, DAG
.getConstant( 31, INTVT
) );
2588 SDValue rshift
= DAG
.getNode( ISD::SUB
, DL
, INTVT
,
2589 DAG
.getConstant( 32, INTVT
), clz31
);
2590 SDValue t1
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xhi
, clz31
);
2591 SDValue t2
= DAG
.getNode( ISD::SRL
, DL
, INTVT
, xlo
, rshift
);
2592 t2
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
, clz31
, t2
, t1
);
2593 SDValue rhi1
= DAG
.getNode( ISD::OR
, DL
, INTVT
, t1
, t2
);
2594 SDValue rlo1
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xlo
, clz31
);
2595 SDValue rhi2
= DAG
.getNode( ISD::SHL
, DL
, INTVT
, xlo
, clz31
);
2596 SDValue rlo2
= DAG
.getConstant( 0, INTVT
);
2597 SDValue clz32
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2598 clz
, DAG
.getConstant( 32, INTVT
) );
2599 SDValue rhi
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2600 clz32
, rhi2
, rhi1
);
2601 SDValue rlo
= DAG
.getNode( AMDILISD::CMOVLOG
, DL
, INTVT
,
2602 clz32
, rlo2
, rlo1
);
2604 // Eliminate hidden bit
2605 rhi
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2606 rhi
, DAG
.getConstant( 0x7fffffff, INTVT
) );
2608 // Save bits needed to round properly
2609 SDValue round
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2610 rlo
, DAG
.getConstant( 0x7ff, INTVT
) );
2612 // Pack exponent and frac
2613 rlo
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2614 rlo
, DAG
.getConstant( 11, INTVT
) );
2615 SDValue temp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
2616 rhi
, DAG
.getConstant( (32 - 11), INTVT
) );
2617 rlo
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rlo
, temp
);
2618 rhi
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2619 rhi
, DAG
.getConstant( 11, INTVT
) );
2620 exp
= DAG
.getNode( ISD::SHL
, DL
, INTVT
,
2621 exp
, DAG
.getConstant( 20, INTVT
) );
2622 rhi
= DAG
.getNode( ISD::OR
, DL
, INTVT
, rhi
, exp
);
2624 // Compute rounding bit
2625 SDValue even
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2626 rlo
, DAG
.getConstant( 1, INTVT
) );
2627 SDValue grs
= DAG
.getNode( ISD::AND
, DL
, INTVT
,
2628 round
, DAG
.getConstant( 0x3ff, INTVT
) );
2629 grs
= DAG
.getNode( AMDILISD::CMP
, DL
, INTVT
,
2630 DAG
.getConstant( CondCCodeToCC( ISD::SETNE
, MVT::i32
), MVT::i32
),
2631 grs
, DAG
.getConstant( 0, INTVT
) ); // -1 if any GRS set, 0 if none
2632 grs
= DAG
.getNode( ISD::OR
, DL
, INTVT
, grs
, even
);
2633 round
= DAG
.getNode( ISD::SRL
, DL
, INTVT
,
2634 round
, DAG
.getConstant( 10, INTVT
) );
2635 round
= DAG
.getNode( ISD::AND
, DL
, INTVT
, round
, grs
); // 0 or 1
2638 SDValue lround
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
,
2639 round
, DAG
.getConstant( 0, INTVT
) );
2640 SDValue res
= DAG
.getNode( (isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, LONGVT
, rlo
, rhi
);
2641 res
= DAG
.getNode( ISD::ADD
, DL
, LONGVT
, res
, lround
);
2642 return DAG
.getNode(ISDBITCAST
, DL
, LHSVT
, res
);
2646 AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op
, SelectionDAG
&DAG
) const
2648 SDValue RHS
= Op
.getOperand(0);
2649 EVT RHSVT
= RHS
.getValueType();
2650 MVT RST
= RHSVT
.getScalarType().getSimpleVT();
2651 EVT LHSVT
= Op
.getValueType();
2652 MVT LST
= LHSVT
.getScalarType().getSimpleVT();
2653 DebugLoc DL
= Op
.getDebugLoc();
2657 const AMDILSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDILSubtarget
>();
2658 if (LST
== MVT::f64
&& LHSVT
.isVector()
2659 && STM
.device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2660 // We dont support vector 64bit floating point convertions.
2662 for (unsigned x
= 0, y
= LHSVT
.getVectorNumElements(); x
< y
; ++x
) {
2663 SDValue op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
2664 DL
, RST
, RHS
, DAG
.getTargetConstant(x
, MVT::i32
));
2665 op
= DAG
.getNode(ISD::UINT_TO_FP
, DL
, LST
, op
);
2667 DST
= DAG
.getNode(AMDILISD::VBUILD
, DL
, LHSVT
, op
);
2669 DST
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, LHSVT
, DST
,
2670 op
, DAG
.getTargetConstant(x
, MVT::i32
));
2677 && LST
== MVT::f64
) {
2678 if (STM
.device()->getGeneration() > AMDILDeviceInfo::HD6XXX
) {
2679 DST
= SDValue(Op
.getNode(), 0);
2681 DST
= genu32tof64(RHS
, LHSVT
, DAG
);
2683 } else if (RST
== MVT::i64
2684 && LST
== MVT::f64
) {
2685 DST
= genu64tof64(RHS
, LHSVT
, DAG
);
2687 DST
= SDValue(Op
.getNode(), 0);
2694 AMDILTargetLowering::LowerSUB(SDValue Op
, SelectionDAG
&DAG
) const
2696 SDValue LHS
= Op
.getOperand(0);
2697 SDValue RHS
= Op
.getOperand(1);
2698 DebugLoc DL
= Op
.getDebugLoc();
2699 EVT OVT
= Op
.getValueType();
2701 bool isVec
= RHS
.getValueType().isVector();
2702 if (OVT
.getScalarType() == MVT::i64
) {
2703 MVT INTTY
= MVT::i32
;
2704 if (OVT
== MVT::v2i64
) {
2707 SDValue LHSLO
, LHSHI
, RHSLO
, RHSHI
, INTLO
, INTHI
;
2708 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
2709 LHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, LHS
);
2710 RHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
, DL
, INTTY
, RHS
);
2711 LHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, LHS
);
2712 RHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
, DL
, INTTY
, RHS
);
2713 INTLO
= DAG
.getNode(ISD::SUB
, DL
, INTTY
, LHSLO
, RHSLO
);
2714 INTHI
= DAG
.getNode(ISD::SUB
, DL
, INTTY
, LHSHI
, RHSHI
);
2715 //TODO: need to use IBORROW on HD5XXX and later hardware
2717 if (OVT
== MVT::i64
) {
2718 cmp
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
2719 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
2724 SDValue LHSRLO
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
2725 DL
, MVT::i32
, LHSLO
, DAG
.getTargetConstant(0, MVT::i32
));
2726 SDValue LHSRHI
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
2727 DL
, MVT::i32
, LHSLO
, DAG
.getTargetConstant(1, MVT::i32
));
2728 SDValue RHSRLO
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
2729 DL
, MVT::i32
, RHSLO
, DAG
.getTargetConstant(0, MVT::i32
));
2730 SDValue RHSRHI
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
2731 DL
, MVT::i32
, RHSLO
, DAG
.getTargetConstant(1, MVT::i32
));
2732 cmplo
= DAG
.getNode(AMDILISD::CMP
, DL
, MVT::i32
,
2733 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
2735 cmphi
= DAG
.getNode(AMDILISD::CMP
, DL
, MVT::i32
,
2736 DAG
.getConstant(CondCCodeToCC(ISD::SETULT
, MVT::i32
), MVT::i32
),
2738 cmp
= DAG
.getNode(AMDILISD::VBUILD
, DL
, MVT::v2i32
, cmplo
);
2739 cmp
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, MVT::v2i32
,
2740 cmp
, cmphi
, DAG
.getTargetConstant(1, MVT::i32
));
2742 INTHI
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, INTHI
, cmp
);
2743 DST
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
, DL
, OVT
,
2746 DST
= SDValue(Op
.getNode(), 0);
2751 AMDILTargetLowering::LowerFDIV(SDValue Op
, SelectionDAG
&DAG
) const
2753 EVT OVT
= Op
.getValueType();
2755 if (OVT
.getScalarType() == MVT::f64
) {
2756 DST
= LowerFDIV64(Op
, DAG
);
2757 } else if (OVT
.getScalarType() == MVT::f32
) {
2758 DST
= LowerFDIV32(Op
, DAG
);
2760 DST
= SDValue(Op
.getNode(), 0);
2766 AMDILTargetLowering::LowerSDIV(SDValue Op
, SelectionDAG
&DAG
) const
2768 EVT OVT
= Op
.getValueType();
2770 if (OVT
.getScalarType() == MVT::i64
) {
2771 DST
= LowerSDIV64(Op
, DAG
);
2772 } else if (OVT
.getScalarType() == MVT::i32
) {
2773 DST
= LowerSDIV32(Op
, DAG
);
2774 } else if (OVT
.getScalarType() == MVT::i16
2775 || OVT
.getScalarType() == MVT::i8
) {
2776 DST
= LowerSDIV24(Op
, DAG
);
2778 DST
= SDValue(Op
.getNode(), 0);
2784 AMDILTargetLowering::LowerSREM(SDValue Op
, SelectionDAG
&DAG
) const
2786 EVT OVT
= Op
.getValueType();
2788 if (OVT
.getScalarType() == MVT::i64
) {
2789 DST
= LowerSREM64(Op
, DAG
);
2790 } else if (OVT
.getScalarType() == MVT::i32
) {
2791 DST
= LowerSREM32(Op
, DAG
);
2792 } else if (OVT
.getScalarType() == MVT::i16
) {
2793 DST
= LowerSREM16(Op
, DAG
);
2794 } else if (OVT
.getScalarType() == MVT::i8
) {
2795 DST
= LowerSREM8(Op
, DAG
);
2797 DST
= SDValue(Op
.getNode(), 0);
2803 AMDILTargetLowering::LowerUREM(SDValue Op
, SelectionDAG
&DAG
) const
2805 EVT OVT
= Op
.getValueType();
2807 if (OVT
.getScalarType() == MVT::i64
) {
2808 DST
= LowerUREM64(Op
, DAG
);
2809 } else if (OVT
.getScalarType() == MVT::i32
) {
2810 DST
= LowerUREM32(Op
, DAG
);
2811 } else if (OVT
.getScalarType() == MVT::i16
) {
2812 DST
= LowerUREM16(Op
, DAG
);
2813 } else if (OVT
.getScalarType() == MVT::i8
) {
2814 DST
= LowerUREM8(Op
, DAG
);
2816 DST
= SDValue(Op
.getNode(), 0);
2822 AMDILTargetLowering::LowerMUL(SDValue Op
, SelectionDAG
&DAG
) const
2824 DebugLoc DL
= Op
.getDebugLoc();
2825 EVT OVT
= Op
.getValueType();
2827 bool isVec
= OVT
.isVector();
2828 if (OVT
.getScalarType() != MVT::i64
)
2830 DST
= SDValue(Op
.getNode(), 0);
2832 assert(OVT
.getScalarType() == MVT::i64
&& "Only 64 bit mul should be lowered!");
2833 // TODO: This needs to be turned into a tablegen pattern
2834 SDValue LHS
= Op
.getOperand(0);
2835 SDValue RHS
= Op
.getOperand(1);
2837 MVT INTTY
= MVT::i32
;
2838 if (OVT
== MVT::v2i64
) {
2841 // mul64(h1, l1, h0, l0)
2842 SDValue LHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
,
2845 SDValue LHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
,
2848 SDValue RHSLO
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPLO2
: AMDILISD::LCOMPLO
,
2851 SDValue RHSHI
= DAG
.getNode((isVec
) ? AMDILISD::LCOMPHI2
: AMDILISD::LCOMPHI
,
2854 // MULLO_UINT_1 r1, h0, l1
2855 SDValue RHILLO
= DAG
.getNode(AMDILISD::UMUL
,
2857 INTTY
, RHSHI
, LHSLO
);
2858 // MULLO_UINT_1 r2, h1, l0
2859 SDValue RLOHHI
= DAG
.getNode(AMDILISD::UMUL
,
2861 INTTY
, RHSLO
, LHSHI
);
2862 // ADD_INT hr, r1, r2
2863 SDValue ADDHI
= DAG
.getNode(ISD::ADD
,
2865 INTTY
, RHILLO
, RLOHHI
);
2866 // MULHI_UINT_1 r3, l1, l0
2867 SDValue RLOLLO
= DAG
.getNode(ISD::MULHU
,
2869 INTTY
, RHSLO
, LHSLO
);
2870 // ADD_INT hr, hr, r3
2871 SDValue HIGH
= DAG
.getNode(ISD::ADD
,
2873 INTTY
, ADDHI
, RLOLLO
);
2874 // MULLO_UINT_1 l3, l1, l0
2875 SDValue LOW
= DAG
.getNode(AMDILISD::UMUL
,
2877 INTTY
, LHSLO
, RHSLO
);
2878 DST
= DAG
.getNode((isVec
) ? AMDILISD::LCREATE2
: AMDILISD::LCREATE
,
2885 AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op
, SelectionDAG
&DAG
) const
2887 EVT VT
= Op
.getValueType();
2892 DebugLoc DL
= Op
.getDebugLoc();
2893 Nodes1
= DAG
.getNode(AMDILISD::VBUILD
,
2895 VT
, Op
.getOperand(0));
2897 bool allEqual
= true;
2898 for (unsigned x
= 1, y
= Op
.getNumOperands(); x
< y
; ++x
) {
2899 if (Op
.getOperand(0) != Op
.getOperand(x
)) {
2908 switch(Op
.getNumOperands()) {
2913 fourth
= Op
.getOperand(3);
2914 if (fourth
.getOpcode() != ISD::UNDEF
) {
2915 Nodes1
= DAG
.getNode(
2916 ISD::INSERT_VECTOR_ELT
,
2921 DAG
.getConstant(7, MVT::i32
));
2924 third
= Op
.getOperand(2);
2925 if (third
.getOpcode() != ISD::UNDEF
) {
2926 Nodes1
= DAG
.getNode(
2927 ISD::INSERT_VECTOR_ELT
,
2932 DAG
.getConstant(6, MVT::i32
));
2935 second
= Op
.getOperand(1);
2936 if (second
.getOpcode() != ISD::UNDEF
) {
2937 Nodes1
= DAG
.getNode(
2938 ISD::INSERT_VECTOR_ELT
,
2943 DAG
.getConstant(5, MVT::i32
));
2951 AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op
,
2952 SelectionDAG
&DAG
) const
2954 DebugLoc DL
= Op
.getDebugLoc();
2955 EVT VT
= Op
.getValueType();
2956 const SDValue
*ptr
= NULL
;
2957 const ConstantSDNode
*CSDN
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(2));
2958 uint32_t swizzleNum
= 0;
2960 if (!VT
.isVector()) {
2961 SDValue Res
= Op
.getOperand(0);
2965 if (Op
.getOperand(1).getOpcode() != ISD::UNDEF
) {
2966 ptr
= &Op
.getOperand(1);
2968 ptr
= &Op
.getOperand(0);
2971 swizzleNum
= (uint32_t)CSDN
->getZExtValue();
2972 uint32_t mask2
= 0x04030201 & ~(0xFF << (swizzleNum
* 8));
2973 uint32_t mask3
= 0x01010101 & (0xFF << (swizzleNum
* 8));
2974 DST
= DAG
.getNode(AMDILISD::VINSERT
,
2979 DAG
.getTargetConstant(mask2
, MVT::i32
),
2980 DAG
.getTargetConstant(mask3
, MVT::i32
));
2982 uint32_t mask2
= 0x04030201 & ~(0xFF << (swizzleNum
* 8));
2983 uint32_t mask3
= 0x01010101 & (0xFF << (swizzleNum
* 8));
2984 SDValue res
= DAG
.getNode(AMDILISD::VINSERT
,
2985 DL
, VT
, Op
.getOperand(0), *ptr
,
2986 DAG
.getTargetConstant(mask2
, MVT::i32
),
2987 DAG
.getTargetConstant(mask3
, MVT::i32
));
2988 for (uint32_t x
= 1; x
< VT
.getVectorNumElements(); ++x
) {
2989 mask2
= 0x04030201 & ~(0xFF << (x
* 8));
2990 mask3
= 0x01010101 & (0xFF << (x
* 8));
2991 SDValue t
= DAG
.getNode(AMDILISD::VINSERT
,
2992 DL
, VT
, Op
.getOperand(0), *ptr
,
2993 DAG
.getTargetConstant(mask2
, MVT::i32
),
2994 DAG
.getTargetConstant(mask3
, MVT::i32
));
2995 SDValue c
= DAG
.getNode(AMDILISD::CMP
, DL
, ptr
->getValueType(),
2996 DAG
.getConstant(AMDILCC::IL_CC_I_EQ
, MVT::i32
),
2997 Op
.getOperand(2), DAG
.getConstant(x
, MVT::i32
));
2998 c
= DAG
.getNode(AMDILISD::VBUILD
, DL
, Op
.getValueType(), c
);
2999 res
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, VT
, c
, t
, res
);
3007 AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op
,
3008 SelectionDAG
&DAG
) const
3010 EVT VT
= Op
.getValueType();
3011 const ConstantSDNode
*CSDN
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
3012 uint64_t swizzleNum
= 0;
3013 DebugLoc DL
= Op
.getDebugLoc();
3015 if (!Op
.getOperand(0).getValueType().isVector()) {
3016 Res
= Op
.getOperand(0);
3020 // Static vector extraction
3021 swizzleNum
= CSDN
->getZExtValue() + 1;
3022 Res
= DAG
.getNode(AMDILISD::VEXTRACT
,
3025 DAG
.getTargetConstant(swizzleNum
, MVT::i32
));
3027 SDValue Op1
= Op
.getOperand(1);
3028 uint32_t vecSize
= 4;
3029 SDValue Op0
= Op
.getOperand(0);
3030 SDValue res
= DAG
.getNode(AMDILISD::VEXTRACT
,
3032 DAG
.getTargetConstant(1, MVT::i32
));
3033 if (Op0
.getValueType().isVector()) {
3034 vecSize
= Op0
.getValueType().getVectorNumElements();
3036 for (uint32_t x
= 2; x
<= vecSize
; ++x
) {
3037 SDValue t
= DAG
.getNode(AMDILISD::VEXTRACT
,
3039 DAG
.getTargetConstant(x
, MVT::i32
));
3040 SDValue c
= DAG
.getNode(AMDILISD::CMP
,
3041 DL
, Op1
.getValueType(),
3042 DAG
.getConstant(AMDILCC::IL_CC_I_EQ
, MVT::i32
),
3043 Op1
, DAG
.getConstant(x
, MVT::i32
));
3044 res
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
,
3054 AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op
,
3055 SelectionDAG
&DAG
) const
3057 uint32_t vecSize
= Op
.getValueType().getVectorNumElements();
3058 SDValue src
= Op
.getOperand(0);
3059 const ConstantSDNode
*CSDN
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
3060 uint64_t offset
= 0;
3061 EVT vecType
= Op
.getValueType().getVectorElementType();
3062 DebugLoc DL
= Op
.getDebugLoc();
3065 offset
= CSDN
->getZExtValue();
3066 Result
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3067 DL
,vecType
, src
, DAG
.getConstant(offset
, MVT::i32
));
3068 Result
= DAG
.getNode(AMDILISD::VBUILD
, DL
,
3069 Op
.getValueType(), Result
);
3070 for (uint32_t x
= 1; x
< vecSize
; ++x
) {
3071 SDValue elt
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, vecType
,
3072 src
, DAG
.getConstant(offset
+ x
, MVT::i32
));
3073 if (elt
.getOpcode() != ISD::UNDEF
) {
3074 Result
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
,
3075 Op
.getValueType(), Result
, elt
,
3076 DAG
.getConstant(x
, MVT::i32
));
3080 SDValue idx
= Op
.getOperand(1);
3081 Result
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
3082 DL
, vecType
, src
, idx
);
3083 Result
= DAG
.getNode(AMDILISD::VBUILD
, DL
,
3084 Op
.getValueType(), Result
);
3085 for (uint32_t x
= 1; x
< vecSize
; ++x
) {
3086 idx
= DAG
.getNode(ISD::ADD
, DL
, vecType
,
3087 idx
, DAG
.getConstant(1, MVT::i32
));
3088 SDValue elt
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, vecType
,
3090 if (elt
.getOpcode() != ISD::UNDEF
) {
3091 Result
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
,
3092 Op
.getValueType(), Result
, elt
, idx
);
3099 AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op
,
3100 SelectionDAG
&DAG
) const
3102 SDValue Res
= DAG
.getNode(AMDILISD::VBUILD
,
3109 AMDILTargetLowering::LowerSELECT(SDValue Op
, SelectionDAG
&DAG
) const
3111 SDValue Cond
= Op
.getOperand(0);
3112 SDValue LHS
= Op
.getOperand(1);
3113 SDValue RHS
= Op
.getOperand(2);
3114 DebugLoc DL
= Op
.getDebugLoc();
3115 Cond
= getConversionNode(DAG
, Cond
, Op
, true);
3116 Cond
= DAG
.getNode(AMDILISD::CMOVLOG
,
3118 Op
.getValueType(), Cond
, LHS
, RHS
);
3122 AMDILTargetLowering::LowerSETCC(SDValue Op
, SelectionDAG
&DAG
) const
3125 SDValue LHS
= Op
.getOperand(0);
3126 SDValue RHS
= Op
.getOperand(1);
3127 SDValue CC
= Op
.getOperand(2);
3128 DebugLoc DL
= Op
.getDebugLoc();
3129 ISD::CondCode SetCCOpcode
= cast
<CondCodeSDNode
>(CC
)->get();
3130 unsigned int AMDILCC
= CondCCodeToCC(
3132 LHS
.getValueType().getSimpleVT().SimpleTy
);
3133 assert((AMDILCC
!= AMDILCC::COND_ERROR
) && "Invalid SetCC!");
3139 DAG
.getConstant(-1, MVT::i32
),
3140 DAG
.getConstant(0, MVT::i32
),
3142 Cond
= getConversionNode(DAG
, Cond
, Op
, true);
3146 Cond
.getValueType(),
3147 DAG
.getConstant(1, Cond
.getValueType()),
3153 AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op
, SelectionDAG
&DAG
) const
3155 SDValue Data
= Op
.getOperand(0);
3156 VTSDNode
*BaseType
= cast
<VTSDNode
>(Op
.getOperand(1));
3157 DebugLoc DL
= Op
.getDebugLoc();
3158 EVT DVT
= Data
.getValueType();
3159 EVT BVT
= BaseType
->getVT();
3160 unsigned baseBits
= BVT
.getScalarType().getSizeInBits();
3161 unsigned srcBits
= DVT
.isSimple() ? DVT
.getScalarType().getSizeInBits() : 1;
3162 unsigned shiftBits
= srcBits
- baseBits
;
3164 // If the op is less than 32 bits, then it needs to extend to 32bits
3165 // so it can properly keep the upper bits valid.
3166 EVT IVT
= genIntType(32, DVT
.isVector() ? DVT
.getVectorNumElements() : 1);
3167 Data
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, IVT
, Data
);
3168 shiftBits
= 32 - baseBits
;
3171 SDValue Shift
= DAG
.getConstant(shiftBits
, DVT
);
3172 // Shift left by 'Shift' bits.
3173 Data
= DAG
.getNode(ISD::SHL
, DL
, DVT
, Data
, Shift
);
3174 // Signed shift Right by 'Shift' bits.
3175 Data
= DAG
.getNode(ISD::SRA
, DL
, DVT
, Data
, Shift
);
3177 // Once the sign extension is done, the op needs to be converted to
3178 // its original type.
3179 Data
= DAG
.getSExtOrTrunc(Data
, DL
, Op
.getOperand(0).getValueType());
3184 AMDILTargetLowering::genIntType(uint32_t size
, uint32_t numEle
) const
3186 int iSize
= (size
* numEle
);
3187 int vEle
= (iSize
>> ((size
== 64) ? 6 : 5));
3193 return EVT(MVT::i64
);
3195 return EVT(MVT::getVectorVT(MVT::i64
, vEle
));
3199 return EVT(MVT::i32
);
3201 return EVT(MVT::getVectorVT(MVT::i32
, vEle
));
3207 AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op
,
3208 SelectionDAG
&DAG
) const
3210 SDValue Chain
= Op
.getOperand(0);
3211 SDValue Size
= Op
.getOperand(1);
3212 unsigned int SPReg
= AMDIL::SP
;
3213 DebugLoc DL
= Op
.getDebugLoc();
3214 SDValue SP
= DAG
.getCopyFromReg(Chain
,
3217 SDValue NewSP
= DAG
.getNode(ISD::ADD
,
3219 MVT::i32
, SP
, Size
);
3220 Chain
= DAG
.getCopyToReg(SP
.getValue(1),
3223 SDValue Ops
[2] = {NewSP
, Chain
};
3224 Chain
= DAG
.getMergeValues(Ops
, 2 ,DL
);
3228 AMDILTargetLowering::LowerBRCOND(SDValue Op
, SelectionDAG
&DAG
) const
3230 SDValue Chain
= Op
.getOperand(0);
3231 SDValue Cond
= Op
.getOperand(1);
3232 SDValue Jump
= Op
.getOperand(2);
3234 Result
= DAG
.getNode(
3235 AMDILISD::BRANCH_COND
,
3243 AMDILTargetLowering::LowerBR_CC(SDValue Op
, SelectionDAG
&DAG
) const
3245 SDValue Chain
= Op
.getOperand(0);
3246 SDValue CC
= Op
.getOperand(1);
3247 SDValue LHS
= Op
.getOperand(2);
3248 SDValue RHS
= Op
.getOperand(3);
3249 SDValue JumpT
= Op
.getOperand(4);
3252 CmpValue
= DAG
.getNode(
3257 DAG
.getConstant(-1, MVT::i32
),
3258 DAG
.getConstant(0, MVT::i32
),
3260 Result
= DAG
.getNode(
3261 AMDILISD::BRANCH_COND
,
3262 CmpValue
.getDebugLoc(),
3269 AMDILTargetLowering::LowerFP_ROUND(SDValue Op
, SelectionDAG
&DAG
) const
3271 SDValue Result
= DAG
.getNode(
3281 AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op
, SelectionDAG
&DAG
) const
3283 SDValue Result
= DAG
.getNode(
3291 // LowerRET - Lower an ISD::RET node.
3293 AMDILTargetLowering::LowerReturn(SDValue Chain
,
3294 CallingConv::ID CallConv
, bool isVarArg
,
3295 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
3296 const SmallVectorImpl
<SDValue
> &OutVals
,
3297 DebugLoc dl
, SelectionDAG
&DAG
)
3300 //MachineFunction& MF = DAG.getMachineFunction();
3301 // CCValAssign - represent the assignment of the return value
3303 SmallVector
<CCValAssign
, 16> RVLocs
;
3305 // CCState - Info about the registers and stack slot
3306 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
3307 getTargetMachine(), RVLocs
, *DAG
.getContext());
3309 // Analyze return values of ISD::RET
3310 CCInfo
.AnalyzeReturn(Outs
, RetCC_AMDIL32
);
3311 // If this is the first return lowered for this function, add
3312 // the regs to the liveout set for the function
3313 MachineRegisterInfo
&MRI
= DAG
.getMachineFunction().getRegInfo();
3314 for (unsigned int i
= 0, e
= RVLocs
.size(); i
!= e
; ++i
) {
3315 if (RVLocs
[i
].isRegLoc() && !MRI
.isLiveOut(RVLocs
[i
].getLocReg())) {
3316 MRI
.addLiveOut(RVLocs
[i
].getLocReg());
3319 // FIXME: implement this when tail call is implemented
3320 // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
3321 // both x86 and ppc implement this in ISelLowering
3323 // Regular return here
3325 SmallVector
<SDValue
, 6> RetOps
;
3326 RetOps
.push_back(Chain
);
3327 RetOps
.push_back(DAG
.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32
));
3328 for (unsigned int i
= 0, e
= RVLocs
.size(); i
!= e
; ++i
) {
3329 CCValAssign
&VA
= RVLocs
[i
];
3330 SDValue ValToCopy
= OutVals
[i
];
3331 assert(VA
.isRegLoc() && "Can only return in registers!");
3332 // ISD::Ret => ret chain, (regnum1, val1), ...
3333 // So i * 2 + 1 index only the regnums
3334 Chain
= DAG
.getCopyToReg(Chain
,
3339 // guarantee that all emitted copies are stuck together
3340 // avoiding something bad
3341 Flag
= Chain
.getValue(1);
3343 /*if (MF.getFunction()->hasStructRetAttr()) {
3344 assert(0 && "Struct returns are not yet implemented!");
3345 // Both MIPS and X86 have this
3349 RetOps
.push_back(Flag
);
3351 Flag
= DAG
.getNode(AMDILISD::RET_FLAG
,
3353 MVT::Other
, &RetOps
[0], RetOps
.size());
3358 AMDILTargetLowering::getFunctionAlignment(const Function
*) const
3364 AMDILTargetLowering::setPrivateData(MachineBasicBlock
*BB
,
3365 MachineBasicBlock::iterator
&BBI
,
3366 DebugLoc
*DL
, const TargetInstrInfo
*TII
) const
3374 AMDILTargetLowering::genVReg(uint32_t regType
) const
3376 return mBB
->getParent()->getRegInfo().createVirtualRegister(
3377 getTargetMachine().getRegisterInfo()->getRegClass(regType
));
3381 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
) const
3383 return BuildMI(*mBB
, mBBI
, *mDL
, mTII
->get(opcode
), dst
);
3387 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
,
3388 uint32_t src1
) const
3390 return generateMachineInst(opcode
, dst
).addReg(src1
);
3394 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
,
3395 uint32_t src1
, uint32_t src2
) const
3397 return generateMachineInst(opcode
, dst
, src1
).addReg(src2
);
3401 AMDILTargetLowering::generateMachineInst(uint32_t opcode
, uint32_t dst
,
3402 uint32_t src1
, uint32_t src2
, uint32_t src3
) const
3404 return generateMachineInst(opcode
, dst
, src1
, src2
).addReg(src3
);
3409 AMDILTargetLowering::LowerSDIV24(SDValue Op
, SelectionDAG
&DAG
) const
3411 DebugLoc DL
= Op
.getDebugLoc();
3412 EVT OVT
= Op
.getValueType();
3413 SDValue LHS
= Op
.getOperand(0);
3414 SDValue RHS
= Op
.getOperand(1);
3417 if (!OVT
.isVector()) {
3420 } else if (OVT
.getVectorNumElements() == 2) {
3423 } else if (OVT
.getVectorNumElements() == 4) {
3427 unsigned bitsize
= OVT
.getScalarType().getSizeInBits();
3428 // char|short jq = ia ^ ib;
3429 SDValue jq
= DAG
.getNode(ISD::XOR
, DL
, OVT
, LHS
, RHS
);
3431 // jq = jq >> (bitsize - 2)
3432 jq
= DAG
.getNode(ISD::SRA
, DL
, OVT
, jq
, DAG
.getConstant(bitsize
- 2, OVT
));
3435 jq
= DAG
.getNode(ISD::OR
, DL
, OVT
, jq
, DAG
.getConstant(1, OVT
));
3438 jq
= DAG
.getSExtOrTrunc(jq
, DL
, INTTY
);
3440 // int ia = (int)LHS;
3441 SDValue ia
= DAG
.getSExtOrTrunc(LHS
, DL
, INTTY
);
3443 // int ib, (int)RHS;
3444 SDValue ib
= DAG
.getSExtOrTrunc(RHS
, DL
, INTTY
);
3446 // float fa = (float)ia;
3447 SDValue fa
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ia
);
3449 // float fb = (float)ib;
3450 SDValue fb
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ib
);
3452 // float fq = native_divide(fa, fb);
3453 SDValue fq
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, FLTTY
, fa
, fb
);
3456 fq
= DAG
.getNode(ISD::FTRUNC
, DL
, FLTTY
, fq
);
3458 // float fqneg = -fq;
3459 SDValue fqneg
= DAG
.getNode(ISD::FNEG
, DL
, FLTTY
, fq
);
3461 // float fr = mad(fqneg, fb, fa);
3462 SDValue fr
= DAG
.getNode(AMDILISD::MAD
, DL
, FLTTY
, fqneg
, fb
, fa
);
3464 // int iq = (int)fq;
3465 SDValue iq
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, INTTY
, fq
);
3468 fr
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fr
);
3471 fb
= DAG
.getNode(ISD::FABS
, DL
, FLTTY
, fb
);
3473 // int cv = fr >= fb;
3475 if (INTTY
== MVT::i32
) {
3476 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
3478 cv
= DAG
.getSetCC(DL
, INTTY
, fr
, fb
, ISD::SETOGE
);
3480 // jq = (cv ? jq : 0);
3481 jq
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, cv
, jq
,
3482 DAG
.getConstant(0, OVT
));
3484 iq
= DAG
.getSExtOrTrunc(iq
, DL
, OVT
);
3485 iq
= DAG
.getNode(ISD::ADD
, DL
, OVT
, iq
, jq
);
3490 AMDILTargetLowering::LowerSDIV32(SDValue Op
, SelectionDAG
&DAG
) const
3492 DebugLoc DL
= Op
.getDebugLoc();
3493 EVT OVT
= Op
.getValueType();
3494 SDValue LHS
= Op
.getOperand(0);
3495 SDValue RHS
= Op
.getOperand(1);
3496 // The LowerSDIV32 function generates equivalent to the following IL.
3506 // ixor r10, r10, r11
3508 // ixor DST, r0, r10
3517 SDValue r10
= DAG
.getSelectCC(DL
,
3518 r0
, DAG
.getConstant(0, OVT
),
3519 DAG
.getConstant(-1, MVT::i32
),
3520 DAG
.getConstant(0, MVT::i32
),
3524 SDValue r11
= DAG
.getSelectCC(DL
,
3525 r1
, DAG
.getConstant(0, OVT
),
3526 DAG
.getConstant(-1, MVT::i32
),
3527 DAG
.getConstant(0, MVT::i32
),
3531 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
3534 r1
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r1
, r11
);
3537 r0
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
3540 r1
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r1
, r11
);
3543 r0
= DAG
.getNode(ISD::UDIV
, DL
, OVT
, r0
, r1
);
3545 // ixor r10, r10, r11
3546 r10
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r10
, r11
);
3549 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
3551 // ixor DST, r0, r10
3552 SDValue DST
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
3557 AMDILTargetLowering::LowerSDIV64(SDValue Op
, SelectionDAG
&DAG
) const
3559 return SDValue(Op
.getNode(), 0);
3563 AMDILTargetLowering::LowerUDIV24(SDValue Op
, SelectionDAG
&DAG
) const
3565 DebugLoc DL
= Op
.getDebugLoc();
3566 EVT OVT
= Op
.getValueType();
3567 SDValue LHS
= Op
.getOperand(0);
3568 SDValue RHS
= Op
.getOperand(1);
3571 if (!OVT
.isVector()) {
3574 } else if (OVT
.getVectorNumElements() == 2) {
3577 } else if (OVT
.getVectorNumElements() == 4) {
3582 // The LowerUDIV24 function implements the following CL.
3583 // int ia = (int)LHS
3584 // float fa = (float)ia
3585 // int ib = (int)RHS
3586 // float fb = (float)ib
3587 // float fq = native_divide(fa, fb)
3589 // float t = mad(fq, fb, fb)
3590 // int iq = (int)fq - (t <= fa)
3593 // int ia = (int)LHS
3594 SDValue ia
= DAG
.getZExtOrTrunc(LHS
, DL
, INTTY
);
3596 // float fa = (float)ia
3597 SDValue fa
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ia
);
3599 // int ib = (int)RHS
3600 SDValue ib
= DAG
.getZExtOrTrunc(RHS
, DL
, INTTY
);
3602 // float fb = (float)ib
3603 SDValue fb
= DAG
.getNode(ISD::SINT_TO_FP
, DL
, FLTTY
, ib
);
3605 // float fq = native_divide(fa, fb)
3606 SDValue fq
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, FLTTY
, fa
, fb
);
3609 fq
= DAG
.getNode(ISD::FTRUNC
, DL
, FLTTY
, fq
);
3611 // float t = mad(fq, fb, fb)
3612 SDValue t
= DAG
.getNode(AMDILISD::MAD
, DL
, FLTTY
, fq
, fb
, fb
);
3614 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
3616 fq
= DAG
.getNode(ISD::FP_TO_SINT
, DL
, INTTY
, fq
);
3617 if (INTTY
== MVT::i32
) {
3618 iq
= DAG
.getSetCC(DL
, INTTY
, t
, fa
, ISD::SETOLE
);
3620 iq
= DAG
.getSetCC(DL
, INTTY
, t
, fa
, ISD::SETOLE
);
3622 iq
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, fq
, iq
);
3626 iq
= DAG
.getZExtOrTrunc(iq
, DL
, OVT
);
3632 AMDILTargetLowering::LowerSREM8(SDValue Op
, SelectionDAG
&DAG
) const
3634 DebugLoc DL
= Op
.getDebugLoc();
3635 EVT OVT
= Op
.getValueType();
3636 MVT INTTY
= MVT::i32
;
3637 if (OVT
== MVT::v2i8
) {
3639 } else if (OVT
== MVT::v4i8
) {
3642 SDValue LHS
= DAG
.getSExtOrTrunc(Op
.getOperand(0), DL
, INTTY
);
3643 SDValue RHS
= DAG
.getSExtOrTrunc(Op
.getOperand(1), DL
, INTTY
);
3644 LHS
= DAG
.getNode(ISD::SREM
, DL
, INTTY
, LHS
, RHS
);
3645 LHS
= DAG
.getSExtOrTrunc(LHS
, DL
, OVT
);
3650 AMDILTargetLowering::LowerSREM16(SDValue Op
, SelectionDAG
&DAG
) const
3652 DebugLoc DL
= Op
.getDebugLoc();
3653 EVT OVT
= Op
.getValueType();
3654 MVT INTTY
= MVT::i32
;
3655 if (OVT
== MVT::v2i16
) {
3657 } else if (OVT
== MVT::v4i16
) {
3660 SDValue LHS
= DAG
.getSExtOrTrunc(Op
.getOperand(0), DL
, INTTY
);
3661 SDValue RHS
= DAG
.getSExtOrTrunc(Op
.getOperand(1), DL
, INTTY
);
3662 LHS
= DAG
.getNode(ISD::SREM
, DL
, INTTY
, LHS
, RHS
);
3663 LHS
= DAG
.getSExtOrTrunc(LHS
, DL
, OVT
);
3668 AMDILTargetLowering::LowerSREM32(SDValue Op
, SelectionDAG
&DAG
) const
3670 DebugLoc DL
= Op
.getDebugLoc();
3671 EVT OVT
= Op
.getValueType();
3672 SDValue LHS
= Op
.getOperand(0);
3673 SDValue RHS
= Op
.getOperand(1);
3674 // The LowerSREM32 function generates equivalent to the following IL.
3684 // umul r20, r20, r1
3687 // ixor DST, r0, r10
3696 SDValue r10
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
3697 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
3698 r0
, DAG
.getConstant(0, OVT
));
3701 SDValue r11
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
3702 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::i32
), MVT::i32
),
3703 r1
, DAG
.getConstant(0, OVT
));
3706 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
3709 r1
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r1
, r11
);
3712 r0
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
3715 r1
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r1
, r11
);
3718 SDValue r20
= DAG
.getNode(ISD::UREM
, DL
, OVT
, r0
, r1
);
3720 // umul r20, r20, r1
3721 r20
= DAG
.getNode(AMDILISD::UMUL
, DL
, OVT
, r20
, r1
);
3724 r0
= DAG
.getNode(ISD::SUB
, DL
, OVT
, r0
, r20
);
3727 r0
= DAG
.getNode(ISD::ADD
, DL
, OVT
, r0
, r10
);
3729 // ixor DST, r0, r10
3730 SDValue DST
= DAG
.getNode(ISD::XOR
, DL
, OVT
, r0
, r10
);
3735 AMDILTargetLowering::LowerSREM64(SDValue Op
, SelectionDAG
&DAG
) const
3737 return SDValue(Op
.getNode(), 0);
3741 AMDILTargetLowering::LowerUREM8(SDValue Op
, SelectionDAG
&DAG
) const
3743 DebugLoc DL
= Op
.getDebugLoc();
3744 EVT OVT
= Op
.getValueType();
3745 MVT INTTY
= MVT::i32
;
3746 if (OVT
== MVT::v2i8
) {
3748 } else if (OVT
== MVT::v4i8
) {
3751 SDValue LHS
= Op
.getOperand(0);
3752 SDValue RHS
= Op
.getOperand(1);
3753 // The LowerUREM8 function generates equivalent to the following IL.
3754 // mov r0, as_u32(LHS)
3755 // mov r1, as_u32(RHS)
3756 // and r10, r0, 0xFF
3757 // and r11, r1, 0xFF
3758 // cmov_logical r3, r11, r11, 0x1
3760 // cmov_logical r3, r11, r3, 0
3763 // and as_u8(DST), r3, 0xFF
3765 // mov r0, as_u32(LHS)
3766 SDValue r0
= DAG
.getSExtOrTrunc(LHS
, DL
, INTTY
);
3768 // mov r1, as_u32(RHS)
3769 SDValue r1
= DAG
.getSExtOrTrunc(RHS
, DL
, INTTY
);
3771 // and r10, r0, 0xFF
3772 SDValue r10
= DAG
.getNode(ISD::AND
, DL
, INTTY
, r0
,
3773 DAG
.getConstant(0xFF, INTTY
));
3775 // and r11, r1, 0xFF
3776 SDValue r11
= DAG
.getNode(ISD::AND
, DL
, INTTY
, r1
,
3777 DAG
.getConstant(0xFF, INTTY
));
3779 // cmov_logical r3, r11, r11, 0x1
3780 SDValue r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, r11
, r11
,
3781 DAG
.getConstant(0x01, INTTY
));
3784 r3
= DAG
.getNode(ISD::UREM
, DL
, INTTY
, r10
, r3
);
3786 // cmov_logical r3, r11, r3, 0
3787 r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, r11
, r3
,
3788 DAG
.getConstant(0, INTTY
));
3791 r3
= DAG
.getNode(AMDILISD::UMUL
, DL
, INTTY
, r3
, r11
);
3794 r3
= DAG
.getNode(ISD::SUB
, DL
, INTTY
, r10
, r3
);
3796 // and as_u8(DST), r3, 0xFF
3797 SDValue DST
= DAG
.getNode(ISD::AND
, DL
, INTTY
, r3
,
3798 DAG
.getConstant(0xFF, INTTY
));
3799 DST
= DAG
.getZExtOrTrunc(DST
, DL
, OVT
);
3804 AMDILTargetLowering::LowerUREM16(SDValue Op
, SelectionDAG
&DAG
) const
3806 DebugLoc DL
= Op
.getDebugLoc();
3807 EVT OVT
= Op
.getValueType();
3808 MVT INTTY
= MVT::i32
;
3809 if (OVT
== MVT::v2i16
) {
3811 } else if (OVT
== MVT::v4i16
) {
3814 SDValue LHS
= Op
.getOperand(0);
3815 SDValue RHS
= Op
.getOperand(1);
3816 // The LowerUREM16 function generatest equivalent to the following IL.
3819 // DIV = LowerUDIV16(LHS, RHS)
3820 // and r10, r0, 0xFFFF
3821 // and r11, r1, 0xFFFF
3822 // cmov_logical r3, r11, r11, 0x1
3823 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
3824 // and r3, r3, 0xFFFF
3825 // cmov_logical r3, r11, r3, 0
3828 // and DST, r3, 0xFFFF
3836 // and r10, r0, 0xFFFF
3837 SDValue r10
= DAG
.getNode(ISD::AND
, DL
, OVT
, r0
,
3838 DAG
.getConstant(0xFFFF, OVT
));
3840 // and r11, r1, 0xFFFF
3841 SDValue r11
= DAG
.getNode(ISD::AND
, DL
, OVT
, r1
,
3842 DAG
.getConstant(0xFFFF, OVT
));
3844 // cmov_logical r3, r11, r11, 0x1
3845 SDValue r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, r11
, r11
,
3846 DAG
.getConstant(0x01, OVT
));
3848 // udiv as_u16(r3), as_u32(r10), as_u32(r3)
3849 r10
= DAG
.getZExtOrTrunc(r10
, DL
, INTTY
);
3850 r3
= DAG
.getZExtOrTrunc(r3
, DL
, INTTY
);
3851 r3
= DAG
.getNode(ISD::UREM
, DL
, INTTY
, r10
, r3
);
3852 r3
= DAG
.getZExtOrTrunc(r3
, DL
, OVT
);
3853 r10
= DAG
.getZExtOrTrunc(r10
, DL
, OVT
);
3855 // and r3, r3, 0xFFFF
3856 r3
= DAG
.getNode(ISD::AND
, DL
, OVT
, r3
,
3857 DAG
.getConstant(0xFFFF, OVT
));
3859 // cmov_logical r3, r11, r3, 0
3860 r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, r11
, r3
,
3861 DAG
.getConstant(0, OVT
));
3863 r3
= DAG
.getNode(AMDILISD::UMUL
, DL
, OVT
, r3
, r11
);
3866 r3
= DAG
.getNode(ISD::SUB
, DL
, OVT
, r10
, r3
);
3868 // and DST, r3, 0xFFFF
3869 SDValue DST
= DAG
.getNode(ISD::AND
, DL
, OVT
, r3
,
3870 DAG
.getConstant(0xFFFF, OVT
));
3875 AMDILTargetLowering::LowerUREM32(SDValue Op
, SelectionDAG
&DAG
) const
3877 DebugLoc DL
= Op
.getDebugLoc();
3878 EVT OVT
= Op
.getValueType();
3879 SDValue LHS
= Op
.getOperand(0);
3880 SDValue RHS
= Op
.getOperand(1);
3881 // The LowerUREM32 function generates equivalent to the following IL.
3882 // udiv r20, LHS, RHS
3883 // umul r20, r20, RHS
3884 // sub DST, LHS, r20
3886 // udiv r20, LHS, RHS
3887 SDValue r20
= DAG
.getNode(ISD::UDIV
, DL
, OVT
, LHS
, RHS
);
3889 // umul r20, r20, RHS
3890 r20
= DAG
.getNode(AMDILISD::UMUL
, DL
, OVT
, r20
, RHS
);
3892 // sub DST, LHS, r20
3893 SDValue DST
= DAG
.getNode(ISD::SUB
, DL
, OVT
, LHS
, r20
);
3898 AMDILTargetLowering::LowerUREM64(SDValue Op
, SelectionDAG
&DAG
) const
3900 return SDValue(Op
.getNode(), 0);
3905 AMDILTargetLowering::LowerFDIV32(SDValue Op
, SelectionDAG
&DAG
) const
3907 DebugLoc DL
= Op
.getDebugLoc();
3908 EVT OVT
= Op
.getValueType();
3909 MVT INTTY
= MVT::i32
;
3910 if (OVT
== MVT::v2f32
) {
3912 } else if (OVT
== MVT::v4f32
) {
3915 SDValue LHS
= Op
.getOperand(0);
3916 SDValue RHS
= Op
.getOperand(1);
3918 const AMDILSubtarget
&STM
= getTargetMachine().getSubtarget
<AMDILSubtarget
>();
3919 if (STM
.device()->getGeneration() == AMDILDeviceInfo::HD4XXX
) {
3920 // TODO: This doesn't work for vector types yet
3921 // The LowerFDIV32 function generates equivalent to the following
3923 // mov r20, as_int(LHS)
3924 // mov r21, as_int(RHS)
3925 // and r30, r20, 0x7f800000
3926 // and r31, r20, 0x807FFFFF
3927 // and r32, r21, 0x7f800000
3928 // and r33, r21, 0x807FFFFF
3929 // ieq r40, r30, 0x7F800000
3930 // ieq r41, r31, 0x7F800000
3933 // and r50, r20, 0x80000000
3934 // and r51, r21, 0x80000000
3935 // ior r32, r32, 0x3f800000
3936 // ior r33, r33, 0x3f800000
3937 // cmov_logical r32, r42, r50, r32
3938 // cmov_logical r33, r43, r51, r33
3939 // cmov_logical r32, r40, r20, r32
3940 // cmov_logical r33, r41, r21, r33
3941 // ior r50, r40, r41
3942 // ior r51, r42, r43
3943 // ior r50, r50, r51
3945 // iadd r30, r30, r52
3946 // cmov_logical r30, r50, 0, r30
3947 // div_zeroop(infinity) r21, 1.0, r33
3948 // mul_ieee r20, r32, r21
3949 // and r22, r20, 0x7FFFFFFF
3950 // and r23, r20, 0x80000000
3951 // ishr r60, r22, 0x00000017
3952 // ishr r61, r30, 0x00000017
3953 // iadd r20, r20, r30
3954 // iadd r21, r22, r30
3955 // iadd r60, r60, r61
3957 // ior r41, r23, 0x7F800000
3958 // ige r40, r60, 0x000000FF
3959 // cmov_logical r40, r50, 0, r40
3960 // cmov_logical r20, r42, r23, r20
3961 // cmov_logical DST, r40, r41, r20
3964 // mov r20, as_int(LHS)
3965 SDValue R20
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, LHS
);
3967 // mov r21, as_int(RHS)
3968 SDValue R21
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, RHS
);
3970 // and r30, r20, 0x7f800000
3971 SDValue R30
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
3972 DAG
.getConstant(0x7F800000, INTTY
));
3974 // and r31, r21, 0x7f800000
3975 SDValue R31
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R21
,
3976 DAG
.getConstant(0x7f800000, INTTY
));
3978 // and r32, r20, 0x807FFFFF
3979 SDValue R32
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
3980 DAG
.getConstant(0x807FFFFF, INTTY
));
3982 // and r33, r21, 0x807FFFFF
3983 SDValue R33
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R21
,
3984 DAG
.getConstant(0x807FFFFF, INTTY
));
3986 // ieq r40, r30, 0x7F800000
3987 SDValue R40
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
3988 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
3989 R30
, DAG
.getConstant(0x7F800000, INTTY
));
3991 // ieq r41, r31, 0x7F800000
3992 SDValue R41
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
3993 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
3994 R31
, DAG
.getConstant(0x7F800000, INTTY
));
3997 SDValue R42
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
3998 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
3999 R30
, DAG
.getConstant(0, INTTY
));
4002 SDValue R43
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
4003 DAG
.getConstant(CondCCodeToCC(ISD::SETEQ
, MVT::i32
), MVT::i32
),
4004 R31
, DAG
.getConstant(0, INTTY
));
4006 // and r50, r20, 0x80000000
4007 SDValue R50
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
4008 DAG
.getConstant(0x80000000, INTTY
));
4010 // and r51, r21, 0x80000000
4011 SDValue R51
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R21
,
4012 DAG
.getConstant(0x80000000, INTTY
));
4014 // ior r32, r32, 0x3f800000
4015 R32
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R32
,
4016 DAG
.getConstant(0x3F800000, INTTY
));
4018 // ior r33, r33, 0x3f800000
4019 R33
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R33
,
4020 DAG
.getConstant(0x3F800000, INTTY
));
4022 // cmov_logical r32, r42, r50, r32
4023 R32
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R42
, R50
, R32
);
4025 // cmov_logical r33, r43, r51, r33
4026 R33
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R43
, R51
, R33
);
4028 // cmov_logical r32, r40, r20, r32
4029 R32
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R40
, R20
, R32
);
4031 // cmov_logical r33, r41, r21, r33
4032 R33
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R41
, R21
, R33
);
4034 // ior r50, r40, r41
4035 R50
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R40
, R41
);
4037 // ior r51, r42, r43
4038 R51
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R42
, R43
);
4040 // ior r50, r50, r51
4041 R50
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R50
, R51
);
4044 SDValue R52
= DAG
.getNode(AMDILISD::INEGATE
, DL
, INTTY
, R31
);
4046 // iadd r30, r30, r52
4047 R30
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R30
, R52
);
4049 // cmov_logical r30, r50, 0, r30
4050 R30
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R50
,
4051 DAG
.getConstant(0, INTTY
), R30
);
4053 // div_zeroop(infinity) r21, 1.0, as_float(r33)
4054 R33
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R33
);
4055 R21
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, OVT
,
4056 DAG
.getConstantFP(1.0f
, OVT
), R33
);
4058 // mul_ieee as_int(r20), as_float(r32), r21
4059 R32
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R32
);
4060 R20
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, R32
, R21
);
4061 R20
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, R20
);
4063 // div_zeroop(infinity) r21, 1.0, as_float(r33)
4064 R33
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R33
);
4065 R21
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, OVT
,
4066 DAG
.getConstantFP(1.0f
, OVT
), R33
);
4068 // mul_ieee as_int(r20), as_float(r32), r21
4069 R32
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, R32
);
4070 R20
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, R32
, R21
);
4071 R20
= DAG
.getNode(ISDBITCAST
, DL
, INTTY
, R20
);
4073 // and r22, r20, 0x7FFFFFFF
4074 SDValue R22
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
4075 DAG
.getConstant(0x7FFFFFFF, INTTY
));
4077 // and r23, r20, 0x80000000
4078 SDValue R23
= DAG
.getNode(ISD::AND
, DL
, INTTY
, R20
,
4079 DAG
.getConstant(0x80000000, INTTY
));
4081 // ishr r60, r22, 0x00000017
4082 SDValue R60
= DAG
.getNode(ISD::SRA
, DL
, INTTY
, R22
,
4083 DAG
.getConstant(0x00000017, INTTY
));
4085 // ishr r61, r30, 0x00000017
4086 SDValue R61
= DAG
.getNode(ISD::SRA
, DL
, INTTY
, R30
,
4087 DAG
.getConstant(0x00000017, INTTY
));
4089 // iadd r20, r20, r30
4090 R20
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R20
, R30
);
4092 // iadd r21, r22, r30
4093 R21
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R22
, R30
);
4095 // iadd r60, r60, r61
4096 R60
= DAG
.getNode(ISD::ADD
, DL
, INTTY
, R60
, R61
);
4099 R42
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
4100 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
4101 DAG
.getConstant(0, INTTY
),
4104 // ior r41, r23, 0x7F800000
4105 R41
= DAG
.getNode(ISD::OR
, DL
, INTTY
, R23
,
4106 DAG
.getConstant(0x7F800000, INTTY
));
4108 // ige r40, r60, 0x000000FF
4109 R40
= DAG
.getNode(AMDILISD::CMP
, DL
, INTTY
,
4110 DAG
.getConstant(CondCCodeToCC(ISD::SETGE
, MVT::i32
), MVT::i32
),
4112 DAG
.getConstant(0x0000000FF, INTTY
));
4114 // cmov_logical r40, r50, 0, r40
4115 R40
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R50
,
4116 DAG
.getConstant(0, INTTY
),
4119 // cmov_logical r20, r42, r23, r20
4120 R20
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R42
, R23
, R20
);
4122 // cmov_logical DST, r40, r41, r20
4123 DST
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, INTTY
, R40
, R41
, R20
);
4126 DST
= DAG
.getNode(ISDBITCAST
, DL
, OVT
, DST
);
4128 // The following sequence of DAG nodes produce the following IL:
4130 // lt r2, 0x1.0p+96f, r1
4131 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
4132 // mul_ieee r1, RHS, r3
4133 // div_zeroop(infinity) r0, LHS, r1
4134 // mul_ieee DST, r0, r3
4137 SDValue r1
= DAG
.getNode(ISD::FABS
, DL
, OVT
, RHS
);
4138 // lt r2, 0x1.0p+96f, r1
4139 SDValue r2
= DAG
.getNode(AMDILISD::CMP
, DL
, OVT
,
4140 DAG
.getConstant(CondCCodeToCC(ISD::SETLT
, MVT::f32
), MVT::i32
),
4141 DAG
.getConstant(0x6f800000, INTTY
), r1
);
4142 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
4143 SDValue r3
= DAG
.getNode(AMDILISD::CMOVLOG
, DL
, OVT
, r2
,
4144 DAG
.getConstant(0x2f800000, INTTY
),
4145 DAG
.getConstant(0x3f800000, INTTY
));
4146 // mul_ieee r1, RHS, r3
4147 r1
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, RHS
, r3
);
4148 // div_zeroop(infinity) r0, LHS, r1
4149 SDValue r0
= DAG
.getNode(AMDILISD::DIV_INF
, DL
, OVT
, LHS
, r1
);
4150 // mul_ieee DST, r0, r3
4151 DST
= DAG
.getNode(ISD::FMUL
, DL
, OVT
, r0
, r3
);
4157 AMDILTargetLowering::LowerFDIV64(SDValue Op
, SelectionDAG
&DAG
) const
4159 return SDValue(Op
.getNode(), 0);