#include "AMDILISelLowering.h"
#include "AMDILDevices.h"
-#include "AMDILGlobalManager.h"
#include "AMDILIntrinsicInfo.h"
-#include "AMDILKernelManager.h"
-#include "AMDILMachineFunctionInfo.h"
+#include "AMDILRegisterInfo.h"
#include "AMDILSubtarget.h"
-#include "AMDILTargetMachine.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
-#include "AMDILGenCallingConv.inc"
+#include "AMDGPUGenCallingConv.inc"
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
} else if (svt.isInteger() && dvt.isInteger()) {
if (!svt.bitsEq(dvt)) {
Src = DAG.getSExtOrTrunc(Src, DL, dvt);
- } else {
- Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
}
} else if (svt.isInteger()) {
unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
};
}
- static unsigned int
-translateToOpcode(uint64_t CCCode, unsigned int regClass)
-{
- switch (CCCode) {
- case AMDILCC::IL_CC_D_EQ:
- case AMDILCC::IL_CC_D_OEQ:
- if (regClass == AMDIL::GPRV2F64RegClassID) {
- return (unsigned int)AMDIL::DEQ_v2f64;
- } else {
- return (unsigned int)AMDIL::DEQ;
- }
- case AMDILCC::IL_CC_D_LE:
- case AMDILCC::IL_CC_D_OLE:
- case AMDILCC::IL_CC_D_ULE:
- case AMDILCC::IL_CC_D_GE:
- case AMDILCC::IL_CC_D_OGE:
- case AMDILCC::IL_CC_D_UGE:
- return (unsigned int)AMDIL::DGE;
- case AMDILCC::IL_CC_D_LT:
- case AMDILCC::IL_CC_D_OLT:
- case AMDILCC::IL_CC_D_ULT:
- case AMDILCC::IL_CC_D_GT:
- case AMDILCC::IL_CC_D_OGT:
- case AMDILCC::IL_CC_D_UGT:
- return (unsigned int)AMDIL::DLT;
- case AMDILCC::IL_CC_D_NE:
- case AMDILCC::IL_CC_D_UNE:
- return (unsigned int)AMDIL::DNE;
- case AMDILCC::IL_CC_F_EQ:
- case AMDILCC::IL_CC_F_OEQ:
- return (unsigned int)AMDIL::FEQ;
- case AMDILCC::IL_CC_F_LE:
- case AMDILCC::IL_CC_F_ULE:
- case AMDILCC::IL_CC_F_OLE:
- case AMDILCC::IL_CC_F_GE:
- case AMDILCC::IL_CC_F_UGE:
- case AMDILCC::IL_CC_F_OGE:
- return (unsigned int)AMDIL::FGE;
- case AMDILCC::IL_CC_F_LT:
- case AMDILCC::IL_CC_F_OLT:
- case AMDILCC::IL_CC_F_ULT:
- case AMDILCC::IL_CC_F_GT:
- case AMDILCC::IL_CC_F_OGT:
- case AMDILCC::IL_CC_F_UGT:
- if (regClass == AMDIL::GPRV2F32RegClassID) {
- return (unsigned int)AMDIL::FLT_v2f32;
- } else if (regClass == AMDIL::GPRV4F32RegClassID) {
- return (unsigned int)AMDIL::FLT_v4f32;
- } else {
- return (unsigned int)AMDIL::FLT;
- }
- case AMDILCC::IL_CC_F_NE:
- case AMDILCC::IL_CC_F_UNE:
- return (unsigned int)AMDIL::FNE;
- case AMDILCC::IL_CC_I_EQ:
- case AMDILCC::IL_CC_U_EQ:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::IEQ;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRV2I8RegClassID
- || regClass == AMDIL::GPRV2I16RegClassID) {
- return (unsigned int)AMDIL::IEQ_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRV4I8RegClassID
- || regClass == AMDIL::GPRV4I16RegClassID) {
- return (unsigned int)AMDIL::IEQ_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_L_EQ:
- case AMDILCC::IL_CC_UL_EQ:
- return (unsigned int)AMDIL::LEQ;
- case AMDILCC::IL_CC_I_GE:
- case AMDILCC::IL_CC_I_LE:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::IGE;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::IGE_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::IGE_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_I_LT:
- case AMDILCC::IL_CC_I_GT:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ILT;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ILT_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ILT_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_L_GE:
- return (unsigned int)AMDIL::LGE;
- case AMDILCC::IL_CC_L_LE:
- return (unsigned int)AMDIL::LLE;
- case AMDILCC::IL_CC_L_LT:
- return (unsigned int)AMDIL::LLT;
- case AMDILCC::IL_CC_L_GT:
- return (unsigned int)AMDIL::LGT;
- case AMDILCC::IL_CC_I_NE:
- case AMDILCC::IL_CC_U_NE:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::INE;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::INE_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::INE_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_U_GE:
- case AMDILCC::IL_CC_U_LE:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGE;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGE_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGE_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_L_NE:
- case AMDILCC::IL_CC_UL_NE:
- return (unsigned int)AMDIL::LNE;
- case AMDILCC::IL_CC_UL_GE:
- return (unsigned int)AMDIL::ULGE;
- case AMDILCC::IL_CC_UL_LE:
- return (unsigned int)AMDIL::ULLE;
- case AMDILCC::IL_CC_U_LT:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ULT;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ULT_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ULT_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_U_GT:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGT;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGT_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGT_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_UL_LT:
- return (unsigned int)AMDIL::ULLT;
- case AMDILCC::IL_CC_UL_GT:
- return (unsigned int)AMDIL::ULGT;
- case AMDILCC::IL_CC_F_UEQ:
- case AMDILCC::IL_CC_D_UEQ:
- case AMDILCC::IL_CC_F_ONE:
- case AMDILCC::IL_CC_D_ONE:
- case AMDILCC::IL_CC_F_O:
- case AMDILCC::IL_CC_F_UO:
- case AMDILCC::IL_CC_D_O:
- case AMDILCC::IL_CC_D_UO:
- // we don't care
- return 0;
-
- }
- errs()<<"Opcode: "<<CCCode<<"\n";
- assert(0 && "Unknown opcode retrieved");
- return 0;
-}
SDValue
AMDILTargetLowering::LowerMemArgument(
SDValue Chain,
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Instruction generation functions
-//===----------------------------------------------------------------------===//
-uint32_t
-AMDILTargetLowering::addExtensionInstructions(
- uint32_t reg, bool signedShift,
- unsigned int simpleVT) const
-{
- int shiftSize = 0;
- uint32_t LShift, RShift;
- switch(simpleVT)
- {
- default:
- return reg;
- case AMDIL::GPRI8RegClassID:
- shiftSize = 24;
- LShift = AMDIL::SHL_i8;
- if (signedShift) {
- RShift = AMDIL::SHR_i8;
- } else {
- RShift = AMDIL::USHR_i8;
- }
- break;
- case AMDIL::GPRV2I8RegClassID:
- shiftSize = 24;
- LShift = AMDIL::SHL_v2i8;
- if (signedShift) {
- RShift = AMDIL::SHR_v2i8;
- } else {
- RShift = AMDIL::USHR_v2i8;
- }
- break;
- case AMDIL::GPRV4I8RegClassID:
- shiftSize = 24;
- LShift = AMDIL::SHL_v4i8;
- if (signedShift) {
- RShift = AMDIL::SHR_v4i8;
- } else {
- RShift = AMDIL::USHR_v4i8;
- }
- break;
- case AMDIL::GPRI16RegClassID:
- shiftSize = 16;
- LShift = AMDIL::SHL_i16;
- if (signedShift) {
- RShift = AMDIL::SHR_i16;
- } else {
- RShift = AMDIL::USHR_i16;
- }
- break;
- case AMDIL::GPRV2I16RegClassID:
- shiftSize = 16;
- LShift = AMDIL::SHL_v2i16;
- if (signedShift) {
- RShift = AMDIL::SHR_v2i16;
- } else {
- RShift = AMDIL::USHR_v2i16;
- }
- break;
- case AMDIL::GPRV4I16RegClassID:
- shiftSize = 16;
- LShift = AMDIL::SHL_v4i16;
- if (signedShift) {
- RShift = AMDIL::SHR_v4i16;
- } else {
- RShift = AMDIL::USHR_v4i16;
- }
- break;
- };
- uint32_t LoadReg = genVReg(simpleVT);
- uint32_t tmp1 = genVReg(simpleVT);
- uint32_t tmp2 = genVReg(simpleVT);
- generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
- generateMachineInst(LShift, tmp1, reg, LoadReg);
- generateMachineInst(RShift, tmp2, tmp1, LoadReg);
- return tmp2;
-}
-
-MachineOperand
-AMDILTargetLowering::convertToReg(MachineOperand op) const
-{
- if (op.isReg()) {
- return op;
- } else if (op.isImm()) {
- uint32_t loadReg
- = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
- generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
- .addImm(op.getImm());
- op.ChangeToRegister(loadReg, false);
- } else if (op.isFPImm()) {
- uint32_t loadReg
- = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
- generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
- .addFPImm(op.getFPImm());
- op.ChangeToRegister(loadReg, false);
- } else if (op.isMBB()) {
- op.ChangeToRegister(0, false);
- } else if (op.isFI()) {
- op.ChangeToRegister(0, false);
- } else if (op.isCPI()) {
- op.ChangeToRegister(0, false);
- } else if (op.isJTI()) {
- op.ChangeToRegister(0, false);
- } else if (op.isGlobal()) {
- op.ChangeToRegister(0, false);
- } else if (op.isSymbol()) {
- op.ChangeToRegister(0, false);
- }/* else if (op.isMetadata()) {
- op.ChangeToRegister(0, false);
- }*/
- return op;
-}
-
-void
-AMDILTargetLowering::generateCMPInstr(
- MachineInstr *MI,
- MachineBasicBlock *BB,
- const TargetInstrInfo& TII)
-const
-{
- MachineOperand DST = MI->getOperand(0);
- MachineOperand CC = MI->getOperand(1);
- MachineOperand LHS = MI->getOperand(2);
- MachineOperand RHS = MI->getOperand(3);
- int64_t ccCode = CC.getImm();
- unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
- unsigned int opCode = translateToOpcode(ccCode, simpleVT);
- DebugLoc DL = MI->getDebugLoc();
- MachineBasicBlock::iterator BBI = MI;
- setPrivateData(BB, BBI, &DL, &TII);
- if (!LHS.isReg()) {
- LHS = convertToReg(LHS);
- }
- if (!RHS.isReg()) {
- RHS = convertToReg(RHS);
- }
- switch (ccCode) {
- case AMDILCC::IL_CC_I_EQ:
- case AMDILCC::IL_CC_I_NE:
- case AMDILCC::IL_CC_I_GE:
- case AMDILCC::IL_CC_I_LT:
- {
- uint32_t lhsreg = addExtensionInstructions(
- LHS.getReg(), true, simpleVT);
- uint32_t rhsreg = addExtensionInstructions(
- RHS.getReg(), true, simpleVT);
- generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
- }
- break;
- case AMDILCC::IL_CC_U_EQ:
- case AMDILCC::IL_CC_U_NE:
- case AMDILCC::IL_CC_U_GE:
- case AMDILCC::IL_CC_U_LT:
- case AMDILCC::IL_CC_D_EQ:
- case AMDILCC::IL_CC_F_EQ:
- case AMDILCC::IL_CC_F_OEQ:
- case AMDILCC::IL_CC_D_OEQ:
- case AMDILCC::IL_CC_D_NE:
- case AMDILCC::IL_CC_F_NE:
- case AMDILCC::IL_CC_F_UNE:
- case AMDILCC::IL_CC_D_UNE:
- case AMDILCC::IL_CC_D_GE:
- case AMDILCC::IL_CC_F_GE:
- case AMDILCC::IL_CC_D_OGE:
- case AMDILCC::IL_CC_F_OGE:
- case AMDILCC::IL_CC_D_LT:
- case AMDILCC::IL_CC_F_LT:
- case AMDILCC::IL_CC_F_OLT:
- case AMDILCC::IL_CC_D_OLT:
- generateMachineInst(opCode, DST.getReg(),
- LHS.getReg(), RHS.getReg());
- break;
- case AMDILCC::IL_CC_I_GT:
- case AMDILCC::IL_CC_I_LE:
- {
- uint32_t lhsreg = addExtensionInstructions(
- LHS.getReg(), true, simpleVT);
- uint32_t rhsreg = addExtensionInstructions(
- RHS.getReg(), true, simpleVT);
- generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
- }
- break;
- case AMDILCC::IL_CC_U_GT:
- case AMDILCC::IL_CC_U_LE:
- case AMDILCC::IL_CC_F_GT:
- case AMDILCC::IL_CC_D_GT:
- case AMDILCC::IL_CC_F_OGT:
- case AMDILCC::IL_CC_D_OGT:
- case AMDILCC::IL_CC_F_LE:
- case AMDILCC::IL_CC_D_LE:
- case AMDILCC::IL_CC_D_OLE:
- case AMDILCC::IL_CC_F_OLE:
- generateMachineInst(opCode, DST.getReg(),
- RHS.getReg(), LHS.getReg());
- break;
- case AMDILCC::IL_CC_F_UGT:
- case AMDILCC::IL_CC_F_ULE:
- {
- uint32_t VReg[4] = {
- genVReg(simpleVT), genVReg(simpleVT),
- genVReg(simpleVT), genVReg(simpleVT)
- };
- generateMachineInst(opCode, VReg[0],
- RHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::FNE, VReg[1],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::FNE, VReg[2],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_F_ULT:
- case AMDILCC::IL_CC_F_UGE:
- {
- uint32_t VReg[4] = {
- genVReg(simpleVT), genVReg(simpleVT),
- genVReg(simpleVT), genVReg(simpleVT)
- };
- generateMachineInst(opCode, VReg[0],
- LHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::FNE, VReg[1],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::FNE, VReg[2],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_D_UGT:
- case AMDILCC::IL_CC_D_ULE:
- {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[4] = {
- genVReg(regID), genVReg(regID),
- genVReg(regID), genVReg(regID)
- };
- // The result of a double comparison is a 32bit result
- generateMachineInst(opCode, VReg[0],
- RHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::DNE, VReg[1],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::DNE, VReg[2],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_D_UGE:
- case AMDILCC::IL_CC_D_ULT:
- {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[4] = {
- genVReg(regID), genVReg(regID),
- genVReg(regID), genVReg(regID)
- };
- // The result of a double comparison is a 32bit result
- generateMachineInst(opCode, VReg[0],
- LHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::DNE, VReg[1],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::DNE, VReg[2],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_F_UEQ:
- {
- uint32_t VReg[4] = {
- genVReg(simpleVT), genVReg(simpleVT),
- genVReg(simpleVT), genVReg(simpleVT)
- };
- generateMachineInst(AMDIL::FEQ, VReg[0],
- LHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::FNE, VReg[1],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::FNE, VReg[2],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_F_ONE:
- {
- uint32_t VReg[4] = {
- genVReg(simpleVT), genVReg(simpleVT),
- genVReg(simpleVT), genVReg(simpleVT)
- };
- generateMachineInst(AMDIL::FNE, VReg[0],
- LHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::FEQ, VReg[1],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::FEQ, VReg[2],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::BINARY_AND_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_AND_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_D_UEQ:
- {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[4] = {
- genVReg(regID), genVReg(regID),
- genVReg(regID), genVReg(regID)
- };
- // The result of a double comparison is a 32bit result
- generateMachineInst(AMDIL::DEQ, VReg[0],
- LHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::DNE, VReg[1],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::DNE, VReg[2],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
-
- }
- break;
- case AMDILCC::IL_CC_D_ONE:
- {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[4] = {
- genVReg(regID), genVReg(regID),
- genVReg(regID), genVReg(regID)
- };
- // The result of a double comparison is a 32bit result
- generateMachineInst(AMDIL::DNE, VReg[0],
- LHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::DEQ, VReg[1],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::DEQ, VReg[2],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::BINARY_AND_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_AND_f32,
- DST.getReg(), VReg[2], VReg[3]);
-
- }
- break;
- case AMDILCC::IL_CC_F_O:
- {
- uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
- generateMachineInst(AMDIL::FEQ, VReg[0],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::FEQ, VReg[1],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::BINARY_AND_f32,
- DST.getReg(), VReg[0], VReg[1]);
- }
- break;
- case AMDILCC::IL_CC_D_O:
- {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
- // The result of a double comparison is a 32bit result
- generateMachineInst(AMDIL::DEQ, VReg[0],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::DEQ, VReg[1],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::BINARY_AND_f32,
- DST.getReg(), VReg[0], VReg[1]);
- }
- break;
- case AMDILCC::IL_CC_F_UO:
- {
- uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
- generateMachineInst(AMDIL::FNE, VReg[0],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::FNE, VReg[1],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[0], VReg[1]);
- }
- break;
- case AMDILCC::IL_CC_D_UO:
- {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
- // The result of a double comparison is a 32bit result
- generateMachineInst(AMDIL::DNE, VReg[0],
- RHS.getReg(), RHS.getReg());
- generateMachineInst(AMDIL::DNE, VReg[1],
- LHS.getReg(), LHS.getReg());
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[0], VReg[1]);
- }
- break;
- case AMDILCC::IL_CC_L_LE:
- case AMDILCC::IL_CC_L_GE:
- case AMDILCC::IL_CC_L_EQ:
- case AMDILCC::IL_CC_L_NE:
- case AMDILCC::IL_CC_L_LT:
- case AMDILCC::IL_CC_L_GT:
- case AMDILCC::IL_CC_UL_LE:
- case AMDILCC::IL_CC_UL_GE:
- case AMDILCC::IL_CC_UL_EQ:
- case AMDILCC::IL_CC_UL_NE:
- case AMDILCC::IL_CC_UL_LT:
- case AMDILCC::IL_CC_UL_GT:
- {
- const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
- if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
- generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg());
- } else {
- generateLongRelational(MI, opCode);
- }
- }
- break;
- case AMDILCC::COND_ERROR:
- assert(0 && "Invalid CC code");
- break;
- };
-}
//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
- const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
+ const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
// These are the current register classes that are
// supported
- addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
- addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
-
- if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
- addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
- addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
- }
- if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
- addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
- addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
- addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
- setOperationAction(ISD::Constant , MVT::i8 , Legal);
- }
- if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
- addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
- addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
- addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
- setOperationAction(ISD::Constant , MVT::i16 , Legal);
- }
- addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
- addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
- addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
- addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
- if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
- addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
- addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
- }
-
for (unsigned int x = 0; x < numTypes; ++x) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
//FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
// We cannot sextinreg, expand to shifts
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::FP_ROUND, VT, Expand);
- setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SUBE, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::ADDE, VT, Expand);
setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::BRCOND, VT, Custom);
setOperationAction(ISD::BR_CC, VT, Custom);
setOperationAction(ISD::BR_JT, VT, Expand);
setOperationAction(ISD::BRIND, VT, Expand);
// TODO: Implement custom UREM/SREM routines
- setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
- setOperationAction(ISDBITCAST, VT, Custom);
setOperationAction(ISD::GlobalAddress, VT, Custom);
setOperationAction(ISD::JumpTable, VT, Custom);
setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::SELECT_CC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
if (VT != MVT::i64 && VT != MVT::v2i64) {
setOperationAction(ISD::SDIV, VT, Custom);
- setOperationAction(ISD::UDIV, VT, Custom);
}
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
for (unsigned int x = 0; x < numFloatTypes; ++x) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
// IL does not have these operations for floating point types
setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
- setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::SETOLT, VT, Expand);
setOperationAction(ISD::SETOGE, VT, Expand);
setOperationAction(ISD::SETOGT, VT, Expand);
// GPU also does not have divrem function for signed or unsigned
setOperationAction(ISD::SDIVREM, VT, Expand);
- setOperationAction(ISD::UDIVREM, VT, Expand);
- setOperationAction(ISD::FP_ROUND, VT, Expand);
// GPU does not have [S|U]MUL_LOHI functions as a single instruction
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
// GPU doesn't have a rotl, rotr, or byteswap instruction
setOperationAction(ISD::ROTR, VT, Expand);
- setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
// GPU doesn't have any counting operators
MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::FP_ROUND, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
- setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
// setOperationAction(ISD::VSETCC, VT, Expand);
- setOperationAction(ISD::SETCC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
}
- setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
- if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
- if (stm->calVersion() < CAL_VERSION_SC_139
- || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
- setOperationAction(ISD::MUL, MVT::i64, Custom);
- }
- setOperationAction(ISD::SUB, MVT::i64, Custom);
- setOperationAction(ISD::ADD, MVT::i64, Custom);
+ if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
setOperationAction(ISD::MULHU, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
setOperationAction(ISD::MULHS, MVT::i64, Expand);
setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
- setOperationAction(ISD::SUB, MVT::v2i64, Expand);
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
setOperationAction(ISD::SREM, MVT::v2i64, Expand);
setOperationAction(ISD::Constant , MVT::i64 , Legal);
- setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
}
- if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+ if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
// we support loading/storing v2f64 but not operations on the type
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
- setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
// We want to expand vector conversions into their scalar
// counterparts.
- setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::BR_CC, MVT::Other, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::SETCC, MVT::Other, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
- setOperationAction(ISD::FDIV, MVT::f32, Custom);
- setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
- setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
// Use the default implementation.
setOperationAction(ISD::Constant , MVT::i32 , Legal);
setOperationAction(ISD::TRAP , MVT::Other , Legal);
- setStackPointerRegisterToSaveRestore(AMDIL::SP);
+ setStackPointerRegisterToSaveRestore(AMDGPU::SP);
setSchedulingPreference(Sched::RegPressure);
setPow2DivIsCheap(false);
setPrefLoopAlignment(16);
setSelectIsExpensive(true);
setJumpIsExpensive(true);
- computeRegisterProperties();
maxStoresPerMemcpy = 4096;
maxStoresPerMemmove = 4096;
{
switch (Opcode) {
default: return 0;
- case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
- case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
- case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
- case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
- case AMDILISD::CMOV: return "AMDILISD::CMOV";
case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
- case AMDILISD::INEGATE: return "AMDILISD::INEGATE";
case AMDILISD::MAD: return "AMDILISD::MAD";
- case AMDILISD::UMAD: return "AMDILISD::UMAD";
case AMDILISD::CALL: return "AMDILISD::CALL";
- case AMDILISD::RET: return "AMDILISD::RET";
- case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
- case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
- case AMDILISD::ADD: return "AMDILISD::ADD";
+ case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
case AMDILISD::UMUL: return "AMDILISD::UMUL";
- case AMDILISD::AND: return "AMDILISD::AND";
- case AMDILISD::OR: return "AMDILISD::OR";
- case AMDILISD::NOT: return "AMDILISD::NOT";
- case AMDILISD::XOR: return "AMDILISD::XOR";
case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
- case AMDILISD::SMAX: return "AMDILISD::SMAX";
- case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
- case AMDILISD::MOVE: return "AMDILISD::MOVE";
case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
- case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
- case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
- case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
- case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
- case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
- case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
- case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
- case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
- case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
- case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
- case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
- case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
- case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
- case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
- case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
case AMDILISD::CMP: return "AMDILISD::CMP";
case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
- case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
- case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
- case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
- case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
- case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
- case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
- case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
- case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
- case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
- case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
- case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
- case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
- case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
- case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
- case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
- case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
- case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
- case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
- case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
- case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
- case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
- case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
- case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
- case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
- case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
- case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
- case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
- case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
- case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
- case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
- case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
- case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
- case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
- case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
- case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
- case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
- case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
- case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
- case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
- case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
- case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
- case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
- case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
- case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
- case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
- case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
- case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
- case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
- case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
- case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
- case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
- case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
- case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
- case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
- case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
- case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
- case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
- case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
- case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
- case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
- case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
- case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
- case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
- case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
- case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
- case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
- case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
- case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
- case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
- case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
- case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
- case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
- case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
- case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
- case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
- case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
- case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
- case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
- case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
- case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
- case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
- case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
- case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
- case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
- case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
- case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
- case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
- case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
- case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
- case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
- case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
- case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
- case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
- case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
- case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
- case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
- case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
- case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
- case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
- case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
- case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
};
}
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I, unsigned Intrinsic) const
{
- if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
- || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
- return false;
- }
- bool bitCastToInt = false;
- unsigned IntNo;
- bool isRet = true;
- const AMDILSubtarget *STM = &this->getTargetMachine()
- .getSubtarget<AMDILSubtarget>();
- switch (Intrinsic) {
- default: return false; // Don't custom lower most intrinsics.
- case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
- case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
- IntNo = AMDILISD::ATOM_G_ADD; break;
- case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
- case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
- IntNo = AMDILISD::ATOM_L_ADD; break;
- case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
- case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
- IntNo = AMDILISD::ATOM_R_ADD; break;
- case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
- case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
- IntNo = AMDILISD::ATOM_G_AND; break;
- case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_AND_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
- case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
- IntNo = AMDILISD::ATOM_L_AND; break;
- case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_AND_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
- case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
- IntNo = AMDILISD::ATOM_R_AND; break;
- case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_AND_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
- IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
- IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
- IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
- case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_G_DEC;
- } else {
- IntNo = AMDILISD::ATOM_G_SUB;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
- isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_G_DEC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_G_SUB_NORET;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
- case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_L_DEC;
- } else {
- IntNo = AMDILISD::ATOM_L_SUB;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
- isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_L_DEC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_L_SUB_NORET;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
- case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_R_DEC;
- } else {
- IntNo = AMDILISD::ATOM_R_SUB;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
- isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_R_DEC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_R_SUB_NORET;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
- case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_G_INC;
- } else {
- IntNo = AMDILISD::ATOM_G_ADD;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
- isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_G_INC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_G_ADD_NORET;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
- case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_L_INC;
- } else {
- IntNo = AMDILISD::ATOM_L_ADD;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
- isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_L_INC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_L_ADD_NORET;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
- case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_R_INC;
- } else {
- IntNo = AMDILISD::ATOM_R_ADD;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
- isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_R_INC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_R_ADD_NORET;
- }
- break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
- IntNo = AMDILISD::ATOM_G_MAX; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
- IntNo = AMDILISD::ATOM_G_UMAX; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
- IntNo = AMDILISD::ATOM_L_MAX; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
- IntNo = AMDILISD::ATOM_L_UMAX; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
- IntNo = AMDILISD::ATOM_R_MAX; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
- IntNo = AMDILISD::ATOM_R_UMAX; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
- IntNo = AMDILISD::ATOM_G_MIN; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
- IntNo = AMDILISD::ATOM_G_UMIN; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
- IntNo = AMDILISD::ATOM_L_MIN; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
- IntNo = AMDILISD::ATOM_L_UMIN; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
- IntNo = AMDILISD::ATOM_R_MIN; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
- IntNo = AMDILISD::ATOM_R_UMIN; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
- case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
- IntNo = AMDILISD::ATOM_G_OR; break;
- case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_OR_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
- case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
- IntNo = AMDILISD::ATOM_L_OR; break;
- case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_OR_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
- case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
- IntNo = AMDILISD::ATOM_R_OR; break;
- case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_OR_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
- case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
- IntNo = AMDILISD::ATOM_G_SUB; break;
- case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
- case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
- IntNo = AMDILISD::ATOM_L_SUB; break;
- case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
- case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
- IntNo = AMDILISD::ATOM_R_SUB; break;
- case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
- IntNo = AMDILISD::ATOM_G_RSUB; break;
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
- IntNo = AMDILISD::ATOM_L_RSUB; break;
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
- IntNo = AMDILISD::ATOM_R_RSUB; break;
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
- bitCastToInt = true;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
- IntNo = AMDILISD::ATOM_G_XCHG; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
- bitCastToInt = true;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
- bitCastToInt = true;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
- IntNo = AMDILISD::ATOM_L_XCHG; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
- bitCastToInt = true;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
- bitCastToInt = true;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
- IntNo = AMDILISD::ATOM_R_XCHG; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
- bitCastToInt = true;
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
- case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
- IntNo = AMDILISD::ATOM_G_XOR; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
- case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
- IntNo = AMDILISD::ATOM_L_XOR; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
- case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
- IntNo = AMDILISD::ATOM_R_XOR; break;
- case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
- case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
- isRet = false;
- IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
- case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
- IntNo = AMDILISD::APPEND_ALLOC; break;
- case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
- isRet = false;
- IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
- case AMDGPUIntrinsic::AMDIL_append_consume_i32:
- IntNo = AMDILISD::APPEND_CONSUME; break;
- case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
- isRet = false;
- IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
- };
- const AMDILSubtarget *stm = &this->getTargetMachine()
- .getSubtarget<AMDILSubtarget>();
- AMDILKernelManager *KM = const_cast<AMDILKernelManager*>(
- stm->getKernelManager());
- KM->setOutputInst();
-
- Info.opc = IntNo;
- Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
- Info.ptrVal = I.getOperand(0);
- Info.offset = 0;
- Info.align = 4;
- Info.vol = true;
- Info.readMem = isRet;
- Info.writeMem = true;
- return true;
+ return false;
}
// The backend supports 32 and 64 bit floating point immediates
bool
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
-MachineBasicBlock *
-AMDILTargetLowering::EmitInstrWithCustomInserter(
- MachineInstr *MI, MachineBasicBlock *BB) const
-{
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
- switch (MI->getOpcode()) {
- ExpandCaseToAllTypes(AMDIL::CMP);
- generateCMPInstr(MI, BB, TII);
- MI->eraseFromParent();
- break;
- default:
- break;
- }
- return BB;
-}
-
// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
LOWER(JumpTable);
LOWER(ConstantPool);
LOWER(ExternalSymbol);
- LOWER(FP_TO_SINT);
- LOWER(FP_TO_UINT);
- LOWER(SINT_TO_FP);
- LOWER(UINT_TO_FP);
- LOWER(ADD);
- LOWER(MUL);
- LOWER(SUB);
- LOWER(FDIV);
LOWER(SDIV);
LOWER(SREM);
- LOWER(UDIV);
- LOWER(UREM);
LOWER(BUILD_VECTOR);
- LOWER(INSERT_VECTOR_ELT);
- LOWER(EXTRACT_VECTOR_ELT);
- LOWER(EXTRACT_SUBVECTOR);
- LOWER(SCALAR_TO_VECTOR);
- LOWER(CONCAT_VECTORS);
- LOWER(AND);
- LOWER(OR);
LOWER(SELECT);
- LOWER(SELECT_CC);
- LOWER(SETCC);
LOWER(SIGN_EXTEND_INREG);
- LOWER(BITCAST);
LOWER(DYNAMIC_STACKALLOC);
LOWER(BRCOND);
LOWER(BR_CC);
- LOWER(FP_ROUND);
}
return Op;
}
-int
-AMDILTargetLowering::getVarArgsFrameOffset() const
-{
- return VarArgsFrameOffset;
-}
#undef LOWER
SDValue
SDValue DST = Op;
const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *G = GADN->getGlobal();
- const AMDILSubtarget *stm = &this->getTargetMachine()
- .getSubtarget<AMDILSubtarget>();
- const AMDILGlobalManager *GM = stm->getGlobalManager();
DebugLoc DL = Op.getDebugLoc();
- int64_t base_offset = GADN->getOffset();
- int32_t arrayoffset = GM->getArrayOffset(G->getName());
- int32_t constoffset = GM->getConstOffset(G->getName());
- if (arrayoffset != -1) {
- DST = DAG.getConstant(arrayoffset, MVT::i32);
- DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
- DST, DAG.getConstant(base_offset, MVT::i32));
- } else if (constoffset != -1) {
- if (GM->getConstHWBit(G->getName())) {
- DST = DAG.getConstant(constoffset, MVT::i32);
- DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
- DST, DAG.getConstant(base_offset, MVT::i32));
- } else {
- SDValue addr = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
- SDValue DPReg = DAG.getRegister(AMDIL::SDP, MVT::i32);
- DPReg = DAG.getNode(ISD::ADD, DL, MVT::i32, DPReg,
- DAG.getConstant(base_offset, MVT::i32));
- DST = DAG.getNode(AMDILISD::ADDADDR, DL, MVT::i32, addr, DPReg);
- }
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+ if (!GV) {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
} else {
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
- if (!GV) {
- DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
- } else {
- if (GV->hasInitializer()) {
- const Constant *C = dyn_cast<Constant>(GV->getInitializer());
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
- DST = DAG.getConstant(CI->getValue(), Op.getValueType());
-
- } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
- DST = DAG.getConstantFP(CF->getValueAPF(),
- Op.getValueType());
- } else if (dyn_cast<ConstantAggregateZero>(C)) {
- EVT VT = Op.getValueType();
- if (VT.isInteger()) {
- DST = DAG.getConstant(0, VT);
- } else {
- DST = DAG.getConstantFP(0, VT);
- }
+ if (GV->hasInitializer()) {
+ const Constant *C = dyn_cast<Constant>(GV->getInitializer());
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ DST = DAG.getConstant(CI->getValue(), Op.getValueType());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
+ DST = DAG.getConstantFP(CF->getValueAPF(),
+ Op.getValueType());
+ } else if (dyn_cast<ConstantAggregateZero>(C)) {
+ EVT VT = Op.getValueType();
+ if (VT.isInteger()) {
+ DST = DAG.getConstant(0, VT);
} else {
- assert(!"lowering this type of Global Address "
- "not implemented yet!");
- C->dump();
- DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ DST = DAG.getConstantFP(0, VT);
}
} else {
+ assert(!"lowering this type of Global Address "
+ "not implemented yet!");
+ C->dump();
DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
}
+ } else {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
}
}
return DST;
SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
return Result;
}
+
/// LowerFORMAL_ARGUMENTS - transform physical registers into
/// virtual registers and generate load operations for
/// arguments places on the stack.
{
MachineFunction &MF = DAG.getMachineFunction();
- AMDILMachineFunctionInfo *FuncInfo
- = MF.getInfo<AMDILMachineFunctionInfo>();
MachineFrameInfo *MFI = MF.getFrameInfo();
//const Function *Fn = MF.getFunction();
//MachineRegisterInfo &RegInfo = MF.getRegInfo();
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
- const TargetRegisterClass *RC = getRegClassFromType(
+ const TargetRegisterClass *RC = getRegClassFor(
RegVT.getSimpleVT().SimpleTy);
unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
// See MipsISelLowering.cpp for ideas on how to implement
}*/
- unsigned int StackSize = CCInfo.getNextStackOffset();
if (isVarArg) {
assert(0 && "Variable arguments are not yet supported");
// See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
}
// This needs to be changed to non-zero if the return function needs
// to pop bytes
- FuncInfo->setBytesToPopOnReturn(StackSize);
return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
InVals);
}
-static void checkMADType(
- SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
-{
- bool globalLoadStore = false;
- is24bitMAD = false;
- is32bitMAD = false;
- return;
- assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
- "this to work correctly!");
- if (Op.getNode()->use_empty()) {
- return;
- }
- for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
- nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
- SDNode *ptr = *nBegin;
- const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
- // If we are not a LSBaseSDNode then we don't do this
- // optimization.
- // If we are a LSBaseSDNode, but the op is not the offset
- // or base pointer, then we don't do this optimization
- // (i.e. we are the value being stored)
- if (!lsNode ||
- (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
- return;
- }
- const PointerType *PT =
- dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
- unsigned as = PT->getAddressSpace();
- switch(as) {
- default:
- globalLoadStore = true;
- case AMDILAS::PRIVATE_ADDRESS:
- if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
- globalLoadStore = true;
- }
- break;
- case AMDILAS::CONSTANT_ADDRESS:
- if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
- globalLoadStore = true;
- }
- break;
- case AMDILAS::LOCAL_ADDRESS:
- if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
- globalLoadStore = true;
- }
- break;
- case AMDILAS::REGION_ADDRESS:
- if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
- globalLoadStore = true;
- }
- break;
- }
- }
- if (globalLoadStore) {
- is32bitMAD = true;
- } else {
- is24bitMAD = true;
- }
-}
SDValue
-AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
+AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
SDValue DST;
- const AMDILSubtarget *stm = &this->getTargetMachine()
- .getSubtarget<AMDILSubtarget>();
- bool isVec = OVT.isVector();
if (OVT.getScalarType() == MVT::i64) {
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i64) {
- INTTY = MVT::v2i32;
- }
- if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
- && INTTY == MVT::i32) {
- DST = DAG.getNode(AMDILISD::ADD,
- DL,
- OVT,
- LHS, RHS);
- } else {
- SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
- // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
- LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
- RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
- LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
- RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
- INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
- INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
- SDValue cmp;
- cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
- INTLO, RHSLO);
- cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
- INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
- DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
- INTLO, INTHI);
- }
+ DST = LowerSDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerSDIV24(Op, DAG);
} else {
- if (LHS.getOpcode() == ISD::FrameIndex ||
- RHS.getOpcode() == ISD::FrameIndex) {
- DST = DAG.getNode(AMDILISD::ADDADDR,
- DL,
- OVT,
- LHS, RHS);
- } else {
- if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
- && LHS.getNumOperands()
- && RHS.getNumOperands()) {
- bool is24bitMAD = false;
- bool is32bitMAD = false;
- const ConstantSDNode *LHSConstOpCode =
- dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
- const ConstantSDNode *RHSConstOpCode =
- dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
- if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
- || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
- || LHS.getOpcode() == ISD::MUL
- || RHS.getOpcode() == ISD::MUL) {
- SDValue Op1, Op2, Op3;
- // FIXME: Fix this so that it works for unsigned 24bit ops.
- if (LHS.getOpcode() == ISD::MUL) {
- Op1 = LHS.getOperand(0);
- Op2 = LHS.getOperand(1);
- Op3 = RHS;
- } else if (RHS.getOpcode() == ISD::MUL) {
- Op1 = RHS.getOperand(0);
- Op2 = RHS.getOperand(1);
- Op3 = LHS;
- } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
- Op1 = LHS.getOperand(0);
- Op2 = DAG.getConstant(
- 1 << LHSConstOpCode->getZExtValue(), MVT::i32);
- Op3 = RHS;
- } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
- Op1 = RHS.getOperand(0);
- Op2 = DAG.getConstant(
- 1 << RHSConstOpCode->getZExtValue(), MVT::i32);
- Op3 = LHS;
- }
- checkMADType(Op, stm, is24bitMAD, is32bitMAD);
- // We can possibly do a MAD transform!
- if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
- uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
- SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
- DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
- DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
- Op1, Op2, Op3);
- } else if(is32bitMAD) {
- SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
- DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
- DL, Tys, DAG.getEntryNode(),
- DAG.getConstant(
- AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
- Op1, Op2, Op3);
- }
- }
- }
- DST = DAG.getNode(AMDILISD::ADD,
- DL,
- OVT,
- LHS, RHS);
- }
+ DST = SDValue(Op.getNode(), 0);
}
return DST;
}
-SDValue
-AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
- uint32_t bits) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT INTTY = Op.getValueType();
- EVT FPTY;
- if (INTTY.isVector()) {
- FPTY = EVT(MVT::getVectorVT(MVT::f32,
- INTTY.getVectorNumElements()));
- } else {
- FPTY = EVT(MVT::f32);
- }
- /* static inline uint
- __clz_Nbit(uint x)
- {
- int xor = 0x3f800000U | x;
- float tp = as_float(xor);
- float t = tp + -1.0f;
- uint tint = as_uint(t);
- int cmp = (x != 0);
- uint tsrc = tint >> 23;
- uint tmask = tsrc & 0xffU;
- uint cst = (103 + N)U - tmask;
- return cmp ? cst : N;
- }
- */
- assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
- && "genCLZu16 only works on 32bit types");
- // uint x = Op
- SDValue x = Op;
- // xornode = 0x3f800000 | x
- SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
- DAG.getConstant(0x3f800000, INTTY), x);
- // float tp = as_float(xornode)
- SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
- // float t = tp + -1.0f
- SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
- DAG.getConstantFP(-1.0f, FPTY));
- // uint tint = as_uint(t)
- SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
- // int cmp = (x != 0)
- SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
- DAG.getConstant(0, INTTY));
- // uint tsrc = tint >> 23
- SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
- DAG.getConstant(23, INTTY));
- // uint tmask = tsrc & 0xFF
- SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
- DAG.getConstant(0xFFU, INTTY));
- // uint cst = (103 + bits) - tmask
- SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
- DAG.getConstant((103U + bits), INTTY), tmask);
- // return cmp ? cst : N
- cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
- DAG.getConstant(bits, INTTY));
- return cst;
-}
SDValue
-AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue DST = SDValue();
- DebugLoc DL = Op.getDebugLoc();
- EVT INTTY = Op.getValueType();
- const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
- if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
- //__clz_32bit(uint u)
- //{
- // int z = __amdil_ffb_hi(u) ;
- // return z < 0 ? 32 : z;
- // }
- // uint u = op
- SDValue u = Op;
- // int z = __amdil_ffb_hi(u)
- SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
- // int cmp = z < 0
- SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
- z, DAG.getConstant(0, INTTY));
- // return cmp ? 32 : z
- DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
- DAG.getConstant(32, INTTY), z);
- } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
- // static inline uint
- //__clz_32bit(uint x)
- //{
- // uint zh = __clz_16bit(x >> 16);
- // uint zl = __clz_16bit(x & 0xffffU);
- // return zh == 16U ? 16U + zl : zh;
- //}
- // uint x = Op
- SDValue x = Op;
- // uint xs16 = x >> 16
- SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
- DAG.getConstant(16, INTTY));
- // uint zh = __clz_16bit(xs16)
- SDValue zh = genCLZuN(xs16, DAG, 16);
- // uint xa16 = x & 0xFFFF
- SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
- DAG.getConstant(0xFFFFU, INTTY));
- // uint zl = __clz_16bit(xa16)
- SDValue zl = genCLZuN(xa16, DAG, 16);
- // uint cmp = zh == 16U
- SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- zh, DAG.getConstant(16U, INTTY));
- // uint zl16 = zl + 16
- SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
- DAG.getConstant(16, INTTY), zl);
- // return cmp ? zl16 : zh
- DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
- cmp, zl16, zh);
- } else {
- assert(0 && "Attempting to generate a CLZ function with an"
- " unknown graphics card");
- }
- return DST;
-}
-SDValue
-AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue DST = SDValue();
- DebugLoc DL = Op.getDebugLoc();
- EVT INTTY;
- EVT LONGTY = Op.getValueType();
- bool isVec = LONGTY.isVector();
- if (isVec) {
- INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
- .getVectorNumElements()));
- } else {
- INTTY = EVT(MVT::i32);
- }
- const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
- if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
- // Evergreen:
- // static inline uint
- // __clz_u64(ulong x)
- // {
- //uint zhi = __clz_32bit((uint)(x >> 32));
- //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
- //return zhi == 32U ? 32U + zlo : zhi;
- //}
- //ulong x = op
- SDValue x = Op;
- // uint xhi = x >> 32
- SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
- // uint xlo = x & 0xFFFFFFFF
- SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
- // uint zhi = __clz_32bit(xhi)
- SDValue zhi = genCLZu32(xhi, DAG);
- // uint zlo = __clz_32bit(xlo)
- SDValue zlo = genCLZu32(xlo, DAG);
- // uint cmp = zhi == 32
- SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- zhi, DAG.getConstant(32U, INTTY));
- // uint zlop32 = 32 + zlo
- SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
- DAG.getConstant(32U, INTTY), zlo);
- // return cmp ? zlop32: zhi
- DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
- } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
- // HD4XXX:
- // static inline uint
- //__clz_64bit(ulong x)
- //{
- //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
- //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
- //uint zl = __clz_23bit((uint)x & 0x7fffffU);
- //uint r = zh == 18U ? 18U + zm : zh;
- //return zh + zm == 41U ? 41U + zl : r;
- //}
- //ulong x = Op
- SDValue x = Op;
- // ulong xs46 = x >> 46
- SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
- DAG.getConstant(46, LONGTY));
- // uint ixs46 = (uint)xs46
- SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
- // ulong xs23 = x >> 23
- SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
- DAG.getConstant(23, LONGTY));
- // uint ixs23 = (uint)xs23
- SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
- // uint xs23m23 = ixs23 & 0x7FFFFF
- SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
- DAG.getConstant(0x7fffffU, INTTY));
- // uint ix = (uint)x
- SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
- // uint xm23 = ix & 0x7FFFFF
- SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
- DAG.getConstant(0x7fffffU, INTTY));
- // uint zh = __clz_23bit(ixs46)
- SDValue zh = genCLZuN(ixs46, DAG, 23);
- // uint zm = __clz_23bit(xs23m23)
- SDValue zm = genCLZuN(xs23m23, DAG, 23);
- // uint zl = __clz_23bit(xm23)
- SDValue zl = genCLZuN(xm23, DAG, 23);
- // uint zhm5 = zh - 5
- SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
- DAG.getConstant(-5U, INTTY));
- SDValue const18 = DAG.getConstant(18, INTTY);
- SDValue const41 = DAG.getConstant(41, INTTY);
- // uint cmp1 = zh = 18
- SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- zhm5, const18);
- // uint zhm5zm = zhm5 + zh
- SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
- // uint cmp2 = zhm5zm == 41
- SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- zhm5zm, const41);
- // uint zmp18 = zhm5 + 18
- SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
- // uint zlp41 = zl + 41
- SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
- // uint r = cmp1 ? zmp18 : zh
- SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
- cmp1, zmp18, zhm5);
- // return cmp2 ? zlp41 : r
- DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
- } else {
- assert(0 && "Attempting to generate a CLZ function with an"
- " unknown graphics card");
- }
- return DST;
-}
-SDValue
-AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
- bool includeSign) const
+AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
- EVT INTVT;
- EVT LONGVT;
+ EVT OVT = Op.getValueType();
SDValue DST;
- DebugLoc DL = RHS.getDebugLoc();
- EVT RHSVT = RHS.getValueType();
- bool isVec = RHSVT.isVector();
- if (isVec) {
- LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
- .getVectorNumElements()));
- INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
- .getVectorNumElements()));
- } else {
- LONGVT = EVT(MVT::i64);
- INTVT = EVT(MVT::i32);
- }
- const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
- if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- // unsigned version:
- // uint uhi = (uint)(d * 0x1.0p-32);
- // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
- // return as_ulong2((uint2)(ulo, uhi));
- //
- // signed version:
- // double ad = fabs(d);
- // long l = unsigned_version(ad);
- // long nl = -l;
- // return d == ad ? l : nl;
- SDValue d = RHS;
- if (includeSign) {
- d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
- }
- SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
- DAG.getConstantFP(0x2f800000, RHSVT));
- SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
- SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
- ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
- DAG.getConstantFP(0xcf800000, RHSVT), d);
- SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
- SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
- if (includeSign) {
- SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
- SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
- RHS, d);
- l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
- }
- DST = l;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerSREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerSREM8(Op, DAG);
} else {
- /*
- __attribute__((always_inline)) long
- cast_f64_to_i64(double d)
- {
- // Convert d in to 32-bit components
- long x = as_long(d);
- xhi = LCOMPHI(x);
- xlo = LCOMPLO(x);
-
- // Generate 'normalized' mantissa
- mhi = xhi | 0x00100000; // hidden bit
- mhi <<= 11;
- temp = xlo >> (32 - 11);
- mhi |= temp
- mlo = xlo << 11;
-
- // Compute shift right count from exponent
- e = (xhi >> (52-32)) & 0x7ff;
- sr = 1023 + 63 - e;
- srge64 = sr >= 64;
- srge32 = sr >= 32;
-
- // Compute result for 0 <= sr < 32
- rhi0 = mhi >> (sr &31);
- rlo0 = mlo >> (sr &31);
- temp = mhi << (32 - sr);
- temp |= rlo0;
- rlo0 = sr ? temp : rlo0;
-
- // Compute result for 32 <= sr
- rhi1 = 0;
- rlo1 = srge64 ? 0 : rhi0;
-
- // Pick between the 2 results
- rhi = srge32 ? rhi1 : rhi0;
- rlo = srge32 ? rlo1 : rlo0;
-
- // Optional saturate on overflow
- srlt0 = sr < 0;
- rhi = srlt0 ? MAXVALUE : rhi;
- rlo = srlt0 ? MAXVALUE : rlo;
-
- // Create long
- res = LCREATE( rlo, rhi );
-
- // Deal with sign bit (ignoring whether result is signed or unsigned value)
- if (includeSign) {
- sign = ((signed int) xhi) >> 31; fill with sign bit
- sign = LCREATE( sign, sign );
- res += sign;
- res ^= sign;
- }
-
- return res;
- }
- */
- SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
- SDValue c32 = DAG.getConstant( 32, INTVT );
-
- // Convert d in to 32-bit components
- SDValue d = RHS;
- SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
- SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
- SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
-
- // Generate 'normalized' mantissa
- SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
- xhi, DAG.getConstant( 0x00100000, INTVT ) );
- mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
- SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
- xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
- mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
- SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );
-
- // Compute shift right count from exponent
- SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
- xhi, DAG.getConstant( 52-32, INTVT ) );
- e = DAG.getNode( ISD::AND, DL, INTVT,
- e, DAG.getConstant( 0x7ff, INTVT ) );
- SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
- DAG.getConstant( 1023 + 63, INTVT ), e );
- SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
- sr, DAG.getConstant(64, INTVT));
- SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
- sr, DAG.getConstant(32, INTVT));
-
- // Compute result for 0 <= sr < 32
- SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
- SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
- temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
- temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
- temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
- rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );
-
- // Compute result for 32 <= sr
- SDValue rhi1 = DAG.getConstant( 0, INTVT );
- SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
- srge64, rhi1, rhi0 );
-
- // Pick between the 2 results
- SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
- srge32, rhi1, rhi0 );
- SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
- srge32, rlo1, rlo0 );
-
- // Create long
- SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
-
- // Deal with sign bit
- if (includeSign) {
- SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
- xhi, DAG.getConstant( 31, INTVT ) );
- sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
- res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
- res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
- }
- DST = res;
+ DST = SDValue(Op.getNode(), 0);
}
return DST;
}
-SDValue
-AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
- bool includeSign) const
-{
- EVT INTVT;
- EVT LONGVT;
- DebugLoc DL = RHS.getDebugLoc();
- EVT RHSVT = RHS.getValueType();
- bool isVec = RHSVT.isVector();
- if (isVec) {
- LONGVT = EVT(MVT::getVectorVT(MVT::i64,
- RHSVT.getVectorNumElements()));
- INTVT = EVT(MVT::getVectorVT(MVT::i32,
- RHSVT.getVectorNumElements()));
- } else {
- LONGVT = EVT(MVT::i64);
- INTVT = EVT(MVT::i32);
- }
- /*
- __attribute__((always_inline)) int
- cast_f64_to_[u|i]32(double d)
- {
- // Convert d in to 32-bit components
- long x = as_long(d);
- xhi = LCOMPHI(x);
- xlo = LCOMPLO(x);
-
- // Generate 'normalized' mantissa
- mhi = xhi | 0x00100000; // hidden bit
- mhi <<= 11;
- temp = xlo >> (32 - 11);
- mhi |= temp
-
- // Compute shift right count from exponent
- e = (xhi >> (52-32)) & 0x7ff;
- sr = 1023 + 31 - e;
- srge32 = sr >= 32;
-
- // Compute result for 0 <= sr < 32
- res = mhi >> (sr &31);
- res = srge32 ? 0 : res;
-
- // Optional saturate on overflow
- srlt0 = sr < 0;
- res = srlt0 ? MAXVALUE : res;
-
- // Deal with sign bit (ignoring whether result is signed or unsigned value)
- if (includeSign) {
- sign = ((signed int) xhi) >> 31; fill with sign bit
- res += sign;
- res ^= sign;
- }
- return res;
- }
- */
- SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
-
- // Convert d in to 32-bit components
- SDValue d = RHS;
- SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
- SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
- SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
-
- // Generate 'normalized' mantissa
- SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
- xhi, DAG.getConstant( 0x00100000, INTVT ) );
- mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
- SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
- xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
- mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
-
- // Compute shift right count from exponent
- SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
- xhi, DAG.getConstant( 52-32, INTVT ) );
- e = DAG.getNode( ISD::AND, DL, INTVT,
- e, DAG.getConstant( 0x7ff, INTVT ) );
- SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
- DAG.getConstant( 1023 + 31, INTVT ), e );
- SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
- sr, DAG.getConstant(32, INTVT));
-
- // Compute result for 0 <= sr < 32
- SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
- res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
- srge32, DAG.getConstant(0,INTVT), res );
-
- // Deal with sign bit
- if (includeSign) {
- SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
- xhi, DAG.getConstant( 31, INTVT ) );
- res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
- res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
- }
- return res;
-}
SDValue
-AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
+AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
- SDValue RHS = Op.getOperand(0);
- EVT RHSVT = RHS.getValueType();
- MVT RST = RHSVT.getScalarType().getSimpleVT();
- EVT LHSVT = Op.getValueType();
- MVT LST = LHSVT.getScalarType().getSimpleVT();
+ EVT VT = Op.getValueType();
+ SDValue Nodes1;
+ SDValue second;
+ SDValue third;
+ SDValue fourth;
DebugLoc DL = Op.getDebugLoc();
- SDValue DST;
- const AMDILTargetMachine*
- amdtm = reinterpret_cast<const AMDILTargetMachine*>
- (&this->getTargetMachine());
- const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());
- if (RST == MVT::f64 && RHSVT.isVector()
- && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- // We dont support vector 64bit floating point convertions.
- for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
- SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
- op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
- if (!x) {
- DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
- } else {
- DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
- DST, op, DAG.getTargetConstant(x, MVT::i32));
- }
- }
- } else {
- if (RST == MVT::f64
- && LST == MVT::i32) {
- if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- DST = SDValue(Op.getNode(), 0);
- } else {
- DST = genf64toi32(RHS, DAG, true);
- }
- } else if (RST == MVT::f64
- && LST == MVT::i64) {
- DST = genf64toi64(RHS, DAG, true);
- } else if (RST == MVT::f64
- && (LST == MVT::i8 || LST == MVT::i16)) {
- if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
- } else {
- SDValue ToInt = genf64toi32(RHS, DAG, true);
- DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
- }
-
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- }
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue DST;
- SDValue RHS = Op.getOperand(0);
- EVT RHSVT = RHS.getValueType();
- MVT RST = RHSVT.getScalarType().getSimpleVT();
- EVT LHSVT = Op.getValueType();
- MVT LST = LHSVT.getScalarType().getSimpleVT();
- DebugLoc DL = Op.getDebugLoc();
- const AMDILTargetMachine*
- amdtm = reinterpret_cast<const AMDILTargetMachine*>
- (&this->getTargetMachine());
- const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());
- if (RST == MVT::f64 && RHSVT.isVector()
- && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- // We dont support vector 64bit floating point convertions.
- for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
- SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
- op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
- if (!x) {
- DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
- } else {
- DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
- DST, op, DAG.getTargetConstant(x, MVT::i32));
- }
-
- }
- } else {
- if (RST == MVT::f64
- && LST == MVT::i32) {
- if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- DST = SDValue(Op.getNode(), 0);
- } else {
- DST = genf64toi32(RHS, DAG, false);
- }
- } else if (RST == MVT::f64
- && LST == MVT::i64) {
- DST = genf64toi64(RHS, DAG, false);
- } else if (RST == MVT::f64
- && (LST == MVT::i8 || LST == MVT::i16)) {
- if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
- } else {
- SDValue ToInt = genf64toi32(RHS, DAG, false);
- DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
- }
-
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- }
- return DST;
-}
-SDValue
-AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
- SelectionDAG &DAG) const
-{
- EVT RHSVT = RHS.getValueType();
- DebugLoc DL = RHS.getDebugLoc();
- EVT INTVT;
- EVT LONGVT;
- bool isVec = RHSVT.isVector();
- if (isVec) {
- LONGVT = EVT(MVT::getVectorVT(MVT::i64,
- RHSVT.getVectorNumElements()));
- INTVT = EVT(MVT::getVectorVT(MVT::i32,
- RHSVT.getVectorNumElements()));
- } else {
- LONGVT = EVT(MVT::i64);
- INTVT = EVT(MVT::i32);
- }
- SDValue x = RHS;
- const AMDILTargetMachine*
- amdtm = reinterpret_cast<const AMDILTargetMachine*>
- (&this->getTargetMachine());
- const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());
- if (stm->calVersion() >= CAL_VERSION_SC_135) {
- // unsigned x = RHS;
- // ulong xd = (ulong)(0x4330_0000 << 32) | x;
- // double d = as_double( xd );
- // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
- SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
- DAG.getConstant( 0x43300000, INTVT ) );
- SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
- SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
- DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
- return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
- } else {
- SDValue clz = genCLZu32(x, DAG);
-
- // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
- // Except for an input 0... which requires a 0 exponent
- SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
- DAG.getConstant( (1023+31), INTVT), clz );
- exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );
-
- // Normalize frac
- SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );
-
- // Eliminate hidden bit
- rhi = DAG.getNode( ISD::AND, DL, INTVT,
- rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
-
- // Pack exponent and frac
- SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
- rhi, DAG.getConstant( (32 - 11), INTVT ) );
- rhi = DAG.getNode( ISD::SRL, DL, INTVT,
- rhi, DAG.getConstant( 11, INTVT ) );
- exp = DAG.getNode( ISD::SHL, DL, INTVT,
- exp, DAG.getConstant( 20, INTVT ) );
- rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
-
- // Convert 2 x 32 in to 1 x 64, then to double precision float type
- SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
- return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
- }
-}
-SDValue
-AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
- SelectionDAG &DAG) const
-{
- EVT RHSVT = RHS.getValueType();
- DebugLoc DL = RHS.getDebugLoc();
- EVT INTVT;
- EVT LONGVT;
- bool isVec = RHSVT.isVector();
- if (isVec) {
- INTVT = EVT(MVT::getVectorVT(MVT::i32,
- RHSVT.getVectorNumElements()));
- } else {
- INTVT = EVT(MVT::i32);
- }
- LONGVT = RHSVT;
- SDValue x = RHS;
- const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
- if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- // double dhi = (double)(as_uint2(x).y);
- // double dlo = (double)(as_uint2(x).x);
- // return mad(dhi, 0x1.0p+32, dlo)
- SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
- dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
- SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
- dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
- return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
- DAG.getConstantFP(0x4f800000, LHSVT), dlo);
- } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
- // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
- // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
- // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
- SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
- SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
- SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
- SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
- SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
- SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
- SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
- DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
- hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
- return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
-
- } else {
- SDValue clz = genCLZu64(x, DAG);
- SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
- SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
-
- // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
- SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
- DAG.getConstant( (1023+63), INTVT), clz );
- SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
- exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
- mash, exp, mash ); // exp = exp, or 0 if input was 0
-
- // Normalize frac
- SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
- clz, DAG.getConstant( 31, INTVT ) );
- SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
- DAG.getConstant( 32, INTVT ), clz31 );
- SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
- SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
- t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
- SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
- SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
- SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
- SDValue rlo2 = DAG.getConstant( 0, INTVT );
- SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
- clz, DAG.getConstant( 32, INTVT ) );
- SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
- clz32, rhi2, rhi1 );
- SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
- clz32, rlo2, rlo1 );
-
- // Eliminate hidden bit
- rhi = DAG.getNode( ISD::AND, DL, INTVT,
- rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
-
- // Save bits needed to round properly
- SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
- rlo, DAG.getConstant( 0x7ff, INTVT ) );
-
- // Pack exponent and frac
- rlo = DAG.getNode( ISD::SRL, DL, INTVT,
- rlo, DAG.getConstant( 11, INTVT ) );
- SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
- rhi, DAG.getConstant( (32 - 11), INTVT ) );
- rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
- rhi = DAG.getNode( ISD::SRL, DL, INTVT,
- rhi, DAG.getConstant( 11, INTVT ) );
- exp = DAG.getNode( ISD::SHL, DL, INTVT,
- exp, DAG.getConstant( 20, INTVT ) );
- rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
-
- // Compute rounding bit
- SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
- rlo, DAG.getConstant( 1, INTVT ) );
- SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
- round, DAG.getConstant( 0x3ff, INTVT ) );
- grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
- DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
- grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
- grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
- round = DAG.getNode( ISD::SRL, DL, INTVT,
- round, DAG.getConstant( 10, INTVT ) );
- round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
-
- // Add rounding bit
- SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
- round, DAG.getConstant( 0, INTVT ) );
- SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
- res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
- return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
- }
-}
-SDValue
-AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue RHS = Op.getOperand(0);
- EVT RHSVT = RHS.getValueType();
- MVT RST = RHSVT.getScalarType().getSimpleVT();
- EVT LHSVT = Op.getValueType();
- MVT LST = LHSVT.getScalarType().getSimpleVT();
- DebugLoc DL = Op.getDebugLoc();
- SDValue DST;
- EVT INTVT;
- EVT LONGVT;
- const AMDILTargetMachine*
- amdtm = reinterpret_cast<const AMDILTargetMachine*>
- (&this->getTargetMachine());
- const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());
- if (LST == MVT::f64 && LHSVT.isVector()
- && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- // We dont support vector 64bit floating point convertions.
- DST = Op;
- for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
- SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
- op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
- if (!x) {
- DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
- } else {
- DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
- op, DAG.getTargetConstant(x, MVT::i32));
- }
-
- }
- } else {
-
- if (RST == MVT::i32
- && LST == MVT::f64) {
- if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- DST = SDValue(Op.getNode(), 0);
- } else {
- DST = genu32tof64(RHS, LHSVT, DAG);
- }
- } else if (RST == MVT::i64
- && LST == MVT::f64) {
- DST = genu64tof64(RHS, LHSVT, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- }
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue RHS = Op.getOperand(0);
- EVT RHSVT = RHS.getValueType();
- MVT RST = RHSVT.getScalarType().getSimpleVT();
- EVT INTVT;
- EVT LONGVT;
- SDValue DST;
- bool isVec = RHSVT.isVector();
- DebugLoc DL = Op.getDebugLoc();
- EVT LHSVT = Op.getValueType();
- MVT LST = LHSVT.getScalarType().getSimpleVT();
- const AMDILTargetMachine*
- amdtm = reinterpret_cast<const AMDILTargetMachine*>
- (&this->getTargetMachine());
- const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());
- if (LST == MVT::f64 && LHSVT.isVector()
- && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- // We dont support vector 64bit floating point convertions.
- for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
- SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
- op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
- if (!x) {
- DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
- } else {
- DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
- op, DAG.getTargetConstant(x, MVT::i32));
- }
-
- }
- } else {
-
- if (isVec) {
- LONGVT = EVT(MVT::getVectorVT(MVT::i64,
- RHSVT.getVectorNumElements()));
- INTVT = EVT(MVT::getVectorVT(MVT::i32,
- RHSVT.getVectorNumElements()));
- } else {
- LONGVT = EVT(MVT::i64);
- INTVT = EVT(MVT::i32);
- }
- MVT RST = RHSVT.getScalarType().getSimpleVT();
- if ((RST == MVT::i32 || RST == MVT::i64)
- && LST == MVT::f64) {
- if (RST == MVT::i32) {
- if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
- DST = SDValue(Op.getNode(), 0);
- return DST;
- }
- }
- SDValue c31 = DAG.getConstant( 31, INTVT );
- SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
-
- SDValue S; // Sign, as 0 or -1
- SDValue Sbit; // Sign bit, as one bit, MSB only.
- if (RST == MVT::i32) {
- Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
- S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
- } else { // 64-bit case... SRA of 64-bit values is slow
- SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
- Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
- SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
- S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
- }
-
- // get abs() of input value, given sign as S (0 or -1)
- // SpI = RHS + S
- SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
- // SpIxS = SpI ^ S
- SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
-
- // Convert unsigned value to double precision
- SDValue R;
- if (RST == MVT::i32) {
- // r = cast_u32_to_f64(SpIxS)
- R = genu32tof64(SpIxS, LHSVT, DAG);
- } else {
- // r = cast_u64_to_f64(SpIxS)
- R = genu64tof64(SpIxS, LHSVT, DAG);
- }
-
- // drop in the sign bit
- SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
- SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
- SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
- thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
- t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
- DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- }
- return DST;
-}
-SDValue
-AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue DST;
- bool isVec = RHS.getValueType().isVector();
- if (OVT.getScalarType() == MVT::i64) {
- /*const AMDILTargetMachine*
- amdtm = reinterpret_cast<const AMDILTargetMachine*>
- (&this->getTargetMachine());
- const AMDILSubtarget*
- stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());*/
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i64) {
- INTTY = MVT::v2i32;
- }
- SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
- // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
- LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
- RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
- LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
- RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
- INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
- INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
- //TODO: need to use IBORROW on HD5XXX and later hardware
- SDValue cmp;
- if (OVT == MVT::i64) {
- cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
- LHSLO, RHSLO);
- } else {
- SDValue cmplo;
- SDValue cmphi;
- SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
- SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
- SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
- SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
- cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
- DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
- LHSRLO, RHSRLO);
- cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
- DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
- LHSRHI, RHSRHI);
- cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
- cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
- cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
- }
- INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
- DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
- INTLO, INTHI);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-SDValue
-AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
-{
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::f64) {
- DST = LowerFDIV64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::f32) {
- DST = LowerFDIV32(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
-{
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerSDIV64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerSDIV32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16
- || OVT.getScalarType() == MVT::i8) {
- DST = LowerSDIV24(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
-{
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerUDIV64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerUDIV32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16
- || OVT.getScalarType() == MVT::i8) {
- DST = LowerUDIV24(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
-{
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerSREM64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerSREM32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16) {
- DST = LowerSREM16(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i8) {
- DST = LowerSREM8(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
-{
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerUREM64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerUREM32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16) {
- DST = LowerUREM16(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i8) {
- DST = LowerUREM8(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue DST;
- bool isVec = OVT.isVector();
- if (OVT.getScalarType() != MVT::i64)
- {
- DST = SDValue(Op.getNode(), 0);
- } else {
- assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
- // TODO: This needs to be turned into a tablegen pattern
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
-
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i64) {
- INTTY = MVT::v2i32;
- }
- // mul64(h1, l1, h0, l0)
- SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
- DL,
- INTTY, LHS);
- SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
- DL,
- INTTY, LHS);
- SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
- DL,
- INTTY, RHS);
- SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
- DL,
- INTTY, RHS);
- // MULLO_UINT_1 r1, h0, l1
- SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
- DL,
- INTTY, RHSHI, LHSLO);
- // MULLO_UINT_1 r2, h1, l0
- SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
- DL,
- INTTY, RHSLO, LHSHI);
- // ADD_INT hr, r1, r2
- SDValue ADDHI = DAG.getNode(ISD::ADD,
- DL,
- INTTY, RHILLO, RLOHHI);
- // MULHI_UINT_1 r3, l1, l0
- SDValue RLOLLO = DAG.getNode(ISD::MULHU,
- DL,
- INTTY, RHSLO, LHSLO);
- // ADD_INT hr, hr, r3
- SDValue HIGH = DAG.getNode(ISD::ADD,
- DL,
- INTTY, ADDHI, RLOLLO);
- // MULLO_UINT_1 l3, l1, l0
- SDValue LOW = DAG.getNode(AMDILISD::UMUL,
- DL,
- INTTY, LHSLO, RHSLO);
- DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
- DL,
- OVT, LOW, HIGH);
- }
- return DST;
-}
-SDValue
-AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
-{
- EVT VT = Op.getValueType();
- //printSDValue(Op, 1);
- SDValue Nodes1;
- SDValue second;
- SDValue third;
- SDValue fourth;
- DebugLoc DL = Op.getDebugLoc();
- Nodes1 = DAG.getNode(AMDILISD::VBUILD,
- DL,
- VT, Op.getOperand(0));
- bool allEqual = true;
- for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
- if (Op.getOperand(0) != Op.getOperand(x)) {
- allEqual = false;
- break;
+ Nodes1 = DAG.getNode(AMDILISD::VBUILD,
+ DL,
+ VT, Op.getOperand(0));
+#if 0
+ bool allEqual = true;
+ for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
+ if (Op.getOperand(0) != Op.getOperand(x)) {
+ allEqual = false;
+ break;
}
}
if (allEqual) {
return Nodes1;
}
+#endif
switch(Op.getNumOperands()) {
default:
case 1:
second,
DAG.getConstant(5, MVT::i32));
}
- break;
- };
- return Nodes1;
-}
-
-SDValue
-AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- const SDValue *ptr = NULL;
- const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
- uint32_t swizzleNum = 0;
- SDValue DST;
- if (!VT.isVector()) {
- SDValue Res = Op.getOperand(0);
- return Res;
- }
-
- if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
- ptr = &Op.getOperand(1);
- } else {
- ptr = &Op.getOperand(0);
- }
- if (CSDN) {
- swizzleNum = (uint32_t)CSDN->getZExtValue();
- uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
- uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
- DST = DAG.getNode(AMDILISD::VINSERT,
- DL,
- VT,
- Op.getOperand(0),
- *ptr,
- DAG.getTargetConstant(mask2, MVT::i32),
- DAG.getTargetConstant(mask3, MVT::i32));
- } else {
- uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
- uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
- SDValue res = DAG.getNode(AMDILISD::VINSERT,
- DL, VT, Op.getOperand(0), *ptr,
- DAG.getTargetConstant(mask2, MVT::i32),
- DAG.getTargetConstant(mask3, MVT::i32));
- for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
- mask2 = 0x04030201 & ~(0xFF << (x * 8));
- mask3 = 0x01010101 & (0xFF << (x * 8));
- SDValue t = DAG.getNode(AMDILISD::VINSERT,
- DL, VT, Op.getOperand(0), *ptr,
- DAG.getTargetConstant(mask2, MVT::i32),
- DAG.getTargetConstant(mask3, MVT::i32));
- SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
- DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
- Op.getOperand(2), DAG.getConstant(x, MVT::i32));
- c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
- res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
- }
- DST = res;
- }
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const
-{
- EVT VT = Op.getValueType();
- //printSDValue(Op, 1);
- const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- uint64_t swizzleNum = 0;
- DebugLoc DL = Op.getDebugLoc();
- SDValue Res;
- if (!Op.getOperand(0).getValueType().isVector()) {
- Res = Op.getOperand(0);
- return Res;
- }
- if (CSDN) {
- // Static vector extraction
- swizzleNum = CSDN->getZExtValue() + 1;
- Res = DAG.getNode(AMDILISD::VEXTRACT,
- DL, VT,
- Op.getOperand(0),
- DAG.getTargetConstant(swizzleNum, MVT::i32));
- } else {
- SDValue Op1 = Op.getOperand(1);
- uint32_t vecSize = 4;
- SDValue Op0 = Op.getOperand(0);
- SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
- DL, VT, Op0,
- DAG.getTargetConstant(1, MVT::i32));
- if (Op0.getValueType().isVector()) {
- vecSize = Op0.getValueType().getVectorNumElements();
- }
- for (uint32_t x = 2; x <= vecSize; ++x) {
- SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
- DL, VT, Op0,
- DAG.getTargetConstant(x, MVT::i32));
- SDValue c = DAG.getNode(AMDILISD::CMP,
- DL, Op1.getValueType(),
- DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
- Op1, DAG.getConstant(x, MVT::i32));
- res = DAG.getNode(AMDILISD::CMOVLOG, DL,
- VT, c, t, res);
-
- }
- Res = res;
- }
- return Res;
-}
-
-SDValue
-AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
- SelectionDAG &DAG) const
-{
- uint32_t vecSize = Op.getValueType().getVectorNumElements();
- SDValue src = Op.getOperand(0);
- const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- uint64_t offset = 0;
- EVT vecType = Op.getValueType().getVectorElementType();
- DebugLoc DL = Op.getDebugLoc();
- SDValue Result;
- if (CSDN) {
- offset = CSDN->getZExtValue();
- Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL,vecType, src, DAG.getConstant(offset, MVT::i32));
- Result = DAG.getNode(AMDILISD::VBUILD, DL,
- Op.getValueType(), Result);
- for (uint32_t x = 1; x < vecSize; ++x) {
- SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
- src, DAG.getConstant(offset + x, MVT::i32));
- if (elt.getOpcode() != ISD::UNDEF) {
- Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
- Op.getValueType(), Result, elt,
- DAG.getConstant(x, MVT::i32));
- }
- }
- } else {
- SDValue idx = Op.getOperand(1);
- Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, vecType, src, idx);
- Result = DAG.getNode(AMDILISD::VBUILD, DL,
- Op.getValueType(), Result);
- for (uint32_t x = 1; x < vecSize; ++x) {
- idx = DAG.getNode(ISD::ADD, DL, vecType,
- idx, DAG.getConstant(1, MVT::i32));
- SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
- src, idx);
- if (elt.getOpcode() != ISD::UNDEF) {
- Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
- Op.getValueType(), Result, elt, idx);
- }
- }
- }
- return Result;
-}
-SDValue
-AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
- SelectionDAG &DAG) const
-{
- SDValue Res = DAG.getNode(AMDILISD::VBUILD,
- Op.getDebugLoc(),
- Op.getValueType(),
- Op.getOperand(0));
- return Res;
-}
-SDValue
-AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue andOp;
- andOp = DAG.getNode(
- AMDILISD::AND,
- Op.getDebugLoc(),
- Op.getValueType(),
- Op.getOperand(0),
- Op.getOperand(1));
- return andOp;
-}
-SDValue
-AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue orOp;
- orOp = DAG.getNode(AMDILISD::OR,
- Op.getDebugLoc(),
- Op.getValueType(),
- Op.getOperand(0),
- Op.getOperand(1));
- return orOp;
-}
-SDValue
-AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Cond = Op.getOperand(0);
- SDValue LHS = Op.getOperand(1);
- SDValue RHS = Op.getOperand(2);
- DebugLoc DL = Op.getDebugLoc();
- Cond = getConversionNode(DAG, Cond, Op, true);
- Cond = DAG.getNode(AMDILISD::CMOVLOG,
- DL,
- Op.getValueType(), Cond, LHS, RHS);
- return Cond;
-}
-SDValue
-AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Cond;
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TRUE = Op.getOperand(2);
- SDValue FALSE = Op.getOperand(3);
- SDValue CC = Op.getOperand(4);
- DebugLoc DL = Op.getDebugLoc();
- bool skipCMov = false;
- bool genINot = false;
- EVT OVT = Op.getValueType();
-
- // Check for possible elimination of cmov
- if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
- const ConstantSDNode *trueConst
- = dyn_cast<ConstantSDNode>( TRUE.getNode() );
- const ConstantSDNode *falseConst
- = dyn_cast<ConstantSDNode>( FALSE.getNode() );
- if (trueConst && falseConst) {
- // both possible result values are constants
- if (trueConst->isAllOnesValue()
- && falseConst->isNullValue()) { // and convenient constants
- skipCMov = true;
- }
- else if (trueConst->isNullValue()
- && falseConst->isAllOnesValue()) { // less convenient
- skipCMov = true;
- genINot = true;
- }
- }
- }
- ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- unsigned int AMDILCC = CondCCodeToCC(
- SetCCOpcode,
- LHS.getValueType().getSimpleVT().SimpleTy);
- assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
- Cond = DAG.getNode(
- AMDILISD::CMP,
- DL,
- LHS.getValueType(),
- DAG.getConstant(AMDILCC, MVT::i32),
- LHS,
- RHS);
- Cond = getConversionNode(DAG, Cond, Op, true);
- if (genINot) {
- Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
- }
- if (!skipCMov) {
- Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
- }
- return Cond;
+ break;
+ };
+ return Nodes1;
}
+
SDValue
-AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
- SDValue Cond;
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue CC = Op.getOperand(2);
+ SDValue Cond = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
DebugLoc DL = Op.getDebugLoc();
- ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- unsigned int AMDILCC = CondCCodeToCC(
- SetCCOpcode,
- LHS.getValueType().getSimpleVT().SimpleTy);
- assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
- Cond = DAG.getNode(
- AMDILISD::CMP,
- DL,
- LHS.getValueType(),
- DAG.getConstant(AMDILCC, MVT::i32),
- LHS,
- RHS);
Cond = getConversionNode(DAG, Cond, Op, true);
- Cond = DAG.getNode(
- ISD::AND,
+ Cond = DAG.getNode(AMDILISD::CMOVLOG,
DL,
- Cond.getValueType(),
- DAG.getConstant(1, Cond.getValueType()),
- Cond);
+ Op.getValueType(), Cond, LHS, RHS);
return Cond;
}
}
}
-SDValue
-AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Src = Op.getOperand(0);
- SDValue Dst = Op;
- SDValue Res;
- DebugLoc DL = Op.getDebugLoc();
- EVT SrcVT = Src.getValueType();
- EVT DstVT = Dst.getValueType();
- // Lets bitcast the floating point types to an
- // equivalent integer type before converting to vectors.
- if (SrcVT.getScalarType().isFloatingPoint()) {
- Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
- SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
- SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
- Src);
- SrcVT = Src.getValueType();
- }
- uint32_t ScalarSrcSize = SrcVT.getScalarType()
- .getSimpleVT().getSizeInBits();
- uint32_t ScalarDstSize = DstVT.getScalarType()
- .getSimpleVT().getSizeInBits();
- uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
- bool isVec = SrcVT.isVector();
- if (DstVT.getScalarType().isInteger() &&
- (SrcVT.getScalarType().isInteger()
- || SrcVT.getScalarType().isFloatingPoint())) {
- if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
- || (ScalarSrcSize == 64
- && DstNumEle == 4
- && ScalarDstSize == 16)) {
- // This is the problematic case when bitcasting i64 <-> <4 x i16>
- // This approach is a little different as we cannot generate a
- // <4 x i64> vector
- // as that is illegal in our backend and we are already past
- // the DAG legalizer.
- // So, in this case, we will do the following conversion.
- // Case 1:
- // %dst = <4 x i16> %src bitconvert i64 ==>
- // %tmp = <4 x i16> %src convert <4 x i32>
- // %tmp = <4 x i32> %tmp and 0xFFFF
- // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
- // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
- // %dst = <2 x i32> %tmp bitcast i64
- // case 2:
- // %dst = i64 %src bitconvert <4 x i16> ==>
- // %tmp = i64 %src bitcast <2 x i32>
- // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
- // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
- // %tmp = <4 x i32> %tmp and 0xFFFF
- // %dst = <4 x i16> %tmp bitcast <4 x i32>
- SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
- DAG.getConstant(0xFFFF, MVT::i32));
- SDValue const16 = DAG.getConstant(16, MVT::i32);
- if (ScalarDstSize == 64) {
- // case 1
- Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
- Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
- SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
- Op, DAG.getConstant(0, MVT::i32));
- SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
- Op, DAG.getConstant(1, MVT::i32));
- y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
- SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
- Op, DAG.getConstant(2, MVT::i32));
- SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
- Op, DAG.getConstant(3, MVT::i32));
- w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
- x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
- y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
- Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
- return Res;
- } else {
- // case 2
- SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
- SDValue lor16
- = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
- SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
- SDValue hir16
- = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
- SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
- MVT::v4i32, lo);
- SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
- getPointerTy(), DAG.getConstant(1, MVT::i32));
- resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
- resVec, lor16, idxVal);
- idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
- getPointerTy(), DAG.getConstant(2, MVT::i32));
- resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
- resVec, hi, idxVal);
- idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
- getPointerTy(), DAG.getConstant(3, MVT::i32));
- resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
- resVec, hir16, idxVal);
- resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
- Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
- return Res;
- }
- } else {
- // There are four cases we need to worry about for bitcasts
- // where the size of all
- // source, intermediates and result is <= 128 bits, unlike
- // the above case
- // 1) Sub32bit bitcast 32bitAlign
- // %dst = <4 x i8> bitcast i32
- // (also <[2|4] x i16> to <[2|4] x i32>)
- // 2) 32bitAlign bitcast Sub32bit
- // %dst = i32 bitcast <4 x i8>
- // 3) Sub32bit bitcast LargerSub32bit
- // %dst = <2 x i8> bitcast i16
- // (also <4 x i8> to <2 x i16>)
- // 4) Sub32bit bitcast SmallerSub32bit
- // %dst = i16 bitcast <2 x i8>
- // (also <2 x i16> to <4 x i8>)
- // This also only handles types that are powers of two
- if ((ScalarDstSize & (ScalarDstSize - 1))
- || (ScalarSrcSize & (ScalarSrcSize - 1))) {
- } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
- // case 1:
- EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
-#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
- SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
-#else
- SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
- DAG.getUNDEF(IntTy.getScalarType()));
- for (uint32_t x = 0; x < SrcNumEle; ++x) {
- SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
- getPointerTy(), DAG.getConstant(x, MVT::i32));
- SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- SrcVT.getScalarType(), Src,
- DAG.getConstant(x, MVT::i32));
- temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
- res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
- res, temp, idx);
- }
-#endif
- SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
- DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
- SDValue *newEle = new SDValue[SrcNumEle];
- res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
- for (uint32_t x = 0; x < SrcNumEle; ++x) {
- newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- IntTy.getScalarType(), res,
- DAG.getConstant(x, MVT::i32));
- }
- uint32_t Ratio = SrcNumEle / DstNumEle;
- for (uint32_t x = 0; x < SrcNumEle; ++x) {
- if (x % Ratio) {
- newEle[x] = DAG.getNode(ISD::SHL, DL,
- IntTy.getScalarType(), newEle[x],
- DAG.getConstant(ScalarSrcSize * (x % Ratio),
- MVT::i32));
- }
- }
- for (uint32_t x = 0; x < SrcNumEle; x += 2) {
- newEle[x] = DAG.getNode(ISD::OR, DL,
- IntTy.getScalarType(), newEle[x], newEle[x + 1]);
- }
- if (ScalarSrcSize == 8) {
- for (uint32_t x = 0; x < SrcNumEle; x += 4) {
- newEle[x] = DAG.getNode(ISD::OR, DL,
- IntTy.getScalarType(), newEle[x], newEle[x + 2]);
- }
- if (DstNumEle == 1) {
- Dst = newEle[0];
- } else {
- Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
- newEle[0]);
- for (uint32_t x = 1; x < DstNumEle; ++x) {
- SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
- getPointerTy(), DAG.getConstant(x, MVT::i32));
- Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
- DstVT, Dst, newEle[x * 4], idx);
- }
- }
- } else {
- if (DstNumEle == 1) {
- Dst = newEle[0];
- } else {
- Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
- newEle[0]);
- for (uint32_t x = 1; x < DstNumEle; ++x) {
- SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
- getPointerTy(), DAG.getConstant(x, MVT::i32));
- Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
- DstVT, Dst, newEle[x * 2], idx);
- }
- }
- }
- delete [] newEle;
- return Dst;
- } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
- // case 2:
- EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
- SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
- DAG.getUNDEF(IntTy.getScalarType()));
- uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
- for (uint32_t x = 0; x < SrcNumEle; ++x) {
- for (uint32_t y = 0; y < mult; ++y) {
- SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
- getPointerTy(),
- DAG.getConstant(x * mult + y, MVT::i32));
- SDValue t;
- if (SrcNumEle > 1) {
- t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, SrcVT.getScalarType(), Src,
- DAG.getConstant(x, MVT::i32));
- } else {
- t = Src;
- }
- if (y != 0) {
- t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
- t, DAG.getConstant(y * ScalarDstSize,
- MVT::i32));
- }
- vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
- DL, IntTy, vec, t, idx);
- }
- }
- Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
- return Dst;
- } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
- // case 3:
- SDValue *numEle = new SDValue[SrcNumEle];
- for (uint32_t x = 0; x < SrcNumEle; ++x) {
- numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- MVT::i8, Src, DAG.getConstant(x, MVT::i32));
- numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
- numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
- DAG.getConstant(0xFF, MVT::i16));
- }
- for (uint32_t x = 1; x < SrcNumEle; x += 2) {
- numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
- DAG.getConstant(8, MVT::i16));
- numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
- numEle[x-1], numEle[x]);
- }
- if (DstNumEle > 1) {
- // If we are not a scalar i16, the only other case is a
- // v2i16 since we can't have v8i8 at this point, v4i16
- // cannot be generated
- Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
- numEle[0]);
- SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
- getPointerTy(), DAG.getConstant(1, MVT::i32));
- Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
- Dst, numEle[2], idx);
- } else {
- Dst = numEle[0];
- }
- delete [] numEle;
- return Dst;
- } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
- // case 4:
- SDValue *numEle = new SDValue[DstNumEle];
- for (uint32_t x = 0; x < SrcNumEle; ++x) {
- numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- MVT::i16, Src, DAG.getConstant(x, MVT::i32));
- numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
- numEle[x * 2], DAG.getConstant(8, MVT::i16));
- }
- MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
- Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
- for (uint32_t x = 1; x < DstNumEle; ++x) {
- SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
- getPointerTy(), DAG.getConstant(x, MVT::i32));
- Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
- Dst, numEle[x], idx);
- }
- delete [] numEle;
- ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
- Res = DAG.getSExtOrTrunc(Dst, DL, ty);
- return Res;
- }
- }
- }
- Res = DAG.getNode(AMDILISD::BITCONV,
- Dst.getDebugLoc(),
- Dst.getValueType(), Src);
- return Res;
-}
-
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const
{
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- unsigned int SPReg = AMDIL::SP;
+ unsigned int SPReg = AMDGPU::SP;
DebugLoc DL = Op.getDebugLoc();
SDValue SP = DAG.getCopyFromReg(Chain,
DL,
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
SDValue Chain = Op.getOperand(0);
- CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
+ SDValue CC = Op.getOperand(1);
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue JumpT = Op.getOperand(4);
SDValue CmpValue;
- ISD::CondCode CC = CCNode->get();
SDValue Result;
- unsigned int cmpOpcode = CondCCodeToCC(
- CC,
- LHS.getValueType().getSimpleVT().SimpleTy);
CmpValue = DAG.getNode(
- AMDILISD::CMP,
+ ISD::SELECT_CC,
Op.getDebugLoc(),
- LHS.getValueType(),
- DAG.getConstant(cmpOpcode, MVT::i32),
- LHS, RHS);
+ MVT::i32,
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
Result = DAG.getNode(
AMDILISD::BRANCH_COND,
CmpValue.getDebugLoc(),
return Result;
}
-SDValue
-AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Result = DAG.getNode(
- AMDILISD::DP_TO_FP,
- Op.getDebugLoc(),
- Op.getValueType(),
- Op.getOperand(0),
- Op.getOperand(1));
- return Result;
-}
-
-SDValue
-AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Result = DAG.getNode(
- AMDILISD::VCONCAT,
- Op.getDebugLoc(),
- Op.getValueType(),
- Op.getOperand(0),
- Op.getOperand(1));
- return Result;
-}
// LowerRET - Lower an ISD::RET node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
MVT::Other, &RetOps[0], RetOps.size());
return Flag;
}
-void
-AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
- unsigned int opCode) const
-{
- MachineOperand DST = MI->getOperand(0);
- MachineOperand LHS = MI->getOperand(2);
- MachineOperand RHS = MI->getOperand(3);
- unsigned int opi32Code = 0, si32Code = 0;
- unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
- uint32_t REGS[12];
- // All the relationals can be generated with with 6 temp registers
- for (int x = 0; x < 12; ++x) {
- REGS[x] = genVReg(simpleVT);
- }
- // Pull out the high and low components of each 64 bit register
- generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
- generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
- generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
- generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
- // Determine the correct opcode that we should use
- switch(opCode) {
- default:
- assert(!"comparison case not handled!");
- break;
- case AMDIL::LEQ:
- si32Code = opi32Code = AMDIL::IEQ;
- break;
- case AMDIL::LNE:
- si32Code = opi32Code = AMDIL::INE;
- break;
- case AMDIL::LLE:
- case AMDIL::ULLE:
- case AMDIL::LGE:
- case AMDIL::ULGE:
- if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
- std::swap(REGS[0], REGS[2]);
- } else {
- std::swap(REGS[1], REGS[3]);
- }
- if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
- opi32Code = AMDIL::ILT;
- } else {
- opi32Code = AMDIL::ULT;
- }
- si32Code = AMDIL::UGE;
- break;
- case AMDIL::LGT:
- case AMDIL::ULGT:
- std::swap(REGS[0], REGS[2]);
- std::swap(REGS[1], REGS[3]);
- case AMDIL::LLT:
- case AMDIL::ULLT:
- if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
- opi32Code = AMDIL::ILT;
- } else {
- opi32Code = AMDIL::ULT;
- }
- si32Code = AMDIL::ULT;
- break;
- };
- // Do the initial opcode on the high and low components.
- // This leaves the following:
- // REGS[4] = L_HI OP R_HI
- // REGS[5] = L_LO OP R_LO
- generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
- generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
- switch(opi32Code) {
- case AMDIL::IEQ:
- case AMDIL::INE:
- {
- // combine the results with an and or or depending on if
- // we are eq or ne
- uint32_t combineOp = (opi32Code == AMDIL::IEQ)
- ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
- generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
- }
- break;
- default:
- // this finishes codegen for the following pattern
- // REGS[4] || (REGS[5] && (L_HI == R_HI))
- generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
- generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
- REGS[9]);
- generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
- REGS[10]);
- break;
- }
- generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
-}
unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
return 0;
}
-void
-AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
- MachineBasicBlock::iterator &BBI,
- DebugLoc *DL, const TargetInstrInfo *TII) const
-{
- mBB = BB;
- mBBI = BBI;
- mDL = DL;
- mTII = TII;
-}
-uint32_t
-AMDILTargetLowering::genVReg(uint32_t regType) const
-{
- return mBB->getParent()->getRegInfo().createVirtualRegister(
- getRegClassFromID(regType));
-}
-
-MachineInstrBuilder
-AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
-{
- return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
-}
-
-MachineInstrBuilder
-AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
- uint32_t src1) const
-{
- return generateMachineInst(opcode, dst).addReg(src1);
-}
-
-MachineInstrBuilder
-AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
- uint32_t src1, uint32_t src2) const
-{
- return generateMachineInst(opcode, dst, src1).addReg(src2);
-}
-
-MachineInstrBuilder
-AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
- uint32_t src1, uint32_t src2, uint32_t src3) const
-{
- return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
-}
-
-
SDValue
AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
SDValue r1 = RHS;
// ilt r10, r0, 0
- SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
- r0, DAG.getConstant(0, OVT));
+ SDValue r10 = DAG.getSelectCC(DL,
+ r0, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
// ilt r11, r1, 0
- SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
- r1, DAG.getConstant(0, OVT));
+ SDValue r11 = DAG.getSelectCC(DL,
+ r1, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
return SDValue(Op.getNode(), 0);
}
-SDValue
-AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- MVT INTTY;
- MVT FLTTY;
- if (!OVT.isVector()) {
- INTTY = MVT::i32;
- FLTTY = MVT::f32;
- } else if (OVT.getVectorNumElements() == 2) {
- INTTY = MVT::v2i32;
- FLTTY = MVT::v2f32;
- } else if (OVT.getVectorNumElements() == 4) {
- INTTY = MVT::v4i32;
- FLTTY = MVT::v4f32;
- }
-
- // The LowerUDIV24 function implements the following CL.
- // int ia = (int)LHS
- // float fa = (float)ia
- // int ib = (int)RHS
- // float fb = (float)ib
- // float fq = native_divide(fa, fb)
- // fq = trunc(fq)
- // float t = mad(fq, fb, fb)
- // int iq = (int)fq - (t <= fa)
- // return (type)iq
-
- // int ia = (int)LHS
- SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
-
- // float fa = (float)ia
- SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
-
- // int ib = (int)RHS
- SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
-
- // float fb = (float)ib
- SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
-
- // float fq = native_divide(fa, fb)
- SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
-
- // fq = trunc(fq)
- fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
-
- // float t = mad(fq, fb, fb)
- SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
-
- // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
- SDValue iq;
- fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
- if (INTTY == MVT::i32) {
- iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
- } else {
- iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
- }
- iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
-
-
- // return (type)iq
- iq = DAG.getZExtOrTrunc(iq, DL, OVT);
- return iq;
-
-}
-
-SDValue
-AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
-{
- return SDValue(Op.getNode(), 0);
-}
-
-SDValue
-AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
-{
- return SDValue(Op.getNode(), 0);
-}
SDValue
AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
{
return SDValue(Op.getNode(), 0);
}
-
-SDValue
-AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i8) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4i8) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerUREM8 function generates equivalent to the following IL.
- // mov r0, as_u32(LHS)
- // mov r1, as_u32(RHS)
- // and r10, r0, 0xFF
- // and r11, r1, 0xFF
- // cmov_logical r3, r11, r11, 0x1
- // udiv r3, r10, r3
- // cmov_logical r3, r11, r3, 0
- // umul r3, r3, r11
- // sub r3, r10, r3
- // and as_u8(DST), r3, 0xFF
-
- // mov r0, as_u32(LHS)
- SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
-
- // mov r1, as_u32(RHS)
- SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
-
- // and r10, r0, 0xFF
- SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
- DAG.getConstant(0xFF, INTTY));
-
- // and r11, r1, 0xFF
- SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
- DAG.getConstant(0xFF, INTTY));
-
- // cmov_logical r3, r11, r11, 0x1
- SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
- DAG.getConstant(0x01, INTTY));
-
- // udiv r3, r10, r3
- r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
-
- // cmov_logical r3, r11, r3, 0
- r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
- DAG.getConstant(0, INTTY));
-
- // umul r3, r3, r11
- r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
-
- // sub r3, r10, r3
- r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
-
- // and as_u8(DST), r3, 0xFF
- SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
- DAG.getConstant(0xFF, INTTY));
- DST = DAG.getZExtOrTrunc(DST, DL, OVT);
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i16) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4i16) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerUREM16 function generatest equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // DIV = LowerUDIV16(LHS, RHS)
- // and r10, r0, 0xFFFF
- // and r11, r1, 0xFFFF
- // cmov_logical r3, r11, r11, 0x1
- // udiv as_u16(r3), as_u32(r10), as_u32(r3)
- // and r3, r3, 0xFFFF
- // cmov_logical r3, r11, r3, 0
- // umul r3, r3, r11
- // sub r3, r10, r3
- // and DST, r3, 0xFFFF
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // and r10, r0, 0xFFFF
- SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
- DAG.getConstant(0xFFFF, OVT));
-
- // and r11, r1, 0xFFFF
- SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
- DAG.getConstant(0xFFFF, OVT));
-
- // cmov_logical r3, r11, r11, 0x1
- SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
- DAG.getConstant(0x01, OVT));
-
- // udiv as_u16(r3), as_u32(r10), as_u32(r3)
- r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
- r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
- r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
- r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
- r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
-
- // and r3, r3, 0xFFFF
- r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
- DAG.getConstant(0xFFFF, OVT));
-
- // cmov_logical r3, r11, r3, 0
- r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
- DAG.getConstant(0, OVT));
- // umul r3, r3, r11
- r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
-
- // sub r3, r10, r3
- r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
-
- // and DST, r3, 0xFFFF
- SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
- DAG.getConstant(0xFFFF, OVT));
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerUREM32 function generates equivalent to the following IL.
- // udiv r20, LHS, RHS
- // umul r20, r20, RHS
- // sub DST, LHS, r20
-
- // udiv r20, LHS, RHS
- SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
-
- // umul r20, r20, RHS
- r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
-
- // sub DST, LHS, r20
- SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
-{
- return SDValue(Op.getNode(), 0);
-}
-
-
-SDValue
-AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2f32) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4f32) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue DST;
- const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
- if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
- // TODO: This doesn't work for vector types yet
- // The LowerFDIV32 function generates equivalent to the following
- // IL:
- // mov r20, as_int(LHS)
- // mov r21, as_int(RHS)
- // and r30, r20, 0x7f800000
- // and r31, r20, 0x807FFFFF
- // and r32, r21, 0x7f800000
- // and r33, r21, 0x807FFFFF
- // ieq r40, r30, 0x7F800000
- // ieq r41, r31, 0x7F800000
- // ieq r42, r32, 0
- // ieq r43, r33, 0
- // and r50, r20, 0x80000000
- // and r51, r21, 0x80000000
- // ior r32, r32, 0x3f800000
- // ior r33, r33, 0x3f800000
- // cmov_logical r32, r42, r50, r32
- // cmov_logical r33, r43, r51, r33
- // cmov_logical r32, r40, r20, r32
- // cmov_logical r33, r41, r21, r33
- // ior r50, r40, r41
- // ior r51, r42, r43
- // ior r50, r50, r51
- // inegate r52, r31
- // iadd r30, r30, r52
- // cmov_logical r30, r50, 0, r30
- // div_zeroop(infinity) r21, 1.0, r33
- // mul_ieee r20, r32, r21
- // and r22, r20, 0x7FFFFFFF
- // and r23, r20, 0x80000000
- // ishr r60, r22, 0x00000017
- // ishr r61, r30, 0x00000017
- // iadd r20, r20, r30
- // iadd r21, r22, r30
- // iadd r60, r60, r61
- // ige r42, 0, R60
- // ior r41, r23, 0x7F800000
- // ige r40, r60, 0x000000FF
- // cmov_logical r40, r50, 0, r40
- // cmov_logical r20, r42, r23, r20
- // cmov_logical DST, r40, r41, r20
- // as_float(DST)
-
- // mov r20, as_int(LHS)
- SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
-
- // mov r21, as_int(RHS)
- SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
-
- // and r30, r20, 0x7f800000
- SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
- DAG.getConstant(0x7F800000, INTTY));
-
- // and r31, r21, 0x7f800000
- SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
- DAG.getConstant(0x7f800000, INTTY));
-
- // and r32, r20, 0x807FFFFF
- SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
- DAG.getConstant(0x807FFFFF, INTTY));
-
- // and r33, r21, 0x807FFFFF
- SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
- DAG.getConstant(0x807FFFFF, INTTY));
-
- // ieq r40, r30, 0x7F800000
- SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- R30, DAG.getConstant(0x7F800000, INTTY));
-
- // ieq r41, r31, 0x7F800000
- SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- R31, DAG.getConstant(0x7F800000, INTTY));
-
- // ieq r42, r30, 0
- SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- R30, DAG.getConstant(0, INTTY));
-
- // ieq r43, r31, 0
- SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- R31, DAG.getConstant(0, INTTY));
-
- // and r50, r20, 0x80000000
- SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
- DAG.getConstant(0x80000000, INTTY));
-
- // and r51, r21, 0x80000000
- SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
- DAG.getConstant(0x80000000, INTTY));
-
- // ior r32, r32, 0x3f800000
- R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
- DAG.getConstant(0x3F800000, INTTY));
-
- // ior r33, r33, 0x3f800000
- R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
- DAG.getConstant(0x3F800000, INTTY));
-
- // cmov_logical r32, r42, r50, r32
- R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
-
- // cmov_logical r33, r43, r51, r33
- R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
-
- // cmov_logical r32, r40, r20, r32
- R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
-
- // cmov_logical r33, r41, r21, r33
- R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
-
- // ior r50, r40, r41
- R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
-
- // ior r51, r42, r43
- R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
-
- // ior r50, r50, r51
- R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
-
- // inegate r52, r31
- SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
-
- // iadd r30, r30, r52
- R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
-
- // cmov_logical r30, r50, 0, r30
- R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
- DAG.getConstant(0, INTTY), R30);
-
- // div_zeroop(infinity) r21, 1.0, as_float(r33)
- R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
- R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
- DAG.getConstantFP(1.0f, OVT), R33);
-
- // mul_ieee as_int(r20), as_float(r32), r21
- R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
- R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
- R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
-
- // div_zeroop(infinity) r21, 1.0, as_float(r33)
- R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
- R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
- DAG.getConstantFP(1.0f, OVT), R33);
-
- // mul_ieee as_int(r20), as_float(r32), r21
- R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
- R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
- R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
-
- // and r22, r20, 0x7FFFFFFF
- SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
- DAG.getConstant(0x7FFFFFFF, INTTY));
-
- // and r23, r20, 0x80000000
- SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
- DAG.getConstant(0x80000000, INTTY));
-
- // ishr r60, r22, 0x00000017
- SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
- DAG.getConstant(0x00000017, INTTY));
-
- // ishr r61, r30, 0x00000017
- SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
- DAG.getConstant(0x00000017, INTTY));
-
- // iadd r20, r20, r30
- R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
-
- // iadd r21, r22, r30
- R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
-
- // iadd r60, r60, r61
- R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
-
- // ige r42, 0, R60
- R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
- DAG.getConstant(0, INTTY),
- R60);
-
- // ior r41, r23, 0x7F800000
- R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
- DAG.getConstant(0x7F800000, INTTY));
-
- // ige r40, r60, 0x000000FF
- R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
- R60,
- DAG.getConstant(0x0000000FF, INTTY));
-
- // cmov_logical r40, r50, 0, r40
- R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
- DAG.getConstant(0, INTTY),
- R40);
-
- // cmov_logical r20, r42, r23, r20
- R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
-
- // cmov_logical DST, r40, r41, r20
- DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
-
- // as_float(DST)
- DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
- } else {
- // The following sequence of DAG nodes produce the following IL:
- // fabs r1, RHS
- // lt r2, 0x1.0p+96f, r1
- // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
- // mul_ieee r1, RHS, r3
- // div_zeroop(infinity) r0, LHS, r1
- // mul_ieee DST, r0, r3
-
- // fabs r1, RHS
- SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
- // lt r2, 0x1.0p+96f, r1
- SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
- DAG.getConstant(0x6f800000, INTTY), r1);
- // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
- SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
- DAG.getConstant(0x2f800000, INTTY),
- DAG.getConstant(0x3f800000, INTTY));
- // mul_ieee r1, RHS, r3
- r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
- // div_zeroop(infinity) r0, LHS, r1
- SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
- // mul_ieee DST, r0, r3
- DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
- }
- return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
-{
- return SDValue(Op.getNode(), 0);
-}