This code now lives in an external tree.
For the next Mesa release fetch the code from the master branch
of this LLVM repo:
http://cgit.freedesktop.org/~tstellar/llvm/
For all subsequent Mesa releases, fetch the code from the official LLVM
project:
www.llvm.org
+++ /dev/null
-AMDGPUInstrEnums.h.include
-AMDGPUInstrEnums.include
-AMDGPUInstrEnums.td
-AMDILGenAsmWriter.inc
-AMDILGenCallingConv.inc
-AMDILGenCodeEmitter.inc
-AMDILGenDAGISel.inc
-AMDILGenEDInfo.inc
-AMDILGenInstrInfo.inc
-AMDILGenIntrinsics.inc
-AMDILGenRegisterInfo.inc
-AMDILGenSubtargetInfo.inc
-R600HwRegInfo.include
-R600Intrinsics.td
-R600RegisterInfo.td
-SIRegisterGetHWRegNum.inc
-SIRegisterInfo.td
-loader
+++ /dev/null
-//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPU_H
-#define AMDGPU_H
-
-#include "AMDGPUTargetMachine.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
-class FunctionPass;
-class AMDGPUTargetMachine;
-
-// R600 Passes
-FunctionPass* createR600KernelParametersPass(const TargetData* TD);
-FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
-
-// SI Passes
-FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
-FunctionPass *createSILowerFlowControlPass(TargetMachine &tm);
-FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
-FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
-
-// Passes common to R600 and SI
-FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
-
-} // End namespace llvm
-
-namespace ShaderType {
- enum Type {
- PIXEL = 0,
- VERTEX = 1,
- GEOMETRY = 2,
- COMPUTE = 3
- };
-}
-
-#endif // AMDGPU_H
+++ /dev/null
-//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-
-// Include AMDIL TD files
-include "AMDILBase.td"
-
-
-def AMDGPUInstrInfo : InstrInfo {}
-
-//===----------------------------------------------------------------------===//
-// Declare the target which we are implementing
-//===----------------------------------------------------------------------===//
-def AMDGPUAsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
- int Variant = 0;
- bit isMCAsmWriter = 1;
-}
-
-def AMDGPU : Target {
- // Pull in Instruction Info:
- let InstructionSet = AMDGPUInstrInfo;
- let AssemblyWriters = [AMDGPUAsmWriter];
-}
-
-// Include AMDGPU TD files
-include "R600Schedule.td"
-include "SISchedule.td"
-include "Processors.td"
-include "AMDGPUInstrInfo.td"
-include "AMDGPUIntrinsics.td"
-include "AMDGPURegisterInfo.td"
-include "AMDGPUInstructions.td"
+++ /dev/null
-//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The AMDGPUAsmPrinter is used to print both assembly string and also binary
-// code. When passed an MCAsmStreamer it prints assembly and when passed
-// an MCObjectStreamer it outputs binary code.
-//
-//===----------------------------------------------------------------------===//
-//
-
-
-#include "AMDGPUAsmPrinter.h"
-#include "AMDGPU.h"
-#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-
-static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
- MCStreamer &Streamer) {
- return new AMDGPUAsmPrinter(tm, Streamer);
-}
-
-extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
- TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
-}
-
-/// runOnMachineFunction - We need to override this function so we can avoid
-/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
-bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
- if (STM.dumpCode()) {
- MF.dump();
- }
- SetupMachineFunction(MF);
- if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
- EmitProgramInfo(MF);
- }
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
- EmitFunctionBody();
- return false;
-}
-
-void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
- unsigned MaxSGPR = 0;
- unsigned MaxVGPR = 0;
- bool VCCUsed = false;
- const SIRegisterInfo * RI =
- static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
-
- unsigned numOperands = MI.getNumOperands();
- for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- MachineOperand & MO = MI.getOperand(op_idx);
- unsigned maxUsed;
- unsigned width = 0;
- bool isSGPR = false;
- unsigned reg;
- unsigned hwReg;
- if (!MO.isReg()) {
- continue;
- }
- reg = MO.getReg();
- if (reg == AMDGPU::VCC) {
- VCCUsed = true;
- continue;
- }
- switch (reg) {
- default: break;
- case AMDGPU::EXEC:
- case AMDGPU::SI_LITERAL_CONSTANT:
- case AMDGPU::SREG_LIT_0:
- case AMDGPU::M0:
- continue;
- }
-
- if (AMDGPU::SReg_32RegClass.contains(reg)) {
- isSGPR = true;
- width = 1;
- } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
- isSGPR = false;
- width = 1;
- } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
- isSGPR = true;
- width = 2;
- } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
- isSGPR = false;
- width = 2;
- } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
- isSGPR = true;
- width = 4;
- } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
- isSGPR = false;
- width = 4;
- } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
- isSGPR = true;
- width = 8;
- } else {
- assert(!"Unknown register class");
- }
- hwReg = RI->getHWRegNum(reg);
- maxUsed = hwReg + width - 1;
- if (isSGPR) {
- MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
- } else {
- MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
- }
- }
- }
- }
- if (VCCUsed) {
- MaxSGPR += 2;
- }
- SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
- OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
- OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
- OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
-}
+++ /dev/null
-//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// AMDGPU Assembly printer class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPU_ASMPRINTER_H
-#define AMDGPU_ASMPRINTER_H
-
-#include "llvm/CodeGen/AsmPrinter.h"
-
-namespace llvm {
-
-class AMDGPUAsmPrinter : public AsmPrinter {
-
-public:
- explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "AMDGPU Assembly Printer";
- }
-
- /// EmitProgramInfo - Emit register usage information so that the GPU driver
- /// can correctly setup the GPU state.
- void EmitProgramInfo(MachineFunction &MF);
-
- /// EmitInstuction - Implemented in AMDGPUMCInstLower.cpp
- virtual void EmitInstruction(const MachineInstr *MI);
-};
-
-} // End anonymous llvm
-
-#endif //AMDGPU_ASMPRINTER_H
+++ /dev/null
-//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// CodeEmitter interface for R600 and SI codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUCODEEMITTER_H
-#define AMDGPUCODEEMITTER_H
-
-namespace llvm {
-
- class AMDGPUCodeEmitter {
- public:
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
- virtual uint64_t getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const { return 0; }
- virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
- unsigned OpNo) const {
- return 0;
- }
- virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
- unsigned OpNo) const {
- return 0;
- }
- virtual uint64_t VOPPostEncode(const MachineInstr &MI,
- uint64_t Value) const {
- return Value;
- }
- virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
- unsigned OpNo) const {
- return 0;
- }
- virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
- const {
- return 0;
- }
- };
-
-} // End namespace llvm
-
-#endif // AMDGPUCODEEMITTER_H
+++ /dev/null
-//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass lowers AMDIL machine instructions to the appropriate hardware
-// instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUConvertToISAPass : public MachineFunctionPass {
-
-private:
- static char ID;
- TargetMachine &TM;
-
-public:
- AMDGPUConvertToISAPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm) { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
-
-};
-
-} // End anonymous namespace
-
-char AMDGPUConvertToISAPass::ID = 0;
-
-FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
- return new AMDGPUConvertToISAPass(tm);
-}
-
-bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
-{
- const AMDGPUInstrInfo * TII =
- static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
- }
- }
- return false;
-}
+++ /dev/null
-//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is the parent TargetLowering class for hardware code gen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUISelLowering.h"
-#include "AMDILIntrinsicInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-
-using namespace llvm;
-
-AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
- TargetLowering(TM, new TargetLoweringObjectFileELF())
-{
-
- // Initialize target lowering borrowed from AMDIL
- InitAMDILLowering();
-
- // We need to custom lower some of the intrinsics
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-
- // Library functions. These default to Expand, but we have instructions
- // for them.
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FEXP2, MVT::f32, Legal);
- setOperationAction(ISD::FPOW, MVT::f32, Legal);
- setOperationAction(ISD::FLOG2, MVT::f32, Legal);
- setOperationAction(ISD::FABS, MVT::f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
-
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
-}
-
-//===---------------------------------------------------------------------===//
-// TargetLowering Callbacks
-//===---------------------------------------------------------------------===//
-
-SDValue AMDGPUTargetLowering::LowerFormalArguments(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const
-{
- for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
- InVals.push_back(SDValue());
- }
- return Chain;
-}
-
-SDValue AMDGPUTargetLowering::LowerReturn(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc DL, SelectionDAG &DAG) const
-{
- return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
-}
-
-//===---------------------------------------------------------------------===//
-// Target specific lowering
-//===---------------------------------------------------------------------===//
-
-SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
- const
-{
- switch (Op.getOpcode()) {
- default:
- Op.getNode()->dump();
- assert(0 && "Custom lowering code for this"
- "instruction is not implemented yet!");
- break;
- // AMDIL DAG lowering
- case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::SREM: return LowerSREM(Op, DAG);
- case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- // AMDGPU DAG lowering
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
- }
- return Op;
-}
-
-SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) const
-{
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
-
- switch (IntrinsicID) {
- default: return Op;
- case AMDGPUIntrinsic::AMDIL_abs:
- return LowerIntrinsicIABS(Op, DAG);
- case AMDGPUIntrinsic::AMDIL_exp:
- return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
- case AMDGPUIntrinsic::AMDGPU_lrp:
- return LowerIntrinsicLRP(Op, DAG);
- case AMDGPUIntrinsic::AMDIL_fraction:
- return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
- case AMDGPUIntrinsic::AMDIL_mad:
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3));
- case AMDGPUIntrinsic::AMDIL_max:
- return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
- case AMDGPUIntrinsic::AMDGPU_imax:
- return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
- case AMDGPUIntrinsic::AMDGPU_umax:
- return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
- case AMDGPUIntrinsic::AMDIL_min:
- return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
- case AMDGPUIntrinsic::AMDGPU_imin:
- return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
- case AMDGPUIntrinsic::AMDGPU_umin:
- return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
- case AMDGPUIntrinsic::AMDIL_round_nearest:
- return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
- }
-}
-
-///IABS(a) = SMAX(sub(0, a), a)
-SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
- SelectionDAG &DAG) const
-{
-
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
- Op.getOperand(1));
-
- return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
-}
-
-/// Linear Interpolation
-/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
-SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
- SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
- DAG.getConstantFP(1.0f, MVT::f32),
- Op.getOperand(1));
- SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
- Op.getOperand(3));
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
- Op.getOperand(2),
- OneSubAC);
-}
-
-
-
-SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
- SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
-
- SDValue Num = Op.getOperand(0);
- SDValue Den = Op.getOperand(1);
-
- SmallVector<SDValue, 8> Results;
-
- // RCP = URECIP(Den) = 2^32 / Den + e
- // e is rounding error.
- SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
-
- // RCP_LO = umulo(RCP, Den) */
- SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
-
- // RCP_HI = mulhu (RCP, Den) */
- SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
-
- // NEG_RCP_LO = -RCP_LO
- SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
- RCP_LO);
-
- // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
- SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
- NEG_RCP_LO, RCP_LO,
- ISD::SETEQ);
- // Calculate the rounding error from the URECIP instruction
- // E = mulhu(ABS_RCP_LO, RCP)
- SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
-
- // RCP_A_E = RCP + E
- SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
-
- // RCP_S_E = RCP - E
- SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
-
- // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
- SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
- RCP_A_E, RCP_S_E,
- ISD::SETEQ);
- // Quotient = mulhu(Tmp0, Num)
- SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
-
- // Num_S_Remainder = Quotient * Den
- SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
-
- // Remainder = Num - Num_S_Remainder
- SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
-
- // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
- SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
- DAG.getConstant(-1, VT),
- DAG.getConstant(0, VT),
- ISD::SETGE);
- // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
- SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
- DAG.getConstant(0, VT),
- DAG.getConstant(-1, VT),
- DAG.getConstant(0, VT),
- ISD::SETGE);
- // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
- SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
- Remainder_GE_Zero);
-
- // Calculate Division result:
-
- // Quotient_A_One = Quotient + 1
- SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
- DAG.getConstant(1, VT));
-
- // Quotient_S_One = Quotient - 1
- SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
- DAG.getConstant(1, VT));
-
- // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
- SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
- Quotient, Quotient_A_One, ISD::SETEQ);
-
- // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
- Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
- Quotient_S_One, Div, ISD::SETEQ);
-
- // Calculate Rem result:
-
- // Remainder_S_Den = Remainder - Den
- SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
-
- // Remainder_A_Den = Remainder + Den
- SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
-
- // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
- SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
- Remainder, Remainder_S_Den, ISD::SETEQ);
-
- // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
- Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
- Remainder_A_Den, Rem, ISD::SETEQ);
-
- DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
- DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
-
- return Op;
-}
-
-//===----------------------------------------------------------------------===//
-// Helper functions
-//===----------------------------------------------------------------------===//
-
-bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
-{
- if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
- return CFP->isExactlyValue(1.0);
- }
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- return C->isAllOnesValue();
- }
- return false;
-}
-
-bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
-{
- if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
- return CFP->getValueAPF().isZero();
- }
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- return C->isNullValue();
- }
- return false;
-}
-
-SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
- const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned VirtualRegister;
- if (!MRI.isLiveIn(Reg)) {
- VirtualRegister = MRI.createVirtualRegister(RC);
- MRI.addLiveIn(Reg, VirtualRegister);
- } else {
- VirtualRegister = MRI.getLiveInVirtReg(Reg);
- }
- return DAG.getRegister(VirtualRegister, VT);
-}
-
-#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
-
-const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
-{
- switch (Opcode) {
- default: return 0;
- // AMDIL DAG nodes
- NODE_NAME_CASE(MAD);
- NODE_NAME_CASE(CALL);
- NODE_NAME_CASE(UMUL);
- NODE_NAME_CASE(DIV_INF);
- NODE_NAME_CASE(RET_FLAG);
- NODE_NAME_CASE(BRANCH_COND);
-
- // AMDGPU DAG nodes
- NODE_NAME_CASE(FRACT)
- NODE_NAME_CASE(FMAX)
- NODE_NAME_CASE(SMAX)
- NODE_NAME_CASE(UMAX)
- NODE_NAME_CASE(FMIN)
- NODE_NAME_CASE(SMIN)
- NODE_NAME_CASE(UMIN)
- NODE_NAME_CASE(URECIP)
- NODE_NAME_CASE(INTERP)
- NODE_NAME_CASE(INTERP_P0)
- }
-}
+++ /dev/null
-//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the interface defintiion of the TargetLowering class
-// that is common to all AMD GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUISELLOWERING_H
-#define AMDGPUISELLOWERING_H
-
-#include "llvm/Target/TargetLowering.h"
-
-namespace llvm {
-
-class MachineRegisterInfo;
-
-class AMDGPUTargetLowering : public TargetLowering
-{
-private:
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
-
-protected:
-
- /// CreateLiveInRegister - Helper function that adds Reg to the LiveIn list
- /// of the DAG's MachineFunction. This returns a Register SDNode representing
- /// Reg.
- SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const;
-
- bool isHWTrueValue(SDValue Op) const;
- bool isHWFalseValue(SDValue Op) const;
-
-public:
- AMDGPUTargetLowering(TargetMachine &TM);
-
- virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc DL, SelectionDAG &DAG) const;
-
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
- virtual const char* getTargetNodeName(unsigned Opcode) const;
-
-// Functions defined in AMDILISelLowering.cpp
-public:
-
- /// computeMaskedBitsForTargetNode - Determine which of the bits specified
- /// in Mask are known to be either zero or one and return them in the
- /// KnownZero/KnownOne bitsets.
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
-
- virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const;
-
- /// isFPImmLegal - We want to mark f32/f64 floating point values as legal.
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
-
- /// ShouldShrinkFPConstant - We don't want to shrink f64/f32 constants.
- bool ShouldShrinkFPConstant(EVT VT) const;
-
-private:
- void InitAMDILLowering();
- SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
- EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
-};
-
-namespace AMDGPUISD
-{
-
-enum
-{
- // AMDIL ISD Opcodes
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- MAD, // 32bit Fused Multiply Add instruction
- CALL, // Function call based on a single integer
- UMUL, // 32bit unsigned multiplication
- DIV_INF, // Divide with infinity returned on zero divisor
- RET_FLAG,
- BRANCH_COND,
- // End AMDIL ISD Opcodes
- BITALIGN,
- FRACT,
- FMAX,
- SMAX,
- UMAX,
- FMIN,
- SMIN,
- UMIN,
- URECIP,
- INTERP,
- INTERP_P0,
- LAST_AMDGPU_ISD_NUMBER
-};
-
-
-} // End namespace AMDGPUISD
-
-namespace SIISD {
-
-enum {
- SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
- VCC_AND,
- VCC_BITCAST
-};
-
-} // End namespace SIISD
-
-} // End namespace llvm
-
-#endif // AMDGPUISELLOWERING_H
+++ /dev/null
-//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the implementation of the TargetInstrInfo class that is
-// common to all AMD GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "AMDIL.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-#define GET_INSTRINFO_CTOR
-#include "AMDGPUGenInstrInfo.inc"
-
-using namespace llvm;
-
-AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
- : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
-
-const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
- return RI;
-}
-
-bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SubIdx) const {
-// TODO: Implement this function
- return false;
-}
-
-unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
-// TODO: Implement this function
- return 0;
-}
-
-unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const {
-// TODO: Implement this function
- return 0;
-}
-
-bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
-// TODO: Implement this function
- return false;
-}
-unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
-// TODO: Implement this function
- return 0;
-}
-unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const {
-// TODO: Implement this function
- return 0;
-}
-bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
-// TODO: Implement this function
- return false;
-}
-
-MachineInstr *
-AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI,
- LiveVariables *LV) const {
-// TODO: Implement this function
- return NULL;
-}
-bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
- MachineBasicBlock &MBB) const {
- while (iter != MBB.end()) {
- switch (iter->getOpcode()) {
- default:
- break;
- ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
- case AMDGPU::BRANCH:
- return true;
- };
- ++iter;
- }
- return false;
-}
-
-MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
- MachineBasicBlock::iterator tmp = MBB->end();
- if (!MBB->size()) {
- return MBB->end();
- }
- while (--tmp) {
- if (tmp->getOpcode() == AMDGPU::ENDLOOP
- || tmp->getOpcode() == AMDGPU::ENDIF
- || tmp->getOpcode() == AMDGPU::ELSE) {
- if (tmp == MBB->begin()) {
- return tmp;
- } else {
- continue;
- }
- } else {
- return ++tmp;
- }
- }
- return MBB->end();
-}
-
-void
-AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
- int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- assert(!"Not Implemented");
-}
-
-void
-AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- assert(!"Not Implemented");
-}
-
-MachineInstr *
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
-// TODO: Implement this function
- return 0;
-}
-MachineInstr*
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) const {
- // TODO: Implement this function
- return 0;
-}
-bool
-AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const
-{
- // TODO: Implement this function
- return false;
-}
-bool
-AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
- unsigned Reg, bool UnfoldLoad,
- bool UnfoldStore,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- // TODO: Implement this function
- return false;
-}
-
-bool
-AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
- SmallVectorImpl<SDNode*> &NewNodes) const {
- // TODO: Implement this function
- return false;
-}
-
-unsigned
-AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore,
- unsigned *LoadRegIndex) const {
- // TODO: Implement this function
- return 0;
-}
-
-bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
- int64_t Offset1, int64_t Offset2,
- unsigned NumLoads) const {
- assert(Offset2 > Offset1
- && "Second offset should be larger than first offset!");
- // If we have less than 16 loads in a row, and the offsets are within 16,
- // then schedule together.
- // TODO: Make the loads schedule near if it fits in a cacheline
- return (NumLoads < 16 && (Offset2 - Offset1) < 16);
-}
-
-bool
-AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
- const {
- // TODO: Implement this function
- return true;
-}
-void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- // TODO: Implement this function
-}
-
-bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
- // TODO: Implement this function
- return false;
-}
-bool
-AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2)
- const {
- // TODO: Implement this function
- return false;
-}
-
-bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const {
- // TODO: Implement this function
- return false;
-}
-
-bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
- // TODO: Implement this function
- return MI->getDesc().isPredicable();
-}
-
-bool
-AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
- // TODO: Implement this function
- return true;
-}
-
-void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
- DebugLoc DL) const
-{
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const AMDGPURegisterInfo & RI = getRegisterInfo();
-
- for (unsigned i = 0; i < MI.getNumOperands(); i++) {
- MachineOperand &MO = MI.getOperand(i);
- // Convert dst regclass to one that is supported by the ISA
- if (MO.isReg() && MO.isDef()) {
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
- const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
-
- assert(newRegClass);
-
- MRI.setRegClass(MO.getReg(), newRegClass);
- }
- }
- }
-}
+++ /dev/null
-//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the definition of a TargetInstrInfo class that is common
-// to all AMD GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUINSTRUCTIONINFO_H_
-#define AMDGPUINSTRUCTIONINFO_H_
-
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUInstrInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-#include <map>
-
-#define GET_INSTRINFO_HEADER
-#define GET_INSTRINFO_ENUM
-#include "AMDGPUGenInstrInfo.inc"
-
-#define OPCODE_IS_ZERO_INT 0x00000042
-#define OPCODE_IS_NOT_ZERO_INT 0x00000045
-#define OPCODE_IS_ZERO 0x00000020
-#define OPCODE_IS_NOT_ZERO 0x00000023
-
-namespace llvm {
-
-class AMDGPUTargetMachine;
-class MachineFunction;
-class MachineInstr;
-class MachineInstrBuilder;
-
-class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
-private:
- const AMDGPURegisterInfo RI;
- TargetMachine &TM;
- bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
- MachineBasicBlock &MBB) const;
-public:
- explicit AMDGPUInstrInfo(TargetMachine &tm);
-
- virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
-
- bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &DstReg, unsigned &SubIdx) const;
-
- unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
- unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const;
- bool hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
- unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
- unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const;
- bool hasStoreFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
-
- MachineInstr *
- convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI,
- LiveVariables *LV) const;
-
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const = 0;
-
- void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
- void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
-protected:
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) const;
-public:
- bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
- bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
- unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
- SmallVectorImpl<MachineInstr *> &NewMIs) const;
- bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
- SmallVectorImpl<SDNode *> &NewNodes) const;
- unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore,
- unsigned *LoadRegIndex = 0) const;
- bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
- int64_t Offset1, int64_t Offset2,
- unsigned NumLoads) const;
-
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
- void insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
- bool isPredicated(const MachineInstr *MI) const;
- bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const;
- bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const;
- bool isPredicable(MachineInstr *MI) const;
- bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
-
- // Helper functions that check the opcode for status information
- bool isLoadInst(llvm::MachineInstr *MI) const;
- bool isExtLoadInst(llvm::MachineInstr *MI) const;
- bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
- bool isSExtLoadInst(llvm::MachineInstr *MI) const;
- bool isZExtLoadInst(llvm::MachineInstr *MI) const;
- bool isAExtLoadInst(llvm::MachineInstr *MI) const;
- bool isStoreInst(llvm::MachineInstr *MI) const;
- bool isTruncStoreInst(llvm::MachineInstr *MI) const;
-
- virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const = 0;
- virtual unsigned getIEQOpcode() const = 0;
- virtual bool isMov(unsigned opcode) const = 0;
-
- /// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
- /// MachineInstr
- virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
- DebugLoc DL) const;
-
-};
-
-} // End llvm namespace
-
-#endif // AMDGPUINSTRINFO_H_
+++ /dev/null
-//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains DAG node defintions for the AMDGPU target.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// AMDGPU DAG Profiles
-//===----------------------------------------------------------------------===//
-
-def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
-]>;
-
-//===----------------------------------------------------------------------===//
-// AMDGPU DAG Nodes
-//
-
-// out = ((a << 32) | b) >> c)
-//
-// Can be used to optimize rtol:
-// rotl(a, b) = bitalign(a, a, 32 - b)
-def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
-
-// out = a - floor(a)
-def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
-
-// out = max(a, b) a and b are floats
-def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = max(a, b) a and b are signed ints
-def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = max(a, b) a and b are unsigned ints
-def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = min(a, b) a and b are floats
-def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = min(a, b) a snd b are signed ints
-def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = min(a, b) a and b are unsigned ints
-def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// urecip - This operation is a helper for integer division, it returns the
-// result of 1 / a as a fractional unsigned integer.
-// out = (2^32 / a) + e
-// e is rounding error
-def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
-
-def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
+++ /dev/null
-//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains instruction defs that are common to all hw codegen
-// targets.
-//
-//===----------------------------------------------------------------------===//
-
-class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
- field bits<16> AMDILOp = 0;
- field bits<3> Gen = 0;
-
- let Namespace = "AMDGPU";
- let OutOperandList = outs;
- let InOperandList = ins;
- let AsmString = asm;
- let Pattern = pattern;
- let Itinerary = NullALU;
- let TSFlags{42-40} = Gen;
- let TSFlags{63-48} = AMDILOp;
-}
-
-class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
- : AMDGPUInst<outs, ins, asm, pattern> {
-
- field bits<32> Inst = 0xffffffff;
-
-}
-
-def COND_EQ : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOEQ: case ISD::SETUEQ:
- case ISD::SETEQ: return true;}}}]
->;
-
-def COND_NE : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETONE: case ISD::SETUNE:
- case ISD::SETNE: return true;}}}]
->;
-def COND_GT : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOGT: case ISD::SETUGT:
- case ISD::SETGT: return true;}}}]
->;
-
-def COND_GE : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOGE: case ISD::SETUGE:
- case ISD::SETGE: return true;}}}]
->;
-
-def COND_LT : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOLT: case ISD::SETULT:
- case ISD::SETLT: return true;}}}]
->;
-
-def COND_LE : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOLE: case ISD::SETULE:
- case ISD::SETLE: return true;}}}]
->;
-
-//===----------------------------------------------------------------------===//
-// Load/Store Pattern Fragments
-//===----------------------------------------------------------------------===//
-
-def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-class Constants {
-int TWO_PI = 0x40c90fdb;
-int PI = 0x40490fdb;
-int TWO_PI_INV = 0x3e22f983;
-}
-def CONST : Constants;
-
-def FP_ZERO : PatLeaf <
- (fpimm),
- [{return N->getValueAPF().isZero();}]
->;
-
-def FP_ONE : PatLeaf <
- (fpimm),
- [{return N->isExactlyValue(1.0);}]
->;
-
-let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
-
-class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
- (outs rc:$dst),
- (ins rc:$src0),
- "CLAMP $dst, $src0",
- [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
->;
-
-class FABS <RegisterClass rc> : AMDGPUShaderInst <
- (outs rc:$dst),
- (ins rc:$src0),
- "FABS $dst, $src0",
- [(set rc:$dst, (fabs rc:$src0))]
->;
-
-class FNEG <RegisterClass rc> : AMDGPUShaderInst <
- (outs rc:$dst),
- (ins rc:$src0),
- "FNEG $dst, $src0",
- [(set rc:$dst, (fneg rc:$src0))]
->;
-
-def SHADER_TYPE : AMDGPUShaderInst <
- (outs),
- (ins i32imm:$type),
- "SHADER_TYPE $type",
- [(int_AMDGPU_shader_type imm:$type)]
->;
-
-} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
-
-/* Generic helper patterns for intrinsics */
-/* -------------------------------------- */
-
-class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
- RegisterClass rc> : Pat <
- (fpow rc:$src0, rc:$src1),
- (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
->;
-
-/* Other helper patterns */
-/* --------------------- */
-
-/* Extract element pattern */
-class Extract_Element <ValueType sub_type, ValueType vec_type,
- RegisterClass vec_class, int sub_idx,
- SubRegIndex sub_reg>: Pat<
- (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
- (EXTRACT_SUBREG vec_class:$src, sub_reg)
->;
-
-/* Insert element pattern */
-class Insert_Element <ValueType elem_type, ValueType vec_type,
- RegisterClass elem_class, RegisterClass vec_class,
- int sub_idx, SubRegIndex sub_reg> : Pat <
-
- (vec_type (vector_insert (vec_type vec_class:$vec),
- (elem_type elem_class:$elem), sub_idx)),
- (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
->;
-
-// Vector Build pattern
-class Vector_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
- (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
- (elemType elemClass:$z), (elemType elemClass:$w))),
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
- elemClass:$z, sel_z), elemClass:$w, sel_w)
->;
-
-// bitconvert pattern
-class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
- (dt (bitconvert (st rc:$src0))),
- (dt rc:$src0)
->;
-
-include "R600Instructions.td"
-
-include "SIInstrInfo.td"
-
+++ /dev/null
-//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines intrinsics that are used by all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "AMDGPU", isTarget = 1 in {
-
- def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
- def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
- def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
- def int_AMDGPU_kilp : Intrinsic<[], [], []>;
- def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-
- def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
-}
-
-let TargetPrefix = "TGSI", isTarget = 1 in {
-
- def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
-}
-
-include "SIIntrinsics.td"
+++ /dev/null
-//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to lower AMDGPU MachineInstrs to their corresponding
-// MCInst.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "AMDGPUMCInstLower.h"
-#include "AMDGPUAsmPrinter.h"
-#include "R600InstrInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Constants.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-AMDGPUMCInstLower::AMDGPUMCInstLower() { }
-
-void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
- OutMI.setOpcode(MI->getOpcode());
-
- for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
-
- MCOperand MCOp;
- switch (MO.getType()) {
- default:
- llvm_unreachable("unknown operand type");
- case MachineOperand::MO_FPImmediate: {
- const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
- assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
- "Only floating point immediates are supported at the moment.");
- MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
- break;
- }
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
- break;
- case MachineOperand::MO_Register:
- MCOp = MCOperand::CreateReg(MO.getReg());
- break;
- }
- OutMI.addOperand(MCOp);
- }
-}
-
-void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- AMDGPUMCInstLower MCInstLowering;
-
- // Ignore placeholder instructions:
- if (MI->getOpcode() == AMDGPU::MASK_WRITE) {
- return;
- }
-
- if (MI->isBundle()) {
- const MachineBasicBlock *MBB = MI->getParent();
- MachineBasicBlock::const_instr_iterator I = MI;
- ++I;
- while (I != MBB->end() && I->isInsideBundle()) {
- MCInst MCBundleInst;
- const MachineInstr *BundledInst = I;
- MCInstLowering.lower(BundledInst, MCBundleInst);
- OutStreamer.EmitInstruction(MCBundleInst);
- ++I;
- }
- } else {
- MCInst TmpInst;
- MCInstLowering.lower(MI, TmpInst);
- OutStreamer.EmitInstruction(TmpInst);
- }
-}
+++ /dev/null
-//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPU_MCINSTLOWER_H
-#define AMDGPU_MCINSTLOWER_H
-
-namespace llvm {
-
-class MCInst;
-class MachineInstr;
-
-class AMDGPUMCInstLower {
-
-public:
- AMDGPUMCInstLower();
-
- /// lower - Lower a MachineInstr to an MCInst
- void lower(const MachineInstr *MI, MCInst &OutMI) const;
-
-};
-
-} // End namespace llvm
-
-#endif //AMDGPU_MCINSTLOWER_H
+++ /dev/null
-//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Parent TargetRegisterInfo class common to all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-
-using namespace llvm;
-
-AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
- const TargetInstrInfo &tii)
-: AMDGPUGenRegisterInfo(0),
- TM(tm),
- TII(tii)
- { }
-
-//===----------------------------------------------------------------------===//
-// Function handling callbacks - Functions are a seldom used feature of GPUS, so
-// they are not supported at this time.
-//===----------------------------------------------------------------------===//
-
-const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
-
-const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
- const {
- return &CalleeSavedReg;
-}
-
-void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj,
- RegScavenger *RS) const {
- assert(!"Subroutines not supported yet");
-}
-
-unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- assert(!"Subroutines not supported yet");
- return 0;
-}
-
-#define GET_REGINFO_TARGET_DESC
-#include "AMDGPUGenRegisterInfo.inc"
+++ /dev/null
-//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the TargetRegisterInfo interface that is implemented
-// by all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUREGISTERINFO_H_
-#define AMDGPUREGISTERINFO_H_
-
-#include "llvm/ADT/BitVector.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#define GET_REGINFO_ENUM
-#include "AMDGPUGenRegisterInfo.inc"
-
-namespace llvm {
-
-class AMDGPUTargetMachine;
-class TargetInstrInfo;
-
-struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo
-{
- TargetMachine &TM;
- const TargetInstrInfo &TII;
- static const uint16_t CalleeSavedReg;
-
- AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
-
- virtual BitVector getReservedRegs(const MachineFunction &MF) const {
- assert(!"Unimplemented"); return BitVector();
- }
-
- /// getISARegClass - rc is an AMDIL reg class. This function returns the
- /// ISA reg class that is equivalent to the given AMDIL reg class.
- virtual const TargetRegisterClass * getISARegClass(
- const TargetRegisterClass * rc) const {
- assert(!"Unimplemented"); return NULL;
- }
-
- virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
- assert(!"Unimplemented"); return NULL;
- }
-
- const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
- void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
- RegScavenger *RS) const;
- unsigned getFrameRegister(const MachineFunction &MF) const;
-
-};
-
-} // End namespace llvm
-
-#endif // AMDIDSAREGISTERINFO_H_
+++ /dev/null
-//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Tablegen register definitions common to all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-let Namespace = "AMDGPU" in {
- def sel_x : SubRegIndex;
- def sel_y : SubRegIndex;
- def sel_z : SubRegIndex;
- def sel_w : SubRegIndex;
-}
-
-include "R600RegisterInfo.td"
-include "SIRegisterInfo.td"
+++ /dev/null
-//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the AMDGPU specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUSubtarget.h"
-
-using namespace llvm;
-
-#define GET_SUBTARGETINFO_ENUM
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "AMDGPUGenSubtargetInfo.inc"
-
-AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
- AMDGPUGenSubtargetInfo(TT, CPU, FS), mDumpCode(false) {
- InstrItins = getInstrItineraryForCPU(CPU);
-
- memset(CapsOverride, 0, sizeof(*CapsOverride)
- * AMDGPUDeviceInfo::MaxNumberCapabilities);
- // Default card
- StringRef GPU = CPU;
- mIs64bit = false;
- mDefaultSize[0] = 64;
- mDefaultSize[1] = 1;
- mDefaultSize[2] = 1;
- ParseSubtargetFeatures(GPU, FS);
- mDevName = GPU;
- mDevice = AMDGPUDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
-}
-
-AMDGPUSubtarget::~AMDGPUSubtarget()
-{
- delete mDevice;
-}
-
-bool
-AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const
-{
- assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
- "Caps index is out of bounds!");
- return CapsOverride[caps];
-}
-bool
-AMDGPUSubtarget::is64bit() const
-{
- return mIs64bit;
-}
-bool
-AMDGPUSubtarget::isTargetELF() const
-{
- return false;
-}
-size_t
-AMDGPUSubtarget::getDefaultSize(uint32_t dim) const
-{
- if (dim > 3) {
- return 1;
- } else {
- return mDefaultSize[dim];
- }
-}
-
-std::string
-AMDGPUSubtarget::getDataLayout() const
-{
- if (!mDevice) {
- return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
- "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
- "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
- "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
- "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
- }
- return mDevice->getDataLayout();
-}
-
-std::string
-AMDGPUSubtarget::getDeviceName() const
-{
- return mDevName;
-}
-const AMDGPUDevice *
-AMDGPUSubtarget::device() const
-{
- return mDevice;
-}
+++ /dev/null
-//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file declares the AMDGPU specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _AMDGPUSUBTARGET_H_
-#define _AMDGPUSUBTARGET_H_
-#include "AMDILDevice.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-
-#define GET_SUBTARGETINFO_HEADER
-#include "AMDGPUGenSubtargetInfo.inc"
-
-#define MAX_CB_SIZE (1 << 16)
-
-namespace llvm {
-
-class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo
-{
-private:
- bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
- const AMDGPUDevice *mDevice;
- size_t mDefaultSize[3];
- size_t mMinimumSize[3];
- std::string mDevName;
- bool mIs64bit;
- bool mIs32on64bit;
- bool mDumpCode;
- bool mR600ALUInst;
-
- InstrItineraryData InstrItins;
-
-public:
- AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
- virtual ~AMDGPUSubtarget();
-
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
- virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
-
- bool isOverride(AMDGPUDeviceInfo::Caps) const;
- bool is64bit() const;
-
- // Helper functions to simplify if statements
- bool isTargetELF() const;
- const AMDGPUDevice* device() const;
- std::string getDataLayout() const;
- std::string getDeviceName() const;
- virtual size_t getDefaultSize(uint32_t dim) const;
- bool dumpCode() const { return mDumpCode; }
- bool r600ALUEncoding() const { return mR600ALUInst; }
-
-};
-
-} // End namespace llvm
-
-#endif // AMDGPUSUBTARGET_H_
+++ /dev/null
-//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The AMDGPU target machine contains all of the hardware specific information
-// needed to emit code for R600 and SI GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUTargetMachine.h"
-#include "AMDGPU.h"
-#include "R600ISelLowering.h"
-#include "R600InstrInfo.h"
-#include "SIISelLowering.h"
-#include "SIInstrInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_os_ostream.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Scalar.h"
-#include <llvm/CodeGen/Passes.h>
-
-using namespace llvm;
-
-extern "C" void LLVMInitializeAMDGPUTarget() {
- // Register the target
- RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
-}
-
-AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- TargetOptions Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OptLevel
-)
-:
- LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
- Subtarget(TT, CPU, FS),
- DataLayout(Subtarget.getDataLayout()),
- FrameLowering(TargetFrameLowering::StackGrowsUp,
- Subtarget.device()->getStackAlignment(), 0),
- IntrinsicInfo(this),
- InstrItins(&Subtarget.getInstrItineraryData()),
- mDump(false)
-
-{
- // TLInfo uses InstrInfo so it must be initialized after.
- if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
- InstrInfo = new R600InstrInfo(*this);
- TLInfo = new R600TargetLowering(*this);
- } else {
- InstrInfo = new SIInstrInfo(*this);
- TLInfo = new SITargetLowering(*this);
- }
-}
-
-AMDGPUTargetMachine::~AMDGPUTargetMachine()
-{
-}
-
-namespace {
-class AMDGPUPassConfig : public TargetPassConfig {
-public:
- AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
-
- AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
- return getTM<AMDGPUTargetMachine>();
- }
-
- virtual bool addPreISel();
- virtual bool addInstSelector();
- virtual bool addPreRegAlloc();
- virtual bool addPostRegAlloc();
- virtual bool addPreSched2();
- virtual bool addPreEmitPass();
-};
-} // End of anonymous namespace
-
-TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new AMDGPUPassConfig(this, PM);
-}
-
-bool
-AMDGPUPassConfig::addPreISel()
-{
- return false;
-}
-
-bool AMDGPUPassConfig::addInstSelector() {
- PM->add(createAMDGPUPeepholeOpt(*TM));
- PM->add(createAMDGPUISelDag(getAMDGPUTargetMachine()));
- return false;
-}
-
-bool AMDGPUPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-
- if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
- PM->add(createSIAssignInterpRegsPass(*TM));
- }
- PM->add(createAMDGPUConvertToISAPass(*TM));
- return false;
-}
-
-bool AMDGPUPassConfig::addPostRegAlloc() {
- return false;
-}
-
-bool AMDGPUPassConfig::addPreSched2() {
-
- addPass(IfConverterID);
- return false;
-}
-
-bool AMDGPUPassConfig::addPreEmitPass() {
- PM->add(createAMDGPUCFGPreparationPass(*TM));
- PM->add(createAMDGPUCFGStructurizerPass(*TM));
-
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
- PM->add(createR600ExpandSpecialInstrsPass(*TM));
- addPass(FinalizeMachineBundlesID);
- } else {
- PM->add(createSILowerLiteralConstantsPass(*TM));
- // piglit is unreliable (VM protection faults, GPU lockups) with this pass:
- //PM->add(createSILowerFlowControlPass(*TM));
- }
-
- return false;
-}
-
+++ /dev/null
-//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The AMDGPU TargetMachine interface definition for hw codgen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPU_TARGET_MACHINE_H
-#define AMDGPU_TARGET_MACHINE_H
-
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDILFrameLowering.h"
-#include "AMDILIntrinsicInfo.h"
-#include "R600ISelLowering.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Target/TargetData.h"
-
-namespace llvm {
-
-MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
-
-class AMDGPUTargetMachine : public LLVMTargetMachine {
-
- AMDGPUSubtarget Subtarget;
- const TargetData DataLayout;
- AMDGPUFrameLowering FrameLowering;
- AMDGPUIntrinsicInfo IntrinsicInfo;
- const AMDGPUInstrInfo * InstrInfo;
- AMDGPUTargetLowering * TLInfo;
- const InstrItineraryData* InstrItins;
- bool mDump;
-
-public:
- AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
- StringRef CPU,
- TargetOptions Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
- ~AMDGPUTargetMachine();
- virtual const AMDGPUFrameLowering* getFrameLowering() const {
- return &FrameLowering;
- }
- virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
- return &IntrinsicInfo;
- }
- virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
- virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
- virtual const AMDGPURegisterInfo *getRegisterInfo() const {
- return &InstrInfo->getRegisterInfo();
- }
- virtual AMDGPUTargetLowering * getTargetLowering() const {
- return TLInfo;
- }
- virtual const InstrItineraryData* getInstrItineraryData() const {
- return InstrItins;
- }
- virtual const TargetData* getTargetData() const { return &DataLayout; }
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
-};
-
-} // End namespace llvm
-
-#endif // AMDGPU_TARGET_MACHINE_H
+++ /dev/null
-//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// AMDGPU back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDIL_H_
-#define AMDIL_H_
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetMachine.h"
-
-#define ARENA_SEGMENT_RESERVED_UAVS 12
-#define DEFAULT_ARENA_UAV_ID 8
-#define DEFAULT_RAW_UAV_ID 7
-#define GLOBAL_RETURN_RAW_UAV_ID 11
-#define HW_MAX_NUM_CB 8
-#define MAX_NUM_UNIQUE_UAVS 8
-#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
-#define OPENCL_MAX_READ_IMAGES 128
-#define OPENCL_MAX_WRITE_IMAGES 8
-#define OPENCL_MAX_SAMPLERS 16
-
-// The next two values can never be zero, as zero is the ID that is
-// used to assert against.
-#define DEFAULT_LDS_ID 1
-#define DEFAULT_GDS_ID 1
-#define DEFAULT_SCRATCH_ID 1
-#define DEFAULT_VEC_SLOTS 8
-
-#define OCL_DEVICE_RV710 0x0001
-#define OCL_DEVICE_RV730 0x0002
-#define OCL_DEVICE_RV770 0x0004
-#define OCL_DEVICE_CEDAR 0x0008
-#define OCL_DEVICE_REDWOOD 0x0010
-#define OCL_DEVICE_JUNIPER 0x0020
-#define OCL_DEVICE_CYPRESS 0x0040
-#define OCL_DEVICE_CAICOS 0x0080
-#define OCL_DEVICE_TURKS 0x0100
-#define OCL_DEVICE_BARTS 0x0200
-#define OCL_DEVICE_CAYMAN 0x0400
-#define OCL_DEVICE_ALL 0x3FFF
-
-/// The number of function ID's that are reserved for
-/// internal compiler usage.
-const unsigned int RESERVED_FUNCS = 1024;
-
-namespace llvm {
-class AMDGPUInstrPrinter;
-class FunctionPass;
-class MCAsmInfo;
-class raw_ostream;
-class Target;
-class TargetMachine;
-
-/// Instruction selection passes.
-FunctionPass*
- createAMDGPUISelDag(TargetMachine &TM);
-FunctionPass*
- createAMDGPUPeepholeOpt(TargetMachine &TM);
-
-/// Pre emit passes.
-FunctionPass*
- createAMDGPUCFGPreparationPass(TargetMachine &TM);
-FunctionPass*
- createAMDGPUCFGStructurizerPass(TargetMachine &TM);
-
-extern Target TheAMDGPUTarget;
-} // end namespace llvm;
-
-/// Include device information enumerations
-#include "AMDILDeviceInfo.h"
-
-namespace llvm {
-/// OpenCL uses address spaces to differentiate between
-/// various memory regions on the hardware. On the CPU
-/// all of the address spaces point to the same memory,
-/// however on the GPU, each address space points to
-/// a seperate piece of memory that is unique from other
-/// memory locations.
-namespace AMDGPUAS {
-enum AddressSpaces {
- PRIVATE_ADDRESS = 0, // Address space for private memory.
- GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
- CONSTANT_ADDRESS = 2, // Address space for constant memory.
- LOCAL_ADDRESS = 3, // Address space for local memory.
- REGION_ADDRESS = 4, // Address space for region memory.
- ADDRESS_NONE = 5, // Address space for unknown memory.
- PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
- PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
- USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
- LAST_ADDRESS = 9
-};
-
-} // namespace AMDGPUAS
-
-} // end namespace llvm
-#endif // AMDIL_H_
+++ /dev/null
-//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDIL7XXDevice.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDILDevice.h"
-
-using namespace llvm;
-
-AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST)
-{
- setCaps();
- std::string name = mSTM->getDeviceName();
- if (name == "rv710") {
- mDeviceFlag = OCL_DEVICE_RV710;
- } else if (name == "rv730") {
- mDeviceFlag = OCL_DEVICE_RV730;
- } else {
- mDeviceFlag = OCL_DEVICE_RV770;
- }
-}
-
-AMDGPU7XXDevice::~AMDGPU7XXDevice()
-{
-}
-
-void AMDGPU7XXDevice::setCaps()
-{
- mSWBits.set(AMDGPUDeviceInfo::LocalMem);
-}
-
-size_t AMDGPU7XXDevice::getMaxLDSSize() const
-{
- if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
- return MAX_LDS_SIZE_700;
- }
- return 0;
-}
-
-size_t AMDGPU7XXDevice::getWavefrontSize() const
-{
- return AMDGPUDevice::HalfWavefrontSize;
-}
-
-uint32_t AMDGPU7XXDevice::getGeneration() const
-{
- return AMDGPUDeviceInfo::HD4XXX;
-}
-
-uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const
-{
- switch (DeviceID) {
- default:
- assert(0 && "ID type passed in is unknown!");
- break;
- case GLOBAL_ID:
- case CONSTANT_ID:
- case RAW_UAV_ID:
- case ARENA_UAV_ID:
- break;
- case LDS_ID:
- if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
- return DEFAULT_LDS_ID;
- }
- break;
- case SCRATCH_ID:
- if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
- return DEFAULT_SCRATCH_ID;
- }
- break;
- case GDS_ID:
- assert(0 && "GDS UAV ID is not supported on this chip");
- if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
- return DEFAULT_GDS_ID;
- }
- break;
- };
-
- return 0;
-}
-
-uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const
-{
- return 1;
-}
-
-AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST)
-{
- setCaps();
-}
-
-AMDGPU770Device::~AMDGPU770Device()
-{
-}
-
-void AMDGPU770Device::setCaps()
-{
- if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
- mSWBits.set(AMDGPUDeviceInfo::FMA);
- mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
- }
- mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
- mHWBits.reset(AMDGPUDeviceInfo::LongOps);
- mSWBits.set(AMDGPUDeviceInfo::LongOps);
- mSWBits.set(AMDGPUDeviceInfo::LocalMem);
-}
-
-size_t AMDGPU770Device::getWavefrontSize() const
-{
- return AMDGPUDevice::WavefrontSize;
-}
-
-AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST)
-{
-}
-
-AMDGPU710Device::~AMDGPU710Device()
-{
-}
-
-size_t AMDGPU710Device::getWavefrontSize() const
-{
- return AMDGPUDevice::QuarterWavefrontSize;
-}
+++ /dev/null
-//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===----------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
-// specific hardware.
-//===----------------------------------------------------------------------===//
-#ifndef _AMDIL7XXDEVICEIMPL_H_
-#define _AMDIL7XXDEVICEIMPL_H_
-#include "AMDILDevice.h"
-
-namespace llvm {
-class AMDGPUSubtarget;
-
-//===----------------------------------------------------------------------===//
-// 7XX generation of devices and their respective sub classes
-//===----------------------------------------------------------------------===//
-
-// The AMDGPU7XXDevice class represents the generic 7XX device. All 7XX
-// devices are derived from this class. The AMDGPU7XX device will only
-// support the minimal features that are required to be considered OpenCL 1.0
-// compliant and nothing more.
-class AMDGPU7XXDevice : public AMDGPUDevice {
-public:
- AMDGPU7XXDevice(AMDGPUSubtarget *ST);
- virtual ~AMDGPU7XXDevice();
- virtual size_t getMaxLDSSize() const;
- virtual size_t getWavefrontSize() const;
- virtual uint32_t getGeneration() const;
- virtual uint32_t getResourceID(uint32_t DeviceID) const;
- virtual uint32_t getMaxNumUAVs() const;
-
-protected:
- virtual void setCaps();
-}; // AMDGPU7XXDevice
-
-// The AMDGPU770Device class represents the RV770 chip and it's
-// derivative cards. The difference between this device and the base
-// class is this device device adds support for double precision
-// and has a larger wavefront size.
-class AMDGPU770Device : public AMDGPU7XXDevice {
-public:
- AMDGPU770Device(AMDGPUSubtarget *ST);
- virtual ~AMDGPU770Device();
- virtual size_t getWavefrontSize() const;
-private:
- virtual void setCaps();
-}; // AMDGPU770Device
-
-// The AMDGPU710Device class derives from the 7XX base class, but this
-// class is a smaller derivative, so we need to overload some of the
-// functions in order to correctly specify this information.
-class AMDGPU710Device : public AMDGPU7XXDevice {
-public:
- AMDGPU710Device(AMDGPUSubtarget *ST);
- virtual ~AMDGPU710Device();
- virtual size_t getWavefrontSize() const;
-}; // AMDGPU710Device
-
-} // namespace llvm
-#endif // _AMDILDEVICEIMPL_H_
+++ /dev/null
-//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces which we are implementing
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-// Dummy Instruction itineraries for pseudo instructions
-def ALU_NULL : FuncUnit;
-def NullALU : InstrItinClass;
-
-//===----------------------------------------------------------------------===//
-// AMDIL Subtarget features.
-//===----------------------------------------------------------------------===//
-def FeatureFP64 : SubtargetFeature<"fp64",
- "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
- "true",
- "Enable 64bit double precision operations">;
-def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
- "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
- "true",
- "Enable byte addressable stores">;
-def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
- "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
- "true",
- "Enable duplicate barrier detection(HD5XXX or later).">;
-def FeatureImages : SubtargetFeature<"images",
- "CapsOverride[AMDGPUDeviceInfo::Images]",
- "true",
- "Enable image functions">;
-def FeatureMultiUAV : SubtargetFeature<"multi_uav",
- "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
- "true",
- "Generate multiple UAV code(HD5XXX family or later)">;
-def FeatureMacroDB : SubtargetFeature<"macrodb",
- "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
- "true",
- "Use internal macrodb, instead of macrodb in driver">;
-def FeatureNoAlias : SubtargetFeature<"noalias",
- "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
- "true",
- "assert that all kernel argument pointers are not aliased">;
-def FeatureNoInline : SubtargetFeature<"no-inline",
- "CapsOverride[AMDGPUDeviceInfo::NoInline]",
- "true",
- "specify whether to not inline functions">;
-
-def Feature64BitPtr : SubtargetFeature<"64BitPtr",
- "mIs64bit",
- "false",
- "Specify if 64bit addressing should be used.">;
-
-def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
- "mIs32on64bit",
- "false",
- "Specify if 64bit sized pointers with 32bit addressing should be used.">;
-def FeatureDebug : SubtargetFeature<"debug",
- "CapsOverride[AMDGPUDeviceInfo::Debug]",
- "true",
- "Debug mode is enabled, so disable hardware accelerated address spaces.">;
-def FeatureDumpCode : SubtargetFeature <"DumpCode",
- "mDumpCode",
- "true",
- "Dump MachineInstrs in the CodeEmitter">;
-
-def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
- "mR600ALUInst",
- "false",
- "Older version of ALU instructions encoding.">;
-
-
-//===----------------------------------------------------------------------===//
-// Register File, Calling Conv, Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-
-include "AMDILRegisterInfo.td"
-include "AMDILInstrInfo.td"
-
+++ /dev/null
-//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-
-#define DEBUGME 0
-#define DEBUG_TYPE "structcfg"
-
-#include "AMDGPUInstrInfo.h"
-#include "AMDIL.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-#define FirstNonDebugInstr(A) A->begin()
-using namespace llvm;
-
-// TODO: move-begin.
-
-//===----------------------------------------------------------------------===//
-//
-// Statistics for CFGStructurizer.
-//
-//===----------------------------------------------------------------------===//
-
-STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
- "matched");
-STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
- "matched");
-STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
- "pattern matched");
-STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
- "pattern matched");
-STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
- "matched");
-STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
-STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
-
-//===----------------------------------------------------------------------===//
-//
-// Miscellaneous utility for CFGStructurizer.
-//
-//===----------------------------------------------------------------------===//
-namespace llvmCFGStruct
-{
-#define SHOWNEWINSTR(i) \
- if (DEBUGME) errs() << "New instr: " << *i << "\n"
-
-#define SHOWNEWBLK(b, msg) \
-if (DEBUGME) { \
- errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
- errs() << "\n"; \
-}
-
-#define SHOWBLK_DETAIL(b, msg) \
-if (DEBUGME) { \
- if (b) { \
- errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
- b->print(errs()); \
- errs() << "\n"; \
- } \
-}
-
-#define INVALIDSCCNUM -1
-#define INVALIDREGNUM 0
-
-template<class LoopinfoT>
-void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
- for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
- iterEnd = LoopInfo.end();
- iter != iterEnd; ++iter) {
- (*iter)->print(OS, 0);
- }
-}
-
-template<class NodeT>
-void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
- size_t sz = Src.size();
- for (size_t i = 0; i < sz/2; ++i) {
- NodeT *t = Src[i];
- Src[i] = Src[sz - i - 1];
- Src[sz - i - 1] = t;
- }
-}
-
-} //end namespace llvmCFGStruct
-
-
-//===----------------------------------------------------------------------===//
-//
-// MachinePostDominatorTree
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
-/// to compute the a post-dominator tree.
-///
-struct MachinePostDominatorTree : public MachineFunctionPass {
- static char ID; // Pass identification, replacement for typeid
- DominatorTreeBase<MachineBasicBlock> *DT;
- MachinePostDominatorTree() : MachineFunctionPass(ID)
- {
- DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
- // postdominator
- }
-
- ~MachinePostDominatorTree();
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- inline const std::vector<MachineBasicBlock *> &getRoots() const {
- return DT->getRoots();
- }
-
- inline MachineDomTreeNode *getRootNode() const {
- return DT->getRootNode();
- }
-
- inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
- return DT->getNode(BB);
- }
-
- inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
- return DT->getNode(BB);
- }
-
- inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
- return DT->dominates(A, B);
- }
-
- inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
- return DT->dominates(A, B);
- }
-
- inline bool
- properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
- return DT->properlyDominates(A, B);
- }
-
- inline bool
- properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
- return DT->properlyDominates(A, B);
- }
-
- inline MachineBasicBlock *
- findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
- return DT->findNearestCommonDominator(A, B);
- }
-
- virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
- DT->print(OS);
- }
-};
-} //end of namespace llvm
-
-char MachinePostDominatorTree::ID = 0;
-static RegisterPass<MachinePostDominatorTree>
-machinePostDominatorTreePass("machinepostdomtree",
- "MachinePostDominator Tree Construction",
- true, true);
-
-//const PassInfo *const llvm::MachinePostDominatorsID
-//= &machinePostDominatorTreePass;
-
-bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
- DT->recalculate(F);
- //DEBUG(DT->dump());
- return false;
-}
-
-MachinePostDominatorTree::~MachinePostDominatorTree() {
- delete DT;
-}
-
-//===----------------------------------------------------------------------===//
-//
-// supporting data structure for CFGStructurizer
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvmCFGStruct
-{
-template<class PassT>
-struct CFGStructTraits {
-};
-
-template <class InstrT>
-class BlockInformation {
-public:
- bool isRetired;
- int sccNum;
- //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
- //Instructions defining the corresponding successor.
- BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
-};
-
-template <class BlockT, class InstrT, class RegiT>
-class LandInformation {
-public:
- BlockT *landBlk;
- std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
- //WHILELOOP(thisloop) init before entering
- //thisloop.
- std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
- //WHILELOOP(thisloop) init after entering
- //thisloop.
- std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
- //land block, branch cond on this reg.
- std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
- //endif" after ENDLOOP(thisloop) break
- //outerLoopOf(thisLoop).
- std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
- //endif" after ENDLOOP(thisloop) continue on
- //outerLoopOf(thisLoop).
- LandInformation() : landBlk(NULL) {}
-};
-
-} //end of namespace llvmCFGStruct
-
-//===----------------------------------------------------------------------===//
-//
-// CFGStructurizer
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvmCFGStruct
-{
-// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
-template<class PassT>
-class CFGStructurizer
-{
-public:
- typedef enum {
- Not_SinglePath = 0,
- SinglePath_InPath = 1,
- SinglePath_NotInPath = 2
- } PathToKind;
-
-public:
- typedef typename PassT::InstructionType InstrT;
- typedef typename PassT::FunctionType FuncT;
- typedef typename PassT::DominatortreeType DomTreeT;
- typedef typename PassT::PostDominatortreeType PostDomTreeT;
- typedef typename PassT::DomTreeNodeType DomTreeNodeT;
- typedef typename PassT::LoopinfoType LoopInfoT;
-
- typedef GraphTraits<FuncT *> FuncGTraits;
- //typedef FuncGTraits::nodes_iterator BlockIterator;
- typedef typename FuncT::iterator BlockIterator;
-
- typedef typename FuncGTraits::NodeType BlockT;
- typedef GraphTraits<BlockT *> BlockGTraits;
- typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
- //typedef BlockGTraits::succ_iterator InstructionIterator;
- typedef typename BlockT::iterator InstrIterator;
-
- typedef CFGStructTraits<PassT> CFGTraits;
- typedef BlockInformation<InstrT> BlockInfo;
- typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
-
- typedef int RegiT;
- typedef typename PassT::LoopType LoopT;
- typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
- typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
- //landing info for loop break
- typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
-
-public:
- CFGStructurizer();
- ~CFGStructurizer();
-
- /// Perform the CFG structurization
- bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
-
- /// Perform the CFG preparation
- bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
-
-private:
- void reversePredicateSetter(typename BlockT::iterator);
- void orderBlocks();
- void printOrderedBlocks(llvm::raw_ostream &OS);
- int patternMatch(BlockT *CurBlock);
- int patternMatchGroup(BlockT *CurBlock);
-
- int serialPatternMatch(BlockT *CurBlock);
- int ifPatternMatch(BlockT *CurBlock);
- int switchPatternMatch(BlockT *CurBlock);
- int loopendPatternMatch(BlockT *CurBlock);
- int loopPatternMatch(BlockT *CurBlock);
-
- int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
- int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
- //int loopWithoutBreak(BlockT *);
-
- void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
- BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
- void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
- BlockT *ContBlock, LoopT *contLoop);
- bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
- int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock);
- int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock);
- int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock, BlockT **LandBlockPtr);
- void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock, BlockT *LandBlock,
- bool Detail = false);
- PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
- bool AllowSideEntry = true);
- BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
- bool AllowSideEntry = true);
- int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
- void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
-
- void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
- BlockT *TrueBlock, BlockT *FalseBlock,
- BlockT *LandBlock);
- void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
- void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
- BlockT *ExitLandBlock, RegiT SetReg);
- void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
- RegiT SetReg);
- BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
- std::set<BlockT*> &ExitBlockSet,
- BlockT *ExitLandBlk);
- BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
- BlockTSmallerVector &ExitingBlocks,
- BlockTSmallerVector &ExitBlocks);
- BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
- void removeUnconditionalBranch(BlockT *SrcBlock);
- void removeRedundantConditionalBranch(BlockT *SrcBlock);
- void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
-
- void removeSuccessor(BlockT *SrcBlock);
- BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
- BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
-
- void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
- InstrIterator InsertPos);
-
- void recordSccnum(BlockT *SrcBlock, int SCCNum);
- int getSCCNum(BlockT *srcBlk);
-
- void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
- bool isRetiredBlock(BlockT *SrcBlock);
- bool isActiveLoophead(BlockT *CurBlock);
- bool needMigrateBlock(BlockT *Block);
-
- BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
- BlockTSmallerVector &exitBlocks,
- std::set<BlockT*> &ExitBlockSet);
- void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
- BlockT *getLoopLandBlock(LoopT *LoopRep);
- LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
-
- void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
-
- bool hasBackEdge(BlockT *curBlock);
- unsigned getLoopDepth (LoopT *LoopRep);
- int countActiveBlock(
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
- BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
- BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
-
-private:
- DomTreeT *domTree;
- PostDomTreeT *postDomTree;
- LoopInfoT *loopInfo;
- PassT *passRep;
- FuncT *funcRep;
-
- BlockInfoMap blockInfoMap;
- LoopLandInfoMap loopLandInfoMap;
- SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
- const AMDGPURegisterInfo *TRI;
-
-}; //template class CFGStructurizer
-
-template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
- : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
-}
-
-template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
- for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
- E = blockInfoMap.end(); I != E; ++I) {
- delete I->second;
- }
-}
-
-template<class PassT>
-bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
- const AMDGPURegisterInfo * tri) {
- passRep = &pass;
- funcRep = &func;
- TRI = tri;
-
- bool changed = false;
- //func.RenumberBlocks();
-
- //to do, if not reducible flow graph, make it so ???
-
- if (DEBUGME) {
- errs() << "AMDGPUCFGStructurizer::prepare\n";
- //func.viewCFG();
- //func.viewCFGOnly();
- //func.dump();
- }
-
- //FIXME: gcc complains on this.
- //domTree = &pass.getAnalysis<DomTreeT>();
- //domTree = CFGTraits::getDominatorTree(pass);
- //if (DEBUGME) {
- // domTree->print(errs());
- //}
-
- //FIXME: gcc complains on this.
- //domTree = &pass.getAnalysis<DomTreeT>();
- //postDomTree = CFGTraits::getPostDominatorTree(pass);
- //if (DEBUGME) {
- // postDomTree->print(errs());
- //}
-
- //FIXME: gcc complains on this.
- //loopInfo = &pass.getAnalysis<LoopInfoT>();
- loopInfo = CFGTraits::getLoopInfo(pass);
- if (DEBUGME) {
- errs() << "LoopInfo:\n";
- PrintLoopinfo(*loopInfo, errs());
- }
-
- orderBlocks();
- if (DEBUGME) {
- errs() << "Ordered blocks:\n";
- printOrderedBlocks(errs());
- }
-
- SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
-
- for (typename LoopInfoT::iterator iter = loopInfo->begin(),
- iterEnd = loopInfo->end();
- iter != iterEnd; ++iter) {
- LoopT* loopRep = (*iter);
- BlockTSmallerVector exitingBlks;
- loopRep->getExitingBlocks(exitingBlks);
-
- if (exitingBlks.size() == 0) {
- BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
- if (dummyExitBlk != NULL)
- retBlks.push_back(dummyExitBlk);
- }
- }
-
- // Remove unconditional branch instr.
- // Add dummy exit block iff there are multiple returns.
-
- for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
- iterBlk != iterEndBlk;
- ++iterBlk) {
- BlockT *curBlk = *iterBlk;
- removeUnconditionalBranch(curBlk);
- removeRedundantConditionalBranch(curBlk);
- if (CFGTraits::isReturnBlock(curBlk)) {
- retBlks.push_back(curBlk);
- }
- assert(curBlk->succ_size() <= 2);
- //assert(curBlk->size() > 0);
- //removeEmptyBlock(curBlk) ??
- } //for
-
- if (retBlks.size() >= 2) {
- addDummyExitBlock(retBlks);
- changed = true;
- }
-
- return changed;
-} //CFGStructurizer::prepare
-
-template<class PassT>
-bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
- const AMDGPURegisterInfo * tri) {
- passRep = &pass;
- funcRep = &func;
- TRI = tri;
-
- //func.RenumberBlocks();
-
- //Assume reducible CFG...
- if (DEBUGME) {
- errs() << "AMDGPUCFGStructurizer::run\n";
- //errs() << func.getFunction()->getNameStr() << "\n";
- func.viewCFG();
- //func.viewCFGOnly();
- //func.dump();
- }
-
-#if 1
- //FIXME: gcc complains on this.
- //domTree = &pass.getAnalysis<DomTreeT>();
- domTree = CFGTraits::getDominatorTree(pass);
- if (DEBUGME) {
- domTree->print(errs(), (const llvm::Module*)0);
- }
-#endif
-
- //FIXME: gcc complains on this.
- //domTree = &pass.getAnalysis<DomTreeT>();
- postDomTree = CFGTraits::getPostDominatorTree(pass);
- if (DEBUGME) {
- postDomTree->print(errs());
- }
-
- //FIXME: gcc complains on this.
- //loopInfo = &pass.getAnalysis<LoopInfoT>();
- loopInfo = CFGTraits::getLoopInfo(pass);
- if (DEBUGME) {
- errs() << "LoopInfo:\n";
- PrintLoopinfo(*loopInfo, errs());
- }
-
- orderBlocks();
-//#define STRESSTEST
-#ifdef STRESSTEST
- //Use the worse block ordering to test the algorithm.
- ReverseVector(orderedBlks);
-#endif
-
- if (DEBUGME) {
- errs() << "Ordered blocks:\n";
- printOrderedBlocks(errs());
- }
- int numIter = 0;
- bool finish = false;
- BlockT *curBlk;
- bool makeProgress = false;
- int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
- orderedBlks.end());
-
- do {
- ++numIter;
- if (DEBUGME) {
- errs() << "numIter = " << numIter
- << ", numRemaintedBlk = " << numRemainedBlk << "\n";
- }
-
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- iterBlk = orderedBlks.begin();
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- iterBlkEnd = orderedBlks.end();
-
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- sccBeginIter = iterBlk;
- BlockT *sccBeginBlk = NULL;
- int sccNumBlk = 0; // The number of active blocks, init to a
- // maximum possible number.
- int sccNumIter; // Number of iteration in this SCC.
-
- while (iterBlk != iterBlkEnd) {
- curBlk = *iterBlk;
-
- if (sccBeginBlk == NULL) {
- sccBeginIter = iterBlk;
- sccBeginBlk = curBlk;
- sccNumIter = 0;
- sccNumBlk = numRemainedBlk; // Init to maximum possible number.
- if (DEBUGME) {
- errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
- errs() << "\n";
- }
- }
-
- if (!isRetiredBlock(curBlk)) {
- patternMatch(curBlk);
- }
-
- ++iterBlk;
-
- bool contNextScc = true;
- if (iterBlk == iterBlkEnd
- || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
- // Just finish one scc.
- ++sccNumIter;
- int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
- if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
- if (DEBUGME) {
- errs() << "Can't reduce SCC " << getSCCNum(curBlk)
- << ", sccNumIter = " << sccNumIter;
- errs() << "doesn't make any progress\n";
- }
- contNextScc = true;
- } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
- sccNumBlk = sccRemainedNumBlk;
- iterBlk = sccBeginIter;
- contNextScc = false;
- if (DEBUGME) {
- errs() << "repeat processing SCC" << getSCCNum(curBlk)
- << "sccNumIter = " << sccNumIter << "\n";
- func.viewCFG();
- //func.viewCFGOnly();
- }
- } else {
- // Finish the current scc.
- contNextScc = true;
- }
- } else {
- // Continue on next component in the current scc.
- contNextScc = false;
- }
-
- if (contNextScc) {
- sccBeginBlk = NULL;
- }
- } //while, "one iteration" over the function.
-
- BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
- if (entryBlk->succ_size() == 0) {
- finish = true;
- if (DEBUGME) {
- errs() << "Reduce to one block\n";
- }
- } else {
- int newnumRemainedBlk
- = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
- // consider cloned blocks ??
- if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
- makeProgress = true;
- numRemainedBlk = newnumRemainedBlk;
- } else {
- makeProgress = false;
- if (DEBUGME) {
- errs() << "No progress\n";
- }
- }
- }
- } while (!finish && makeProgress);
-
- // Misc wrap up to maintain the consistency of the Function representation.
- CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
-
- // Detach retired Block, release memory.
- for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
- iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
- if ((*iterMap).second && (*iterMap).second->isRetired) {
- assert(((*iterMap).first)->getNumber() != -1);
- if (DEBUGME) {
- errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
- }
- (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
- }
- delete (*iterMap).second;
- }
- blockInfoMap.clear();
-
- // clear loopLandInfoMap
- for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
- iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
- delete (*iterMap).second;
- }
- loopLandInfoMap.clear();
-
- if (DEBUGME) {
- func.viewCFG();
- //func.dump();
- }
-
- if (!finish) {
- assert(!"IRREDUCIBL_CF");
- }
-
- return true;
-} //CFGStructurizer::run
-
-/// Print the ordered Blocks.
-///
-template<class PassT>
-void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
- size_t i = 0;
- for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
- iterBlk != iterBlkEnd;
- ++iterBlk, ++i) {
- os << "BB" << (*iterBlk)->getNumber();
- os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
- if (i != 0 && i % 10 == 0) {
- os << "\n";
- } else {
- os << " ";
- }
- }
-} //printOrderedBlocks
-
-/// Compute the reversed DFS post order of Blocks
-///
-template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
- int sccNum = 0;
- BlockT *bb;
- for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
- sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
- std::vector<BlockT *> &sccNext = *sccIter;
- for (typename std::vector<BlockT *>::const_iterator
- blockIter = sccNext.begin(), blockEnd = sccNext.end();
- blockIter != blockEnd; ++blockIter) {
- bb = *blockIter;
- orderedBlks.push_back(bb);
- recordSccnum(bb, sccNum);
- }
- }
-
- //walk through all the block in func to check for unreachable
- for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
- blockEnd1 = FuncGTraits::nodes_end(funcRep);
- blockIter1 != blockEnd1; ++blockIter1) {
- BlockT *bb = &(*blockIter1);
- sccNum = getSCCNum(bb);
- if (sccNum == INVALIDSCCNUM) {
- errs() << "unreachable block BB" << bb->getNumber() << "\n";
- }
- } //end of for
-} //orderBlocks
-
-template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
- int numMatch = 0;
- int curMatch;
-
- if (DEBUGME) {
- errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
- }
-
- while ((curMatch = patternMatchGroup(curBlk)) > 0) {
- numMatch += curMatch;
- }
-
- if (DEBUGME) {
- errs() << "End patternMatch BB" << curBlk->getNumber()
- << ", numMatch = " << numMatch << "\n";
- }
-
- return numMatch;
-} //patternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
- int numMatch = 0;
- numMatch += serialPatternMatch(curBlk);
- numMatch += ifPatternMatch(curBlk);
- //numMatch += switchPatternMatch(curBlk);
- numMatch += loopendPatternMatch(curBlk);
- numMatch += loopPatternMatch(curBlk);
- return numMatch;
-}//patternMatchGroup
-
-template<class PassT>
-int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
- if (curBlk->succ_size() != 1) {
- return 0;
- }
-
- BlockT *childBlk = *curBlk->succ_begin();
- if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
- return 0;
- }
-
- mergeSerialBlock(curBlk, childBlk);
- ++numSerialPatternMatch;
- return 1;
-} //serialPatternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
- //two edges
- if (curBlk->succ_size() != 2) {
- return 0;
- }
-
- if (hasBackEdge(curBlk)) {
- return 0;
- }
-
- InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
- if (branchInstr == NULL) {
- return 0;
- }
-
- assert(CFGTraits::isCondBranch(branchInstr));
-
- BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
- BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
- BlockT *landBlk;
- int cloned = 0;
-
- // TODO: Simplify
- if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
- && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
- landBlk = *trueBlk->succ_begin();
- } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
- landBlk = NULL;
- } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
- landBlk = falseBlk;
- falseBlk = NULL;
- } else if (falseBlk->succ_size() == 1
- && *falseBlk->succ_begin() == trueBlk) {
- landBlk = trueBlk;
- trueBlk = NULL;
- } else if (falseBlk->succ_size() == 1
- && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
- landBlk = *falseBlk->succ_begin();
- } else if (trueBlk->succ_size() == 1
- && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
- landBlk = *trueBlk->succ_begin();
- } else {
- return handleJumpintoIf(curBlk, trueBlk, falseBlk);
- }
-
- // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
- // new BB created for landBlk==NULL may introduce new challenge to the
- // reduction process.
- if (landBlk != NULL &&
- ((trueBlk && trueBlk->pred_size() > 1)
- || (falseBlk && falseBlk->pred_size() > 1))) {
- cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
- }
-
- if (trueBlk && trueBlk->pred_size() > 1) {
- trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
- ++cloned;
- }
-
- if (falseBlk && falseBlk->pred_size() > 1) {
- falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
- ++cloned;
- }
-
- mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
-
- ++numIfPatternMatch;
-
- numClonedBlock += cloned;
-
- return 1 + cloned;
-} //ifPatternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
- return 0;
-} //switchPatternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
- LoopT *loopRep = loopInfo->getLoopFor(curBlk);
- typename std::vector<LoopT *> nestedLoops;
- while (loopRep) {
- nestedLoops.push_back(loopRep);
- loopRep = loopRep->getParentLoop();
- }
-
- if (nestedLoops.size() == 0) {
- return 0;
- }
-
- // Process nested loop outside->inside, so "continue" to a outside loop won't
- // be mistaken as "break" of the current loop.
- int num = 0;
- for (typename std::vector<LoopT *>::reverse_iterator
- iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
- iter != iterEnd; ++iter) {
- loopRep = *iter;
-
- if (getLoopLandBlock(loopRep) != NULL) {
- continue;
- }
-
- BlockT *loopHeader = loopRep->getHeader();
-
- int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
-
- if (numBreak == -1) {
- break;
- }
-
- int numCont = loopcontPatternMatch(loopRep, loopHeader);
- num += numBreak + numCont;
- }
-
- return num;
-} //loopendPatternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
- if (curBlk->succ_size() != 0) {
- return 0;
- }
-
- int numLoop = 0;
- LoopT *loopRep = loopInfo->getLoopFor(curBlk);
- while (loopRep && loopRep->getHeader() == curBlk) {
- LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
- if (loopLand) {
- BlockT *landBlk = loopLand->landBlk;
- assert(landBlk);
- if (!isRetiredBlock(landBlk)) {
- mergeLooplandBlock(curBlk, loopLand);
- ++numLoop;
- }
- }
- loopRep = loopRep->getParentLoop();
- }
-
- numLoopPatternMatch += numLoop;
-
- return numLoop;
-} //loopPatternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
- BlockT *loopHeader) {
- BlockTSmallerVector exitingBlks;
- loopRep->getExitingBlocks(exitingBlks);
-
- if (DEBUGME) {
- errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
- }
-
- if (exitingBlks.size() == 0) {
- setLoopLandBlock(loopRep);
- return 0;
- }
-
- // Compute the corresponding exitBlks and exit block set.
- BlockTSmallerVector exitBlks;
- std::set<BlockT *> exitBlkSet;
- for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
- iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
- BlockT *exitingBlk = *iter;
- BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
- exitBlks.push_back(exitBlk);
- exitBlkSet.insert(exitBlk); //non-duplicate insert
- }
-
- assert(exitBlkSet.size() > 0);
- assert(exitBlks.size() == exitingBlks.size());
-
- if (DEBUGME) {
- errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
- }
-
- // Find exitLandBlk.
- BlockT *exitLandBlk = NULL;
- int numCloned = 0;
- int numSerial = 0;
-
- if (exitBlkSet.size() == 1)
- {
- exitLandBlk = *exitBlkSet.begin();
- } else {
- exitLandBlk = findNearestCommonPostDom(exitBlkSet);
-
- if (exitLandBlk == NULL) {
- return -1;
- }
-
- bool allInPath = true;
- bool allNotInPath = true;
- for (typename std::set<BlockT*>::const_iterator
- iter = exitBlkSet.begin(),
- iterEnd = exitBlkSet.end();
- iter != iterEnd; ++iter) {
- BlockT *exitBlk = *iter;
-
- PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
- if (DEBUGME) {
- errs() << "BB" << exitBlk->getNumber()
- << " to BB" << exitLandBlk->getNumber() << " PathToKind="
- << pathKind << "\n";
- }
-
- allInPath = allInPath && (pathKind == SinglePath_InPath);
- allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
-
- if (!allInPath && !allNotInPath) {
- if (DEBUGME) {
- errs() << "singlePath check fail\n";
- }
- return -1;
- }
- } // check all exit blocks
-
- if (allNotInPath) {
-#if 1
-
- // TODO: Simplify, maybe separate function?
- //funcRep->viewCFG();
- LoopT *parentLoopRep = loopRep->getParentLoop();
- BlockT *parentLoopHeader = NULL;
- if (parentLoopRep)
- parentLoopHeader = parentLoopRep->getHeader();
-
- if (exitLandBlk == parentLoopHeader &&
- (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
- loopRep,
- exitBlkSet,
- exitLandBlk)) != NULL) {
- if (DEBUGME) {
- errs() << "relocateLoopcontBlock success\n";
- }
- } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
- exitingBlks,
- exitBlks)) != NULL) {
- if (DEBUGME) {
- errs() << "insertEndbranchBlock success\n";
- }
- } else {
- if (DEBUGME) {
- errs() << "loop exit fail\n";
- }
- return -1;
- }
-#else
- return -1;
-#endif
- }
-
- // Handle side entry to exit path.
- exitBlks.clear();
- exitBlkSet.clear();
- for (typename BlockTSmallerVector::iterator iterExiting =
- exitingBlks.begin(),
- iterExitingEnd = exitingBlks.end();
- iterExiting != iterExitingEnd; ++iterExiting) {
- BlockT *exitingBlk = *iterExiting;
- BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
- BlockT *newExitBlk = exitBlk;
-
- if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
- newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
- ++numCloned;
- }
-
- numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
-
- exitBlks.push_back(newExitBlk);
- exitBlkSet.insert(newExitBlk);
- }
-
- for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
- iterExitEnd = exitBlks.end();
- iterExit != iterExitEnd; ++iterExit) {
- BlockT *exitBlk = *iterExit;
- numSerial += serialPatternMatch(exitBlk);
- }
-
- for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
- iterExitEnd = exitBlks.end();
- iterExit != iterExitEnd; ++iterExit) {
- BlockT *exitBlk = *iterExit;
- if (exitBlk->pred_size() > 1) {
- if (exitBlk != exitLandBlk) {
- return -1;
- }
- } else {
- if (exitBlk != exitLandBlk &&
- (exitBlk->succ_size() != 1 ||
- *exitBlk->succ_begin() != exitLandBlk)) {
- return -1;
- }
- }
- }
- } // else
-
- // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
- exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
-
- // Fold break into the breaking block. Leverage across level breaks.
- assert(exitingBlks.size() == exitBlks.size());
- for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
- iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
- iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
- BlockT *exitBlk = *iterExit;
- BlockT *exitingBlk = *iterExiting;
- assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
- LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
- handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
- }
-
- int numBreak = static_cast<int>(exitingBlks.size());
- numLoopbreakPatternMatch += numBreak;
- numClonedBlock += numCloned;
- return numBreak + numSerial + numCloned;
-} //loopbreakPatternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
- BlockT *loopHeader) {
- int numCont = 0;
- SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
- for (typename InvBlockGTraits::ChildIteratorType iter =
- InvBlockGTraits::child_begin(loopHeader),
- iterEnd = InvBlockGTraits::child_end(loopHeader);
- iter != iterEnd; ++iter) {
- BlockT *curBlk = *iter;
- if (loopRep->contains(curBlk)) {
- handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
- loopHeader, loopRep);
- contBlk.push_back(curBlk);
- ++numCont;
- }
- }
-
- for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
- iter = contBlk.begin(), iterEnd = contBlk.end();
- iter != iterEnd; ++iter) {
- (*iter)->removeSuccessor(loopHeader);
- }
-
- numLoopcontPatternMatch += numCont;
-
- return numCont;
-} //loopcontPatternMatch
-
-
-template<class PassT>
-bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
- BlockT *src2Blk) {
- // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
- // same loop with LoopLandInfo without explicitly keeping track of
- // loopContBlks and loopBreakBlks, this is a method to get the information.
- //
- if (src1Blk->succ_size() == 0) {
- LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
- if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
- if (theEntry != NULL) {
- if (DEBUGME) {
- errs() << "isLoopContBreakBlock yes src1 = BB"
- << src1Blk->getNumber()
- << " src2 = BB" << src2Blk->getNumber() << "\n";
- }
- return true;
- }
- }
- }
- return false;
-} //isSameloopDetachedContbreak
-
-template<class PassT>
-int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk) {
- int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
- if (num == 0) {
- if (DEBUGME) {
- errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
- }
- num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
- }
- return num;
-}
-
-template<class PassT>
-int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk) {
- int num = 0;
- BlockT *downBlk;
-
- //trueBlk could be the common post dominator
- downBlk = trueBlk;
-
- if (DEBUGME) {
- errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
- << " true = BB" << trueBlk->getNumber()
- << ", numSucc=" << trueBlk->succ_size()
- << " false = BB" << falseBlk->getNumber() << "\n";
- }
-
- while (downBlk) {
- if (DEBUGME) {
- errs() << "check down = BB" << downBlk->getNumber();
- }
-
- if (//postDomTree->dominates(downBlk, falseBlk) &&
- singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
- if (DEBUGME) {
- errs() << " working\n";
- }
-
- num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
- num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
-
- numClonedBlock += num;
- num += serialPatternMatch(*headBlk->succ_begin());
- num += serialPatternMatch(*(++headBlk->succ_begin()));
- num += ifPatternMatch(headBlk);
- assert(num > 0); //
-
- break;
- }
- if (DEBUGME) {
- errs() << " not working\n";
- }
- downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
- } // walk down the postDomTree
-
- return num;
-} //handleJumpintoIf
-
-template<class PassT>
-void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk,
- BlockT *landBlk,
- bool detail) {
- errs() << "head = BB" << headBlk->getNumber()
- << " size = " << headBlk->size();
- if (detail) {
- errs() << "\n";
- headBlk->print(errs());
- errs() << "\n";
- }
-
- if (trueBlk) {
- errs() << ", true = BB" << trueBlk->getNumber() << " size = "
- << trueBlk->size() << " numPred = " << trueBlk->pred_size();
- if (detail) {
- errs() << "\n";
- trueBlk->print(errs());
- errs() << "\n";
- }
- }
- if (falseBlk) {
- errs() << ", false = BB" << falseBlk->getNumber() << " size = "
- << falseBlk->size() << " numPred = " << falseBlk->pred_size();
- if (detail) {
- errs() << "\n";
- falseBlk->print(errs());
- errs() << "\n";
- }
- }
- if (landBlk) {
- errs() << ", land = BB" << landBlk->getNumber() << " size = "
- << landBlk->size() << " numPred = " << landBlk->pred_size();
- if (detail) {
- errs() << "\n";
- landBlk->print(errs());
- errs() << "\n";
- }
- }
-
- errs() << "\n";
-} //showImproveSimpleJumpintoIf
-
-template<class PassT>
-int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk,
- BlockT **plandBlk) {
- bool migrateTrue = false;
- bool migrateFalse = false;
-
- BlockT *landBlk = *plandBlk;
-
- assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
- && (falseBlk == NULL || falseBlk->succ_size() <= 1));
-
- if (trueBlk == falseBlk) {
- return 0;
- }
-
-#if 0
- if (DEBUGME) {
- errs() << "improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
- }
-#endif
-
- // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
- // May consider the # landBlk->pred_size() as it represents the number of
- // assignment initReg = .. needed to insert.
- migrateTrue = needMigrateBlock(trueBlk);
- migrateFalse = needMigrateBlock(falseBlk);
-
- if (!migrateTrue && !migrateFalse) {
- return 0;
- }
-
- // If we need to migrate either trueBlk and falseBlk, migrate the rest that
- // have more than one predecessors. without doing this, its predecessor
- // rather than headBlk will have undefined value in initReg.
- if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
- migrateTrue = true;
- }
- if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
- migrateFalse = true;
- }
-
- if (DEBUGME) {
- errs() << "before improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
- //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
- }
-
- // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
- //
- // new: headBlk => if () {initReg = 1; org trueBlk branch} else
- // {initReg = 0; org falseBlk branch }
- // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
- // => org landBlk
- // if landBlk->pred_size() > 2, put the about if-else inside
- // if (initReg !=2) {...}
- //
- // add initReg = initVal to headBlk
-
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- unsigned initReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
- if (!migrateTrue || !migrateFalse) {
- int initVal = migrateTrue ? 0 : 1;
- CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
- }
-
- int numNewBlk = 0;
-
- if (landBlk == NULL) {
- landBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(landBlk); //insert to function
-
- if (trueBlk) {
- trueBlk->addSuccessor(landBlk);
- } else {
- headBlk->addSuccessor(landBlk);
- }
-
- if (falseBlk) {
- falseBlk->addSuccessor(landBlk);
- } else {
- headBlk->addSuccessor(landBlk);
- }
-
- numNewBlk ++;
- }
-
- bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
-
- //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
- typename BlockT::iterator insertPos =
- CFGTraits::getInstrPos
- (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
-
- if (landBlkHasOtherPred) {
- unsigned immReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
- CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
- unsigned cmpResReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
-
- CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
- initReg, immReg);
- CFGTraits::insertCondBranchBefore(landBlk, insertPos,
- AMDGPU::IF_LOGICALZ_i32, passRep,
- cmpResReg, DebugLoc());
- }
-
- CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
- passRep, initReg, DebugLoc());
-
- if (migrateTrue) {
- migrateInstruction(trueBlk, landBlk, insertPos);
- // need to uncondionally insert the assignment to ensure a path from its
- // predecessor rather than headBlk has valid value in initReg if
- // (initVal != 1).
- CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
- }
- CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
-
- if (migrateFalse) {
- migrateInstruction(falseBlk, landBlk, insertPos);
- // need to uncondionally insert the assignment to ensure a path from its
- // predecessor rather than headBlk has valid value in initReg if
- // (initVal != 0)
- CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
- }
- //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
-
- if (landBlkHasOtherPred) {
- // add endif
- CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
-
- // put initReg = 2 to other predecessors of landBlk
- for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
- predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
- ++predIter) {
- BlockT *curBlk = *predIter;
- if (curBlk != trueBlk && curBlk != falseBlk) {
- CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
- }
- } //for
- }
- if (DEBUGME) {
- errs() << "result from improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
- //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
- }
-
- // update landBlk
- *plandBlk = landBlk;
-
- return numNewBlk;
-} //improveSimpleJumpintoIf
-
-template<class PassT>
-void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
- LoopT *exitingLoop,
- BlockT *exitBlk,
- LoopT *exitLoop,
- BlockT *landBlk) {
- if (DEBUGME) {
- errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
- << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
- }
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-
- RegiT initReg = INVALIDREGNUM;
- if (exitingLoop != exitLoop) {
- initReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
- assert(initReg != INVALIDREGNUM);
- addLoopBreakInitReg(exitLoop, initReg);
- while (exitingLoop != exitLoop && exitingLoop) {
- addLoopBreakOnReg(exitingLoop, initReg);
- exitingLoop = exitingLoop->getParentLoop();
- }
- assert(exitingLoop == exitLoop);
- }
-
- mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
-
-} //handleLoopbreak
-
-template<class PassT>
-void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
- LoopT *contingLoop,
- BlockT *contBlk,
- LoopT *contLoop) {
- if (DEBUGME) {
- errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
- << " header = BB" << contBlk->getNumber() << "\n";
-
- errs() << "Trying to continue loop-depth = "
- << getLoopDepth(contLoop)
- << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
- }
-
- RegiT initReg = INVALIDREGNUM;
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- if (contingLoop != contLoop) {
- initReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
- assert(initReg != INVALIDREGNUM);
- addLoopContInitReg(contLoop, initReg);
- while (contingLoop && contingLoop->getParentLoop() != contLoop) {
- addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
- contingLoop = contingLoop->getParentLoop();
- }
- assert(contingLoop && contingLoop->getParentLoop() == contLoop);
- addLoopContOnReg(contingLoop, initReg);
- }
-
- settleLoopcontBlock(contingBlk, contBlk, initReg);
- //contingBlk->removeSuccessor(loopHeader);
-} //handleLoopcontBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
- if (DEBUGME) {
- errs() << "serialPattern BB" << dstBlk->getNumber()
- << " <= BB" << srcBlk->getNumber() << "\n";
- }
- //removeUnconditionalBranch(dstBlk);
- dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
-
- dstBlk->removeSuccessor(srcBlk);
- CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
-
- removeSuccessor(srcBlk);
- retireBlock(dstBlk, srcBlk);
-} //mergeSerialBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
- BlockT *curBlk,
- BlockT *trueBlk,
- BlockT *falseBlk,
- BlockT *landBlk) {
- if (DEBUGME) {
- errs() << "ifPattern BB" << curBlk->getNumber();
- errs() << "{ ";
- if (trueBlk) {
- errs() << "BB" << trueBlk->getNumber();
- }
- errs() << " } else ";
- errs() << "{ ";
- if (falseBlk) {
- errs() << "BB" << falseBlk->getNumber();
- }
- errs() << " }\n ";
- errs() << "landBlock: ";
- if (landBlk == NULL) {
- errs() << "NULL";
- } else {
- errs() << "BB" << landBlk->getNumber();
- }
- errs() << "\n";
- }
-
- int oldOpcode = branchInstr->getOpcode();
- DebugLoc branchDL = branchInstr->getDebugLoc();
-
-// transform to
-// if cond
-// trueBlk
-// else
-// falseBlk
-// endif
-// landBlk
-
- typename BlockT::iterator branchInstrPos =
- CFGTraits::getInstrPos(curBlk, branchInstr);
- CFGTraits::insertCondBranchBefore(branchInstrPos,
- CFGTraits::getBranchNzeroOpcode(oldOpcode),
- passRep,
- branchDL);
-
- if (trueBlk) {
- curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
- curBlk->removeSuccessor(trueBlk);
- if (landBlk && trueBlk->succ_size()!=0) {
- trueBlk->removeSuccessor(landBlk);
- }
- retireBlock(curBlk, trueBlk);
- }
- CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
-
- if (falseBlk) {
- curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
- falseBlk->end());
- curBlk->removeSuccessor(falseBlk);
- if (landBlk && falseBlk->succ_size() != 0) {
- falseBlk->removeSuccessor(landBlk);
- }
- retireBlock(curBlk, falseBlk);
- }
- CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
-
- //curBlk->remove(branchInstrPos);
- branchInstr->eraseFromParent();
-
- if (landBlk && trueBlk && falseBlk) {
- curBlk->addSuccessor(landBlk);
- }
-
-} //mergeIfthenelseBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
- LoopLandInfo *loopLand) {
- BlockT *landBlk = loopLand->landBlk;
-
- if (DEBUGME) {
- errs() << "loopPattern header = BB" << dstBlk->getNumber()
- << " land = BB" << landBlk->getNumber() << "\n";
- }
-
- // Loop contInitRegs are init at the beginning of the loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->contInitRegs.begin(),
- iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
- }
-
- /* we last inserterd the DebugLoc in the
- * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
- * search for the DebugLoc in the that statement.
- * if not found, we have to insert the empty/default DebugLoc */
- InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
- DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
-
- CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
- // Loop breakInitRegs are init before entering the loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->breakInitRegs.begin(),
- iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
- {
- CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
- }
- // Loop endbranchInitRegs are init before entering the loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->endbranchInitRegs.begin(),
- iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
- }
-
- /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
- * search for the DebugLoc in the continue statement.
- * if not found, we have to insert the empty/default DebugLoc */
- InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
- DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
-
- CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
- // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
- // loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->breakOnRegs.begin(),
- iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
- *iter);
- }
-
- // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
- // loop.
- for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
- iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
- passRep, *iter);
- }
-
- dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
-
- for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
- iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
- dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
- }
-
- removeSuccessor(landBlk);
- retireBlock(dstBlk, landBlk);
-} //mergeLooplandBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
-{
- while (I--) {
- if (I->getOpcode() == AMDGPU::PRED_X) {
- switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
- case OPCODE_IS_ZERO_INT:
- static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
- return;
- case OPCODE_IS_NOT_ZERO_INT:
- static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
- return;
- case OPCODE_IS_ZERO:
- static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
- return;
- case OPCODE_IS_NOT_ZERO:
- static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
- return;
- default:
- assert(0 && "PRED_X Opcode invalid!");
- }
- }
- }
-}
-
-template<class PassT>
-void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
- BlockT *exitBlk,
- BlockT *exitLandBlk,
- RegiT setReg) {
- if (DEBUGME) {
- errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
- << " exit = BB" << exitBlk->getNumber()
- << " land = BB" << exitLandBlk->getNumber() << "\n";
- }
-
- InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
- assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
-
- DebugLoc DL = branchInstr->getDebugLoc();
-
- BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
- int oldOpcode = branchInstr->getOpcode();
-
- // transform exitingBlk to
- // if ( ) {
- // exitBlk (if exitBlk != exitLandBlk)
- // setReg = 1
- // break
- // }endif
- // successor = {orgSuccessor(exitingBlk) - exitBlk}
-
- typename BlockT::iterator branchInstrPos =
- CFGTraits::getInstrPos(exitingBlk, branchInstr);
-
- if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
- //break_logical
-
- if (trueBranch != exitBlk) {
- reversePredicateSetter(branchInstrPos);
- }
- int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
- CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
- } else {
- if (trueBranch != exitBlk) {
- reversePredicateSetter(branchInstr);
- }
- int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
- CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
- if (exitBlk != exitLandBlk) {
- //splice is insert-before ...
- exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
- exitBlk->end());
- }
- if (setReg != INVALIDREGNUM) {
- CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
- }
- CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
- CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
- } //if_logical
-
- //now branchInst can be erase safely
- //exitingBlk->eraseFromParent(branchInstr);
- branchInstr->eraseFromParent();
-
- //now take care of successors, retire blocks
- exitingBlk->removeSuccessor(exitBlk);
- if (exitBlk != exitLandBlk) {
- //splice is insert-before ...
- exitBlk->removeSuccessor(exitLandBlk);
- retireBlock(exitingBlk, exitBlk);
- }
-
-} //mergeLoopbreakBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
- BlockT *contBlk,
- RegiT setReg) {
- if (DEBUGME) {
- errs() << "settleLoopcontBlock conting = BB"
- << contingBlk->getNumber()
- << ", cont = BB" << contBlk->getNumber() << "\n";
- }
-
- InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
- if (branchInstr) {
- assert(CFGTraits::isCondBranch(branchInstr));
- typename BlockT::iterator branchInstrPos =
- CFGTraits::getInstrPos(contingBlk, branchInstr);
- BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
- int oldOpcode = branchInstr->getOpcode();
- DebugLoc DL = branchInstr->getDebugLoc();
-
- // transform contingBlk to
- // if () {
- // move instr after branchInstr
- // continue
- // or
- // setReg = 1
- // break
- // }endif
- // successor = {orgSuccessor(contingBlk) - loopHeader}
-
- bool useContinueLogical =
- (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
-
- if (useContinueLogical == false)
- {
- int branchOpcode =
- trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
- : CFGTraits::getBranchZeroOpcode(oldOpcode);
-
- CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
-
- if (setReg != INVALIDREGNUM) {
- CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
- // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
- CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
- } else {
- // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
- CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
- }
-
- CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
- } else {
- int branchOpcode =
- trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
- : CFGTraits::getContinueZeroOpcode(oldOpcode);
-
- CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
- }
-
- //contingBlk->eraseFromParent(branchInstr);
- branchInstr->eraseFromParent();
- } else {
- /* if we've arrived here then we've already erased the branch instruction
- * travel back up the basic block to see the last reference of our debug location
- * we've just inserted that reference here so it should be representative */
- if (setReg != INVALIDREGNUM) {
- CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
- // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
- CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
- } else {
- // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
- CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
- }
- } //else
-
-} //settleLoopcontBlock
-
-// BBs in exitBlkSet are determined as in break-path for loopRep,
-// before we can put code for BBs as inside loop-body for loopRep
-// check whether those BBs are determined as cont-BB for parentLoopRep
-// earlier.
-// If so, generate a new BB newBlk
-// (1) set newBlk common successor of BBs in exitBlkSet
-// (2) change the continue-instr in BBs in exitBlkSet to break-instr
-// (3) generate continue-instr in newBlk
-//
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
- LoopT *loopRep,
- std::set<BlockT *> &exitBlkSet,
- BlockT *exitLandBlk) {
- std::set<BlockT *> endBlkSet;
-
-// BlockT *parentLoopHead = parentLoopRep->getHeader();
-
-
- for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
- iterEnd = exitBlkSet.end();
- iter != iterEnd; ++iter) {
- BlockT *exitBlk = *iter;
- BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
-
- if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
- return NULL;
-
- endBlkSet.insert(endBlk);
- }
-
- BlockT *newBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(newBlk); //insert to function
- CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
- SHOWNEWBLK(newBlk, "New continue block: ");
-
- for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
- iterEnd = endBlkSet.end();
- iter != iterEnd; ++iter) {
- BlockT *endBlk = *iter;
- InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
- if (contInstr) {
- contInstr->eraseFromParent();
- }
- endBlk->addSuccessor(newBlk);
- if (DEBUGME) {
- errs() << "Add new continue Block to BB"
- << endBlk->getNumber() << " successors\n";
- }
- }
-
- return newBlk;
-} //relocateLoopcontBlock
-
-
-// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
-// LoopLandBlock. This BB branch on the loop endBranchInit register to the
-// pathes corresponding to the loop exiting branches.
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
- BlockTSmallerVector &exitingBlks,
- BlockTSmallerVector &exitBlks) {
- const AMDGPUInstrInfo *tii =
- static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-
- RegiT endBranchReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
- assert(endBranchReg >= 0);
-
- // reg = 0 before entering the loop
- addLoopEndbranchInitReg(loopRep, endBranchReg);
-
- uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
- assert(numBlks >=2 && numBlks == exitBlks.size());
-
- BlockT *preExitingBlk = exitingBlks[0];
- BlockT *preExitBlk = exitBlks[0];
- BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(preBranchBlk); //insert to function
- SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
-
- BlockT *newLandBlk = preBranchBlk;
-
- CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
- newLandBlk);
- preExitingBlk->removeSuccessor(preExitBlk);
- preExitingBlk->addSuccessor(newLandBlk);
-
- //it is redundant to add reg = 0 to exitingBlks[0]
-
- // For 1..n th exiting path (the last iteration handles two pathes) create the
- // branch to the previous path and the current path.
- for (uint32_t i = 1; i < numBlks; ++i) {
- BlockT *curExitingBlk = exitingBlks[i];
- BlockT *curExitBlk = exitBlks[i];
- BlockT *curBranchBlk;
-
- if (i == numBlks - 1) {
- curBranchBlk = curExitBlk;
- } else {
- curBranchBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(curBranchBlk); //insert to function
- SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
- }
-
- // Add reg = i to exitingBlks[i].
- CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
- endBranchReg, i);
-
- // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
- // (exitingBlks[i], newLandBlk).
- CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
- newLandBlk);
- curExitingBlk->removeSuccessor(curExitBlk);
- curExitingBlk->addSuccessor(newLandBlk);
-
- // add to preBranchBlk the branch instruction:
- // if (endBranchReg == preVal)
- // preExitBlk
- // else
- // curBranchBlk
- //
- // preValReg = i - 1
-
- DebugLoc DL;
- RegiT preValReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
-
- preBranchBlk->insert(preBranchBlk->begin(),
- tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
- i - 1));
-
- // condResReg = (endBranchReg == preValReg)
- RegiT condResReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
- BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
- .addReg(endBranchReg).addReg(preValReg);
-
- BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
- .addMBB(preExitBlk).addReg(condResReg);
-
- preBranchBlk->addSuccessor(preExitBlk);
- preBranchBlk->addSuccessor(curBranchBlk);
-
- // Update preExitingBlk, preExitBlk, preBranchBlk.
- preExitingBlk = curExitingBlk;
- preExitBlk = curExitBlk;
- preBranchBlk = curBranchBlk;
-
- } //end for 1 .. n blocks
-
- return newLandBlk;
-} //addLoopEndbranchBlock
-
-template<class PassT>
-typename CFGStructurizer<PassT>::PathToKind
-CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
- bool allowSideEntry) {
- assert(dstBlk);
-
- if (srcBlk == dstBlk) {
- return SinglePath_InPath;
- }
-
- while (srcBlk && srcBlk->succ_size() == 1) {
- srcBlk = *srcBlk->succ_begin();
- if (srcBlk == dstBlk) {
- return SinglePath_InPath;
- }
-
- if (!allowSideEntry && srcBlk->pred_size() > 1) {
- return Not_SinglePath;
- }
- }
-
- if (srcBlk && srcBlk->succ_size()==0) {
- return SinglePath_NotInPath;
- }
-
- return Not_SinglePath;
-} //singlePathTo
-
-// If there is a single path from srcBlk to dstBlk, return the last block before
-// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
-// last block in the path Otherwise, return NULL
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
- bool allowSideEntry) {
- assert(dstBlk);
-
- if (srcBlk == dstBlk) {
- return srcBlk;
- }
-
- if (srcBlk->succ_size() == 0) {
- return srcBlk;
- }
-
- while (srcBlk && srcBlk->succ_size() == 1) {
- BlockT *preBlk = srcBlk;
-
- srcBlk = *srcBlk->succ_begin();
- if (srcBlk == NULL) {
- return preBlk;
- }
-
- if (!allowSideEntry && srcBlk->pred_size() > 1) {
- return NULL;
- }
- }
-
- if (srcBlk && srcBlk->succ_size()==0) {
- return srcBlk;
- }
-
- return NULL;
-
-} //singlePathEnd
-
-template<class PassT>
-int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
- BlockT *dstBlk) {
- int cloned = 0;
- assert(preBlk->isSuccessor(srcBlk));
- while (srcBlk && srcBlk != dstBlk) {
- assert(srcBlk->succ_size() == 1);
- if (srcBlk->pred_size() > 1) {
- srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
- ++cloned;
- }
-
- preBlk = srcBlk;
- srcBlk = *srcBlk->succ_begin();
- }
-
- return cloned;
-} //cloneOnSideEntryTo
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
- BlockT *predBlk) {
- assert(predBlk->isSuccessor(curBlk) &&
- "succBlk is not a prececessor of curBlk");
-
- BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
- CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
- //srcBlk, oldBlk, newBlk
-
- predBlk->removeSuccessor(curBlk);
- predBlk->addSuccessor(cloneBlk);
-
- // add all successor to cloneBlk
- CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
-
- numClonedInstr += curBlk->size();
-
- if (DEBUGME) {
- errs() << "Cloned block: " << "BB"
- << curBlk->getNumber() << "size " << curBlk->size() << "\n";
- }
-
- SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
-
- return cloneBlk;
-} //cloneBlockForPredecessor
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
- BlockT *exitingBlk) {
- BlockT *exitBlk = NULL;
-
- for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
- iterSuccEnd = exitingBlk->succ_end();
- iterSucc != iterSuccEnd; ++iterSucc) {
- BlockT *curBlk = *iterSucc;
- if (!loopRep->contains(curBlk)) {
- assert(exitBlk == NULL);
- exitBlk = curBlk;
- }
- }
-
- assert(exitBlk != NULL);
-
- return exitBlk;
-} //exitingBlock2ExitBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
- BlockT *dstBlk,
- InstrIterator insertPos) {
- InstrIterator spliceEnd;
- //look for the input branchinstr, not the AMDGPU branchinstr
- InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
- if (branchInstr == NULL) {
- if (DEBUGME) {
- errs() << "migrateInstruction don't see branch instr\n" ;
- }
- spliceEnd = srcBlk->end();
- } else {
- if (DEBUGME) {
- errs() << "migrateInstruction see branch instr\n" ;
- branchInstr->dump();
- }
- spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
- }
- if (DEBUGME) {
- errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
- << "srcSize = " << srcBlk->size() << "\n";
- }
-
- //splice insert before insertPos
- dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
-
- if (DEBUGME) {
- errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
- << "srcSize = " << srcBlk->size() << "\n";
- }
-} //migrateInstruction
-
-// normalizeInfiniteLoopExit change
-// B1:
-// uncond_br LoopHeader
-//
-// to
-// B1:
-// cond_br 1 LoopHeader dummyExit
-// and return the newly added dummy exit block
-//
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
- BlockT *loopHeader;
- BlockT *loopLatch;
- loopHeader = LoopRep->getHeader();
- loopLatch = LoopRep->getLoopLatch();
- BlockT *dummyExitBlk = NULL;
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- if (loopHeader!=NULL && loopLatch!=NULL) {
- InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
- if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
- dummyExitBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(dummyExitBlk); //insert to function
- SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
-
- if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
-
- typename BlockT::iterator insertPos =
- CFGTraits::getInstrPos(loopLatch, branchInstr);
- unsigned immReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
- CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
- InstrT *newInstr =
- CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
- MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
-
- SHOWNEWINSTR(newInstr);
-
- branchInstr->eraseFromParent();
- loopLatch->addSuccessor(dummyExitBlk);
- }
- }
-
- return dummyExitBlk;
-} //normalizeInfiniteLoopExit
-
-template<class PassT>
-void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
- InstrT *branchInstr;
-
- // I saw two unconditional branch in one basic block in example
- // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
- while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
- && CFGTraits::isUncondBranch(branchInstr)) {
- if (DEBUGME) {
- errs() << "Removing unconditional branch instruction" ;
- branchInstr->dump();
- }
- branchInstr->eraseFromParent();
- }
-} //removeUnconditionalBranch
-
-template<class PassT>
-void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
- if (srcBlk->succ_size() == 2) {
- BlockT *blk1 = *srcBlk->succ_begin();
- BlockT *blk2 = *(++srcBlk->succ_begin());
-
- if (blk1 == blk2) {
- InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
- assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
- if (DEBUGME) {
- errs() << "Removing unneeded conditional branch instruction" ;
- branchInstr->dump();
- }
- branchInstr->eraseFromParent();
- SHOWNEWBLK(blk1, "Removing redundant successor");
- srcBlk->removeSuccessor(blk1);
- }
- }
-} //removeRedundantConditionalBranch
-
-template<class PassT>
-void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
- DEFAULT_VEC_SLOTS> &retBlks) {
- BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(dummyExitBlk); //insert to function
- CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
-
- for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
- retBlks.begin(),
- iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
- BlockT *curBlk = *iter;
- InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
- if (curInstr) {
- curInstr->eraseFromParent();
- }
-#if 0
- if (curBlk->size()==0 && curBlk->pred_size() == 1) {
- if (DEBUGME) {
- errs() << "Replace empty block BB" << curBlk->getNumber()
- << " with dummyExitBlock\n";
- }
- BlockT *predb = *curBlk->pred_begin();
- predb->removeSuccessor(curBlk);
- curBlk = predb;
- } //handle empty curBlk
-#endif
- curBlk->addSuccessor(dummyExitBlk);
- if (DEBUGME) {
- errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
- << " successors\n";
- }
- } //for
-
- SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
-} //addDummyExitBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
- while (srcBlk->succ_size()) {
- srcBlk->removeSuccessor(*srcBlk->succ_begin());
- }
-}
-
-template<class PassT>
-void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
- BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
-
- if (srcBlkInfo == NULL) {
- srcBlkInfo = new BlockInfo();
- }
-
- srcBlkInfo->sccNum = sccNum;
-}
-
-template<class PassT>
-int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
- BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
- return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
-}
-
-template<class PassT>
-void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
- if (DEBUGME) {
- errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
- }
-
- BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
-
- if (srcBlkInfo == NULL) {
- srcBlkInfo = new BlockInfo();
- }
-
- srcBlkInfo->isRetired = true;
- //int i = srcBlk->succ_size();
- //int j = srcBlk->pred_size();
- assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
- && "can't retire block yet");
-}
-
-template<class PassT>
-bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
- BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
- return (srcBlkInfo && srcBlkInfo->isRetired);
-}
-
-template<class PassT>
-bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
- LoopT *loopRep = loopInfo->getLoopFor(curBlk);
- while (loopRep && loopRep->getHeader() == curBlk) {
- LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
-
- if(loopLand == NULL)
- return true;
-
- BlockT *landBlk = loopLand->landBlk;
- assert(landBlk);
- if (!isRetiredBlock(landBlk)) {
- return true;
- }
-
- loopRep = loopRep->getParentLoop();
- }
-
- return false;
-} //isActiveLoophead
-
-template<class PassT>
-bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
- const unsigned blockSizeThreshold = 30;
- const unsigned cloneInstrThreshold = 100;
-
- bool multiplePreds = blk && (blk->pred_size() > 1);
-
- if(!multiplePreds)
- return false;
-
- unsigned blkSize = blk->size();
- return ((blkSize > blockSizeThreshold)
- && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
-} //needMigrateBlock
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
- BlockTSmallerVector &exitBlks,
- std::set<BlockT *> &exitBlkSet) {
- SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
-
- for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
- predIterEnd = landBlk->pred_end();
- predIter != predIterEnd; ++predIter) {
- BlockT *curBlk = *predIter;
- if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
- inpathBlks.push_back(curBlk);
- }
- } //for
-
- //if landBlk has predecessors that are not in the given loop,
- //create a new block
- BlockT *newLandBlk = landBlk;
- if (inpathBlks.size() != landBlk->pred_size()) {
- newLandBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(newLandBlk); //insert to function
- newLandBlk->addSuccessor(landBlk);
- for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
- inpathBlks.begin(),
- iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
- BlockT *curBlk = *iter;
- CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
- //srcBlk, oldBlk, newBlk
- curBlk->removeSuccessor(landBlk);
- curBlk->addSuccessor(newLandBlk);
- }
- for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
- if (exitBlks[i] == landBlk) {
- exitBlks[i] = newLandBlk;
- }
- }
- SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
- }
-
- setLoopLandBlock(loopRep, newLandBlk);
-
- return newLandBlk;
-} // recordLoopbreakLand
-
-template<class PassT>
-void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
- assert(theEntry->landBlk == NULL);
-
- if (blk == NULL) {
- blk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(blk); //insert to function
- SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
- }
-
- theEntry->landBlk = blk;
-
- if (DEBUGME) {
- errs() << "setLoopLandBlock loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " landing-block = BB" << blk->getNumber() << "\n";
- }
-} // setLoopLandBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
-
- theEntry->breakOnRegs.insert(regNum);
-
- if (DEBUGME) {
- errs() << "addLoopBreakOnReg loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " regNum = " << regNum << "\n";
- }
-} // addLoopBreakOnReg
-
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
- theEntry->contOnRegs.insert(regNum);
-
- if (DEBUGME) {
- errs() << "addLoopContOnReg loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " regNum = " << regNum << "\n";
- }
-} // addLoopContOnReg
-
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
- theEntry->breakInitRegs.insert(regNum);
-
- if (DEBUGME) {
- errs() << "addLoopBreakInitReg loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " regNum = " << regNum << "\n";
- }
-} // addLoopBreakInitReg
-
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
- theEntry->contInitRegs.insert(regNum);
-
- if (DEBUGME) {
- errs() << "addLoopContInitReg loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " regNum = " << regNum << "\n";
- }
-} // addLoopContInitReg
-
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
- RegiT regNum) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
- theEntry->endbranchInitRegs.insert(regNum);
-
- if (DEBUGME)
- {
- errs() << "addLoopEndbranchInitReg loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " regNum = " << regNum << "\n";
- }
-} // addLoopEndbranchInitReg
-
-template<class PassT>
-typename CFGStructurizer<PassT>::LoopLandInfo *
-CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- return theEntry;
-} // getLoopLandInfo
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- return theEntry ? theEntry->landBlk : NULL;
-} // getLoopLandBlock
-
-
-template<class PassT>
-bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
- LoopT *loopRep = loopInfo->getLoopFor(curBlk);
- if (loopRep == NULL)
- return false;
-
- BlockT *loopHeader = loopRep->getHeader();
-
- return curBlk->isSuccessor(loopHeader);
-
-} //hasBackEdge
-
-template<class PassT>
-unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
- return loopRep ? loopRep->getLoopDepth() : 0;
-} //getLoopDepth
-
-template<class PassT>
-int CFGStructurizer<PassT>::countActiveBlock
-(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
- typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
- int count = 0;
- while (iterStart != iterEnd) {
- if (!isRetiredBlock(*iterStart)) {
- ++count;
- }
- ++iterStart;
- }
-
- return count;
-} //countActiveBlock
-
-// This is work around solution for findNearestCommonDominator not avaiable to
-// post dom a proper fix should go to Dominators.h.
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT*
-CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
-
- if (postDomTree->dominates(blk1, blk2)) {
- return blk1;
- }
- if (postDomTree->dominates(blk2, blk1)) {
- return blk2;
- }
-
- DomTreeNodeT *node1 = postDomTree->getNode(blk1);
- DomTreeNodeT *node2 = postDomTree->getNode(blk2);
-
- // Handle newly cloned node.
- if (node1 == NULL && blk1->succ_size() == 1) {
- return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
- }
- if (node2 == NULL && blk2->succ_size() == 1) {
- return findNearestCommonPostDom(blk1, *blk2->succ_begin());
- }
-
- if (node1 == NULL || node2 == NULL) {
- return NULL;
- }
-
- node1 = node1->getIDom();
- while (node1) {
- if (postDomTree->dominates(node1, node2)) {
- return node1->getBlock();
- }
- node1 = node1->getIDom();
- }
-
- return NULL;
-}
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::findNearestCommonPostDom
-(typename std::set<BlockT *> &blks) {
- BlockT *commonDom;
- typename std::set<BlockT *>::const_iterator iter = blks.begin();
- typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
- for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
- BlockT *curBlk = *iter;
- if (curBlk != commonDom) {
- commonDom = findNearestCommonPostDom(curBlk, commonDom);
- }
- }
-
- if (DEBUGME) {
- errs() << "Common post dominator for exit blocks is ";
- if (commonDom) {
- errs() << "BB" << commonDom->getNumber() << "\n";
- } else {
- errs() << "NULL\n";
- }
- }
-
- return commonDom;
-} //findNearestCommonPostDom
-
-} //end namespace llvm
-
-//todo: move-end
-
-
-//===----------------------------------------------------------------------===//
-//
-// CFGStructurizer for AMDGPU
-//
-//===----------------------------------------------------------------------===//
-
-
-using namespace llvmCFGStruct;
-
-namespace llvm
-{
-class AMDGPUCFGStructurizer : public MachineFunctionPass
-{
-public:
- typedef MachineInstr InstructionType;
- typedef MachineFunction FunctionType;
- typedef MachineBasicBlock BlockType;
- typedef MachineLoopInfo LoopinfoType;
- typedef MachineDominatorTree DominatortreeType;
- typedef MachinePostDominatorTree PostDominatortreeType;
- typedef MachineDomTreeNode DomTreeNodeType;
- typedef MachineLoop LoopType;
-
-protected:
- TargetMachine &TM;
- const TargetInstrInfo *TII;
- const AMDGPURegisterInfo *TRI;
-
-public:
- AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
- const TargetInstrInfo *getTargetInstrInfo() const;
- //bool runOnMachineFunction(MachineFunction &F);
-
-private:
-
-}; //end of class AMDGPUCFGStructurizer
-
-//char AMDGPUCFGStructurizer::ID = 0;
-} //end of namespace llvm
-AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm
- )
-: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
- TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())
- ) {
-}
-
-const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
- return TII;
-}
-//===----------------------------------------------------------------------===//
-//
-// CFGPrepare
-//
-//===----------------------------------------------------------------------===//
-
-
-using namespace llvmCFGStruct;
-
-namespace llvm
-{
-class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer
-{
-public:
- static char ID;
-
-public:
- AMDGPUCFGPrepare(TargetMachine &tm);
-
- virtual const char *getPassName() const;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
- bool runOnMachineFunction(MachineFunction &F);
-
-private:
-
-}; //end of class AMDGPUCFGPrepare
-
-char AMDGPUCFGPrepare::ID = 0;
-} //end of namespace llvm
-
-AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
- : AMDGPUCFGStructurizer(ID, tm )
-{
-}
-const char *AMDGPUCFGPrepare::getPassName() const {
- return "AMD IL Control Flow Graph Preparation Pass";
-}
-
-void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
-}
-
-//===----------------------------------------------------------------------===//
-//
-// CFGPerform
-//
-//===----------------------------------------------------------------------===//
-
-
-using namespace llvmCFGStruct;
-
-namespace llvm
-{
-class AMDGPUCFGPerform : public AMDGPUCFGStructurizer
-{
-public:
- static char ID;
-
-public:
- AMDGPUCFGPerform(TargetMachine &tm);
- virtual const char *getPassName() const;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- bool runOnMachineFunction(MachineFunction &F);
-
-private:
-
-}; //end of class AMDGPUCFGPerform
-
-char AMDGPUCFGPerform::ID = 0;
-} //end of namespace llvm
-
- AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
-: AMDGPUCFGStructurizer(ID, tm)
-{
-}
-
-const char *AMDGPUCFGPerform::getPassName() const {
- return "AMD IL Control Flow Graph structurizer Pass";
-}
-
-void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
-}
-
-//===----------------------------------------------------------------------===//
-//
-// CFGStructTraits<AMDGPUCFGStructurizer>
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvmCFGStruct
-{
-// this class is tailor to the AMDGPU backend
-template<>
-struct CFGStructTraits<AMDGPUCFGStructurizer>
-{
- typedef int RegiT;
-
- static int getBreakNzeroOpcode(int oldOpcode) {
- switch(oldOpcode) {
- case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
- default:
- assert(0 && "internal error");
- };
- return -1;
- }
-
- static int getBreakZeroOpcode(int oldOpcode) {
- switch(oldOpcode) {
- case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
- default:
- assert(0 && "internal error");
- };
- return -1;
- }
-
- static int getBranchNzeroOpcode(int oldOpcode) {
- switch(oldOpcode) {
- case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
- ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
- case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
- default:
- assert(0 && "internal error");
- };
- return -1;
- }
-
- static int getBranchZeroOpcode(int oldOpcode) {
- switch(oldOpcode) {
- case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
- ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
- case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
- default:
- assert(0 && "internal error");
- };
- return -1;
- }
-
- static int getContinueNzeroOpcode(int oldOpcode)
- {
- switch(oldOpcode) {
- case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
- default:
- assert(0 && "internal error");
- };
- return -1;
- }
-
- static int getContinueZeroOpcode(int oldOpcode) {
- switch(oldOpcode) {
- case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
- default:
- assert(0 && "internal error");
- };
- return -1;
- }
-
-// the explicitly represented branch target is the true branch target
-#define getExplicitBranch getTrueBranch
-#define setExplicitBranch setTrueBranch
-
- static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
- return instr->getOperand(0).getMBB();
- }
-
- static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
- instr->getOperand(0).setMBB(blk);
- }
-
- static MachineBasicBlock *
- getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
- assert(blk->succ_size() == 2);
- MachineBasicBlock *trueBranch = getTrueBranch(instr);
- MachineBasicBlock::succ_iterator iter = blk->succ_begin();
- MachineBasicBlock::succ_iterator iterNext = iter;
- ++iterNext;
-
- return (*iter == trueBranch) ? *iterNext : *iter;
- }
-
- static bool isCondBranch(MachineInstr *instr) {
- switch (instr->getOpcode()) {
- case AMDGPU::JUMP:
- return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
- ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
- case AMDGPU::SI_IF_NZ:
- case AMDGPU::SI_IF_Z:
- break;
- default:
- return false;
- }
- return true;
- }
-
- static bool isUncondBranch(MachineInstr *instr) {
- switch (instr->getOpcode()) {
- case AMDGPU::JUMP:
- return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
- case AMDGPU::BRANCH:
- return true;
- default:
- return false;
- }
- return true;
- }
-
- static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
- //get DebugLoc from the first MachineBasicBlock instruction with debug info
- DebugLoc DL;
- for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
- MachineInstr *instr = &(*iter);
- if (instr->getDebugLoc().isUnknown() == false) {
- DL = instr->getDebugLoc();
- }
- }
- return DL;
- }
-
- static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
- MachineBasicBlock::reverse_iterator iter = blk->rbegin();
- MachineInstr *instr = &*iter;
- if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
- return instr;
- }
- return NULL;
- }
-
- // The correct naming for this is getPossibleLoopendBlockBranchInstr.
- //
- // BB with backward-edge could have move instructions after the branch
- // instruction. Such move instruction "belong to" the loop backward-edge.
- //
- static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
- const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
- blk->getParent()->getTarget().getInstrInfo());
-
- for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
- iterEnd = blk->rend(); iter != iterEnd; ++iter) {
- // FIXME: Simplify
- MachineInstr *instr = &*iter;
- if (instr) {
- if (isCondBranch(instr) || isUncondBranch(instr)) {
- return instr;
- } else if (!TII->isMov(instr->getOpcode())) {
- break;
- }
- }
- }
- return NULL;
- }
-
- static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
- MachineBasicBlock::reverse_iterator iter = blk->rbegin();
- if (iter != blk->rend()) {
- MachineInstr *instr = &(*iter);
- if (instr->getOpcode() == AMDGPU::RETURN) {
- return instr;
- }
- }
- return NULL;
- }
-
- static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
- MachineBasicBlock::reverse_iterator iter = blk->rbegin();
- if (iter != blk->rend()) {
- MachineInstr *instr = &(*iter);
- if (instr->getOpcode() == AMDGPU::CONTINUE) {
- return instr;
- }
- }
- return NULL;
- }
-
- static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
- for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
- MachineInstr *instr = &(*iter);
- if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) {
- return instr;
- }
- }
- return NULL;
- }
-
- static bool isReturnBlock(MachineBasicBlock *blk) {
- MachineInstr *instr = getReturnInstr(blk);
- bool isReturn = (blk->succ_size() == 0);
- if (instr) {
- assert(isReturn);
- } else if (isReturn) {
- if (DEBUGME) {
- errs() << "BB" << blk->getNumber()
- <<" is return block without RETURN instr\n";
- }
- }
-
- return isReturn;
- }
-
- static MachineBasicBlock::iterator
- getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
- assert(instr->getParent() == blk && "instruction doesn't belong to block");
- MachineBasicBlock::iterator iter = blk->begin();
- MachineBasicBlock::iterator iterEnd = blk->end();
- while (&(*iter) != instr && iter != iterEnd) {
- ++iter;
- }
-
- assert(iter != iterEnd);
- return iter;
- }//getInstrPos
-
- static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
- AMDGPUCFGStructurizer *passRep) {
- return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
- } //insertInstrBefore
-
- static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
- AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
-
- MachineBasicBlock::iterator res;
- if (blk->begin() != blk->end()) {
- blk->insert(blk->begin(), newInstr);
- } else {
- blk->push_back(newInstr);
- }
-
- SHOWNEWINSTR(newInstr);
-
- return newInstr;
- } //insertInstrBefore
-
- static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
- AMDGPUCFGStructurizer *passRep) {
- insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
- } //insertInstrEnd
-
- static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
- AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineInstr *newInstr = blk->getParent()
- ->CreateMachineInstr(tii->get(newOpcode), DL);
-
- blk->push_back(newInstr);
- //assume the instruction doesn't take any reg operand ...
-
- SHOWNEWINSTR(newInstr);
- } //insertInstrEnd
-
- static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
- int newOpcode,
- AMDGPUCFGStructurizer *passRep) {
- MachineInstr *oldInstr = &(*instrPos);
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineBasicBlock *blk = oldInstr->getParent();
- MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
- DebugLoc());
-
- blk->insert(instrPos, newInstr);
- //assume the instruction doesn't take any reg operand ...
-
- SHOWNEWINSTR(newInstr);
- return newInstr;
- } //insertInstrBefore
-
- static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
- int newOpcode,
- AMDGPUCFGStructurizer *passRep,
- DebugLoc DL) {
- MachineInstr *oldInstr = &(*instrPos);
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineBasicBlock *blk = oldInstr->getParent();
- MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
- DL);
-
- blk->insert(instrPos, newInstr);
- MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
- false);
-
- SHOWNEWINSTR(newInstr);
- //erase later oldInstr->eraseFromParent();
- } //insertCondBranchBefore
-
- static void insertCondBranchBefore(MachineBasicBlock *blk,
- MachineBasicBlock::iterator insertPos,
- int newOpcode,
- AMDGPUCFGStructurizer *passRep,
- RegiT regNum,
- DebugLoc DL) {
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-
- MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
-
- //insert before
- blk->insert(insertPos, newInstr);
- MachineInstrBuilder(newInstr).addReg(regNum, false);
-
- SHOWNEWINSTR(newInstr);
- } //insertCondBranchBefore
-
- static void insertCondBranchEnd(MachineBasicBlock *blk,
- int newOpcode,
- AMDGPUCFGStructurizer *passRep,
- RegiT regNum) {
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
-
- blk->push_back(newInstr);
- MachineInstrBuilder(newInstr).addReg(regNum, false);
-
- SHOWNEWINSTR(newInstr);
- } //insertCondBranchEnd
-
-
- static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
- AMDGPUCFGStructurizer *passRep,
- RegiT regNum, int regVal) {
- MachineInstr *oldInstr = &(*instrPos);
- const AMDGPUInstrInfo *tii =
- static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
- MachineBasicBlock *blk = oldInstr->getParent();
- MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
- regVal);
- blk->insert(instrPos, newInstr);
-
- SHOWNEWINSTR(newInstr);
- } //insertAssignInstrBefore
-
- static void insertAssignInstrBefore(MachineBasicBlock *blk,
- AMDGPUCFGStructurizer *passRep,
- RegiT regNum, int regVal) {
- const AMDGPUInstrInfo *tii =
- static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
-
- MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
- regVal);
- if (blk->begin() != blk->end()) {
- blk->insert(blk->begin(), newInstr);
- } else {
- blk->push_back(newInstr);
- }
-
- SHOWNEWINSTR(newInstr);
-
- } //insertInstrBefore
-
- static void insertCompareInstrBefore(MachineBasicBlock *blk,
- MachineBasicBlock::iterator instrPos,
- AMDGPUCFGStructurizer *passRep,
- RegiT dstReg, RegiT src1Reg,
- RegiT src2Reg) {
- const AMDGPUInstrInfo *tii =
- static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
- MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
-
- MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
- MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
- MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
-
- blk->insert(instrPos, newInstr);
- SHOWNEWINSTR(newInstr);
-
- } //insertCompareInstrBefore
-
- static void cloneSuccessorList(MachineBasicBlock *dstBlk,
- MachineBasicBlock *srcBlk) {
- for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
- iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
- dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
- }
- } //cloneSuccessorList
-
- static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
- MachineFunction *func = srcBlk->getParent();
- MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
- func->push_back(newBlk); //insert to function
- //newBlk->setNumber(srcBlk->getNumber());
- for (MachineBasicBlock::iterator iter = srcBlk->begin(),
- iterEnd = srcBlk->end();
- iter != iterEnd; ++iter) {
- MachineInstr *instr = func->CloneMachineInstr(iter);
- newBlk->push_back(instr);
- }
- return newBlk;
- }
-
- //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
- //the AMDGPU instruction is not recognized as terminator fix this and retire
- //this routine
- static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
- MachineBasicBlock *oldBlk,
- MachineBasicBlock *newBlk) {
- MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
- if (branchInstr && isCondBranch(branchInstr) &&
- getExplicitBranch(branchInstr) == oldBlk) {
- setExplicitBranch(branchInstr, newBlk);
- }
- }
-
- static void wrapup(MachineBasicBlock *entryBlk) {
- assert((!entryBlk->getParent()->getJumpTableInfo()
- || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
- && "found a jump table");
-
- //collect continue right before endloop
- SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
- MachineBasicBlock::iterator pre = entryBlk->begin();
- MachineBasicBlock::iterator iterEnd = entryBlk->end();
- MachineBasicBlock::iterator iter = pre;
- while (iter != iterEnd) {
- if (pre->getOpcode() == AMDGPU::CONTINUE
- && iter->getOpcode() == AMDGPU::ENDLOOP) {
- contInstr.push_back(pre);
- }
- pre = iter;
- ++iter;
- } //end while
-
- //delete continue right before endloop
- for (unsigned i = 0; i < contInstr.size(); ++i) {
- contInstr[i]->eraseFromParent();
- }
-
- // TODO to fix up jump table so later phase won't be confused. if
- // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
- // there isn't such an interface yet. alternatively, replace all the other
- // blocks in the jump table with the entryBlk //}
-
- } //wrapup
-
- static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
- return &pass.getAnalysis<MachineDominatorTree>();
- }
-
- static MachinePostDominatorTree*
- getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
- return &pass.getAnalysis<MachinePostDominatorTree>();
- }
-
- static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
- return &pass.getAnalysis<MachineLoopInfo>();
- }
-}; // template class CFGStructTraits
-} //end of namespace llvm
-
-// createAMDGPUCFGPreparationPass- Returns a pass
-FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
- ) {
- return new AMDGPUCFGPrepare(tm );
-}
-
-bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
- return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
- *this,
- TRI);
-}
-
-// createAMDGPUCFGStructurizerPass- Returns a pass
-FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
- ) {
- return new AMDGPUCFGPerform(tm );
-}
-
-bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
- return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
- *this,
- TRI);
-}
-
-//end of file newline goes below
-
+++ /dev/null
-//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDILDevice.h"
-#include "AMDGPUSubtarget.h"
-
-using namespace llvm;
-// Default implementation for all of the classes.
-AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST)
-{
- mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
- mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
- setCaps();
- mDeviceFlag = OCL_DEVICE_ALL;
-}
-
-AMDGPUDevice::~AMDGPUDevice()
-{
- mHWBits.clear();
- mSWBits.clear();
-}
-
-size_t AMDGPUDevice::getMaxGDSSize() const
-{
- return 0;
-}
-
-uint32_t
-AMDGPUDevice::getDeviceFlag() const
-{
- return mDeviceFlag;
-}
-
-size_t AMDGPUDevice::getMaxNumCBs() const
-{
- if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
- return HW_MAX_NUM_CB;
- }
-
- return 0;
-}
-
-size_t AMDGPUDevice::getMaxCBSize() const
-{
- if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
- return MAX_CB_SIZE;
- }
-
- return 0;
-}
-
-size_t AMDGPUDevice::getMaxScratchSize() const
-{
- return 65536;
-}
-
-uint32_t AMDGPUDevice::getStackAlignment() const
-{
- return 16;
-}
-
-void AMDGPUDevice::setCaps()
-{
- mSWBits.set(AMDGPUDeviceInfo::HalfOps);
- mSWBits.set(AMDGPUDeviceInfo::ByteOps);
- mSWBits.set(AMDGPUDeviceInfo::ShortOps);
- mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
- if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
- mSWBits.set(AMDGPUDeviceInfo::NoInline);
- }
- if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
- mSWBits.set(AMDGPUDeviceInfo::MacroDB);
- }
- if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
- mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
- } else {
- mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
- }
- if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
- mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
- } else {
- mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
- }
- if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
- mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
- }
- mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
- mSWBits.set(AMDGPUDeviceInfo::LongOps);
-}
-
-AMDGPUDeviceInfo::ExecutionMode
-AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const
-{
- if (mHWBits[Caps]) {
- assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
- return AMDGPUDeviceInfo::Hardware;
- }
-
- if (mSWBits[Caps]) {
- assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
- return AMDGPUDeviceInfo::Software;
- }
-
- return AMDGPUDeviceInfo::Unsupported;
-
-}
-
-bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const
-{
- return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
-}
-
-bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const
-{
- return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
-}
-
-bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const
-{
- return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
-}
-
-std::string
-AMDGPUDevice::getDataLayout() const
-{
- return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
- "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
- "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
- "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
- "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
- "-n8:16:32:64");
-}
+++ /dev/null
-//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===----------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
-// specific hardware.
-//===----------------------------------------------------------------------===//
-#ifndef _AMDILDEVICEIMPL_H_
-#define _AMDILDEVICEIMPL_H_
-#include "AMDIL.h"
-#include "llvm/ADT/BitVector.h"
-
-namespace llvm {
- class AMDGPUSubtarget;
- class MCStreamer;
-//===----------------------------------------------------------------------===//
-// Interface for data that is specific to a single device
-//===----------------------------------------------------------------------===//
-class AMDGPUDevice {
-public:
- AMDGPUDevice(AMDGPUSubtarget *ST);
- virtual ~AMDGPUDevice();
-
- // Enum values for the various memory types.
- enum {
- RAW_UAV_ID = 0,
- ARENA_UAV_ID = 1,
- LDS_ID = 2,
- GDS_ID = 3,
- SCRATCH_ID = 4,
- CONSTANT_ID = 5,
- GLOBAL_ID = 6,
- MAX_IDS = 7
- } IO_TYPE_IDS;
-
- // Returns the max LDS size that the hardware supports. Size is in
- // bytes.
- virtual size_t getMaxLDSSize() const = 0;
-
- // Returns the max GDS size that the hardware supports if the GDS is
- // supported by the hardware. Size is in bytes.
- virtual size_t getMaxGDSSize() const;
-
- // Returns the max number of hardware constant address spaces that
- // are supported by this device.
- virtual size_t getMaxNumCBs() const;
-
- // Returns the max number of bytes a single hardware constant buffer
- // can support. Size is in bytes.
- virtual size_t getMaxCBSize() const;
-
- // Returns the max number of bytes allowed by the hardware scratch
- // buffer. Size is in bytes.
- virtual size_t getMaxScratchSize() const;
-
- // Get the flag that corresponds to the device.
- virtual uint32_t getDeviceFlag() const;
-
- // Returns the number of work-items that exist in a single hardware
- // wavefront.
- virtual size_t getWavefrontSize() const = 0;
-
- // Get the generational name of this specific device.
- virtual uint32_t getGeneration() const = 0;
-
- // Get the stack alignment of this specific device.
- virtual uint32_t getStackAlignment() const;
-
- // Get the resource ID for this specific device.
- virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
-
- // Get the max number of UAV's for this device.
- virtual uint32_t getMaxNumUAVs() const = 0;
-
- // API utilizing more detailed capabilities of each family of
- // cards. If a capability is supported, then either usesHardware or
- // usesSoftware returned true. If usesHardware returned true, then
- // usesSoftware must return false for the same capability. Hardware
- // execution means that the feature is done natively by the hardware
- // and is not emulated by the softare. Software execution means
- // that the feature could be done in the hardware, but there is
- // software that emulates it with possibly using the hardware for
- // support since the hardware does not fully comply with OpenCL
- // specs.
- bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
- bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
- bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
- virtual std::string getDataLayout() const;
- static const unsigned int MAX_LDS_SIZE_700 = 16384;
- static const unsigned int MAX_LDS_SIZE_800 = 32768;
- static const unsigned int WavefrontSize = 64;
- static const unsigned int HalfWavefrontSize = 32;
- static const unsigned int QuarterWavefrontSize = 16;
-protected:
- virtual void setCaps();
- llvm::BitVector mHWBits;
- llvm::BitVector mSWBits;
- AMDGPUSubtarget *mSTM;
- uint32_t mDeviceFlag;
-private:
- AMDGPUDeviceInfo::ExecutionMode
- getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
-}; // AMDILDevice
-
-} // namespace llvm
-#endif // _AMDILDEVICEIMPL_H_
+++ /dev/null
-//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Function that creates DeviceInfo from a device name and other information.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDILDevices.h"
-#include "AMDGPUSubtarget.h"
-
-using namespace llvm;
-namespace llvm {
-namespace AMDGPUDeviceInfo {
- AMDGPUDevice*
-getDeviceFromName(const std::string &deviceName, AMDGPUSubtarget *ptr,
- bool is64bit, bool is64on32bit)
-{
- if (deviceName.c_str()[2] == '7') {
- switch (deviceName.c_str()[3]) {
- case '1':
- return new AMDGPU710Device(ptr);
- case '7':
- return new AMDGPU770Device(ptr);
- default:
- return new AMDGPU7XXDevice(ptr);
- };
- } else if (deviceName == "cypress") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDGPUCypressDevice(ptr);
- } else if (deviceName == "juniper") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDGPUEvergreenDevice(ptr);
- } else if (deviceName == "redwood") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDGPURedwoodDevice(ptr);
- } else if (deviceName == "cedar") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDGPUCedarDevice(ptr);
- } else if (deviceName == "barts"
- || deviceName == "turks") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDGPUNIDevice(ptr);
- } else if (deviceName == "cayman") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDGPUCaymanDevice(ptr);
- } else if (deviceName == "caicos") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDGPUNIDevice(ptr);
- } else if (deviceName == "SI") {
- return new AMDGPUSIDevice(ptr);
- } else {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDGPU7XXDevice(ptr);
- }
-}
-} // End namespace AMDGPUDeviceInfo
-} // End namespace llvm
+++ /dev/null
-//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#ifndef _AMDILDEVICEINFO_H_
-#define _AMDILDEVICEINFO_H_
-
-
-#include <string>
-
-namespace llvm
-{
- class AMDGPUDevice;
- class AMDGPUSubtarget;
- namespace AMDGPUDeviceInfo
- {
- // Each Capabilities can be executed using a hardware instruction,
- // emulated with a sequence of software instructions, or not
- // supported at all.
- enum ExecutionMode {
- Unsupported = 0, // Unsupported feature on the card(Default value)
- Software, // This is the execution mode that is set if the
- // feature is emulated in software
- Hardware // This execution mode is set if the feature exists
- // natively in hardware
- };
-
- // Any changes to this needs to have a corresponding update to the
- // twiki page GPUMetadataABI
- enum Caps {
- HalfOps = 0x1, // Half float is supported or not.
- DoubleOps = 0x2, // Double is supported or not.
- ByteOps = 0x3, // Byte(char) is support or not.
- ShortOps = 0x4, // Short is supported or not.
- LongOps = 0x5, // Long is supported or not.
- Images = 0x6, // Images are supported or not.
- ByteStores = 0x7, // ByteStores available(!HD4XXX).
- ConstantMem = 0x8, // Constant/CB memory.
- LocalMem = 0x9, // Local/LDS memory.
- PrivateMem = 0xA, // Scratch/Private/Stack memory.
- RegionMem = 0xB, // OCL GDS Memory Extension.
- FMA = 0xC, // Use HW FMA or SW FMA.
- ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
- MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
- Reserved0 = 0xF, // ReservedFlag
- NoAlias = 0x10, // Cached loads.
- Signed24BitOps = 0x11, // Peephole Optimization.
- // Debug mode implies that no hardware features or optimizations
- // are performned and that all memory access go through a single
- // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
- Debug = 0x12, // Debug mode is enabled.
- CachedMem = 0x13, // Cached mem is available or not.
- BarrierDetect = 0x14, // Detect duplicate barriers.
- Reserved1 = 0x15, // Reserved flag
- ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
- ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
- TmrReg = 0x18, // Flag to specify if Tmr register is supported.
- NoInline = 0x19, // Flag to specify that no inlining should occur.
- MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
- HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
- ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
- PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
- // If more capabilities are required, then
- // this number needs to be increased.
- // All capabilities must come before this
- // number.
- MaxNumberCapabilities = 0x20
- };
- // These have to be in order with the older generations
- // having the lower number enumerations.
- enum Generation {
- HD4XXX = 0, // 7XX based devices.
- HD5XXX, // Evergreen based devices.
- HD6XXX, // NI/Evergreen+ based devices.
- HD7XXX,
- HDTEST, // Experimental feature testing device.
- HDNUMGEN
- };
-
-
- AMDGPUDevice*
- getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
- bool is64bit = false, bool is64on32bit = false);
- } // namespace AMDILDeviceInfo
-} // namespace llvm
-#endif // _AMDILDEVICEINFO_H_
+++ /dev/null
-//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#ifndef __AMDIL_DEVICES_H_
-#define __AMDIL_DEVICES_H_
-// Include all of the device specific header files
-// This file is for Internal use only!
-#include "AMDIL7XXDevice.h"
-#include "AMDILDevice.h"
-#include "AMDILEvergreenDevice.h"
-#include "AMDILNIDevice.h"
-#include "AMDILSIDevice.h"
-
-#endif // _AMDIL_DEVICES_H_
+++ /dev/null
-//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDILEvergreenDevice.h"
-
-using namespace llvm;
-
-AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
-: AMDGPUDevice(ST) {
- setCaps();
- std::string name = ST->getDeviceName();
- if (name == "cedar") {
- mDeviceFlag = OCL_DEVICE_CEDAR;
- } else if (name == "redwood") {
- mDeviceFlag = OCL_DEVICE_REDWOOD;
- } else if (name == "cypress") {
- mDeviceFlag = OCL_DEVICE_CYPRESS;
- } else {
- mDeviceFlag = OCL_DEVICE_JUNIPER;
- }
-}
-
-AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
-}
-
-size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
- if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
- return MAX_LDS_SIZE_800;
- } else {
- return 0;
- }
-}
-size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
- if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
- return MAX_LDS_SIZE_800;
- } else {
- return 0;
- }
-}
-uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
- return 12;
-}
-
-uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
- switch(id) {
- default:
- assert(0 && "ID type passed in is unknown!");
- break;
- case CONSTANT_ID:
- case RAW_UAV_ID:
- return GLOBAL_RETURN_RAW_UAV_ID;
- case GLOBAL_ID:
- case ARENA_UAV_ID:
- return DEFAULT_ARENA_UAV_ID;
- case LDS_ID:
- if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
- return DEFAULT_LDS_ID;
- } else {
- return DEFAULT_ARENA_UAV_ID;
- }
- case GDS_ID:
- if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
- return DEFAULT_GDS_ID;
- } else {
- return DEFAULT_ARENA_UAV_ID;
- }
- case SCRATCH_ID:
- if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
- return DEFAULT_SCRATCH_ID;
- } else {
- return DEFAULT_ARENA_UAV_ID;
- }
- };
- return 0;
-}
-
-size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
- return AMDGPUDevice::WavefrontSize;
-}
-
-uint32_t AMDGPUEvergreenDevice::getGeneration() const {
- return AMDGPUDeviceInfo::HD5XXX;
-}
-
-void AMDGPUEvergreenDevice::setCaps() {
- mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
- mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
- mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
- mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
- mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
- if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
- mHWBits.set(AMDGPUDeviceInfo::ByteStores);
- }
- if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
- mSWBits.set(AMDGPUDeviceInfo::LocalMem);
- mSWBits.set(AMDGPUDeviceInfo::RegionMem);
- } else {
- mHWBits.set(AMDGPUDeviceInfo::LocalMem);
- mHWBits.set(AMDGPUDeviceInfo::RegionMem);
- }
- mHWBits.set(AMDGPUDeviceInfo::Images);
- if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
- mHWBits.set(AMDGPUDeviceInfo::NoAlias);
- }
- mHWBits.set(AMDGPUDeviceInfo::CachedMem);
- if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
- mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
- }
- mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
- mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
- mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
- mHWBits.set(AMDGPUDeviceInfo::LongOps);
- mSWBits.reset(AMDGPUDeviceInfo::LongOps);
- mHWBits.set(AMDGPUDeviceInfo::TmrReg);
-}
-
-AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
- : AMDGPUEvergreenDevice(ST) {
- setCaps();
-}
-
-AMDGPUCypressDevice::~AMDGPUCypressDevice() {
-}
-
-void AMDGPUCypressDevice::setCaps() {
- if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
- mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
- mHWBits.set(AMDGPUDeviceInfo::FMA);
- }
-}
-
-
-AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
- : AMDGPUEvergreenDevice(ST) {
- setCaps();
-}
-
-AMDGPUCedarDevice::~AMDGPUCedarDevice() {
-}
-
-void AMDGPUCedarDevice::setCaps() {
- mSWBits.set(AMDGPUDeviceInfo::FMA);
-}
-
-size_t AMDGPUCedarDevice::getWavefrontSize() const {
- return AMDGPUDevice::QuarterWavefrontSize;
-}
-
-AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
- : AMDGPUEvergreenDevice(ST) {
- setCaps();
-}
-
-AMDGPURedwoodDevice::~AMDGPURedwoodDevice()
-{
-}
-
-void AMDGPURedwoodDevice::setCaps() {
- mSWBits.set(AMDGPUDeviceInfo::FMA);
-}
-
-size_t AMDGPURedwoodDevice::getWavefrontSize() const {
- return AMDGPUDevice::HalfWavefrontSize;
-}
+++ /dev/null
-//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===----------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
-// specific hardware.
-//===----------------------------------------------------------------------===//
-#ifndef _AMDILEVERGREENDEVICE_H_
-#define _AMDILEVERGREENDEVICE_H_
-#include "AMDILDevice.h"
-#include "AMDGPUSubtarget.h"
-
-namespace llvm {
- class AMDGPUSubtarget;
-//===----------------------------------------------------------------------===//
-// Evergreen generation of devices and their respective sub classes
-//===----------------------------------------------------------------------===//
-
-
-// The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
-// series of cards. This class contains information required to differentiate
-// the Evergreen device from the generic AMDGPUDevice. This device represents
-// that capabilities of the 'Juniper' cards, also known as the HD57XX.
-class AMDGPUEvergreenDevice : public AMDGPUDevice {
-public:
- AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
- virtual ~AMDGPUEvergreenDevice();
- virtual size_t getMaxLDSSize() const;
- virtual size_t getMaxGDSSize() const;
- virtual size_t getWavefrontSize() const;
- virtual uint32_t getGeneration() const;
- virtual uint32_t getMaxNumUAVs() const;
- virtual uint32_t getResourceID(uint32_t) const;
-protected:
- virtual void setCaps();
-}; // AMDGPUEvergreenDevice
-
-// The AMDGPUCypressDevice is similiar to the AMDGPUEvergreenDevice, except it has
-// support for double precision operations. This device is used to represent
-// both the Cypress and Hemlock cards, which are commercially known as HD58XX
-// and HD59XX cards.
-class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
-public:
- AMDGPUCypressDevice(AMDGPUSubtarget *ST);
- virtual ~AMDGPUCypressDevice();
-private:
- virtual void setCaps();
-}; // AMDGPUCypressDevice
-
-
-// The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
-// devices. This class differs from the base AMDGPUEvergreenDevice in that the
-// device is a ~quarter of the 'Juniper'. These are commercially known as the
-// HD54XX and HD53XX series of cards.
-class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
-public:
- AMDGPUCedarDevice(AMDGPUSubtarget *ST);
- virtual ~AMDGPUCedarDevice();
- virtual size_t getWavefrontSize() const;
-private:
- virtual void setCaps();
-}; // AMDGPUCedarDevice
-
-// The AMDGPURedwoodDevice is the class the represents all of the 'Redwood' based
-// devices. This class differs from the base class, in that these devices are
-// considered about half of a 'Juniper' device. These are commercially known as
-// the HD55XX and HD56XX series of cards.
-class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
-public:
- AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
- virtual ~AMDGPURedwoodDevice();
- virtual size_t getWavefrontSize() const;
-private:
- virtual void setCaps();
-}; // AMDGPURedwoodDevice
-
-} // namespace llvm
-#endif // _AMDGPUEVERGREENDEVICE_H_
+++ /dev/null
-//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface to describe a layout of a stack frame on a AMDIL target machine
-//
-//===----------------------------------------------------------------------===//
-#include "AMDILFrameLowering.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-
-using namespace llvm;
-AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
- int LAO, unsigned TransAl)
- : TargetFrameLowering(D, StackAl, LAO, TransAl)
-{
-}
-
-AMDGPUFrameLowering::~AMDGPUFrameLowering()
-{
-}
-
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index.
-int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getObjectOffset(FI);
-}
-
-const TargetFrameLowering::SpillSlot *
-AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
-{
- NumEntries = 0;
- return 0;
-}
-void
-AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const
-{
-}
-void
-AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
-{
-}
-bool
-AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const
-{
- return false;
-}
+++ /dev/null
-//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Interface to describe a layout of a stack frame on a AMDIL target machine
-//
-//===----------------------------------------------------------------------===//
-#ifndef _AMDILFRAME_LOWERING_H_
-#define _AMDILFRAME_LOWERING_H_
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-/// Information about the stack frame layout on the AMDGPU targets. It holds
-/// the direction of the stack growth, the known stack alignment on entry to
-/// each function, and the offset to the locals area.
-/// See TargetFrameInfo for more comments.
-
-namespace llvm {
- class AMDGPUFrameLowering : public TargetFrameLowering {
- public:
- AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
- TransAl = 1);
- virtual ~AMDGPUFrameLowering();
- virtual int getFrameIndexOffset(const MachineFunction &MF,
- int FI) const;
- virtual const SpillSlot *
- getCalleeSavedSpillSlots(unsigned &NumEntries) const;
- virtual void emitPrologue(MachineFunction &MF) const;
- virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
- virtual bool hasFP(const MachineFunction &MF) const;
- }; // class AMDGPUFrameLowering
-} // namespace llvm
-#endif // _AMDILFRAME_LOWERING_H_
+++ /dev/null
-//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the AMDIL target.
-//
-//===----------------------------------------------------------------------===//
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUISelLowering.h" // For AMDGPUISD
-#include "AMDGPURegisterInfo.h"
-#include "AMDILDevices.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/ADT/ValueMap.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Support/Compiler.h"
-#include <list>
-#include <queue>
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// AMDGPUDAGToDAGISel - AMDGPU specific code to select AMDGPU machine instructions
-// //for SelectionDAG operations.
-//
-namespace {
-class AMDGPUDAGToDAGISel : public SelectionDAGISel {
- // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
- // make the right decision when generating code for different targets.
- const AMDGPUSubtarget &Subtarget;
-public:
- AMDGPUDAGToDAGISel(TargetMachine &TM);
- virtual ~AMDGPUDAGToDAGISel();
-
- SDNode *Select(SDNode *N);
- virtual const char *getPassName() const;
-
-private:
- inline SDValue getSmallIPtrImm(unsigned Imm);
-
- // Complex pattern selectors
- bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
- bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
- bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
-
- static bool checkType(const Value *ptr, unsigned int addrspace);
- static const Value *getBasePointerValue(const Value *V);
-
- static bool isGlobalStore(const StoreSDNode *N);
- static bool isPrivateStore(const StoreSDNode *N);
- static bool isLocalStore(const StoreSDNode *N);
- static bool isRegionStore(const StoreSDNode *N);
-
- static bool isCPLoad(const LoadSDNode *N);
- static bool isConstantLoad(const LoadSDNode *N, int cbID);
- static bool isGlobalLoad(const LoadSDNode *N);
- static bool isPrivateLoad(const LoadSDNode *N);
- static bool isLocalLoad(const LoadSDNode *N);
- static bool isRegionLoad(const LoadSDNode *N);
-
- bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
- bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
- bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
-
- // Include the pieces autogenerated from the target description.
-#include "AMDGPUGenDAGISel.inc"
-};
-} // end anonymous namespace
-
-// createAMDGPUISelDag - This pass converts a legalized DAG into a AMDGPU-specific
-// DAG, ready for instruction scheduling.
-//
-FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
- ) {
- return new AMDGPUDAGToDAGISel(TM);
-}
-
-AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM
- )
- : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>())
-{
-}
-
-AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
-}
-
-SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
-}
-
-bool AMDGPUDAGToDAGISel::SelectADDRParam(
- SDValue Addr, SDValue& R1, SDValue& R2) {
-
- if (Addr.getOpcode() == ISD::FrameIndex) {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
- }
- } else if (Addr.getOpcode() == ISD::ADD) {
- R1 = Addr.getOperand(0);
- R2 = Addr.getOperand(1);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
- }
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress) {
- return false;
- }
- return SelectADDRParam(Addr, R1, R2);
-}
-
-
-bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress) {
- return false;
- }
-
- if (Addr.getOpcode() == ISD::FrameIndex) {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
- }
- } else if (Addr.getOpcode() == ISD::ADD) {
- R1 = Addr.getOperand(0);
- R2 = Addr.getOperand(1);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
- }
- return true;
-}
-
-SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
- unsigned int Opc = N->getOpcode();
- if (N->isMachineOpcode()) {
- return NULL; // Already selected.
- }
- switch (Opc) {
- default: break;
- case ISD::FrameIndex:
- {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
- unsigned int FI = FIN->getIndex();
- EVT OpVT = N->getValueType(0);
- unsigned int NewOpc = AMDGPU::COPY;
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
- return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
- }
- }
- break;
- }
- return SelectCode(N);
-}
-
-bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
- if (!ptr) {
- return false;
- }
- Type *ptrType = ptr->getType();
- return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
-}
-
-const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V)
-{
- if (!V) {
- return NULL;
- }
- const Value *ret = NULL;
- ValueMap<const Value *, bool> ValueBitMap;
- std::queue<const Value *, std::list<const Value *> > ValueQueue;
- ValueQueue.push(V);
- while (!ValueQueue.empty()) {
- V = ValueQueue.front();
- if (ValueBitMap.find(V) == ValueBitMap.end()) {
- ValueBitMap[V] = true;
- if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
- ret = V;
- break;
- } else if (dyn_cast<GlobalVariable>(V)) {
- ret = V;
- break;
- } else if (dyn_cast<Constant>(V)) {
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
- if (CE) {
- ValueQueue.push(CE->getOperand(0));
- }
- } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
- ret = AI;
- break;
- } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
- uint32_t numOps = I->getNumOperands();
- for (uint32_t x = 0; x < numOps; ++x) {
- ValueQueue.push(I->getOperand(x));
- }
- } else {
- // assert(0 && "Found a Value that we didn't know how to handle!");
- }
- }
- ValueQueue.pop();
- }
- return ret;
-}
-
-bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
- return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
- return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
-}
-
-bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
- return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
- return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
- if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
- return true;
- }
- MachineMemOperand *MMO = N->getMemOperand();
- const Value *V = MMO->getValue();
- const Value *BV = getBasePointerValue(V);
- if (MMO
- && MMO->getValue()
- && ((V && dyn_cast<GlobalValue>(V))
- || (BV && dyn_cast<GlobalValue>(
- getBasePointerValue(MMO->getValue()))))) {
- return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
- } else {
- return false;
- }
-}
-
-bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
- return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
- return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
- return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
- MachineMemOperand *MMO = N->getMemOperand();
- if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
- if (MMO) {
- const Value *V = MMO->getValue();
- const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
- if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
- return true;
- }
- }
- }
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
- if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
- // Check to make sure we are not a constant pool load or a constant load
- // that is marked as a private load
- if (isCPLoad(N) || isConstantLoad(N, -1)) {
- return false;
- }
- }
- if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS))
- {
- return true;
- }
- return false;
-}
-
-const char *AMDGPUDAGToDAGISel::getPassName() const {
- return "AMDGPU DAG->DAG Pattern Instruction Selection";
-}
-
-#ifdef DEBUGTMP
-#undef INT64_C
-#endif
-#undef DEBUGTMP
-
-///==== AMDGPU Functions ====///
-
-bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
- SDValue& Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress) {
- return false;
- }
-
-
- if (Addr.getOpcode() == ISD::ADD) {
- bool Match = false;
-
- // Find the base ptr and the offset
- for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
- SDValue Arg = Addr.getOperand(i);
- ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
- // This arg isn't a constant so it must be the base PTR.
- if (!OffsetNode) {
- Base = Addr.getOperand(i);
- continue;
- }
- // Check if the constant argument fits in 8-bits. The offset is in bytes
- // so we need to convert it to dwords.
- if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) {
- Match = true;
- Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
- MVT::i32);
- }
- }
- return Match;
- }
-
- // Default case, no offset
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset)
-{
- ConstantSDNode * IMMOffset;
-
- if (Addr.getOpcode() == ISD::ADD
- && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && isInt<16>(IMMOffset->getZExtValue())) {
-
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
- return true;
- // If the pointer address is constant, we can move it to the offset field.
- } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
- && isInt<16>(IMMOffset->getZExtValue())) {
- Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
- CurDAG->getEntryNode().getDebugLoc(),
- AMDGPU::ZERO, MVT::i32);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
- return true;
- }
-
- // Default case, no offset
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
- SDValue& Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress ||
- Addr.getOpcode() != ISD::ADD) {
- return false;
- }
-
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
-
- return true;
-}
+++ /dev/null
-//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file contains TargetLowering functions borrowed from AMDLI.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUISelLowering.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDILDevices.h"
-#include "AMDILIntrinsicInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/CallingConv.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-
-using namespace llvm;
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-#include "AMDGPUGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation Help Functions End
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Class Implementation Begins
-//===----------------------------------------------------------------------===//
-void AMDGPUTargetLowering::InitAMDILLowering()
-{
- int types[] =
- {
- (int)MVT::i8,
- (int)MVT::i16,
- (int)MVT::i32,
- (int)MVT::f32,
- (int)MVT::f64,
- (int)MVT::i64,
- (int)MVT::v2i8,
- (int)MVT::v4i8,
- (int)MVT::v2i16,
- (int)MVT::v4i16,
- (int)MVT::v4f32,
- (int)MVT::v4i32,
- (int)MVT::v2f32,
- (int)MVT::v2i32,
- (int)MVT::v2f64,
- (int)MVT::v2i64
- };
-
- int IntTypes[] =
- {
- (int)MVT::i8,
- (int)MVT::i16,
- (int)MVT::i32,
- (int)MVT::i64
- };
-
- int FloatTypes[] =
- {
- (int)MVT::f32,
- (int)MVT::f64
- };
-
- int VectorTypes[] =
- {
- (int)MVT::v2i8,
- (int)MVT::v4i8,
- (int)MVT::v2i16,
- (int)MVT::v4i16,
- (int)MVT::v4f32,
- (int)MVT::v4i32,
- (int)MVT::v2f32,
- (int)MVT::v2i32,
- (int)MVT::v2f64,
- (int)MVT::v2i64
- };
- size_t numTypes = sizeof(types) / sizeof(*types);
- size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
- size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
- size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
-
- const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
- // These are the current register classes that are
- // supported
-
- for (unsigned int x = 0; x < numTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
-
- //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
- // We cannot sextinreg, expand to shifts
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
- setOperationAction(ISD::SUBE, VT, Expand);
- setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::ADDE, VT, Expand);
- setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::BRCOND, VT, Custom);
- setOperationAction(ISD::BR_JT, VT, Expand);
- setOperationAction(ISD::BRIND, VT, Expand);
- // TODO: Implement custom UREM/SREM routines
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- if (VT != MVT::i64 && VT != MVT::v2i64) {
- setOperationAction(ISD::SDIV, VT, Custom);
- }
- }
- for (unsigned int x = 0; x < numFloatTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
-
- // IL does not have these operations for floating point types
- setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
- setOperationAction(ISD::SETOLT, VT, Expand);
- setOperationAction(ISD::SETOGE, VT, Expand);
- setOperationAction(ISD::SETOGT, VT, Expand);
- setOperationAction(ISD::SETOLE, VT, Expand);
- setOperationAction(ISD::SETULT, VT, Expand);
- setOperationAction(ISD::SETUGE, VT, Expand);
- setOperationAction(ISD::SETUGT, VT, Expand);
- setOperationAction(ISD::SETULE, VT, Expand);
- }
-
- for (unsigned int x = 0; x < numIntTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
-
- // GPU also does not have divrem function for signed or unsigned
- setOperationAction(ISD::SDIVREM, VT, Expand);
-
- // GPU does not have [S|U]MUL_LOHI functions as a single instruction
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-
- // GPU doesn't have a rotl, rotr, or byteswap instruction
- setOperationAction(ISD::ROTR, VT, Expand);
- setOperationAction(ISD::BSWAP, VT, Expand);
-
- // GPU doesn't have any counting operators
- setOperationAction(ISD::CTPOP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
- }
-
- for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
- {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
-
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
- setOperationAction(ISD::SDIVREM, VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- // setOperationAction(ISD::VSETCC, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
-
- }
- if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
- setOperationAction(ISD::ADD, MVT::v2i64, Expand);
- setOperationAction(ISD::SREM, MVT::v2i64, Expand);
- setOperationAction(ISD::Constant , MVT::i64 , Legal);
- setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
- }
- if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
- // we support loading/storing v2f64 but not operations on the type
- setOperationAction(ISD::FADD, MVT::v2f64, Expand);
- setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
- setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
- // We want to expand vector conversions into their scalar
- // counterparts.
- setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
- setOperationAction(ISD::FABS, MVT::v2f64, Expand);
- }
- // TODO: Fix the UDIV24 algorithm so it works for these
- // types correctly. This needs vector comparisons
- // for this to work correctly.
- setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
- setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
- setOperationAction(ISD::SUBC, MVT::Other, Expand);
- setOperationAction(ISD::ADDE, MVT::Other, Expand);
- setOperationAction(ISD::ADDC, MVT::Other, Expand);
- setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
-
-
- // Use the default implementation.
- setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
- setOperationAction(ISD::Constant , MVT::i32 , Legal);
-
- setSchedulingPreference(Sched::RegPressure);
- setPow2DivIsCheap(false);
- setPrefLoopAlignment(16);
- setSelectIsExpensive(true);
- setJumpIsExpensive(true);
-
- maxStoresPerMemcpy = 4096;
- maxStoresPerMemmove = 4096;
- maxStoresPerMemset = 4096;
-
-#undef numTypes
-#undef numIntTypes
-#undef numVectorTypes
-#undef numFloatTypes
-}
-
-bool
-AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const
-{
- return false;
-}
-// The backend supports 32 and 64 bit floating point immediates
-bool
-AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
-{
- if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
- || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
- return true;
- } else {
- return false;
- }
-}
-
-bool
-AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
-{
- if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
- || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
- return false;
- } else {
- return true;
- }
-}
-
-
-// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
-// be zero. Op is expected to be a target specific node. Used by DAG
-// combiner.
-
-void
-AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
- const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const
-{
- APInt KnownZero2;
- APInt KnownOne2;
- KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
- switch (Op.getOpcode()) {
- default: break;
- case ISD::SELECT_CC:
- DAG.ComputeMaskedBits(
- Op.getOperand(1),
- KnownZero,
- KnownOne,
- Depth + 1
- );
- DAG.ComputeMaskedBits(
- Op.getOperand(0),
- KnownZero2,
- KnownOne2
- );
- assert((KnownZero & KnownOne) == 0
- && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0
- && "Bits known to be one AND zero?");
- // Only known if known in both the LHS and RHS
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
- break;
- };
-}
-
-//===----------------------------------------------------------------------===//
-// Other Lowering Hooks
-//===----------------------------------------------------------------------===//
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
-{
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerSDIV64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerSDIV32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16
- || OVT.getScalarType() == MVT::i8) {
- DST = LowerSDIV24(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
-{
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerSREM64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerSREM32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16) {
- DST = LowerSREM16(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i8) {
- DST = LowerSREM8(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Data = Op.getOperand(0);
- VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
- DebugLoc DL = Op.getDebugLoc();
- EVT DVT = Data.getValueType();
- EVT BVT = BaseType->getVT();
- unsigned baseBits = BVT.getScalarType().getSizeInBits();
- unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
- unsigned shiftBits = srcBits - baseBits;
- if (srcBits < 32) {
- // If the op is less than 32 bits, then it needs to extend to 32bits
- // so it can properly keep the upper bits valid.
- EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
- Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
- shiftBits = 32 - baseBits;
- DVT = IVT;
- }
- SDValue Shift = DAG.getConstant(shiftBits, DVT);
- // Shift left by 'Shift' bits.
- Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
- // Signed shift Right by 'Shift' bits.
- Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
- if (srcBits < 32) {
- // Once the sign extension is done, the op needs to be converted to
- // its original type.
- Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
- }
- return Data;
-}
-EVT
-AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
-{
- int iSize = (size * numEle);
- int vEle = (iSize >> ((size == 64) ? 6 : 5));
- if (!vEle) {
- vEle = 1;
- }
- if (size == 64) {
- if (vEle == 1) {
- return EVT(MVT::i64);
- } else {
- return EVT(MVT::getVectorVT(MVT::i64, vEle));
- }
- } else {
- if (vEle == 1) {
- return EVT(MVT::i32);
- } else {
- return EVT(MVT::getVectorVT(MVT::i32, vEle));
- }
- }
-}
-
-SDValue
-AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Chain = Op.getOperand(0);
- SDValue Cond = Op.getOperand(1);
- SDValue Jump = Op.getOperand(2);
- SDValue Result;
- Result = DAG.getNode(
- AMDGPUISD::BRANCH_COND,
- Op.getDebugLoc(),
- Op.getValueType(),
- Chain, Jump, Cond);
- return Result;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- MVT INTTY;
- MVT FLTTY;
- if (!OVT.isVector()) {
- INTTY = MVT::i32;
- FLTTY = MVT::f32;
- } else if (OVT.getVectorNumElements() == 2) {
- INTTY = MVT::v2i32;
- FLTTY = MVT::v2f32;
- } else if (OVT.getVectorNumElements() == 4) {
- INTTY = MVT::v4i32;
- FLTTY = MVT::v4f32;
- }
- unsigned bitsize = OVT.getScalarType().getSizeInBits();
- // char|short jq = ia ^ ib;
- SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
-
- // jq = jq >> (bitsize - 2)
- jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
-
- // jq = jq | 0x1
- jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
-
- // jq = (int)jq
- jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
-
- // int ia = (int)LHS;
- SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
-
- // int ib, (int)RHS;
- SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
-
- // float fa = (float)ia;
- SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
-
- // float fb = (float)ib;
- SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
-
- // float fq = native_divide(fa, fb);
- SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
-
- // fq = trunc(fq);
- fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
-
- // float fqneg = -fq;
- SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
-
- // float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
-
- // int iq = (int)fq;
- SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
-
- // fr = fabs(fr);
- fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
-
- // fb = fabs(fb);
- fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
-
- // int cv = fr >= fb;
- SDValue cv;
- if (INTTY == MVT::i32) {
- cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- } else {
- cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- }
- // jq = (cv ? jq : 0);
- jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
- DAG.getConstant(0, OVT));
- // dst = iq + jq;
- iq = DAG.getSExtOrTrunc(iq, DL, OVT);
- iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
- return iq;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSDIV32 function generates equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r0, r0, r1
- // ixor r10, r10, r11
- // iadd r0, r0, r10
- // ixor DST, r0, r10
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSelectCC(DL,
- r0, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- ISD::SETLT);
-
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSelectCC(DL,
- r1, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- ISD::SETLT);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-
- // udiv r0, r0, r1
- r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
-
- // ixor r10, r10, r11
- r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
-{
- return SDValue(Op.getNode(), 0);
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i8) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4i8) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
- SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
- LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
- LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
- return LHS;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i16) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4i16) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
- SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
- LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
- LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
- return LHS;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSREM32 function generates equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r20, r0, r1
- // umul r20, r20, r1
- // sub r0, r0, r20
- // iadd r0, r0, r10
- // ixor DST, r0, r10
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
-
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-
- // udiv r20, r0, r1
- SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
-
- // umul r20, r20, r1
- r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
-
- // sub r0, r0, r20
- r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
-{
- return SDValue(Op.getNode(), 0);
-}
+++ /dev/null
-//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file describes the AMDIL instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-// AMDIL Instruction Predicate Definitions
-// Predicate that is set to true if the hardware supports double precision
-// divide
-def HasHWDDiv : Predicate<"Subtarget.device()"
- "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
- "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
-
-// Predicate that is set to true if the hardware supports double, but not double
-// precision divide in hardware
-def HasSWDDiv : Predicate<"Subtarget.device()"
- "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
- "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
-
-// Predicate that is set to true if the hardware support 24bit signed
-// math ops. Otherwise a software expansion to 32bit math ops is used instead.
-def HasHWSign24Bit : Predicate<"Subtarget.device()"
- "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
-
-// Predicate that is set to true if 64bit operations are supported or not
-def HasHW64Bit : Predicate<"Subtarget.device()"
- "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
-def HasSW64Bit : Predicate<"Subtarget.device()"
- "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
-
-// Predicate that is set to true if the timer register is supported
-def HasTmrRegister : Predicate<"Subtarget.device()"
- "->isSupported(AMDGPUDeviceInfo::TmrReg)">;
-// Predicate that is true if we are at least evergreen series
-def HasDeviceIDInst : Predicate<"Subtarget.device()"
- "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
-
-// Predicate that is true if we have region address space.
-def hasRegionAS : Predicate<"Subtarget.device()"
- "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
-
-// Predicate that is false if we don't have region address space.
-def noRegionAS : Predicate<"!Subtarget.device()"
- "->isSupported(AMDGPUDeviceInfo::RegionMem)">;
-
-
-// Predicate that is set to true if 64bit Mul is supported in the IL or not
-def HasHW64Mul : Predicate<"Subtarget.calVersion()"
- ">= CAL_VERSION_SC_139"
- "&& Subtarget.device()"
- "->getGeneration() >="
- "AMDGPUDeviceInfo::HD5XXX">;
-def HasSW64Mul : Predicate<"Subtarget.calVersion()"
- "< CAL_VERSION_SC_139">;
-// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not
-def HasHW64DivMod : Predicate<"Subtarget.device()"
- "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
-def HasSW64DivMod : Predicate<"Subtarget.device()"
- "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
-
-// Predicate that is set to true if 64bit pointer are used.
-def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
-def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
-//===--------------------------------------------------------------------===//
-// Custom Operands
-//===--------------------------------------------------------------------===//
-def brtarget : Operand<OtherVT>;
-
-//===--------------------------------------------------------------------===//
-// Custom Selection DAG Type Profiles
-//===--------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Generic Profile Types
-//===----------------------------------------------------------------------===//
-
-def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
- ]>;
-def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
- ]>;
-def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
- SDTCisEltOfVec<1, 0>
- ]>;
-
-//===----------------------------------------------------------------------===//
-// Flow Control Profile Types
-//===----------------------------------------------------------------------===//
-// Branch instruction where second and third are basic blocks
-def SDTIL_BRCond : SDTypeProfile<0, 2, [
- SDTCisVT<0, OtherVT>
- ]>;
-
-//===--------------------------------------------------------------------===//
-// Custom Selection DAG Nodes
-//===--------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Flow Control DAG Nodes
-//===----------------------------------------------------------------------===//
-def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
-
-//===----------------------------------------------------------------------===//
-// Call/Return DAG Nodes
-//===----------------------------------------------------------------------===//
-def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-
-//===--------------------------------------------------------------------===//
-// Instructions
-//===--------------------------------------------------------------------===//
-// Floating point math functions
-def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
-def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
-
-//===----------------------------------------------------------------------===//
-// Integer functions
-//===----------------------------------------------------------------------===//
-def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]>;
-
-//===--------------------------------------------------------------------===//
-// Custom Pattern DAG Nodes
-//===--------------------------------------------------------------------===//
-def global_store : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
- return isGlobalStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Load pattern fragments
-//===----------------------------------------------------------------------===//
-// Global address space loads
-def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-// Constant address space loads
-def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Complex addressing mode patterns
-//===----------------------------------------------------------------------===//
-def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
-def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
-def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
-def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
-
-//===----------------------------------------------------------------------===//
-// Instruction format classes
-//===----------------------------------------------------------------------===//
-class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
-: Instruction {
-
- let Namespace = "AMDGPU";
- dag OutOperandList = outs;
- dag InOperandList = ins;
- let Pattern = pattern;
- let AsmString = !strconcat(asmstr, "\n");
- let isPseudo = 1;
- let Itinerary = NullALU;
- bit hasIEEEFlag = 0;
- bit hasZeroOpFlag = 0;
-}
-
-//===--------------------------------------------------------------------===//
-// Multiclass Instruction formats
-//===--------------------------------------------------------------------===//
-// Multiclass that handles branch instructions
-multiclass BranchConditional<SDNode Op> {
- def _i32 : ILFormat<(outs),
- (ins brtarget:$target, GPRI32:$src0),
- "; i32 Pseudo branch instruction",
- [(Op bb:$target, GPRI32:$src0)]>;
- def _f32 : ILFormat<(outs),
- (ins brtarget:$target, GPRF32:$src0),
- "; f32 Pseudo branch instruction",
- [(Op bb:$target, GPRF32:$src0)]>;
-}
-
-// Only scalar types should generate flow control
-multiclass BranchInstr<string name> {
- def _i32 : ILFormat<(outs), (ins GPRI32:$src),
- !strconcat(name, " $src"), []>;
- def _f32 : ILFormat<(outs), (ins GPRF32:$src),
- !strconcat(name, " $src"), []>;
-}
-// Only scalar types should generate flow control
-multiclass BranchInstr2<string name> {
- def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
- !strconcat(name, " $src0, $src1"), []>;
- def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
- !strconcat(name, " $src0, $src1"), []>;
-}
-
-//===--------------------------------------------------------------------===//
-// Intrinsics support
-//===--------------------------------------------------------------------===//
-include "AMDILIntrinsics.td"
-
-//===--------------------------------------------------------------------===//
-// Instructions support
-//===--------------------------------------------------------------------===//
-//===---------------------------------------------------------------------===//
-// Custom Inserter for Branches and returns, this eventually will be a
-// seperate pass
-//===---------------------------------------------------------------------===//
-let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
- def BRANCH : ILFormat<(outs), (ins brtarget:$target),
- "; Pseudo unconditional branch instruction",
- [(br bb:$target)]>;
- defm BRANCH_COND : BranchConditional<IL_brcond>;
-}
-
-//===---------------------------------------------------------------------===//
-// Flow and Program control Instructions
-//===---------------------------------------------------------------------===//
-let isTerminator=1 in {
- def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
- !strconcat("SWITCH", " $src"), []>;
- def CASE : ILFormat< (outs), (ins GPRI32:$src),
- !strconcat("CASE", " $src"), []>;
- def BREAK : ILFormat< (outs), (ins),
- "BREAK", []>;
- def CONTINUE : ILFormat< (outs), (ins),
- "CONTINUE", []>;
- def DEFAULT : ILFormat< (outs), (ins),
- "DEFAULT", []>;
- def ELSE : ILFormat< (outs), (ins),
- "ELSE", []>;
- def ENDSWITCH : ILFormat< (outs), (ins),
- "ENDSWITCH", []>;
- def ENDMAIN : ILFormat< (outs), (ins),
- "ENDMAIN", []>;
- def END : ILFormat< (outs), (ins),
- "END", []>;
- def ENDFUNC : ILFormat< (outs), (ins),
- "ENDFUNC", []>;
- def ENDIF : ILFormat< (outs), (ins),
- "ENDIF", []>;
- def WHILELOOP : ILFormat< (outs), (ins),
- "WHILE", []>;
- def ENDLOOP : ILFormat< (outs), (ins),
- "ENDLOOP", []>;
- def FUNC : ILFormat< (outs), (ins),
- "FUNC", []>;
- def RETDYN : ILFormat< (outs), (ins),
- "RET_DYN", []>;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
- defm IFC : BranchInstr2<"IFC">;
- defm BREAKC : BranchInstr2<"BREAKC">;
- defm CONTINUEC : BranchInstr2<"CONTINUEC">;
-}
+++ /dev/null
-//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file contains the AMDIL Implementation of the IntrinsicInfo class.
-//
-//===-----------------------------------------------------------------------===//
-
-#include "AMDILIntrinsicInfo.h"
-#include "AMDIL.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-
-using namespace llvm;
-
-#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-
-AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
- : TargetIntrinsicInfo()
-{
-}
-
-std::string
-AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
- unsigned int numTys) const
-{
- static const char* const names[] = {
-#define GET_INTRINSIC_NAME_TABLE
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_INTRINSIC_NAME_TABLE
- };
-
- //assert(!isOverloaded(IntrID)
- //&& "AMDGPU Intrinsics are not overloaded");
- if (IntrID < Intrinsic::num_intrinsics) {
- return 0;
- }
- assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
- && "Invalid intrinsic ID");
-
- std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
- return Result;
-}
-
-unsigned int
-AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
-{
-#define GET_FUNCTION_RECOGNIZER
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_FUNCTION_RECOGNIZER
- AMDGPUIntrinsic::ID IntrinsicID
- = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
- IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name);
-
- if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
- return IntrinsicID;
- }
- return 0;
-}
-
-bool
-AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const
-{
- // Overload Table
-#define GET_INTRINSIC_OVERLOAD_TABLE
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_INTRINSIC_OVERLOAD_TABLE
-}
-
-/// This defines the "getAttributes(ID id)" method.
-#define GET_INTRINSIC_ATTRIBUTES
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_INTRINSIC_ATTRIBUTES
-
-Function*
-AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- Type **Tys,
- unsigned numTys) const
-{
- //Silence a warning
- AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID);
- (void)List;
- assert(!"Not implemented");
-}
+++ /dev/null
-//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface for the AMDIL Implementation of the Intrinsic Info class.
-//
-//===-----------------------------------------------------------------------===//
-#ifndef _AMDIL_INTRINSICS_H_
-#define _AMDIL_INTRINSICS_H_
-
-#include "llvm/Intrinsics.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-
-namespace llvm {
- class TargetMachine;
- namespace AMDGPUIntrinsic {
- enum ID {
- last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
-#define GET_INTRINSIC_ENUM_VALUES
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_INTRINSIC_ENUM_VALUES
- , num_AMDGPU_intrinsics
- };
-
- }
-
-
- class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
- public:
- AMDGPUIntrinsicInfo(TargetMachine *tm);
- std::string getName(unsigned int IntrId, Type **Tys = 0,
- unsigned int numTys = 0) const;
- unsigned int lookupName(const char *Name, unsigned int Len) const;
- bool isOverloaded(unsigned int IID) const;
- Function *getDeclaration(Module *M, unsigned int ID,
- Type **Tys = 0,
- unsigned int numTys = 0) const;
- }; // AMDGPUIntrinsicInfo
-}
-
-#endif // _AMDIL_INTRINSICS_H_
-
+++ /dev/null
-//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file defines all of the amdil-specific intrinsics
-//
-//===---------------------------------------------------------------===//
-//===--------------------------------------------------------------------===//
-// Intrinsic classes
-// Generic versions of the above classes but for Target specific intrinsics
-// instead of SDNode patterns.
-//===--------------------------------------------------------------------===//
-let TargetPrefix = "AMDIL", isTarget = 1 in {
- class VoidIntLong :
- Intrinsic<[llvm_i64_ty], [], []>;
- class VoidIntInt :
- Intrinsic<[llvm_i32_ty], [], []>;
- class VoidIntBool :
- Intrinsic<[llvm_i32_ty], [], []>;
- class UnaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- class UnaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- class ConvertIntFTOI :
- Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
- class ConvertIntITOF :
- Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
- class UnaryIntNoRetInt :
- Intrinsic<[], [llvm_anyint_ty], []>;
- class UnaryIntNoRetFloat :
- Intrinsic<[], [llvm_anyfloat_ty], []>;
- class BinaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class BinaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class BinaryIntNoRetInt :
- Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
- class BinaryIntNoRetFloat :
- Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
- class TernaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class TernaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class QuaternaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class UnaryAtomicInt :
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class BinaryAtomicInt :
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class TernaryAtomicInt :
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
- class UnaryAtomicIntNoRet :
- Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class BinaryAtomicIntNoRet :
- Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class TernaryAtomicIntNoRet :
- Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-}
-
-let TargetPrefix = "AMDIL", isTarget = 1 in {
- def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
-
- def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
- TernaryIntInt;
- def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
- TernaryIntInt;
- def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
- UnaryIntInt;
- def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
- UnaryIntInt;
- def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
- UnaryIntInt;
- def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
- UnaryIntInt;
- def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
- UnaryIntInt;
- def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
- TernaryIntInt;
- def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
- TernaryIntInt;
- def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
- QuaternaryIntInt;
- def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
- TernaryIntInt;
- def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
- BinaryIntInt;
- def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
- TernaryIntInt;
- def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
- TernaryIntInt;
- def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
- TernaryIntFloat;
- def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
- BinaryIntInt;
- def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
- BinaryIntInt;
- def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
- BinaryIntInt;
- def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
- BinaryIntInt;
- def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
- BinaryIntInt;
- def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
- BinaryIntInt;
- def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
- TernaryIntInt;
- def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
- TernaryIntInt;
- def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
- BinaryIntInt;
- def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
- BinaryIntInt;
- def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
- BinaryIntInt;
- def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
- BinaryIntInt;
- def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
- BinaryIntFloat;
- def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
- BinaryIntInt;
- def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
- BinaryIntInt;
- def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
- BinaryIntFloat;
- def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
- TernaryIntInt;
- def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
- TernaryIntInt;
- def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
- TernaryIntInt;
- def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
- UnaryIntFloat;
- def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
- TernaryIntFloat;
- def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
- UnaryIntFloat;
- def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
- UnaryIntFloat;
- def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
- UnaryIntFloat;
- def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
- UnaryIntFloat;
- def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
- UnaryIntFloat;
- def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
- UnaryIntFloat;
- def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
- UnaryIntFloat;
- def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
- UnaryIntFloat;
- def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
- UnaryIntFloat;
- def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
- UnaryIntFloat;
- def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
- UnaryIntFloat;
- def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
- UnaryIntFloat;
- def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
- def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
- def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
- def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
- UnaryIntFloat;
- def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
- UnaryIntFloat;
- def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
- UnaryIntFloat;
- def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
- UnaryIntFloat;
- def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
- UnaryIntFloat;
- def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
- UnaryIntFloat;
- def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
- UnaryIntFloat;
- def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
- UnaryIntFloat;
- def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
- TernaryIntFloat;
- def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
- UnaryIntFloat;
- def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
- UnaryIntFloat;
- def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
- UnaryIntFloat;
- def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
- TernaryIntFloat;
- def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
- Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i32_ty], []>;
-
- def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
- Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
- def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
- Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
- def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
- def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
- ConvertIntITOF;
- def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
- ConvertIntFTOI;
- def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
- def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
- ConvertIntITOF;
- def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
- ConvertIntITOF;
- def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
- ConvertIntITOF;
- def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
- ConvertIntITOF;
- def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
- Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
- llvm_v2f32_ty, llvm_float_ty], []>;
- def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
- Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
- llvm_v2f32_ty], []>;
- def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
- Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], []>;
- def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
- Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], []>;
-}
+++ /dev/null
-//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDILNIDevice.h"
-#include "AMDILEvergreenDevice.h"
-#include "AMDGPUSubtarget.h"
-
-using namespace llvm;
-
-AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
- : AMDGPUEvergreenDevice(ST)
-{
- std::string name = ST->getDeviceName();
- if (name == "caicos") {
- mDeviceFlag = OCL_DEVICE_CAICOS;
- } else if (name == "turks") {
- mDeviceFlag = OCL_DEVICE_TURKS;
- } else if (name == "cayman") {
- mDeviceFlag = OCL_DEVICE_CAYMAN;
- } else {
- mDeviceFlag = OCL_DEVICE_BARTS;
- }
-}
-AMDGPUNIDevice::~AMDGPUNIDevice()
-{
-}
-
-size_t
-AMDGPUNIDevice::getMaxLDSSize() const
-{
- if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
- return MAX_LDS_SIZE_900;
- } else {
- return 0;
- }
-}
-
-uint32_t
-AMDGPUNIDevice::getGeneration() const
-{
- return AMDGPUDeviceInfo::HD6XXX;
-}
-
-
-AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
- : AMDGPUNIDevice(ST)
-{
- setCaps();
-}
-
-AMDGPUCaymanDevice::~AMDGPUCaymanDevice()
-{
-}
-
-void
-AMDGPUCaymanDevice::setCaps()
-{
- if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
- mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
- mHWBits.set(AMDGPUDeviceInfo::FMA);
- }
- mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
- mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
- mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
-}
-
+++ /dev/null
-//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===---------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
-// specific hardware.
-//===---------------------------------------------------------------------===//
-#ifndef _AMDILNIDEVICE_H_
-#define _AMDILNIDEVICE_H_
-#include "AMDILEvergreenDevice.h"
-#include "AMDGPUSubtarget.h"
-
-namespace llvm {
- class AMDGPUSubtarget;
-//===---------------------------------------------------------------------===//
-// NI generation of devices and their respective sub classes
-//===---------------------------------------------------------------------===//
-
-// The AMDGPUNIDevice is the base class for all Northern Island series of
-// cards. It is very similiar to the AMDGPUEvergreenDevice, with the major
-// exception being differences in wavefront size and hardware capabilities. The
-// NI devices are all 64 wide wavefronts and also add support for signed 24 bit
-// integer operations
-
- class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
- public:
- AMDGPUNIDevice(AMDGPUSubtarget*);
- virtual ~AMDGPUNIDevice();
- virtual size_t getMaxLDSSize() const;
- virtual uint32_t getGeneration() const;
- protected:
- }; // AMDGPUNIDevice
-
-// Just as the AMDGPUCypressDevice is the double capable version of the
-// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version of
-// the AMDGPUNIDevice. The other major difference that is not as useful from
-// standpoint is that the Cayman Device has 4 wide ALU's, whereas the rest of the
-// NI family is a 5 wide.
-
- class AMDGPUCaymanDevice: public AMDGPUNIDevice {
- public:
- AMDGPUCaymanDevice(AMDGPUSubtarget*);
- virtual ~AMDGPUCaymanDevice();
- private:
- virtual void setCaps();
- }; // AMDGPUCaymanDevice
-
- static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
-} // namespace llvm
-#endif // _AMDILNIDEVICE_H_
+++ /dev/null
-//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-
-#include "AMDILDevices.h"
-#include "AMDGPUInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Constants.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-
-#include <sstream>
-
-#if 0
-STATISTIC(PointerAssignments, "Number of dynamic pointer "
- "assigments discovered");
-STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
-#endif
-
-using namespace llvm;
-// The Peephole optimization pass is used to do simple last minute optimizations
-// that are required for correct code or to remove redundant functions
-namespace {
-
-class OpaqueType;
-
-class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
-public:
- TargetMachine &TM;
- static char ID;
- AMDGPUPeepholeOpt(TargetMachine &tm);
- ~AMDGPUPeepholeOpt();
- const char *getPassName() const;
- bool runOnFunction(Function &F);
- bool doInitialization(Module &M);
- bool doFinalization(Module &M);
- void getAnalysisUsage(AnalysisUsage &AU) const;
-protected:
-private:
- // Function to initiate all of the instruction level optimizations.
- bool instLevelOptimizations(BasicBlock::iterator *inst);
- // Quick check to see if we need to dump all of the pointers into the
- // arena. If this is correct, then we set all pointers to exist in arena. This
- // is a workaround for aliasing of pointers in a struct/union.
- bool dumpAllIntoArena(Function &F);
- // Because I don't want to invalidate any pointers while in the
- // safeNestedForEachFunction. I push atomic conversions to a vector and handle
- // it later. This function does the conversions if required.
- void doAtomicConversionIfNeeded(Function &F);
- // Because __amdil_is_constant cannot be properly evaluated if
- // optimizations are disabled, the call's are placed in a vector
- // and evaluated after the __amdil_image* functions are evaluated
- // which should allow the __amdil_is_constant function to be
- // evaluated correctly.
- void doIsConstCallConversionIfNeeded();
- bool mChanged;
- bool mDebug;
- bool mConvertAtomics;
- CodeGenOpt::Level optLevel;
- // Run a series of tests to see if we can optimize a CALL instruction.
- bool optimizeCallInst(BasicBlock::iterator *bbb);
- // A peephole optimization to optimize bit extract sequences.
- bool optimizeBitExtract(Instruction *inst);
- // A peephole optimization to optimize bit insert sequences.
- bool optimizeBitInsert(Instruction *inst);
- bool setupBitInsert(Instruction *base,
- Instruction *&src,
- Constant *&mask,
- Constant *&shift);
- // Expand the bit field insert instruction on versions of OpenCL that
- // don't support it.
- bool expandBFI(CallInst *CI);
- // Expand the bit field mask instruction on version of OpenCL that
- // don't support it.
- bool expandBFM(CallInst *CI);
- // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
- // this case we need to expand them. These functions check for 24bit functions
- // and then expand.
- bool isSigned24BitOps(CallInst *CI);
- void expandSigned24BitOps(CallInst *CI);
- // One optimization that can occur is that if the required workgroup size is
- // specified then the result of get_local_size is known at compile time and
- // can be returned accordingly.
- bool isRWGLocalOpt(CallInst *CI);
- // On northern island cards, the division is slightly less accurate than on
- // previous generations, so we need to utilize a more accurate division. So we
- // can translate the accurate divide to a normal divide on all other cards.
- bool convertAccurateDivide(CallInst *CI);
- void expandAccurateDivide(CallInst *CI);
- // If the alignment is set incorrectly, it can produce really inefficient
- // code. This checks for this scenario and fixes it if possible.
- bool correctMisalignedMemOp(Instruction *inst);
-
- // If we are in no opt mode, then we need to make sure that
- // local samplers are properly propagated as constant propagation
- // doesn't occur and we need to know the value of kernel defined
- // samplers at compile time.
- bool propagateSamplerInst(CallInst *CI);
-
- // Helper functions
-
- // Group of functions that recursively calculate the size of a structure based
- // on it's sub-types.
- size_t getTypeSize(Type * const T, bool dereferencePtr = false);
- size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
- size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
- size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
- size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
- size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
- size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
- size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
-
- LLVMContext *mCTX;
- Function *mF;
- const AMDGPUSubtarget *mSTM;
- SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
- SmallVector<CallInst *, 16> isConstVec;
-}; // class AMDGPUPeepholeOpt
- char AMDGPUPeepholeOpt::ID = 0;
-
-// A template function that has two levels of looping before calling the
-// function with a pointer to the current iterator.
-template<class InputIterator, class SecondIterator, class Function>
-Function safeNestedForEach(InputIterator First, InputIterator Last,
- SecondIterator S, Function F)
-{
- for ( ; First != Last; ++First) {
- SecondIterator sf, sl;
- for (sf = First->begin(), sl = First->end();
- sf != sl; ) {
- if (!F(&sf)) {
- ++sf;
- }
- }
- }
- return F;
-}
-
-} // anonymous namespace
-
-namespace llvm {
- FunctionPass *
- createAMDGPUPeepholeOpt(TargetMachine &tm)
- {
- return new AMDGPUPeepholeOpt(tm);
- }
-} // llvm namespace
-
-AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
- : FunctionPass(ID), TM(tm)
-{
- mDebug = false;
- optLevel = TM.getOptLevel();
-
-}
-
-AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt()
-{
-}
-
-const char *
-AMDGPUPeepholeOpt::getPassName() const
-{
- return "AMDGPU PeepHole Optimization Pass";
-}
-
-bool
-containsPointerType(Type *Ty)
-{
- if (!Ty) {
- return false;
- }
- switch(Ty->getTypeID()) {
- default:
- return false;
- case Type::StructTyID: {
- const StructType *ST = dyn_cast<StructType>(Ty);
- for (StructType::element_iterator stb = ST->element_begin(),
- ste = ST->element_end(); stb != ste; ++stb) {
- if (!containsPointerType(*stb)) {
- continue;
- }
- return true;
- }
- break;
- }
- case Type::VectorTyID:
- case Type::ArrayTyID:
- return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
- case Type::PointerTyID:
- return true;
- };
- return false;
-}
-
-bool
-AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F)
-{
- bool dumpAll = false;
- for (Function::const_arg_iterator cab = F.arg_begin(),
- cae = F.arg_end(); cab != cae; ++cab) {
- const Argument *arg = cab;
- const PointerType *PT = dyn_cast<PointerType>(arg->getType());
- if (!PT) {
- continue;
- }
- Type *DereferencedType = PT->getElementType();
- if (!dyn_cast<StructType>(DereferencedType)
- ) {
- continue;
- }
- if (!containsPointerType(DereferencedType)) {
- continue;
- }
- // FIXME: Because a pointer inside of a struct/union may be aliased to
- // another pointer we need to take the conservative approach and place all
- // pointers into the arena until more advanced detection is implemented.
- dumpAll = true;
- }
- return dumpAll;
-}
-void
-AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded()
-{
- if (isConstVec.empty()) {
- return;
- }
- for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
- CallInst *CI = isConstVec[x];
- Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
- Type *aType = Type::getInt32Ty(*mCTX);
- Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
- : ConstantInt::get(aType, 0);
- CI->replaceAllUsesWith(Val);
- CI->eraseFromParent();
- }
- isConstVec.clear();
-}
-void
-AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
-{
- // Don't do anything if we don't have any atomic operations.
- if (atomicFuncs.empty()) {
- return;
- }
- // Change the function name for the atomic if it is required
- uint32_t size = atomicFuncs.size();
- for (uint32_t x = 0; x < size; ++x) {
- atomicFuncs[x].first->setOperand(
- atomicFuncs[x].first->getNumOperands()-1,
- atomicFuncs[x].second);
-
- }
- mChanged = true;
- if (mConvertAtomics) {
- return;
- }
-}
-
-bool
-AMDGPUPeepholeOpt::runOnFunction(Function &MF)
-{
- mChanged = false;
- mF = &MF;
- mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
- if (mDebug) {
- MF.dump();
- }
- mCTX = &MF.getType()->getContext();
- mConvertAtomics = true;
- safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
- std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
- this));
-
- doAtomicConversionIfNeeded(MF);
- doIsConstCallConversionIfNeeded();
-
- if (mDebug) {
- MF.dump();
- }
- return mChanged;
-}
-
-bool
-AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
-{
- Instruction *inst = (*bbb);
- CallInst *CI = dyn_cast<CallInst>(inst);
- if (!CI) {
- return false;
- }
- if (isSigned24BitOps(CI)) {
- expandSigned24BitOps(CI);
- ++(*bbb);
- CI->eraseFromParent();
- return true;
- }
- if (propagateSamplerInst(CI)) {
- return false;
- }
- if (expandBFI(CI) || expandBFM(CI)) {
- ++(*bbb);
- CI->eraseFromParent();
- return true;
- }
- if (convertAccurateDivide(CI)) {
- expandAccurateDivide(CI);
- ++(*bbb);
- CI->eraseFromParent();
- return true;
- }
-
- StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
- if (calleeName.startswith("__amdil_is_constant")) {
- // If we do not have optimizations, then this
- // cannot be properly evaluated, so we add the
- // call instruction to a vector and process
- // them at the end of processing after the
- // samplers have been correctly handled.
- if (optLevel == CodeGenOpt::None) {
- isConstVec.push_back(CI);
- return false;
- } else {
- Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
- Type *aType = Type::getInt32Ty(*mCTX);
- Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
- : ConstantInt::get(aType, 0);
- CI->replaceAllUsesWith(Val);
- ++(*bbb);
- CI->eraseFromParent();
- return true;
- }
- }
-
- if (calleeName.equals("__amdil_is_asic_id_i32")) {
- ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
- Type *aType = Type::getInt32Ty(*mCTX);
- Value *Val = CV;
- if (Val) {
- Val = ConstantInt::get(aType,
- mSTM->device()->getDeviceFlag() & CV->getZExtValue());
- } else {
- Val = ConstantInt::get(aType, 0);
- }
- CI->replaceAllUsesWith(Val);
- ++(*bbb);
- CI->eraseFromParent();
- return true;
- }
- Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
- if (!F) {
- return false;
- }
- if (F->getName().startswith("__atom") && !CI->getNumUses()
- && F->getName().find("_xchg") == StringRef::npos) {
- std::string buffer(F->getName().str() + "_noret");
- F = dyn_cast<Function>(
- F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
- atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
- }
-
- if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
- && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
- return false;
- }
- if (!mConvertAtomics) {
- return false;
- }
- StringRef name = F->getName();
- if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
- mConvertAtomics = false;
- }
- return false;
-}
-
-bool
-AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
- Instruction *&src,
- Constant *&mask,
- Constant *&shift)
-{
- if (!base) {
- if (mDebug) {
- dbgs() << "Null pointer passed into function.\n";
- }
- return false;
- }
- bool andOp = false;
- if (base->getOpcode() == Instruction::Shl) {
- shift = dyn_cast<Constant>(base->getOperand(1));
- } else if (base->getOpcode() == Instruction::And) {
- mask = dyn_cast<Constant>(base->getOperand(1));
- andOp = true;
- } else {
- if (mDebug) {
- dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
- }
- // If the base is neither a Shl or a And, we don't fit any of the patterns above.
- return false;
- }
- src = dyn_cast<Instruction>(base->getOperand(0));
- if (!src) {
- if (mDebug) {
- dbgs() << "Failed setup since the base operand is not an instruction!\n";
- }
- return false;
- }
- // If we find an 'and' operation, then we don't need to
- // find the next operation as we already know the
- // bits that are valid at this point.
- if (andOp) {
- return true;
- }
- if (src->getOpcode() == Instruction::Shl && !shift) {
- shift = dyn_cast<Constant>(src->getOperand(1));
- src = dyn_cast<Instruction>(src->getOperand(0));
- } else if (src->getOpcode() == Instruction::And && !mask) {
- mask = dyn_cast<Constant>(src->getOperand(1));
- }
- if (!mask && !shift) {
- if (mDebug) {
- dbgs() << "Failed setup since both mask and shift are NULL!\n";
- }
- // Did not find a constant mask or a shift.
- return false;
- }
- return true;
-}
-bool
-AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst)
-{
- if (!inst) {
- return false;
- }
- if (!inst->isBinaryOp()) {
- return false;
- }
- if (inst->getOpcode() != Instruction::Or) {
- return false;
- }
- if (optLevel == CodeGenOpt::None) {
- return false;
- }
- // We want to do an optimization on a sequence of ops that in the end equals a
- // single ISA instruction.
- // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
- // Some simplified versions of this pattern are as follows:
- // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
- // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
- // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
- // (A & B) | (D << F) when (1 << F) >= B
- // (A << C) | (D & E) when (1 << C) >= E
- if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
- // The HD4XXX hardware doesn't support the ubit_insert instruction.
- return false;
- }
- Type *aType = inst->getType();
- bool isVector = aType->isVectorTy();
- int numEle = 1;
- // This optimization only works on 32bit integers.
- if (aType->getScalarType()
- != Type::getInt32Ty(inst->getContext())) {
- return false;
- }
- if (isVector) {
- const VectorType *VT = dyn_cast<VectorType>(aType);
- numEle = VT->getNumElements();
- // We currently cannot support more than 4 elements in a intrinsic and we
- // cannot support Vec3 types.
- if (numEle > 4 || numEle == 3) {
- return false;
- }
- }
- // TODO: Handle vectors.
- if (isVector) {
- if (mDebug) {
- dbgs() << "!!! Vectors are not supported yet!\n";
- }
- return false;
- }
- Instruction *LHSSrc = NULL, *RHSSrc = NULL;
- Constant *LHSMask = NULL, *RHSMask = NULL;
- Constant *LHSShift = NULL, *RHSShift = NULL;
- Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
- Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
- if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
- if (mDebug) {
- dbgs() << "Found an OR Operation that failed setup!\n";
- inst->dump();
- if (LHS) { LHS->dump(); }
- if (LHSSrc) { LHSSrc->dump(); }
- if (LHSMask) { LHSMask->dump(); }
- if (LHSShift) { LHSShift->dump(); }
- }
- // There was an issue with the setup for BitInsert.
- return false;
- }
- if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
- if (mDebug) {
- dbgs() << "Found an OR Operation that failed setup!\n";
- inst->dump();
- if (RHS) { RHS->dump(); }
- if (RHSSrc) { RHSSrc->dump(); }
- if (RHSMask) { RHSMask->dump(); }
- if (RHSShift) { RHSShift->dump(); }
- }
- // There was an issue with the setup for BitInsert.
- return false;
- }
- if (mDebug) {
- dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
- dbgs() << "Op: "; inst->dump();
- dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
- }
- Constant *offset = NULL;
- Constant *width = NULL;
- int32_t lhsMaskVal = 0, rhsMaskVal = 0;
- int32_t lhsShiftVal = 0, rhsShiftVal = 0;
- int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
- int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
- lhsMaskVal = (int32_t)(LHSMask
- ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
- rhsMaskVal = (int32_t)(RHSMask
- ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
- lhsShiftVal = (int32_t)(LHSShift
- ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
- rhsShiftVal = (int32_t)(RHSShift
- ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
- lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
- rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
- lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
- rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
- // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks).
- if (mDebug) {
- dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
- dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
- dbgs() << (RHSMask ? " & E)" : ")");
- dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
- dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
- dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
- dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
- dbgs() << "width(B) = " << lhsMaskWidth;
- dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
- dbgs() << "offset(B) = " << lhsMaskOffset;
- dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
- dbgs() << "Constraints: \n";
- dbgs() << "\t(1) B ^ E == 0\n";
- dbgs() << "\t(2-LHS) B is a mask\n";
- dbgs() << "\t(2-LHS) E is a mask\n";
- dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
- dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
- }
- if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
- if (mDebug) {
- dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
- dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
- dbgs() << "Failed constraint 1!\n";
- }
- return false;
- }
- if (mDebug) {
- dbgs() << "LHS = " << lhsMaskOffset << "";
- dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
- dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
- dbgs() << "\nRHS = " << rhsMaskOffset << "";
- dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
- dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
- dbgs() << "\n";
- }
- if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
- offset = ConstantInt::get(aType, lhsMaskOffset, false);
- width = ConstantInt::get(aType, lhsMaskWidth, false);
- RHSSrc = RHS;
- if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
- if (mDebug) {
- dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
- dbgs() << "Failed constraint 2!\n";
- }
- return false;
- }
- if (!LHSShift) {
- LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
- "MaskShr", LHS);
- } else if (lhsShiftVal != lhsMaskOffset) {
- LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
- "MaskShr", LHS);
- }
- if (mDebug) {
- dbgs() << "Optimizing LHS!\n";
- }
- } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
- offset = ConstantInt::get(aType, rhsMaskOffset, false);
- width = ConstantInt::get(aType, rhsMaskWidth, false);
- LHSSrc = RHSSrc;
- RHSSrc = LHS;
- if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
- if (mDebug) {
- dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
- dbgs() << "Failed constraint 2!\n";
- }
- return false;
- }
- if (!RHSShift) {
- LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
- "MaskShr", RHS);
- } else if (rhsShiftVal != rhsMaskOffset) {
- LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
- "MaskShr", RHS);
- }
- if (mDebug) {
- dbgs() << "Optimizing RHS!\n";
- }
- } else {
- if (mDebug) {
- dbgs() << "Failed constraint 3!\n";
- }
- return false;
- }
- if (mDebug) {
- dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
- dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
- dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
- dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
- }
- if (!offset || !width) {
- if (mDebug) {
- dbgs() << "Either width or offset are NULL, failed detection!\n";
- }
- return false;
- }
- // Lets create the function signature.
- std::vector<Type *> callTypes;
- callTypes.push_back(aType);
- callTypes.push_back(aType);
- callTypes.push_back(aType);
- callTypes.push_back(aType);
- FunctionType *funcType = FunctionType::get(aType, callTypes, false);
- std::string name = "__amdil_ubit_insert";
- if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
- Function *Func =
- dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
- getOrInsertFunction(llvm::StringRef(name), funcType));
- Value *Operands[4] = {
- width,
- offset,
- LHSSrc,
- RHSSrc
- };
- CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
- if (mDebug) {
- dbgs() << "Old Inst: ";
- inst->dump();
- dbgs() << "New Inst: ";
- CI->dump();
- dbgs() << "\n\n";
- }
- CI->insertBefore(inst);
- inst->replaceAllUsesWith(CI);
- return true;
-}
-
-bool
-AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst)
-{
- if (!inst) {
- return false;
- }
- if (!inst->isBinaryOp()) {
- return false;
- }
- if (inst->getOpcode() != Instruction::And) {
- return false;
- }
- if (optLevel == CodeGenOpt::None) {
- return false;
- }
- // We want to do some simple optimizations on Shift right/And patterns. The
- // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
- // value smaller than 32 and C is a mask. If C is a constant value, then the
- // following transformation can occur. For signed integers, it turns into the
- // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned
- // integers, it turns into the function call dst =
- // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
- // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
- // Evergreen hardware.
- if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
- // This does not work on HD4XXX hardware.
- return false;
- }
- Type *aType = inst->getType();
- bool isVector = aType->isVectorTy();
-
- // XXX Support vector types
- if (isVector) {
- return false;
- }
- int numEle = 1;
- // This only works on 32bit integers
- if (aType->getScalarType()
- != Type::getInt32Ty(inst->getContext())) {
- return false;
- }
- if (isVector) {
- const VectorType *VT = dyn_cast<VectorType>(aType);
- numEle = VT->getNumElements();
- // We currently cannot support more than 4 elements in a intrinsic and we
- // cannot support Vec3 types.
- if (numEle > 4 || numEle == 3) {
- return false;
- }
- }
- BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
- // If the first operand is not a shift instruction, then we can return as it
- // doesn't match this pattern.
- if (!ShiftInst || !ShiftInst->isShift()) {
- return false;
- }
- // If we are a shift left, then we need don't match this pattern.
- if (ShiftInst->getOpcode() == Instruction::Shl) {
- return false;
- }
- bool isSigned = ShiftInst->isArithmeticShift();
- Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
- Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
- // Lets make sure that the shift value and the and mask are constant integers.
- if (!AndMask || !ShrVal) {
- return false;
- }
- Constant *newMaskConst;
- Constant *shiftValConst;
- if (isVector) {
- // Handle the vector case
- std::vector<Constant *> maskVals;
- std::vector<Constant *> shiftVals;
- ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
- ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
- Type *scalarType = AndMaskVec->getType()->getScalarType();
- assert(AndMaskVec->getNumOperands() ==
- ShrValVec->getNumOperands() && "cannot have a "
- "combination where the number of elements to a "
- "shift and an and are different!");
- for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
- ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
- ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
- if (!AndCI || !ShiftIC) {
- return false;
- }
- uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
- if (!isMask_32(maskVal)) {
- return false;
- }
- maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
- uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
- // If the mask or shiftval is greater than the bitcount, then break out.
- if (maskVal >= 32 || shiftVal >= 32) {
- return false;
- }
- // If the mask val is greater than the the number of original bits left
- // then this optimization is invalid.
- if (maskVal > (32 - shiftVal)) {
- return false;
- }
- maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
- shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
- }
- newMaskConst = ConstantVector::get(maskVals);
- shiftValConst = ConstantVector::get(shiftVals);
- } else {
- // Handle the scalar case
- uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
- // This must be a mask value where all lower bits are set to 1 and then any
- // bit higher is set to 0.
- if (!isMask_32(maskVal)) {
- return false;
- }
- maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
- // Count the number of bits set in the mask, this is the width of the
- // resulting bit set that is extracted from the source value.
- uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
- // If the mask or shift val is greater than the bitcount, then break out.
- if (maskVal >= 32 || shiftVal >= 32) {
- return false;
- }
- // If the mask val is greater than the the number of original bits left then
- // this optimization is invalid.
- if (maskVal > (32 - shiftVal)) {
- return false;
- }
- newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
- shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
- }
- // Lets create the function signature.
- std::vector<Type *> callTypes;
- callTypes.push_back(aType);
- callTypes.push_back(aType);
- callTypes.push_back(aType);
- FunctionType *funcType = FunctionType::get(aType, callTypes, false);
- std::string name = "llvm.AMDIL.bit.extract.u32";
- if (isVector) {
- name += ".v" + itostr(numEle) + "i32";
- } else {
- name += ".";
- }
- // Lets create the function.
- Function *Func =
- dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
- getOrInsertFunction(llvm::StringRef(name), funcType));
- Value *Operands[3] = {
- ShiftInst->getOperand(0),
- shiftValConst,
- newMaskConst
- };
- // Lets create the Call with the operands
- CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
- CI->setDoesNotAccessMemory();
- CI->insertBefore(inst);
- inst->replaceAllUsesWith(CI);
- return true;
-}
-
-bool
-AMDGPUPeepholeOpt::expandBFI(CallInst *CI)
-{
- if (!CI) {
- return false;
- }
- Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
- if (!LHS->getName().startswith("__amdil_bfi")) {
- return false;
- }
- Type* type = CI->getOperand(0)->getType();
- Constant *negOneConst = NULL;
- if (type->isVectorTy()) {
- std::vector<Constant *> negOneVals;
- negOneConst = ConstantInt::get(CI->getContext(),
- APInt(32, StringRef("-1"), 10));
- for (size_t x = 0,
- y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
- negOneVals.push_back(negOneConst);
- }
- negOneConst = ConstantVector::get(negOneVals);
- } else {
- negOneConst = ConstantInt::get(CI->getContext(),
- APInt(32, StringRef("-1"), 10));
- }
- // __amdil_bfi => (A & B) | (~A & C)
- BinaryOperator *lhs =
- BinaryOperator::Create(Instruction::And, CI->getOperand(0),
- CI->getOperand(1), "bfi_and", CI);
- BinaryOperator *rhs =
- BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
- "bfi_not", CI);
- rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
- "bfi_and", CI);
- lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
- CI->replaceAllUsesWith(lhs);
- return true;
-}
-
-bool
-AMDGPUPeepholeOpt::expandBFM(CallInst *CI)
-{
- if (!CI) {
- return false;
- }
- Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
- if (!LHS->getName().startswith("__amdil_bfm")) {
- return false;
- }
- // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
- Constant *newMaskConst = NULL;
- Constant *newShiftConst = NULL;
- Type* type = CI->getOperand(0)->getType();
- if (type->isVectorTy()) {
- std::vector<Constant*> newMaskVals, newShiftVals;
- newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
- newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
- for (size_t x = 0,
- y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
- newMaskVals.push_back(newMaskConst);
- newShiftVals.push_back(newShiftConst);
- }
- newMaskConst = ConstantVector::get(newMaskVals);
- newShiftConst = ConstantVector::get(newShiftVals);
- } else {
- newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
- newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
- }
- BinaryOperator *lhs =
- BinaryOperator::Create(Instruction::And, CI->getOperand(0),
- newMaskConst, "bfm_mask", CI);
- lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
- lhs, "bfm_shl", CI);
- lhs = BinaryOperator::Create(Instruction::Sub, lhs,
- newShiftConst, "bfm_sub", CI);
- BinaryOperator *rhs =
- BinaryOperator::Create(Instruction::And, CI->getOperand(1),
- newMaskConst, "bfm_mask", CI);
- lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
- CI->replaceAllUsesWith(lhs);
- return true;
-}
-
-bool
-AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
-{
- Instruction *inst = (*bbb);
- if (optimizeCallInst(bbb)) {
- return true;
- }
- if (optimizeBitExtract(inst)) {
- return false;
- }
- if (optimizeBitInsert(inst)) {
- return false;
- }
- if (correctMisalignedMemOp(inst)) {
- return false;
- }
- return false;
-}
-bool
-AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
-{
- LoadInst *linst = dyn_cast<LoadInst>(inst);
- StoreInst *sinst = dyn_cast<StoreInst>(inst);
- unsigned alignment;
- Type* Ty = inst->getType();
- if (linst) {
- alignment = linst->getAlignment();
- Ty = inst->getType();
- } else if (sinst) {
- alignment = sinst->getAlignment();
- Ty = sinst->getValueOperand()->getType();
- } else {
- return false;
- }
- unsigned size = getTypeSize(Ty);
- if (size == alignment || size < alignment) {
- return false;
- }
- if (!Ty->isStructTy()) {
- return false;
- }
- if (alignment < 4) {
- if (linst) {
- linst->setAlignment(0);
- return true;
- } else if (sinst) {
- sinst->setAlignment(0);
- return true;
- }
- }
- return false;
-}
-bool
-AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI)
-{
- if (!CI) {
- return false;
- }
- Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
- std::string namePrefix = LHS->getName().substr(0, 14);
- if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
- && namePrefix != "__amdil__imul24_high") {
- return false;
- }
- if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
- return false;
- }
- return true;
-}
-
-void
-AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI)
-{
- assert(isSigned24BitOps(CI) && "Must be a "
- "signed 24 bit operation to call this function!");
- Value *LHS = CI->getOperand(CI->getNumOperands()-1);
- // On 7XX and 8XX we do not have signed 24bit, so we need to
- // expand it to the following:
- // imul24 turns into 32bit imul
- // imad24 turns into 32bit imad
- // imul24_high turns into 32bit imulhigh
- if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
- Type *aType = CI->getOperand(0)->getType();
- bool isVector = aType->isVectorTy();
- int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
- std::vector<Type*> callTypes;
- callTypes.push_back(CI->getOperand(0)->getType());
- callTypes.push_back(CI->getOperand(1)->getType());
- callTypes.push_back(CI->getOperand(2)->getType());
- FunctionType *funcType =
- FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
- std::string name = "__amdil_imad";
- if (isVector) {
- name += "_v" + itostr(numEle) + "i32";
- } else {
- name += "_i32";
- }
- Function *Func = dyn_cast<Function>(
- CI->getParent()->getParent()->getParent()->
- getOrInsertFunction(llvm::StringRef(name), funcType));
- Value *Operands[3] = {
- CI->getOperand(0),
- CI->getOperand(1),
- CI->getOperand(2)
- };
- CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
- nCI->insertBefore(CI);
- CI->replaceAllUsesWith(nCI);
- } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
- BinaryOperator *mulOp =
- BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
- CI->getOperand(1), "imul24", CI);
- CI->replaceAllUsesWith(mulOp);
- } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
- Type *aType = CI->getOperand(0)->getType();
-
- bool isVector = aType->isVectorTy();
- int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
- std::vector<Type*> callTypes;
- callTypes.push_back(CI->getOperand(0)->getType());
- callTypes.push_back(CI->getOperand(1)->getType());
- FunctionType *funcType =
- FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
- std::string name = "__amdil_imul_high";
- if (isVector) {
- name += "_v" + itostr(numEle) + "i32";
- } else {
- name += "_i32";
- }
- Function *Func = dyn_cast<Function>(
- CI->getParent()->getParent()->getParent()->
- getOrInsertFunction(llvm::StringRef(name), funcType));
- Value *Operands[2] = {
- CI->getOperand(0),
- CI->getOperand(1)
- };
- CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
- nCI->insertBefore(CI);
- CI->replaceAllUsesWith(nCI);
- }
-}
-
-bool
-AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI)
-{
- return (CI != NULL
- && CI->getOperand(CI->getNumOperands() - 1)->getName()
- == "__amdil_get_local_size_int");
-}
-
-bool
-AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI)
-{
- if (!CI) {
- return false;
- }
- if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
- && (mSTM->getDeviceName() == "cayman")) {
- return false;
- }
- return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
- == "__amdil_improved_div";
-}
-
-void
-AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI)
-{
- assert(convertAccurateDivide(CI)
- && "expanding accurate divide can only happen if it is expandable!");
- BinaryOperator *divOp =
- BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
- CI->getOperand(1), "fdiv32", CI);
- CI->replaceAllUsesWith(divOp);
-}
-
-bool
-AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI)
-{
- if (optLevel != CodeGenOpt::None) {
- return false;
- }
-
- if (!CI) {
- return false;
- }
-
- unsigned funcNameIdx = 0;
- funcNameIdx = CI->getNumOperands() - 1;
- StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
- if (calleeName != "__amdil_image2d_read_norm"
- && calleeName != "__amdil_image2d_read_unnorm"
- && calleeName != "__amdil_image3d_read_norm"
- && calleeName != "__amdil_image3d_read_unnorm") {
- return false;
- }
-
- unsigned samplerIdx = 2;
- samplerIdx = 1;
- Value *sampler = CI->getOperand(samplerIdx);
- LoadInst *lInst = dyn_cast<LoadInst>(sampler);
- if (!lInst) {
- return false;
- }
-
- if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
- return false;
- }
-
- GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
- // If we are loading from what is not a global value, then we
- // fail and return.
- if (!gv) {
- return false;
- }
-
- // If we don't have an initializer or we have an initializer and
- // the initializer is not a 32bit integer, we fail.
- if (!gv->hasInitializer()
- || !gv->getInitializer()->getType()->isIntegerTy(32)) {
- return false;
- }
-
- // Now that we have the global variable initializer, lets replace
- // all uses of the load instruction with the samplerVal and
- // reparse the __amdil_is_constant() function.
- Constant *samplerVal = gv->getInitializer();
- lInst->replaceAllUsesWith(samplerVal);
- return true;
-}
-
-bool
-AMDGPUPeepholeOpt::doInitialization(Module &M)
-{
- return false;
-}
-
-bool
-AMDGPUPeepholeOpt::doFinalization(Module &M)
-{
- return false;
-}
-
-void
-AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
-{
- AU.addRequired<MachineFunctionAnalysis>();
- FunctionPass::getAnalysisUsage(AU);
- AU.setPreservesAll();
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
- size_t size = 0;
- if (!T) {
- return size;
- }
- switch (T->getTypeID()) {
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- case Type::LabelTyID:
- assert(0 && "These types are not supported by this backend");
- default:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- size = T->getPrimitiveSizeInBits() >> 3;
- break;
- case Type::PointerTyID:
- size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
- break;
- case Type::IntegerTyID:
- size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
- break;
- case Type::StructTyID:
- size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
- break;
- case Type::ArrayTyID:
- size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
- break;
- case Type::FunctionTyID:
- size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
- break;
- case Type::VectorTyID:
- size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
- break;
- };
- return size;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
- bool dereferencePtr) {
- size_t size = 0;
- if (!ST) {
- return size;
- }
- Type *curType;
- StructType::element_iterator eib;
- StructType::element_iterator eie;
- for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
- curType = *eib;
- size += getTypeSize(curType, dereferencePtr);
- }
- return size;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
- bool dereferencePtr) {
- return IT ? (IT->getBitWidth() >> 3) : 0;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
- bool dereferencePtr) {
- assert(0 && "Should not be able to calculate the size of an function type");
- return 0;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
- bool dereferencePtr) {
- return (size_t)(AT ? (getTypeSize(AT->getElementType(),
- dereferencePtr) * AT->getNumElements())
- : 0);
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
- bool dereferencePtr) {
- return VT ? (VT->getBitWidth() >> 3) : 0;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
- bool dereferencePtr) {
- if (!PT) {
- return 0;
- }
- Type *CT = PT->getElementType();
- if (CT->getTypeID() == Type::StructTyID &&
- PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
- return getTypeSize(dyn_cast<StructType>(CT));
- } else if (dereferencePtr) {
- size_t size = 0;
- for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
- size += getTypeSize(PT->getContainedType(x), dereferencePtr);
- }
- return size;
- } else {
- return 4;
- }
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
- bool dereferencePtr) {
- //assert(0 && "Should not be able to calculate the size of an opaque type");
- return 4;
-}
+++ /dev/null
-//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Declarations that describe the AMDIL register file
-//
-//===----------------------------------------------------------------------===//
-
-class AMDILReg<bits<16> num, string n> : Register<n> {
- field bits<16> Value;
- let Value = num;
- let Namespace = "AMDGPU";
-}
-
-// We will start with 8 registers for each class before expanding to more
-// Since the swizzle is added based on the register class, we can leave it
-// off here and just specify different registers for different register classes
-def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
-def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
-def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
-def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
-def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
-def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
-def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
-def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
-def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
-def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
-def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
-def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
-def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
-def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
-def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
-def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
-def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
-def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
-def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
-def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
-
-// All registers between 1000 and 1024 are reserved and cannot be used
-// unless commented in this section
-// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
-// r1020 is used to hold the frame index for local arrays
-// r1019 is used to hold the dynamic stack allocation pointer
-// r1018 is used as a temporary register for handwritten code
-// r1017 is used as a temporary register for handwritten code
-// r1016 is used as a temporary register for load/store code
-// r1015 is used as a temporary register for data segment offset
-// r1014 is used as a temporary register for store code
-// r1013 is used as the section data pointer register
-// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
-// r1009 is used as the frame pointer register
-// r999 is used as the mem register.
-// r998 is used as the return address register.
-//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
-//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
-//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
-//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
-//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
-//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
-def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
-def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
-def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
-def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
-def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
-def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
-def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
-def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
-def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
-def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
-def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
-def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
-def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
-def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
-def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
-def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
-def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
-def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
-def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
-def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
-def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
-def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
-def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
- (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
-{
- let AltOrders = [(add (sequence "R%u", 1, 20))];
- let AltOrderSelect = [{
- return 1;
- }];
- }
-def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
- (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
-{
- let AltOrders = [(add (sequence "R%u", 1, 20))];
- let AltOrderSelect = [{
- return 1;
- }];
- }
-def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
- (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
-{
- let AltOrders = [(add (sequence "R%u", 1, 20))];
- let AltOrderSelect = [{
- return 1;
- }];
- }
+++ /dev/null
-//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDILSIDevice.h"
-#include "AMDILEvergreenDevice.h"
-#include "AMDILNIDevice.h"
-#include "AMDGPUSubtarget.h"
-
-using namespace llvm;
-
-AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
- : AMDGPUEvergreenDevice(ST)
-{
-}
-AMDGPUSIDevice::~AMDGPUSIDevice()
-{
-}
-
-size_t
-AMDGPUSIDevice::getMaxLDSSize() const
-{
- if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
- return MAX_LDS_SIZE_900;
- } else {
- return 0;
- }
-}
-
-uint32_t
-AMDGPUSIDevice::getGeneration() const
-{
- return AMDGPUDeviceInfo::HD7XXX;
-}
-
-std::string
-AMDGPUSIDevice::getDataLayout() const
-{
- return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
- "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
- "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
- "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
- "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
- "-n8:16:32:64");
-}
+++ /dev/null
-//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===---------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
-// specific hardware.
-//===---------------------------------------------------------------------===//
-#ifndef _AMDILSIDEVICE_H_
-#define _AMDILSIDEVICE_H_
-#include "AMDILEvergreenDevice.h"
-#include "AMDGPUSubtarget.h"
-
-namespace llvm {
- class AMDGPUSubtarget;
-//===---------------------------------------------------------------------===//
-// SI generation of devices and their respective sub classes
-//===---------------------------------------------------------------------===//
-
-// The AMDGPUSIDevice is the base class for all Northern Island series of
-// cards. It is very similiar to the AMDGPUEvergreenDevice, with the major
-// exception being differences in wavefront size and hardware capabilities. The
-// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
-// integer operations
-
- class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
- public:
- AMDGPUSIDevice(AMDGPUSubtarget*);
- virtual ~AMDGPUSIDevice();
- virtual size_t getMaxLDSSize() const;
- virtual uint32_t getGeneration() const;
- virtual std::string getDataLayout() const;
- protected:
- }; // AMDGPUSIDevice
-
-} // namespace llvm
-#endif // _AMDILSIDEVICE_H_
+++ /dev/null
-//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file provides helper macros for expanding case statements.
-//
-//===----------------------------------------------------------------------===//
-#ifndef AMDILUTILITYFUNCTIONS_H_
-#define AMDILUTILITYFUNCTIONS_H_
-
-// Macros that are used to help with switch statements for various data types
-// However, these macro's do not return anything unlike the second set below.
-#define ExpandCaseTo32bitIntTypes(Instr) \
-case Instr##_i32:
-
-#define ExpandCaseTo32bitIntTruncTypes(Instr) \
-case Instr##_i32i8: \
-case Instr##_i32i16:
-
-#define ExpandCaseToIntTypes(Instr) \
- ExpandCaseTo32bitIntTypes(Instr)
-
-#define ExpandCaseToIntTruncTypes(Instr) \
- ExpandCaseTo32bitIntTruncTypes(Instr)
-
-#define ExpandCaseToFloatTypes(Instr) \
- case Instr##_f32:
-
-#define ExpandCaseTo32bitScalarTypes(Instr) \
- ExpandCaseTo32bitIntTypes(Instr) \
-case Instr##_f32:
-
-#define ExpandCaseToAllScalarTypes(Instr) \
- ExpandCaseToFloatTypes(Instr) \
-ExpandCaseToIntTypes(Instr)
-
-#define ExpandCaseToAllScalarTruncTypes(Instr) \
- ExpandCaseToFloatTruncTypes(Instr) \
-ExpandCaseToIntTruncTypes(Instr)
-
-#define ExpandCaseToAllTypes(Instr) \
-ExpandCaseToAllScalarTypes(Instr)
-
-#define ExpandCaseToAllTruncTypes(Instr) \
-ExpandCaseToAllScalarTruncTypes(Instr)
-
-// Macros that expand into statements with return values
-#define ExpandCaseTo32bitIntReturn(Instr, Return) \
-case Instr##_i32: return Return##_i32;
-
-#define ExpandCaseToIntReturn(Instr, Return) \
- ExpandCaseTo32bitIntReturn(Instr, Return)
-
-#define ExpandCaseToFloatReturn(Instr, Return) \
- case Instr##_f32: return Return##_f32;\
-
-#define ExpandCaseToAllScalarReturn(Instr, Return) \
- ExpandCaseToFloatReturn(Instr, Return) \
-ExpandCaseToIntReturn(Instr, Return)
-
-// These macros expand to common groupings of RegClass ID's
-#define ExpandCaseTo1CompRegID \
-case AMDGPU::GPRI32RegClassID: \
-case AMDGPU::GPRF32RegClassID:
-
-#define ExpandCaseTo32BitType(Instr) \
-case Instr##_i32: \
-case Instr##_f32:
-
-#endif // AMDILUTILITYFUNCTIONS_H_
+++ /dev/null
-
-#include "AMDGPUInstPrinter.h"
-#include "llvm/MC/MCInst.h"
-
-using namespace llvm;
-
-void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
- printInstruction(MI, OS);
-
- printAnnotation(OS, Annot);
-}
-
-void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
-
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- O << getRegisterName(Op.getReg());
- } else if (Op.isImm()) {
- O << Op.getImm();
- } else if (Op.isFPImm()) {
- O << Op.getFPImm();
- } else {
- assert(!"unknown operand type in printOperand");
- }
-}
-
-void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printOperand(MI, OpNo, O);
-}
-
-#include "AMDGPUGenAsmWriter.inc"
+++ /dev/null
-
-#ifndef AMDGPUINSTPRINTER_H
-#define AMDGPUINSTPRINTER_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace llvm {
-
-class AMDGPUInstPrinter : public MCInstPrinter {
-public:
- AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
-
- //Autogenerated by tblgen
- void printInstruction(const MCInst *MI, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
-// virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
-
-private:
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-// void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
- void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
-
-};
-
-} // End namespace llvm
-
-#endif // AMDGPUINSTRPRINTER_H
+++ /dev/null
-==============================================================================
-LLVM Release License
-==============================================================================
-University of Illinois/NCSA
-Open Source License
-
-Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
-All rights reserved.
-
-Developed by:
-
- LLVM Team
-
- University of Illinois at Urbana-Champaign
-
- http://llvm.org
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal with
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimers.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimers in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the names of the LLVM Team, University of Illinois at
- Urbana-Champaign, nor the names of its contributors may be used to
- endorse or promote products derived from this Software without specific
- prior written permission.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-SOFTWARE.
+++ /dev/null
-//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUMCObjectWriter : public MCObjectWriter {
-public:
- AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
- virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- //XXX: Implement if necessary.
- }
- virtual void RecordRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) {
- assert(!"Not implemented");
- }
-
- virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
-
-};
-
-class AMDGPUAsmBackend : public MCAsmBackend {
-public:
- AMDGPUAsmBackend(const Target &T)
- : MCAsmBackend() {}
-
- virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
- virtual unsigned getNumFixupKinds() const { return 0; };
- virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const { assert(!"Not implemented"); }
- virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCInstFragment *DF,
- const MCAsmLayout &Layout) const {
- return false;
- }
- virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
- assert(!"Not implemented");
- }
- virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
- virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- return true;
- }
-};
-
-} //End anonymous namespace
-
-void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
- Asm.writeSectionData(I, Layout);
- }
-}
-
-MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT) {
- return new AMDGPUAsmBackend(T);
-}
-
-AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
- raw_ostream &OS) const {
- return new AMDGPUMCObjectWriter(OS);
-}
+++ /dev/null
-//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUMCAsmInfo.h"
-
-using namespace llvm;
-AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo()
-{
- HasSingleParameterDotFile = false;
- WeakDefDirective = 0;
- //===------------------------------------------------------------------===//
- HasSubsectionsViaSymbols = true;
- HasMachoZeroFillDirective = false;
- HasMachoTBSSDirective = false;
- HasStaticCtorDtorReferenceInStaticMode = false;
- LinkerRequiresNonEmptyDwarfLines = true;
- MaxInstLength = 16;
- PCSymbol = "$";
- SeparatorString = "\n";
- CommentColumn = 40;
- CommentString = ";";
- LabelSuffix = ":";
- GlobalPrefix = "@";
- PrivateGlobalPrefix = ";.";
- LinkerPrivateGlobalPrefix = "!";
- InlineAsmStart = ";#ASMSTART";
- InlineAsmEnd = ";#ASMEND";
- AssemblerDialect = 0;
- AllowQuotesInName = false;
- AllowNameToStartWithDigit = false;
- AllowPeriodsInName = false;
-
- //===--- Data Emission Directives -------------------------------------===//
- ZeroDirective = ".zero";
- AsciiDirective = ".ascii\t";
- AscizDirective = ".asciz\t";
- Data8bitsDirective = ".byte\t";
- Data16bitsDirective = ".short\t";
- Data32bitsDirective = ".long\t";
- Data64bitsDirective = ".quad\t";
- GPRel32Directive = 0;
- SunStyleELFSectionSwitchSyntax = true;
- UsesELFSectionDirectiveForBSS = true;
- HasMicrosoftFastStdCallMangling = false;
-
- //===--- Alignment Information ----------------------------------------===//
- AlignDirective = ".align\t";
- AlignmentIsInBytes = true;
- TextAlignFillValue = 0;
-
- //===--- Global Variable Emission Directives --------------------------===//
- GlobalDirective = ".global";
- ExternDirective = ".extern";
- HasSetDirective = false;
- HasAggressiveSymbolFolding = true;
- LCOMMDirectiveType = LCOMM::None;
- COMMDirectiveAlignmentIsInBytes = false;
- HasDotTypeDotSizeDirective = false;
- HasNoDeadStrip = true;
- HasSymbolResolver = false;
- WeakRefDirective = ".weakref\t";
- LinkOnceDirective = 0;
- //===--- Dwarf Emission Directives -----------------------------------===//
- HasLEB128 = true;
- SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::None;
- DwarfUsesInlineInfoSection = false;
- DwarfSectionOffsetDirective = ".offset";
- DwarfUsesLabelOffsetForRanges = true;
-
- //===--- CBE Asm Translation Table -----------------------------------===//
- AsmTransCBE = 0;
-}
-const char*
-AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
-{
- switch (AS) {
- default:
- return 0;
- case 0:
- return 0;
- };
- return 0;
-}
-
-const MCSection*
-AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const
-{
- return 0;
-}
+++ /dev/null
-//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUMCASMINFO_H_
-#define AMDGPUMCASMINFO_H_
-
-#include "llvm/MC/MCAsmInfo.h"
-namespace llvm {
- class Target;
- class StringRef;
-
- class AMDGPUMCAsmInfo : public MCAsmInfo {
- public:
- explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
- const char*
- getDataASDirective(unsigned int Size, unsigned int AS) const;
- const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
- };
-} // namespace llvm
-#endif // AMDGPUMCASMINFO_H_
+++ /dev/null
-//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// CodeEmitter interface for R600 and SI codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUCODEEMITTER_H
-#define AMDGPUCODEEMITTER_H
-
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace llvm {
-
- class MCInst;
- class MCOperand;
-
- class AMDGPUMCCodeEmitter : public MCCodeEmitter {
- public:
-
- uint64_t getBinaryCodeForInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups) const;
-
- virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return 0;
- }
-
- virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return 0;
- }
- virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return 0;
- }
- virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
- return Value;
- }
- virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return 0;
- }
- virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return 0;
- }
- };
-
-} // End namespace llvm
-
-#endif // AMDGPUCODEEMITTER_H
+++ /dev/null
-//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides AMDGPU specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUMCTargetDesc.h"
-#include "AMDGPUMCAsmInfo.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "AMDGPUGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "AMDGPUGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "AMDGPUGenRegisterInfo.inc"
-
-using namespace llvm;
-
-static MCInstrInfo *createAMDGPUMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitAMDGPUMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitAMDGPUMCRegisterInfo(X, 0);
- return X;
-}
-
-static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo * X = new MCSubtargetInfo();
- InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM, OL);
- return X;
-}
-
-static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- return new AMDGPUInstPrinter(MAI, MII, MRI);
-}
-
-static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
- return createSIMCCodeEmitter(MCII, STI, Ctx);
- } else {
- return createR600MCCodeEmitter(MCII, STI, Ctx);
- }
-}
-
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- return createPureStreamer(Ctx, MAB, _OS, _Emitter);
-}
-
-extern "C" void LLVMInitializeAMDGPUTargetMC() {
-
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
-
- TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
-
- TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
-
- TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
-
- TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
-
- TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
-
- TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
-
- TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
-
- TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
-}
+++ /dev/null
-//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides AMDGPU specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#ifndef AMDGPUMCTARGETDESC_H
-#define AMDGPUMCTARGETDESC_H
-
-#include "llvm/ADT/StringRef.h"
-
-namespace llvm {
-class MCAsmBackend;
-class MCCodeEmitter;
-class MCContext;
-class MCInstrInfo;
-class MCRegisterInfo;
-class MCSubtargetInfo;
-class Target;
-
-extern Target TheAMDGPUTarget;
-
-MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
-
-MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
-
-MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT);
-} // End llvm namespace
-
-#define GET_REGINFO_ENUM
-#include "AMDGPUGenRegisterInfo.inc"
-
-#define GET_INSTRINFO_ENUM
-#include "AMDGPUGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "AMDGPUGenSubtargetInfo.inc"
-
-#endif // AMDGPUMCTARGETDESC_H
+++ /dev/null
-//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This code emitters outputs bytecode that is understood by the r600g driver
-// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
-// except that the size of the instruction fields are rounded up to the
-// nearest byte.
-//
-// [1] http://www.mesa3d.org/
-//
-//===----------------------------------------------------------------------===//
-
-#include "R600Defines.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/raw_ostream.h"
-
-#include <stdio.h>
-
-#define SRC_BYTE_COUNT 11
-#define DST_BYTE_COUNT 5
-
-using namespace llvm;
-
-namespace {
-
-class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
- R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
- const MCInstrInfo &MCII;
- const MCSubtargetInfo &STI;
- MCContext &Ctx;
-
-public:
-
- R600MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : MCII(mcii), STI(sti), Ctx(ctx) { }
-
- /// EncodeInstruction - Encode the instruction and write it to the OS.
- virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const;
-
- /// getMachineOpValue - Reutrn the encoding for an MCOperand.
- virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups) const;
-private:
-
- void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
- raw_ostream &OS) const;
- void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
- void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
- raw_ostream &OS) const;
- void EmitDst(const MCInst &MI, raw_ostream &OS) const;
- void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
- raw_ostream &OS) const;
- void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
-
- void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
-
- void EmitByte(unsigned int byte, raw_ostream &OS) const;
-
- void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
-
- void Emit(uint32_t value, raw_ostream &OS) const;
- void Emit(uint64_t value, raw_ostream &OS) const;
-
- unsigned getHWRegIndex(unsigned reg) const;
- unsigned getHWRegChan(unsigned reg) const;
- unsigned getHWReg(unsigned regNo) const;
-
- bool isFCOp(unsigned opcode) const;
- bool isTexOp(unsigned opcode) const;
- bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
-
- /// getHWRegIndexGen - Get the register's hardware index. Implemented in
- /// R600HwRegInfo.include.
- unsigned getHWRegIndexGen(unsigned int Reg) const;
-
- /// getHWRegChanGen - Get the register's channel. Implemented in
- /// R600HwRegInfo.include.
- unsigned getHWRegChanGen(unsigned int Reg) const;
-};
-
-} // End anonymous namespace
-
-enum RegElement {
- ELEMENT_X = 0,
- ELEMENT_Y,
- ELEMENT_Z,
- ELEMENT_W
-};
-
-enum InstrTypes {
- INSTR_ALU = 0,
- INSTR_TEX,
- INSTR_FC,
- INSTR_NATIVE,
- INSTR_VTX
-};
-
-enum FCInstr {
- FC_IF = 0,
- FC_IF_INT,
- FC_ELSE,
- FC_ENDIF,
- FC_BGNLOOP,
- FC_ENDLOOP,
- FC_BREAK,
- FC_BREAK_NZ_INT,
- FC_CONTINUE,
- FC_BREAK_Z_INT,
- FC_BREAK_NZ
-};
-
-enum TextureTypes {
- TEXTURE_1D = 1,
- TEXTURE_2D,
- TEXTURE_3D,
- TEXTURE_CUBE,
- TEXTURE_RECT,
- TEXTURE_SHADOW1D,
- TEXTURE_SHADOW2D,
- TEXTURE_SHADOWRECT,
- TEXTURE_1D_ARRAY,
- TEXTURE_2D_ARRAY,
- TEXTURE_SHADOW1D_ARRAY,
- TEXTURE_SHADOW2D_ARRAY
-};
-
-MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new R600MCCodeEmitter(MCII, STI, Ctx);
-}
-
-void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const {
- if (isTexOp(MI.getOpcode())) {
- EmitTexInstr(MI, Fixups, OS);
- } else if (isFCOp(MI.getOpcode())){
- EmitFCInstr(MI, OS);
- } else if (MI.getOpcode() == AMDGPU::RETURN ||
- MI.getOpcode() == AMDGPU::BUNDLE ||
- MI.getOpcode() == AMDGPU::KILL) {
- return;
- } else {
- switch(MI.getOpcode()) {
- case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
- {
- uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
- EmitByte(INSTR_NATIVE, OS);
- Emit(inst, OS);
- break;
- }
- case AMDGPU::CONSTANT_LOAD_eg:
- case AMDGPU::VTX_READ_PARAM_i32_eg:
- case AMDGPU::VTX_READ_PARAM_f32_eg:
- case AMDGPU::VTX_READ_GLOBAL_i8_eg:
- case AMDGPU::VTX_READ_GLOBAL_i32_eg:
- case AMDGPU::VTX_READ_GLOBAL_f32_eg:
- case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
- case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
- {
- uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
- uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
-
- EmitByte(INSTR_VTX, OS);
- Emit(InstWord01, OS);
- Emit(InstWord2, OS);
- break;
- }
-
- default:
- EmitALUInstr(MI, Fixups, OS);
- break;
- }
- }
-}
-
-void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- raw_ostream &OS) const {
- const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
- unsigned NumOperands = MI.getNumOperands();
-
- if(MCDesc.findFirstPredOperandIdx() > -1)
- NumOperands--;
-
- if (GET_FLAG_OPERAND_IDX(MCDesc.TSFlags) != 0)
- NumOperands--;
-
- if(MI.getOpcode() == AMDGPU::PRED_X)
- NumOperands = 2;
-
- // XXX Check if instruction writes a result
- if (NumOperands < 1) {
- return;
- }
-
- // Emit instruction type
- EmitByte(INSTR_ALU, OS);
-
- uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
-
- //older alu have different encoding for instructions with one or two src
- //parameters.
- if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
- !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
- uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
- InstWord01 &= ~(0x3FFULL << 39);
- InstWord01 |= ISAOpCode << 1;
- }
-
- unsigned int OpIndex;
- for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) {
- // Literal constants are always stored as the last operand.
- if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) {
- break;
- }
- EmitSrcISA(MI, OpIndex, InstWord01, OS);
- }
-
- // Emit zeros for unused sources
- for ( ; OpIndex < 4; OpIndex++) {
- EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
- }
-
- // Emit destination register
- const MCOperand &dstOp = MI.getOperand(0);
- if (dstOp.isReg() && dstOp.getReg() != AMDGPU::PREDICATE_BIT) {
- //element of destination register
- InstWord01 |= uint64_t(getHWRegChan(dstOp.getReg())) << 61;
-
- // isClamped
- if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) {
- InstWord01 |= 1ULL << 63;
- }
-
- // write mask
- if (!isFlagSet(MI, 0, MO_FLAG_MASK) && NumOperands < 4) {
- InstWord01 |= 1ULL << 36;
- }
-
- // XXX: Emit relative addressing mode
- }
-
- // Emit ALU
-
- // Emit IsLast (for this instruction group) (1 byte)
- if (!isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) {
- InstWord01 |= 1ULL << 31;
- }
-
- // XXX: Emit push modifier
- if(isFlagSet(MI, 1, MO_FLAG_PUSH)) {
- InstWord01 |= 1ULL << 34;
- }
-
- // XXX: Emit predicate (1 byte)
- int PredIdx = MCDesc.findFirstPredOperandIdx();
- if (PredIdx != -1) {
- switch(MI.getOperand(PredIdx).getReg()) {
- case AMDGPU::PRED_SEL_ZERO:
- InstWord01 |= 2ULL << 29;
- break;
- case AMDGPU::PRED_SEL_ONE:
- InstWord01 |= 3ULL << 29;
- break;
- }
- }
-
- //XXX: predicate
- //XXX: bank swizzle
- //XXX: OMOD
- //XXX: index mode
-
- Emit(InstWord01, OS);
-}
-
-void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
- raw_ostream &OS) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- union {
- float f;
- uint32_t i;
- } Value;
- Value.i = 0;
- // Emit the source select (2 bytes). For GPRs, this is the register index.
- // For other potential instruction operands, (e.g. constant registers) the
- // value of the source select is defined in the r600isa docs.
- if (MO.isReg()) {
- unsigned reg = MO.getReg();
- EmitTwoBytes(getHWReg(reg), OS);
- if (reg == AMDGPU::ALU_LITERAL_X) {
- unsigned ImmOpIndex = MI.getNumOperands() - 1;
- MCOperand ImmOp = MI.getOperand(ImmOpIndex);
- if (ImmOp.isFPImm()) {
- Value.f = ImmOp.getFPImm();
- } else {
- assert(ImmOp.isImm());
- Value.i = ImmOp.getImm();
- }
- }
- } else {
- // XXX: Handle other operand types.
- EmitTwoBytes(0, OS);
- }
-
- // Emit the source channel (1 byte)
- if (MO.isReg()) {
- EmitByte(getHWRegChan(MO.getReg()), OS);
- } else {
- EmitByte(0, OS);
- }
-
- // XXX: Emit isNegated (1 byte)
- if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
- && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
- (MO.isReg() &&
- (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
- EmitByte(1, OS);
- } else {
- EmitByte(0, OS);
- }
-
- // Emit isAbsolute (1 byte)
- if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
- EmitByte(1, OS);
- } else {
- EmitByte(0, OS);
- }
-
- // XXX: Emit relative addressing mode (1 byte)
- EmitByte(0, OS);
-
- // Emit kc_bank, This will be adjusted later by r600_asm
- EmitByte(0, OS);
-
- // Emit the literal value, if applicable (4 bytes).
- Emit(Value.i, OS);
-
-}
-
-void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
- uint64_t &Value, raw_ostream &OS) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- union {
- float f;
- uint32_t i;
- } InlineConstant;
- InlineConstant.i = 0;
- // Emit the source select (2 bytes). For GPRs, this is the register index.
- // For other potential instruction operands, (e.g. constant registers) the
- // value of the source select is defined in the r600isa docs.
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
- EmitByte(1, OS);
- } else {
- EmitByte(0, OS);
- }
-
- if (Reg == AMDGPU::ALU_LITERAL_X) {
- unsigned ImmOpIndex = MI.getNumOperands() - 1;
- MCOperand ImmOp = MI.getOperand(ImmOpIndex);
- if (ImmOp.isFPImm()) {
- InlineConstant.f = ImmOp.getFPImm();
- } else {
- assert(ImmOp.isImm());
- InlineConstant.i = ImmOp.getImm();
- }
- }
- } else {
- // XXX: Handle other operand types.
- EmitTwoBytes(0, OS);
- }
-
- // source channel
- uint64_t sourceChannelValue = getHWRegChan(MO.getReg());
- if (OpIdx == 1)
- Value |= sourceChannelValue << 10;
- if (OpIdx == 2)
- Value |= sourceChannelValue << 23;
- if (OpIdx == 3)
- Value |= sourceChannelValue << 42;
-
- // isNegated
- if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
- && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
- (MO.isReg() &&
- (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
- if (OpIdx == 1)
- Value |= 1ULL << 12;
- else if (OpIdx == 2)
- Value |= 1ULL << 25;
- else if (OpIdx == 3)
- Value |= 1ULL << 44;
- }
-
- // isAbsolute
- if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
- assert(OpIdx < 3);
- Value |= 1ULL << (32+OpIdx-1);
- }
-
- // XXX: relative addressing mode
- // XXX: kc_bank
-
- // Emit the literal value, if applicable (4 bytes).
- Emit(InlineConstant.i, OS);
-
-}
-
-void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- raw_ostream &OS) const {
-
- unsigned opcode = MI.getOpcode();
- bool hasOffsets = (opcode == AMDGPU::TEX_LD);
- unsigned op_offset = hasOffsets ? 3 : 0;
- int64_t sampler = MI.getOperand(op_offset+2).getImm();
- int64_t textureType = MI.getOperand(op_offset+3).getImm();
- unsigned srcSelect[4] = {0, 1, 2, 3};
-
- // Emit instruction type
- EmitByte(1, OS);
-
- // Emit instruction
- EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
-
- // XXX: Emit resource id r600_shader.c uses sampler + 1. Why?
- EmitByte(sampler + 1 + 1, OS);
-
- // Emit source register
- EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
-
- // XXX: Emit src isRelativeAddress
- EmitByte(0, OS);
-
- // Emit destination register
- EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
-
- // XXX: Emit dst isRealtiveAddress
- EmitByte(0, OS);
-
- // XXX: Emit dst select
- EmitByte(0, OS); // X
- EmitByte(1, OS); // Y
- EmitByte(2, OS); // Z
- EmitByte(3, OS); // W
-
- // XXX: Emit lod bias
- EmitByte(0, OS);
-
- // XXX: Emit coord types
- unsigned coordType[4] = {1, 1, 1, 1};
-
- if (textureType == TEXTURE_RECT
- || textureType == TEXTURE_SHADOWRECT) {
- coordType[ELEMENT_X] = 0;
- coordType[ELEMENT_Y] = 0;
- }
-
- if (textureType == TEXTURE_1D_ARRAY
- || textureType == TEXTURE_SHADOW1D_ARRAY) {
- if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
- coordType[ELEMENT_Y] = 0;
- } else {
- coordType[ELEMENT_Z] = 0;
- srcSelect[ELEMENT_Z] = ELEMENT_Y;
- }
- } else if (textureType == TEXTURE_2D_ARRAY
- || textureType == TEXTURE_SHADOW2D_ARRAY) {
- coordType[ELEMENT_Z] = 0;
- }
-
- for (unsigned i = 0; i < 4; i++) {
- EmitByte(coordType[i], OS);
- }
-
- // XXX: Emit offsets
- if (hasOffsets)
- for (unsigned i = 2; i < 5; i++)
- EmitByte(MI.getOperand(i).getImm()<<1, OS);
- else
- EmitNullBytes(3, OS);
-
- // Emit sampler id
- EmitByte(sampler, OS);
-
- // XXX:Emit source select
- if ((textureType == TEXTURE_SHADOW1D
- || textureType == TEXTURE_SHADOW2D
- || textureType == TEXTURE_SHADOWRECT
- || textureType == TEXTURE_SHADOW1D_ARRAY)
- && opcode != AMDGPU::TEX_SAMPLE_C_L
- && opcode != AMDGPU::TEX_SAMPLE_C_LB) {
- srcSelect[ELEMENT_W] = ELEMENT_Z;
- }
-
- for (unsigned i = 0; i < 4; i++) {
- EmitByte(srcSelect[i], OS);
- }
-}
-
-void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
-
- // Emit instruction type
- EmitByte(INSTR_FC, OS);
-
- // Emit SRC
- unsigned NumOperands = MI.getNumOperands();
- if (NumOperands > 0) {
- assert(NumOperands == 1);
- EmitSrc(MI, 0, OS);
- } else {
- EmitNullBytes(SRC_BYTE_COUNT, OS);
- }
-
- // Emit FC Instruction
- enum FCInstr instr;
- switch (MI.getOpcode()) {
- case AMDGPU::BREAK_LOGICALZ_f32:
- instr = FC_BREAK;
- break;
- case AMDGPU::BREAK_LOGICALNZ_f32:
- instr = FC_BREAK_NZ;
- break;
- case AMDGPU::BREAK_LOGICALNZ_i32:
- instr = FC_BREAK_NZ_INT;
- break;
- case AMDGPU::BREAK_LOGICALZ_i32:
- instr = FC_BREAK_Z_INT;
- break;
- case AMDGPU::CONTINUE_LOGICALNZ_f32:
- case AMDGPU::CONTINUE_LOGICALNZ_i32:
- instr = FC_CONTINUE;
- break;
- case AMDGPU::IF_LOGICALNZ_f32:
- instr = FC_IF;
- case AMDGPU::IF_LOGICALNZ_i32:
- instr = FC_IF_INT;
- break;
- case AMDGPU::IF_LOGICALZ_f32:
- abort();
- break;
- case AMDGPU::ELSE:
- instr = FC_ELSE;
- break;
- case AMDGPU::ENDIF:
- instr = FC_ENDIF;
- break;
- case AMDGPU::ENDLOOP:
- instr = FC_ENDLOOP;
- break;
- case AMDGPU::WHILELOOP:
- instr = FC_BGNLOOP;
- break;
- default:
- abort();
- break;
- }
- EmitByte(instr, OS);
-}
-
-void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
- raw_ostream &OS) const {
-
- for (unsigned int i = 0; i < ByteCount; i++) {
- EmitByte(0, OS);
- }
-}
-
-void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
- OS.write((uint8_t) Byte & 0xff);
-}
-
-void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
- raw_ostream &OS) const {
- OS.write((uint8_t) (Bytes & 0xff));
- OS.write((uint8_t) ((Bytes >> 8) & 0xff));
-}
-
-void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
- for (unsigned i = 0; i < 4; i++) {
- OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
- }
-}
-
-void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
- for (unsigned i = 0; i < 8; i++) {
- EmitByte((Value >> (8 * i)) & 0xff, OS);
- }
-}
-
-unsigned R600MCCodeEmitter::getHWRegIndex(unsigned reg) const {
- switch(reg) {
- case AMDGPU::ZERO: return 248;
- case AMDGPU::ONE:
- case AMDGPU::NEG_ONE: return 249;
- case AMDGPU::ONE_INT: return 250;
- case AMDGPU::HALF:
- case AMDGPU::NEG_HALF: return 252;
- case AMDGPU::ALU_LITERAL_X: return 253;
- case AMDGPU::PREDICATE_BIT:
- case AMDGPU::PRED_SEL_OFF:
- case AMDGPU::PRED_SEL_ZERO:
- case AMDGPU::PRED_SEL_ONE:
- return 0;
- default: return getHWRegIndexGen(reg);
- }
-}
-
-unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
- switch(reg) {
- case AMDGPU::ZERO:
- case AMDGPU::ONE:
- case AMDGPU::ONE_INT:
- case AMDGPU::NEG_ONE:
- case AMDGPU::HALF:
- case AMDGPU::NEG_HALF:
- case AMDGPU::ALU_LITERAL_X:
- case AMDGPU::PREDICATE_BIT:
- case AMDGPU::PRED_SEL_OFF:
- case AMDGPU::PRED_SEL_ZERO:
- case AMDGPU::PRED_SEL_ONE:
- return 0;
- default: return getHWRegChanGen(reg);
- }
-}
-unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
- unsigned HWReg;
-
- HWReg = getHWRegIndex(RegNo);
- if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(RegNo)) {
- HWReg += 512;
- }
- return HWReg;
-}
-
-uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
- const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixup) const {
- if (MO.isReg()) {
- return getHWRegIndex(MO.getReg());
- } else if (MO.isImm()) {
- return MO.getImm();
- } else {
- assert(0);
- return 0;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Encoding helper functions
-//===----------------------------------------------------------------------===//
-
-bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
- switch(opcode) {
- default: return false;
- case AMDGPU::BREAK_LOGICALZ_f32:
- case AMDGPU::BREAK_LOGICALNZ_i32:
- case AMDGPU::BREAK_LOGICALZ_i32:
- case AMDGPU::BREAK_LOGICALNZ_f32:
- case AMDGPU::CONTINUE_LOGICALNZ_f32:
- case AMDGPU::IF_LOGICALNZ_i32:
- case AMDGPU::IF_LOGICALZ_f32:
- case AMDGPU::ELSE:
- case AMDGPU::ENDIF:
- case AMDGPU::ENDLOOP:
- case AMDGPU::IF_LOGICALNZ_f32:
- case AMDGPU::WHILELOOP:
- return true;
- }
-}
-
-bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
- switch(opcode) {
- default: return false;
- case AMDGPU::TEX_LD:
- case AMDGPU::TEX_GET_TEXTURE_RESINFO:
- case AMDGPU::TEX_SAMPLE:
- case AMDGPU::TEX_SAMPLE_C:
- case AMDGPU::TEX_SAMPLE_L:
- case AMDGPU::TEX_SAMPLE_C_L:
- case AMDGPU::TEX_SAMPLE_LB:
- case AMDGPU::TEX_SAMPLE_C_LB:
- case AMDGPU::TEX_SAMPLE_G:
- case AMDGPU::TEX_SAMPLE_C_G:
- case AMDGPU::TEX_GET_GRADIENTS_H:
- case AMDGPU::TEX_GET_GRADIENTS_V:
- case AMDGPU::TEX_SET_GRADIENTS_H:
- case AMDGPU::TEX_SET_GRADIENTS_V:
- return true;
- }
-}
-
-bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
- unsigned Flag) const {
- const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
- unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
- if (FlagIndex == 0) {
- return false;
- }
- assert(MI.getOperand(FlagIndex).isImm());
- return !!((MI.getOperand(FlagIndex).getImm() >>
- (NUM_MO_FLAGS * Operand)) & Flag);
-}
-#define R600RegisterInfo R600MCCodeEmitter
-#include "R600HwRegInfo.include"
-#undef R600RegisterInfo
-
-#include "AMDGPUGenMCCodeEmitter.inc"
+++ /dev/null
-//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The SI code emitter produces machine code that can be executed directly on
-// the GPU device.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
-#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
-
-// These must be kept in sync with SIInstructions.td and also the
-// InstrEncodingInfo array in SIInstrInfo.cpp.
-//
-// NOTE: This enum is only used to identify the encoding type within LLVM,
-// the actual encoding type that is part of the instruction format is different
-namespace SIInstrEncodingType {
- enum Encoding {
- EXP = 0,
- LDS = 1,
- MIMG = 2,
- MTBUF = 3,
- MUBUF = 4,
- SMRD = 5,
- SOP1 = 6,
- SOP2 = 7,
- SOPC = 8,
- SOPK = 9,
- SOPP = 10,
- VINTRP = 11,
- VOP1 = 12,
- VOP2 = 13,
- VOP3 = 14,
- VOPC = 15
- };
-}
-
-using namespace llvm;
-
-namespace {
-class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
- SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
- const MCInstrInfo &MCII;
- const MCSubtargetInfo &STI;
- MCContext &Ctx;
-
-public:
- SIMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : MCII(mcii), STI(sti), Ctx(ctx) { }
-
- ~SIMCCodeEmitter() { }
-
- /// EncodeInstruction - Encode the instruction and write it to the OS.
- virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const;
-
- /// getMachineOpValue - Reutrn the encoding for an MCOperand.
- virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups) const;
-
-public:
-
- /// GPRAlign - Encode a sequence of registers with the correct alignment.
- unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
-
- /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
- virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixup) const;
-
- /// GPR4AlignEncode - Encoding for when 4 consectuive registers are used
- virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixup) const;
-
- /// SMRDmemriEncode - Encoding for SMRD indexed loads
- virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixup) const;
-
- /// VOPPostEncode - Post-Encoder method for VOP instructions
- virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
-
-private:
-
- ///getEncodingType = Return this SIInstrEncodingType for this instruction.
- unsigned getEncodingType(const MCInst &MI) const;
-
- ///getEncodingBytes - Get then size in bytes of this instructions encoding.
- unsigned getEncodingBytes(const MCInst &MI) const;
-
- /// getRegBinaryCode - Returns the hardware encoding for a register
- unsigned getRegBinaryCode(unsigned reg) const;
-
- /// getHWRegNum - Generated function that returns the hardware encoding for
- /// a register
- unsigned getHWRegNum(unsigned reg) const;
-
-};
-
-} // End anonymous namespace
-
-MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new SIMCCodeEmitter(MCII, STI, Ctx);
-}
-
-void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const {
- uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
- unsigned bytes = getEncodingBytes(MI);
- for (unsigned i = 0; i < bytes; i++) {
- OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
- }
-}
-
-uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
- const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups) const {
- if (MO.isReg()) {
- return getRegBinaryCode(MO.getReg());
- } else if (MO.isImm()) {
- return MO.getImm();
- } else if (MO.isFPImm()) {
- // XXX: Not all instructions can use inline literals
- // XXX: We should make sure this is a 32-bit constant
- union {
- float F;
- uint32_t I;
- } Imm;
- Imm.F = MO.getFPImm();
- return Imm.I;
- } else{
- llvm_unreachable("Encoding of this operand type is not supported yet.");
- }
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Custom Operand Encodings
-//===----------------------------------------------------------------------===//
-
-unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
- unsigned shift) const {
- unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
- return regCode >> shift;
- return 0;
-}
-unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
- unsigned OpNo ,
- SmallVectorImpl<MCFixup> &Fixup) const {
- return GPRAlign(MI, OpNo, 1);
-}
-
-unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
- unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixup) const {
- return GPRAlign(MI, OpNo, 2);
-}
-
-#define SMRD_OFFSET_MASK 0xff
-#define SMRD_IMM_SHIFT 8
-#define SMRD_SBASE_MASK 0x3f
-#define SMRD_SBASE_SHIFT 9
-/// SMRDmemriEncode - This function is responsibe for encoding the offset
-/// and the base ptr for SMRD instructions it should return a bit string in
-/// this format:
-///
-/// OFFSET = bits{7-0}
-/// IMM = bits{8}
-/// SBASE = bits{14-9}
-///
-uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixup) const {
- uint32_t Encoding;
-
- const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
-
- //XXX: Use this function for SMRD loads with register offsets
- assert(OffsetOp.isImm());
-
- Encoding =
- (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
- | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
- | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
- ;
-
- return Encoding;
-}
-
-//===----------------------------------------------------------------------===//
-// Post Encoder Callbacks
-//===----------------------------------------------------------------------===//
-
-uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{
- unsigned encodingType = getEncodingType(MI);
- unsigned numSrcOps;
- unsigned vgprBitOffset;
-
- if (encodingType == SIInstrEncodingType::VOP3) {
- numSrcOps = 3;
- vgprBitOffset = 32;
- } else {
- numSrcOps = 1;
- vgprBitOffset = 0;
- }
-
- // Add one to skip over the destination reg operand.
- for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
- const MCOperand &MO = MI.getOperand(opIdx);
- if (MO.isReg()) {
- unsigned reg = MI.getOperand(opIdx).getReg();
- if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
- AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
- Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
- }
- } else if (MO.isFPImm()) {
- union {
- float f;
- uint32_t i;
- } Imm;
- // XXX: Not all instructions can use inline literals
- // XXX: We should make sure this is a 32-bit constant
- Imm.f = MO.getFPImm();
- Value |= ((uint64_t)Imm.i) << 32;
- }
- }
- return Value;
-}
-
-//===----------------------------------------------------------------------===//
-// Encoding helper functions
-//===----------------------------------------------------------------------===//
-
-unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
- return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
-}
-
-unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
-
- // These instructions aren't real instructions with an encoding type, so
- // we need to manually specify their size.
- switch (MI.getOpcode()) {
- default: break;
- case AMDGPU::SI_LOAD_LITERAL_I32:
- case AMDGPU::SI_LOAD_LITERAL_F32:
- return 4;
- }
-
- unsigned encoding_type = getEncodingType(MI);
- switch (encoding_type) {
- case SIInstrEncodingType::EXP:
- case SIInstrEncodingType::LDS:
- case SIInstrEncodingType::MUBUF:
- case SIInstrEncodingType::MTBUF:
- case SIInstrEncodingType::MIMG:
- case SIInstrEncodingType::VOP3:
- return 8;
- default:
- return 4;
- }
-}
-
-
-unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
- switch (reg) {
- case AMDGPU::VCC: return 106;
- case AMDGPU::M0: return 124;
- case AMDGPU::EXEC: return 126;
- case AMDGPU::EXEC_LO: return 126;
- case AMDGPU::EXEC_HI: return 127;
- case AMDGPU::SREG_LIT_0: return 128;
- case AMDGPU::SI_LITERAL_CONSTANT: return 255;
- default: return getHWRegNum(reg);
- }
-}
-
-#define SIRegisterInfo SIMCCodeEmitter
-#include "SIRegisterGetHWRegNum.inc"
-#undef SIRegisterInfo
LIBRARY_INCLUDES = -I$(TOP)/include
-TBLGEN = $(LLVM_BINDIR)/llvm-tblgen
-
CXXFLAGS+= $(LLVM_CXXFLAGS)
-ifeq ($(LLVM_VERSION),3.1)
- CPP_SOURCES += $(LLVM_CPP_SOURCES)
- GENERATED_SOURCES = $(LLVM_GENERATED_SOURCES)
-else
- CXXFLAGS+= -DEXTERNAL_LLVM
-endif
-
include ../../Makefile.template
CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS))
-
-tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3
-
-HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td)
-
-SIRegisterInfo.td: SIGenRegisterInfo.pl
- $(PERL) $^ > $@
-
-SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl
- $(PERL) $^ $@ > /dev/null
-
-R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td
-ifeq ($(HAVE_LLVM_INTRINSICS),)
- cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td
-else
- cp R600IntrinsicsOpenCL.td R600Intrinsics.td
-endif
-
-R600RegisterInfo.td: R600GenRegisterInfo.pl
- $(PERL) $^ > $@
-
-AMDGPUGenRegisterInfo.inc: $(TD_FILES)
- $(call tablegen, -gen-register-info, AMDGPU.td, $@)
-
-AMDGPUGenInstrInfo.inc: $(TD_FILES)
- $(call tablegen, -gen-instr-info, AMDGPU.td, $@)
-
-AMDGPUGenAsmWriter.inc: $(TD_FILES)
- $(call tablegen, -gen-asm-writer, AMDGPU.td, $@)
-
-AMDGPUGenDAGISel.inc: $(TD_FILES)
- $(call tablegen, -gen-dag-isel, AMDGPU.td, $@)
-
-AMDGPUGenCallingConv.inc: $(TD_FILES)
- $(call tablegen, -gen-callingconv, AMDGPU.td, $@)
-
-AMDGPUGenSubtargetInfo.inc: $(TD_FILES)
- $(call tablegen, -gen-subtarget, AMDGPU.td, $@)
-
-AMDGPUGenEDInfo.inc: $(TD_FILES)
- $(call tablegen, -gen-enhanced-disassembly-info, AMDGPU.td, $@)
-
-AMDGPUGenIntrinsics.inc: $(TD_FILES)
- $(call tablegen, -gen-tgt-intrinsic, AMDGPU.td, $@)
-
-AMDGPUGenCodeEmitter.inc: $(TD_FILES)
- $(call tablegen, -gen-emitter, AMDGPU.td, $@)
-
-AMDGPUGenMCCodeEmitter.inc: $(TD_FILES)
- $(call tablegen, -mc-emitter -gen-emitter, AMDGPU.td, $@)
-
-AMDGPUGenDFAPacketizer.inc: $(TD_FILES)
- $(call tablegen, -gen-dfa-packetizer, AMDGPU.td, $@)
-
-LOADER_LIBS=$(shell llvm-config --libs bitreader asmparser)
-loader: loader.o libradeon.a
- gcc -o loader $(LLVM_LDFLAGS) -L/usr/local/lib $(LDFLAGS) loader.o libradeon.a $(LLVM_LIBS) $(LOADER_LIBS) -lpthread -ldl -lstdc++ -lm
-
-TD_FILES := \
- AMDGPU.td \
- AMDGPUInstrInfo.td \
- AMDGPUInstructions.td \
- AMDGPUIntrinsics.td \
- AMDGPURegisterInfo.td \
- AMDILBase.td \
- AMDILInstrInfo.td \
- AMDILIntrinsics.td \
- AMDILRegisterInfo.td \
- Processors.td \
- R600Instructions.td \
- R600Intrinsics.td \
- R600IntrinsicsNoOpenCL.td \
- R600IntrinsicsOpenCL.td \
- R600RegisterInfo.td \
- R600Schedule.td \
- SIInstrFormats.td \
- SIInstrInfo.td \
- SIInstructions.td \
- SIIntrinsics.td \
- SIRegisterInfo.td \
- SISchedule.td
-
-LLVM_GENERATED_SOURCES := \
- R600Intrinsics.td \
- R600RegisterInfo.td \
- SIRegisterInfo.td \
- SIRegisterGetHWRegNum.inc \
- AMDGPUGenRegisterInfo.inc \
- AMDGPUGenInstrInfo.inc \
- AMDGPUGenAsmWriter.inc \
- AMDGPUGenDAGISel.inc \
- AMDGPUGenCallingConv.inc \
- AMDGPUGenSubtargetInfo.inc \
- AMDGPUGenEDInfo.inc \
- AMDGPUGenIntrinsics.inc \
- AMDGPUGenCodeEmitter.inc \
- AMDGPUGenMCCodeEmitter.inc \
- AMDGPUGenDFAPacketizer.inc
-
-LLVM_CPP_SOURCES := \
- AMDIL7XXDevice.cpp \
- AMDILCFGStructurizer.cpp \
- AMDILDevice.cpp \
- AMDILDeviceInfo.cpp \
- AMDILEvergreenDevice.cpp \
- AMDILFrameLowering.cpp \
- AMDILIntrinsicInfo.cpp \
- AMDILISelDAGToDAG.cpp \
- AMDILISelLowering.cpp \
- AMDILNIDevice.cpp \
- AMDILPeepholeOptimizer.cpp \
- AMDILSIDevice.cpp \
- AMDGPUAsmPrinter.cpp \
- AMDGPUMCInstLower.cpp \
- AMDGPUSubtarget.cpp \
- AMDGPUTargetMachine.cpp \
- AMDGPUISelLowering.cpp \
- AMDGPUConvertToISA.cpp \
- AMDGPUInstrInfo.cpp \
- AMDGPURegisterInfo.cpp \
- R600ExpandSpecialInstrs.cpp \
- R600ISelLowering.cpp \
- R600InstrInfo.cpp \
- R600MachineFunctionInfo.cpp \
- R600RegisterInfo.cpp \
- SIAssignInterpRegs.cpp \
- SIInstrInfo.cpp \
- SIISelLowering.cpp \
- SILowerLiteralConstants.cpp \
- SILowerFlowControl.cpp \
- SIMachineFunctionInfo.cpp \
- SIRegisterInfo.cpp \
- InstPrinter/AMDGPUInstPrinter.cpp \
- MCTargetDesc/AMDGPUMCAsmInfo.cpp \
- MCTargetDesc/AMDGPUAsmBackend.cpp \
- MCTargetDesc/AMDGPUMCTargetDesc.cpp \
- MCTargetDesc/SIMCCodeEmitter.cpp \
- MCTargetDesc/R600MCCodeEmitter.cpp \
- TargetInfo/AMDGPUTargetInfo.cpp \
-
CPP_SOURCES := \
radeon_llvm_emit.cpp
+++ /dev/null
-//===-- Processors.td - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// AMDIL processors supported.
-//
-//===----------------------------------------------------------------------===//
-
-class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
-: Processor<Name, itin, Features>;
-def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"rv710", R600_EG_Itin, []>;
-def : Proc<"rv730", R600_EG_Itin, []>;
-def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
-def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
-
+++ /dev/null
-//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-// Operand Flags
-#define MO_FLAG_CLAMP (1 << 0)
-#define MO_FLAG_NEG (1 << 1)
-#define MO_FLAG_ABS (1 << 2)
-#define MO_FLAG_MASK (1 << 3)
-#define MO_FLAG_PUSH (1 << 4)
-#define MO_FLAG_NOT_LAST (1 << 5)
-#define NUM_MO_FLAGS 6
-
-// Helper for finding getting the operand index for the instruction flags
-// operand.
-#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
-
-namespace R600_InstFlag {
- enum TIF {
- TRANS_ONLY = (1 << 0),
- TEX = (1 << 1),
- REDUCTION = (1 << 2),
- FC = (1 << 3),
- TRIG = (1 << 4),
- OP3 = (1 << 5),
- VECTOR = (1 << 6)
- //FlagOperand bits 7, 8
- };
-}
+++ /dev/null
-//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Vector, Reduction, and Cube instructions need to fill the entire instruction
-// group to work correctly. This pass expands these individual instructions
-// into several instructions that will completely fill the instruction group.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600RegisterInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
-
-class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
-
-private:
- static char ID;
- const R600InstrInfo *TII;
-
- bool ExpandInputPerspective(MachineInstr& MI);
- bool ExpandInputConstant(MachineInstr& MI);
-
-public:
- R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
- TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const {
- return "R600 Expand special instructions pass";
- }
-};
-
-} // End anonymous namespace
-
-char R600ExpandSpecialInstrsPass::ID = 0;
-
-FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
- return new R600ExpandSpecialInstrsPass(TM);
-}
-
-bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
-{
- const R600RegisterInfo &TRI = TII->getRegisterInfo();
- if (MI.getOpcode() != AMDGPU::input_perspective)
- return false;
-
- MachineBasicBlock::iterator I = &MI;
- unsigned DstReg = MI.getOperand(0).getReg();
- R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
- ->getInfo<R600MachineFunctionInfo>();
- unsigned IJIndexBase;
-
- // In Evergreen ISA doc section 8.3.2 :
- // We need to interpolate XY and ZW in two different instruction groups.
- // An INTERP_* must occupy all 4 slots of an instruction group.
- // Output of INTERP_XY is written in X,Y slots
- // Output of INTERP_ZW is written in Z,W slots
- //
- // Thus interpolation requires the following sequences :
- //
- // AnyGPR.x = INTERP_ZW; (Write Masked Out)
- // AnyGPR.y = INTERP_ZW; (Write Masked Out)
- // DstGPR.z = INTERP_ZW;
- // DstGPR.w = INTERP_ZW; (End of first IG)
- // DstGPR.x = INTERP_XY;
- // DstGPR.y = INTERP_XY;
- // AnyGPR.z = INTERP_XY; (Write Masked Out)
- // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
- //
- switch (MI.getOperand(1).getImm()) {
- case 0:
- IJIndexBase = MFI->GetIJPerspectiveIndex();
- break;
- case 1:
- IJIndexBase = MFI->GetIJLinearIndex();
- break;
- default:
- assert(0 && "Unknow ij index");
- }
-
- for (unsigned i = 0; i < 8; i++) {
- unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
- 2 * IJIndexBase + ((i + 1) % 2));
- unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
- 4 * MI.getOperand(2).getImm());
-
- unsigned Sel;
- switch (i % 4) {
- case 0:Sel = AMDGPU::sel_x;break;
- case 1:Sel = AMDGPU::sel_y;break;
- case 2:Sel = AMDGPU::sel_z;break;
- case 3:Sel = AMDGPU::sel_w;break;
- default:break;
- }
-
- unsigned Res = TRI.getSubReg(DstReg, Sel);
-
- const MCInstrDesc &Opcode = (i < 4)?
- TII->get(AMDGPU::INTERP_ZW):
- TII->get(AMDGPU::INTERP_XY);
-
- MachineInstr *NewMI = BuildMI(*(MI.getParent()),
- I, MI.getParent()->findDebugLoc(I),
- Opcode, Res)
- .addReg(IJIndex)
- .addReg(ReadReg)
- .addImm(0);
-
- if (!(i> 1 && i < 6)) {
- TII->addFlag(NewMI, 0, MO_FLAG_MASK);
- }
-
- if (i % 4 != 3)
- TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
- }
-
- MI.eraseFromParent();
-
- return true;
-}
-
-bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
-{
- const R600RegisterInfo &TRI = TII->getRegisterInfo();
- if (MI.getOpcode() != AMDGPU::input_constant)
- return false;
-
- MachineBasicBlock::iterator I = &MI;
- unsigned DstReg = MI.getOperand(0).getReg();
-
- for (unsigned i = 0; i < 4; i++) {
- unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
- 4 * MI.getOperand(1).getImm() + i);
-
- unsigned Sel;
- switch (i % 4) {
- case 0:Sel = AMDGPU::sel_x;break;
- case 1:Sel = AMDGPU::sel_y;break;
- case 2:Sel = AMDGPU::sel_z;break;
- case 3:Sel = AMDGPU::sel_w;break;
- default:break;
- }
-
- unsigned Res = TRI.getSubReg(DstReg, Sel);
-
- MachineInstr *NewMI = BuildMI(*(MI.getParent()),
- I, MI.getParent()->findDebugLoc(I),
- TII->get(AMDGPU::INTERP_LOAD_P0), Res)
- .addReg(ReadReg)
- .addImm(0);
-
- if (i % 4 != 3)
- TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
- }
-
- MI.eraseFromParent();
-
- return true;
-}
-
-bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
-
- const R600RegisterInfo &TRI = TII->getRegisterInfo();
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- MachineBasicBlock::iterator I = MBB.begin();
- while (I != MBB.end()) {
- MachineInstr &MI = *I;
- I = llvm::next(I);
-
- if (ExpandInputPerspective(MI))
- continue;
- if (ExpandInputConstant(MI))
- continue;
-
- bool IsReduction = TII->isReductionOp(MI.getOpcode());
- bool IsVector = TII->isVector(MI);
- bool IsCube = TII->isCubeOp(MI.getOpcode());
- if (!IsReduction && !IsVector && !IsCube) {
- continue;
- }
-
- // Expand the instruction
- //
- // Reduction instructions:
- // T0_X = DP4 T1_XYZW, T2_XYZW
- // becomes:
- // TO_X = DP4 T1_X, T2_X
- // TO_Y (write masked) = DP4 T1_Y, T2_Y
- // TO_Z (write masked) = DP4 T1_Z, T2_Z
- // TO_W (write masked) = DP4 T1_W, T2_W
- //
- // Vector instructions:
- // T0_X = MULLO_INT T1_X, T2_X
- // becomes:
- // T0_X = MULLO_INT T1_X, T2_X
- // T0_Y (write masked) = MULLO_INT T1_X, T2_X
- // T0_Z (write masked) = MULLO_INT T1_X, T2_X
- // T0_W (write masked) = MULLO_INT T1_X, T2_X
- //
- // Cube instructions:
- // T0_XYZW = CUBE T1_XYZW
- // becomes:
- // TO_X = CUBE T1_Z, T1_Y
- // T0_Y = CUBE T1_Z, T1_X
- // T0_Z = CUBE T1_X, T1_Z
- // T0_W = CUBE T1_Y, T1_Z
- for (unsigned Chan = 0; Chan < 4; Chan++) {
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Src0 = MI.getOperand(1).getReg();
- unsigned Src1 = 0;
-
- // Determine the correct source registers
- if (!IsCube) {
- Src1 = MI.getOperand(2).getReg();
- }
- if (IsReduction) {
- unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
- Src0 = TRI.getSubReg(Src0, SubRegIndex);
- Src1 = TRI.getSubReg(Src1, SubRegIndex);
- } else if (IsCube) {
- static const int CubeSrcSwz[] = {2, 2, 0, 1};
- unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
- unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
- Src1 = TRI.getSubReg(Src0, SubRegIndex1);
- Src0 = TRI.getSubReg(Src0, SubRegIndex0);
- }
-
- // Determine the correct destination registers;
- unsigned Flags = 0;
- if (IsCube) {
- unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
- DstReg = TRI.getSubReg(DstReg, SubRegIndex);
- } else {
- // Mask the write if the original instruction does not write to
- // the current Channel.
- Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
- unsigned DstBase = TRI.getHWRegIndex(DstReg);
- DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
- }
-
- // Set the IsLast bit
- Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
-
- // Add the new instruction
- unsigned Opcode;
- if (IsCube) {
- switch (MI.getOpcode()) {
- case AMDGPU::CUBE_r600_pseudo:
- Opcode = AMDGPU::CUBE_r600_real;
- break;
- case AMDGPU::CUBE_eg_pseudo:
- Opcode = AMDGPU::CUBE_eg_real;
- break;
- default:
- assert(!"Unknown CUBE instruction");
- Opcode = 0;
- break;
- }
- } else {
- Opcode = MI.getOpcode();
- }
- MachineInstr *NewMI =
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
- .addReg(Src0)
- .addReg(Src1)
- .addImm(0); // Flag
-
- NewMI->setIsInsideBundle(Chan != 0);
- TII->addFlag(NewMI, 0, Flags);
- }
- MI.eraseFromParent();
- }
- }
- return false;
-}
+++ /dev/null
-#===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-#
-# This perl script prints to stdout .td code to be used as R600RegisterInfo.td
-# it also generates a file called R600HwRegInfo.include, which contains helper
-# functions for determining the hw encoding of registers.
-#
-#===------------------------------------------------------------------------===#
-
-use strict;
-use warnings;
-
-use constant CONST_REG_COUNT => 512;
-use constant TEMP_REG_COUNT => 128;
-
-my $CREG_MAX = CONST_REG_COUNT - 1;
-my $TREG_MAX = TEMP_REG_COUNT - 1;
-
-print <<STRING;
-
-class R600Reg <string name> : Register<name> {
- let Namespace = "AMDGPU";
-}
-
-class R600Reg_128<string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
- let Namespace = "AMDGPU";
- let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
-}
-
-STRING
-
-my $i;
-
-### REG DEFS ###
-
-my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C");
-my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T");
-
-my @t128reg;
-my @treg_x;
-for (my $i = 0; $i < TEMP_REG_COUNT; $i++) {
- my $name = "T$i\_XYZW";
- print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W] >;\n};
- $t128reg[$i] = $name;
- $treg_x[$i] = "T$i\_X";
-}
-
-my $treg_string = join(",", @treg_list);
-my $creg_list = join(",", @creg_list);
-my $t128_string = join(",", @t128reg);
-my $treg_x_string = join(",", @treg_x);
-print <<STRING;
-
-class RegSet <dag s> {
- dag set = s;
-}
-
-def ZERO : R600Reg<"0.0">;
-def HALF : R600Reg<"0.5">;
-def ONE : R600Reg<"1.0">;
-def ONE_INT : R600Reg<"1">;
-def NEG_HALF : R600Reg<"-0.5">;
-def NEG_ONE : R600Reg<"-1.0">;
-def PV_X : R600Reg<"pv.x">;
-def ALU_LITERAL_X : R600Reg<"literal.x">;
-def PREDICATE_BIT : R600Reg<"PredicateBit">;
-def PRED_SEL_OFF: R600Reg<"Pred_sel_off">;
-def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero">;
-def PRED_SEL_ONE : R600Reg<"Pred_sel_one">;
-
-def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
- $creg_list)>;
-
-def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
- $treg_string)>;
-
-def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add
- $treg_x_string)>;
-
-def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
- R600_TReg32,
- R600_CReg32,
- ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
-
-def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
- PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
-
-def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
- PREDICATE_BIT)>;
-
-def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
- $t128_string)>
-{
- let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)];
- let CopyCost = -1;
-}
-
-STRING
-
-my %index_map;
-my %chan_map;
-
-for ($i = 0; $i <= $#creg_list; $i++) {
- push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]);
- push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]);
-}
-
-for ($i = 0; $i <= $#treg_list; $i++) {
- push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]);
- push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]);
-}
-
-for ($i = 0; $i <= $#t128reg; $i++) {
- push(@{$index_map{$i}}, $t128reg[$i]);
- push(@{$chan_map{'X'}}, $t128reg[$i]);
-}
-
-open(OUTFILE, ">", "R600HwRegInfo.include");
-
-print OUTFILE <<STRING;
-
-unsigned R600RegisterInfo::getHWRegIndexGen(unsigned reg) const
-{
- switch(reg) {
- default: assert(!"Unknown register"); return 0;
-STRING
-foreach my $key (keys(%index_map)) {
- foreach my $reg (@{$index_map{$key}}) {
- print OUTFILE " case AMDGPU::$reg:\n";
- }
- print OUTFILE " return $key;\n\n";
-}
-
-print OUTFILE " }\n}\n\n";
-
-print OUTFILE <<STRING;
-
-unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const
-{
- switch(reg) {
- default: assert(!"Unknown register"); return 0;
-STRING
-
-foreach my $key (keys(%chan_map)) {
- foreach my $reg (@{$chan_map{$key}}) {
- print OUTFILE " case AMDGPU::$reg:\n";
- }
- my $val;
- if ($key eq 'X') {
- $val = 0;
- } elsif ($key eq 'Y') {
- $val = 1;
- } elsif ($key eq 'Z') {
- $val = 2;
- } elsif ($key eq 'W') {
- $val = 3;
- } else {
- die("Unknown chan value; $key");
- }
- print OUTFILE " return $val;\n\n";
-}
-
-print OUTFILE " }\n}\n\n";
-
-sub print_reg_defs {
- my ($count, $prefix) = @_;
-
- my @reg_list;
-
- for ($i = 0; $i < $count; $i++) {
- my $hw_index = get_hw_index($i);
- my $chan= get_chan_str($i);
- my $name = "$prefix$hw_index\_$chan";
- print qq{def $name : R600Reg <"$prefix$hw_index.$chan">;\n};
- $reg_list[$i] = $name;
- }
- return @reg_list;
-}
-
-#Helper functions
-sub get_hw_index {
- my ($index) = @_;
- return int($index / 4);
-}
-
-sub get_chan_str {
- my ($index) = @_;
- my $chan = $index % 4;
- if ($chan == 0 ) {
- return 'X';
- } elsif ($chan == 1) {
- return 'Y';
- } elsif ($chan == 2) {
- return 'Z';
- } elsif ($chan == 3) {
- return 'W';
- } else {
- die("Unknown chan value: $chan");
- }
-}
+++ /dev/null
-//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
-// is mostly EmitInstrWithCustomInserter().
-//
-//===----------------------------------------------------------------------===//
-
-#include "R600ISelLowering.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "llvm/Argument.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-
-using namespace llvm;
-
-R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
- AMDGPUTargetLowering(TM),
- TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
-{
- setOperationAction(ISD::MUL, MVT::i64, Expand);
- addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
- addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
- addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
- addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
- computeRegisterProperties();
-
- setOperationAction(ISD::FADD, MVT::v4f32, Expand);
- setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
-
- setOperationAction(ISD::BR_CC, MVT::i32, Custom);
- setOperationAction(ISD::BR_CC, MVT::f32, Custom);
-
- setOperationAction(ISD::FSUB, MVT::f32, Expand);
-
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
-
- setOperationAction(ISD::ROTL, MVT::i32, Custom);
-
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-
- setOperationAction(ISD::SETCC, MVT::i32, Custom);
- setOperationAction(ISD::SETCC, MVT::f32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
-
- setTargetDAGCombine(ISD::FP_ROUND);
-
- setSchedulingPreference(Sched::VLIW);
-}
-
-MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
- MachineInstr * MI, MachineBasicBlock * BB) const
-{
- MachineFunction * MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineBasicBlock::iterator I = *MI;
-
- switch (MI->getOpcode()) {
- default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
- case AMDGPU::SHADER_TYPE: break;
- case AMDGPU::CLAMP_R600:
- {
- MachineInstr *NewMI =
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(0) // Flags
- .addReg(AMDGPU::PRED_SEL_OFF);
- TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
- break;
- }
- case AMDGPU::FABS_R600:
- {
- MachineInstr *NewMI =
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(0) // Flags
- .addReg(AMDGPU::PRED_SEL_OFF);
- TII->addFlag(NewMI, 1, MO_FLAG_ABS);
- break;
- }
-
- case AMDGPU::FNEG_R600:
- {
- MachineInstr *NewMI =
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(0) // Flags
- .addReg(AMDGPU::PRED_SEL_OFF);
- TII->addFlag(NewMI, 1, MO_FLAG_NEG);
- break;
- }
-
- case AMDGPU::R600_LOAD_CONST:
- {
- int64_t RegIndex = MI->getOperand(1).getImm();
- unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
- .addOperand(MI->getOperand(0))
- .addReg(ConstantReg);
- break;
- }
-
- case AMDGPU::MASK_WRITE:
- {
- unsigned maskedRegister = MI->getOperand(0).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
- MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
- TII->addFlag(defInstr, 0, MO_FLAG_MASK);
- // Return early so the instruction is not erased
- return BB;
- }
-
- case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
- {
- // Convert to DWORD address
- unsigned NewAddr = MRI.createVirtualRegister(
- &AMDGPU::R600_TReg32_XRegClass);
- unsigned ShiftValue = MRI.createVirtualRegister(
- &AMDGPU::R600_TReg32RegClass);
- unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
-
- // XXX In theory, we should be able to pass ShiftValue directly to
- // the LSHR_eg instruction as an inline literal, but I tried doing it
- // this way and it didn't produce the correct results.
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
- ShiftValue)
- .addReg(AMDGPU::ALU_LITERAL_X)
- .addReg(AMDGPU::PRED_SEL_OFF)
- .addImm(2);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
- .addOperand(MI->getOperand(1))
- .addReg(ShiftValue)
- .addReg(AMDGPU::PRED_SEL_OFF);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
- .addOperand(MI->getOperand(0))
- .addReg(NewAddr)
- .addImm(EOP); // Set End of program bit
- break;
- }
-
- case AMDGPU::RESERVE_REG:
- {
- R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
- int64_t ReservedIndex = MI->getOperand(0).getImm();
- unsigned ReservedReg =
- AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
- MFI->ReservedRegs.push_back(ReservedReg);
- break;
- }
-
- case AMDGPU::TXD:
- {
- unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
- unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
-
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
- .addOperand(MI->getOperand(3))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5));
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
- .addOperand(MI->getOperand(2))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5));
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addReg(t0, RegState::Implicit)
- .addReg(t1, RegState::Implicit);
- break;
- }
- case AMDGPU::TXD_SHADOW:
- {
- unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
- unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
-
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
- .addOperand(MI->getOperand(3))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5));
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
- .addOperand(MI->getOperand(2))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5));
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addReg(t0, RegState::Implicit)
- .addReg(t1, RegState::Implicit);
- break;
- }
- case AMDGPU::BRANCH:
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
- .addOperand(MI->getOperand(0))
- .addReg(0);
- break;
- case AMDGPU::BRANCH_COND_f32:
- {
- MachineInstr *NewMI =
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
- .addReg(AMDGPU::PREDICATE_BIT)
- .addOperand(MI->getOperand(1))
- .addImm(OPCODE_IS_NOT_ZERO)
- .addImm(0); // Flags
- TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
- .addOperand(MI->getOperand(0))
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
- break;
- }
- case AMDGPU::BRANCH_COND_i32:
- {
- MachineInstr *NewMI =
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
- .addReg(AMDGPU::PREDICATE_BIT)
- .addOperand(MI->getOperand(1))
- .addImm(OPCODE_IS_NOT_ZERO_INT)
- .addImm(0); // Flags
- TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
- .addOperand(MI->getOperand(0))
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
- break;
- }
- case AMDGPU::input_perspective:
- {
- R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
-
- // XXX Be more fine about register reservation
- for (unsigned i = 0; i < 4; i ++) {
- unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
- MFI->ReservedRegs.push_back(ReservedReg);
- }
-
- switch (MI->getOperand(1).getImm()) {
- case 0:// Perspective
- MFI->HasPerspectiveInterpolation = true;
- break;
- case 1:// Linear
- MFI->HasLinearInterpolation = true;
- break;
- default:
- assert(0 && "Unknow ij index");
- }
-
- return BB;
- }
- }
-
- MI->eraseFromParent();
- return BB;
-}
-
-//===----------------------------------------------------------------------===//
-// Custom DAG Lowering Operations
-//===----------------------------------------------------------------------===//
-
-using namespace llvm::Intrinsic;
-using namespace llvm::AMDGPUIntrinsic;
-
-SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
-{
- switch (Op.getOpcode()) {
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case ISD::BR_CC: return LowerBR_CC(Op, DAG);
- case ISD::ROTL: return LowerROTL(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::INTRINSIC_VOID: {
- SDValue Chain = Op.getOperand(0);
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- switch (IntrinsicID) {
- case AMDGPUIntrinsic::AMDGPU_store_output: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
- if (!MRI.isLiveOut(Reg)) {
- MRI.addLiveOut(Reg);
- }
- return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
- }
- // default for switch(IntrinsicID)
- default: break;
- }
- // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
- break;
- }
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- EVT VT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
- switch(IntrinsicID) {
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case AMDGPUIntrinsic::R600_load_input: {
- int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
- }
- case AMDGPUIntrinsic::R600_load_input_perspective: {
- unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- SDValue FullVector = DAG.getNode(
- AMDGPUISD::INTERP,
- DL, MVT::v4f32,
- DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
- }
- case AMDGPUIntrinsic::R600_load_input_linear: {
- unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- SDValue FullVector = DAG.getNode(
- AMDGPUISD::INTERP,
- DL, MVT::v4f32,
- DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
- }
- case AMDGPUIntrinsic::R600_load_input_constant: {
- unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- SDValue FullVector = DAG.getNode(
- AMDGPUISD::INTERP_P0,
- DL, MVT::v4f32,
- DAG.getConstant(slot / 4 , MVT::i32));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
- }
- case AMDGPUIntrinsic::R600_load_input_position: {
- unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
- SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- RegIndex, MVT::f32);
- if ((slot % 4) == 3) {
- return DAG.getNode(ISD::FDIV,
- DL, VT,
- DAG.getConstantFP(1.0f, MVT::f32),
- Reg);
- } else {
- return Reg;
- }
- }
-
- case r600_read_ngroups_x:
- return LowerImplicitParameter(DAG, VT, DL, 0);
- case r600_read_ngroups_y:
- return LowerImplicitParameter(DAG, VT, DL, 1);
- case r600_read_ngroups_z:
- return LowerImplicitParameter(DAG, VT, DL, 2);
- case r600_read_global_size_x:
- return LowerImplicitParameter(DAG, VT, DL, 3);
- case r600_read_global_size_y:
- return LowerImplicitParameter(DAG, VT, DL, 4);
- case r600_read_global_size_z:
- return LowerImplicitParameter(DAG, VT, DL, 5);
- case r600_read_local_size_x:
- return LowerImplicitParameter(DAG, VT, DL, 6);
- case r600_read_local_size_y:
- return LowerImplicitParameter(DAG, VT, DL, 7);
- case r600_read_local_size_z:
- return LowerImplicitParameter(DAG, VT, DL, 8);
-
- case r600_read_tgid_x:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_X, VT);
- case r600_read_tgid_y:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_Y, VT);
- case r600_read_tgid_z:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_Z, VT);
- case r600_read_tidig_x:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_X, VT);
- case r600_read_tidig_y:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_Y, VT);
- case r600_read_tidig_z:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_Z, VT);
- }
- // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
- break;
- }
- } // end switch(Op.getOpcode())
- return SDValue();
-}
-
-void R600TargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const
-{
- switch (N->getOpcode()) {
- default: return;
- case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
- case ISD::INTRINSIC_WO_CHAIN:
- {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) {
- Results.push_back(LowerInputFace(N, DAG));
- } else {
- return;
- }
- }
- }
-}
-
-SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
-{
- unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
- unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
- SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- RegIndex, MVT::f32);
- return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
- Reg, DAG.getConstantFP(0.0f, MVT::f32),
- DAG.getCondCode(ISD::SETUGT));
-}
-
-SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
-{
- return DAG.getNode(
- ISD::SETCC,
- Op.getDebugLoc(),
- MVT::i1,
- Op, DAG.getConstantFP(0.0f, MVT::f32),
- DAG.getCondCode(ISD::SETNE)
- );
-}
-
-SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Chain = Op.getOperand(0);
- SDValue CC = Op.getOperand(1);
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue JumpT = Op.getOperand(4);
- SDValue CmpValue;
- SDValue Result;
-
- if (LHS.getValueType() == MVT::i32) {
- CmpValue = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::i32,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- } else if (LHS.getValueType() == MVT::f32) {
- CmpValue = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::f32,
- LHS, RHS,
- DAG.getConstantFP(1.0f, MVT::f32),
- DAG.getConstantFP(0.0f, MVT::f32),
- CC);
- } else {
- assert(0 && "Not valid type for br_cc");
- }
- Result = DAG.getNode(
- AMDGPUISD::BRANCH_COND,
- CmpValue.getDebugLoc(),
- MVT::Other, Chain,
- JumpT, CmpValue);
- return Result;
-}
-
-SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
- DebugLoc DL,
- unsigned DwordOffset) const
-{
- unsigned ByteOffset = DwordOffset * 4;
- PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::PARAM_I_ADDRESS);
-
- // We shouldn't be using an offset wider than 16-bits for implicit parameters.
- assert(isInt<16>(ByteOffset));
-
- return DAG.getLoad(VT, DL, DAG.getEntryNode(),
- DAG.getConstant(ByteOffset, MVT::i32), // PTR
- MachinePointerInfo(ConstantPointerNull::get(PtrType)),
- false, false, false, 0);
-}
-
-SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
-
- return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
- Op.getOperand(0),
- Op.getOperand(0),
- DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(32, MVT::i32),
- Op.getOperand(1)));
-}
-
-bool R600TargetLowering::isZero(SDValue Op) const
-{
- if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
- return Cst->isNullValue();
- } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
- return CstFP->isZero();
- } else {
- return false;
- }
-}
-
-SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
-
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue True = Op.getOperand(2);
- SDValue False = Op.getOperand(3);
- SDValue CC = Op.getOperand(4);
- SDValue Temp;
-
- // LHS and RHS are guaranteed to be the same value type
- EVT CompareVT = LHS.getValueType();
-
- // We need all the operands of SELECT_CC to have the same value type, so if
- // necessary we need to convert LHS and RHS to be the same type True and
- // False. True and False are guaranteed to have the same type as this
- // SELECT_CC node.
-
- if (isHWTrueValue(True) && isHWFalseValue(False)) {
- if (CompareVT != VT) {
- if (VT == MVT::f32 && CompareVT == MVT::i32) {
- SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Boolean);
- } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
- SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
- LHS, RHS,
- DAG.getConstantFP(1.0f, MVT::f32),
- DAG.getConstantFP(0.0f, MVT::f32),
- CC);
- return DAG.getNode(ISD::FP_TO_UINT, DL, VT, BoolAsFlt);
- } else {
- // I don't think there will be any other type pairings.
- assert(!"Unhandled operand type parings in SELECT_CC");
- }
- } else {
- return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
- }
- }
-
-
- // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
- // we can handle this with a native instruction, but we need to swap true
- // and false and change the conditional.
- if (isHWTrueValue(False) && isHWFalseValue(True)) {
- }
-
- // Check if we can lower this to a native operation.
- // CND* instructions requires all operands to have the same type,
- // and RHS to be zero.
-
- if (isZero(LHS) || isZero(RHS)) {
- SDValue Cond = (isZero(LHS) ? RHS : LHS);
- SDValue Zero = (isZero(LHS) ? LHS : RHS);
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- if (CompareVT != VT) {
- True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
- False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
- }
- if (isZero(LHS)) {
- CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
- }
-
- switch (CCOpcode) {
- case ISD::SETONE:
- case ISD::SETUNE:
- case ISD::SETNE:
- case ISD::SETULE:
- case ISD::SETULT:
- case ISD::SETOLE:
- case ISD::SETOLT:
- case ISD::SETLE:
- case ISD::SETLT:
- CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
- Temp = True;
- True = False;
- False = Temp;
- break;
- default:
- break;
- }
- SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
- Cond, Zero,
- True, False,
- DAG.getCondCode(CCOpcode));
- return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
- }
-
-
- // If we make it this for it means we have no native instructions to handle
- // this SELECT_CC, so we must lower it.
- SDValue HWTrue, HWFalse;
-
- if (CompareVT == MVT::f32) {
- HWTrue = DAG.getConstantFP(1.0f, CompareVT);
- HWFalse = DAG.getConstantFP(0.0f, CompareVT);
- } else if (CompareVT == MVT::i32) {
- HWTrue = DAG.getConstant(-1, CompareVT);
- HWFalse = DAG.getConstant(0, CompareVT);
- }
- else {
- assert(!"Unhandled value type in LowerSELECT_CC");
- }
-
- // Lower this unsupported SELECT_CC into a combination of two supported
- // SELECT_CC operations.
- SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
-
- return DAG.getNode(ISD::SELECT_CC, DL, VT,
- Cond, HWFalse,
- True, False,
- DAG.getCondCode(ISD::SETNE));
-}
-
-SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Cond;
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue CC = Op.getOperand(2);
- DebugLoc DL = Op.getDebugLoc();
- assert(Op.getValueType() == MVT::i32);
- if (LHS.getValueType() == MVT::i32) {
- Cond = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::i32,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- } else if (LHS.getValueType() == MVT::f32) {
- Cond = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::f32,
- LHS, RHS,
- DAG.getConstantFP(1.0f, MVT::f32),
- DAG.getConstantFP(0.0f, MVT::f32),
- CC);
- Cond = DAG.getNode(
- ISD::FP_TO_SINT,
- DL,
- MVT::i32,
- Cond);
- } else {
- assert(0 && "Not valid type for set_cc");
- }
- Cond = DAG.getNode(
- ISD::AND,
- DL,
- MVT::i32,
- DAG.getConstant(1, MVT::i32),
- Cond);
- return Cond;
-}
-
-// XXX Only kernel functions are supporte, so we can assume for now that
-// every function is a kernel function, but in the future we should use
-// separate calling conventions for kernel and non-kernel functions.
-// Only kernel functions are supported, so we can assume for now
-SDValue R600TargetLowering::LowerFormalArguments(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const
-{
- unsigned ParamOffsetBytes = 36;
- for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
- EVT VT = Ins[i].VT;
- PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::PARAM_I_ADDRESS);
- SDValue Arg = DAG.getLoad(VT, DL, DAG.getRoot(),
- DAG.getConstant(ParamOffsetBytes, MVT::i32),
- MachinePointerInfo(new Argument(PtrTy)),
- false, false, false, 4);
- InVals.push_back(Arg);
- ParamOffsetBytes += (VT.getStoreSize());
- }
- return Chain;
-}
-
-//===----------------------------------------------------------------------===//
-// Custom DAG Optimizations
-//===----------------------------------------------------------------------===//
-
-SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const
-{
- SelectionDAG &DAG = DCI.DAG;
-
- switch (N->getOpcode()) {
- // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
- case ISD::FP_ROUND: {
- SDValue Arg = N->getOperand(0);
- if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
- return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
- Arg.getOperand(0));
- }
- break;
- }
- }
- return SDValue();
-}
+++ /dev/null
-//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// R600 DAG Lowering interface definition
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef R600ISELLOWERING_H
-#define R600ISELLOWERING_H
-
-#include "AMDGPUISelLowering.h"
-
-namespace llvm {
-
-class R600InstrInfo;
-
-class R600TargetLowering : public AMDGPUTargetLowering
-{
-public:
- R600TargetLowering(TargetMachine &TM);
- virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock * BB) const;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- void ReplaceNodeResults(SDNode * N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
- virtual SDValue LowerFormalArguments(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-private:
- const R600InstrInfo * TII;
-
- /// lowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
- /// that are stored in the first nine dwords of a Vertex Buffer. These
- /// implicit parameters are lowered to load instructions which retreive the
- /// values from the Vertex Buffer.
- SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
- DebugLoc DL, unsigned DwordOffset) const;
-
- void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
- MachineRegisterInfo & MRI, unsigned dword_offset) const;
-
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
-
- /// LowerROTL - Lower ROTL opcode to BITALIGN
- SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
- SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
-
- bool isZero(SDValue Op) const;
-};
-
-} // End namespace llvm;
-
-#endif // R600ISELLOWERING_H
+++ /dev/null
-//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// R600 Implementation of TargetInstrInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "R600InstrInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "AMDILUtilityFunctions.h"
-
-#define GET_INSTRINFO_CTOR
-#include "AMDGPUGenDFAPacketizer.inc"
-
-using namespace llvm;
-
-R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
- : AMDGPUInstrInfo(tm),
- RI(tm, *this),
- TM(tm)
- { }
-
-const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
-{
- return RI;
-}
-
-bool R600InstrInfo::isTrig(const MachineInstr &MI) const
-{
- return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
-}
-
-bool R600InstrInfo::isVector(const MachineInstr &MI) const
-{
- return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
-}
-
-void
-R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const
-{
- if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
- && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
- for (unsigned I = 0; I < 4; I++) {
- unsigned SubRegIndex = RI.getSubRegFromChannel(I);
- BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
- .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
- .addReg(RI.getSubReg(SrcReg, SubRegIndex))
- .addImm(0) // Flag
- .addReg(0) // PREDICATE_BIT
- .addReg(DestReg, RegState::Define | RegState::Implicit);
- }
- } else {
-
- /* We can't copy vec4 registers */
- assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
- && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
-
- BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0) // Flag
- .addReg(0); // PREDICATE_BIT
- }
-}
-
-MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
- unsigned DstReg, int64_t Imm) const
-{
- MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
- MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
- MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
- MachineInstrBuilder(MI).addImm(Imm);
- MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
-
- return MI;
-}
-
-unsigned R600InstrInfo::getIEQOpcode() const
-{
- return AMDGPU::SETE_INT;
-}
-
-bool R600InstrInfo::isMov(unsigned Opcode) const
-{
-
-
- switch(Opcode) {
- default: return false;
- case AMDGPU::MOV:
- case AMDGPU::MOV_IMM_F32:
- case AMDGPU::MOV_IMM_I32:
- return true;
- }
-}
-
-// Some instructions act as place holders to emulate operations that the GPU
-// hardware does automatically. This function can be used to check if
-// an opcode falls into this category.
-bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
-{
- switch (Opcode) {
- default: return false;
- case AMDGPU::RETURN:
- case AMDGPU::MASK_WRITE:
- case AMDGPU::RESERVE_REG:
- return true;
- }
-}
-
-bool R600InstrInfo::isReductionOp(unsigned Opcode) const
-{
- switch(Opcode) {
- default: return false;
- case AMDGPU::DOT4_r600:
- case AMDGPU::DOT4_eg:
- return true;
- }
-}
-
-bool R600InstrInfo::isCubeOp(unsigned Opcode) const
-{
- switch(Opcode) {
- default: return false;
- case AMDGPU::CUBE_r600_pseudo:
- case AMDGPU::CUBE_r600_real:
- case AMDGPU::CUBE_eg_pseudo:
- case AMDGPU::CUBE_eg_real:
- return true;
- }
-}
-
-DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const
-{
- const InstrItineraryData *II = TM->getInstrItineraryData();
- return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
-}
-
-static bool
-isPredicateSetter(unsigned Opcode)
-{
- switch (Opcode) {
- case AMDGPU::PRED_X:
- return true;
- default:
- return false;
- }
-}
-
-static MachineInstr *
-findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I)
-{
- while (I != MBB.begin()) {
- --I;
- MachineInstr *MI = I;
- if (isPredicateSetter(MI->getOpcode()))
- return MI;
- }
-
- return NULL;
-}
-
-bool
-R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const
-{
- // Most of the following comes from the ARM implementation of AnalyzeBranch
-
- // If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
- return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
- if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
- return false;
- }
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
-
- // If there is only one terminator instruction, process it.
- unsigned LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() ||
- static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
- if (LastOpc == AMDGPU::JUMP) {
- if(!isPredicated(LastInst)) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else {
- MachineInstr *predSet = I;
- while (!isPredicateSetter(predSet->getOpcode())) {
- predSet = --I;
- }
- TBB = LastInst->getOperand(0).getMBB();
- Cond.push_back(predSet->getOperand(1));
- Cond.push_back(predSet->getOperand(2));
- Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
- return false;
- }
- }
- return true; // Can't handle indirect branch.
- }
-
- // Get the instruction before it if it is a terminator.
- MachineInstr *SecondLastInst = I;
- unsigned SecondLastOpc = SecondLastInst->getOpcode();
-
- // If the block ends with a B and a Bcc, handle it.
- if (SecondLastOpc == AMDGPU::JUMP &&
- isPredicated(SecondLastInst) &&
- LastOpc == AMDGPU::JUMP &&
- !isPredicated(LastInst)) {
- MachineInstr *predSet = --I;
- while (!isPredicateSetter(predSet->getOpcode())) {
- predSet = --I;
- }
- TBB = SecondLastInst->getOperand(0).getMBB();
- FBB = LastInst->getOperand(0).getMBB();
- Cond.push_back(predSet->getOperand(1));
- Cond.push_back(predSet->getOperand(2));
- Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
- return false;
- }
-
- // Otherwise, can't handle this.
- return true;
-}
-
-int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
- const MachineInstr *MI = op.getParent();
-
- switch (MI->getDesc().OpInfo->RegClass) {
- default: // FIXME: fallthrough??
- case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
- case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
- };
-}
-
-unsigned
-R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const
-{
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-
- if (FBB == 0) {
- if (Cond.empty()) {
- BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
- return 1;
- } else {
- MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
- assert(PredSet && "No previous predicate !");
- addFlag(PredSet, 1, MO_FLAG_PUSH);
- PredSet->getOperand(2).setImm(Cond[1].getImm());
-
- BuildMI(&MBB, DL, get(AMDGPU::JUMP))
- .addMBB(TBB)
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
- return 1;
- }
- } else {
- MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
- assert(PredSet && "No previous predicate !");
- addFlag(PredSet, 1, MO_FLAG_PUSH);
- PredSet->getOperand(2).setImm(Cond[1].getImm());
- BuildMI(&MBB, DL, get(AMDGPU::JUMP))
- .addMBB(TBB)
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
- BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
- return 2;
- }
-}
-
-unsigned
-R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
-{
-
- // Note : we leave PRED* instructions there.
- // They may be needed when predicating instructions.
-
- MachineBasicBlock::iterator I = MBB.end();
-
- if (I == MBB.begin()) {
- return 0;
- }
- --I;
- switch (I->getOpcode()) {
- default:
- return 0;
- case AMDGPU::JUMP:
- if (isPredicated(I)) {
- MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
- clearFlag(predSet, 1, MO_FLAG_PUSH);
- }
- I->eraseFromParent();
- break;
- }
- I = MBB.end();
-
- if (I == MBB.begin()) {
- return 1;
- }
- --I;
- switch (I->getOpcode()) {
- // FIXME: only one case??
- default:
- return 1;
- case AMDGPU::JUMP:
- if (isPredicated(I)) {
- MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
- clearFlag(predSet, 1, MO_FLAG_PUSH);
- }
- I->eraseFromParent();
- break;
- }
- return 2;
-}
-
-bool
-R600InstrInfo::isPredicated(const MachineInstr *MI) const
-{
- int idx = MI->findFirstPredOperandIdx();
- if (idx < 0)
- return false;
-
- unsigned Reg = MI->getOperand(idx).getReg();
- switch (Reg) {
- default: return false;
- case AMDGPU::PRED_SEL_ONE:
- case AMDGPU::PRED_SEL_ZERO:
- case AMDGPU::PREDICATE_BIT:
- return true;
- }
-}
-
-bool
-R600InstrInfo::isPredicable(MachineInstr *MI) const
-{
- return AMDGPUInstrInfo::isPredicable(MI);
-}
-
-
-bool
-R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
- unsigned ExtraPredCycles,
- const BranchProbability &Probability) const{
- return true;
-}
-
-bool
-R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles,
- unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles,
- unsigned ExtraFCycles,
- const BranchProbability &Probability) const
-{
- return true;
-}
-
-bool
-R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
- const BranchProbability &Probability)
- const
-{
- return true;
-}
-
-bool
-R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
- MachineBasicBlock &FMBB) const
-{
- return false;
-}
-
-
-bool
-R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
-{
- MachineOperand &MO = Cond[1];
- switch (MO.getImm()) {
- case OPCODE_IS_ZERO_INT:
- MO.setImm(OPCODE_IS_NOT_ZERO_INT);
- break;
- case OPCODE_IS_NOT_ZERO_INT:
- MO.setImm(OPCODE_IS_ZERO_INT);
- break;
- case OPCODE_IS_ZERO:
- MO.setImm(OPCODE_IS_NOT_ZERO);
- break;
- case OPCODE_IS_NOT_ZERO:
- MO.setImm(OPCODE_IS_ZERO);
- break;
- default:
- return true;
- }
-
- MachineOperand &MO2 = Cond[2];
- switch (MO2.getReg()) {
- case AMDGPU::PRED_SEL_ZERO:
- MO2.setReg(AMDGPU::PRED_SEL_ONE);
- break;
- case AMDGPU::PRED_SEL_ONE:
- MO2.setReg(AMDGPU::PRED_SEL_ZERO);
- break;
- default:
- return true;
- }
- return false;
-}
-
-bool
-R600InstrInfo::DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const
-{
- return isPredicateSetter(MI->getOpcode());
-}
-
-
-bool
-R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const
-{
- return false;
-}
-
-
-bool
-R600InstrInfo::PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const
-{
- int PIdx = MI->findFirstPredOperandIdx();
-
- if (PIdx != -1) {
- MachineOperand &PMO = MI->getOperand(PIdx);
- PMO.setReg(Pred[2].getReg());
- MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
- return true;
- }
-
- return false;
-}
-
-int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const
-{
- if (PredCost)
- *PredCost = 2;
- return 2;
-}
-
-//===----------------------------------------------------------------------===//
-// Instruction flag getters/setters
-//===----------------------------------------------------------------------===//
-
-bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
-{
- return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
-}
-
-MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
-{
- unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
- assert(FlagIndex != 0 &&
- "Instruction flags not supported for this instruction");
- MachineOperand &FlagOp = MI->getOperand(FlagIndex);
- assert(FlagOp.isImm());
- return FlagOp;
-}
-
-void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
- unsigned Flag) const
-{
- MachineOperand &FlagOp = getFlagOp(MI);
- FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
-}
-
-void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
- unsigned Flag) const
-{
- MachineOperand &FlagOp = getFlagOp(MI);
- unsigned InstFlags = FlagOp.getImm();
- InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
- FlagOp.setImm(InstFlags);
-}
+++ /dev/null
-//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Interface definition for R600InstrInfo
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef R600INSTRUCTIONINFO_H_
-#define R600INSTRUCTIONINFO_H_
-
-#include "AMDIL.h"
-#include "AMDGPUInstrInfo.h"
-#include "R600RegisterInfo.h"
-
-#include <map>
-
-namespace llvm {
-
- class AMDGPUTargetMachine;
- class DFAPacketizer;
- class ScheduleDAG;
- class MachineFunction;
- class MachineInstr;
- class MachineInstrBuilder;
-
- class R600InstrInfo : public AMDGPUInstrInfo {
- private:
- const R600RegisterInfo RI;
- AMDGPUTargetMachine &TM;
-
- int getBranchInstr(const MachineOperand &op) const;
-
- public:
- explicit R600InstrInfo(AMDGPUTargetMachine &tm);
-
- const R600RegisterInfo &getRegisterInfo() const;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- bool isTrig(const MachineInstr &MI) const;
- bool isPlaceHolderOpcode(unsigned opcode) const;
- bool isReductionOp(unsigned opcode) const;
- bool isCubeOp(unsigned opcode) const;
-
- /// isVector - Vector instructions are instructions that must fill all
- /// instruction slots within an instruction group.
- bool isVector(const MachineInstr &MI) const;
-
- virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const;
-
- virtual unsigned getIEQOpcode() const;
- virtual bool isMov(unsigned Opcode) const;
-
- DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const;
-
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
-
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
-
- unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- bool isPredicated(const MachineInstr *MI) const;
-
- bool isPredicable(MachineInstr *MI) const;
-
- bool
- isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
- const BranchProbability &Probability) const;
-
- bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
- unsigned ExtraPredCycles,
- const BranchProbability &Probability) const ;
-
- bool
- isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles, unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles, unsigned ExtraFCycles,
- const BranchProbability &Probability) const;
-
- bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const;
-
- bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const;
-
- bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
- MachineBasicBlock &FMBB) const;
-
- bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const;
-
- int getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost = 0) const;
-
- virtual int getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *Node) const { return 1;}
-
- ///hasFlagOperand - Returns true if this instruction has an operand for
- /// storing target flags.
- bool hasFlagOperand(const MachineInstr &MI) const;
-
- ///addFlag - Add one of the MO_FLAG* flags to the specified Operand.
- void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
-
- ///isFlagSet - Determine if the specified flag is set on this Operand.
- bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
-
- ///getFlagOp - Return the operand containing the flags for this instruction.
- MachineOperand &getFlagOp(MachineInstr *MI) const;
-
- ///clearFlag - Clear the specified flag on the instruction.
- void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
-};
-
-} // End llvm namespace
-
-#endif // R600INSTRINFO_H_
+++ /dev/null
-//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// R600 Tablegen instruction definitions
-//
-//===----------------------------------------------------------------------===//
-
-include "R600Intrinsics.td"
-
-class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
- InstrItinClass itin>
- : AMDGPUInst <outs, ins, asm, pattern> {
-
- field bits<64> Inst;
- bit Trig = 0;
- bit Op3 = 0;
- bit isVector = 0;
- bits<2> FlagOperandIdx = 0;
-
- bits<11> op_code = inst;
- //let Inst = inst;
- let Namespace = "AMDGPU";
- let OutOperandList = outs;
- let InOperandList = ins;
- let AsmString = asm;
- let Pattern = pattern;
- let Itinerary = itin;
-
- let TSFlags{4} = Trig;
- let TSFlags{5} = Op3;
-
- // Vector instructions are instructions that must fill all slots in an
- // instruction group
- let TSFlags{6} = isVector;
- let TSFlags{8-7} = FlagOperandIdx;
-}
-
-class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
- AMDGPUInst <outs, ins, asm, pattern>
-{
- field bits<64> Inst;
-
- let Namespace = "AMDGPU";
-}
-
-def MEMxi : Operand<iPTR> {
- let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
-}
-
-def MEMrr : Operand<iPTR> {
- let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
-}
-
-def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
-def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
-def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
-
-class R600_ALU {
-
- bits<7> DST_GPR = 0;
- bits<9> SRC0_SEL = 0;
- bits<1> SRC0_NEG = 0;
- bits<9> SRC1_SEL = 0;
- bits<1> SRC1_NEG = 0;
- bits<1> CLAMP = 0;
-
-}
-
-def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
- (ops PRED_SEL_OFF)>;
-
-
-class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
- InstrItinClass itin = AnyALU> :
- InstR600 <inst,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src, R600_Pred:$p, variable_ops),
- !strconcat(opName, " $dst, $src ($p)"),
- pattern,
- itin>{
- bits<7> dst;
- bits<9> src;
- let Inst{8-0} = src;
- let Inst{49-39} = inst;
- let Inst{59-53} = dst;
- }
-
-class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
- InstrItinClass itin = AnyALU> :
- InstR600 <inst,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops),
- !strconcat(opName, " $dst, $src0, $src1"),
- pattern,
- itin>{
- bits<7> dst;
- bits<9> src0;
- bits<9> src1;
- let Inst{8-0} = src0;
- let Inst{21-13} = src1;
- let Inst{49-39} = inst;
- let Inst{59-53} = dst;
- }
-
-class R600_3OP <bits<11> inst, string opName, list<dag> pattern,
- InstrItinClass itin = AnyALU> :
- InstR600 <inst,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2,R600_Pred:$p, variable_ops),
- !strconcat(opName, " $dst, $src0, $src1, $src2"),
- pattern,
- itin>{
- bits<7> dst;
- bits<9> src0;
- bits<9> src1;
- bits<9> src2;
- let Inst{8-0} = src0;
- let Inst{21-13} = src1;
- let Inst{40-32} = src2;
- let Inst{49-45} = inst{4-0};
- let Inst{59-53} = dst;
- let Op3 = 1;
- }
-
-
-
-def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst),
- (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
- "PRED $dst, $src0, $src1",
- [], NullALU>
-{
- bits<7> dst;
- bits<9> src0;
- bits<11> src1;
- let Inst{8-0} = src0;
- let Inst{49-39} = src1;
- let Inst{59-53} = dst;
- let FlagOperandIdx = 3;
-}
-
-let isTerminator = 1, isBranch = 1, isPseudo = 1 in {
-def JUMP : InstR600 <0x10,
- (outs),
- (ins brtarget:$target, R600_Pred:$p),
- "JUMP $target ($p)",
- [], AnyALU
- >;
-}
-
-class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
- InstrItinClass itin = VecALU> :
- InstR600 <inst,
- (outs R600_Reg32:$dst),
- ins,
- asm,
- pattern,
- itin>{
- bits<7> dst;
- let Inst{49-39} = inst;
- let Inst{59-53} = dst;
- }
-
-class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
- InstrItinClass itin = AnyALU> :
- InstR600 <inst,
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2),
- !strconcat(opName, "$dst, $src0, $src1, $src2"),
- pattern,
- itin>{
- let Inst {10-0} = inst;
- }
-
-def TEX_SHADOW : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return (TType >= 6 && TType <= 8) || TType == 11 || TType == 12;
- }]
->;
-
-class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
- dag ins, string asm, list<dag> pattern> :
- InstR600ISA <outs, ins, asm, pattern>
-{
- bits<7> RW_GPR;
- bits<7> INDEX_GPR;
-
- bits<2> RIM;
- bits<2> TYPE;
- bits<1> RW_REL;
- bits<2> ELEM_SIZE;
-
- bits<12> ARRAY_SIZE;
- bits<4> COMP_MASK;
- bits<4> BURST_COUNT;
- bits<1> VPM;
- bits<1> eop;
- bits<1> MARK;
- bits<1> BARRIER;
-
- // CF_ALLOC_EXPORT_WORD0_RAT
- let Inst{3-0} = rat_id;
- let Inst{9-4} = rat_inst;
- let Inst{10} = 0; // Reserved
- let Inst{12-11} = RIM;
- let Inst{14-13} = TYPE;
- let Inst{21-15} = RW_GPR;
- let Inst{22} = RW_REL;
- let Inst{29-23} = INDEX_GPR;
- let Inst{31-30} = ELEM_SIZE;
-
- // CF_ALLOC_EXPORT_WORD1_BUF
- let Inst{43-32} = ARRAY_SIZE;
- let Inst{47-44} = COMP_MASK;
- let Inst{51-48} = BURST_COUNT;
- let Inst{52} = VPM;
- let Inst{53} = eop;
- let Inst{61-54} = cf_inst;
- let Inst{62} = MARK;
- let Inst{63} = BARRIER;
-}
-
-def load_param : PatFrag<(ops node:$ptr),
- (load node:$ptr),
- [{
- const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
- if (Src) {
- PointerType * PT = dyn_cast<PointerType>(Src->getType());
- return PT && PT->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS;
- }
- return false;
- }]>;
-
-def isR600 : Predicate<"Subtarget.device()"
- "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
-def isR700 : Predicate<"Subtarget.device()"
- "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
- "Subtarget.device()->getDeviceFlag()"
- ">= OCL_DEVICE_RV710">;
-def isEG : Predicate<
- "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
- "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
- "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
-
-def isCayman : Predicate<"Subtarget.device()"
- "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
-def isEGorCayman : Predicate<"Subtarget.device()"
- "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
- "|| Subtarget.device()->getGeneration() =="
- "AMDGPUDeviceInfo::HD6XXX">;
-
-def isR600toCayman : Predicate<
- "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
-
-//===----------------------------------------------------------------------===//
-// Interpolation Instructions
-//===----------------------------------------------------------------------===//
-
-def INTERP: SDNode<"AMDGPUISD::INTERP",
- SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
- >;
-
-def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
- SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
- >;
-
-let usesCustomInserter = 1 in {
-def input_perspective : AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins i32imm:$src0, i32imm:$src1),
- "input_perspective $src0 $src1 : dst",
- [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
-} // End usesCustomInserter = 1
-
-def input_constant : AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins i32imm:$src),
- "input_perspective $src : dst",
- [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
-
-
-
-def INTERP_XY : InstR600 <0xD6,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
- "INTERP_XY dst",
- [], AnyALU>
-{
- let FlagOperandIdx = 3;
-}
-
-def INTERP_ZW : InstR600 <0xD7,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
- "INTERP_ZW dst",
- [], AnyALU>
-{
- let FlagOperandIdx = 3;
-}
-
-def INTERP_LOAD_P0 : InstR600 <0xE0,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src, i32imm:$flags),
- "INTERP_LOAD_P0 dst",
- [], AnyALU>
-{
- let FlagOperandIdx = 2;
-}
-
-let Predicates = [isR600toCayman] in {
-
-//===----------------------------------------------------------------------===//
-// Common Instructions R600, R700, Evergreen, Cayman
-//===----------------------------------------------------------------------===//
-
-def ADD : R600_2OP <
- 0x0, "ADD",
- [(set R600_Reg32:$dst, (fadd R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-// Non-IEEE MUL: 0 * anything = 0
-def MUL : R600_2OP <
- 0x1, "MUL NON-IEEE",
- [(set R600_Reg32:$dst, (int_AMDGPU_mul R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-def MUL_IEEE : R600_2OP <
- 0x2, "MUL_IEEE",
- [(set R600_Reg32:$dst, (fmul R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-def MAX : R600_2OP <
- 0x3, "MAX",
- [(set R600_Reg32:$dst, (AMDGPUfmax R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-def MIN : R600_2OP <
- 0x4, "MIN",
- [(set R600_Reg32:$dst, (AMDGPUfmin R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
-// so some of the instruction names don't match the asm string.
-// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
-
-def SETE : R600_2OP <
- 0x08, "SETE",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
- COND_EQ))]
->;
-
-def SGT : R600_2OP <
- 0x09, "SETGT",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
- COND_GT))]
->;
-
-def SGE : R600_2OP <
- 0xA, "SETGE",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
- COND_GE))]
->;
-
-def SNE : R600_2OP <
- 0xB, "SETNE",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
- COND_NE))]
->;
-
-def FRACT : R600_1OP <
- 0x10, "FRACT",
- [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))]
->;
-
-def TRUNC : R600_1OP <
- 0x11, "TRUNC",
- [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))]
->;
-
-def CEIL : R600_1OP <
- 0x12, "CEIL",
- [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))]
->;
-
-def RNDNE : R600_1OP <
- 0x13, "RNDNE",
- [(set R600_Reg32:$dst, (frint R600_Reg32:$src))]
->;
-
-def FLOOR : R600_1OP <
- 0x14, "FLOOR",
- [(set R600_Reg32:$dst, (ffloor R600_Reg32:$src))]
->;
-
-def MOV : InstR600 <0x19, (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src0, i32imm:$flags,
- R600_Pred:$p),
- "MOV $dst, $src0", [], AnyALU> {
- let FlagOperandIdx = 2;
- bits<7> dst;
- bits<9> src0;
- let Inst{8-0} = src0;
- let Inst{49-39} = op_code;
- let Inst{59-53} = dst;
-}
-
-class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm),
- "MOV_IMM $dst, $imm",
- [], AnyALU
->{
- bits<7> dst;
- bits<9> alu_literal;
- bits<9> p;
- let Inst{8-0} = alu_literal;
- let Inst{21-13} = p;
- let Inst{49-39} = op_code;
- let Inst{59-53} = dst;
-}
-
-def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
-def : Pat <
- (imm:$val),
- (MOV_IMM_I32 (i32 ALU_LITERAL_X), imm:$val)
->;
-
-def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
-def : Pat <
- (fpimm:$val),
- (MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val)
->;
-
-def KILLGT : InstR600 <0x2D,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags, R600_Pred:$p,
- variable_ops),
- "KILLGT $dst, $src0, $src1, $flags ($p)",
- [],
- NullALU>{
- let FlagOperandIdx = 3;
- bits<7> dst;
- bits<9> src0;
- bits<9> src1;
- let Inst{8-0} = src0;
- let Inst{21-13} = src1;
- let Inst{49-39} = op_code;
- let Inst{59-53} = dst;
-}
-
-def AND_INT : R600_2OP <
- 0x30, "AND_INT",
- [(set R600_Reg32:$dst, (and R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-def OR_INT : R600_2OP <
- 0x31, "OR_INT",
- [(set R600_Reg32:$dst, (or R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-def XOR_INT : R600_2OP <
- 0x32, "XOR_INT",
- [(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-def NOT_INT : R600_1OP <
- 0x33, "NOT_INT",
- [(set R600_Reg32:$dst, (not R600_Reg32:$src))]
->;
-
-def ADD_INT : R600_2OP <
- 0x34, "ADD_INT",
- [(set R600_Reg32:$dst, (add R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-def SUB_INT : R600_2OP <
- 0x35, "SUB_INT",
- [(set R600_Reg32:$dst, (sub R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-def MAX_INT : R600_2OP <
- 0x36, "MAX_INT",
- [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]>;
-
-def MIN_INT : R600_2OP <
- 0x37, "MIN_INT",
- [(set R600_Reg32:$dst, (AMDGPUsmin R600_Reg32:$src0, R600_Reg32:$src1))]>;
-
-def MAX_UINT : R600_2OP <
- 0x38, "MAX_UINT",
- [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-def MIN_UINT : R600_2OP <
- 0x39, "MIN_UINT",
- [(set R600_Reg32:$dst, (AMDGPUumin R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-def SETE_INT : R600_2OP <
- 0x3A, "SETE_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
->;
-
-def SETGT_INT : R600_2OP <
- 0x3B, "SGT_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
->;
-
-def SETGE_INT : R600_2OP <
- 0x3C, "SETGE_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
->;
-
-def SETNE_INT : R600_2OP <
- 0x3D, "SETNE_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
->;
-
-def SETGT_UINT : R600_2OP <
- 0x3E, "SETGT_UINT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
->;
-
-def SETGE_UINT : R600_2OP <
- 0x3F, "SETGE_UINT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
->;
-
-def CNDE_INT : R600_3OP <
- 0x1C, "CNDE_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), 0,
- (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
- COND_EQ))]
->;
-
-def CNDGE_INT : R600_3OP <
- 0x1E, "CNDGE_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), 0,
- (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
- COND_GE))]
->;
-
-def CNDGT_INT : R600_3OP <
- 0x1D, "CNDGT_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), 0,
- (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
- COND_GT))]
->;
-
-//===----------------------------------------------------------------------===//
-// Texture instructions
-//===----------------------------------------------------------------------===//
-
-def TEX_LD : R600_TEX <
- 0x03, "TEX_LD",
- [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$src4, imm:$src5))]
-> {
-let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $src4, $src5";
-let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5);
-}
-
-def TEX_GET_TEXTURE_RESINFO : R600_TEX <
- 0x04, "TEX_GET_TEXTURE_RESINFO",
- [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))]
->;
-
-def TEX_GET_GRADIENTS_H : R600_TEX <
- 0x07, "TEX_GET_GRADIENTS_H",
- [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))]
->;
-
-def TEX_GET_GRADIENTS_V : R600_TEX <
- 0x08, "TEX_GET_GRADIENTS_V",
- [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))]
->;
-
-def TEX_SET_GRADIENTS_H : R600_TEX <
- 0x0B, "TEX_SET_GRADIENTS_H",
- []
->;
-
-def TEX_SET_GRADIENTS_V : R600_TEX <
- 0x0C, "TEX_SET_GRADIENTS_V",
- []
->;
-
-def TEX_SAMPLE : R600_TEX <
- 0x10, "TEX_SAMPLE",
- [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))]
->;
-
-def TEX_SAMPLE_C : R600_TEX <
- 0x18, "TEX_SAMPLE_C",
- [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
->;
-
-def TEX_SAMPLE_L : R600_TEX <
- 0x11, "TEX_SAMPLE_L",
- [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, imm:$src2))]
->;
-
-def TEX_SAMPLE_C_L : R600_TEX <
- 0x19, "TEX_SAMPLE_C_L",
- [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
->;
-
-def TEX_SAMPLE_LB : R600_TEX <
- 0x12, "TEX_SAMPLE_LB",
- [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, imm:$src2))]
->;
-
-def TEX_SAMPLE_C_LB : R600_TEX <
- 0x1A, "TEX_SAMPLE_C_LB",
- [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
->;
-
-def TEX_SAMPLE_G : R600_TEX <
- 0x14, "TEX_SAMPLE_G",
- []
->;
-
-def TEX_SAMPLE_C_G : R600_TEX <
- 0x1C, "TEX_SAMPLE_C_G",
- []
->;
-
-//===----------------------------------------------------------------------===//
-// Helper classes for common instructions
-//===----------------------------------------------------------------------===//
-
-class MUL_LIT_Common <bits<11> inst> : R600_3OP <
- inst, "MUL_LIT",
- []
->;
-
-class MULADD_Common <bits<11> inst> : R600_3OP <
- inst, "MULADD",
- [(set (f32 R600_Reg32:$dst),
- (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
->;
-
-class CNDE_Common <bits<11> inst> : R600_3OP <
- inst, "CNDE",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
- (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
- COND_EQ))]
->;
-
-class CNDGT_Common <bits<11> inst> : R600_3OP <
- inst, "CNDGT",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
- (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
- COND_GT))]
->;
-
-class CNDGE_Common <bits<11> inst> : R600_3OP <
- inst, "CNDGE",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
- (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
- COND_GE))]
->;
-
-class DOT4_Common <bits<11> inst> : R600_REDUCTION <
- inst,
- (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags),
- "DOT4 $dst $src0, $src1",
- []
- > {
- bits<9> src0;
- bits<9> src1;
- let Inst{8-0} = src0;
- let Inst{21-13} = src1;
- let FlagOperandIdx = 3;
-}
-
-class DOT4_Pat <Instruction dot4> : Pat <
- (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1),
- (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0)
->;
-
-multiclass CUBE_Common <bits<11> inst> {
-
- def _pseudo : InstR600 <
- inst,
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src),
- "CUBE $dst $src",
- [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
- VecALU
- >;
-
- def _real : InstR600 <
- inst,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
- "CUBE $dst, $src0, $src1",
- [], VecALU
- >{
- let FlagOperandIdx = 3;
- bits<7> dst;
- bits<9> src0;
- bits<9> src1;
- let Inst{8-0} = src0;
- let Inst{21-13} = src1;
- let Inst{49-39} = inst;
- let Inst{59-53} = dst;
- }
-}
-
-class EXP_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "EXP_IEEE",
- [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))]
->;
-
-class FLT_TO_INT_Common <bits<11> inst> : R600_1OP <
- inst, "FLT_TO_INT",
- [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))]
->;
-
-class INT_TO_FLT_Common <bits<11> inst> : R600_1OP <
- inst, "INT_TO_FLT",
- [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))]
->;
-
-class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP <
- inst, "FLT_TO_UINT",
- [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))]
->;
-
-class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP <
- inst, "UINT_TO_FLT",
- [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))]
->;
-
-class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
- inst, "LOG_CLAMPED",
- []
->;
-
-class LOG_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "LOG_IEEE",
- [(set R600_Reg32:$dst, (flog2 R600_Reg32:$src))]
->;
-
-class LSHL_Common <bits<11> inst> : R600_2OP <
- inst, "LSHL $dst, $src0, $src1",
- [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-class LSHR_Common <bits<11> inst> : R600_2OP <
- inst, "LSHR $dst, $src0, $src1",
- [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-class ASHR_Common <bits<11> inst> : R600_2OP <
- inst, "ASHR $dst, $src0, $src1",
- [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-class MULHI_INT_Common <bits<11> inst> : R600_2OP <
- inst, "MULHI_INT $dst, $src0, $src1",
- [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-class MULHI_UINT_Common <bits<11> inst> : R600_2OP <
- inst, "MULHI $dst, $src0, $src1",
- [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-class MULLO_INT_Common <bits<11> inst> : R600_2OP <
- inst, "MULLO_INT $dst, $src0, $src1",
- [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))]
->;
-
-class MULLO_UINT_Common <bits<11> inst> : R600_2OP <
- inst, "MULLO_UINT $dst, $src0, $src1",
- []
->;
-
-class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_CLAMPED",
- []
->;
-
-class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_IEEE",
- [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))]
->;
-
-class RECIP_UINT_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_INT $dst, $src",
- [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))]
->;
-
-class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP <
- inst, "RECIPSQRT_CLAMPED",
- [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))]
->;
-
-class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "RECIPSQRT_IEEE",
- []
->;
-
-class SIN_Common <bits<11> inst> : R600_1OP <
- inst, "SIN", []>{
- let Trig = 1;
-}
-
-class COS_Common <bits<11> inst> : R600_1OP <
- inst, "COS", []> {
- let Trig = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// Helper patterns for complex intrinsics
-//===----------------------------------------------------------------------===//
-
-multiclass DIV_Common <InstR600 recip_ieee> {
-def : Pat<
- (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
- (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
->;
-
-def : Pat<
- (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
- (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
->;
-}
-
-class SSG_Common <InstR600 cndgt, InstR600 cndge> : Pat <
- (int_AMDGPU_ssg R600_Reg32:$src),
- (cndgt R600_Reg32:$src, (f32 ONE), (cndge R600_Reg32:$src, (f32 ZERO), (f32 NEG_ONE)))
->;
-
-class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
- (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
- (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
->;
-
-//===----------------------------------------------------------------------===//
-// R600 / R700 Instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isR600] in {
-
- def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
- def MULADD_r600 : MULADD_Common<0x10>;
- def CNDE_r600 : CNDE_Common<0x18>;
- def CNDGT_r600 : CNDGT_Common<0x19>;
- def CNDGE_r600 : CNDGE_Common<0x1A>;
- def DOT4_r600 : DOT4_Common<0x50>;
- def : DOT4_Pat <DOT4_r600>;
- defm CUBE_r600 : CUBE_Common<0x52>;
- def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
- def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
- def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
- def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
- def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
- def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
- def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
- def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
- def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
- def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
- def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
- def SIN_r600 : SIN_Common<0x6E>;
- def COS_r600 : COS_Common<0x6F>;
- def ASHR_r600 : ASHR_Common<0x70>;
- def LSHR_r600 : LSHR_Common<0x71>;
- def LSHL_r600 : LSHL_Common<0x72>;
- def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
- def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
- def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
- def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
- def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
-
- defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
- def POW_r600 : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600, MUL, GPRF32>;
- def SSG_r600 : SSG_Common<CNDGT_r600, CNDGE_r600>;
- def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
-
-}
-
-// Helper pattern for normalizing inputs to triginomic instructions for R700+
-// cards.
-class COS_PAT <InstR600 trig> : Pat<
- (fcos R600_Reg32:$src),
- (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src))
->;
-
-class SIN_PAT <InstR600 trig> : Pat<
- (fsin R600_Reg32:$src),
- (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src))
->;
-
-//===----------------------------------------------------------------------===//
-// R700 Only instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isR700] in {
- def SIN_r700 : SIN_Common<0x6E>;
- def COS_r700 : COS_Common<0x6F>;
-
- // R700 normalizes inputs to SIN/COS the same as EG
- def : SIN_PAT <SIN_r700>;
- def : COS_PAT <COS_r700>;
-}
-
-//===----------------------------------------------------------------------===//
-// Evergreen Only instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isEG] in {
-
-def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
-
-def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
-def MULHI_INT_eg : MULHI_INT_Common<0x90>;
-def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
-def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
-def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
-
-} // End Predicates = [isEG]
-
-//===----------------------------------------------------------------------===//
-// Evergreen / Cayman Instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isEGorCayman] in {
-
- // BFE_UINT - bit_extract, an optimization for mask and shift
- // Src0 = Input
- // Src1 = Offset
- // Src2 = Width
- //
- // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
- //
- // Example Usage:
- // (Offset, Width)
- //
- // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
- // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
- // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
- // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
- def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
- [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
- R600_Reg32:$src1,
- R600_Reg32:$src2))],
- VecALU
- >;
-
- def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
- [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
- R600_Reg32:$src2))],
- VecALU
- >;
-
- def MULADD_eg : MULADD_Common<0x14>;
- def ASHR_eg : ASHR_Common<0x15>;
- def LSHR_eg : LSHR_Common<0x16>;
- def LSHL_eg : LSHL_Common<0x17>;
- def CNDE_eg : CNDE_Common<0x19>;
- def CNDGT_eg : CNDGT_Common<0x1A>;
- def CNDGE_eg : CNDGE_Common<0x1B>;
- def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
- def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
- def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
- def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
- def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
- def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
- def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
- def SIN_eg : SIN_Common<0x8D>;
- def COS_eg : COS_Common<0x8E>;
- def DOT4_eg : DOT4_Common<0xBE>;
- def : DOT4_Pat <DOT4_eg>;
- defm CUBE_eg : CUBE_Common<0xC0>;
-
- defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
- def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>;
- def SSG_eg : SSG_Common<CNDGT_eg, CNDGE_eg>;
- def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
-
- def : SIN_PAT <SIN_eg>;
- def : COS_PAT <COS_eg>;
-
- def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
- let Pattern = [];
- }
-
- def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
-
- def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
- let Pattern = [];
- }
-
- def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
-
- def : Pat<(fp_to_sint R600_Reg32:$src),
- (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>;
-
- def : Pat<(fp_to_uint R600_Reg32:$src),
- (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>;
-
- def : Pat<(fsqrt R600_Reg32:$src),
- (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
-
-//===----------------------------------------------------------------------===//
-// Memory read/write instructions
-//===----------------------------------------------------------------------===//
-
-let usesCustomInserter = 1 in {
-
-class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name> : EG_CF_RAT <
- 0x57, 0x2, 0, (outs), ins, !strconcat(name, " $rw_gpr, $index_gpr, $eop"), []>
-{
- let RIM = 0;
- // XXX: Have a separate instruction for non-indexed writes.
- let TYPE = 1;
- let RW_REL = 0;
- let ELEM_SIZE = 0;
-
- let ARRAY_SIZE = 0;
- let COMP_MASK = comp_mask;
- let BURST_COUNT = 0;
- let VPM = 0;
- let MARK = 0;
- let BARRIER = 1;
-}
-
-} // End usesCustomInserter = 1
-
-// 32-bit store
-def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
- (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop),
- 0x1, "RAT_WRITE_CACHELESS_32_eg"
->;
-
-// i32 global_store
-def : Pat <
- (global_store (i32 R600_TReg32_X:$val), R600_TReg32_X:$ptr),
- (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
->;
-
-// Floating point global_store
-def : Pat <
- (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr),
- (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
->;
-
-//128-bit store
-def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
- (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop),
- 0xf, "RAT_WRITE_CACHELESS_128"
->;
-
-// v4f32 global store
-def : Pat <
- (global_store (v4f32 R600_Reg128:$val), R600_TReg32_X:$ptr),
- (RAT_WRITE_CACHELESS_128_eg R600_Reg128:$val, R600_TReg32_X:$ptr, 0)
->;
-
-class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern>
- : InstR600ISA <outs, (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", pattern> {
-
- // Operands
- bits<7> DST_GPR;
- bits<7> SRC_GPR;
-
- // Static fields
- bits<5> VC_INST = 0;
- bits<2> FETCH_TYPE = 2;
- bits<1> FETCH_WHOLE_QUAD = 0;
- bits<8> BUFFER_ID = buffer_id;
- bits<1> SRC_REL = 0;
- // XXX: We can infer this field based on the SRC_GPR. This would allow us
- // to store vertex addresses in any channel, not just X.
- bits<2> SRC_SEL_X = 0;
- bits<6> MEGA_FETCH_COUNT;
- bits<1> DST_REL = 0;
- bits<3> DST_SEL_X;
- bits<3> DST_SEL_Y;
- bits<3> DST_SEL_Z;
- bits<3> DST_SEL_W;
- // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
- // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
- // however, based on my testing if USE_CONST_FIELDS is set, then all
- // these fields need to be set to 0.
- bits<1> USE_CONST_FIELDS = 0;
- bits<6> DATA_FORMAT;
- bits<2> NUM_FORMAT_ALL = 1;
- bits<1> FORMAT_COMP_ALL = 0;
- bits<1> SRF_MODE_ALL = 0;
-
- // LLVM can only encode 64-bit instructions, so these fields are manually
- // encoded in R600CodeEmitter
- //
- // bits<16> OFFSET;
- // bits<2> ENDIAN_SWAP = 0;
- // bits<1> CONST_BUF_NO_STRIDE = 0;
- // bits<1> MEGA_FETCH = 0;
- // bits<1> ALT_CONST = 0;
- // bits<2> BUFFER_INDEX_MODE = 0;
-
- // VTX_WORD0
- let Inst{4-0} = VC_INST;
- let Inst{6-5} = FETCH_TYPE;
- let Inst{7} = FETCH_WHOLE_QUAD;
- let Inst{15-8} = BUFFER_ID;
- let Inst{22-16} = SRC_GPR;
- let Inst{23} = SRC_REL;
- let Inst{25-24} = SRC_SEL_X;
- let Inst{31-26} = MEGA_FETCH_COUNT;
-
- // VTX_WORD1_GPR
- let Inst{38-32} = DST_GPR;
- let Inst{39} = DST_REL;
- let Inst{40} = 0; // Reserved
- let Inst{43-41} = DST_SEL_X;
- let Inst{46-44} = DST_SEL_Y;
- let Inst{49-47} = DST_SEL_Z;
- let Inst{52-50} = DST_SEL_W;
- let Inst{53} = USE_CONST_FIELDS;
- let Inst{59-54} = DATA_FORMAT;
- let Inst{61-60} = NUM_FORMAT_ALL;
- let Inst{62} = FORMAT_COMP_ALL;
- let Inst{63} = SRF_MODE_ALL;
-
- // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
- // is done in R600CodeEmitter
- //
- // Inst{79-64} = OFFSET;
- // Inst{81-80} = ENDIAN_SWAP;
- // Inst{82} = CONST_BUF_NO_STRIDE;
- // Inst{83} = MEGA_FETCH;
- // Inst{84} = ALT_CONST;
- // Inst{86-85} = BUFFER_INDEX_MODE;
- // Inst{95-86} = 0; Reserved
-
- // VTX_WORD3 (Padding)
- //
- // Inst{127-96} = 0;
-}
-
-class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> {
-
- let MEGA_FETCH_COUNT = 1;
- let DST_SEL_X = 0;
- let DST_SEL_Y = 7; // Masked
- let DST_SEL_Z = 7; // Masked
- let DST_SEL_W = 7; // Masked
- let DATA_FORMAT = 1; // FMT_8
-}
-
-class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> {
-
- let MEGA_FETCH_COUNT = 4;
- let DST_SEL_X = 0;
- let DST_SEL_Y = 7; // Masked
- let DST_SEL_Z = 7; // Masked
- let DST_SEL_W = 7; // Masked
- let DATA_FORMAT = 0xD; // COLOR_32
-
- // This is not really necessary, but there were some GPU hangs that appeared
- // to be caused by ALU instructions in the next instruction group that wrote
- // to the $ptr registers of the VTX_READ.
- // e.g.
- // %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24
- // %T2_X<def> = MOV %ZERO
- //Adding this constraint prevents this from happening.
- let Constraints = "$ptr.ptr = $dst";
-}
-
-class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <buffer_id, (outs R600_Reg128:$dst), pattern> {
-
- let MEGA_FETCH_COUNT = 16;
- let DST_SEL_X = 0;
- let DST_SEL_Y = 1;
- let DST_SEL_Z = 2;
- let DST_SEL_W = 3;
- let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
-
- // XXX: Need to force VTX_READ_128 instructions to write to the same register
- // that holds its buffer address to avoid potential hangs. We can't use
- // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
- // registers are different sizes.
-}
-
-//===----------------------------------------------------------------------===//
-// VTX Read from parameter memory space
-//===----------------------------------------------------------------------===//
-
-class VTX_READ_PARAM_32_eg <ValueType vt> : VTX_READ_32_eg <0,
- [(set (vt R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
->;
-
-def VTX_READ_PARAM_i32_eg : VTX_READ_PARAM_32_eg<i32>;
-def VTX_READ_PARAM_f32_eg : VTX_READ_PARAM_32_eg<f32>;
-
-
-//===----------------------------------------------------------------------===//
-// VTX Read from global memory space
-//===----------------------------------------------------------------------===//
-
-// 8-bit reads
-def VTX_READ_GLOBAL_i8_eg : VTX_READ_8_eg <1,
- [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
->;
-
-// 32-bit reads
-
-class VTX_READ_GLOBAL_eg <ValueType vt> : VTX_READ_32_eg <1,
- [(set (vt R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
->;
-
-def VTX_READ_GLOBAL_i32_eg : VTX_READ_GLOBAL_eg<i32>;
-def VTX_READ_GLOBAL_f32_eg : VTX_READ_GLOBAL_eg<f32>;
-
-// 128-bit reads
-
-class VTX_READ_GLOBAL_128_eg <ValueType vt> : VTX_READ_128_eg <1,
- [(set (vt R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
->;
-
-def VTX_READ_GLOBAL_v4i32_eg : VTX_READ_GLOBAL_128_eg<v4i32>;
-def VTX_READ_GLOBAL_v4f32_eg : VTX_READ_GLOBAL_128_eg<v4f32>;
-
-//===----------------------------------------------------------------------===//
-// Constant Loads
-// XXX: We are currently storing all constants in the global address space.
-//===----------------------------------------------------------------------===//
-
-def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
- [(set (f32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
->;
-
-}
-
-let Predicates = [isCayman] in {
-
-let isVector = 1 in {
-
-def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
-
-def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
-def MULHI_INT_cm : MULHI_INT_Common<0x90>;
-def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
-def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
-
-} // End isVector = 1
-
-// RECIP_UINT emulation for Cayman
-def : Pat <
- (AMDGPUurecip R600_Reg32:$src0),
- (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
- (MOV_IMM_I32 (i32 ALU_LITERAL_X), 0x4f800000)))
->;
-
-} // End isCayman
-
-let isCodeGenOnly = 1 in {
-
- def MULLIT : AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
- "MULLIT $dst, $src0, $src1",
- [(set R600_Reg128:$dst, (int_AMDGPU_mullit R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
- >;
-
-let usesCustomInserter = 1, isPseudo = 1 in {
-
-class R600PreloadInst <string asm, Intrinsic intr> : AMDGPUInst <
- (outs R600_TReg32:$dst),
- (ins),
- asm,
- [(set R600_TReg32:$dst, (intr))]
->;
-
-def R600_LOAD_CONST : AMDGPUShaderInst <
- (outs R600_Reg32:$dst),
- (ins i32imm:$src0),
- "R600_LOAD_CONST $dst, $src0",
- [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
->;
-
-def RESERVE_REG : AMDGPUShaderInst <
- (outs),
- (ins i32imm:$src),
- "RESERVE_REG $src",
- [(int_AMDGPU_reserve_reg imm:$src)]
->;
-
-def TXD: AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
- "TXD $dst, $src0, $src1, $src2, $src3, $src4",
- [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, imm:$src4))]
->;
-
-def TXD_SHADOW: AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
- "TXD_SHADOW $dst, $src0, $src1, $src2, $src3, $src4",
- [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, TEX_SHADOW:$src4))]
->;
-
-} // End usesCustomInserter = 1, isPseudo = 1
-
-} // End isCodeGenOnly = 1
-
-def CLAMP_R600 : CLAMP <R600_Reg32>;
-def FABS_R600 : FABS<R600_Reg32>;
-def FNEG_R600 : FNEG<R600_Reg32>;
-
-let usesCustomInserter = 1 in {
-
-def MASK_WRITE : AMDGPUShaderInst <
- (outs),
- (ins R600_Reg32:$src),
- "MASK_WRITE $src",
- []
->;
-
-} // End usesCustomInserter = 1
-
-//===---------------------------------------------------------------------===//
-// Return instruction
-//===---------------------------------------------------------------------===//
-let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
- def RETURN : ILFormat<(outs), (ins variable_ops),
- "RETURN", [(IL_retflag)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// ISel Patterns
-//===----------------------------------------------------------------------===//
-
-// KIL Patterns
-def KILP : Pat <
- (int_AMDGPU_kilp),
- (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO), 0))
->;
-
-def KIL : Pat <
- (int_AMDGPU_kill R600_Reg32:$src0),
- (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0), 0))
->;
-
-// SGT Reverse args
-def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
- (SGT R600_Reg32:$src1, R600_Reg32:$src0)
->;
-
-// SGE Reverse args
-def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
- (SGE R600_Reg32:$src1, R600_Reg32:$src0)
->;
-
-// SETGT_INT reverse args
-def : Pat <
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
- (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
->;
-
-// SETGE_INT reverse args
-def : Pat <
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
- (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
->;
-
-// SETGT_UINT reverse args
-def : Pat <
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
- (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
->;
-
-// SETGE_UINT reverse args
-def : Pat <
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
- (SETGE_UINT R600_Reg32:$src0, R600_Reg32:$src1)
->;
-
-// The next two patterns are special cases for handling 'true if ordered' and
-// 'true if unordered' conditionals. The assumption here is that the behavior of
-// SETE and SNE conforms to the Direct3D 10 rules for floating point values
-// described here:
-// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
-// We assume that SETE returns false when one of the operands is NAN and
-// SNE returns true when on of the operands is NAN
-
-//SETE - 'true if ordered'
-def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
- (SETE R600_Reg32:$src0, R600_Reg32:$src1)
->;
-
-//SNE - 'true if unordered'
-def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
- (SNE R600_Reg32:$src0, R600_Reg32:$src1)
->;
-
-def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
-
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
-
-def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
-
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
-
-def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
-def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
-
-// bitconvert patterns
-
-def : BitConvert <i32, f32, R600_Reg32>;
-def : BitConvert <f32, i32, R600_Reg32>;
-def : BitConvert <v4f32, v4i32, R600_Reg128>;
-
-} // End isR600toCayman Predicate
+++ /dev/null
-//===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// R600 Intrinsic Definitions
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "R600", isTarget = 1 in {
- def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_R600_load_input_perspective :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
- def int_R600_load_input_constant :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
- def int_R600_load_input_linear :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
- def int_R600_load_input_position :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
- def int_R600_load_input_face :
- Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
-}
-
-let TargetPrefix = "r600", isTarget = 1 in {
-
-class R600ReadPreloadRegisterIntrinsic<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<name>;
-
-multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
- def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
- def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
- def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
-}
-
-defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_global_size">;
-defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_local_size">;
-defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_ngroups">;
-defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_tgid">;
-defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_tidig">;
-} // End TargetPrefix = "r600"
+++ /dev/null
-//===-- R600Intrinsics.td - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "R600", isTarget = 1 in {
- def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_R600_load_input_perspective :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
- def int_R600_load_input_constant :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
- def int_R600_load_input_linear :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
- def int_R600_load_input_position :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
- def int_R600_load_input_face :
- Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
-}
+++ /dev/null
-//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "R600MachineFunctionInfo.h"
-
-using namespace llvm;
-
-R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
- : MachineFunctionInfo(),
- HasLinearInterpolation(false),
- HasPerspectiveInterpolation(false)
- { }
-
-unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
-{
- assert(HasPerspectiveInterpolation);
- return 0;
-}
-
-unsigned R600MachineFunctionInfo::GetIJLinearIndex() const
-{
- assert(HasLinearInterpolation);
- if (HasPerspectiveInterpolation)
- return 1;
- else
- return 0;
-}
+++ /dev/null
-//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// R600MachineFunctionInfo is used for keeping track of which registers have
-// been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef R600MACHINEFUNCTIONINFO_H
-#define R600MACHINEFUNCTIONINFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include <vector>
-
-namespace llvm {
-
-class R600MachineFunctionInfo : public MachineFunctionInfo {
-
-public:
- R600MachineFunctionInfo(const MachineFunction &MF);
- std::vector<unsigned> ReservedRegs;
- bool HasLinearInterpolation;
- bool HasPerspectiveInterpolation;
-
- unsigned GetIJLinearIndex() const;
- unsigned GetIJPerspectiveIndex() const;
-
-};
-
-} // End llvm namespace
-
-#endif //R600MACHINEFUNCTIONINFO_H
+++ /dev/null
-//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The file contains the R600 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "R600RegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "R600MachineFunctionInfo.h"
-
-using namespace llvm;
-
-R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
- const TargetInstrInfo &tii)
-: AMDGPURegisterInfo(tm, tii),
- TM(tm),
- TII(tii)
- { }
-
-BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
-{
- BitVector Reserved(getNumRegs());
- const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
-
- Reserved.set(AMDGPU::ZERO);
- Reserved.set(AMDGPU::HALF);
- Reserved.set(AMDGPU::ONE);
- Reserved.set(AMDGPU::ONE_INT);
- Reserved.set(AMDGPU::NEG_HALF);
- Reserved.set(AMDGPU::NEG_ONE);
- Reserved.set(AMDGPU::PV_X);
- Reserved.set(AMDGPU::ALU_LITERAL_X);
- Reserved.set(AMDGPU::PREDICATE_BIT);
- Reserved.set(AMDGPU::PRED_SEL_OFF);
- Reserved.set(AMDGPU::PRED_SEL_ZERO);
- Reserved.set(AMDGPU::PRED_SEL_ONE);
-
- for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
- E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
- Reserved.set(*I);
- }
-
- for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
- E = MFI->ReservedRegs.end(); I != E; ++I) {
- Reserved.set(*I);
- Reserved.set(*(getSuperRegisters(*I)));
- }
-
- return Reserved;
-}
-
-const TargetRegisterClass *
-R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
-{
- switch (rc->getID()) {
- case AMDGPU::GPRF32RegClassID:
- case AMDGPU::GPRI32RegClassID:
- return &AMDGPU::R600_Reg32RegClass;
- default: return rc;
- }
-}
-
-unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const
-{
- switch(reg) {
- case AMDGPU::ZERO: return 248;
- case AMDGPU::ONE:
- case AMDGPU::NEG_ONE: return 249;
- case AMDGPU::ONE_INT: return 250;
- case AMDGPU::HALF:
- case AMDGPU::NEG_HALF: return 252;
- case AMDGPU::ALU_LITERAL_X: return 253;
- case AMDGPU::PREDICATE_BIT:
- case AMDGPU::PRED_SEL_OFF:
- case AMDGPU::PRED_SEL_ZERO:
- case AMDGPU::PRED_SEL_ONE:
- return 0;
- default: return getHWRegIndexGen(reg);
- }
-}
-
-unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
-{
- switch(reg) {
- case AMDGPU::ZERO:
- case AMDGPU::ONE:
- case AMDGPU::ONE_INT:
- case AMDGPU::NEG_ONE:
- case AMDGPU::HALF:
- case AMDGPU::NEG_HALF:
- case AMDGPU::ALU_LITERAL_X:
- case AMDGPU::PREDICATE_BIT:
- case AMDGPU::PRED_SEL_OFF:
- case AMDGPU::PRED_SEL_ZERO:
- case AMDGPU::PRED_SEL_ONE:
- return 0;
- default: return getHWRegChanGen(reg);
- }
-}
-
-const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
- MVT VT) const
-{
- switch(VT.SimpleTy) {
- default:
- case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
- }
-}
-
-unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const
-{
- switch (Channel) {
- default: assert(!"Invalid channel index"); return 0;
- case 0: return AMDGPU::sel_x;
- case 1: return AMDGPU::sel_y;
- case 2: return AMDGPU::sel_z;
- case 3: return AMDGPU::sel_w;
- }
-}
-
-#include "R600HwRegInfo.include"
+++ /dev/null
-//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Interface definition for R600RegisterInfo
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef R600REGISTERINFO_H_
-#define R600REGISTERINFO_H_
-
-#include "AMDGPUTargetMachine.h"
-#include "AMDGPURegisterInfo.h"
-
-namespace llvm {
-
-class R600TargetMachine;
-class TargetInstrInfo;
-
-struct R600RegisterInfo : public AMDGPURegisterInfo
-{
- AMDGPUTargetMachine &TM;
- const TargetInstrInfo &TII;
-
- R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
-
- virtual BitVector getReservedRegs(const MachineFunction &MF) const;
-
- /// getISARegClass - rc is an AMDIL reg class. This function returns the
- /// R600 reg class that is equivalent to the given AMDIL reg class.
- virtual const TargetRegisterClass * getISARegClass(
- const TargetRegisterClass * rc) const;
-
- /// getHWRegIndex - get the HW encoding for a register.
- unsigned getHWRegIndex(unsigned reg) const;
-
- /// getHWRegChan - get the HW encoding for a register's channel.
- unsigned getHWRegChan(unsigned reg) const;
-
- /// getCFGStructurizerRegClass - get the register class of the specified
- /// type to use in the CFGStructurizer
- virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
-
- /// getSubRegFromChannel - Return the sub reg enum value for the given
- /// Channel (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
- unsigned getSubRegFromChannel(unsigned Channel) const;
-
-private:
- /// getHWRegIndexGen - Generated function returns a register's encoding
- unsigned getHWRegIndexGen(unsigned reg) const;
- /// getHWRegChanGen - Generated function returns a register's channel
- /// encoding.
- unsigned getHWRegChanGen(unsigned reg) const;
-};
-
-} // End namespace llvm
-
-#endif // AMDIDSAREGISTERINFO_H_
+++ /dev/null
-//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction
-// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS
-// slot has been removed.
-//
-//===----------------------------------------------------------------------===//
-
-
-def ALU_X : FuncUnit;
-def ALU_Y : FuncUnit;
-def ALU_Z : FuncUnit;
-def ALU_W : FuncUnit;
-def TRANS : FuncUnit;
-
-def AnyALU : InstrItinClass;
-def VecALU : InstrItinClass;
-def TransALU : InstrItinClass;
-
-def R600_EG_Itin : ProcessorItineraries <
- [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
- [],
- [
- InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
- InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>,
- InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
- InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
- ]
->;
+++ /dev/null
-//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass maps the pseudo interpolation registers to the correct physical
-// registers. Prior to executing a fragment shader, the GPU loads interpolation
-// parameters into physical registers. The specific physical register that each
-// interpolation parameter ends up in depends on the type of the interpolation
-// parameter as well as how many interpolation parameters are used by the
-// shader.
-//
-//===----------------------------------------------------------------------===//
-
-
-
-#include "AMDGPU.h"
-#include "AMDIL.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
-
-class SIAssignInterpRegsPass : public MachineFunctionPass {
-
-private:
- static char ID;
- TargetMachine &TM;
-
- void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
- unsigned physReg, unsigned virtReg);
-
-public:
- SIAssignInterpRegsPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm) { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "SI Assign intrpolation registers"; }
-};
-
-} // End anonymous namespace
-
-char SIAssignInterpRegsPass::ID = 0;
-
-#define INTERP_VALUES 16
-#define REQUIRED_VALUE_MAX_INDEX 7
-
-struct InterpInfo {
- bool Enabled;
- unsigned Regs[3];
- unsigned RegCount;
-};
-
-
-FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
- return new SIAssignInterpRegsPass(tm);
-}
-
-bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
-{
-
- struct InterpInfo InterpUse[INTERP_VALUES] = {
- {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
- {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
- {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
- {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
- {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
- {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
- {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
- {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
- {false, {AMDGPU::POS_X_FLOAT}, 1},
- {false, {AMDGPU::POS_Y_FLOAT}, 1},
- {false, {AMDGPU::POS_Z_FLOAT}, 1},
- {false, {AMDGPU::POS_W_FLOAT}, 1},
- {false, {AMDGPU::FRONT_FACE}, 1},
- {false, {AMDGPU::ANCILLARY}, 1},
- {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
- {false, {AMDGPU::POS_FIXED_PT}, 1}
- };
-
- SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
- // This pass is only needed for pixel shaders.
- if (MFI->ShaderType != ShaderType::PIXEL) {
- return false;
- }
- MachineRegisterInfo &MRI = MF.getRegInfo();
- bool ForceEnable = true;
-
- // First pass, mark the interpolation values that are used.
- for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
- for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
- RegIdx++) {
- InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
- !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
- if (InterpUse[InterpIdx].Enabled &&
- InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
- ForceEnable = false;
- }
- }
- }
-
- // At least one interpolation mode must be enabled or else the GPU will hang.
- if (ForceEnable) {
- InterpUse[0].Enabled = true;
- }
-
- unsigned UsedVgprs = 0;
-
- // Second pass, replace with VGPRs.
- for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
- if (!InterpUse[InterpIdx].Enabled) {
- continue;
- }
- MFI->SPIPSInputAddr |= (1 << InterpIdx);
-
- for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
- RegIdx++, UsedVgprs++) {
- unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
- unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
- addLiveIn(&MF, MRI, NewReg, VirtReg);
- }
- }
-
- return false;
-}
-
-void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
- MachineRegisterInfo & MRI,
- unsigned physReg, unsigned virtReg)
-{
- const TargetInstrInfo * TII = TM.getInstrInfo();
- if (!MRI.isLiveIn(physReg)) {
- MRI.addLiveIn(physReg, virtReg);
- MF->front().addLiveIn(physReg);
- BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
- TII->get(TargetOpcode::COPY), virtReg)
- .addReg(physReg);
- } else {
- MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
- }
-}
+++ /dev/null
-#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-#
-# This perl script prints to stdout .td code to be used as SIRegisterInfo.td
-# it also generates a file called SIHwRegInfo.include, which contains helper
-# functions for determining the hw encoding of registers.
-#
-#===------------------------------------------------------------------------===#
-
-use strict;
-use warnings;
-
-my $SGPR_COUNT = 104;
-my $VGPR_COUNT = 256;
-
-my $SGPR_MAX_IDX = $SGPR_COUNT - 1;
-my $VGPR_MAX_IDX = $VGPR_COUNT - 1;
-
-my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : '';
-
-print <<STRING;
-
-let Namespace = "AMDGPU" in {
- def low : SubRegIndex;
- def high : SubRegIndex;
-
- def sub0 : SubRegIndex;
- def sub1 : SubRegIndex;
- def sub2 : SubRegIndex;
- def sub3 : SubRegIndex;
- def sub4 : SubRegIndex;
- def sub5 : SubRegIndex;
- def sub6 : SubRegIndex;
- def sub7 : SubRegIndex;
-}
-
-class SIReg <string n> : Register<n> {
- let Namespace = "AMDGPU";
-}
-
-class SI_64 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
- let Namespace = "AMDGPU";
- let SubRegIndices = [low, high];
-}
-
-class SI_128 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
- let Namespace = "AMDGPU";
- let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
-}
-
-class SI_256 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
- let Namespace = "AMDGPU";
- let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
-}
-
-class SGPR_32 <bits<8> num, string name> : SIReg<name> {
- field bits<8> Num;
-
- let Num = num;
-}
-
-
-class VGPR_32 <bits<9> num, string name> : SIReg<name> {
- field bits<9> Num;
-
- let Num = num;
-}
-
-class SGPR_64 <bits<8> num, string name, list<Register> subregs> :
- SI_64 <name, subregs>;
-
-class VGPR_64 <bits<9> num, string name, list<Register> subregs> :
- SI_64 <name, subregs>;
-
-class SGPR_128 <bits<8> num, string name, list<Register> subregs> :
- SI_128 <name, subregs>;
-
-class VGPR_128 <bits<9> num, string name, list<Register> subregs> :
- SI_128 <name, subregs>;
-
-class SGPR_256 <bits<8> num, string name, list<Register> subregs> :
- SI_256 <name, subregs>;
-
-def VCC : SIReg<"VCC">;
-def EXEC_LO : SIReg<"EXEC LO">;
-def EXEC_HI : SIReg<"EXEC HI">;
-def EXEC : SI_64<"EXEC", [EXEC_LO,EXEC_HI]>;
-def SCC : SIReg<"SCC">;
-def SREG_LIT_0 : SIReg <"S LIT 0">;
-def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT">;
-
-def M0 : SIReg <"M0">;
-
-//Interpolation registers
-
-def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
-def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
-def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
-def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
-def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
-def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">;
-def PERSP_I_W : SIReg <"PERSP_I_W">;
-def PERSP_J_W : SIReg <"PERSP_J_W">;
-def PERSP_1_W : SIReg <"PERSP_1_W">;
-def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
-def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
-def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
-def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
-def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
-def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
-def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
-def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
-def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
-def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
-def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
-def FRONT_FACE : SIReg <"FRONT_FACE">;
-def ANCILLARY : SIReg <"ANCILLARY">;
-def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
-def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
-
-STRING
-
-#32 bit register
-
-my @SGPR;
-for (my $i = 0; $i < $SGPR_COUNT; $i++) {
- print "def SGPR$i : SGPR_32 <$i, \"SGPR$i\">;\n";
- $SGPR[$i] = "SGPR$i";
-}
-
-my @VGPR;
-for (my $i = 0; $i < $VGPR_COUNT; $i++) {
- print "def VGPR$i : VGPR_32 <$i, \"VGPR$i\">;\n";
- $VGPR[$i] = "VGPR$i";
-}
-
-print <<STRING;
-
-def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add (sequence "SGPR%u", 0, $SGPR_MAX_IDX), SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
->;
-
-def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add (sequence "VGPR%u", 0, $VGPR_MAX_IDX),
- PERSP_SAMPLE_I, PERSP_SAMPLE_J,
- PERSP_CENTER_I, PERSP_CENTER_J,
- PERSP_CENTROID_I, PERSP_CENTROID_J,
- PERSP_I_W, PERSP_J_W, PERSP_1_W,
- LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
- LINEAR_CENTER_I, LINEAR_CENTER_J,
- LINEAR_CENTROID_I, LINEAR_CENTROID_J,
- LINE_STIPPLE_TEX_COORD,
- POS_X_FLOAT,
- POS_Y_FLOAT,
- POS_Z_FLOAT,
- POS_W_FLOAT,
- FRONT_FACE,
- ANCILLARY,
- SAMPLE_COVERAGE,
- POS_FIXED_PT
- )
->;
-
-def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add VReg_32, SReg_32)
->;
-
-def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
-def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
-def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
-def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
-
-
-STRING
-
-my @subregs_64 = ('low', 'high');
-my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w');
-my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7');
-
-my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64'));
-my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32'));
-my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32'));
-
-my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64'));
-my @VGPR128 = print_vgpr_class(128, \@subregs_128, ('v4f32'));
-
-
-my $sgpr64_list = join(',', @SGPR64);
-my $vgpr64_list = join(',', @VGPR64);
-print <<STRING;
-
-def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64,
- (add $sgpr64_list, $vgpr64_list)
->;
-
-STRING
-
-if ($INDEX_FILE ne '') {
- open(my $fh, ">", $INDEX_FILE);
- my %hw_values;
-
- for (my $i = 0; $i <= $#SGPR; $i++) {
- push (@{$hw_values{$i}}, $SGPR[$i]);
- }
-
- for (my $i = 0; $i <= $#SGPR64; $i++) {
- push (@{$hw_values{$i * 2}}, $SGPR64[$i])
- }
-
- for (my $i = 0; $i <= $#SGPR128; $i++) {
- push (@{$hw_values{$i * 4}}, $SGPR128[$i]);
- }
-
- for (my $i = 0; $i <= $#SGPR256; $i++) {
- push (@{$hw_values{$i * 8}}, $SGPR256[$i]);
- }
-
- for (my $i = 0; $i <= $#VGPR; $i++) {
- push (@{$hw_values{$i}}, $VGPR[$i]);
- }
- for (my $i = 0; $i <= $#VGPR64; $i++) {
- push (@{$hw_values{$i * 2}}, $VGPR64[$i]);
- }
-
- for (my $i = 0; $i <= $#VGPR128; $i++) {
- push (@{$hw_values{$i * 4}}, $VGPR128[$i]);
- }
-
-
- print $fh "unsigned SIRegisterInfo::getHWRegNum(unsigned reg) const\n{\n switch(reg) {\n";
- for my $key (keys(%hw_values)) {
- my @names = @{$hw_values{$key}};
- for my $regname (@names) {
- print $fh " case AMDGPU::$regname:\n"
- }
- print $fh " return $key;\n";
- }
- print $fh " default: assert(!\"Unknown Register\"); return 0;\n }\n}\n"
-}
-
-
-
-
-sub print_sgpr_class {
- my ($reg_width, $sub_reg_ref, @types) = @_;
- return print_reg_class('SReg', 'SGPR', $reg_width, $SGPR_COUNT, $sub_reg_ref, @types);
-}
-
-sub print_vgpr_class {
- my ($reg_width, $sub_reg_ref, @types) = @_;
- return print_reg_class('VReg', 'VGPR', $reg_width, $VGPR_COUNT, $sub_reg_ref, @types);
-}
-
-sub print_reg_class {
- my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_;
- my @registers;
- my $component_count = $reg_width / 32;
-
- for (my $i = 0; $i < $reg_count; $i += $component_count) {
- my $reg_name = $reg_prefix . $i . '_' . $reg_width;
- my @sub_regs;
- for (my $idx = 0; $idx < $component_count; $idx++) {
- my $sub_idx = $i + $idx;
- push(@sub_regs, $reg_prefix . $sub_idx);
- }
- print "def $reg_name : $reg_prefix\_$reg_width <$i, \"$reg_name\", [ ", join(',', @sub_regs) , "]>;\n";
- push (@registers, $reg_name);
- }
-
- #Add VCC to SReg_64
- if ($class_prefix eq 'SReg' and $reg_width == 64) {
- push (@registers, 'VCC')
- }
-
- #Add EXEC to SReg_64
- if ($class_prefix eq 'SReg' and $reg_width == 64) {
- push (@registers, 'EXEC')
- }
-
- my $reg_list = join(', ', @registers);
-
- print "def $class_prefix\_$reg_width : RegisterClass<\"AMDGPU\", [" . join (', ', @types) . "], $reg_width,\n (add $reg_list)\n>{\n";
- print " let SubRegClasses = [($class_prefix\_", ($reg_width / $component_count) , ' ', join(', ', @{$sub_reg_ref}), ")];\n}\n";
- return @registers;
-}
+++ /dev/null
-//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is
-// mostly EmitInstrWithCustomInserter().
-//
-//===----------------------------------------------------------------------===//
-
-#include "SIISelLowering.h"
-#include "AMDIL.h"
-#include "AMDILIntrinsicInfo.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-
-using namespace llvm;
-
-SITargetLowering::SITargetLowering(TargetMachine &TM) :
- AMDGPUTargetLowering(TM),
- TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
-{
- addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
- addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
- addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
- addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass);
- addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
- addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
-
- addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
- addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
-
- computeRegisterProperties();
-
- setOperationAction(ISD::AND, MVT::i1, Custom);
-
- setOperationAction(ISD::ADD, MVT::i64, Legal);
- setOperationAction(ISD::ADD, MVT::i32, Legal);
-
- setOperationAction(ISD::BR_CC, MVT::i32, Custom);
-
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-
- // We need to custom lower loads from the USER_SGPR address space, so we can
- // add the SGPRs as livein registers.
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::LOAD, MVT::i64, Custom);
-
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setTargetDAGCombine(ISD::SELECT_CC);
-
- setTargetDAGCombine(ISD::SETCC);
-}
-
-MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
- MachineInstr * MI, MachineBasicBlock * BB) const
-{
- const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
- MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
- MachineBasicBlock::iterator I = MI;
-
- if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
- AppendS_WAITCNT(MI, *BB, llvm::next(I));
- return BB;
- }
-
- switch (MI->getOpcode()) {
- default:
- return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
- case AMDGPU::BRANCH: return BB;
- case AMDGPU::CLAMP_SI:
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- // VSRC1-2 are unused, but we still need to fill all the
- // operand slots, so we just reuse the VSRC0 operand
- .addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(1))
- .addImm(0) // ABS
- .addImm(1) // CLAMP
- .addImm(0) // OMOD
- .addImm(0); // NEG
- MI->eraseFromParent();
- break;
-
- case AMDGPU::FABS_SI:
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- // VSRC1-2 are unused, but we still need to fill all the
- // operand slots, so we just reuse the VSRC0 operand
- .addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(1))
- .addImm(1) // ABS
- .addImm(0) // CLAMP
- .addImm(0) // OMOD
- .addImm(0); // NEG
- MI->eraseFromParent();
- break;
-
- case AMDGPU::FNEG_SI:
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- // VSRC1-2 are unused, but we still need to fill all the
- // operand slots, so we just reuse the VSRC0 operand
- .addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(1))
- .addImm(0) // ABS
- .addImm(0) // CLAMP
- .addImm(0) // OMOD
- .addImm(1); // NEG
- MI->eraseFromParent();
- break;
- case AMDGPU::SHADER_TYPE:
- BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
- MI->getOperand(0).getImm();
- MI->eraseFromParent();
- break;
-
- case AMDGPU::SI_INTERP:
- LowerSI_INTERP(MI, *BB, I, MRI);
- break;
- case AMDGPU::SI_INTERP_CONST:
- LowerSI_INTERP_CONST(MI, *BB, I, MRI);
- break;
- case AMDGPU::SI_KIL:
- LowerSI_KIL(MI, *BB, I, MRI);
- break;
- case AMDGPU::SI_WQM:
- LowerSI_WQM(MI, *BB, I, MRI);
- break;
- case AMDGPU::SI_V_CNDLT:
- LowerSI_V_CNDLT(MI, *BB, I, MRI);
- break;
- }
- return BB;
-}
-
-void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I) const
-{
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
- .addImm(0);
-}
-
-
-void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
-{
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
-
- MI->eraseFromParent();
-}
-
-void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
-{
- unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
- MachineOperand dst = MI->getOperand(0);
- MachineOperand iReg = MI->getOperand(1);
- MachineOperand jReg = MI->getOperand(2);
- MachineOperand attr_chan = MI->getOperand(3);
- MachineOperand attr = MI->getOperand(4);
- MachineOperand params = MI->getOperand(5);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
- .addOperand(params);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
- .addOperand(iReg)
- .addOperand(attr_chan)
- .addOperand(attr)
- .addReg(M0);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
- .addOperand(dst)
- .addReg(tmp)
- .addOperand(jReg)
- .addOperand(attr_chan)
- .addOperand(attr)
- .addReg(M0);
-
- MI->eraseFromParent();
-}
-
-void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
- MachineBasicBlock &BB, MachineBasicBlock::iterator I,
- MachineRegisterInfo &MRI) const
-{
- MachineOperand dst = MI->getOperand(0);
- MachineOperand attr_chan = MI->getOperand(1);
- MachineOperand attr = MI->getOperand(2);
- MachineOperand params = MI->getOperand(3);
- unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
- .addOperand(params);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
- .addOperand(dst)
- .addOperand(attr_chan)
- .addOperand(attr)
- .addReg(M0);
-
- MI->eraseFromParent();
-}
-
-void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
-{
- // Clear this pixel from the exec mask if the operand is negative
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
- AMDGPU::VCC)
- .addReg(AMDGPU::SREG_LIT_0)
- .addOperand(MI->getOperand(0));
-
- // If the exec mask is non-zero, skip the next two instructions
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(3)
- .addReg(AMDGPU::EXEC);
-
- // Exec mask is zero: Export to NULL target...
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP))
- .addImm(0)
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addImm(0)
- .addImm(1)
- .addImm(1)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0);
-
- // ... and terminate wavefront
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
-
- MI->eraseFromParent();
-}
-
-void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
-{
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_GT_F32_e32),
- AMDGPU::VCC)
- .addReg(AMDGPU::SREG_LIT_0)
- .addOperand(MI->getOperand(1));
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(3))
- .addOperand(MI->getOperand(2))
- .addReg(AMDGPU::VCC);
-
- MI->eraseFromParent();
-}
-
-EVT SITargetLowering::getSetCCResultType(EVT VT) const
-{
- return MVT::i1;
-}
-
-//===----------------------------------------------------------------------===//
-// Custom DAG Lowering Operations
-//===----------------------------------------------------------------------===//
-
-SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
-{
- switch (Op.getOpcode()) {
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case ISD::BR_CC: return LowerBR_CC(Op, DAG);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- EVT VT = Op.getValueType();
- switch (IntrinsicID) {
- case AMDGPUIntrinsic::SI_vs_load_buffer_index:
- return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
- AMDGPU::VGPR0, VT);
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- }
- break;
- }
- }
- return SDValue();
-}
-
-/// Loweri1ContextSwitch - The function is for lowering i1 operations on the
-/// VCC register. In the VALU context, VCC is a one bit register, but in the
-/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
-/// the SALU can perform operations on the VCC register, we need to promote
-/// the operand types from i1 to i64 in order for tablegen to be able to match
-/// this operation to the correct SALU instruction. We do this promotion by
-/// wrapping the operands in a CopyToReg node.
-///
-SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
- SelectionDAG &DAG,
- unsigned VCCNode) const
-{
- DebugLoc DL = Op.getDebugLoc();
-
- SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
- DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
- Op.getOperand(0)),
- DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
- Op.getOperand(1)));
-
- return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
-}
-
-SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Chain = Op.getOperand(0);
- SDValue CC = Op.getOperand(1);
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue JumpT = Op.getOperand(4);
- SDValue CmpValue;
- SDValue Result;
- CmpValue = DAG.getNode(
- ISD::SETCC,
- Op.getDebugLoc(),
- MVT::i1,
- LHS, RHS,
- CC);
-
- Result = DAG.getNode(
- AMDGPUISD::BRANCH_COND,
- CmpValue.getDebugLoc(),
- MVT::Other, Chain,
- JumpT, CmpValue);
- return Result;
-}
-
-SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
-{
- EVT VT = Op.getValueType();
- LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
-
- assert(Ptr);
-
- unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
-
- // We only need to lower USER_SGPR address space loads
- if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
- return SDValue();
- }
-
- // Loads from the USER_SGPR address space can only have constant value
- // pointers.
- ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
- assert(BasePtr);
-
- unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
- const TargetRegisterClass * dstClass;
- switch (TypeDwordWidth) {
- default:
- assert(!"USER_SGPR value size not implemented");
- return SDValue();
- case 1:
- dstClass = &AMDGPU::SReg_32RegClass;
- break;
- case 2:
- dstClass = &AMDGPU::SReg_64RegClass;
- break;
- }
- uint64_t Index = BasePtr->getZExtValue();
- assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
- unsigned SGPRIndex = Index / TypeDwordWidth;
- unsigned Reg = dstClass->getRegister(SGPRIndex);
-
- DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
- VT));
- return SDValue();
-}
-
-SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue True = Op.getOperand(2);
- SDValue False = Op.getOperand(3);
- SDValue CC = Op.getOperand(4);
- EVT VT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
-
- SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
- return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
-}
-
-//===----------------------------------------------------------------------===//
-// Custom DAG optimizations
-//===----------------------------------------------------------------------===//
-
-SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
- DebugLoc DL = N->getDebugLoc();
- EVT VT = N->getValueType(0);
-
- switch (N->getOpcode()) {
- default: break;
- case ISD::SELECT_CC: {
- N->dump();
- ConstantSDNode *True, *False;
- // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
- if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
- && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
- && True->isAllOnesValue()
- && False->isNullValue()
- && VT == MVT::i1) {
- return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
- N->getOperand(1), N->getOperand(4));
-
- }
- break;
- }
- case ISD::SETCC: {
- SDValue Arg0 = N->getOperand(0);
- SDValue Arg1 = N->getOperand(1);
- SDValue CC = N->getOperand(2);
- ConstantSDNode * C = NULL;
- ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
-
- // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
- if (VT == MVT::i1
- && Arg0.getOpcode() == ISD::SIGN_EXTEND
- && Arg0.getOperand(0).getValueType() == MVT::i1
- && (C = dyn_cast<ConstantSDNode>(Arg1))
- && C->isNullValue()
- && CCOp == ISD::SETNE) {
- return SimplifySetCC(VT, Arg0.getOperand(0),
- DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
- }
- break;
- }
- }
- return SDValue();
-}
-
-#define NODE_NAME_CASE(node) case SIISD::node: return #node;
-
-const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const
-{
- switch (Opcode) {
- default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
- NODE_NAME_CASE(VCC_AND)
- NODE_NAME_CASE(VCC_BITCAST)
- }
-}
+++ /dev/null
-//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// SI DAG Lowering interface definition
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SIISELLOWERING_H
-#define SIISELLOWERING_H
-
-#include "AMDGPUISelLowering.h"
-#include "SIInstrInfo.h"
-
-namespace llvm {
-
-class SITargetLowering : public AMDGPUTargetLowering
-{
- const SIInstrInfo * TII;
-
- /// AppendS_WAITCNT - Memory reads and writes are syncronized using the
- /// S_WAITCNT instruction. This function takes the most conservative
- /// approach and inserts an S_WAITCNT instruction after every read and
- /// write.
- void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I) const;
- void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, unsigned Opocde) const;
- void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
- void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
- void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
- void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
- void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
-
- SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
- unsigned VCCNode) const;
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-
-public:
- SITargetLowering(TargetMachine &tm);
- virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
- MachineBasicBlock * BB) const;
- virtual EVT getSetCCResultType(EVT VT) const;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- virtual const char* getTargetNodeName(unsigned Opcode) const;
-};
-
-} // End namespace llvm
-
-#endif //SIISELLOWERING_H
+++ /dev/null
-//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// SI Instruction format definitions.
-//
-// Instructions with _32 take 32-bit operands.
-// Instructions with _64 take 64-bit operands.
-//
-// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
-// encoding is the standard encoding, but instruction that make use of
-// any of the instruction modifiers must use the 64-bit encoding.
-//
-// Instructions with _e32 use the 32-bit encoding.
-// Instructions with _e64 use the 64-bit encoding.
-//
-//===----------------------------------------------------------------------===//
-
-
-class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
- : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
-
-class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
- : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
-
-
-class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
- : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;
-
-class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
- : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;
-
-class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
- : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
-
-class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
- : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
-
-class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
- : SOP2 <op, (outs VCCReg:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
-
-class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern> :
- VOP1 <
- op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
- >;
-
-multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
- def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
- def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
-}
-
-multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
-
- def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
-
- def _e64 : VOP3_64 <
- {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
-}
-
-class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern> :
- VOP2 <
- op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
- >;
-
-multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
-
- def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
-
- def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
-}
-
-multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
- def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
-
- def _e64 : VOP3_64 <
- {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
-}
-
-class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
- : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;
-
-class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
- : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
-
-class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern> :
- VOPC <
- op, (ins arc:$src0, vrc:$src1), opName, pattern
- >;
-
-multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> {
-
- def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>;
-
- def _e64 : VOP3_32 <
- {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
-}
-
-multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
-
- def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
-
- def _e64 : VOP3_64 <
- {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
-}
-
-class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
- : SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
-
-class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
- : SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
-
+++ /dev/null
-//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// SI Implementation of TargetInstrInfo.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "SIInstrInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/MC/MCInstrDesc.h"
-
-#include <stdio.h>
-
-using namespace llvm;
-
-SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
- : AMDGPUInstrInfo(tm),
- RI(tm, *this),
- TM(tm)
- { }
-
-const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const
-{
- return RI;
-}
-
-void
-SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const
-{
-
- // If we are trying to copy to or from SCC, there is a bug somewhere else in
- // the backend. While it may be theoretically possible to do this, it should
- // never be necessary.
- assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const
-{
- MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
- MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
- MachineInstrBuilder(MI).addImm(Imm);
-
- return MI;
-
-}
-
-bool SIInstrInfo::isMov(unsigned Opcode) const
-{
- switch(Opcode) {
- default: return false;
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B32_e64:
- case AMDGPU::V_MOV_IMM_F32:
- case AMDGPU::V_MOV_IMM_I32:
- case AMDGPU::S_MOV_IMM_I32:
- return true;
- }
-}
+++ /dev/null
-//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Interface definition for SIInstrInfo.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef SIINSTRINFO_H
-#define SIINSTRINFO_H
-
-#include "AMDGPUInstrInfo.h"
-#include "SIRegisterInfo.h"
-
-namespace llvm {
-
-class SIInstrInfo : public AMDGPUInstrInfo {
-private:
- const SIRegisterInfo RI;
- AMDGPUTargetMachine &TM;
-
-public:
- explicit SIInstrInfo(AMDGPUTargetMachine &tm);
-
- const SIRegisterInfo &getRegisterInfo() const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- /// getEncodingType - Returns the encoding type of this instruction.
- unsigned getEncodingType(const MachineInstr &MI) const;
-
- /// getEncodingBytes - Returns the size of this instructions encoding in
- /// number of bytes.
- unsigned getEncodingBytes(const MachineInstr &MI) const;
-
- virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const;
-
- virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
- virtual bool isMov(unsigned Opcode) const;
-
- };
-
-} // End namespace llvm
-
-namespace SIInstrFlags {
- enum Flags {
- // First 4 bits are the instruction encoding
- NEED_WAIT = 1 << 4
- };
-}
-
-#endif //SIINSTRINFO_H
+++ /dev/null
-//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// SI DAG Profiles
-//===----------------------------------------------------------------------===//
-def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
- SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
-]>;
-
-//===----------------------------------------------------------------------===//
-// SI DAG Nodes
-//===----------------------------------------------------------------------===//
-
-// and operation on 64-bit wide vcc
-def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// Special bitcast node for sharing VCC register between VALU and SALU
-def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
- SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
->;
-
-class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
- AMDGPUInst<outs, ins, asm, pattern> {
-
- field bits<4> EncodingType = 0;
- field bits<1> NeedWait = 0;
-
- let TSFlags{3-0} = EncodingType;
- let TSFlags{4} = NeedWait;
-
-}
-
-class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- field bits<32> Inst;
-}
-
-class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- field bits<64> Inst;
-}
-
-class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
- let EncoderMethod = "encodeOperand";
- let MIOperandInfo = opInfo;
-}
-
-def IMM16bit : ImmLeaf <
- i16,
- [{return isInt<16>(Imm);}]
->;
-
-def IMM8bit : ImmLeaf <
- i32,
- [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
->;
-
-def IMM12bit : ImmLeaf <
- i16,
- [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
->;
-
-def IMM32bitIn64bit : ImmLeaf <
- i64,
- [{return isInt<32>(Imm);}]
->;
-
-class GPR4Align <RegisterClass rc> : Operand <vAny> {
- let EncoderMethod = "GPR4AlignEncode";
- let MIOperandInfo = (ops rc:$reg);
-}
-
-class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
- let EncoderMethod = "GPR2AlignEncode";
- let MIOperandInfo = (ops rc:$reg);
-}
-
-def SMRDmemrr : Operand<iPTR> {
- let MIOperandInfo = (ops SReg_64, SReg_32);
- let EncoderMethod = "GPR2AlignEncode";
-}
-
-def SMRDmemri : Operand<iPTR> {
- let MIOperandInfo = (ops SReg_64, i32imm);
- let EncoderMethod = "SMRDmemriEncode";
-}
-
-def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
-def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
-
-let Uses = [EXEC] in {
-def EXP : Enc64<
- (outs),
- (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
- VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
- "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
- [] > {
-
- bits<4> EN;
- bits<6> TGT;
- bits<1> COMPR;
- bits<1> DONE;
- bits<1> VM;
- bits<8> VSRC0;
- bits<8> VSRC1;
- bits<8> VSRC2;
- bits<8> VSRC3;
-
- let Inst{3-0} = EN;
- let Inst{9-4} = TGT;
- let Inst{10} = COMPR;
- let Inst{11} = DONE;
- let Inst{12} = VM;
- let Inst{31-26} = 0x3e;
- let Inst{39-32} = VSRC0;
- let Inst{47-40} = VSRC1;
- let Inst{55-48} = VSRC2;
- let Inst{63-56} = VSRC3;
- let EncodingType = 0; //SIInstrEncodingType::EXP
-
- let NeedWait = 1;
- let usesCustomInserter = 1;
-}
-
-class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
-
- bits<8> VDATA;
- bits<4> DMASK;
- bits<1> UNORM;
- bits<1> GLC;
- bits<1> DA;
- bits<1> R128;
- bits<1> TFE;
- bits<1> LWE;
- bits<1> SLC;
- bits<8> VADDR;
- bits<5> SRSRC;
- bits<5> SSAMP;
-
- let Inst{11-8} = DMASK;
- let Inst{12} = UNORM;
- let Inst{13} = GLC;
- let Inst{14} = DA;
- let Inst{15} = R128;
- let Inst{16} = TFE;
- let Inst{17} = LWE;
- let Inst{24-18} = op;
- let Inst{25} = SLC;
- let Inst{31-26} = 0x3c;
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC;
- let Inst{57-53} = SSAMP;
-
- let EncodingType = 2; //SIInstrEncodingType::MIMG
-
- let NeedWait = 1;
- let usesCustomInserter = 1;
-}
-
-class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64<outs, ins, asm, pattern> {
-
- bits<8> VDATA;
- bits<12> OFFSET;
- bits<1> OFFEN;
- bits<1> IDXEN;
- bits<1> GLC;
- bits<1> ADDR64;
- bits<4> DFMT;
- bits<3> NFMT;
- bits<8> VADDR;
- bits<5> SRSRC;
- bits<1> SLC;
- bits<1> TFE;
- bits<8> SOFFSET;
-
- let Inst{11-0} = OFFSET;
- let Inst{12} = OFFEN;
- let Inst{13} = IDXEN;
- let Inst{14} = GLC;
- let Inst{15} = ADDR64;
- let Inst{18-16} = op;
- let Inst{22-19} = DFMT;
- let Inst{25-23} = NFMT;
- let Inst{31-26} = 0x3a; //encoding
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC;
- let Inst{54} = SLC;
- let Inst{55} = TFE;
- let Inst{63-56} = SOFFSET;
- let EncodingType = 3; //SIInstrEncodingType::MTBUF
-
- let NeedWait = 1;
- let usesCustomInserter = 1;
- let neverHasSideEffects = 1;
-}
-
-class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64<outs, ins, asm, pattern> {
-
- bits<8> VDATA;
- bits<12> OFFSET;
- bits<1> OFFEN;
- bits<1> IDXEN;
- bits<1> GLC;
- bits<1> ADDR64;
- bits<1> LDS;
- bits<8> VADDR;
- bits<5> SRSRC;
- bits<1> SLC;
- bits<1> TFE;
- bits<8> SOFFSET;
-
- let Inst{11-0} = OFFSET;
- let Inst{12} = OFFEN;
- let Inst{13} = IDXEN;
- let Inst{14} = GLC;
- let Inst{15} = ADDR64;
- let Inst{16} = LDS;
- let Inst{24-18} = op;
- let Inst{31-26} = 0x38; //encoding
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC;
- let Inst{54} = SLC;
- let Inst{55} = TFE;
- let Inst{63-56} = SOFFSET;
- let EncodingType = 4; //SIInstrEncodingType::MUBUF
-
- let NeedWait = 1;
- let usesCustomInserter = 1;
- let neverHasSideEffects = 1;
-}
-} // End Uses = [EXEC]
-
-class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32<outs, ins, asm, pattern> {
-
- bits<7> SDST;
- bits<15> PTR;
- bits<8> OFFSET = PTR{7-0};
- bits<1> IMM = PTR{8};
- bits<6> SBASE = PTR{14-9};
-
- let Inst{7-0} = OFFSET;
- let Inst{8} = IMM;
- let Inst{14-9} = SBASE;
- let Inst{21-15} = SDST;
- let Inst{26-22} = op;
- let Inst{31-27} = 0x18; //encoding
- let EncodingType = 5; //SIInstrEncodingType::SMRD
-
- let NeedWait = 1;
- let usesCustomInserter = 1;
-}
-
-class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32<outs, ins, asm, pattern> {
-
- bits<7> SDST;
- bits<8> SSRC0;
-
- let Inst{7-0} = SSRC0;
- let Inst{15-8} = op;
- let Inst{22-16} = SDST;
- let Inst{31-23} = 0x17d; //encoding;
- let EncodingType = 6; //SIInstrEncodingType::SOP1
-}
-
-class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
-
- bits<7> SDST;
- bits<8> SSRC0;
- bits<8> SSRC1;
-
- let Inst{7-0} = SSRC0;
- let Inst{15-8} = SSRC1;
- let Inst{22-16} = SDST;
- let Inst{29-23} = op;
- let Inst{31-30} = 0x2; // encoding
- let EncodingType = 7; // SIInstrEncodingType::SOP2
-}
-
-class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32<outs, ins, asm, pattern> {
-
- bits<8> SSRC0;
- bits<8> SSRC1;
-
- let Inst{7-0} = SSRC0;
- let Inst{15-8} = SSRC1;
- let Inst{22-16} = op;
- let Inst{31-23} = 0x17e;
- let EncodingType = 8; // SIInstrEncodingType::SOPC
-
- let DisableEncoding = "$dst";
-}
-
-class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins , asm, pattern> {
-
- bits <7> SDST;
- bits <16> SIMM16;
-
- let Inst{15-0} = SIMM16;
- let Inst{22-16} = SDST;
- let Inst{27-23} = op;
- let Inst{31-28} = 0xb; //encoding
- let EncodingType = 9; // SIInstrEncodingType::SOPK
-}
-
-class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
- (outs),
- ins,
- asm,
- pattern > {
-
- bits <16> SIMM16;
-
- let Inst{15-0} = SIMM16;
- let Inst{22-16} = op;
- let Inst{31-23} = 0x17f; // encoding
- let EncodingType = 10; // SIInstrEncodingType::SOPP
-}
-
-
-let Uses = [EXEC] in {
-class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
-
- bits<8> VDST;
- bits<8> VSRC;
- bits<2> ATTRCHAN;
- bits<6> ATTR;
-
- let Inst{7-0} = VSRC;
- let Inst{9-8} = ATTRCHAN;
- let Inst{15-10} = ATTR;
- let Inst{17-16} = op;
- let Inst{25-18} = VDST;
- let Inst{31-26} = 0x32; // encoding
- let EncodingType = 11; // SIInstrEncodingType::VINTRP
-
- let neverHasSideEffects = 1;
-}
-
-class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
-
- bits<8> VDST;
- bits<9> SRC0;
-
- let Inst{8-0} = SRC0;
- let Inst{16-9} = op;
- let Inst{24-17} = VDST;
- let Inst{31-25} = 0x3f; //encoding
-
- let EncodingType = 12; // SIInstrEncodingType::VOP1
- let PostEncoderMethod = "VOPPostEncode";
-}
-
-class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
-
- bits<8> VDST;
- bits<9> SRC0;
- bits<8> VSRC1;
-
- let Inst{8-0} = SRC0;
- let Inst{16-9} = VSRC1;
- let Inst{24-17} = VDST;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; //encoding
-
- let EncodingType = 13; // SIInstrEncodingType::VOP2
- let PostEncoderMethod = "VOPPostEncode";
-}
-
-class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
-
- bits<8> VDST;
- bits<9> SRC0;
- bits<9> SRC1;
- bits<9> SRC2;
- bits<3> ABS;
- bits<1> CLAMP;
- bits<2> OMOD;
- bits<3> NEG;
-
- let Inst{7-0} = VDST;
- let Inst{10-8} = ABS;
- let Inst{11} = CLAMP;
- let Inst{25-17} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = SRC0;
- let Inst{49-41} = SRC1;
- let Inst{58-50} = SRC2;
- let Inst{60-59} = OMOD;
- let Inst{63-61} = NEG;
-
- let EncodingType = 14; // SIInstrEncodingType::VOP3
- let PostEncoderMethod = "VOPPostEncode";
-}
-
-class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
- Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
-
- bits<9> SRC0;
- bits<8> VSRC1;
-
- let Inst{8-0} = SRC0;
- let Inst{16-9} = VSRC1;
- let Inst{24-17} = op;
- let Inst{31-25} = 0x3e;
-
- let EncodingType = 15; //SIInstrEncodingType::VOPC
- let PostEncoderMethod = "VOPPostEncode";
- let DisableEncoding = "$dst";
-}
-} // End Uses = [EXEC]
-
-class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
- op,
- (outs VReg_128:$vdata),
- (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
- i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
- GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
- asm,
- []
->;
-
-class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
- op,
- (outs regClass:$dst),
- (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
- i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
- i1imm:$tfe, SReg_32:$soffset),
- asm,
- []> {
- let mayLoad = 1;
-}
-
-class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
- op,
- (outs regClass:$dst),
- (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
- i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
- i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
- asm,
- []> {
- let mayLoad = 1;
-}
-
-class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
- op,
- (outs),
- (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
- i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
- GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
- asm,
- []> {
- let mayStore = 1;
-}
-
-multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
- ValueType vt> {
- def _IMM : SMRD <
- op,
- (outs dstClass:$dst),
- (ins SMRDmemri:$src0),
- asm,
- [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
- >;
-
- def _SGPR : SMRD <
- op,
- (outs dstClass:$dst),
- (ins SMRDmemrr:$src0),
- asm,
- [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
- >;
-}
-
-multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
- defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
- defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
-}
-
-include "SIInstrFormats.td"
-include "SIInstructions.td"
+++ /dev/null
-//===-- SIInstructions.td - SI Instruction Defintions ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-def isSI : Predicate<"Subtarget.device()"
- "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
-
-let Predicates = [isSI] in {
-
-let neverHasSideEffects = 1 in {
-def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
-def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
-def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
-def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
-def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
-def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
-def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
-def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
-def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
-def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
-} // End neverHasSideEffects = 1
-////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
-////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
-////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
-////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
-////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
-////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
-////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
-////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
-//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
-//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
-def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
-//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
-//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
-//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
-////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
-////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
-////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
-////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
-def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
-def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
-def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
-def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
-def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
-def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
-def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
-////def S_ANDN2_SAVEEXEC_B64 : SOP1_ANDN2 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
-////def S_ORN2_SAVEEXEC_B64 : SOP1_ORN2 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
-def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
-def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
-def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
-def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
-def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
-def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
-def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
-def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
-def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
-//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
-def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
-def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
-def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
-def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
-def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
-
-/*
-This instruction is disabled for now until we can figure out how to teach
-the instruction selector to correctly use the S_CMP* vs V_CMP*
-instructions.
-
-When this instruction is enabled the code generator sometimes produces this
-invalid sequence:
-
-SCC = S_CMPK_EQ_I32 SGPR0, imm
-VCC = COPY SCC
-VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
-
-def S_CMPK_EQ_I32 : SOPK <
- 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
- "S_CMPK_EQ_I32",
- [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
->;
-*/
-
-def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
-def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
-def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
-def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
-def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
-def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
-def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
-def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
-def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
-def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
-def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
-def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
-def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
-//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
-def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
-def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
-def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
-//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
-//def EXP : EXP_ <0x00000000, "EXP", []>;
-
-defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>;
-defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32",
- [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT))]
->;
-defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32",
- [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ))]
->;
-defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32",
- [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE))]
->;
-defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32",
- [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT))]
->;
-defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32",
- [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE))]
->;
-defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32",
- [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE))]
->;
-defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>;
-defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>;
-defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>;
-defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>;
-defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>;
-defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>;
-defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32",
- [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE))]
->;
-defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>;
-defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>;
-defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>;
-defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>;
-defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>;
-defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>;
-defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>;
-defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>;
-defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>;
-defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>;
-defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>;
-defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>;
-defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>;
-defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>;
-defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>;
-defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>;
-defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>;
-defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>;
-defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>;
-defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>;
-defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>;
-defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>;
-defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>;
-defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>;
-defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>;
-defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>;
-defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>;
-defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>;
-defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>;
-defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>;
-defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>;
-defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>;
-defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>;
-defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>;
-defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>;
-defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>;
-defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>;
-defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>;
-defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>;
-defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>;
-defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>;
-defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>;
-defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>;
-defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>;
-defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>;
-defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>;
-defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>;
-defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>;
-defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>;
-defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>;
-defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>;
-defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>;
-defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>;
-defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>;
-defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>;
-defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>;
-defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>;
-defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>;
-defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>;
-defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>;
-defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>;
-defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>;
-defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>;
-defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>;
-defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>;
-defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>;
-defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>;
-defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>;
-defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>;
-defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>;
-defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>;
-defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>;
-defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>;
-defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>;
-defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>;
-defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>;
-defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>;
-defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>;
-defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>;
-defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>;
-defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>;
-defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>;
-defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>;
-defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>;
-defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>;
-defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>;
-defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>;
-defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>;
-defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>;
-defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>;
-defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>;
-defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>;
-defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>;
-defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>;
-defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>;
-defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>;
-defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>;
-defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>;
-defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>;
-defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>;
-defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>;
-defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>;
-defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>;
-defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>;
-defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>;
-defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>;
-defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>;
-defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>;
-defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>;
-defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>;
-defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>;
-defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>;
-defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>;
-defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>;
-defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>;
-defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32",
- [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETLT))]
->;
-defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32",
- [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETEQ))]
->;
-defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32",
- [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETLE))]
->;
-defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32",
- [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETGT))]
->;
-defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32",
- [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETNE))]
->;
-defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32",
- [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETGE))]
->;
-defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>;
-defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>;
-defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>;
-defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>;
-defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>;
-defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>;
-defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>;
-defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>;
-defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>;
-defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>;
-defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>;
-defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>;
-defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>;
-defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>;
-defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>;
-defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>;
-defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>;
-defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>;
-defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>;
-defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>;
-defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>;
-defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>;
-defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>;
-defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>;
-defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>;
-defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>;
-defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>;
-defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>;
-defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>;
-defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>;
-defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>;
-defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>;
-defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>;
-defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>;
-defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>;
-defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>;
-defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>;
-defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>;
-defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>;
-defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>;
-defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>;
-defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>;
-defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>;
-defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>;
-defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>;
-defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>;
-defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>;
-defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>;
-defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>;
-defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>;
-defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>;
-defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>;
-defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>;
-defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>;
-defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>;
-defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>;
-defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>;
-defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>;
-defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>;
-defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>;
-defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>;
-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
-def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
-//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>;
-//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
-//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
-//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
-//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>;
-//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>;
-//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>;
-//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
-//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
-//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
-//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
-//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
-//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
-//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
-//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
-//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
-//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
-//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
-//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
-//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
-//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
-//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
-//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
-//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
-//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
-//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
-//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
-//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
-//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
-//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
-//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
-//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
-//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
-//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
-//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
-//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
-//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
-//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
-//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
-//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
-//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
-//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
-//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
-//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
-//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
-//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
-//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
-//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
-//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
-//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
-//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
-def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
-//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
-//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
-//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
-
-defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>;
-
-//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>;
-//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
-//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
-//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
-//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>;
-//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
-//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
-
-//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
-//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
-//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
-//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
-//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
-//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
-//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
-//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
-//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
-//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
-//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
-//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
-//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
-//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
-//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
-//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
-//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
-//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
-//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
-//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
-//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
-//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
-//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
-//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
-//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
-//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
-//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
-//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
-//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
-//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
-def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">;
-//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
-//def IMAGE_SAMPLE_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_D", 0x00000022>;
-//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
-//def IMAGE_SAMPLE_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_L", 0x00000024>;
-//def IMAGE_SAMPLE_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_B", 0x00000025>;
-//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
-//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
-//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>;
-//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
-//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
-//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
-//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>;
-//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>;
-//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
-//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
-//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
-//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
-//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
-//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
-//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
-//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
-//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
-//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
-//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
-//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
-//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
-//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
-//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
-//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
-//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
-//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
-//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
-//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
-//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
-//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
-//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
-//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
-//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
-//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
-//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
-//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
-//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
-//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
-//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
-//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
-//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
-//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
-//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
-//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
-//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
-//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
-//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
-//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
-//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
-//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
-//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
-//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
-//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
-//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
-//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
-//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
-//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
-//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
-//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
-//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
-//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
-//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
-
-let neverHasSideEffects = 1 in {
-defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
-} // End neverHasSideEffects
-defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
-//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
-//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
-defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
- [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))]
->;
-//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
-//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
-defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
- [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))]
->;
-defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
-////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
-//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
-//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
-//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
-//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
-//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
-//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
-//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
-//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
-//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
-//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
-//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
-//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
-defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
- [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))]
->;
-defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
-defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>;
-defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
- [(set VReg_32:$dst, (frint AllReg_32:$src0))]
->;
-defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
- [(set VReg_32:$dst, (ffloor AllReg_32:$src0))]
->;
-defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
- [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))]
->;
-defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
-defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>;
-defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
-defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
-defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
- [(set VReg_32:$dst, (int_AMDGPU_rcp AllReg_32:$src0))]
->;
-defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
-defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
-defm V_RSQ_LEGACY_F32 : VOP1_32 <
- 0x0000002d, "V_RSQ_LEGACY_F32",
- [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))]
->;
-defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
-defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
-defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
-defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
-defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
-defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>;
-defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>;
-defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
-defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
-defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
-defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
-defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
-defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
-defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
-//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
-defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
-defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
-//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
-defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
-//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
-defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
-defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
-defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
-
-def V_INTERP_P1_F32 : VINTRP <
- 0x00000000,
- (outs VReg_32:$dst),
- (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_P1_F32",
- []> {
- let DisableEncoding = "$m0";
-}
-
-def V_INTERP_P2_F32 : VINTRP <
- 0x00000001,
- (outs VReg_32:$dst),
- (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_P2_F32",
- []> {
-
- let Constraints = "$src0 = $dst";
- let DisableEncoding = "$src0,$m0";
-
-}
-
-def V_INTERP_MOV_F32 : VINTRP <
- 0x00000002,
- (outs VReg_32:$dst),
- (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_MOV_F32",
- []> {
- let VSRC = 0;
- let DisableEncoding = "$m0";
-}
-
-//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
-
-let isTerminator = 1 in {
-
-def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
- [(IL_retflag)]> {
- let SIMM16 = 0;
- let isBarrier = 1;
- let hasCtrlDep = 1;
-}
-
-let isBranch = 1 in {
-def S_BRANCH : SOPP <
- 0x00000002, (ins brtarget:$target), "S_BRANCH",
- []
->;
-
-let DisableEncoding = "$scc" in {
-def S_CBRANCH_SCC0 : SOPP <
- 0x00000004, (ins brtarget:$target, SCCReg:$scc),
- "S_CBRANCH_SCC0", []
->;
-def S_CBRANCH_SCC1 : SOPP <
- 0x00000005, (ins brtarget:$target, SCCReg:$scc),
- "S_CBRANCH_SCC1",
- []
->;
-} // End DisableEncoding = "$scc"
-
-def S_CBRANCH_VCCZ : SOPP <
- 0x00000006, (ins brtarget:$target, VCCReg:$vcc),
- "S_CBRANCH_VCCZ",
- []
->;
-def S_CBRANCH_VCCNZ : SOPP <
- 0x00000007, (ins brtarget:$target, VCCReg:$vcc),
- "S_CBRANCH_VCCNZ",
- []
->;
-
-let DisableEncoding = "$exec" in {
-def S_CBRANCH_EXECZ : SOPP <
- 0x00000008, (ins brtarget:$target, EXECReg:$exec),
- "S_CBRANCH_EXECZ",
- []
->;
-def S_CBRANCH_EXECNZ : SOPP <
- 0x00000009, (ins brtarget:$target, EXECReg:$exec),
- "S_CBRANCH_EXECNZ",
- []
->;
-} // End DisableEncoding = "$exec"
-
-
-} // End isBranch = 1
-} // End isTerminator = 1
-
-//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>;
-def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
- []
->;
-//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
-//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
-//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
-//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
-//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
-//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
-//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
-//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
-//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
-//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
-
-/* XXX: No VOP3 version of this instruction yet */
-def V_CNDMASK_B32 : VOP2 <0x00000000, (outs VReg_32:$dst),
- (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32",
- [(set (i32 VReg_32:$dst),
- (select VCCReg:$vcc, VReg_32:$src1, AllReg_32:$src0))] > {
-
- let DisableEncoding = "$vcc";
-}
-
-//f32 pattern for V_CNDMASK_B32
-def : Pat <
- (f32 (select VCCReg:$vcc, VReg_32:$src0, AllReg_32:$src1)),
- (V_CNDMASK_B32 AllReg_32:$src1, VReg_32:$src0, VCCReg:$vcc)
->;
-
-defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
-defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
-
-defm V_ADD_F32 : VOP2_32 <
- 0x00000003, "V_ADD_F32",
- [(set VReg_32:$dst, (fadd AllReg_32:$src0, VReg_32:$src1))]
->;
-
-defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
- [(set VReg_32:$dst, (fsub AllReg_32:$src0, VReg_32:$src1))]
->;
-defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
-defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
-defm V_MUL_LEGACY_F32 : VOP2_32 <
- 0x00000007, "V_MUL_LEGACY_F32",
- [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))]
->;
-
-defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
- [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))]
->;
-//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>;
-//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
-//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
-//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
-defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
- [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))]
->;
-
-defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
- [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))]
->;
-defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
-defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
-defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
-defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
-defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
-defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
-defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
-defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
-defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
-defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
-defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
-defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
-defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
- [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))]
->;
-defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
- [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))]
->;
-defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
- [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))]
->;
-defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
-defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
-defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
-defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
-//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
-//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
-//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
-let Defs = [VCC] in { // Carry-out goes to VCC
-defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32",
- [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
->;
-defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32",
- [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
->;
-} // End Defs = [VCC]
-defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>;
-defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>;
-defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>;
-defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>;
-defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
-////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
-////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
-////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
- [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))]
->;
-////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
-////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
-def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
-def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
-def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
-def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
-def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
-def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
-def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
-def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
-def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
-def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
-def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
-def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
-////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
-////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
-////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
-////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
-//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
-
-let neverHasSideEffects = 1 in {
-
-def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
-def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
-//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>;
-//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>;
-
-} // End neverHasSideEffects
-def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
-def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
-def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
-def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
-def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
-def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
-def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
-def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
-def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
-//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
-def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
-def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
-def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
-////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
-////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
-////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
-////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
-////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
-////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
-////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
-////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
-////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
-//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
-//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
-//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
-def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
-////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
-def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
-def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
-def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>;
-def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>;
-def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>;
-def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
-def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
-def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
-def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
-def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
-def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
-def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
-def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
-def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
-def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
-def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
-def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
-def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
-//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
-//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
-//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
-def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
-def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
-def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
-def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
-def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
-def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
-def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
-def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
-def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
-def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
-def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
-
-def S_CSELECT_B32 : SOP2 <
- 0x0000000a, (outs SReg_32:$dst),
- (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
- [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
->;
-
-def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
-
-// f32 pattern for S_CSELECT_B32
-def : Pat <
- (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
- (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
->;
-
-def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
-
-def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
- [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))]
->;
-def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64",
- [(set VCCReg:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))]
->;
-def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
-def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
-def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
-def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
-////def S_ANDN2_B32 : SOP2_ANDN2 <0x00000014, "S_ANDN2_B32", []>;
-////def S_ANDN2_B64 : SOP2_ANDN2 <0x00000015, "S_ANDN2_B64", []>;
-////def S_ORN2_B32 : SOP2_ORN2 <0x00000016, "S_ORN2_B32", []>;
-////def S_ORN2_B64 : SOP2_ORN2 <0x00000017, "S_ORN2_B64", []>;
-def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
-def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
-def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
-def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
-def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
-def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
-def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
-def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
-def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
-def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
-def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
-def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
-def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
-def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
-def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
-def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
-def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
-def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
-def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
-//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
-def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
-
-class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI <
- (outs VReg_32:$dst),
- (ins immType:$src0),
- "V_MOV_IMM",
- [(set VReg_32:$dst, (immNode:$src0))]
->;
-
-let isCodeGenOnly = 1, isPseudo = 1 in {
-
-def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
-def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
-
-def S_MOV_IMM_I32 : InstSI <
- (outs SReg_32:$dst),
- (ins i32imm:$src0),
- "S_MOV_IMM_I32",
- [(set SReg_32:$dst, (imm:$src0))]
->;
-
-// i64 immediates aren't really supported in hardware, but LLVM will use the i64
-// type for indices on load and store instructions. The pattern for
-// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits,
-// which the hardware can handle.
-def S_MOV_IMM_I64 : InstSI <
- (outs SReg_64:$dst),
- (ins i64imm:$src0),
- "S_MOV_IMM_I64 $dst, $src0",
- [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))]
->;
-
-} // End isCodeGenOnly, isPseudo = 1
-
-class SI_LOAD_LITERAL<Operand ImmType> :
- Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> {
-
- bits<32> imm;
- let Inst{31-0} = imm;
-}
-
-def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>;
-def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>;
-
-let isCodeGenOnly = 1, isPseudo = 1 in {
-
-def SET_M0 : InstSI <
- (outs SReg_32:$dst),
- (ins i32imm:$src0),
- "SET_M0",
- [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
->;
-
-def CONFIG_WRITE : InstSI <
- (outs i32imm:$reg),
- (ins i32imm:$val),
- "CONFIG_WRITE $reg, $val",
- [] > {
- field bits<32> Inst = 0;
-}
-
-def LOAD_CONST : AMDGPUShaderInst <
- (outs GPRF32:$dst),
- (ins i32imm:$src),
- "LOAD_CONST $dst, $src",
- [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
->;
-
-let usesCustomInserter = 1 in {
-
-def SI_V_CNDLT : InstSI <
- (outs VReg_32:$dst),
- (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
- "SI_V_CNDLT $dst, $src0, $src1, $src2",
- [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))]
->;
-
-def SI_INTERP : InstSI <
- (outs VReg_32:$dst),
- (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
- "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params",
- []
->;
-
-def SI_INTERP_CONST : InstSI <
- (outs VReg_32:$dst),
- (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
- "SI_INTERP_CONST $dst, $attr_chan, $attr, $params",
- [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan,
- imm:$attr, SReg_32:$params))]
->;
-
-def SI_KIL : InstSI <
- (outs),
- (ins VReg_32:$src),
- "SI_KIL $src",
- [(int_AMDGPU_kill VReg_32:$src)]
->;
-
-def SI_WQM : InstSI <
- (outs),
- (ins),
- "SI_WQM",
- [(int_SI_wqm)]
->;
-
-} // end usesCustomInserter
-
-// SI Psuedo branch instructions. These are used by the CFG structurizer pass
-// and should be lowered to ISA instructions prior to codegen.
-
-let isBranch = 1, isTerminator = 1 in {
-def SI_IF_NZ : InstSI <
- (outs),
- (ins brtarget:$target, VCCReg:$vcc),
- "SI_BRANCH_NZ",
- [(IL_brcond bb:$target, VCCReg:$vcc)]
->;
-
-def SI_IF_Z : InstSI <
- (outs),
- (ins brtarget:$target, VCCReg:$vcc),
- "SI_BRANCH_Z",
- []
->;
-} // end isBranch = 1, isTerminator = 1
-} // end IsCodeGenOnly, isPseudo
-
-/* int_SI_vs_load_input */
-def : Pat<
- (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
- VReg_32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
- VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
- 0, 0, (i32 SREG_LIT_0))
->;
-
-/* int_SI_export */
-def : Pat <
- (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
- VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
- (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
- VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
->;
-
-/* int_SI_sample */
-def : Pat <
- (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
- (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
- SReg_256:$rsrc, SReg_128:$sampler)
->;
-
-def CLAMP_SI : CLAMP<VReg_32>;
-def FABS_SI : FABS<VReg_32>;
-def FNEG_SI : FNEG<VReg_32>;
-
-def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
-def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;
-def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>;
-def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>;
-
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
-
-def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
-def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
-
-def : BitConvert <i32, f32, SReg_32>;
-def : BitConvert <i32, f32, VReg_32>;
-
-def : BitConvert <f32, i32, SReg_32>;
-def : BitConvert <f32, i32, VReg_32>;
-
-def : Pat <
- (i64 (SIvcc_bitcast VCCReg:$vcc)),
- (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64))
->;
-
-def : Pat <
- (i1 (SIvcc_bitcast SReg_64:$vcc)),
- (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
->;
-
-/********** ===================== **********/
-/********** Interpolation Paterns **********/
-/********** ===================== **********/
-
-def : Pat <
- (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_read_face),
- (f32 FRONT_FACE)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 0),
- (f32 POS_X_FLOAT)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 1),
- (f32 POS_Y_FLOAT)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 2),
- (f32 POS_Z_FLOAT)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 3),
- (f32 POS_W_FLOAT)
->;
-
-/********** ================== **********/
-/********** Intrinsic Patterns **********/
-/********** ================== **********/
-
-/* llvm.AMDGPU.pow */
-/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */
-def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>;
-
-def : Pat <
- (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1),
- (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1))
->;
-
-/********** ================== **********/
-/********** VOP3 Patterns **********/
-/********** ================== **********/
-
-def : Pat <(f32 (IL_mad AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2)),
- (V_MAD_LEGACY_F32 AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2,
- 0, 0, 0, 0)>;
-
-} // End isSI predicate
+++ /dev/null
-//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// SI Intrinsic Definitions
-//
-//===----------------------------------------------------------------------===//
-
-
-let TargetPrefix = "SI", isTarget = 1 in {
-
- def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
- /* XXX: We may need a seperate intrinsic here for loading integer values */
- def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
- def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
- def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
- def int_SI_wqm : Intrinsic <[], [], []>;
-
- def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
-
- /* Interpolation Intrinsics */
-
- def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
- class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
-
- def int_SI_fs_interp_linear_center : Interp;
- def int_SI_fs_interp_linear_centroid : Interp;
- def int_SI_fs_interp_persp_center : Interp;
- def int_SI_fs_interp_persp_centroid : Interp;
- def int_SI_fs_interp_constant : Interp;
-
- def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
- def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-}
+++ /dev/null
-//===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF)
-// to predicated instructions.
-//
-// All flow control (except loops) is handled using predicated instructions and
-// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
-// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
-// by writting to the 64-bit EXEC register (each bit corresponds to a
-// single vector ALU). Typically, for predicates, a vector ALU will write
-// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
-// Vector ALU) and then the ScalarALU will AND the VCC register with the
-// EXEC to update the predicates.
-//
-// For example:
-// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
-// SI_IF_NZ %VCC
-// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
-// ELSE
-// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
-// ENDIF
-//
-// becomes:
-//
-// %SGPR0 = S_MOV_B64 %EXEC // Save the current exec mask
-// %EXEC = S_AND_B64 %VCC, %EXEC // Update the exec mask
-// S_CBRANCH_EXECZ label0 // This instruction is an
-// // optimization which allows us to
-// // branch if all the bits of
-// // EXEC are zero.
-// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
-//
-// label0:
-// %EXEC = S_NOT_B64 %EXEC // Invert the exec mask for the
-// // Then block.
-// %EXEC = S_AND_B64 %SGPR0, %EXEC
-// S_BRANCH_EXECZ label1 // Use our branch optimization
-// // instruction again.
-// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block
-// label1:
-// S_MOV_B64 // Restore the old EXEC value
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "SIInstrInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
-
-class SILowerFlowControlPass : public MachineFunctionPass {
-
-private:
- static char ID;
- const TargetInstrInfo *TII;
- std::vector<unsigned> PredicateStack;
- std::vector<unsigned> UnusedRegisters;
-
- void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
- void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
-
-public:
- SILowerFlowControlPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const {
- return "SI Lower flow control instructions";
- }
-
-};
-
-} // End anonymous namespace
-
-char SILowerFlowControlPass::ID = 0;
-
-FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) {
- return new SILowerFlowControlPass(tm);
-}
-
-bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) {
-
- // Find all the unused registers that can be used for the predicate stack.
- for (TargetRegisterClass::iterator S = AMDGPU::SReg_64RegClass.begin(),
- I = AMDGPU::SReg_64RegClass.end();
- I != S; --I) {
- unsigned Reg = *I;
- if (!MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
- UnusedRegisters.push_back(Reg);
- }
- }
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next, Next = llvm::next(I)) {
- MachineInstr &MI = *I;
- switch (MI.getOpcode()) {
- default: break;
- case AMDGPU::SI_IF_NZ:
- pushExecMask(MBB, I);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
- AMDGPU::EXEC)
- .addOperand(MI.getOperand(0)) // VCC
- .addReg(AMDGPU::EXEC);
- MI.eraseFromParent();
- break;
- case AMDGPU::ELSE:
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64),
- AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
- AMDGPU::EXEC)
- .addReg(PredicateStack.back())
- .addReg(AMDGPU::EXEC);
- MI.eraseFromParent();
- break;
- case AMDGPU::ENDIF:
- popExecMask(MBB, I);
- MI.eraseFromParent();
- break;
- }
- }
- }
- return false;
-}
-
-void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
-
- assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");
- unsigned StackReg = UnusedRegisters.back();
- UnusedRegisters.pop_back();
- PredicateStack.push_back(StackReg);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
- StackReg)
- .addReg(AMDGPU::EXEC);
-}
-
-void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
- unsigned StackReg = PredicateStack.back();
- PredicateStack.pop_back();
- UnusedRegisters.push_back(StackReg);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
- AMDGPU::EXEC)
- .addReg(StackReg);
-}
+++ /dev/null
-//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This pass performs the following transformation on instructions with
-// literal constants:
-//
-// %VGPR0 = V_MOV_IMM_I32 1
-//
-// becomes:
-//
-// BUNDLE
-// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT
-// * SI_LOAD_LITERAL 1
-//
-// The resulting sequence matches exactly how the hardware handles immediate
-// operands, so this transformation greatly simplifies the code generator.
-//
-// Only the *_MOV_IMM_* support immediate operands at the moment, but when
-// support for immediate operands is added to other instructions, they
-// will be lowered here as well.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-
-using namespace llvm;
-
-namespace {
-
-class SILowerLiteralConstantsPass : public MachineFunctionPass {
-
-private:
- static char ID;
- const TargetInstrInfo *TII;
-
-public:
- SILowerLiteralConstantsPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const {
- return "SI Lower literal constants pass";
- }
-};
-
-} // End anonymous namespace
-
-char SILowerLiteralConstantsPass::ID = 0;
-
-FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
- return new SILowerLiteralConstantsPass(tm);
-}
-
-bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next, Next = llvm::next(I)) {
- MachineInstr &MI = *I;
- switch (MI.getOpcode()) {
- default: break;
- case AMDGPU::S_MOV_IMM_I32:
- case AMDGPU::S_MOV_IMM_I64:
- case AMDGPU::V_MOV_IMM_F32:
- case AMDGPU::V_MOV_IMM_I32: {
- unsigned MovOpcode;
- unsigned LoadLiteralOpcode;
- MachineOperand LiteralOp = MI.getOperand(1);
- if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
- MovOpcode = AMDGPU::V_MOV_B32_e32;
- } else {
- MovOpcode = AMDGPU::S_MOV_B32;
- }
- if (LiteralOp.isImm()) {
- LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
- } else {
- LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
- }
- MachineInstr *First =
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode),
- MI.getOperand(0).getReg())
- .addReg(AMDGPU::SI_LITERAL_CONSTANT);
- MachineInstr *Last =
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
- .addOperand(MI.getOperand(1));
- Last->setIsInsideBundle();
- llvm::finalizeBundle(MBB, First, Last);
- MI.eraseFromParent();
- break;
- }
- }
- }
- }
- return false;
-}
+++ /dev/null
-//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "SIMachineFunctionInfo.h"
-
-using namespace llvm;
-
-SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
- : MachineFunctionInfo(),
- SPIPSInputAddr(0),
- ShaderType(0)
- { }
+++ /dev/null
-//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config
-// register, which is to tell the hardware which interpolation parameters to
-// load.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef _SIMACHINEFUNCTIONINFO_H_
-#define _SIMACHINEFUNCTIONINFO_H_
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-class SIMachineFunctionInfo : public MachineFunctionInfo {
-
- private:
-
- public:
- SIMachineFunctionInfo(const MachineFunction &MF);
- unsigned SPIPSInputAddr;
- unsigned ShaderType;
-
-};
-
-} // End namespace llvm
-
-
-#endif //_SIMACHINEFUNCTIONINFO_H_
+++ /dev/null
-//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SI implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "SIRegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-
-using namespace llvm;
-
-SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
- const TargetInstrInfo &tii)
-: AMDGPURegisterInfo(tm, tii),
- TM(tm),
- TII(tii)
- { }
-
-BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const
-{
- BitVector Reserved(getNumRegs());
- return Reserved;
-}
-
-unsigned SIRegisterInfo::getBinaryCode(unsigned reg) const
-{
- switch (reg) {
- case AMDGPU::M0: return 124;
- case AMDGPU::SREG_LIT_0: return 128;
- default: return getHWRegNum(reg);
- }
-}
-
-const TargetRegisterClass *
-SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
-{
- switch (rc->getID()) {
- case AMDGPU::GPRF32RegClassID:
- return &AMDGPU::VReg_32RegClass;
- default: return rc;
- }
-}
-
-const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
- MVT VT) const
-{
- switch(VT.SimpleTy) {
- default:
- case MVT::i32: return &AMDGPU::VReg_32RegClass;
- }
-}
-#include "SIRegisterGetHWRegNum.inc"
+++ /dev/null
-//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Interface definition for SIRegisterInfo
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef SIREGISTERINFO_H_
-#define SIREGISTERINFO_H_
-
-#include "AMDGPURegisterInfo.h"
-
-namespace llvm {
-
-class AMDGPUTargetMachine;
-class TargetInstrInfo;
-
-struct SIRegisterInfo : public AMDGPURegisterInfo
-{
- AMDGPUTargetMachine &TM;
- const TargetInstrInfo &TII;
-
- SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
-
- virtual BitVector getReservedRegs(const MachineFunction &MF) const;
-
- /// getBinaryCode - Returns the hardware encoding for a register
- virtual unsigned getBinaryCode(unsigned reg) const;
-
- /// getISARegClass - rc is an AMDIL reg class. This function returns the
- /// SI register class that is equivalent to the given AMDIL register class.
- virtual const TargetRegisterClass *
- getISARegClass(const TargetRegisterClass * rc) const;
-
- /// getHWRegNum - Generated function that returns the hardware encoding for
- /// a register
- unsigned getHWRegNum(unsigned reg) const;
-
- /// getCFGStructurizerRegClass - get the register class of the specified
- /// type to use in the CFGStructurizer
- virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
-
-};
-
-} // End namespace llvm
-
-#endif // SIREGISTERINFO_H_
+++ /dev/null
-//===-- SISchedule.td - SI Scheduling definitons -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: This is just a place holder for now.
-//
-//===----------------------------------------------------------------------===//
-
-
-def SI_Itin : ProcessorItineraries <[], [], []>;
+++ /dev/null
-//===-- TargetInfo/AMDGPUTargetInfo.cpp - TODO: Add brief description -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-/// The target for the AMDGPU backend
-Target llvm::TheAMDGPUTarget;
-
-/// Extern function to initialize the targets for the AMDGPU backend
-extern "C" void LLVMInitializeAMDGPUTargetInfo() {
- RegisterTarget<Triple::r600, false>
- R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
-}
+++ /dev/null
-
-#include "radeon_llvm_emit.h"
-
-#include <llvm/Support/CommandLine.h>
-#include <llvm/Support/IRReader.h>
-#include <llvm/Support/SourceMgr.h>
-#include <llvm/LLVMContext.h>
-#include <llvm/Module.h>
-#include <stdio.h>
-
-#include <llvm-c/Core.h>
-
-using namespace llvm;
-
-static cl::opt<std::string>
-InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
-
-static cl::opt<std::string>
-TargetGPUName("gpu", cl::desc("target gpu name"), cl::value_desc("gpu_name"));
-
-int main(int argc, char ** argv)
-{
- unsigned char * bytes;
- unsigned byte_count;
-
- std::auto_ptr<Module> M;
- LLVMContext &Context = getGlobalContext();
- SMDiagnostic Err;
- cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
- M.reset(ParseIRFile(InputFilename, Err, Context));
-
- Module * mod = M.get();
-
- radeon_llvm_compile(wrap(mod), &bytes, &byte_count, TargetGPUName.c_str(), 1);
-}
#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm-c/Target.h>
-
-#if HAVE_LLVM < 0x0302
-#include <llvm/Target/TargetData.h>
-#else
#include <llvm/DataLayout.h>
-#endif
#include <iostream>
#include <stdlib.h>
using namespace llvm;
-#ifndef EXTERNAL_LLVM
-extern "C" {
-
-void LLVMInitializeAMDGPUAsmPrinter(void);
-void LLVMInitializeAMDGPUTargetMC(void);
-void LLVMInitializeAMDGPUTarget(void);
-void LLVMInitializeAMDGPUTargetInfo(void);
-}
-#endif
-
namespace {
class LLVMEnsureMultithreaded {
Triple AMDGPUTriple(sys::getDefaultTargetTriple());
-#if HAVE_LLVM == 0x0302
- LLVMInitializeAMDGPUTargetInfo();
- LLVMInitializeAMDGPUTarget();
- LLVMInitializeAMDGPUTargetMC();
- LLVMInitializeAMDGPUAsmPrinter();
-#else
LLVMInitializeR600TargetInfo();
LLVMInitializeR600Target();
LLVMInitializeR600TargetMC();
LLVMInitializeR600AsmPrinter();
-#endif
std::string err;
const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err);
));
TargetMachine &AMDGPUTargetMachine = *tm.get();
PassManager PM;
-#if HAVE_LLVM < 0x0302
- PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData()));
-#else
PM.add(new DataLayout(*AMDGPUTargetMachine.getDataLayout()));
-#endif
PM.add(createPromoteMemoryToRegisterPass());
AMDGPUTargetMachine.setAsmVerbosityDefault(true);