From 022f6d88616bf5ea3903c5056d6147e9cf356aa9 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 31 Aug 2012 11:29:49 -0400 Subject: [PATCH] radeon/llvm: Rework how immediate operands are handled with SI Immediate operands were previously handled in the CodeEmitter, but that code was buggy and very confusing. This commit adds a pass that simplifies the handling of immediate operands by splitting the loading of the immediate into a separate instruction that is bundled with the original. --- src/gallium/drivers/radeon/AMDGPU.h | 1 + .../drivers/radeon/AMDGPUAsmPrinter.cpp | 6 +- .../drivers/radeon/AMDGPUTargetMachine.cpp | 2 + .../radeon/MCTargetDesc/SIMCCodeEmitter.cpp | 39 +++---- src/gallium/drivers/radeon/Makefile.sources | 1 + .../drivers/radeon/SIGenRegisterInfo.pl | 1 + src/gallium/drivers/radeon/SIISelLowering.h | 2 + src/gallium/drivers/radeon/SIInstrInfo.td | 10 -- src/gallium/drivers/radeon/SIInstructions.td | 27 +++-- .../radeon/SILowerLiteralConstants.cpp | 105 ++++++++++++++++++ 10 files changed, 150 insertions(+), 44 deletions(-) create mode 100644 src/gallium/drivers/radeon/SILowerLiteralConstants.cpp diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h index 514adb621f4..ab6871c1da4 100644 --- a/src/gallium/drivers/radeon/AMDGPU.h +++ b/src/gallium/drivers/radeon/AMDGPU.h @@ -26,6 +26,7 @@ FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); // SI Passes FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); +FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm); // Passes common to R600 and SI FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); diff --git a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp index bf3d89c1b83..2d6c86d9d20 100644 --- a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp +++ b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp @@ 
-81,9 +81,13 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { VCCUsed = true; continue; } - if (reg == AMDGPU::EXEC) { + switch (reg) { + default: break; + case AMDGPU::EXEC: + case AMDGPU::SI_LITERAL_CONSTANT: continue; } + if (AMDGPU::SReg_32RegClass.contains(reg)) { isSGPR = true; width = 1; diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp index 8de7bb7a790..2d1ca068905 100644 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp +++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp @@ -137,6 +137,8 @@ bool AMDGPUPassConfig::addPreEmitPass() { if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { PM->add(createR600ExpandSpecialInstrsPass(*TM)); addPass(FinalizeMachineBundlesID); + } else { + PM->add(createSILowerLiteralConstantsPass(*TM)); } return false; diff --git a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp index 5569cf6c3cc..c2b5e47bbbe 100644 --- a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp @@ -22,11 +22,9 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/raw_ostream.h" -#define LITERAL_REG 255 #define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1)) #define SI_INSTR_FLAGS_ENCODING_MASK 0xf - // These must be kept in sync with SIInstructions.td and also the // InstrEncodingInfo array in SIInstrInfo.cpp. // @@ -91,11 +89,6 @@ public: virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixup) const; - /// i32LiteralEncode - Encode an i32 literal this is used as an operand - /// for an instruction in place of a register. 
- virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixup) const; - /// SMRDmemriEncode - Encoding for SMRD indexed loads virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixup) const; @@ -147,7 +140,12 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, } else if (MO.isFPImm()) { // XXX: Not all instructions can use inline literals // XXX: We should make sure this is a 32-bit constant - return LITERAL_REG; + union { + float F; + uint32_t I; + } Imm; + Imm.F = MO.getFPImm(); + return Imm.I; } else{ llvm_unreachable("Encoding of this operand type is not supported yet."); } @@ -176,12 +174,6 @@ unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI, return GPRAlign(MI, OpNo, 2); } -uint64_t SIMCCodeEmitter::i32LiteralEncode(const MCInst &MI, - unsigned OpNo, - SmallVectorImpl &Fixup) const { - return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32); -} - #define SMRD_OFFSET_MASK 0xff #define SMRD_IMM_SHIFT 8 #define SMRD_SBASE_MASK 0x3f @@ -262,17 +254,13 @@ unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const { unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const { - // Instructions with literal constants are expanded to 64-bits, and - // the constant is stored in bits [63:32] - for (unsigned i = 0; i < MI.getNumOperands(); i++) { - if (MI.getOperand(i).isFPImm()) { - return 8; - } - } - - // This instruction always has a literal - if (MI.getOpcode() == AMDGPU::S_MOV_IMM_I32) { - return 8; + // These instructions aren't real instructions with an encoding type, so + // we need to manually specify their size. 
+ switch (MI.getOpcode()) { + default: break; + case AMDGPU::SI_LOAD_LITERAL_I32: + case AMDGPU::SI_LOAD_LITERAL_F32: + return 4; } unsigned encoding_type = getEncodingType(MI); @@ -294,6 +282,7 @@ unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const { switch (reg) { case AMDGPU::M0: return 124; case AMDGPU::SREG_LIT_0: return 128; + case AMDGPU::SI_LITERAL_CONSTANT: return 255; default: return getHWRegNum(reg); } } diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index 687acb3f420..2eb11208957 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -46,6 +46,7 @@ CPP_SOURCES := \ SIAssignInterpRegs.cpp \ SIInstrInfo.cpp \ SIISelLowering.cpp \ + SILowerLiteralConstants.cpp \ SIMachineFunctionInfo.cpp \ SIRegisterInfo.cpp \ InstPrinter/AMDGPUInstPrinter.cpp \ diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl index 84f677ed324..e47fb56868e 100644 --- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl +++ b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl @@ -91,6 +91,7 @@ def VCC : SIReg<"VCC">; def EXEC : SIReg<"EXEC">; def SCC : SIReg<"SCC">; def SREG_LIT_0 : SIReg <"S LIT 0">; +def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT">; def M0 : SIReg <"M0">; diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h index 77d61d86f49..80c7f4bb86c 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.h +++ b/src/gallium/drivers/radeon/SIISelLowering.h @@ -29,6 +29,8 @@ class SITargetLowering : public AMDGPUTargetLowering /// write. 
void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I) const; + void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, unsigned Opocde) const; void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td index a7ce9a6ec84..135f279b39f 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.td +++ b/src/gallium/drivers/radeon/SIInstrInfo.td @@ -86,16 +86,6 @@ class GPR2Align : Operand { let MIOperandInfo = (ops rc:$reg); } -def i32Literal : Operand { - let EncoderMethod = "i32LiteralEncode"; -} - -// i64Literal uses the same encoder method as i32 literal, because an -// i64Literal is really a i32 literal with the top 32-bits all set to zero. -def i64Literal : Operand { - let EncoderMethod = "i32LiteralEncode"; -} - def SMRDmemrr : Operand { let MIOperandInfo = (ops SReg_64, SReg_32); let EncoderMethod = "GPR2AlignEncode"; diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td index 7802ec6cb54..291c03418a0 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -887,21 +887,21 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; //def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; -class V_MOV_IMM : VOP1 < - 0x1, +class V_MOV_IMM : InstSI < (outs VReg_32:$dst), (ins immType:$src0), "V_MOV_IMM", [(set VReg_32:$dst, (immNode:$src0))] >; +let isCodeGenOnly = 1, isPseudo = 1 in { + def V_MOV_IMM_I32 : V_MOV_IMM; def V_MOV_IMM_F32 : V_MOV_IMM; -def S_MOV_IMM_I32 : SOP1 < - 0x3, +def S_MOV_IMM_I32 : InstSI < (outs SReg_32:$dst), - (ins i32Literal:$src0), + (ins i32imm:$src0), 
"S_MOV_IMM_I32", [(set SReg_32:$dst, (imm:$src0))] >; @@ -910,14 +910,25 @@ def S_MOV_IMM_I32 : SOP1 < // type for indices on load and store instructions. The pattern for // S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits, // which the hardware can handle. -def S_MOV_IMM_I64 : SOP1 < - 0x3, +def S_MOV_IMM_I64 : InstSI < (outs SReg_64:$dst), - (ins i64Literal:$src0), + (ins i64imm:$src0), "S_MOV_IMM_I64 $dst, $src0", [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))] >; +} // End isCodeGenOnly, isPseudo = 1 + +class SI_LOAD_LITERAL : + Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> { + + bits<32> imm; + let Inst{31-0} = imm; +} + +def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL; +def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL; + let isCodeGenOnly = 1, isPseudo = 1 in { def SET_M0 : InstSI < diff --git a/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp b/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp new file mode 100644 index 00000000000..720245091f7 --- /dev/null +++ b/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp @@ -0,0 +1,105 @@ +//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass performs the following transformation on instructions with +// literal constants: +// +// %VGPR0 = V_MOV_IMM_I32 1 +// +// becomes: +// +// BUNDLE +// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT +// * SI_LOAD_LITERAL 1 +// +// The resulting sequence matches exactly how the hardware handles immediate +// operands, so this transformation greatly simplifies the code generator. +// +// Only the *_MOV_IMM_* support immediate operands at the moment, but when +// support for immediate operands is added to other instructions, they +// will be lowered here as well. 
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Replaces each *_MOV_IMM_* pseudo with a bundled MOV + SI_LOAD_LITERAL pair.
+class SILowerLiteralConstantsPass : public MachineFunctionPass {
+private:
+  static char ID;
+  const TargetInstrInfo *TII;
+
+public:
+  SILowerLiteralConstantsPass(TargetMachine &tm) :
+    MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const {
+    return "SI Lower literal constants pass";
+  }
+};
+
+} // End anonymous namespace
+
+char SILowerLiteralConstantsPass::ID = 0;
+
+FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
+  return new SILowerLiteralConstantsPass(tm);
+}
+
+/// Lowers every *_MOV_IMM_* in \p MF; returns true if any was rewritten.
+bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    MachineBasicBlock::iterator I = MBB.begin(), Next;
+    for (; I != MBB.end(); I = Next) {
+      // Advance from a valid I only (never past end()); MI may be erased.
+      Next = llvm::next(I);
+      MachineInstr &MI = *I;
+      switch (MI.getOpcode()) {
+      default: break;
+      case AMDGPU::S_MOV_IMM_I32:
+      case AMDGPU::S_MOV_IMM_I64:
+      case AMDGPU::V_MOV_IMM_F32:
+      case AMDGPU::V_MOV_IMM_I32: {
+        const unsigned DstReg = MI.getOperand(0).getReg();
+        const MachineOperand &LiteralOp = MI.getOperand(1);
+        // VReg_32 destinations use V_MOV_B32_e32; anything else S_MOV_B32.
+        const unsigned MovOpcode =
+            AMDGPU::VReg_32RegClass.contains(DstReg) ? AMDGPU::V_MOV_B32_e32
+                                                     : AMDGPU::S_MOV_B32;
+        const unsigned LoadLiteralOpcode =
+            LiteralOp.isImm() ? AMDGPU::SI_LOAD_LITERAL_I32
+                              : AMDGPU::SI_LOAD_LITERAL_F32;
+        MachineInstr *First =
+            BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode), DstReg)
+                .addReg(AMDGPU::SI_LITERAL_CONSTANT);
+        MachineInstr *Last =
+            BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
+                .addOperand(LiteralOp);
+        Last->setIsInsideBundle();
+        llvm::finalizeBundle(MBB, First, Last);
+        MI.eraseFromParent();
+        Changed = true;
+        break;
+      }
+      }
+    }
+  }
+  return Changed;
+}
-- 
2.30.2