radeon/llvm: Rework how immediate operands are handled with SI

author Tom Stellard <thomas.stellard@amd.com>

Fri, 31 Aug 2012 15:29:49 +0000 (11:29 -0400)

committer Tom Stellard <thomas.stellard@amd.com>

Fri, 31 Aug 2012 16:54:58 +0000 (12:54 -0400)
author Tom Stellard <thomas.stellard@amd.com>
Fri, 31 Aug 2012 15:29:49 +0000 (11:29 -0400)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 31 Aug 2012 16:54:58 +0000 (12:54 -0400)
diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h

index 514adb621f43652ab867f425d564c41a3ddf2586..ab6871c1da49ce10d80fa8153438d8541880395a 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPU.h
+++ b/src/gallium/drivers/radeon/AMDGPU.h
@@ -26,6 +26,7 @@ FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
  // SI Passes
  FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
  FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
  
  // Passes common to R600 and SI
  FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
diff --git a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp

index bf3d89c1b8364c914026809c055502dd3f2999a6..2d6c86d9d2056de9e0eea1bb0b8cabf03b5bf46e 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp
@@ -81,9 +81,13 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
            VCCUsed = true;
            continue;
          }
-        if (reg == AMDGPU::EXEC) {
+        switch (reg) {
+        default: break;
+        case AMDGPU::EXEC:
+        case AMDGPU::SI_LITERAL_CONSTANT:
            continue;
          }
+
          if (AMDGPU::SReg_32RegClass.contains(reg)) {
            isSGPR = true;
            width = 1;
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp

index 8de7bb7a790c8eb20333860a77f240c3bd4e03dc..2d1ca0689051a2c4702cbe799e774294804104c7 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
@@ -137,6 +137,8 @@ bool AMDGPUPassConfig::addPreEmitPass() {
    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
      PM->add(createR600ExpandSpecialInstrsPass(*TM));
      addPass(FinalizeMachineBundlesID);
+  } else {
+    PM->add(createSILowerLiteralConstantsPass(*TM));
    }
  
    return false;
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp

index 5569cf6c3cc5ceb43f462c213138563f70972822..c2b5e47bbbeba393a30a3c4b83031ddc004636e2 100644 (file)
--- a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -22,11 +22,9 @@
  #include "llvm/MC/MCSubtargetInfo.h"
  #include "llvm/Support/raw_ostream.h"
  
-#define LITERAL_REG 255
  #define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
  #define SI_INSTR_FLAGS_ENCODING_MASK 0xf
  
-
  // These must be kept in sync with SIInstructions.td and also the
  // InstrEncodingInfo array in SIInstrInfo.cpp.
  //
@@ -91,11 +89,6 @@ public:
    virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
                                     SmallVectorImpl<MCFixup> &Fixup) const;
  
-  /// i32LiteralEncode - Encode an i32 literal this is used as an operand
-  /// for an instruction in place of a register.
-  virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
-                                   SmallVectorImpl<MCFixup> &Fixup) const;
-
    /// SMRDmemriEncode - Encoding for SMRD indexed loads
    virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
                                     SmallVectorImpl<MCFixup> &Fixup) const;
@@ -147,7 +140,12 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
    } else if (MO.isFPImm()) {
      // XXX: Not all instructions can use inline literals
      // XXX: We should make sure this is a 32-bit constant
-    return LITERAL_REG;
+    union {
+      float F;
+      uint32_t I;
+    } Imm;
+    Imm.F = MO.getFPImm();
+    return Imm.I;
    } else{
      llvm_unreachable("Encoding of this operand type is not supported yet.");
    }
@@ -176,12 +174,6 @@ unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
    return GPRAlign(MI, OpNo, 2);
  }
  
-uint64_t SIMCCodeEmitter::i32LiteralEncode(const MCInst &MI,
-                                           unsigned OpNo,
-                                        SmallVectorImpl<MCFixup> &Fixup) const {
-  return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32);
-}
-
  #define SMRD_OFFSET_MASK 0xff
  #define SMRD_IMM_SHIFT 8
  #define SMRD_SBASE_MASK 0x3f
@@ -262,17 +254,13 @@ unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
  
  unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
  
-  // Instructions with literal constants are expanded to 64-bits, and
-  // the constant is stored in bits [63:32]
-  for (unsigned i = 0; i < MI.getNumOperands(); i++) {
-    if (MI.getOperand(i).isFPImm()) {
-      return 8;
-    }
-  }
-
-  // This instruction always has a literal
-  if (MI.getOpcode() == AMDGPU::S_MOV_IMM_I32) {
-    return 8;
+  // These instructions aren't real instructions with an encoding type, so
+  // we need to manually specify their size.
+  switch (MI.getOpcode()) {
+  default: break;
+  case AMDGPU::SI_LOAD_LITERAL_I32:
+  case AMDGPU::SI_LOAD_LITERAL_F32:
+    return 4;
    }
  
    unsigned encoding_type = getEncodingType(MI);
@@ -294,6 +282,7 @@ unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
    switch (reg) {
      case AMDGPU::M0: return 124;
      case AMDGPU::SREG_LIT_0: return 128;
+    case AMDGPU::SI_LITERAL_CONSTANT: return 255;
      default: return getHWRegNum(reg);
    }
  }
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources

index 687acb3f42012fd61c79b11d7c08b482a19ed23f..2eb1120895780328a984a253b81e0d721680089f 100644 (file)
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -46,6 +46,7 @@ CPP_SOURCES := \
         SIAssignInterpRegs.cpp          \
         SIInstrInfo.cpp                 \
         SIISelLowering.cpp              \
+       SILowerLiteralConstants.cpp             \
         SIMachineFunctionInfo.cpp       \
         SIRegisterInfo.cpp              \
         InstPrinter/AMDGPUInstPrinter.cpp \
diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl

index 84f677ed324c9390793f481cc0c6161d004bce8b..e47fb56868e17f6707e62a43b0f38bca8241da6f 100644 (file)
--- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
+++ b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
@@ -91,6 +91,7 @@ def VCC : SIReg<"VCC">;
  def EXEC : SIReg<"EXEC">;
  def SCC : SIReg<"SCC">;
  def SREG_LIT_0 : SIReg <"S LIT 0">;
+def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT">;
  
  def M0 : SIReg <"M0">;
  
diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h

index 77d61d86f493112ed828e57cefdfda988a13f2f5..80c7f4bb86c6e621135cb870c266c6caf0ac77df 100644 (file)
--- a/src/gallium/drivers/radeon/SIISelLowering.h
+++ b/src/gallium/drivers/radeon/SIISelLowering.h
@@ -29,6 +29,8 @@ class SITargetLowering : public AMDGPUTargetLowering
    /// write.
    void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
                MachineBasicBlock::iterator I) const;
+  void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
+              MachineBasicBlock::iterator I, unsigned Opocde) const;
    void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
                MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
    void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td

index a7ce9a6ec84940a895cb2ca2cfb583e456c97cc8..135f279b39fc23e6e16601ea03d07531f497cfe9 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstrInfo.td
+++ b/src/gallium/drivers/radeon/SIInstrInfo.td
@@ -86,16 +86,6 @@ class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
    let MIOperandInfo = (ops rc:$reg);
  }
  
-def i32Literal : Operand <i32> {
-  let EncoderMethod = "i32LiteralEncode";
-}
-
-// i64Literal uses the same encoder method as i32 literal, because an
-// i64Literal is really a i32 literal with the top 32-bits all set to zero.
-def i64Literal : Operand <i64> {
-  let EncoderMethod = "i32LiteralEncode";
-}
-
  def SMRDmemrr : Operand<iPTR> {
    let MIOperandInfo = (ops SReg_64, SReg_32);
    let EncoderMethod = "GPR2AlignEncode";
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td

index 7802ec6cb548bb4cfec07f6eddd6f9f92a66cc28..291c03418a0f5373f5a7bf40c7ef7d5b9d328d30 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -887,21 +887,21 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
  //def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
  def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
  
-class V_MOV_IMM <Operand immType, SDNode immNode> : VOP1 <
-  0x1,
+class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI <
    (outs VReg_32:$dst),
    (ins immType:$src0),
    "V_MOV_IMM",
     [(set VReg_32:$dst, (immNode:$src0))]
  >;
  
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
  def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
  def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
  
-def S_MOV_IMM_I32 : SOP1 <
-  0x3,
+def S_MOV_IMM_I32 : InstSI <
    (outs SReg_32:$dst),
-  (ins i32Literal:$src0),
+  (ins i32imm:$src0),
    "S_MOV_IMM_I32",
    [(set SReg_32:$dst, (imm:$src0))]
  >;
@@ -910,14 +910,25 @@ def S_MOV_IMM_I32 : SOP1 <
  // type for indices on load and store instructions.  The pattern for
  // S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits,
  // which the hardware can handle.
-def S_MOV_IMM_I64 : SOP1 <
-  0x3,
+def S_MOV_IMM_I64 : InstSI <
    (outs SReg_64:$dst),
-  (ins i64Literal:$src0),
+  (ins i64imm:$src0),
    "S_MOV_IMM_I64 $dst, $src0",
    [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))]
  >;
  
+} // End isCodeGenOnly, isPseudo = 1
+
+// Carrier record for a literal constant.  It has no outputs and a single
+// immediate input; all 32 bits of the encoding are taken from that immediate.
+class SI_LOAD_LITERAL<Operand ImmType> :
+    Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> {
+
+  // The immediate occupies bits 31-0 of Inst, i.e. the entire encoded word.
+  bits<32> imm;
+  let Inst{31-0} = imm;
+}
+
+// Integer and floating-point variants; they differ only in the operand type
+// handed to SI_LOAD_LITERAL.
+def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>;
+def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>;
+
  let isCodeGenOnly = 1, isPseudo = 1 in {
  
  def SET_M0 : InstSI <
diff --git a/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp b/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp

new file mode 100644 (file)

index 0000000..7202450
--- /dev/null
+++ b/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp
@@ -0,0 +1,105 @@
+//===-- SILowerLiteralConstants.cpp - Lower instrs using literal constants-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass performs the following transformation on instructions with
+// literal constants:
+//
+// %VGPR0 = V_MOV_IMM_I32 1
+//
+// becomes:
+//
+// BUNDLE
+//   * %VGPR0 = V_MOV_B32_e32 SI_LITERAL_CONSTANT
+//   * SI_LOAD_LITERAL 1
+//
+// The resulting sequence matches exactly how the hardware handles immediate
+// operands, so this transformation greatly simplifies the code generator.
+//
+// Only the *_MOV_IMM_* instructions support immediate operands at the moment,
+// but when support for immediate operands is added to other instructions,
+// they will be lowered here as well.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Machine-function pass object for the literal-constant lowering.  It keeps
+/// a pointer to the target's instruction info, obtained once at construction;
+/// the actual rewriting lives in the out-of-line runOnMachineFunction().
+class SILowerLiteralConstantsPass : public MachineFunctionPass {
+
+  /// Pass identifier handed to the MachineFunctionPass constructor.
+  static char ID;
+
+  /// Instruction descriptions used when building replacement instructions.
+  const TargetInstrInfo *TII;
+
+public:
+  SILowerLiteralConstantsPass(TargetMachine &tm)
+    : MachineFunctionPass(ID),
+      TII(tm.getInstrInfo()) { }
+
+  /// Human-readable name of this pass.
+  const char *getPassName() const {
+    return "SI Lower literal constants pass";
+  }
+
+  /// Entry point invoked for each machine function; defined out of line.
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+};
+
+} // End anonymous namespace
+
+// Storage for the pass identifier that the constructor forwards to
+// MachineFunctionPass.
+char SILowerLiteralConstantsPass::ID = 0;
+
+/// Factory: allocate a new SILowerLiteralConstantsPass bound to \p tm and
+/// return it as a FunctionPass pointer.
+FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
+  FunctionPass *LowerLiterals = new SILowerLiteralConstantsPass(tm);
+  return LowerLiterals;
+}
+
+/// Replace every S_MOV_IMM_I32, S_MOV_IMM_I64, V_MOV_IMM_F32 and V_MOV_IMM_I32
+/// in \p MF with a bundle of two instructions: a move (V_MOV_B32_e32 or
+/// S_MOV_B32) whose source is SI_LITERAL_CONSTANT, followed by an
+/// SI_LOAD_LITERAL_I32 / SI_LOAD_LITERAL_F32 that carries the original
+/// immediate operand.  The original instruction is erased.
+///
+/// \returns true if at least one instruction was rewritten, false otherwise.
+bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    MachineBasicBlock::iterator I = MBB.begin();
+    while (I != MBB.end()) {
+      // Fetch the successor before touching MI, because MI may be erased
+      // below.  Doing it here, after the end() check, also guarantees we
+      // never advance an iterator that already equals MBB.end().
+      MachineBasicBlock::iterator Next = llvm::next(I);
+      MachineInstr &MI = *I;
+      switch (MI.getOpcode()) {
+      default: break;
+      // NOTE(review): S_MOV_IMM_I64 takes the same path as the 32-bit scalar
+      // case and becomes S_MOV_B32 -- confirm this is intended.
+      case AMDGPU::S_MOV_IMM_I32:
+      case AMDGPU::S_MOV_IMM_I64:
+      case AMDGPU::V_MOV_IMM_F32:
+      case AMDGPU::V_MOV_IMM_I32: {
+          const unsigned DstReg = MI.getOperand(0).getReg();
+          const MachineOperand &LiteralOp = MI.getOperand(1);
+
+          // Vector destination -> vector move, anything else -> scalar move.
+          const unsigned MovOpcode =
+            AMDGPU::VReg_32RegClass.contains(DstReg) ? AMDGPU::V_MOV_B32_e32
+                                                     : AMDGPU::S_MOV_B32;
+          // Integer immediates use the I32 carrier; every other operand kind
+          // is routed to the F32 carrier.
+          const unsigned LoadLiteralOpcode =
+            LiteralOp.isImm() ? AMDGPU::SI_LOAD_LITERAL_I32
+                              : AMDGPU::SI_LOAD_LITERAL_F32;
+
+          // Both new instructions are inserted in front of MI.
+          MachineInstr *First =
+            BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode), DstReg)
+                    .addReg(AMDGPU::SI_LITERAL_CONSTANT);
+          MachineInstr *Last =
+            BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
+                    .addOperand(LiteralOp);
+          Last->setIsInsideBundle();
+          llvm::finalizeBundle(MBB, First, Last);
+          MI.eraseFromParent();
+          Changed = true;
+          break;
+        }
+      }
+      I = Next;
+    }
+  }
+  return Changed;
+}
author	Tom Stellard <thomas.stellard@amd.com>
	Fri, 31 Aug 2012 15:29:49 +0000 (11:29 -0400)
committer	Tom Stellard <thomas.stellard@amd.com>
	Fri, 31 Aug 2012 16:54:58 +0000 (12:54 -0400)
src/gallium/drivers/radeon/AMDGPU.h		patch \| blob \| history
src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp		patch \| blob \| history
src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp		patch \| blob \| history
src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp		patch \| blob \| history
src/gallium/drivers/radeon/Makefile.sources		patch \| blob \| history
src/gallium/drivers/radeon/SIGenRegisterInfo.pl		patch \| blob \| history
src/gallium/drivers/radeon/SIISelLowering.h		patch \| blob \| history
src/gallium/drivers/radeon/SIInstrInfo.td		patch \| blob \| history
src/gallium/drivers/radeon/SIInstructions.td		patch \| blob \| history
src/gallium/drivers/radeon/SILowerLiteralConstants.cpp	[new file with mode: 0644]	patch \| blob