radeon/llvm: Add R600ExpandSpecialInstrs pass

author Tom Stellard <thomas.stellard@amd.com>

Mon, 20 Aug 2012 21:09:00 +0000 (21:09 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Tue, 21 Aug 2012 15:42:44 +0000 (15:42 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Mon, 20 Aug 2012 21:09:00 +0000 (21:09 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Tue, 21 Aug 2012 15:42:44 +0000 (15:42 +0000)
diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h

index 191f495eaa43e25982e852884d0d55d6e88e41fe..927e62a27dd458b7c574c8551b29550e62c653f7 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPU.h
+++ b/src/gallium/drivers/radeon/AMDGPU.h
@@ -22,6 +22,7 @@ class AMDGPUTargetMachine;
  // R600 Passes
  FunctionPass* createR600KernelParametersPass(const TargetData* TD);
  FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
  
  // SI Passes
  FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h

index 31400a7b60fea03b95dfdb8696a6c7bf1dd21f24..de3c59406135730143df384b4e15a0248b36a3a9 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
@@ -30,6 +30,7 @@
  #define MO_FLAG_ABS   (1 << 2)
  #define MO_FLAG_MASK  (1 << 3)
  #define MO_FLAG_PUSH  (1 << 4)
+#define MO_FLAG_LAST  (1 << 5)
  
  #define OPCODE_IS_ZERO_INT 0x00000045
  #define OPCODE_IS_NOT_ZERO_INT 0x00000042
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp

index b97c0febfc41748cacfb998f2c0020f54906de24..6f15430bd53793def878161422267005d0b3454d 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
@@ -158,6 +158,12 @@ bool AMDGPUPassConfig::addPreEmitPass() {
    PM->add(createAMDGPUCFGPreparationPass(*TM));
    PM->add(createAMDGPUCFGStructurizerPass(*TM));
  
+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+    PM->add(createR600ExpandSpecialInstrsPass(*TM));
+    addPass(FinalizeMachineBundlesID);
+  }
+
    return false;
  }
  
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources

index 3a75ce96945c003e1fa5803e83f4fd9d52ab1a63..0e9825f0fe86bbe47eaf56f426099b0bc5d8adb3 100644 (file)
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -35,6 +35,7 @@ CPP_SOURCES := \
         AMDGPUInstrInfo.cpp             \
         AMDGPURegisterInfo.cpp          \
         R600CodeEmitter.cpp             \
+       R600ExpandSpecialInstrs.cpp     \
         R600ISelLowering.cpp            \
         R600InstrInfo.cpp               \
         R600KernelParameters.cpp        \
diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp

index 02b6fdb748b7c06fb82f2ccf79c3a0eda3a32035..14e877b2518ad85b2f9c069526aaeeada8ebec3d 100644 (file)
--- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
@@ -50,7 +50,6 @@ private:
    const R600InstrInfo * TII;
  
    bool IsCube;
-  bool IsReduction;
    bool IsVector;
    unsigned currentElement;
    bool IsLast;
@@ -60,7 +59,7 @@ private:
  public:
  
    R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
-      _OS(OS), TM(NULL), IsCube(false), IsReduction(false), IsVector(false),
+      _OS(OS), TM(NULL), IsCube(false), IsVector(false),
        IsLast(true) { }
  
    const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
@@ -159,10 +158,9 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
    for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                    BB != BB_E; ++BB) {
       MachineBasicBlock &MBB = *BB;
-     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-                                                       I != E; ++I) {
+     for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
+                                            E = MBB.instr_end(); I != E; ++I) {
            MachineInstr &MI = *I;
-         IsReduction = TII->isReductionOp(MI.getOpcode());
           IsVector = TII->isVector(MI);
           IsCube = TII->isCubeOp(MI.getOpcode());
            if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
@@ -172,7 +170,7 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
              EmitTexInstr(MI);
            } else if (TII->isFCOp(MI.getOpcode())){
              EmitFCInstr(MI);
-          } else if (IsReduction || IsVector || IsCube) {
+          } else if (IsVector || IsCube) {
              IsLast = false;
              // XXX: On Cayman, some (all?) of the vector instructions only need
              // to fill the first three slots.
@@ -180,7 +178,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
                IsLast = (currentElement == 3);
                EmitALUInstr(MI);
              }
-            IsReduction = false;
             IsVector = false;
             IsCube = false;
            } else if (MI.getOpcode() == AMDGPU::RETURN ||
@@ -310,8 +307,6 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
    // Emit the source channel (1 byte)
    if (chan_override != -1) {
      EmitByte(chan_override);
-  } else if (IsReduction) {
-    EmitByte(currentElement);
    } else if (MO.isReg()) {
      EmitByte(TRI->getHWRegChan(MO.getReg()));
    } else {
@@ -353,7 +348,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO)
      EmitByte(getHWReg(MO.getReg()));
  
      // Emit the element of the destination register (1 byte)
-    if (IsReduction || IsCube || IsVector) {
+    if (IsCube || IsVector) {
        EmitByte(currentElement);
      } else {
        EmitByte(TRI->getHWRegChan(MO.getReg()));
@@ -367,7 +362,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO)
      }
  
      // Emit writemask (1 byte).
-    if (((IsReduction || IsVector) &&
+    if ((IsVector &&
            currentElement != TRI->getHWRegChan(MO.getReg()))
         || MO.getTargetFlags() & MO_FLAG_MASK) {
        EmitByte(0);
@@ -389,11 +384,14 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
    EmitTwoBytes(getBinaryCodeForInstr(MI));
  
    // Emit IsLast (for this instruction group) (1 byte)
-  if (IsLast) {
-    EmitByte(1);
-  } else {
+  if (!IsLast ||
+                               (MI.isInsideBundle() &&
+                               !(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST))) {
      EmitByte(0);
+  } else {
+    EmitByte(1);
    }
+
    // Emit isOp3 (1 byte)
    if (numSrc == 3) {
      EmitByte(1);
diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp

new file mode 100644 (file)

index 0000000..4c67ba4
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
@@ -0,0 +1,91 @@
+//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Vector, Reduction, and Cube instructions need to fill the entire instruction
+// group to work correctly.  This pass expands such an instruction into several
+// instructions that completely fill the group (only Reduction ops so far).
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
+  // Machine-function pass that replaces each reduction instruction with four per-channel copies.
+private:
+  static char ID;  // Pass ID handed to the MachineFunctionPass constructor.
+  const R600InstrInfo *TII;  // Instruction info obtained from the TargetMachine in the constructor.
+
+public:
+  R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
+    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const {
+    return "R600 Expand special instructions pass";
+  }
+};
+
+} // End anonymous namespace
+
+char R600ExpandSpecialInstrsPass::ID = 0;  // Definition of the static pass ID declared in the class.
+
+FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
+  return new R600ExpandSpecialInstrsPass(TM);  // Heap-allocated; NOTE(review): presumably owned by the pass manager it is added to - confirm.
+}
+
+bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
+
+  const R600RegisterInfo &TRI = TII->getRegisterInfo();
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    MachineBasicBlock::iterator I = MBB.begin();
+    while (I != MBB.end()) {
+      MachineInstr &MI = *I;
+      I = llvm::next(I);
+
+      if (!TII->isReductionOp(MI.getOpcode())) {
+        continue;
+      }
+
+      // Expand the instruction
+      for (unsigned Chan = 0; Chan < 4; Chan++) {
+        unsigned DstReg = MI.getOperand(0).getReg();
+        unsigned Src0 = MI.getOperand(1).getReg();
+        unsigned Src1 = MI.getOperand(2).getReg();
+        unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+        unsigned NewSrc0 = TRI.getSubReg(Src0, SubRegIndex);
+        unsigned NewSrc1 = TRI.getSubReg(Src1, SubRegIndex);
+        unsigned DstBase = TRI.getHWRegIndex(DstReg);
+        unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+        unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
+        Flags |= (Chan == 3 ? MO_FLAG_LAST : 0);
+        MachineOperand NewDstOp = MachineOperand::CreateReg(NewDstReg, true);
+        NewDstOp.addTargetFlag(Flags);
+
+        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode()))
+                .addOperand(NewDstOp)
+                .addReg(NewSrc0)
+                .addReg(NewSrc1)
+                ->setIsInsideBundle(Chan != 0);
+      }
+      MI.eraseFromParent();
+    }
+  }
+  return false;
+}
author	Tom Stellard <thomas.stellard@amd.com>
	Mon, 20 Aug 2012 21:09:00 +0000 (21:09 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Tue, 21 Aug 2012 15:42:44 +0000 (15:42 +0000)
src/gallium/drivers/radeon/AMDGPU.h		patch \| blob \| history
src/gallium/drivers/radeon/AMDGPUInstrInfo.h		patch \| blob \| history
src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp		patch \| blob \| history
src/gallium/drivers/radeon/Makefile.sources		patch \| blob \| history
src/gallium/drivers/radeon/R600CodeEmitter.cpp		patch \| blob \| history
src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp	[new file with mode: 0644]	patch \| blob