radeon/llvm: Support for predicate bit
author Vincent Lejeune <vljn@ovi.com>
Wed, 1 Aug 2012 20:49:40 +0000 (22:49 +0200)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 15 Aug 2012 21:07:13 +0000 (21:07 +0000)
Tom Stellard:
  - A few changes to predicate register defs

Signed-off-by: Tom Stellard <thomas.stellard@amd.com>
src/gallium/drivers/radeon/AMDGPUInstrInfo.h
src/gallium/drivers/radeon/R600CodeEmitter.cpp
src/gallium/drivers/radeon/R600GenRegisterInfo.pl
src/gallium/drivers/radeon/R600ISelLowering.cpp
src/gallium/drivers/radeon/R600InstrInfo.cpp
src/gallium/drivers/radeon/R600InstrInfo.h
src/gallium/drivers/radeon/R600Instructions.td
src/gallium/drivers/radeon/R600RegisterInfo.cpp

index 7232c0beeee05a81f73a343ca6cda92059b2a16f..28952cfa60fb6a420829b14f8c8f4632ad41f370 100644 (file)
 #define MO_FLAG_NEG   (1 << 1)
 #define MO_FLAG_ABS   (1 << 2)
 #define MO_FLAG_MASK  (1 << 3)
+#define MO_FLAG_PUSH  (1 << 4) // operand target flag; next free bit after MO_FLAG_MASK (1 << 3)
+
+#define OPCODE_IS_ZERO_INT 0x00000045 // NOTE(review): presumably a hardware opcode for an integer "== 0" predicate test - confirm against the ISA docs
+#define OPCODE_IS_NOT_ZERO_INT 0x00000042 // NOTE(review): presumably the integer "!= 0" counterpart - confirm
+#define OPCODE_IS_ZERO 0x00000020 // NOTE(review): presumably the non-integer "== 0" variant - confirm
+#define OPCODE_IS_NOT_ZERO 0x00000023 // NOTE(review): presumably the non-integer "!= 0" variant - confirm
 
 namespace llvm {
 
index 870d375b6e70ca7588f10315839cd913af685873..02b6fdb748b7c06fb82f2ccf79c3a0eda3a32035 100644 (file)
@@ -235,6 +235,8 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
 {
 
   unsigned numOperands = MI.getNumExplicitOperands();
+  if(MI.findFirstPredOperandIdx() > -1)
+    numOperands--;
 
    // Some instructions are just place holder instructions that represent
    // operations that the GPU does automatically.  They should be ignored.
@@ -242,6 +244,9 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
     return;
   }
 
+  if(MI.getOpcode() == AMDGPU::PRED_X)
+    numOperands = 2;
+
   // XXX Check if instruction writes a result
   if (numOperands < 1) {
     return;
@@ -343,7 +348,7 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
 
 void R600CodeEmitter::EmitDst(const MachineOperand & MO)
 {
-  if (MO.isReg()) {
+  if (MO.isReg() && MO.getReg() != AMDGPU::PREDICATE_BIT) {
     // Emit the destination register index (1 byte)
     EmitByte(getHWReg(MO.getReg()));
 
@@ -396,8 +401,31 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
     EmitByte(0);
   }
 
-  // XXX: Emit predicate (1 byte)
-  EmitByte(0);
+  // XXX: Emit push modifier
+    if(MI.getOperand(1).getTargetFlags() & MO_FLAG_PUSH) {
+    EmitByte(1);
+  } else {
+    EmitByte(0);
+  }
+
+    // XXX: Emit predicate (1 byte)
+  int predidx = MI.findFirstPredOperandIdx();
+  if (predidx > -1)
+    switch(MI.getOperand(predidx).getReg()) {
+    case AMDGPU::PRED_SEL_ZERO:
+      EmitByte(2);
+      break;
+    case AMDGPU::PRED_SEL_ONE:
+      EmitByte(3);
+      break;
+    default:
+      EmitByte(0);
+      break;
+    }
+  else {
+    EmitByte(0);
+  }
+
 
   // XXX: Emit bank swizzle. (1 byte)  Do we need this?  It looks like
   // r600_asm.c sets it.
index 6bbe21c5f0a636a22ec95849230a63568151dbca..a28a3ad1d933aba8cfa3de71e9dd23107e9096c5 100644 (file)
@@ -69,6 +69,10 @@ def NEG_HALF : R600Reg<"-0.5">;
 def NEG_ONE : R600Reg<"-1.0">;
 def PV_X : R600Reg<"pv.x">;
 def ALU_LITERAL_X : R600Reg<"literal.x">;
+def PREDICATE_BIT : R600Reg<"PredicateBit">;
+def PRED_SEL_OFF: R600Reg<"Pred_sel_off">;
+def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero">;
+def PRED_SEL_ONE : R600Reg<"Pred_sel_one">;
 
 def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
     $creg_list)>;
@@ -84,6 +88,12 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
     R600_CReg32,
     ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
 
+def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
+    PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
+
+def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
+    PREDICATE_BIT)>;
+
 def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
     $t128_string)>
 {
index f33d90e4fd430bc79bcbde5b0ec822d08e1d5615..26f14fa49faf686bc525aae0fab9ce5d8130fbb3 100644 (file)
@@ -90,21 +90,24 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
     MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
-           .addOperand(MI->getOperand(1));
+           .addOperand(MI->getOperand(1))
+           .addReg(AMDGPU::PRED_SEL_OFF);
     break;
 
   case AMDGPU::FABS_R600:
     MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
-           .addOperand(MI->getOperand(1));
+           .addOperand(MI->getOperand(1))
+           .addReg(AMDGPU::PRED_SEL_OFF);
     break;
 
   case AMDGPU::FNEG_R600:
     MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
             .addOperand(MI->getOperand(0))
-            .addOperand(MI->getOperand(1));
+            .addOperand(MI->getOperand(1))
+            .addReg(AMDGPU::PRED_SEL_OFF);
     break;
 
   case AMDGPU::R600_LOAD_CONST:
@@ -141,10 +144,12 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
       // this way and it didn't produce the correct results.
       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
               .addReg(AMDGPU::ALU_LITERAL_X)
+              .addReg(AMDGPU::PRED_SEL_OFF)
               .addImm(2);
       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
               .addOperand(MI->getOperand(1))
-              .addReg(ShiftValue);
+              .addReg(ShiftValue)
+              .addReg(AMDGPU::PRED_SEL_OFF);
       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
               .addOperand(MI->getOperand(0))
               .addReg(NewAddr);
index 3c9e4eb1de61b93f5734b1412e12a5d694815138..c807d5c440f601bbd72e60410d9a85a2ea368b93 100644 (file)
@@ -16,6 +16,7 @@
 #include "AMDGPUSubtarget.h"
 #include "R600RegisterInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "AMDILUtilityFunctions.h"
 
 #define GET_INSTRINFO_CTOR
 #include "AMDGPUGenDFAPacketizer.inc"
@@ -59,6 +60,7 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
               .addReg(RI.getSubReg(DestReg, subRegMap[i]), RegState::Define)
               .addReg(RI.getSubReg(SrcReg, subRegMap[i]))
+              .addReg(0) // PREDICATE_BIT
               .addReg(DestReg, RegState::Define | RegState::Implicit);
     }
   } else {
@@ -68,7 +70,8 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
            && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
 
     BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
+      .addReg(SrcReg, getKillRegState(KillSrc))
+      .addReg(0); // PREDICATE_BIT
   }
 }
 
@@ -79,6 +82,7 @@ MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
   MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
   MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
   MachineInstrBuilder(MI).addImm(Imm);
+  MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
 
   return MI;
 }
@@ -183,3 +187,27 @@ DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
   const InstrItineraryData *II = TM->getInstrItineraryData();
   return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
 }
+
+// Report whether MI currently carries an active predicate.
+//
+// Returns false when MI has no predicate operand at all
+// (findFirstPredOperandIdx() yields a negative index).  Otherwise the
+// register held by that operand decides: PRED_SEL_ONE, PRED_SEL_ZERO and
+// PREDICATE_BIT count as predicated; every other register (for example
+// PRED_SEL_OFF) does not.
+//
+// This is a pure query: it neither modifies MI nor prints anything.
+bool
+R600InstrInfo::isPredicated(const MachineInstr *MI) const
+{
+  int idx = MI->findFirstPredOperandIdx();
+  if (idx < 0)
+    return false;
+
+  unsigned Reg = MI->getOperand(idx).getReg();
+  switch (Reg) {
+  default: return false;
+  case AMDGPU::PRED_SEL_ONE:
+  case AMDGPU::PRED_SEL_ZERO:
+  case AMDGPU::PREDICATE_BIT:
+    return true;
+  }
+}
+
+// Whether MI may be given a predicate.  R600 adds no rule of its own here:
+// the answer is whatever AMDGPUInstrInfo::isPredicable() reports for MI.
+bool R600InstrInfo::isPredicable(MachineInstr *MI) const
+{
+  const bool Predicable = AMDGPUInstrInfo::isPredicable(MI);
+  return Predicable;
+}
index 72ea151508606a4f6c0ad339eb8e813fb83e732d..9bdda7a1e13127a40fbc223ea81b93b75a8bd637 100644 (file)
@@ -62,6 +62,9 @@ namespace llvm {
 
   DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
                                            const ScheduleDAG *DAG) const;
+  bool isPredicated(const MachineInstr *MI) const;
+
+  bool isPredicable(MachineInstr *MI) const;
 };
 
 } // End llvm namespace
index 45598a67b457637a072ee5dd12a4179e0dc3f587..6f2ab1fab8fe6b0996ed548cb3008938ea30459a 100644 (file)
@@ -69,13 +69,16 @@ class R600_ALU {
   
 }
 
+def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
+                                     (ops PRED_SEL_OFF)>;
+
 
 class R600_1OP <bits<32> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
   InstR600 <inst,
           (outs R600_Reg32:$dst),
-          (ins R600_Reg32:$src, variable_ops),
-          !strconcat(opName, " $dst, $src"),
+          (ins R600_Reg32:$src, R600_Pred:$p, variable_ops),
+          !strconcat(opName, " $dst, $src ($p)"),
           pattern,
           itin
   >;
@@ -84,7 +87,7 @@ class R600_2OP <bits<32> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
   InstR600 <inst,
           (outs R600_Reg32:$dst),
-          (ins R600_Reg32:$src0, R600_Reg32:$src1, variable_ops),
+          (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops),
           !strconcat(opName, " $dst, $src0, $src1"),
           pattern,
           itin
@@ -94,7 +97,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
   InstR600 <inst,
           (outs R600_Reg32:$dst),
-          (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, variable_ops),
+          (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2,R600_Pred:$p, variable_ops),
           !strconcat(opName, " $dst, $src0, $src1, $src2"),
           pattern,
           itin>{
@@ -102,6 +105,22 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
     let Op3 = 1;
   }
 
+
+
+def PRED_X : AMDGPUInst <(outs R600_Predicate_Bit:$dst), // result lives in the R600_Predicate_Bit register class
+           (ins R600_Reg32:$src0, i32imm:$src1), // $src0: 32-bit register input; $src1: 32-bit immediate
+           "PRED $dst, $src0, $src1",
+           []> // empty pattern list: instruction selection never matches this record
+{
+  let DisableEncoding = "$src0"; // keep $src0 out of the generated encoding
+  field bits<32> Inst; // 32-bit encoding field for this record
+  bits<32> src1; // bit-level view of the $src1 immediate
+
+  let Inst = src1; // the encoding is the $src1 immediate, bit for bit
+}
+
+
+
 class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
                       InstrItinClass itin = VecALU> :
   InstR600 <inst,
@@ -341,7 +360,7 @@ def MOV : R600_1OP <0x19, "MOV", []>;
 
 class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19,
   (outs R600_Reg32:$dst),
-  (ins R600_Reg32:$alu_literal, immType:$imm),
+  (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm),
   "MOV_IMM $dst, $imm",
   [], AnyALU
 >;
index 86bc169a10c333b8f00ded357ff9aaa7d73fcb29..94752410bfb3abe4e3c038ed5fdc79708a35714e 100644 (file)
@@ -37,6 +37,10 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
   Reserved.set(AMDGPU::NEG_ONE);
   Reserved.set(AMDGPU::PV_X);
   Reserved.set(AMDGPU::ALU_LITERAL_X);
+  Reserved.set(AMDGPU::PREDICATE_BIT);
+  Reserved.set(AMDGPU::PRED_SEL_OFF);
+  Reserved.set(AMDGPU::PRED_SEL_ZERO);
+  Reserved.set(AMDGPU::PRED_SEL_ONE);
 
   for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
                         E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
@@ -72,6 +76,11 @@ unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const
   case AMDGPU::HALF:
   case AMDGPU::NEG_HALF: return 252;
   case AMDGPU::ALU_LITERAL_X: return 253;
+  case AMDGPU::PREDICATE_BIT:
+  case AMDGPU::PRED_SEL_OFF:
+  case AMDGPU::PRED_SEL_ZERO:
+  case AMDGPU::PRED_SEL_ONE:
+    return 0;
   default: return getHWRegIndexGen(reg);
   }
 }
@@ -86,6 +95,10 @@ unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
   case AMDGPU::HALF:
   case AMDGPU::NEG_HALF:
   case AMDGPU::ALU_LITERAL_X:
+  case AMDGPU::PREDICATE_BIT:
+  case AMDGPU::PRED_SEL_OFF:
+  case AMDGPU::PRED_SEL_ZERO:
+  case AMDGPU::PRED_SEL_ONE:
     return 0;
   default: return getHWRegChanGen(reg);
   }