radeon/llvm: Use a custom inserter to lower CLAMP

author Tom Stellard <thomas.stellard@amd.com>

Fri, 25 May 2012 14:50:35 +0000 (10:50 -0400)

committer Tom Stellard <thomas.stellard@amd.com>

Fri, 25 May 2012 19:40:58 +0000 (15:40 -0400)
author Tom Stellard <thomas.stellard@amd.com>
Fri, 25 May 2012 14:50:35 +0000 (10:50 -0400)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 25 May 2012 19:40:58 +0000 (15:40 -0400)
diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td

index e1ace508e88dc276d2e482480cf3596762af9dbf..1f0d582d82bd5194266e827a4974b4ee9114b837 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUInstructions.td
+++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td
@@ -39,6 +39,16 @@ int TWO_PI_INV = 0x3e22f983;
  }
  def CONST : Constants;
  
+def FP_ZERO : PatLeaf <
+  (fpimm),
+  [{return N->getValueAPF().isZero();}]
+>;
+
+def FP_ONE : PatLeaf <
+  (fpimm),
+  [{return N->isExactlyValue(1.0);}]
+>;
+
  let isCodeGenOnly = 1 in {
  
    def MASK_WRITE : AMDGPUShaderInst <
@@ -50,6 +60,13 @@ let isCodeGenOnly = 1 in {
  
  let isPseudo = 1, usesCustomInserter = 1  in {
  
+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
+  (outs rc:$dst),
+  (ins rc:$src0),
+  "CLAMP $dst, $src0",
+  [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+>;
+
  class FABS <RegisterClass rc> : AMDGPUShaderInst <
    (outs rc:$dst),
    (ins rc:$src0),
diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td

index 6f1f4d55ca9c520f4ff63a0c0cea179b64e8d4ec..869c2bb6af2448e6b3d71a98db7dc03bb3287c9a 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILInstructions.td
+++ b/src/gallium/drivers/radeon/AMDILInstructions.td
@@ -230,7 +230,6 @@ defm DIV  : BinaryIntrinsicFloat<IL_OP_DIV, int_AMDIL_div>;
    }
  }
    let mayLoad = 0, mayStore=0 in {
-defm CLAMP : TernaryIntrinsicFloat<IL_OP_CLAMP, int_AMDIL_clamp>;
  defm FMA  : TernaryIntrinsicFloat<IL_OP_FMA, int_AMDIL_fma>;
  defm LERP  : TernaryIntrinsicFloat<IL_OP_LERP, int_AMDIL_lerp>;
    }
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp

index 4d789cb22b6c7f3b2c2a8635dd8259e529b78ced..59a2bb1cb288e26665a887d1c128b6f955c27337 100644 (file)
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -101,6 +101,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
      lowerImplicitParameter(MI, *BB, MRI, 8);
      break;
  
+  case AMDIL::CLAMP_R600:
+    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV))
+           .addOperand(MI->getOperand(0))
+           .addOperand(MI->getOperand(1));
+    break;
+
    case AMDIL::FABS_R600:
      MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV))
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td

index cb9a7bba3d642a35db48e87891d310046a88c403..22f3fc1b7802a2fc905283ef35f33a877c6f984b 100644 (file)
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -119,16 +119,6 @@ def TEX_SHADOW : PatLeaf<
    }]
  >;
  
-def FP_ZERO : PatLeaf <
-  (fpimm),
-  [{return N->getValueAPF().isZero();}]
->;
-
-def FP_ONE : PatLeaf <
-  (fpimm),
-  [{return N->isExactlyValue(1.0);}]
->;
-
  def COND_EQ : PatLeaf <
    (cond),
    [{switch(N->get()){{default: return false;
@@ -1078,6 +1068,7 @@ def TXD_SHADOW: AMDGPUShaderInst <
  
  } // End isCodeGenOnly = 1
  
+def CLAMP_R600 :  CLAMP <R600_Reg32>;
  def FABS_R600 : FABS<R600_Reg32>;
  
  let isPseudo = 1 in {
diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp

index 3a1a12e635feec3cc4d423b0cb7404b6ca8def61..1795b38dfb6c11636bdc7b535bbfc7b3eb6610df 100644 (file)
--- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp
+++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
@@ -83,23 +83,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
                  .addOperand(MI.getOperand(1));
          break;
  
-      case AMDIL::CLAMP_f32:
-        {
-          MachineOperand lowOp = MI.getOperand(2);
-          MachineOperand highOp = MI.getOperand(3);
-        if (lowOp.isReg() && highOp.isReg()
-            && lowOp.getReg() == AMDIL::ZERO && highOp.getReg() == AMDIL::ONE) {
-          MI.getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV))
-                  .addOperand(MI.getOperand(0))
-                  .addOperand(MI.getOperand(1));
-        } else {
-          /* XXX: Handle other cases */
-          abort();
-        }
-        break;
-        }
-
        /* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */
  /*      case AMDIL::DIV_INF_f32:
          {
@@ -218,16 +201,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
            }
  
            if (canInline) {
-            MachineOperand * use = dstOp.getNextOperandForReg();
-            /* The lowering operation for CLAMP needs to have the immediates
-             * as operands, so we must propagate them. */
-            while (use) {
-              MachineOperand * next = use->getNextOperandForReg();
-              if (use->getParent()->getOpcode() == AMDIL::CLAMP_f32) {
-                use->setReg(inlineReg);
-              }
-              use = next;
-            }
              BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY))
                      .addOperand(dstOp)
                      .addReg(inlineReg);
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp

index e192af091b62b2f865546bc566c5be8441ea416b..2455b536f9f6aa22e7a8e2a32f356d98dd908857 100644 (file)
--- a/src/gallium/drivers/radeon/SIISelLowering.cpp
+++ b/src/gallium/drivers/radeon/SIISelLowering.cpp
@@ -46,6 +46,21 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
    default:
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  
+  case AMDIL::CLAMP_SI:
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64))
+           .addOperand(MI->getOperand(0))
+           .addOperand(MI->getOperand(1))
+          /* VSRC1-2 are unused, but we still need to fill all the
+           * operand slots, so we just reuse the VSRC0 operand */
+           .addOperand(MI->getOperand(1))
+           .addOperand(MI->getOperand(1))
+           .addImm(0) // ABS
+           .addImm(1) // CLAMP
+           .addImm(0) // OMOD
+           .addImm(0); // NEG
+    MI->eraseFromParent();
+    break;
+
    case AMDIL::FABS_SI:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64))
                   .addOperand(MI->getOperand(0))
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp

index df2cd9bb02370b98866f3f074b5762ce68ea22d5..4ee3e5d5f8d4f2800ee1f956af63fa8bf21930e0 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp
@@ -80,12 +80,6 @@ unsigned SIInstrInfo::getEncodingBytes(const MachineInstr &MI) const
  MachineInstr * SIInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
      DebugLoc DL) const
  {
-
-  switch (MI.getOpcode()) {
-    default: break;
-    case AMDIL::CLAMP_f32: return convertCLAMP_f32(MI, MF, DL);
-  }
-
    MachineInstr * newMI = AMDGPUInstrInfo::convertToISA(MI, MF, DL);
    const MCInstrDesc &newDesc = get(newMI->getOpcode());
  
@@ -111,40 +105,3 @@ unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const
    default: return AMDILopcode;
    }
  }
-
-MachineInstr * SIInstrInfo::convertCLAMP_f32(MachineInstr & clampInstr,
-    MachineFunction &MF, DebugLoc DL) const
-{
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  /* XXX: HACK assume that low == zero and high == one for now until
-   * we have a way to propogate the immediates. */
-
-/*
-  uint32_t zero = (uint32_t)APFloat(0.0f).bitcastToAPInt().getZExtValue();
-  uint32_t one = (uint32_t)APFloat(1.0f).bitcastToAPInt().getZExtValue();
-  uint32_t low = clampInstr.getOperand(2).getImm();
-  uint32_t high = clampInstr.getOperand(3).getImm();
-*/
-//  if (low == zero && high == one) {
-  
-  /* Convert the desination register to the VReg_32 class */
-  if (TargetRegisterInfo::isVirtualRegister(clampInstr.getOperand(0).getReg())) {
-    MRI.setRegClass(clampInstr.getOperand(0).getReg(),
-                    AMDIL::VReg_32RegisterClass);
-  }
-  return BuildMI(MF, DL, get(AMDIL::V_MOV_B32_e64))
-           .addOperand(clampInstr.getOperand(0))
-           .addOperand(clampInstr.getOperand(1))
-          /* VSRC1-2 are unused, but we still need to fill all the
-           * operand slots, so we just reuse the VSRC0 operand */
-           .addOperand(clampInstr.getOperand(1))
-           .addOperand(clampInstr.getOperand(1))
-           .addImm(0) // ABS
-           .addImm(1) // CLAMP
-           .addImm(0) // OMOD
-           .addImm(0); // NEG
-//  } else {
-    /* XXX: Handle other cases */
-//    abort();
-//  }
-}
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h

index 1d137d4efd47cf2daede0df8dbe1d4cf7f9c1a9f..0614638517a7c46bd31362cc95d7ffa6a0d66aac 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstrInfo.h
+++ b/src/gallium/drivers/radeon/SIInstrInfo.h
@@ -25,9 +25,6 @@ private:
    const SIRegisterInfo RI;
    AMDGPUTargetMachine &TM;
  
-  MachineInstr * convertCLAMP_f32(MachineInstr & clampInstr,
-                                  MachineFunction &MF, DebugLoc DL) const;
-
  public:
    explicit SIInstrInfo(AMDGPUTargetMachine &tm);
  
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td

index 87034684f0becfab1036404d3a9fae57a5b81dd0..b6097ef1eeb0f148f62b3c3725d9af152bb717ee 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -907,6 +907,7 @@ def : Pat <
                  (S_LOAD_DWORDX4_IMM imm:$sampler_offset, SReg_64:$sampler)) /* Sampler */
  >;
  
+def CLAMP_SI : CLAMP<VReg_32>;
  def FABS_SI : FABS<VReg_32>;
  
  def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
author	Tom Stellard <thomas.stellard@amd.com>
	Fri, 25 May 2012 14:50:35 +0000 (10:50 -0400)
committer	Tom Stellard <thomas.stellard@amd.com>
	Fri, 25 May 2012 19:40:58 +0000 (15:40 -0400)
src/gallium/drivers/radeon/AMDGPUInstructions.td		patch \| blob \| history
src/gallium/drivers/radeon/AMDILInstructions.td		patch \| blob \| history
src/gallium/drivers/radeon/R600ISelLowering.cpp		patch \| blob \| history
src/gallium/drivers/radeon/R600Instructions.td		patch \| blob \| history
src/gallium/drivers/radeon/R600LowerInstructions.cpp		patch \| blob \| history
src/gallium/drivers/radeon/SIISelLowering.cpp		patch \| blob \| history
src/gallium/drivers/radeon/SIInstrInfo.cpp		patch \| blob \| history
src/gallium/drivers/radeon/SIInstrInfo.h		patch \| blob \| history
src/gallium/drivers/radeon/SIInstructions.td		patch \| blob \| history