radeon/llvm: Use a custom inserter to lower FABS

author Tom Stellard <thomas.stellard@amd.com>

Fri, 25 May 2012 14:29:09 +0000 (10:29 -0400)

committer Tom Stellard <thomas.stellard@amd.com>

Fri, 25 May 2012 19:40:58 +0000 (15:40 -0400)
author Tom Stellard <thomas.stellard@amd.com>
Fri, 25 May 2012 14:29:09 +0000 (10:29 -0400)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 25 May 2012 19:40:58 +0000 (15:40 -0400)
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp

index ab036d1df8834891239505aaa1c7987da69748f1..9d076bdf568b4e0af9a410e3038442536f04613e 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
@@ -64,6 +64,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        return LowerIntrinsicIABS(Op, DAG);
      case AMDGPUIntrinsic::AMDIL_exp:
        return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+    case AMDGPUIntrinsic::AMDIL_fabs:
+      return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
      case AMDGPUIntrinsic::AMDGPU_lrp:
        return LowerIntrinsicLRP(Op, DAG);
      case AMDGPUIntrinsic::AMDIL_fraction:
diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td

index 291081fef43bccf661d4e4377ad933e305b7f046..e1ace508e88dc276d2e482480cf3596762af9dbf 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUInstructions.td
+++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td
@@ -47,7 +47,19 @@ let isCodeGenOnly = 1 in {
      "MASK_WRITE $src",
      []
    >;
-}
+
+let isPseudo = 1, usesCustomInserter = 1  in {
+
+class FABS <RegisterClass rc> : AMDGPUShaderInst <
+  (outs rc:$dst),
+  (ins rc:$src0),
+  "FABS $dst, $src0",
+  [(set rc:$dst, (fabs rc:$src0))]
+>;
+
+} // End isPseudo = 1, usesCustomInserter = 1
+
+} // End isCodeGenOnly = 1
  
  /* Generic helper patterns for intrinsics */
  /* -------------------------------------- */
diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td

index 4d9c1637ad96814fa59249dff79ba3daf1922ed1..6f1f4d55ca9c520f4ff63a0c0cea179b64e8d4ec 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILInstructions.td
+++ b/src/gallium/drivers/radeon/AMDILInstructions.td
@@ -196,7 +196,6 @@ def LUSHR        : TwoInOneOut<IL_OP_U64_SHR, (outs GPRI64:$dst),
  // float math instructions start here
  //===---------------------------------------------------------------------===//
  let mayLoad=0, mayStore=0 in {
-defm ABS : UnaryIntrinsicFloat<IL_OP_ABS, int_AMDIL_fabs>;
  defm PIREDUCE : UnaryIntrinsicFloat<IL_OP_PI_REDUCE, int_AMDIL_pireduce>;
  defm ROUND_NEGINF : UnaryIntrinsicFloat<IL_OP_ROUND_NEG_INF,
            int_AMDIL_round_neginf>;
@@ -236,7 +235,6 @@ defm FMA  : TernaryIntrinsicFloat<IL_OP_FMA, int_AMDIL_fma>;
  defm LERP  : TernaryIntrinsicFloat<IL_OP_LERP, int_AMDIL_lerp>;
    }
  defm SUB  : BinaryOpMCf32<IL_OP_SUB, fsub>;
-defm FABS  : UnaryOpMCf32<IL_OP_ABS, fabs>;
  defm NEAR : UnaryOpMCf32<IL_OP_ROUND_NEAR, fnearbyint>;
  defm RND_Z : UnaryOpMCf32<IL_OP_ROUND_ZERO, ftrunc>;
  
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp

index 05a31d3ff9b420a65d9daf12bc31818a8c85860a..4d789cb22b6c7f3b2c2a8635dd8259e529b78ced 100644 (file)
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -13,6 +13,7 @@
  //===----------------------------------------------------------------------===//
  
  #include "R600ISelLowering.h"
+#include "AMDGPUUtil.h"
  #include "R600InstrInfo.h"
  #include "R600MachineFunctionInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -100,6 +101,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
      lowerImplicitParameter(MI, *BB, MRI, 8);
      break;
  
+  case AMDIL::FABS_R600:
+    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV))
+           .addOperand(MI->getOperand(0))
+           .addOperand(MI->getOperand(1));
+    break;
+
    case AMDIL::R600_LOAD_CONST:
      {
        int64_t RegIndex = MI->getOperand(1).getImm();
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td

index b6b9fe002ad2ba6f7e4792d2ed83dd08a1a43aa9..cb9a7bba3d642a35db48e87891d310046a88c403 100644 (file)
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -1078,7 +1078,7 @@ def TXD_SHADOW: AMDGPUShaderInst <
  
  } // End isCodeGenOnly = 1
  
-
+def FABS_R600 : FABS<R600_Reg32>;
  
  let isPseudo = 1 in {
  
diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp

index 1946708f681fcee60c37bb5639963789fcdbf384..3a1a12e635feec3cc4d423b0cb7404b6ca8def61 100644 (file)
--- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp
+++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
@@ -83,16 +83,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
                  .addOperand(MI.getOperand(1));
          break;
  
-      /* XXX: We could propagate the ABS flag to all of the uses of Operand0 and
-       * remove the ABS instruction.*/
-      case AMDIL::FABS_f32:
-      case AMDIL::ABS_f32:
-        MI.getOperand(1).addTargetFlag(MO_FLAG_ABS);
-        BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::MOVE_f32))
-                .addOperand(MI.getOperand(0))
-                .addOperand(MI.getOperand(1));
-        break;
-
        case AMDIL::CLAMP_f32:
          {
            MachineOperand lowOp = MI.getOperand(2);
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp

index 4615db24266bbadfa55ce0a52eb05d128798ceff..e192af091b62b2f865546bc566c5be8441ea416b 100644 (file)
--- a/src/gallium/drivers/radeon/SIISelLowering.cpp
+++ b/src/gallium/drivers/radeon/SIISelLowering.cpp
@@ -45,6 +45,22 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
    switch (MI->getOpcode()) {
    default:
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+
+  case AMDIL::FABS_SI:
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64))
+                 .addOperand(MI->getOperand(0))
+                 .addOperand(MI->getOperand(1))
+                /* VSRC1-2 are unused, but we still need to fill all the
+                 * operand slots, so we just reuse the VSRC0 operand */
+                 .addOperand(MI->getOperand(1))
+                 .addOperand(MI->getOperand(1))
+                 .addImm(1) // ABS
+                 .addImm(0) // CLAMP
+                 .addImm(0) // OMOD
+                 .addImm(0); // NEG
+    MI->eraseFromParent();
+    break;
+
    case AMDIL::SI_INTERP:
      LowerSI_INTERP(MI, *BB, I, MRI);
      break;
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp

index d0b39d0ba01d202df1362c97c729f5ec09a30f38..df2cd9bb02370b98866f3f074b5762ce68ea22d5 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp
@@ -83,7 +83,6 @@ MachineInstr * SIInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
  
    switch (MI.getOpcode()) {
      default: break;
-    case AMDIL::ABS_f32: return convertABS_f32(MI, MF, DL);
      case AMDIL::CLAMP_f32: return convertCLAMP_f32(MI, MF, DL);
    }
  
@@ -113,30 +112,6 @@ unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const
    }
  }
  
-MachineInstr * SIInstrInfo::convertABS_f32(MachineInstr & absInstr,
-    MachineFunction &MF, DebugLoc DL) const
-{
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  MachineOperand &dst = absInstr.getOperand(0);
-
-  /* Convert the desination register to the VReg_32 class */
-  if (TargetRegisterInfo::isVirtualRegister(dst.getReg())) {
-    MRI.setRegClass(dst.getReg(), AMDIL::VReg_32RegisterClass);
-  }
-
-  return BuildMI(MF, DL, get(AMDIL::V_MOV_B32_e64))
-                 .addOperand(absInstr.getOperand(0))
-                 .addOperand(absInstr.getOperand(1))
-                /* VSRC1-2 are unused, but we still need to fill all the
-                 * operand slots, so we just reuse the VSRC0 operand */
-                 .addOperand(absInstr.getOperand(1))
-                 .addOperand(absInstr.getOperand(1))
-                 .addImm(1) // ABS
-                 .addImm(0) // CLAMP
-                 .addImm(0) // OMOD
-                 .addImm(0); // NEG
-}
-
  MachineInstr * SIInstrInfo::convertCLAMP_f32(MachineInstr & clampInstr,
      MachineFunction &MF, DebugLoc DL) const
  {
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h

index 24f7a56f63888f88dab01a7804d847f9afbeaeba..1d137d4efd47cf2daede0df8dbe1d4cf7f9c1a9f 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstrInfo.h
+++ b/src/gallium/drivers/radeon/SIInstrInfo.h
@@ -25,9 +25,6 @@ private:
    const SIRegisterInfo RI;
    AMDGPUTargetMachine &TM;
  
-  MachineInstr * convertABS_f32(MachineInstr & absInstr, MachineFunction &MF,
-                                DebugLoc DL) const;
-
    MachineInstr * convertCLAMP_f32(MachineInstr & clampInstr,
                                    MachineFunction &MF, DebugLoc DL) const;
  
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td

index 8505df9b0ce3a8021e60fe6cf97e0170c011421e..87034684f0becfab1036404d3a9fae57a5b81dd0 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -907,6 +907,7 @@ def : Pat <
                  (S_LOAD_DWORDX4_IMM imm:$sampler_offset, SReg_64:$sampler)) /* Sampler */
  >;
  
+def FABS_SI : FABS<VReg_32>;
  
  def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
  def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;
author	Tom Stellard <thomas.stellard@amd.com>
	Fri, 25 May 2012 14:29:09 +0000 (10:29 -0400)
committer	Tom Stellard <thomas.stellard@amd.com>
	Fri, 25 May 2012 19:40:58 +0000 (15:40 -0400)
src/gallium/drivers/radeon/AMDGPUISelLowering.cpp		patch \| blob \| history
src/gallium/drivers/radeon/AMDGPUInstructions.td		patch \| blob \| history
src/gallium/drivers/radeon/AMDILInstructions.td		patch \| blob \| history
src/gallium/drivers/radeon/R600ISelLowering.cpp		patch \| blob \| history
src/gallium/drivers/radeon/R600Instructions.td		patch \| blob \| history
src/gallium/drivers/radeon/R600LowerInstructions.cpp		patch \| blob \| history
src/gallium/drivers/radeon/SIISelLowering.cpp		patch \| blob \| history
src/gallium/drivers/radeon/SIInstrInfo.cpp		patch \| blob \| history
src/gallium/drivers/radeon/SIInstrInfo.h		patch \| blob \| history
src/gallium/drivers/radeon/SIInstructions.td		patch \| blob \| history