radeon/llvm: Turn on the BitExtract peephole optimization

author Tom Stellard <thomas.stellard@amd.com>

Wed, 20 Jun 2012 21:43:11 +0000 (17:43 -0400)

committer Tom Stellard <thomas.stellard@amd.com>

Thu, 21 Jun 2012 20:42:06 +0000 (20:42 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Wed, 20 Jun 2012 21:43:11 +0000 (17:43 -0400)
committer Tom Stellard <thomas.stellard@amd.com>
Thu, 21 Jun 2012 20:42:06 +0000 (20:42 +0000)
diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp

index b62c7ab048bc5a158c7379b5e27bb6d3a5cf0914..5b5932ac8c2231bcc974cd96323169829d09e0da 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
+++ b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
@@ -691,6 +691,11 @@ AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
    }
    Type *aType = inst->getType();
    bool isVector = aType->isVectorTy();
+
+  // XXX Support vector types
+  if (isVector) {
+    return false;
+  }
    int numEle = 1;
    // This only works on 32bit integers
    if (aType->getScalarType()
@@ -792,23 +797,24 @@ AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
    callTypes.push_back(aType);
    callTypes.push_back(aType);
    FunctionType *funcType = FunctionType::get(aType, callTypes, false);
-  std::string name = "__amdil_ubit_extract";
+  std::string name = "llvm.AMDIL.bit.extract.u32";
    if (isVector) {
-    name += "_v" + itostr(numEle) + "i32";
+    name += ".v" + itostr(numEle) + "i32";
    } else {
-    name += "_i32";
+    name += ".";
    }
    // Lets create the function.
    Function *Func = 
      dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
                         getOrInsertFunction(llvm::StringRef(name), funcType));
    Value *Operands[3] = {
-    newMaskConst,
+    ShiftInst->getOperand(0),
      shiftValConst,
-    ShiftInst->getOperand(0)
+    newMaskConst
    };
    // Lets create the Call with the operands
    CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+  CI->setDoesNotAccessMemory();
    CI->insertBefore(inst);
    inst->replaceAllUsesWith(CI);
    return true;
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td

index 409969b0586c6269a13b796667aa6fae90578850..6c74c6cd7bdb12bc271b5af2590e04c584bf9918 100644 (file)
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -822,6 +822,27 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
  
  let Predicates = [isEGorCayman] in {
  
+  // BFE_UINT - bit_extract, an optimization for mask and shift
+  // Src0 = Input
+  // Src1 = Offset
+  // Src2 = Width
+  //
+  // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
+  //
+  // Example Usage:
+  // (Offset, Width)
+  //
+  // (0, 8)           = (Input << 24) >> 24  = (Input &  0xff)       >> 0
+  // (8, 8)           = (Input << 16) >> 24  = (Input &  0xffff)     >> 8
+  // (16,8)           = (Input <<  8) >> 24  = (Input &  0xffffff)   >> 16
+  // (24,8)           = (Input <<  0) >> 24  = (Input &  0xffffffff) >> 24
+  def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
+    [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
+                                                      R600_Reg32:$src1,
+                                                      R600_Reg32:$src2))],
+    VecALU
+  >;
+
    def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
      [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
                                            R600_Reg32:$src2))],
author	Tom Stellard <thomas.stellard@amd.com>
	Wed, 20 Jun 2012 21:43:11 +0000 (17:43 -0400)
committer	Tom Stellard <thomas.stellard@amd.com>
	Thu, 21 Jun 2012 20:42:06 +0000 (20:42 +0000)
src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp		patch \| blob \| history
src/gallium/drivers/radeon/R600Instructions.td		patch \| blob \| history