src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp

   1 //===-- AMDILPeepholeOptimizer.cpp - TODO: Add brief description -------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9
  10 #define DEBUG_TYPE "PeepholeOpt"
  11 #ifdef DEBUG
  12 #define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
  13 #else
  14 #define DEBUGME 0
  15 #endif
  16
  17 #include "AMDILAlgorithms.tpp"
  18 #include "AMDILDevices.h"
  19 #include "AMDILUtilityFunctions.h"
  20 #include "llvm/ADT/Statistic.h"
  21 #include "llvm/ADT/StringExtras.h"
  22 #include "llvm/ADT/StringRef.h"
  23 #include "llvm/ADT/Twine.h"
  24 #include "llvm/CodeGen/MachineFunction.h"
  25 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Instructions.h"
  28 #include "llvm/Module.h"
  29 #include "llvm/Support/Debug.h"
  30 #include "llvm/Support/MathExtras.h"
  31
  32 #include <sstream>
  33
  34 #if 0
  35 STATISTIC(PointerAssignments, "Number of dynamic pointer "
  36     "assigments discovered");
  37 STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
  38 #endif
  39
  40 using namespace llvm;
  41 // The Peephole optimization pass is used to do simple last minute optimizations
  42 // that are required for correct code or to remove redundant functions
  43 namespace {
  44 class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass {
  45 public:
  46   TargetMachine &TM;
  47   static char ID;
  48   AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
  49   ~AMDILPeepholeOpt();
  50   const char *getPassName() const;
  51   bool runOnFunction(Function &F);
  52   bool doInitialization(Module &M);
  53   bool doFinalization(Module &M);
  54   void getAnalysisUsage(AnalysisUsage &AU) const;
  55 protected:
  56 private:
  57   // Function to initiate all of the instruction level optimizations.
  58   bool instLevelOptimizations(BasicBlock::iterator *inst);
  59   // Quick check to see if we need to dump all of the pointers into the
  60   // arena. If this is correct, then we set all pointers to exist in arena. This
  61   // is a workaround for aliasing of pointers in a struct/union.
  62   bool dumpAllIntoArena(Function &F);
  63   // Because I don't want to invalidate any pointers while in the
  64   // safeNestedForEachFunction. I push atomic conversions to a vector and handle
  65   // it later. This function does the conversions if required.
  66   void doAtomicConversionIfNeeded(Function &F);
  67   // Because __amdil_is_constant cannot be properly evaluated if
  68   // optimizations are disabled, the call's are placed in a vector
  69   // and evaluated after the __amdil_image* functions are evaluated
  70   // which should allow the __amdil_is_constant function to be
  71   // evaluated correctly.
  72   void doIsConstCallConversionIfNeeded();
  73   bool mChanged;
  74   bool mDebug;
  75   bool mConvertAtomics;
  76   CodeGenOpt::Level optLevel;
  77   // Run a series of tests to see if we can optimize a CALL instruction.
  78   bool optimizeCallInst(BasicBlock::iterator *bbb);
  79   // A peephole optimization to optimize bit extract sequences.
  80   bool optimizeBitExtract(Instruction *inst);
  81   // A peephole optimization to optimize bit insert sequences.
  82   bool optimizeBitInsert(Instruction *inst);
  83   bool setupBitInsert(Instruction *base,
  84                       Instruction *&src,
  85                       Constant *&mask,
  86                       Constant *&shift);
  87   // Expand the bit field insert instruction on versions of OpenCL that
  88   // don't support it.
  89   bool expandBFI(CallInst *CI);
  90   // Expand the bit field mask instruction on version of OpenCL that
  91   // don't support it.
  92   bool expandBFM(CallInst *CI);
  93   // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
  94   // this case we need to expand them. These functions check for 24bit functions
  95   // and then expand.
  96   bool isSigned24BitOps(CallInst *CI);
  97   void expandSigned24BitOps(CallInst *CI);
  98   // One optimization that can occur is that if the required workgroup size is
  99   // specified then the result of get_local_size is known at compile time and
 100   // can be returned accordingly.
 101   bool isRWGLocalOpt(CallInst *CI);
 102   // On northern island cards, the division is slightly less accurate than on
 103   // previous generations, so we need to utilize a more accurate division. So we
 104   // can translate the accurate divide to a normal divide on all other cards.
 105   bool convertAccurateDivide(CallInst *CI);
 106   void expandAccurateDivide(CallInst *CI);
 107   // If the alignment is set incorrectly, it can produce really inefficient
 108   // code. This checks for this scenario and fixes it if possible.
 109   bool correctMisalignedMemOp(Instruction *inst);
 110
 111   // If we are in no opt mode, then we need to make sure that
 112   // local samplers are properly propagated as constant propagation
 113   // doesn't occur and we need to know the value of kernel defined
 114   // samplers at compile time.
 115   bool propagateSamplerInst(CallInst *CI);
 116
 117   LLVMContext *mCTX;
 118   Function *mF;
 119   const AMDILSubtarget *mSTM;
 120   SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
 121   SmallVector<CallInst *, 16> isConstVec;
 122 }; // class AMDILPeepholeOpt
 123   char AMDILPeepholeOpt::ID = 0;
 124 } // anonymous namespace
 125
 126 namespace llvm {
 127   FunctionPass *
 128   createAMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
 129   {
 130     return new AMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR);
 131   }
 132 } // llvm namespace
 133
 134 AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
 135   : FunctionPass(ID), TM(tm)
 136 {
 137   mDebug = DEBUGME;
 138   optLevel = TM.getOptLevel();
 139
 140 }
 141
 142 AMDILPeepholeOpt::~AMDILPeepholeOpt()
 143 {
 144 }
 145
 146 const char *
 147 AMDILPeepholeOpt::getPassName() const
 148 {
 149   return "AMDIL PeepHole Optimization Pass";
 150 }
 151
 152 bool
 153 containsPointerType(Type *Ty)
 154 {
 155   if (!Ty) {
 156     return false;
 157   }
 158   switch(Ty->getTypeID()) {
 159   default:
 160     return false;
 161   case Type::StructTyID: {
 162     const StructType *ST = dyn_cast<StructType>(Ty);
 163     for (StructType::element_iterator stb = ST->element_begin(),
 164            ste = ST->element_end(); stb != ste; ++stb) {
 165       if (!containsPointerType(*stb)) {
 166         continue;
 167       }
 168       return true;
 169     }
 170     break;
 171   }
 172   case Type::VectorTyID:
 173   case Type::ArrayTyID:
 174     return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
 175   case Type::PointerTyID:
 176     return true;
 177   };
 178   return false;
 179 }
 180
 181 bool
 182 AMDILPeepholeOpt::dumpAllIntoArena(Function &F)
 183 {
 184   bool dumpAll = false;
 185   for (Function::const_arg_iterator cab = F.arg_begin(),
 186        cae = F.arg_end(); cab != cae; ++cab) {
 187     const Argument *arg = cab;
 188     const PointerType *PT = dyn_cast<PointerType>(arg->getType());
 189     if (!PT) {
 190       continue;
 191     }
 192     Type *DereferencedType = PT->getElementType();
 193     if (!dyn_cast<StructType>(DereferencedType)
 194         ) {
 195       continue;
 196     }
 197     if (!containsPointerType(DereferencedType)) {
 198       continue;
 199     }
 200     // FIXME: Because a pointer inside of a struct/union may be aliased to
 201     // another pointer we need to take the conservative approach and place all
 202     // pointers into the arena until more advanced detection is implemented.
 203     dumpAll = true;
 204   }
 205   return dumpAll;
 206 }
 207 void
 208 AMDILPeepholeOpt::doIsConstCallConversionIfNeeded()
 209 {
 210   if (isConstVec.empty()) {
 211     return;
 212   }
 213   for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
 214     CallInst *CI = isConstVec[x];
 215     Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
 216     Type *aType = Type::getInt32Ty(*mCTX);
 217     Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
 218       : ConstantInt::get(aType, 0);
 219     CI->replaceAllUsesWith(Val);
 220     CI->eraseFromParent();
 221   }
 222   isConstVec.clear();
 223 }
 224 void
 225 AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
 226 {
 227   // Don't do anything if we don't have any atomic operations.
 228   if (atomicFuncs.empty()) {
 229     return;
 230   }
 231   // Change the function name for the atomic if it is required
 232   uint32_t size = atomicFuncs.size();
 233   for (uint32_t x = 0; x < size; ++x) {
 234     atomicFuncs[x].first->setOperand(
 235         atomicFuncs[x].first->getNumOperands()-1,
 236         atomicFuncs[x].second);
 237
 238   }
 239   mChanged = true;
 240   if (mConvertAtomics) {
 241     return;
 242   }
 243 }
 244
 245 bool
 246 AMDILPeepholeOpt::runOnFunction(Function &MF)
 247 {
 248   mChanged = false;
 249   mF = &MF;
 250   mSTM = &TM.getSubtarget<AMDILSubtarget>();
 251   if (mDebug) {
 252     MF.dump();
 253   }
 254   mCTX = &MF.getType()->getContext();
 255   mConvertAtomics = true;
 256   safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
 257      std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations),
 258                   this));
 259
 260   doAtomicConversionIfNeeded(MF);
 261   doIsConstCallConversionIfNeeded();
 262
 263   if (mDebug) {
 264     MF.dump();
 265   }
 266   return mChanged;
 267 }
 268
 269 bool
 270 AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
 271 {
 272   Instruction *inst = (*bbb);
 273   CallInst *CI = dyn_cast<CallInst>(inst);
 274   if (!CI) {
 275     return false;
 276   }
 277   if (isSigned24BitOps(CI)) {
 278     expandSigned24BitOps(CI);
 279     ++(*bbb);
 280     CI->eraseFromParent();
 281     return true;
 282   }
 283   if (propagateSamplerInst(CI)) {
 284     return false;
 285   }
 286   if (expandBFI(CI) || expandBFM(CI)) {
 287     ++(*bbb);
 288     CI->eraseFromParent();
 289     return true;
 290   }
 291   if (convertAccurateDivide(CI)) {
 292     expandAccurateDivide(CI);
 293     ++(*bbb);
 294     CI->eraseFromParent();
 295     return true;
 296   }
 297
 298   StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
 299   if (calleeName.startswith("__amdil_is_constant")) {
 300     // If we do not have optimizations, then this
 301     // cannot be properly evaluated, so we add the
 302     // call instruction to a vector and process
 303     // them at the end of processing after the
 304     // samplers have been correctly handled.
 305     if (optLevel == CodeGenOpt::None) {
 306       isConstVec.push_back(CI);
 307       return false;
 308     } else {
 309       Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
 310       Type *aType = Type::getInt32Ty(*mCTX);
 311       Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
 312         : ConstantInt::get(aType, 0);
 313       CI->replaceAllUsesWith(Val);
 314       ++(*bbb);
 315       CI->eraseFromParent();
 316       return true;
 317     }
 318   }
 319
 320   if (calleeName.equals("__amdil_is_asic_id_i32")) {
 321     ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
 322     Type *aType = Type::getInt32Ty(*mCTX);
 323     Value *Val = CV;
 324     if (Val) {
 325       Val = ConstantInt::get(aType,
 326           mSTM->device()->getDeviceFlag() & CV->getZExtValue());
 327     } else {
 328       Val = ConstantInt::get(aType, 0);
 329     }
 330     CI->replaceAllUsesWith(Val);
 331     ++(*bbb);
 332     CI->eraseFromParent();
 333     return true;
 334   }
 335   Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
 336   if (!F) {
 337     return false;
 338   }
 339   if (F->getName().startswith("__atom") && !CI->getNumUses()
 340       && F->getName().find("_xchg") == StringRef::npos) {
 341     std::string buffer(F->getName().str() + "_noret");
 342     F = dyn_cast<Function>(
 343           F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
 344     atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
 345   }
 346
 347   if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
 348       && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
 349     return false;
 350   }
 351   if (!mConvertAtomics) {
 352     return false;
 353   }
 354   StringRef name = F->getName();
 355   if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
 356     mConvertAtomics = false;
 357   }
 358   return false;
 359 }
 360
 361 bool
 362 AMDILPeepholeOpt::setupBitInsert(Instruction *base,
 363     Instruction *&src,
 364     Constant *&mask,
 365     Constant *&shift)
 366 {
 367   if (!base) {
 368     if (mDebug) {
 369       dbgs() << "Null pointer passed into function.\n";
 370     }
 371     return false;
 372   }
 373   bool andOp = false;
 374   if (base->getOpcode() == Instruction::Shl) {
 375     shift = dyn_cast<Constant>(base->getOperand(1));
 376   } else if (base->getOpcode() == Instruction::And) {
 377     mask = dyn_cast<Constant>(base->getOperand(1));
 378     andOp = true;
 379   } else {
 380     if (mDebug) {
 381       dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
 382     }
 383     // If the base is neither a Shl or a And, we don't fit any of the patterns above.
 384     return false;
 385   }
 386   src = dyn_cast<Instruction>(base->getOperand(0));
 387   if (!src) {
 388     if (mDebug) {
 389       dbgs() << "Failed setup since the base operand is not an instruction!\n";
 390     }
 391     return false;
 392   }
 393   // If we find an 'and' operation, then we don't need to
 394   // find the next operation as we already know the
 395   // bits that are valid at this point.
 396   if (andOp) {
 397     return true;
 398   }
 399   if (src->getOpcode() == Instruction::Shl && !shift) {
 400     shift = dyn_cast<Constant>(src->getOperand(1));
 401     src = dyn_cast<Instruction>(src->getOperand(0));
 402   } else if (src->getOpcode() == Instruction::And && !mask) {
 403     mask = dyn_cast<Constant>(src->getOperand(1));
 404   }
 405   if (!mask && !shift) {
 406     if (mDebug) {
 407       dbgs() << "Failed setup since both mask and shift are NULL!\n";
 408     }
 409     // Did not find a constant mask or a shift.
 410     return false;
 411   }
 412   return true;
 413 }
 414 bool
 415 AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst)
 416 {
 417   if (!inst) {
 418     return false;
 419   }
 420   if (!inst->isBinaryOp()) {
 421     return false;
 422   }
 423   if (inst->getOpcode() != Instruction::Or) {
 424     return false;
 425   }
 426   if (optLevel == CodeGenOpt::None) {
 427     return false;
 428   }
 429   // We want to do an optimization on a sequence of ops that in the end equals a
 430   // single ISA instruction.
 431   // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
 432   // Some simplified versions of this pattern are as follows:
 433   // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
 434   // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
 435   // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
 436   // (A & B) | (D << F) when (1 << F) >= B
 437   // (A << C) | (D & E) when (1 << C) >= E
 438   if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
 439     // The HD4XXX hardware doesn't support the ubit_insert instruction.
 440     return false;
 441   }
 442   Type *aType = inst->getType();
 443   bool isVector = aType->isVectorTy();
 444   int numEle = 1;
 445   // This optimization only works on 32bit integers.
 446   if (aType->getScalarType()
 447       != Type::getInt32Ty(inst->getContext())) {
 448     return false;
 449   }
 450   if (isVector) {
 451     const VectorType *VT = dyn_cast<VectorType>(aType);
 452     numEle = VT->getNumElements();
 453     // We currently cannot support more than 4 elements in a intrinsic and we
 454     // cannot support Vec3 types.
 455     if (numEle > 4 || numEle == 3) {
 456       return false;
 457     }
 458   }
 459   // TODO: Handle vectors.
 460   if (isVector) {
 461     if (mDebug) {
 462       dbgs() << "!!! Vectors are not supported yet!\n";
 463     }
 464     return false;
 465   }
 466   Instruction *LHSSrc = NULL, *RHSSrc = NULL;
 467   Constant *LHSMask = NULL, *RHSMask = NULL;
 468   Constant *LHSShift = NULL, *RHSShift = NULL;
 469   Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
 470   Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
 471   if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
 472     if (mDebug) {
 473       dbgs() << "Found an OR Operation that failed setup!\n";
 474       inst->dump();
 475       if (LHS) { LHS->dump(); }
 476       if (LHSSrc) { LHSSrc->dump(); }
 477       if (LHSMask) { LHSMask->dump(); }
 478       if (LHSShift) { LHSShift->dump(); }
 479     }
 480     // There was an issue with the setup for BitInsert.
 481     return false;
 482   }
 483   if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
 484     if (mDebug) {
 485       dbgs() << "Found an OR Operation that failed setup!\n";
 486       inst->dump();
 487       if (RHS) { RHS->dump(); }
 488       if (RHSSrc) { RHSSrc->dump(); }
 489       if (RHSMask) { RHSMask->dump(); }
 490       if (RHSShift) { RHSShift->dump(); }
 491     }
 492     // There was an issue with the setup for BitInsert.
 493     return false;
 494   }
 495   if (mDebug) {
 496     dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
 497     dbgs() << "Op:        "; inst->dump();
 498     dbgs() << "LHS:       "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
 499     dbgs() << "LHS Src:   "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
 500     dbgs() << "LHS Mask:  "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
 501     dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
 502     dbgs() << "RHS:       "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
 503     dbgs() << "RHS Src:   "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
 504     dbgs() << "RHS Mask:  "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
 505     dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
 506   }
 507   Constant *offset = NULL;
 508   Constant *width = NULL;
 509   int32_t lhsMaskVal = 0, rhsMaskVal = 0;
 510   int32_t lhsShiftVal = 0, rhsShiftVal = 0;
 511   int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
 512   int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
 513   lhsMaskVal = (int32_t)(LHSMask
 514       ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
 515   rhsMaskVal = (int32_t)(RHSMask
 516       ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
 517   lhsShiftVal = (int32_t)(LHSShift
 518       ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
 519   rhsShiftVal = (int32_t)(RHSShift
 520       ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
 521   lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
 522   rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
 523   lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
 524   rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
 525   // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks).
 526   if (mDebug) {
 527       dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
 528       dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
 529       dbgs() << (RHSMask ? " & E)" : ")");
 530       dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
 531       dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
 532       dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
 533       dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
 534       dbgs() << "width(B) = " << lhsMaskWidth;
 535       dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
 536       dbgs() << "offset(B) = " << lhsMaskOffset;
 537       dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
 538       dbgs() << "Constraints: \n";
 539       dbgs() << "\t(1) B ^ E == 0\n";
 540       dbgs() << "\t(2-LHS) B is a mask\n";
 541       dbgs() << "\t(2-LHS) E is a mask\n";
 542       dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
 543       dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
 544   }
 545   if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
 546     if (mDebug) {
 547       dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
 548       dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
 549       dbgs() << "Failed constraint 1!\n";
 550     }
 551     return false;
 552   }
 553   if (mDebug) {
 554     dbgs() << "LHS = " << lhsMaskOffset << "";
 555     dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
 556     dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
 557     dbgs() << "\nRHS = " << rhsMaskOffset << "";
 558     dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
 559     dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
 560     dbgs() << "\n";
 561   }
 562   if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
 563     offset = ConstantInt::get(aType, lhsMaskOffset, false);
 564     width = ConstantInt::get(aType, lhsMaskWidth, false);
 565     RHSSrc = RHS;
 566     if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
 567       if (mDebug) {
 568         dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
 569         dbgs() << "Failed constraint 2!\n";
 570       }
 571       return false;
 572     }
 573     if (!LHSShift) {
 574       LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
 575           "MaskShr", LHS);
 576     } else if (lhsShiftVal != lhsMaskOffset) {
 577       LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
 578           "MaskShr", LHS);
 579     }
 580     if (mDebug) {
 581       dbgs() << "Optimizing LHS!\n";
 582     }
 583   } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
 584     offset = ConstantInt::get(aType, rhsMaskOffset, false);
 585     width = ConstantInt::get(aType, rhsMaskWidth, false);
 586     LHSSrc = RHSSrc;
 587     RHSSrc = LHS;
 588     if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
 589       if (mDebug) {
 590         dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
 591         dbgs() << "Failed constraint 2!\n";
 592       }
 593       return false;
 594     }
 595     if (!RHSShift) {
 596       LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
 597           "MaskShr", RHS);
 598     } else if (rhsShiftVal != rhsMaskOffset) {
 599       LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
 600           "MaskShr", RHS);
 601     }
 602     if (mDebug) {
 603       dbgs() << "Optimizing RHS!\n";
 604     }
 605   } else {
 606     if (mDebug) {
 607       dbgs() << "Failed constraint 3!\n";
 608     }
 609     return false;
 610   }
 611   if (mDebug) {
 612     dbgs() << "Width:  "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
 613     dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
 614     dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
 615     dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
 616   }
 617   if (!offset || !width) {
 618     if (mDebug) {
 619       dbgs() << "Either width or offset are NULL, failed detection!\n";
 620     }
 621     return false;
 622   }
 623   // Lets create the function signature.
 624   std::vector<Type *> callTypes;
 625   callTypes.push_back(aType);
 626   callTypes.push_back(aType);
 627   callTypes.push_back(aType);
 628   callTypes.push_back(aType);
 629   FunctionType *funcType = FunctionType::get(aType, callTypes, false);
 630   std::string name = "__amdil_ubit_insert";
 631   if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
 632   Function *Func =
 633     dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
 634         getOrInsertFunction(llvm::StringRef(name), funcType));
 635   Value *Operands[4] = {
 636     width,
 637     offset,
 638     LHSSrc,
 639     RHSSrc
 640   };
 641   CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
 642   if (mDebug) {
 643     dbgs() << "Old Inst: ";
 644     inst->dump();
 645     dbgs() << "New Inst: ";
 646     CI->dump();
 647     dbgs() << "\n\n";
 648   }
 649   CI->insertBefore(inst);
 650   inst->replaceAllUsesWith(CI);
 651   return true;
 652 }
 653
 654 bool
 655 AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
 656 {
 657   if (!inst) {
 658     return false;
 659   }
 660   if (!inst->isBinaryOp()) {
 661     return false;
 662   }
 663   if (inst->getOpcode() != Instruction::And) {
 664     return false;
 665   }
 666   if (optLevel == CodeGenOpt::None) {
 667     return false;
 668   }
 669   // We want to do some simple optimizations on Shift right/And patterns. The
 670   // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
 671   // value smaller than 32 and C is a mask. If C is a constant value, then the
 672   // following transformation can occur. For signed integers, it turns into the
 673   // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned
 674   // integers, it turns into the function call dst =
 675   // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
 676   // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
 677   // Evergreen hardware.
 678   if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
 679     // This does not work on HD4XXX hardware.
 680     return false;
 681   }
 682   Type *aType = inst->getType();
 683   bool isVector = aType->isVectorTy();
 684   int numEle = 1;
 685   // This only works on 32bit integers
 686   if (aType->getScalarType()
 687       != Type::getInt32Ty(inst->getContext())) {
 688     return false;
 689   }
 690   if (isVector) {
 691     const VectorType *VT = dyn_cast<VectorType>(aType);
 692     numEle = VT->getNumElements();
 693     // We currently cannot support more than 4 elements in a intrinsic and we
 694     // cannot support Vec3 types.
 695     if (numEle > 4 || numEle == 3) {
 696       return false;
 697     }
 698   }
 699   BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
 700   // If the first operand is not a shift instruction, then we can return as it
 701   // doesn't match this pattern.
 702   if (!ShiftInst || !ShiftInst->isShift()) {
 703     return false;
 704   }
 705   // If we are a shift left, then we need don't match this pattern.
 706   if (ShiftInst->getOpcode() == Instruction::Shl) {
 707     return false;
 708   }
 709   bool isSigned = ShiftInst->isArithmeticShift();
 710   Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
 711   Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
 712   // Lets make sure that the shift value and the and mask are constant integers.
 713   if (!AndMask || !ShrVal) {
 714     return false;
 715   }
 716   Constant *newMaskConst;
 717   Constant *shiftValConst;
 718   if (isVector) {
 719     // Handle the vector case
 720     std::vector<Constant *> maskVals;
 721     std::vector<Constant *> shiftVals;
 722     ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
 723     ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
 724     Type *scalarType = AndMaskVec->getType()->getScalarType();
 725     assert(AndMaskVec->getNumOperands() ==
 726            ShrValVec->getNumOperands() && "cannot have a "
 727            "combination where the number of elements to a "
 728            "shift and an and are different!");
 729     for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
 730       ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
 731       ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
 732       if (!AndCI || !ShiftIC) {
 733         return false;
 734       }
 735       uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
 736       if (!isMask_32(maskVal)) {
 737         return false;
 738       }
 739       maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
 740       uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
 741       // If the mask or shiftval is greater than the bitcount, then break out.
 742       if (maskVal >= 32 || shiftVal >= 32) {
 743         return false;
 744       }
 745       // If the mask val is greater than the the number of original bits left
 746       // then this optimization is invalid.
 747       if (maskVal > (32 - shiftVal)) {
 748         return false;
 749       }
 750       maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
 751       shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
 752     }
 753     newMaskConst = ConstantVector::get(maskVals);
 754     shiftValConst = ConstantVector::get(shiftVals);
 755   } else {
 756     // Handle the scalar case
 757     uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
 758     // This must be a mask value where all lower bits are set to 1 and then any
 759     // bit higher is set to 0.
 760     if (!isMask_32(maskVal)) {
 761       return false;
 762     }
 763     maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
 764     // Count the number of bits set in the mask, this is the width of the
 765     // resulting bit set that is extracted from the source value.
 766     uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
 767     // If the mask or shift val is greater than the bitcount, then break out.
 768     if (maskVal >= 32 || shiftVal >= 32) {
 769       return false;
 770     }
 771     // If the mask val is greater than the the number of original bits left then
 772     // this optimization is invalid.
 773     if (maskVal > (32 - shiftVal)) {
 774       return false;
 775     }
 776     newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
 777     shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
 778   }
 779   // Lets create the function signature.
 780   std::vector<Type *> callTypes;
 781   callTypes.push_back(aType);
 782   callTypes.push_back(aType);
 783   callTypes.push_back(aType);
 784   FunctionType *funcType = FunctionType::get(aType, callTypes, false);
 785   std::string name = "__amdil_ubit_extract";
 786   if (isVector) {
 787     name += "_v" + itostr(numEle) + "i32";
 788   } else {
 789     name += "_i32";
 790   }
 791   // Lets create the function.
 792   Function *Func =
 793     dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
 794                        getOrInsertFunction(llvm::StringRef(name), funcType));
 795   Value *Operands[3] = {
 796     newMaskConst,
 797     shiftValConst,
 798     ShiftInst->getOperand(0)
 799   };
 800   // Lets create the Call with the operands
 801   CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
 802   CI->insertBefore(inst);
 803   inst->replaceAllUsesWith(CI);
 804   return true;
 805 }
 806
 807 bool
 808 AMDILPeepholeOpt::expandBFI(CallInst *CI)
 809 {
 810   if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
 811     return false;
 812   }
 813   Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
 814   if (!LHS->getName().startswith("__amdil_bfi")) {
 815     return false;
 816   }
 817   Type* type = CI->getOperand(0)->getType();
 818   Constant *negOneConst = NULL;
 819   if (type->isVectorTy()) {
 820     std::vector<Constant *> negOneVals;
 821     negOneConst = ConstantInt::get(CI->getContext(),
 822         APInt(32, StringRef("-1"), 10));
 823     for (size_t x = 0,
 824         y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
 825       negOneVals.push_back(negOneConst);
 826     }
 827     negOneConst = ConstantVector::get(negOneVals);
 828   } else {
 829     negOneConst = ConstantInt::get(CI->getContext(),
 830         APInt(32, StringRef("-1"), 10));
 831   }
 832   // __amdil_bfi => (A & B) | (~A & C)
 833   BinaryOperator *lhs =
 834     BinaryOperator::Create(Instruction::And, CI->getOperand(0),
 835         CI->getOperand(1), "bfi_and", CI);
 836   BinaryOperator *rhs =
 837     BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
 838         "bfi_not", CI);
 839   rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
 840       "bfi_and", CI);
 841   lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
 842   CI->replaceAllUsesWith(lhs);
 843   return true;
 844 }
 845
 846 bool
 847 AMDILPeepholeOpt::expandBFM(CallInst *CI)
 848 {
 849   if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
 850     return false;
 851   }
 852   Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
 853   if (!LHS->getName().startswith("__amdil_bfm")) {
 854     return false;
 855   }
 856   // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
 857   Constant *newMaskConst = NULL;
 858   Constant *newShiftConst = NULL;
 859   Type* type = CI->getOperand(0)->getType();
 860   if (type->isVectorTy()) {
 861     std::vector<Constant*> newMaskVals, newShiftVals;
 862     newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
 863     newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
 864     for (size_t x = 0,
 865         y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
 866       newMaskVals.push_back(newMaskConst);
 867       newShiftVals.push_back(newShiftConst);
 868     }
 869     newMaskConst = ConstantVector::get(newMaskVals);
 870     newShiftConst = ConstantVector::get(newShiftVals);
 871   } else {
 872     newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
 873     newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
 874   }
 875   BinaryOperator *lhs =
 876     BinaryOperator::Create(Instruction::And, CI->getOperand(0),
 877         newMaskConst, "bfm_mask", CI);
 878   lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
 879       lhs, "bfm_shl", CI);
 880   lhs = BinaryOperator::Create(Instruction::Sub, lhs,
 881       newShiftConst, "bfm_sub", CI);
 882   BinaryOperator *rhs =
 883     BinaryOperator::Create(Instruction::And, CI->getOperand(1),
 884         newMaskConst, "bfm_mask", CI);
 885   lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
 886   CI->replaceAllUsesWith(lhs);
 887   return true;
 888 }
 889
 890 bool
 891 AMDILPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
 892 {
 893   Instruction *inst = (*bbb);
 894   if (optimizeCallInst(bbb)) {
 895     return true;
 896   }
 897   if (optimizeBitExtract(inst)) {
 898     return false;
 899   }
 900   if (optimizeBitInsert(inst)) {
 901     return false;
 902   }
 903   if (correctMisalignedMemOp(inst)) {
 904     return false;
 905   }
 906   return false;
 907 }
 908 bool
 909 AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
 910 {
 911   LoadInst *linst = dyn_cast<LoadInst>(inst);
 912   StoreInst *sinst = dyn_cast<StoreInst>(inst);
 913   unsigned alignment;
 914   Type* Ty = inst->getType();
 915   if (linst) {
 916     alignment = linst->getAlignment();
 917     Ty = inst->getType();
 918   } else if (sinst) {
 919     alignment = sinst->getAlignment();
 920     Ty = sinst->getValueOperand()->getType();
 921   } else {
 922     return false;
 923   }
 924   unsigned size = getTypeSize(Ty);
 925   if (size == alignment || size < alignment) {
 926     return false;
 927   }
 928   if (!Ty->isStructTy()) {
 929     return false;
 930   }
 931   if (alignment < 4) {
 932     if (linst) {
 933       linst->setAlignment(0);
 934       return true;
 935     } else if (sinst) {
 936       sinst->setAlignment(0);
 937       return true;
 938     }
 939   }
 940   return false;
 941 }
 942 bool
 943 AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI)
 944 {
 945   if (!CI) {
 946     return false;
 947   }
 948   Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
 949   std::string namePrefix = LHS->getName().substr(0, 14);
 950   if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
 951       && namePrefix != "__amdil__imul24_high") {
 952     return false;
 953   }
 954   if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
 955     return false;
 956   }
 957   return true;
 958 }
 959
 960 void
 961 AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
 962 {
 963   assert(isSigned24BitOps(CI) && "Must be a "
 964       "signed 24 bit operation to call this function!");
 965   Value *LHS = CI->getOperand(CI->getNumOperands()-1);
 966   // On 7XX and 8XX we do not have signed 24bit, so we need to
 967   // expand it to the following:
 968   // imul24 turns into 32bit imul
 969   // imad24 turns into 32bit imad
 970   // imul24_high turns into 32bit imulhigh
 971   if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
 972     Type *aType = CI->getOperand(0)->getType();
 973     bool isVector = aType->isVectorTy();
 974     int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
 975     std::vector<Type*> callTypes;
 976     callTypes.push_back(CI->getOperand(0)->getType());
 977     callTypes.push_back(CI->getOperand(1)->getType());
 978     callTypes.push_back(CI->getOperand(2)->getType());
 979     FunctionType *funcType =
 980       FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
 981     std::string name = "__amdil_imad";
 982     if (isVector) {
 983       name += "_v" + itostr(numEle) + "i32";
 984     } else {
 985       name += "_i32";
 986     }
 987     Function *Func = dyn_cast<Function>(
 988                        CI->getParent()->getParent()->getParent()->
 989                        getOrInsertFunction(llvm::StringRef(name), funcType));
 990     Value *Operands[3] = {
 991       CI->getOperand(0),
 992       CI->getOperand(1),
 993       CI->getOperand(2)
 994     };
 995     CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
 996     nCI->insertBefore(CI);
 997     CI->replaceAllUsesWith(nCI);
 998   } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
 999     BinaryOperator *mulOp =
1000       BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
1001           CI->getOperand(1), "imul24", CI);
1002     CI->replaceAllUsesWith(mulOp);
1003   } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
1004     Type *aType = CI->getOperand(0)->getType();
1005
1006     bool isVector = aType->isVectorTy();
1007     int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
1008     std::vector<Type*> callTypes;
1009     callTypes.push_back(CI->getOperand(0)->getType());
1010     callTypes.push_back(CI->getOperand(1)->getType());
1011     FunctionType *funcType =
1012       FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
1013     std::string name = "__amdil_imul_high";
1014     if (isVector) {
1015       name += "_v" + itostr(numEle) + "i32";
1016     } else {
1017       name += "_i32";
1018     }
1019     Function *Func = dyn_cast<Function>(
1020                        CI->getParent()->getParent()->getParent()->
1021                        getOrInsertFunction(llvm::StringRef(name), funcType));
1022     Value *Operands[2] = {
1023       CI->getOperand(0),
1024       CI->getOperand(1)
1025     };
1026     CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
1027     nCI->insertBefore(CI);
1028     CI->replaceAllUsesWith(nCI);
1029   }
1030 }
1031
1032 bool
1033 AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
1034 {
1035   return (CI != NULL
1036           && CI->getOperand(CI->getNumOperands() - 1)->getName()
1037           == "__amdil_get_local_size_int");
1038 }
1039
1040 bool
1041 AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
1042 {
1043   if (!CI) {
1044     return false;
1045   }
1046   if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX
1047       && (mSTM->getDeviceName() == "cayman")) {
1048     return false;
1049   }
1050   return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
1051       == "__amdil_improved_div";
1052 }
1053
1054 void
1055 AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI)
1056 {
1057   assert(convertAccurateDivide(CI)
1058          && "expanding accurate divide can only happen if it is expandable!");
1059   BinaryOperator *divOp =
1060     BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
1061                            CI->getOperand(1), "fdiv32", CI);
1062   CI->replaceAllUsesWith(divOp);
1063 }
1064
1065 bool
1066 AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI)
1067 {
1068   if (optLevel != CodeGenOpt::None) {
1069     return false;
1070   }
1071
1072   if (!CI) {
1073     return false;
1074   }
1075
1076   unsigned funcNameIdx = 0;
1077   funcNameIdx = CI->getNumOperands() - 1;
1078   StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
1079   if (calleeName != "__amdil_image2d_read_norm"
1080    && calleeName != "__amdil_image2d_read_unnorm"
1081    && calleeName != "__amdil_image3d_read_norm"
1082    && calleeName != "__amdil_image3d_read_unnorm") {
1083     return false;
1084   }
1085
1086   unsigned samplerIdx = 2;
1087   samplerIdx = 1;
1088   Value *sampler = CI->getOperand(samplerIdx);
1089   LoadInst *lInst = dyn_cast<LoadInst>(sampler);
1090   if (!lInst) {
1091     return false;
1092   }
1093
1094   if (lInst->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
1095     return false;
1096   }
1097
1098   GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
1099   // If we are loading from what is not a global value, then we
1100   // fail and return.
1101   if (!gv) {
1102     return false;
1103   }
1104
1105   // If we don't have an initializer or we have an initializer and
1106   // the initializer is not a 32bit integer, we fail.
1107   if (!gv->hasInitializer()
1108       || !gv->getInitializer()->getType()->isIntegerTy(32)) {
1109       return false;
1110   }
1111
1112   // Now that we have the global variable initializer, lets replace
1113   // all uses of the load instruction with the samplerVal and
1114   // reparse the __amdil_is_constant() function.
1115   Constant *samplerVal = gv->getInitializer();
1116   lInst->replaceAllUsesWith(samplerVal);
1117   return true;
1118 }
1119
1120 bool
1121 AMDILPeepholeOpt::doInitialization(Module &M)
1122 {
1123   return false;
1124 }
1125
1126 bool
1127 AMDILPeepholeOpt::doFinalization(Module &M)
1128 {
1129   return false;
1130 }
1131
1132 void
1133 AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
1134 {
1135   AU.addRequired<MachineFunctionAnalysis>();
1136   FunctionPass::getAnalysisUsage(AU);
1137   AU.setPreservesAll();
1138 }