//===-- AMDILInliner.cpp - AMDIL function inlining pass ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//

#define DEBUG_TYPE "amdilinline"
#include "AMDIL.h"
#include "AMDILCompilerErrors.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILSubtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

namespace
{
  class LLVM_LIBRARY_VISIBILITY AMDILInlinePass : public FunctionPass
  {
  public:
    TargetMachine &TM;
    static char ID;
    AMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
    ~AMDILInlinePass();
    virtual const char* getPassName() const;
    virtual bool runOnFunction(Function &F);
    bool doInitialization(Module &M);
    bool doFinalization(Module &M);
    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
  private:
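    // Maps each array type to the list of allocas of that type that have
    // already been inlined into the current function and may be reused for
    // later inlined allocas of the same type.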
    typedef DenseMap<const ArrayType*, SmallVector<AllocaInst*,
            DEFAULT_VEC_SLOTS> > InlinedArrayAllocasTy;
    bool
    AMDILInlineCallIfPossible(CallSite CS,
                              const TargetData *TD,
                              InlinedArrayAllocasTy &InlinedArrayAllocas);

    CodeGenOpt::Level OptLevel;
  };
  char AMDILInlinePass::ID = 0;
} // anonymous namespace

namespace llvm
{
  FunctionPass*
  createAMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
  {
    return new AMDILInlinePass(tm AMDIL_OPT_LEVEL_VAR);
  }
} // llvm namespace
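
// A minimal usage sketch (an assumption about the surrounding driver code;
// the actual hook lives in the AMDIL target's pass setup, not here):
//   PM.add(createAMDILInlinePass(TM));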

AMDILInlinePass::AMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
  : FunctionPass(ID), TM(tm)
{
  OptLevel = tm.getOptLevel();
}

AMDILInlinePass::~AMDILInlinePass()
{
}

bool
AMDILInlinePass::AMDILInlineCallIfPossible(CallSite CS,
    const TargetData *TD, InlinedArrayAllocasTy &InlinedArrayAllocas) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Try to inline the function. InlineFunctionInfo collects the list of
  // static allocas that were cloned into the caller.
  InlineFunctionInfo IFI;
  if (!InlineFunction(CS, IFI))
    return false;
  DEBUG(errs() << "<amdilinline> function " << Caller->getName()
        << ": inlined call to " << Callee->getName() << "\n");

  // If the inlined function had a higher stack protection level than the
  // calling function, then bump up the caller's stack protection level.
  if (Callee->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtectReq);
  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
           !Caller->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtect);

  // Look at all of the allocas that we inlined through this call site. If we
  // have already inlined other allocas through other calls into this
  // function, then we know that they have disjoint lifetimes and that we can
  // merge them.
  //
  // There are many heuristics possible for merging these allocas, and the
  // different options have different tradeoffs. One thing that we *really*
  // don't want to hurt is SRoA: once inlining happens, often allocas are no
  // longer address taken and so they can be promoted.
  //
  // Our "solution" for that is to only merge allocas whose outermost type is
  // an array type. These are usually not promoted because someone is using a
  // variable index into them. These are also often the most important ones
  // to merge.
  //
  // A better solution would be to have real memory lifetime markers in the
  // IR and not have the inliner do any merging of allocas at all. This would
  // allow the backend to do proper stack slot coloring of all allocas that
  // *actually make it to the backend*, which is really what we want.
  //
  // Because we don't have this information, we do this simple and useful
  // hack.
  //
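  // For illustration (a hypothetical example, not from any particular
  // kernel): after inlining two calls to a callee that declares
  // "%buf = alloca [64 x i32]", the caller would contain
  //   %buf.i  = alloca [64 x i32]   ; from the first inlined call
  //   %buf.i1 = alloca [64 x i32]   ; from the second inlined call
  // and the second alloca can be RAUW'd onto the first, since the two
  // inlined bodies have disjoint lifetimes.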
  SmallPtrSet<AllocaInst*, 16> UsedAllocas;

  // Loop over all the allocas we have so far and see if they can be merged
  // with a previously inlined alloca. If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing
    // SRoA).
    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (ATy == 0 || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    SmallVector<AllocaInst*, DEFAULT_VEC_SLOTS> &AllocasForType
      = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one.
    // Note that we have to be careful not to reuse the same "available"
    // alloca for multiple different allocas that we just inlined; we use the
    // 'UsedAllocas' set to keep track of which "available" allocas are being
    // used by this function. Also, AllocasForType can of course be empty!
    bool MergedAwayAlloca = false;
    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
      AllocaInst *AvailableAlloca = AllocasForType[i];

      // The available alloca has to be in the right basic block (the entry
      // block of this caller), not left over from some other function.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't
      // reuse it.
      if (!UsedAllocas.insert(AvailableAlloca))
        continue;

      // Otherwise, we *can* reuse it: RAUW AI into AvailableAlloca and
      // declare success!
      DEBUG(errs() << "    ***MERGED ALLOCA: " << *AI);

      AI->replaceAllUsesWith(AvailableAlloca);
      AI->eraseFromParent();
      MergedAwayAlloca = true;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca, either because there are
    // no allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and
    // mark it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }

  return true;
}

bool
AMDILInlinePass::runOnFunction(Function &MF)
{
  Function *F = &MF;
  const AMDILSubtarget &STM = TM.getSubtarget<AMDILSubtarget>();
  if (STM.device()->isSupported(AMDILDeviceInfo::NoInline)) {
    return false;
  }
  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
  SmallVector<CallSite, 16> CallSites;
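  // Gather every candidate call site up front: InlineFunction mutates the
  // caller's body, so we must not inline while still walking the basic
  // block and instruction lists below.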
  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      CallSite CS = CallSite(cast<Value>(I));
      // If this isn't a call, or it is a call to an intrinsic, it can
      // never be inlined.
      if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
        continue;

      // If this is a direct call to an external function, we can never
      // inline it. If it is an indirect call, inlining may resolve it to
      // be a direct call, so we keep it.
      if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
        continue;

      // We don't want to inline if we are recursive.
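      // For example, a self-call such as (hypothetical)
      //   void f(int n) { if (n) f(n - 1); }
      // could never be fully inlined away, so it is reported as a
      // compiler error rather than silently skipped.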
      if (CS.getCalledFunction() == &MF) {
        AMDILMachineFunctionInfo *MFI =
          getAnalysis<MachineFunctionAnalysis>().getMF()
          .getInfo<AMDILMachineFunctionInfo>();
        MFI->addErrorMsg(amd::CompilerErrorMessage[RECURSIVE_FUNCTION]);
        continue;
      }

      CallSites.push_back(CS);
    }
  }

  InlinedArrayAllocasTy InlinedArrayAllocas;
  bool Changed = false;
  for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
    CallSite CS = CallSites[CSi];

    Function *Callee = CS.getCalledFunction();

    // We can only inline direct calls to non-declarations.
    if (Callee == 0 || Callee->isDeclaration()) continue;

    // Attempt to inline the function...
    if (!AMDILInlineCallIfPossible(CS, TD, InlinedArrayAllocas))
      continue;
    Changed = true;
  }
  return Changed;
}

const char*
AMDILInlinePass::getPassName() const
{
  return "AMDIL Inline Function Pass";
}

bool
AMDILInlinePass::doInitialization(Module &M)
{
  return false;
}

bool
AMDILInlinePass::doFinalization(Module &M)
{
  return false;
}

void
AMDILInlinePass::getAnalysisUsage(AnalysisUsage &AU) const
{
  AU.addRequired<MachineFunctionAnalysis>();
  FunctionPass::getAnalysisUsage(AU);
  AU.setPreservesAll();
}