//===-- AMDILInliner.cpp - AMDIL function inlining pass -------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
10 #define DEBUG_TYPE "amdilinline"
12 #include "AMDILCompilerErrors.h"
13 #include "AMDILMachineFunctionInfo.h"
14 #include "AMDILSubtarget.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
19 #include "llvm/CodeGen/Passes.h"
20 #include "llvm/Function.h"
21 #include "llvm/Instructions.h"
22 #include "llvm/IntrinsicInst.h"
23 #include "llvm/Support/CallSite.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "llvm/Target/TargetData.h"
27 #include "llvm/Target/TargetMachine.h"
28 #include "llvm/Transforms/Utils/Cloning.h"
29 #include "llvm/Transforms/Utils/Local.h"
35 class LLVM_LIBRARY_VISIBILITY AMDILInlinePass
: public FunctionPass
41 AMDILInlinePass(TargetMachine
&tm AMDIL_OPT_LEVEL_DECL
);
43 virtual const char* getPassName() const;
44 virtual bool runOnFunction(Function
&F
);
45 bool doInitialization(Module
&M
);
46 bool doFinalization(Module
&M
);
47 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const;
49 typedef DenseMap
<const ArrayType
*, SmallVector
<AllocaInst
*,
50 DEFAULT_VEC_SLOTS
> > InlinedArrayAllocasTy
;
52 AMDILInlineCallIfPossible(CallSite CS
,
54 InlinedArrayAllocasTy
&InlinedArrayAllocas
);
56 CodeGenOpt::Level OptLevel
;
58 char AMDILInlinePass::ID
= 0;
59 } // anonymouse namespace
65 createAMDILInlinePass(TargetMachine
&tm AMDIL_OPT_LEVEL_DECL
)
67 return new AMDILInlinePass(tm AMDIL_OPT_LEVEL_VAR
);
71 AMDILInlinePass::AMDILInlinePass(TargetMachine
&tm AMDIL_OPT_LEVEL_DECL
)
72 : FunctionPass(ID
), TM(tm
)
74 OptLevel
= tm
.getOptLevel();
76 AMDILInlinePass::~AMDILInlinePass()
82 AMDILInlinePass::AMDILInlineCallIfPossible(CallSite CS
,
83 const TargetData
*TD
, InlinedArrayAllocasTy
&InlinedArrayAllocas
) {
84 Function
*Callee
= CS
.getCalledFunction();
85 Function
*Caller
= CS
.getCaller();
87 // Try to inline the function. Get the list of static allocas that were
89 SmallVector
<AllocaInst
*, 16> StaticAllocas
;
90 InlineFunctionInfo IFI
;
91 if (!InlineFunction(CS
, IFI
))
93 DEBUG(errs() << "<amdilinline> function " << Caller
->getName()
94 << ": inlined call to "<< Callee
->getName() << "\n");
96 // If the inlined function had a higher stack protection level than the
97 // calling function, then bump up the caller's stack protection level.
98 if (Callee
->hasFnAttr(Attribute::StackProtectReq
))
99 Caller
->addFnAttr(Attribute::StackProtectReq
);
100 else if (Callee
->hasFnAttr(Attribute::StackProtect
) &&
101 !Caller
->hasFnAttr(Attribute::StackProtectReq
))
102 Caller
->addFnAttr(Attribute::StackProtect
);
105 // Look at all of the allocas that we inlined through this call site. If we
106 // have already inlined other allocas through other calls into this function,
107 // then we know that they have disjoint lifetimes and that we can merge them.
109 // There are many heuristics possible for merging these allocas, and the
110 // different options have different tradeoffs. One thing that we *really*
111 // don't want to hurt is SRoA: once inlining happens, often allocas are no
112 // longer address taken and so they can be promoted.
114 // Our "solution" for that is to only merge allocas whose outermost type is an
115 // array type. These are usually not promoted because someone is using a
116 // variable index into them. These are also often the most important ones to
119 // A better solution would be to have real memory lifetime markers in the IR
120 // and not have the inliner do any merging of allocas at all. This would
121 // allow the backend to do proper stack slot coloring of all allocas that
122 // *actually make it to the backend*, which is really what we want.
124 // Because we don't have this information, we do this simple and useful hack.
126 SmallPtrSet
<AllocaInst
*, 16> UsedAllocas
;
128 // Loop over all the allocas we have so far and see if they can be merged with
129 // a previously inlined alloca. If not, remember that we had it.
131 for (unsigned AllocaNo
= 0,
132 e
= IFI
.StaticAllocas
.size();
133 AllocaNo
!= e
; ++AllocaNo
) {
135 AllocaInst
*AI
= IFI
.StaticAllocas
[AllocaNo
];
137 // Don't bother trying to merge array allocations (they will usually be
138 // canonicalized to be an allocation *of* an array), or allocations whose
139 // type is not itself an array (because we're afraid of pessimizing SRoA).
140 const ArrayType
*ATy
= dyn_cast
<ArrayType
>(AI
->getAllocatedType());
141 if (ATy
== 0 || AI
->isArrayAllocation())
144 // Get the list of all available allocas for this array type.
145 SmallVector
<AllocaInst
*, DEFAULT_VEC_SLOTS
> &AllocasForType
146 = InlinedArrayAllocas
[ATy
];
148 // Loop over the allocas in AllocasForType to see if we can reuse one. Note
149 // that we have to be careful not to reuse the same "available" alloca for
150 // multiple different allocas that we just inlined, we use the 'UsedAllocas'
151 // set to keep track of which "available" allocas are being used by this
152 // function. Also, AllocasForType can be empty of course!
153 bool MergedAwayAlloca
= false;
154 for (unsigned i
= 0, e
= AllocasForType
.size(); i
!= e
; ++i
) {
155 AllocaInst
*AvailableAlloca
= AllocasForType
[i
];
157 // The available alloca has to be in the right function, not in some other
158 // function in this SCC.
159 if (AvailableAlloca
->getParent() != AI
->getParent())
162 // If the inlined function already uses this alloca then we can't reuse
164 if (!UsedAllocas
.insert(AvailableAlloca
))
167 // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
169 DEBUG(errs() << " ***MERGED ALLOCA: " << *AI
);
171 AI
->replaceAllUsesWith(AvailableAlloca
);
172 AI
->eraseFromParent();
173 MergedAwayAlloca
= true;
177 // If we already nuked the alloca, we're done with it.
178 if (MergedAwayAlloca
)
181 // If we were unable to merge away the alloca either because there are no
182 // allocas of the right type available or because we reused them all
183 // already, remember that this alloca came from an inlined function and mark
184 // it used so we don't reuse it for other allocas from this inline
186 AllocasForType
.push_back(AI
);
187 UsedAllocas
.insert(AI
);
194 AMDILInlinePass::runOnFunction(Function
&MF
)
197 const AMDILSubtarget
&STM
= TM
.getSubtarget
<AMDILSubtarget
>();
198 if (STM
.device()->isSupported(AMDILDeviceInfo::NoInline
)) {
201 const TargetData
*TD
= getAnalysisIfAvailable
<TargetData
>();
202 SmallVector
<CallSite
, 16> CallSites
;
203 for (Function::iterator BB
= F
->begin(), E
= F
->end(); BB
!= E
; ++BB
) {
204 for (BasicBlock::iterator I
= BB
->begin(), E
= BB
->end(); I
!= E
; ++I
) {
205 CallSite CS
= CallSite(cast
<Value
>(I
));
206 // If this isn't a call, or it is a call to an intrinsic, it can
208 if (CS
.getInstruction() == 0 || isa
<IntrinsicInst
>(I
))
211 // If this is a direct call to an external function, we can never inline
212 // it. If it is an indirect call, inlining may resolve it to be a
213 // direct call, so we keep it.
214 if (CS
.getCalledFunction() && CS
.getCalledFunction()->isDeclaration())
217 // We don't want to inline if we are recursive.
218 if (CS
.getCalledFunction() && CS
.getCalledFunction()->getName() == MF
.getName()) {
219 AMDILMachineFunctionInfo
*MFI
=
220 getAnalysis
<MachineFunctionAnalysis
>().getMF()
221 .getInfo
<AMDILMachineFunctionInfo
>();
222 MFI
->addErrorMsg(amd::CompilerErrorMessage
[RECURSIVE_FUNCTION
]);
226 CallSites
.push_back(CS
);
230 InlinedArrayAllocasTy InlinedArrayAllocas
;
231 bool Changed
= false;
232 for (unsigned CSi
= 0; CSi
!= CallSites
.size(); ++CSi
) {
233 CallSite CS
= CallSites
[CSi
];
235 Function
*Callee
= CS
.getCalledFunction();
237 // We can only inline direct calls to non-declarations.
238 if (Callee
== 0 || Callee
->isDeclaration()) continue;
240 // Attempt to inline the function...
241 if (!AMDILInlineCallIfPossible(CS
, TD
, InlinedArrayAllocas
))
249 AMDILInlinePass::getPassName() const
251 return "AMDIL Inline Function Pass";
254 AMDILInlinePass::doInitialization(Module
&M
)
260 AMDILInlinePass::doFinalization(Module
&M
)
266 AMDILInlinePass::getAnalysisUsage(AnalysisUsage
&AU
) const
268 AU
.addRequired
<MachineFunctionAnalysis
>();
269 FunctionPass::getAnalysisUsage(AU
);
270 AU
.setPreservesAll();