1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
11 #define DEBUG_TYPE "structcfg"
13 #include "AMDGPUInstrInfo.h"
15 #include "AMDILUtilityFunctions.h"
16 #include "llvm/ADT/SCCIterator.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/DominatorInternals.h"
20 #include "llvm/Analysis/Dominators.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
29 #include "llvm/CodeGen/MachineLoopInfo.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/Target/TargetInstrInfo.h"
33 #define FirstNonDebugInstr(A) A->begin()
38 //===----------------------------------------------------------------------===//
40 // Statistics for CFGStructurizer.
42 //===----------------------------------------------------------------------===//
44 STATISTIC(numSerialPatternMatch
, "CFGStructurizer number of serial pattern "
46 STATISTIC(numIfPatternMatch
, "CFGStructurizer number of if pattern "
48 STATISTIC(numLoopbreakPatternMatch
, "CFGStructurizer number of loop-break "
50 STATISTIC(numLoopcontPatternMatch
, "CFGStructurizer number of loop-continue "
52 STATISTIC(numLoopPatternMatch
, "CFGStructurizer number of loop pattern "
54 STATISTIC(numClonedBlock
, "CFGStructurizer cloned blocks");
55 STATISTIC(numClonedInstr
, "CFGStructurizer cloned instructions");
57 //===----------------------------------------------------------------------===//
59 // Miscellaneous utility for CFGStructurizer.
61 //===----------------------------------------------------------------------===//
62 namespace llvmCFGStruct
64 #define SHOWNEWINSTR(i) \
65 if (DEBUGME) errs() << "New instr: " << *i << "\n"
67 #define SHOWNEWBLK(b, msg) \
69 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
73 #define SHOWBLK_DETAIL(b, msg) \
76 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
82 #define INVALIDSCCNUM -1
83 #define INVALIDREGNUM 0
85 template<class LoopinfoT
>
86 void PrintLoopinfo(const LoopinfoT
&LoopInfo
, llvm::raw_ostream
&OS
) {
87 for (typename
LoopinfoT::iterator iter
= LoopInfo
.begin(),
88 iterEnd
= LoopInfo
.end();
89 iter
!= iterEnd
; ++iter
) {
90 (*iter
)->print(OS
, 0);
95 void ReverseVector(SmallVector
<NodeT
*, DEFAULT_VEC_SLOTS
> &Src
) {
96 size_t sz
= Src
.size();
97 for (size_t i
= 0; i
< sz
/2; ++i
) {
99 Src
[i
] = Src
[sz
- i
- 1];
104 } //end namespace llvmCFGStruct
107 //===----------------------------------------------------------------------===//
109 // MachinePostDominatorTree
111 //===----------------------------------------------------------------------===//
115 /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
116 /// to compute the a post-dominator tree.
118 struct MachinePostDominatorTree
: public MachineFunctionPass
{
119 static char ID
; // Pass identification, replacement for typeid
120 DominatorTreeBase
<MachineBasicBlock
> *DT
;
121 MachinePostDominatorTree() : MachineFunctionPass(ID
)
123 DT
= new DominatorTreeBase
<MachineBasicBlock
>(true); //true indicate
127 ~MachinePostDominatorTree();
129 virtual bool runOnMachineFunction(MachineFunction
&MF
);
131 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const {
132 AU
.setPreservesAll();
133 MachineFunctionPass::getAnalysisUsage(AU
);
136 inline const std::vector
<MachineBasicBlock
*> &getRoots() const {
137 return DT
->getRoots();
140 inline MachineDomTreeNode
*getRootNode() const {
141 return DT
->getRootNode();
144 inline MachineDomTreeNode
*operator[](MachineBasicBlock
*BB
) const {
145 return DT
->getNode(BB
);
148 inline MachineDomTreeNode
*getNode(MachineBasicBlock
*BB
) const {
149 return DT
->getNode(BB
);
152 inline bool dominates(MachineDomTreeNode
*A
, MachineDomTreeNode
*B
) const {
153 return DT
->dominates(A
, B
);
156 inline bool dominates(MachineBasicBlock
*A
, MachineBasicBlock
*B
) const {
157 return DT
->dominates(A
, B
);
161 properlyDominates(const MachineDomTreeNode
*A
, MachineDomTreeNode
*B
) const {
162 return DT
->properlyDominates(A
, B
);
166 properlyDominates(MachineBasicBlock
*A
, MachineBasicBlock
*B
) const {
167 return DT
->properlyDominates(A
, B
);
170 inline MachineBasicBlock
*
171 findNearestCommonDominator(MachineBasicBlock
*A
, MachineBasicBlock
*B
) {
172 return DT
->findNearestCommonDominator(A
, B
);
175 virtual void print(llvm::raw_ostream
&OS
, const Module
*M
= 0) const {
179 } //end of namespace llvm
181 char MachinePostDominatorTree::ID
= 0;
182 static RegisterPass
<MachinePostDominatorTree
>
183 machinePostDominatorTreePass("machinepostdomtree",
184 "MachinePostDominator Tree Construction",
187 //const PassInfo *const llvm::MachinePostDominatorsID
188 //= &machinePostDominatorTreePass;
190 bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction
&F
) {
196 MachinePostDominatorTree::~MachinePostDominatorTree() {
200 //===----------------------------------------------------------------------===//
202 // supporting data structure for CFGStructurizer
204 //===----------------------------------------------------------------------===//
206 namespace llvmCFGStruct
208 template<class PassT
>
209 struct CFGStructTraits
{
212 template <class InstrT
>
213 class BlockInformation
{
217 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
218 //Instructions defining the corresponding successor.
219 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM
) {}
222 template <class BlockT
, class InstrT
, class RegiT
>
223 class LandInformation
{
226 std::set
<RegiT
> breakInitRegs
; //Registers that need to "reg = 0", before
227 //WHILELOOP(thisloop) init before entering
229 std::set
<RegiT
> contInitRegs
; //Registers that need to "reg = 0", after
230 //WHILELOOP(thisloop) init after entering
232 std::set
<RegiT
> endbranchInitRegs
; //Init before entering this loop, at loop
233 //land block, branch cond on this reg.
234 std::set
<RegiT
> breakOnRegs
; //registers that need to "if (reg) break
235 //endif" after ENDLOOP(thisloop) break
236 //outerLoopOf(thisLoop).
237 std::set
<RegiT
> contOnRegs
; //registers that need to "if (reg) continue
238 //endif" after ENDLOOP(thisloop) continue on
239 //outerLoopOf(thisLoop).
240 LandInformation() : landBlk(NULL
) {}
243 } //end of namespace llvmCFGStruct
245 //===----------------------------------------------------------------------===//
249 //===----------------------------------------------------------------------===//
251 namespace llvmCFGStruct
253 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
254 template<class PassT
>
255 class CFGStructurizer
260 SinglePath_InPath
= 1,
261 SinglePath_NotInPath
= 2
265 typedef typename
PassT::InstructionType InstrT
;
266 typedef typename
PassT::FunctionType FuncT
;
267 typedef typename
PassT::DominatortreeType DomTreeT
;
268 typedef typename
PassT::PostDominatortreeType PostDomTreeT
;
269 typedef typename
PassT::DomTreeNodeType DomTreeNodeT
;
270 typedef typename
PassT::LoopinfoType LoopInfoT
;
272 typedef GraphTraits
<FuncT
*> FuncGTraits
;
273 //typedef FuncGTraits::nodes_iterator BlockIterator;
274 typedef typename
FuncT::iterator BlockIterator
;
276 typedef typename
FuncGTraits::NodeType BlockT
;
277 typedef GraphTraits
<BlockT
*> BlockGTraits
;
278 typedef GraphTraits
<Inverse
<BlockT
*> > InvBlockGTraits
;
279 //typedef BlockGTraits::succ_iterator InstructionIterator;
280 typedef typename
BlockT::iterator InstrIterator
;
282 typedef CFGStructTraits
<PassT
> CFGTraits
;
283 typedef BlockInformation
<InstrT
> BlockInfo
;
284 typedef std::map
<BlockT
*, BlockInfo
*> BlockInfoMap
;
287 typedef typename
PassT::LoopType LoopT
;
288 typedef LandInformation
<BlockT
, InstrT
, RegiT
> LoopLandInfo
;
289 typedef std::map
<LoopT
*, LoopLandInfo
*> LoopLandInfoMap
;
290 //landing info for loop break
291 typedef SmallVector
<BlockT
*, 32> BlockTSmallerVector
;
297 /// Perform the CFG structurization
298 bool run(FuncT
&Func
, PassT
&Pass
, const AMDGPURegisterInfo
*tri
);
300 /// Perform the CFG preparation
301 bool prepare(FuncT
&Func
, PassT
&Pass
, const AMDGPURegisterInfo
*tri
);
304 void reversePredicateSetter(typename
BlockT::iterator
);
306 void printOrderedBlocks(llvm::raw_ostream
&OS
);
307 int patternMatch(BlockT
*CurBlock
);
308 int patternMatchGroup(BlockT
*CurBlock
);
310 int serialPatternMatch(BlockT
*CurBlock
);
311 int ifPatternMatch(BlockT
*CurBlock
);
312 int switchPatternMatch(BlockT
*CurBlock
);
313 int loopendPatternMatch(BlockT
*CurBlock
);
314 int loopPatternMatch(BlockT
*CurBlock
);
316 int loopbreakPatternMatch(LoopT
*LoopRep
, BlockT
*LoopHeader
);
317 int loopcontPatternMatch(LoopT
*LoopRep
, BlockT
*LoopHeader
);
318 //int loopWithoutBreak(BlockT *);
320 void handleLoopbreak (BlockT
*ExitingBlock
, LoopT
*ExitingLoop
,
321 BlockT
*ExitBlock
, LoopT
*exitLoop
, BlockT
*landBlock
);
322 void handleLoopcontBlock(BlockT
*ContingBlock
, LoopT
*contingLoop
,
323 BlockT
*ContBlock
, LoopT
*contLoop
);
324 bool isSameloopDetachedContbreak(BlockT
*Src1Block
, BlockT
*Src2Block
);
325 int handleJumpintoIf(BlockT
*HeadBlock
, BlockT
*TrueBlock
,
327 int handleJumpintoIfImp(BlockT
*HeadBlock
, BlockT
*TrueBlock
,
329 int improveSimpleJumpintoIf(BlockT
*HeadBlock
, BlockT
*TrueBlock
,
330 BlockT
*FalseBlock
, BlockT
**LandBlockPtr
);
331 void showImproveSimpleJumpintoIf(BlockT
*HeadBlock
, BlockT
*TrueBlock
,
332 BlockT
*FalseBlock
, BlockT
*LandBlock
,
333 bool Detail
= false);
334 PathToKind
singlePathTo(BlockT
*SrcBlock
, BlockT
*DstBlock
,
335 bool AllowSideEntry
= true);
336 BlockT
*singlePathEnd(BlockT
*srcBlock
, BlockT
*DstBlock
,
337 bool AllowSideEntry
= true);
338 int cloneOnSideEntryTo(BlockT
*PreBlock
, BlockT
*SrcBlock
, BlockT
*DstBlock
);
339 void mergeSerialBlock(BlockT
*DstBlock
, BlockT
*srcBlock
);
341 void mergeIfthenelseBlock(InstrT
*BranchInstr
, BlockT
*CurBlock
,
342 BlockT
*TrueBlock
, BlockT
*FalseBlock
,
344 void mergeLooplandBlock(BlockT
*DstBlock
, LoopLandInfo
*LoopLand
);
345 void mergeLoopbreakBlock(BlockT
*ExitingBlock
, BlockT
*ExitBlock
,
346 BlockT
*ExitLandBlock
, RegiT SetReg
);
347 void settleLoopcontBlock(BlockT
*ContingBlock
, BlockT
*ContBlock
,
349 BlockT
*relocateLoopcontBlock(LoopT
*ParentLoopRep
, LoopT
*LoopRep
,
350 std::set
<BlockT
*> &ExitBlockSet
,
351 BlockT
*ExitLandBlk
);
352 BlockT
*addLoopEndbranchBlock(LoopT
*LoopRep
,
353 BlockTSmallerVector
&ExitingBlocks
,
354 BlockTSmallerVector
&ExitBlocks
);
355 BlockT
*normalizeInfiniteLoopExit(LoopT
*LoopRep
);
356 void removeUnconditionalBranch(BlockT
*SrcBlock
);
357 void removeRedundantConditionalBranch(BlockT
*SrcBlock
);
358 void addDummyExitBlock(SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
> &RetBlocks
);
360 void removeSuccessor(BlockT
*SrcBlock
);
361 BlockT
*cloneBlockForPredecessor(BlockT
*CurBlock
, BlockT
*PredBlock
);
362 BlockT
*exitingBlock2ExitBlock (LoopT
*LoopRep
, BlockT
*exitingBlock
);
364 void migrateInstruction(BlockT
*SrcBlock
, BlockT
*DstBlock
,
365 InstrIterator InsertPos
);
367 void recordSccnum(BlockT
*SrcBlock
, int SCCNum
);
368 int getSCCNum(BlockT
*srcBlk
);
370 void retireBlock(BlockT
*DstBlock
, BlockT
*SrcBlock
);
371 bool isRetiredBlock(BlockT
*SrcBlock
);
372 bool isActiveLoophead(BlockT
*CurBlock
);
373 bool needMigrateBlock(BlockT
*Block
);
375 BlockT
*recordLoopLandBlock(LoopT
*LoopRep
, BlockT
*LandBlock
,
376 BlockTSmallerVector
&exitBlocks
,
377 std::set
<BlockT
*> &ExitBlockSet
);
378 void setLoopLandBlock(LoopT
*LoopRep
, BlockT
*Block
= NULL
);
379 BlockT
*getLoopLandBlock(LoopT
*LoopRep
);
380 LoopLandInfo
*getLoopLandInfo(LoopT
*LoopRep
);
382 void addLoopBreakOnReg(LoopT
*LoopRep
, RegiT RegNum
);
383 void addLoopContOnReg(LoopT
*LoopRep
, RegiT RegNum
);
384 void addLoopBreakInitReg(LoopT
*LoopRep
, RegiT RegNum
);
385 void addLoopContInitReg(LoopT
*LoopRep
, RegiT RegNum
);
386 void addLoopEndbranchInitReg(LoopT
*LoopRep
, RegiT RegNum
);
388 bool hasBackEdge(BlockT
*curBlock
);
389 unsigned getLoopDepth (LoopT
*LoopRep
);
390 int countActiveBlock(
391 typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::const_iterator IterStart
,
392 typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::const_iterator IterEnd
);
393 BlockT
*findNearestCommonPostDom(std::set
<BlockT
*>&);
394 BlockT
*findNearestCommonPostDom(BlockT
*Block1
, BlockT
*Block2
);
398 PostDomTreeT
*postDomTree
;
403 BlockInfoMap blockInfoMap
;
404 LoopLandInfoMap loopLandInfoMap
;
405 SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
> orderedBlks
;
406 const AMDGPURegisterInfo
*TRI
;
408 }; //template class CFGStructurizer
410 template<class PassT
> CFGStructurizer
<PassT
>::CFGStructurizer()
411 : domTree(NULL
), postDomTree(NULL
), loopInfo(NULL
) {
414 template<class PassT
> CFGStructurizer
<PassT
>::~CFGStructurizer() {
415 for (typename
BlockInfoMap::iterator I
= blockInfoMap
.begin(),
416 E
= blockInfoMap
.end(); I
!= E
; ++I
) {
421 template<class PassT
>
422 bool CFGStructurizer
<PassT
>::prepare(FuncT
&func
, PassT
&pass
,
423 const AMDGPURegisterInfo
* tri
) {
428 bool changed
= false;
429 //func.RenumberBlocks();
431 //to do, if not reducible flow graph, make it so ???
434 errs() << "AMDGPUCFGStructurizer::prepare\n";
436 //func.viewCFGOnly();
440 //FIXME: gcc complains on this.
441 //domTree = &pass.getAnalysis<DomTreeT>();
442 //domTree = CFGTraits::getDominatorTree(pass);
444 // domTree->print(errs());
447 //FIXME: gcc complains on this.
448 //domTree = &pass.getAnalysis<DomTreeT>();
449 //postDomTree = CFGTraits::getPostDominatorTree(pass);
451 // postDomTree->print(errs());
454 //FIXME: gcc complains on this.
455 //loopInfo = &pass.getAnalysis<LoopInfoT>();
456 loopInfo
= CFGTraits::getLoopInfo(pass
);
458 errs() << "LoopInfo:\n";
459 PrintLoopinfo(*loopInfo
, errs());
464 errs() << "Ordered blocks:\n";
465 printOrderedBlocks(errs());
468 SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
> retBlks
;
470 for (typename
LoopInfoT::iterator iter
= loopInfo
->begin(),
471 iterEnd
= loopInfo
->end();
472 iter
!= iterEnd
; ++iter
) {
473 LoopT
* loopRep
= (*iter
);
474 BlockTSmallerVector exitingBlks
;
475 loopRep
->getExitingBlocks(exitingBlks
);
477 if (exitingBlks
.size() == 0) {
478 BlockT
* dummyExitBlk
= normalizeInfiniteLoopExit(loopRep
);
479 if (dummyExitBlk
!= NULL
)
480 retBlks
.push_back(dummyExitBlk
);
484 // Remove unconditional branch instr.
485 // Add dummy exit block iff there are multiple returns.
487 for (typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::const_iterator
488 iterBlk
= orderedBlks
.begin(), iterEndBlk
= orderedBlks
.end();
489 iterBlk
!= iterEndBlk
;
491 BlockT
*curBlk
= *iterBlk
;
492 removeUnconditionalBranch(curBlk
);
493 removeRedundantConditionalBranch(curBlk
);
494 if (CFGTraits::isReturnBlock(curBlk
)) {
495 retBlks
.push_back(curBlk
);
497 assert(curBlk
->succ_size() <= 2);
498 //assert(curBlk->size() > 0);
499 //removeEmptyBlock(curBlk) ??
502 if (retBlks
.size() >= 2) {
503 addDummyExitBlock(retBlks
);
508 } //CFGStructurizer::prepare
510 template<class PassT
>
511 bool CFGStructurizer
<PassT
>::run(FuncT
&func
, PassT
&pass
,
512 const AMDGPURegisterInfo
* tri
) {
517 //func.RenumberBlocks();
519 //Assume reducible CFG...
521 errs() << "AMDGPUCFGStructurizer::run\n";
522 //errs() << func.getFunction()->getNameStr() << "\n";
524 //func.viewCFGOnly();
529 //FIXME: gcc complains on this.
530 //domTree = &pass.getAnalysis<DomTreeT>();
531 domTree
= CFGTraits::getDominatorTree(pass
);
533 domTree
->print(errs(), (const llvm::Module
*)0);
537 //FIXME: gcc complains on this.
538 //domTree = &pass.getAnalysis<DomTreeT>();
539 postDomTree
= CFGTraits::getPostDominatorTree(pass
);
541 postDomTree
->print(errs());
544 //FIXME: gcc complains on this.
545 //loopInfo = &pass.getAnalysis<LoopInfoT>();
546 loopInfo
= CFGTraits::getLoopInfo(pass
);
548 errs() << "LoopInfo:\n";
549 PrintLoopinfo(*loopInfo
, errs());
555 //Use the worse block ordering to test the algorithm.
556 ReverseVector(orderedBlks
);
560 errs() << "Ordered blocks:\n";
561 printOrderedBlocks(errs());
566 bool makeProgress
= false;
567 int numRemainedBlk
= countActiveBlock(orderedBlks
.begin(),
573 errs() << "numIter = " << numIter
574 << ", numRemaintedBlk = " << numRemainedBlk
<< "\n";
577 typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::const_iterator
578 iterBlk
= orderedBlks
.begin();
579 typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::const_iterator
580 iterBlkEnd
= orderedBlks
.end();
582 typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::const_iterator
583 sccBeginIter
= iterBlk
;
584 BlockT
*sccBeginBlk
= NULL
;
585 int sccNumBlk
= 0; // The number of active blocks, init to a
586 // maximum possible number.
587 int sccNumIter
; // Number of iteration in this SCC.
589 while (iterBlk
!= iterBlkEnd
) {
592 if (sccBeginBlk
== NULL
) {
593 sccBeginIter
= iterBlk
;
594 sccBeginBlk
= curBlk
;
596 sccNumBlk
= numRemainedBlk
; // Init to maximum possible number.
598 errs() << "start processing SCC" << getSCCNum(sccBeginBlk
);
603 if (!isRetiredBlock(curBlk
)) {
604 patternMatch(curBlk
);
609 bool contNextScc
= true;
610 if (iterBlk
== iterBlkEnd
611 || getSCCNum(sccBeginBlk
) != getSCCNum(*iterBlk
)) {
612 // Just finish one scc.
614 int sccRemainedNumBlk
= countActiveBlock(sccBeginIter
, iterBlk
);
615 if (sccRemainedNumBlk
!= 1 && sccRemainedNumBlk
>= sccNumBlk
) {
617 errs() << "Can't reduce SCC " << getSCCNum(curBlk
)
618 << ", sccNumIter = " << sccNumIter
;
619 errs() << "doesn't make any progress\n";
622 } else if (sccRemainedNumBlk
!= 1 && sccRemainedNumBlk
< sccNumBlk
) {
623 sccNumBlk
= sccRemainedNumBlk
;
624 iterBlk
= sccBeginIter
;
627 errs() << "repeat processing SCC" << getSCCNum(curBlk
)
628 << "sccNumIter = " << sccNumIter
<< "\n";
630 //func.viewCFGOnly();
633 // Finish the current scc.
637 // Continue on next component in the current scc.
644 } //while, "one iteration" over the function.
646 BlockT
*entryBlk
= FuncGTraits::nodes_begin(&func
);
647 if (entryBlk
->succ_size() == 0) {
650 errs() << "Reduce to one block\n";
653 int newnumRemainedBlk
654 = countActiveBlock(orderedBlks
.begin(), orderedBlks
.end());
655 // consider cloned blocks ??
656 if (newnumRemainedBlk
== 1 || newnumRemainedBlk
< numRemainedBlk
) {
658 numRemainedBlk
= newnumRemainedBlk
;
660 makeProgress
= false;
662 errs() << "No progress\n";
666 } while (!finish
&& makeProgress
);
668 // Misc wrap up to maintain the consistency of the Function representation.
669 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func
));
671 // Detach retired Block, release memory.
672 for (typename
BlockInfoMap::iterator iterMap
= blockInfoMap
.begin(),
673 iterEndMap
= blockInfoMap
.end(); iterMap
!= iterEndMap
; ++iterMap
) {
674 if ((*iterMap
).second
&& (*iterMap
).second
->isRetired
) {
675 assert(((*iterMap
).first
)->getNumber() != -1);
677 errs() << "Erase BB" << ((*iterMap
).first
)->getNumber() << "\n";
679 (*iterMap
).first
->eraseFromParent(); //Remove from the parent Function.
681 delete (*iterMap
).second
;
683 blockInfoMap
.clear();
685 // clear loopLandInfoMap
686 for (typename
LoopLandInfoMap::iterator iterMap
= loopLandInfoMap
.begin(),
687 iterEndMap
= loopLandInfoMap
.end(); iterMap
!= iterEndMap
; ++iterMap
) {
688 delete (*iterMap
).second
;
690 loopLandInfoMap
.clear();
698 assert(!"IRREDUCIBL_CF");
702 } //CFGStructurizer::run
704 /// Print the ordered Blocks.
706 template<class PassT
>
707 void CFGStructurizer
<PassT
>::printOrderedBlocks(llvm::raw_ostream
&os
) {
709 for (typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::const_iterator
710 iterBlk
= orderedBlks
.begin(), iterBlkEnd
= orderedBlks
.end();
711 iterBlk
!= iterBlkEnd
;
713 os
<< "BB" << (*iterBlk
)->getNumber();
714 os
<< "(" << getSCCNum(*iterBlk
) << "," << (*iterBlk
)->size() << ")";
715 if (i
!= 0 && i
% 10 == 0) {
721 } //printOrderedBlocks
723 /// Compute the reversed DFS post order of Blocks
725 template<class PassT
> void CFGStructurizer
<PassT
>::orderBlocks() {
728 for (scc_iterator
<FuncT
*> sccIter
= scc_begin(funcRep
),
729 sccEnd
= scc_end(funcRep
); sccIter
!= sccEnd
; ++sccIter
, ++sccNum
) {
730 std::vector
<BlockT
*> &sccNext
= *sccIter
;
731 for (typename
std::vector
<BlockT
*>::const_iterator
732 blockIter
= sccNext
.begin(), blockEnd
= sccNext
.end();
733 blockIter
!= blockEnd
; ++blockIter
) {
735 orderedBlks
.push_back(bb
);
736 recordSccnum(bb
, sccNum
);
740 //walk through all the block in func to check for unreachable
741 for (BlockIterator blockIter1
= FuncGTraits::nodes_begin(funcRep
),
742 blockEnd1
= FuncGTraits::nodes_end(funcRep
);
743 blockIter1
!= blockEnd1
; ++blockIter1
) {
744 BlockT
*bb
= &(*blockIter1
);
745 sccNum
= getSCCNum(bb
);
746 if (sccNum
== INVALIDSCCNUM
) {
747 errs() << "unreachable block BB" << bb
->getNumber() << "\n";
752 template<class PassT
> int CFGStructurizer
<PassT
>::patternMatch(BlockT
*curBlk
) {
757 errs() << "Begin patternMatch BB" << curBlk
->getNumber() << "\n";
760 while ((curMatch
= patternMatchGroup(curBlk
)) > 0) {
761 numMatch
+= curMatch
;
765 errs() << "End patternMatch BB" << curBlk
->getNumber()
766 << ", numMatch = " << numMatch
<< "\n";
772 template<class PassT
>
773 int CFGStructurizer
<PassT
>::patternMatchGroup(BlockT
*curBlk
) {
775 numMatch
+= serialPatternMatch(curBlk
);
776 numMatch
+= ifPatternMatch(curBlk
);
777 //numMatch += switchPatternMatch(curBlk);
778 numMatch
+= loopendPatternMatch(curBlk
);
779 numMatch
+= loopPatternMatch(curBlk
);
783 template<class PassT
>
784 int CFGStructurizer
<PassT
>::serialPatternMatch(BlockT
*curBlk
) {
785 if (curBlk
->succ_size() != 1) {
789 BlockT
*childBlk
= *curBlk
->succ_begin();
790 if (childBlk
->pred_size() != 1 || isActiveLoophead(childBlk
)) {
794 mergeSerialBlock(curBlk
, childBlk
);
795 ++numSerialPatternMatch
;
797 } //serialPatternMatch
799 template<class PassT
>
800 int CFGStructurizer
<PassT
>::ifPatternMatch(BlockT
*curBlk
) {
802 if (curBlk
->succ_size() != 2) {
806 if (hasBackEdge(curBlk
)) {
810 InstrT
*branchInstr
= CFGTraits::getNormalBlockBranchInstr(curBlk
);
811 if (branchInstr
== NULL
) {
815 assert(CFGTraits::isCondBranch(branchInstr
));
817 BlockT
*trueBlk
= CFGTraits::getTrueBranch(branchInstr
);
818 BlockT
*falseBlk
= CFGTraits::getFalseBranch(curBlk
, branchInstr
);
823 if (trueBlk
->succ_size() == 1 && falseBlk
->succ_size() == 1
824 && *trueBlk
->succ_begin() == *falseBlk
->succ_begin()) {
825 landBlk
= *trueBlk
->succ_begin();
826 } else if (trueBlk
->succ_size() == 0 && falseBlk
->succ_size() == 0) {
828 } else if (trueBlk
->succ_size() == 1 && *trueBlk
->succ_begin() == falseBlk
) {
831 } else if (falseBlk
->succ_size() == 1
832 && *falseBlk
->succ_begin() == trueBlk
) {
835 } else if (falseBlk
->succ_size() == 1
836 && isSameloopDetachedContbreak(trueBlk
, falseBlk
)) {
837 landBlk
= *falseBlk
->succ_begin();
838 } else if (trueBlk
->succ_size() == 1
839 && isSameloopDetachedContbreak(falseBlk
, trueBlk
)) {
840 landBlk
= *trueBlk
->succ_begin();
842 return handleJumpintoIf(curBlk
, trueBlk
, falseBlk
);
845 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
846 // new BB created for landBlk==NULL may introduce new challenge to the
847 // reduction process.
848 if (landBlk
!= NULL
&&
849 ((trueBlk
&& trueBlk
->pred_size() > 1)
850 || (falseBlk
&& falseBlk
->pred_size() > 1))) {
851 cloned
+= improveSimpleJumpintoIf(curBlk
, trueBlk
, falseBlk
, &landBlk
);
854 if (trueBlk
&& trueBlk
->pred_size() > 1) {
855 trueBlk
= cloneBlockForPredecessor(trueBlk
, curBlk
);
859 if (falseBlk
&& falseBlk
->pred_size() > 1) {
860 falseBlk
= cloneBlockForPredecessor(falseBlk
, curBlk
);
864 mergeIfthenelseBlock(branchInstr
, curBlk
, trueBlk
, falseBlk
, landBlk
);
868 numClonedBlock
+= cloned
;
873 template<class PassT
>
874 int CFGStructurizer
<PassT
>::switchPatternMatch(BlockT
*curBlk
) {
876 } //switchPatternMatch
878 template<class PassT
>
879 int CFGStructurizer
<PassT
>::loopendPatternMatch(BlockT
*curBlk
) {
880 LoopT
*loopRep
= loopInfo
->getLoopFor(curBlk
);
881 typename
std::vector
<LoopT
*> nestedLoops
;
883 nestedLoops
.push_back(loopRep
);
884 loopRep
= loopRep
->getParentLoop();
887 if (nestedLoops
.size() == 0) {
891 // Process nested loop outside->inside, so "continue" to a outside loop won't
892 // be mistaken as "break" of the current loop.
894 for (typename
std::vector
<LoopT
*>::reverse_iterator
895 iter
= nestedLoops
.rbegin(), iterEnd
= nestedLoops
.rend();
896 iter
!= iterEnd
; ++iter
) {
899 if (getLoopLandBlock(loopRep
) != NULL
) {
903 BlockT
*loopHeader
= loopRep
->getHeader();
905 int numBreak
= loopbreakPatternMatch(loopRep
, loopHeader
);
907 if (numBreak
== -1) {
911 int numCont
= loopcontPatternMatch(loopRep
, loopHeader
);
912 num
+= numBreak
+ numCont
;
916 } //loopendPatternMatch
918 template<class PassT
>
919 int CFGStructurizer
<PassT
>::loopPatternMatch(BlockT
*curBlk
) {
920 if (curBlk
->succ_size() != 0) {
925 LoopT
*loopRep
= loopInfo
->getLoopFor(curBlk
);
926 while (loopRep
&& loopRep
->getHeader() == curBlk
) {
927 LoopLandInfo
*loopLand
= getLoopLandInfo(loopRep
);
929 BlockT
*landBlk
= loopLand
->landBlk
;
931 if (!isRetiredBlock(landBlk
)) {
932 mergeLooplandBlock(curBlk
, loopLand
);
936 loopRep
= loopRep
->getParentLoop();
939 numLoopPatternMatch
+= numLoop
;
944 template<class PassT
>
945 int CFGStructurizer
<PassT
>::loopbreakPatternMatch(LoopT
*loopRep
,
946 BlockT
*loopHeader
) {
947 BlockTSmallerVector exitingBlks
;
948 loopRep
->getExitingBlocks(exitingBlks
);
951 errs() << "Loop has " << exitingBlks
.size() << " exiting blocks\n";
954 if (exitingBlks
.size() == 0) {
955 setLoopLandBlock(loopRep
);
959 // Compute the corresponding exitBlks and exit block set.
960 BlockTSmallerVector exitBlks
;
961 std::set
<BlockT
*> exitBlkSet
;
962 for (typename
BlockTSmallerVector::const_iterator iter
= exitingBlks
.begin(),
963 iterEnd
= exitingBlks
.end(); iter
!= iterEnd
; ++iter
) {
964 BlockT
*exitingBlk
= *iter
;
965 BlockT
*exitBlk
= exitingBlock2ExitBlock(loopRep
, exitingBlk
);
966 exitBlks
.push_back(exitBlk
);
967 exitBlkSet
.insert(exitBlk
); //non-duplicate insert
970 assert(exitBlkSet
.size() > 0);
971 assert(exitBlks
.size() == exitingBlks
.size());
974 errs() << "Loop has " << exitBlkSet
.size() << " exit blocks\n";
978 BlockT
*exitLandBlk
= NULL
;
982 if (exitBlkSet
.size() == 1)
984 exitLandBlk
= *exitBlkSet
.begin();
986 exitLandBlk
= findNearestCommonPostDom(exitBlkSet
);
988 if (exitLandBlk
== NULL
) {
992 bool allInPath
= true;
993 bool allNotInPath
= true;
994 for (typename
std::set
<BlockT
*>::const_iterator
995 iter
= exitBlkSet
.begin(),
996 iterEnd
= exitBlkSet
.end();
997 iter
!= iterEnd
; ++iter
) {
998 BlockT
*exitBlk
= *iter
;
1000 PathToKind pathKind
= singlePathTo(exitBlk
, exitLandBlk
, true);
1002 errs() << "BB" << exitBlk
->getNumber()
1003 << " to BB" << exitLandBlk
->getNumber() << " PathToKind="
1004 << pathKind
<< "\n";
1007 allInPath
= allInPath
&& (pathKind
== SinglePath_InPath
);
1008 allNotInPath
= allNotInPath
&& (pathKind
== SinglePath_NotInPath
);
1010 if (!allInPath
&& !allNotInPath
) {
1012 errs() << "singlePath check fail\n";
1016 } // check all exit blocks
1021 // TODO: Simplify, maybe separate function?
1022 //funcRep->viewCFG();
1023 LoopT
*parentLoopRep
= loopRep
->getParentLoop();
1024 BlockT
*parentLoopHeader
= NULL
;
1026 parentLoopHeader
= parentLoopRep
->getHeader();
1028 if (exitLandBlk
== parentLoopHeader
&&
1029 (exitLandBlk
= relocateLoopcontBlock(parentLoopRep
,
1032 exitLandBlk
)) != NULL
) {
1034 errs() << "relocateLoopcontBlock success\n";
1036 } else if ((exitLandBlk
= addLoopEndbranchBlock(loopRep
,
1038 exitBlks
)) != NULL
) {
1040 errs() << "insertEndbranchBlock success\n";
1044 errs() << "loop exit fail\n";
1053 // Handle side entry to exit path.
1056 for (typename
BlockTSmallerVector::iterator iterExiting
=
1057 exitingBlks
.begin(),
1058 iterExitingEnd
= exitingBlks
.end();
1059 iterExiting
!= iterExitingEnd
; ++iterExiting
) {
1060 BlockT
*exitingBlk
= *iterExiting
;
1061 BlockT
*exitBlk
= exitingBlock2ExitBlock(loopRep
, exitingBlk
);
1062 BlockT
*newExitBlk
= exitBlk
;
1064 if (exitBlk
!= exitLandBlk
&& exitBlk
->pred_size() > 1) {
1065 newExitBlk
= cloneBlockForPredecessor(exitBlk
, exitingBlk
);
1069 numCloned
+= cloneOnSideEntryTo(exitingBlk
, newExitBlk
, exitLandBlk
);
1071 exitBlks
.push_back(newExitBlk
);
1072 exitBlkSet
.insert(newExitBlk
);
1075 for (typename
BlockTSmallerVector::iterator iterExit
= exitBlks
.begin(),
1076 iterExitEnd
= exitBlks
.end();
1077 iterExit
!= iterExitEnd
; ++iterExit
) {
1078 BlockT
*exitBlk
= *iterExit
;
1079 numSerial
+= serialPatternMatch(exitBlk
);
1082 for (typename
BlockTSmallerVector::iterator iterExit
= exitBlks
.begin(),
1083 iterExitEnd
= exitBlks
.end();
1084 iterExit
!= iterExitEnd
; ++iterExit
) {
1085 BlockT
*exitBlk
= *iterExit
;
1086 if (exitBlk
->pred_size() > 1) {
1087 if (exitBlk
!= exitLandBlk
) {
1091 if (exitBlk
!= exitLandBlk
&&
1092 (exitBlk
->succ_size() != 1 ||
1093 *exitBlk
->succ_begin() != exitLandBlk
)) {
1100 // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
1101 exitLandBlk
= recordLoopLandBlock(loopRep
, exitLandBlk
, exitBlks
, exitBlkSet
);
1103 // Fold break into the breaking block. Leverage across level breaks.
1104 assert(exitingBlks
.size() == exitBlks
.size());
1105 for (typename
BlockTSmallerVector::const_iterator iterExit
= exitBlks
.begin(),
1106 iterExiting
= exitingBlks
.begin(), iterExitEnd
= exitBlks
.end();
1107 iterExit
!= iterExitEnd
; ++iterExit
, ++iterExiting
) {
1108 BlockT
*exitBlk
= *iterExit
;
1109 BlockT
*exitingBlk
= *iterExiting
;
1110 assert(exitBlk
->pred_size() == 1 || exitBlk
== exitLandBlk
);
1111 LoopT
*exitingLoop
= loopInfo
->getLoopFor(exitingBlk
);
1112 handleLoopbreak(exitingBlk
, exitingLoop
, exitBlk
, loopRep
, exitLandBlk
);
1115 int numBreak
= static_cast<int>(exitingBlks
.size());
1116 numLoopbreakPatternMatch
+= numBreak
;
1117 numClonedBlock
+= numCloned
;
1118 return numBreak
+ numSerial
+ numCloned
;
1119 } //loopbreakPatternMatch
1121 template<class PassT
>
1122 int CFGStructurizer
<PassT
>::loopcontPatternMatch(LoopT
*loopRep
,
1123 BlockT
*loopHeader
) {
1125 SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
> contBlk
;
1126 for (typename
InvBlockGTraits::ChildIteratorType iter
=
1127 InvBlockGTraits::child_begin(loopHeader
),
1128 iterEnd
= InvBlockGTraits::child_end(loopHeader
);
1129 iter
!= iterEnd
; ++iter
) {
1130 BlockT
*curBlk
= *iter
;
1131 if (loopRep
->contains(curBlk
)) {
1132 handleLoopcontBlock(curBlk
, loopInfo
->getLoopFor(curBlk
),
1133 loopHeader
, loopRep
);
1134 contBlk
.push_back(curBlk
);
1139 for (typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::iterator
1140 iter
= contBlk
.begin(), iterEnd
= contBlk
.end();
1141 iter
!= iterEnd
; ++iter
) {
1142 (*iter
)->removeSuccessor(loopHeader
);
1145 numLoopcontPatternMatch
+= numCont
;
1148 } //loopcontPatternMatch
1151 template<class PassT
>
1152 bool CFGStructurizer
<PassT
>::isSameloopDetachedContbreak(BlockT
*src1Blk
,
1154 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1155 // same loop with LoopLandInfo without explicitly keeping track of
1156 // loopContBlks and loopBreakBlks, this is a method to get the information.
1158 if (src1Blk
->succ_size() == 0) {
1159 LoopT
*loopRep
= loopInfo
->getLoopFor(src1Blk
);
1160 if (loopRep
!= NULL
&& loopRep
== loopInfo
->getLoopFor(src2Blk
)) {
1161 LoopLandInfo
*&theEntry
= loopLandInfoMap
[loopRep
];
1162 if (theEntry
!= NULL
) {
1164 errs() << "isLoopContBreakBlock yes src1 = BB"
1165 << src1Blk
->getNumber()
1166 << " src2 = BB" << src2Blk
->getNumber() << "\n";
1173 } //isSameloopDetachedContbreak
1175 template<class PassT
>
1176 int CFGStructurizer
<PassT
>::handleJumpintoIf(BlockT
*headBlk
,
1179 int num
= handleJumpintoIfImp(headBlk
, trueBlk
, falseBlk
);
1182 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1184 num
= handleJumpintoIfImp(headBlk
, falseBlk
, trueBlk
);
1189 template<class PassT
>
1190 int CFGStructurizer
<PassT
>::handleJumpintoIfImp(BlockT
*headBlk
,
1196 //trueBlk could be the common post dominator
1200 errs() << "handleJumpintoIfImp head = BB" << headBlk
->getNumber()
1201 << " true = BB" << trueBlk
->getNumber()
1202 << ", numSucc=" << trueBlk
->succ_size()
1203 << " false = BB" << falseBlk
->getNumber() << "\n";
1208 errs() << "check down = BB" << downBlk
->getNumber();
1211 if (//postDomTree->dominates(downBlk, falseBlk) &&
1212 singlePathTo(falseBlk
, downBlk
) == SinglePath_InPath
) {
1214 errs() << " working\n";
1217 num
+= cloneOnSideEntryTo(headBlk
, trueBlk
, downBlk
);
1218 num
+= cloneOnSideEntryTo(headBlk
, falseBlk
, downBlk
);
1220 numClonedBlock
+= num
;
1221 num
+= serialPatternMatch(*headBlk
->succ_begin());
1222 num
+= serialPatternMatch(*(++headBlk
->succ_begin()));
1223 num
+= ifPatternMatch(headBlk
);
1229 errs() << " not working\n";
1231 downBlk
= (downBlk
->succ_size() == 1) ? (*downBlk
->succ_begin()) : NULL
;
1232 } // walk down the postDomTree
1235 } //handleJumpintoIf
1237 template<class PassT
>
1238 void CFGStructurizer
<PassT
>::showImproveSimpleJumpintoIf(BlockT
*headBlk
,
1243 errs() << "head = BB" << headBlk
->getNumber()
1244 << " size = " << headBlk
->size();
1247 headBlk
->print(errs());
1252 errs() << ", true = BB" << trueBlk
->getNumber() << " size = "
1253 << trueBlk
->size() << " numPred = " << trueBlk
->pred_size();
1256 trueBlk
->print(errs());
1261 errs() << ", false = BB" << falseBlk
->getNumber() << " size = "
1262 << falseBlk
->size() << " numPred = " << falseBlk
->pred_size();
1265 falseBlk
->print(errs());
1270 errs() << ", land = BB" << landBlk
->getNumber() << " size = "
1271 << landBlk
->size() << " numPred = " << landBlk
->pred_size();
1274 landBlk
->print(errs());
1280 } //showImproveSimpleJumpintoIf
1282 template<class PassT
>
1283 int CFGStructurizer
<PassT
>::improveSimpleJumpintoIf(BlockT
*headBlk
,
1286 BlockT
**plandBlk
) {
1287 bool migrateTrue
= false;
1288 bool migrateFalse
= false;
1290 BlockT
*landBlk
= *plandBlk
;
1292 assert((trueBlk
== NULL
|| trueBlk
->succ_size() <= 1)
1293 && (falseBlk
== NULL
|| falseBlk
->succ_size() <= 1));
1295 if (trueBlk
== falseBlk
) {
1301 errs() << "improveSimpleJumpintoIf: ";
1302 showImproveSimpleJumpintoIf(headBlk
, trueBlk
, falseBlk
, landBlk
, 0);
1306 // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
1307 // May consider the # landBlk->pred_size() as it represents the number of
1308 // assignment initReg = .. needed to insert.
1309 migrateTrue
= needMigrateBlock(trueBlk
);
1310 migrateFalse
= needMigrateBlock(falseBlk
);
1312 if (!migrateTrue
&& !migrateFalse
) {
1316 // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1317 // have more than one predecessors. without doing this, its predecessor
1318 // rather than headBlk will have undefined value in initReg.
1319 if (!migrateTrue
&& trueBlk
&& trueBlk
->pred_size() > 1) {
1322 if (!migrateFalse
&& falseBlk
&& falseBlk
->pred_size() > 1) {
1323 migrateFalse
= true;
1327 errs() << "before improveSimpleJumpintoIf: ";
1328 showImproveSimpleJumpintoIf(headBlk
, trueBlk
, falseBlk
, landBlk
, 0);
1329 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1332 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1334 // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1335 // {initReg = 0; org falseBlk branch }
1336 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1338 // if landBlk->pred_size() > 2, put the about if-else inside
1339 // if (initReg !=2) {...}
1341 // add initReg = initVal to headBlk
1343 const TargetRegisterClass
* I32RC
= TRI
->getCFGStructurizerRegClass(MVT::i32
);
1345 funcRep
->getRegInfo().createVirtualRegister(I32RC
);
1346 if (!migrateTrue
|| !migrateFalse
) {
1347 int initVal
= migrateTrue
? 0 : 1;
1348 CFGTraits::insertAssignInstrBefore(headBlk
, passRep
, initReg
, initVal
);
1353 if (landBlk
== NULL
) {
1354 landBlk
= funcRep
->CreateMachineBasicBlock();
1355 funcRep
->push_back(landBlk
); //insert to function
1358 trueBlk
->addSuccessor(landBlk
);
1360 headBlk
->addSuccessor(landBlk
);
1364 falseBlk
->addSuccessor(landBlk
);
1366 headBlk
->addSuccessor(landBlk
);
1372 bool landBlkHasOtherPred
= (landBlk
->pred_size() > 2);
1374 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1375 typename
BlockT::iterator insertPos
=
1376 CFGTraits::getInstrPos
1377 (landBlk
, CFGTraits::insertInstrBefore(landBlk
, AMDGPU::ENDIF
, passRep
));
1379 if (landBlkHasOtherPred
) {
1381 funcRep
->getRegInfo().createVirtualRegister(I32RC
);
1382 CFGTraits::insertAssignInstrBefore(insertPos
, passRep
, immReg
, 2);
1383 unsigned cmpResReg
=
1384 funcRep
->getRegInfo().createVirtualRegister(I32RC
);
1386 CFGTraits::insertCompareInstrBefore(landBlk
, insertPos
, passRep
, cmpResReg
,
1388 CFGTraits::insertCondBranchBefore(landBlk
, insertPos
,
1389 AMDGPU::IF_LOGICALZ_i32
, passRep
,
1390 cmpResReg
, DebugLoc());
1393 CFGTraits::insertCondBranchBefore(landBlk
, insertPos
, AMDGPU::IF_LOGICALNZ_i32
,
1394 passRep
, initReg
, DebugLoc());
1397 migrateInstruction(trueBlk
, landBlk
, insertPos
);
1398 // need to uncondionally insert the assignment to ensure a path from its
1399 // predecessor rather than headBlk has valid value in initReg if
1401 CFGTraits::insertAssignInstrBefore(trueBlk
, passRep
, initReg
, 1);
1403 CFGTraits::insertInstrBefore(insertPos
, AMDGPU::ELSE
, passRep
);
1406 migrateInstruction(falseBlk
, landBlk
, insertPos
);
1407 // need to uncondionally insert the assignment to ensure a path from its
1408 // predecessor rather than headBlk has valid value in initReg if
1410 CFGTraits::insertAssignInstrBefore(falseBlk
, passRep
, initReg
, 0);
1412 //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1414 if (landBlkHasOtherPred
) {
1416 CFGTraits::insertInstrBefore(insertPos
, AMDGPU::ENDIF
, passRep
);
1418 // put initReg = 2 to other predecessors of landBlk
1419 for (typename
BlockT::pred_iterator predIter
= landBlk
->pred_begin(),
1420 predIterEnd
= landBlk
->pred_end(); predIter
!= predIterEnd
;
1422 BlockT
*curBlk
= *predIter
;
1423 if (curBlk
!= trueBlk
&& curBlk
!= falseBlk
) {
1424 CFGTraits::insertAssignInstrBefore(curBlk
, passRep
, initReg
, 2);
1429 errs() << "result from improveSimpleJumpintoIf: ";
1430 showImproveSimpleJumpintoIf(headBlk
, trueBlk
, falseBlk
, landBlk
, 0);
1431 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1435 *plandBlk
= landBlk
;
1438 } //improveSimpleJumpintoIf
1440 template<class PassT
>
1441 void CFGStructurizer
<PassT
>::handleLoopbreak(BlockT
*exitingBlk
,
1447 errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop
)
1448 << " from loop-depth = " << getLoopDepth(exitingLoop
) << "\n";
1450 const TargetRegisterClass
* I32RC
= TRI
->getCFGStructurizerRegClass(MVT::i32
);
1452 RegiT initReg
= INVALIDREGNUM
;
1453 if (exitingLoop
!= exitLoop
) {
1454 initReg
= static_cast<int>
1455 (funcRep
->getRegInfo().createVirtualRegister(I32RC
));
1456 assert(initReg
!= INVALIDREGNUM
);
1457 addLoopBreakInitReg(exitLoop
, initReg
);
1458 while (exitingLoop
!= exitLoop
&& exitingLoop
) {
1459 addLoopBreakOnReg(exitingLoop
, initReg
);
1460 exitingLoop
= exitingLoop
->getParentLoop();
1462 assert(exitingLoop
== exitLoop
);
1465 mergeLoopbreakBlock(exitingBlk
, exitBlk
, landBlk
, initReg
);
1469 template<class PassT
>
1470 void CFGStructurizer
<PassT
>::handleLoopcontBlock(BlockT
*contingBlk
,
1475 errs() << "loopcontPattern cont = BB" << contingBlk
->getNumber()
1476 << " header = BB" << contBlk
->getNumber() << "\n";
1478 errs() << "Trying to continue loop-depth = "
1479 << getLoopDepth(contLoop
)
1480 << " from loop-depth = " << getLoopDepth(contingLoop
) << "\n";
1483 RegiT initReg
= INVALIDREGNUM
;
1484 const TargetRegisterClass
* I32RC
= TRI
->getCFGStructurizerRegClass(MVT::i32
);
1485 if (contingLoop
!= contLoop
) {
1486 initReg
= static_cast<int>
1487 (funcRep
->getRegInfo().createVirtualRegister(I32RC
));
1488 assert(initReg
!= INVALIDREGNUM
);
1489 addLoopContInitReg(contLoop
, initReg
);
1490 while (contingLoop
&& contingLoop
->getParentLoop() != contLoop
) {
1491 addLoopBreakOnReg(contingLoop
, initReg
); //not addLoopContOnReg
1492 contingLoop
= contingLoop
->getParentLoop();
1494 assert(contingLoop
&& contingLoop
->getParentLoop() == contLoop
);
1495 addLoopContOnReg(contingLoop
, initReg
);
1498 settleLoopcontBlock(contingBlk
, contBlk
, initReg
);
1499 //contingBlk->removeSuccessor(loopHeader);
1500 } //handleLoopcontBlock
1502 template<class PassT
>
1503 void CFGStructurizer
<PassT
>::mergeSerialBlock(BlockT
*dstBlk
, BlockT
*srcBlk
) {
1505 errs() << "serialPattern BB" << dstBlk
->getNumber()
1506 << " <= BB" << srcBlk
->getNumber() << "\n";
1508 //removeUnconditionalBranch(dstBlk);
1509 dstBlk
->splice(dstBlk
->end(), srcBlk
, FirstNonDebugInstr(srcBlk
), srcBlk
->end());
1511 dstBlk
->removeSuccessor(srcBlk
);
1512 CFGTraits::cloneSuccessorList(dstBlk
, srcBlk
);
1514 removeSuccessor(srcBlk
);
1515 retireBlock(dstBlk
, srcBlk
);
1516 } //mergeSerialBlock
1518 template<class PassT
>
1519 void CFGStructurizer
<PassT
>::mergeIfthenelseBlock(InstrT
*branchInstr
,
1525 errs() << "ifPattern BB" << curBlk
->getNumber();
1528 errs() << "BB" << trueBlk
->getNumber();
1530 errs() << " } else ";
1533 errs() << "BB" << falseBlk
->getNumber();
1536 errs() << "landBlock: ";
1537 if (landBlk
== NULL
) {
1540 errs() << "BB" << landBlk
->getNumber();
1545 int oldOpcode
= branchInstr
->getOpcode();
1546 DebugLoc branchDL
= branchInstr
->getDebugLoc();
1556 typename
BlockT::iterator branchInstrPos
=
1557 CFGTraits::getInstrPos(curBlk
, branchInstr
);
1558 CFGTraits::insertCondBranchBefore(branchInstrPos
,
1559 CFGTraits::getBranchNzeroOpcode(oldOpcode
),
1564 curBlk
->splice(branchInstrPos
, trueBlk
, FirstNonDebugInstr(trueBlk
), trueBlk
->end());
1565 curBlk
->removeSuccessor(trueBlk
);
1566 if (landBlk
&& trueBlk
->succ_size()!=0) {
1567 trueBlk
->removeSuccessor(landBlk
);
1569 retireBlock(curBlk
, trueBlk
);
1571 CFGTraits::insertInstrBefore(branchInstrPos
, AMDGPU::ELSE
, passRep
);
1574 curBlk
->splice(branchInstrPos
, falseBlk
, FirstNonDebugInstr(falseBlk
),
1576 curBlk
->removeSuccessor(falseBlk
);
1577 if (landBlk
&& falseBlk
->succ_size() != 0) {
1578 falseBlk
->removeSuccessor(landBlk
);
1580 retireBlock(curBlk
, falseBlk
);
1582 CFGTraits::insertInstrBefore(branchInstrPos
, AMDGPU::ENDIF
, passRep
);
1584 //curBlk->remove(branchInstrPos);
1585 branchInstr
->eraseFromParent();
1587 if (landBlk
&& trueBlk
&& falseBlk
) {
1588 curBlk
->addSuccessor(landBlk
);
1591 } //mergeIfthenelseBlock
1593 template<class PassT
>
1594 void CFGStructurizer
<PassT
>::mergeLooplandBlock(BlockT
*dstBlk
,
1595 LoopLandInfo
*loopLand
) {
1596 BlockT
*landBlk
= loopLand
->landBlk
;
1599 errs() << "loopPattern header = BB" << dstBlk
->getNumber()
1600 << " land = BB" << landBlk
->getNumber() << "\n";
1603 // Loop contInitRegs are init at the beginning of the loop.
1604 for (typename
std::set
<RegiT
>::const_iterator iter
=
1605 loopLand
->contInitRegs
.begin(),
1606 iterEnd
= loopLand
->contInitRegs
.end(); iter
!= iterEnd
; ++iter
) {
1607 CFGTraits::insertAssignInstrBefore(dstBlk
, passRep
, *iter
, 0);
1610 /* we last inserterd the DebugLoc in the
1611 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1612 * search for the DebugLoc in the that statement.
1613 * if not found, we have to insert the empty/default DebugLoc */
1614 InstrT
*loopBreakInstr
= CFGTraits::getLoopBreakInstr(dstBlk
);
1615 DebugLoc DLBreak
= (loopBreakInstr
) ? loopBreakInstr
->getDebugLoc() : DebugLoc();
1617 CFGTraits::insertInstrBefore(dstBlk
, AMDGPU::WHILELOOP
, passRep
, DLBreak
);
1618 // Loop breakInitRegs are init before entering the loop.
1619 for (typename
std::set
<RegiT
>::const_iterator iter
=
1620 loopLand
->breakInitRegs
.begin(),
1621 iterEnd
= loopLand
->breakInitRegs
.end(); iter
!= iterEnd
; ++iter
)
1623 CFGTraits::insertAssignInstrBefore(dstBlk
, passRep
, *iter
, 0);
1625 // Loop endbranchInitRegs are init before entering the loop.
1626 for (typename
std::set
<RegiT
>::const_iterator iter
=
1627 loopLand
->endbranchInitRegs
.begin(),
1628 iterEnd
= loopLand
->endbranchInitRegs
.end(); iter
!= iterEnd
; ++iter
) {
1629 CFGTraits::insertAssignInstrBefore(dstBlk
, passRep
, *iter
, 0);
1632 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1633 * search for the DebugLoc in the continue statement.
1634 * if not found, we have to insert the empty/default DebugLoc */
1635 InstrT
*continueInstr
= CFGTraits::getContinueInstr(dstBlk
);
1636 DebugLoc DLContinue
= (continueInstr
) ? continueInstr
->getDebugLoc() : DebugLoc();
1638 CFGTraits::insertInstrEnd(dstBlk
, AMDGPU::ENDLOOP
, passRep
, DLContinue
);
1639 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1641 for (typename
std::set
<RegiT
>::const_iterator iter
=
1642 loopLand
->breakOnRegs
.begin(),
1643 iterEnd
= loopLand
->breakOnRegs
.end(); iter
!= iterEnd
; ++iter
) {
1644 CFGTraits::insertCondBranchEnd(dstBlk
, AMDGPU::BREAK_LOGICALNZ_i32
, passRep
,
1648 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1650 for (std::set
<RegiT
>::const_iterator iter
= loopLand
->contOnRegs
.begin(),
1651 iterEnd
= loopLand
->contOnRegs
.end(); iter
!= iterEnd
; ++iter
) {
1652 CFGTraits::insertCondBranchEnd(dstBlk
, AMDGPU::CONTINUE_LOGICALNZ_i32
,
1656 dstBlk
->splice(dstBlk
->end(), landBlk
, landBlk
->begin(), landBlk
->end());
1658 for (typename
BlockT::succ_iterator iter
= landBlk
->succ_begin(),
1659 iterEnd
= landBlk
->succ_end(); iter
!= iterEnd
; ++iter
) {
1660 dstBlk
->addSuccessor(*iter
); // *iter's predecessor is also taken care of.
1663 removeSuccessor(landBlk
);
1664 retireBlock(dstBlk
, landBlk
);
1665 } //mergeLooplandBlock
1667 template<class PassT
>
1668 void CFGStructurizer
<PassT
>::reversePredicateSetter(typename
BlockT::iterator I
)
1671 if (I
->getOpcode() == AMDGPU::PRED_X
) {
1672 switch (static_cast<MachineInstr
*>(I
)->getOperand(2).getImm()) {
1673 case OPCODE_IS_ZERO_INT
:
1674 static_cast<MachineInstr
*>(I
)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT
);
1676 case OPCODE_IS_NOT_ZERO_INT
:
1677 static_cast<MachineInstr
*>(I
)->getOperand(2).setImm(OPCODE_IS_ZERO_INT
);
1679 case OPCODE_IS_ZERO
:
1680 static_cast<MachineInstr
*>(I
)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO
);
1682 case OPCODE_IS_NOT_ZERO
:
1683 static_cast<MachineInstr
*>(I
)->getOperand(2).setImm(OPCODE_IS_ZERO
);
1686 assert(0 && "PRED_X Opcode invalid!");
1692 template<class PassT
>
1693 void CFGStructurizer
<PassT
>::mergeLoopbreakBlock(BlockT
*exitingBlk
,
1695 BlockT
*exitLandBlk
,
1698 errs() << "loopbreakPattern exiting = BB" << exitingBlk
->getNumber()
1699 << " exit = BB" << exitBlk
->getNumber()
1700 << " land = BB" << exitLandBlk
->getNumber() << "\n";
1703 InstrT
*branchInstr
= CFGTraits::getLoopendBlockBranchInstr(exitingBlk
);
1704 assert(branchInstr
&& CFGTraits::isCondBranch(branchInstr
));
1706 DebugLoc DL
= branchInstr
->getDebugLoc();
1708 BlockT
*trueBranch
= CFGTraits::getTrueBranch(branchInstr
);
1709 int oldOpcode
= branchInstr
->getOpcode();
1711 // transform exitingBlk to
1713 // exitBlk (if exitBlk != exitLandBlk)
1717 // successor = {orgSuccessor(exitingBlk) - exitBlk}
1719 typename
BlockT::iterator branchInstrPos
=
1720 CFGTraits::getInstrPos(exitingBlk
, branchInstr
);
1722 if (exitBlk
== exitLandBlk
&& setReg
== INVALIDREGNUM
) {
1725 if (trueBranch
!= exitBlk
) {
1726 reversePredicateSetter(branchInstrPos
);
1728 int newOpcode
= CFGTraits::getBreakZeroOpcode(oldOpcode
);
1729 CFGTraits::insertCondBranchBefore(branchInstrPos
, newOpcode
, passRep
, DL
);
1731 if (trueBranch
!= exitBlk
) {
1732 reversePredicateSetter(branchInstr
);
1734 int newOpcode
= CFGTraits::getBreakZeroOpcode(oldOpcode
);
1735 CFGTraits::insertCondBranchBefore(branchInstrPos
, newOpcode
, passRep
, DL
);
1736 if (exitBlk
!= exitLandBlk
) {
1737 //splice is insert-before ...
1738 exitingBlk
->splice(branchInstrPos
, exitBlk
, exitBlk
->begin(),
1741 if (setReg
!= INVALIDREGNUM
) {
1742 CFGTraits::insertAssignInstrBefore(branchInstrPos
, passRep
, setReg
, 1);
1744 CFGTraits::insertInstrBefore(branchInstrPos
, AMDGPU::BREAK
, passRep
);
1745 CFGTraits::insertInstrBefore(branchInstrPos
, AMDGPU::ENDIF
, passRep
);
1748 //now branchInst can be erase safely
1749 //exitingBlk->eraseFromParent(branchInstr);
1750 branchInstr
->eraseFromParent();
1752 //now take care of successors, retire blocks
1753 exitingBlk
->removeSuccessor(exitBlk
);
1754 if (exitBlk
!= exitLandBlk
) {
1755 //splice is insert-before ...
1756 exitBlk
->removeSuccessor(exitLandBlk
);
1757 retireBlock(exitingBlk
, exitBlk
);
1760 } //mergeLoopbreakBlock
1762 template<class PassT
>
1763 void CFGStructurizer
<PassT
>::settleLoopcontBlock(BlockT
*contingBlk
,
1767 errs() << "settleLoopcontBlock conting = BB"
1768 << contingBlk
->getNumber()
1769 << ", cont = BB" << contBlk
->getNumber() << "\n";
1772 InstrT
*branchInstr
= CFGTraits::getLoopendBlockBranchInstr(contingBlk
);
1774 assert(CFGTraits::isCondBranch(branchInstr
));
1775 typename
BlockT::iterator branchInstrPos
=
1776 CFGTraits::getInstrPos(contingBlk
, branchInstr
);
1777 BlockT
*trueBranch
= CFGTraits::getTrueBranch(branchInstr
);
1778 int oldOpcode
= branchInstr
->getOpcode();
1779 DebugLoc DL
= branchInstr
->getDebugLoc();
1781 // transform contingBlk to
1783 // move instr after branchInstr
1789 // successor = {orgSuccessor(contingBlk) - loopHeader}
1791 bool useContinueLogical
=
1792 (setReg
== INVALIDREGNUM
&& (&*contingBlk
->rbegin()) == branchInstr
);
1794 if (useContinueLogical
== false)
1797 trueBranch
== contBlk
? CFGTraits::getBranchNzeroOpcode(oldOpcode
)
1798 : CFGTraits::getBranchZeroOpcode(oldOpcode
);
1800 CFGTraits::insertCondBranchBefore(branchInstrPos
, branchOpcode
, passRep
, DL
);
1802 if (setReg
!= INVALIDREGNUM
) {
1803 CFGTraits::insertAssignInstrBefore(branchInstrPos
, passRep
, setReg
, 1);
1804 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1805 CFGTraits::insertInstrEnd(contingBlk
, AMDGPU::BREAK
, passRep
, DL
);
1807 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1808 CFGTraits::insertInstrEnd(contingBlk
, AMDGPU::CONTINUE
, passRep
, DL
);
1811 CFGTraits::insertInstrEnd(contingBlk
, AMDGPU::ENDIF
, passRep
, DL
);
1814 trueBranch
== contBlk
? CFGTraits::getContinueNzeroOpcode(oldOpcode
)
1815 : CFGTraits::getContinueZeroOpcode(oldOpcode
);
1817 CFGTraits::insertCondBranchBefore(branchInstrPos
, branchOpcode
, passRep
, DL
);
1820 //contingBlk->eraseFromParent(branchInstr);
1821 branchInstr
->eraseFromParent();
1823 /* if we've arrived here then we've already erased the branch instruction
1824 * travel back up the basic block to see the last reference of our debug location
1825 * we've just inserted that reference here so it should be representative */
1826 if (setReg
!= INVALIDREGNUM
) {
1827 CFGTraits::insertAssignInstrBefore(contingBlk
, passRep
, setReg
, 1);
1828 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1829 CFGTraits::insertInstrEnd(contingBlk
, AMDGPU::BREAK
, passRep
, CFGTraits::getLastDebugLocInBB(contingBlk
));
1831 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1832 CFGTraits::insertInstrEnd(contingBlk
, AMDGPU::CONTINUE
, passRep
, CFGTraits::getLastDebugLocInBB(contingBlk
));
1836 } //settleLoopcontBlock
1838 // BBs in exitBlkSet are determined as in break-path for loopRep,
1839 // before we can put code for BBs as inside loop-body for loopRep
1840 // check whether those BBs are determined as cont-BB for parentLoopRep
1842 // If so, generate a new BB newBlk
1843 // (1) set newBlk common successor of BBs in exitBlkSet
1844 // (2) change the continue-instr in BBs in exitBlkSet to break-instr
1845 // (3) generate continue-instr in newBlk
1847 template<class PassT
>
1848 typename CFGStructurizer
<PassT
>::BlockT
*
1849 CFGStructurizer
<PassT
>::relocateLoopcontBlock(LoopT
*parentLoopRep
,
1851 std::set
<BlockT
*> &exitBlkSet
,
1852 BlockT
*exitLandBlk
) {
1853 std::set
<BlockT
*> endBlkSet
;
1855 // BlockT *parentLoopHead = parentLoopRep->getHeader();
1858 for (typename
std::set
<BlockT
*>::const_iterator iter
= exitBlkSet
.begin(),
1859 iterEnd
= exitBlkSet
.end();
1860 iter
!= iterEnd
; ++iter
) {
1861 BlockT
*exitBlk
= *iter
;
1862 BlockT
*endBlk
= singlePathEnd(exitBlk
, exitLandBlk
);
1864 if (endBlk
== NULL
|| CFGTraits::getContinueInstr(endBlk
) == NULL
)
1867 endBlkSet
.insert(endBlk
);
1870 BlockT
*newBlk
= funcRep
->CreateMachineBasicBlock();
1871 funcRep
->push_back(newBlk
); //insert to function
1872 CFGTraits::insertInstrEnd(newBlk
, AMDGPU::CONTINUE
, passRep
);
1873 SHOWNEWBLK(newBlk
, "New continue block: ");
1875 for (typename
std::set
<BlockT
*>::const_iterator iter
= endBlkSet
.begin(),
1876 iterEnd
= endBlkSet
.end();
1877 iter
!= iterEnd
; ++iter
) {
1878 BlockT
*endBlk
= *iter
;
1879 InstrT
*contInstr
= CFGTraits::getContinueInstr(endBlk
);
1881 contInstr
->eraseFromParent();
1883 endBlk
->addSuccessor(newBlk
);
1885 errs() << "Add new continue Block to BB"
1886 << endBlk
->getNumber() << " successors\n";
1891 } //relocateLoopcontBlock
1894 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1895 // LoopLandBlock. This BB branch on the loop endBranchInit register to the
1896 // pathes corresponding to the loop exiting branches.
1898 template<class PassT
>
1899 typename CFGStructurizer
<PassT
>::BlockT
*
1900 CFGStructurizer
<PassT
>::addLoopEndbranchBlock(LoopT
*loopRep
,
1901 BlockTSmallerVector
&exitingBlks
,
1902 BlockTSmallerVector
&exitBlks
) {
1903 const AMDGPUInstrInfo
*tii
=
1904 static_cast<const AMDGPUInstrInfo
*>(passRep
->getTargetInstrInfo());
1905 const TargetRegisterClass
* I32RC
= TRI
->getCFGStructurizerRegClass(MVT::i32
);
1907 RegiT endBranchReg
= static_cast<int>
1908 (funcRep
->getRegInfo().createVirtualRegister(I32RC
));
1909 assert(endBranchReg
>= 0);
1911 // reg = 0 before entering the loop
1912 addLoopEndbranchInitReg(loopRep
, endBranchReg
);
1914 uint32_t numBlks
= static_cast<uint32_t>(exitingBlks
.size());
1915 assert(numBlks
>=2 && numBlks
== exitBlks
.size());
1917 BlockT
*preExitingBlk
= exitingBlks
[0];
1918 BlockT
*preExitBlk
= exitBlks
[0];
1919 BlockT
*preBranchBlk
= funcRep
->CreateMachineBasicBlock();
1920 funcRep
->push_back(preBranchBlk
); //insert to function
1921 SHOWNEWBLK(preBranchBlk
, "New loopEndbranch block: ");
1923 BlockT
*newLandBlk
= preBranchBlk
;
1925 CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk
, preExitBlk
,
1927 preExitingBlk
->removeSuccessor(preExitBlk
);
1928 preExitingBlk
->addSuccessor(newLandBlk
);
1930 //it is redundant to add reg = 0 to exitingBlks[0]
1932 // For 1..n th exiting path (the last iteration handles two pathes) create the
1933 // branch to the previous path and the current path.
1934 for (uint32_t i
= 1; i
< numBlks
; ++i
) {
1935 BlockT
*curExitingBlk
= exitingBlks
[i
];
1936 BlockT
*curExitBlk
= exitBlks
[i
];
1937 BlockT
*curBranchBlk
;
1939 if (i
== numBlks
- 1) {
1940 curBranchBlk
= curExitBlk
;
1942 curBranchBlk
= funcRep
->CreateMachineBasicBlock();
1943 funcRep
->push_back(curBranchBlk
); //insert to function
1944 SHOWNEWBLK(curBranchBlk
, "New loopEndbranch block: ");
1947 // Add reg = i to exitingBlks[i].
1948 CFGTraits::insertAssignInstrBefore(curExitingBlk
, passRep
,
1951 // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1952 // (exitingBlks[i], newLandBlk).
1953 CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk
, curExitBlk
,
1955 curExitingBlk
->removeSuccessor(curExitBlk
);
1956 curExitingBlk
->addSuccessor(newLandBlk
);
1958 // add to preBranchBlk the branch instruction:
1959 // if (endBranchReg == preVal)
1964 // preValReg = i - 1
1967 RegiT preValReg
= static_cast<int>
1968 (funcRep
->getRegInfo().createVirtualRegister(I32RC
));
1970 preBranchBlk
->insert(preBranchBlk
->begin(),
1971 tii
->getMovImmInstr(preBranchBlk
->getParent(), preValReg
,
1974 // condResReg = (endBranchReg == preValReg)
1975 RegiT condResReg
= static_cast<int>
1976 (funcRep
->getRegInfo().createVirtualRegister(I32RC
));
1977 BuildMI(preBranchBlk
, DL
, tii
->get(tii
->getIEQOpcode()), condResReg
)
1978 .addReg(endBranchReg
).addReg(preValReg
);
1980 BuildMI(preBranchBlk
, DL
, tii
->get(AMDGPU::BRANCH_COND_i32
))
1981 .addMBB(preExitBlk
).addReg(condResReg
);
1983 preBranchBlk
->addSuccessor(preExitBlk
);
1984 preBranchBlk
->addSuccessor(curBranchBlk
);
1986 // Update preExitingBlk, preExitBlk, preBranchBlk.
1987 preExitingBlk
= curExitingBlk
;
1988 preExitBlk
= curExitBlk
;
1989 preBranchBlk
= curBranchBlk
;
1991 } //end for 1 .. n blocks
1994 } //addLoopEndbranchBlock
1996 template<class PassT
>
1997 typename CFGStructurizer
<PassT
>::PathToKind
1998 CFGStructurizer
<PassT
>::singlePathTo(BlockT
*srcBlk
, BlockT
*dstBlk
,
1999 bool allowSideEntry
) {
2002 if (srcBlk
== dstBlk
) {
2003 return SinglePath_InPath
;
2006 while (srcBlk
&& srcBlk
->succ_size() == 1) {
2007 srcBlk
= *srcBlk
->succ_begin();
2008 if (srcBlk
== dstBlk
) {
2009 return SinglePath_InPath
;
2012 if (!allowSideEntry
&& srcBlk
->pred_size() > 1) {
2013 return Not_SinglePath
;
2017 if (srcBlk
&& srcBlk
->succ_size()==0) {
2018 return SinglePath_NotInPath
;
2021 return Not_SinglePath
;
2024 // If there is a single path from srcBlk to dstBlk, return the last block before
2025 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the
2026 // last block in the path Otherwise, return NULL
2027 template<class PassT
>
2028 typename CFGStructurizer
<PassT
>::BlockT
*
2029 CFGStructurizer
<PassT
>::singlePathEnd(BlockT
*srcBlk
, BlockT
*dstBlk
,
2030 bool allowSideEntry
) {
2033 if (srcBlk
== dstBlk
) {
2037 if (srcBlk
->succ_size() == 0) {
2041 while (srcBlk
&& srcBlk
->succ_size() == 1) {
2042 BlockT
*preBlk
= srcBlk
;
2044 srcBlk
= *srcBlk
->succ_begin();
2045 if (srcBlk
== NULL
) {
2049 if (!allowSideEntry
&& srcBlk
->pred_size() > 1) {
2054 if (srcBlk
&& srcBlk
->succ_size()==0) {
2062 template<class PassT
>
2063 int CFGStructurizer
<PassT
>::cloneOnSideEntryTo(BlockT
*preBlk
, BlockT
*srcBlk
,
2066 assert(preBlk
->isSuccessor(srcBlk
));
2067 while (srcBlk
&& srcBlk
!= dstBlk
) {
2068 assert(srcBlk
->succ_size() == 1);
2069 if (srcBlk
->pred_size() > 1) {
2070 srcBlk
= cloneBlockForPredecessor(srcBlk
, preBlk
);
2075 srcBlk
= *srcBlk
->succ_begin();
2079 } //cloneOnSideEntryTo
2081 template<class PassT
>
2082 typename CFGStructurizer
<PassT
>::BlockT
*
2083 CFGStructurizer
<PassT
>::cloneBlockForPredecessor(BlockT
*curBlk
,
2085 assert(predBlk
->isSuccessor(curBlk
) &&
2086 "succBlk is not a prececessor of curBlk");
2088 BlockT
*cloneBlk
= CFGTraits::clone(curBlk
); //clone instructions
2089 CFGTraits::replaceInstrUseOfBlockWith(predBlk
, curBlk
, cloneBlk
);
2090 //srcBlk, oldBlk, newBlk
2092 predBlk
->removeSuccessor(curBlk
);
2093 predBlk
->addSuccessor(cloneBlk
);
2095 // add all successor to cloneBlk
2096 CFGTraits::cloneSuccessorList(cloneBlk
, curBlk
);
2098 numClonedInstr
+= curBlk
->size();
2101 errs() << "Cloned block: " << "BB"
2102 << curBlk
->getNumber() << "size " << curBlk
->size() << "\n";
2105 SHOWNEWBLK(cloneBlk
, "result of Cloned block: ");
2108 } //cloneBlockForPredecessor
2110 template<class PassT
>
2111 typename CFGStructurizer
<PassT
>::BlockT
*
2112 CFGStructurizer
<PassT
>::exitingBlock2ExitBlock(LoopT
*loopRep
,
2113 BlockT
*exitingBlk
) {
2114 BlockT
*exitBlk
= NULL
;
2116 for (typename
BlockT::succ_iterator iterSucc
= exitingBlk
->succ_begin(),
2117 iterSuccEnd
= exitingBlk
->succ_end();
2118 iterSucc
!= iterSuccEnd
; ++iterSucc
) {
2119 BlockT
*curBlk
= *iterSucc
;
2120 if (!loopRep
->contains(curBlk
)) {
2121 assert(exitBlk
== NULL
);
2126 assert(exitBlk
!= NULL
);
2129 } //exitingBlock2ExitBlock
2131 template<class PassT
>
2132 void CFGStructurizer
<PassT
>::migrateInstruction(BlockT
*srcBlk
,
2134 InstrIterator insertPos
) {
2135 InstrIterator spliceEnd
;
2136 //look for the input branchinstr, not the AMDGPU branchinstr
2137 InstrT
*branchInstr
= CFGTraits::getNormalBlockBranchInstr(srcBlk
);
2138 if (branchInstr
== NULL
) {
2140 errs() << "migrateInstruction don't see branch instr\n" ;
2142 spliceEnd
= srcBlk
->end();
2145 errs() << "migrateInstruction see branch instr\n" ;
2146 branchInstr
->dump();
2148 spliceEnd
= CFGTraits::getInstrPos(srcBlk
, branchInstr
);
2151 errs() << "migrateInstruction before splice dstSize = " << dstBlk
->size()
2152 << "srcSize = " << srcBlk
->size() << "\n";
2155 //splice insert before insertPos
2156 dstBlk
->splice(insertPos
, srcBlk
, srcBlk
->begin(), spliceEnd
);
2159 errs() << "migrateInstruction after splice dstSize = " << dstBlk
->size()
2160 << "srcSize = " << srcBlk
->size() << "\n";
2162 } //migrateInstruction
2164 // normalizeInfiniteLoopExit change
2166 // uncond_br LoopHeader
2170 // cond_br 1 LoopHeader dummyExit
2171 // and return the newly added dummy exit block
2173 template<class PassT
>
2174 typename CFGStructurizer
<PassT
>::BlockT
*
2175 CFGStructurizer
<PassT
>::normalizeInfiniteLoopExit(LoopT
* LoopRep
) {
2178 loopHeader
= LoopRep
->getHeader();
2179 loopLatch
= LoopRep
->getLoopLatch();
2180 BlockT
*dummyExitBlk
= NULL
;
2181 const TargetRegisterClass
* I32RC
= TRI
->getCFGStructurizerRegClass(MVT::i32
);
2182 if (loopHeader
!=NULL
&& loopLatch
!=NULL
) {
2183 InstrT
*branchInstr
= CFGTraits::getLoopendBlockBranchInstr(loopLatch
);
2184 if (branchInstr
!=NULL
&& CFGTraits::isUncondBranch(branchInstr
)) {
2185 dummyExitBlk
= funcRep
->CreateMachineBasicBlock();
2186 funcRep
->push_back(dummyExitBlk
); //insert to function
2187 SHOWNEWBLK(dummyExitBlk
, "DummyExitBlock to normalize infiniteLoop: ");
2189 if (DEBUGME
) errs() << "Old branch instr: " << *branchInstr
<< "\n";
2191 typename
BlockT::iterator insertPos
=
2192 CFGTraits::getInstrPos(loopLatch
, branchInstr
);
2194 funcRep
->getRegInfo().createVirtualRegister(I32RC
);
2195 CFGTraits::insertAssignInstrBefore(insertPos
, passRep
, immReg
, 1);
2197 CFGTraits::insertInstrBefore(insertPos
, AMDGPU::BRANCH_COND_i32
, passRep
);
2198 MachineInstrBuilder(newInstr
).addMBB(loopHeader
).addReg(immReg
, false);
2200 SHOWNEWINSTR(newInstr
);
2202 branchInstr
->eraseFromParent();
2203 loopLatch
->addSuccessor(dummyExitBlk
);
2207 return dummyExitBlk
;
2208 } //normalizeInfiniteLoopExit
2210 template<class PassT
>
2211 void CFGStructurizer
<PassT
>::removeUnconditionalBranch(BlockT
*srcBlk
) {
2212 InstrT
*branchInstr
;
2214 // I saw two unconditional branch in one basic block in example
2215 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2216 while ((branchInstr
= CFGTraits::getLoopendBlockBranchInstr(srcBlk
))
2217 && CFGTraits::isUncondBranch(branchInstr
)) {
2219 errs() << "Removing unconditional branch instruction" ;
2220 branchInstr
->dump();
2222 branchInstr
->eraseFromParent();
2224 } //removeUnconditionalBranch
2226 template<class PassT
>
2227 void CFGStructurizer
<PassT
>::removeRedundantConditionalBranch(BlockT
*srcBlk
) {
2228 if (srcBlk
->succ_size() == 2) {
2229 BlockT
*blk1
= *srcBlk
->succ_begin();
2230 BlockT
*blk2
= *(++srcBlk
->succ_begin());
2233 InstrT
*branchInstr
= CFGTraits::getNormalBlockBranchInstr(srcBlk
);
2234 assert(branchInstr
&& CFGTraits::isCondBranch(branchInstr
));
2236 errs() << "Removing unneeded conditional branch instruction" ;
2237 branchInstr
->dump();
2239 branchInstr
->eraseFromParent();
2240 SHOWNEWBLK(blk1
, "Removing redundant successor");
2241 srcBlk
->removeSuccessor(blk1
);
2244 } //removeRedundantConditionalBranch
2246 template<class PassT
>
2247 void CFGStructurizer
<PassT
>::addDummyExitBlock(SmallVector
<BlockT
*,
2248 DEFAULT_VEC_SLOTS
> &retBlks
) {
2249 BlockT
*dummyExitBlk
= funcRep
->CreateMachineBasicBlock();
2250 funcRep
->push_back(dummyExitBlk
); //insert to function
2251 CFGTraits::insertInstrEnd(dummyExitBlk
, AMDGPU::RETURN
, passRep
);
2253 for (typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::iterator iter
=
2255 iterEnd
= retBlks
.end(); iter
!= iterEnd
; ++iter
) {
2256 BlockT
*curBlk
= *iter
;
2257 InstrT
*curInstr
= CFGTraits::getReturnInstr(curBlk
);
2259 curInstr
->eraseFromParent();
2262 if (curBlk
->size()==0 && curBlk
->pred_size() == 1) {
2264 errs() << "Replace empty block BB" << curBlk
->getNumber()
2265 << " with dummyExitBlock\n";
2267 BlockT
*predb
= *curBlk
->pred_begin();
2268 predb
->removeSuccessor(curBlk
);
2270 } //handle empty curBlk
2272 curBlk
->addSuccessor(dummyExitBlk
);
2274 errs() << "Add dummyExitBlock to BB" << curBlk
->getNumber()
2279 SHOWNEWBLK(dummyExitBlk
, "DummyExitBlock: ");
2280 } //addDummyExitBlock
2282 template<class PassT
>
2283 void CFGStructurizer
<PassT
>::removeSuccessor(BlockT
*srcBlk
) {
2284 while (srcBlk
->succ_size()) {
2285 srcBlk
->removeSuccessor(*srcBlk
->succ_begin());
2289 template<class PassT
>
2290 void CFGStructurizer
<PassT
>::recordSccnum(BlockT
*srcBlk
, int sccNum
) {
2291 BlockInfo
*&srcBlkInfo
= blockInfoMap
[srcBlk
];
2293 if (srcBlkInfo
== NULL
) {
2294 srcBlkInfo
= new BlockInfo();
2297 srcBlkInfo
->sccNum
= sccNum
;
2300 template<class PassT
>
2301 int CFGStructurizer
<PassT
>::getSCCNum(BlockT
*srcBlk
) {
2302 BlockInfo
*srcBlkInfo
= blockInfoMap
[srcBlk
];
2303 return srcBlkInfo
? srcBlkInfo
->sccNum
: INVALIDSCCNUM
;
2306 template<class PassT
>
2307 void CFGStructurizer
<PassT
>::retireBlock(BlockT
*dstBlk
, BlockT
*srcBlk
) {
2309 errs() << "Retiring BB" << srcBlk
->getNumber() << "\n";
2312 BlockInfo
*&srcBlkInfo
= blockInfoMap
[srcBlk
];
2314 if (srcBlkInfo
== NULL
) {
2315 srcBlkInfo
= new BlockInfo();
2318 srcBlkInfo
->isRetired
= true;
2319 //int i = srcBlk->succ_size();
2320 //int j = srcBlk->pred_size();
2321 assert(srcBlk
->succ_size() == 0 && srcBlk
->pred_size() == 0
2322 && "can't retire block yet");
2325 template<class PassT
>
2326 bool CFGStructurizer
<PassT
>::isRetiredBlock(BlockT
*srcBlk
) {
2327 BlockInfo
*srcBlkInfo
= blockInfoMap
[srcBlk
];
2328 return (srcBlkInfo
&& srcBlkInfo
->isRetired
);
2331 template<class PassT
>
2332 bool CFGStructurizer
<PassT
>::isActiveLoophead(BlockT
*curBlk
) {
2333 LoopT
*loopRep
= loopInfo
->getLoopFor(curBlk
);
2334 while (loopRep
&& loopRep
->getHeader() == curBlk
) {
2335 LoopLandInfo
*loopLand
= getLoopLandInfo(loopRep
);
2337 if(loopLand
== NULL
)
2340 BlockT
*landBlk
= loopLand
->landBlk
;
2342 if (!isRetiredBlock(landBlk
)) {
2346 loopRep
= loopRep
->getParentLoop();
2350 } //isActiveLoophead
2352 template<class PassT
>
2353 bool CFGStructurizer
<PassT
>::needMigrateBlock(BlockT
*blk
) {
2354 const unsigned blockSizeThreshold
= 30;
2355 const unsigned cloneInstrThreshold
= 100;
2357 bool multiplePreds
= blk
&& (blk
->pred_size() > 1);
2362 unsigned blkSize
= blk
->size();
2363 return ((blkSize
> blockSizeThreshold
)
2364 && (blkSize
* (blk
->pred_size() - 1) > cloneInstrThreshold
));
2365 } //needMigrateBlock
2367 template<class PassT
>
2368 typename CFGStructurizer
<PassT
>::BlockT
*
2369 CFGStructurizer
<PassT
>::recordLoopLandBlock(LoopT
*loopRep
, BlockT
*landBlk
,
2370 BlockTSmallerVector
&exitBlks
,
2371 std::set
<BlockT
*> &exitBlkSet
) {
2372 SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
> inpathBlks
; //in exit path blocks
2374 for (typename
BlockT::pred_iterator predIter
= landBlk
->pred_begin(),
2375 predIterEnd
= landBlk
->pred_end();
2376 predIter
!= predIterEnd
; ++predIter
) {
2377 BlockT
*curBlk
= *predIter
;
2378 if (loopRep
->contains(curBlk
) || exitBlkSet
.count(curBlk
)) {
2379 inpathBlks
.push_back(curBlk
);
2383 //if landBlk has predecessors that are not in the given loop,
2384 //create a new block
2385 BlockT
*newLandBlk
= landBlk
;
2386 if (inpathBlks
.size() != landBlk
->pred_size()) {
2387 newLandBlk
= funcRep
->CreateMachineBasicBlock();
2388 funcRep
->push_back(newLandBlk
); //insert to function
2389 newLandBlk
->addSuccessor(landBlk
);
2390 for (typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::iterator iter
=
2392 iterEnd
= inpathBlks
.end(); iter
!= iterEnd
; ++iter
) {
2393 BlockT
*curBlk
= *iter
;
2394 CFGTraits::replaceInstrUseOfBlockWith(curBlk
, landBlk
, newLandBlk
);
2395 //srcBlk, oldBlk, newBlk
2396 curBlk
->removeSuccessor(landBlk
);
2397 curBlk
->addSuccessor(newLandBlk
);
2399 for (size_t i
= 0, tot
= exitBlks
.size(); i
< tot
; ++i
) {
2400 if (exitBlks
[i
] == landBlk
) {
2401 exitBlks
[i
] = newLandBlk
;
2404 SHOWNEWBLK(newLandBlk
, "NewLandingBlock: ");
2407 setLoopLandBlock(loopRep
, newLandBlk
);
2410 } // recordLoopbreakLand
2412 template<class PassT
>
2413 void CFGStructurizer
<PassT
>::setLoopLandBlock(LoopT
*loopRep
, BlockT
*blk
) {
2414 LoopLandInfo
*&theEntry
= loopLandInfoMap
[loopRep
];
2416 if (theEntry
== NULL
) {
2417 theEntry
= new LoopLandInfo();
2419 assert(theEntry
->landBlk
== NULL
);
2422 blk
= funcRep
->CreateMachineBasicBlock();
2423 funcRep
->push_back(blk
); //insert to function
2424 SHOWNEWBLK(blk
, "DummyLandingBlock for loop without break: ");
2427 theEntry
->landBlk
= blk
;
2430 errs() << "setLoopLandBlock loop-header = BB"
2431 << loopRep
->getHeader()->getNumber()
2432 << " landing-block = BB" << blk
->getNumber() << "\n";
2434 } // setLoopLandBlock
2436 template<class PassT
>
2437 void CFGStructurizer
<PassT
>::addLoopBreakOnReg(LoopT
*loopRep
, RegiT regNum
) {
2438 LoopLandInfo
*&theEntry
= loopLandInfoMap
[loopRep
];
2440 if (theEntry
== NULL
) {
2441 theEntry
= new LoopLandInfo();
2444 theEntry
->breakOnRegs
.insert(regNum
);
2447 errs() << "addLoopBreakOnReg loop-header = BB"
2448 << loopRep
->getHeader()->getNumber()
2449 << " regNum = " << regNum
<< "\n";
2451 } // addLoopBreakOnReg
2453 template<class PassT
>
2454 void CFGStructurizer
<PassT
>::addLoopContOnReg(LoopT
*loopRep
, RegiT regNum
) {
2455 LoopLandInfo
*&theEntry
= loopLandInfoMap
[loopRep
];
2457 if (theEntry
== NULL
) {
2458 theEntry
= new LoopLandInfo();
2460 theEntry
->contOnRegs
.insert(regNum
);
2463 errs() << "addLoopContOnReg loop-header = BB"
2464 << loopRep
->getHeader()->getNumber()
2465 << " regNum = " << regNum
<< "\n";
2467 } // addLoopContOnReg
2469 template<class PassT
>
2470 void CFGStructurizer
<PassT
>::addLoopBreakInitReg(LoopT
*loopRep
, RegiT regNum
) {
2471 LoopLandInfo
*&theEntry
= loopLandInfoMap
[loopRep
];
2473 if (theEntry
== NULL
) {
2474 theEntry
= new LoopLandInfo();
2476 theEntry
->breakInitRegs
.insert(regNum
);
2479 errs() << "addLoopBreakInitReg loop-header = BB"
2480 << loopRep
->getHeader()->getNumber()
2481 << " regNum = " << regNum
<< "\n";
2483 } // addLoopBreakInitReg
2485 template<class PassT
>
2486 void CFGStructurizer
<PassT
>::addLoopContInitReg(LoopT
*loopRep
, RegiT regNum
) {
2487 LoopLandInfo
*&theEntry
= loopLandInfoMap
[loopRep
];
2489 if (theEntry
== NULL
) {
2490 theEntry
= new LoopLandInfo();
2492 theEntry
->contInitRegs
.insert(regNum
);
2495 errs() << "addLoopContInitReg loop-header = BB"
2496 << loopRep
->getHeader()->getNumber()
2497 << " regNum = " << regNum
<< "\n";
2499 } // addLoopContInitReg
2501 template<class PassT
>
2502 void CFGStructurizer
<PassT
>::addLoopEndbranchInitReg(LoopT
*loopRep
,
2504 LoopLandInfo
*&theEntry
= loopLandInfoMap
[loopRep
];
2506 if (theEntry
== NULL
) {
2507 theEntry
= new LoopLandInfo();
2509 theEntry
->endbranchInitRegs
.insert(regNum
);
2513 errs() << "addLoopEndbranchInitReg loop-header = BB"
2514 << loopRep
->getHeader()->getNumber()
2515 << " regNum = " << regNum
<< "\n";
2517 } // addLoopEndbranchInitReg
2519 template<class PassT
>
2520 typename CFGStructurizer
<PassT
>::LoopLandInfo
*
2521 CFGStructurizer
<PassT
>::getLoopLandInfo(LoopT
*loopRep
) {
2522 LoopLandInfo
*&theEntry
= loopLandInfoMap
[loopRep
];
2525 } // getLoopLandInfo
2527 template<class PassT
>
2528 typename CFGStructurizer
<PassT
>::BlockT
*
2529 CFGStructurizer
<PassT
>::getLoopLandBlock(LoopT
*loopRep
) {
2530 LoopLandInfo
*&theEntry
= loopLandInfoMap
[loopRep
];
2532 return theEntry
? theEntry
->landBlk
: NULL
;
2533 } // getLoopLandBlock
2536 template<class PassT
>
2537 bool CFGStructurizer
<PassT
>::hasBackEdge(BlockT
*curBlk
) {
2538 LoopT
*loopRep
= loopInfo
->getLoopFor(curBlk
);
2539 if (loopRep
== NULL
)
2542 BlockT
*loopHeader
= loopRep
->getHeader();
2544 return curBlk
->isSuccessor(loopHeader
);
2548 template<class PassT
>
2549 unsigned CFGStructurizer
<PassT
>::getLoopDepth(LoopT
*loopRep
) {
2550 return loopRep
? loopRep
->getLoopDepth() : 0;
2553 template<class PassT
>
2554 int CFGStructurizer
<PassT
>::countActiveBlock
2555 (typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::const_iterator iterStart
,
2556 typename SmallVector
<BlockT
*, DEFAULT_VEC_SLOTS
>::const_iterator iterEnd
) {
2558 while (iterStart
!= iterEnd
) {
2559 if (!isRetiredBlock(*iterStart
)) {
2566 } //countActiveBlock
2568 // This is work around solution for findNearestCommonDominator not avaiable to
2569 // post dom a proper fix should go to Dominators.h.
2571 template<class PassT
>
2572 typename CFGStructurizer
<PassT
>::BlockT
*
2573 CFGStructurizer
<PassT
>::findNearestCommonPostDom(BlockT
*blk1
, BlockT
*blk2
) {
2575 if (postDomTree
->dominates(blk1
, blk2
)) {
2578 if (postDomTree
->dominates(blk2
, blk1
)) {
2582 DomTreeNodeT
*node1
= postDomTree
->getNode(blk1
);
2583 DomTreeNodeT
*node2
= postDomTree
->getNode(blk2
);
2585 // Handle newly cloned node.
2586 if (node1
== NULL
&& blk1
->succ_size() == 1) {
2587 return findNearestCommonPostDom(*blk1
->succ_begin(), blk2
);
2589 if (node2
== NULL
&& blk2
->succ_size() == 1) {
2590 return findNearestCommonPostDom(blk1
, *blk2
->succ_begin());
2593 if (node1
== NULL
|| node2
== NULL
) {
2597 node1
= node1
->getIDom();
2599 if (postDomTree
->dominates(node1
, node2
)) {
2600 return node1
->getBlock();
2602 node1
= node1
->getIDom();
2608 template<class PassT
>
2609 typename CFGStructurizer
<PassT
>::BlockT
*
2610 CFGStructurizer
<PassT
>::findNearestCommonPostDom
2611 (typename
std::set
<BlockT
*> &blks
) {
2613 typename
std::set
<BlockT
*>::const_iterator iter
= blks
.begin();
2614 typename
std::set
<BlockT
*>::const_iterator iterEnd
= blks
.end();
2615 for (commonDom
= *iter
; iter
!= iterEnd
&& commonDom
!= NULL
; ++iter
) {
2616 BlockT
*curBlk
= *iter
;
2617 if (curBlk
!= commonDom
) {
2618 commonDom
= findNearestCommonPostDom(curBlk
, commonDom
);
2623 errs() << "Common post dominator for exit blocks is ";
2625 errs() << "BB" << commonDom
->getNumber() << "\n";
2632 } //findNearestCommonPostDom
2634 } //end namespace llvm
2639 //===----------------------------------------------------------------------===//
2641 // CFGStructurizer for AMDGPU
2643 //===----------------------------------------------------------------------===//
2646 using namespace llvmCFGStruct
;
2650 class AMDGPUCFGStructurizer
: public MachineFunctionPass
2653 typedef MachineInstr InstructionType
;
2654 typedef MachineFunction FunctionType
;
2655 typedef MachineBasicBlock BlockType
;
2656 typedef MachineLoopInfo LoopinfoType
;
2657 typedef MachineDominatorTree DominatortreeType
;
2658 typedef MachinePostDominatorTree PostDominatortreeType
;
2659 typedef MachineDomTreeNode DomTreeNodeType
;
2660 typedef MachineLoop LoopType
;
2664 const TargetInstrInfo
*TII
;
2665 const AMDGPURegisterInfo
*TRI
;
2668 AMDGPUCFGStructurizer(char &pid
, TargetMachine
&tm
);
2669 const TargetInstrInfo
*getTargetInstrInfo() const;
2670 //bool runOnMachineFunction(MachineFunction &F);
2674 }; //end of class AMDGPUCFGStructurizer
2676 //char AMDGPUCFGStructurizer::ID = 0;
2677 } //end of namespace llvm
2678 AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid
, TargetMachine
&tm
2680 : MachineFunctionPass(pid
), TM(tm
), TII(tm
.getInstrInfo()),
2681 TRI(static_cast<const AMDGPURegisterInfo
*>(tm
.getRegisterInfo())
2685 const TargetInstrInfo
*AMDGPUCFGStructurizer::getTargetInstrInfo() const {
2688 //===----------------------------------------------------------------------===//
2692 //===----------------------------------------------------------------------===//
2695 using namespace llvmCFGStruct
;
2699 class AMDGPUCFGPrepare
: public AMDGPUCFGStructurizer
2705 AMDGPUCFGPrepare(TargetMachine
&tm
);
2707 virtual const char *getPassName() const;
2708 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const;
2710 bool runOnMachineFunction(MachineFunction
&F
);
2714 }; //end of class AMDGPUCFGPrepare
2716 char AMDGPUCFGPrepare::ID
= 0;
2717 } //end of namespace llvm
2719 AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine
&tm
)
2720 : AMDGPUCFGStructurizer(ID
, tm
)
2723 const char *AMDGPUCFGPrepare::getPassName() const {
2724 return "AMD IL Control Flow Graph Preparation Pass";
2727 void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage
&AU
) const {
2728 AU
.addPreserved
<MachineFunctionAnalysis
>();
2729 AU
.addRequired
<MachineFunctionAnalysis
>();
2730 AU
.addRequired
<MachineDominatorTree
>();
2731 AU
.addRequired
<MachinePostDominatorTree
>();
2732 AU
.addRequired
<MachineLoopInfo
>();
2735 //===----------------------------------------------------------------------===//
2739 //===----------------------------------------------------------------------===//
2742 using namespace llvmCFGStruct
;
2746 class AMDGPUCFGPerform
: public AMDGPUCFGStructurizer
2752 AMDGPUCFGPerform(TargetMachine
&tm
);
2753 virtual const char *getPassName() const;
2754 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const;
2755 bool runOnMachineFunction(MachineFunction
&F
);
2759 }; //end of class AMDGPUCFGPerform
2761 char AMDGPUCFGPerform::ID
= 0;
2762 } //end of namespace llvm
2764 AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine
&tm
)
2765 : AMDGPUCFGStructurizer(ID
, tm
)
2769 const char *AMDGPUCFGPerform::getPassName() const {
2770 return "AMD IL Control Flow Graph structurizer Pass";
2773 void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage
&AU
) const {
2774 AU
.addPreserved
<MachineFunctionAnalysis
>();
2775 AU
.addRequired
<MachineFunctionAnalysis
>();
2776 AU
.addRequired
<MachineDominatorTree
>();
2777 AU
.addRequired
<MachinePostDominatorTree
>();
2778 AU
.addRequired
<MachineLoopInfo
>();
2781 //===----------------------------------------------------------------------===//
2783 // CFGStructTraits<AMDGPUCFGStructurizer>
2785 //===----------------------------------------------------------------------===//
2787 namespace llvmCFGStruct
2789 // this class is tailor to the AMDGPU backend
2791 struct CFGStructTraits
<AMDGPUCFGStructurizer
>
2795 static int getBreakNzeroOpcode(int oldOpcode
) {
2797 case AMDGPU::JUMP
: return AMDGPU::BREAK_LOGICALNZ_i32
;
2799 assert(0 && "internal error");
2804 static int getBreakZeroOpcode(int oldOpcode
) {
2806 case AMDGPU::JUMP
: return AMDGPU::BREAK_LOGICALZ_i32
;
2808 assert(0 && "internal error");
2813 static int getBranchNzeroOpcode(int oldOpcode
) {
2815 case AMDGPU::JUMP
: return AMDGPU::IF_LOGICALNZ_i32
;
2816 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND
, AMDGPU::IF_LOGICALNZ
);
2817 case AMDGPU::SI_IF_NZ
: return AMDGPU::SI_IF_NZ
;
2819 assert(0 && "internal error");
2824 static int getBranchZeroOpcode(int oldOpcode
) {
2826 case AMDGPU::JUMP
: return AMDGPU::IF_LOGICALZ_i32
;
2827 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND
, AMDGPU::IF_LOGICALZ
);
2828 case AMDGPU::SI_IF_Z
: return AMDGPU::SI_IF_Z
;
2830 assert(0 && "internal error");
2835 static int getContinueNzeroOpcode(int oldOpcode
)
2838 case AMDGPU::JUMP
: return AMDGPU::CONTINUE_LOGICALNZ_i32
;
2840 assert(0 && "internal error");
2845 static int getContinueZeroOpcode(int oldOpcode
) {
2847 case AMDGPU::JUMP
: return AMDGPU::CONTINUE_LOGICALZ_i32
;
2849 assert(0 && "internal error");
2854 // the explicitly represented branch target is the true branch target
2855 #define getExplicitBranch getTrueBranch
2856 #define setExplicitBranch setTrueBranch
2858 static MachineBasicBlock
*getTrueBranch(MachineInstr
*instr
) {
2859 return instr
->getOperand(0).getMBB();
2862 static void setTrueBranch(MachineInstr
*instr
, MachineBasicBlock
*blk
) {
2863 instr
->getOperand(0).setMBB(blk
);
2866 static MachineBasicBlock
*
2867 getFalseBranch(MachineBasicBlock
*blk
, MachineInstr
*instr
) {
2868 assert(blk
->succ_size() == 2);
2869 MachineBasicBlock
*trueBranch
= getTrueBranch(instr
);
2870 MachineBasicBlock::succ_iterator iter
= blk
->succ_begin();
2871 MachineBasicBlock::succ_iterator iterNext
= iter
;
2874 return (*iter
== trueBranch
) ? *iterNext
: *iter
;
2877 static bool isCondBranch(MachineInstr
*instr
) {
2878 switch (instr
->getOpcode()) {
2880 return instr
->getOperand(instr
->findFirstPredOperandIdx()).getReg() != 0;
2881 ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND
);
2882 case AMDGPU::SI_IF_NZ
:
2883 case AMDGPU::SI_IF_Z
:
2891 static bool isUncondBranch(MachineInstr
*instr
) {
2892 switch (instr
->getOpcode()) {
2894 return instr
->getOperand(instr
->findFirstPredOperandIdx()).getReg() == 0;
2901 static DebugLoc
getLastDebugLocInBB(MachineBasicBlock
*blk
) {
2902 //get DebugLoc from the first MachineBasicBlock instruction with debug info
2904 for (MachineBasicBlock::iterator iter
= blk
->begin(); iter
!= blk
->end(); ++iter
) {
2905 MachineInstr
*instr
= &(*iter
);
2906 if (instr
->getDebugLoc().isUnknown() == false) {
2907 DL
= instr
->getDebugLoc();
2913 static MachineInstr
*getNormalBlockBranchInstr(MachineBasicBlock
*blk
) {
2914 MachineBasicBlock::reverse_iterator iter
= blk
->rbegin();
2915 MachineInstr
*instr
= &*iter
;
2916 if (instr
&& (isCondBranch(instr
) || isUncondBranch(instr
))) {
2922 // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2924 // BB with backward-edge could have move instructions after the branch
2925 // instruction. Such move instruction "belong to" the loop backward-edge.
2927 static MachineInstr
*getLoopendBlockBranchInstr(MachineBasicBlock
*blk
) {
2928 const AMDGPUInstrInfo
* TII
= static_cast<const AMDGPUInstrInfo
*>(
2929 blk
->getParent()->getTarget().getInstrInfo());
2931 for (MachineBasicBlock::reverse_iterator iter
= blk
->rbegin(),
2932 iterEnd
= blk
->rend(); iter
!= iterEnd
; ++iter
) {
2934 MachineInstr
*instr
= &*iter
;
2936 if (isCondBranch(instr
) || isUncondBranch(instr
)) {
2938 } else if (!TII
->isMov(instr
->getOpcode())) {
2946 static MachineInstr
*getReturnInstr(MachineBasicBlock
*blk
) {
2947 MachineBasicBlock::reverse_iterator iter
= blk
->rbegin();
2948 if (iter
!= blk
->rend()) {
2949 MachineInstr
*instr
= &(*iter
);
2950 if (instr
->getOpcode() == AMDGPU::RETURN
) {
2957 static MachineInstr
*getContinueInstr(MachineBasicBlock
*blk
) {
2958 MachineBasicBlock::reverse_iterator iter
= blk
->rbegin();
2959 if (iter
!= blk
->rend()) {
2960 MachineInstr
*instr
= &(*iter
);
2961 if (instr
->getOpcode() == AMDGPU::CONTINUE
) {
2968 static MachineInstr
*getLoopBreakInstr(MachineBasicBlock
*blk
) {
2969 for (MachineBasicBlock::iterator iter
= blk
->begin(); (iter
!= blk
->end()); ++iter
) {
2970 MachineInstr
*instr
= &(*iter
);
2971 if ((instr
->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32
) || (instr
->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32
)) {
2978 static bool isReturnBlock(MachineBasicBlock
*blk
) {
2979 MachineInstr
*instr
= getReturnInstr(blk
);
2980 bool isReturn
= (blk
->succ_size() == 0);
2983 } else if (isReturn
) {
2985 errs() << "BB" << blk
->getNumber()
2986 <<" is return block without RETURN instr\n";
2993 static MachineBasicBlock::iterator
2994 getInstrPos(MachineBasicBlock
*blk
, MachineInstr
*instr
) {
2995 assert(instr
->getParent() == blk
&& "instruction doesn't belong to block");
2996 MachineBasicBlock::iterator iter
= blk
->begin();
2997 MachineBasicBlock::iterator iterEnd
= blk
->end();
2998 while (&(*iter
) != instr
&& iter
!= iterEnd
) {
3002 assert(iter
!= iterEnd
);
3006 static MachineInstr
*insertInstrBefore(MachineBasicBlock
*blk
, int newOpcode
,
3007 AMDGPUCFGStructurizer
*passRep
) {
3008 return insertInstrBefore(blk
,newOpcode
,passRep
,DebugLoc());
3009 } //insertInstrBefore
3011 static MachineInstr
*insertInstrBefore(MachineBasicBlock
*blk
, int newOpcode
,
3012 AMDGPUCFGStructurizer
*passRep
, DebugLoc DL
) {
3013 const TargetInstrInfo
*tii
= passRep
->getTargetInstrInfo();
3014 MachineInstr
*newInstr
=
3015 blk
->getParent()->CreateMachineInstr(tii
->get(newOpcode
), DL
);
3017 MachineBasicBlock::iterator res
;
3018 if (blk
->begin() != blk
->end()) {
3019 blk
->insert(blk
->begin(), newInstr
);
3021 blk
->push_back(newInstr
);
3024 SHOWNEWINSTR(newInstr
);
3027 } //insertInstrBefore
3029 static void insertInstrEnd(MachineBasicBlock
*blk
, int newOpcode
,
3030 AMDGPUCFGStructurizer
*passRep
) {
3031 insertInstrEnd(blk
,newOpcode
,passRep
,DebugLoc());
3034 static void insertInstrEnd(MachineBasicBlock
*blk
, int newOpcode
,
3035 AMDGPUCFGStructurizer
*passRep
, DebugLoc DL
) {
3036 const TargetInstrInfo
*tii
= passRep
->getTargetInstrInfo();
3037 MachineInstr
*newInstr
= blk
->getParent()
3038 ->CreateMachineInstr(tii
->get(newOpcode
), DL
);
3040 blk
->push_back(newInstr
);
3041 //assume the instruction doesn't take any reg operand ...
3043 SHOWNEWINSTR(newInstr
);
3046 static MachineInstr
*insertInstrBefore(MachineBasicBlock::iterator instrPos
,
3048 AMDGPUCFGStructurizer
*passRep
) {
3049 MachineInstr
*oldInstr
= &(*instrPos
);
3050 const TargetInstrInfo
*tii
= passRep
->getTargetInstrInfo();
3051 MachineBasicBlock
*blk
= oldInstr
->getParent();
3052 MachineInstr
*newInstr
=
3053 blk
->getParent()->CreateMachineInstr(tii
->get(newOpcode
),
3056 blk
->insert(instrPos
, newInstr
);
3057 //assume the instruction doesn't take any reg operand ...
3059 SHOWNEWINSTR(newInstr
);
3061 } //insertInstrBefore
3063 static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos
,
3065 AMDGPUCFGStructurizer
*passRep
,
3067 MachineInstr
*oldInstr
= &(*instrPos
);
3068 const TargetInstrInfo
*tii
= passRep
->getTargetInstrInfo();
3069 MachineBasicBlock
*blk
= oldInstr
->getParent();
3070 MachineInstr
*newInstr
=
3071 blk
->getParent()->CreateMachineInstr(tii
->get(newOpcode
),
3074 blk
->insert(instrPos
, newInstr
);
3075 MachineInstrBuilder(newInstr
).addReg(oldInstr
->getOperand(1).getReg(),
3078 SHOWNEWINSTR(newInstr
);
3079 //erase later oldInstr->eraseFromParent();
3080 } //insertCondBranchBefore
3082 static void insertCondBranchBefore(MachineBasicBlock
*blk
,
3083 MachineBasicBlock::iterator insertPos
,
3085 AMDGPUCFGStructurizer
*passRep
,
3088 const TargetInstrInfo
*tii
= passRep
->getTargetInstrInfo();
3090 MachineInstr
*newInstr
=
3091 blk
->getParent()->CreateMachineInstr(tii
->get(newOpcode
), DL
);
3094 blk
->insert(insertPos
, newInstr
);
3095 MachineInstrBuilder(newInstr
).addReg(regNum
, false);
3097 SHOWNEWINSTR(newInstr
);
3098 } //insertCondBranchBefore
3100 static void insertCondBranchEnd(MachineBasicBlock
*blk
,
3102 AMDGPUCFGStructurizer
*passRep
,
3104 const TargetInstrInfo
*tii
= passRep
->getTargetInstrInfo();
3105 MachineInstr
*newInstr
=
3106 blk
->getParent()->CreateMachineInstr(tii
->get(newOpcode
), DebugLoc());
3108 blk
->push_back(newInstr
);
3109 MachineInstrBuilder(newInstr
).addReg(regNum
, false);
3111 SHOWNEWINSTR(newInstr
);
3112 } //insertCondBranchEnd
3115 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos
,
3116 AMDGPUCFGStructurizer
*passRep
,
3117 RegiT regNum
, int regVal
) {
3118 MachineInstr
*oldInstr
= &(*instrPos
);
3119 const AMDGPUInstrInfo
*tii
=
3120 static_cast<const AMDGPUInstrInfo
*>(passRep
->getTargetInstrInfo());
3121 MachineBasicBlock
*blk
= oldInstr
->getParent();
3122 MachineInstr
*newInstr
= tii
->getMovImmInstr(blk
->getParent(), regNum
,
3124 blk
->insert(instrPos
, newInstr
);
3126 SHOWNEWINSTR(newInstr
);
3127 } //insertAssignInstrBefore
3129 static void insertAssignInstrBefore(MachineBasicBlock
*blk
,
3130 AMDGPUCFGStructurizer
*passRep
,
3131 RegiT regNum
, int regVal
) {
3132 const AMDGPUInstrInfo
*tii
=
3133 static_cast<const AMDGPUInstrInfo
*>(passRep
->getTargetInstrInfo());
3135 MachineInstr
*newInstr
= tii
->getMovImmInstr(blk
->getParent(), regNum
,
3137 if (blk
->begin() != blk
->end()) {
3138 blk
->insert(blk
->begin(), newInstr
);
3140 blk
->push_back(newInstr
);
3143 SHOWNEWINSTR(newInstr
);
3145 } //insertInstrBefore
3147 static void insertCompareInstrBefore(MachineBasicBlock
*blk
,
3148 MachineBasicBlock::iterator instrPos
,
3149 AMDGPUCFGStructurizer
*passRep
,
3150 RegiT dstReg
, RegiT src1Reg
,
3152 const AMDGPUInstrInfo
*tii
=
3153 static_cast<const AMDGPUInstrInfo
*>(passRep
->getTargetInstrInfo());
3154 MachineInstr
*newInstr
=
3155 blk
->getParent()->CreateMachineInstr(tii
->get(tii
->getIEQOpcode()), DebugLoc());
3157 MachineInstrBuilder(newInstr
).addReg(dstReg
, RegState::Define
); //set target
3158 MachineInstrBuilder(newInstr
).addReg(src1Reg
); //set src value
3159 MachineInstrBuilder(newInstr
).addReg(src2Reg
); //set src value
3161 blk
->insert(instrPos
, newInstr
);
3162 SHOWNEWINSTR(newInstr
);
3164 } //insertCompareInstrBefore
3166 static void cloneSuccessorList(MachineBasicBlock
*dstBlk
,
3167 MachineBasicBlock
*srcBlk
) {
3168 for (MachineBasicBlock::succ_iterator iter
= srcBlk
->succ_begin(),
3169 iterEnd
= srcBlk
->succ_end(); iter
!= iterEnd
; ++iter
) {
3170 dstBlk
->addSuccessor(*iter
); // *iter's predecessor is also taken care of
3172 } //cloneSuccessorList
3174 static MachineBasicBlock
*clone(MachineBasicBlock
*srcBlk
) {
3175 MachineFunction
*func
= srcBlk
->getParent();
3176 MachineBasicBlock
*newBlk
= func
->CreateMachineBasicBlock();
3177 func
->push_back(newBlk
); //insert to function
3178 //newBlk->setNumber(srcBlk->getNumber());
3179 for (MachineBasicBlock::iterator iter
= srcBlk
->begin(),
3180 iterEnd
= srcBlk
->end();
3181 iter
!= iterEnd
; ++iter
) {
3182 MachineInstr
*instr
= func
->CloneMachineInstr(iter
);
3183 newBlk
->push_back(instr
);
3188 //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
3189 //the AMDGPU instruction is not recognized as terminator fix this and retire
3191 static void replaceInstrUseOfBlockWith(MachineBasicBlock
*srcBlk
,
3192 MachineBasicBlock
*oldBlk
,
3193 MachineBasicBlock
*newBlk
) {
3194 MachineInstr
*branchInstr
= getLoopendBlockBranchInstr(srcBlk
);
3195 if (branchInstr
&& isCondBranch(branchInstr
) &&
3196 getExplicitBranch(branchInstr
) == oldBlk
) {
3197 setExplicitBranch(branchInstr
, newBlk
);
3201 static void wrapup(MachineBasicBlock
*entryBlk
) {
3202 assert((!entryBlk
->getParent()->getJumpTableInfo()
3203 || entryBlk
->getParent()->getJumpTableInfo()->isEmpty())
3204 && "found a jump table");
3206 //collect continue right before endloop
3207 SmallVector
<MachineInstr
*, DEFAULT_VEC_SLOTS
> contInstr
;
3208 MachineBasicBlock::iterator pre
= entryBlk
->begin();
3209 MachineBasicBlock::iterator iterEnd
= entryBlk
->end();
3210 MachineBasicBlock::iterator iter
= pre
;
3211 while (iter
!= iterEnd
) {
3212 if (pre
->getOpcode() == AMDGPU::CONTINUE
3213 && iter
->getOpcode() == AMDGPU::ENDLOOP
) {
3214 contInstr
.push_back(pre
);
3220 //delete continue right before endloop
3221 for (unsigned i
= 0; i
< contInstr
.size(); ++i
) {
3222 contInstr
[i
]->eraseFromParent();
3225 // TODO to fix up jump table so later phase won't be confused. if
3226 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3227 // there isn't such an interface yet. alternatively, replace all the other
3228 // blocks in the jump table with the entryBlk //}
3232 static MachineDominatorTree
*getDominatorTree(AMDGPUCFGStructurizer
&pass
) {
3233 return &pass
.getAnalysis
<MachineDominatorTree
>();
3236 static MachinePostDominatorTree
*
3237 getPostDominatorTree(AMDGPUCFGStructurizer
&pass
) {
3238 return &pass
.getAnalysis
<MachinePostDominatorTree
>();
3241 static MachineLoopInfo
*getLoopInfo(AMDGPUCFGStructurizer
&pass
) {
3242 return &pass
.getAnalysis
<MachineLoopInfo
>();
3244 }; // template class CFGStructTraits
3245 } //end of namespace llvm
3247 // createAMDGPUCFGPreparationPass- Returns a pass
3248 FunctionPass
*llvm::createAMDGPUCFGPreparationPass(TargetMachine
&tm
3250 return new AMDGPUCFGPrepare(tm
);
3253 bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction
&func
) {
3254 return llvmCFGStruct::CFGStructurizer
<AMDGPUCFGStructurizer
>().prepare(func
,
3259 // createAMDGPUCFGStructurizerPass- Returns a pass
3260 FunctionPass
*llvm::createAMDGPUCFGStructurizerPass(TargetMachine
&tm
3262 return new AMDGPUCFGPerform(tm
);
3265 bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction
&func
) {
3266 return llvmCFGStruct::CFGStructurizer
<AMDGPUCFGStructurizer
>().run(func
,
3271 //end of file newline goes below