mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
Preserve domtree and loop-simplify for runtime unrolling.
Mostly straightforward changes; we just didn't do the computation before. One sort of interesting change in LoopUnroll.cpp: we weren't handling dominance for children of the loop latch correctly, but foldBlockIntoPredecessor hid the problem for complete unrolling.

Currently punting on loop peeling; made some minor changes to isolate that problem to LoopUnrollPeel.cpp.

Adds a flag -unroll-verify-domtree; it verifies the domtree immediately after we finish updating it. This is on by default for +Asserts builds.

Differential Revision: https://reviews.llvm.org/D28073

llvm-svn: 292447
This commit is contained in:
parent
c5bd88df38
commit
21d28d5c67
@ -56,7 +56,7 @@ void computePeelCount(Loop *L, unsigned LoopSize,
|
||||
TargetTransformInfo::UnrollingPreferences &UP);
|
||||
|
||||
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE,
|
||||
DominatorTree *DT, bool PreserveLCSSA);
|
||||
DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);
|
||||
|
||||
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
|
||||
}
|
||||
|
@ -51,6 +51,16 @@ UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
|
||||
cl::desc("Allow runtime unrolled loops to be unrolled "
|
||||
"with epilog instead of prolog."));
|
||||
|
||||
static cl::opt<bool>
|
||||
UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
|
||||
cl::desc("Verify domtree after unrolling"),
|
||||
#ifdef NDEBUG
|
||||
cl::init(false)
|
||||
#else
|
||||
cl::init(true)
|
||||
#endif
|
||||
);
|
||||
|
||||
/// Convert the instruction operands from referencing the current values into
|
||||
/// those specified by VMap.
|
||||
static inline void remapInstruction(Instruction *I,
|
||||
@ -327,7 +337,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
|
||||
"and peeling for the same loop");
|
||||
|
||||
if (PeelCount)
|
||||
peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA);
|
||||
peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
|
||||
|
||||
// Loops containing convergent instructions must have a count that divides
|
||||
// their TripMultiple.
|
||||
@ -612,14 +622,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
|
||||
Term->eraseFromParent();
|
||||
}
|
||||
}
|
||||
|
||||
// Update dominators of blocks we might reach through exits.
|
||||
// Immediate dominator of such block might change, because we add more
|
||||
// routes which can lead to the exit: we can now reach it from the copied
|
||||
// iterations too. Thus, the new idom of the block will be the nearest
|
||||
// common dominator of the previous idom and common dominator of all copies of
|
||||
// the previous idom. This is equivalent to the nearest common dominator of
|
||||
// the previous idom and the first latch, which dominates all copies of the
|
||||
// previous idom.
|
||||
// iterations too.
|
||||
if (DT && Count > 1) {
|
||||
for (auto *BB : OriginalLoopBlocks) {
|
||||
auto *BBDomNode = DT->getNode(BB);
|
||||
@ -629,12 +636,38 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
|
||||
if (!L->contains(ChildBB))
|
||||
ChildrenToUpdate.push_back(ChildBB);
|
||||
}
|
||||
BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]);
|
||||
BasicBlock *NewIDom;
|
||||
if (BB == LatchBlock) {
|
||||
// The latch is special because we emit unconditional branches in
|
||||
// some cases where the original loop contained a conditional branch.
|
||||
// Since the latch is always at the bottom of the loop, if the latch
|
||||
// dominated an exit before unrolling, the new dominator of that exit
|
||||
// must also be a latch. Specifically, the dominator is the first
|
||||
// latch which ends in a conditional branch, or the last latch if
|
||||
// there is no such latch.
|
||||
NewIDom = Latches.back();
|
||||
for (BasicBlock *IterLatch : Latches) {
|
||||
TerminatorInst *Term = IterLatch->getTerminator();
|
||||
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
|
||||
NewIDom = IterLatch;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// The new idom of the block will be the nearest common dominator
|
||||
// of all copies of the previous idom. This is equivalent to the
|
||||
// nearest common dominator of the previous idom and the first latch,
|
||||
// which dominates all copies of the previous idom.
|
||||
NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
|
||||
}
|
||||
for (auto *ChildBB : ChildrenToUpdate)
|
||||
DT->changeImmediateDominator(ChildBB, NewIDom);
|
||||
}
|
||||
}
|
||||
|
||||
if (DT && UnrollVerifyDomtree)
|
||||
DT->verifyDomTree();
|
||||
|
||||
// Merge adjacent basic blocks, if possible.
|
||||
SmallPtrSet<Loop *, 4> ForgottenLoops;
|
||||
for (BasicBlock *Latch : Latches) {
|
||||
@ -652,13 +685,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: We only preserve DT info for complete unrolling now. Incrementally
|
||||
// updating domtree after partial loop unrolling should also be easy.
|
||||
if (DT && !CompletelyUnroll)
|
||||
DT->recalculate(*L->getHeader()->getParent());
|
||||
else if (DT)
|
||||
DEBUG(DT->verifyDomTree());
|
||||
|
||||
// Simplify any new induction variables in the partially unrolled loop.
|
||||
if (SE && !CompletelyUnroll && Count > 1) {
|
||||
SmallVector<WeakVH, 16> DeadInsts;
|
||||
@ -718,8 +744,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
|
||||
// at least one layer outside of the loop that was unrolled so that any
|
||||
// changes to the parent loop exposed by the unrolling are considered.
|
||||
if (DT) {
|
||||
if (!OuterL && !CompletelyUnroll)
|
||||
OuterL = L;
|
||||
if (OuterL) {
|
||||
// OuterL includes all loops for which we can break loop-simplify, so
|
||||
// it's sufficient to simplify only it (it'll recursively simplify inner
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Cloning.h"
|
||||
#include "llvm/Transforms/Utils/LoopSimplify.h"
|
||||
#include "llvm/Transforms/Utils/LoopUtils.h"
|
||||
#include "llvm/Transforms/Utils/UnrollLoop.h"
|
||||
#include <algorithm>
|
||||
@ -257,7 +258,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
|
||||
/// optimizations.
|
||||
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
|
||||
ScalarEvolution *SE, DominatorTree *DT,
|
||||
bool PreserveLCSSA) {
|
||||
AssumptionCache *AC, bool PreserveLCSSA) {
|
||||
if (!canPeel(L))
|
||||
return false;
|
||||
|
||||
@ -404,10 +405,20 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
|
||||
LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
|
||||
}
|
||||
|
||||
// FIXME: Incrementally update domtree.
|
||||
DT->recalculate(*L->getHeader()->getParent());
|
||||
|
||||
// If the loop is nested, we changed the parent loop, update SE.
|
||||
if (Loop *ParentLoop = L->getParentLoop())
|
||||
if (Loop *ParentLoop = L->getParentLoop()) {
|
||||
SE->forgetLoop(ParentLoop);
|
||||
|
||||
// FIXME: Incrementally update loop-simplify
|
||||
simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
|
||||
} else {
|
||||
// FIXME: Incrementally update loop-simplify
|
||||
simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
|
||||
}
|
||||
|
||||
NumPeeled++;
|
||||
|
||||
return true;
|
||||
|
@ -146,6 +146,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
|
||||
// Add the branch to the exit block (around the unrolled loop)
|
||||
B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
|
||||
InsertPt->eraseFromParent();
|
||||
if (DT)
|
||||
DT->changeImmediateDominator(Exit, PrologExit);
|
||||
}
|
||||
|
||||
/// Connect the unrolling epilog code to the original loop.
|
||||
@ -260,13 +262,20 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
|
||||
IRBuilder<> B(InsertPt);
|
||||
Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
|
||||
assert(Exit && "Loop must have a single exit block only");
|
||||
// Split the exit to maintain loop canonicalization guarantees
|
||||
// Split the epilogue exit to maintain loop canonicalization guarantees
|
||||
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
|
||||
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
|
||||
PreserveLCSSA);
|
||||
// Add the branch to the exit block (around the unrolling loop)
|
||||
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
|
||||
InsertPt->eraseFromParent();
|
||||
if (DT)
|
||||
DT->changeImmediateDominator(Exit, NewExit);
|
||||
|
||||
// Split the main loop exit to maintain canonicalization guarantees.
|
||||
SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
|
||||
SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
|
||||
PreserveLCSSA);
|
||||
}
|
||||
|
||||
/// Create a clone of the blocks in a loop and connect them together.
|
||||
@ -284,7 +293,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
|
||||
BasicBlock *Preheader,
|
||||
std::vector<BasicBlock *> &NewBlocks,
|
||||
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
|
||||
LoopInfo *LI) {
|
||||
DominatorTree *DT, LoopInfo *LI) {
|
||||
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
|
||||
BasicBlock *Header = L->getHeader();
|
||||
BasicBlock *Latch = L->getLoopLatch();
|
||||
@ -321,6 +330,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
|
||||
InsertTop->getTerminator()->setSuccessor(0, NewBB);
|
||||
}
|
||||
|
||||
if (DT) {
|
||||
if (Header == *BB) {
|
||||
// The header is dominated by the preheader.
|
||||
DT->addNewBlock(NewBB, InsertTop);
|
||||
} else {
|
||||
// Copy information from original loop to unrolled loop.
|
||||
BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
|
||||
DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
|
||||
}
|
||||
}
|
||||
|
||||
if (Latch == *BB) {
|
||||
// For the last block, if CreateRemainderLoop is false, create a direct
|
||||
// jump to InsertBot. If not, create a loop back to cloned head.
|
||||
@ -594,6 +614,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
|
||||
// Branch to either remainder (extra iterations) loop or unrolling loop.
|
||||
B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
|
||||
PreHeaderBR->eraseFromParent();
|
||||
if (DT) {
|
||||
if (UseEpilogRemainder)
|
||||
DT->changeImmediateDominator(NewExit, PreHeader);
|
||||
else
|
||||
DT->changeImmediateDominator(PrologExit, PreHeader);
|
||||
}
|
||||
Function *F = Header->getParent();
|
||||
// Get an ordered list of blocks in the loop to help with the ordering of the
|
||||
// cloned blocks in the prolog/epilog code
|
||||
@ -618,7 +644,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
|
||||
BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
|
||||
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
|
||||
CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
|
||||
InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
|
||||
InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
|
||||
|
||||
// Insert the cloned blocks into the function.
|
||||
F->getBasicBlockList().splice(InsertBot->getIterator(),
|
||||
|
Loading…
Reference in New Issue
Block a user