1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 11:42:57 +01:00

Preserve domtree and loop-simplify for runtime unrolling.

Mostly straightforward changes; we just didn't do the computation before.
One sort of interesting change in LoopUnroll.cpp: we weren't handling
dominance for children of the loop latch correctly, but
foldBlockIntoPredecessor hid the problem for complete unrolling.

Loop peeling is not updated incrementally yet (it still recalculates the
domtree from scratch); made some minor changes to isolate that problem to
LoopUnrollPeel.cpp.

Adds a flag -unroll-verify-domtree; it verifies the domtree immediately
after we finish updating it. This is on by default for +Asserts builds.

Differential Revision: https://reviews.llvm.org/D28073

llvm-svn: 292447
This commit is contained in:
Eli Friedman 2017-01-18 23:26:37 +00:00
parent c5bd88df38
commit 21d28d5c67
4 changed files with 83 additions and 22 deletions

View File

@ -56,7 +56,7 @@ void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP);
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, bool PreserveLCSSA);
DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
}

View File

@ -51,6 +51,16 @@ UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
cl::desc("Allow runtime unrolled loops to be unrolled "
"with epilog instead of prolog."));
// Hidden command-line flag "unroll-verify-domtree". When it is set and a
// dominator tree is available, the unroller calls verifyDomTree() right after
// it finishes updating the tree. The default is chosen at compile time: false
// when NDEBUG is defined, true otherwise.
static cl::opt<bool>
UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
cl::desc("Verify domtree after unrolling"),
#ifdef NDEBUG
cl::init(false)
#else
cl::init(true)
#endif
);
/// Convert the instruction operands from referencing the current values into
/// those specified by VMap.
static inline void remapInstruction(Instruction *I,
@ -327,7 +337,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
"and peeling for the same loop");
if (PeelCount)
peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA);
peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
// Loops containing convergent instructions must have a count that divides
// their TripMultiple.
@ -612,14 +622,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
Term->eraseFromParent();
}
}
// Update dominators of blocks we might reach through exits.
// Immediate dominator of such block might change, because we add more
// routes which can lead to the exit: we can now reach it from the copied
// iterations too. Thus, the new idom of the block will be the nearest
// common dominator of the previous idom and common dominator of all copies of
// the previous idom. This is equivalent to the nearest common dominator of
// the previous idom and the first latch, which dominates all copies of the
// previous idom.
// iterations too.
if (DT && Count > 1) {
for (auto *BB : OriginalLoopBlocks) {
auto *BBDomNode = DT->getNode(BB);
@ -629,12 +636,38 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (!L->contains(ChildBB))
ChildrenToUpdate.push_back(ChildBB);
}
BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]);
BasicBlock *NewIDom;
if (BB == LatchBlock) {
// The latch is special because we emit unconditional branches in
// some cases where the original loop contained a conditional branch.
// Since the latch is always at the bottom of the loop, if the latch
// dominated an exit before unrolling, the new dominator of that exit
// must also be a latch. Specifically, the dominator is the first
// latch which ends in a conditional branch, or the last latch if
// there is no such latch.
NewIDom = Latches.back();
for (BasicBlock *IterLatch : Latches) {
TerminatorInst *Term = IterLatch->getTerminator();
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
NewIDom = IterLatch;
break;
}
}
} else {
// The new idom of the block will be the nearest common dominator
// of all copies of the previous idom. This is equivalent to the
// nearest common dominator of the previous idom and the first latch,
// which dominates all copies of the previous idom.
NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
}
for (auto *ChildBB : ChildrenToUpdate)
DT->changeImmediateDominator(ChildBB, NewIDom);
}
}
if (DT && UnrollVerifyDomtree)
DT->verifyDomTree();
// Merge adjacent basic blocks, if possible.
SmallPtrSet<Loop *, 4> ForgottenLoops;
for (BasicBlock *Latch : Latches) {
@ -652,13 +685,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
}
}
// FIXME: We only preserve DT info for complete unrolling now. Incrementally
// updating domtree after partial loop unrolling should also be easy.
if (DT && !CompletelyUnroll)
DT->recalculate(*L->getHeader()->getParent());
else if (DT)
DEBUG(DT->verifyDomTree());
// Simplify any new induction variables in the partially unrolled loop.
if (SE && !CompletelyUnroll && Count > 1) {
SmallVector<WeakVH, 16> DeadInsts;
@ -718,8 +744,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
// at least one layer outside of the loop that was unrolled so that any
// changes to the parent loop exposed by the unrolling are considered.
if (DT) {
if (!OuterL && !CompletelyUnroll)
OuterL = L;
if (OuterL) {
// OuterL includes all loops for which we can break loop-simplify, so
// it's sufficient to simplify only it (it'll recursively simplify inner

View File

@ -28,6 +28,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
@ -257,7 +258,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
/// optimizations.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
bool PreserveLCSSA) {
AssumptionCache *AC, bool PreserveLCSSA) {
if (!canPeel(L))
return false;
@ -404,10 +405,20 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
}
// FIXME: Incrementally update domtree.
DT->recalculate(*L->getHeader()->getParent());
// If the loop is nested, we changed the parent loop, update SE.
if (Loop *ParentLoop = L->getParentLoop())
if (Loop *ParentLoop = L->getParentLoop()) {
SE->forgetLoop(ParentLoop);
// FIXME: Incrementally update loop-simplify
simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
} else {
// FIXME: Incrementally update loop-simplify
simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
}
NumPeeled++;
return true;

View File

@ -146,6 +146,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
InsertPt->eraseFromParent();
if (DT)
DT->changeImmediateDominator(Exit, PrologExit);
}
/// Connect the unrolling epilog code to the original loop.
@ -260,13 +262,20 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
IRBuilder<> B(InsertPt);
Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
// Split the epilogue exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolling loop)
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
InsertPt->eraseFromParent();
if (DT)
DT->changeImmediateDominator(Exit, NewExit);
// Split the main loop exit to maintain canonicalization guarantees.
SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
PreserveLCSSA);
}
/// Create a clone of the blocks in a loop and connect them together.
@ -284,7 +293,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
LoopInfo *LI) {
DominatorTree *DT, LoopInfo *LI) {
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
@ -321,6 +330,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
InsertTop->getTerminator()->setSuccessor(0, NewBB);
}
if (DT) {
if (Header == *BB) {
// The header is dominated by the preheader.
DT->addNewBlock(NewBB, InsertTop);
} else {
// Copy information from original loop to unrolled loop.
BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
}
}
if (Latch == *BB) {
// For the last block, if CreateRemainderLoop is false, create a direct
// jump to InsertBot. If not, create a loop back to cloned head.
@ -594,6 +614,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Branch to either remainder (extra iterations) loop or unrolling loop.
B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
PreHeaderBR->eraseFromParent();
if (DT) {
if (UseEpilogRemainder)
DT->changeImmediateDominator(NewExit, PreHeader);
else
DT->changeImmediateDominator(PrologExit, PreHeader);
}
Function *F = Header->getParent();
// Get an ordered list of blocks in the loop to help with the ordering of the
// cloned blocks in the prolog/epilog code
@ -618,7 +644,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Insert the cloned blocks into the function.
F->getBasicBlockList().splice(InsertBot->getIterator(),