From 21d28d5c6717e3236e4935eaa459ebbeaa434812 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Wed, 18 Jan 2017 23:26:37 +0000 Subject: [PATCH] Preserve domtree and loop-simplify for runtime unrolling. Mostly straightforward changes; we just didn't do the computation before. One sort of interesting change in LoopUnroll.cpp: we weren't handling dominance for children of the loop latch correctly, but foldBlockIntoPredecessor hid the problem for complete unrolling. Currently punting on loop peeling; made some minor changes to isolate that problem to LoopUnrollPeel.cpp. Adds a flag -unroll-verify-domtree; it verifies the domtree immediately after we finish updating it. This is on by default for +Asserts builds. Differential Revision: https://reviews.llvm.org/D28073 llvm-svn: 292447 --- include/llvm/Transforms/Utils/UnrollLoop.h | 2 +- lib/Transforms/Utils/LoopUnroll.cpp | 56 +++++++++++++++------- lib/Transforms/Utils/LoopUnrollPeel.cpp | 15 +++++- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 32 +++++++++++-- 4 files changed, 83 insertions(+), 22 deletions(-) diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h index f322bea7aa2..885edc135c4 100644 --- a/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/include/llvm/Transforms/Utils/UnrollLoop.h @@ -56,7 +56,7 @@ void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP); bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, bool PreserveLCSSA); + DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name); } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index f9a602bc268..9ea4a4b6810 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -51,6 +51,16 @@ UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolled loops to be unrolled " "with epilog instead of prolog.")); +static cl::opt +UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, + cl::desc("Verify domtree after unrolling"), +#ifdef NDEBUG + cl::init(false) +#else + cl::init(true) +#endif + ); + /// Convert the instruction operands from referencing the current values into /// those specified by VMap. static inline void remapInstruction(Instruction *I, @@ -327,7 +337,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, "and peeling for the same loop"); if (PeelCount) - peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA); + peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA); // Loops containing convergent instructions must have a count that divides // their TripMultiple. @@ -612,14 +622,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, Term->eraseFromParent(); } } + // Update dominators of blocks we might reach through exits. // Immediate dominator of such block might change, because we add more // routes which can lead to the exit: we can now reach it from the copied - // iterations too. Thus, the new idom of the block will be the nearest - // common dominator of the previous idom and common dominator of all copies of - // the previous idom. This is equivalent to the nearest common dominator of - // the previous idom and the first latch, which dominates all copies of the - // previous idom. + // iterations too. if (DT && Count > 1) { for (auto *BB : OriginalLoopBlocks) { auto *BBDomNode = DT->getNode(BB); @@ -629,12 +636,38 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (!L->contains(ChildBB)) ChildrenToUpdate.push_back(ChildBB); } - BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]); + BasicBlock *NewIDom; + if (BB == LatchBlock) { + // The latch is special because we emit unconditional branches in + // some cases where the original loop contained a conditional branch. + // Since the latch is always at the bottom of the loop, if the latch + // dominated an exit before unrolling, the new dominator of that exit + // must also be a latch. Specifically, the dominator is the first + // latch which ends in a conditional branch, or the last latch if + // there is no such latch. + NewIDom = Latches.back(); + for (BasicBlock *IterLatch : Latches) { + TerminatorInst *Term = IterLatch->getTerminator(); + if (isa(Term) && cast(Term)->isConditional()) { + NewIDom = IterLatch; + break; + } + } + } else { + // The new idom of the block will be the nearest common dominator + // of all copies of the previous idom. This is equivalent to the + // nearest common dominator of the previous idom and the first latch, + // which dominates all copies of the previous idom. + NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); + } for (auto *ChildBB : ChildrenToUpdate) DT->changeImmediateDominator(ChildBB, NewIDom); } } + if (DT && UnrollVerifyDomtree) + DT->verifyDomTree(); + // Merge adjacent basic blocks, if possible. SmallPtrSet ForgottenLoops; for (BasicBlock *Latch : Latches) { @@ -652,13 +685,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, } } - // FIXME: We only preserve DT info for complete unrolling now. Incrementally - // updating domtree after partial loop unrolling should also be easy. - if (DT && !CompletelyUnroll) - DT->recalculate(*L->getHeader()->getParent()); - else if (DT) - DEBUG(DT->verifyDomTree()); - // Simplify any new induction variables in the partially unrolled loop. if (SE && !CompletelyUnroll && Count > 1) { SmallVector DeadInsts; @@ -718,8 +744,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. if (DT) { - if (!OuterL && !CompletelyUnroll) - OuterL = L; if (OuterL) { // OuterL includes all loops for which we can break loop-simplify, so // it's sufficient to simplify only it (it'll recursively simplify inner diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 842cf31f2e3..f021617f2ff 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -28,6 +28,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include @@ -257,7 +258,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - bool PreserveLCSSA) { + AssumptionCache *AC, bool PreserveLCSSA) { if (!canPeel(L)) return false; @@ -404,10 +405,20 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); } + // FIXME: Incrementally update domtree. + DT->recalculate(*L->getHeader()->getParent()); + // If the loop is nested, we changed the parent loop, update SE. - if (Loop *ParentLoop = L->getParentLoop()) + if (Loop *ParentLoop = L->getParentLoop()) { SE->forgetLoop(ParentLoop); + // FIXME: Incrementally update loop-simplify + simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA); + } else { + // FIXME: Incrementally update loop-simplify + simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA); + } + NumPeeled++; return true; diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 85da3ba899a..d5d54cf1d84 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -146,6 +146,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(Exit, PrologExit); } /// Connect the unrolling epilog code to the original loop. @@ -260,13 +262,20 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, IRBuilder<> B(InsertPt); Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); assert(Exit && "Loop must have a single exit block only"); - // Split the exit to maintain loop canonicalization guarantees + // Split the epilogue exit to maintain loop canonicalization guarantees SmallVector Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(Exit, NewExit); + + // Split the main loop exit to maintain canonicalization guarantees. + SmallVector NewExitPreds{Latch}; + SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, + PreserveLCSSA); } /// Create a clone of the blocks in a loop and connect them together. @@ -284,7 +293,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, BasicBlock *Preheader, std::vector &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - LoopInfo *LI) { + DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -321,6 +330,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, InsertTop->getTerminator()->setSuccessor(0, NewBB); } + if (DT) { + if (Header == *BB) { + // The header is dominated by the preheader. + DT->addNewBlock(NewBB, InsertTop); + } else { + // Copy information from original loop to unrolled loop. + BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); + DT->addNewBlock(NewBB, cast(VMap[IDomBB])); + } + } + if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. @@ -594,6 +614,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); + if (DT) { + if (UseEpilogRemainder) + DT->changeImmediateDominator(NewExit, PreHeader); + else + DT->changeImmediateDominator(PrologExit, PreHeader); + } Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code @@ -618,7 +644,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, - InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI); + InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(),