Preserve domtree and loop-simplify for runtime unrolling.

Mostly straightforward changes; we just didn't do the computation before. One sort of interesting change in LoopUnroll.cpp: we weren't handling dominance for children of the loop latch correctly, but foldBlockIntoPredecessor hid the problem for complete unrolling. Currently punting on loop peeling; made some minor changes to isolate that problem to LoopUnrollPeel.cpp. Adds a flag -unroll-verify-domtree; it verifies the domtree immediately after we finish updating it. This is on by default for +Asserts builds. Differential Revision: https://reviews.llvm.org/D28073 llvm-svn: 292447
2024-11-24 11:42:57 +01:00 · 2017-01-18 23:26:37 +00:00 · 2017-01-18 23:26:37 +00:00 · 21d28d5c67
commit 21d28d5c67
parent c5bd88df38
4 changed files with 83 additions and 22 deletions
--- a/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/include/llvm/Transforms/Utils/UnrollLoop.h
@ -56,7 +56,7 @@ void computePeelCount(Loop *L, unsigned LoopSize,
                      TargetTransformInfo::UnrollingPreferences &UP);

 bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE,
-              DominatorTree *DT, bool PreserveLCSSA);
+              DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);

 MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
 }
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@ -51,6 +51,16 @@ UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
                    cl::desc("Allow runtime unrolled loops to be unrolled "
                             "with epilog instead of prolog."));

+static cl::opt<bool>
+UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
+                    cl::desc("Verify domtree after unrolling"),
+#ifdef NDEBUG
+    cl::init(false)
+#else
+    cl::init(true)
+#endif
+                    );
+
 /// Convert the instruction operands from referencing the current values into
 /// those specified by VMap.
 static inline void remapInstruction(Instruction *I,
@ -327,7 +337,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
         "and peeling for the same loop");

  if (PeelCount)
-    peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA);
+    peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);

  // Loops containing convergent instructions must have a count that divides
  // their TripMultiple.
@ -612,14 +622,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
      Term->eraseFromParent();
    }
  }
+
  // Update dominators of blocks we might reach through exits.
  // Immediate dominator of such block might change, because we add more
  // routes which can lead to the exit: we can now reach it from the copied
-  // iterations too. Thus, the new idom of the block will be the nearest
-  // common dominator of the previous idom and common dominator of all copies of
-  // the previous idom. This is equivalent to the nearest common dominator of
-  // the previous idom and the first latch, which dominates all copies of the
-  // previous idom.
+  // iterations too.
  if (DT && Count > 1) {
    for (auto *BB : OriginalLoopBlocks) {
      auto *BBDomNode = DT->getNode(BB);
@ -629,12 +636,38 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
        if (!L->contains(ChildBB))
          ChildrenToUpdate.push_back(ChildBB);
      }
-      BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]);
+      BasicBlock *NewIDom;
+      if (BB == LatchBlock) {
+        // The latch is special because we emit unconditional branches in
+        // some cases where the original loop contained a conditional branch.
+        // Since the latch is always at the bottom of the loop, if the latch
+        // dominated an exit before unrolling, the new dominator of that exit
+        // must also be a latch.  Specifically, the dominator is the first
+        // latch which ends in a conditional branch, or the last latch if
+        // there is no such latch.
+        NewIDom = Latches.back();
+        for (BasicBlock *IterLatch : Latches) {
+          TerminatorInst *Term = IterLatch->getTerminator();
+          if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
+            NewIDom = IterLatch;
+            break;
+          }
+        }
+      } else {
+        // The new idom of the block will be the nearest common dominator
+        // of all copies of the previous idom. This is equivalent to the
+        // nearest common dominator of the previous idom and the first latch,
+        // which dominates all copies of the previous idom.
+        NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
+      }
      for (auto *ChildBB : ChildrenToUpdate)
        DT->changeImmediateDominator(ChildBB, NewIDom);
    }
  }

+  if (DT && UnrollVerifyDomtree)
+    DT->verifyDomTree();
+
  // Merge adjacent basic blocks, if possible.
  SmallPtrSet<Loop *, 4> ForgottenLoops;
  for (BasicBlock *Latch : Latches) {
@ -652,13 +685,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
    }
  }

-  // FIXME: We only preserve DT info for complete unrolling now. Incrementally
-  // updating domtree after partial loop unrolling should also be easy.
-  if (DT && !CompletelyUnroll)
-    DT->recalculate(*L->getHeader()->getParent());
-  else if (DT)
-    DEBUG(DT->verifyDomTree());
-
  // Simplify any new induction variables in the partially unrolled loop.
  if (SE && !CompletelyUnroll && Count > 1) {
    SmallVector<WeakVH, 16> DeadInsts;
@ -718,8 +744,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
  // at least one layer outside of the loop that was unrolled so that any
  // changes to the parent loop exposed by the unrolling are considered.
  if (DT) {
-    if (!OuterL && !CompletelyUnroll)
-      OuterL = L;
    if (OuterL) {
      // OuterL includes all loops for which we can break loop-simplify, so
      // it's sufficient to simplify only it (it'll recursively simplify inner
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@ -28,6 +28,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <algorithm>
@ -257,7 +258,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
 /// optimizations.
 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
                    ScalarEvolution *SE, DominatorTree *DT,
-                    bool PreserveLCSSA) {
+                    AssumptionCache *AC, bool PreserveLCSSA) {
  if (!canPeel(L))
    return false;

@ -404,10 +405,20 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
    LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
  }

+  // FIXME: Incrementally update domtree.
+  DT->recalculate(*L->getHeader()->getParent());
+
  // If the loop is nested, we changed the parent loop, update SE.
-  if (Loop *ParentLoop = L->getParentLoop())
+  if (Loop *ParentLoop = L->getParentLoop()) {
    SE->forgetLoop(ParentLoop);

+    // FIXME: Incrementally update loop-simplify
+    simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
+  } else {
+    // FIXME: Incrementally update loop-simplify
+    simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
+  }
+
  NumPeeled++;

  return true;
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@ -146,6 +146,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
  // Add the branch to the exit block (around the unrolled loop)
  B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
  InsertPt->eraseFromParent();
+  if (DT)
+    DT->changeImmediateDominator(Exit, PrologExit);
 }

 /// Connect the unrolling epilog code to the original loop.
@ -260,13 +262,20 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
  IRBuilder<> B(InsertPt);
  Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
  assert(Exit && "Loop must have a single exit block only");
-  // Split the exit to maintain loop canonicalization guarantees
+  // Split the epilogue exit to maintain loop canonicalization guarantees
  SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
  SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
                         PreserveLCSSA);
  // Add the branch to the exit block (around the unrolling loop)
  B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
  InsertPt->eraseFromParent();
+  if (DT)
+    DT->changeImmediateDominator(Exit, NewExit);
+
+  // Split the main loop exit to maintain canonicalization guarantees.
+  SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
+  SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
+                         PreserveLCSSA);
 }

 /// Create a clone of the blocks in a loop and connect them together.
@ -284,7 +293,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
                            BasicBlock *Preheader,
                            std::vector<BasicBlock *> &NewBlocks,
                            LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
-                            LoopInfo *LI) {
+                            DominatorTree *DT, LoopInfo *LI) {
  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
@ -321,6 +330,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
      InsertTop->getTerminator()->setSuccessor(0, NewBB);
    }

+    if (DT) {
+      if (Header == *BB) {
+        // The header is dominated by the preheader.
+        DT->addNewBlock(NewBB, InsertTop);
+      } else {
+        // Copy information from original loop to unrolled loop.
+        BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
+        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
+      }
+    }
+
    if (Latch == *BB) {
      // For the last block, if CreateRemainderLoop is false, create a direct
      // jump to InsertBot. If not, create a loop back to cloned head.
@ -594,6 +614,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
  // Branch to either remainder (extra iterations) loop or unrolling loop.
  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
  PreHeaderBR->eraseFromParent();
+  if (DT) {
+    if (UseEpilogRemainder)
+      DT->changeImmediateDominator(NewExit, PreHeader);
+    else
+      DT->changeImmediateDominator(PrologExit, PreHeader);
+  }
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog/epilog code
@ -618,7 +644,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
  BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
  CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
-                  InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
+                  InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);

  // Insert the cloned blocks into the function.
  F->getBasicBlockList().splice(InsertBot->getIterator(),