[RuntimeUnrolling] Add logic for loops with multiple exit blocks

Summary: Runtime unrolling is done for loops with a single exit block and a single exiting block (and this exiting block should be the latch block). This patch adds logic to support unrolling in the presence of multiple exit blocks (which also means multiple exiting blocks). Currently this is under an off-by-default option and is supported when epilog code is generated. Support in presence of prolog code will be in a future patch (we just need to add more tests, and update comments). This patch is essentially an implementation patch. I have not added any heuristic (in terms of branches added or code size) to decide when this should be enabled. Reviewers: mkuper, sanjoy, reames, evstupac Reviewed by: reames Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D33001 llvm-svn: 306846
2024-11-24 11:42:57 +01:00 · 2017-06-30 17:57:07 +00:00 · 2017-06-30 17:57:07 +00:00 · 77fdc59c1c
commit 77fdc59c1c
parent d9c5162c5d
2 changed files with 381 additions and 24 deletions
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@ -36,6 +36,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <algorithm>

@ -45,6 +46,10 @@ using namespace llvm;

 STATISTIC(NumRuntimeUnrolled,
          "Number of loops unrolled with run-time trip counts");
+static cl::opt<bool> UnrollRuntimeMultiExit(
+    "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
+    cl::desc("Allow runtime unrolling for loops with multiple exits, when "
+             "epilog is generated"));

 /// Connect the unrolling prolog code to the original loop.
 /// The unrolling prolog code contains code to execute the
@ -285,15 +290,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
 /// The cloned blocks should be inserted between InsertTop and InsertBot.
 /// If loop structure is cloned InsertTop should be new preheader, InsertBot
 /// new loop exit.
-///
-static void CloneLoopBlocks(Loop *L, Value *NewIter,
-                            const bool CreateRemainderLoop,
-                            const bool UseEpilogRemainder,
-                            BasicBlock *InsertTop, BasicBlock *InsertBot,
-                            BasicBlock *Preheader,
-                            std::vector<BasicBlock *> &NewBlocks,
-                            LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
-                            DominatorTree *DT, LoopInfo *LI) {
+/// Return the new cloned loop that is created when CreateRemainderLoop is true.
+static Loop *
+CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
+                const bool UseEpilogRemainder, BasicBlock *InsertTop,
+                BasicBlock *InsertBot, BasicBlock *Preheader,
+                std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+                ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
@ -418,7 +421,10 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
    // Set operand 0 to refer to the loop id itself.
    NewLoopID->replaceOperandWith(0, NewLoopID);
    NewLoop->setLoopID(NewLoopID);
+    return NewLoop;
  }
+  else
+    return nullptr;
 }

 /// Insert code in the prolog/epilog code when unrolling a loop with a
@ -465,29 +471,52 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                      LoopInfo *LI, ScalarEvolution *SE,
                                      DominatorTree *DT, bool PreserveLCSSA) {
  // for now, only unroll loops that contain a single exit
-  if (!L->getExitingBlock())
+  if (!UnrollRuntimeMultiExit && !L->getExitingBlock())
    return false;

-  // Make sure the loop is in canonical form, and there is a single
-  // exit block only.
+  // Make sure the loop is in canonical form.
  if (!L->isLoopSimplifyForm())
    return false;

  // Guaranteed by LoopSimplifyForm.
  BasicBlock *Latch = L->getLoopLatch();
+  BasicBlock *Header = L->getHeader();

  BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop
-  if (!LatchExit)
+  if (!LatchExit && !UnrollRuntimeMultiExit)
    return false;
-  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
-  // targets of the Latch be the single exit block out of the loop. This needs
-  // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
+  // These are exit blocks other than the target of the latch exiting block.
+  SmallVector<BasicBlock *, 4> OtherExits;
  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
-  assert((LatchBR->getSuccessor(0) == LatchExit ||
-          LatchBR->getSuccessor(1) == LatchExit) &&
-         "one of the loop latch successors should be "
-         "the exit block!");
-  (void)LatchBR;
+  unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
+  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+  // targets of the Latch be an exit block out of the loop. This needs
+  // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
+  assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) &&
+         "one of the loop latch successors should be the exit block!");
+  // Support runtime unrolling for multiple exit blocks and multiple exiting
+  // blocks.
+  if (!LatchExit) {
+    assert(UseEpilogRemainder && "Multi exit unrolling is currently supported "
+                                 "unrolling with epilog remainder only!");
+    LatchExit = LatchBR->getSuccessor(ExitIndex);
+    // We rely on LCSSA form being preserved when the exit blocks are
+    // transformed.
+    if (!PreserveLCSSA)
+      return false;
+    // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This
+    // will need updating the logic in ConnectEpilog.
+    if (!LatchExit->getSinglePredecessor())
+        return false;
+    SmallVector<BasicBlock *, 4> Exits;
+    L->getUniqueExitBlocks(Exits);
+    for (auto *BB : Exits)
+      if (BB != LatchExit)
+        OtherExits.push_back(BB);
+  }
+
+  assert(LatchExit && "Latch Exit should exist!");
+
  // Use Scalar Evolution to compute the trip count. This allows more loops to
  // be unrolled than relying on induction var simplification.
  if (!SE)
@ -512,7 +541,6 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
  if (isa<SCEVCouldNotCompute>(TripCountSC))
    return false;

-  BasicBlock *Header = L->getHeader();
  BasicBlock *PreHeader = L->getLoopPreheader();
  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  const DataLayout &DL = Header->getModule()->getDataLayout();
@ -654,8 +682,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
  // iterations. This function adds the appropriate CFG connections.
  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
-  CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
-                  InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
+  Loop *remainderLoop = CloneLoopBlocks(
+      L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot,
+      NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);

  // Insert the cloned blocks into the function.
  F->getBasicBlockList().splice(InsertBot->getIterator(),
@ -663,6 +692,42 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                NewBlocks[0]->getIterator(),
                                F->end());

+  // Now the loop blocks are cloned and the other exiting blocks from the
+  // remainder are connected to the original Loop's exit blocks. The remaining
+  // work is to update the phi nodes in those exit blocks (OtherExits) so they
+  // also take in the values from the cloned region. Also update the dominator
+  // info for OtherExits, since we have new edges into OtherExits.
+  for (auto *BB : OtherExits) {
+   for (auto &II : *BB) {
+
+     // Given we preserve LCSSA form, we know that the values used outside the
+     // loop will be used through these phi nodes at the exit blocks that are
+     // transformed below.
+     if (!isa<PHINode>(II))
+       break;
+     PHINode *Phi = cast<PHINode>(&II);
+     unsigned oldNumOperands = Phi->getNumIncomingValues();
+     // Add the incoming values from the remainder code to the end of the phi
+     // node.
+     for (unsigned i =0; i < oldNumOperands; i++){
+       Value *newVal = VMap[Phi->getIncomingValue(i)];
+       if (!newVal) {
+         assert(isa<Constant>(Phi->getIncomingValue(i)) &&
+                "VMap should exist for all values except constants!");
+         newVal = Phi->getIncomingValue(i);
+       }
+       Phi->addIncoming(newVal,
+                           cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
+     }
+   }
+   // Update the dominator info because the immediate dominator is no longer the
+   // header of the original Loop. BB has edges both from L and remainder code.
+   // Since the preheader determines which loop is run (L or directly jump to
+   // the remainder code), we set the immediate dominator as the preheader.
+   if (DT)
+     DT->changeImmediateDominator(BB, PreHeader);
+  }
+
  // Loop structure should be the following:
  //  Epilog             Prolog
  //
@ -725,6 +790,19 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

+  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
+  // cannot rely on the LoopUnrollPass to do this because it only does
+  // canonicalization for parent/subloops and not the sibling loops.
+  if (OtherExits.size() > 0) {
+    // Generate dedicated exit blocks for the original loop, to preserve
+    // LoopSimplifyForm.
+    formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+    // Generate dedicated exit blocks for the remainder loop if one exists, to
+    // preserve LoopSimplifyForm.
+    if (remainderLoop)
+      formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
+  }
+
  NumRuntimeUnrolled++;
  return true;
 }
--- a/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@ -0,0 +1,279 @@
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S| FileCheck %s
+; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine
+
+; the second RUN generates an epilog remainder block for all the test
+; cases below (it does not generate a loop).
+
+; test with three exiting and three exit blocks.
+; none of the exit blocks have successors
+define void @test1(i64 %trip, i1 %cond) {
+; CHECK-LABEL: test1
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; CHECK-NEXT:    br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
+; CHECK:       entry.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]]
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK-LABEL:  loop_latch.epil:
+; CHECK-NEXT:     %epil.iter.sub = add i64 %epil.iter, -1
+; CHECK-NEXT:     %epil.iter.cmp = icmp eq i64 %epil.iter.sub, 0
+; CHECK-NEXT:     br i1 %epil.iter.cmp, label %exit2.loopexit.epilog-lcssa, label %loop_header.epil
+; CHECK-LABEL:  loop_latch.7:
+; CHECK-NEXT:     %niter.nsub.7 = add i64 %niter, -8
+; CHECK-NEXT:     %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; CHECK-NEXT:     br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
+entry:
+  br label %loop_header
+
+loop_header:
+  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+  br i1 %cond, label %loop_latch, label %loop_exiting_bb1
+
+loop_exiting_bb1:
+  br i1 false, label %loop_exiting_bb2, label %exit1
+
+loop_exiting_bb2:
+  br i1 false, label %loop_latch, label %exit3
+
+exit3:
+  ret void
+
+loop_latch:
+  %iv_next = add i64 %iv, 1
+  %cmp = icmp ne i64 %iv_next, %trip
+  br i1 %cmp, label %loop_header, label %exit2.loopexit
+
+exit1:
+ ret void
+
+exit2.loopexit:
+  ret void
+}
+
+
+; test with three exiting and two exit blocks.
+; The non-latch exit block has 2 unique predecessors.
+; There are 2 values passed to the exit blocks that are calculated at every iteration.
+; %sum.02 and %add. Both of these are incoming values for phi from every exiting
+; unrolled block.
+define i32 @test2(i32* nocapture %a, i64 %n) {
+; CHECK-LABEL: test2
+; CHECK-LABEL: for.exit2.loopexit:
+; CHECK-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ],
+; CHECK-NEXT:    br label %for.exit2
+; CHECK-LABEL: for.exit2.loopexit2:
+; CHECK-NEXT:    %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; CHECK-NEXT:    br label %for.exit2
+; CHECK-LABEL: for.exit2:
+; CHECK-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; CHECK-NEXT:    ret i32 %retval
+; CHECK: %niter.nsub.7 = add i64 %niter, -8
+entry:
+  br label %header
+
+header:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  br i1 false, label %for.exit2, label %for.exiting_block
+
+for.exiting_block:
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %for.exit2, label %for.body
+
+for.body:
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.end, label %header
+
+for.end:                                          ; preds = %for.body
+  %sum.0.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+
+for.exit2:
+  %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ]
+  ret i32 %retval
+}
+
+; test with two exiting and three exit blocks.
+; the non-latch exiting block has a switch.
+define void @test3(i64 %trip, i64 %add) {
+; CHECK-LABEL: test3
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; CHECK-NEXT:    br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
+; CHECK:       entry.new:
+; CHECK-NEXT:    %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]]
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK-LABEL:  loop_header:
+; CHECK-NEXT:     %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
+; CHECK-NEXT:     %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; CHECK-LABEL:  loop_exiting_bb1.7:
+; CHECK-NEXT:     switch i64 %sum.next.6, label %loop_latch.7
+; CHECK-LABEL:  loop_latch.7:
+; CHECK-NEXT:     %sum.next.7 = add i64 %sum.next.6, %add
+; CHECK-NEXT:     %niter.nsub.7 = add i64 %niter, -8
+; CHECK-NEXT:     %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; CHECK-NEXT:     br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
+entry:
+  br label %loop_header
+
+loop_header:
+  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+  %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ]
+  br i1 undef, label %loop_latch, label %loop_exiting_bb1
+
+loop_exiting_bb1:
+   switch i64 %sum, label %loop_latch [
+     i64 24, label %exit1
+     i64 42, label %exit3
+   ]
+
+exit3:
+  ret void
+
+loop_latch:
+  %iv_next = add nuw nsw i64 %iv, 1
+  %sum.next = add i64 %sum, %add
+  %cmp = icmp ne i64 %iv_next, %trip
+  br i1 %cmp, label %loop_header, label %exit2.loopexit
+
+exit1:
+ ret void
+
+exit2.loopexit:
+  ret void
+}
+
+; FIXME: Support multiple exiting blocks to the same latch exit block.
+define i32 @test4(i32* nocapture %a, i64 %n, i1 %cond) {
+; CHECK-LABEL: test4
+; CHECK-NOT: .unr
+; CHECK-NOT: .epil
+entry:
+  br label %header
+
+header:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  br i1 %cond, label %for.end, label %for.exiting_block
+
+for.exiting_block:
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %for.exit2, label %for.body
+
+for.body:                                         ; preds = %for.exiting_block
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.end, label %header
+
+for.end:                                          ; preds = %for.body, %header
+  %sum.0.lcssa = phi i32 [ 0, %header ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+
+for.exit2:
+  ret i32 42
+}
+
+; two exiting and two exit blocks.
+; the non-latch exiting block has duplicate edges to the non-latch exit block.
+define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
+; CHECK-LABEL: test5
+; CHECK-LABEL:   exit1.loopexit:
+; CHECK-NEXT:      %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
+; CHECK-NEXT:      br label %exit1
+; CHECK-LABEL:   exit1.loopexit2:
+; CHECK-NEXT:      %ivy.epil = add i64 %iv.epil, %add
+; CHECK-NEXT:      br label %exit1
+; CHECK-LABEL:   exit1:
+; CHECK-NEXT:      %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ]
+; CHECK-NEXT:      ret i64 %result
+; CHECK-LABEL:   loop_latch.7:
+; CHECK: %niter.nsub.7 = add i64 %niter, -8
+entry:
+  br label %loop_header
+
+loop_header:
+  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+  %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ]
+  br i1 %cond, label %loop_latch, label %loop_exiting
+
+loop_exiting:
+   %ivy = add i64 %iv, %add
+   switch i64 %sum, label %loop_latch [
+     i64 24, label %exit1
+     i64 42, label %exit1
+   ]
+
+loop_latch:
+  %iv_next = add nuw nsw i64 %iv, 1
+  %sum.next = add i64 %sum, %add
+  %cmp = icmp ne i64 %iv_next, %trip
+  br i1 %cmp, label %loop_header, label %latchexit
+
+exit1:
+ %result = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ]
+ ret i64 %result
+
+latchexit:
+  ret i64 %sum.next
+}
+
+; test when exit blocks have successors.
+define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
+; CHECK-LABEL: test6
+; CHECK-LABEL:   for.exit2.loopexit:
+; CHECK-NEXT:      %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ],
+; CHECK-NEXT:      br label %for.exit2
+; CHECK-LABEL:   for.exit2.loopexit2:
+; CHECK-NEXT:      %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; CHECK-NEXT:      br label %for.exit2
+; CHECK-LABEL:   for.exit2:
+; CHECK-NEXT:      %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; CHECK-NEXT:      br i1 %cond, label %exit_true, label %exit_false
+; CHECK-LABEL:   latch.7:
+; CHECK:           %niter.nsub.7 = add i64 %niter, -8
+entry:
+  br label %header
+
+header:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+  br i1 false, label %for.exit2, label %for.exiting_block
+
+for.exiting_block:
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %for.exit2, label %latch
+
+latch:
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %load = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %load, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %latch_exit, label %header
+
+latch_exit:
+  %sum.0.lcssa = phi i32 [ %add, %latch ]
+  ret i32 %sum.0.lcssa
+
+for.exit2:
+  %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ]
+  %addx = add i32 %retval, %x
+  br i1 %cond, label %exit_true, label %exit_false
+
+exit_true:
+  ret i32 %retval
+
+exit_false:
+  ret i32 %addx
+}