diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 352aff19965..2c4937b6bef 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1316,25 +1316,32 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { FromBB->getTerminator()->getIterator()); } -/// Update BI to jump to NewBB instead of OldBB. Records updates to -/// the dominator tree in DTUpdates, if DT should be preserved. +// Update BI to jump to NewBB instead of OldBB. Records updates to the +// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that +// \p OldBB is exactly once in BI's successor list. static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, - std::vector &DTUpdates) { - assert(llvm::count_if(successors(BI), - [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && - "BI must jump to OldBB at most once."); - for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) { - if (BI->getSuccessor(i) == OldBB) { - BI->setSuccessor(i, NewBB); - - DTUpdates.push_back( - {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); - DTUpdates.push_back( - {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); - break; + std::vector &DTUpdates, + bool MustUpdateOnce = true) { + assert((!MustUpdateOnce || + llvm::count_if(successors(BI), + [OldBB](BasicBlock *BB) { + return BB == OldBB; + }) == 1) && "BI must jump to OldBB exactly once."); + bool Changed = false; + for (Use &Op : BI->operands()) + if (Op == OldBB) { + Op.set(NewBB); + Changed = true; } + + if (Changed) { + DTUpdates.push_back( + {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); + DTUpdates.push_back( + {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); } + assert(Changed && "Expected a successor to be updated"); } // Move Lcssa PHIs to the right place. @@ -1481,12 +1488,21 @@ bool LoopInterchangeTransform::adjustLoopBranches() { if (!InnerLoopHeaderSuccessor) return false; - // Adjust Loop Preheader and headers + // Adjust Loop Preheader and headers. + // The branches in the outer loop predecessor and the outer loop header can + // be unconditional branches or conditional branches with duplicates. Consider + // this when updating the successors. updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader, - InnerLoopPreHeader, DTUpdates); - updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates); + InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false); + // The outer loop header might or might not branch to the outer latch. + // We are guaranteed to branch to the inner loop preheader. + if (std::find(succ_begin(OuterLoopHeaderBI), succ_end(OuterLoopHeaderBI), + OuterLoopLatch) != succ_end(OuterLoopHeaderBI)) + updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates, + /*MustUpdateOnce=*/false); updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, - InnerLoopHeaderSuccessor, DTUpdates); + InnerLoopHeaderSuccessor, DTUpdates, + /*MustUpdateOnce=*/false); // Adjust reduction PHI's now that the incoming block has changed. InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader, diff --git a/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll b/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll new file mode 100644 index 00000000000..3f178443ee6 --- /dev/null +++ b/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-interchange -S %s | FileCheck %s + + +@global = external dso_local global [1000 x [1000 x i32]], align 16 + +; Test that we support updating conditional branches where both targets are the same +; in the predecessor of the outer loop header. + +define void @foo(i1 %cmp) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[INNER_HEADER_PREHEADER:%.*]], label [[INNER_HEADER_PREHEADER]] +; CHECK: bb1: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[BB1:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: br label [[INNER_HEADER_SPLIT1:%.*]] +; CHECK: inner.header.preheader: +; CHECK-NEXT: br label [[INNER_HEADER:%.*]] +; CHECK: inner.header: +; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[TMP0:%.*]], [[INNER_HEADER_SPLIT:%.*]] ], [ 5, [[INNER_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label [[BB1]] +; CHECK: inner.header.split1: +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[OUTER_IV]] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]] +; CHECK-NEXT: [[V:%.*]] = mul i32 [[LV]], 100 +; CHECK-NEXT: store i32 [[V]], i32* [[PTR]] +; CHECK-NEXT: [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1 +; CHECK-NEXT: [[COND1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 1000 +; CHECK-NEXT: br label [[OUTER_LATCH]] +; CHECK: inner.header.split: +; CHECK-NEXT: [[TMP0]] = add nsw i64 [[INNER_IV]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 1000 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[INNER_HEADER]] +; CHECK: outer.latch: +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1 +; CHECK-NEXT: [[COND2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[COND2]], label [[INNER_HEADER_SPLIT]], label [[OUTER_HEADER]] +; CHECK: bb9: +; CHECK-NEXT: br label [[BB10:%.*]] +; CHECK: bb10: +; CHECK-NEXT: ret void +; +entry: + br i1 %cmp, label %bb1, label %bb1 + +bb1: ; preds = %entry, %entry + br label %outer.header + +outer.header: ; preds = %outer.latch, %bb1 + %outer.iv = phi i64 [ 0, %bb1], [ %outer.iv.next, %outer.latch ] + br label %inner.header + +inner.header: ; preds = %inner.header, %outer.header + %inner.iv = phi i64 [ %inner.iv.next, %inner.header ], [ 5, %outer.header ] + %ptr = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 %inner.iv, i64 %outer.iv + %lv = load i32, i32* %ptr + %v = mul i32 %lv, 100 + store i32 %v, i32* %ptr + %inner.iv.next = add nsw i64 %inner.iv, 1 + %cond1 = icmp eq i64 %inner.iv.next , 1000 + br i1 %cond1, label %outer.latch, label %inner.header + +outer.latch: ; preds = %inner.header + %outer.iv.next = add nuw nsw i64 %outer.iv, 1 + %cond2 = icmp eq i64 %outer.iv.next, 1000 + br i1 %cond2, label %bb9, label %outer.header + +bb9: ; preds = %outer.latch + br label %bb10 + +bb10: ; preds = %bb9 + ret void +} + + +define void @foo1(i1 %cmp) { +; CHECK-LABEL: @foo1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: br i1 [[CMP]], label [[INNER_HEADER_PREHEADER:%.*]], label [[INNER_HEADER_PREHEADER]] +; CHECK: outer.header.preheader: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER:%.*]] ] +; CHECK-NEXT: br i1 [[CMP]], label [[INNER_HEADER_SPLIT1:%.*]], label [[INNER_HEADER_SPLIT1]] +; CHECK: inner.header.preheader: +; CHECK-NEXT: br label [[INNER_HEADER:%.*]] +; CHECK: inner.header: +; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[TMP0:%.*]], [[INNER_HEADER_SPLIT:%.*]] ], [ 5, [[INNER_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label [[OUTER_HEADER_PREHEADER]] +; CHECK: inner.header.split1: +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[OUTER_IV]] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]] +; CHECK-NEXT: [[V:%.*]] = mul i32 [[LV]], 100 +; CHECK-NEXT: store i32 [[V]], i32* [[PTR]] +; CHECK-NEXT: [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1 +; CHECK-NEXT: [[COND1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 1000 +; CHECK-NEXT: br label [[OUTER_LATCH]] +; CHECK: inner.header.split: +; CHECK-NEXT: [[TMP0]] = add nsw i64 [[INNER_IV]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 1000 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[INNER_HEADER]] +; CHECK: outer.latch: +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1 +; CHECK-NEXT: [[COND2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[COND2]], label [[INNER_HEADER_SPLIT]], label [[OUTER_HEADER]] +; CHECK: bb9: +; CHECK-NEXT: br label [[BB10:%.*]] +; CHECK: bb10: +; CHECK-NEXT: ret void +; +entry: + br i1 %cmp, label %bb1, label %bb1 + +bb1: ; preds = %entry, %entry + br i1 %cmp, label %outer.header, label %outer.header + +outer.header: ; preds = %outer.latch, %bb1 + %outer.iv = phi i64 [ 0, %bb1 ], [ 0, %bb1 ], [ %outer.iv.next, %outer.latch ] + br i1 %cmp, label %inner.header, label %inner.header + +inner.header: ; preds = %inner.header, %outer.header + %inner.iv = phi i64 [ %inner.iv.next, %inner.header ], [ 5, %outer.header ], [ 5, %outer.header ] + %ptr = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 %inner.iv, i64 %outer.iv + %lv = load i32, i32* %ptr + %v = mul i32 %lv, 100 + store i32 %v, i32* %ptr + %inner.iv.next = add nsw i64 %inner.iv, 1 + %cond1 = icmp eq i64 %inner.iv.next , 1000 + br i1 %cond1, label %outer.latch, label %inner.header + +outer.latch: ; preds = %inner.header + %outer.iv.next = add nuw nsw i64 %outer.iv, 1 + %cond2 = icmp eq i64 %outer.iv.next, 1000 + br i1 %cond2, label %bb9, label %outer.header + +bb9: ; preds = %outer.latch + br label %bb10 + +bb10: ; preds = %bb9 + ret void +}