From 9b44b72937fd0848b9be74b199350a0334a61e81 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Jun 2021 10:49:51 +0200 Subject: [PATCH] [LoopUnroll] Clamp unroll count to MaxTripCount Unrolling with more iterations than MaxTripCount is pointless, as those iterations can never be executed. As such, we clamp ULO.Count to MaxTripCount if it is known. This means we no longer need to consider iterations after MaxTripCount for exit folding, and the CompletelyUnroll flag becomes independent of ULO.TripCount. Differential Revision: https://reviews.llvm.org/D103748 --- lib/Transforms/Utils/LoopUnroll.cpp | 44 +++++---- test/Transforms/LoopUnroll/multiple-exits.ll | 41 -------- test/Transforms/LoopUnroll/nonlatchcondbr.ll | 22 ++--- test/Transforms/LoopUnroll/runtime-loop5.ll | 93 ++----------------- .../LoopUnroll/runtime-small-upperbound.ll | 88 +++++++----------- 5 files changed, 71 insertions(+), 217 deletions(-) diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index f7590accb31..b3658fbe9e1 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -301,11 +301,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (ULO.TripMultiple != 1) LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n"); - // Effectively "DCE" unrolled iterations that are beyond the tripcount - // and will never be executed. - if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount) - ULO.Count = ULO.TripCount; - // Don't enter the unroll code if there is nothing to do. if (ULO.TripCount == 0 && ULO.Count < 2) { LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); @@ -316,17 +311,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, assert(ULO.TripMultiple > 0); assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0); - // Are we eliminating the loop control altogether? Note that we can know - // we're eliminating the backedge without knowing exactly which iteration - // of the unrolled body exits. - const bool CompletelyUnroll = ULO.Count == ULO.TripCount; - - // We assume a run-time trip count if the compiler cannot - // figure out the loop trip count and the unroll-runtime - // flag is specified. - bool RuntimeTripCount = - (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime); - // All these values should be taken only after peeling because they might have // changed. BasicBlock *Preheader = L->getLoopPreheader(); @@ -336,6 +320,27 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, L->getExitBlocks(ExitBlocks); std::vector OriginalLoopBlocks = L->getBlocks(); + const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L); + const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); + + // Effectively "DCE" unrolled iterations that are beyond the max tripcount + // and will never be executed. + if (MaxTripCount && ULO.Count > MaxTripCount) + ULO.Count = MaxTripCount; + + // Are we eliminating the loop control altogether? Note that we can know + // we're eliminating the backedge without knowing exactly which iteration + // of the unrolled body exits. + const bool CompletelyUnroll = ULO.Count == MaxTripCount; + + const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero; + + // We assume a run-time trip count if the compiler cannot + // figure out the loop trip count and the unroll-runtime + // flag is specified. + bool RuntimeTripCount = + !CompletelyUnroll && ULO.TripCount == 0 && ULO.AllowRuntime; + // Go through all exits of L and see if there are any phi-nodes there. We just // conservatively assume that they're inserted to preserve LCSSA form, which // means that complete unrolling might break this form. We need to either fix @@ -347,11 +352,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, any_of(ExitBlocks, [](const BasicBlock *BB) { return isa(BB->begin()); }); - const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L); - const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); - - const bool PreserveOnlyFirst = ULO.Count == MaxTripCount && MaxOrZero; - // The current loop unroll pass can unroll loops that have // (1) single latch; and // (2a) latch is unconditional; or @@ -728,8 +728,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // Complete (but possibly inexact) unrolling if (j == 0) return true; - if (MaxTripCount && j >= MaxTripCount) - return false; // Warning: ExactTripCount is the trip count of the exiting // block which ends in ExitingBI, not neccessarily the loop. if (ExactTripCount && j != ExactTripCount) diff --git a/test/Transforms/LoopUnroll/multiple-exits.ll b/test/Transforms/LoopUnroll/multiple-exits.ll index 39dfe32d24b..8a3f51a1fb9 100644 --- a/test/Transforms/LoopUnroll/multiple-exits.ll +++ b/test/Transforms/LoopUnroll/multiple-exits.ll @@ -3,7 +3,6 @@ declare void @bar() -; TODO: We should unroll by 10, not 20 here define void @test1() { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: @@ -55,46 +54,6 @@ define void @test1() { ; CHECK-NEXT: br i1 false, label [[LATCH_10:%.*]], label [[EXIT]] ; CHECK: latch.10: ; CHECK-NEXT: call void @bar() -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br i1 false, label [[LATCH_11:%.*]], label [[EXIT]] -; CHECK: latch.11: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br i1 false, label [[LATCH_12:%.*]], label [[EXIT]] -; CHECK: latch.12: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br i1 false, label [[LATCH_13:%.*]], label [[EXIT]] -; CHECK: latch.13: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br i1 false, label [[LATCH_14:%.*]], label [[EXIT]] -; CHECK: latch.14: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br i1 false, label [[LATCH_15:%.*]], label [[EXIT]] -; CHECK: latch.15: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br i1 false, label [[LATCH_16:%.*]], label [[EXIT]] -; CHECK: latch.16: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br i1 false, label [[LATCH_17:%.*]], label [[EXIT]] -; CHECK: latch.17: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br i1 false, label [[LATCH_18:%.*]], label [[EXIT]] -; CHECK: latch.18: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br i1 false, label [[LATCH_19:%.*]], label [[EXIT]] -; CHECK: latch.19: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br i1 false, label [[LATCH_20:%.*]], label [[EXIT]] -; CHECK: latch.20: -; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label [[EXIT]] ; entry: diff --git a/test/Transforms/LoopUnroll/nonlatchcondbr.ll b/test/Transforms/LoopUnroll/nonlatchcondbr.ll index e8dfa0a1a5b..3076084c533 100644 --- a/test/Transforms/LoopUnroll/nonlatchcondbr.ll +++ b/test/Transforms/LoopUnroll/nonlatchcondbr.ll @@ -165,43 +165,35 @@ define void @test3(i32* noalias %A, i1 %cond) { ; CHECK-NEXT: call void @bar(i32 [[TMP0]]) ; CHECK-NEXT: br label [[FOR_HEADER:%.*]] ; CHECK: for.header: -; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE_3:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC_3:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE_3]] ] -; CHECK-NEXT: call void @bar(i32 [[TMP1]]) +; CHECK-NEXT: call void @bar(i32 [[TMP0]]) ; CHECK-NEXT: br i1 [[COND:%.*]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC]] +; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE]]) ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_1:%.*]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; CHECK: for.body.1: -; CHECK-NEXT: [[INC_1:%.*]] = add nuw nsw i64 [[INC]], 1 ; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge.1: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_1]] +; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 ; CHECK-NEXT: [[DOTPRE_1:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_1]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE_1]]) ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_2:%.*]], label [[FOR_END]] ; CHECK: for.body.2: -; CHECK-NEXT: [[INC_2:%.*]] = add nuw nsw i64 [[INC_1]], 1 ; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_2:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge.2: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_2]] +; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 ; CHECK-NEXT: [[DOTPRE_2:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_2]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE_2]]) ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_3:%.*]], label [[FOR_END]] ; CHECK: for.body.3: -; CHECK-NEXT: [[INC_3]] = add nuw nsw i64 [[INC_2]], 1 -; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3]], label [[FOR_END]] +; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge.3: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_3]] -; CHECK-NEXT: [[DOTPRE_3]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_3]], align 4 -; CHECK-NEXT: br label [[FOR_HEADER]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: unreachable ; entry: %0 = load i32, i32* %A, align 4 @@ -243,7 +235,7 @@ define void @test4(i32 %arg) { ; CHECK: bb1.2: ; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1_3:%.*]] ; CHECK: bb1.3: -; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1]], !llvm.loop [[LOOP2:![0-9]+]] ; bb: br label %bb1 diff --git a/test/Transforms/LoopUnroll/runtime-loop5.ll b/test/Transforms/LoopUnroll/runtime-loop5.ll index a1c8ffedaeb..07fcefc621d 100644 --- a/test/Transforms/LoopUnroll/runtime-loop5.ll +++ b/test/Transforms/LoopUnroll/runtime-loop5.ll @@ -16,123 +16,50 @@ define i3 @test(i3* %a, i3 %n) { ; UNROLL-16: for.body.preheader: ; UNROLL-16-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-16: for.body: -; UNROLL-16-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_15:%.*]], [[FOR_BODY_15:%.*]] ] -; UNROLL-16-NEXT: [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD_15:%.*]], [[FOR_BODY_15]] ] -; UNROLL-16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i3, i3* [[A:%.*]], i64 [[INDVARS_IV]] -; UNROLL-16-NEXT: [[TMP0:%.*]] = load i3, i3* [[ARRAYIDX]], align 1 -; UNROLL-16-NEXT: [[ADD:%.*]] = add nsw i3 [[TMP0]], [[SUM_02]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; UNROLL-16-NEXT: [[TMP0:%.*]] = load i3, i3* [[A:%.*]], align 1 ; UNROLL-16-NEXT: [[EXITCOND:%.*]] = icmp eq i3 1, [[N]] ; UNROLL-16-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_1:%.*]] ; UNROLL-16: for.end.loopexit: -; UNROLL-16-NEXT: [[ADD_LCSSA:%.*]] = phi i3 [ [[ADD]], [[FOR_BODY]] ], [ [[ADD_1:%.*]], [[FOR_BODY_1]] ], [ [[ADD_2:%.*]], [[FOR_BODY_2:%.*]] ], [ [[ADD_3:%.*]], [[FOR_BODY_3:%.*]] ], [ [[ADD_4:%.*]], [[FOR_BODY_4:%.*]] ], [ [[ADD_5:%.*]], [[FOR_BODY_5:%.*]] ], [ [[ADD_6:%.*]], [[FOR_BODY_6:%.*]] ], [ [[ADD_7:%.*]], [[FOR_BODY_7:%.*]] ], [ [[ADD_8:%.*]], [[FOR_BODY_8:%.*]] ], [ [[ADD_9:%.*]], [[FOR_BODY_9:%.*]] ], [ [[ADD_10:%.*]], [[FOR_BODY_10:%.*]] ], [ [[ADD_11:%.*]], [[FOR_BODY_11:%.*]] ], [ [[ADD_12:%.*]], [[FOR_BODY_12:%.*]] ], [ [[ADD_13:%.*]], [[FOR_BODY_13:%.*]] ], [ [[ADD_14:%.*]], [[FOR_BODY_14:%.*]] ], [ [[ADD_15]], [[FOR_BODY_15]] ] +; UNROLL-16-NEXT: [[ADD_LCSSA:%.*]] = phi i3 [ [[TMP0]], [[FOR_BODY]] ], [ [[ADD_1:%.*]], [[FOR_BODY_1]] ], [ [[ADD_2:%.*]], [[FOR_BODY_2:%.*]] ], [ [[ADD_3:%.*]], [[FOR_BODY_3:%.*]] ], [ [[ADD_4:%.*]], [[FOR_BODY_4:%.*]] ], [ [[ADD_5:%.*]], [[FOR_BODY_5:%.*]] ], [ [[ADD_6:%.*]], [[FOR_BODY_6:%.*]] ] ; UNROLL-16-NEXT: br label [[FOR_END]] ; UNROLL-16: for.end: ; UNROLL-16-NEXT: [[SUM_0_LCSSA:%.*]] = phi i3 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ] ; UNROLL-16-NEXT: ret i3 [[SUM_0_LCSSA]] ; UNROLL-16: for.body.1: -; UNROLL-16-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT]] +; UNROLL-16-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 1 ; UNROLL-16-NEXT: [[TMP1:%.*]] = load i3, i3* [[ARRAYIDX_1]], align 1 -; UNROLL-16-NEXT: [[ADD_1]] = add nsw i3 [[TMP1]], [[ADD]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 +; UNROLL-16-NEXT: [[ADD_1]] = add nsw i3 [[TMP1]], [[TMP0]] ; UNROLL-16-NEXT: [[EXITCOND_1:%.*]] = icmp eq i3 2, [[N]] ; UNROLL-16-NEXT: br i1 [[EXITCOND_1]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_2]] ; UNROLL-16: for.body.2: -; UNROLL-16-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_1]] +; UNROLL-16-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 2 ; UNROLL-16-NEXT: [[TMP2:%.*]] = load i3, i3* [[ARRAYIDX_2]], align 1 ; UNROLL-16-NEXT: [[ADD_2]] = add nsw i3 [[TMP2]], [[ADD_1]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1 ; UNROLL-16-NEXT: [[EXITCOND_2:%.*]] = icmp eq i3 3, [[N]] ; UNROLL-16-NEXT: br i1 [[EXITCOND_2]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_3]] ; UNROLL-16: for.body.3: -; UNROLL-16-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_2]] +; UNROLL-16-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 3 ; UNROLL-16-NEXT: [[TMP3:%.*]] = load i3, i3* [[ARRAYIDX_3]], align 1 ; UNROLL-16-NEXT: [[ADD_3]] = add nsw i3 [[TMP3]], [[ADD_2]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 ; UNROLL-16-NEXT: [[EXITCOND_3:%.*]] = icmp eq i3 -4, [[N]] ; UNROLL-16-NEXT: br i1 [[EXITCOND_3]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_4]] ; UNROLL-16: for.body.4: -; UNROLL-16-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_3]] +; UNROLL-16-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 4 ; UNROLL-16-NEXT: [[TMP4:%.*]] = load i3, i3* [[ARRAYIDX_4]], align 1 ; UNROLL-16-NEXT: [[ADD_4]] = add nsw i3 [[TMP4]], [[ADD_3]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1 ; UNROLL-16-NEXT: [[EXITCOND_4:%.*]] = icmp eq i3 -3, [[N]] ; UNROLL-16-NEXT: br i1 [[EXITCOND_4]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_5]] ; UNROLL-16: for.body.5: -; UNROLL-16-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_4]] +; UNROLL-16-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 5 ; UNROLL-16-NEXT: [[TMP5:%.*]] = load i3, i3* [[ARRAYIDX_5]], align 1 ; UNROLL-16-NEXT: [[ADD_5]] = add nsw i3 [[TMP5]], [[ADD_4]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1 ; UNROLL-16-NEXT: [[EXITCOND_5:%.*]] = icmp eq i3 -2, [[N]] ; UNROLL-16-NEXT: br i1 [[EXITCOND_5]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_6]] ; UNROLL-16: for.body.6: -; UNROLL-16-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_5]] +; UNROLL-16-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 6 ; UNROLL-16-NEXT: [[TMP6:%.*]] = load i3, i3* [[ARRAYIDX_6]], align 1 ; UNROLL-16-NEXT: [[ADD_6]] = add nsw i3 [[TMP6]], [[ADD_5]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1 -; UNROLL-16-NEXT: [[EXITCOND_6:%.*]] = icmp eq i3 -1, [[N]] -; UNROLL-16-NEXT: br i1 [[EXITCOND_6]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_7]] -; UNROLL-16: for.body.7: -; UNROLL-16-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_6]] -; UNROLL-16-NEXT: [[TMP7:%.*]] = load i3, i3* [[ARRAYIDX_7]], align 1 -; UNROLL-16-NEXT: [[ADD_7]] = add nsw i3 [[TMP7]], [[ADD_6]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_7:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1 -; UNROLL-16-NEXT: br i1 false, label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_8]] -; UNROLL-16: for.body.8: -; UNROLL-16-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_7]] -; UNROLL-16-NEXT: [[TMP8:%.*]] = load i3, i3* [[ARRAYIDX_8]], align 1 -; UNROLL-16-NEXT: [[ADD_8]] = add nsw i3 [[TMP8]], [[ADD_7]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_8:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_7]], 1 -; UNROLL-16-NEXT: [[EXITCOND_8:%.*]] = icmp eq i3 1, [[N]] -; UNROLL-16-NEXT: br i1 [[EXITCOND_8]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_9]] -; UNROLL-16: for.body.9: -; UNROLL-16-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_8]] -; UNROLL-16-NEXT: [[TMP9:%.*]] = load i3, i3* [[ARRAYIDX_9]], align 1 -; UNROLL-16-NEXT: [[ADD_9]] = add nsw i3 [[TMP9]], [[ADD_8]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_9:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_8]], 1 -; UNROLL-16-NEXT: [[EXITCOND_9:%.*]] = icmp eq i3 2, [[N]] -; UNROLL-16-NEXT: br i1 [[EXITCOND_9]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_10]] -; UNROLL-16: for.body.10: -; UNROLL-16-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_9]] -; UNROLL-16-NEXT: [[TMP10:%.*]] = load i3, i3* [[ARRAYIDX_10]], align 1 -; UNROLL-16-NEXT: [[ADD_10]] = add nsw i3 [[TMP10]], [[ADD_9]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_10:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_9]], 1 -; UNROLL-16-NEXT: [[EXITCOND_10:%.*]] = icmp eq i3 3, [[N]] -; UNROLL-16-NEXT: br i1 [[EXITCOND_10]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_11]] -; UNROLL-16: for.body.11: -; UNROLL-16-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_10]] -; UNROLL-16-NEXT: [[TMP11:%.*]] = load i3, i3* [[ARRAYIDX_11]], align 1 -; UNROLL-16-NEXT: [[ADD_11]] = add nsw i3 [[TMP11]], [[ADD_10]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_11:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_10]], 1 -; UNROLL-16-NEXT: [[EXITCOND_11:%.*]] = icmp eq i3 -4, [[N]] -; UNROLL-16-NEXT: br i1 [[EXITCOND_11]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_12]] -; UNROLL-16: for.body.12: -; UNROLL-16-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_11]] -; UNROLL-16-NEXT: [[TMP12:%.*]] = load i3, i3* [[ARRAYIDX_12]], align 1 -; UNROLL-16-NEXT: [[ADD_12]] = add nsw i3 [[TMP12]], [[ADD_11]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_12:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_11]], 1 -; UNROLL-16-NEXT: [[EXITCOND_12:%.*]] = icmp eq i3 -3, [[N]] -; UNROLL-16-NEXT: br i1 [[EXITCOND_12]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_13]] -; UNROLL-16: for.body.13: -; UNROLL-16-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_12]] -; UNROLL-16-NEXT: [[TMP13:%.*]] = load i3, i3* [[ARRAYIDX_13]], align 1 -; UNROLL-16-NEXT: [[ADD_13]] = add nsw i3 [[TMP13]], [[ADD_12]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_13:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_12]], 1 -; UNROLL-16-NEXT: [[EXITCOND_13:%.*]] = icmp eq i3 -2, [[N]] -; UNROLL-16-NEXT: br i1 [[EXITCOND_13]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_14]] -; UNROLL-16: for.body.14: -; UNROLL-16-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_13]] -; UNROLL-16-NEXT: [[TMP14:%.*]] = load i3, i3* [[ARRAYIDX_14]], align 1 -; UNROLL-16-NEXT: [[ADD_14]] = add nsw i3 [[TMP14]], [[ADD_13]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_14:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_13]], 1 -; UNROLL-16-NEXT: [[EXITCOND_14:%.*]] = icmp eq i3 -1, [[N]] -; UNROLL-16-NEXT: br i1 [[EXITCOND_14]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_15]] -; UNROLL-16: for.body.15: -; UNROLL-16-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_14]] -; UNROLL-16-NEXT: [[TMP15:%.*]] = load i3, i3* [[ARRAYIDX_15]], align 1 -; UNROLL-16-NEXT: [[ADD_15]] = add nsw i3 [[TMP15]], [[ADD_14]] -; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_15]] = add i64 [[INDVARS_IV_NEXT_14]], 1 -; UNROLL-16-NEXT: br i1 false, label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; UNROLL-16-NEXT: br label [[FOR_END_LOOPEXIT]] ; ; UNROLL-4-LABEL: @test( ; UNROLL-4-NEXT: entry: diff --git a/test/Transforms/LoopUnroll/runtime-small-upperbound.ll b/test/Transforms/LoopUnroll/runtime-small-upperbound.ll index a16d567a36c..02e86de97e9 100644 --- a/test/Transforms/LoopUnroll/runtime-small-upperbound.ll +++ b/test/Transforms/LoopUnroll/runtime-small-upperbound.ll @@ -100,67 +100,45 @@ define dso_local void @hoge_5(i8 %arg) { ; UPPER-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17 ; UPPER-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; UPPER: loop.preheader: -; UPPER-NEXT: [[TMP1:%.*]] = sub i32 16, [[X]] -; UPPER-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 -; UPPER-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 -; UPPER-NEXT: [[TMP4:%.*]] = urem i32 [[TMP2]], 6 -; UPPER-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 -; UPPER-NEXT: [[XTRAITER:%.*]] = urem i32 [[TMP5]], 6 -; UPPER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 -; UPPER-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]] -; UPPER: loop.prol.preheader: -; UPPER-NEXT: br label [[LOOP_PROL:%.*]] -; UPPER: loop.prol: -; UPPER-NEXT: [[IV_PROL:%.*]] = phi i32 [ [[IV_NEXT_PROL:%.*]], [[LOOP_PROL]] ], [ [[X]], [[LOOP_PROL_PREHEADER]] ] -; UPPER-NEXT: [[PTR_PROL:%.*]] = phi i8* [ [[PTR_NEXT_PROL:%.*]], [[LOOP_PROL]] ], [ [[Y]], [[LOOP_PROL_PREHEADER]] ] -; UPPER-NEXT: [[PROL_ITER:%.*]] = phi i32 [ [[XTRAITER]], [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LOOP_PROL]] ] -; UPPER-NEXT: [[IV_NEXT_PROL]] = add nuw i32 [[IV_PROL]], 4 -; UPPER-NEXT: [[PTR_NEXT_PROL]] = getelementptr inbounds i8, i8* [[PTR_PROL]], i32 1 -; UPPER-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT_PROL]], align 1 -; UPPER-NEXT: [[TMP6:%.*]] = icmp ult i32 [[IV_NEXT_PROL]], 17 -; UPPER-NEXT: [[PROL_ITER_SUB]] = sub i32 [[PROL_ITER]], 1 -; UPPER-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0 -; UPPER-NEXT: br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], [[LOOP0:!llvm.loop !.*]] -; UPPER: loop.prol.loopexit.unr-lcssa: -; UPPER-NEXT: [[IV_UNR_PH:%.*]] = phi i32 [ [[IV_NEXT_PROL]], [[LOOP_PROL]] ] -; UPPER-NEXT: [[PTR_UNR_PH:%.*]] = phi i8* [ [[PTR_NEXT_PROL]], [[LOOP_PROL]] ] -; UPPER-NEXT: br label [[LOOP_PROL_LOOPEXIT]] -; UPPER: loop.prol.loopexit: -; UPPER-NEXT: [[IV_UNR:%.*]] = phi i32 [ [[X]], [[LOOP_PREHEADER]] ], [ [[IV_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ] -; UPPER-NEXT: [[PTR_UNR:%.*]] = phi i8* [ [[Y]], [[LOOP_PREHEADER]] ], [ [[PTR_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ] -; UPPER-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP2]], 5 -; UPPER-NEXT: br i1 [[TMP7]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_PREHEADER_NEW:%.*]] -; UPPER: loop.preheader.new: ; UPPER-NEXT: br label [[LOOP:%.*]] ; UPPER: loop: -; UPPER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_UNR]], [[LOOP_PREHEADER_NEW]] ], [ [[IV_NEXT_5:%.*]], [[LOOP]] ] -; UPPER-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_UNR]], [[LOOP_PREHEADER_NEW]] ], [ [[PTR_NEXT_5:%.*]], [[LOOP]] ] -; UPPER-NEXT: [[IV_NEXT:%.*]] = add nuw i32 [[IV]], 4 -; UPPER-NEXT: [[PTR_NEXT:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT]], align 1 -; UPPER-NEXT: [[IV_NEXT_1:%.*]] = add nuw i32 [[IV_NEXT]], 4 -; UPPER-NEXT: [[PTR_NEXT_1:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_1]], align 1 -; UPPER-NEXT: [[IV_NEXT_2:%.*]] = add nuw i32 [[IV_NEXT_1]], 4 -; UPPER-NEXT: [[PTR_NEXT_2:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_1]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_2]], align 1 -; UPPER-NEXT: [[IV_NEXT_3:%.*]] = add nuw i32 [[IV_NEXT_2]], 4 -; UPPER-NEXT: [[PTR_NEXT_3:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_2]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_3]], align 1 -; UPPER-NEXT: [[IV_NEXT_4:%.*]] = add nuw i32 [[IV_NEXT_3]], 4 -; UPPER-NEXT: [[PTR_NEXT_4:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_3]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_4]], align 1 -; UPPER-NEXT: [[IV_NEXT_5]] = add nuw i32 [[IV_NEXT_4]], 4 -; UPPER-NEXT: [[PTR_NEXT_5]] = getelementptr inbounds i8, i8* [[PTR_NEXT_4]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_5]], align 1 -; UPPER-NEXT: [[TMP8:%.*]] = icmp ult i32 [[IV_NEXT_5]], 17 -; UPPER-NEXT: br i1 [[TMP8]], label [[LOOP]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]] -; UPPER: exit.loopexit.unr-lcssa: -; UPPER-NEXT: br label [[EXIT_LOOPEXIT]] +; UPPER-NEXT: [[IV_NEXT:%.*]] = add nuw i32 [[X]], 4 +; UPPER-NEXT: [[PTR_NEXT:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 1 +; UPPER-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1 +; UPPER-NEXT: [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17 +; UPPER-NEXT: br i1 [[TMP1]], label [[LOOP_1:%.*]], label [[EXIT_LOOPEXIT:%.*]] ; UPPER: exit.loopexit: ; UPPER-NEXT: br label [[EXIT]] ; UPPER: exit: ; UPPER-NEXT: ret void +; UPPER: loop.1: +; UPPER-NEXT: [[IV_NEXT_1:%.*]] = add nuw i32 [[IV_NEXT]], 4 +; UPPER-NEXT: [[PTR_NEXT_1:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT]], i32 1 +; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_1]], align 1 +; UPPER-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IV_NEXT_1]], 17 +; UPPER-NEXT: br i1 [[TMP2]], label [[LOOP_2:%.*]], label [[EXIT_LOOPEXIT]] +; UPPER: loop.2: +; UPPER-NEXT: [[IV_NEXT_2:%.*]] = add nuw i32 [[IV_NEXT_1]], 4 +; UPPER-NEXT: [[PTR_NEXT_2:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_1]], i32 1 +; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_2]], align 1 +; UPPER-NEXT: [[TMP3:%.*]] = icmp ult i32 [[IV_NEXT_2]], 17 +; UPPER-NEXT: br i1 [[TMP3]], label [[LOOP_3:%.*]], label [[EXIT_LOOPEXIT]] +; UPPER: loop.3: +; UPPER-NEXT: [[IV_NEXT_3:%.*]] = add nuw i32 [[IV_NEXT_2]], 4 +; UPPER-NEXT: [[PTR_NEXT_3:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_2]], i32 1 +; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_3]], align 1 +; UPPER-NEXT: [[TMP4:%.*]] = icmp ult i32 [[IV_NEXT_3]], 17 +; UPPER-NEXT: br i1 [[TMP4]], label [[LOOP_4:%.*]], label [[EXIT_LOOPEXIT]] +; UPPER: loop.4: +; UPPER-NEXT: [[IV_NEXT_4:%.*]] = add nuw i32 [[IV_NEXT_3]], 4 +; UPPER-NEXT: [[PTR_NEXT_4:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_3]], i32 1 +; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_4]], align 1 +; UPPER-NEXT: [[TMP5:%.*]] = icmp ult i32 [[IV_NEXT_4]], 17 +; UPPER-NEXT: br i1 [[TMP5]], label [[LOOP_5:%.*]], label [[EXIT_LOOPEXIT]] +; UPPER: loop.5: +; UPPER-NEXT: [[PTR_NEXT_5:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_4]], i32 1 +; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_5]], align 1 +; UPPER-NEXT: br label [[EXIT_LOOPEXIT]] ; entry: %x = load i32, i32* @global, align 4