1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[LoopUnroll] Clamp unroll count to MaxTripCount

Unrolling with more iterations than MaxTripCount is pointless, as
those iterations can never be executed. As such, we clamp ULO.Count
to MaxTripCount if it is known. This means we no longer need to
consider iterations after MaxTripCount for exit folding, and the
CompletelyUnroll flag becomes independent of ULO.TripCount.

Differential Revision: https://reviews.llvm.org/D103748
This commit is contained in:
Nikita Popov 2021-06-05 10:49:51 +02:00
parent 4f565b8cdf
commit 9b44b72937
5 changed files with 71 additions and 217 deletions

View File

@ -301,11 +301,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (ULO.TripMultiple != 1)
LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n");
// Effectively "DCE" unrolled iterations that are beyond the tripcount
// and will never be executed.
if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount)
ULO.Count = ULO.TripCount;
// Don't enter the unroll code if there is nothing to do.
if (ULO.TripCount == 0 && ULO.Count < 2) {
LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
@ -316,17 +311,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
assert(ULO.TripMultiple > 0);
assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
// Are we eliminating the loop control altogether? Note that we can know
// we're eliminating the backedge without knowing exactly which iteration
// of the unrolled body exits.
const bool CompletelyUnroll = ULO.Count == ULO.TripCount;
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
// flag is specified.
bool RuntimeTripCount =
(ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
// All these values should be taken only after peeling because they might have
// changed.
BasicBlock *Preheader = L->getLoopPreheader();
@ -336,6 +320,27 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
L->getExitBlocks(ExitBlocks);
std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
// Effectively "DCE" unrolled iterations that are beyond the max tripcount
// and will never be executed.
if (MaxTripCount && ULO.Count > MaxTripCount)
ULO.Count = MaxTripCount;
// Are we eliminating the loop control altogether? Note that we can know
// we're eliminating the backedge without knowing exactly which iteration
// of the unrolled body exits.
const bool CompletelyUnroll = ULO.Count == MaxTripCount;
const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero;
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
// flag is specified.
bool RuntimeTripCount =
!CompletelyUnroll && ULO.TripCount == 0 && ULO.AllowRuntime;
// Go through all exits of L and see if there are any phi-nodes there. We just
// conservatively assume that they're inserted to preserve LCSSA form, which
// means that complete unrolling might break this form. We need to either fix
@ -347,11 +352,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
any_of(ExitBlocks,
[](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
const bool PreserveOnlyFirst = ULO.Count == MaxTripCount && MaxOrZero;
// The current loop unroll pass can unroll loops that have
// (1) single latch; and
// (2a) latch is unconditional; or
@ -728,8 +728,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// Complete (but possibly inexact) unrolling
if (j == 0)
return true;
if (MaxTripCount && j >= MaxTripCount)
return false;
// Warning: ExactTripCount is the trip count of the exiting
// block which ends in ExitingBI, not necessarily the loop.
if (ExactTripCount && j != ExactTripCount)

View File

@ -3,7 +3,6 @@
declare void @bar()
; TODO: We should unroll by 10, not 20 here
define void @test1() {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
@ -55,46 +54,6 @@ define void @test1() {
; CHECK-NEXT: br i1 false, label [[LATCH_10:%.*]], label [[EXIT]]
; CHECK: latch.10:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_11:%.*]], label [[EXIT]]
; CHECK: latch.11:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_12:%.*]], label [[EXIT]]
; CHECK: latch.12:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_13:%.*]], label [[EXIT]]
; CHECK: latch.13:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_14:%.*]], label [[EXIT]]
; CHECK: latch.14:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_15:%.*]], label [[EXIT]]
; CHECK: latch.15:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_16:%.*]], label [[EXIT]]
; CHECK: latch.16:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_17:%.*]], label [[EXIT]]
; CHECK: latch.17:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_18:%.*]], label [[EXIT]]
; CHECK: latch.18:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_19:%.*]], label [[EXIT]]
; CHECK: latch.19:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_20:%.*]], label [[EXIT]]
; CHECK: latch.20:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[EXIT]]
;
entry:

View File

@ -165,43 +165,35 @@ define void @test3(i32* noalias %A, i1 %cond) {
; CHECK-NEXT: call void @bar(i32 [[TMP0]])
; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
; CHECK: for.header:
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE_3:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC_3:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE_3]] ]
; CHECK-NEXT: call void @bar(i32 [[TMP1]])
; CHECK-NEXT: call void @bar(i32 [[TMP0]])
; CHECK-NEXT: br i1 [[COND:%.*]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]], label [[FOR_END]]
; CHECK: for.body.for.body_crit_edge:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC]]
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
; CHECK-NEXT: call void @bar(i32 [[DOTPRE]])
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_1:%.*]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
; CHECK: for.body.1:
; CHECK-NEXT: [[INC_1:%.*]] = add nuw nsw i64 [[INC]], 1
; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]], label [[FOR_END]]
; CHECK: for.body.for.body_crit_edge.1:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_1]]
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
; CHECK-NEXT: [[DOTPRE_1:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_1]], align 4
; CHECK-NEXT: call void @bar(i32 [[DOTPRE_1]])
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_2:%.*]], label [[FOR_END]]
; CHECK: for.body.2:
; CHECK-NEXT: [[INC_2:%.*]] = add nuw nsw i64 [[INC_1]], 1
; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_2:%.*]], label [[FOR_END]]
; CHECK: for.body.for.body_crit_edge.2:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_2]]
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT: [[DOTPRE_2:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_2]], align 4
; CHECK-NEXT: call void @bar(i32 [[DOTPRE_2]])
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_3:%.*]], label [[FOR_END]]
; CHECK: for.body.3:
; CHECK-NEXT: [[INC_3]] = add nuw nsw i64 [[INC_2]], 1
; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3]], label [[FOR_END]]
; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END]]
; CHECK: for.body.for.body_crit_edge.3:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_3]]
; CHECK-NEXT: [[DOTPRE_3]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_3]], align 4
; CHECK-NEXT: br label [[FOR_HEADER]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK-NEXT: unreachable
;
entry:
%0 = load i32, i32* %A, align 4
@ -243,7 +235,7 @@ define void @test4(i32 %arg) {
; CHECK: bb1.2:
; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1_3:%.*]]
; CHECK: bb1.3:
; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1]], !llvm.loop [[LOOP2:![0-9]+]]
;
bb:
br label %bb1

View File

@ -16,123 +16,50 @@ define i3 @test(i3* %a, i3 %n) {
; UNROLL-16: for.body.preheader:
; UNROLL-16-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-16: for.body:
; UNROLL-16-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_15:%.*]], [[FOR_BODY_15:%.*]] ]
; UNROLL-16-NEXT: [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD_15:%.*]], [[FOR_BODY_15]] ]
; UNROLL-16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i3, i3* [[A:%.*]], i64 [[INDVARS_IV]]
; UNROLL-16-NEXT: [[TMP0:%.*]] = load i3, i3* [[ARRAYIDX]], align 1
; UNROLL-16-NEXT: [[ADD:%.*]] = add nsw i3 [[TMP0]], [[SUM_02]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-16-NEXT: [[TMP0:%.*]] = load i3, i3* [[A:%.*]], align 1
; UNROLL-16-NEXT: [[EXITCOND:%.*]] = icmp eq i3 1, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_1:%.*]]
; UNROLL-16: for.end.loopexit:
; UNROLL-16-NEXT: [[ADD_LCSSA:%.*]] = phi i3 [ [[ADD]], [[FOR_BODY]] ], [ [[ADD_1:%.*]], [[FOR_BODY_1]] ], [ [[ADD_2:%.*]], [[FOR_BODY_2:%.*]] ], [ [[ADD_3:%.*]], [[FOR_BODY_3:%.*]] ], [ [[ADD_4:%.*]], [[FOR_BODY_4:%.*]] ], [ [[ADD_5:%.*]], [[FOR_BODY_5:%.*]] ], [ [[ADD_6:%.*]], [[FOR_BODY_6:%.*]] ], [ [[ADD_7:%.*]], [[FOR_BODY_7:%.*]] ], [ [[ADD_8:%.*]], [[FOR_BODY_8:%.*]] ], [ [[ADD_9:%.*]], [[FOR_BODY_9:%.*]] ], [ [[ADD_10:%.*]], [[FOR_BODY_10:%.*]] ], [ [[ADD_11:%.*]], [[FOR_BODY_11:%.*]] ], [ [[ADD_12:%.*]], [[FOR_BODY_12:%.*]] ], [ [[ADD_13:%.*]], [[FOR_BODY_13:%.*]] ], [ [[ADD_14:%.*]], [[FOR_BODY_14:%.*]] ], [ [[ADD_15]], [[FOR_BODY_15]] ]
; UNROLL-16-NEXT: [[ADD_LCSSA:%.*]] = phi i3 [ [[TMP0]], [[FOR_BODY]] ], [ [[ADD_1:%.*]], [[FOR_BODY_1]] ], [ [[ADD_2:%.*]], [[FOR_BODY_2:%.*]] ], [ [[ADD_3:%.*]], [[FOR_BODY_3:%.*]] ], [ [[ADD_4:%.*]], [[FOR_BODY_4:%.*]] ], [ [[ADD_5:%.*]], [[FOR_BODY_5:%.*]] ], [ [[ADD_6:%.*]], [[FOR_BODY_6:%.*]] ]
; UNROLL-16-NEXT: br label [[FOR_END]]
; UNROLL-16: for.end:
; UNROLL-16-NEXT: [[SUM_0_LCSSA:%.*]] = phi i3 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; UNROLL-16-NEXT: ret i3 [[SUM_0_LCSSA]]
; UNROLL-16: for.body.1:
; UNROLL-16-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-16-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 1
; UNROLL-16-NEXT: [[TMP1:%.*]] = load i3, i3* [[ARRAYIDX_1]], align 1
; UNROLL-16-NEXT: [[ADD_1]] = add nsw i3 [[TMP1]], [[ADD]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; UNROLL-16-NEXT: [[ADD_1]] = add nsw i3 [[TMP1]], [[TMP0]]
; UNROLL-16-NEXT: [[EXITCOND_1:%.*]] = icmp eq i3 2, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_1]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_2]]
; UNROLL-16: for.body.2:
; UNROLL-16-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_1]]
; UNROLL-16-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 2
; UNROLL-16-NEXT: [[TMP2:%.*]] = load i3, i3* [[ARRAYIDX_2]], align 1
; UNROLL-16-NEXT: [[ADD_2]] = add nsw i3 [[TMP2]], [[ADD_1]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
; UNROLL-16-NEXT: [[EXITCOND_2:%.*]] = icmp eq i3 3, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_2]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_3]]
; UNROLL-16: for.body.3:
; UNROLL-16-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_2]]
; UNROLL-16-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 3
; UNROLL-16-NEXT: [[TMP3:%.*]] = load i3, i3* [[ARRAYIDX_3]], align 1
; UNROLL-16-NEXT: [[ADD_3]] = add nsw i3 [[TMP3]], [[ADD_2]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; UNROLL-16-NEXT: [[EXITCOND_3:%.*]] = icmp eq i3 -4, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_3]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_4]]
; UNROLL-16: for.body.4:
; UNROLL-16-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_3]]
; UNROLL-16-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 4
; UNROLL-16-NEXT: [[TMP4:%.*]] = load i3, i3* [[ARRAYIDX_4]], align 1
; UNROLL-16-NEXT: [[ADD_4]] = add nsw i3 [[TMP4]], [[ADD_3]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
; UNROLL-16-NEXT: [[EXITCOND_4:%.*]] = icmp eq i3 -3, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_4]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_5]]
; UNROLL-16: for.body.5:
; UNROLL-16-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_4]]
; UNROLL-16-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 5
; UNROLL-16-NEXT: [[TMP5:%.*]] = load i3, i3* [[ARRAYIDX_5]], align 1
; UNROLL-16-NEXT: [[ADD_5]] = add nsw i3 [[TMP5]], [[ADD_4]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
; UNROLL-16-NEXT: [[EXITCOND_5:%.*]] = icmp eq i3 -2, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_5]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_6]]
; UNROLL-16: for.body.6:
; UNROLL-16-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_5]]
; UNROLL-16-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 6
; UNROLL-16-NEXT: [[TMP6:%.*]] = load i3, i3* [[ARRAYIDX_6]], align 1
; UNROLL-16-NEXT: [[ADD_6]] = add nsw i3 [[TMP6]], [[ADD_5]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
; UNROLL-16-NEXT: [[EXITCOND_6:%.*]] = icmp eq i3 -1, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_6]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_7]]
; UNROLL-16: for.body.7:
; UNROLL-16-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_6]]
; UNROLL-16-NEXT: [[TMP7:%.*]] = load i3, i3* [[ARRAYIDX_7]], align 1
; UNROLL-16-NEXT: [[ADD_7]] = add nsw i3 [[TMP7]], [[ADD_6]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_7:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
; UNROLL-16-NEXT: br i1 false, label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_8]]
; UNROLL-16: for.body.8:
; UNROLL-16-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_7]]
; UNROLL-16-NEXT: [[TMP8:%.*]] = load i3, i3* [[ARRAYIDX_8]], align 1
; UNROLL-16-NEXT: [[ADD_8]] = add nsw i3 [[TMP8]], [[ADD_7]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_8:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_7]], 1
; UNROLL-16-NEXT: [[EXITCOND_8:%.*]] = icmp eq i3 1, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_8]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_9]]
; UNROLL-16: for.body.9:
; UNROLL-16-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_8]]
; UNROLL-16-NEXT: [[TMP9:%.*]] = load i3, i3* [[ARRAYIDX_9]], align 1
; UNROLL-16-NEXT: [[ADD_9]] = add nsw i3 [[TMP9]], [[ADD_8]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_9:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_8]], 1
; UNROLL-16-NEXT: [[EXITCOND_9:%.*]] = icmp eq i3 2, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_9]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_10]]
; UNROLL-16: for.body.10:
; UNROLL-16-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_9]]
; UNROLL-16-NEXT: [[TMP10:%.*]] = load i3, i3* [[ARRAYIDX_10]], align 1
; UNROLL-16-NEXT: [[ADD_10]] = add nsw i3 [[TMP10]], [[ADD_9]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_10:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_9]], 1
; UNROLL-16-NEXT: [[EXITCOND_10:%.*]] = icmp eq i3 3, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_10]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_11]]
; UNROLL-16: for.body.11:
; UNROLL-16-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_10]]
; UNROLL-16-NEXT: [[TMP11:%.*]] = load i3, i3* [[ARRAYIDX_11]], align 1
; UNROLL-16-NEXT: [[ADD_11]] = add nsw i3 [[TMP11]], [[ADD_10]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_11:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_10]], 1
; UNROLL-16-NEXT: [[EXITCOND_11:%.*]] = icmp eq i3 -4, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_11]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_12]]
; UNROLL-16: for.body.12:
; UNROLL-16-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_11]]
; UNROLL-16-NEXT: [[TMP12:%.*]] = load i3, i3* [[ARRAYIDX_12]], align 1
; UNROLL-16-NEXT: [[ADD_12]] = add nsw i3 [[TMP12]], [[ADD_11]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_12:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_11]], 1
; UNROLL-16-NEXT: [[EXITCOND_12:%.*]] = icmp eq i3 -3, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_12]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_13]]
; UNROLL-16: for.body.13:
; UNROLL-16-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_12]]
; UNROLL-16-NEXT: [[TMP13:%.*]] = load i3, i3* [[ARRAYIDX_13]], align 1
; UNROLL-16-NEXT: [[ADD_13]] = add nsw i3 [[TMP13]], [[ADD_12]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_13:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_12]], 1
; UNROLL-16-NEXT: [[EXITCOND_13:%.*]] = icmp eq i3 -2, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_13]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_14]]
; UNROLL-16: for.body.14:
; UNROLL-16-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_13]]
; UNROLL-16-NEXT: [[TMP14:%.*]] = load i3, i3* [[ARRAYIDX_14]], align 1
; UNROLL-16-NEXT: [[ADD_14]] = add nsw i3 [[TMP14]], [[ADD_13]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_14:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_13]], 1
; UNROLL-16-NEXT: [[EXITCOND_14:%.*]] = icmp eq i3 -1, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_14]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_15]]
; UNROLL-16: for.body.15:
; UNROLL-16-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_14]]
; UNROLL-16-NEXT: [[TMP15:%.*]] = load i3, i3* [[ARRAYIDX_15]], align 1
; UNROLL-16-NEXT: [[ADD_15]] = add nsw i3 [[TMP15]], [[ADD_14]]
; UNROLL-16-NEXT: [[INDVARS_IV_NEXT_15]] = add i64 [[INDVARS_IV_NEXT_14]], 1
; UNROLL-16-NEXT: br i1 false, label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; UNROLL-16-NEXT: br label [[FOR_END_LOOPEXIT]]
;
; UNROLL-4-LABEL: @test(
; UNROLL-4-NEXT: entry:

View File

@ -100,67 +100,45 @@ define dso_local void @hoge_5(i8 %arg) {
; UPPER-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17
; UPPER-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
; UPPER: loop.preheader:
; UPPER-NEXT: [[TMP1:%.*]] = sub i32 16, [[X]]
; UPPER-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
; UPPER-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
; UPPER-NEXT: [[TMP4:%.*]] = urem i32 [[TMP2]], 6
; UPPER-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
; UPPER-NEXT: [[XTRAITER:%.*]] = urem i32 [[TMP5]], 6
; UPPER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
; UPPER-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]]
; UPPER: loop.prol.preheader:
; UPPER-NEXT: br label [[LOOP_PROL:%.*]]
; UPPER: loop.prol:
; UPPER-NEXT: [[IV_PROL:%.*]] = phi i32 [ [[IV_NEXT_PROL:%.*]], [[LOOP_PROL]] ], [ [[X]], [[LOOP_PROL_PREHEADER]] ]
; UPPER-NEXT: [[PTR_PROL:%.*]] = phi i8* [ [[PTR_NEXT_PROL:%.*]], [[LOOP_PROL]] ], [ [[Y]], [[LOOP_PROL_PREHEADER]] ]
; UPPER-NEXT: [[PROL_ITER:%.*]] = phi i32 [ [[XTRAITER]], [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LOOP_PROL]] ]
; UPPER-NEXT: [[IV_NEXT_PROL]] = add nuw i32 [[IV_PROL]], 4
; UPPER-NEXT: [[PTR_NEXT_PROL]] = getelementptr inbounds i8, i8* [[PTR_PROL]], i32 1
; UPPER-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT_PROL]], align 1
; UPPER-NEXT: [[TMP6:%.*]] = icmp ult i32 [[IV_NEXT_PROL]], 17
; UPPER-NEXT: [[PROL_ITER_SUB]] = sub i32 [[PROL_ITER]], 1
; UPPER-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0
; UPPER-NEXT: br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], [[LOOP0:!llvm.loop !.*]]
; UPPER: loop.prol.loopexit.unr-lcssa:
; UPPER-NEXT: [[IV_UNR_PH:%.*]] = phi i32 [ [[IV_NEXT_PROL]], [[LOOP_PROL]] ]
; UPPER-NEXT: [[PTR_UNR_PH:%.*]] = phi i8* [ [[PTR_NEXT_PROL]], [[LOOP_PROL]] ]
; UPPER-NEXT: br label [[LOOP_PROL_LOOPEXIT]]
; UPPER: loop.prol.loopexit:
; UPPER-NEXT: [[IV_UNR:%.*]] = phi i32 [ [[X]], [[LOOP_PREHEADER]] ], [ [[IV_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ]
; UPPER-NEXT: [[PTR_UNR:%.*]] = phi i8* [ [[Y]], [[LOOP_PREHEADER]] ], [ [[PTR_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ]
; UPPER-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP2]], 5
; UPPER-NEXT: br i1 [[TMP7]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_PREHEADER_NEW:%.*]]
; UPPER: loop.preheader.new:
; UPPER-NEXT: br label [[LOOP:%.*]]
; UPPER: loop:
; UPPER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_UNR]], [[LOOP_PREHEADER_NEW]] ], [ [[IV_NEXT_5:%.*]], [[LOOP]] ]
; UPPER-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_UNR]], [[LOOP_PREHEADER_NEW]] ], [ [[PTR_NEXT_5:%.*]], [[LOOP]] ]
; UPPER-NEXT: [[IV_NEXT:%.*]] = add nuw i32 [[IV]], 4
; UPPER-NEXT: [[PTR_NEXT:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT]], align 1
; UPPER-NEXT: [[IV_NEXT_1:%.*]] = add nuw i32 [[IV_NEXT]], 4
; UPPER-NEXT: [[PTR_NEXT_1:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_1]], align 1
; UPPER-NEXT: [[IV_NEXT_2:%.*]] = add nuw i32 [[IV_NEXT_1]], 4
; UPPER-NEXT: [[PTR_NEXT_2:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_1]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_2]], align 1
; UPPER-NEXT: [[IV_NEXT_3:%.*]] = add nuw i32 [[IV_NEXT_2]], 4
; UPPER-NEXT: [[PTR_NEXT_3:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_2]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_3]], align 1
; UPPER-NEXT: [[IV_NEXT_4:%.*]] = add nuw i32 [[IV_NEXT_3]], 4
; UPPER-NEXT: [[PTR_NEXT_4:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_3]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_4]], align 1
; UPPER-NEXT: [[IV_NEXT_5]] = add nuw i32 [[IV_NEXT_4]], 4
; UPPER-NEXT: [[PTR_NEXT_5]] = getelementptr inbounds i8, i8* [[PTR_NEXT_4]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_5]], align 1
; UPPER-NEXT: [[TMP8:%.*]] = icmp ult i32 [[IV_NEXT_5]], 17
; UPPER-NEXT: br i1 [[TMP8]], label [[LOOP]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]]
; UPPER: exit.loopexit.unr-lcssa:
; UPPER-NEXT: br label [[EXIT_LOOPEXIT]]
; UPPER-NEXT: [[IV_NEXT:%.*]] = add nuw i32 [[X]], 4
; UPPER-NEXT: [[PTR_NEXT:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 1
; UPPER-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1
; UPPER-NEXT: [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17
; UPPER-NEXT: br i1 [[TMP1]], label [[LOOP_1:%.*]], label [[EXIT_LOOPEXIT:%.*]]
; UPPER: exit.loopexit:
; UPPER-NEXT: br label [[EXIT]]
; UPPER: exit:
; UPPER-NEXT: ret void
; UPPER: loop.1:
; UPPER-NEXT: [[IV_NEXT_1:%.*]] = add nuw i32 [[IV_NEXT]], 4
; UPPER-NEXT: [[PTR_NEXT_1:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_1]], align 1
; UPPER-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IV_NEXT_1]], 17
; UPPER-NEXT: br i1 [[TMP2]], label [[LOOP_2:%.*]], label [[EXIT_LOOPEXIT]]
; UPPER: loop.2:
; UPPER-NEXT: [[IV_NEXT_2:%.*]] = add nuw i32 [[IV_NEXT_1]], 4
; UPPER-NEXT: [[PTR_NEXT_2:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_1]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_2]], align 1
; UPPER-NEXT: [[TMP3:%.*]] = icmp ult i32 [[IV_NEXT_2]], 17
; UPPER-NEXT: br i1 [[TMP3]], label [[LOOP_3:%.*]], label [[EXIT_LOOPEXIT]]
; UPPER: loop.3:
; UPPER-NEXT: [[IV_NEXT_3:%.*]] = add nuw i32 [[IV_NEXT_2]], 4
; UPPER-NEXT: [[PTR_NEXT_3:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_2]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_3]], align 1
; UPPER-NEXT: [[TMP4:%.*]] = icmp ult i32 [[IV_NEXT_3]], 17
; UPPER-NEXT: br i1 [[TMP4]], label [[LOOP_4:%.*]], label [[EXIT_LOOPEXIT]]
; UPPER: loop.4:
; UPPER-NEXT: [[IV_NEXT_4:%.*]] = add nuw i32 [[IV_NEXT_3]], 4
; UPPER-NEXT: [[PTR_NEXT_4:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_3]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_4]], align 1
; UPPER-NEXT: [[TMP5:%.*]] = icmp ult i32 [[IV_NEXT_4]], 17
; UPPER-NEXT: br i1 [[TMP5]], label [[LOOP_5:%.*]], label [[EXIT_LOOPEXIT]]
; UPPER: loop.5:
; UPPER-NEXT: [[PTR_NEXT_5:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_4]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_5]], align 1
; UPPER-NEXT: br label [[EXIT_LOOPEXIT]]
;
entry:
%x = load i32, i32* @global, align 4