1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

[PM] Teach LoopUnroll to update the LPM infrastructure as it unrolls

loops.

We do this by reconstructing the newly added loops after the unroll
completes to avoid threading pass manager details through all the mess
of the unrolling infrastructure.

I've enabled some extra assertions in the LPM to try and catch issues
here and enabled a bunch of unroller tests to try and make sure this is
sane.

Currently, I'm manually running loop-simplify when needed. That should
go away once it is folded into the LPM infrastructure.

Differential Revision: https://reviews.llvm.org/D28848

llvm-svn: 293011
This commit is contained in:
Chandler Carruth 2017-01-25 02:49:01 +00:00
parent 365c1158a8
commit c3aa937b25
19 changed files with 255 additions and 2 deletions

View File

@ -42,6 +42,13 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
break;
}
#ifndef NDEBUG
// Verify the loop structure and LCSSA form before visiting the loop.
L.verifyLoop();
assert(L.isRecursivelyLCSSAForm(AR.DT, AR.LI) &&
"Loops must remain in LCSSA form!");
#endif
// Update the analysis manager as each pass runs and potentially
// invalidates analyses.
AM.invalidate(L, PassPA);

View File

@ -114,6 +114,15 @@ static cl::opt<bool>
cl::desc("Allows loops to be peeled when the dynamic "
"trip count is known to be low."));
// This option isn't ever intended to be enabled, it serves to allow
// experiments to check the assumptions about when this kind of revisit is
// necessary.
static cl::opt<bool> UnrollRevisitChildLoops(
"unroll-revisit-child-loops", cl::Hidden,
cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
"This shouldn't typically be needed as child loops (or their "
"clones) were already visited."));
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
@ -1117,7 +1126,7 @@ Pass *llvm::createSimpleLoopUnrollPass() {
PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
LPMUpdater &Updater) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();
@ -1128,6 +1137,15 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
"cached at a higher level");
// Keep track of the previous loop structure so we can identify new loops
// created by unrolling.
Loop *ParentL = L.getParentLoop();
SmallPtrSet<Loop *, 4> OldLoops;
if (ParentL)
OldLoops.insert(ParentL->begin(), ParentL->end());
else
OldLoops.insert(AR.LI.begin(), AR.LI.end());
bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
/*PreserveLCSSA*/ true, ProvidedCount,
ProvidedThreshold, ProvidedAllowPartial,
@ -1135,5 +1153,60 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
if (!Changed)
return PreservedAnalyses::all();
// The parent must not be damaged by unrolling!
#ifndef NDEBUG
if (ParentL)
ParentL->verifyLoop();
#endif
// Unrolling can do several things to introduce new loops into a loop nest:
// - Partial unrolling clones child loops within the current loop.
// - Full unrolling clones child loops within the current loop but then
// removes the current loop making all of the children appear to be new
// sibling loops.
// - Loop peeling can directly introduce new sibling loops by peeling one
// iteration.
//
// When a new loop appears as a sibling loop, either from peeling an
// iteration or fully unrolling, its nesting structure has fundamentally
// changed and we want to revisit it to reflect that.
//
// When unrolling has removed the current loop, we need to tell the
// infrastructure that it is gone.
//
// Finally, we support a debugging/testing mode where we revisit child loops
// as well. These are not expected to require further optimizations as either
// they or the loop they were cloned from have been directly visited already.
// But the debugging mode allows us to check this assumption.
bool IsCurrentLoopValid = false;
SmallVector<Loop *, 4> SibLoops;
if (ParentL)
SibLoops.append(ParentL->begin(), ParentL->end());
else
SibLoops.append(AR.LI.begin(), AR.LI.end());
erase_if(SibLoops, [&](Loop *SibLoop) {
if (SibLoop == &L) {
IsCurrentLoopValid = true;
return true;
}
// Otherwise erase the loop from the list if it was in the old loops.
return OldLoops.count(SibLoop) != 0;
});
Updater.addSiblingLoops(SibLoops);
if (!IsCurrentLoopValid) {
Updater.markLoopAsDeleted(L);
} else {
// We can only walk child loops if the current loop remained valid.
if (UnrollRevisitChildLoops) {
// Walk *all* of the child loops. This is a highly speculative mode
// anyways so look for any simplifications that arose from partial
// unrolling or peeling off of iterations.
SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
Updater.addChildLoops(ChildLoops);
}
}
return getLoopPassPreservedAnalyses();
}

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -loop-unroll -S | FileCheck %s
; RUN: opt < %s -passes='require<opt-remark-emit>,loop(unroll)' -S | FileCheck %s
; This should not unroll since the address of the loop header is taken.

View File

@ -1,4 +1,5 @@
; RUN: opt -S -loop-unroll < %s | FileCheck %s
; RUN: opt < %s -passes='require<opt-remark-emit>,loop(unroll)' -S | FileCheck %s
; LLVM should not try to fully unroll this loop.

View File

@ -1,5 +1,6 @@
; Check that we don't crash on corner cases.
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@unknown_global = internal unnamed_addr global [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0], align 16

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; When examining gep-instructions we shouldn't consider them simplified if the

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
define i64 @propagate_loop_phis() {

View File

@ -1,4 +1,5 @@
; RUN: opt -S -loop-unroll < %s | FileCheck %s
; RUN: opt -S -passes='require<opt-remark-emit>,loop(unroll)' < %s | FileCheck %s
; Unroll twice, with first loop exit kept
; CHECK-LABEL: @s32_max1

View File

@ -0,0 +1,150 @@
; This test checks that nested loops are revisited in various scenarios when
; unrolling. Note that if we ever start doing outer loop peeling a test case
; for that should be added here that will look essentially like a hybrid of the
; current two cases.
;
; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \
; RUN: -passes='require<opt-remark-emit>,loop(unroll)' \
; RUN: | FileCheck %s
;
; Also run in a special mode that visits children.
; RUN: opt < %s -disable-output -debug-pass-manager -unroll-revisit-child-loops 2>&1 \
; RUN: -passes='require<opt-remark-emit>,loop(unroll)' \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-CHILDREN
; Basic test is fully unrolled and we revisit the post-unroll new sibling
; loops, including the ones that used to be child loops.
define void @full_unroll(i1* %ptr) {
; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on full_unroll
; CHECK-NOT: LoopUnrollPass
entry:
br label %l0
l0:
%cond.0 = load volatile i1, i1* %ptr
br i1 %cond.0, label %l0.0.ph, label %exit
l0.0.ph:
br label %l0.0
l0.0:
%iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ]
%iv.next = add i32 %iv, 1
br label %l0.0.0.ph
l0.0.0.ph:
br label %l0.0.0
l0.0.0:
%cond.0.0.0 = load volatile i1, i1* %ptr
br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
; CHECK-NOT: LoopUnrollPass
l0.0.1.ph:
br label %l0.0.1
l0.0.1:
%cond.0.0.1 = load volatile i1, i1* %ptr
br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
; CHECK-NOT: LoopUnrollPass
l0.0.latch:
%cmp = icmp slt i32 %iv.next, 2
br i1 %cmp, label %l0.0, label %l0.latch
; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0
; CHECK-NOT: LoopUnrollPass
;
; Unrolling occurs, so we visit what were the inner loops twice over. First we
; visit their clones, and then we visit the original loops re-parented.
; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.1<header>
; CHECK-NOT: LoopUnrollPass
; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.1<header>
; CHECK-NOT: LoopUnrollPass
; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1<header>
; CHECK-NOT: LoopUnrollPass
; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0<header>
; CHECK-NOT: LoopUnrollPass
l0.latch:
br label %l0
; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0<header>
; CHECK-NOT: LoopUnrollPass
exit:
ret void
}
; Now we test forced runtime partial unrolling with metadata. Here we end up
; duplicating child loops without changing their structure and so they aren't by
; default visited, but will be visited with a special parameter.
define void @partial_unroll(i32 %count, i1* %ptr) {
; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on partial_unroll
; CHECK-NOT: LoopUnrollPass
entry:
br label %l0
l0:
%cond.0 = load volatile i1, i1* %ptr
br i1 %cond.0, label %l0.0.ph, label %exit
l0.0.ph:
br label %l0.0
l0.0:
%iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ]
%iv.next = add i32 %iv, 1
br label %l0.0.0.ph
l0.0.0.ph:
br label %l0.0.0
l0.0.0:
%cond.0.0.0 = load volatile i1, i1* %ptr
br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
; CHECK-NOT: LoopUnrollPass
l0.0.1.ph:
br label %l0.0.1
l0.0.1:
%cond.0.0.1 = load volatile i1, i1* %ptr
br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
; CHECK-NOT: LoopUnrollPass
l0.0.latch:
%cmp = icmp slt i32 %iv.next, %count
br i1 %cmp, label %l0.0, label %l0.latch, !llvm.loop !1
; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0
; CHECK-NOT: LoopUnrollPass
;
; Partial unrolling occurs which introduces new child loops but not new sibling
; loops. We only visit the child loops in a special mode, not by default.
; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
; CHECK-CHILDREN-NOT: LoopUnrollPass
; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
; CHECK-CHILDREN-NOT: LoopUnrollPass
; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0.1<header>
; CHECK-CHILDREN-NOT: LoopUnrollPass
; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1.1<header>
; CHECK-CHILDREN-NOT: LoopUnrollPass
;
; When we revisit children, we also revisit the current loop.
; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 2 containing: %l0.0<header>
; CHECK-CHILDREN-NOT: LoopUnrollPass
l0.latch:
br label %l0
; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0<header>
; CHECK-NOT: LoopUnrollPass
exit:
ret void
}
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.count", i32 2}

View File

@ -1,6 +1,9 @@
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Tests for unrolling loops with run-time trip counts

View File

@ -1,6 +1,9 @@
; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; This tests that setting the unroll count works

View File

@ -1,6 +1,9 @@
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; Choose a smaller, power-of-two, unroll count if the loop is too large.
; This test makes sure we're not unrolling 'odd' counts

View File

@ -1,5 +1,6 @@
; REQUIRES: asserts
; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
; RUN: opt < %s -disable-output -stats -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
; Test that nested loops can be unrolled. We need to increase threshold to do it

View File

@ -1,6 +1,9 @@
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
; Given that the trip-count of this loop is a 3-bit value, we cannot
; safely unroll it with a count of anything more than 8.

View File

@ -1,5 +1,5 @@
; RUN: opt < %s -S -loop-unroll -verify-loop-info | FileCheck %s
; RUN: opt < %s -S -passes='function(require<scalar-evolution>,require<targetir>,require<opt-remark-emit>,loop(unroll),verify<loops>)' | FileCheck %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll),verify<loops>' | FileCheck %s
;
; Unit tests for LoopInfo::markAsRemoved.

View File

@ -1,4 +1,5 @@
; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s
; RUN: opt -S < %s -passes='require<opt-remark-emit>,loop(unroll),require<block-freq>' | FileCheck %s
; Crasher from PR20987.
; CHECK: define void @update_loop_info_in_subloops