1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[LoopDeletion] Break backedge of loops when known not taken

The basic idea is that if SCEV can prove the backedge isn't taken, we can go ahead and get rid of the backedge (and thus the loop) while leaving the rest of the control flow in place. This nicely handles cases with dispatch between multiple exits and internal side effects.

Differential Revision: https://reviews.llvm.org/D93906
This commit is contained in:
Philip Reames 2021-01-04 09:19:29 -08:00
parent b4343abe68
commit e3f8b180b1
7 changed files with 424 additions and 7 deletions

View File

@ -179,6 +179,12 @@ bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
LoopInfo *LI, MemorySSA *MSSA = nullptr);
/// Remove the backedge of the specified loop. Handles loop nests and general
/// loop structures subject to the precondition that the loop has a single
/// latch block. Preserves all listed analyses.
///
/// The backedge is split into its own block which is then made unreachable,
/// so the rest of the loop's control flow and side effects stay in place.
/// \p L is erased from \p LI as part of this call and must not be used by the
/// caller afterwards. \p MSSA may be null, in which case no MemorySSA update
/// is performed.
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
LoopInfo &LI, MemorySSA *MSSA);
/// Try to promote memory values to scalars by sinking stores out of
/// the loop and moving loads to before the loop. We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are

View File

@ -26,6 +26,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
#define DEBUG_TYPE "loop-delete"
@ -38,6 +39,14 @@ enum class LoopDeletionResult {
Deleted,
};
/// Combine the outcomes of two transformation attempts on the same loop,
/// keeping the stronger one: Deleted beats Modified, which beats Unmodified.
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
  // True when at least one of the two inputs equals the given result kind.
  const auto EitherIs = [A, B](LoopDeletionResult Kind) {
    return A == Kind || B == Kind;
  };

  if (EitherIs(LoopDeletionResult::Deleted))
    return LoopDeletionResult::Deleted;
  return EitherIs(LoopDeletionResult::Modified)
             ? LoopDeletionResult::Modified
             : LoopDeletionResult::Unmodified;
}
/// Determines if a loop is dead.
///
/// This assumes that we've already checked for unique exit and exiting blocks,
@ -126,6 +135,26 @@ static bool isLoopNeverExecuted(Loop *L) {
return true;
}
/// Break the backedge of \p L when SCEV reports a backedge-taken count of
/// zero. Doing so destroys the loop itself, but the (now trivially loop
/// invariant) control flow and any side effects in its body are left in place.
///
/// \returns LoopDeletionResult::Deleted if the backedge was broken, and
/// LoopDeletionResult::Unmodified if the loop has no single latch block or
/// the backedge-taken count is not known to be zero.
static LoopDeletionResult
breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
                        LoopInfo &LI, MemorySSA *MSSA,
                        OptimizationRemarkEmitter &ORE) {
  assert(L->isLCSSAForm(DT) && "Expected LCSSA!");

  // The latch check comes first so SCEV is only queried for loops that
  // breakLoopBackedge is able to handle.
  const bool HasSingleLatch = L->getLoopLatch() != nullptr;
  if (HasSingleLatch && SE.getBackedgeTakenCount(L)->isZero()) {
    breakLoopBackedge(L, DT, SE, LI, MSSA);
    return LoopDeletionResult::Deleted;
  }

  return LoopDeletionResult::Unmodified;
}
/// Remove a loop if it is dead.
///
/// A loop is considered dead if it does not impact the observable behavior of
@ -162,7 +191,6 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
return LoopDeletionResult::Unmodified;
}
BasicBlock *ExitBlock = L->getUniqueExitBlock();
if (ExitBlock && isLoopNeverExecuted(L)) {
@ -240,6 +268,14 @@ PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE);
// If we can prove the backedge isn't taken, just break it and be done. This
// leaves the loop structure in place which means it can handle dispatching
// to the right exit based on whatever loop invariant structure remains.
if (Result != LoopDeletionResult::Deleted)
Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
AR.MSSA, ORE));
if (Result == LoopDeletionResult::Unmodified)
return PreservedAnalyses::all();
@ -299,6 +335,12 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopDeletionResult Result = deleteLoopIfDead(L, DT, SE, LI, MSSA, ORE);
// If we can prove the backedge isn't taken, just break it and be done. This
// leaves the loop structure in place which means it can handle dispatching
// to the right exit based on whatever loop invariant structure remains.
if (Result != LoopDeletionResult::Deleted)
Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE));
if (Result == LoopDeletionResult::Deleted)
LPM.markLoopAsDeleted(*L);

View File

@ -756,6 +756,37 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
}
}
/// Remove the backedge of \p L by splitting the latch->header edge and making
/// the new block unreachable, then erase \p L from \p LI.
///
/// \param L    Loop whose backedge is removed; must have a single latch
///             block. It is erased from \p LI before returning, so callers
///             must not use it afterwards.
/// \param DT   Dominator tree, updated as the CFG changes.
/// \param SE   ScalarEvolution; asked to forget \p L up front.
/// \param LI   Loop info, updated by the edge split and the final erase.
/// \param MSSA Optional MemorySSA; when non-null it is kept up to date
///             through a MemorySSAUpdater.
void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
                             LoopInfo &LI, MemorySSA *MSSA) {
  auto *Latch = L->getLoopLatch();
  assert(Latch && "Expected a loop with a single latch block!");
  auto *Header = L->getHeader();

  // Have SCEV forget this loop before its CFG is rewritten.
  SE.forgetLoop(L);

  // Note: By splitting the backedge, and then explicitly making it unreachable
  // we gracefully handle corner cases such as non-bottom tested loops and the
  // like.  We also have the benefit of being able to reuse existing well tested
  // code.  It might be worth special casing the common bottom tested case at
  // some point to avoid code churn.

  // Only build an updater when the caller actually supplied MemorySSA; a null
  // updater is passed to the utilities below otherwise.
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Give the backedge a block of its own so it can be cut without touching
  // the latch's other successors.
  auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());

  DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
  (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false,
                            /*PreserveLCSSA*/true, &DTU, MSSAU.get());

  // Erase (and destroy) this loop instance.  Handles relinking sub-loops
  // and blocks within the loop as needed.
  LI.erase(L);
}
/// Checks if \p L has single exit through latch block except possibly
/// "deoptimizing" exits. Returns branch instruction terminating the loop
/// latch if above check is successful, nullptr otherwise.

View File

@ -23,8 +23,8 @@ define dso_local i32 @main() {
; CHECK-NEXT: [[I6:%.*]] = load i32, i32* @a, align 4
; CHECK-NEXT: [[I24:%.*]] = load i32, i32* @b, align 4
; CHECK-NEXT: [[D_PROMOTED9:%.*]] = load i32, i32* @d, align 4
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB13_PREHEADER:%.*]]
; CHECK: bb13.preheader:
; CHECK-NEXT: [[I8_LCSSA10:%.*]] = phi i32 [ [[D_PROMOTED9]], [[BB:%.*]] ], [ [[I8:%.*]], [[BB19_PREHEADER:%.*]] ]
; CHECK-NEXT: [[I8]] = and i32 [[I8_LCSSA10]], [[I6]]
; CHECK-NEXT: [[I21:%.*]] = icmp eq i32 [[I8]], 0
@ -33,7 +33,7 @@ define dso_local i32 @main() {
; CHECK-NEXT: [[I26:%.*]] = urem i32 [[I24]], [[I8]]
; CHECK-NEXT: store i32 [[I26]], i32* @e, align 4
; CHECK-NEXT: [[I30_NOT:%.*]] = icmp eq i32 [[I26]], 0
; CHECK-NEXT: br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB1]]
; CHECK-NEXT: br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB13_PREHEADER]]
; CHECK: bb13.preheader.bb27.thread.split_crit_edge:
; CHECK-NEXT: store i32 -1, i32* @f, align 4
; CHECK-NEXT: store i32 0, i32* @d, align 4

View File

@ -89,8 +89,10 @@ define i32 @zero_backedge_count_test(i32 %unknown_init, i32* %unknown_mem) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[UNKNOWN_NEXT:%.*]] = load volatile i32, i32* [[UNKNOWN_MEM:%.*]]
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LEAVE:%.*]]
; CHECK-NEXT: [[UNKNOWN_NEXT:%.*]] = load volatile i32, i32* [[UNKNOWN_MEM:%.*]], align 4
; CHECK-NEXT: br i1 false, label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[LEAVE:%.*]]
; CHECK: loop.loop_crit_edge:
; CHECK-NEXT: unreachable
; CHECK: leave:
; CHECK-NEXT: ret i32 [[UNKNOWN_INIT:%.*]]
;

View File

@ -44,7 +44,8 @@ for.body6: ; preds = %for.body6, %for.bod
%conv10 = zext i1 %cmp9 to i32
%and = and i32 %conv10, %g.138
%inc = add i32 %h.039, 1
br i1 undef, label %for.inc11, label %for.body6
%exit = icmp eq i32 %inc, 20000
br i1 %exit, label %for.inc11, label %for.body6
for.inc11: ; preds = %for.body6
%and.lcssa = phi i32 [ %and, %for.body6 ]

View File

@ -0,0 +1,335 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-deletion -S | FileCheck %s
@G = external global i32
; Single-block loop whose latch branch is `br i1 false`, so the backedge is
; trivially never taken. The checks expect the backedge to be redirected to a
; new block ending in `unreachable` while the store in the body is kept.
define void @test_trivial() {
; CHECK-LABEL: @test_trivial(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: store i32 0, i32* @G, align 4
; CHECK-NEXT: br i1 false, label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
; CHECK: loop.loop_crit_edge:
; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
store i32 0, i32* @G
br i1 false, label %loop, label %exit
exit:
ret void
}
; Bottom-tested loop with an induction variable: %iv starts at 0, so %iv.inc is
; 1 and `icmp ne %iv.inc, 1` is false on the first iteration. The checks expect
; the backedge to lead to an `unreachable` block and the header phi to lose its
; backedge incoming value, while the compare and store remain.
define void @test_bottom_tested() {
; CHECK-LABEL: @test_bottom_tested(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: store i32 0, i32* @G, align 4
; CHECK-NEXT: [[IV_INC:%.*]] = add i32 [[IV]], 1
; CHECK-NEXT: [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
; CHECK: loop.loop_crit_edge:
; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry], [ %iv.inc, %loop ]
store i32 0, i32* @G
%iv.inc = add i32 %iv, 1
%be_taken = icmp ne i32 %iv.inc, 1
br i1 %be_taken, label %loop, label %exit
exit:
ret void
}
; Non-bottom-tested variant: the exit test lives in the header and the latch is
; an unconditional branch back to it. The header's condition is false on the
; first iteration, so the latch is never reached. The checks expect the latch
; to branch to a new `latch.split` block that is `unreachable`.
define void @test_early_exit() {
; CHECK-LABEL: @test_early_exit(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: store i32 0, i32* @G, align 4
; CHECK-NEXT: [[IV_INC:%.*]] = add i32 [[IV]], 1
; CHECK-NEXT: [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LATCH:%.*]], label [[EXIT:%.*]]
; CHECK: latch:
; CHECK-NEXT: br label [[LATCH_SPLIT:%.*]]
; CHECK: latch.split:
; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry], [ %iv.inc, %latch ]
store i32 0, i32* @G
%iv.inc = add i32 %iv, 1
%be_taken = icmp ne i32 %iv.inc, 1
br i1 %be_taken, label %latch, label %exit
latch:
br label %loop
exit:
ret void
}
; Two exiting blocks (header and latch) sharing one exit block. The header's
; exit condition is false on the first iteration, so control never reaches the
; latch's backedge. The checks expect that backedge to be replaced by an
; `unreachable` block, with both stores and both conditional branches kept.
define void @test_multi_exit1() {
; CHECK-LABEL: @test_multi_exit1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: store i32 0, i32* @G, align 4
; CHECK-NEXT: [[IV_INC:%.*]] = add i32 [[IV]], 1
; CHECK-NEXT: [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LATCH:%.*]], label [[EXIT:%.*]]
; CHECK: latch:
; CHECK-NEXT: store i32 1, i32* @G, align 4
; CHECK-NEXT: [[COND2:%.*]] = icmp ult i32 [[IV_INC]], 30
; CHECK-NEXT: br i1 [[COND2]], label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT]]
; CHECK: latch.loop_crit_edge:
; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry], [ %iv.inc, %latch ]
store i32 0, i32* @G
%iv.inc = add i32 %iv, 1
%be_taken = icmp ne i32 %iv.inc, 1
br i1 %be_taken, label %latch, label %exit
latch:
store i32 1, i32* @G
%cond2 = icmp ult i32 %iv.inc, 30
br i1 %cond2, label %loop, label %exit
exit:
ret void
}
; Two exiting blocks with constant conditions: the header always continues to
; the latch (`br i1 true`) and the latch never takes the backedge
; (`br i1 false`). The checks expect the backedge to become an `unreachable`
; block while both stores stay in place.
define void @test_multi_exit2() {
; CHECK-LABEL: @test_multi_exit2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: store i32 0, i32* @G, align 4
; CHECK-NEXT: br i1 true, label [[LATCH:%.*]], label [[EXIT:%.*]]
; CHECK: latch:
; CHECK-NEXT: store i32 1, i32* @G, align 4
; CHECK-NEXT: br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT]]
; CHECK: latch.loop_crit_edge:
; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
store i32 0, i32* @G
br i1 true, label %latch, label %exit
latch:
store i32 1, i32* @G
br i1 false, label %loop, label %exit
exit:
ret void
}
; TODO: SCEV seems not to recognize this as a zero btc loop
; The header exits on the unknown %cond1, and the latch's backedge condition
; (`icmp ne %iv.inc, 1` with %iv starting at 0) is false on the first
; iteration. The checks record the current behavior: the loop is left
; unchanged, with the backedge to the header still present.
define void @test_multi_exit3(i1 %cond1) {
; CHECK-LABEL: @test_multi_exit3(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: store i32 0, i32* @G, align 4
; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LATCH]], label [[EXIT:%.*]]
; CHECK: latch:
; CHECK-NEXT: store i32 1, i32* @G, align 4
; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1
; CHECK-NEXT: [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LOOP]], label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry], [ %iv.inc, %latch ]
store i32 0, i32* @G
br i1 %cond1, label %latch, label %exit
latch:
store i32 1, i32* @G
%iv.inc = add i32 %iv, 1
%be_taken = icmp ne i32 %iv.inc, 1
br i1 %be_taken, label %loop, label %exit
exit:
ret void
}
; Subtle - This is either zero btc, or infinite, thus, can't break
; backedge
; Both exit conditions are function arguments that do not change inside the
; loop, so the loop either exits on its first trip or never exits. Negative
; test: the checks expect the loop, including its backedge, to be unchanged.
define void @test_multi_exit4(i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test_multi_exit4(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: store i32 0, i32* @G, align 4
; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LATCH:%.*]], label [[EXIT:%.*]]
; CHECK: latch:
; CHECK-NEXT: store i32 1, i32* @G, align 4
; CHECK-NEXT: br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
store i32 0, i32* @G
br i1 %cond1, label %latch, label %exit
latch:
store i32 1, i32* @G
br i1 %cond2, label %loop, label %exit
exit:
ret void
}
; A simple case with multiple exit blocks
; Same constant-condition shape as @test_multi_exit2, except the header and
; latch exit to distinct blocks (%exit1 and %exit2). The checks expect the
; backedge to become an `unreachable` block with both exit blocks kept.
define void @test_multi_exit5() {
; CHECK-LABEL: @test_multi_exit5(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: store i32 0, i32* @G, align 4
; CHECK-NEXT: br i1 true, label [[LATCH:%.*]], label [[EXIT1:%.*]]
; CHECK: latch:
; CHECK-NEXT: store i32 1, i32* @G, align 4
; CHECK-NEXT: br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT2:%.*]]
; CHECK: latch.loop_crit_edge:
; CHECK-NEXT: unreachable
; CHECK: exit1:
; CHECK-NEXT: ret void
; CHECK: exit2:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
store i32 0, i32* @G
br i1 true, label %latch, label %exit1
latch:
store i32 1, i32* @G
br i1 false, label %loop, label %exit2
exit1:
ret void
exit2:
ret void
}
; Loop nest where only the outer backedge is dead: the outer latch ends in
; `br i1 false`, while the inner loop keeps iterating until %iv.inc reaches
; 200. The checks expect the outer backedge to become an `unreachable` block
; and the inner loop to be preserved intact.
define void @test_live_inner() {
; CHECK-LABEL: @test_live_inner(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: store i32 0, i32* @G, align 4
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[IV_INC:%.*]], [[INNER]] ]
; CHECK-NEXT: store i32 [[IV]], i32* @G, align 4
; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CND:%.*]] = icmp ult i32 [[IV_INC]], 200
; CHECK-NEXT: br i1 [[CND]], label [[INNER]], label [[LATCH:%.*]]
; CHECK: latch:
; CHECK-NEXT: br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
; CHECK: latch.loop_crit_edge:
; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
store i32 0, i32* @G
br label %inner
inner:
%iv = phi i32 [0, %loop], [%iv.inc, %inner]
store i32 %iv, i32* @G
%iv.inc = add i32 %iv, 1
%cnd = icmp ult i32 %iv.inc, 200
br i1 %cnd, label %inner, label %latch
latch:
br i1 false, label %loop, label %exit
exit:
ret void
}
; Loop nest where only the inner backedge is dead: the inner block ends in
; `br i1 false` back to itself, while the outer loop keeps iterating until
; %iv.inc reaches 200. The checks expect the inner backedge to become an
; `unreachable` block and the outer loop, with its phi and backedge, to stay.
define void @test_live_outer() {
; CHECK-LABEL: @test_live_outer(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: store i32 0, i32* @G, align 4
; CHECK-NEXT: br i1 false, label [[INNER_INNER_CRIT_EDGE:%.*]], label [[LATCH]]
; CHECK: inner.inner_crit_edge:
; CHECK-NEXT: unreachable
; CHECK: latch:
; CHECK-NEXT: store i32 [[IV]], i32* @G, align 4
; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CND:%.*]] = icmp ult i32 [[IV_INC]], 200
; CHECK-NEXT: br i1 [[CND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [0, %entry], [%iv.inc, %latch]
br label %inner
inner:
store i32 0, i32* @G
br i1 false, label %inner, label %latch
latch:
store i32 %iv, i32* @G
%iv.inc = add i32 %iv, 1
%cnd = icmp ult i32 %iv.inc, 200
br i1 %cnd, label %loop, label %exit
exit:
ret void
}