1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[LoopSimplifyCFG] Do not deal with loops with irreducible CFG inside

The current algorithm that collects live/dead/inloop blocks relies on some invariants
related to RPO and PO traversals. In particular, the important fact it requires is that
the loop's only latch is the first block in PO traversal. It also relies on the fact that during
RPO we visit all predecessors of a block before we visit this block (backedges ignored).

If a loop has an irreducible non-loop cycle inside, both these assumptions may break.
This patch adds detection for this situation and prohibits the terminator folding
for loops with irreducible CFG.

In theory we can support this later, but that requires some algorithmic changes.
Besides, irreducible CFG is not a frequent situation, so we can simply not bother.

Thanks @uabelho for finding this!

Differential Revision: https://reviews.llvm.org/D55357
Reviewed By: skatkov

llvm-svn: 348567
This commit is contained in:
Max Kazantsev 2018-12-07 05:44:45 +00:00
parent 699d18eb82
commit dafa7c4e1d
2 changed files with 91 additions and 0 deletions

View File

@ -85,6 +85,8 @@ private:
DominatorTree &DT;
MemorySSAUpdater *MSSAU;
// Whether or not the current loop has irreducible CFG. Set by analyze() when
// hasIrreducibleCFG() reports an irreducible cycle; analyze() then returns
// early and terminator folding is refused for this loop.
bool HasIrreducibleCFG = false;
// Whether or not the current loop will still exist after terminator constant
// folding will be done. In theory, there are two ways how it can happen:
// 1. Loop's latch(es) become unreachable from loop header;
@ -143,6 +145,27 @@ private:
BlocksInLoopAfterFolding);
}
/// Report whether the current loop contains an irreducible non-loop cycle.
///
/// \param DFS a finished LoopBlocksDFS traversal (asserted to be complete).
/// \returns true if some edge between blocks of the current loop goes from a
/// block that is later in RPO to a block that is earlier in RPO, and the
/// target is not a loop header; false otherwise.
bool hasIrreducibleCFG(LoopBlocksDFS &DFS) {
  assert(DFS.isComplete() && "DFS is expected to be finished");

  // Number every visited block by its position in the RPO traversal.
  DenseMap<const BasicBlock *, unsigned> RPONumber;
  unsigned NextIndex = 0;
  for (auto It = DFS.beginRPO(), End = DFS.endRPO(); It != End; ++It) {
    RPONumber[*It] = NextIndex;
    ++NextIndex;
  }

  for (auto It = DFS.beginRPO(), End = DFS.endRPO(); It != End; ++It) {
    BasicBlock *From = *It;
    for (auto *To : successors(From)) {
      // Edges that leave the current loop are of no interest here.
      if (!L.contains(To))
        continue;
      // An edge into a loop header is treated as a regular loop backedge.
      if (LI.isLoopHeader(To))
        continue;
      // An edge that goes from a block with a greater RPO number into a block
      // with a lesser one, and is not a loop backedge, can only be part of an
      // irreducible non-loop cycle.
      if (RPONumber[From] > RPONumber[To])
        return true;
    }
  }
  return false;
}
/// Fill all information about status of blocks and exits of the current loop
/// if constant folding of all branches will be done.
void analyze() {
@ -150,6 +173,18 @@ private:
DFS.perform(&LI);
assert(DFS.isComplete() && "DFS is expected to be finished");
// TODO: The algorithm below relies on both RPO and Postorder traversals.
// When the loop has only reducible CFG inside, then the invariant "all
// predecessors of X are processed before X in RPO" is preserved. However
// an irreducible loop can break this invariant (e.g. latch does not have to
// be the last block in the traversal in this case, and the algorithm relies
// on this). We can later decide to support such cases by altering the
// algorithms, but so far we just give up analyzing them.
if (hasIrreducibleCFG(DFS)) {
HasIrreducibleCFG = true;
return;
}
// Collect live and dead loop blocks and exits.
LiveLoopBlocks.insert(L.getHeader());
for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) {
@ -300,6 +335,11 @@ public:
LLVM_DEBUG(dbgs() << "In function " << L.getHeader()->getParent()->getName()
<< ": ");
if (HasIrreducibleCFG) {
LLVM_DEBUG(dbgs() << "Loops with irreducible CFG are not supported!\n");
return false;
}
// Nothing to constant-fold.
if (FoldCandidates.empty()) {
LLVM_DEBUG(

View File

@ -0,0 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -loop-simplifycfg -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s
; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -passes='require<domtree>,loop(simplify-cfg)' -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s
; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -loop-simplifycfg -enable-mssa-loop-dependency=true -verify-memoryssa -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
; This test has irreducible CFG, and RPO may be the following:
; Header, Dead, Irreducible2, Latch, Irreducible3, Irreducible1.
; As a result, we will process Irreducible2 before its predecessor Irreducible1.
; The current algorithm gets confused in this case. We may support irreducible
; CFG in the future.
; The loop is header -> ... -> latch -> header. Inside it, the cycle
; irreducible1 -> irreducible2 -> irreducible3 -> irreducible1 can be entered
; at two different blocks (header -> irreducible1 and dead -> irreducible2),
; so it is an irreducible cycle rather than a loop. The expected output below
; matches the input block for block: the constant-false branch in the header
; is left unfolded.
define void @irreducible_cfg(i1 %cond) {
; CHECK-LABEL: @irreducible_cfg(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[HEADER:%.*]]
; CHECK: header:
; CHECK-NEXT: br i1 false, label [[DEAD:%.*]], label [[IRREDUCIBLE1:%.*]]
; CHECK: dead:
; CHECK-NEXT: br label [[IRREDUCIBLE2:%.*]]
; CHECK: irreducible2:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[LATCH:%.*]], label [[IRREDUCIBLE3:%.*]]
; CHECK: latch:
; CHECK-NEXT: br label [[HEADER]]
; CHECK: irreducible3:
; CHECK-NEXT: br label [[IRREDUCIBLE1]]
; CHECK: irreducible1:
; CHECK-NEXT: br label [[IRREDUCIBLE2]]
;
entry:
br label %header
header: ; preds = %latch, %entry
; Constant condition: a candidate for terminator folding. %dead is reachable
; only through the never-taken true edge.
br i1 false, label %dead, label %irreducible1
dead: ; preds = %header
; Second entry into the inner cycle (at %irreducible2).
br label %irreducible2
irreducible2: ; preds = %irreducible1, %dead
; Either leave the inner cycle towards the latch or continue around it.
br i1 %cond, label %latch, label %irreducible3
latch: ; preds = %irreducible2
br label %header
irreducible3: ; preds = %irreducible2
br label %irreducible1
irreducible1: ; preds = %irreducible3, %header
; First entry into the inner cycle comes from %header.
br label %irreducible2
}