From dafa7c4e1dd584ce1c4b1ac41ab880d29629820f Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Fri, 7 Dec 2018 05:44:45 +0000 Subject: [PATCH] [LoopSimplifyCFG] Do not deal with loops with irreducible CFG inside The current algorithm that collects live/dead/inloop blocks relies on some invariants related to RPO and PO traversals. In particular, the important fact it requires is that the loop's only latch is the first block in PO traversal. It also relies on the fact that during RPO we visit all predecessors of a block before we visit this block (backedges ignored). If a loop has an irreducible non-loop cycle inside, both these assumptions may break. This patch adds detection for this situation and prohibits the terminator folding for loops with irreducible CFG. We can in theory support this later, but some algorithmic changes are needed for that. Besides, irreducible CFG is not a frequent situation, so we can simply not bother for now. Thanks @uabelho for finding this! Differential Revision: https://reviews.llvm.org/D55357 Reviewed By: skatkov llvm-svn: 348567 --- lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 40 +++++++++++++++ .../LoopSimplifyCFG/irreducible_cfg.ll | 51 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 test/Transforms/LoopSimplifyCFG/irreducible_cfg.ll diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index 3de701e7887..c370efa1207 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -85,6 +85,8 @@ private: DominatorTree &DT; MemorySSAUpdater *MSSAU; + // Whether or not the current loop has irreducible CFG. + bool HasIrreducibleCFG = false; // Whether or not the current loop will still exist after terminator constant // folding will be done. In theory, there are two ways how it can happen: // 1. 
Loop's latch(es) become unreachable from loop header; @@ -143,6 +145,27 @@ private: BlocksInLoopAfterFolding); } + /// Whether or not the current loop has irreducible CFG. + bool hasIrreducibleCFG(LoopBlocksDFS &DFS) { + assert(DFS.isComplete() && "DFS is expected to be finished"); + // Index of a basic block in RPO traversal. + DenseMap<const BasicBlock *, unsigned> RPO; + unsigned Current = 0; + for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) + RPO[*I] = Current++; + + for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) { + BasicBlock *BB = *I; + for (auto *Succ : successors(BB)) + if (L.contains(Succ) && !LI.isLoopHeader(Succ) && RPO[BB] > RPO[Succ]) + // If an edge goes from a block with a greater order number into a block + // with a lesser number, and it is not a loop backedge, then it can only + // be a part of an irreducible non-loop cycle. + return true; + } + return false; + } + /// Fill all information about status of blocks and exits of the current loop /// if constant folding of all branches will be done. void analyze() { @@ -150,6 +173,18 @@ private: DFS.perform(&LI); assert(DFS.isComplete() && "DFS is expected to be finished"); + // TODO: The algorithm below relies on both RPO and Postorder traversals. + // When the loop has only reducible CFG inside, then the invariant "all + // predecessors of X are processed before X in RPO" is preserved. However + // an irreducible loop can break this invariant (e.g. latch does not have to + // be the last block in the traversal in this case, and the algorithm relies + // on this). We can later decide to support such cases by altering the + // algorithms, but so far we just give up analyzing them. + if (hasIrreducibleCFG(DFS)) { + HasIrreducibleCFG = true; + return; + } + // Collect live and dead loop blocks and exits. 
LiveLoopBlocks.insert(L.getHeader()); for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) { @@ -300,6 +335,11 @@ public: LLVM_DEBUG(dbgs() << "In function " << L.getHeader()->getParent()->getName() << ": "); + if (HasIrreducibleCFG) { + LLVM_DEBUG(dbgs() << "Loops with irreducible CFG are not supported!\n"); + return false; + } + // Nothing to constant-fold. if (FoldCandidates.empty()) { LLVM_DEBUG( diff --git a/test/Transforms/LoopSimplifyCFG/irreducible_cfg.ll b/test/Transforms/LoopSimplifyCFG/irreducible_cfg.ll new file mode 100644 index 00000000000..e7c8afc389d --- /dev/null +++ b/test/Transforms/LoopSimplifyCFG/irreducible_cfg.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; REQUIRES: asserts +; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -loop-simplifycfg -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s +; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -passes='require<domtree>,loop(simplify-cfg)' -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s +; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -loop-simplifycfg -enable-mssa-loop-dependency=true -verify-memoryssa -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" + +; This test has irreducible CFG, and RPO may be the following: +; Header, Dead, Irreducible2, Latch, Irreducible3, Irreducible1. +; As a result, we will process Irreducible2 before its predecessor Irreducible1. +; The current algorithm gets confused in this case. We may support irreducible +; CFG in the future. 
+define void @irreducible_cfg(i1 %cond) { +; CHECK-LABEL: @irreducible_cfg( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: br i1 false, label [[DEAD:%.*]], label [[IRREDUCIBLE1:%.*]] +; CHECK: dead: +; CHECK-NEXT: br label [[IRREDUCIBLE2:%.*]] +; CHECK: irreducible2: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[LATCH:%.*]], label [[IRREDUCIBLE3:%.*]] +; CHECK: latch: +; CHECK-NEXT: br label [[HEADER]] +; CHECK: irreducible3: +; CHECK-NEXT: br label [[IRREDUCIBLE1]] +; CHECK: irreducible1: +; CHECK-NEXT: br label [[IRREDUCIBLE2]] +; +entry: + br label %header + +header: ; preds = %latch, %entry + br i1 false, label %dead, label %irreducible1 + +dead: ; preds = %header + br label %irreducible2 + +irreducible2: ; preds = %irreducible1, %dead + br i1 %cond, label %latch, label %irreducible3 + +latch: ; preds = %irreducible2 + br label %header + +irreducible3: ; preds = %irreducible2 + br label %irreducible1 + +irreducible1: ; preds = %irreducible3, %header + br label %irreducible2 +}