[LoopSimplifyCFG] Do not deal with loops with irreducible CFG inside

The current algorithm that collects live/dead/inloop blocks relies on some invariants related to RPO and PO traversals. In particular, the important fact it requires is that the only loop's latch is the first block in PO traversal. It also relies on fact that during RPO we visit all prececessors of a block before we visit this block (backedges ignored). If a loop has irreducible non-loop cycle inside, both these assumptions may break. This patch adds detection for this situation and prohibits the terminator folding for loops with irreducible CFG. We can in theory support this later, for this some algorithmic changes are needed. Besides, irreducible CFG is not a frequent situation and we can just don't bother. Thanks @uabelho for finding this! Differential Revision: https://reviews.llvm.org/D55357 Reviewed By: skatkov llvm-svn: 348567
2024-11-26 04:32:44 +01:00 · 2018-12-07 05:44:45 +00:00 · 2018-12-07 05:44:45 +00:00 · dafa7c4e1d
commit dafa7c4e1d
parent 699d18eb82
2 changed files with 91 additions and 0 deletions
--- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@ -85,6 +85,8 @@ private:
  DominatorTree &DT;
  MemorySSAUpdater *MSSAU;

+  // Whether or not the current loop has irreducible CFG.
+  bool HasIrreducibleCFG = false;
  // Whether or not the current loop will still exist after terminator constant
  // folding will be done. In theory, there are two ways how it can happen:
  // 1. Loop's latch(es) become unreachable from loop header;
@ -143,6 +145,27 @@ private:
                  BlocksInLoopAfterFolding);
  }

+  /// Whether or not the current loop has irreducible CFG.
+  bool hasIrreducibleCFG(LoopBlocksDFS &DFS) {
+    assert(DFS.isComplete() && "DFS is expected to be finished");
+    // Index of a basic block in RPO traversal.
+    DenseMap<const BasicBlock *, unsigned> RPO;
+    unsigned Current = 0;
+    for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I)
+      RPO[*I] = Current++;
+
+    for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) {
+      BasicBlock *BB = *I;
+      for (auto *Succ : successors(BB))
+        if (L.contains(Succ) && !LI.isLoopHeader(Succ) && RPO[BB] > RPO[Succ])
+          // If an edge goes from a block with greater order number into a block
+          // with lesses number, and it is not a loop backedge, then it can only
+          // be a part of irreducible non-loop cycle.
+          return true;
+    }
+    return false;
+  }
+
  /// Fill all information about status of blocks and exits of the current loop
  /// if constant folding of all branches will be done.
  void analyze() {
@ -150,6 +173,18 @@ private:
    DFS.perform(&LI);
    assert(DFS.isComplete() && "DFS is expected to be finished");

+    // TODO: The algorithm below relies on both RPO and Postorder traversals.
+    // When the loop has only reducible CFG inside, then the invariant "all
+    // predecessors of X are processed before X in RPO" is preserved. However
+    // an irreducible loop can break this invariant (e.g. latch does not have to
+    // be the last block in the traversal in this case, and the algorithm relies
+    // on this). We can later decide to support such cases by altering the
+    // algorithms, but so far we just give up analyzing them.
+    if (hasIrreducibleCFG(DFS)) {
+      HasIrreducibleCFG = true;
+      return;
+    }
+
    // Collect live and dead loop blocks and exits.
    LiveLoopBlocks.insert(L.getHeader());
    for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) {
@ -300,6 +335,11 @@ public:
    LLVM_DEBUG(dbgs() << "In function " << L.getHeader()->getParent()->getName()
                      << ": ");

+    if (HasIrreducibleCFG) {
+      LLVM_DEBUG(dbgs() << "Loops with irreducible CFG are not supported!\n");
+      return false;
+    }
+
    // Nothing to constant-fold.
    if (FoldCandidates.empty()) {
      LLVM_DEBUG(
--- a/test/Transforms/LoopSimplifyCFG/irreducible_cfg.ll
+++ b/test/Transforms/LoopSimplifyCFG/irreducible_cfg.ll
@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; REQUIRES: asserts
+; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -loop-simplifycfg -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s
+; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -passes='require<domtree>,loop(simplify-cfg)' -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s
+; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -loop-simplifycfg -enable-mssa-loop-dependency=true -verify-memoryssa -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+
+; This test has irreducible CFG, and RPO may be the following:
+; Header, Dead, Irreducible2, Latch, Irreducible3, Irreducible1.
+; As result, we will process Irreducible2 before its predecessor Irreducible1.
+; The current algorithm gets confused in this case. We may support irreducible
+; CFG in the future.
+define void @irreducible_cfg(i1 %cond) {
+; CHECK-LABEL: @irreducible_cfg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    br i1 false, label [[DEAD:%.*]], label [[IRREDUCIBLE1:%.*]]
+; CHECK:       dead:
+; CHECK-NEXT:    br label [[IRREDUCIBLE2:%.*]]
+; CHECK:       irreducible2:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[LATCH:%.*]], label [[IRREDUCIBLE3:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    br label [[HEADER]]
+; CHECK:       irreducible3:
+; CHECK-NEXT:    br label [[IRREDUCIBLE1]]
+; CHECK:       irreducible1:
+; CHECK-NEXT:    br label [[IRREDUCIBLE2]]
+;
+entry:
+  br label %header
+
+header:                                             ; preds = %latch, %entry
+  br i1 false, label %dead, label %irreducible1
+
+dead:                                          ; preds = %header
+  br label %irreducible2
+
+irreducible2:                                             ; preds = %irreducible1, %dead
+  br i1 %cond, label %latch, label %irreducible3
+
+latch:                                         ; preds = %irreducible2
+  br label %header
+
+irreducible3:                                           ; preds = %irreducible2
+  br label %irreducible1
+
+irreducible1:                                          ; preds = %irreducible3, %header
+  br label %irreducible2
+}