From 5263838307588509b7687096d036db8d1b43d263 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Fri, 12 Aug 2016 22:47:13 +0000 Subject: [PATCH] [LoopVectorize] Detect loops in the innermost loop before creating InnerLoopVectorizer InnerLoopVectorizer shouldn't handle a loop with cycles inside the loop body, even if that cycle isn't a natural loop. Fixes PR28541. Differential Revision: https://reviews.llvm.org/D22952 llvm-svn: 278573 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 93 ++++++++++++++++++++-- test/Transforms/LoopVectorize/pr28541.ll | 71 +++++++++++++++++ 2 files changed, 158 insertions(+), 6 deletions(-) create mode 100644 test/Transforms/LoopVectorize/pr28541.ll diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 8aa826fe7ab..a6c92c9b0d3 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -50,6 +50,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -220,6 +221,81 @@ class LoopVectorizationLegality; class LoopVectorizationCostModel; class LoopVectorizationRequirements; +// A traits type that is intended to be used in graph algorithms. The graph it +// models starts at the loop header, and traverses the BasicBlocks that are in +// the loop body, but not the loop header. Since the loop header is skipped, +// the back edges are excluded. +struct LoopBodyTraits { + using NodeRef = std::pair; + + // This wraps a const Loop * into the iterator, so we know which edges to + // filter out. + class WrappedSuccIterator + : public iterator_adaptor_base< + WrappedSuccIterator, succ_iterator, + typename std::iterator_traits::iterator_category, + NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { + using BaseT = iterator_adaptor_base< + WrappedSuccIterator, succ_iterator, + typename std::iterator_traits::iterator_category, + NodeRef, std::ptrdiff_t, NodeRef *, NodeRef>; + + const Loop *L; + + public: + WrappedSuccIterator(succ_iterator Begin, const Loop *L) + : BaseT(Begin), L(L) {} + + NodeRef operator*() const { return {L, *I}; } + }; + + struct LoopBodyFilter { + bool operator()(NodeRef N) const { + const Loop *L = N.first; + return N.second != L->getHeader() && L->contains(N.second); + } + }; + + using ChildIteratorType = + filter_iterator; + + static NodeRef getEntryNode(const Loop &G) { return {&G, G.getHeader()}; } + + static ChildIteratorType child_begin(NodeRef Node) { + return make_filter_range(make_range( + {succ_begin(Node.second), Node.first}, + {succ_end(Node.second), Node.first}), + LoopBodyFilter{}) + .begin(); + } + + static ChildIteratorType child_end(NodeRef Node) { + return make_filter_range(make_range( + {succ_begin(Node.second), Node.first}, + {succ_end(Node.second), Node.first}), + LoopBodyFilter{}) + .end(); + } +}; + +/// Returns true if the given loop body has a cycle, excluding the loop +/// itself. +static bool hasCyclesInLoopBody(const Loop &L) { + if (!L.empty()) + return true; + + for (const auto SCC : + make_range(scc_iterator::begin(L), + scc_iterator::end(L))) { + if (SCC.size() > 1) { + DEBUG(dbgs() << "LVL: Detected a cycle in the loop body:\n"); + DEBUG(L.dump()); + return true; + } + } + return false; +} + /// \brief This modifies LoopAccessReport to initialize message with /// loop-vectorizer-specific part. class VectorizationReport : public LoopAccessReport { @@ -1843,12 +1919,14 @@ private: OptimizationRemarkEmitter &ORE; }; -static void addInnerLoop(Loop &L, SmallVectorImpl &V) { - if (L.empty()) - return V.push_back(&L); - +static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl &V) { + if (L.empty()) { + if (!hasCyclesInLoopBody(L)) + V.push_back(&L); + return; + } for (Loop *InnerL : L) - addInnerLoop(*InnerL, V); + addAcyclicInnerLoop(*InnerL, V); } /// The LoopVectorize Pass. @@ -4549,6 +4627,9 @@ bool LoopVectorizationLegality::canVectorize() { return false; } + // FIXME: The code is currently dead, since the loop gets sent to + // LoopVectorizationLegality is already an innermost loop. + // // We can only vectorize innermost loops. if (!TheLoop->empty()) { emitAnalysis(VectorizationReport() << "loop is not the innermost loop"); @@ -6800,7 +6881,7 @@ bool LoopVectorizePass::runImpl( SmallVector Worklist; for (Loop *L : *LI) - addInnerLoop(*L, Worklist); + addAcyclicInnerLoop(*L, Worklist); LoopsAnalyzed += Worklist.size(); diff --git a/test/Transforms/LoopVectorize/pr28541.ll b/test/Transforms/LoopVectorize/pr28541.ll new file mode 100644 index 00000000000..7bb7f0987fd --- /dev/null +++ b/test/Transforms/LoopVectorize/pr28541.ll @@ -0,0 +1,71 @@ +; RUN: opt -loop-vectorize -pass-remarks=loop-vectorize -S < %s 2>&1 | FileCheck %s + +; FIXME: Check for -pass-remarks-missed and -pass-remarks-analysis output when +; addAcyclicInnerLoop emits analysis. + +; Check that opt does not crash on such input: +; +; a, b, c; +; fn1() { +; while (b--) { +; c = a; +; switch (a & 3) +; case 0: +; do +; case 3: +; case 2: +; case 1: +; ; +; while (--c) +; ; +; } +; } + +@b = common global i32 0, align 4 +@a = common global i32 0, align 4 +@c = common global i32 0, align 4 + +; CHECK-NOT: vectorized loop +; CHECK-LABEL: fn1 + +define i32 @fn1() { +entry: + %tmp2 = load i32, i32* @b, align 4 + %dec3 = add nsw i32 %tmp2, -1 + store i32 %dec3, i32* @b, align 4 + %tobool4 = icmp eq i32 %tmp2, 0 + br i1 %tobool4, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %entry + %tmp1 = load i32, i32* @a, align 4 + %and = and i32 %tmp1, 3 + %switch = icmp eq i32 %and, 0 + br label %while.body + +while.cond: ; preds = %do.cond + %dec = add nsw i32 %dec7, -1 + %tobool = icmp eq i32 %dec7, 0 + br i1 %tobool, label %while.cond.while.end_crit_edge, label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.cond + %dec7 = phi i32 [ %dec3, %while.body.lr.ph ], [ %dec, %while.cond ] + br i1 %switch, label %do.body, label %do.cond + +do.body: ; preds = %do.cond, %while.body + %dec25 = phi i32 [ %dec2, %do.cond ], [ %tmp1, %while.body ] + br label %do.cond + +do.cond: ; preds = %do.body, %while.body + %dec26 = phi i32 [ %dec25, %do.body ], [ %tmp1, %while.body ] + %dec2 = add nsw i32 %dec26, -1 + %tobool3 = icmp eq i32 %dec2, 0 + br i1 %tobool3, label %while.cond, label %do.body + +while.cond.while.end_crit_edge: ; preds = %while.cond + store i32 0, i32* @c, align 4 + store i32 -1, i32* @b, align 4 + br label %while.end + +while.end: ; preds = %while.cond.while.end_crit_edge, %entry + ret i32 undef +}