mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
[LV] Run loop-simplify and LCSSA explicitly instead of "requiring" them
This changes the vectorizer to explicitly use the loop-simplify and LCSSA utilities, instead of "requiring" the transformations as if they were analyses. This is not NFC (no functional change), since it changes the LCSSA behavior: we no longer run LCSSA for all loops, but rather only for the loops we expect to modify. Differential Revision: https://reviews.llvm.org/D28868 llvm-svn: 292456
This commit is contained in:
parent
a2f54981f9
commit
8718b01fcb
@ -92,6 +92,7 @@
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Transforms/Utils/LoopSimplify.h"
|
||||
#include "llvm/Transforms/Utils/LoopUtils.h"
|
||||
#include "llvm/Transforms/Utils/LoopVersioning.h"
|
||||
#include "llvm/Transforms/Vectorize.h"
|
||||
@ -2134,8 +2135,6 @@ struct LoopVectorize : public FunctionPass {
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<AssumptionCacheTracker>();
|
||||
AU.addRequiredID(LoopSimplifyID);
|
||||
AU.addRequiredID(LCSSAID);
|
||||
AU.addRequired<BlockFrequencyInfoWrapperPass>();
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addRequired<LoopInfoWrapperPass>();
|
||||
@ -7169,9 +7168,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
|
||||
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
|
||||
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
|
||||
@ -7543,6 +7540,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
||||
DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
|
||||
}
|
||||
|
||||
formLCSSARecursively(*L, *DT, LI, SE);
|
||||
|
||||
using namespace ore;
|
||||
if (!VectorizeLoop) {
|
||||
assert(IC > 1 && "interleave count should not be 1 or 0");
|
||||
@ -7618,6 +7617,16 @@ bool LoopVectorizePass::runImpl(
|
||||
if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
|
||||
return false;
|
||||
|
||||
bool Changed = false;
|
||||
|
||||
// The vectorizer requires loops to be in simplified form.
|
||||
// Since simplification may add new inner loops, it has to run before the
|
||||
// legality and profitability checks. This means running the loop vectorizer
|
||||
// will simplify all loops, regardless of whether anything ends up being
|
||||
// vectorized.
|
||||
for (auto &L : *LI)
|
||||
Changed |= simplifyLoop(L, DT, LI, SE, AC, false /* PreserveLCSSA */);
|
||||
|
||||
// Build up a worklist of inner-loops to vectorize. This is necessary as
|
||||
// the act of vectorizing or partially unrolling a loop creates new loops
|
||||
// and can invalidate iterators across the loops.
|
||||
@ -7629,7 +7638,6 @@ bool LoopVectorizePass::runImpl(
|
||||
LoopsAnalyzed += Worklist.size();
|
||||
|
||||
// Now walk the identified inner loops.
|
||||
bool Changed = false;
|
||||
while (!Worklist.empty())
|
||||
Changed |= processLoop(Worklist.pop_back_val());
|
||||
|
||||
|
54
test/Transforms/LoopVectorize/partial-lcssa.ll
Normal file
54
test/Transforms/LoopVectorize/partial-lcssa.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
|
||||
; We vectorize the inner loop, so we have to put it in LCSSA form.
|
||||
; However, there's no reason to touch the outer loop.
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK-LABEL: for.end.inner.loopexit:
|
||||
; CHECK: %[[LCSSAPHI:.*]] = phi i64 [ %indvars.iv, %for.body.inner ], [ %{{.*}}, %middle.block ]
|
||||
; CHECK: store i64 %[[LCSSAPHI]], i64* %O1, align 4
|
||||
; CHECK-LABEL: for.end.outer.loopexit
|
||||
; CHECK: store i64 %indvars.outer, i64* %O2, align 4
|
||||
|
||||
|
||||
define i64 @foo(i32* nocapture %A, i32* nocapture %B, i64 %n, i64 %m, i64* %O1, i64* %O2) {
|
||||
entry:
|
||||
%cmp = icmp sgt i64 %n, 0
|
||||
br i1 %cmp, label %for.body.outer.preheader, label %for.end.outer
|
||||
|
||||
for.body.outer.preheader: ; preds = %entry
|
||||
br label %for.body.outer
|
||||
|
||||
for.body.outer: ; preds = %for.body.outer.preheader, %for.end.inner
|
||||
%indvars.outer = phi i64 [ %indvars.outer.next, %for.end.inner ], [ 0, %for.body.outer.preheader ]
|
||||
%cmp2 = icmp sgt i64 %m, 0
|
||||
br i1 %cmp2, label %for.body.inner.preheader, label %for.end.inner
|
||||
|
||||
for.body.inner.preheader: ; preds = %for.body.outer
|
||||
br label %for.body.inner
|
||||
|
||||
for.body.inner: ; preds = %for.body.inner.preheader, %for.body.inner
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body.inner ], [ 0, %for.body.inner.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
%v = load i32, i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
|
||||
store i32 %v, i32* %arrayidx2, align 4
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv, %n
|
||||
br i1 %exitcond, label %for.end.inner.loopexit, label %for.body.inner
|
||||
|
||||
for.end.inner.loopexit: ; preds = %for.body.inner
|
||||
store i64 %indvars.iv, i64 *%O1, align 4
|
||||
br label %for.end.inner
|
||||
|
||||
for.end.inner: ; preds = %for.end.inner.loopexit, %for.body.outer
|
||||
%indvars.outer.next = add i64 %indvars.outer, 1
|
||||
%exitcond.outer = icmp eq i64 %indvars.outer, %m
|
||||
br i1 %exitcond.outer, label %for.end.outer.loopexit, label %for.body.outer
|
||||
|
||||
for.end.outer.loopexit: ; preds = %for.end.inner
|
||||
store i64 %indvars.outer, i64 *%O2, align 4
|
||||
br label %for.end.outer
|
||||
|
||||
for.end.outer: ; preds = %for.end.outer.loopexit, %entry
|
||||
ret i64 undef
|
||||
}
|
@ -9,13 +9,6 @@
|
||||
; Since %inc54 is the IV of the outer loop, and %0 is equivalent to it,
|
||||
; we get the situation described above.
|
||||
|
||||
; This test uses the new PM, because with the old PM, running loop-vectorize
|
||||
; would explicitly run loop-simplify. Even though this loop is already in
|
||||
; simplified form, loop-simplify would still clean up the phi.
|
||||
; The reason this matters is that in a real optimizer pipeline, LICM can create
|
||||
; such PHIs, and since it preserves loop simplified form, the cleanup has
|
||||
; no chance to run.
|
||||
|
||||
; Code that leads to this situation can look something like:
|
||||
;
|
||||
; int a, b[1], c;
|
||||
@ -28,11 +21,14 @@
|
||||
;
|
||||
; The PHI is an artifact of the register promotion of c.
|
||||
|
||||
; Note that we can no longer get the vectorizer to actually see such PHIs,
|
||||
; because LV now simplifies the loop internally, but the test is still
|
||||
; useful as a regression test, and in case loop-simplify behavior changes.
|
||||
|
||||
@c = external global i32, align 4
|
||||
@a = external global i32, align 4
|
||||
@b = external global [1 x i32], align 4
|
||||
|
||||
; CHECK: LV: PHI is a recurrence with respect to an outer loop.
|
||||
; CHECK: LV: Not vectorizing: Cannot prove legality.
|
||||
; CHECK-LABEL: @test
|
||||
define void @test() {
|
||||
|
Loading…
Reference in New Issue
Block a user