1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
Anna Thomas 146a9e87c5 [LV][LAA] Vectorize loop invariant values stored into loop invariant address
Summary:
We are overly conservative in loop vectorizer with respect to stores to loop
invariant addresses.
More details in https://bugs.llvm.org/show_bug.cgi?id=38546
This is the first part of the fix where we start with vectorizing loop invariant
values to loop invariant addresses.

This also includes changes to ORE for stores to invariant address.

Reviewers: anemet, Ayal, mkuper, mssimpso

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D50665

llvm-svn: 343028
2018-09-25 20:57:20 +00:00

64 lines
2.3 KiB
LLVM

; RUN: opt -passes='loop-vectorize' -debug -S < %s 2>&1 | FileCheck %s
; REQUIRES: asserts
; This checks we don't crash when the inner loop we're trying to vectorize
; is a SCEV AddRec with respect to an outer loop.
; In this case, the problematic PHI is:
; %0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ]
; Since %inc54 is the IV of the outer loop, and %0 is equivalent to it,
; we get the situation described above.
; Code that leads to this situation can look something like:
;
; int a, b[1], c;
; void fn1 ()
; {
; for (; c; c++)
; for (a = 0; a; a++)
; b[c] = 4;
; }
;
; The PHI is an artifact of the register promotion of c.
; Note that we can no longer get the vectorizer to actually see such PHIs,
; because LV now simplifies the loop internally, but the test is still
; useful as a regression test, and in case loop-simplify behavior changes.
; External globals used by @test: @c and @a are loaded once in the entry block,
; and @b is the one-element i32 array that the inner loop stores into.
@c = external global i32, align 4
@a = external global i32, align 4
@b = external global [1 x i32], align 4
; We can vectorize this loop because we are storing an invariant value into an
; invariant address.
; CHECK: LV: We can vectorize this loop!
; CHECK-LABEL: @test
; Two-level loop nest reduced from the C snippet in the file header. The outer
; loop is headed by %for.cond1.preheader and the inner loop is the single block
; %for.body3. No block in this function returns: the outer latch branches back
; to the preheader unconditionally.
define void @test() {
entry:
; Read the initial values of @a and @c once, before entering the loop nest.
%a.promoted2 = load i32, i32* @a, align 1
%c.promoted = load i32, i32* @c, align 1
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.cond1.for.inc4_crit_edge, %entry
; %inc54 is the outer-loop counter: it starts at the loaded value of @c and is
; replaced by %inc5 (previous value + 1) on every trip around the outer loop.
%inc54 = phi i32 [ %inc5, %for.cond1.for.inc4_crit_edge ], [ %c.promoted, %entry ]
; Inner counter value carried across outer iterations (initially the load of @a).
%inc.lcssa3 = phi i32 [ %inc.lcssa, %for.cond1.for.inc4_crit_edge ], [ %a.promoted2, %entry ]
br label %for.body3
for.body3: ; preds = %for.body3, %for.cond1.preheader
; Inner-loop counter, incremented by one per iteration.
%inc1 = phi i32 [ %inc.lcssa3, %for.cond1.preheader ], [ %inc, %for.body3 ]
; The PHI this test is about: undef on entry from the preheader, and the
; outer-loop counter %inc54 on the inner back edge.
%0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ]
; Sign-extend %0 and use it as the element index into @b.
%idxprom = sext i32 %0 to i64
%arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @b, i64 0, i64 %idxprom
; Store of the constant 4 to that element.
store i32 4, i32* %arrayidx, align 4
%inc = add nsw i32 %inc1, 1
; Leave the inner loop once the incremented counter equals zero.
%tobool2 = icmp eq i32 %inc, 0
br i1 %tobool2, label %for.cond1.for.inc4_crit_edge, label %for.body3
for.cond1.for.inc4_crit_edge: ; preds = %for.body3
; Single-entry PHIs that forward the inner loop's final %inc and %inc54 values
; to this exit block.
%inc.lcssa = phi i32 [ %inc, %for.body3 ]
%.lcssa = phi i32 [ %inc54, %for.body3 ]
; Advance the outer-loop counter and start the next outer iteration.
%inc5 = add nsw i32 %.lcssa, 1
br label %for.cond1.preheader
}