mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-31 07:52:55 +01:00
0e2020dbaf
InstCombine can be uncooperative to vectorization and sink loads into conditional blocks. This prevents vectorization. Undo this optimization if there are unconditional memory accesses to the same addresses in the loop. radar://13815763 llvm-svn: 181860
70 lines
2.3 KiB
LLVM
70 lines
2.3 KiB
LLVM
; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
|
|
|
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
|
|
|
@A = common global [1024 x float] zeroinitializer, align 16
|
|
@B = common global [1024 x float] zeroinitializer, align 16
|
|
|
|
; Make sure we can vectorize in the presence of hoistable conditional loads.
|
|
; CHECK: hoist_cond_load
|
|
; CHECK: load <2 x float>
|
|
|
|
define void @hoist_cond_load() {
|
|
entry:
|
|
br label %for.body
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
|
|
%arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
|
|
%arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
|
|
%0 = load float* %arrayidx2, align 4
|
|
%cmp3 = fcmp oeq float %0, 0.000000e+00
|
|
br i1 %cmp3, label %if.end9, label %if.else
|
|
|
|
if.else:
|
|
%1 = load float* %arrayidx, align 4
|
|
br label %if.end9
|
|
|
|
if.end9:
|
|
%tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
|
|
store float %tmp.0, float* %arrayidx, align 4
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp ne i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
; However, we can't hoist loads whose address we have not seen unconditionally
|
|
; accessed.
|
|
; CHECK: dont_hoist_cond_load
|
|
; CHECK-NOT: load <2 x float>
|
|
|
|
define void @dont_hoist_cond_load() {
|
|
entry:
|
|
br label %for.body
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
|
|
%arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
|
|
%arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
|
|
%0 = load float* %arrayidx2, align 4
|
|
%cmp3 = fcmp oeq float %0, 0.000000e+00
|
|
br i1 %cmp3, label %if.end9, label %if.else
|
|
|
|
if.else:
|
|
%1 = load float* %arrayidx, align 4
|
|
br label %if.end9
|
|
|
|
if.end9:
|
|
%tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
|
|
store float %tmp.0, float* %arrayidx2, align 4
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp ne i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end:
|
|
ret void
|
|
}
|