mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
239ce88ecb
Summary: Loops with inequality comparers, such as: // unsigned bound for (unsigned i = 1; i < bound; ++i) {...} have getSmallConstantMaxTripCount report a large maximum static trip count - in this case, 0xffff fffe. However, profiling info may show that the trip count is much smaller, and thus counter-recommend vectorization. This change: - flips loop-vectorize-with-block-frequency on by default. - validates profiled loop frequency data supports vectorization, when static info appears to not counter-recommend it. Absence of profile data means we rely on static data, just as we've done so far. Reviewers: twoh, mkuper, davidxl, tejohnson, Ayal Reviewed By: davidxl Subscribers: bkramer, llvm-commits Differential Revision: https://reviews.llvm.org/D42946 llvm-svn: 324543
212 lines
6.9 KiB
LLVM
212 lines
6.9 KiB
LLVM
; This test verifies that the loop vectorizer does not vectorize low trip count
; loops that require runtime checks (trip count is computed from profile info).
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s

; Big-endian target with 32-bit pointers and a 32-bit native integer width.
target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"

; 32-byte zero-initialized table that the loops below read and rewrite in place.
@tab = common global [32 x i8] zeroinitializer, align 1
|
|
|
|
define i32 @foo_low_trip_count1(i32 %bound) {
; Simple loop with low tripcount. Should not be vectorized.
; The loop branch carries !prof !1, which puts all of the branch weight on the
; exit edge; the function itself has no entry-count metadata.

; CHECK-LABEL: @foo_low_trip_count1(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; The exit test compares the pre-increment index against %bound.
  %exitcond = icmp eq i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}
|
|
|
|
define i32 @foo_low_trip_count2(i32 %bound) !prof !0 {
; The loop has the same invocation count as the function, but has a low
; trip count per invocation and is not worth vectorizing.
; The function carries an entry count (!prof !0) and the loop branch carries
; !prof !1, which puts all of the branch weight on the exit edge.

; CHECK-LABEL: @foo_low_trip_count2(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; The exit test compares the pre-increment index against %bound.
  %exitcond = icmp eq i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}
|
|
|
|
define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
; The loop has a low invocation count compared to the function invocation
; count, but has a high trip count per invocation. Vectorize it.
; Profile shape: the entry branch (!prof !2) is weighted 10:90 toward skipping
; the loop, and the loop branch (!prof !3) is weighted 10:10000 toward the
; back edge.

; CHECK-LABEL: @foo_low_trip_count3(
; CHECK: vector.body:

entry:
  br i1 %cond, label %for.preheader, label %for.end, !prof !2

for.preheader:                                    ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body, %for.preheader
  %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; The exit test compares the pre-increment index against %bound.
  %exitcond = icmp eq i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !3

for.end:                                          ; preds = %for.body, %entry
  ret i32 0
}
|
|
|
|
define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
; Simple loop with low tripcount and inequality test for exit.
; Should not be vectorized.
; The loop branch carries !prof !1, which puts all of the branch weight on the
; exit edge; the function itself has no entry-count metadata.

; CHECK-LABEL: @foo_low_trip_count_icmp_sgt(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Signed inequality exit test on the pre-increment index: leave once i > bound.
  %exitcond = icmp sgt i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}
|
|
|
|
define i32 @const_low_trip_count() {
; Simple loop with constant, small trip count and no profiling info.
; The body runs for i = 0, 1, 2 (the back edge is taken while i < 2).

; CHECK-LABEL: @const_low_trip_count(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Despite its name, %exitcond is the continue condition here: true branches
  ; back to the loop body.
  %exitcond = icmp slt i32 %i.08, 2
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}
|
|
|
|
define i32 @const_large_trip_count() {
; Simple loop with constant large trip count and no profiling info.
; The back edge is taken while i < 1000. Expected to be vectorized.

; CHECK-LABEL: @const_large_trip_count(
; CHECK: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Despite its name, %exitcond is the continue condition here: true branches
  ; back to the loop body.
  %exitcond = icmp slt i32 %i.08, 1000
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}
|
|
|
|
define i32 @const_small_trip_count_step() {
; Simple loop with static, small trip count and no profiling info.
; The induction variable advances by 5 and the back edge is taken while
; i < 10, so the body runs for i = 0, 5, 10.

; CHECK-LABEL: @const_small_trip_count_step(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 5
  ; Despite its name, %exitcond is the continue condition here: true branches
  ; back to the loop body.
  %exitcond = icmp slt i32 %i.08, 10
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}
|
|
|
|
define i32 @const_trip_over_profile() {
; Constant trip count takes precedence over profile data.
; The back edge is statically taken while i < 1000, yet the loop branch is
; annotated with !prof !1, whose second weight (the %for.end successor here)
; is 0. The loop is still expected to be vectorized.

; CHECK-LABEL: @const_trip_over_profile(
; CHECK: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Despite its name, %exitcond is the continue condition here: true branches
  ; back to the loop body.
  %exitcond = icmp slt i32 %i.08, 1000
  br i1 %exitcond, label %for.body, label %for.end, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}
|
|
|
|
; Profile metadata shared by the tests above. Branch weights are listed in the
; same order as the successors of the branch they annotate.

; Function entry count attached to @foo_low_trip_count2 and @foo_low_trip_count3.
!0 = !{!"function_entry_count", i64 100}
; All weight on the first successor, none on the second.
!1 = !{!"branch_weights", i32 100, i32 0}
; Entry branch of @foo_low_trip_count3: 10 to %for.preheader, 90 to %for.end.
!2 = !{!"branch_weights", i32 10, i32 90}
; Loop branch of @foo_low_trip_count3: 10 to %for.end, 10000 to %for.body.
!3 = !{!"branch_weights", i32 10, i32 10000}
|