1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

LoopUnroll: respect pragma unroll when AllowRemainder is disabled

Currently, when AllowRemainder is disabled, the pragma unroll count is not
respected even when there is no remainder. This bug causes a loop to be
fully unrolled in many cases even though the user specifies an unroll
count. It especially affects OpenCL/CUDA, since in many cases a loop
contains convergent instructions and AllowRemainder is currently
disabled for such loops.

Differential Revision: https://reviews.llvm.org/D43826

llvm-svn: 326585
This commit is contained in:
Yaxun Liu 2018-03-02 16:22:32 +00:00
parent 6b69cafaec
commit ba35ee534e
3 changed files with 134 additions and 27 deletions

View File

@ -729,7 +729,7 @@ static bool computeUnrollCount(
UP.Runtime = true;
UP.AllowExpensiveTripCount = true;
UP.Force = true;
if (UP.AllowRemainder &&
if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return true;
}

View File

@ -80,4 +80,100 @@ exit:
ret i32 0
}
; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 divides trip count 4, the loop unroll should respect the pragma.
; CHECK-LABEL: @pragma_unroll_divisible_trip_count
define void @pragma_unroll_divisible_trip_count() {
entry:
br label %l3, !llvm.loop !1
l3:
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
; CHECK: call void @f()
; CHECK: call void @f()
; CHECK-NOT: call void @f()
call void @f() convergent
%inc = add nsw i32 %x.0, 1
%exitcond = icmp eq i32 %inc, 4
br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
exit:
ret void
}
; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 divides trip multiple 2, the loop unroll should respect the pragma.
; CHECK-LABEL: @pragma_unroll_divisible_trip_multiple
define i32 @pragma_unroll_divisible_trip_multiple(i32 %n) {
entry:
%loop_ctl = mul nsw i32 %n, 2
br label %l3, !llvm.loop !1
l3:
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
; CHECK: call void @f()
; CHECK: call void @f()
; CHECK-NOT: call void @f()
call void @f() convergent
%inc = add nsw i32 %x.0, 1
%exitcond = icmp eq i32 %inc, %loop_ctl
br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
exit:
ret i32 0
}
; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 is not known to divide the runtime trip count, the loop is not
; unrolled, because a remainder loop is forbidden when unrolling a convergent loop.
; ToDo: Forbidding remainder for unrolling convergent loop may be relaxed
; in the future.
; CHECK-LABEL: @pragma_unroll_indivisible_runtime_trip_count
define i32 @pragma_unroll_indivisible_runtime_trip_count(i32 %n) {
entry:
br label %l3, !llvm.loop !1
l3:
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
; CHECK: call void @f()
; CHECK-NOT: call void @f()
call void @f() convergent
%inc = add nsw i32 %x.0, 1
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
exit:
ret i32 0
}
; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 does not divide trip count 5, the loop is not unrolled by 2
; since remainder is forbidden for unrolling convergent loop. Instead, the
; loop gets fully unrolled.
; ToDo: Forbidding remainder for unrolling convergent loop may be relaxed
; in the future.
; CHECK-LABEL: @pragma_unroll_indivisible_trip_count
define i32 @pragma_unroll_indivisible_trip_count() {
entry:
br label %l3, !llvm.loop !1
l3:
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
; CHECK: call void @f()
; CHECK: call void @f()
; CHECK: call void @f()
; CHECK: call void @f()
; CHECK: call void @f()
; CHECK-NOT: call void @f()
call void @f() convergent
%inc = add nsw i32 %x.0, 1
%exitcond = icmp eq i32 %inc, 5
br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
exit:
ret i32 0
}
!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
!1 = !{!1, !{!"llvm.loop.unroll.count", i32 2}}

View File

@ -1,5 +1,6 @@
; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
;
; Run loop unrolling twice to verify that loop unrolling metadata is properly
; removed and further unrolling is disabled after the pass is run once.
@ -168,20 +169,24 @@ for.end: ; preds = %for.body, %entry
; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
; should be duplicated (original and 4x unrolled).
; should be duplicated (original and 4x unrolled) if remainder is allowed,
; otherwise loop should not be unrolled.
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK: store
; REM: store
; REM: store
; REM: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; REM: for.body.epil:
; REM: store
; NOREM-NOT: for.body.epil:
; NOREM-NOT: store
; CHECK-NOT: store
; CHECK: br i1
; REM: br i1
; NOREM-NOT: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
@ -284,24 +289,27 @@ for.end: ; preds = %for.body
; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
; (original and 8x).
; (original and 8x) if remainder is allowed, otherwise it should not be
; unrolled.
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body:
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; CHECK-NOT: store i32
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; REM: for.body.epil:
; NOREM-NOT: for.body.epil:
; REM: store
; CHECK-NOT: store
; CHECK: br i1
; REM: br i1
; NOREM-NOT: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
@ -325,19 +333,22 @@ for.end: ; preds = %for.body, %entry
; #pragma clang loop unroll_count(3)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
; should be duplicated (original and 3x unrolled).
; should be duplicated (original and 3x unrolled) if remainder is allowed,
; otherwise it should not be unrolled.
;
; CHECK-LABEL: @runtime_loop_with_count3(
; CHECK: for.body
; CHECK: store
; CHECK: store
; CHECK: store
; REM: store
; REM: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; REM: for.body.epil:
; REM: store
; NOREM-NOT: for.body.epil:
; NOREM-NOT: store
; CHECK-NOT: store
; CHECK: br i1
; REM: br i1
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0