mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
LoopUnroll: respect pragma unroll when AllowRemainder is disabled
Currently, when AllowRemainder is disabled, the pragma unroll count is not respected even when there is no remainder. This bug causes a loop to be fully unrolled in many cases even though the user specifies an unroll count. It especially affects OpenCL/CUDA, since in many cases a loop contains convergent instructions and AllowRemainder is currently disabled for such loops. Differential Revision: https://reviews.llvm.org/D43826 llvm-svn: 326585
This commit is contained in:
parent
6b69cafaec
commit
ba35ee534e
@ -729,7 +729,7 @@ static bool computeUnrollCount(
|
||||
UP.Runtime = true;
|
||||
UP.AllowExpensiveTripCount = true;
|
||||
UP.Force = true;
|
||||
if (UP.AllowRemainder &&
|
||||
if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
|
||||
getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
|
||||
return true;
|
||||
}
|
||||
|
@ -80,4 +80,100 @@ exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; This loop contains a convergent instruction. Since the pragma loop unroll
|
||||
; count 2 divides trip count 4, the loop unroll should respect the pragma.
|
||||
; CHECK-LABEL: @pragma_unroll_divisible_trip_count
|
||||
define void @pragma_unroll_divisible_trip_count() {
|
||||
entry:
|
||||
br label %l3, !llvm.loop !1
|
||||
|
||||
l3:
|
||||
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK-NOT: call void @f()
|
||||
call void @f() convergent
|
||||
%inc = add nsw i32 %x.0, 1
|
||||
%exitcond = icmp eq i32 %inc, 4
|
||||
br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; This loop contains a convergent instruction. Since the pragma loop unroll
|
||||
; count 2 divides trip multiple 2, the loop unroll should respect the pragma.
|
||||
; CHECK-LABEL: @pragma_unroll_divisible_trip_multiple
|
||||
define i32 @pragma_unroll_divisible_trip_multiple(i32 %n) {
|
||||
entry:
|
||||
%loop_ctl = mul nsw i32 %n, 2
|
||||
br label %l3, !llvm.loop !1
|
||||
|
||||
l3:
|
||||
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK-NOT: call void @f()
|
||||
call void @f() convergent
|
||||
%inc = add nsw i32 %x.0, 1
|
||||
%exitcond = icmp eq i32 %inc, %loop_ctl
|
||||
br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
|
||||
|
||||
exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; This loop contains a convergent instruction. Since the pragma loop unroll
|
||||
; count 2 is not known to divide the runtime trip count, the loop is not unrolled
|
||||
; since a remainder is forbidden when unrolling a convergent loop.
|
||||
; ToDo: Forbidding a remainder when unrolling a convergent loop may be relaxed
|
||||
; in the future.
|
||||
; CHECK-LABEL: @pragma_unroll_indivisible_runtime_trip_count
|
||||
define i32 @pragma_unroll_indivisible_runtime_trip_count(i32 %n) {
|
||||
entry:
|
||||
br label %l3, !llvm.loop !1
|
||||
|
||||
l3:
|
||||
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
|
||||
; CHECK: call void @f()
|
||||
; CHECK-NOT: call void @f()
|
||||
call void @f() convergent
|
||||
%inc = add nsw i32 %x.0, 1
|
||||
%exitcond = icmp eq i32 %inc, %n
|
||||
br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
|
||||
|
||||
exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; This loop contains a convergent instruction. Since the pragma loop unroll
|
||||
; count 2 does not divide trip count 5, the loop is not unrolled by 2
|
||||
; since a remainder is forbidden when unrolling a convergent loop. Instead, the
|
||||
; loop gets fully unrolled.
|
||||
; ToDo: Forbidding a remainder when unrolling a convergent loop may be relaxed
|
||||
; in the future.
|
||||
; CHECK-LABEL: @pragma_unroll_indivisible_trip_count
|
||||
define i32 @pragma_unroll_indivisible_trip_count() {
|
||||
entry:
|
||||
br label %l3, !llvm.loop !1
|
||||
|
||||
l3:
|
||||
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK-NOT: call void @f()
|
||||
call void @f() convergent
|
||||
%inc = add nsw i32 %x.0, 1
|
||||
%exitcond = icmp eq i32 %inc, 5
|
||||
br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
|
||||
|
||||
exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
|
||||
!1 = !{!1, !{!"llvm.loop.unroll.count", i32 2}}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
|
||||
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
|
||||
; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
|
||||
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
|
||||
; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
|
||||
;
|
||||
; Run loop unrolling twice to verify that loop unrolling metadata is properly
|
||||
; removed and further unrolling is disabled after the pass is run once.
|
||||
@ -168,20 +169,24 @@ for.end: ; preds = %for.body, %entry
|
||||
|
||||
; #pragma clang loop unroll_count(4)
|
||||
; Loop has a runtime trip count. Runtime unrolling should occur and loop
|
||||
; should be duplicated (original and 4x unrolled).
|
||||
; should be duplicated (original and 4x unrolled) if remainder is allowed,
|
||||
; otherwise loop should not be unrolled.
|
||||
;
|
||||
; CHECK-LABEL: @runtime_loop_with_count4(
|
||||
; CHECK: for.body
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
; REM: store
|
||||
; REM: store
|
||||
; REM: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body.epil:
|
||||
; CHECK: store
|
||||
; REM: for.body.epil:
|
||||
; REM: store
|
||||
; NOREM-NOT: for.body.epil:
|
||||
; NOREM-NOT: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; REM: br i1
|
||||
; NOREM-NOT: br i1
|
||||
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
|
||||
entry:
|
||||
%cmp3 = icmp sgt i32 %b, 0
|
||||
@ -284,24 +289,27 @@ for.end: ; preds = %for.body
|
||||
|
||||
; #pragma clang loop unroll(enable)
|
||||
; Loop has a runtime trip count and should be runtime unrolled and duplicated
|
||||
; (original and 8x).
|
||||
; (original and 8x) if remainder is allowed, otherwise it should not be
|
||||
; unrolled.
|
||||
;
|
||||
; CHECK-LABEL: @runtime_loop_with_enable(
|
||||
; CHECK: for.body:
|
||||
; CHECK: store i32
|
||||
; CHECK: store i32
|
||||
; CHECK: store i32
|
||||
; CHECK: store i32
|
||||
; CHECK: store i32
|
||||
; CHECK: store i32
|
||||
; CHECK: store i32
|
||||
; CHECK: store i32
|
||||
; REM: store i32
|
||||
; REM: store i32
|
||||
; REM: store i32
|
||||
; REM: store i32
|
||||
; REM: store i32
|
||||
; REM: store i32
|
||||
; REM: store i32
|
||||
; CHECK-NOT: store i32
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body.epil:
|
||||
; CHECK: store
|
||||
; REM: for.body.epil:
|
||||
; NOREM-NOT: for.body.epil:
|
||||
; REM: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; REM: br i1
|
||||
; NOREM-NOT: br i1
|
||||
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
|
||||
entry:
|
||||
%cmp3 = icmp sgt i32 %b, 0
|
||||
@ -325,19 +333,22 @@ for.end: ; preds = %for.body, %entry
|
||||
|
||||
; #pragma clang loop unroll_count(3)
|
||||
; Loop has a runtime trip count. Runtime unrolling should occur and loop
|
||||
; should be duplicated (original and 3x unrolled).
|
||||
; should be duplicated (original and 3x unrolled) if remainder is allowed,
|
||||
; otherwise it should not be unrolled.
|
||||
;
|
||||
; CHECK-LABEL: @runtime_loop_with_count3(
|
||||
; CHECK: for.body
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
; REM: store
|
||||
; REM: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body.epil:
|
||||
; CHECK: store
|
||||
; REM: for.body.epil:
|
||||
; REM: store
|
||||
; NOREM-NOT: for.body.epil:
|
||||
; NOREM-NOT: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; REM: br i1
|
||||
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
|
||||
entry:
|
||||
%cmp3 = icmp sgt i32 %b, 0
|
||||
|
Loading…
Reference in New Issue
Block a user