mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[LoopUnroll] Respect the convergent attribute.
Summary: Specifically, when we perform runtime loop unrolling of a loop that contains a convergent op, we can only unroll k times, where k divides the loop trip multiple. Without this change, we'll happily unroll e.g. the following loop: for (int i = 0; i < N; ++i) { if (i == 0) convergent_op(); foo(); } into: int i = 0; if (N % 2 == 1) { convergent_op(); foo(); ++i; } for (; i < N - 1; i += 2) { if (i == 0) convergent_op(); foo(); foo(); }. This is unsafe, because we've just added a control-flow dependency to the convergent op in the prelude. In general, runtime unrolling loops that contain convergent ops is safe only if we don't have to emit a prelude, which occurs when the unroll count divides the trip multiple. Reviewers: resistor Subscribers: llvm-commits, mzolotukhin Differential Revision: http://reviews.llvm.org/D17526 llvm-svn: 263509
This commit is contained in:
parent
ca1eed7aa6
commit
19453c8511
@ -362,7 +362,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
|
||||
|
||||
/// ApproximateLoopSize - Approximate the size of the loop.
|
||||
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
|
||||
bool &NotDuplicatable,
|
||||
bool &NotDuplicatable, bool &Convergent,
|
||||
const TargetTransformInfo &TTI,
|
||||
AssumptionCache *AC) {
|
||||
SmallPtrSet<const Value *, 32> EphValues;
|
||||
@ -373,6 +373,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
|
||||
Metrics.analyzeBasicBlock(BB, TTI, EphValues);
|
||||
NumCalls = Metrics.NumInlineCandidates;
|
||||
NotDuplicatable = Metrics.notDuplicatable;
|
||||
Convergent = Metrics.convergent;
|
||||
|
||||
unsigned LoopSize = Metrics.NumInsts;
|
||||
|
||||
@ -568,8 +569,9 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
||||
|
||||
unsigned NumInlineCandidates;
|
||||
bool NotDuplicatable;
|
||||
unsigned LoopSize =
|
||||
ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, TTI, &AC);
|
||||
bool Convergent;
|
||||
unsigned LoopSize = ApproximateLoopSize(
|
||||
L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
|
||||
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
|
||||
|
||||
// When computing the unrolled size, note that the conditional branch on the
|
||||
@ -623,6 +625,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
||||
if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
|
||||
AllowRuntime = false;
|
||||
}
|
||||
bool DecreasedCountDueToConvergence = false;
|
||||
if (Unrolling == Partial) {
|
||||
bool AllowPartial = PragmaEnableUnroll || UP.Partial;
|
||||
if (!AllowPartial && !CountSetExplicitly) {
|
||||
@ -643,14 +646,40 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
||||
<< "-unroll-runtime not given\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reduce unroll count to be the largest power-of-two factor of
|
||||
// the original count which satisfies the threshold limit.
|
||||
while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
|
||||
Count >>= 1;
|
||||
UnrolledSize = (LoopSize-2) * Count + 2;
|
||||
}
|
||||
|
||||
if (Count > UP.MaxCount)
|
||||
Count = UP.MaxCount;
|
||||
|
||||
// If the loop contains a convergent operation, the prelude we'd add
|
||||
// to do the first few instructions before we hit the unrolled loop
|
||||
// is unsafe -- it adds a control-flow dependency to the convergent
|
||||
// operation. Therefore Count must divide TripMultiple.
|
||||
//
|
||||
// TODO: This is quite conservative. In practice, convergent_op()
|
||||
// is likely to be called unconditionally in the loop. In this
|
||||
// case, the program would be ill-formed (on most architectures)
|
||||
// unless n were the same on all threads in a thread group.
|
||||
// Assuming n is the same on all threads, any kind of unrolling is
|
||||
// safe. But currently llvm's notion of convergence isn't powerful
|
||||
// enough to express this.
|
||||
unsigned OrigCount = Count;
|
||||
while (Convergent && Count != 0 && TripMultiple % Count != 0) {
|
||||
DecreasedCountDueToConvergence = true;
|
||||
Count >>= 1;
|
||||
}
|
||||
if (OrigCount > Count) {
|
||||
DEBUG(dbgs() << " loop contains a convergent instruction, so unroll "
|
||||
"count must divide the trip multiple, "
|
||||
<< TripMultiple << ". Reducing unroll count from "
|
||||
<< OrigCount << " to " << Count << ".\n");
|
||||
}
|
||||
DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
|
||||
}
|
||||
|
||||
@ -665,7 +694,16 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
||||
DebugLoc LoopLoc = L->getStartLoc();
|
||||
Function *F = Header->getParent();
|
||||
LLVMContext &Ctx = F->getContext();
|
||||
if ((PragmaCount > 0) && Count != OriginalCount) {
|
||||
if (PragmaCount > 0 && DecreasedCountDueToConvergence) {
|
||||
emitOptimizationRemarkMissed(
|
||||
Ctx, DEBUG_TYPE, *F, LoopLoc,
|
||||
Twine("Unable to unroll loop the number of times directed by "
|
||||
"unroll_count pragma because the loop contains a convergent "
|
||||
"instruction, and so must have an unroll count that divides "
|
||||
"the loop trip multiple of ") +
|
||||
Twine(TripMultiple) + ". Unrolling instead " + Twine(Count) +
|
||||
" time(s).");
|
||||
} else if ((PragmaCount > 0) && Count != OriginalCount) {
|
||||
emitOptimizationRemarkMissed(
|
||||
Ctx, DEBUG_TYPE, *F, LoopLoc,
|
||||
"Unable to unroll loop the number of times directed by "
|
||||
|
@ -273,7 +273,23 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
|
||||
// flag is specified.
|
||||
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
|
||||
|
||||
if (RuntimeTripCount &&
|
||||
// Loops containing convergent instructions must have a count that divides
|
||||
// their TripMultiple.
|
||||
DEBUG(
|
||||
bool HasConvergent = false;
|
||||
for (auto &BB : L->blocks())
|
||||
for (auto &I : *BB)
|
||||
if (auto CS = CallSite(&I))
|
||||
HasConvergent |= CS.isConvergent();
|
||||
assert(
|
||||
!HasConvergent || TripMultiple % Count == 0 &&
|
||||
"Unroll count must divide trip multiple if loop contains a convergent "
|
||||
"operation.");
|
||||
);
|
||||
// Don't output the runtime loop prolog if Count evenly divides
|
||||
// TripMultiple. Such a prolog is never needed, and is unsafe if the loop
|
||||
// contains a convergent instruction.
|
||||
if (RuntimeTripCount && TripMultiple % Count != 0 &&
|
||||
!UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
|
||||
PreserveLCSSA))
|
||||
return false;
|
||||
|
83
test/Transforms/LoopUnroll/convergent.ll
Normal file
83
test/Transforms/LoopUnroll/convergent.ll
Normal file
@ -0,0 +1,83 @@
|
||||
; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s
|
||||
|
||||
declare void @f() convergent
|
||||
|
||||
; Although this loop contains a convergent instruction, it should be
|
||||
; fully unrolled.
|
||||
;
|
||||
; CHECK-LABEL: @full_unroll(
|
||||
define i32 @full_unroll() {
|
||||
entry:
|
||||
br label %l3
|
||||
|
||||
l3:
|
||||
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK-NOT: call void @f()
|
||||
call void @f() ;convergent
|
||||
%inc = add nsw i32 %x.0, 1
|
||||
%exitcond = icmp eq i32 %inc, 3
|
||||
br i1 %exitcond, label %exit, label %l3
|
||||
|
||||
exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; This loop contains a convergent instruction, but it should be partially
|
||||
; unrolled. The unroll count is the largest power of 2 that divides the
|
||||
; multiple -- 4, in this case.
|
||||
;
|
||||
; CHECK-LABEL: @runtime_unroll(
|
||||
define i32 @runtime_unroll(i32 %n) {
|
||||
entry:
|
||||
%loop_ctl = mul nsw i32 %n, 12
|
||||
br label %l3
|
||||
|
||||
l3:
|
||||
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK-NOT: call void @f()
|
||||
call void @f() convergent
|
||||
%inc = add nsw i32 %x.0, 1
|
||||
%exitcond = icmp eq i32 %inc, %loop_ctl
|
||||
br i1 %exitcond, label %exit, label %l3
|
||||
|
||||
exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; This loop contains a convergent instruction, so its partial unroll
|
||||
; count must divide its trip multiple. This overrides its unroll
|
||||
; pragma -- we unroll exactly 8 times, even though 16 is requested.
|
||||
; CHECK-LABEL: @pragma_unroll
|
||||
define i32 @pragma_unroll(i32 %n) {
|
||||
entry:
|
||||
%loop_ctl = mul nsw i32 %n, 24
|
||||
br label %l3, !llvm.loop !0
|
||||
|
||||
l3:
|
||||
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK: call void @f()
|
||||
; CHECK-NOT: call void @f()
|
||||
call void @f() convergent
|
||||
%inc = add nsw i32 %x.0, 1
|
||||
%exitcond = icmp eq i32 %inc, %loop_ctl
|
||||
br i1 %exitcond, label %exit, label %l3, !llvm.loop !0
|
||||
|
||||
exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
|
Loading…
x
Reference in New Issue
Block a user