1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[SCEV] Fix unsound reasoning in howManyLessThans

This is split from D105216, it handles only a subset of the cases in that patch.

Specifically, the issue being fixed is that the code incorrectly assumed that (Start-Stride) < End implied that the backedge was taken at least once. This is not true when e.g. Start = 4, Stride = 2, and End = 3. Note that we often do produce the right backedge taken count despite the flawed reasoning.

The fix chosen here is to use an alternate form of uceil (ceiling of unsigned divide) lowering which is safe when max(RHS,Start) > Start - Stride.  (Note that the signedness of both the max expression and the comparison depends on the signedness of the comparison being analyzed, and that overflow in the Start - Stride expression is allowed.)  Note that this is weaker than proving the backedge is taken because it allows start - stride < end < start.  Some cases which can't be proven safe are sent down the generic path, and we do end up generating less optimal expressions in a few cases.

Credit for coming up with the approach goes entirely to Eli.  I just split it off, tweaked the comments a bit, and did some additional testing.

Differential Revision: https://reviews.llvm.org/D105942
This commit is contained in:
Philip Reames 2021-07-15 10:25:06 -07:00
parent 35af2802d5
commit 6a45d08863
4 changed files with 51 additions and 24 deletions

View File

@ -11761,17 +11761,42 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
// and End is the RHS. // and End is the RHS.
const SCEV *BECountIfBackedgeTaken = const SCEV *BECountIfBackedgeTaken =
computeBECount(getMinusSCEV(End, Start), Stride); computeBECount(getMinusSCEV(End, Start), Stride);
// If the loop entry is guarded by the result of the backedge test of the
// first loop iteration, then we know the backedge will be taken at least // We use the expression (max(End,Start)-Start)/Stride to describe the
// once and so the backedge taken count is as above. If not then we use the // backedge count, as if the backedge is taken at least once max(End,Start)
// expression (max(End,Start)-Start)/Stride to describe the backedge count, // is End and so the result is as above, and if not max(End,Start) is Start
// as if the backedge is taken at least once max(End,Start) is End and so the // so we get a backedge count of zero.
// result is as above, and if not max(End,Start) is Start so we get a backedge const SCEV *BECount = nullptr;
// count of zero. auto *StartMinusStride = getMinusSCEV(OrigStart, Stride);
const SCEV *BECount; // Can we prove (max(RHS,Start) > Start - Stride?
if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(OrigStart, Stride), OrigRHS)) if (isLoopEntryGuardedByCond(L, Cond, StartMinusStride, Start) &&
BECount = BECountIfBackedgeTaken; isLoopEntryGuardedByCond(L, Cond, StartMinusStride, RHS)) {
else { // In this case, we can use a refined formula for computing backedge taken
// count. The general formula remains:
// "End-Start /uceiling Stride" where "End = max(RHS,Start)"
// We want to use the alternate formula:
// "((End - 1) - (Start - Stride)) /u Stride"
// Let's do a quick case analysis to show these are equivalent under
// our precondition that max(RHS,Start) > Start - Stride.
// * For RHS <= Start, the backedge-taken count must be zero.
// "((End - 1) - (Start - Stride)) /u Stride" reduces to
// "((Start - 1) - (Start - Stride)) /u Stride" which simplifies to
// "Stride - 1 /u Stride" which is indeed zero for all non-zero values
// of Stride. For 0 stride, we've used umin(1,Stride) above, reducing
// this to the stride of 1 case.
// * For RHS >= Start, the backedge count must be "RHS-Start /uceil Stride".
// "((End - 1) - (Start - Stride)) /u Stride" reduces to
// "((RHS - 1) - (Start - Stride)) /u Stride" reassociates to
// "((RHS - (Start - Stride)) - 1) /u Stride".
// Our preconditions trivially imply no overflow in that form.
const SCEV *MinusOne = getMinusOne(Stride->getType());
const SCEV *Numerator =
getMinusSCEV(getAddExpr(RHS, MinusOne), StartMinusStride);
if (!isa<SCEVCouldNotCompute>(Numerator)) {
BECount = getUDivExpr(Numerator, Stride);
}
}
if (!BECount) {
auto canProveRHSGreaterThanEqualStart = [&]() { auto canProveRHSGreaterThanEqualStart = [&]() {
auto CondGE = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; auto CondGE = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart)) if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart))

View File

@ -1,7 +1,7 @@
; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution 2>&1 | FileCheck %s ; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution 2>&1 | FileCheck %s
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 2>&1 | FileCheck %s ; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 2>&1 | FileCheck %s
; CHECK: Loop %bb: backedge-taken count is ((999 + (-1 * %x)) /u 3) ; CHECK: Loop %bb: backedge-taken count is ((-1 + (-1 * %x) + (1000 umax (3 + %x))) /u 3)
; CHECK: Loop %bb: max backedge-taken count is 334 ; CHECK: Loop %bb: max backedge-taken count is 334

View File

@ -5,7 +5,7 @@
; that this is not an infinite loop with side effects. ; that this is not an infinite loop with side effects.
; CHECK-LABEL: Determining loop execution counts for: @foo1 ; CHECK-LABEL: Determining loop execution counts for: @foo1
; CHECK: backedge-taken count is ((-1 + %n) /u %s) ; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s)
; We should have a conservative estimate for the max backedge taken count for ; We should have a conservative estimate for the max backedge taken count for
; loops with unknown stride. ; loops with unknown stride.

View File

@ -17,22 +17,24 @@ define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %m, i32 %n)
; CHECK-NEXT: [[CMP34:%.*]] = icmp slt i32 [[M:%.*]], [[N:%.*]] ; CHECK-NEXT: [[CMP34:%.*]] = icmp slt i32 [[M:%.*]], [[N:%.*]]
; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader: ; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], 4
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[M]] ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], -1
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[M]]
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 2
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 3
; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body: ; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[M]], [[INDVAR]] ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[M]], [[INDVAR]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP5]] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP6]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP6]], 2 ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP7]], 2
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP5]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP6]]
; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP4]] ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP5]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
; CHECK: for.end.loopexit: ; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]] ; CHECK-NEXT: br label [[FOR_END]]