1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[SCEV] Guard movement of insertion point for loop-invariants

This reinstates r347934, along with a tweak to address a problem with
PHI node ordering that that commit created (or exposed). (That commit
was reverted at r348426, due to the PHI node issue.)

Original commit message:

r320789 suppressed moving the insertion point of SCEV expressions with
dev/rem operations to the loop header in non-loop-invariant situations.
This, and similar, hoisting is also unsafe in the loop-invariant case,
since there may be a guard against a zero denominator. This is an
adjustment to the fix of r320789 to suppress the movement even in the
loop-invariant case.

This fixes PR30806.

Differential Revision: https://reviews.llvm.org/D57428

llvm-svn: 356392
This commit is contained in:
Warren Ristow 2019-03-18 18:52:35 +00:00
parent badc888cad
commit d2b89688b2
3 changed files with 178 additions and 41 deletions

View File

@ -1731,49 +1731,55 @@ Value *SCEVExpander::expand(const SCEV *S) {
// Compute an insertion point for this SCEV object. Hoist the instructions
// as far out in the loop nest as possible.
Instruction *InsertPt = &*Builder.GetInsertPoint();
for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
L = L->getParentLoop())
if (SE.isLoopInvariant(S, L)) {
if (!L) break;
if (BasicBlock *Preheader = L->getLoopPreheader())
InsertPt = Preheader->getTerminator();
else {
// LSR sets the insertion point for AddRec start/step values to the
// block start to simplify value reuse, even though it's an invalid
// position. SCEVExpander must correct for this in all cases.
InsertPt = &*L->getHeader()->getFirstInsertionPt();
// We can move insertion point only if there is no div or rem operations
// otherwise we are risky to move it over the check for zero denominator.
auto SafeToHoist = [](const SCEV *S) {
return !SCEVExprContains(S, [](const SCEV *S) {
if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
// Division by non-zero constants can be hoisted.
return SC->getValue()->isZero();
// All other divisions should not be moved as they may be
// divisions by zero and should be kept within the
// conditions of the surrounding loops that guard their
// execution (see PR35406).
return true;
}
return false;
});
};
if (SafeToHoist(S)) {
for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
L = L->getParentLoop()) {
if (SE.isLoopInvariant(S, L)) {
if (!L) break;
if (BasicBlock *Preheader = L->getLoopPreheader())
InsertPt = Preheader->getTerminator();
else
// LSR sets the insertion point for AddRec start/step values to the
// block start to simplify value reuse, even though it's an invalid
// position. SCEVExpander must correct for this in all cases.
InsertPt = &*L->getHeader()->getFirstInsertionPt();
} else {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
InsertPt = &*L->getHeader()->getFirstInsertionPt();
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
(isInsertedInstruction(InsertPt) ||
isa<DbgInfoIntrinsic>(InsertPt)))
InsertPt = &*std::next(InsertPt->getIterator());
break;
}
} else {
// We can move insertion point only if there is no div or rem operations
// otherwise we are risky to move it over the check for zero denominator.
auto SafeToHoist = [](const SCEV *S) {
return !SCEVExprContains(S, [](const SCEV *S) {
if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
// Division by non-zero constants can be hoisted.
return SC->getValue()->isZero();
// All other divisions should not be moved as they may be
// divisions by zero and should be kept within the
// conditions of the surrounding loops that guard their
// execution (see PR35406).
return true;
}
return false;
});
};
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
SafeToHoist(S))
InsertPt = &*L->getHeader()->getFirstInsertionPt();
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
(isInsertedInstruction(InsertPt) ||
isa<DbgInfoIntrinsic>(InsertPt))) {
InsertPt = &*std::next(InsertPt->getIterator());
}
break;
}
}
// IndVarSimplify sometimes sets the insertion point at the block start, even
// when there are PHIs at that point. We must correct for this.
if (isa<PHINode>(*InsertPt))
InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();
// Check to see if we already expanded this here.
auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));

View File

@ -0,0 +1,66 @@
; RUN: opt -S -indvars < %s | FileCheck %s
; Produced from the test-case:
;
; extern void foo(char *, unsigned , unsigned *);
; extern void bar(int *, long);
; extern char *processBuf(char *);
;
; extern unsigned theSize;
;
; void foo(char *buf, unsigned denominator, unsigned *flag) {
; int incr = (int) (theSize / denominator);
; int inx = 0;
; while (*flag) {
; int itmp = inx + incr;
; int i = (int) theSize;
; bar(&i, (long) itmp);
; buf = processBuf(buf);
; inx = itmp;
; }
; }
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@theSize = external local_unnamed_addr global i32, align 4
define void @foo(i8* %buf, i32 %denominator, i32* %flag) local_unnamed_addr {
entry:
%i = alloca i32, align 4
%0 = load i32, i32* @theSize, align 4
%div = udiv i32 %0, %denominator
%1 = load i32, i32* %flag, align 4
%tobool5 = icmp eq i32 %1, 0
br i1 %tobool5, label %while.end, label %while.body.lr.ph
while.body.lr.ph: ; preds = %entry
%2 = bitcast i32* %i to i8*
br label %while.body
while.body: ; preds = %while.body.lr.ph, %while.body
; Check that there are two PHIs followed by a 'sext' in the same block, and that
; the test does not crash.
; CHECK: phi
; CHECK-NEXT: phi
; CHECK-NEXT: sext
%buf.addr.07 = phi i8* [ %buf, %while.body.lr.ph ], [ %call, %while.body ]
%inx.06 = phi i32 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
%add = add nsw i32 %inx.06, %div
%3 = load i32, i32* @theSize, align 4
store i32 %3, i32* %i, align 4
%conv = sext i32 %add to i64
call void @bar(i32* nonnull %i, i64 %conv)
%call = call i8* @processBuf(i8* %buf.addr.07)
%4 = load i32, i32* %flag, align 4
%tobool = icmp eq i32 %4, 0
br i1 %tobool, label %while.end.loopexit, label %while.body
while.end.loopexit: ; preds = %while.body
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
ret void
}
declare void @bar(i32*, i64) local_unnamed_addr
declare i8* @processBuf(i8*) local_unnamed_addr

View File

@ -0,0 +1,65 @@
; RUN: opt -loop-vectorize -S < %s 2>&1 | FileCheck %s
; Produced from test-case:
;
; void testGuardedInnerLoop(uint32_t *ptr, uint32_t denom, uint32_t numer, uint32_t outer_lim)
; {
; for(uint32_t outer_i = 0; outer_i < outer_lim; ++outer_i) {
; if (denom > 0) {
; const uint32_t lim = numer / denom;
;
; for (uint32_t i = 0; i < lim; ++i)
; ptr[i] = 1;
; }
; }
; }
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
target triple = "x86_64-unknown-linux-gnu"
define void @testGuardedInnerLoop(i32* %ptr, i32 %denom, i32 %numer, i32 %outer_lim) {
entry:
%cmp1 = icmp eq i32 %outer_lim, 0
br i1 %cmp1, label %exit, label %loop1.preheader
; Verify that a 'udiv' does not appear between the 'loop1.preheader' label, and
; whatever label comes next.
loop1.preheader:
; CHECK-LABEL: loop1.preheader:
; CHECK-NOT: udiv
; CHECK-LABEL: :
br label %loop1
loop1:
%outer_i = phi i32 [ %inc1, %loop2.exit ], [ 0, %loop1.preheader ]
%0 = add i32 %denom, -1
%1 = icmp ult i32 %0, %numer
br i1 %1, label %loop2.preheader, label %loop2.exit
; Verify that a 'udiv' does appear between the 'loop2.preheader' label, and
; whatever label comes next.
loop2.preheader:
; CHECK-LABEL: loop2.preheader:
; CHECK: udiv
; CHECK-LABEL: :
%lim = udiv i32 %numer, %denom
%2 = zext i32 %lim to i64
br label %loop2
loop2:
%indvar.loop2 = phi i64 [ 0, %loop2.preheader ], [ %indvar.loop2.next, %loop2 ]
%arrayidx = getelementptr inbounds i32, i32* %ptr, i64 %indvar.loop2
store i32 1, i32* %arrayidx, align 4
%indvar.loop2.next = add nuw nsw i64 %indvar.loop2, 1
%cmp2 = icmp ult i64 %indvar.loop2.next, %2
br i1 %cmp2, label %loop2, label %loop2.exit
loop2.exit:
%inc1 = add nuw i32 %outer_i, 1
%exitcond = icmp eq i32 %inc1, %outer_lim
br i1 %exitcond, label %exit, label %loop1
exit:
ret void
}