1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

Fix a trip-count overflow issue in LoopUnroll.

Currently LoopUnroll generates a prologue loop before the main loop
body to execute the first N % UnrollFactor iterations. This loop is also
used when the trip count can overflow, which is determined by a runtime check.

However, we've been mistakenly optimizing this loop into straight-line code
for UnrollFactor = 2, not taking into account that it also serves as the safe
version of the loop when its trip count overflows.

llvm-svn: 222451
This commit is contained in:
Michael Zolotukhin 2014-11-20 20:19:55 +00:00
parent 970e7c5fb1
commit 7e8ae7cad7
3 changed files with 42 additions and 3 deletions

View File

@ -295,6 +295,10 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
return false;
// If BECount is all ones (the maximum unsigned value), BECount + 1 wraps to
// zero, so we can't compute the trip count without overflow.
if (BECount->isAllOnesValue())
return false;
// Add 1 since the backedge count doesn't include the first loop iteration
const SCEV *TripCountSC =
SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
@ -357,11 +361,16 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
// If unroll count is 2 and we can't overflow in tripcount computation (which
// is BECount + 1), then we don't need a loop for prologue, and we can unroll
// it. We can be sure that we don't overflow only if tripcount is a constant.
bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount));
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
// iterations. This function adds the appropriate CFG connections.
CloneLoopBlocks(L, ModVal, Count == 2, PH, PEnd, NewBlocks, LoopBlocks, VMap,
LI);
CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
VMap, LI);
// Insert the cloned blocks into function just before the original loop
F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0],

View File

@ -3,7 +3,7 @@
; This tests that setting the unroll count works
; CHECK: for.body.prol:
; CHECK: br label %for.body.preheader.split
; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
; CHECK: for.body:
; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body

View File

@ -0,0 +1,30 @@
; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; When the prologue is fully unrolled, the branch at its end is unconditional.
; Unrolling it is illegal if we can't prove that the trip count (backedge-taken
; count + 1) doesn't overflow, as in this example, where it comes from an argument.
;
; This test is based on an example from here:
; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
;
; CHECK: while.body.prol:
; CHECK: br i1
; CHECK: entry.split:
; Function Attrs: nounwind readnone ssp uwtable
; Counts %i up from 0 until it equals %N, then returns that final %i.
; The exit test runs before the backedge is taken, so the loop body executes
; once for every value 0..%N (as unsigned), i.e. %N + 1 times. The add has no
; nsw/nuw flag, so %N may be any i32 value, including all-ones, for which the
; "%N + 1" trip count wraps around to zero.
define i32 @foo(i32 %N) #0 {
entry:
br label %while.body
while.body: ; preds = %while.body, %entry
; Induction variable: 0 on entry, previous value + 1 on the backedge.
%i = phi i32 [ 0, %entry ], [ %inc, %while.body ]
; Exit condition compares the current (not yet incremented) value against %N.
%cmp = icmp eq i32 %i, %N
%inc = add i32 %i, 1
; Leave the loop when %i == %N, otherwise take the backedge.
br i1 %cmp, label %while.end, label %while.body
while.end: ; preds = %while.body
ret i32 %i
}
attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }