1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

The patch replace the overflow check in loop vectorization with the minimum loop iterations check.

The loop minimum iterations check below ensures the loop has enough trip count so the generated
vector loop will likely be executed, and it covers the overflow check.

Differential Revision: http://reviews.llvm.org/D12107.

llvm-svn: 245952
This commit is contained in:
Wei Mi 2015-08-25 16:43:47 +00:00
parent 7089a6d8e2
commit f8e0f7a698
3 changed files with 72 additions and 24 deletions

View File

@ -2619,7 +2619,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
the vectorized instructions while the old loop will continue to run the
scalar remainder.
[ ] <-- Back-edge taken count overflow check.
[ ] <-- loop iteration number check.
/ |
/ v
| [ ] <-- vector loop bypass (may consist of multiple blocks).
@ -2683,21 +2683,25 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Notice that the pre-header does not change, only the loop body.
SCEVExpander Exp(*SE, DL, "induction");
// We need to test whether the backedge-taken count is uint##_max. Adding one
// to it will cause overflow and an incorrect loop trip count in the vector
// body. In case of overflow we want to directly jump to the scalar remainder
// loop.
Value *BackedgeCount =
Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(),
VectorPH->getTerminator());
if (BackedgeCount->getType()->isPointerTy())
BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy,
"backedge.ptrcnt.to.int",
VectorPH->getTerminator());
Instruction *CheckBCOverflow =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount,
Constant::getAllOnesValue(BackedgeCount->getType()),
"backedge.overflow", VectorPH->getTerminator());
// The loop minimum iterations check below is to ensure the loop has enough
// trip count so the generated vector loop will likely be executed and the
// preparation and rounding-off costs will likely be worthy.
//
// The minimum iteration check also covers case where the backedge-taken
// count is uint##_max. Adding one to it will cause overflow and an
// incorrect loop trip count being generated in the vector body. In this
// case we also want to directly jump to the scalar remainder loop.
Value *ExitCountValue = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
VectorPH->getTerminator());
if (ExitCountValue->getType()->isPointerTy())
ExitCountValue = CastInst::CreatePointerCast(ExitCountValue, IdxTy,
"exitcount.ptrcnt.to.int",
VectorPH->getTerminator());
Instruction *CheckMinIters =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, ExitCountValue,
ConstantInt::get(ExitCountValue->getType(), VF * UF),
"min.iters.check", VectorPH->getTerminator());
// The loop index does not have to start at Zero. Find the original start
// value from the induction PHI node. If we don't have an induction variable
@ -2749,15 +2753,14 @@ void InnerLoopVectorizer::createEmptyLoop() {
// times the unroll factor (num of SIMD instructions).
Constant *Step = ConstantInt::get(IdxTy, VF * UF);
// Generate code to check that the loop's trip count that we computed by
// adding one to the backedge-taken count will not overflow.
// Generate code to check that the loop's trip count is not less than the
// minimum loop iteration number threshold.
BasicBlock *NewVectorPH =
VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked");
VectorPH->splitBasicBlock(VectorPH->getTerminator(), "min.iters.checked");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
ReplaceInstWithInst(
VectorPH->getTerminator(),
BranchInst::Create(ScalarPH, NewVectorPH, CheckBCOverflow));
ReplaceInstWithInst(VectorPH->getTerminator(),
BranchInst::Create(ScalarPH, NewVectorPH, CheckMinIters));
VectorPH = NewVectorPH;
// This is the IR builder that we use to add all of the logic for bypassing

View File

@ -113,8 +113,8 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
; condition and branch directly to the scalar loop.
; CHECK-LABEL: max_i32_backedgetaken
; CHECK: %backedge.overflow = icmp eq i32 -1, -1
; CHECK: br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked
; CHECK: %min.iters.check = icmp ult i32 0, 2
; CHECK: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
; CHECK: scalar.ph:
; CHECK: %bc.resume.val = phi i32 [ %resume.val, %middle.block ], [ 0, %0 ]

View File

@ -0,0 +1,45 @@
; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
; RUN: opt %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s -check-prefix=UNROLL
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@b = common global [1000 x i32] zeroinitializer, align 16
@c = common global [1000 x i32] zeroinitializer, align 16
@a = common global [1000 x i32] zeroinitializer, align 16
; Generate min.iters.check to skip the vector loop and jump to scalar.ph directly when loop iteration number is less than VF * UF.
; CHECK-LABEL: foo(
; CHECK: %min.iters.check = icmp ult i64 %N, 4
; CHECK: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
; UNROLL-LABEL: foo(
; UNROLL: %min.iters.check = icmp ult i64 %N, 8
; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
define void @foo(i64 %N) {
entry:
%cmp.8 = icmp sgt i64 %N, 0
br i1 %cmp.8, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
br label %for.body
for.body: ; preds = %for.body, %for.body.preheader
%i.09 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @b, i64 0, i64 %i.09
%tmp = load i32, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds [1000 x i32], [1000 x i32]* @c, i64 0, i64 %i.09
%tmp1 = load i32, i32* %arrayidx1, align 4
%add = add nsw i32 %tmp1, %tmp
%arrayidx2 = getelementptr inbounds [1000 x i32], [1000 x i32]* @a, i64 0, i64 %i.09
store i32 %add, i32* %arrayidx2, align 4
%inc = add nuw nsw i64 %i.09, 1
%exitcond = icmp eq i64 %inc, %N
br i1 %exitcond, label %for.end.loopexit, label %for.body
for.end.loopexit: ; preds = %for.body
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
ret void
}