1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[LV] Fix bug when unrolling (only) a loop with non-latch exit

If we unroll a loop in the vectorizer (without vectorizing), and the cost model requires an epilogue to be generated for correctness, the code generation must actually do so.

The included test case on an unmodified opt will access memory one past the expected bound. This patch therefore fixes a latent miscompile.

Differential Revision: https://reviews.llvm.org/D103700
This commit is contained in:
Philip Reames 2021-06-29 07:54:53 -07:00
parent cc491fbe18
commit 718aae50fc
2 changed files with 23 additions and 28 deletions

View File

@ -1566,14 +1566,14 @@ public:
/// Returns true if we're required to use a scalar epilogue for at least
/// the final iteration of the original loop.
bool requiresScalarEpilogue() const {
bool requiresScalarEpilogue(ElementCount VF) const {
if (!isScalarEpilogueAllowed())
return false;
// If we might exit from anywhere but the latch, must run the exiting
// iteration in scalar form.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch())
return true;
return InterleaveInfo.requiresScalarEpilogue();
return VF.isVector() && InterleaveInfo.requiresScalarEpilogue();
}
/// Returns true if a scalar epilogue is not allowed due to optsize or a
@ -3181,18 +3181,13 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
// unroll factor (number of SIMD instructions).
Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
// There are two cases where we need to ensure (at least) the last iteration
// runs in the scalar remainder loop. Thus, if the step evenly divides
// the trip count, we set the remainder to be equal to the step. If the step
// does not evenly divide the trip count, no adjustment is necessary since
// there will already be scalar iterations. Note that the minimum iterations
// check ensures that N >= Step. The cases are:
// 1) If there is a non-reversed interleaved group that may speculatively
// access memory out-of-bounds.
// 2) If any instruction may follow a conditionally taken exit. That is, if
// the loop contains multiple exiting blocks, or a single exiting block
// which is not the latch.
if (VF.isVector() && Cost->requiresScalarEpilogue()) {
// There are cases where we *must* run at least one iteration in the remainder
// loop. See the cost model for when this can happen. If the step evenly
// divides the trip count, we set the remainder to be equal to the step. If
// the step does not evenly divide the trip count, no adjustment is necessary
// since there will already be scalar iterations. Note that the minimum
// iterations check ensures that N >= Step.
if (Cost->requiresScalarEpilogue(VF)) {
auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
R = Builder.CreateSelect(IsZero, Step, R);
}
@ -3246,8 +3241,8 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
// vector trip count is zero. This check also covers the case where adding one
// to the backedge-taken count overflowed leading to an incorrect trip count
// of zero. In this case we will also jump to the scalar loop.
auto P = Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
: ICmpInst::ICMP_ULT;
auto P = Cost->requiresScalarEpilogue(VF) ? ICmpInst::ICMP_ULE
: ICmpInst::ICMP_ULT;
// If tail is to be folded, vector loop takes care of all iterations.
Value *CheckMinIters = Builder.getFalse();
@ -8323,8 +8318,8 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
// Generate code to check if the loop's trip count is less than VF * UF of the
// main vector loop.
auto P =
Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
auto P = Cost->requiresScalarEpilogue(ForEpilogue ? EPI.EpilogueVF : VF) ?
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
Value *CheckMinIters = Builder.CreateICmp(
P, Count, ConstantInt::get(Count->getType(), VFactor * UFactor),
@ -8467,8 +8462,8 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
// Generate code to check if the loop's trip count is less than VF * UF of the
// vector epilogue loop.
auto P =
Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
auto P = Cost->requiresScalarEpilogue(EPI.EpilogueVF) ?
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
Value *CheckMinIters = Builder.CreateICmp(
P, Count,

View File

@ -2,11 +2,11 @@
; RUN: opt %s -S -loop-vectorize -force-vector-interleave=2 | FileCheck %s
; Demonstrate a case where we unroll a loop, but don't vectorize it.
; This currently reveals a miscompile. The original loop runs stores in
; the latch block on iterations 0 to 1022, and exits when %indvars.iv = 1023.
; Currently, the unrolled loop produced by the vectorizer runs the iteration
; where %indvar.iv = 1023 in the vector.body loop before exiting. This results
; in an out of bounds access..
; The original loop runs stores in the latch block on iterations 0 to 1022,
; and exits when %indvars.iv = 1023. (That is, it actually runs the stores
; for an odd number of iterations.) If we unroll by two in the "vector.body"
; loop, we must exit to the epilogue on iteration with %indvars.iv = 1022 to
; avoid an out of bounds access.
define void @test(double* %data) {
; CHECK-LABEL: @test(
@ -31,13 +31,13 @@ define void @test(double* %data) {
; CHECK-NEXT: store double [[TMP8]], double* [[TMP4]], align 8
; CHECK-NEXT: store double [[TMP9]], double* [[TMP5]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1022
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1022
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1022, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_LATCH:%.*]] ]