mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[LV] Fix bug when unrolling (only) a loop with non-latch exit
If we unroll a loop in the vectorizer (without vectorizing), and the cost model requires an epilogue be generated for correctness, the code generation must actually do so. The included test case on an unmodified opt will access memory one past the expected bound. As a result, this patch is fixing a latent miscompile. Differential Revision: https://reviews.llvm.org/D103700
This commit is contained in:
parent
cc491fbe18
commit
718aae50fc
@ -1566,14 +1566,14 @@ public:
|
||||
|
||||
/// Returns true if we're required to use a scalar epilogue for at least
|
||||
/// the final iteration of the original loop.
|
||||
bool requiresScalarEpilogue() const {
|
||||
bool requiresScalarEpilogue(ElementCount VF) const {
|
||||
if (!isScalarEpilogueAllowed())
|
||||
return false;
|
||||
// If we might exit from anywhere but the latch, must run the exiting
|
||||
// iteration in scalar form.
|
||||
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch())
|
||||
return true;
|
||||
return InterleaveInfo.requiresScalarEpilogue();
|
||||
return VF.isVector() && InterleaveInfo.requiresScalarEpilogue();
|
||||
}
|
||||
|
||||
/// Returns true if a scalar epilogue is not allowed due to optsize or a
|
||||
@ -3181,18 +3181,13 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
|
||||
// unroll factor (number of SIMD instructions).
|
||||
Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
|
||||
|
||||
// There are two cases where we need to ensure (at least) the last iteration
|
||||
// runs in the scalar remainder loop. Thus, if the step evenly divides
|
||||
// the trip count, we set the remainder to be equal to the step. If the step
|
||||
// does not evenly divide the trip count, no adjustment is necessary since
|
||||
// there will already be scalar iterations. Note that the minimum iterations
|
||||
// check ensures that N >= Step. The cases are:
|
||||
// 1) If there is a non-reversed interleaved group that may speculatively
|
||||
// access memory out-of-bounds.
|
||||
// 2) If any instruction may follow a conditionally taken exit. That is, if
|
||||
// the loop contains multiple exiting blocks, or a single exiting block
|
||||
// which is not the latch.
|
||||
if (VF.isVector() && Cost->requiresScalarEpilogue()) {
|
||||
// There are cases where we *must* run at least one iteration in the remainder
|
||||
// loop. See the cost model for when this can happen. If the step evenly
|
||||
// divides the trip count, we set the remainder to be equal to the step. If
|
||||
// the step does not evenly divide the trip count, no adjustment is necessary
|
||||
// since there will already be scalar iterations. Note that the minimum
|
||||
// iterations check ensures that N >= Step.
|
||||
if (Cost->requiresScalarEpilogue(VF)) {
|
||||
auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
|
||||
R = Builder.CreateSelect(IsZero, Step, R);
|
||||
}
|
||||
@ -3246,8 +3241,8 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
|
||||
// vector trip count is zero. This check also covers the case where adding one
|
||||
// to the backedge-taken count overflowed leading to an incorrect trip count
|
||||
// of zero. In this case we will also jump to the scalar loop.
|
||||
auto P = Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
|
||||
: ICmpInst::ICMP_ULT;
|
||||
auto P = Cost->requiresScalarEpilogue(VF) ? ICmpInst::ICMP_ULE
|
||||
: ICmpInst::ICMP_ULT;
|
||||
|
||||
// If tail is to be folded, vector loop takes care of all iterations.
|
||||
Value *CheckMinIters = Builder.getFalse();
|
||||
@ -8323,8 +8318,8 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
|
||||
|
||||
// Generate code to check if the loop's trip count is less than VF * UF of the
|
||||
// main vector loop.
|
||||
auto P =
|
||||
Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
|
||||
auto P = Cost->requiresScalarEpilogue(ForEpilogue ? EPI.EpilogueVF : VF) ?
|
||||
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
|
||||
|
||||
Value *CheckMinIters = Builder.CreateICmp(
|
||||
P, Count, ConstantInt::get(Count->getType(), VFactor * UFactor),
|
||||
@ -8467,8 +8462,8 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
|
||||
|
||||
// Generate code to check if the loop's trip count is less than VF * UF of the
|
||||
// vector epilogue loop.
|
||||
auto P =
|
||||
Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
|
||||
auto P = Cost->requiresScalarEpilogue(EPI.EpilogueVF) ?
|
||||
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
|
||||
|
||||
Value *CheckMinIters = Builder.CreateICmp(
|
||||
P, Count,
|
||||
|
@ -2,11 +2,11 @@
|
||||
; RUN: opt %s -S -loop-vectorize -force-vector-interleave=2 | FileCheck %s
|
||||
|
||||
; Demonstrate a case where we unroll a loop, but don't vectorize it.
|
||||
; This currently reveals a miscompile. The original loop runs stores in
|
||||
; the latch block on iterations 0 to 1022, and exits when %indvars.iv = 1023.
|
||||
; Currently, the unrolled loop produced by the vectorizer runs the iteration
|
||||
; where %indvar.iv = 1023 in the vector.body loop before exiting. This results
|
||||
; in an out of bounds access..
|
||||
; The original loop runs stores in the latch block on iterations 0 to 1022,
|
||||
; and exits when %indvars.iv = 1023. (That is, it actually runs the stores
|
||||
; for an odd number of iterations.) If we unroll by two in the "vector.body"
|
||||
; loop, we must exit to the epilogue on iteration with %indvars.iv = 1022 to
|
||||
; avoid an out of bounds access.
|
||||
|
||||
define void @test(double* %data) {
|
||||
; CHECK-LABEL: @test(
|
||||
@ -31,13 +31,13 @@ define void @test(double* %data) {
|
||||
; CHECK-NEXT: store double [[TMP8]], double* [[TMP4]], align 8
|
||||
; CHECK-NEXT: store double [[TMP9]], double* [[TMP5]], align 8
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1022
|
||||
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1022
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
||||
; CHECK: scalar.ph:
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1022, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_LATCH:%.*]] ]
|
||||
|
Loading…
x
Reference in New Issue
Block a user