From b0957e3eb4fde22baa3934c026509ee1e5eaaf46 Mon Sep 17 00:00:00 2001 From: Evgeniy Brevnov Date: Tue, 15 Sep 2020 17:09:47 +0700 Subject: [PATCH] [LV] Unroll factor is expected to be > 0 LV fails with assertion checking that UF > 0. We already set UF to 1 if it is 0 except the case when IC > MaxInterleaveCount. The fix is to set UF to 1 for that case as well. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D87679 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 25 +++++++++++++------ .../LoopVectorize/SystemZ/zero_unroll.ll | 22 ++++++++++++++++ 2 files changed, 39 insertions(+), 8 deletions(-) create mode 100644 test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 95d55d062da..cf64081938f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5598,12 +5598,28 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, } // If trip count is known or estimated compile time constant, limit the - // interleave count to be less than the trip count divided by VF. + // interleave count to be less than the trip count divided by VF, provided it + // is at least 1. if (BestKnownTC) { MaxInterleaveCount = std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount); + // Make sure MaxInterleaveCount is greater than 0. + MaxInterleaveCount = std::max(1u, MaxInterleaveCount); } + assert(MaxInterleaveCount > 0 && + "Maximum interleave count must be greater than 0"); + + // Clamp the calculated IC to be between the 1 and the max interleave count + // that the target and trip count allows. + if (IC > MaxInterleaveCount) + IC = MaxInterleaveCount; + else + // Make sure IC is greater than 0. + IC = std::max(1u, IC); + + assert(IC > 0 && "Interleave count must be greater than 0."); + // If we did not calculate the cost for VF (because the user selected the VF) // then we calculate the cost of VF here. if (LoopCost == 0) @@ -5611,13 +5627,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, assert(LoopCost && "Non-zero loop cost expected"); - // Clamp the calculated IC to be between the 1 and the max interleave count - // that the target and trip count allows. - if (IC > MaxInterleaveCount) - IC = MaxInterleaveCount; - else if (IC < 1) - IC = 1; - // Interleave if we vectorized this loop and there is a reduction that could // benefit from interleaving. if (VF.isVector() && HasReductions) { diff --git a/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll b/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll new file mode 100644 index 00000000000..6dd461de644 --- /dev/null +++ b/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll @@ -0,0 +1,22 @@ +; RUN: opt -S -loop-vectorize -mtriple=s390x-linux-gnu -tiny-trip-count-interleave-threshold=4 -vectorizer-min-trip-count=8 < %s | FileCheck %s +; RUN: opt -S -passes=loop-vectorize -mtriple=s390x-linux-gnu -tiny-trip-count-interleave-threshold=4 -vectorizer-min-trip-count=8 < %s | FileCheck %s + +define i32 @main(i32 %arg, i8** nocapture readnone %arg1) #0 { +;CHECK: vector.body: +entry: + %0 = alloca i8, align 1 + br label %loop + +loop: + %storemerge.i.i = phi i8 [ 0, %entry ], [ %tmp12.i.i, %loop ] + store i8 %storemerge.i.i, i8* %0, align 2 + %tmp8.i.i = icmp ult i8 %storemerge.i.i, 8 + %tmp12.i.i = add nuw nsw i8 %storemerge.i.i, 1 + br i1 %tmp8.i.i, label %loop, label %ret + +ret: + ret i32 0 +} + +attributes #0 = { "target-cpu"="z13" } +