diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 487a7fb72f0..28edd65f852 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6241,6 +6241,7 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, if (MaxVectorSize == 0) { DEBUG(dbgs() << "LV: The target has no vector registers.\n"); MaxVectorSize = 1; + return MaxVectorSize; } else if (ConstTripCount && ConstTripCount < MaxVectorSize && isPowerOf2_32(ConstTripCount)) { // We need to clamp the VF to be the ConstTripCount. There is no point in @@ -6253,10 +6254,11 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, unsigned MaxVF = MaxVectorSize; if (MaximizeBandwidth && !OptForSize) { - // Collect all viable vectorization factors. + // Collect all viable vectorization factors larger than the default MaxVF + // (i.e. MaxVectorSize). SmallVector<unsigned, 8> VFs; unsigned NewMaxVectorSize = WidestRegister / SmallestType; - for (unsigned VS = MaxVectorSize; VS <= NewMaxVectorSize; VS *= 2) + for (unsigned VS = MaxVectorSize * 2; VS <= NewMaxVectorSize; VS *= 2) VFs.push_back(VS); // For each VF calculate its register usage. diff --git a/test/Transforms/LoopVectorize/X86/reg-usage.ll b/test/Transforms/LoopVectorize/X86/reg-usage.ll index 83974d2db80..9b276aa2bd7 100644 --- a/test/Transforms/LoopVectorize/X86/reg-usage.ll +++ b/test/Transforms/LoopVectorize/X86/reg-usage.ll @@ -10,8 +10,6 @@ define i32 @foo() { ; register usage doesn't exceed 16. ; ; CHECK-LABEL: foo -; CHECK: LV(REG): VF = 4 -; CHECK-NEXT: LV(REG): Found max usage: 4 ; CHECK: LV(REG): VF = 8 ; CHECK-NEXT: LV(REG): Found max usage: 7 ; CHECK: LV(REG): VF = 16 @@ -48,8 +46,6 @@ define i32 @goo() { ; it will not have vector version and the vector register usage will not exceed the ; available vector register number. 
; CHECK-LABEL: goo -; CHECK: LV(REG): VF = 4 -; CHECK-NEXT: LV(REG): Found max usage: 4 ; CHECK: LV(REG): VF = 8 ; CHECK-NEXT: LV(REG): Found max usage: 7 ; CHECK: LV(REG): VF = 16