1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[LV] Fix crash when computing max VF too early

D90687 introduced a crash:

  llvm::LoopVectorizationCostModel::computeMaxVF(llvm::ElementCount, unsigned int):
    Assertion `WideningDecisions.empty() && Uniforms.empty() && Scalars.empty() &&
    "No decisions should have been taken at this point"' failed.

when compiling the following C code:

  typedef struct {
  char a;
  } b;

  b *c;
  int d, e;

  int f() {
    int g = 0;
    for (; d; d++) {
      e = 0;
      for (; e < c[d].a; e++)
        g++;
    }
    return g;
  }

with:

  clang -Os -target hexagon -mhvx -fvectorize -mv67 testcase.c -S -o -

This occurred because, prior to D90687, computeFeasibleMaxVF would only be
called in computeMaxVF when a scalar epilogue was allowed, but now it's
always called. This triggers the assert above, since computeFeasibleMaxVF
collects all viable VFs larger than the default MaxVF and, for each VF,
calculates the register usage, which performs the very analysis the
assert guards against. This can occur in computeFeasibleMaxVF if
TTI.shouldMaximizeVectorBandwidth returns true, and this target hook is
implemented in the Hexagon backend to always return true.

Reported by @iajbar.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D94869
This commit is contained in:
Cullen Rhodes 2021-01-16 16:08:40 +00:00
parent 77ed25e06f
commit a70388496a
2 changed files with 32 additions and 4 deletions

View File

@ -5505,11 +5505,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return None;
}
ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
switch (ScalarEpilogueStatus) {
case CM_ScalarEpilogueAllowed:
return MaxVF;
return computeFeasibleMaxVF(TC, UserVF);
case CM_ScalarEpilogueNotAllowedUsePredicate:
LLVM_FALLTHROUGH;
case CM_ScalarEpilogueNotNeededUsePredicate:
@ -5547,7 +5545,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
"scalar epilogue instead.\n");
ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
return MaxVF;
return computeFeasibleMaxVF(TC, UserVF);
}
return None;
}
@ -5564,6 +5562,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
}
ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
assert(!MaxVF.isScalable() &&
"Scalable vectors do not yet support tail folding");
assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&

View File

@ -0,0 +1,29 @@
; Regression test for the LoopVectorize crash fixed in D94869: computing the
; maximum VF too early tripped the "No decisions should have been taken at
; this point" assertion in LoopVectorizationCostModel::computeMaxVF.
; RUN: opt -march=hexagon -hexagon-autohvx -loop-vectorize -S < %s 2>&1 | FileCheck %s
; Check that we don't crash.
; CHECK-LABEL: @f
; CHECK: vector.body
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"
; Function Attrs: optsize
; Single-block loop used as the vectorizer input for this test.
; NOTE(review): presumably reduced from the C reproducer in the commit
; message; the undef operands suggest the values themselves are irrelevant
; and only the loop shape matters - confirm.
define i32 @f() #0 {
entry:
br label %loop
loop:
; %g.016 carries the running result across iterations (starts at 0).
%g.016 = phi i32 [ 0, %entry ], [ %g.1.lcssa, %loop ]
; %iv is the induction variable: starts at 0 and is incremented by 1 below.
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; The loaded value %0 is not used by any later instruction in this function.
%0 = load i8, i8* undef, align 1
%g.1.lcssa = add i32 %g.016, undef
%iv.next = add nsw i32 %iv, 1
; Leave the loop once the incremented induction variable compares equal to 0.
%exitcond = icmp eq i32 %iv.next, 0
br i1 %exitcond, label %exit, label %loop
exit:
ret i32 %g.1.lcssa
}
; Attribute group #0 (applied to @f): optsize plus the +hvx-length128b target
; feature.
; NOTE(review): optsize is presumably what disallows a scalar epilogue and so
; steers computeMaxVF onto the path that used to assert - confirm.
attributes #0 = { optsize "target-features"="+hvx-length128b" }