1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

[LoopVectorize] Fix crash for predicated instruction with scalable VF

This patch avoids computing discounts for predicated instructions  when the
VF is scalable.
There is no support for vectorization of loops with division because the
vectorizer cannot guarantee that zero divisions will not happen.

This loop now does not use VF scalable

```
for (long long i = 0; i < n; i++)
    if (cond[i])
      a[i] /= b[i];
```

Differential Revision: https://reviews.llvm.org/D101916
This commit is contained in:
Caroline Concatto 2021-05-05 15:20:16 +01:00
parent ceddd7eb41
commit 2a337676d3
2 changed files with 101 additions and 1 deletions

View File

@ -6745,9 +6745,11 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
for (Instruction &I : *BB)
if (isScalarWithPredication(&I)) {
ScalarCostsTy ScalarCosts;
// Do not apply discount if scalable, because that would lead to
// invalid scalarization costs.
// Do not apply discount logic if hacked cost is needed
// for emulated masked memrefs.
if (!useEmulatedMaskMemRefHack(&I) &&
if (!VF.isScalable() && !useEmulatedMaskMemRefHack(&I) &&
computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
// Remember that BB will remain after vectorization.
@ -7547,6 +7549,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
ScalarPredicatedBB = true;
if (ScalarPredicatedBB) {
// Not possible to scalarize scalable vector with predicated instructions.
if (VF.isScalable())
return InstructionCost::getInvalid();
// Return cost for branches around scalarized and predicated blocks.
auto *Vec_i1Ty =
VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);

View File

@ -0,0 +1,95 @@
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
; The test predication_in_loop corresponds
; to the following function
; for (long long i = 0; i < 1024; i++) {
; if (cond[i])
; a[i] /= b[i];
; }
; Scalarizing the division cannot be done for scalable vectors at the moment
; when the loop needs predication
; Future implementation of llvm.vp could allow this to happen
define void @predication_in_loop(i32* %a, i32* %b, i32* %cond) #0 {
; CHECK-LABEL: @predication_in_loop
; CHECK-NOT: sdiv <vscale x 4 x i32>
;
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.inc, %entry
ret void
for.body: ; preds = %entry, %for.inc
%i.09 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %cond, i64 %i.09
%0 = load i32, i32* %arrayidx, align 4
%tobool.not = icmp eq i32 %0, 0
br i1 %tobool.not, label %for.inc, label %if.then
if.then: ; preds = %for.body
%arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %i.09
%1 = load i32, i32* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.09
%2 = load i32, i32* %arrayidx2, align 4
%div = sdiv i32 %2, %1
store i32 %div, i32* %arrayidx2, align 4
br label %for.inc
for.inc: ; preds = %for.body, %if.then
%inc = add nuw nsw i64 %i.09, 1
%exitcond.not = icmp eq i64 %inc, 1024
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}
;
; This test unpredicated_loop_predication_through_tailfolding corresponds
; to the following function
; for (long long i = 0; i < 1024; i++) {
; a[i] /= b[i];
; }
; Scalarization not possible in the main loop when there is no predication and
; epilogue should not be able to allow scalarization
; otherwise it could be able to vectorize, but will not because
; "Max legal vector width too small, scalable vectorization unfeasible.."
define void @unpredicated_loop_predication_through_tailfolding(i32* %a, i32* %b) #0 {
; CHECK-LABEL: @unpredicated_loop_predication_through_tailfolding
; CHECK-NOT: sdiv <vscale x 4 x i32>
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
%1 = load i32, i32* %arrayidx2, align 4
%sdiv = sdiv i32 %1, %0
%2 = add nuw nsw i64 %iv, 8
%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
store i32 %sdiv, i32* %arrayidx5, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0
exit:
ret void
}
attributes #0 = { "target-features"="+sve" }
!0 = distinct !{!0, !1, !2, !3, !4}
!1 = !{!"llvm.loop.vectorize.width", i32 4}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!3 = !{!"llvm.loop.interleave.count", i32 1}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}