Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-10-18 18:42:46 +02:00)

[LV] Prevent vectorization with unsupported element types.

This patch adds a TTI function, isElementTypeLegalForScalableVector, to query
whether it is possible to vectorize a given element type. This is called by
isLegalToVectorizeInstTypesForScalable to reject scalable vectorization if
any of the instruction types in the loop are unsupported, e.g:

  int foo(__int128_t* ptr, int N) {
    #pragma clang loop vectorize_width(4, scalable)
    for (int i=0; i<N; ++i)
      ptr[i] = ptr[i] + 42;
  }

This example currently crashes if we attempt to vectorize since i128 is not a
supported type for scalable vectorization.

Reviewed By: sdesmalen, david-arm

Differential Revision: https://reviews.llvm.org/D102253
This commit is contained in:
Kerry McLaughlin 2021-07-06 10:49:43 +01:00
parent f69dc8533f
commit 8dd39c43b3
8 changed files with 142 additions and 8 deletions

View File

@ -1327,6 +1327,9 @@ public:
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
/// \returns True if the given type is supported for scalable vectors
bool isElementTypeLegalForScalableVector(Type *Ty) const;
/// \returns The new vector factor value if the target doesn't support \p
/// SizeInBytes loads or has a better vector factor.
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
@ -1710,6 +1713,7 @@ public:
unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const = 0;
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
@ -2261,6 +2265,9 @@ public:
ElementCount VF) const override {
return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
}
/// Concrete override in the TTI type-erasure Model: forwards the
/// element-type legality query to the wrapped target implementation
/// (matches the pure-virtual declaration added in the Concept above).
bool isElementTypeLegalForScalableVector(Type *Ty) const override {
return Impl.isElementTypeLegalForScalableVector(Ty);
}
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const override {

View File

@ -721,6 +721,8 @@ public:
return true;
}
/// Base-implementation default: treat every element type as legal for
/// scalable vectors. Targets with restrictions (e.g. AArch64/SVE, which
/// rejects i128/fp128) provide their own version to narrow this.
bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {

View File

@ -1003,6 +1003,10 @@ bool TargetTransformInfo::isLegalToVectorizeReduction(
return TTIImpl->isLegalToVectorizeReduction(RdxDesc, VF);
}
// Public TargetTransformInfo entry point: dispatches through the TTIImpl
// pointer to the target's concrete implementation.
bool TargetTransformInfo::isElementTypeLegalForScalableVector(Type *Ty) const {
return TTIImpl->isElementTypeLegalForScalableVector(Ty);
}
unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
unsigned LoadSize,
unsigned ChainSizeInBytes,

View File

@ -1736,7 +1736,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
return true;
Type *Ty = RdxDesc.getRecurrenceType();
if (Ty->isBFloatTy() || !isLegalElementTypeForSVE(Ty))
if (Ty->isBFloatTy() || !isElementTypeLegalForScalableVector(Ty))
return false;
switch (RdxDesc.getRecurrenceKind()) {

View File

@ -208,7 +208,7 @@ public:
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
bool isLegalElementTypeForSVE(Type *Ty) const {
bool isElementTypeLegalForScalableVector(Type *Ty) const {
if (Ty->isPointerTy())
return true;
@ -218,7 +218,7 @@ public:
if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
return true;
if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
return true;
@ -233,7 +233,8 @@ public:
if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
return false; // Fall back to scalarization of masked operations.
return isLegalElementTypeForSVE(DataType->getScalarType());
return !DataType->getScalarType()->isIntegerTy(1) &&
isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@ -254,7 +255,8 @@ public:
DataTypeFVTy->getNumElements() < 2))
return false;
return isLegalElementTypeForSVE(DataType->getScalarType());
return !DataType->getScalarType()->isIntegerTy(1) &&
isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) const {

View File

@ -1519,7 +1519,7 @@ public:
/// Returns true if the target machine supports all of the reduction
/// variables found for the given VF.
bool canVectorizeReductions(ElementCount VF) {
bool canVectorizeReductions(ElementCount VF) const {
return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
const RecurrenceDescriptor &RdxDesc = Reduction.second;
return TTI.isLegalToVectorizeReduction(RdxDesc, VF);
@ -5677,12 +5677,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
auto MaxScalableVF = ElementCount::getScalable(
std::numeric_limits<ElementCount::ScalarTy>::max());
// Disable scalable vectorization if the loop contains unsupported reductions.
// Test that the loop-vectorizer can legalize all operations for this MaxVF.
// FIXME: While for scalable vectors this is currently sufficient, this should
// be replaced by a more detailed mechanism that filters out specific VFs,
// instead of invalidating vectorization for a whole set of VFs based on the
// MaxVF.
// Disable scalable vectorization if the loop contains unsupported reductions.
if (!canVectorizeReductions(MaxScalableVF)) {
reportVectorizationInfo(
"Scalable vectorization not supported for the reduction "
@ -5691,6 +5692,18 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
return ElementCount::getScalable(0);
}
// Disable scalable vectorization if the loop contains any instructions
// with element types not supported for scalable vectors.
if (any_of(ElementTypesInLoop, [&](Type *Ty) {
return !Ty->isVoidTy() &&
!this->TTI.isElementTypeLegalForScalableVector(Ty);
})) {
reportVectorizationInfo("Scalable vectorization is not supported "
"for all element types found in this loop.",
"ScalableVFUnfeasible", ORE, TheLoop);
return ElementCount::getScalable(0);
}
if (Legal->isSafeForAnyVectorWidth())
return MaxScalableVF;

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
; Reduction can be vectorized

View File

@ -0,0 +1,106 @@
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS
target triple = "aarch64-linux-gnu"
; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) {
; CHECK-LABEL: @loop_sve_i128
; CHECK: vector.body
; CHECK: %[[LOAD1:.*]] = load i128, i128* {{.*}}
; CHECK-NEXT: %[[LOAD2:.*]] = load i128, i128* {{.*}}
; CHECK-NEXT: %[[ADD1:.*]] = add nsw i128 %[[LOAD1]], 42
; CHECK-NEXT: %[[ADD2:.*]] = add nsw i128 %[[LOAD2]], 42
; CHECK-NEXT: store i128 %[[ADD1]], i128* {{.*}}
; CHECK-NEXT: store i128 %[[ADD2]], i128* {{.*}}
; Simple loop: ptr[i] = ptr[i] + 42 over i128 elements. The loop carries the
; scalable-vectorize hint (!llvm.loop !0), but i128 is not a legal element
; type for scalable vectors, so the CHECK lines above expect scalar i128
; loads/adds/stores inside vector.body (two copies — presumably the
; non-scalable fallback; confirm against the vectorizer's chosen VF/IC).
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
%0 = load i128, i128* %arrayidx, align 16
%add = add nsw i128 %0, 42
store i128 %add, i128* %arrayidx, align 16
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
for.end:
ret void
}
; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
define dso_local void @loop_sve_f128(fp128* nocapture %ptr, i64 %N) {
; CHECK-LABEL: @loop_sve_f128
; CHECK: vector.body
; CHECK: %[[LOAD1:.*]] = load fp128, fp128*
; CHECK-NEXT: %[[LOAD2:.*]] = load fp128, fp128*
; CHECK-NEXT: %[[FSUB1:.*]] = fsub fp128 %[[LOAD1]], 0xL00000000000000008000000000000000
; CHECK-NEXT: %[[FSUB2:.*]] = fsub fp128 %[[LOAD2]], 0xL00000000000000008000000000000000
; CHECK-NEXT: store fp128 %[[FSUB1]], fp128* {{.*}}
; CHECK-NEXT: store fp128 %[[FSUB2]], fp128* {{.*}}
; Floating-point variant of the test above: ptr[i] = ptr[i] - (-0.0) over
; fp128 elements, with the scalable-vectorize hint attached. fp128 is also
; not a legal scalable element type, so the CHECKs expect scalar fp128
; operations inside vector.body rather than <vscale x ...> types.
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds fp128, fp128* %ptr, i64 %iv
%0 = load fp128, fp128* %arrayidx, align 16
%add = fsub fp128 %0, 0xL00000000000000008000000000000000
store fp128 %add, fp128* %arrayidx, align 16
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
for.end:
ret void
}
; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
define dso_local void @loop_invariant_sve_i128(i128* nocapture %ptr, i128 %val, i64 %N) {
; CHECK-LABEL: @loop_invariant_sve_i128
; CHECK: vector.body
; CHECK: %[[GEP1:.*]] = getelementptr inbounds i128, i128* %ptr
; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i128, i128* %ptr
; CHECK-NEXT: store i128 %val, i128* %[[GEP1]]
; CHECK-NEXT: store i128 %val, i128* %[[GEP2]]
; Loop-invariant store variant: every iteration stores the same i128 %val to
; ptr[i]. Exercises the element-type check when the illegal type (i128) only
; appears as a stored loop-invariant value — the CHECKs again expect scalar
; i128 stores inside vector.body, not scalable vector ones.
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
store i128 %val, i128* %arrayidx, align 16
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
for.end:
ret void
}
define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) {
; CHECK-LABEL: @loop_fixed_width_i128
; CHECK: load <4 x i128>, <4 x i128>*
; CHECK: add nsw <4 x i128> {{.*}}, <i128 42, i128 42, i128 42, i128 42>
; CHECK: store <4 x i128> {{.*}} <4 x i128>*
; CHECK-NOT: vscale
; Negative control: same ptr[i] += 42 loop but WITHOUT the scalable loop
; metadata (note the backedge branch below has no !llvm.loop). Fixed-width
; vectorization of i128 is still fine — the CHECKs require <4 x i128> ops
; and CHECK-NOT forbids any vscale (scalable) types in the output.
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
%0 = load i128, i128* %arrayidx, align 16
%add = add nsw i128 %0, 42
store i128 %add, i128* %arrayidx, align 16
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}