mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 10:32:48 +02:00
[LV] Prevent vectorization with unsupported element types.
This patch adds a TTI function, isElementTypeLegalForScalableVector, to query
whether it is possible to vectorize a given element type. This is called by
isLegalToVectorizeInstTypesForScalable to reject scalable vectorization if any
of the instruction types in the loop are unsupported, e.g.:

  int foo(__int128_t* ptr, int N)
    #pragma clang loop vectorize_width(4, scalable)
    for (int i=0; i<N; ++i)
      ptr[i] = ptr[i] + 42;

This example currently crashes if we attempt to vectorize since i128 is not a
supported type for scalable vectorization.

Reviewed By: sdesmalen, david-arm

Differential Revision: https://reviews.llvm.org/D102253
This commit is contained in:
parent
f69dc8533f
commit
8dd39c43b3
@ -1327,6 +1327,9 @@ public:
|
||||
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
|
||||
ElementCount VF) const;
|
||||
|
||||
/// \returns True if the given type \p Ty is supported as an element type
/// for scalable vectors.
|
||||
bool isElementTypeLegalForScalableVector(Type *Ty) const;
|
||||
|
||||
/// \returns The new vector factor value if the target doesn't support \p
|
||||
/// SizeInBytes loads or has a better vector factor.
|
||||
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
|
||||
@ -1710,6 +1713,7 @@ public:
|
||||
unsigned AddrSpace) const = 0;
|
||||
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
|
||||
ElementCount VF) const = 0;
|
||||
/// Pure virtual form of the scalable-vector element type query; it has no
/// body here and must be provided by an overriding class.
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
|
||||
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
|
||||
unsigned ChainSizeInBytes,
|
||||
VectorType *VecTy) const = 0;
|
||||
@ -2261,6 +2265,9 @@ public:
|
||||
ElementCount VF) const override {
|
||||
return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
|
||||
}
|
||||
/// Override that forwards the scalable-vector element type query for \p Ty
/// to Impl and returns its answer unchanged.
bool isElementTypeLegalForScalableVector(Type *Ty) const override {
|
||||
return Impl.isElementTypeLegalForScalableVector(Ty);
|
||||
}
|
||||
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
|
||||
unsigned ChainSizeInBytes,
|
||||
VectorType *VecTy) const override {
|
||||
|
@ -721,6 +721,8 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
/// This implementation ignores \p Ty and unconditionally returns true, i.e.
/// it treats every element type as legal for scalable vectors.
bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
|
||||
|
||||
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
|
||||
unsigned ChainSizeInBytes,
|
||||
VectorType *VecTy) const {
|
||||
|
@ -1003,6 +1003,10 @@ bool TargetTransformInfo::isLegalToVectorizeReduction(
|
||||
return TTIImpl->isLegalToVectorizeReduction(RdxDesc, VF);
|
||||
}
|
||||
|
||||
// Delegates the scalable-vector element type query for Ty to TTIImpl and
// returns whatever that implementation reports.
bool TargetTransformInfo::isElementTypeLegalForScalableVector(Type *Ty) const {
|
||||
return TTIImpl->isElementTypeLegalForScalableVector(Ty);
|
||||
}
|
||||
|
||||
unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
|
||||
unsigned LoadSize,
|
||||
unsigned ChainSizeInBytes,
|
||||
|
@ -1736,7 +1736,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
|
||||
return true;
|
||||
|
||||
Type *Ty = RdxDesc.getRecurrenceType();
|
||||
if (Ty->isBFloatTy() || !isLegalElementTypeForSVE(Ty))
|
||||
if (Ty->isBFloatTy() || !isElementTypeLegalForScalableVector(Ty))
|
||||
return false;
|
||||
|
||||
switch (RdxDesc.getRecurrenceKind()) {
|
||||
|
@ -208,7 +208,7 @@ public:
|
||||
|
||||
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
|
||||
|
||||
bool isLegalElementTypeForSVE(Type *Ty) const {
|
||||
bool isElementTypeLegalForScalableVector(Type *Ty) const {
|
||||
if (Ty->isPointerTy())
|
||||
return true;
|
||||
|
||||
@ -218,7 +218,7 @@ public:
|
||||
if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
|
||||
return true;
|
||||
|
||||
if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
|
||||
if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
|
||||
Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
|
||||
return true;
|
||||
|
||||
@ -233,7 +233,8 @@ public:
|
||||
if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
|
||||
return false; // Fall back to scalarization of masked operations.
|
||||
|
||||
return isLegalElementTypeForSVE(DataType->getScalarType());
|
||||
return !DataType->getScalarType()->isIntegerTy(1) &&
|
||||
isElementTypeLegalForScalableVector(DataType->getScalarType());
|
||||
}
|
||||
|
||||
bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
|
||||
@ -254,7 +255,8 @@ public:
|
||||
DataTypeFVTy->getNumElements() < 2))
|
||||
return false;
|
||||
|
||||
return isLegalElementTypeForSVE(DataType->getScalarType());
|
||||
return !DataType->getScalarType()->isIntegerTy(1) &&
|
||||
isElementTypeLegalForScalableVector(DataType->getScalarType());
|
||||
}
|
||||
|
||||
bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
|
||||
|
@ -1519,7 +1519,7 @@ public:
|
||||
|
||||
/// Returns true if the target machine supports all of the reduction
|
||||
/// variables found for the given VF.
|
||||
bool canVectorizeReductions(ElementCount VF) {
|
||||
bool canVectorizeReductions(ElementCount VF) const {
|
||||
return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
|
||||
const RecurrenceDescriptor &RdxDesc = Reduction.second;
|
||||
return TTI.isLegalToVectorizeReduction(RdxDesc, VF);
|
||||
@ -5677,12 +5677,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
|
||||
auto MaxScalableVF = ElementCount::getScalable(
|
||||
std::numeric_limits<ElementCount::ScalarTy>::max());
|
||||
|
||||
// Disable scalable vectorization if the loop contains unsupported reductions.
|
||||
// Test that the loop-vectorizer can legalize all operations for this MaxVF.
|
||||
// FIXME: While for scalable vectors this is currently sufficient, this should
|
||||
// be replaced by a more detailed mechanism that filters out specific VFs,
|
||||
// instead of invalidating vectorization for a whole set of VFs based on the
|
||||
// MaxVF.
|
||||
|
||||
// Disable scalable vectorization if the loop contains unsupported reductions.
|
||||
if (!canVectorizeReductions(MaxScalableVF)) {
|
||||
reportVectorizationInfo(
|
||||
"Scalable vectorization not supported for the reduction "
|
||||
@ -5691,6 +5692,18 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
|
||||
return ElementCount::getScalable(0);
|
||||
}
|
||||
|
||||
// Disable scalable vectorization if the loop contains any instructions
|
||||
// with element types not supported for scalable vectors.
|
||||
if (any_of(ElementTypesInLoop, [&](Type *Ty) {
|
||||
return !Ty->isVoidTy() &&
|
||||
!this->TTI.isElementTypeLegalForScalableVector(Ty);
|
||||
})) {
|
||||
reportVectorizationInfo("Scalable vectorization is not supported "
|
||||
"for all element types found in this loop.",
|
||||
"ScalableVFUnfeasible", ORE, TheLoop);
|
||||
return ElementCount::getScalable(0);
|
||||
}
|
||||
|
||||
if (Legal->isSafeForAnyVectorWidth())
|
||||
return MaxScalableVF;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
|
||||
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
|
||||
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
|
||||
|
||||
; Reduction can be vectorized
|
||||
|
106
test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
Normal file
106
test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
Normal file
@ -0,0 +1,106 @@
|
||||
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS
|
||||
target triple = "aarch64-linux-gnu"
|
||||
|
||||
; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
|
||||
; Loop that loads an i128 from %ptr[%iv], adds 42 and stores it back. The
; back-edge branch carries !llvm.loop !0, which enables scalable vectorization.
; The FileCheck directives below expect the vector.body to contain scalar i128
; loads, adds and stores (two of each) instead of scalable vector operations.
define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) {
|
||||
; CHECK-LABEL: @loop_sve_i128
|
||||
; CHECK: vector.body
|
||||
; CHECK: %[[LOAD1:.*]] = load i128, i128* {{.*}}
|
||||
; CHECK-NEXT: %[[LOAD2:.*]] = load i128, i128* {{.*}}
|
||||
; CHECK-NEXT: %[[ADD1:.*]] = add nsw i128 %[[LOAD1]], 42
|
||||
; CHECK-NEXT: %[[ADD2:.*]] = add nsw i128 %[[LOAD2]], 42
|
||||
; CHECK-NEXT: store i128 %[[ADD1]], i128* {{.*}}
|
||||
; CHECK-NEXT: store i128 %[[ADD2]], i128* {{.*}}
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
|
||||
%0 = load i128, i128* %arrayidx, align 16
|
||||
%add = add nsw i128 %0, 42
|
||||
store i128 %add, i128* %arrayidx, align 16
|
||||
%iv.next = add i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, %N
|
||||
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
|
||||
; Same shape as the i128 loop, but on fp128: each iteration loads %ptr[%iv],
; applies an fsub with an fp128 constant and stores the result back. The
; back-edge branch carries !llvm.loop !0 (scalable vectorization enabled).
; The FileCheck directives below expect scalar fp128 loads, fsubs and stores
; (two of each) inside the vector.body.
define dso_local void @loop_sve_f128(fp128* nocapture %ptr, i64 %N) {
|
||||
; CHECK-LABEL: @loop_sve_f128
|
||||
; CHECK: vector.body
|
||||
; CHECK: %[[LOAD1:.*]] = load fp128, fp128*
|
||||
; CHECK-NEXT: %[[LOAD2:.*]] = load fp128, fp128*
|
||||
; CHECK-NEXT: %[[FSUB1:.*]] = fsub fp128 %[[LOAD1]], 0xL00000000000000008000000000000000
|
||||
; CHECK-NEXT: %[[FSUB2:.*]] = fsub fp128 %[[LOAD2]], 0xL00000000000000008000000000000000
|
||||
; CHECK-NEXT: store fp128 %[[FSUB1]], fp128* {{.*}}
|
||||
; CHECK-NEXT: store fp128 %[[FSUB2]], fp128* {{.*}}
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds fp128, fp128* %ptr, i64 %iv
|
||||
%0 = load fp128, fp128* %arrayidx, align 16
|
||||
%add = fsub fp128 %0, 0xL00000000000000008000000000000000
|
||||
store fp128 %add, fp128* %arrayidx, align 16
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, %N
|
||||
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
|
||||
; Loop whose only i128 use is storing the loop-invariant argument %val to
; %ptr[%iv]; there is no i128 load or arithmetic in the body. The back-edge
; branch carries !llvm.loop !0 (scalable vectorization enabled).
; The FileCheck directives below expect two getelementptrs followed by two
; scalar stores of %val inside the vector.body.
define dso_local void @loop_invariant_sve_i128(i128* nocapture %ptr, i128 %val, i64 %N) {
|
||||
; CHECK-LABEL: @loop_invariant_sve_i128
|
||||
; CHECK: vector.body
|
||||
; CHECK: %[[GEP1:.*]] = getelementptr inbounds i128, i128* %ptr
|
||||
; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i128, i128* %ptr
|
||||
; CHECK-NEXT: store i128 %val, i128* %[[GEP1]]
|
||||
; CHECK-NEXT: store i128 %val, i128* %[[GEP2]]
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
|
||||
store i128 %val, i128* %arrayidx, align 16
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, %N
|
||||
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Same i128 load/add-42/store loop as @loop_sve_i128, but the back-edge branch
; has no !llvm.loop metadata attached. The FileCheck directives below expect
; fixed-width <4 x i128> load, add and store instructions and no occurrence of
; "vscale" in the output.
define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) {
|
||||
; CHECK-LABEL: @loop_fixed_width_i128
|
||||
; CHECK: load <4 x i128>, <4 x i128>*
|
||||
; CHECK: add nsw <4 x i128> {{.*}}, <i128 42, i128 42, i128 42, i128 42>
|
||||
; CHECK: store <4 x i128> {{.*}} <4 x i128>*
|
||||
; CHECK-NOT: vscale
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
|
||||
%0 = load i128, i128* %arrayidx, align 16
|
||||
%add = add nsw i128 %0, 42
|
||||
store i128 %add, i128* %arrayidx, align 16
|
||||
%iv.next = add i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, %N
|
||||
br i1 %exitcond.not, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Loop metadata referenced as "!llvm.loop !0" by the loops above that request
; scalable vectorization: !0 is the self-referential loop node and !1 is the
; hint that sets llvm.loop.vectorize.scalable.enable to true.
!0 = distinct !{!0, !1}
|
||||
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
|
Loading…
Reference in New Issue
Block a user