From 8dd39c43b3872ecb1cd6cb707fab0f2fc876db65 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Tue, 6 Jul 2021 10:49:43 +0100 Subject: [PATCH] [LV] Prevent vectorization with unsupported element types. This patch adds a TTI function, isElementTypeLegalForScalableVector, to query whether it is possible to vectorize a given element type. This is called by isLegalToVectorizeInstTypesForScalable to reject scalable vectorization if any of the instruction types in the loop are unsupported, e.g: int foo(__int128_t* ptr, int N) #pragma clang loop vectorize_width(4, scalable) for (int i=0; iisLegalToVectorizeReduction(RdxDesc, VF); } +bool TargetTransformInfo::isElementTypeLegalForScalableVector(Type *Ty) const { + return TTIImpl->isElementTypeLegalForScalableVector(Ty); +} + unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index cd1a8e6e435..6005852f171 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1736,7 +1736,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction( return true; Type *Ty = RdxDesc.getRecurrenceType(); - if (Ty->isBFloatTy() || !isLegalElementTypeForSVE(Ty)) + if (Ty->isBFloatTy() || !isElementTypeLegalForScalableVector(Ty)) return false; switch (RdxDesc.getRecurrenceKind()) { diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 646d1db73e2..83956fbbe05 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -208,7 +208,7 @@ public: bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); - bool isLegalElementTypeForSVE(Type *Ty) const { + bool isElementTypeLegalForScalableVector(Type *Ty) const { if (Ty->isPointerTy()) return true; @@ -218,7 +218,7 @@ public: if 
(Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) return true; - if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) || + if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) || Ty->isIntegerTy(32) || Ty->isIntegerTy(64)) return true; @@ -233,7 +233,8 @@ public: if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors()) return false; // Fall back to scalarization of masked operations. - return isLegalElementTypeForSVE(DataType->getScalarType()); + return !DataType->getScalarType()->isIntegerTy(1) && + isElementTypeLegalForScalableVector(DataType->getScalarType()); } bool isLegalMaskedLoad(Type *DataType, Align Alignment) { @@ -254,7 +255,8 @@ public: DataTypeFVTy->getNumElements() < 2)) return false; - return isLegalElementTypeForSVE(DataType->getScalarType()); + return !DataType->getScalarType()->isIntegerTy(1) && + isElementTypeLegalForScalableVector(DataType->getScalarType()); } bool isLegalMaskedGather(Type *DataType, Align Alignment) const { diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f3e5b020e8b..038822f7063 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1519,7 +1519,7 @@ public: /// Returns true if the target machine supports all of the reduction /// variables found for the given VF. - bool canVectorizeReductions(ElementCount VF) { + bool canVectorizeReductions(ElementCount VF) const { return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool { const RecurrenceDescriptor &RdxDesc = Reduction.second; return TTI.isLegalToVectorizeReduction(RdxDesc, VF); @@ -5677,12 +5677,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) { auto MaxScalableVF = ElementCount::getScalable( std::numeric_limits<ElementCount::ScalarTy>::max()); - // Disable scalable vectorization if the loop contains unsupported reductions. // Test that the loop-vectorizer can legalize all operations for this MaxVF. 
// FIXME: While for scalable vectors this is currently sufficient, this should // be replaced by a more detailed mechanism that filters out specific VFs, // instead of invalidating vectorization for a whole set of VFs based on the // MaxVF. + + // Disable scalable vectorization if the loop contains unsupported reductions. if (!canVectorizeReductions(MaxScalableVF)) { reportVectorizationInfo( "Scalable vectorization not supported for the reduction " @@ -5691,6 +5692,18 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) { return ElementCount::getScalable(0); } + // Disable scalable vectorization if the loop contains any instructions + // with element types not supported for scalable vectors. + if (any_of(ElementTypesInLoop, [&](Type *Ty) { + return !Ty->isVoidTy() && + !this->TTI.isElementTypeLegalForScalableVector(Ty); + })) { + reportVectorizationInfo("Scalable vectorization is not supported " + "for all element types found in this loop.", + "ScalableVFUnfeasible", ORE, TheLoop); + return ElementCount::getScalable(0); + } + if (Legal->isSafeForAnyVectorWidth()) return MaxScalableVF; diff --git a/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll index e45c535a49a..21b8afddb1a 100644 --- a/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll +++ b/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK +; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK ; RUN: cat %t | FileCheck %s 
-check-prefix=CHECK-REMARK ; Reduction can be vectorized diff --git a/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll b/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll new file mode 100644 index 00000000000..b13cd9cff9d --- /dev/null +++ b/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll @@ -0,0 +1,106 @@ +; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s +; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS +target triple = "aarch64-linux-gnu" + +; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop +define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) { +; CHECK-LABEL: @loop_sve_i128 +; CHECK: vector.body +; CHECK: %[[LOAD1:.*]] = load i128, i128* {{.*}} +; CHECK-NEXT: %[[LOAD2:.*]] = load i128, i128* {{.*}} +; CHECK-NEXT: %[[ADD1:.*]] = add nsw i128 %[[LOAD1]], 42 +; CHECK-NEXT: %[[ADD2:.*]] = add nsw i128 %[[LOAD2]], 42 +; CHECK-NEXT: store i128 %[[ADD1]], i128* {{.*}} +; CHECK-NEXT: store i128 %[[ADD2]], i128* {{.*}} +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv + %0 = load i128, i128* %arrayidx, align 16 + %add = add nsw i128 %0, 42 + store i128 %add, i128* %arrayidx, align 16 + %iv.next = add i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop +define dso_local void @loop_sve_f128(fp128* nocapture %ptr, i64 %N) { +; CHECK-LABEL: @loop_sve_f128 +; CHECK: vector.body +; CHECK: %[[LOAD1:.*]] = load fp128, fp128* +; CHECK-NEXT: %[[LOAD2:.*]] = load fp128, fp128* +; CHECK-NEXT: %[[FSUB1:.*]] = fsub fp128 %[[LOAD1]], 0xL00000000000000008000000000000000 +; 
CHECK-NEXT: %[[FSUB2:.*]] = fsub fp128 %[[LOAD2]], 0xL00000000000000008000000000000000 +; CHECK-NEXT: store fp128 %[[FSUB1]], fp128* {{.*}} +; CHECK-NEXT: store fp128 %[[FSUB2]], fp128* {{.*}} +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds fp128, fp128* %ptr, i64 %iv + %0 = load fp128, fp128* %arrayidx, align 16 + %add = fsub fp128 %0, 0xL00000000000000008000000000000000 + store fp128 %add, fp128* %arrayidx, align 16 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop +define dso_local void @loop_invariant_sve_i128(i128* nocapture %ptr, i128 %val, i64 %N) { +; CHECK-LABEL: @loop_invariant_sve_i128 +; CHECK: vector.body +; CHECK: %[[GEP1:.*]] = getelementptr inbounds i128, i128* %ptr +; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i128, i128* %ptr +; CHECK-NEXT: store i128 %val, i128* %[[GEP1]] +; CHECK-NEXT: store i128 %val, i128* %[[GEP2]] +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv + store i128 %val, i128* %arrayidx, align 16 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) { +; CHECK-LABEL: @loop_fixed_width_i128 +; CHECK: load <4 x i128>, <4 x i128>* +; CHECK: add nsw <4 x i128> {{.*}}, <i128 42, i128 42, i128 42, i128 42> +; CHECK: store <4 x i128> {{.*}} <4 x i128>* +; CHECK-NOT: vscale +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv + %0 = load i128, i128* 
%arrayidx, align 16 + %add = add nsw i128 %0, 42 + store i128 %add, i128* %arrayidx, align 16 + %iv.next = add i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}