Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-10-18 18:42:46 +02:00)

[LV] Prevent vectorization with unsupported element types.

This patch adds a TTI function, isElementTypeLegalForScalableVector, to query
whether it is possible to vectorize a given element type. This is called by
isLegalToVectorizeInstTypesForScalable to reject scalable vectorization if
any of the instruction types in the loop are unsupported, e.g:

  int foo(__int128_t* ptr, int N) {
    #pragma clang loop vectorize_width(4, scalable)
    for (int i=0; i<N; ++i)
      ptr[i] = ptr[i] + 42;
  }

This example currently crashes if we attempt to vectorize since i128 is not a
supported type for scalable vectorization.

Reviewed By: sdesmalen, david-arm

Differential Revision: https://reviews.llvm.org/D102253
This commit is contained in:
Kerry McLaughlin 2021-07-06 10:49:43 +01:00
parent f69dc8533f
commit 8dd39c43b3
8 changed files with 142 additions and 8 deletions

View File

@ -1327,6 +1327,9 @@ public:
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
/// \returns True if the given type is supported for scalable vectors
bool isElementTypeLegalForScalableVector(Type *Ty) const;
/// \returns The new vector factor value if the target doesn't support \p
/// SizeInBytes loads or has a better vector factor.
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
@ -1710,6 +1713,7 @@ public:
unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const = 0;
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
@ -2261,6 +2265,9 @@ public:
ElementCount VF) const override {
return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
}
/// Concrete override in the TTI type-erasure Model: forwards the
/// element-type legality query to the wrapped target implementation
/// (matches the pure-virtual declaration added in the Concept above).
bool isElementTypeLegalForScalableVector(Type *Ty) const override {
return Impl.isElementTypeLegalForScalableVector(Ty);
}
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const override {

View File

@ -721,6 +721,8 @@ public:
return true;
}
/// Base-implementation default: treat every element type as legal for
/// scalable vectors. Targets with restrictions (e.g. AArch64/SVE, which
/// rejects i128/fp128) provide their own version to narrow this.
bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {

View File

@ -1003,6 +1003,10 @@ bool TargetTransformInfo::isLegalToVectorizeReduction(
return TTIImpl->isLegalToVectorizeReduction(RdxDesc, VF);
}
// Public TargetTransformInfo entry point: dispatches through the TTIImpl
// pointer to the target's concrete implementation.
bool TargetTransformInfo::isElementTypeLegalForScalableVector(Type *Ty) const {
return TTIImpl->isElementTypeLegalForScalableVector(Ty);
}
unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
unsigned LoadSize,
unsigned ChainSizeInBytes,

View File

@ -1736,7 +1736,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
return true;
Type *Ty = RdxDesc.getRecurrenceType();
if (Ty->isBFloatTy() || !isLegalElementTypeForSVE(Ty))
if (Ty->isBFloatTy() || !isElementTypeLegalForScalableVector(Ty))
return false;
switch (RdxDesc.getRecurrenceKind()) {

View File

@ -208,7 +208,7 @@ public:
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
bool isLegalElementTypeForSVE(Type *Ty) const {
bool isElementTypeLegalForScalableVector(Type *Ty) const {
if (Ty->isPointerTy())
return true;
@ -218,7 +218,7 @@ public:
if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
return true;
if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
return true;
@ -233,7 +233,8 @@ public:
if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
return false; // Fall back to scalarization of masked operations.
return isLegalElementTypeForSVE(DataType->getScalarType());
return !DataType->getScalarType()->isIntegerTy(1) &&
isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@ -254,7 +255,8 @@ public:
DataTypeFVTy->getNumElements() < 2))
return false;
return isLegalElementTypeForSVE(DataType->getScalarType());
return !DataType->getScalarType()->isIntegerTy(1) &&
isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) const {

View File

@ -1519,7 +1519,7 @@ public:
/// Returns true if the target machine supports all of the reduction
/// variables found for the given VF.
bool canVectorizeReductions(ElementCount VF) {
bool canVectorizeReductions(ElementCount VF) const {
return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
const RecurrenceDescriptor &RdxDesc = Reduction.second;
return TTI.isLegalToVectorizeReduction(RdxDesc, VF);
@ -5677,12 +5677,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
auto MaxScalableVF = ElementCount::getScalable(
std::numeric_limits<ElementCount::ScalarTy>::max());
// Disable scalable vectorization if the loop contains unsupported reductions.
// Test that the loop-vectorizer can legalize all operations for this MaxVF.
// FIXME: While for scalable vectors this is currently sufficient, this should
// be replaced by a more detailed mechanism that filters out specific VFs,
// instead of invalidating vectorization for a whole set of VFs based on the
// MaxVF.
// Disable scalable vectorization if the loop contains unsupported reductions.
if (!canVectorizeReductions(MaxScalableVF)) {
reportVectorizationInfo(
"Scalable vectorization not supported for the reduction "
@ -5691,6 +5692,18 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
return ElementCount::getScalable(0);
}
// Disable scalable vectorization if the loop contains any instructions
// with element types not supported for scalable vectors.
if (any_of(ElementTypesInLoop, [&](Type *Ty) {
return !Ty->isVoidTy() &&
!this->TTI.isElementTypeLegalForScalableVector(Ty);
})) {
reportVectorizationInfo("Scalable vectorization is not supported "
"for all element types found in this loop.",
"ScalableVFUnfeasible", ORE, TheLoop);
return ElementCount::getScalable(0);
}
if (Legal->isSafeForAnyVectorWidth())
return MaxScalableVF;

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
; Reduction can be vectorized

View File

@ -0,0 +1,106 @@
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS
target triple = "aarch64-linux-gnu"
; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) {
; CHECK-LABEL: @loop_sve_i128
; CHECK: vector.body
; CHECK: %[[LOAD1:.*]] = load i128, i128* {{.*}}
; CHECK-NEXT: %[[LOAD2:.*]] = load i128, i128* {{.*}}
; CHECK-NEXT: %[[ADD1:.*]] = add nsw i128 %[[LOAD1]], 42
; CHECK-NEXT: %[[ADD2:.*]] = add nsw i128 %[[LOAD2]], 42
; CHECK-NEXT: store i128 %[[ADD1]], i128* {{.*}}
; CHECK-NEXT: store i128 %[[ADD2]], i128* {{.*}}
; Simple loop: ptr[i] = ptr[i] + 42 over i128 elements. The loop carries the
; scalable-vectorize hint (!llvm.loop !0), but i128 is not a legal element
; type for scalable vectors, so the CHECK lines above expect scalar i128
; loads/adds/stores inside vector.body (two copies — presumably the
; non-scalable fallback; confirm against the vectorizer's chosen VF/IC).
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
%0 = load i128, i128* %arrayidx, align 16
%add = add nsw i128 %0, 42
store i128 %add, i128* %arrayidx, align 16
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
for.end:
ret void
}
; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
define dso_local void @loop_sve_f128(fp128* nocapture %ptr, i64 %N) {
; CHECK-LABEL: @loop_sve_f128
; CHECK: vector.body
; CHECK: %[[LOAD1:.*]] = load fp128, fp128*
; CHECK-NEXT: %[[LOAD2:.*]] = load fp128, fp128*
; CHECK-NEXT: %[[FSUB1:.*]] = fsub fp128 %[[LOAD1]], 0xL00000000000000008000000000000000
; CHECK-NEXT: %[[FSUB2:.*]] = fsub fp128 %[[LOAD2]], 0xL00000000000000008000000000000000
; CHECK-NEXT: store fp128 %[[FSUB1]], fp128* {{.*}}
; CHECK-NEXT: store fp128 %[[FSUB2]], fp128* {{.*}}
; Floating-point variant of the test above: ptr[i] = ptr[i] - (-0.0) over
; fp128 elements, with the scalable-vectorize hint attached. fp128 is also
; not a legal scalable element type, so the CHECKs expect scalar fp128
; operations inside vector.body rather than <vscale x ...> types.
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds fp128, fp128* %ptr, i64 %iv
%0 = load fp128, fp128* %arrayidx, align 16
%add = fsub fp128 %0, 0xL00000000000000008000000000000000
store fp128 %add, fp128* %arrayidx, align 16
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
for.end:
ret void
}
; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
define dso_local void @loop_invariant_sve_i128(i128* nocapture %ptr, i128 %val, i64 %N) {
; CHECK-LABEL: @loop_invariant_sve_i128
; CHECK: vector.body
; CHECK: %[[GEP1:.*]] = getelementptr inbounds i128, i128* %ptr
; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i128, i128* %ptr
; CHECK-NEXT: store i128 %val, i128* %[[GEP1]]
; CHECK-NEXT: store i128 %val, i128* %[[GEP2]]
; Loop-invariant store variant: every iteration stores the same i128 %val to
; ptr[i]. Exercises the element-type check when the illegal type (i128) only
; appears as a stored loop-invariant value — the CHECKs again expect scalar
; i128 stores inside vector.body, not scalable vector ones.
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
store i128 %val, i128* %arrayidx, align 16
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
for.end:
ret void
}
define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) {
; CHECK-LABEL: @loop_fixed_width_i128
; CHECK: load <4 x i128>, <4 x i128>*
; CHECK: add nsw <4 x i128> {{.*}}, <i128 42, i128 42, i128 42, i128 42>
; CHECK: store <4 x i128> {{.*}} <4 x i128>*
; CHECK-NOT: vscale
; Negative control: same ptr[i] += 42 loop but WITHOUT the scalable loop
; metadata (note the backedge branch below has no !llvm.loop). Fixed-width
; vectorization of i128 is still fine — the CHECKs require <4 x i128> ops
; and CHECK-NOT forbids any vscale (scalable) types in the output.
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
%0 = load i128, i128* %arrayidx, align 16
%add = add nsw i128 %0, 42
store i128 %add, i128* %arrayidx, align 16
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}