From 8dd39c43b3872ecb1cd6cb707fab0f2fc876db65 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin@arm.com>
Date: Tue, 6 Jul 2021 10:49:43 +0100
Subject: [PATCH] [LV] Prevent vectorization with unsupported element types.

This patch adds a TTI function, isElementTypeLegalForScalableVector, to query
whether it is possible to vectorize a given element type. This is called by
isLegalToVectorizeInstTypesForScalable to reject scalable vectorization if
any of the instruction types in the loop are unsupported, e.g:

  int foo(__int128_t* ptr, int N)
    #pragma clang loop vectorize_width(4, scalable)
    for (int i=0; i<N; ++i)
      ptr[i] = ptr[i] + 42;

This example currently crashes if we attempt to vectorize since i128 is not a
supported type for scalable vectorization.

Reviewed By: sdesmalen, david-arm

Differential Revision: https://reviews.llvm.org/D102253
---
 include/llvm/Analysis/TargetTransformInfo.h   |   7 ++
 .../llvm/Analysis/TargetTransformInfoImpl.h   |   2 +
 lib/Analysis/TargetTransformInfo.cpp          |   4 +
 .../AArch64/AArch64TargetTransformInfo.cpp    |   2 +-
 .../AArch64/AArch64TargetTransformInfo.h      |  10 +-
 lib/Transforms/Vectorize/LoopVectorize.cpp    |  17 ++-
 .../AArch64/scalable-reductions.ll            |   2 +-
 .../LoopVectorize/AArch64/sve-illegal-type.ll | 106 ++++++++++++++++++
 8 files changed, 142 insertions(+), 8 deletions(-)
 create mode 100644 test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll

diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index c222593c934..a9b103b5457 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -1327,6 +1327,9 @@ public:
   bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                    ElementCount VF) const;
 
+  /// \returns True if the given type is supported for scalable vectors
+  bool isElementTypeLegalForScalableVector(Type *Ty) const;
+
   /// \returns The new vector factor value if the target doesn't support \p
   /// SizeInBytes loads or has a better vector factor.
   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
@@ -1710,6 +1713,7 @@ public:
                                             unsigned AddrSpace) const = 0;
   virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                            ElementCount VF) const = 0;
+  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
   virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const = 0;
@@ -2261,6 +2265,9 @@ public:
                                    ElementCount VF) const override {
     return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
   }
+  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
+    return Impl.isElementTypeLegalForScalableVector(Ty);
+  }
   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index 657e8d81aa7..eff4a7cdb85 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -721,6 +721,8 @@ public:
     return true;
   }
 
+  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
+
   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 05894bf6954..3b89a980786 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -1003,6 +1003,10 @@ bool TargetTransformInfo::isLegalToVectorizeReduction(
   return TTIImpl->isLegalToVectorizeReduction(RdxDesc, VF);
 }
 
+bool TargetTransformInfo::isElementTypeLegalForScalableVector(Type *Ty) const {
+  return TTIImpl->isElementTypeLegalForScalableVector(Ty);
+}
+
 unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                   unsigned LoadSize,
                                                   unsigned ChainSizeInBytes,
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index cd1a8e6e435..6005852f171 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1736,7 +1736,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
     return true;
 
   Type *Ty = RdxDesc.getRecurrenceType();
-  if (Ty->isBFloatTy() || !isLegalElementTypeForSVE(Ty))
+  if (Ty->isBFloatTy() || !isElementTypeLegalForScalableVector(Ty))
     return false;
 
   switch (RdxDesc.getRecurrenceKind()) {
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 646d1db73e2..83956fbbe05 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -208,7 +208,7 @@ public:
 
   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
 
-  bool isLegalElementTypeForSVE(Type *Ty) const {
+  bool isElementTypeLegalForScalableVector(Type *Ty) const {
     if (Ty->isPointerTy())
       return true;
 
@@ -218,7 +218,7 @@ public:
     if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
       return true;
 
-    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
+    if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
         Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
       return true;
 
@@ -233,7 +233,8 @@ public:
     if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
       return false; // Fall back to scalarization of masked operations.
 
-    return isLegalElementTypeForSVE(DataType->getScalarType());
+    return !DataType->getScalarType()->isIntegerTy(1) &&
+           isElementTypeLegalForScalableVector(DataType->getScalarType());
   }
 
   bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@@ -254,7 +255,8 @@ public:
                          DataTypeFVTy->getNumElements() < 2))
       return false;
 
-    return isLegalElementTypeForSVE(DataType->getScalarType());
+    return !DataType->getScalarType()->isIntegerTy(1) &&
+           isElementTypeLegalForScalableVector(DataType->getScalarType());
   }
 
   bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index f3e5b020e8b..038822f7063 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1519,7 +1519,7 @@ public:
 
   /// Returns true if the target machine supports all of the reduction
   /// variables found for the given VF.
-  bool canVectorizeReductions(ElementCount VF) {
+  bool canVectorizeReductions(ElementCount VF) const {
     return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
       const RecurrenceDescriptor &RdxDesc = Reduction.second;
       return TTI.isLegalToVectorizeReduction(RdxDesc, VF);
@@ -5677,12 +5677,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
   auto MaxScalableVF = ElementCount::getScalable(
       std::numeric_limits<ElementCount::ScalarTy>::max());
 
-  // Disable scalable vectorization if the loop contains unsupported reductions.
   // Test that the loop-vectorizer can legalize all operations for this MaxVF.
   // FIXME: While for scalable vectors this is currently sufficient, this should
   // be replaced by a more detailed mechanism that filters out specific VFs,
   // instead of invalidating vectorization for a whole set of VFs based on the
   // MaxVF.
+
+  // Disable scalable vectorization if the loop contains unsupported reductions.
   if (!canVectorizeReductions(MaxScalableVF)) {
     reportVectorizationInfo(
         "Scalable vectorization not supported for the reduction "
@@ -5691,6 +5692,18 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
     return ElementCount::getScalable(0);
   }
 
+  // Disable scalable vectorization if the loop contains any instructions
+  // with element types not supported for scalable vectors.
+  if (any_of(ElementTypesInLoop, [&](Type *Ty) {
+        return !Ty->isVoidTy() &&
+               !this->TTI.isElementTypeLegalForScalableVector(Ty);
+      })) {
+    reportVectorizationInfo("Scalable vectorization is not supported "
+                            "for all element types found in this loop.",
+                            "ScalableVFUnfeasible", ORE, TheLoop);
+    return ElementCount::getScalable(0);
+  }
+
   if (Legal->isSafeForAnyVectorWidth())
     return MaxScalableVF;
 
diff --git a/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index e45c535a49a..21b8afddb1a 100644
--- a/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
+; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
 ; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
 
 ; Reduction can be vectorized
diff --git a/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll b/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
new file mode 100644
index 00000000000..b13cd9cff9d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
@@ -0,0 +1,106 @@
+; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS
+target triple = "aarch64-linux-gnu"
+
+; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
+define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) {
+; CHECK-LABEL: @loop_sve_i128
+; CHECK: vector.body
+; CHECK:  %[[LOAD1:.*]] = load i128, i128* {{.*}}
+; CHECK-NEXT: %[[LOAD2:.*]] = load i128, i128* {{.*}}
+; CHECK-NEXT: %[[ADD1:.*]] = add nsw i128 %[[LOAD1]], 42
+; CHECK-NEXT: %[[ADD2:.*]] = add nsw i128 %[[LOAD2]], 42
+; CHECK-NEXT: store i128 %[[ADD1]], i128* {{.*}}
+; CHECK-NEXT: store i128 %[[ADD2]], i128* {{.*}}
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
+  %0 = load i128, i128* %arrayidx, align 16
+  %add = add nsw i128 %0, 42
+  store i128 %add, i128* %arrayidx, align 16
+  %iv.next = add i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
+define dso_local void @loop_sve_f128(fp128* nocapture %ptr, i64 %N) {
+; CHECK-LABEL: @loop_sve_f128
+; CHECK: vector.body
+; CHECK: %[[LOAD1:.*]] = load fp128, fp128*
+; CHECK-NEXT: %[[LOAD2:.*]] = load fp128, fp128*
+; CHECK-NEXT: %[[FSUB1:.*]] = fsub fp128 %[[LOAD1]], 0xL00000000000000008000000000000000
+; CHECK-NEXT: %[[FSUB2:.*]] = fsub fp128 %[[LOAD2]], 0xL00000000000000008000000000000000
+; CHECK-NEXT: store fp128 %[[FSUB1]], fp128* {{.*}}
+; CHECK-NEXT: store fp128 %[[FSUB2]], fp128* {{.*}}
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds fp128, fp128* %ptr, i64 %iv
+  %0 = load fp128, fp128* %arrayidx, align 16
+  %add = fsub fp128 %0, 0xL00000000000000008000000000000000
+  store fp128 %add, fp128* %arrayidx, align 16
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
+define dso_local void @loop_invariant_sve_i128(i128* nocapture %ptr, i128 %val, i64 %N) {
+; CHECK-LABEL: @loop_invariant_sve_i128
+; CHECK: vector.body
+; CHECK: %[[GEP1:.*]] = getelementptr inbounds i128, i128* %ptr
+; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i128, i128* %ptr
+; CHECK-NEXT: store i128 %val, i128* %[[GEP1]]
+; CHECK-NEXT: store i128 %val, i128* %[[GEP2]]
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
+  store i128 %val, i128* %arrayidx, align 16
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) {
+; CHECK-LABEL: @loop_fixed_width_i128
+; CHECK: load <4 x i128>, <4 x i128>*
+; CHECK: add nsw <4 x i128> {{.*}}, <i128 42, i128 42, i128 42, i128 42>
+; CHECK: store <4 x i128> {{.*}} <4 x i128>*
+; CHECK-NOT: vscale
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
+  %0 = load i128, i128* %arrayidx, align 16
+  %add = add nsw i128 %0, 42
+  store i128 %add, i128* %arrayidx, align 16
+  %iv.next = add i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}