
Revert "[SVE] Fix bug in simplification of scalable vector instructions"

This reverts commit 31574d38ac5fa4646cf01dd252a23e682402134f.

The newly added shufflevector test does not pass locally on either of my
workstations.
Author: Reid Kleckner
Date: 2020-02-03 11:12:09 -08:00
Parent: cd0c8f255c
Commit: a1c473cd39
7 changed files with 41 additions and 138 deletions
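
The whole diff toggles between two ways of asking a vector type for its size: the plain element count, which the revert restores, and ElementCount, which also records whether the vector is scalable (its real length is a runtime multiple of the minimum). Below is a minimal C++ sketch of that difference, written against LLVM headers of this period and not taken from the patch; the helper names and parameters are illustrative only.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

// Reverted behaviour: only the (minimum) number of elements is kept, so a
// scalable input such as <vscale x 4 x i32> would be rebuilt as a fixed
// <4 x ...> result type.
static VectorType *rebuildFixedWidth(Type *EltTy, Type *VecTy) {
  unsigned NumElem = VecTy->getVectorNumElements();
  return VectorType::get(EltTy, NumElem);
}

// Behaviour of the reverted patch: ElementCount carries {Min, Scalable}, so
// scalability survives when the result vector type is rebuilt.
static VectorType *rebuildWithElementCount(Type *EltTy, Type *VecTy) {
  ElementCount EltCount = VecTy->getVectorElementCount();
  return VectorType::get(EltTy, EltCount);
}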

View File

@@ -1060,13 +1060,13 @@ public:
                                    Ptr->getType()->getPointerAddressSpace());
     // Vector GEP
     if (Ptr->getType()->isVectorTy()) {
-      ElementCount EltCount = Ptr->getType()->getVectorElementCount();
-      return VectorType::get(PtrTy, EltCount);
+      unsigned NumElem = Ptr->getType()->getVectorNumElements();
+      return VectorType::get(PtrTy, NumElem);
     }
     for (Value *Index : IdxList)
       if (Index->getType()->isVectorTy()) {
-        ElementCount EltCount = Index->getType()->getVectorElementCount();
-        return VectorType::get(PtrTy, EltCount);
+        unsigned NumElem = Index->getType()->getVectorNumElements();
+        return VectorType::get(PtrTy, NumElem);
       }
     // Scalar GEP
     return PtrTy;

View File

@@ -4074,9 +4074,9 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
   Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1));
   Type *GEPTy = PointerType::get(LastType, AS);
   if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
-    GEPTy = VectorType::get(GEPTy, VT->getElementCount());
+    GEPTy = VectorType::get(GEPTy, VT->getNumElements());
   else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType()))
-    GEPTy = VectorType::get(GEPTy, VT->getElementCount());
+    GEPTy = VectorType::get(GEPTy, VT->getNumElements());

   if (isa<UndefValue>(Ops[0]))
     return UndefValue::get(GEPTy);
@@ -4445,66 +4445,52 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
     return UndefValue::get(RetTy);

   Type *InVecTy = Op0->getType();
-  ElementCount MaskEltCount = Mask->getType()->getVectorElementCount();
-  ElementCount InVecEltCount = InVecTy->getVectorElementCount();
-  assert(MaskEltCount.Scalable == InVecEltCount.Scalable &&
-         "vscale mismatch between input vector and mask");
-  bool Scalable = MaskEltCount.Scalable;
+  unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+  unsigned InVecNumElts = InVecTy->getVectorNumElements();

   SmallVector<int, 32> Indices;
-  if (!Scalable) {
-    ShuffleVectorInst::getShuffleMask(Mask, Indices);
-    assert(MaskEltCount.Min == Indices.size() &&
-           "Size of Indices not same as number of mask elements?");
-  }
+  ShuffleVectorInst::getShuffleMask(Mask, Indices);
+  assert(MaskNumElts == Indices.size() &&
+         "Size of Indices not same as number of mask elements?");

-  if (!Scalable) {
-    // Canonicalization: If mask does not select elements from an input vector,
-    // replace that input vector with undef.
-    bool MaskSelects0 = false, MaskSelects1 = false;
-    for (unsigned i = 0; i != MaskEltCount.Min; ++i) {
-      if (Indices[i] == -1)
-        continue;
-      if ((unsigned)Indices[i] < InVecEltCount.Min)
-        MaskSelects0 = true;
-      else
-        MaskSelects1 = true;
-    }
-    if (!MaskSelects0)
-      Op0 = UndefValue::get(InVecTy);
-    if (!MaskSelects1)
-      Op1 = UndefValue::get(InVecTy);
-  }
+  // Canonicalization: If mask does not select elements from an input vector,
+  // replace that input vector with undef.
+  bool MaskSelects0 = false, MaskSelects1 = false;
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (Indices[i] == -1)
+      continue;
+    if ((unsigned)Indices[i] < InVecNumElts)
+      MaskSelects0 = true;
+    else
+      MaskSelects1 = true;
+  }
+  if (!MaskSelects0)
+    Op0 = UndefValue::get(InVecTy);
+  if (!MaskSelects1)
+    Op1 = UndefValue::get(InVecTy);

   auto *Op0Const = dyn_cast<Constant>(Op0);
   auto *Op1Const = dyn_cast<Constant>(Op1);

-  // If all operands are constant, constant fold the shuffle. This
-  // transformation depends on the value of the mask which is not known at
-  // compile time for scalable vectors
-  if (!Scalable && Op0Const && Op1Const)
+  // If all operands are constant, constant fold the shuffle.
+  if (Op0Const && Op1Const)
     return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask);

   // Canonicalization: if only one input vector is constant, it shall be the
-  // second one. This transformation depends on the value of the mask which
-  // is not known at compile time for scalable vectors
-  if (!Scalable && Op0Const && !Op1Const) {
+  // second one.
+  if (Op0Const && !Op1Const) {
     std::swap(Op0, Op1);
-    ShuffleVectorInst::commuteShuffleMask(Indices, InVecEltCount.Min);
+    ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts);
   }

   // A splat of an inserted scalar constant becomes a vector constant:
   // shuf (inselt ?, C, IndexC), undef, <IndexC, IndexC...> --> <C, C...>
   // NOTE: We may have commuted above, so analyze the updated Indices, not the
   // original mask constant.
-  // NOTE: This transformation depends on the value of the mask which is not
-  // known at compile time for scalable vectors
   Constant *C;
   ConstantInt *IndexC;
-  if (!Scalable && match(Op0, m_InsertElement(m_Value(), m_Constant(C),
-                                              m_ConstantInt(IndexC)))) {
+  if (match(Op0, m_InsertElement(m_Value(), m_Constant(C),
+                                 m_ConstantInt(IndexC)))) {
     // Match a splat shuffle mask of the insert index allowing undef elements.
     int InsertIndex = IndexC->getZExtValue();
     if (all_of(Indices, [InsertIndex](int MaskElt) {
@@ -4513,8 +4499,8 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
     assert(isa<UndefValue>(Op1) && "Expected undef operand 1 for splat");

     // Shuffle mask undefs become undefined constant result elements.
-    SmallVector<Constant *, 16> VecC(MaskEltCount.Min, C);
-    for (unsigned i = 0; i != MaskEltCount.Min; ++i)
+    SmallVector<Constant *, 16> VecC(MaskNumElts, C);
+    for (unsigned i = 0; i != MaskNumElts; ++i)
       if (Indices[i] == -1)
         VecC[i] = UndefValue::get(C->getType());
     return ConstantVector::get(VecC);
@@ -4528,11 +4514,6 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
         OpShuf->getMask()->getSplatValue())
       return Op0;

-  // All remaining transformation depend on the value of the mask, which is
-  // not known at compile time for scalable vectors.
-  if (Scalable)
-    return nullptr;
-
   // Don't fold a shuffle with undef mask elements. This may get folded in a
   // better way using demanded bits or other analysis.
   // TODO: Should we allow this?
@@ -4544,7 +4525,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
   // shuffle. This handles simple identity shuffles as well as chains of
   // shuffles that may widen/narrow and/or move elements across lanes and back.
   Value *RootVec = nullptr;
-  for (unsigned i = 0; i != MaskEltCount.Min; ++i) {
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
     // Note that recursion is limited for each vector element, so if any element
     // exceeds the limit, this will fail to simplify.
     RootVec =

View File

@@ -7224,8 +7224,8 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
   bool AteExtraComma = false;
   // GEP returns a vector of pointers if at least one of parameters is a vector.
   // All vector parameters should have the same vector width.
-  ElementCount GEPWidth = BaseType->isVectorTy() ?
-    BaseType->getVectorElementCount() : ElementCount(0, false);
+  unsigned GEPWidth = BaseType->isVectorTy() ?
+    BaseType->getVectorNumElements() : 0;

   while (EatIfPresent(lltok::comma)) {
     if (Lex.getKind() == lltok::MetadataVar) {
@@ -7237,8 +7237,8 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
       return Error(EltLoc, "getelementptr index must be an integer");

     if (Val->getType()->isVectorTy()) {
-      ElementCount ValNumEl = Val->getType()->getVectorElementCount();
-      if (GEPWidth != ElementCount(0, false) && GEPWidth != ValNumEl)
+      unsigned ValNumEl = Val->getType()->getVectorNumElements();
+      if (GEPWidth && GEPWidth != ValNumEl)
         return Error(EltLoc,
             "getelementptr vector index has a wrong number of elements");
       GEPWidth = ValNumEl;

View File

@@ -863,12 +863,12 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
 Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
                                                      Constant *V2,
                                                      Constant *Mask) {
-  ElementCount MaskEltCount = Mask->getType()->getVectorElementCount();
+  unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
   Type *EltTy = V1->getType()->getVectorElementType();

   // Undefined shuffle mask -> undefined value.
   if (isa<UndefValue>(Mask))
-    return UndefValue::get(VectorType::get(EltTy, MaskEltCount));
+    return UndefValue::get(VectorType::get(EltTy, MaskNumElts));

   // Don't break the bitcode reader hack.
   if (isa<ConstantExpr>(Mask)) return nullptr;
@@ -879,7 +879,6 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
   if (ValTy->isScalable())
     return nullptr;

-  unsigned MaskNumElts = MaskEltCount.Min;
   unsigned SrcNumElts = V1->getType()->getVectorNumElements();

   // Loop over the shuffle mask, evaluating each element.

View File

@@ -1887,8 +1887,6 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,

 int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) {
   assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
-  assert(!Mask->getType()->getVectorElementCount().Scalable &&
-         "Length of scalable vectors unknown at compile time");
   if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask))
     return CDS->getElementAsInteger(i);
   Constant *C = Mask->getAggregateElement(i);
@@ -1899,8 +1897,6 @@ int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) {

 void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
                                        SmallVectorImpl<int> &Result) {
-  assert(!Mask->getType()->getVectorElementCount().Scalable &&
-         "Length of scalable vectors unknown at compile time");
   unsigned NumElts = Mask->getType()->getVectorNumElements();
   if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {

View File

@@ -1,32 +0,0 @@
-; RUN: opt -early-cse -S < %s | FileCheck %s
-
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64"
-
-; CHECK-LABEL: define <4 x i32*> @fixed_length_version_first() {
-; CHECK-NEXT: ret <4 x i32*> undef
-define <4 x i32*> @fixed_length_version_first() {
-  %ptr = getelementptr i32, <4 x i32*> undef, <4 x i64> undef
-  ret <4 x i32*> %ptr
-}
-
-; CHECK-LABEL: define <4 x <4 x i32>*> @fixed_length_version_second() {
-; CHECK-NEXT: ret <4 x <4 x i32>*> undef
-define <4 x <4 x i32>*> @fixed_length_version_second() {
-  %ptr = getelementptr <4 x i32>, <4 x i32>* undef, <4 x i64> undef
-  ret <4 x <4 x i32>*> %ptr
-}
-
-; CHECK-LABEL: define <vscale x 4 x i32*> @vscale_version_first() {
-; CHECK-NEXT: ret <vscale x 4 x i32*> undef
-define <vscale x 4 x i32*> @vscale_version_first() {
-  %ptr = getelementptr i32, <vscale x 4 x i32*> undef, <vscale x 4 x i64> undef
-  ret <vscale x 4 x i32*> %ptr
-}
-
-; CHECK-LABEL: define <vscale x 4 x <vscale x 4 x i32>*> @vscale_version_second() {
-; CHECK-NEXT: ret <vscale x 4 x <vscale x 4 x i32>*> undef
-define <vscale x 4 x <vscale x 4 x i32>*> @vscale_version_second() {
-  %ptr = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* undef, <vscale x 4 x i64> undef
-  ret <vscale x 4 x <vscale x 4 x i32>*> %ptr
-}

View File

@@ -1,41 +0,0 @@
-; RUN: opt -early-cse -S < %s | FileCheck %s
-
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64"
-
-; This test checks that SimplifyInstruction does not blow up in the face of
-; a scalable shufflevector. vscale is a constant value known only at runtime.
-; Therefore, it is not possible to know the concrete value of, or the length
-; of the mask at compile time. Simplifications that depend on the value
-; of the mask cannot be performed.
-
-; Given the fact that the value of the mask is unknown at compile time for
-; scalable vectors, very few simplifications will be done. Here, we want to
-; see that the instruction can be passed to SimplifyInstruction and not crash
-; the compiler. It happens to be the case that this will be the result.
-
-; CHECK-LABEL: define <vscale x 8 x i1> @vscale_version()
-; CHECK-NEXT: %splatter = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
-; CHECK-NEXT: %foo = shufflevector <vscale x 8 x i1> %splatter, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: ret <vscale x 8 x i1> %foo
-define <vscale x 8 x i1> @vscale_version() {
-  %splatter = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
-  %foo = shufflevector <vscale x 8 x i1> %splatter,
-                       <vscale x 8 x i1> undef,
-                       <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x i1> %foo
-}
-
-; The non-scalable version should be optimized as normal.
-
-; CHECK-LABEL: define <8 x i1> @fixed_length_version() {
-; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
-define <8 x i1> @fixed_length_version() {
-  %splatter = insertelement <8 x i1> undef, i1 true, i32 0
-  %foo = shufflevector <8 x i1> %splatter,
-                       <8 x i1> undef,
-                       <8 x i32> zeroinitializer
-  ret <8 x i1> %foo
-}
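
The comment block in the deleted shufflevector test spells out the constraint behind most of the removed code: for a scalable vector the mask length and values are only known at runtime, so folds that read the mask have to be skipped. Below is a minimal sketch of that guard, modelled on the code the revert removes from SimplifyShuffleVectorInst; the helper name getFixedShuffleIndices is made up for illustration and is not part of the patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

// Expands a shuffle mask into concrete indices when that is possible.
// For a scalable mask the element count (and therefore the mask contents)
// is unknown at compile time, so the function declines and the caller must
// restrict itself to folds that do not look at individual mask values.
static bool getFixedShuffleIndices(Constant *Mask,
                                   SmallVectorImpl<int> &Indices) {
  ElementCount EC = Mask->getType()->getVectorElementCount();
  if (EC.Scalable)
    return false;
  ShuffleVectorInst::getShuffleMask(Mask, Indices);
  return true;
}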