
[SVE] Remove calls to VectorType::getNumElements from AArch64

Reviewers: efriedma, paquette, david-arm, kmclaughlin

Reviewed By: david-arm

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, danielkiss, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82214
Christopher Tetreault 2020-06-30 11:07:24 -07:00
parent b0b0a7801d
commit 4a8eb5f3d3
4 changed files with 37 additions and 30 deletions
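The pattern applied throughout the diff below is uniform: where a code path can only ever see fixed-width NEON vectors, cast to FixedVectorType before querying the element count, because a plain VectorType may be scalable and then has no single compile-time element count (and cast<FixedVectorType> will assert if a scalable type ever reaches such a path). The following is a minimal standalone sketch of that distinction, assuming LLVM 11+ headers; the helper fixedElementCountOrZero is illustrative only and not part of this commit:

// Sketch only: contrasts fixed-width and scalable vector types.
// fixedElementCountOrZero is a hypothetical helper, not part of this commit.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Return the element count only when it is a compile-time constant,
// i.e. the type is a FixedVectorType; scalable vectors report 0 here.
static unsigned fixedElementCountOrZero(Type *Ty) {
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
    return FVTy->getNumElements();
  return 0;
}

int main() {
  LLVMContext Ctx;
  Type *I32 = Type::getInt32Ty(Ctx);

  VectorType *Fixed = FixedVectorType::get(I32, 4);        // <4 x i32>
  VectorType *Scalable = ScalableVectorType::get(I32, 4);  // <vscale x 4 x i32>

  outs() << "fixed:    " << fixedElementCountOrZero(Fixed) << "\n";    // prints 4
  outs() << "scalable: " << fixedElementCountOrZero(Scalable) << "\n"; // prints 0
  return 0;
}

In the patch itself the cast<FixedVectorType> form is used directly, so a scalable type reaching one of these NEON-only paths fails loudly instead of silently returning a wrong element count.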


@@ -9486,8 +9486,8 @@ static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
};
auto extractHalf = [](Value *FullV, Value *HalfV) {
-auto *FullVT = cast<VectorType>(FullV->getType());
-auto *HalfVT = cast<VectorType>(HalfV->getType());
+auto *FullVT = cast<FixedVectorType>(FullV->getType());
+auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
};
@@ -9507,7 +9507,7 @@ static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
// elements.
int M1Start = -1;
int M2Start = -1;
-int NumElements = cast<VectorType>(Op1->getType())->getNumElements() * 2;
+int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
!ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
@@ -9639,7 +9639,7 @@ bool AArch64TargetLowering::isLegalInterleavedAccessType(
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
// Ensure the number of vector elements is greater than 1.
-if (VecTy->getNumElements() < 2)
+if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
return false;
// Ensure the element type is legal.
@@ -9673,22 +9673,24 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
const DataLayout &DL = LI->getModule()->getDataLayout();
-VectorType *VecTy = Shuffles[0]->getType();
+VectorType *VTy = Shuffles[0]->getType();
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
-if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
+if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
return false;
-unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
+unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
+auto *FVTy = cast<FixedVectorType>(VTy);
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
-Type *EltTy = VecTy->getElementType();
+Type *EltTy = FVTy->getElementType();
if (EltTy->isPointerTy())
-VecTy =
-FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy->getNumElements());
+FVTy =
+FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
IRBuilder<> Builder(LI);
@@ -9698,19 +9700,19 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
if (NumLoads > 1) {
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
-VecTy = FixedVectorType::get(VecTy->getElementType(),
-VecTy->getNumElements() / NumLoads);
+FVTy = FixedVectorType::get(FVTy->getElementType(),
+FVTy->getNumElements() / NumLoads);
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
-VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
+FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
-Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
-Type *Tys[2] = {VecTy, PtrTy};
+Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
+Type *Tys[2] = {FVTy, PtrTy};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
Intrinsic::aarch64_neon_ld3,
Intrinsic::aarch64_neon_ld4};
@@ -9727,8 +9729,8 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
-BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
-VecTy->getNumElements() * Factor);
+BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
+FVTy->getNumElements() * Factor);
CallInst *LdN = Builder.CreateCall(
LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
@@ -9744,7 +9746,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
-VecTy->getNumElements()));
+FVTy->getNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
@@ -9795,7 +9797,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
-VectorType *VecTy = SVI->getType();
+auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
unsigned LaneLen = VecTy->getNumElements() / Factor;
@@ -9820,7 +9822,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
-unsigned NumOpElts = cast<VectorType>(Op0->getType())->getNumElements();
+unsigned NumOpElts =
+cast<FixedVectorType>(Op0->getType())->getNumElements();
// Convert to the corresponding integer vector.
auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);


@@ -265,8 +265,9 @@ public:
Type *EltTy = VecTy->getElementType();
if (EltTy->isPointerTy()) {
uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
-auto *NewTy = FixedVectorType::get(IntegerType::get(Ctx, EltSize),
-VecTy->getNumElements());
+auto *NewTy = FixedVectorType::get(
+IntegerType::get(Ctx, EltSize),
+cast<FixedVectorType>(VecTy)->getNumElements());
V = IRB.CreatePointerCast(V, NewTy);
}
}


@@ -212,7 +212,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
// elements in type Ty determine the vector width.
auto toVectorTy = [&](Type *ArgTy) {
return FixedVectorType::get(ArgTy->getScalarType(),
-cast<VectorType>(DstTy)->getNumElements());
+cast<FixedVectorType>(DstTy)->getNumElements());
};
// Exit early if DstTy is not a vector type whose elements are at least
@@ -724,8 +724,8 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
// have to promote the elements to v.2.
ProfitableNumElements = 8;
-if (cast<VectorType>(Ty)->getNumElements() < ProfitableNumElements) {
-unsigned NumVecElts = cast<VectorType>(Ty)->getNumElements();
+if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) {
+unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
// We generate 2 instructions per vector element.
return NumVectorizableInstsToAmortize * NumVecElts * 2;
@@ -740,7 +740,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
-auto *VecVTy = cast<VectorType>(VecTy);
+auto *VecVTy = cast<FixedVectorType>(VecTy);
if (!UseMaskForCond && !UseMaskForGaps &&
Factor <= TLI->getMaxSupportedInterleaveFactor()) {
@@ -767,7 +767,8 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
for (auto *I : Tys) {
if (!I->isVectorTy())
continue;
-if (I->getScalarSizeInBits() * cast<VectorType>(I)->getNumElements() == 128)
+if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
+128)
Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
}
@@ -970,9 +971,10 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
case Instruction::Mul:
return false;
case Instruction::Add:
-return ScalarBits * VTy->getNumElements() >= 128;
+return ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128;
case Instruction::ICmp:
-return (ScalarBits < 64) && (ScalarBits * VTy->getNumElements() >= 128);
+return (ScalarBits < 64) &&
+(ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128);
case Instruction::FCmp:
return Flags.NoNaN;
default:


@@ -189,7 +189,8 @@ public:
// the element type fits into a register and the number of elements is a
// power of 2 > 1.
if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
-unsigned NumElements = DataTypeVTy->getNumElements();
+unsigned NumElements =
+cast<FixedVectorType>(DataTypeVTy)->getNumElements();
unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
EltSize <= 128 && isPowerOf2_64(EltSize);