[IR][SVE] Add new llvm.experimental.stepvector intrinsic

This patch adds a new llvm.experimental.stepvector intrinsic, which takes no arguments and returns a linear integer sequence of values of the form <0, 1, ...>. It is primarily intended for scalable vectors, although it will work for fixed-width vectors too. It is intended that later patches will make use of this new intrinsic when vectorising induction variables, which is currently only supported for fixed-width vectors. I've added a new CreateStepVector method to the IRBuilder, which will generate a call to this intrinsic for scalable vectors and fall back on creating a ConstantVector for fixed-width vectors. For scalable vectors this intrinsic is lowered to a new ISD node called STEP_VECTOR, which takes a single constant integer argument as the step. During lowering this argument is set to a value of 1. The reason for this additional argument at the codegen level is that future patches will introduce various generic DAG combines such as: mul step_vector(1), 2 -> step_vector(2); add step_vector(1), step_vector(1) -> step_vector(2); shl step_vector(1), 1 -> step_vector(2); etc., which encourage a canonical format for all targets. This hopefully means all other targets supporting scalable vectors can benefit from this too. I've added cost model tests for both fixed-width and scalable vectors: llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll and llvm/test/Analysis/CostModel/AArch64/sve-stepvector.ll, as well as codegen lowering tests for fixed-width and scalable vectors: llvm/test/CodeGen/AArch64/neon-stepvector.ll and llvm/test/CodeGen/AArch64/sve-stepvector.ll. See this thread for discussion of the intrinsic: https://lists.llvm.org/pipermail/llvm-dev/2021-January/147943.html
2024-11-22 02:33:06 +01:00 · 2021-02-08 15:46:24 +00:00 · 2021-02-08 15:46:24 +00:00 · 42a72164a2
commit 42a72164a2
parent c7fbf3b5e8
26 changed files with 652 additions and 1 deletions
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@ -16673,6 +16673,36 @@ The first two operands are vectors with the same type. The third argument
 the source/result vector. The ``imm`` is a signed integer constant in the range
 ``-VL <= imm < VL``. For values outside of this range the result is poison.

+
+'``llvm.experimental.stepvector``' Intrinsic
+
+This is an overloaded intrinsic. You can use ``llvm.experimental.stepvector``
+to generate a vector whose lane values comprise the linear sequence
+<0, 1, 2, ...>. It is primarily intended for scalable vectors.
+
+::
+
+      declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+      declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+
+The '``llvm.experimental.stepvector``' intrinsics are used to create vectors
+of integers whose elements contain a linear sequence of values starting from 0
+with a step of 1.  This experimental intrinsic can only be used for vectors
+with integer elements that are at least 8 bits in size. If the sequence value
+exceeds the allowed limit for the element type then the result for that lane is
+undefined.
+
+These intrinsics work for both fixed and scalable vectors. While this intrinsic
+is marked as experimental, the recommended way to express this operation for
+fixed-width vectors is still to generate a constant vector instead.
+
+
+Arguments:
+""""""""""
+
+None.
+
+
 Matrix Intrinsics
 -----------------

--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@ -1249,6 +1249,12 @@ public:
      return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                             VarMask, Alignment, CostKind, I);
    }
+    case Intrinsic::experimental_stepvector: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+      // The cost of materialising a constant integer vector.
+      return TargetTransformInfo::TCC_Basic;
+    }
    case Intrinsic::experimental_vector_extract: {
      // FIXME: Handle case where a scalable vector is extracted from a scalable
      // vector
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@ -592,6 +592,14 @@ enum NodeType {
  /// scalars should have the same type.
  SPLAT_VECTOR_PARTS,

+  /// STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised
+  /// of a linear sequence of unsigned values starting from 0 with a step of
+  /// IMM, where IMM must be a constant non-negative integer value (a step of
+  /// 0 is folded to a zero vector). The operation does not support returning
+  /// fixed-width vectors or non-constant operands. If the sequence value
+  /// exceeds the limit allowed for the element type, those lanes are undefined.
+  STEP_VECTOR,
+
  /// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
  /// producing an unsigned/signed value of type i[2*N], then return the top
  /// part.
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@ -833,6 +833,10 @@ public:
    return getNode(ISD::SPLAT_VECTOR, DL, VT, Op);
  }

+  /// Returns a vector of type ResVT whose elements contain the linear sequence
+  ///   <0, Step, Step * 2, Step * 3, ...>
+  SDValue getStepVector(const SDLoc &DL, EVT ResVT, SDValue Step);
+
  /// Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to
  /// the shuffle node in input but with swapped operands.
  ///
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@ -854,6 +854,9 @@ public:
  /// will be the same type as that of \p Scaling.
  Value *CreateVScale(Constant *Scaling, const Twine &Name = "");

+  /// Creates a vector of type \p DstType with the linear sequence <0, 1, ...>
+  Value *CreateStepVector(Type *DstType, const Twine &Name = "");
+
  /// Create a call to intrinsic \p ID with 1 operand which is mangled on its
  /// type.
  CallInst *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@ -1337,6 +1337,9 @@ def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty],
 def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty],
                           [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

+def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                                        [], [IntrNoMem]>;
+
 //===---------------- Vector Predication Intrinsics --------------===//

 // Speculatable Binary operators
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@ -664,6 +664,8 @@ def vector_reverse : SDNode<"ISD::VECTOR_REVERSE", SDTVecReverse>;
 def vector_splice : SDNode<"ISD::VECTOR_SPLICE", SDTVecSlice, []>;
 def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
 def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
+def step_vector : SDNode<"ISD::STEP_VECTOR", SDTypeProfile<1, 1,
+                       [SDTCisVec<0>, SDTCisInt<1>]>, []>;
 def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
                              []>;

--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@ -110,6 +110,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
                         Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
  case ISD::SPLAT_VECTOR:
                         Res = PromoteIntRes_SPLAT_VECTOR(N); break;
+  case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
  case ISD::CONCAT_VECTORS:
                         Res = PromoteIntRes_CONCAT_VECTORS(N); break;

@ -4782,6 +4783,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
  return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
 }

+SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
+  SDLoc dl(N);
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+  EVT NOutElemVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                            NOutVT.getVectorElementType());
+  APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+  SDValue Step = DAG.getConstant(StepVal.getZExtValue(), dl, NOutElemVT);
+  return DAG.getStepVector(dl, NOutVT, Step);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
  SDLoc dl(N);

--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@ -304,6 +304,7 @@ private:
  SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
  SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
  SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
+  SDValue PromoteIntRes_STEP_VECTOR(SDNode *N);
  SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
  SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
@ -836,6 +837,7 @@ private:
  void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
  void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
  void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
  void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
  void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
  void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@ -928,6 +928,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
  case ISD::SCALAR_TO_VECTOR:
    SplitVecRes_ScalarOp(N, Lo, Hi);
    break;
+  case ISD::STEP_VECTOR:
+    SplitVecRes_STEP_VECTOR(N, Lo, Hi);
+    break;
  case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
  case ISD::LOAD:
    SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
@ -1639,6 +1642,30 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
    Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
 }

+void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  EVT LoVT, HiVT;
+  SDLoc dl(N);
+  assert(N->getValueType(0).isScalableVector() &&
+         "Only scalable vectors are supported for STEP_VECTOR");
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  SDValue Step = N->getOperand(0);
+
+  Lo = DAG.getNode(ISD::STEP_VECTOR, dl, LoVT, Step);
+
+  // Hi = STEP_VECTOR(Step) + splat(vscale * MinNumElts(LoVT) * Step)
+  EVT EltVT = Step.getValueType();
+  SDValue StartOfHi =
+      DAG.getVScale(dl, EltVT,
+                    cast<ConstantSDNode>(Step)->getAPIntValue() *
+                        LoVT.getVectorMinNumElements());
+  StartOfHi = DAG.getZExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
+  StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi);
+
+  Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step);
+  Hi = DAG.getNode(ISD::ADD, dl, HiVT, Hi, StartOfHi);
+}
+
 void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
  EVT LoVT, HiVT;
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -1744,6 +1744,18 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
  return SDValue(CondCodeNodes[Cond], 0);
 }

+SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, SDValue Step) {
+  if (ResVT.isScalableVector())
+    return getNode(ISD::STEP_VECTOR, DL, ResVT, Step);
+
+  EVT OpVT = Step.getValueType();
+  APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue();
+  SmallVector<SDValue, 16> OpsStepConstants;
+  for (uint64_t i = 0; i < ResVT.getVectorNumElements(); i++)
+    OpsStepConstants.push_back(getConstant(StepVal * i, DL, OpVT));
+  return getBuildVector(ResVT, DL, OpsStepConstants);
+}
+
 /// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that
 /// point at N1 to point at N2 and indices that point at N2 to point at N1.
 static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
@ -4339,6 +4351,14 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
  return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
 }

+static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,
+                               SelectionDAG &DAG) {
+  if (cast<ConstantSDNode>(Step)->isNullValue())
+    return DAG.getConstant(0, DL, VT);
+
+  return SDValue();
+}
+
 static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
                                ArrayRef<SDValue> Ops,
                                SelectionDAG &DAG) {
@ -4560,6 +4580,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                        APFloat::rmNearestTiesToEven, &Ignored);
      return getConstantFP(FPV, DL, VT);
    }
+    case ISD::STEP_VECTOR: {
+      if (SDValue V = FoldSTEP_VECTOR(DL, VT, Operand, *this))
+        return V;
+      break;
+    }
    }
  }

@ -4669,6 +4694,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,

  unsigned OpOpcode = Operand.getNode()->getOpcode();
  switch (Opcode) {
+  case ISD::STEP_VECTOR:
+    assert(VT.isScalableVector() &&
+           "STEP_VECTOR can only be used with scalable types");
+    assert(VT.getScalarSizeInBits() >= 8 &&
+           "STEP_VECTOR can only be used with vectors of integers that are at "
+           "least 8 bits wide");
+    assert(Operand.getValueType().bitsGE(VT.getScalarType()) &&
+           "Operand type should be at least as large as the element type");
+    assert(isa<ConstantSDNode>(Operand) &&
+           cast<ConstantSDNode>(Operand)->getAPIntValue().isNonNegative() &&
+           "Expected positive integer constant for STEP_VECTOR");
+    break;
  case ISD::FREEZE:
    assert(VT == Operand.getValueType() && "Unexpected VT!");
    break;
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -6945,7 +6945,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
  case Intrinsic::experimental_deoptimize:
    LowerDeoptimizeCall(&I);
    return;
-
+  case Intrinsic::experimental_stepvector:
+    visitStepVector(I);
+    return;
  case Intrinsic::vector_reduce_fadd:
  case Intrinsic::vector_reduce_fmul:
  case Intrinsic::vector_reduce_add:
@ -10929,6 +10931,16 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
  }
 }

+void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  auto DL = getCurSDLoc();
+  EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  EVT OpVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), ResultVT.getScalarType());
+  SDValue Step = DAG.getConstant(1, DL, OpVT);
+  setValue(&I, DAG.getStepVector(DL, ResultVT, Step));
+}
+
 void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@ -779,6 +779,7 @@ private:
  void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
  void visitVectorReverse(const CallInst &I);
  void visitVectorSplice(const CallInst &I);
+  void visitStepVector(const CallInst &I);

  void visitUserOp1(const Instruction &I) {
    llvm_unreachable("UserOp1 should not exist at instruction selection time!");
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@ -292,6 +292,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
  case ISD::SPLAT_VECTOR:               return "splat_vector";
  case ISD::SPLAT_VECTOR_PARTS:         return "splat_vector_parts";
  case ISD::VECTOR_REVERSE:             return "vector_reverse";
+  case ISD::STEP_VECTOR:                return "step_vector";
  case ISD::CARRY_FALSE:                return "carry_false";
  case ISD::ADDC:                       return "addc";
  case ISD::ADDE:                       return "adde";
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@ -91,6 +91,23 @@ Value *IRBuilderBase::CreateVScale(Constant *Scaling, const Twine &Name) {
             : CreateMul(CI, Scaling);
 }

+Value *IRBuilderBase::CreateStepVector(Type *DstType, const Twine &Name) {
+  if (isa<ScalableVectorType>(DstType))
+    return CreateIntrinsic(Intrinsic::experimental_stepvector, {DstType}, {},
+                           nullptr, Name);
+
+  Type *STy = DstType->getScalarType();
+  unsigned NumEls = cast<FixedVectorType>(DstType)->getNumElements();
+
+  // Create the list of consecutive lane indices 0, 1, ..., NumEls - 1.
+  SmallVector<Constant *, 8> Indices;
+  for (unsigned i = 0; i < NumEls; ++i)
+    Indices.push_back(ConstantInt::get(STy, i));
+
+  // Materialise the consecutive indices as a constant vector.
+  return ConstantVector::get(Indices);
+}
+
 CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size,
                                      MaybeAlign Align, bool isVolatile,
                                      MDNode *TBAATag, MDNode *ScopeTag,
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@ -5195,6 +5195,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {

    break;
  }
+  case Intrinsic::experimental_stepvector: {
+    VectorType *VecTy = dyn_cast<VectorType>(Call.getType());
+    Assert(VecTy && VecTy->getScalarType()->isIntegerTy() &&
+               VecTy->getScalarSizeInBits() >= 8,
+           "experimental_stepvector only supported for vectors of integers "
+           "with a bitwidth of at least 8.",
+           &Call);
+    break;
+  }
  case Intrinsic::experimental_vector_insert: {
    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
    VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -1135,6 +1135,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+      setOperationAction(ISD::STEP_VECTOR, VT, Custom);

      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
@ -4402,6 +4403,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SPLAT_VECTOR:
    return LowerSPLAT_VECTOR(Op, DAG);
+  case ISD::STEP_VECTOR:
+    return LowerSTEP_VECTOR(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:
    return LowerEXTRACT_SUBVECTOR(Op, DAG);
  case ISD::INSERT_SUBVECTOR:
@ -9049,6 +9052,21 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
  return GenerateTBL(Op, ShuffleMask, DAG);
 }

+SDValue AArch64TargetLowering::LowerSTEP_VECTOR(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  EVT VT = Op.getValueType();
+  assert(VT.isScalableVector() &&
+         "Only expect scalable vectors for STEP_VECTOR");
+  EVT ElemVT = VT.getScalarType();
+  assert(ElemVT != MVT::i1 &&
+         "Vectors of i1 types not supported for STEP_VECTOR");
+
+  SDValue StepVal = Op.getOperand(0);
+  SDValue Zero = DAG.getConstant(0, dl, StepVal.getValueType());
+  return DAG.getNode(AArch64ISD::INDEX_VECTOR, dl, VT, Zero, StepVal);
+}
+
 SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@ -936,6 +936,7 @@ private:
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@ -260,6 +260,19 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
      return LT.first;
    break;
  }
+  case Intrinsic::experimental_stepvector: {
+    unsigned Cost = 1; // Cost of the `index' instruction
+    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    // Legalisation of illegal vectors involves an `index' instruction plus
+    // (LT.first - 1) vector adds.
+    if (LT.first > 1) {
+      Type *LegalVTy = EVT(LT.second).getTypeForEVT(RetTy->getContext());
+      unsigned AddCost =
+          getArithmeticInstrCost(Instruction::Add, LegalVTy, CostKind);
+      Cost += AddCost * (LT.first - 1);
+    }
+    return Cost;
+  }
  default:
    break;
  }
--- a/test/Analysis/CostModel/AArch64/neon-stepvector.ll
+++ b/test/Analysis/CostModel/AArch64/neon-stepvector.ll
@ -0,0 +1,34 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+neon  < %s | FileCheck %s
+
+; Check stepvector for legal integer vectors.
+define void @stepvector_legal_int() {
+; CHECK-LABEL: 'stepvector_legal_int'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+  %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+  %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+  %4 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  ret void
+}
+
+; Check stepvector for illegal integer vectors.
+define void @stepvector_illegal_int() {
+; CHECK-LABEL: 'stepvector_illegal_int'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+  %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  ret void
+}
+
+
+declare <2 x i64> @llvm.experimental.stepvector.v2i64()
+declare <4 x i32> @llvm.experimental.stepvector.v4i32()
+declare <8 x i16> @llvm.experimental.stepvector.v8i16()
+declare <16 x i8> @llvm.experimental.stepvector.v16i8()
+
+declare <4 x i64> @llvm.experimental.stepvector.v4i64()
+declare <16 x i32> @llvm.experimental.stepvector.v16i32()
--- a/test/Analysis/CostModel/AArch64/sve-stepvector.ll
+++ b/test/Analysis/CostModel/AArch64/sve-stepvector.ll
@ -0,0 +1,39 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Check stepvector for legal integer vectors.
+define void @stepvector_legal_int() {
+; CHECK-LABEL: 'stepvector_legal_int'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %2 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %3 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  %1 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  %2 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  %3 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+  %4 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  ret void
+}
+
+; Check stepvector for illegal integer vectors.
+define void @stepvector_illegal_int() {
+; CHECK-LABEL: 'stepvector_illegal_int'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %1 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %2 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  %1 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %2 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  ret void
+}
+
+
+declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+declare <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+
+declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+declare <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
--- a/test/CodeGen/AArch64/neon-stepvector.ll
+++ b/test/CodeGen/AArch64/neon-stepvector.ll
@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK
+
+; LEGAL INTEGER TYPES
+
+define <2 x i64> @stepvector_v2i64() {
+; CHECK-LABEL: .LCPI0_0:
+; CHECK-NEXT:    .xword 0
+; CHECK-NEXT:    .xword 1
+; CHECK-LABEL: stepvector_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+  ret <2 x i64> %0
+}
+
+define <4 x i32> @stepvector_v4i32() {
+; CHECK-LABEL: .LCPI1_0:
+; CHECK-NEXT:    .word 0
+; CHECK-NEXT:    .word 1
+; CHECK-NEXT:    .word 2
+; CHECK-NEXT:    .word 3
+; CHECK-LABEL: stepvector_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+  ret <4 x i32> %0
+}
+
+define <8 x i16> @stepvector_v8i16() {
+; CHECK-LABEL: .LCPI2_0:
+; CHECK-NEXT:    .hword 0
+; CHECK-NEXT:    .hword 1
+; CHECK-NEXT:    .hword 2
+; CHECK-NEXT:    .hword 3
+; CHECK-NEXT:    .hword 4
+; CHECK-NEXT:    .hword 5
+; CHECK-NEXT:    .hword 6
+; CHECK-NEXT:    .hword 7
+; CHECK-LABEL: stepvector_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI2_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+  ret <8 x i16> %0
+}
+
+define <16 x i8> @stepvector_v16i8() {
+; CHECK-LABEL: .LCPI3_0:
+; CHECK-NEXT:    .byte 0
+; CHECK-NEXT:    .byte 1
+; CHECK-NEXT:    .byte 2
+; CHECK-NEXT:    .byte 3
+; CHECK-NEXT:    .byte 4
+; CHECK-NEXT:    .byte 5
+; CHECK-NEXT:    .byte 6
+; CHECK-NEXT:    .byte 7
+; CHECK-NEXT:    .byte 8
+; CHECK-NEXT:    .byte 9
+; CHECK-NEXT:    .byte 10
+; CHECK-NEXT:    .byte 11
+; CHECK-NEXT:    .byte 12
+; CHECK-NEXT:    .byte 13
+; CHECK-NEXT:    .byte 14
+; CHECK-NEXT:    .byte 15
+; CHECK-LABEL: stepvector_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI3_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  ret <16 x i8> %0
+}
+
+; ILLEGAL INTEGER TYPES
+
+define <4 x i64> @stepvector_v4i64() {
+; CHECK-LABEL: .LCPI4_0:
+; CHECK-NEXT:    .xword 0
+; CHECK-NEXT:    .xword 1
+; CHECK-LABEL: .LCPI4_1:
+; CHECK-NEXT:    .xword 2
+; CHECK-NEXT:    .xword 3
+; CHECK-LABEL: stepvector_v4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI4_0
+; CHECK-NEXT:    adrp x9, .LCPI4_1
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI4_1]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+  ret <4 x i64> %0
+}
+
+define <16 x i32> @stepvector_v16i32() {
+; CHECK-LABEL: .LCPI5_0:
+; CHECK-NEXT:    .word 0
+; CHECK-NEXT:    .word 1
+; CHECK-NEXT:    .word 2
+; CHECK-NEXT:    .word 3
+; CHECK-LABEL: .LCPI5_1:
+; CHECK-NEXT:    .word 4
+; CHECK-NEXT:    .word 5
+; CHECK-NEXT:    .word 6
+; CHECK-NEXT:    .word 7
+; CHECK-LABEL: .LCPI5_2:
+; CHECK-NEXT:    .word 8
+; CHECK-NEXT:    .word 9
+; CHECK-NEXT:    .word 10
+; CHECK-NEXT:    .word 11
+; CHECK-LABEL: .LCPI5_3:
+; CHECK-NEXT:    .word 12
+; CHECK-NEXT:    .word 13
+; CHECK-NEXT:    .word 14
+; CHECK-NEXT:    .word 15
+; CHECK-LABEL: stepvector_v16i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI5_0
+; CHECK-NEXT:    adrp x9, .LCPI5_1
+; CHECK-NEXT:    adrp x10, .LCPI5_2
+; CHECK-NEXT:    adrp x11, .LCPI5_3
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI5_1]
+; CHECK-NEXT:    ldr q2, [x10, :lo12:.LCPI5_2]
+; CHECK-NEXT:    ldr q3, [x11, :lo12:.LCPI5_3]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  ret <16 x i32> %0
+}
+
+define <2 x i32> @stepvector_v2i32() {
+; CHECK-LABEL: .LCPI6_0:
+; CHECK-NEXT:    .word 0
+; CHECK-NEXT:    .word 1
+; CHECK-LABEL: stepvector_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI6_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI6_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <2 x i32> @llvm.experimental.stepvector.v2i32()
+  ret <2 x i32> %0
+}
+
+define <4 x i16> @stepvector_v4i16() {
+; CHECK-LABEL: .LCPI7_0:
+; CHECK-NEXT:    .hword 0
+; CHECK-NEXT:    .hword 1
+; CHECK-NEXT:    .hword 2
+; CHECK-NEXT:    .hword 3
+; CHECK-LABEL: stepvector_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i16> @llvm.experimental.stepvector.v4i16()
+  ret <4 x i16> %0
+}
+
+
+declare <2 x i64> @llvm.experimental.stepvector.v2i64()
+declare <4 x i32> @llvm.experimental.stepvector.v4i32()
+declare <8 x i16> @llvm.experimental.stepvector.v8i16()
+declare <16 x i8> @llvm.experimental.stepvector.v16i8()
+
+declare <4 x i64> @llvm.experimental.stepvector.v4i64()
+declare <16 x i32> @llvm.experimental.stepvector.v16i32()
+declare <2 x i32> @llvm.experimental.stepvector.v2i32()
+declare <4 x i16> @llvm.experimental.stepvector.v4i16()
--- a/test/CodeGen/AArch64/sve-stepvector.ll
+++ b/test/CodeGen/AArch64/sve-stepvector.ll
@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; LEGAL INTEGER TYPES
+
+; nxv2i64: the check lines expect a single INDEX instruction on 64-bit (.d)
+; lanes with immediates #0 and #1.
+define <vscale x 2 x i64> @stepvector_nxv2i64() {
+; CHECK-LABEL: stepvector_nxv2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  ret <vscale x 2 x i64> %0
+}
+
+; nxv4i32: the check lines expect a single INDEX instruction on 32-bit (.s)
+; lanes with immediates #0 and #1.
+define <vscale x 4 x i32> @stepvector_nxv4i32() {
+; CHECK-LABEL: stepvector_nxv4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  ret <vscale x 4 x i32> %0
+}
+
+; nxv8i16: the check lines expect a single INDEX instruction on 16-bit (.h)
+; lanes with immediates #0 and #1.
+define <vscale x 8 x i16> @stepvector_nxv8i16() {
+; CHECK-LABEL: stepvector_nxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+  ret <vscale x 8 x i16> %0
+}
+
+; nxv16i8: the check lines expect a single INDEX instruction on 8-bit (.b)
+; lanes with immediates #0 and #1.
+define <vscale x 16 x i8> @stepvector_nxv16i8() {
+; CHECK-LABEL: stepvector_nxv16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  ret <vscale x 16 x i8> %0
+}
+
+; ILLEGAL INTEGER TYPES
+
+; nxv4i64: the check lines expect a two-register result. z0 holds the plain
+; index (#0, #1) on .d lanes, and z1 is z0 plus a broadcast of the cntd value.
+define <vscale x 4 x i64> @stepvector_nxv4i64() {
+; CHECK-LABEL: stepvector_nxv4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    add z1.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  ret <vscale x 4 x i64> %0
+}
+
+; nxv16i32: the check lines expect a four-register result built from one index
+; on .s lanes plus broadcasts of the cntw and cnth values:
+;   z0 = index #0, #1
+;   z1 = z0 + broadcast(cntw)
+;   z2 = z0 + broadcast(cnth)
+;   z3 = z1 + broadcast(cnth)
+define <vscale x 16 x i32> @stepvector_nxv16i32() {
+; CHECK-LABEL: stepvector_nxv16i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    cnth x8
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    mov z1.s, w9
+; CHECK-NEXT:    mov z3.s, w8
+; CHECK-NEXT:    add z1.s, z0.s, z1.s
+; CHECK-NEXT:    add z2.s, z0.s, z3.s
+; CHECK-NEXT:    add z3.s, z1.s, z3.s
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  ret <vscale x 16 x i32> %0
+}
+
+; nxv2i32: the check lines expect the index to be generated on 64-bit (.d)
+; lanes rather than .s lanes, i.e. the same instruction as for nxv2i64
+; (presumably because the i32 elements are promoted - confirm in the lowering).
+define <vscale x 2 x i32> @stepvector_nxv2i32() {
+; CHECK-LABEL: stepvector_nxv2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
+  ret <vscale x 2 x i32> %0
+}
+
+; nxv4i16: the check lines expect the index to be generated on 32-bit (.s)
+; lanes rather than .h lanes, i.e. the same instruction as for nxv4i32
+; (presumably because the i16 elements are promoted - confirm in the lowering).
+define <vscale x 4 x i16> @stepvector_nxv4i16() {
+; CHECK-LABEL: stepvector_nxv4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i16> @llvm.experimental.stepvector.nxv4i16()
+  ret <vscale x 4 x i16> %0
+}
+
+; nxv8i8: the check lines expect the index to be generated on 16-bit (.h)
+; lanes rather than .b lanes, i.e. the same instruction as for nxv8i16
+; (presumably because the i8 elements are promoted - confirm in the lowering).
+define <vscale x 8 x i8> @stepvector_nxv8i8() {
+; CHECK-LABEL: stepvector_nxv8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  ret <vscale x 8 x i8> %0
+}
+
+; Intrinsic declarations: one per scalable overload of
+; llvm.experimental.stepvector called by the functions above.
+declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+declare <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+
+declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+declare <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+declare <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
+declare <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+declare <vscale x 4 x i16> @llvm.experimental.stepvector.nxv4i16()
--- a/test/Verifier/stepvector-intrinsic.ll
+++ b/test/Verifier/stepvector-intrinsic.ll
@ -0,0 +1,29 @@
+; RUN: not opt -S -verify < %s 2>&1 | FileCheck %s
+
+; Reject stepvector intrinsics that return a scalar: the i32 variant called
+; below is not a vector, so the verifier is expected to diagnose its return
+; type.
+
+define i32 @stepvector_i32() {
+; CHECK: Intrinsic has incorrect return type!
+  %1 = call i32 @llvm.experimental.stepvector.i32()
+  ret i32 %1
+}
+
+; Reject vectors with non-integer elements: a scalable vector of float is
+; expected to trigger the "vectors of integers" diagnostic checked below.
+
+define <vscale x 4 x float> @stepvector_float() {
+; CHECK: experimental_stepvector only supported for vectors of integers with a bitwidth of at least 8
+  %1 = call <vscale x 4 x float> @llvm.experimental.stepvector.nxv4f32()
+  ret <vscale x 4 x float> %1
+}
+
+; Reject vectors of integers less than 8 bits in width: a scalable vector of
+; i1 is expected to trigger the same bitwidth diagnostic as the float case.
+
+define <vscale x 16 x i1> @stepvector_i1() {
+; CHECK: experimental_stepvector only supported for vectors of integers with a bitwidth of at least 8
+  %1 = call <vscale x 16 x i1> @llvm.experimental.stepvector.nxv16i1()
+  ret <vscale x 16 x i1> %1
+}
+
+; Deliberately invalid overloads of the intrinsic, one per rejected case above.
+declare i32 @llvm.experimental.stepvector.i32()
+declare <vscale x 4 x float> @llvm.experimental.stepvector.nxv4f32()
+declare <vscale x 16 x i1> @llvm.experimental.stepvector.nxv16i1()
--- a/unittests/CodeGen/AArch64SelectionDAGTest.cpp
+++ b/unittests/CodeGen/AArch64SelectionDAGTest.cpp
@ -648,4 +648,18 @@ TEST_F(AArch64SelectionDAGTest, getTypeConversion_NoScalarizeEVT_nxv1f128) {
  EXPECT_DEATH(getTypeAction(FromVT), "Cannot legalize this vector");
 }

+// Checks that requesting a STEP_VECTOR node whose step operand is the constant
+// zero yields a SPLAT_VECTOR node from SelectionDAG::getNode().
+TEST_F(AArch64SelectionDAGTest, TestFold_STEP_VECTOR) {
+  // Nothing to exercise when the fixture has no target machine.
+  if (!TM)
+    return;
+
+  SDLoc DL;
+  EVT StepVT = EVT::getIntegerVT(Context, 8);
+  EVT ResultVT = EVT::getVectorVT(Context, MVT::i8, 16, /*IsScalable=*/true);
+
+  // A step of zero should come back as a SPLAT_VECTOR.
+  SDValue ZeroStep = DAG->getConstant(0, DL, StepVT);
+  SDValue Folded = DAG->getNode(ISD::STEP_VECTOR, DL, ResultVT, ZeroStep);
+  EXPECT_EQ(Folded.getOpcode(), ISD::SPLAT_VECTOR);
+}
+
 } // end namespace llvm
--- a/unittests/IR/IRBuilderTest.cpp
+++ b/unittests/IR/IRBuilderTest.cpp
@ -180,6 +180,32 @@ TEST_F(IRBuilderTest, IntrinsicsWithScalableVectors) {
    EXPECT_EQ(FTy->getParamType(i), ArgTys[i]->getType());
 }

+// IRBuilder::CreateStepVector is expected to return a constant <0, 1, 2, ...>
+// for fixed-width vector types and a call to the
+// llvm.experimental.stepvector intrinsic for scalable vector types.
+TEST_F(IRBuilderTest, CreateStepVector) {
+  IRBuilder<> Builder(BB);
+
+  // Fixed width vectors
+  Type *DstVecTy = VectorType::get(Builder.getInt32Ty(), 4, false);
+  Value *StepVec = Builder.CreateStepVector(DstVecTy);
+  EXPECT_EQ(StepVec->getType(), DstVecTy);
+
+  // Use fatal assertions before dereferencing so that a regression shows up
+  // as a test failure instead of a crash inside cast<>.
+  const auto *VectorValue = dyn_cast<Constant>(StepVec);
+  ASSERT_NE(VectorValue, nullptr);
+  for (unsigned I = 0; I < 4; ++I) {
+    // dyn_cast_or_null also covers a missing (null) aggregate element.
+    const auto *El =
+        dyn_cast_or_null<ConstantInt>(VectorValue->getAggregateElement(I));
+    ASSERT_NE(El, nullptr);
+    EXPECT_EQ(El->getValue(), I);
+  }
+
+  // Scalable vectors
+  DstVecTy = VectorType::get(Builder.getInt32Ty(), 4, true);
+  StepVec = Builder.CreateStepVector(DstVecTy);
+  auto *Call = dyn_cast<CallInst>(StepVec);
+  ASSERT_NE(Call, nullptr);
+  FunctionType *FTy = Call->getFunctionType();
+  EXPECT_EQ(FTy->getReturnType(), DstVecTy);
+  EXPECT_EQ(Call->getIntrinsicID(), Intrinsic::experimental_stepvector);
+}
+
 TEST_F(IRBuilderTest, ConstrainedFP) {
  IRBuilder<> Builder(BB);
  Value *V;