[TTI] NFC: Change getTypeLegalizationCost to return InstructionCost.

This patch migrates the TTI cost interfaces to return an InstructionCost. See this patch for the introduction of the type: https://reviews.llvm.org/D91174 See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html Reviewed By: sdesmalen, kparzysz Differential Revision: https://reviews.llvm.org/D101533
2024-11-22 10:42:39 +01:00 · 2021-04-29 16:02:51 +03:00 · 2021-04-29 16:02:51 +03:00 · 3f292e8925
commit 3f292e8925
parent 8ceccff3f0
12 changed files with 128 additions and 101 deletions
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@ -357,7 +357,10 @@ public:
  }

  unsigned getRegUsageForType(Type *Ty) {
-    return getTLI()->getTypeLegalizationCost(DL, Ty).first;
+    InstructionCost::CostType Val =
+        *getTLI()->getTypeLegalizationCost(DL, Ty).first.getValue();
+    assert(Val >= 0 && "Negative cost!");
+    return Val;
  }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
@ -750,7 +753,7 @@ public:
                                           Opd1PropInfo, Opd2PropInfo,
                                           Args, CxtI);

-    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

    bool IsFloat = Ty->isFPOrFPVectorTy();
    // Assume that floating point arithmetic operations cost twice as much as
@ -852,8 +855,10 @@ public:
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
-    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
-    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
+    std::pair<InstructionCost, MVT> SrcLT =
+        TLI->getTypeLegalizationCost(DL, Src);
+    std::pair<InstructionCost, MVT> DstLT =
+        TLI->getTypeLegalizationCost(DL, Dst);

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
@ -1025,7 +1030,8 @@ public:
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
-    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
@ -1055,7 +1061,7 @@ public:

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) {
-    std::pair<unsigned, MVT> LT =
+    std::pair<InstructionCost, MVT> LT =
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());

    return LT.first;
@ -1069,7 +1075,8 @@ public:
    // Assume types, such as structs, are expensive.
    if (getTLI()->getValueType(DL, Src,  true) == MVT::Other)
      return 4;
-    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
+    std::pair<InstructionCost, MVT> LT =
+        getTLI()->getTypeLegalizationCost(DL, Src);

    // Assuming that all loads of legal types cost 1.
    InstructionCost Cost = LT.first;
@ -1836,10 +1843,11 @@ public:
    }

    const TargetLoweringBase *TLI = getTLI();
-    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, RetTy);

-    SmallVector<unsigned, 2> LegalCost;
-    SmallVector<unsigned, 2> CustomCost;
+    SmallVector<InstructionCost, 2> LegalCost;
+    SmallVector<InstructionCost, 2> CustomCost;
    for (unsigned ISD : ISDs) {
      if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
        if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
@ -1945,8 +1953,9 @@ public:
  }

  unsigned getNumberOfParts(Type *Tp) {
-    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
-    return LT.first;
+    std::pair<InstructionCost, MVT> LT =
+        getTLI()->getTypeLegalizationCost(DL, Tp);
+    return *LT.first.getValue();
  }

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
@ -2013,7 +2022,7 @@ public:
    unsigned NumReduxLevels = Log2_32(NumVecElts);
    InstructionCost ArithCost = 0;
    InstructionCost ShuffleCost = 0;
-    std::pair<unsigned, MVT> LT =
+    std::pair<InstructionCost, MVT> LT =
        thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
@ -2069,7 +2078,7 @@ public:
    }
    InstructionCost MinMaxCost = 0;
    InstructionCost ShuffleCost = 0;
-    std::pair<unsigned, MVT> LT =
+    std::pair<InstructionCost, MVT> LT =
        thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@ -1815,8 +1815,8 @@ public:
  int InstructionOpcodeToISD(unsigned Opcode) const;

  /// Estimate the cost of type-legalization and the legalized type.
-  std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
-                                              Type *Ty) const;
+  std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
+                                                          Type *Ty) const;

  /// @}

--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@ -1807,13 +1807,13 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
  llvm_unreachable("Unknown instruction type encountered!");
 }

-std::pair<int, MVT>
+std::pair<InstructionCost, MVT>
 TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
                                            Type *Ty) const {
  LLVMContext &C = Ty->getContext();
  EVT MTy = getValueType(DL, Ty);

-  int Cost = 1;
+  InstructionCost Cost = 1;
  // We keep legalizing the type until we find a legal kind. We assume that
  // the only operation that costs anything is the split. After splitting
  // we need to handle two types.
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@ -545,8 +545,10 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
    return false;

  // Get the total number of vector elements in the legalized types.
-  unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements();
-  unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
+  InstructionCost NumDstEls =
+      DstTyL.first * DstTyL.second.getVectorMinNumElements();
+  InstructionCost NumSrcEls =
+      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  // Return true if the legalized types have the same number of vector elements
  // and the destination element type size is twice that of the source type.
@ -906,7 +908,7 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,

  if (Index != -1U) {
    // Legalize the type.
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
@ -938,7 +940,7 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
                                         Opd2PropInfo, Args, CxtI);

  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
  // add in the widening overhead specified by the sub-target. Since the
@ -1536,7 +1538,7 @@ AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
  assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
         "Both vector needs to be scalable");

-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  InstructionCost LegalizationCost = 0;
  if (LT.first > 1) {
    Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
@ -1558,7 +1560,7 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE(
    TTI::TargetCostKind CostKind) {
  assert(!IsPairwise && "Cannot be pair wise to continue");

-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
  InstructionCost LegalizationCost = 0;
  if (LT.first > 1) {
    Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
@ -1593,7 +1595,7 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
                                             CostKind);

-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
  MVT MTy = LT.second;
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");
@ -1694,7 +1696,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
      { TTI::SK_Reverse, MVT::nxv4i1,   1 },
      { TTI::SK_Reverse, MVT::nxv2i1,   1 },
    };
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
      return LT.first * Entry->Cost;
  }
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@ -531,7 +531,7 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

-    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

    bool IsFloat = Ty->isFPOrFPVectorTy();
    // Assume that floating point arithmetic operations cost twice as much as
@ -569,7 +569,7 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
  }

  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  // Because we don't have any legal vector operations, but the legal types, we
@ -775,7 +775,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  }

  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);

  unsigned NElts = LT.second.isVector() ?
    LT.second.getVectorNumElements() : 1;
@ -857,7 +857,7 @@ GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
      OrigTy.getScalarSizeInBits() != 16)
    return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise, CostKind);

-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  return LT.first * getFullRateInstrCost();
 }

@ -875,7 +875,7 @@ GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
                                         CostKind);

-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  return LT.first * getHalfRateInstrCost(CostKind);
 }

--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@ -12257,10 +12257,11 @@ bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
  return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
 }

-std::pair<int, MVT>
+std::pair<InstructionCost, MVT>
 SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
                                          Type *Ty) const {
-  auto Cost = TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> Cost =
+      TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
  auto Size = DL.getTypeSizeInBits(Ty);
  // Maximum load or store can handle 8 dwords for scalar and 4 for
  // vector ALU. Let's assume anything above 8 dwords is expensive
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@ -491,8 +491,8 @@ public:
                                      const SIRegisterInfo &TRI,
                                      SIMachineFunctionInfo &Info) const;

-  std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
-                                              Type *Ty) const;
+  std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
+                                                          Type *Ty) const;
 };

 } // End namespace llvm
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@ -559,7 +559,7 @@ InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
        {ISD::FP_EXTEND, MVT::v2f32, 2},
        {ISD::FP_EXTEND, MVT::v4f32, 4}};

-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return AdjustCost(LT.first * Entry->Cost);
  }
@ -825,7 +825,7 @@ InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
    // Integer cross-lane moves are more expensive than float, which can
    // sometimes just be vmovs. Integer involve being passes to GPR registers,
    // causing more of a delay.
-    std::pair<unsigned, MVT> LT =
+    std::pair<InstructionCost, MVT> LT =
        getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType());
    return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
  }
@ -851,7 +851,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    // - may require one or more conditional mov (including an IT),
    // - can't operate directly on immediates,
    // - require live flags, which we can't copy around easily.
-    int Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
+    InstructionCost Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;

    // Possible IT instruction for Thumb2, or more for Thumb1.
    ++Cost;
@ -928,7 +928,8 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
        return Entry->Cost;
    }

-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, ValTy);
    return LT.first;
  }

@ -952,7 +953,8 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                    I);
    }

-    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, ValTy);
    int BaseCost = ST->getMVEVectorCostFactor(CostKind);
    // There are two types - the input that specifies the type of the compare
    // and the output vXi1 type. Because we don't know how the output will be
@ -1156,8 +1158,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
          {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
          {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};

-      std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
+      std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
      if (const auto *Entry =
              CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;
@ -1178,8 +1179,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
          {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
          {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};

-      std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
+      std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
      if (const auto *Entry =
              CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;
@ -1203,7 +1203,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,

          {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};

-      std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+      std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
      if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
                                              ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;
@ -1219,8 +1219,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
          {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
          {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};

-      std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
+      std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
      if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
                                              LT.second))
        return LT.first * Entry->Cost *
@ -1228,7 +1227,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
    }

    if (!Mask.empty()) {
-      std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+      std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
      if (Mask.size() <= LT.second.getVectorNumElements() &&
          (isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
           isVREVMask(Mask, LT.second, 64)))
@ -1264,7 +1263,7 @@ InstructionCost ARMTTIImpl::getArithmeticInstrCost(
    }
  }

-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  if (ST->hasNEON()) {
    const unsigned FunctionCallDivCost = 20;
@ -1403,7 +1402,7 @@ InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
      cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
    // Unaligned loads/stores are extremely inefficient.
    // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
    return LT.first * 4;
  }

@ -1504,14 +1503,14 @@ InstructionCost ARMTTIImpl::getGatherScatterOpCost(

  unsigned NumElems = VTy->getNumElements();
  unsigned EltSize = VTy->getScalarSizeInBits();
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);

  // For now, it is assumed that for the MVE gather instructions the loads are
  // all effectively serialised. This means the cost is the scalar cost
  // multiplied by the number of elements being loaded. This is possibly very
  // conservative, but even so we still end up vectorising loops because the
  // cost per iteration for many loops is lower than for scalar loops.
-  unsigned VectorCost =
+  InstructionCost VectorCost =
      NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
  // The scalarization cost should be a lot higher. We use the number of vector
  // elements plus the scalarization overhead.
@ -1598,7 +1597,7 @@ ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
                                             CostKind);

-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);

  static const CostTblEntry CostTblAdd[]{
      {ISD::ADD, MVT::v16i8, 1},
@ -1619,7 +1618,8 @@ ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
  EVT ValVT = TLI->getValueType(DL, ValTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);
  if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, ValTy);
    if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) ||
        (LT.second == MVT::v8i16 &&
         ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) ||
@ -1654,8 +1654,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
      break;
    Type *VT = ICA.getReturnType();

-    std::pair<int, MVT> LT =
-        TLI->getTypeLegalizationCost(DL, VT);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8) {
      // This is a base cost of 1 for the vqadd, plus 3 extract shifts if we
@ -1675,7 +1674,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
      break;
    Type *VT = ICA.getReturnType();

-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
@ -1686,7 +1685,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
    if (!ST->hasMVEFloatOps())
      break;
    Type *VT = ICA.getReturnType();
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
    if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
    break;
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@ -143,7 +143,8 @@ InstructionCost
 HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                      TTI::TargetCostKind CostKind) {
  if (ICA.getID() == Intrinsic::bswap) {
-    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
+    std::pair<InstructionCost, MVT> LT =
+        TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
    return LT.first + 2;
  }
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
@ -251,7 +252,7 @@ InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                                   TTI::TargetCostKind CostKind,
                                                   const Instruction *I) {
  if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
-    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
+    std::pair<InstructionCost, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
    if (Opcode == Instruction::FCmp)
      return LT.first + FloatFactor * getTypeNumElements(ValTy);
  }
@ -271,7 +272,7 @@ InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
                                         Opd2PropInfo, Args, CxtI);

  if (Ty->isVectorTy()) {
-    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
+    std::pair<InstructionCost, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
    if (LT.second.isFloatingPoint())
      return LT.first + FloatFactor * getTypeNumElements(Ty);
  }
@ -288,9 +289,12 @@ InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
    unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
    unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;

-    std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
-    std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
-    unsigned Cost = std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
+    std::pair<InstructionCost, MVT> SrcLT =
+        TLI.getTypeLegalizationCost(DL, SrcTy);
+    std::pair<InstructionCost, MVT> DstLT =
+        TLI.getTypeLegalizationCost(DL, DstTy);
+    InstructionCost Cost =
+        std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
    // TODO: Allow non-throughput costs that aren't binary.
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost == 0 ? 0 : 1;
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@ -375,7 +375,7 @@ InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@ -328,7 +328,8 @@ InstructionCost PPCTTIImpl::getUserCost(const User *U,

  if (U->getType()->isVectorTy()) {
    // Instructions that need to be split should cost more.
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, U->getType());
    return LT.first * BaseT::getUserCost(U, Operands, CostKind);
  }

@ -946,7 +947,7 @@ InstructionCost PPCTTIImpl::vectorCostAdjustment(InstructionCost Cost,
  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
    return Cost;

-  std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
+  std::pair<InstructionCost, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
  // If type legalization involves splitting the vector, we don't want to
  // double the cost at every step - only the last step.
  if (LT1.first != 1 || !LT1.second.isVector())
@ -957,7 +958,7 @@ InstructionCost PPCTTIImpl::vectorCostAdjustment(InstructionCost Cost,
    return Cost;

  if (Ty2) {
-    std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
+    std::pair<InstructionCost, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
    if (LT2.first != 1 || !LT2.second.isVector())
      return Cost;
  }
@ -988,7 +989,7 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                                           ArrayRef<int> Mask, int Index,
                                           Type *SubTp) {
  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

  // PPC, for both Altivec/VSX, support cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
@ -1113,7 +1114,7 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind);
  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

@ -1198,7 +1199,7 @@ InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
         "Expect a vector type for interleaved memory op");

  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);

  // Firstly, the cost of load/store operation.
  InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@ -185,7 +185,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
                                         Op2Info, Opd1PropInfo,
                                         Opd2PropInfo, Args, CxtI);
  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");
@ -966,7 +966,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                           VectorType *SubTp) {
  // 64-bit packed float vectors (v2f32) are widened to type v4f32.
  // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);

  Kind = improveShuffleKindFromMask(Kind, Mask);
  // Treat Transpose as 2-op shuffles - there's no difference in lowering.
@ -985,7 +985,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
    int NumElts = LT.second.getVectorNumElements();
    if ((Index % NumElts) == 0)
      return 0;
-    std::pair<int, MVT> SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
+    std::pair<InstructionCost, MVT> SubLT =
+        TLI->getTypeLegalizationCost(DL, SubTp);
    if (SubLT.second.isVector()) {
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
@ -1031,7 +1032,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
  // isn't free, because we need to preserve the rest of the wide vector.
  if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) {
    int NumElts = LT.second.getVectorNumElements();
-    std::pair<int, MVT> SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
+    std::pair<InstructionCost, MVT> SubLT =
+        TLI->getTypeLegalizationCost(DL, SubTp);
    if (SubLT.second.isVector()) {
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
@ -1091,12 +1093,12 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
      // Number of source vectors after legalization:
      unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
      // Number of destination vectors after legalization:
-      unsigned NumOfDests = LT.first;
+      InstructionCost NumOfDests = LT.first;

      auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(),
                                              LegalVT.getVectorNumElements());

-      unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
+      InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
      return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy,
                                            None, 0, nullptr);
    }
@ -1107,8 +1109,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
  // For 2-input shuffles, we must account for splitting the 2 inputs into many.
  if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
    // We assume that source and destination have the same vector type.
-    int NumOfDests = LT.first;
-    int NumOfShufflesPerDest = LT.first * 2 - 1;
+    InstructionCost NumOfDests = LT.first;
+    InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1;
    LT.first = NumOfDests * NumOfShufflesPerDest;
  }

@ -2024,8 +2026,9 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
    { ISD::TRUNCATE,    MVT::v2i32,  MVT::v2i64,  1 }, // PSHUFD
  };

-  std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
-  std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
+  std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
+  std::pair<InstructionCost, MVT> LTDest =
+      TLI->getTypeLegalizationCost(DL, Dst);

  if (ST->hasSSE2() && !ST->hasAVX()) {
    if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
@ -2114,7 +2117,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                     I);

  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);

  MVT MTy = LT.second;

@ -2804,7 +2807,7 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

  if (ISD != ISD::DELETED_NODE) {
    // Legalize the type.
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy);
    MVT MTy = LT.second;

    // Attempt to lookup cost.
@ -2824,7 +2827,8 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
      return LT.first * Cost;
    }

-    auto adjustTableCost = [](const CostTblEntry &Entry, int LegalizationCost,
+    auto adjustTableCost = [](const CostTblEntry &Entry,
+                              InstructionCost LegalizationCost,
                              FastMathFlags FMF) {
      // If there are no NANs to deal with, then these are reduced to a
      // single MIN** or MAX** instruction instead of the MIN/CMP/SELECT that we
@ -3006,7 +3010,8 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

  if (ISD != ISD::DELETED_NODE) {
    // Legalize the type.
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, RetTy);
    MVT MTy = LT.second;

    // Attempt to lookup cost.
@ -3045,7 +3050,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
  if (Index != -1U && (Opcode == Instruction::ExtractElement ||
                       Opcode == Instruction::InsertElement)) {
    // Legalize the type.
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
@ -3133,7 +3138,7 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
  // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much
  // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT.
  if (Insert) {
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
    MVT MScalarTy = LT.second.getScalarType();

    if ((MScalarTy == MVT::i16 && ST->hasSSE2()) ||
@ -3157,8 +3162,10 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
        // Case#2: inserting into 5th index needs extracti128 + vpinsrd +
        // inserti128.
        // Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128.
-        unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * LT.first;
-        unsigned NumElts = LT.second.getVectorNumElements() * LT.first;
+        const int CostValue = *LT.first.getValue();
+        assert(CostValue >= 0 && "Negative cost!");
+        unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * CostValue;
+        unsigned NumElts = LT.second.getVectorNumElements() * CostValue;
        APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts);
        unsigned Scale = NumElts / Num128Lanes;
        // We iterate each 128-lane, and check if we need a
@ -3249,7 +3256,8 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
        Cost +=
            getMemoryOpCost(Opcode, SubTy, Alignment, AddressSpace, CostKind);

-        std::pair<int, MVT> LST = TLI->getTypeLegalizationCost(DL, SubTy);
+        std::pair<InstructionCost, MVT> LST =
+            TLI->getTypeLegalizationCost(DL, SubTy);
        if (!LST.second.isVector()) {
          APInt DemandedElts =
              APInt::getBitsSet(NumElem, NumElemDone, NumElemDone + Factor);
@ -3267,10 +3275,10 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
  }

  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

  // Each load/store unit costs 1.
-  int Cost = LT.first * 1;
+  InstructionCost Cost = LT.first * 1;

  // This isn't exactly right. We're using slow unaligned 32-byte accesses as a
  // proxy for a double-pumped AVX memory interface such as on Sandybridge.
@ -3316,7 +3324,7 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
  }

  // Legalize the type.
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
  auto VT = TLI->getValueType(DL, SrcVTy);
  InstructionCost Cost = 0;
  if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
@ -3431,7 +3439,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
        return Entry->Cost;
  }

-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);

  MVT MTy = LT.second;

@ -3621,7 +3629,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,

 InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
                                          bool IsUnsigned) {
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  MVT MTy = LT.second;

@ -3756,7 +3764,7 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
    return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned,
                                         CostKind);

-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);

  MVT MTy = LT.second;

@ -4184,9 +4192,12 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy,

  auto *IndexVTy = FixedVectorType::get(
      IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
-  std::pair<int, MVT> IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy);
-  std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy);
-  int SplitFactor = std::max(IdxsLT.first, SrcLT.first);
+  std::pair<InstructionCost, MVT> IdxsLT =
+      TLI->getTypeLegalizationCost(DL, IndexVTy);
+  std::pair<InstructionCost, MVT> SrcLT =
+      TLI->getTypeLegalizationCost(DL, SrcVTy);
+  InstructionCost::CostType SplitFactor =
+      *std::max(IdxsLT.first, SrcLT.first).getValue();
  if (SplitFactor > 1) {
    // Handle splitting of vector of pointers
    auto *SplitSrcTy =
@ -4722,7 +4733,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
        Indices.size() ? Indices.size() : Factor;
    auto *ResultTy = FixedVectorType::get(VecTy->getElementType(),
                                          VecTy->getNumElements() / Factor);
-    unsigned NumOfResults =
+    InstructionCost NumOfResults =
        getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
        NumOfLoadsInInterleaveGrp;

@ -4738,7 +4749,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
    // The SK_MergeTwoSrc shuffle clobbers one of src operands.
    // When we have more than one destination, we need additional instructions
    // to keep sources.
-    unsigned NumOfMoves = 0;
+    InstructionCost NumOfMoves = 0;
    if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc)
      NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;