
[NFC][CostModel] Add TargetCostKind to relevant APIs

Make the kind of cost explicit throughout the cost model. Apart from
making the cost clear, this will allow the generic parts to calculate
better costs and will let backends approximate and correlate the
different costs if they wish. It will also help simplify the cost model
around immediate and intrinsic costs, where we currently have multiple
APIs.
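
The kinds in question are the values of TargetTransformInfo::TargetCostKind
(aliased as TTI below). A sketch of the enum for reference: only
TCK_RecipThroughput and TCK_SizeAndLatency appear as defaults in this diff;
the other two values follow the contemporary upstream definition, and the
comments are paraphrased:

    enum TargetCostKind {
      TCK_RecipThroughput, // Reciprocal throughput.
      TCK_Latency,         // Instruction latency.
      TCK_CodeSize,        // Code size.
      TCK_SizeAndLatency   // Combined size and latency.
    };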

RFC thread:
http://lists.llvm.org/pipermail/llvm-dev/2020-April/141263.html

Differential Revision: https://reviews.llvm.org/D79002
Sam Parker 2020-04-28 14:11:27 +01:00
parent 52f56e2249
commit c8018d2237
32 changed files with 1008 additions and 572 deletions
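
Because the CostKind parameters on the public TargetTransformInfo entry
points are defaulted, existing call sites keep compiling. A new caller can
instead name the kind it cares about; a minimal hypothetical sketch against
the new getArithmeticInstrCost signature shown below (costOfAdd is for
illustration only, not part of this patch):

    // Ask one API for two different kinds of cost, chosen at the call site.
    // The operand kind/property parameters after CostKind keep their defaults.
    int costOfAdd(const TargetTransformInfo &TTI, Type *Ty, bool OptForSize) {
      TargetTransformInfo::TargetCostKind Kind =
          OptForSize ? TargetTransformInfo::TCK_CodeSize
                     : TargetTransformInfo::TCK_RecipThroughput;
      return TTI.getArithmeticInstrCost(Instruction::Add, Ty, Kind);
    }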

include/llvm/Analysis/TargetTransformInfo.h

@@ -105,6 +105,9 @@ struct HardwareLoopInfo {
   bool canAnalyze(LoopInfo &LI);
 };
 
+class TargetTransformInfo;
+typedef TargetTransformInfo TTI;
+
 /// This pass provides access to the codegen interfaces that are needed
 /// for IR-level transformations.
 class TargetTransformInfo {
@@ -205,7 +208,8 @@ public:
   /// Estimate the cost of a GEP operation when lowered.
   int getGEPCost(Type *PointeeType, const Value *Ptr,
-                 ArrayRef<const Value *> Operands) const;
+                 ArrayRef<const Value *> Operands,
+                 TargetCostKind CostKind = TCK_SizeAndLatency) const;
 
   /// Estimate the cost of a EXT operation when lowered.
   int getExtCost(const Instruction *I, const Value *Src) const;
@@ -233,12 +237,14 @@ public:
   /// Estimate the cost of an intrinsic when lowered.
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                        ArrayRef<Type *> ParamTys,
-                       const User *U = nullptr) const;
+                       const User *U = nullptr,
+                       TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
 
   /// Estimate the cost of an intrinsic when lowered.
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                        ArrayRef<const Value *> Arguments,
-                       const User *U = nullptr) const;
+                       const User *U = nullptr,
+                       TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
 
   /// \return the expected cost of a memcpy, which could e.g. depend on the
   /// source/destination type and alignment and the number of bytes copied.
@@ -702,15 +708,15 @@ public:
   /// Return the expected cost of materializing for the given integer
   /// immediate of the specified type.
-  int getIntImmCost(const APInt &Imm, Type *Ty) const;
+  int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;
 
   /// Return the expected cost of materialization for the given integer
   /// immediate of the specified type for a given instruction. The cost can be
   /// zero if the immediate can be folded into the specified instruction.
   int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
-                        Type *Ty) const;
+                        Type *Ty, TargetCostKind CostKind) const;
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty) const;
+                          Type *Ty, TargetCostKind CostKind) const;
 
   /// Return the expected cost for the given integer when optimising
   /// for size. This is different than the other integer immediate cost
@@ -876,7 +882,9 @@ public:
   /// \p CxtI is the optional original context instruction, if one exists, to
   /// provide even more information.
   int getArithmeticInstrCost(
-      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+      unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      OperandValueKind Opd1Info = OK_AnyValue,
       OperandValueKind Opd2Info = OK_AnyValue,
       OperandValueProperties Opd1PropInfo = OP_None,
       OperandValueProperties Opd2PropInfo = OP_None,
@@ -895,6 +903,7 @@ public:
   /// zext, etc. If there is an existing instruction that holds Opcode, it
   /// may be passed in the 'I' parameter.
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                        const Instruction *I = nullptr) const;
 
   /// \return The expected cost of a sign- or zero-extended vector extract. Use
@@ -904,12 +913,14 @@ public:
   /// \return The expected cost of control-flow related instructions such as
   /// Phi, Ret, Br.
-  int getCFInstrCost(unsigned Opcode) const;
+  int getCFInstrCost(unsigned Opcode,
+                     TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
 
   /// \returns The expected cost of compare and select instructions. If there
   /// is an existing instruction that holds Opcode, it may be passed in the
   /// 'I' parameter.
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
+                         TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                          const Instruction *I = nullptr) const;
 
   /// \return The expected cost of vector Insert and Extract.
@@ -919,11 +930,13 @@ public:
   /// \return The cost of Load and Store instructions.
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace,
+                      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                       const Instruction *I = nullptr) const;
 
   /// \return The cost of masked Load and Store instructions.
-  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                            unsigned AddressSpace) const;
+  int getMaskedMemoryOpCost(
+      unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
 
   /// \return The cost of Gather or Scatter operation
   /// \p Opcode - is a type of memory access Load or Store
@@ -934,9 +947,10 @@ public:
   /// \p Alignment - alignment of single element
   /// \p I - the optional original context instruction, if one exists, e.g. the
   /// load/store to transform or the call to the gather/scatter intrinsic
-  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                             bool VariableMask, unsigned Alignment,
-                             const Instruction *I = nullptr) const;
+  int getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
 
   /// \return The cost of the interleaved memory operation.
   /// \p Opcode is the memory operation code
@@ -948,11 +962,11 @@ public:
   /// \p AddressSpace is address space of the pointer.
   /// \p UseMaskForCond indicates if the memory access is predicated.
   /// \p UseMaskForGaps indicates if gaps should be masked.
-  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
-                                 ArrayRef<unsigned> Indices, unsigned Alignment,
-                                 unsigned AddressSpace,
-                                 bool UseMaskForCond = false,
-                                 bool UseMaskForGaps = false) const;
+  int getInterleavedMemoryOpCost(
+      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+      unsigned Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
 
   /// Calculate the cost of performing a vector reduction.
   ///
@@ -967,33 +981,39 @@ public:
   /// Split:
   /// (v0, v1, v2, v3)
   /// ((v0+v2), (v1+v3), undef, undef)
-  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                 bool IsPairwiseForm) const;
-  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                             bool IsPairwiseForm, bool IsUnsigned) const;
+  int getArithmeticReductionCost(
+      unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
+  int getMinMaxReductionCost(
+      VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
 
   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
   /// Three cases are handled: 1. scalar instruction 2. vector instruction
   /// 3. scalar instruction which is to be vectorized with VF.
   /// I is the optional original context instruction holding the call to the
   /// intrinsic
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1,
-                            const Instruction *I = nullptr) const;
+  int getIntrinsicInstrCost(
+      Intrinsic::ID ID, Type *RetTy, ArrayRef<Value *> Args,
+      FastMathFlags FMF, unsigned VF = 1,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
 
   /// \returns The cost of Intrinsic instructions. Types analysis only.
   /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
   /// arguments and the return value will be computed based on types.
   /// I is the optional original context instruction holding the call to the
   /// intrinsic
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
-                            FastMathFlags FMF,
-                            unsigned ScalarizationCostPassed = UINT_MAX,
-                            const Instruction *I = nullptr) const;
+  int getIntrinsicInstrCost(
+      Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
+      unsigned ScalarizationCostPassed = UINT_MAX,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
 
   /// \returns The cost of Call instructions.
-  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
+  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
+                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
 
   /// \returns The number of pieces into which the provided type must be
   /// split during legalization. Zero is returned when the answer is unknown.
@@ -1166,15 +1186,18 @@ public:
     virtual ~Concept() = 0;
     virtual const DataLayout &getDataLayout() const = 0;
     virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
-                           ArrayRef<const Value *> Operands) = 0;
+                           ArrayRef<const Value *> Operands,
+                           TTI::TargetCostKind CostKind) = 0;
     virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
     virtual unsigned getInliningThresholdMultiplier() = 0;
     virtual int getInlinerVectorBonusPercent() = 0;
     virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                                 ArrayRef<Type *> ParamTys, const User *U) = 0;
+                                 ArrayRef<Type *> ParamTys, const User *U,
+                                 enum TargetCostKind CostKind) = 0;
     virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                  ArrayRef<const Value *> Arguments,
-                                 const User *U) = 0;
+                                 const User *U,
+                                 enum TargetCostKind CostKind) = 0;
     virtual int getMemcpyCost(const Instruction *I) = 0;
     virtual unsigned
     getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
@@ -1261,11 +1284,13 @@ public:
     virtual int getFPOpCost(Type *Ty) = 0;
     virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                       const APInt &Imm, Type *Ty) = 0;
-    virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
+    virtual int getIntImmCost(const APInt &Imm, Type *Ty,
+                              TargetCostKind CostKind) = 0;
     virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
-                                  Type *Ty) = 0;
+                                  Type *Ty, TargetCostKind CostKind) = 0;
     virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                    const APInt &Imm, Type *Ty) = 0;
+                                    const APInt &Imm, Type *Ty,
+                                    TargetCostKind CostKind) = 0;
     virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
     virtual unsigned getRegisterClassForType(bool Vector,
                                              Type *Ty = nullptr) const = 0;
@@ -1306,47 +1331,65 @@ public:
     virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
     virtual unsigned getArithmeticInstrCost(
-        unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+        unsigned Opcode, Type *Ty,
+        TTI::TargetCostKind CostKind,
+        OperandValueKind Opd1Info,
         OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
         OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
         const Instruction *CxtI = nullptr) = 0;
     virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
                                VectorType *SubTp) = 0;
     virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) = 0;
     virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                          VectorType *VecTy, unsigned Index) = 0;
-    virtual int getCFInstrCost(unsigned Opcode) = 0;
+    virtual int getCFInstrCost(unsigned Opcode,
+                               TTI::TargetCostKind CostKind) = 0;
     virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   TTI::TargetCostKind CostKind,
                                    const Instruction *I) = 0;
     virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                    unsigned Index) = 0;
     virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                                unsigned AddressSpace, const Instruction *I) = 0;
+                                unsigned AddressSpace,
+                                TTI::TargetCostKind CostKind,
+                                const Instruction *I) = 0;
     virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                       unsigned Alignment,
-                                      unsigned AddressSpace) = 0;
-    virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                                       bool VariableMask, unsigned Alignment,
-                                       const Instruction *I = nullptr) = 0;
+                                      unsigned AddressSpace,
+                                      TTI::TargetCostKind CostKind) = 0;
+    virtual int getGatherScatterOpCost(
+        unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+        unsigned Alignment, TTI::TargetCostKind CostKind,
+        const Instruction *I = nullptr) = 0;
     virtual int
     getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                ArrayRef<unsigned> Indices, unsigned Alignment,
-                               unsigned AddressSpace, bool UseMaskForCond = false,
+                               unsigned AddressSpace,
+                               TTI::TargetCostKind CostKind,
+                               bool UseMaskForCond = false,
                                bool UseMaskForGaps = false) = 0;
     virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                           bool IsPairwiseForm) = 0;
+                                           bool IsPairwiseForm,
+                                           TTI::TargetCostKind CostKind) = 0;
     virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                                       bool IsPairwiseForm, bool IsUnsigned) = 0;
+                                       bool IsPairwiseForm, bool IsUnsigned,
+                                       TTI::TargetCostKind CostKind) = 0;
     virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                       ArrayRef<Type *> Tys, FastMathFlags FMF,
                                       unsigned ScalarizationCostPassed,
+                                      TTI::TargetCostKind CostKind,
                                       const Instruction *I) = 0;
     virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                       ArrayRef<Value *> Args, FastMathFlags FMF,
-                                      unsigned VF, const Instruction *I) = 0;
+                                      unsigned VF,
+                                      TTI::TargetCostKind CostKind,
+                                      const Instruction *I) = 0;
     virtual int getCallInstrCost(Function *F, Type *RetTy,
-                                 ArrayRef<Type *> Tys) = 0;
+                                 ArrayRef<Type *> Tys,
+                                 TTI::TargetCostKind CostKind) = 0;
     virtual unsigned getNumberOfParts(Type *Tp) = 0;
     virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                           const SCEV *Ptr) = 0;
@@ -1408,7 +1451,8 @@ public:
     }
     int getGEPCost(Type *PointeeType, const Value *Ptr,
-                   ArrayRef<const Value *> Operands) override {
+                   ArrayRef<const Value *> Operands,
+                   enum TargetTransformInfo::TargetCostKind CostKind) override {
       return Impl.getGEPCost(PointeeType, Ptr, Operands);
     }
     int getExtCost(const Instruction *I, const Value *Src) override {
@@ -1422,13 +1466,15 @@ public:
     }
     int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                          ArrayRef<Type *> ParamTys,
-                         const User *U = nullptr) override {
-      return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
+                         const User *U = nullptr,
+                         TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
+      return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
     }
     int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                          ArrayRef<const Value *> Arguments,
-                         const User *U = nullptr) override {
-      return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
+                         const User *U = nullptr,
+                         TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
+      return Impl.getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
     }
     int getMemcpyCost(const Instruction *I) override {
       return Impl.getMemcpyCost(I);
@@ -1613,16 +1659,17 @@ public:
                               Type *Ty) override {
       return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
     }
-    int getIntImmCost(const APInt &Imm, Type *Ty) override {
-      return Impl.getIntImmCost(Imm, Ty);
+    int getIntImmCost(const APInt &Imm, Type *Ty,
+                      TargetCostKind CostKind) override {
+      return Impl.getIntImmCost(Imm, Ty, CostKind);
     }
     int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
-                          Type *Ty) override {
-      return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty);
+                          Type *Ty, TargetCostKind CostKind) override {
+      return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind);
    }
     int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                            Type *Ty) override {
-      return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty);
+                            Type *Ty, TargetCostKind CostKind) override {
+      return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
     }
     unsigned getNumberOfRegisters(unsigned ClassID) const override {
       return Impl.getNumberOfRegisters(ClassID);
@@ -1698,13 +1745,14 @@ public:
       return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
     }
     unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                    TTI::TargetCostKind CostKind,
                                     OperandValueKind Opd1Info,
                                     OperandValueKind Opd2Info,
                                     OperandValueProperties Opd1PropInfo,
                                     OperandValueProperties Opd2PropInfo,
                                     ArrayRef<const Value *> Args,
                                     const Instruction *CxtI = nullptr) override {
-      return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+      return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo, Args, CxtI);
     }
     int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
@@ -1712,67 +1760,84 @@ public:
       return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
     }
     int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I) override {
-      return Impl.getCastInstrCost(Opcode, Dst, Src, I);
+      return Impl.getCastInstrCost(Opcode, Dst, Src, CostKind, I);
    }
     int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                                  unsigned Index) override {
       return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
     }
-    int getCFInstrCost(unsigned Opcode) override {
-      return Impl.getCFInstrCost(Opcode);
+    int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {
+      return Impl.getCFInstrCost(Opcode, CostKind);
     }
     int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                           TTI::TargetCostKind CostKind,
                            const Instruction *I) override {
-      return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+      return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
    }
     int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
       return Impl.getVectorInstrCost(Opcode, Val, Index);
     }
     int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                        unsigned AddressSpace, const Instruction *I) override {
-      return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+                        unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                        const Instruction *I) override {
+      return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                  CostKind, I);
    }
     int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                              unsigned AddressSpace) override {
-      return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+                              unsigned AddressSpace,
+                              TTI::TargetCostKind CostKind) override {
+      return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                        CostKind);
    }
-    int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                               bool VariableMask, unsigned Alignment,
-                               const Instruction *I = nullptr) override {
+    int getGatherScatterOpCost(
+        unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+        unsigned Alignment, TTI::TargetCostKind CostKind,
+        const Instruction *I = nullptr) override {
       return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                         Alignment, I);
+                                         Alignment, CostKind, I);
    }
     int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                    ArrayRef<unsigned> Indices, unsigned Alignment,
-                                   unsigned AddressSpace, bool UseMaskForCond,
+                                   unsigned AddressSpace,
+                                   TTI::TargetCostKind CostKind,
+                                   bool UseMaskForCond,
                                    bool UseMaskForGaps) override {
       return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                              UseMaskForCond, UseMaskForGaps);
    }
     int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                   bool IsPairwiseForm) override {
-      return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
+                                   bool IsPairwiseForm,
+                                   TTI::TargetCostKind CostKind) override {
+      return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
+                                             CostKind);
    }
     int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                               bool IsPairwiseForm, bool IsUnsigned) override {
-      return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
+                               bool IsPairwiseForm, bool IsUnsigned,
+                               TTI::TargetCostKind CostKind) override {
+      return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
+                                         CostKind);
    }
     int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
                               FastMathFlags FMF, unsigned ScalarizationCostPassed,
+                              TTI::TargetCostKind CostKind,
                               const Instruction *I) override {
       return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                        ScalarizationCostPassed, I);
+                                        ScalarizationCostPassed, CostKind, I);
    }
     int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                               ArrayRef<Value *> Args, FastMathFlags FMF,
-                              unsigned VF, const Instruction *I) override {
-      return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+                              unsigned VF,
+                              TTI::TargetCostKind CostKind,
+                              const Instruction *I) override {
+      return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
    }
     int getCallInstrCost(Function *F, Type *RetTy,
-                         ArrayRef<Type *> Tys) override {
-      return Impl.getCallInstrCost(F, RetTy, Tys);
+                         ArrayRef<Type *> Tys,
+                         TTI::TargetCostKind CostKind) override {
+      return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
    }
     unsigned getNumberOfParts(Type *Tp) override {
       return Impl.getNumberOfParts(Tp);

include/llvm/Analysis/TargetTransformInfoImpl.h

@@ -44,7 +44,8 @@ public:
   const DataLayout &getDataLayout() const { return DL; }
 
   int getGEPCost(Type *PointeeType, const Value *Ptr,
-                 ArrayRef<const Value *> Operands) {
+                 ArrayRef<const Value *> Operands,
+                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
     // In the basic model, we just assume that all-constant GEPs will be folded
     // into their uses via addressing modes.
     for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -285,15 +286,19 @@ public:
     return 0;
   }
 
-  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
+  unsigned getIntImmCost(const APInt &Imm, Type *Ty,
+                         TTI::TargetCostKind CostKind) {
+    return TTI::TCC_Basic;
+  }
 
   unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                             Type *Ty) {
+                             Type *Ty, TTI::TargetCostKind CostKind) {
     return TTI::TCC_Free;
   }
 
   unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                               const APInt &Imm, Type *Ty) {
+                               const APInt &Imm, Type *Ty,
+                               TTI::TargetCostKind CostKind) {
     return TTI::TCC_Free;
   }
@@ -366,6 +371,7 @@ public:
   unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
 
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                  TTI::TargetCostKind CostKind,
                                   TTI::OperandValueKind Opd1Info,
                                   TTI::OperandValueKind Opd2Info,
                                   TTI::OperandValueProperties Opd1PropInfo,
@@ -381,6 +387,7 @@ public:
   }
 
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                            TTI::TargetCostKind CostKind,
                             const Instruction *I) {
     switch (Opcode) {
     default:
@@ -419,10 +426,12 @@ public:
     return 1;
   }
 
-  unsigned getCFInstrCost(unsigned Opcode) { return 1; }
+  unsigned getCFInstrCost(unsigned Opcode,
+                          TTI::TargetCostKind CostKind) { return 1; }
 
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
-                              const Instruction *I) {
+                              TTI::TargetCostKind CostKind,
+                              const Instruction *I) const {
     return 1;
   }
@@ -431,18 +440,21 @@ public:
   }
 
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                           unsigned AddressSpace, const Instruction *I) {
+                           unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                           const Instruction *I) const {
     return 1;
   }
 
   unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                 unsigned AddressSpace) {
+                                 unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind) {
    return 1;
  }
 
-  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                                  bool VariableMask, unsigned Alignment,
-                                  const Instruction *I = nullptr) {
+  unsigned getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind,
+      const Instruction *I = nullptr) {
    return 1;
  }
@@ -450,24 +462,28 @@ public:
                                       unsigned Factor,
                                       ArrayRef<unsigned> Indices,
                                       unsigned Alignment, unsigned AddressSpace,
-                                      bool UseMaskForCond = false,
-                                      bool UseMaskForGaps = false) {
+                                      TTI::TargetCostKind CostKind,
+                                      bool UseMaskForCond,
+                                      bool UseMaskForGaps) {
    return 1;
  }
 
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef<Type *> Tys, FastMathFlags FMF,
                                  unsigned ScalarizationCostPassed,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) {
    return 1;
  }
 
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef<Value *> Args, FastMathFlags FMF,
-                                 unsigned VF, const Instruction *I) {
+                                 unsigned VF, TTI::TargetCostKind CostKind,
+                                 const Instruction *I) {
    return 1;
  }
 
-  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
+                            TTI::TargetCostKind CostKind) {
    return 1;
  }
@@ -478,9 +494,11 @@ public:
    return 0;
  }
 
-  unsigned getArithmeticReductionCost(unsigned, VectorType *, bool) { return 1; }
+  unsigned getArithmeticReductionCost(unsigned, VectorType *, bool,
+                                      TTI::TargetCostKind) { return 1; }
 
-  unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool) { return 1; }
+  unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool,
+                                  TTI::TargetCostKind) { return 1; }
 
   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
@@ -680,7 +698,8 @@ public:
   using BaseT::getGEPCost;
 
   int getGEPCost(Type *PointeeType, const Value *Ptr,
-                 ArrayRef<const Value *> Operands) {
+                 ArrayRef<const Value *> Operands,
+                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
     assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
     // TODO: will remove this when pointers have an opaque type.
     assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
@@ -738,7 +757,8 @@ public:
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> ParamTys, const User *U) {
+                            ArrayRef<Type *> ParamTys, const User *U,
+                            TTI::TargetCostKind TCK_SizeAndLatency) {
     switch (IID) {
     default:
       // Intrinsics rarely (if ever) have normal argument setup constraints.
@@ -782,7 +802,8 @@ public:
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<const Value *> Arguments, const User *U) {
+                            ArrayRef<const Value *> Arguments, const User *U,
+                            TTI::TargetCostKind CostKind) {
     // Delegate to the generic intrinsic handling code. This mostly provides an
     // opportunity for targets to (for example) special case the cost of
     // certain intrinsics based on constants used as arguments.
@@ -790,11 +811,12 @@ public:
     ParamTys.reserve(Arguments.size());
     for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
       ParamTys.push_back(Arguments[Idx]->getType());
-    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
+    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U,
+                                                    CostKind);
   }
 
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
-                       enum TTI::TargetCostKind CostKind) {
+                       TTI::TargetCostKind CostKind) {
     auto *TargetTTI = static_cast<T *>(this);
 
     // FIXME: Unlikely to be true for anything but CodeSize.
@@ -805,7 +827,7 @@ public:
       if (Intrinsic::ID IID = F->getIntrinsicID()) {
         SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
         return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(),
-                                           ParamTys, U);
+                                           ParamTys, U, CostKind);
       }
 
       if (!TargetTTI->isLoweredToCall(F))
@@ -849,12 +871,12 @@ public:
     case Instruction::IntToPtr:
     case Instruction::PtrToInt:
     case Instruction::Trunc:
-      if (getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free ||
-          TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free)
+      if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free ||
+          TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free)
         return TTI::TCC_Free;
       break;
     case Instruction::BitCast:
-      if (getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free)
+      if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free)
         return TTI::TCC_Free;
       break;
     case Instruction::FPExt:

include/llvm/CodeGen/BasicTTIImpl.h

@ -305,12 +305,14 @@ public:
} }
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments, const User *U) { ArrayRef<const Value *> Arguments, const User *U,
return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U); TTI::TargetCostKind CostKind) {
return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
} }
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys, const User *U) { ArrayRef<Type *> ParamTys, const User *U,
TTI::TargetCostKind CostKind) {
if (IID == Intrinsic::cttz) { if (IID == Intrinsic::cttz) {
if (getTLI()->isCheapToSpeculateCttz()) if (getTLI()->isCheapToSpeculateCttz())
return TargetTransformInfo::TCC_Basic; return TargetTransformInfo::TCC_Basic;
@ -323,7 +325,7 @@ public:
return TargetTransformInfo::TCC_Expensive; return TargetTransformInfo::TCC_Expensive;
} }
return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
} }
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
@ -625,6 +627,7 @@ public:
unsigned getArithmeticInstrCost( unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty, unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@ -661,7 +664,7 @@ public:
if (auto *VTy = dyn_cast<VectorType>(Ty)) { if (auto *VTy = dyn_cast<VectorType>(Ty)) {
unsigned Num = VTy->getNumElements(); unsigned Num = VTy->getNumElements();
unsigned Cost = static_cast<T *>(this)->getArithmeticInstrCost( unsigned Cost = static_cast<T *>(this)->getArithmeticInstrCost(
Opcode, VTy->getScalarType()); Opcode, VTy->getScalarType(), CostKind);
// Return the cost of multiple scalar invocation plus the cost of // Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. // inserting and extracting the values.
return getScalarizationOverhead(VTy, Args) + Num * Cost; return getScalarizationOverhead(VTy, Args) + Num * Cost;
@ -691,6 +694,7 @@ public:
} }
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) { const Instruction *I = nullptr) {
const TargetLoweringBase *TLI = getTLI(); const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode); int ISD = TLI->InstructionOpcodeToISD(Opcode);
@ -802,14 +806,16 @@ public:
unsigned SplitCost = unsigned SplitCost =
(!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0; (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
return SplitCost + return SplitCost +
(2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, I)); (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy,
CostKind, I));
} }
// In other cases where the source or destination are illegal, assume // In other cases where the source or destination are illegal, assume
// the operation will get scalarized. // the operation will get scalarized.
unsigned Num = DstVTy->getNumElements(); unsigned Num = DstVTy->getNumElements();
unsigned Cost = static_cast<T *>(this)->getCastInstrCost( unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
Opcode, Dst->getScalarType(), Src->getScalarType(), I); Opcode, Dst->getScalarType(), Src->getScalarType(),
CostKind, I);
// Return the cost of multiple scalar invocation plus the cost of // Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. // inserting and extracting the values.
@ -835,16 +841,18 @@ public:
return static_cast<T *>(this)->getVectorInstrCost( return static_cast<T *>(this)->getVectorInstrCost(
Instruction::ExtractElement, VecTy, Index) + Instruction::ExtractElement, VecTy, Index) +
static_cast<T *>(this)->getCastInstrCost(Opcode, Dst, static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
VecTy->getElementType()); VecTy->getElementType(),
TTI::TCK_RecipThroughput);
} }
unsigned getCFInstrCost(unsigned Opcode) { unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
// Branches are assumed to be predicted. // Branches are assumed to be predicted.
return 0; return 0;
} }
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I) { TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
const TargetLoweringBase *TLI = getTLI(); const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode); int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode"); assert(ISD && "Invalid opcode");
@ -872,7 +880,7 @@ public:
if (CondTy) if (CondTy)
CondTy = CondTy->getScalarType(); CondTy = CondTy->getScalarType();
unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
Opcode, ValVTy->getScalarType(), CondTy, I); Opcode, ValVTy->getScalarType(), CondTy, CostKind, I);
// Return the cost of multiple scalar invocation plus the cost of // Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. // inserting and extracting the values.
@ -892,6 +900,7 @@ public:
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) { const Instruction *I = nullptr) {
assert(!Src->isVoidTy() && "Invalid type"); assert(!Src->isVoidTy() && "Invalid type");
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
@ -926,6 +935,7 @@ public:
unsigned Factor, unsigned Factor,
ArrayRef<unsigned> Indices, ArrayRef<unsigned> Indices,
unsigned Alignment, unsigned AddressSpace, unsigned Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForCond = false,
bool UseMaskForGaps = false) { bool UseMaskForGaps = false) {
auto *VT = cast<VectorType>(VecTy); auto *VT = cast<VectorType>(VecTy);
@ -940,10 +950,10 @@ public:
unsigned Cost; unsigned Cost;
if (UseMaskForCond || UseMaskForGaps) if (UseMaskForCond || UseMaskForGaps)
Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
Opcode, VecTy, Alignment, AddressSpace); Opcode, VecTy, Alignment, AddressSpace, CostKind);
else else
Cost = static_cast<T *>(this)->getMemoryOpCost( Cost = static_cast<T *>(this)->getMemoryOpCost(
Opcode, VecTy, MaybeAlign(Alignment), AddressSpace); Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
// Legalize the vector type, and get the legalized and unlegalized type // Legalize the vector type, and get the legalized and unlegalized type
// sizes. // sizes.
@ -1071,16 +1081,18 @@ public:
// inside the loop. // inside the loop.
if (UseMaskForGaps) if (UseMaskForGaps)
Cost += static_cast<T *>(this)->getArithmeticInstrCost( Cost += static_cast<T *>(this)->getArithmeticInstrCost(
BinaryOperator::And, MaskVT); BinaryOperator::And, MaskVT, CostKind);
return Cost; return Cost;
} }
/// Get intrinsic cost based on arguments. /// Get intrinsic cost based on arguments.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, unsigned getIntrinsicInstrCost(
ArrayRef<Value *> Args, FastMathFlags FMF, Intrinsic::ID IID, Type *RetTy, ArrayRef<Value *> Args,
unsigned VF = 1, FastMathFlags FMF, unsigned VF = 1,
const Instruction *I = nullptr) { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) {
unsigned RetVF = unsigned RetVF =
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getNumElements() : 1); (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getNumElements() : 1);
assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
@ -1111,7 +1123,8 @@ public:
} }
return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
ScalarizationCost); ScalarizationCost, CostKind,
I);
} }
case Intrinsic::masked_scatter: { case Intrinsic::masked_scatter: {
assert(VF == 1 && "Can't vectorize types here."); assert(VF == 1 && "Can't vectorize types here.");
@ -1120,7 +1133,8 @@ public:
unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
return ConcreteTTI->getGatherScatterOpCost(Instruction::Store, return ConcreteTTI->getGatherScatterOpCost(Instruction::Store,
Args[0]->getType(), Args[1], Args[0]->getType(), Args[1],
VarMask, Alignment, I); VarMask, Alignment, CostKind,
I);
} }
case Intrinsic::masked_gather: { case Intrinsic::masked_gather: {
assert(VF == 1 && "Can't vectorize types here."); assert(VF == 1 && "Can't vectorize types here.");
@ -1128,7 +1142,7 @@ public:
bool VarMask = !isa<Constant>(Mask); bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
return ConcreteTTI->getGatherScatterOpCost( return ConcreteTTI->getGatherScatterOpCost(
Instruction::Load, RetTy, Args[0], VarMask, Alignment, I); Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I);
} }
case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_mul:
@ -1143,7 +1157,8 @@ public:
case Intrinsic::experimental_vector_reduce_fmin: case Intrinsic::experimental_vector_reduce_fmin:
case Intrinsic::experimental_vector_reduce_umax: case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin: case Intrinsic::experimental_vector_reduce_umin:
return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF, 1,
CostKind, I);
case Intrinsic::fshl: case Intrinsic::fshl:
case Intrinsic::fshr: { case Intrinsic::fshr: {
Value *X = Args[0]; Value *X = Args[0];
@ -1159,25 +1174,30 @@ public:
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
unsigned Cost = 0; unsigned Cost = 0;
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy,
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); CostKind);
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,
CostKind);
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
CostKind,
OpKindX, OpKindZ, OpPropsX); OpKindX, OpKindZ, OpPropsX);
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
CostKind,
OpKindY, OpKindZ, OpPropsY); OpKindY, OpKindZ, OpPropsY);
// Non-constant shift amounts requires a modulo. // Non-constant shift amounts requires a modulo.
if (OpKindZ != TTI::OK_UniformConstantValue && if (OpKindZ != TTI::OK_UniformConstantValue &&
OpKindZ != TTI::OK_NonUniformConstantValue) OpKindZ != TTI::OK_NonUniformConstantValue)
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
CostKind,
OpKindZ, OpKindBW, OpPropsZ, OpKindZ, OpKindBW, OpPropsZ,
OpPropsBW); OpPropsBW);
// For non-rotates (X != Y) we must add shift-by-zero handling costs. // For non-rotates (X != Y) we must add shift-by-zero handling costs.
if (X != Y) { if (X != Y) {
Type *CondTy = RetTy->getWithNewBitWidth(1); Type *CondTy = RetTy->getWithNewBitWidth(1);
Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
CondTy, nullptr); CondTy, CostKind);
Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
CondTy, nullptr); CondTy, CostKind);
} }
return Cost; return Cost;
} }
@ -1191,6 +1211,7 @@ public:
unsigned getIntrinsicInstrCost( unsigned getIntrinsicInstrCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max(), unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max(),
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) { const Instruction *I = nullptr) {
auto *ConcreteTTI = static_cast<T *>(this); auto *ConcreteTTI = static_cast<T *>(this);
auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]); auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]);
@ -1226,7 +1247,8 @@ public:
return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
unsigned ScalarCost = unsigned ScalarCost =
ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF,
CostKind);
return ScalarCalls * ScalarCost + ScalarizationCost; return ScalarCalls * ScalarCost + ScalarizationCost;
} }
@@ -1312,34 +1334,40 @@ public:
       return 0;
     case Intrinsic::masked_store:
       return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
-                                                0);
+                                                0, CostKind);
     case Intrinsic::masked_load:
-      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
+      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0,
+                                                CostKind);
     case Intrinsic::experimental_vector_reduce_add:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_mul:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_and:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::And, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_or:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
    case Intrinsic::experimental_vector_reduce_xor:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_v2_fadd:
+      // FIXME: Add new flag for cost of strict reductions.
       return ConcreteTTI->getArithmeticReductionCost(
           Instruction::FAdd, VecOpTy,
-          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
-                                     // reductions.
+          /*IsPairwiseForm=*/false, CostKind);
     case Intrinsic::experimental_vector_reduce_v2_fmul:
+      // FIXME: Add new flag for cost of strict reductions.
       return ConcreteTTI->getArithmeticReductionCost(
           Instruction::FMul, VecOpTy,
-          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
-                                     // reductions.
+          /*IsPairwiseForm=*/false, CostKind);
     case Intrinsic::experimental_vector_reduce_smax:
     case Intrinsic::experimental_vector_reduce_smin:
     case Intrinsic::experimental_vector_reduce_fmax:
@@ -1347,13 +1375,13 @@ public:
       return ConcreteTTI->getMinMaxReductionCost(
           VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
           /*IsPairwiseForm=*/false,
-          /*IsUnsigned=*/false);
+          /*IsUnsigned=*/false, CostKind);
     case Intrinsic::experimental_vector_reduce_umax:
     case Intrinsic::experimental_vector_reduce_umin:
       return ConcreteTTI->getMinMaxReductionCost(
           VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
           /*IsPairwiseForm=*/false,
-          /*IsUnsigned=*/true);
+          /*IsUnsigned=*/true, CostKind);
     case Intrinsic::sadd_sat:
     case Intrinsic::ssub_sat: {
       Type *CondTy = RetTy->getWithNewBitWidth(1);
@@ -1367,11 +1395,12 @@ public:
       // SatMin -> Overflow && SumDiff >= 0
       unsigned Cost = 0;
       Cost += ConcreteTTI->getIntrinsicInstrCost(
-          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed,
+          CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
-                                              CondTy, nullptr);
+                                              CondTy, CostKind);
       Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                                  CondTy, nullptr);
+                                                  CondTy, CostKind);
       return Cost;
     }
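To make the saturating-add model concrete (illustrative unit costs, not values from any target): if the matching overflow intrinsic prices at 3 and each compare or select at 1, then sadd.sat/ssub.sat come out at 3 + 1 + 2 * 1 = 6, and the same CostKind now reaches every one of those sub-queries instead of only some of them.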
     case Intrinsic::uadd_sat:
@@ -1385,9 +1414,10 @@ public:
       unsigned Cost = 0;
       Cost += ConcreteTTI->getIntrinsicInstrCost(
-          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed,
+          CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                              CondTy, nullptr);
+                                              CondTy, CostKind);
       return Cost;
     }
     case Intrinsic::smul_fix:
@@ -1399,17 +1429,22 @@ public:
           IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
       unsigned Cost = 0;
-      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy, CostKind);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy,
+                                                  CostKind);
       Cost +=
-          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
+          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+                                            CostKind);
       Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
+                                                  CostKind,
                                                   TTI::OK_AnyValue,
                                                   TTI::OK_UniformConstantValue);
       Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
+                                                  CostKind,
                                                   TTI::OK_AnyValue,
                                                   TTI::OK_UniformConstantValue);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy,
+                                                  CostKind);
       return Cost;
     }
     case Intrinsic::sadd_with_overflow:
@@ -1429,13 +1464,14 @@ public:
       //   Sub:
       //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
       unsigned Cost = 0;
-      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy, CostKind);
       Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
-                                                  OverflowTy, nullptr);
+                                                  OverflowTy, CostKind);
       Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
-                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
+                      BinaryOperator::ICmp, OverflowTy, OverflowTy, CostKind);
       Cost +=
-          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
+          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
+                                              CostKind);
       return Cost;
     }
     case Intrinsic::uadd_with_overflow:
@@ -1447,9 +1483,9 @@ public:
                             : BinaryOperator::Sub;
       unsigned Cost = 0;
-      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy, CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
-                                              OverflowTy, nullptr);
+                                              OverflowTy, CostKind);
       return Cost;
     }
     case Intrinsic::smul_with_overflow:
@@ -1463,21 +1499,24 @@ public:
           IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
       unsigned Cost = 0;
-      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy, CostKind);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy,
+                                                  CostKind);
       Cost +=
-          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
+          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+                                            CostKind);
       Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
+                                                  CostKind,
                                                   TTI::OK_AnyValue,
                                                   TTI::OK_UniformConstantValue);
       if (IID == Intrinsic::smul_with_overflow)
         Cost += ConcreteTTI->getArithmeticInstrCost(
-            Instruction::AShr, MulTy, TTI::OK_AnyValue,
+            Instruction::AShr, MulTy, CostKind, TTI::OK_AnyValue,
             TTI::OK_UniformConstantValue);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
-                                              OverflowTy, nullptr);
+                                              OverflowTy, CostKind);
       return Cost;
     }
     case Intrinsic::ctpop:
@@ -1534,14 +1573,17 @@ public:
     // If we can't lower fmuladd into an FMA estimate the cost as a floating
     // point mul followed by an add.
     if (IID == Intrinsic::fmuladd)
-      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
-             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
+      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
+                                                 CostKind) +
+             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
+                                                 CostKind);
     if (IID == Intrinsic::experimental_constrained_fmuladd)
       return ConcreteTTI->getIntrinsicCost(
-                 Intrinsic::experimental_constrained_fmul, RetTy, Tys,
-                 nullptr) +
+                 Intrinsic::experimental_constrained_fmul, RetTy, Tys, nullptr,
+                 CostKind) +
             ConcreteTTI->getIntrinsicCost(
-                 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr);
+                 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr,
+                 CostKind);
     // Else, assume that we need to scalarize this intrinsic. For math builtins
     // this will emit a costly libcall, adding call overhead and spills. Make it
@@ -1560,7 +1602,7 @@ public:
         ScalarTys.push_back(Ty);
       }
       unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
-          IID, RetTy->getScalarType(), ScalarTys, FMF);
+          IID, RetTy->getScalarType(), ScalarTys, FMF, CostKind);
       for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
         if (Tys[i]->isVectorTy()) {
           if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
@@ -1588,7 +1630,8 @@ public:
   /// \param RetTy Return value types.
   /// \param Tys Argument types.
   /// \returns The cost of Call instruction.
-  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
+                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
     return 10;
   }
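Note that this generic fallback still charges a flat 10 for any call, whatever kind was requested. A target wanting to distinguish could override it; a hypothetical sketch (MyTTIImpl and both constants are invented for illustration):

    unsigned MyTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
                                         ArrayRef<Type *> Tys,
                                         TTI::TargetCostKind CostKind) {
      // Invented numbers: assume calls encode more cheaply than they execute.
      return CostKind == TTI::TCK_SizeAndLatency ? 4 : 10;
    }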
@@ -1638,7 +1681,8 @@ public:
   /// The cost model should take into account that the actual length of the
   /// vector is reduced on each iteration.
   unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                      bool IsPairwise) {
+                                      bool IsPairwise,
+                                      TTI::TargetCostKind CostKind) {
     Type *ScalarTy = Ty->getElementType();
     unsigned NumVecElts = Ty->getNumElements();
     unsigned NumReduxLevels = Log2_32(NumVecElts);
@@ -1657,7 +1701,7 @@ public:
       ShuffleCost += (IsPairwise + 1) *
                      ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
                                                  NumVecElts, SubTy);
-      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
+      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy, CostKind);
       Ty = SubTy;
       ++LongVectorCount;
     }
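As a worked example of the halving loop above (unit costs assumed, purely illustrative): reducing <8 x i32> with add takes Log2_32(8) = 3 levels; at one extract-subvector shuffle plus one add on the halved type per level, that accrues roughly 3 + 3 = 6 before the final extractelement, with each add now priced under the caller's CostKind.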
@@ -1687,7 +1731,8 @@ public:
   /// Try to calculate op costs for min/max reduction operations.
   /// \param CondTy Conditional type for the Select instruction.
   unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                                  bool IsPairwise, bool) {
+                                  bool IsPairwise, bool IsUnsigned,
+                                  TTI::TargetCostKind CostKind) {
     Type *ScalarTy = Ty->getElementType();
     Type *ScalarCondTy = CondTy->getElementType();
     unsigned NumVecElts = Ty->getNumElements();
@@ -1718,9 +1763,9 @@ public:
                      ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
                                                  NumVecElts, SubTy);
       MinMaxCost +=
-          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
+          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) +
           ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
-                                          nullptr);
+                                          CostKind);
       Ty = SubTy;
       ++LongVectorCount;
     }
@@ -1743,9 +1788,9 @@ public:
                                     0, Ty);
     MinMaxCost +=
         NumReduxLevels *
-        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
+        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
          ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
-                                         nullptr));
+                                         CostKind));
     // The last min/max should be in vector registers and we counted it above.
     // So just need a single extractelement.
     return ShuffleCost + MinMaxCost +
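The min/max form above follows the same halving shape as the arithmetic reduction, except each level pays a compare plus a select rather than one arithmetic op, so the same illustrative 8-element reduction prices at roughly 3 shuffles + 3 * (1 + 1) = 9 with unit costs; note that CostKind now fills the argument slot that previously carried a null Instruction pointer.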


@@ -2157,6 +2157,9 @@ bool SCEVExpander::isHighCostExpansionHelper(
     return false; // Assume to be zero-cost.
   }
+  TargetTransformInfo::TargetCostKind CostKind =
+      TargetTransformInfo::TCK_RecipThroughput;
   if (auto *CastExpr = dyn_cast<SCEVCastExpr>(S)) {
     unsigned Opcode;
     switch (S->getSCEVType()) {
@@ -2174,7 +2177,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
     }
     const SCEV *Op = CastExpr->getOperand();
     BudgetRemaining -= TTI.getCastInstrCost(Opcode, /*Dst=*/S->getType(),
-                                            /*Src=*/Op->getType());
+                                            /*Src=*/Op->getType(), CostKind);
     Worklist.emplace_back(Op);
     return false; // Will answer upon next entry into this function.
   }
@@ -2184,7 +2187,8 @@ bool SCEVExpander::isHighCostExpansionHelper(
     if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) {
       if (SC->getAPInt().isPowerOf2()) {
         BudgetRemaining -=
-            TTI.getArithmeticInstrCost(Instruction::LShr, S->getType());
+            TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(),
+                                       CostKind);
         // Note that we don't count the cost of RHS, because it is a constant,
         // and we consider those to be free. But if that changes, we would need
         // to log2() it first before calling isHighCostExpansionHelper().
@@ -2207,7 +2211,8 @@ bool SCEVExpander::isHighCostExpansionHelper(
     // Need to count the cost of this UDiv.
     BudgetRemaining -=
-        TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType());
+        TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(),
+                                   CostKind);
     Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()});
     return false; // Will answer upon next entry into this function.
   }
@@ -2218,8 +2223,10 @@ bool SCEVExpander::isHighCostExpansionHelper(
     assert(NAry->getNumOperands() >= 2 &&
            "Polynomial should be at least linear");
-    int AddCost = TTI.getArithmeticInstrCost(Instruction::Add, OpType);
-    int MulCost = TTI.getArithmeticInstrCost(Instruction::Mul, OpType);
+    int AddCost =
+        TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
+    int MulCost =
+        TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
     // In this polynominal, we may have some zero operands, and we shouldn't
     // really charge for those. So how many non-zero coeffients are there?
@@ -2273,22 +2280,26 @@ bool SCEVExpander::isHighCostExpansionHelper(
     int PairCost;
     switch (S->getSCEVType()) {
     case scAddExpr:
-      PairCost = TTI.getArithmeticInstrCost(Instruction::Add, OpType);
+      PairCost =
+          TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
       break;
     case scMulExpr:
       // TODO: this is a very pessimistic cost modelling for Mul,
       // because of Bin Pow algorithm actually used by the expander,
       // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
-      PairCost = TTI.getArithmeticInstrCost(Instruction::Mul, OpType);
+      PairCost =
+          TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
       break;
     case scSMaxExpr:
     case scUMaxExpr:
     case scSMinExpr:
     case scUMinExpr:
-      PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType,
-                                        CmpInst::makeCmpResultType(OpType)) +
+      PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType,
+                                        CmpInst::makeCmpResultType(OpType),
+                                        CostKind) +
                  TTI.getCmpSelInstrCost(Instruction::Select, OpType,
-                                        CmpInst::makeCmpResultType(OpType));
+                                        CmpInst::makeCmpResultType(OpType),
+                                        CostKind);
       break;
     default:
       llvm_unreachable("There are no other variants here.");
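The pattern in this file is a budget walk: subtract each would-be instruction's throughput cost from the remaining budget and declare the expansion too costly once it goes negative. A condensed sketch of that shape, using only the signatures shown above (the helper itself is a stand-in, not code from this commit):

    // Sketch of the budget pattern (names hypothetical).
    static bool isTooCostly(const TargetTransformInfo &TTI, Type *OpType,
                            int BudgetRemaining) {
      TargetTransformInfo::TargetCostKind CostKind =
          TargetTransformInfo::TCK_RecipThroughput;
      // Charge one Add at throughput cost against the budget.
      BudgetRemaining -=
          TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
      return BudgetRemaining < 0;
    }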


@@ -154,8 +154,9 @@ int TargetTransformInfo::getInlinerVectorBonusPercent() const {
 }
 int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
-                                    ArrayRef<const Value *> Operands) const {
-  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
+                                    ArrayRef<const Value *> Operands,
+                                    TTI::TargetCostKind CostKind) const {
+  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
 }
 int TargetTransformInfo::getExtCost(const Instruction *I,
@@ -165,8 +166,9 @@ int TargetTransformInfo::getExtCost(const Instruction *I,
 int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                           ArrayRef<const Value *> Arguments,
-                                          const User *U) const {
-  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
+                                          const User *U,
+                                          TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -440,22 +442,27 @@ int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
   return Cost;
 }
-int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
-  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
+int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty,
+                                       TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
-int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
-                                           const APInt &Imm, Type *Ty) const {
-  int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty);
+int
+TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+                                       const APInt &Imm, Type *Ty,
+                                       TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
-int TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                             const APInt &Imm, Type *Ty) const {
-  int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty);
+int
+TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+                                         const APInt &Imm, Type *Ty,
+                                         TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
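A caller-side sketch of the immediate-cost query (the helper and values are hypothetical; this mirrors how CodeGenPrepare uses the API later in this commit):

    // Sketch: is this immediate cheap to materialize for size purposes?
    static bool isCheapImm(const TargetTransformInfo &TTInfo, const APInt &Imm,
                           Type *Ty) {
      return TTInfo.getIntImmCost(Imm, Ty,
                                  TargetTransformInfo::TCK_SizeAndLatency) <=
             TargetTransformInfo::TCC_Basic;
    }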
@@ -582,12 +589,14 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
 }
 int TargetTransformInfo::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    OperandValueKind Opd1Info,
     OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
     OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) const {
   int Cost = TTIImpl->getArithmeticInstrCost(
-      Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI);
+      Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo,
+      Args, CxtI);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -600,10 +609,11 @@ int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty,
 }
 int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                          TTI::TargetCostKind CostKind,
                                           const Instruction *I) const {
   assert((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
-  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
+  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -616,18 +626,20 @@ int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
   return Cost;
 }
-int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
-  int Cost = TTIImpl->getCFInstrCost(Opcode);
+int TargetTransformInfo::getCFInstrCost(unsigned Opcode,
+                                        TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                             Type *CondTy,
+                                            TTI::TargetCostKind CostKind,
                                             const Instruction *I) const {
   assert((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
-  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -642,40 +654,45 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
 int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                          MaybeAlign Alignment,
                                          unsigned AddressSpace,
+                                         TTI::TargetCostKind CostKind,
                                          const Instruction *I) const {
   assert((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
-  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                      CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                                unsigned Alignment,
-                                               unsigned AddressSpace) const {
+                                               unsigned AddressSpace,
+                                               TTI::TargetCostKind CostKind) const {
   int Cost =
-      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                     CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
-int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
-                                                Value *Ptr, bool VariableMask,
-                                                unsigned Alignment,
-                                                const Instruction *I) const {
+int TargetTransformInfo::getGatherScatterOpCost(
+    unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+    unsigned Alignment, TTI::TargetCostKind CostKind,
+    const Instruction *I) const {
   int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                             Alignment, I);
+                                             Alignment, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 int TargetTransformInfo::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
-    bool UseMaskForGaps) const {
+    unsigned Alignment, unsigned AddressSpace,
+    TTI::TargetCostKind CostKind,
+    bool UseMaskForCond, bool UseMaskForGaps) const {
   int Cost = TTIImpl->getInterleavedMemoryOpCost(
-      Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, UseMaskForCond,
-      UseMaskForGaps);
+      Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
+      UseMaskForCond, UseMaskForGaps);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -684,9 +701,11 @@ int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                                ArrayRef<Type *> Tys,
                                                FastMathFlags FMF,
                                                unsigned ScalarizationCostPassed,
+                                               TTI::TargetCostKind CostKind,
                                                const Instruction *I) const {
   int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                            ScalarizationCostPassed, I);
+                                            ScalarizationCostPassed, CostKind,
+                                            I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -694,15 +713,18 @@ int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
 int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                                ArrayRef<Value *> Args,
                                                FastMathFlags FMF, unsigned VF,
+                                               TTI::TargetCostKind CostKind,
                                                const Instruction *I) const {
-  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF,
+                                            CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
-                                          ArrayRef<Type *> Tys) const {
-  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
+                                          ArrayRef<Type *> Tys,
+                                          TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -727,18 +749,20 @@ int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
 int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode,
                                                     VectorType *Ty,
-                                                    bool IsPairwiseForm) const {
-  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
+                                                    bool IsPairwiseForm,
+                                                    TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
+                                                 CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
-int TargetTransformInfo::getMinMaxReductionCost(VectorType *Ty,
-                                                VectorType *CondTy,
-                                                bool IsPairwiseForm,
-                                                bool IsUnsigned) const {
+int TargetTransformInfo::getMinMaxReductionCost(
+    VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+    TTI::TargetCostKind CostKind) const {
   int Cost =
-      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
+      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
+                                      CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -1151,14 +1175,16 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
 }
 int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   switch (I->getOpcode()) {
   case Instruction::GetElementPtr:
-    return getUserCost(I, TCK_RecipThroughput);
+    return getUserCost(I, CostKind);
   case Instruction::Ret:
   case Instruction::PHI:
   case Instruction::Br: {
-    return getCFInstrCost(I->getOpcode());
+    return getCFInstrCost(I->getOpcode(), CostKind);
   }
   case Instruction::Add:
   case Instruction::FAdd:
@@ -1183,7 +1209,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
     Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
     SmallVector<const Value *, 2> Operands(I->operand_values());
-    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
+                                  Op1VK, Op2VK,
                                   Op1VP, Op2VP, Operands, I);
   }
   case Instruction::FNeg: {
@@ -1193,31 +1220,34 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     Op2VK = OK_AnyValue;
     Op2VP = OP_None;
     SmallVector<const Value *, 2> Operands(I->operand_values());
-    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
+                                  Op1VK, Op2VK,
                                   Op1VP, Op2VP, Operands, I);
   }
   case Instruction::Select: {
     const SelectInst *SI = cast<SelectInst>(I);
     Type *CondTy = SI->getCondition()->getType();
-    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
+    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy,
+                              CostKind, I);
   }
   case Instruction::ICmp:
   case Instruction::FCmp: {
     Type *ValTy = I->getOperand(0)->getType();
-    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
+    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(),
+                              CostKind, I);
   }
   case Instruction::Store: {
     const StoreInst *SI = cast<StoreInst>(I);
     Type *ValTy = SI->getValueOperand()->getType();
     return getMemoryOpCost(I->getOpcode(), ValTy,
                            MaybeAlign(SI->getAlignment()),
-                           SI->getPointerAddressSpace(), I);
+                           SI->getPointerAddressSpace(), CostKind, I);
   }
   case Instruction::Load: {
     const LoadInst *LI = cast<LoadInst>(I);
     return getMemoryOpCost(I->getOpcode(), I->getType(),
                            MaybeAlign(LI->getAlignment()),
-                           LI->getPointerAddressSpace(), I);
+                           LI->getPointerAddressSpace(), CostKind, I);
   }
   case Instruction::ZExt:
   case Instruction::SExt:
@@ -1233,7 +1263,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast: {
     Type *SrcTy = I->getOperand(0)->getType();
-    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
+    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, CostKind, I);
   }
   case Instruction::ExtractElement: {
     const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
@@ -1250,7 +1280,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
     case RK_Arithmetic:
       return getArithmeticReductionCost(ReduxOpCode, ReduxType,
-                                        /*IsPairwiseForm=*/false);
+                                        /*IsPairwiseForm=*/false,
+                                        CostKind);
     case RK_MinMax:
       return getMinMaxReductionCost(
           ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
@@ -1266,7 +1297,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
     case RK_Arithmetic:
       return getArithmeticReductionCost(ReduxOpCode, ReduxType,
-                                        /*IsPairwiseForm=*/true);
+                                        /*IsPairwiseForm=*/true, CostKind);
     case RK_MinMax:
       return getMinMaxReductionCost(
           ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
@@ -1334,7 +1365,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
       FMF = FPMO->getFastMathFlags();
     return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
-                                 FMF, 1, II);
+                                 FMF, 1, CostKind, II);
   }
   return -1;
   default:
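The net effect in this function is that the throughput entry point pins a single kind up front and forwards it to every per-opcode query, rather than each callee assuming its own default; for instance, the Select case above is now equivalent to calling getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, TTI::TCK_RecipThroughput, I) directly.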


@@ -6652,6 +6652,8 @@ class VectorPromoteHelper {
     uint64_t ScalarCost =
         TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
     uint64_t VectorCost = StoreExtractCombineCost;
+    enum TargetTransformInfo::TargetCostKind CostKind =
+        TargetTransformInfo::TCK_RecipThroughput;
     for (const auto &Inst : InstsToBePromoted) {
       // Compute the cost.
       // By construction, all instructions being promoted are arithmetic ones.
@@ -6667,8 +6669,9 @@ class VectorPromoteHelper {
           !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
                           : TargetTransformInfo::OK_AnyValue;
       ScalarCost += TTI.getArithmeticInstrCost(
-          Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
+          Inst->getOpcode(), Inst->getType(), CostKind, Arg0OVK, Arg1OVK);
       VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
+                                               CostKind,
                                                Arg0OVK, Arg1OVK);
     }
     LLVM_DEBUG(
@@ -7127,7 +7130,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
     return false;
   ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
   // Check that GEPI is a cheap one.
-  if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType())
+  if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
+                         TargetTransformInfo::TCK_SizeAndLatency)
       > TargetTransformInfo::TCC_Basic)
     return false;
   Value *GEPIOp = GEPI->getOperand(0);
@@ -7176,7 +7180,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
         cast<ConstantInt>(UGEPI->getOperand(1))->getType())
       return false;
     ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
-    if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType())
+    if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
+                           TargetTransformInfo::TCK_SizeAndLatency)
        > TargetTransformInfo::TCC_Basic)
      return false;
    UGEPIs.push_back(UGEPI);
@@ -7187,7 +7192,9 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
   for (GetElementPtrInst *UGEPI : UGEPIs) {
     ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
     APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
-    unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType());
+    unsigned ImmCost =
+        TTI->getIntImmCost(NewIdx, GEPIIdx->getType(),
+                           TargetTransformInfo::TCK_SizeAndLatency);
     if (ImmCost > TargetTransformInfo::TCC_Basic)
       return false;
   }
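To see the arithmetic this guards, with invented indices: unmerging rewrites each UGEPI to index off GEPI, so a pair of constant indices {100, 108} becomes the delta immediate 108 - 100 = 8; the rewrite only proceeds if that delta still materializes within TCC_Basic under TCK_SizeAndLatency, the natural kind for a question about encoding an immediate.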


@@ -57,7 +57,8 @@ int AArch64TTIImpl::getIntImmCost(int64_t Val) {
 }
 /// Calculate the cost of materializing the given constant.
-int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                                  TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -82,7 +83,8 @@ int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
-                                      const APInt &Imm, Type *Ty) {
+                                      const APInt &Imm, Type *Ty,
+                                      TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -139,16 +141,17 @@ int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
   if (Idx == ImmIdx) {
     int NumConstants = (BitSize + 63) / 64;
-    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
     return (Cost <= NumConstants * TTI::TCC_Basic)
                ? static_cast<int>(TTI::TCC_Free)
               : Cost;
   }
-  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
+  return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                        const APInt &Imm, Type *Ty) {
+                                        const APInt &Imm, Type *Ty,
+                                        TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -161,7 +164,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
   // selected instruction, so we compute the materialization cost for the
   // immediate directly.
   if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
-    return AArch64TTIImpl::getIntImmCost(Imm, Ty);
+    return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
   switch (IID) {
   default:
@@ -174,7 +177,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
   case Intrinsic::umul_with_overflow:
     if (Idx == 1) {
       int NumConstants = (BitSize + 63) / 64;
-      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
       return (Cost <= NumConstants * TTI::TCC_Basic)
                  ? static_cast<int>(TTI::TCC_Free)
                  : Cost;
@@ -190,7 +193,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
     return TTI::TCC_Free;
     break;
   }
-  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
+  return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 TargetTransformInfo::PopcntSupportKind
@@ -267,6 +270,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
 }
 int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                     TTI::TargetCostKind CostKind,
                                      const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
@@ -295,7 +299,7 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   EVT DstTy = TLI->getValueType(DL, Dst);
   if (!SrcTy.isSimple() || !DstTy.isSimple())
-    return BaseT::getCastInstrCost(Opcode, Dst, Src);
+    return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
   static const TypeConversionCostTblEntry
   ConversionTbl[] = {
@@ -399,7 +403,7 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                                  SrcTy.getSimpleVT()))
     return Entry->Cost;
-  return BaseT::getCastInstrCost(Opcode, Dst, Src);
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
 }
 int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
@@ -425,17 +429,18 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
   auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
   auto DstVT = TLI->getValueType(DL, Dst);
   auto SrcVT = TLI->getValueType(DL, Src);
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   // If the resulting type is still a vector and the destination type is legal,
   // we may get the extension for free. If not, get the default cost for the
   // extend.
   if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
-    return Cost + getCastInstrCost(Opcode, Dst, Src);
+    return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
   // The destination type should be larger than the element type. If not, get
   // the default cost for the extend.
   if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
-    return Cost + getCastInstrCost(Opcode, Dst, Src);
+    return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
   switch (Opcode) {
   default:
@@ -454,7 +459,7 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
   }
   // If we are unable to perform the extend for free, get the default cost.
-  return Cost + getCastInstrCost(Opcode, Dst, Src);
+  return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
 }
 int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
@@ -483,7 +488,8 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
 }
 int AArch64TTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
@@ -504,7 +510,8 @@ int AArch64TTIImpl::getArithmeticInstrCost(
   switch (ISD) {
   default:
-    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                                Opd2Info,
                                                 Opd1PropInfo, Opd2PropInfo);
   case ISD::SDIV:
     if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -513,16 +520,20 @@ int AArch64TTIImpl::getArithmeticInstrCost(
       // normally expanded to the sequence ADD + CMP + SELECT + SRA.
       // The OperandValue properties many not be same as that of previous
       // operation; conservatively assume OP_None.
-      Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
+      Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+                                     Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
-      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
+      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
+                                     Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
-      Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
+      Cost += getArithmeticInstrCost(Instruction::Select, Ty, CostKind,
+                                     Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
-      Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
+      Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+                                     Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
       return Cost;
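Spelling the SDIV model out with illustrative unit costs: a signed divide by a power of two is priced above as the four-instruction expansion ADD + CMP + SELECT + SRA, so if each recursive query returns 1 the divide comes out near 4; all four getArithmeticInstrCost calls now inherit the caller's CostKind.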
@@ -535,31 +546,34 @@ int AArch64TTIImpl::getArithmeticInstrCost(
       // Vector signed division by constant are expanded to the
       // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
       // to MULHS + SUB + SRL + ADD + SRL.
-      int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
-                                           Opd2Info,
+      int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+                                           Opd1Info, Opd2Info,
                                            TargetTransformInfo::OP_None,
                                            TargetTransformInfo::OP_None);
-      int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
-                                           Opd2Info,
+      int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+                                           Opd1Info, Opd2Info,
                                            TargetTransformInfo::OP_None,
                                            TargetTransformInfo::OP_None);
-      int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
-                                           Opd2Info,
+      int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+                                           Opd1Info, Opd2Info,
                                            TargetTransformInfo::OP_None,
                                            TargetTransformInfo::OP_None);
       return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
     }
   }
-  Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+  Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                        Opd2Info,
                                         Opd1PropInfo, Opd2PropInfo);
   if (Ty->isVectorTy()) {
     // On AArch64, vector divisions are not supported natively and are
     // expanded into scalar divisions of each pair of elements.
-    Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
-                                   Opd2Info, Opd1PropInfo, Opd2PropInfo);
-    Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
-                                   Opd2Info, Opd1PropInfo, Opd2PropInfo);
+    Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind,
+                                   Opd1Info, Opd2Info, Opd1PropInfo,
+                                   Opd2PropInfo);
+    Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind,
+                                   Opd1Info, Opd2Info, Opd1PropInfo,
+                                   Opd2PropInfo);
     // TODO: if one of the arguments is scalar, then it's not necessary to
     // double the cost of handling the vector elements.
     Cost += Cost;
@@ -596,7 +610,9 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
 }
 int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                       Type *CondTy, const Instruction *I) {
+                                       Type *CondTy,
+                                       TTI::TargetCostKind CostKind,
+                                       const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   // We don't lower some vector selects well that are wider than the register
@@ -623,7 +639,7 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
       return Entry->Cost;
     }
   }
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
 }
 AArch64TTIImpl::TTI::MemCmpExpansionOptions
@@ -646,6 +662,7 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
 int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                     MaybeAlign Alignment, unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
                                     const Instruction *I) {
   auto LT = TLI->getTypeLegalizationCost(DL, Ty);
@@ -688,6 +705,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                                ArrayRef<unsigned> Indices,
                                                unsigned Alignment,
                                                unsigned AddressSpace,
+                                               TTI::TargetCostKind CostKind,
                                                bool UseMaskForCond,
                                                bool UseMaskForGaps) {
   assert(Factor >= 2 && "Invalid interleave factor");
@@ -707,18 +725,19 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   }
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace,
+                                           Alignment, AddressSpace, CostKind,
                                            UseMaskForCond, UseMaskForGaps);
 }
 int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
   int Cost = 0;
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   for (auto *I : Tys) {
     if (!I->isVectorTy())
       continue;
     if (I->getScalarSizeInBits() * cast<VectorType>(I)->getNumElements() == 128)
-      Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
-              getMemoryOpCost(Instruction::Load, I, Align(128), 0);
+      Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
+              getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
   }
   return Cost;
 }
@@ -932,10 +951,12 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
 int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
                                                VectorType *ValTy,
-                                               bool IsPairwiseForm) {
+                                               bool IsPairwiseForm,
+                                               TTI::TargetCostKind CostKind) {
   if (IsPairwiseForm)
-    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
+    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
+                                             CostKind);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
   MVT MTy = LT.second;
@@ -956,7 +977,8 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
   if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
     return LT.first * Entry->Cost;
-  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
+  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
+                                           CostKind);
 }
 int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,


@@ -72,11 +72,11 @@ public:
   using BaseT::getIntImmCost;
   int getIntImmCost(int64_t Val);
-  int getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                        Type *Ty);
+                        Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
   /// @}
@@ -112,6 +112,7 @@ public:
   unsigned getMaxInterleaveFactor(unsigned VF);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
@@ -121,6 +122,7 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -131,13 +133,16 @@ public:
   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                     bool IsZeroCmp) const;
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
+                      unsigned AddressSpace,
+                      TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
   int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
@@ -192,6 +197,7 @@ public:
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
                                  unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
@@ -225,7 +231,8 @@ public:
                              TTI::ReductionFlags Flags) const;
   int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                 bool IsPairwiseForm);
+                                 bool IsPairwiseForm,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
   int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                      VectorType *SubTp);

lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

@@ -422,6 +422,7 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
 }
 int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::TargetCostKind CostKind,
                                        TTI::OperandValueKind Opd1Info,
                                        TTI::OperandValueKind Opd2Info,
                                        TTI::OperandValueProperties Opd1PropInfo,
@@ -430,7 +431,8 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                        const Instruction *CxtI) {
   EVT OrigTy = TLI->getValueType(DL, Ty);
   if (!OrigTy.isSimple()) {
-    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                         Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo);
   }
@@ -542,7 +544,8 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     break;
   }
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                       Opd2Info,
                                        Opd1PropInfo, Opd2PropInfo);
 }
@@ -562,13 +565,15 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
 template <typename T>
 int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                       ArrayRef<T *> Args, FastMathFlags FMF,
-                                      unsigned VF, const Instruction *I) {
+                                      unsigned VF,
+                                      TTI::TargetCostKind CostKind,
+                                      const Instruction *I) {
   if (!intrinsicHasPackedVectorBenefit(ID))
-    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
   EVT OrigTy = TLI->getValueType(DL, RetTy);
   if (!OrigTy.isSimple()) {
-    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
   }
   // Legalize the type.
@@ -597,31 +602,36 @@ int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
 int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                       ArrayRef<Value *> Args, FastMathFlags FMF,
-                                      unsigned VF, const Instruction *I) {
-  return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF, I);
+                                      unsigned VF,
+                                      TTI::TargetCostKind CostKind,
+                                      const Instruction *I) {
+  return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF, CostKind, I);
 }
 int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                       ArrayRef<Type *> Tys, FastMathFlags FMF,
                                       unsigned ScalarizationCostPassed,
+                                      TTI::TargetCostKind CostKind,
                                       const Instruction *I) {
   return getIntrinsicInstrCost<Type>(ID, RetTy, Tys, FMF,
-                                     ScalarizationCostPassed, I);
+                                     ScalarizationCostPassed, CostKind, I);
 }
-unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
+unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode,
+                                    TTI::TargetCostKind CostKind) {
   // XXX - For some reason this isn't called for switch.
   switch (Opcode) {
   case Instruction::Br:
   case Instruction::Ret:
     return 10;
   default:
-    return BaseT::getCFInstrCost(Opcode);
+    return BaseT::getCFInstrCost(Opcode, CostKind);
   }
 }
 int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                           bool IsPairwise) {
+                                           bool IsPairwise,
+                                           TTI::TargetCostKind CostKind) {
   EVT OrigTy = TLI->getValueType(DL, Ty);
   // Computes cost on targets that have packed math instructions(which support
@@ -629,15 +639,15 @@ int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
   if (IsPairwise ||
       !ST->hasVOP3PInsts() ||
       OrigTy.getScalarSizeInBits() != 16)
-    return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise);
+    return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise, CostKind);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
   return LT.first * getFullRateInstrCost();
 }
 int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                                       bool IsPairwise,
-                                       bool IsUnsigned) {
+                                       bool IsPairwise, bool IsUnsigned,
+                                       TTI::TargetCostKind CostKind) {
   EVT OrigTy = TLI->getValueType(DL, Ty);
   // Computes cost on targets that have packed math instructions(which support
@@ -645,7 +655,8 @@ int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
   if (IsPairwise ||
       !ST->hasVOP3PInsts() ||
       OrigTy.getScalarSizeInBits() != 16)
-    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned);
+    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
+                                         CostKind);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
   return LT.first * getHalfRateInstrCost();
@@ -979,7 +990,7 @@ GCNTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
       if (auto *FPMO = dyn_cast<FPMathOperator>(II))
         FMF = FPMO->getFastMathFlags();
       return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
-                                   FMF, 1, II);
+                                   FMF, 1, CostKind, II);
     } else {
       return BaseT::getUserCost(U, Operands, CostKind);
     }
@@ -1031,7 +1042,7 @@ GCNTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast: {
     return getCastInstrCost(I->getOpcode(), I->getType(),
-                            I->getOperand(0)->getType(), I);
+                            I->getOperand(0)->getType(), CostKind, I);
   }
   case Instruction::Add:
   case Instruction::FAdd:
@@ -1052,7 +1063,7 @@ GCNTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
   case Instruction::Or:
   case Instruction::Xor:
   case Instruction::FNeg: {
-    return getArithmeticInstrCost(I->getOpcode(), I->getType(),
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
                                   TTI::OK_AnyValue, TTI::OK_AnyValue,
                                   TTI::OP_None, TTI::OP_None, Operands, I);
   }
@@ -1127,14 +1138,15 @@ unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
   return 8;
 }
-unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) {
+unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode,
+                                     TTI::TargetCostKind CostKind) {
   // XXX - For some reason this isn't called for switch.
   switch (Opcode) {
   case Instruction::Br:
   case Instruction::Ret:
     return 10;
   default:
-    return BaseT::getCFInstrCost(Opcode);
+    return BaseT::getCFInstrCost(Opcode, CostKind);
   }
 }

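GCN and R600 still return the same hard-coded 10 for branches whatever kind is requested, but the plumbing is what lets a backend start differentiating. A hypothetical hook that does so (this is not AMDGPU's actual behaviour):

// Hypothetical target hook: branches are cheap in code size but expensive
// for throughput; everything else defers to the base implementation.
unsigned MyTTIImpl::getCFInstrCost(unsigned Opcode,
                                   TTI::TargetCostKind CostKind) {
  switch (Opcode) {
  case Instruction::Br:
  case Instruction::Ret:
    return CostKind == TTI::TCK_CodeSize ? 1 : 10;
  default:
    return BaseT::getCFInstrCost(Opcode, CostKind);
  }
}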
lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

@@ -184,6 +184,7 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -191,7 +192,7 @@ public:
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr);
-  unsigned getCFInstrCost(unsigned Opcode);
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
   bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                      ArrayRef<unsigned> Indices = {}) const;
@@ -225,23 +226,30 @@ public:
   int getInlinerVectorBonusPercent() { return 0; }
-  int getArithmeticReductionCost(unsigned Opcode,
-                                 VectorType *Ty,
-                                 bool IsPairwise);
+  int getArithmeticReductionCost(
+      unsigned Opcode,
+      VectorType *Ty,
+      bool IsPairwise,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
   template <typename T>
   int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<T *> Args,
                             FastMathFlags FMF, unsigned VF,
+                            TTI::TargetCostKind CostKind,
                             const Instruction *I = nullptr);
-  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> Tys, FastMathFlags FMF,
-                            unsigned ScalarizationCostPassed = UINT_MAX,
-                            const Instruction *I = nullptr);
-  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1, const Instruction *I = nullptr);
-  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                             bool IsPairwiseForm,
-                             bool IsUnsigned);
+  int getIntrinsicInstrCost(
+      Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
+      unsigned ScalarizationCostPassed = UINT_MAX,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr);
+  int getIntrinsicInstrCost(
+      Intrinsic::ID IID, Type *RetTy, ArrayRef<Value *> Args, FastMathFlags FMF,
+      unsigned VF = 1, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr);
+  int getMinMaxReductionCost(
+      VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
                        TTI::TargetCostKind CostKind);
 };
@@ -282,7 +290,7 @@ public:
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
   unsigned getMaxInterleaveFactor(unsigned VF);
-  unsigned getCFInstrCost(unsigned Opcode);
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
 };

lib/Target/ARM/ARMTargetTransformInfo.cpp

@@ -80,7 +80,8 @@ bool ARMTTIImpl::shouldFavorPostInc() const {
   return false;
 }
-int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                              TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned Bits = Ty->getPrimitiveSizeInBits();
@@ -123,7 +124,7 @@ int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
 }
 int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                                  Type *Ty) {
+                                  Type *Ty, TTI::TargetCostKind CostKind) {
   // Division by a constant can be turned into multiplication, but only if we
   // know it's constant. So it's not so much that the immediate is cheap (it's
   // not), but that the alternative is worse.
@@ -138,12 +139,14 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
     if (Imm == 255 || Imm == 65535)
       return 0;
     // Conversion to BIC is free, and means we can use ~Imm instead.
-    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
+    return std::min(getIntImmCost(Imm, Ty, CostKind),
+                    getIntImmCost(~Imm, Ty, CostKind));
   }
   if (Opcode == Instruction::Add)
     // Conversion to SUB is free, and means we can use -Imm instead.
-    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
+    return std::min(getIntImmCost(Imm, Ty, CostKind),
+                    getIntImmCost(-Imm, Ty, CostKind));
   if (Opcode == Instruction::ICmp && Imm.isNegative() &&
       Ty->getIntegerBitWidth() == 32) {
@@ -160,10 +163,11 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
   if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
     return 0;
-  return getIntImmCost(Imm, Ty);
+  return getIntImmCost(Imm, Ty, CostKind);
 }
 int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
@@ -187,7 +191,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   EVT DstTy = TLI->getValueType(DL, Dst);
   if (!SrcTy.isSimple() || !DstTy.isSimple())
-    return BaseT::getCastInstrCost(Opcode, Dst, Src);
+    return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
   // The extend of a load is free
   if (I && isa<LoadInst>(I->getOperand(0))) {
@@ -418,7 +422,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                      ? ST->getMVEVectorCostFactor()
                      : 1;
-  return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src);
+  return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
 }
 int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
@@ -458,6 +462,7 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
 }
 int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   TTI::TargetCostKind CostKind,
                                    const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   // On NEON a vector select gets lowered to vbsl.
@@ -485,7 +490,8 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
   int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy()
                      ? ST->getMVEVectorCostFactor()
                      : 1;
-  return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind,
+                                              I);
 }
 int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -681,6 +687,7 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
 }
 int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::TargetCostKind CostKind,
                                        TTI::OperandValueKind Op1Info,
                                        TTI::OperandValueKind Op2Info,
                                        TTI::OperandValueProperties Opd1PropInfo,
@@ -737,7 +744,8 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
     return LT.first * Entry->Cost;
-  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+                                           Op2Info,
                                            Opd1PropInfo, Opd2PropInfo);
   // This is somewhat of a hack. The problem that we are facing is that SROA
@@ -795,7 +803,8 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   // Else this is expand, assume that we need to scalarize this op.
   if (auto *VTy = dyn_cast<VectorType>(Ty)) {
     unsigned Num = VTy->getNumElements();
-    unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+    unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType(),
+                                           CostKind);
     // Return the cost of multiple scalar invocation plus the cost of
     // inserting and extracting the values.
     return BaseT::getScalarizationOverhead(Ty, Args) + Num * Cost;
@@ -806,6 +815,7 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
 int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                 MaybeAlign Alignment, unsigned AddressSpace,
+                                TTI::TargetCostKind CostKind,
                                 const Instruction *I) {
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
@@ -824,8 +834,9 @@ int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
 int ARMTTIImpl::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
-    bool UseMaskForGaps) {
+    unsigned Alignment, unsigned AddressSpace,
+    TTI::TargetCostKind CostKind,
+    bool UseMaskForCond, bool UseMaskForGaps) {
   assert(Factor >= 2 && "Invalid interleave factor");
   assert(isa<VectorType>(VecTy) && "Expect a vector type");
@@ -856,18 +867,19 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(
   }
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace,
+                                           Alignment, AddressSpace, CostKind,
                                            UseMaskForCond, UseMaskForGaps);
 }
 unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                             Value *Ptr, bool VariableMask,
                                             unsigned Alignment,
+                                            TTI::TargetCostKind CostKind,
                                             const Instruction *I) {
   using namespace PatternMatch;
   if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters)
     return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                         Alignment, I);
+                                         Alignment, CostKind, I);
   assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
   VectorType *VTy = cast<VectorType>(DataTy);

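ARM threads CostKind through its recursive immediate queries because the BIC and SUB folds price an immediate as the cheaper of two materializations. The decision itself is independent of the plumbing; a standalone sketch of the pattern, where CostOf is a hypothetical stand-in for getIntImmCost:

// Sketch of the "cheaper alternative" pattern: an AND rewritable as BIC may
// materialize ~Imm instead, so the immediate costs whichever encoding is
// cheaper.
#include "llvm/ADT/APInt.h"
#include <algorithm>
#include <functional>

static int andImmCost(const llvm::APInt &Imm,
                      const std::function<int(const llvm::APInt &)> &CostOf) {
  return std::min(CostOf(Imm), CostOf(~Imm)); // ~Imm is the BIC operand
}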
lib/Target/ARM/ARMTargetTransformInfo.h

@@ -110,9 +110,10 @@ public:
                             Type *Ty);
   using BaseT::getIntImmCost;
-  int getIntImmCost(const APInt &Imm, Type *Ty);
-  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
+  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                        Type *Ty, TTI::TargetCostKind CostKind);
   /// @}
@@ -194,9 +195,11 @@ public:
   }
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
@@ -206,6 +209,7 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -214,17 +218,21 @@ public:
       const Instruction *CxtI = nullptr);
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
+                      unsigned AddressSpace,
+                      TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
                                  unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
-  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                                  bool VariableMask, unsigned Alignment,
-                                  const Instruction *I = nullptr);
+  unsigned getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind,
+      const Instruction *I = nullptr);
   bool isLoweredToCall(const Function *F);
   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,

lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

@@ -127,28 +127,30 @@ unsigned HexagonTTIImpl::getOperandsScalarizationOverhead(
 }
 unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
-      ArrayRef<Type*> Tys) {
-  return BaseT::getCallInstrCost(F, RetTy, Tys);
+      ArrayRef<Type*> Tys, TTI::TargetCostKind CostKind) {
+  return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind);
 }
 unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                                ArrayRef<Value *> Args,
                                                FastMathFlags FMF, unsigned VF,
+                                               TTI::TargetCostKind CostKind,
                                                const Instruction *I) {
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
 }
 unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                                ArrayRef<Type *> Tys,
                                                FastMathFlags FMF,
                                                unsigned ScalarizationCostPassed,
+                                               TTI::TargetCostKind CostKind,
                                                const Instruction *I) {
   if (ID == Intrinsic::bswap) {
     std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
     return LT.first + 2;
   }
   return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, I);
+                                      ScalarizationCostPassed, CostKind, I);
 }
 unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
@@ -159,10 +161,12 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                          MaybeAlign Alignment,
                                          unsigned AddressSpace,
+                                         TTI::TargetCostKind CostKind,
                                          const Instruction *I) {
   assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
   if (Opcode == Instruction::Store)
-    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                  CostKind, I);
   if (Src->isVectorTy()) {
     VectorType *VecTy = cast<VectorType>(Src);
@@ -200,12 +204,15 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
       return (3 - LogA) * Cost * NumLoads;
   }
-  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                CostKind, I);
 }
 unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
-      Type *Src, unsigned Alignment, unsigned AddressSpace) {
-  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+      Type *Src, unsigned Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind) {
+  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                      CostKind);
 }
 unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
@@ -213,38 +220,41 @@ unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
   return 1;
 }
-unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
-      Value *Ptr, bool VariableMask,
-      unsigned Alignment,
+unsigned HexagonTTIImpl::getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind,
       const Instruction *I) {
   return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                       Alignment, I);
+                                       Alignment, CostKind, I);
 }
 unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
       Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-      unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
+      unsigned Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind, bool UseMaskForCond,
       bool UseMaskForGaps) {
   if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                              Alignment, AddressSpace,
+                                             CostKind,
                                              UseMaskForCond, UseMaskForGaps);
   return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
-                         nullptr);
+                         CostKind);
 }
 unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-      Type *CondTy, const Instruction *I) {
+      Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) {
   if (ValTy->isVectorTy()) {
     std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
     if (Opcode == Instruction::FCmp)
       return LT.first + FloatFactor * getTypeNumElements(ValTy);
   }
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
 }
 unsigned HexagonTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
@@ -253,12 +263,12 @@ unsigned HexagonTTIImpl::getArithmeticInstrCost(
     if (LT.second.isFloatingPoint())
       return LT.first + FloatFactor * getTypeNumElements(Ty);
   }
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
 }
 unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
-      Type *SrcTy, const Instruction *I) {
+      Type *SrcTy, TTI::TargetCostKind CostKind, const Instruction *I) {
   if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
     unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
     unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;

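One subtlety in the Hexagon interleaved path above: the old call ended with an explicit nullptr for the trailing Instruction parameter, while the new call passes CostKind in that position and lets I default. A caller-side sketch of the equivalent wide-load query; the alignment and address space values are illustrative, and this assumes the public wrapper mirrors the signatures shown here:

// Sketch: when the indices cover every lane, Hexagon prices an interleaved
// access as a single wide load of the vector type.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

static int interleavedAsWideLoad(const TargetTransformInfo &TTI, Type *VecTy) {
  return TTI.getMemoryOpCost(Instruction::Load, VecTy, MaybeAlign(16),
                             /*AddressSpace=*/0,
                             TargetTransformInfo::TCK_RecipThroughput);
}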
lib/Target/Hexagon/HexagonTargetTransformInfo.h

@@ -105,34 +105,44 @@ public:
                                             bool Insert, bool Extract);
   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                             unsigned VF);
-  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys);
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys,
+                            TTI::TargetCostKind CostKind);
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef<Value *> Args, FastMathFlags FMF,
-                                 unsigned VF, const Instruction *I);
-  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
-                                 unsigned ScalarizationCostPassed = UINT_MAX,
-                                 const Instruction *I = nullptr);
+                                 unsigned VF,
+                                 TTI::TargetCostKind CostKind,
+                                 const Instruction *I);
+  unsigned getIntrinsicInstrCost(
+      Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
+      unsigned ScalarizationCostPassed = UINT_MAX,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr);
   unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
                                      const SCEV *S);
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                            unsigned AddressSpace,
+                           TTI::TargetCostKind CostKind,
                            const Instruction *I = nullptr);
   unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                 unsigned AddressSpace);
+                                 unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                           Type *SubTp);
   unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                   bool VariableMask, unsigned Alignment,
+                                  TTI::TargetCostKind CostKind,
                                   const Instruction *I);
   unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
       unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
-      unsigned AddressSpace, bool UseMaskForCond = false,
-      bool UseMaskForGaps = false);
+      unsigned AddressSpace,
+      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+      bool UseMaskForCond = false, bool UseMaskForGaps = false);
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
-                              const Instruction *I);
+                              TTI::TargetCostKind CostKind,
+                              const Instruction *I = nullptr);
   unsigned getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -140,10 +150,11 @@ public:
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr);
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                            TTI::TargetCostKind CostKind,
                             const Instruction *I = nullptr);
   unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
-  unsigned getCFInstrCost(unsigned Opcode) {
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
     return 1;
   }

lib/Target/Lanai/LanaiTargetTransformInfo.h

@@ -49,7 +49,7 @@ public:
     return TTI::PSK_Software;
   }
-  int getIntImmCost(const APInt &Imm, Type *Ty) {
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) {
     assert(Ty->isIntegerTy());
     if (Imm == 0)
       return TTI::TCC_Free;
@@ -66,17 +66,19 @@ public:
     return 4 * TTI::TCC_Basic;
   }
-  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) {
-    return getIntImmCost(Imm, Ty);
+  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
+                        TTI::TargetCostKind CostKind) {
+    return getIntImmCost(Imm, Ty, CostKind);
   }
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty) {
-    return getIntImmCost(Imm, Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind) {
+    return getIntImmCost(Imm, Ty, CostKind);
   }
   unsigned getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -87,7 +89,8 @@ public:
     switch (ISD) {
     default:
-      return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                           Opd2Info,
                                            Opd1PropInfo, Opd2PropInfo);
     case ISD::MUL:
     case ISD::SDIV:
@@ -98,7 +101,8 @@ public:
       // instruction cost was arbitrarily chosen to reduce the desirability
       // of emitting arithmetic instructions that are emulated in software.
       // TODO: Investigate the performance impact given specialized lowerings.
-      return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+      return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                                Opd2Info,
                                                 Opd1PropInfo, Opd2PropInfo);
     }
   }

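Lanai keeps its hooks inline in the header and, like most declarations in this patch, gives CostKind a default so existing call sites keep compiling. A self-contained illustration of that migration pattern; the enum and the costs are simplified stand-ins, not the real TTI definitions:

// Stand-in showing why a defaulted kind parameter eases migration:
// legacy callers compile unchanged, new callers opt in explicitly.
#include <cassert>

enum CostKind { RecipThroughput, CodeSize };

static int arithCost(unsigned Opcode, CostKind Kind = RecipThroughput) {
  (void)Opcode;                    // a real hook would switch on the opcode
  return Kind == CodeSize ? 1 : 2; // illustrative values only
}

int main() {
  assert(arithCost(0) == 2);           // legacy call site, default kind
  assert(arithCost(0, CodeSize) == 1); // migrated call site
}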
lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

@@ -112,7 +112,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
 }
 int NVPTXTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
@@ -123,7 +124,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost(
   switch (ISD) {
   default:
-    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                         Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo);
   case ISD::ADD:
   case ISD::MUL:
@@ -136,7 +138,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost(
     if (LT.second.SimpleTy == MVT::i64)
       return 2 * LT.first;
     // Delegate other cases to the basic TTI.
-    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                         Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo);
   }
 }

lib/Target/NVPTX/NVPTXTargetTransformInfo.h

@@ -87,6 +87,7 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,

lib/Target/PowerPC/PPCTargetTransformInfo.cpp

@@ -59,9 +59,10 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
   return TTI::PSK_Software;
 }
-int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                              TTI::TargetCostKind CostKind) {
   if (DisablePPCConstHoist)
-    return BaseT::getIntImmCost(Imm, Ty);
+    return BaseT::getIntImmCost(Imm, Ty, CostKind);
   assert(Ty->isIntegerTy());
@@ -89,9 +90,10 @@ int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                    const APInt &Imm, Type *Ty) {
+                                    const APInt &Imm, Type *Ty,
+                                    TTI::TargetCostKind CostKind) {
   if (DisablePPCConstHoist)
-    return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty);
+    return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
   assert(Ty->isIntegerTy());
@@ -119,13 +121,14 @@ int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
       return TTI::TCC_Free;
     break;
   }
-  return PPCTTIImpl::getIntImmCost(Imm, Ty);
+  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
-                                  const APInt &Imm, Type *Ty) {
+                                  const APInt &Imm, Type *Ty,
+                                  TTI::TargetCostKind CostKind) {
   if (DisablePPCConstHoist)
-    return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty);
+    return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
   assert(Ty->isIntegerTy());
@@ -203,7 +206,7 @@ int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
     return TTI::TCC_Free;
   }
-  return PPCTTIImpl::getIntImmCost(Imm, Ty);
+  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 unsigned
@@ -720,6 +723,7 @@ int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
 }
 int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::TargetCostKind CostKind,
                                        TTI::OperandValueKind Op1Info,
                                        TTI::OperandValueKind Op2Info,
                                        TTI::OperandValueProperties Opd1PropInfo,
@@ -729,7 +733,8 @@ int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
   // Fallback to the default implementation.
-  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+                                           Op2Info,
                                            Opd1PropInfo, Opd2PropInfo);
   return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
 }
@@ -749,16 +754,18 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
 }
 int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
-  int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src);
+  int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
   return vectorCostAdjustment(Cost, Opcode, Dst, Src);
 }
 int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   TTI::TargetCostKind CostKind,
                                    const Instruction *I) {
-  int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
   return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
 }
@@ -837,13 +844,15 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
 int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                 MaybeAlign Alignment, unsigned AddressSpace,
+                                TTI::TargetCostKind CostKind,
                                 const Instruction *I) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
   assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
          "Invalid Opcode");
-  int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+  int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                    CostKind);
   Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
   bool IsAltivecType = ST->hasAltivec() &&
@@ -913,11 +922,12 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                            ArrayRef<unsigned> Indices,
                                            unsigned Alignment,
                                            unsigned AddressSpace,
+                                           TTI::TargetCostKind CostKind,
                                            bool UseMaskForCond,
                                            bool UseMaskForGaps) {
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                              UseMaskForCond, UseMaskForGaps);
   assert(isa<VectorType>(VecTy) &&
@@ -928,7 +938,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   // Firstly, the cost of load/store operation.
   int Cost =
-      getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
+      getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
+                      CostKind);
   // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
   // (at least in the sense that there need only be one non-loop-invariant
@@ -943,19 +954,21 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
 unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                            ArrayRef<Value *> Args,
                                            FastMathFlags FMF, unsigned VF,
+                                           TTI::TargetCostKind CostKind,
                                            const Instruction *I) {
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
 }
 unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                            ArrayRef<Type *> Tys,
                                            FastMathFlags FMF,
                                            unsigned ScalarizationCostPassed,
+                                           TTI::TargetCostKind CostKind,
                                            const Instruction *I) {
   if (ID == Intrinsic::bswap && ST->hasP9Vector())
     return TLI->getTypeLegalizationCost(DL, RetTy).first;
   return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, I);
+                                      ScalarizationCostPassed, CostKind, I);
 }
 bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,

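PowerPC forwards the kind into each base-cost call and then applies vectorCostAdjustment on top, so the same operation can now be priced differently per kind. A caller-side sketch comparing two kinds for a single cast; the helper is hypothetical and assumes the public wrapper mirrors the signatures above:

// Sketch: ask whether a cast looks cheaper for size than for throughput,
// the kind of question callers can only pose now that the kind is explicit.
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

static bool castCheaperForSize(const TargetTransformInfo &TTI, unsigned Opcode,
                               Type *Dst, Type *Src) {
  int SizeCost = TTI.getCastInstrCost(Opcode, Dst, Src,
                                      TargetTransformInfo::TCK_CodeSize);
  int TputCost = TTI.getCastInstrCost(Opcode, Dst, Src,
                                      TargetTransformInfo::TCK_RecipThroughput);
  return SizeCost < TputCost;
}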
lib/Target/PowerPC/PPCTargetTransformInfo.h

@@ -45,12 +45,13 @@ public:
   /// @{
   using BaseT::getIntImmCost;
-  int getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty,
+                    TTI::TargetCostKind CostKind);
   int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                        Type *Ty);
+                        Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
                        TTI::TargetCostKind CostKind);
@@ -91,6 +92,7 @@ public:
   int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -99,25 +101,34 @@ public:
       const Instruction *CxtI = nullptr);
   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
+                      unsigned AddressSpace,
+                      TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                  unsigned Factor,
                                  ArrayRef<unsigned> Indices,
                                  unsigned Alignment,
                                  unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
-  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                 ArrayRef<Value *> Args, FastMathFlags FMF,
-                                 unsigned VF, const Instruction *I = nullptr);
+  unsigned getIntrinsicInstrCost(
+      Intrinsic::ID ID, Type *RetTy,
+      ArrayRef<Value *> Args, FastMathFlags FMF,
+      unsigned VF,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr);
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef<Type *> Tys, FastMathFlags FMF,
                                  unsigned ScalarizationCostPassed = UINT_MAX,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                                  const Instruction *I = nullptr);
   /// @}

lib/Target/RISCV/RISCVTargetTransformInfo.cpp

@@ -15,7 +15,8 @@ using namespace llvm;
 #define DEBUG_TYPE "riscvtti"
-int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                                TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy() &&
          "getIntImmCost can only estimate cost of materialising integers");
@@ -30,7 +31,7 @@ int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                                    Type *Ty) {
+                                    Type *Ty, TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy() &&
          "getIntImmCost can only estimate cost of materialising integers");
@@ -78,7 +79,7 @@ int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &
   }
   // Otherwise, use the full materialisation cost.
-  return getIntImmCost(Imm, Ty);
+  return getIntImmCost(Imm, Ty, CostKind);
 }
 // By default, prevent hoisting.
@@ -86,7 +87,8 @@ int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &
 }
 int RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                      const APInt &Imm, Type *Ty) {
+                                      const APInt &Imm, Type *Ty,
+                                      TTI::TargetCostKind CostKind) {
   // Prevent hoisting in unknown cases.
   return TTI::TCC_Free;
 }
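The RISC-V hunks above show the pattern repeated across every backend in this commit: the entry point gains a TTI::TargetCostKind parameter and threads it into each nested cost query instead of letting the callee fall back to its own default. A condensed sketch of that pattern, with hypothetical free functions standing in for the member functions (not code from this commit):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Assumed helper: the plain materialisation cost for Imm of type Ty.
    int exampleGetIntImmCost(const APInt &Imm, Type *Ty,
                             TTI::TargetCostKind CostKind);

    int exampleGetIntImmCostInst(unsigned Opcode, unsigned Idx,
                                 const APInt &Imm, Type *Ty,
                                 TTI::TargetCostKind CostKind) {
      if (Opcode == Instruction::PHI) // unknown context: prevent hoisting
        return TTI::TCC_Free;
      // Forward the caller's cost kind instead of re-deriving a default.
      return exampleGetIntImmCost(Imm, Ty, CostKind);
    }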

View File

@@ -41,12 +41,13 @@ public:
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}
-  int getIntImmCost(const APInt &Imm, Type *Ty);
-  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
+  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
+                        TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
 };
 } // end namespace llvm
 #endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H

View File

@@ -30,7 +30,8 @@ using namespace llvm;
 //
 //===----------------------------------------------------------------------===//
-int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                                  TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -63,7 +64,8 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
-                                      const APInt &Imm, Type *Ty) {
+                                      const APInt &Imm, Type *Ty,
+                                      TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -177,11 +179,12 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
     break;
   }
-  return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+  return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                        const APInt &Imm, Type *Ty) {
+                                        const APInt &Imm, Type *Ty,
+                                        TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -226,7 +229,7 @@ int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
     return TTI::TCC_Free;
     break;
   }
-  return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+  return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 TargetTransformInfo::PopcntSupportKind
@@ -258,7 +261,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       }
       if (isa<StoreInst>(&I)) {
         Type *MemAccessTy = I.getOperand(0)->getType();
-        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
+        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0,
+                                     TTI::TCK_RecipThroughput);
       }
     }
@@ -365,7 +369,8 @@ static unsigned getNumVectorRegs(Type *Ty) {
 }
 int SystemZTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
@@ -494,7 +499,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(
     // Return the cost of multiple scalar invocation plus the cost of
     // inserting and extracting the values.
     unsigned ScalarCost =
-        getArithmeticInstrCost(Opcode, Ty->getScalarType());
+        getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
     unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
     // FIXME: VF 2 for these FP operations are currently just as
     // expensive as for VF 4.
@@ -521,7 +526,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(
   }
   // Fallback to the default implementation.
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
 }
@@ -684,6 +689,7 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
 }
 int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                     TTI::TargetCostKind CostKind,
                                      const Instruction *I) {
   unsigned DstScalarBits = Dst->getScalarSizeInBits();
   unsigned SrcScalarBits = Src->getScalarSizeInBits();
@@ -764,7 +770,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       // inserting and extracting the values. Base implementation does not
       // realize float->int gets scalarized.
       unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
-                                             Src->getScalarType());
+                                             Src->getScalarType(), CostKind);
       unsigned TotCost = VF * ScalarCost;
       bool NeedsInserts = true, NeedsExtracts = true;
       // FP128 registers do not get inserted or extracted.
@@ -804,7 +810,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     }
   }
-  return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
 }
 // Scalar i8 / i16 operations will typically be made after first extending
@@ -820,7 +826,9 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {
 }
 int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                       Type *CondTy, const Instruction *I) {
+                                       Type *CondTy,
+                                       TTI::TargetCostKind CostKind,
+                                       const Instruction *I) {
   if (!ValTy->isVectorTy()) {
     switch (Opcode) {
     case Instruction::ICmp: {
@@ -895,7 +903,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     }
   }
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
 }
 int SystemZTTIImpl::
@@ -1009,6 +1017,7 @@ static bool isBswapIntrinsicCall(const Value *V) {
 int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                     MaybeAlign Alignment, unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
                                     const Instruction *I) {
   assert(!Src->isVoidTy() && "Invalid type");
@@ -1077,11 +1086,12 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                                ArrayRef<unsigned> Indices,
                                                unsigned Alignment,
                                                unsigned AddressSpace,
+                                               TTI::TargetCostKind CostKind,
                                                bool UseMaskForCond,
                                                bool UseMaskForGaps) {
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                              UseMaskForCond, UseMaskForGaps);
   assert(isa<VectorType>(VecTy) &&
          "Expect a vector type for interleaved memory op");
@@ -1142,21 +1152,23 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
 int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                           ArrayRef<Value *> Args,
                                           FastMathFlags FMF, unsigned VF,
+                                          TTI::TargetCostKind CostKind,
                                           const Instruction *I) {
   int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
   if (Cost != -1)
     return Cost;
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
 }
 int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                           ArrayRef<Type *> Tys,
                                           FastMathFlags FMF,
                                           unsigned ScalarizationCostPassed,
+                                          TTI::TargetCostKind CostKind,
                                           const Instruction *I) {
   int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
   if (Cost != -1)
     return Cost;
   return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, I);
+                                      ScalarizationCostPassed, CostKind, I);
 }
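The recurring arithmetic in the SystemZ scalarization paths above is worth spelling out: when a vector operation has to be emulated element-wise, the reported cost is the per-element cost, queried with the same CostKind, multiplied by the vector factor, plus the insert/extract overhead. A worked sketch (the numbers are made up for illustration):

    // cost(vector op) = VF * cost(scalar op, CostKind) + scalarization overhead
    unsigned scalarizedCost(unsigned VF, unsigned ScalarCost,
                            unsigned InsertExtractOverhead) {
      return VF * ScalarCost + InsertExtractOverhead;
    }
    // e.g. a 4-lane FP divide with scalar cost 10 and overhead 8:
    //   scalarizedCost(4, 10, 8) == 48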

View File

@@ -38,11 +38,12 @@ public:
   unsigned getInliningThresholdMultiplier() { return 3; }
-  int getIntImmCost(const APInt &Imm, Type *Ty);
-  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
+  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                        Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
@@ -75,6 +76,7 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -88,28 +90,35 @@ public:
   unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
                                          const Instruction *I);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
+                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                  unsigned Factor,
                                  ArrayRef<unsigned> Indices,
                                  unsigned Alignment,
                                  unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                             ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1, const Instruction *I = nullptr);
+                            unsigned VF = 1,
+                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+                            const Instruction *I = nullptr);
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
                             FastMathFlags FMF,
                             unsigned ScalarizationCostPassed = UINT_MAX,
+                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                             const Instruction *I = nullptr);
   /// @}
 };

View File

@@ -44,13 +44,14 @@ unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const {
 }
 unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
   unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
-      Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+      Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
   if (auto *VTy = dyn_cast<VectorType>(Ty)) {
     switch (Opcode) {
@@ -64,7 +65,7 @@ unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
           Opd2Info != TTI::OK_UniformConstantValue)
         Cost = VTy->getNumElements() *
                (TargetTransformInfo::TCC_Basic +
-                getArithmeticInstrCost(Opcode, VTy->getElementType()) +
+                getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
                 TargetTransformInfo::TCC_Basic);
       break;
     }
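The WebAssembly override above illustrates the "base cost first, then adjust" idiom: it asks BasicTTIImplBase for the generic cost (now forwarding CostKind) and only overrides the one case it models better, non-uniform vector shifts, which get scalarized. A hedged standalone rendering of that adjustment (the constants and names are illustrative, not the backend's actual helper):

    // Non-uniform vector shifts are scalarized:
    // one extract + one scalar shift + one insert per lane.
    unsigned adjustedVectorShiftCost(unsigned ScalarShiftCost,
                                     unsigned NumElements,
                                     bool UniformShiftAmount,
                                     unsigned GenericCost) {
      if (UniformShiftAmount)
        return GenericCost; // the generic estimate stands
      const unsigned ExtractCost = 1, InsertCost = 1; // ~TCC_Basic each
      return NumElements * (ExtractCost + ScalarShiftCost + InsertCost);
    }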

View File

@@ -57,6 +57,7 @@ public:
   unsigned getRegisterBitWidth(bool Vector) const;
   unsigned getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,

View File

@@ -170,6 +170,7 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
 }
 int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::TargetCostKind CostKind,
                                        TTI::OperandValueKind Op1Info,
                                        TTI::OperandValueKind Op2Info,
                                        TTI::OperandValueProperties Opd1PropInfo,
@@ -256,20 +257,25 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     // The OperandValue properties may not be the same as that of the previous
     // operation; conservatively assume OP_None.
     int Cost =
-        2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
+        2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
+                                   Op2Info,
                                    TargetTransformInfo::OP_None,
                                    TargetTransformInfo::OP_None);
-    Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
+    Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
+                                   Op2Info,
                                    TargetTransformInfo::OP_None,
                                    TargetTransformInfo::OP_None);
-    Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
+    Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
+                                   Op2Info,
                                    TargetTransformInfo::OP_None,
                                    TargetTransformInfo::OP_None);
     if (ISD == ISD::SREM) {
       // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
-      Cost += getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info);
-      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Op1Info, Op2Info);
+      Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
+                                     Op2Info);
+      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
+                                     Op2Info);
     }
     return Cost;
@@ -277,12 +283,14 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     // Vector unsigned division/remainder will be simplified to shifts/masks.
     if (ISD == ISD::UDIV)
-      return getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
+      return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
+                                    Op1Info, Op2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
     else // UREM
-      return getArithmeticInstrCost(Instruction::And, Ty, Op1Info, Op2Info,
+      return getArithmeticInstrCost(Instruction::And, Ty, CostKind,
+                                    Op1Info, Op2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
   }
@@ -596,7 +604,8 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
        Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
     // On AVX512, a packed v32i16 shift left by a constant build_vector
     // is lowered into a vector multiply (vpmullw).
-    return getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info,
+    return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+                                  Op1Info, Op2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
 }
@@ -608,7 +617,8 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
        Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
     // On AVX2, a packed v16i16 shift left by a constant build_vector
     // is lowered into a vector multiply (vpmullw).
-    return getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info,
+    return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+                                  Op1Info, Op2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
@@ -916,13 +926,13 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   if (LT.second.isVector() && (ISD == ISD::SDIV || ISD == ISD::SREM ||
                                ISD == ISD::UDIV || ISD == ISD::UREM)) {
     int ScalarCost = getArithmeticInstrCost(
-        Opcode, Ty->getScalarType(), Op1Info, Op2Info,
+        Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info,
        TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
     return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
   }
   // Fallback to the default implementation.
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info);
 }
 int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
@@ -1353,6 +1363,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
 }
 int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
@@ -1966,7 +1977,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   // The function getSimpleVT only handles simple value types.
   if (!SrcTy.isSimple() || !DstTy.isSimple())
-    return BaseT::getCastInstrCost(Opcode, Dst, Src);
+    return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
   MVT SimpleSrcTy = SrcTy.getSimpleVT();
   MVT SimpleDstTy = DstTy.getSimpleVT();
@@ -2027,10 +2038,11 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       return Entry->Cost;
   }
-  return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
 }
 int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   TTI::TargetCostKind CostKind,
                                    const Instruction *I) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -2214,7 +2226,7 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
   if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
     return LT.first * (ExtraCost + Entry->Cost);
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
 }
 unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
@@ -2222,6 +2234,7 @@ unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
 int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                       ArrayRef<Type *> Tys, FastMathFlags FMF,
                                       unsigned ScalarizationCostPassed,
+                                      TTI::TargetCostKind CostKind,
                                       const Instruction *I) {
   // Costs should match the codegen from:
   // BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
@@ -2682,12 +2695,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
   }
   return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, I);
+                                      ScalarizationCostPassed, CostKind, I);
 }
 int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                       ArrayRef<Value *> Args, FastMathFlags FMF,
-                                      unsigned VF, const Instruction *I) {
+                                      unsigned VF,
+                                      TTI::TargetCostKind CostKind,
+                                      const Instruction *I) {
   static const CostTblEntry AVX512CostTbl[] = {
     { ISD::ROTL, MVT::v8i64, 1 },
     { ISD::ROTL, MVT::v4i64, 1 },
@@ -2777,7 +2792,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
     return LT.first * Entry->Cost;
   }
-  return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF, I);
+  return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF, CostKind, I);
 }
 int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -2933,6 +2948,7 @@ unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty,
 int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                 MaybeAlign Alignment, unsigned AddressSpace,
+                                TTI::TargetCostKind CostKind,
                                 const Instruction *I) {
   // Handle non-power-of-two vectors such as <3 x float>
   if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
@@ -2953,7 +2969,7 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
     if (!isPowerOf2_32(NumElem)) {
       APInt DemandedElts = APInt::getAllOnesValue(NumElem);
       int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment,
-                                        AddressSpace);
+                                        AddressSpace, CostKind);
       int SplitCost = getScalarizationOverhead(Src, DemandedElts,
                                                Opcode == Instruction::Load,
                                                Opcode == Instruction::Store);
@@ -2979,14 +2995,16 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
 int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
                                       unsigned Alignment,
-                                      unsigned AddressSpace) {
+                                      unsigned AddressSpace,
+                                      TTI::TargetCostKind CostKind) {
   bool IsLoad = (Instruction::Load == Opcode);
   bool IsStore = (Instruction::Store == Opcode);
   VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
   if (!SrcVTy)
     // To calculate scalar take the regular cost, without mask
-    return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace);
+    return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace,
+                           CostKind);
   unsigned NumElem = SrcVTy->getNumElements();
   VectorType *MaskTy =
@@ -2999,14 +3017,16 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     int MaskSplitCost =
         getScalarizationOverhead(MaskTy, DemandedElts, false, true);
     int ScalarCompareCost = getCmpSelInstrCost(
-        Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr);
-    int BranchCost = getCFInstrCost(Instruction::Br);
+        Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr,
+        CostKind);
+    int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
     int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
     int ValueSplitCost =
         getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore);
     int MemopCost =
         NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                         MaybeAlign(Alignment), AddressSpace);
+                                         MaybeAlign(Alignment), AddressSpace,
+                                         CostKind);
     return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
   }
@@ -3061,10 +3081,11 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
 }
 int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
-                                           bool IsPairwise) {
+                                           bool IsPairwise,
+                                           TTI::TargetCostKind CostKind) {
   // Just use the default implementation for pair reductions.
   if (IsPairwise)
-    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise);
+    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise, CostKind);
   // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
   // and make it as the cost.
@@ -3134,7 +3155,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       // Type needs to be split. We need LT.first - 1 arithmetic ops.
       VectorType *SingleOpTy =
           VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
-      ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy);
+      ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
       ArithmeticCost *= LT.first - 1;
     }
@@ -3204,7 +3225,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       // Type needs to be split. We need LT.first - 1 arithmetic ops.
       Type *SingleOpTy =
          VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
-      ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy);
+      ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
       ArithmeticCost *= LT.first - 1;
     }
@@ -3221,7 +3242,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy))
         return ArithmeticCost + Entry->Cost;
-    return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise);
+    return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise,
+                                             CostKind);
   }
   unsigned NumVecElts = ValVTy->getNumElements();
@@ -3230,7 +3252,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
   // Special case power of 2 reductions where the scalar type isn't changed
   // by type legalization.
   if (!isPowerOf2_32(NumVecElts) || ScalarSize != MTy.getScalarSizeInBits())
-    return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise);
+    return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise,
+                                             CostKind);
   unsigned ReductionCost = 0;
@@ -3239,7 +3262,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       MTy.getVectorNumElements() < ValVTy->getNumElements()) {
     // Type needs to be split. We need LT.first - 1 arithmetic ops.
     Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
-    ReductionCost = getArithmeticInstrCost(Opcode, Ty);
+    ReductionCost = getArithmeticInstrCost(Opcode, Ty, CostKind);
     ReductionCost *= LT.first - 1;
     NumVecElts = MTy.getVectorNumElements();
   }
@@ -3279,13 +3302,14 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       auto *ShiftTy = VectorType::get(
          Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size);
       ReductionCost += getArithmeticInstrCost(
-          Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue,
+          Instruction::LShr, ShiftTy, CostKind,
+          TargetTransformInfo::OK_AnyValue,
          TargetTransformInfo::OK_UniformConstantValue,
          TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
     }
     // Add the arithmetic op for this level.
-    ReductionCost += getArithmeticInstrCost(Opcode, Ty);
+    ReductionCost += getArithmeticInstrCost(Opcode, Ty, CostKind);
   }
   // Add the final extract element to the cost.
@@ -3409,16 +3433,19 @@ int X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) {
       CmpOpcode = Instruction::ICmp;
   }
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   // Otherwise fall back to cmp+select.
-  return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
-         getCmpSelInstrCost(Instruction::Select, Ty, CondTy, nullptr);
+  return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
+         getCmpSelInstrCost(Instruction::Select, Ty, CondTy, CostKind);
 }
 int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
-                                       bool IsPairwise, bool IsUnsigned) {
+                                       bool IsPairwise, bool IsUnsigned,
+                                       TTI::TargetCostKind CostKind) {
   // Just use the default implementation for pair reductions.
   if (IsPairwise)
-    return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);
+    return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned,
+                                         CostKind);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -3534,7 +3561,8 @@ int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
   // by type legalization.
   if (!isPowerOf2_32(ValVTy->getNumElements()) ||
       ScalarSize != MTy.getScalarSizeInBits())
-    return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);
+    return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned,
+                                         CostKind);
   // Now handle reduction with the legal type, taking into account size changes
   // at each level.
@@ -3571,7 +3599,8 @@ int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
       VectorType *ShiftTy = VectorType::get(
          Type::getIntNTy(ValTy->getContext(), Size), 128 / Size);
       MinMaxCost += getArithmeticInstrCost(
-          Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue,
+          Instruction::LShr, ShiftTy, TTI::TCK_RecipThroughput,
+          TargetTransformInfo::OK_AnyValue,
          TargetTransformInfo::OK_UniformConstantValue,
          TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
     }
@@ -3599,7 +3628,8 @@ int X86TTIImpl::getIntImmCost(int64_t Val) {
   return 2 * TTI::TCC_Basic;
 }
-int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                              TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3634,7 +3664,7 @@ int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                                  Type *Ty) {
+                                  Type *Ty, TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3721,17 +3751,18 @@ int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
   if (Idx == ImmIdx) {
     int NumConstants = divideCeil(BitSize, 64);
-    int Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
+    int Cost = X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
     return (Cost <= NumConstants * TTI::TCC_Basic)
                ? static_cast<int>(TTI::TCC_Free)
                : Cost;
   }
-  return X86TTIImpl::getIntImmCost(Imm, Ty);
+  return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                    const APInt &Imm, Type *Ty) {
+                                    const APInt &Imm, Type *Ty,
+                                    TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3762,7 +3793,7 @@ int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
     return TTI::TCC_Free;
     break;
   }
-  return X86TTIImpl::getIntImmCost(Imm, Ty);
+  return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 unsigned
@@ -3842,7 +3873,8 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
                             ? ST->getGatherOverhead()
                             : ST->getScatterOverhead();
   return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                           MaybeAlign(Alignment), AddressSpace);
+                                           MaybeAlign(Alignment), AddressSpace,
+                                           TTI::TCK_RecipThroughput);
 }
 /// Return the cost of full scalarization of gather / scatter operation.
@@ -3858,6 +3890,7 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
                                 unsigned AddressSpace) {
   unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
   APInt DemandedElts = APInt::getAllOnesValue(VF);
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   int MaskUnpackCost = 0;
   if (VariableMask) {
@@ -3867,14 +3900,15 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
        getScalarizationOverhead(MaskTy, DemandedElts, false, true);
     int ScalarCompareCost =
        getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()),
-                          nullptr);
-    int BranchCost = getCFInstrCost(Instruction::Br);
+                          nullptr, CostKind);
+    int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
     MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
   }
   // The cost of the scalar loads/stores.
   int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                          MaybeAlign(Alignment), AddressSpace);
+                                          MaybeAlign(Alignment), AddressSpace,
+                                          CostKind);
   int InsertExtractCost = 0;
   if (Opcode == Instruction::Load)
@@ -3892,10 +3926,11 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
 }
 /// Calculate the cost of Gather / Scatter operation
-int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
-                                       Value *Ptr, bool VariableMask,
-                                       unsigned Alignment,
-                                       const Instruction *I = nullptr) {
+int X86TTIImpl::getGatherScatterOpCost(
+    unsigned Opcode, Type *SrcVTy, Value *Ptr, bool VariableMask,
+    unsigned Alignment, TTI::TargetCostKind CostKind,
+    const Instruction *I = nullptr) {
   assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
   unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
   PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
@@ -4177,19 +4212,21 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
                                               ArrayRef<unsigned> Indices,
                                               unsigned Alignment,
                                               unsigned AddressSpace,
+                                              TTI::TargetCostKind CostKind,
                                               bool UseMaskForCond,
                                               bool UseMaskForGaps) {
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);
   // We currently Support only fully-interleaved groups, with no gaps.
   // TODO: Support also strided loads (interleaved-groups with gaps).
   if (Indices.size() && Indices.size() != Factor)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace);
+                                             Alignment, AddressSpace,
+                                             CostKind);
   // VecTy for interleave memop is <VF*Factor x Elt>.
   // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
@@ -4201,7 +4238,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
   // (see MachineValueType.h::getVectorVT()).
   if (!LegalVT.isVector())
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace);
+                                             Alignment, AddressSpace,
+                                             CostKind);
   unsigned VF = cast<VectorType>(VecTy)->getNumElements() / Factor;
   Type *ScalarTy = cast<VectorType>(VecTy)->getElementType();
@@ -4217,13 +4255,15 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
       VectorType::get(cast<VectorType>(VecTy)->getElementType(),
                       LegalVT.getVectorNumElements());
   unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
-                                       MaybeAlign(Alignment), AddressSpace);
+                                       MaybeAlign(Alignment), AddressSpace,
+                                       CostKind);
   VectorType *VT = VectorType::get(ScalarTy, VF);
   EVT ETy = TLI->getValueType(DL, VT);
   if (!ETy.isSimple())
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace);
+                                             Alignment, AddressSpace,
+                                             CostKind);
   // TODO: Complete for other data-types and strides.
   // Each combination of Stride, ElementTy and VF results in a different
@@ -4282,7 +4322,7 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
   }
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace);
+                                           Alignment, AddressSpace, CostKind);
 }
 // Get estimation for interleaved load/store operations and strided load.
@@ -4294,12 +4334,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
                                                 ArrayRef<unsigned> Indices,
                                                 unsigned Alignment,
                                                 unsigned AddressSpace,
+                                                TTI::TargetCostKind CostKind,
                                                 bool UseMaskForCond,
                                                 bool UseMaskForGaps) {
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);
   // VecTy for interleave memop is <VF*Factor x Elt>.
@@ -4318,7 +4359,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
       VectorType::get(cast<VectorType>(VecTy)->getElementType(),
                       LegalVT.getVectorNumElements());
   unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
-                                       MaybeAlign(Alignment), AddressSpace);
+                                       MaybeAlign(Alignment), AddressSpace,
+                                       CostKind);
   unsigned VF = cast<VectorType>(VecTy)->getNumElements() / Factor;
   MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
@@ -4421,6 +4463,7 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                           ArrayRef<unsigned> Indices,
                                           unsigned Alignment,
                                           unsigned AddressSpace,
+                                          TTI::TargetCostKind CostKind,
                                           bool UseMaskForCond,
                                           bool UseMaskForGaps) {
   auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
@@ -4434,14 +4477,14 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   };
   if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
     return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
-                                            Alignment, AddressSpace,
+                                            Alignment, AddressSpace, CostKind,
                                            UseMaskForCond, UseMaskForGaps);
   if (ST->hasAVX2())
     return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
-                                          Alignment, AddressSpace,
+                                          Alignment, AddressSpace, CostKind,
                                          UseMaskForCond, UseMaskForGaps);
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace,
+                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
 }
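One more pattern worth noting from the X86 changes above: helpers whose own signatures did not (yet) grow a CostKind parameter, such as getMinMaxCost and getGSScalarCost, pin a local kind and pass it down explicitly, so the nested queries stop relying on implicit defaults. A hedged sketch of the same shape (the helper itself is hypothetical; the TTI calls match the post-patch signatures shown in this diff):

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    static int minMaxAsCmpSelCost(const TargetTransformInfo &TTIRef,
                                  unsigned CmpOpcode, Type *Ty, Type *CondTy) {
      // No CostKind parameter here yet, so fix one locally and forward it.
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
      return TTIRef.getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
             TTIRef.getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
                                       CostKind);
    }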

View File

@ -119,6 +119,7 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF); unsigned getMaxInterleaveFactor(unsigned VF);
int getArithmeticInstrCost( int getArithmeticInstrCost(
unsigned Opcode, Type *Ty, unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@ -128,66 +129,82 @@ public:
int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp); VectorType *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr); const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr); const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
unsigned getScalarizationOverhead(Type *Ty, const APInt &DemandedElts, unsigned getScalarizationOverhead(Type *Ty, const APInt &DemandedElts,
bool Insert, bool Extract); bool Insert, bool Extract);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr); unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace); unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment, bool VariableMask, unsigned Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I); const Instruction *I);
   int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                 const SCEV *Ptr);
   unsigned getAtomicMemIntrinsicMaxElementSize() const;
-  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> Tys, FastMathFlags FMF,
-                            unsigned ScalarizationCostPassed = UINT_MAX,
-                            const Instruction *I = nullptr);
+  int getIntrinsicInstrCost(
+      Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys,
+      FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr);
   int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                             ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1, const Instruction *I = nullptr);
+                            unsigned VF = 1,
+                            TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+                            const Instruction *I = nullptr);
   int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                 bool IsPairwiseForm);
+                                 bool IsPairwiseForm,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
   int getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
   int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                             bool IsPairwiseForm, bool IsUnsigned);
+                             bool IsPairwiseForm, bool IsUnsigned,
+                             TTI::TargetCostKind CostKind);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                  unsigned Factor, ArrayRef<unsigned> Indices,
                                  unsigned Alignment, unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
   int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
                                        unsigned Factor, ArrayRef<unsigned> Indices,
                                        unsigned Alignment, unsigned AddressSpace,
+                                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                        bool UseMaskForCond = false,
                                        bool UseMaskForGaps = false);
   int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
                                      unsigned Factor, ArrayRef<unsigned> Indices,
                                      unsigned Alignment, unsigned AddressSpace,
+                                     TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                      bool UseMaskForCond = false,
                                      bool UseMaskForGaps = false);
   int getIntImmCost(int64_t);
-  int getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
                        TTI::TargetCostKind);
-  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
+                        TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                      TargetTransformInfo::LSRCost &C2);
   bool canMacroFuseCmp();
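
Each of these target hooks now threads a TTI::TargetCostKind through, with defaults chosen so existing callers keep compiling. As a rough sketch (not part of this commit; the helper name is hypothetical), a cost-aware caller can make the kind explicit instead of relying on the default argument:

// Assumes the usual LLVM headers.
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical helper: query the materialization cost of an immediate
// under an explicit cost kind.
static int immCostFor(const TargetTransformInfo &TTI, const APInt &Imm,
                      Type *Ty, TargetTransformInfo::TargetCostKind Kind) {
  return TTI.getIntImmCost(Imm, Ty, Kind);
}

Passing TCK_SizeAndLatency here matches what the constant-hoisting changes below now do explicitly, while the vectorizer changes further down ask for TCK_RecipThroughput.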

View File

@@ -363,10 +363,12 @@ void ConstantHoistingPass::collectConstantCandidates(
   // instruction and operand index.
   if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))
     Cost = TTI->getIntImmCostIntrin(IntrInst->getIntrinsicID(), Idx,
-                                    ConstInt->getValue(), ConstInt->getType());
+                                    ConstInt->getValue(), ConstInt->getType(),
+                                    TargetTransformInfo::TCK_SizeAndLatency);
   else
     Cost = TTI->getIntImmCostInst(Inst->getOpcode(), Idx, ConstInt->getValue(),
-                                  ConstInt->getType());
+                                  ConstInt->getType(),
+                                  TargetTransformInfo::TCK_SizeAndLatency);
 
   // Ignore cheap integer constants.
   if (Cost > TargetTransformInfo::TCC_Basic) {
@@ -416,7 +418,8 @@ void ConstantHoistingPass::collectConstantCandidates(
   // usually lowered to a load from constant pool. Such operation is unlikely
   // to be cheaper than compute it by <Base + Offset>, which can be lowered to
   // an ADD instruction or folded into Load/Store instruction.
-  int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy);
+  int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy,
+                                    TargetTransformInfo::TCK_SizeAndLatency);
   ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];
   ConstCandMapType::iterator Itr;
   bool Inserted;
@@ -582,7 +585,8 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
     for (auto User : ConstCand->Uses) {
       unsigned Opcode = User.Inst->getOpcode();
       unsigned OpndIdx = User.OpndIdx;
-      Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty);
+      Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty,
+                                     TargetTransformInfo::TCK_SizeAndLatency);
       LLVM_DEBUG(dbgs() << "Cost: " << Cost << "\n");
 
       for (auto C2 = S; C2 != E; ++C2) {
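
Constant hoisting trades the cost of folding an immediate into its user against materializing it once, which is why these call sites pin TCK_SizeAndLatency. A minimal sketch of that pattern, with a hypothetical helper name and the same includes as the earlier sketch:

// An immediate is worth hoisting only if folding it into its user is not
// already cheap in size and latency terms.
static bool isCheapImmediate(const TargetTransformInfo &TTI,
                             const Instruction *Inst, unsigned Idx,
                             const ConstantInt *C) {
  int Cost = TTI.getIntImmCostInst(Inst->getOpcode(), Idx, C->getValue(),
                                   C->getType(),
                                   TargetTransformInfo::TCK_SizeAndLatency);
  return Cost <= TargetTransformInfo::TCC_Basic;
}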

View File

@@ -1990,7 +1990,9 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
            "non noop cast is found during rematerialization");
 
     Type *SrcTy = CI->getOperand(0)->getType();
-    Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, CI);
+    Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
+                                 TargetTransformInfo::TCK_SizeAndLatency,
+                                 CI);
 
   } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
     // Cost of the address calculation
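
Note the new parameter order: the cost kind now sits before the optional instruction pointer. A sketch of the resulting call shape (free function and context assumed, not from this commit):

static int castRematCost(const TargetTransformInfo &TTI, CastInst *CI) {
  return TTI.getCastInstrCost(CI->getOpcode(), CI->getType(),
                              CI->getOperand(0)->getType(),
                              TargetTransformInfo::TCK_SizeAndLatency, CI);
}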

View File

@@ -232,7 +232,8 @@ static bool isSafeAndProfitableToSpeculateAroundPHI(
       continue;
 
     int &MatCost = InsertResult.first->second.MatCost;
-    MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType());
+    MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType(),
+                                TargetTransformInfo::TCK_SizeAndLatency);
     NonFreeMat |= MatCost != TTI.TCC_Free;
   }
   if (!NonFreeMat) {
@@ -283,12 +284,15 @@ static bool isSafeAndProfitableToSpeculateAroundPHI(
       int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
       int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
       if (IID)
-        FoldedCost += TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(),
-                                              IncomingC->getType());
+        FoldedCost +=
+            TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(),
+                                    IncomingC->getType(),
+                                    TargetTransformInfo::TCK_SizeAndLatency);
       else
         FoldedCost +=
             TTI.getIntImmCostInst(UserI->getOpcode(), Idx,
                                   IncomingC->getValue(), IncomingC->getType(),
-                                  IncomingC->getValue(), IncomingC->getType());
+                                  TargetTransformInfo::TCK_SizeAndLatency);
 
       // If we accumulate more folded cost for this incoming constant than
       // materialized cost, then we'll regress any edge with this constant so
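
The comparison this pass performs can be condensed into one sketch (names hypothetical; only the two TTI calls are taken from the diff): a constant is worth speculating only while folding it into its user costs no more than materializing it.

static bool foldingIsProfitable(const TargetTransformInfo &TTI,
                                const Instruction *UserI, unsigned Idx,
                                const ConstantInt *C) {
  auto Kind = TargetTransformInfo::TCK_SizeAndLatency;
  int MatCost = TTI.getIntImmCost(C->getValue(), C->getType(), Kind);
  int FoldedCost = TTI.getIntImmCostInst(UserI->getOpcode(), Idx,
                                         C->getValue(), C->getType(), Kind);
  return FoldedCost <= MatCost;
}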

View File

@@ -3277,7 +3277,8 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
   // to be vectors, so we need to extract individual elements from there,
   // execute VF scalar calls, and then gather the result into the vector return
   // value.
-  unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys);
+  unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys,
+                                                 TTI::TCK_RecipThroughput);
   if (VF == 1)
     return ScalarCallCost;
@@ -3302,7 +3303,8 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
     return Cost;
 
   // If the corresponding vector cost is cheaper, return its cost.
-  unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys);
+  unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys,
+                                                 TTI::TCK_RecipThroughput);
   if (VectorCallCost < Cost) {
     NeedToScalarize = false;
     return VectorCallCost;
@@ -3320,7 +3322,9 @@ unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
     FMF = FPMO->getFastMathFlags();
 
   SmallVector<Value *, 4> Operands(CI->arg_operands());
-  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF, CI);
+  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF,
+                                   TargetTransformInfo::TCK_RecipThroughput,
+                                   CI);
 }
 
 static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
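
The vectorizer consistently asks for reciprocal throughput, since it compares per-iteration execution rates of the scalar and vector plans. A sketch mirroring getVectorIntrinsicCost above (assumes the LLVM headers used by LoopVectorize.cpp; the helper name is hypothetical):

static int intrinsicCostForVF(const TargetTransformInfo &TTI,
                              Intrinsic::ID ID, CallInst *CI,
                              FastMathFlags FMF, unsigned VF) {
  SmallVector<Value *, 4> Operands(CI->arg_operands());
  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF,
                                   TargetTransformInfo::TCK_RecipThroughput,
                                   CI);
}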
@@ -5832,7 +5836,8 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // vectorized loop where the user of it is a vectorized instruction.
   const MaybeAlign Alignment = getLoadStoreAlignment(I);
   Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
-                                   Alignment, AS);
+                                   Alignment, AS,
+                                   TTI::TCK_RecipThroughput);
 
   // Get the overhead of the extractelement and insertelement instructions
   // we might create due to scalarization.
@@ -5860,6 +5865,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
   Value *Ptr = getLoadStorePointerOperand(I);
   unsigned AS = getLoadStoreAddressSpace(I);
   int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+  enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
          "Stride should be 1 or -1 for consecutive memory access");
@@ -5867,9 +5873,11 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
   unsigned Cost = 0;
   if (Legal->isMaskRequired(I))
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
-                                      Alignment ? Alignment->value() : 0, AS);
+                                      Alignment ? Alignment->value() : 0, AS,
+                                      CostKind);
   else
-    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
+    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
+                                CostKind, I);
 
   bool Reverse = ConsecutiveStride < 0;
   if (Reverse)
@@ -5883,16 +5891,19 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
   auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
   const MaybeAlign Alignment = getLoadStoreAlignment(I);
   unsigned AS = getLoadStoreAddressSpace(I);
+  enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   if (isa<LoadInst>(I)) {
     return TTI.getAddressComputationCost(ValTy) +
-           TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
+           TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS,
+                               CostKind) +
            TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
   }
   StoreInst *SI = cast<StoreInst>(I);
 
   bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
   return TTI.getAddressComputationCost(ValTy) +
-         TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
+         TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
+                             CostKind) +
          (isLoopInvariantStoreValue
              ? 0
              : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
@@ -5909,7 +5920,9 @@ unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
   return TTI.getAddressComputationCost(VectorTy) +
          TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
                                     Legal->isMaskRequired(I),
-                                    Alignment ? Alignment->value() : 0, I);
+                                    Alignment ? Alignment->value() : 0,
+                                    TargetTransformInfo::TCK_RecipThroughput,
+                                    I);
 }
 
 unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
@@ -5938,7 +5951,8 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
       Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed();
   unsigned Cost = TTI.getInterleavedMemoryOpCost(
       I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlign().value(), AS, Legal->isMaskRequired(I), UseMaskForGaps);
+      Group->getAlign().value(), AS, TTI::TCK_RecipThroughput,
+      Legal->isMaskRequired(I), UseMaskForGaps);
 
   if (Group->isReverse()) {
     // TODO: Add support for reversed masked interleaved access.
@@ -5960,7 +5974,8 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
     unsigned AS = getLoadStoreAddressSpace(I);
     return TTI.getAddressComputationCost(ValTy) +
-           TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
+           TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS,
+                               TTI::TCK_RecipThroughput, I);
   }
   return getWideningCost(I, VF);
 }
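
These memory-cost helpers all share one shape: hoist a single cost kind into a local and thread it through every query. A condensed sketch under that assumption (types taken as already in scope; not a function from this commit):

static unsigned wideLoadCost(const TargetTransformInfo &TTI, Type *ValTy,
                             MaybeAlign Alignment, unsigned AS) {
  // One kind, reused by every query in the helper.
  TargetTransformInfo::TargetCostKind CostKind =
      TargetTransformInfo::TCK_RecipThroughput;
  return TTI.getAddressComputationCost(ValTy) +
         TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS,
                             CostKind);
}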
@@ -6182,6 +6197,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
   VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);
   auto SE = PSE.getSE();
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
   // TODO: We need to estimate the cost of intrinsic calls.
   switch (I->getOpcode()) {
@@ -6238,7 +6254,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       return (Phi->getNumIncomingValues() - 1) *
              TTI.getCmpSelInstrCost(
                  Instruction::Select, ToVectorTy(Phi->getType(), VF),
-                 ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF));
+                 ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF),
+                 CostKind);
 
     return TTI.getCFInstrCost(Instruction::PHI);
   }
@@ -6260,7 +6277,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       Cost += VF * TTI.getCFInstrCost(Instruction::PHI);
 
     // The cost of the non-predicated instruction.
-    Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy);
+    Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind);
 
     // The cost of insertelement and extractelement instructions needed for
     // scalarization.
@@ -6301,13 +6318,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     SmallVector<const Value *, 4> Operands(I->operand_values());
     unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
     return N * TTI.getArithmeticInstrCost(
-                   I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+                   I->getOpcode(), VectorTy, CostKind,
+                   TargetTransformInfo::OK_AnyValue,
                    Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
   }
   case Instruction::FNeg: {
     unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
     return N * TTI.getArithmeticInstrCost(
-                   I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+                   I->getOpcode(), VectorTy, CostKind,
+                   TargetTransformInfo::OK_AnyValue,
                    TargetTransformInfo::OK_AnyValue,
                    TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
                    I->getOperand(0), I);
@@ -6320,7 +6339,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     if (!ScalarCond)
       CondTy = VectorType::get(CondTy, VF);
 
-    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, I);
+    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
+                                  CostKind, I);
   }
   case Instruction::ICmp:
   case Instruction::FCmp: {
@@ -6329,7 +6349,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
       ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
     VectorTy = ToVectorTy(ValTy, VF);
-    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, I);
+    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, CostKind,
+                                  I);
   }
   case Instruction::Store:
   case Instruction::Load: {
@@ -6362,7 +6383,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     if (isOptimizableIVTruncate(I, VF)) {
       auto *Trunc = cast<TruncInst>(I);
       return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(),
-                                  Trunc->getSrcTy(), Trunc);
+                                  Trunc->getSrcTy(), CostKind, Trunc);
     }
 
     Type *SrcScalarTy = I->getOperand(0)->getType();
@@ -6388,7 +6409,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     }
 
     unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
-    return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I);
+    return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy,
+                                    CostKind, I);
   }
   case Instruction::Call: {
     bool NeedToScalarize;
@@ -6401,7 +6423,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   default:
     // The cost of executing VF copies of the scalar instruction. This opcode
     // is unknown. Assume that it is the same as 'mul'.
-    return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) +
+    return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
+                                           CostKind) +
           getScalarizationOverhead(I, VF);
  } // end of switch.
 }
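
The default case above prices any opcode the vectorizer cannot classify as VF scalar multiplies plus the cost of moving values in and out of vectors. As a worked sketch (hypothetical helper; the scalarization overhead is passed in rather than recomputed):

static unsigned unknownOpcodeCost(const TargetTransformInfo &TTI,
                                  Type *VectorTy, unsigned VF,
                                  unsigned ScalarizationOverhead) {
  // Unknown opcodes are assumed to cost the same as 'mul', per the
  // comment in the diff above.
  return VF * TTI.getArithmeticInstrCost(
                  Instruction::Mul, VectorTy,
                  TargetTransformInfo::TCK_RecipThroughput) +
         ScalarizationOverhead;
}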

View File

@@ -3259,7 +3259,8 @@ getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI,
         VectorType::get(Arg->getType(), VecTy->getNumElements()));
 
     // If the corresponding vector call is cheaper, return its cost.
-    LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys);
+    LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
+                                    TTI::TCK_RecipThroughput);
   }
   return {IntrinsicCost, LibCost};
 }
@@ -3273,6 +3274,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
   else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))
     ScalarTy = CI->getOperand(0)->getType();
   VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
   // If we have computed a smaller type for the expression, update VecTy so
   // that the costs will be accurate.
@@ -3380,7 +3382,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
                 Ext->getOpcode(), Ext->getType(), VecTy, i);
             // Add back the cost of s|zext which is subtracted separately.
             DeadCost += TTI->getCastInstrCost(
-                Ext->getOpcode(), Ext->getType(), E->getType(), Ext);
+                Ext->getOpcode(), Ext->getType(), E->getType(), CostKind,
+                Ext);
             continue;
           }
         }
@@ -3404,7 +3407,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     case Instruction::BitCast: {
       Type *SrcTy = VL0->getOperand(0)->getType();
       int ScalarEltCost =
-          TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, VL0);
+          TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, CostKind,
+                                VL0);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
@@ -3417,7 +3421,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       // Check if the values are candidates to demote.
       if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
         VecCost = ReuseShuffleCost +
-                  TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, VL0);
+                  TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy,
+                                        CostKind, VL0);
       }
       return VecCost - ScalarCost;
     }
@@ -3426,13 +3431,15 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     case Instruction::Select: {
      // Calculate the cost of this instruction.
      int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
-                                                 Builder.getInt1Ty(), VL0);
+                                                 Builder.getInt1Ty(),
+                                                 CostKind, VL0);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
      VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
-     int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VL0);
+     int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
+                                           CostKind, VL0);
      return ReuseShuffleCost + VecCost - ScalarCost;
    }
    case Instruction::FNeg:
@@ -3493,13 +3500,15 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
      SmallVector<const Value *, 4> Operands(VL0->operand_values());
      int ScalarEltCost = TTI->getArithmeticInstrCost(
-         E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0);
+         E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
+         Operands, VL0);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
      int VecCost = TTI->getArithmeticInstrCost(
-         E->getOpcode(), VecTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0);
+         E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
+         Operands, VL0);
      return ReuseShuffleCost + VecCost - ScalarCost;
    }
    case Instruction::GetElementPtr: {
@@ -3509,26 +3518,30 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
          TargetTransformInfo::OK_UniformConstantValue;
 
      int ScalarEltCost =
-         TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
+         TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, CostKind,
+                                     Op1VK, Op2VK);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
      int VecCost =
-         TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
+         TTI->getArithmeticInstrCost(Instruction::Add, VecTy, CostKind,
+                                     Op1VK, Op2VK);
      return ReuseShuffleCost + VecCost - ScalarCost;
    }
    case Instruction::Load: {
      // Cost of wide load - cost of scalar loads.
      MaybeAlign alignment(cast<LoadInst>(VL0)->getAlignment());
      int ScalarEltCost =
-         TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
+         TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0,
+                              CostKind, VL0);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
      int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
      int VecLdCost =
-         TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0, VL0);
+         TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0,
+                              CostKind, VL0);
      if (!E->ReorderIndices.empty()) {
        // TODO: Merge this shuffle with the ReuseShuffleCost.
        VecLdCost += TTI->getShuffleCost(
@@ -3543,12 +3556,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
          cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
      MaybeAlign Alignment(SI->getAlignment());
      int ScalarEltCost =
-         TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0, VL0);
+         TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
+                              CostKind, VL0);
      if (NeedToShuffleReuses)
        ReuseShuffleCost = -(ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
      int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
-                                          VecTy, Alignment, 0, VL0);
+                                          VecTy, Alignment, 0, CostKind, VL0);
      if (IsReorder) {
        // TODO: Merge this shuffle with the ReuseShuffleCost.
        VecStCost += TTI->getShuffleCost(
@@ -3570,7 +3584,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
        FMF = FPMO->getFastMathFlags();
 
      int ScalarEltCost =
-         TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
+         TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF, 1, CostKind);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
@@ -3596,34 +3610,34 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
      if (NeedToShuffleReuses) {
        for (unsigned Idx : E->ReuseShuffleIndices) {
          Instruction *I = cast<Instruction>(VL[Idx]);
-         ReuseShuffleCost -= TTI->getInstructionCost(
-             I, TargetTransformInfo::TCK_RecipThroughput);
+         ReuseShuffleCost -= TTI->getInstructionCost(I, CostKind);
        }
        for (Value *V : VL) {
          Instruction *I = cast<Instruction>(V);
-         ReuseShuffleCost += TTI->getInstructionCost(
-             I, TargetTransformInfo::TCK_RecipThroughput);
+         ReuseShuffleCost += TTI->getInstructionCost(I, CostKind);
        }
      }
      for (Value *V : VL) {
        Instruction *I = cast<Instruction>(V);
        assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
-       ScalarCost += TTI->getInstructionCost(
-           I, TargetTransformInfo::TCK_RecipThroughput);
+       ScalarCost += TTI->getInstructionCost(I, CostKind);
      }
      // VecCost is equal to sum of the cost of creating 2 vectors
      // and the cost of creating shuffle.
      int VecCost = 0;
      if (Instruction::isBinaryOp(E->getOpcode())) {
-       VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy);
-       VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy);
+       VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
+       VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
+                                              CostKind);
      } else {
        Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
        Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
        VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size());
        VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size());
-       VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty);
-       VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty);
+       VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
+                                       CostKind);
+       VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
+                                        CostKind);
      }
      VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
      return ReuseShuffleCost + VecCost - ScalarCost;
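
The alternate-opcode case above illustrates SLP's costing identity: every scalar and vector query shares one CostKind, and the profit is the vector cost plus shuffles minus the summed scalar costs. A condensed sketch of just that arithmetic (signature hypothetical; only the TTI calls come from the diff):

static int altShuffleProfit(const TargetTransformInfo &TTI,
                            ArrayRef<Value *> VL, VectorType *VecTy,
                            unsigned Opcode, unsigned AltOpcode) {
  TargetTransformInfo::TargetCostKind CostKind =
      TargetTransformInfo::TCK_RecipThroughput;
  int ScalarCost = 0;
  for (Value *V : VL)
    ScalarCost += TTI.getInstructionCost(cast<Instruction>(V), CostKind);
  int VecCost = TTI.getArithmeticInstrCost(Opcode, VecTy, CostKind) +
                TTI.getArithmeticInstrCost(AltOpcode, VecTy, CostKind);
  // Plus the shuffle that blends the two vectorized opcodes.
  VecCost += TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
  return VecCost - ScalarCost;
}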