
[NFC][CostModel] Add TargetCostKind to relevant APIs

Make the kind of cost explicit throughout the cost model. Apart from
making the cost clear, this will allow the generic parts to calculate
better costs and will let backends approximate and correlate the
different costs if they wish. It will also help simplify the cost model
around immediate and intrinsic costs, where we currently have multiple
APIs.
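
The kinds in question are the values of TargetTransformInfo::TargetCostKind
(aliased as TTI below). A sketch of the enum for reference: only
TCK_RecipThroughput and TCK_SizeAndLatency appear as defaults in this diff;
the other two values follow the contemporary upstream definition, and the
comments are paraphrased:

    enum TargetCostKind {
      TCK_RecipThroughput, // Reciprocal throughput.
      TCK_Latency,         // Instruction latency.
      TCK_CodeSize,        // Code size.
      TCK_SizeAndLatency   // Combined size and latency.
    };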

RFC thread:
http://lists.llvm.org/pipermail/llvm-dev/2020-April/141263.html

Differential Revision: https://reviews.llvm.org/D79002
Sam Parker 2020-04-28 14:11:27 +01:00
parent 52f56e2249
commit c8018d2237
32 changed files with 1008 additions and 572 deletions
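
Because the CostKind parameters on the public TargetTransformInfo entry
points are defaulted, existing call sites keep compiling. A new caller can
instead name the kind it cares about; a minimal hypothetical sketch against
the new getArithmeticInstrCost signature shown below (costOfAdd is for
illustration only, not part of this patch):

    // Ask one API for two different kinds of cost, chosen at the call site.
    // The operand kind/property parameters after CostKind keep their defaults.
    int costOfAdd(const TargetTransformInfo &TTI, Type *Ty, bool OptForSize) {
      TargetTransformInfo::TargetCostKind Kind =
          OptForSize ? TargetTransformInfo::TCK_CodeSize
                     : TargetTransformInfo::TCK_RecipThroughput;
      return TTI.getArithmeticInstrCost(Instruction::Add, Ty, Kind);
    }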

include/llvm/Analysis/TargetTransformInfo.h

@@ -105,6 +105,9 @@ struct HardwareLoopInfo {
   bool canAnalyze(LoopInfo &LI);
 };
 
+class TargetTransformInfo;
+typedef TargetTransformInfo TTI;
+
 /// This pass provides access to the codegen interfaces that are needed
 /// for IR-level transformations.
 class TargetTransformInfo {
@@ -205,7 +208,8 @@ public:
   /// Estimate the cost of a GEP operation when lowered.
   int getGEPCost(Type *PointeeType, const Value *Ptr,
-                 ArrayRef<const Value *> Operands) const;
+                 ArrayRef<const Value *> Operands,
+                 TargetCostKind CostKind = TCK_SizeAndLatency) const;
 
   /// Estimate the cost of a EXT operation when lowered.
   int getExtCost(const Instruction *I, const Value *Src) const;
@@ -233,12 +237,14 @@ public:
   /// Estimate the cost of an intrinsic when lowered.
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                        ArrayRef<Type *> ParamTys,
-                       const User *U = nullptr) const;
+                       const User *U = nullptr,
+                       TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
 
   /// Estimate the cost of an intrinsic when lowered.
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                        ArrayRef<const Value *> Arguments,
-                       const User *U = nullptr) const;
+                       const User *U = nullptr,
+                       TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
 
   /// \return the expected cost of a memcpy, which could e.g. depend on the
   /// source/destination type and alignment and the number of bytes copied.
@@ -702,15 +708,15 @@ public:
   /// Return the expected cost of materializing for the given integer
   /// immediate of the specified type.
-  int getIntImmCost(const APInt &Imm, Type *Ty) const;
+  int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;
 
   /// Return the expected cost of materialization for the given integer
   /// immediate of the specified type for a given instruction. The cost can be
   /// zero if the immediate can be folded into the specified instruction.
   int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
-                        Type *Ty) const;
+                        Type *Ty, TargetCostKind CostKind) const;
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty) const;
+                          Type *Ty, TargetCostKind CostKind) const;
 
   /// Return the expected cost for the given integer when optimising
   /// for size. This is different than the other integer immediate cost
@@ -876,7 +882,9 @@ public:
   /// \p CxtI is the optional original context instruction, if one exists, to
   /// provide even more information.
   int getArithmeticInstrCost(
-      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+      unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      OperandValueKind Opd1Info = OK_AnyValue,
       OperandValueKind Opd2Info = OK_AnyValue,
       OperandValueProperties Opd1PropInfo = OP_None,
       OperandValueProperties Opd2PropInfo = OP_None,
@@ -895,6 +903,7 @@ public:
   /// zext, etc. If there is an existing instruction that holds Opcode, it
   /// may be passed in the 'I' parameter.
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                        const Instruction *I = nullptr) const;
 
   /// \return The expected cost of a sign- or zero-extended vector extract. Use
@@ -904,12 +913,14 @@ public:
   /// \return The expected cost of control-flow related instructions such as
   /// Phi, Ret, Br.
-  int getCFInstrCost(unsigned Opcode) const;
+  int getCFInstrCost(unsigned Opcode,
+                     TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
 
   /// \returns The expected cost of compare and select instructions. If there
   /// is an existing instruction that holds Opcode, it may be passed in the
   /// 'I' parameter.
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
+                         TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                          const Instruction *I = nullptr) const;
 
   /// \return The expected cost of vector Insert and Extract.
@@ -919,11 +930,13 @@ public:
   /// \return The cost of Load and Store instructions.
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace,
+                      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                       const Instruction *I = nullptr) const;
 
   /// \return The cost of masked Load and Store instructions.
-  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                            unsigned AddressSpace) const;
+  int getMaskedMemoryOpCost(
+      unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
 
   /// \return The cost of Gather or Scatter operation
   /// \p Opcode - is a type of memory access Load or Store
@@ -934,9 +947,10 @@ public:
   /// \p Alignment - alignment of single element
   /// \p I - the optional original context instruction, if one exists, e.g. the
   /// load/store to transform or the call to the gather/scatter intrinsic
-  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                             bool VariableMask, unsigned Alignment,
-                             const Instruction *I = nullptr) const;
+  int getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
 
   /// \return The cost of the interleaved memory operation.
   /// \p Opcode is the memory operation code
@@ -948,11 +962,11 @@ public:
   /// \p AddressSpace is address space of the pointer.
   /// \p UseMaskForCond indicates if the memory access is predicated.
   /// \p UseMaskForGaps indicates if gaps should be masked.
-  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
-                                 ArrayRef<unsigned> Indices, unsigned Alignment,
-                                 unsigned AddressSpace,
-                                 bool UseMaskForCond = false,
-                                 bool UseMaskForGaps = false) const;
+  int getInterleavedMemoryOpCost(
+      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+      unsigned Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
 
   /// Calculate the cost of performing a vector reduction.
   ///
@@ -967,33 +981,39 @@ public:
   /// Split:
   /// (v0, v1, v2, v3)
   /// ((v0+v2), (v1+v3), undef, undef)
-  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                 bool IsPairwiseForm) const;
-  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                             bool IsPairwiseForm, bool IsUnsigned) const;
+  int getArithmeticReductionCost(
+      unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
+  int getMinMaxReductionCost(
+      VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
 
   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
   /// Three cases are handled: 1. scalar instruction 2. vector instruction
   /// 3. scalar instruction which is to be vectorized with VF.
   /// I is the optional original context instruction holding the call to the
   /// intrinsic
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1,
-                            const Instruction *I = nullptr) const;
+  int getIntrinsicInstrCost(
+      Intrinsic::ID ID, Type *RetTy, ArrayRef<Value *> Args,
+      FastMathFlags FMF, unsigned VF = 1,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
 
   /// \returns The cost of Intrinsic instructions. Types analysis only.
   /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
   /// arguments and the return value will be computed based on types.
   /// I is the optional original context instruction holding the call to the
   /// intrinsic
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
-                            FastMathFlags FMF,
-                            unsigned ScalarizationCostPassed = UINT_MAX,
-                            const Instruction *I = nullptr) const;
+  int getIntrinsicInstrCost(
+      Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
+      unsigned ScalarizationCostPassed = UINT_MAX,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
 
   /// \returns The cost of Call instructions.
-  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
+  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
+                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
 
   /// \returns The number of pieces into which the provided type must be
   /// split during legalization. Zero is returned when the answer is unknown.
@@ -1166,15 +1186,18 @@ public:
     virtual ~Concept() = 0;
     virtual const DataLayout &getDataLayout() const = 0;
     virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
-                           ArrayRef<const Value *> Operands) = 0;
+                           ArrayRef<const Value *> Operands,
+                           TTI::TargetCostKind CostKind) = 0;
     virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
     virtual unsigned getInliningThresholdMultiplier() = 0;
     virtual int getInlinerVectorBonusPercent() = 0;
     virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                                 ArrayRef<Type *> ParamTys, const User *U) = 0;
+                                 ArrayRef<Type *> ParamTys, const User *U,
+                                 enum TargetCostKind CostKind) = 0;
     virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                  ArrayRef<const Value *> Arguments,
-                                 const User *U) = 0;
+                                 const User *U,
+                                 enum TargetCostKind CostKind) = 0;
     virtual int getMemcpyCost(const Instruction *I) = 0;
     virtual unsigned
     getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
@@ -1261,11 +1284,13 @@ public:
     virtual int getFPOpCost(Type *Ty) = 0;
     virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                       const APInt &Imm, Type *Ty) = 0;
-    virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
+    virtual int getIntImmCost(const APInt &Imm, Type *Ty,
+                              TargetCostKind CostKind) = 0;
     virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
-                                  Type *Ty) = 0;
+                                  Type *Ty, TargetCostKind CostKind) = 0;
     virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                    const APInt &Imm, Type *Ty) = 0;
+                                    const APInt &Imm, Type *Ty,
+                                    TargetCostKind CostKind) = 0;
     virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
     virtual unsigned getRegisterClassForType(bool Vector,
                                              Type *Ty = nullptr) const = 0;
@@ -1306,47 +1331,65 @@ public:
     virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
     virtual unsigned getArithmeticInstrCost(
-        unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+        unsigned Opcode, Type *Ty,
+        TTI::TargetCostKind CostKind,
+        OperandValueKind Opd1Info,
         OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
         OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
         const Instruction *CxtI = nullptr) = 0;
     virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
                                VectorType *SubTp) = 0;
     virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) = 0;
     virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                          VectorType *VecTy, unsigned Index) = 0;
-    virtual int getCFInstrCost(unsigned Opcode) = 0;
+    virtual int getCFInstrCost(unsigned Opcode,
+                               TTI::TargetCostKind CostKind) = 0;
     virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   TTI::TargetCostKind CostKind,
                                    const Instruction *I) = 0;
     virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                    unsigned Index) = 0;
     virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                                unsigned AddressSpace, const Instruction *I) = 0;
+                                unsigned AddressSpace,
+                                TTI::TargetCostKind CostKind,
+                                const Instruction *I) = 0;
     virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                       unsigned Alignment,
-                                      unsigned AddressSpace) = 0;
-    virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                                       bool VariableMask, unsigned Alignment,
-                                       const Instruction *I = nullptr) = 0;
+                                      unsigned AddressSpace,
+                                      TTI::TargetCostKind CostKind) = 0;
+    virtual int getGatherScatterOpCost(
+        unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+        unsigned Alignment, TTI::TargetCostKind CostKind,
+        const Instruction *I = nullptr) = 0;
     virtual int
     getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                ArrayRef<unsigned> Indices, unsigned Alignment,
-                               unsigned AddressSpace, bool UseMaskForCond = false,
+                               unsigned AddressSpace,
+                               TTI::TargetCostKind CostKind,
+                               bool UseMaskForCond = false,
                                bool UseMaskForGaps = false) = 0;
     virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                           bool IsPairwiseForm) = 0;
+                                           bool IsPairwiseForm,
+                                           TTI::TargetCostKind CostKind) = 0;
     virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                                       bool IsPairwiseForm, bool IsUnsigned) = 0;
+                                       bool IsPairwiseForm, bool IsUnsigned,
+                                       TTI::TargetCostKind CostKind) = 0;
     virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                       ArrayRef<Type *> Tys, FastMathFlags FMF,
                                       unsigned ScalarizationCostPassed,
+                                      TTI::TargetCostKind CostKind,
                                       const Instruction *I) = 0;
     virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                       ArrayRef<Value *> Args, FastMathFlags FMF,
-                                      unsigned VF, const Instruction *I) = 0;
+                                      unsigned VF,
+                                      TTI::TargetCostKind CostKind,
+                                      const Instruction *I) = 0;
     virtual int getCallInstrCost(Function *F, Type *RetTy,
-                                 ArrayRef<Type *> Tys) = 0;
+                                 ArrayRef<Type *> Tys,
+                                 TTI::TargetCostKind CostKind) = 0;
     virtual unsigned getNumberOfParts(Type *Tp) = 0;
     virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                           const SCEV *Ptr) = 0;
@@ -1408,7 +1451,8 @@ public:
     }
     int getGEPCost(Type *PointeeType, const Value *Ptr,
-                   ArrayRef<const Value *> Operands) override {
+                   ArrayRef<const Value *> Operands,
+                   enum TargetTransformInfo::TargetCostKind CostKind) override {
       return Impl.getGEPCost(PointeeType, Ptr, Operands);
     }
     int getExtCost(const Instruction *I, const Value *Src) override {
@@ -1422,13 +1466,15 @@ public:
     }
     int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                          ArrayRef<Type *> ParamTys,
-                         const User *U = nullptr) override {
-      return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
+                         const User *U = nullptr,
+                         TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
+      return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
     }
     int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                          ArrayRef<const Value *> Arguments,
-                         const User *U = nullptr) override {
-      return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
+                         const User *U = nullptr,
+                         TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
+      return Impl.getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
     }
     int getMemcpyCost(const Instruction *I) override {
       return Impl.getMemcpyCost(I);
@@ -1613,16 +1659,17 @@ public:
                               Type *Ty) override {
       return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
     }
-    int getIntImmCost(const APInt &Imm, Type *Ty) override {
-      return Impl.getIntImmCost(Imm, Ty);
+    int getIntImmCost(const APInt &Imm, Type *Ty,
+                      TargetCostKind CostKind) override {
+      return Impl.getIntImmCost(Imm, Ty, CostKind);
     }
     int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
-                          Type *Ty) override {
-      return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty);
+                          Type *Ty, TargetCostKind CostKind) override {
+      return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind);
    }
     int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                            Type *Ty) override {
-      return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty);
+                            Type *Ty, TargetCostKind CostKind) override {
+      return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
     }
     unsigned getNumberOfRegisters(unsigned ClassID) const override {
       return Impl.getNumberOfRegisters(ClassID);
@@ -1698,13 +1745,14 @@ public:
       return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
     }
     unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                    TTI::TargetCostKind CostKind,
                                     OperandValueKind Opd1Info,
                                     OperandValueKind Opd2Info,
                                     OperandValueProperties Opd1PropInfo,
                                     OperandValueProperties Opd2PropInfo,
                                     ArrayRef<const Value *> Args,
                                     const Instruction *CxtI = nullptr) override {
-      return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+      return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo, Args, CxtI);
     }
     int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
@@ -1712,67 +1760,84 @@ public:
       return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
     }
     int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I) override {
-      return Impl.getCastInstrCost(Opcode, Dst, Src, I);
+      return Impl.getCastInstrCost(Opcode, Dst, Src, CostKind, I);
    }
     int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                                  unsigned Index) override {
       return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
     }
-    int getCFInstrCost(unsigned Opcode) override {
-      return Impl.getCFInstrCost(Opcode);
+    int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {
+      return Impl.getCFInstrCost(Opcode, CostKind);
     }
     int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                           TTI::TargetCostKind CostKind,
                            const Instruction *I) override {
-      return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+      return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
    }
     int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
       return Impl.getVectorInstrCost(Opcode, Val, Index);
     }
     int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                        unsigned AddressSpace, const Instruction *I) override {
-      return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+                        unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                        const Instruction *I) override {
+      return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                  CostKind, I);
    }
     int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                              unsigned AddressSpace) override {
-      return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+                              unsigned AddressSpace,
+                              TTI::TargetCostKind CostKind) override {
+      return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                        CostKind);
    }
-    int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                               bool VariableMask, unsigned Alignment,
-                               const Instruction *I = nullptr) override {
+    int getGatherScatterOpCost(
+        unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+        unsigned Alignment, TTI::TargetCostKind CostKind,
+        const Instruction *I = nullptr) override {
       return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                         Alignment, I);
+                                         Alignment, CostKind, I);
    }
     int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                    ArrayRef<unsigned> Indices, unsigned Alignment,
-                                   unsigned AddressSpace, bool UseMaskForCond,
+                                   unsigned AddressSpace,
+                                   TTI::TargetCostKind CostKind,
+                                   bool UseMaskForCond,
                                    bool UseMaskForGaps) override {
       return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                              UseMaskForCond, UseMaskForGaps);
    }
     int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                   bool IsPairwiseForm) override {
-      return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
+                                   bool IsPairwiseForm,
+                                   TTI::TargetCostKind CostKind) override {
+      return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
+                                             CostKind);
    }
     int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                               bool IsPairwiseForm, bool IsUnsigned) override {
-      return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
+                               bool IsPairwiseForm, bool IsUnsigned,
+                               TTI::TargetCostKind CostKind) override {
+      return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
+                                         CostKind);
    }
     int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
                               FastMathFlags FMF, unsigned ScalarizationCostPassed,
+                              TTI::TargetCostKind CostKind,
                               const Instruction *I) override {
       return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                        ScalarizationCostPassed, I);
+                                        ScalarizationCostPassed, CostKind, I);
    }
     int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                               ArrayRef<Value *> Args, FastMathFlags FMF,
-                              unsigned VF, const Instruction *I) override {
-      return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+                              unsigned VF,
+                              TTI::TargetCostKind CostKind,
+                              const Instruction *I) override {
+      return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
    }
     int getCallInstrCost(Function *F, Type *RetTy,
-                         ArrayRef<Type *> Tys) override {
-      return Impl.getCallInstrCost(F, RetTy, Tys);
+                         ArrayRef<Type *> Tys,
+                         TTI::TargetCostKind CostKind) override {
+      return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
    }
     unsigned getNumberOfParts(Type *Tp) override {
       return Impl.getNumberOfParts(Tp);

include/llvm/Analysis/TargetTransformInfoImpl.h

@@ -44,7 +44,8 @@ public:
   const DataLayout &getDataLayout() const { return DL; }
 
   int getGEPCost(Type *PointeeType, const Value *Ptr,
-                 ArrayRef<const Value *> Operands) {
+                 ArrayRef<const Value *> Operands,
+                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
     // In the basic model, we just assume that all-constant GEPs will be folded
     // into their uses via addressing modes.
     for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -285,15 +286,19 @@ public:
     return 0;
   }
 
-  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
+  unsigned getIntImmCost(const APInt &Imm, Type *Ty,
+                         TTI::TargetCostKind CostKind) {
+    return TTI::TCC_Basic;
+  }
 
   unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                             Type *Ty) {
+                             Type *Ty, TTI::TargetCostKind CostKind) {
     return TTI::TCC_Free;
   }
 
   unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                               const APInt &Imm, Type *Ty) {
+                               const APInt &Imm, Type *Ty,
+                               TTI::TargetCostKind CostKind) {
     return TTI::TCC_Free;
   }
@@ -366,6 +371,7 @@ public:
   unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
 
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                  TTI::TargetCostKind CostKind,
                                   TTI::OperandValueKind Opd1Info,
                                   TTI::OperandValueKind Opd2Info,
                                   TTI::OperandValueProperties Opd1PropInfo,
@@ -381,6 +387,7 @@ public:
   }
 
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                            TTI::TargetCostKind CostKind,
                             const Instruction *I) {
     switch (Opcode) {
     default:
@@ -419,10 +426,12 @@ public:
     return 1;
   }
 
-  unsigned getCFInstrCost(unsigned Opcode) { return 1; }
+  unsigned getCFInstrCost(unsigned Opcode,
+                          TTI::TargetCostKind CostKind) { return 1; }
 
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
-                              const Instruction *I) {
+                              TTI::TargetCostKind CostKind,
+                              const Instruction *I) const {
     return 1;
   }
@@ -431,18 +440,21 @@ public:
   }
 
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                           unsigned AddressSpace, const Instruction *I) {
+                           unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                           const Instruction *I) const {
     return 1;
   }
 
   unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                 unsigned AddressSpace) {
+                                 unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind) {
    return 1;
  }
 
-  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                                  bool VariableMask, unsigned Alignment,
-                                  const Instruction *I = nullptr) {
+  unsigned getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind,
+      const Instruction *I = nullptr) {
    return 1;
  }
@@ -450,24 +462,28 @@ public:
                                       unsigned Factor,
                                       ArrayRef<unsigned> Indices,
                                       unsigned Alignment, unsigned AddressSpace,
-                                      bool UseMaskForCond = false,
-                                      bool UseMaskForGaps = false) {
+                                      TTI::TargetCostKind CostKind,
+                                      bool UseMaskForCond,
+                                      bool UseMaskForGaps) {
    return 1;
  }
 
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef<Type *> Tys, FastMathFlags FMF,
                                  unsigned ScalarizationCostPassed,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) {
    return 1;
  }
 
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef<Value *> Args, FastMathFlags FMF,
-                                 unsigned VF, const Instruction *I) {
+                                 unsigned VF, TTI::TargetCostKind CostKind,
+                                 const Instruction *I) {
    return 1;
  }
 
-  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
+                            TTI::TargetCostKind CostKind) {
    return 1;
  }
@@ -478,9 +494,11 @@ public:
    return 0;
  }
 
-  unsigned getArithmeticReductionCost(unsigned, VectorType *, bool) { return 1; }
+  unsigned getArithmeticReductionCost(unsigned, VectorType *, bool,
+                                      TTI::TargetCostKind) { return 1; }
 
-  unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool) { return 1; }
+  unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool,
+                                  TTI::TargetCostKind) { return 1; }
 
   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
@@ -680,7 +698,8 @@ public:
   using BaseT::getGEPCost;
 
   int getGEPCost(Type *PointeeType, const Value *Ptr,
-                 ArrayRef<const Value *> Operands) {
+                 ArrayRef<const Value *> Operands,
+                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
     assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
     // TODO: will remove this when pointers have an opaque type.
     assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
@@ -738,7 +757,8 @@ public:
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> ParamTys, const User *U) {
+                            ArrayRef<Type *> ParamTys, const User *U,
+                            TTI::TargetCostKind TCK_SizeAndLatency) {
     switch (IID) {
     default:
       // Intrinsics rarely (if ever) have normal argument setup constraints.
@@ -782,7 +802,8 @@ public:
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<const Value *> Arguments, const User *U) {
+                            ArrayRef<const Value *> Arguments, const User *U,
+                            TTI::TargetCostKind CostKind) {
     // Delegate to the generic intrinsic handling code. This mostly provides an
     // opportunity for targets to (for example) special case the cost of
     // certain intrinsics based on constants used as arguments.
@@ -790,11 +811,12 @@ public:
     ParamTys.reserve(Arguments.size());
     for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
       ParamTys.push_back(Arguments[Idx]->getType());
-    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
+    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U,
+                                                    CostKind);
   }
 
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
-                       enum TTI::TargetCostKind CostKind) {
+                       TTI::TargetCostKind CostKind) {
     auto *TargetTTI = static_cast<T *>(this);
 
     // FIXME: Unlikely to be true for anything but CodeSize.
@@ -805,7 +827,7 @@ public:
       if (Intrinsic::ID IID = F->getIntrinsicID()) {
         SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
         return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(),
-                                           ParamTys, U);
+                                           ParamTys, U, CostKind);
       }
 
       if (!TargetTTI->isLoweredToCall(F))
@@ -849,12 +871,12 @@ public:
     case Instruction::IntToPtr:
     case Instruction::PtrToInt:
     case Instruction::Trunc:
-      if (getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free ||
-          TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free)
+      if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free ||
+          TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free)
         return TTI::TCC_Free;
       break;
     case Instruction::BitCast:
-      if (getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free)
+      if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free)
         return TTI::TCC_Free;
       break;
     case Instruction::FPExt:

include/llvm/CodeGen/BasicTTIImpl.h

@ -305,12 +305,14 @@ public:
} }
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments, const User *U) { ArrayRef<const Value *> Arguments, const User *U,
return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U); TTI::TargetCostKind CostKind) {
return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
} }
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys, const User *U) { ArrayRef<Type *> ParamTys, const User *U,
TTI::TargetCostKind CostKind) {
if (IID == Intrinsic::cttz) { if (IID == Intrinsic::cttz) {
if (getTLI()->isCheapToSpeculateCttz()) if (getTLI()->isCheapToSpeculateCttz())
return TargetTransformInfo::TCC_Basic; return TargetTransformInfo::TCC_Basic;
@ -323,7 +325,7 @@ public:
return TargetTransformInfo::TCC_Expensive; return TargetTransformInfo::TCC_Expensive;
} }
return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
} }
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
@ -625,6 +627,7 @@ public:
unsigned getArithmeticInstrCost( unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty, unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@ -661,7 +664,7 @@ public:
if (auto *VTy = dyn_cast<VectorType>(Ty)) { if (auto *VTy = dyn_cast<VectorType>(Ty)) {
unsigned Num = VTy->getNumElements(); unsigned Num = VTy->getNumElements();
unsigned Cost = static_cast<T *>(this)->getArithmeticInstrCost( unsigned Cost = static_cast<T *>(this)->getArithmeticInstrCost(
Opcode, VTy->getScalarType()); Opcode, VTy->getScalarType(), CostKind);
// Return the cost of multiple scalar invocation plus the cost of // Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. // inserting and extracting the values.
return getScalarizationOverhead(VTy, Args) + Num * Cost; return getScalarizationOverhead(VTy, Args) + Num * Cost;
@ -691,6 +694,7 @@ public:
} }
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) { const Instruction *I = nullptr) {
const TargetLoweringBase *TLI = getTLI(); const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode); int ISD = TLI->InstructionOpcodeToISD(Opcode);
@ -802,14 +806,16 @@ public:
unsigned SplitCost = unsigned SplitCost =
(!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0; (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
return SplitCost + return SplitCost +
(2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, I)); (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy,
CostKind, I));
} }
// In other cases where the source or destination are illegal, assume // In other cases where the source or destination are illegal, assume
// the operation will get scalarized. // the operation will get scalarized.
unsigned Num = DstVTy->getNumElements(); unsigned Num = DstVTy->getNumElements();
unsigned Cost = static_cast<T *>(this)->getCastInstrCost( unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
Opcode, Dst->getScalarType(), Src->getScalarType(), I); Opcode, Dst->getScalarType(), Src->getScalarType(),
CostKind, I);
// Return the cost of multiple scalar invocation plus the cost of // Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. // inserting and extracting the values.
@ -835,16 +841,18 @@ public:
return static_cast<T *>(this)->getVectorInstrCost( return static_cast<T *>(this)->getVectorInstrCost(
Instruction::ExtractElement, VecTy, Index) + Instruction::ExtractElement, VecTy, Index) +
static_cast<T *>(this)->getCastInstrCost(Opcode, Dst, static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
VecTy->getElementType()); VecTy->getElementType(),
TTI::TCK_RecipThroughput);
} }
unsigned getCFInstrCost(unsigned Opcode) { unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
// Branches are assumed to be predicted. // Branches are assumed to be predicted.
return 0; return 0;
} }
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I) { TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
const TargetLoweringBase *TLI = getTLI(); const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode); int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode"); assert(ISD && "Invalid opcode");
@ -872,7 +880,7 @@ public:
if (CondTy) if (CondTy)
CondTy = CondTy->getScalarType(); CondTy = CondTy->getScalarType();
unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
Opcode, ValVTy->getScalarType(), CondTy, I); Opcode, ValVTy->getScalarType(), CondTy, CostKind, I);
// Return the cost of multiple scalar invocation plus the cost of // Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. // inserting and extracting the values.
@ -892,6 +900,7 @@ public:
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) { const Instruction *I = nullptr) {
assert(!Src->isVoidTy() && "Invalid type"); assert(!Src->isVoidTy() && "Invalid type");
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
@ -926,6 +935,7 @@ public:
unsigned Factor, unsigned Factor,
ArrayRef<unsigned> Indices, ArrayRef<unsigned> Indices,
unsigned Alignment, unsigned AddressSpace, unsigned Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForCond = false,
bool UseMaskForGaps = false) { bool UseMaskForGaps = false) {
auto *VT = cast<VectorType>(VecTy); auto *VT = cast<VectorType>(VecTy);
@ -940,10 +950,10 @@ public:
unsigned Cost; unsigned Cost;
if (UseMaskForCond || UseMaskForGaps) if (UseMaskForCond || UseMaskForGaps)
Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
Opcode, VecTy, Alignment, AddressSpace); Opcode, VecTy, Alignment, AddressSpace, CostKind);
else else
Cost = static_cast<T *>(this)->getMemoryOpCost( Cost = static_cast<T *>(this)->getMemoryOpCost(
Opcode, VecTy, MaybeAlign(Alignment), AddressSpace); Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
// Legalize the vector type, and get the legalized and unlegalized type // Legalize the vector type, and get the legalized and unlegalized type
// sizes. // sizes.
@ -1071,16 +1081,18 @@ public:
// inside the loop. // inside the loop.
if (UseMaskForGaps) if (UseMaskForGaps)
Cost += static_cast<T *>(this)->getArithmeticInstrCost( Cost += static_cast<T *>(this)->getArithmeticInstrCost(
BinaryOperator::And, MaskVT); BinaryOperator::And, MaskVT, CostKind);
return Cost; return Cost;
} }
/// Get intrinsic cost based on arguments. /// Get intrinsic cost based on arguments.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, unsigned getIntrinsicInstrCost(
ArrayRef<Value *> Args, FastMathFlags FMF, Intrinsic::ID IID, Type *RetTy, ArrayRef<Value *> Args,
unsigned VF = 1, FastMathFlags FMF, unsigned VF = 1,
const Instruction *I = nullptr) { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) {
unsigned RetVF = unsigned RetVF =
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getNumElements() : 1); (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getNumElements() : 1);
assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
@ -1111,7 +1123,8 @@ public:
} }
return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
ScalarizationCost); ScalarizationCost, CostKind,
I);
} }
case Intrinsic::masked_scatter: { case Intrinsic::masked_scatter: {
assert(VF == 1 && "Can't vectorize types here."); assert(VF == 1 && "Can't vectorize types here.");
@ -1120,7 +1133,8 @@ public:
unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
return ConcreteTTI->getGatherScatterOpCost(Instruction::Store, return ConcreteTTI->getGatherScatterOpCost(Instruction::Store,
Args[0]->getType(), Args[1], Args[0]->getType(), Args[1],
VarMask, Alignment, I); VarMask, Alignment, CostKind,
I);
} }
case Intrinsic::masked_gather: { case Intrinsic::masked_gather: {
assert(VF == 1 && "Can't vectorize types here."); assert(VF == 1 && "Can't vectorize types here.");
@ -1128,7 +1142,7 @@ public:
bool VarMask = !isa<Constant>(Mask); bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
return ConcreteTTI->getGatherScatterOpCost( return ConcreteTTI->getGatherScatterOpCost(
Instruction::Load, RetTy, Args[0], VarMask, Alignment, I); Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I);
} }
case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_mul:
@ -1143,7 +1157,8 @@ public:
case Intrinsic::experimental_vector_reduce_fmin: case Intrinsic::experimental_vector_reduce_fmin:
case Intrinsic::experimental_vector_reduce_umax: case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin: case Intrinsic::experimental_vector_reduce_umin:
return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF, 1,
CostKind, I);
case Intrinsic::fshl: case Intrinsic::fshl:
case Intrinsic::fshr: { case Intrinsic::fshr: {
Value *X = Args[0]; Value *X = Args[0];
@ -1159,25 +1174,30 @@ public:
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
unsigned Cost = 0; unsigned Cost = 0;
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy,
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); CostKind);
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,
CostKind);
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
CostKind,
OpKindX, OpKindZ, OpPropsX); OpKindX, OpKindZ, OpPropsX);
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
CostKind,
OpKindY, OpKindZ, OpPropsY); OpKindY, OpKindZ, OpPropsY);
// Non-constant shift amounts requires a modulo. // Non-constant shift amounts requires a modulo.
if (OpKindZ != TTI::OK_UniformConstantValue && if (OpKindZ != TTI::OK_UniformConstantValue &&
OpKindZ != TTI::OK_NonUniformConstantValue) OpKindZ != TTI::OK_NonUniformConstantValue)
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
CostKind,
OpKindZ, OpKindBW, OpPropsZ, OpKindZ, OpKindBW, OpPropsZ,
OpPropsBW); OpPropsBW);
// For non-rotates (X != Y) we must add shift-by-zero handling costs. // For non-rotates (X != Y) we must add shift-by-zero handling costs.
if (X != Y) { if (X != Y) {
Type *CondTy = RetTy->getWithNewBitWidth(1); Type *CondTy = RetTy->getWithNewBitWidth(1);
Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
CondTy, nullptr); CondTy, CostKind);
Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
CondTy, nullptr); CondTy, CostKind);
} }
return Cost; return Cost;
} }
@ -1191,6 +1211,7 @@ public:
unsigned getIntrinsicInstrCost( unsigned getIntrinsicInstrCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max(), unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max(),
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) { const Instruction *I = nullptr) {
auto *ConcreteTTI = static_cast<T *>(this); auto *ConcreteTTI = static_cast<T *>(this);
auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]); auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]);
@ -1226,7 +1247,8 @@ public:
return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
unsigned ScalarCost = unsigned ScalarCost =
ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF,
CostKind);
return ScalarCalls * ScalarCost + ScalarizationCost; return ScalarCalls * ScalarCost + ScalarizationCost;
} }
@@ -1312,34 +1334,40 @@ public:
       return 0;
     case Intrinsic::masked_store:
       return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
-                                                0);
+                                                0, CostKind);
     case Intrinsic::masked_load:
-      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
+      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0,
+                                                CostKind);
     case Intrinsic::experimental_vector_reduce_add:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_mul:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_and:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::And, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_or:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
    case Intrinsic::experimental_vector_reduce_xor:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_v2_fadd:
+      // FIXME: Add new flag for cost of strict reductions.
       return ConcreteTTI->getArithmeticReductionCost(
           Instruction::FAdd, VecOpTy,
-          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
-                                     // reductions.
+          /*IsPairwiseForm=*/false, CostKind);
     case Intrinsic::experimental_vector_reduce_v2_fmul:
+      // FIXME: Add new flag for cost of strict reductions.
       return ConcreteTTI->getArithmeticReductionCost(
           Instruction::FMul, VecOpTy,
-          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
-                                     // reductions.
+          /*IsPairwiseForm=*/false, CostKind);
     case Intrinsic::experimental_vector_reduce_smax:
     case Intrinsic::experimental_vector_reduce_smin:
     case Intrinsic::experimental_vector_reduce_fmax:
@@ -1347,13 +1375,13 @@ public:
       return ConcreteTTI->getMinMaxReductionCost(
           VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
           /*IsPairwiseForm=*/false,
-          /*IsUnsigned=*/false);
+          /*IsUnsigned=*/false, CostKind);
     case Intrinsic::experimental_vector_reduce_umax:
     case Intrinsic::experimental_vector_reduce_umin:
       return ConcreteTTI->getMinMaxReductionCost(
           VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
           /*IsPairwiseForm=*/false,
-          /*IsUnsigned=*/true);
+          /*IsUnsigned=*/true, CostKind);
     case Intrinsic::sadd_sat:
     case Intrinsic::ssub_sat: {
       Type *CondTy = RetTy->getWithNewBitWidth(1);
@@ -1367,11 +1395,12 @@ public:
       // SatMin -> Overflow && SumDiff >= 0
       unsigned Cost = 0;
       Cost += ConcreteTTI->getIntrinsicInstrCost(
-          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed,
+          CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
-                                              CondTy, nullptr);
+                                              CondTy, CostKind);
       Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                                  CondTy, nullptr);
+                                                  CondTy, CostKind);
       return Cost;
     }
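To make the saturating-add model concrete (illustrative unit costs, not values from any target): if the matching overflow intrinsic prices at 3 and each compare or select at 1, then sadd.sat/ssub.sat come out at 3 + 1 + 2 * 1 = 6, and the same CostKind now reaches every one of those sub-queries instead of only some of them.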
     case Intrinsic::uadd_sat:
@@ -1385,9 +1414,10 @@ public:
       unsigned Cost = 0;
       Cost += ConcreteTTI->getIntrinsicInstrCost(
-          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed,
+          CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                              CondTy, nullptr);
+                                              CondTy, CostKind);
       return Cost;
     }
     case Intrinsic::smul_fix:
@@ -1399,17 +1429,22 @@ public:
           IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
       unsigned Cost = 0;
-      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy, CostKind);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy,
+                                                  CostKind);
       Cost +=
-          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
+          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+                                            CostKind);
       Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
+                                                  CostKind,
                                                   TTI::OK_AnyValue,
                                                   TTI::OK_UniformConstantValue);
       Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
+                                                  CostKind,
                                                   TTI::OK_AnyValue,
                                                   TTI::OK_UniformConstantValue);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy,
+                                                  CostKind);
       return Cost;
     }
     case Intrinsic::sadd_with_overflow:
@@ -1429,13 +1464,14 @@ public:
       //   Sub:
       //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
       unsigned Cost = 0;
-      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy, CostKind);
       Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
-                                                  OverflowTy, nullptr);
+                                                  OverflowTy, CostKind);
       Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
-                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
+                      BinaryOperator::ICmp, OverflowTy, OverflowTy, CostKind);
       Cost +=
-          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
+          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
+                                              CostKind);
       return Cost;
     }
     case Intrinsic::uadd_with_overflow:
@@ -1447,9 +1483,9 @@ public:
                             : BinaryOperator::Sub;
       unsigned Cost = 0;
-      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy, CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
-                                              OverflowTy, nullptr);
+                                              OverflowTy, CostKind);
       return Cost;
     }
     case Intrinsic::smul_with_overflow:
@@ -1463,21 +1499,24 @@ public:
           IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
       unsigned Cost = 0;
-      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy, CostKind);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy,
+                                                  CostKind);
       Cost +=
-          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
+          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+                                            CostKind);
       Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
+                                                  CostKind,
                                                   TTI::OK_AnyValue,
                                                   TTI::OK_UniformConstantValue);
       if (IID == Intrinsic::smul_with_overflow)
         Cost += ConcreteTTI->getArithmeticInstrCost(
-            Instruction::AShr, MulTy, TTI::OK_AnyValue,
+            Instruction::AShr, MulTy, CostKind, TTI::OK_AnyValue,
             TTI::OK_UniformConstantValue);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
-                                              OverflowTy, nullptr);
+                                              OverflowTy, CostKind);
       return Cost;
     }
     case Intrinsic::ctpop:
@@ -1534,14 +1573,17 @@ public:
     // If we can't lower fmuladd into an FMA estimate the cost as a floating
     // point mul followed by an add.
     if (IID == Intrinsic::fmuladd)
-      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
-             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
+      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
+                                                 CostKind) +
+             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
+                                                 CostKind);
     if (IID == Intrinsic::experimental_constrained_fmuladd)
       return ConcreteTTI->getIntrinsicCost(
-                 Intrinsic::experimental_constrained_fmul, RetTy, Tys,
-                 nullptr) +
+                 Intrinsic::experimental_constrained_fmul, RetTy, Tys, nullptr,
+                 CostKind) +
             ConcreteTTI->getIntrinsicCost(
-                 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr);
+                 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr,
+                 CostKind);
     // Else, assume that we need to scalarize this intrinsic. For math builtins
     // this will emit a costly libcall, adding call overhead and spills. Make it
@@ -1560,7 +1602,7 @@ public:
         ScalarTys.push_back(Ty);
       }
       unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
-          IID, RetTy->getScalarType(), ScalarTys, FMF);
+          IID, RetTy->getScalarType(), ScalarTys, FMF, CostKind);
       for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
         if (Tys[i]->isVectorTy()) {
           if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
@@ -1588,7 +1630,8 @@ public:
   /// \param RetTy Return value types.
   /// \param Tys Argument types.
   /// \returns The cost of Call instruction.
-  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
+                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
     return 10;
   }
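Note that this generic fallback still charges a flat 10 for any call, whatever kind was requested. A target wanting to distinguish could override it; a hypothetical sketch (MyTTIImpl and both constants are invented for illustration):

    unsigned MyTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
                                         ArrayRef<Type *> Tys,
                                         TTI::TargetCostKind CostKind) {
      // Invented numbers: assume calls encode more cheaply than they execute.
      return CostKind == TTI::TCK_SizeAndLatency ? 4 : 10;
    }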
@@ -1638,7 +1681,8 @@ public:
   /// The cost model should take into account that the actual length of the
   /// vector is reduced on each iteration.
   unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                      bool IsPairwise) {
+                                      bool IsPairwise,
+                                      TTI::TargetCostKind CostKind) {
     Type *ScalarTy = Ty->getElementType();
     unsigned NumVecElts = Ty->getNumElements();
     unsigned NumReduxLevels = Log2_32(NumVecElts);
@@ -1657,7 +1701,7 @@ public:
       ShuffleCost += (IsPairwise + 1) *
                      ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
                                                  NumVecElts, SubTy);
-      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
+      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy, CostKind);
       Ty = SubTy;
       ++LongVectorCount;
     }
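As a worked example of the halving loop above (unit costs assumed, purely illustrative): reducing <8 x i32> with add takes Log2_32(8) = 3 levels; at one extract-subvector shuffle plus one add on the halved type per level, that accrues roughly 3 + 3 = 6 before the final extractelement, with each add now priced under the caller's CostKind.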
@@ -1687,7 +1731,8 @@ public:
   /// Try to calculate op costs for min/max reduction operations.
   /// \param CondTy Conditional type for the Select instruction.
   unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                                  bool IsPairwise, bool) {
+                                  bool IsPairwise, bool IsUnsigned,
+                                  TTI::TargetCostKind CostKind) {
     Type *ScalarTy = Ty->getElementType();
     Type *ScalarCondTy = CondTy->getElementType();
     unsigned NumVecElts = Ty->getNumElements();
@@ -1718,9 +1763,9 @@ public:
                      ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
                                                  NumVecElts, SubTy);
       MinMaxCost +=
-          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
+          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) +
           ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
-                                          nullptr);
+                                          CostKind);
       Ty = SubTy;
       ++LongVectorCount;
     }
@@ -1743,9 +1788,9 @@ public:
                                     0, Ty);
     MinMaxCost +=
         NumReduxLevels *
-        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
+        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
          ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
-                                         nullptr));
+                                         CostKind));
     // The last min/max should be in vector registers and we counted it above.
     // So just need a single extractelement.
     return ShuffleCost + MinMaxCost +
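The min/max form above follows the same halving shape as the arithmetic reduction, except each level pays a compare plus a select rather than one arithmetic op, so the same illustrative 8-element reduction prices at roughly 3 shuffles + 3 * (1 + 1) = 9 with unit costs; note that CostKind now fills the argument slot that previously carried a null Instruction pointer.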


@@ -2157,6 +2157,9 @@ bool SCEVExpander::isHighCostExpansionHelper(
     return false; // Assume to be zero-cost.
   }
+  TargetTransformInfo::TargetCostKind CostKind =
+      TargetTransformInfo::TCK_RecipThroughput;
   if (auto *CastExpr = dyn_cast<SCEVCastExpr>(S)) {
     unsigned Opcode;
     switch (S->getSCEVType()) {
@@ -2174,7 +2177,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
     }
     const SCEV *Op = CastExpr->getOperand();
     BudgetRemaining -= TTI.getCastInstrCost(Opcode, /*Dst=*/S->getType(),
-                                            /*Src=*/Op->getType());
+                                            /*Src=*/Op->getType(), CostKind);
     Worklist.emplace_back(Op);
     return false; // Will answer upon next entry into this function.
   }
@@ -2184,7 +2187,8 @@ bool SCEVExpander::isHighCostExpansionHelper(
     if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) {
       if (SC->getAPInt().isPowerOf2()) {
         BudgetRemaining -=
-            TTI.getArithmeticInstrCost(Instruction::LShr, S->getType());
+            TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(),
+                                       CostKind);
         // Note that we don't count the cost of RHS, because it is a constant,
         // and we consider those to be free. But if that changes, we would need
         // to log2() it first before calling isHighCostExpansionHelper().
@@ -2207,7 +2211,8 @@ bool SCEVExpander::isHighCostExpansionHelper(
     // Need to count the cost of this UDiv.
     BudgetRemaining -=
-        TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType());
+        TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(),
+                                   CostKind);
     Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()});
     return false; // Will answer upon next entry into this function.
   }
@@ -2218,8 +2223,10 @@ bool SCEVExpander::isHighCostExpansionHelper(
     assert(NAry->getNumOperands() >= 2 &&
            "Polynomial should be at least linear");
-    int AddCost = TTI.getArithmeticInstrCost(Instruction::Add, OpType);
-    int MulCost = TTI.getArithmeticInstrCost(Instruction::Mul, OpType);
+    int AddCost =
+        TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
+    int MulCost =
+        TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
     // In this polynominal, we may have some zero operands, and we shouldn't
     // really charge for those. So how many non-zero coeffients are there?
@@ -2273,22 +2280,26 @@ bool SCEVExpander::isHighCostExpansionHelper(
     int PairCost;
     switch (S->getSCEVType()) {
     case scAddExpr:
-      PairCost = TTI.getArithmeticInstrCost(Instruction::Add, OpType);
+      PairCost =
+          TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
       break;
     case scMulExpr:
       // TODO: this is a very pessimistic cost modelling for Mul,
       // because of Bin Pow algorithm actually used by the expander,
       // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
-      PairCost = TTI.getArithmeticInstrCost(Instruction::Mul, OpType);
+      PairCost =
+          TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
       break;
     case scSMaxExpr:
     case scUMaxExpr:
     case scSMinExpr:
     case scUMinExpr:
-      PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType,
-                                        CmpInst::makeCmpResultType(OpType)) +
+      PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType,
+                                        CmpInst::makeCmpResultType(OpType),
+                                        CostKind) +
                  TTI.getCmpSelInstrCost(Instruction::Select, OpType,
-                                        CmpInst::makeCmpResultType(OpType));
+                                        CmpInst::makeCmpResultType(OpType),
+                                        CostKind);
       break;
     default:
       llvm_unreachable("There are no other variants here.");
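The pattern in this file is a budget walk: subtract each would-be instruction's throughput cost from the remaining budget and declare the expansion too costly once it goes negative. A condensed sketch of that shape, using only the signatures shown above (the helper itself is a stand-in, not code from this commit):

    // Sketch of the budget pattern (names hypothetical).
    static bool isTooCostly(const TargetTransformInfo &TTI, Type *OpType,
                            int BudgetRemaining) {
      TargetTransformInfo::TargetCostKind CostKind =
          TargetTransformInfo::TCK_RecipThroughput;
      // Charge one Add at throughput cost against the budget.
      BudgetRemaining -=
          TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
      return BudgetRemaining < 0;
    }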


@@ -154,8 +154,9 @@ int TargetTransformInfo::getInlinerVectorBonusPercent() const {
 }
 int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
-                                    ArrayRef<const Value *> Operands) const {
-  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
+                                    ArrayRef<const Value *> Operands,
+                                    TTI::TargetCostKind CostKind) const {
+  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
 }
 int TargetTransformInfo::getExtCost(const Instruction *I,
@@ -165,8 +166,9 @@ int TargetTransformInfo::getExtCost(const Instruction *I,
 int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                           ArrayRef<const Value *> Arguments,
-                                          const User *U) const {
-  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
+                                          const User *U,
+                                          TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -440,22 +442,27 @@ int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
   return Cost;
 }
-int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
-  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
+int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty,
+                                       TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
-int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
-                                           const APInt &Imm, Type *Ty) const {
-  int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty);
+int
+TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+                                       const APInt &Imm, Type *Ty,
+                                       TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
-int TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                             const APInt &Imm, Type *Ty) const {
-  int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty);
+int
+TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+                                         const APInt &Imm, Type *Ty,
+                                         TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
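A caller-side sketch of the immediate-cost query (the helper and values are hypothetical; this mirrors how CodeGenPrepare uses the API later in this commit):

    // Sketch: is this immediate cheap to materialize for size purposes?
    static bool isCheapImm(const TargetTransformInfo &TTInfo, const APInt &Imm,
                           Type *Ty) {
      return TTInfo.getIntImmCost(Imm, Ty,
                                  TargetTransformInfo::TCK_SizeAndLatency) <=
             TargetTransformInfo::TCC_Basic;
    }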
@@ -582,12 +589,14 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
 }
 int TargetTransformInfo::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    OperandValueKind Opd1Info,
     OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
     OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) const {
   int Cost = TTIImpl->getArithmeticInstrCost(
-      Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI);
+      Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo,
+      Args, CxtI);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -600,10 +609,11 @@ int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty,
 }
 int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                          TTI::TargetCostKind CostKind,
                                           const Instruction *I) const {
   assert((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
-  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
+  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -616,18 +626,20 @@ int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
   return Cost;
 }
-int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
-  int Cost = TTIImpl->getCFInstrCost(Opcode);
+int TargetTransformInfo::getCFInstrCost(unsigned Opcode,
+                                        TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                             Type *CondTy,
+                                            TTI::TargetCostKind CostKind,
                                             const Instruction *I) const {
   assert((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
-  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -642,40 +654,45 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
 int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                          MaybeAlign Alignment,
                                          unsigned AddressSpace,
+                                         TTI::TargetCostKind CostKind,
                                          const Instruction *I) const {
   assert((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
-  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                      CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                                unsigned Alignment,
-                                               unsigned AddressSpace) const {
+                                               unsigned AddressSpace,
+                                               TTI::TargetCostKind CostKind) const {
   int Cost =
-      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                     CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
-int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
-                                                Value *Ptr, bool VariableMask,
-                                                unsigned Alignment,
-                                                const Instruction *I) const {
+int TargetTransformInfo::getGatherScatterOpCost(
+    unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+    unsigned Alignment, TTI::TargetCostKind CostKind,
+    const Instruction *I) const {
   int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                             Alignment, I);
+                                             Alignment, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 int TargetTransformInfo::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
-    bool UseMaskForGaps) const {
+    unsigned Alignment, unsigned AddressSpace,
+    TTI::TargetCostKind CostKind,
+    bool UseMaskForCond, bool UseMaskForGaps) const {
   int Cost = TTIImpl->getInterleavedMemoryOpCost(
-      Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, UseMaskForCond,
-      UseMaskForGaps);
+      Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
+      UseMaskForCond, UseMaskForGaps);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -684,9 +701,11 @@ int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                                ArrayRef<Type *> Tys,
                                                FastMathFlags FMF,
                                                unsigned ScalarizationCostPassed,
+                                               TTI::TargetCostKind CostKind,
                                                const Instruction *I) const {
   int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                            ScalarizationCostPassed, I);
+                                            ScalarizationCostPassed, CostKind,
+                                            I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -694,15 +713,18 @@ int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
 int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                                ArrayRef<Value *> Args,
                                                FastMathFlags FMF, unsigned VF,
+                                               TTI::TargetCostKind CostKind,
                                                const Instruction *I) const {
-  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF,
+                                            CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
-                                          ArrayRef<Type *> Tys) const {
-  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
+                                          ArrayRef<Type *> Tys,
+                                          TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -727,18 +749,20 @@ int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
 int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode,
                                                     VectorType *Ty,
-                                                    bool IsPairwiseForm) const {
-  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
+                                                    bool IsPairwiseForm,
+                                                    TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
+                                                 CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
-int TargetTransformInfo::getMinMaxReductionCost(VectorType *Ty,
-                                                VectorType *CondTy,
-                                                bool IsPairwiseForm,
-                                                bool IsUnsigned) const {
+int TargetTransformInfo::getMinMaxReductionCost(
+    VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+    TTI::TargetCostKind CostKind) const {
   int Cost =
-      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
+      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
+                                      CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -1151,14 +1175,16 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
 }
 int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   switch (I->getOpcode()) {
   case Instruction::GetElementPtr:
-    return getUserCost(I, TCK_RecipThroughput);
+    return getUserCost(I, CostKind);
   case Instruction::Ret:
   case Instruction::PHI:
   case Instruction::Br: {
-    return getCFInstrCost(I->getOpcode());
+    return getCFInstrCost(I->getOpcode(), CostKind);
   }
   case Instruction::Add:
   case Instruction::FAdd:
@@ -1183,7 +1209,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
     Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
     SmallVector<const Value *, 2> Operands(I->operand_values());
-    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
+                                  Op1VK, Op2VK,
                                   Op1VP, Op2VP, Operands, I);
   }
   case Instruction::FNeg: {
@@ -1193,31 +1220,34 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     Op2VK = OK_AnyValue;
     Op2VP = OP_None;
     SmallVector<const Value *, 2> Operands(I->operand_values());
-    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
+                                  Op1VK, Op2VK,
                                   Op1VP, Op2VP, Operands, I);
   }
   case Instruction::Select: {
     const SelectInst *SI = cast<SelectInst>(I);
     Type *CondTy = SI->getCondition()->getType();
-    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
+    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy,
+                              CostKind, I);
   }
   case Instruction::ICmp:
   case Instruction::FCmp: {
     Type *ValTy = I->getOperand(0)->getType();
-    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
+    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(),
+                              CostKind, I);
   }
   case Instruction::Store: {
     const StoreInst *SI = cast<StoreInst>(I);
     Type *ValTy = SI->getValueOperand()->getType();
     return getMemoryOpCost(I->getOpcode(), ValTy,
                            MaybeAlign(SI->getAlignment()),
-                           SI->getPointerAddressSpace(), I);
+                           SI->getPointerAddressSpace(), CostKind, I);
   }
   case Instruction::Load: {
     const LoadInst *LI = cast<LoadInst>(I);
     return getMemoryOpCost(I->getOpcode(), I->getType(),
                            MaybeAlign(LI->getAlignment()),
-                           LI->getPointerAddressSpace(), I);
+                           LI->getPointerAddressSpace(), CostKind, I);
   }
   case Instruction::ZExt:
   case Instruction::SExt:
@@ -1233,7 +1263,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast: {
     Type *SrcTy = I->getOperand(0)->getType();
-    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
+    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, CostKind, I);
   }
   case Instruction::ExtractElement: {
     const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
@@ -1250,7 +1280,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
     case RK_Arithmetic:
       return getArithmeticReductionCost(ReduxOpCode, ReduxType,
-                                        /*IsPairwiseForm=*/false);
+                                        /*IsPairwiseForm=*/false,
+                                        CostKind);
     case RK_MinMax:
       return getMinMaxReductionCost(
           ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
@@ -1266,7 +1297,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
     case RK_Arithmetic:
       return getArithmeticReductionCost(ReduxOpCode, ReduxType,
-                                        /*IsPairwiseForm=*/true);
+                                        /*IsPairwiseForm=*/true, CostKind);
     case RK_MinMax:
       return getMinMaxReductionCost(
           ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
@@ -1334,7 +1365,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
       FMF = FPMO->getFastMathFlags();
     return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
-                                 FMF, 1, II);
+                                 FMF, 1, CostKind, II);
   }
   return -1;
   default:
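The net effect in this function is that the throughput entry point pins a single kind up front and forwards it to every per-opcode query, rather than each callee assuming its own default; for instance, the Select case above is now equivalent to calling getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, TTI::TCK_RecipThroughput, I) directly.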


@@ -6652,6 +6652,8 @@ class VectorPromoteHelper {
     uint64_t ScalarCost =
         TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
     uint64_t VectorCost = StoreExtractCombineCost;
+    enum TargetTransformInfo::TargetCostKind CostKind =
+        TargetTransformInfo::TCK_RecipThroughput;
     for (const auto &Inst : InstsToBePromoted) {
       // Compute the cost.
       // By construction, all instructions being promoted are arithmetic ones.
@@ -6667,8 +6669,9 @@ class VectorPromoteHelper {
           !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
                           : TargetTransformInfo::OK_AnyValue;
       ScalarCost += TTI.getArithmeticInstrCost(
-          Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
+          Inst->getOpcode(), Inst->getType(), CostKind, Arg0OVK, Arg1OVK);
       VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
+                                               CostKind,
                                                Arg0OVK, Arg1OVK);
     }
     LLVM_DEBUG(
@@ -7127,7 +7130,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
     return false;
   ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
   // Check that GEPI is a cheap one.
-  if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType())
+  if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
+                         TargetTransformInfo::TCK_SizeAndLatency)
       > TargetTransformInfo::TCC_Basic)
     return false;
   Value *GEPIOp = GEPI->getOperand(0);
@@ -7176,7 +7180,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
         cast<ConstantInt>(UGEPI->getOperand(1))->getType())
       return false;
     ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
-    if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType())
+    if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
+                           TargetTransformInfo::TCK_SizeAndLatency)
        > TargetTransformInfo::TCC_Basic)
      return false;
    UGEPIs.push_back(UGEPI);
@@ -7187,7 +7192,9 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
   for (GetElementPtrInst *UGEPI : UGEPIs) {
     ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
     APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
-    unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType());
+    unsigned ImmCost =
+        TTI->getIntImmCost(NewIdx, GEPIIdx->getType(),
+                           TargetTransformInfo::TCK_SizeAndLatency);
     if (ImmCost > TargetTransformInfo::TCC_Basic)
       return false;
   }
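To see the arithmetic this guards, with invented indices: unmerging rewrites each UGEPI to index off GEPI, so a pair of constant indices {100, 108} becomes the delta immediate 108 - 100 = 8; the rewrite only proceeds if that delta still materializes within TCC_Basic under TCK_SizeAndLatency, the natural kind for a question about encoding an immediate.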


@@ -57,7 +57,8 @@ int AArch64TTIImpl::getIntImmCost(int64_t Val) {
 }
 /// Calculate the cost of materializing the given constant.
-int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                                  TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -82,7 +83,8 @@ int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
-                                      const APInt &Imm, Type *Ty) {
+                                      const APInt &Imm, Type *Ty,
+                                      TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -139,16 +141,17 @@ int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
   if (Idx == ImmIdx) {
     int NumConstants = (BitSize + 63) / 64;
-    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
     return (Cost <= NumConstants * TTI::TCC_Basic)
                ? static_cast<int>(TTI::TCC_Free)
               : Cost;
   }
-  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
+  return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                        const APInt &Imm, Type *Ty) {
+                                        const APInt &Imm, Type *Ty,
+                                        TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -161,7 +164,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
   // selected instruction, so we compute the materialization cost for the
   // immediate directly.
   if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
-    return AArch64TTIImpl::getIntImmCost(Imm, Ty);
+    return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
   switch (IID) {
   default:
@@ -174,7 +177,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
   case Intrinsic::umul_with_overflow:
     if (Idx == 1) {
       int NumConstants = (BitSize + 63) / 64;
-      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
       return (Cost <= NumConstants * TTI::TCC_Basic)
                  ? static_cast<int>(TTI::TCC_Free)
                  : Cost;
@@ -190,7 +193,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
     return TTI::TCC_Free;
     break;
   }
-  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
+  return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 TargetTransformInfo::PopcntSupportKind
@@ -267,6 +270,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
 }
 int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                     TTI::TargetCostKind CostKind,
                                      const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
@@ -295,7 +299,7 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   EVT DstTy = TLI->getValueType(DL, Dst);
   if (!SrcTy.isSimple() || !DstTy.isSimple())
-    return BaseT::getCastInstrCost(Opcode, Dst, Src);
+    return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
   static const TypeConversionCostTblEntry
   ConversionTbl[] = {
@@ -399,7 +403,7 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                                  SrcTy.getSimpleVT()))
     return Entry->Cost;
-  return BaseT::getCastInstrCost(Opcode, Dst, Src);
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
 }
 int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
@@ -425,17 +429,18 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
   auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
   auto DstVT = TLI->getValueType(DL, Dst);
   auto SrcVT = TLI->getValueType(DL, Src);
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   // If the resulting type is still a vector and the destination type is legal,
   // we may get the extension for free. If not, get the default cost for the
   // extend.
   if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
-    return Cost + getCastInstrCost(Opcode, Dst, Src);
+    return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
   // The destination type should be larger than the element type. If not, get
   // the default cost for the extend.
   if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
-    return Cost + getCastInstrCost(Opcode, Dst, Src);
+    return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
   switch (Opcode) {
   default:
@@ -454,7 +459,7 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
   }
   // If we are unable to perform the extend for free, get the default cost.
-  return Cost + getCastInstrCost(Opcode, Dst, Src);
+  return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
 }
 int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
@@ -483,7 +488,8 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
 }
 int AArch64TTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
@@ -504,7 +510,8 @@ int AArch64TTIImpl::getArithmeticInstrCost(
   switch (ISD) {
   default:
-    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                                Opd2Info,
                                                 Opd1PropInfo, Opd2PropInfo);
   case ISD::SDIV:
     if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -513,16 +520,20 @@ int AArch64TTIImpl::getArithmeticInstrCost(
       // normally expanded to the sequence ADD + CMP + SELECT + SRA.
       // The OperandValue properties many not be same as that of previous
       // operation; conservatively assume OP_None.
-      Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
+      Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+                                     Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
-      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
+      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
+                                     Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
-      Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
+      Cost += getArithmeticInstrCost(Instruction::Select, Ty, CostKind,
+                                     Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
-      Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
+      Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+                                     Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
       return Cost;
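Spelling the SDIV model out with illustrative unit costs: a signed divide by a power of two is priced above as the four-instruction expansion ADD + CMP + SELECT + SRA, so if each recursive query returns 1 the divide comes out near 4; all four getArithmeticInstrCost calls now inherit the caller's CostKind.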
@@ -535,31 +546,34 @@ int AArch64TTIImpl::getArithmeticInstrCost(
       // Vector signed division by constant are expanded to the
       // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
       // to MULHS + SUB + SRL + ADD + SRL.
-      int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
-                                           Opd2Info,
+      int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+                                           Opd1Info, Opd2Info,
                                            TargetTransformInfo::OP_None,
                                            TargetTransformInfo::OP_None);
-      int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
-                                           Opd2Info,
+      int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+                                           Opd1Info, Opd2Info,
                                            TargetTransformInfo::OP_None,
                                            TargetTransformInfo::OP_None);
-      int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
-                                           Opd2Info,
+      int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+                                           Opd1Info, Opd2Info,
                                            TargetTransformInfo::OP_None,
                                            TargetTransformInfo::OP_None);
       return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
     }
   }
-  Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+  Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                        Opd2Info,
                                         Opd1PropInfo, Opd2PropInfo);
   if (Ty->isVectorTy()) {
     // On AArch64, vector divisions are not supported natively and are
     // expanded into scalar divisions of each pair of elements.
-    Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
-                                   Opd2Info, Opd1PropInfo, Opd2PropInfo);
-    Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
-                                   Opd2Info, Opd1PropInfo, Opd2PropInfo);
+    Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind,
+                                   Opd1Info, Opd2Info, Opd1PropInfo,
+                                   Opd2PropInfo);
+    Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind,
+                                   Opd1Info, Opd2Info, Opd1PropInfo,
+                                   Opd2PropInfo);
     // TODO: if one of the arguments is scalar, then it's not necessary to
     // double the cost of handling the vector elements.
     Cost += Cost;
@@ -596,7 +610,9 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
 }
 int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                       Type *CondTy, const Instruction *I) {
+                                       Type *CondTy,
+                                       TTI::TargetCostKind CostKind,
+                                       const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   // We don't lower some vector selects well that are wider than the register
@@ -623,7 +639,7 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
       return Entry->Cost;
     }
   }
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
 }
 AArch64TTIImpl::TTI::MemCmpExpansionOptions
@@ -646,6 +662,7 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
 int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                     MaybeAlign Alignment, unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
                                     const Instruction *I) {
   auto LT = TLI->getTypeLegalizationCost(DL, Ty);
@@ -688,6 +705,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                                ArrayRef<unsigned> Indices,
                                                unsigned Alignment,
                                                unsigned AddressSpace,
+                                               TTI::TargetCostKind CostKind,
                                                bool UseMaskForCond,
                                                bool UseMaskForGaps) {
   assert(Factor >= 2 && "Invalid interleave factor");
@@ -707,18 +725,19 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   }
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace,
+                                           Alignment, AddressSpace, CostKind,
                                            UseMaskForCond, UseMaskForGaps);
 }
 int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
   int Cost = 0;
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   for (auto *I : Tys) {
     if (!I->isVectorTy())
       continue;
     if (I->getScalarSizeInBits() * cast<VectorType>(I)->getNumElements() == 128)
-      Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
-              getMemoryOpCost(Instruction::Load, I, Align(128), 0);
+      Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
+              getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
   }
   return Cost;
 }
@@ -932,10 +951,12 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
 int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
                                                VectorType *ValTy,
-                                               bool IsPairwiseForm) {
+                                               bool IsPairwiseForm,
+                                               TTI::TargetCostKind CostKind) {
   if (IsPairwiseForm)
-    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
+    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
+                                             CostKind);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
   MVT MTy = LT.second;
@@ -956,7 +977,8 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
   if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
     return LT.first * Entry->Cost;
-  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
+  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
+                                           CostKind);
 }
 int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,


@@ -72,11 +72,11 @@ public:
   using BaseT::getIntImmCost;
   int getIntImmCost(int64_t Val);
-  int getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                        Type *Ty);
+                        Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
   /// @}
@@ -112,6 +112,7 @@ public:
   unsigned getMaxInterleaveFactor(unsigned VF);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
@@ -121,6 +122,7 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -131,13 +133,16 @@ public:
   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                     bool IsZeroCmp) const;
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
+                      unsigned AddressSpace,
+                      TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
   int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
@@ -192,6 +197,7 @@ public:
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
                                  unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
@@ -225,7 +231,8 @@ public:
                              TTI::ReductionFlags Flags) const;
   int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                 bool IsPairwiseForm);
+                                 bool IsPairwiseForm,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
   int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                      VectorType *SubTp);

lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

@@ -422,6 +422,7 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
 }
 int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::TargetCostKind CostKind,
                                        TTI::OperandValueKind Opd1Info,
                                        TTI::OperandValueKind Opd2Info,
                                        TTI::OperandValueProperties Opd1PropInfo,
@@ -430,7 +431,8 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                        const Instruction *CxtI) {
   EVT OrigTy = TLI->getValueType(DL, Ty);
   if (!OrigTy.isSimple()) {
-    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                         Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo);
   }
@@ -542,7 +544,8 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     break;
   }
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                       Opd2Info,
                                        Opd1PropInfo, Opd2PropInfo);
 }
@@ -562,13 +565,15 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
 template <typename T>
 int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                       ArrayRef<T *> Args, FastMathFlags FMF,
-                                      unsigned VF, const Instruction *I) {
+                                      unsigned VF,
+                                      TTI::TargetCostKind CostKind,
+                                      const Instruction *I) {
   if (!intrinsicHasPackedVectorBenefit(ID))
-    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
   EVT OrigTy = TLI->getValueType(DL, RetTy);
   if (!OrigTy.isSimple()) {
-    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
   }
   // Legalize the type.
@@ -597,31 +602,36 @@ int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
 int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                       ArrayRef<Value *> Args, FastMathFlags FMF,
-                                      unsigned VF, const Instruction *I) {
-  return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF, I);
+                                      unsigned VF,
+                                      TTI::TargetCostKind CostKind,
+                                      const Instruction *I) {
+  return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF, CostKind, I);
 }
 int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                       ArrayRef<Type *> Tys, FastMathFlags FMF,
                                       unsigned ScalarizationCostPassed,
+                                      TTI::TargetCostKind CostKind,
                                       const Instruction *I) {
   return getIntrinsicInstrCost<Type>(ID, RetTy, Tys, FMF,
-                                     ScalarizationCostPassed, I);
+                                     ScalarizationCostPassed, CostKind, I);
 }
-unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
+unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode,
+                                    TTI::TargetCostKind CostKind) {
   // XXX - For some reason this isn't called for switch.
   switch (Opcode) {
   case Instruction::Br:
   case Instruction::Ret:
     return 10;
   default:
-    return BaseT::getCFInstrCost(Opcode);
+    return BaseT::getCFInstrCost(Opcode, CostKind);
   }
 }
 int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                           bool IsPairwise) {
+                                           bool IsPairwise,
+                                           TTI::TargetCostKind CostKind) {
   EVT OrigTy = TLI->getValueType(DL, Ty);
   // Computes cost on targets that have packed math instructions(which support
@@ -629,15 +639,15 @@ int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
   if (IsPairwise ||
       !ST->hasVOP3PInsts() ||
       OrigTy.getScalarSizeInBits() != 16)
-    return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise);
+    return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise, CostKind);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
   return LT.first * getFullRateInstrCost();
 }
 int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                                       bool IsPairwise,
-                                       bool IsUnsigned) {
+                                       bool IsPairwise, bool IsUnsigned,
+                                       TTI::TargetCostKind CostKind) {
   EVT OrigTy = TLI->getValueType(DL, Ty);
   // Computes cost on targets that have packed math instructions(which support
@@ -645,7 +655,8 @@ int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
   if (IsPairwise ||
       !ST->hasVOP3PInsts() ||
       OrigTy.getScalarSizeInBits() != 16)
-    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned);
+    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
+                                         CostKind);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
   return LT.first * getHalfRateInstrCost();
@@ -979,7 +990,7 @@ GCNTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
       if (auto *FPMO = dyn_cast<FPMathOperator>(II))
         FMF = FPMO->getFastMathFlags();
       return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
-                                   FMF, 1, II);
+                                   FMF, 1, CostKind, II);
     } else {
       return BaseT::getUserCost(U, Operands, CostKind);
     }
@@ -1031,7 +1042,7 @@ GCNTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast: {
     return getCastInstrCost(I->getOpcode(), I->getType(),
-                            I->getOperand(0)->getType(), I);
+                            I->getOperand(0)->getType(), CostKind, I);
   }
   case Instruction::Add:
   case Instruction::FAdd:
@@ -1052,7 +1063,7 @@ GCNTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
   case Instruction::Or:
   case Instruction::Xor:
   case Instruction::FNeg: {
-    return getArithmeticInstrCost(I->getOpcode(), I->getType(),
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
                                   TTI::OK_AnyValue, TTI::OK_AnyValue,
                                   TTI::OP_None, TTI::OP_None, Operands, I);
   }
@@ -1127,14 +1138,15 @@ unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
   return 8;
 }
-unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) {
+unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode,
+                                     TTI::TargetCostKind CostKind) {
   // XXX - For some reason this isn't called for switch.
   switch (Opcode) {
   case Instruction::Br:
   case Instruction::Ret:
     return 10;
   default:
-    return BaseT::getCFInstrCost(Opcode);
+    return BaseT::getCFInstrCost(Opcode, CostKind);
   }
 }

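GCN and R600 still return the same hard-coded 10 for branches whatever kind is requested, but the plumbing is what lets a backend start differentiating. A hypothetical hook that does so (this is not AMDGPU's actual behaviour):

// Hypothetical target hook: branches are cheap in code size but expensive
// for throughput; everything else defers to the base implementation.
unsigned MyTTIImpl::getCFInstrCost(unsigned Opcode,
                                   TTI::TargetCostKind CostKind) {
  switch (Opcode) {
  case Instruction::Br:
  case Instruction::Ret:
    return CostKind == TTI::TCK_CodeSize ? 1 : 10;
  default:
    return BaseT::getCFInstrCost(Opcode, CostKind);
  }
}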
lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

@@ -184,6 +184,7 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -191,7 +192,7 @@ public:
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr);
-  unsigned getCFInstrCost(unsigned Opcode);
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
   bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                      ArrayRef<unsigned> Indices = {}) const;
@@ -225,23 +226,30 @@ public:
   int getInlinerVectorBonusPercent() { return 0; }
-  int getArithmeticReductionCost(unsigned Opcode,
-                                 VectorType *Ty,
-                                 bool IsPairwise);
+  int getArithmeticReductionCost(
+      unsigned Opcode,
+      VectorType *Ty,
+      bool IsPairwise,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
   template <typename T>
   int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<T *> Args,
                             FastMathFlags FMF, unsigned VF,
+                            TTI::TargetCostKind CostKind,
                             const Instruction *I = nullptr);
-  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> Tys, FastMathFlags FMF,
-                            unsigned ScalarizationCostPassed = UINT_MAX,
-                            const Instruction *I = nullptr);
-  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1, const Instruction *I = nullptr);
-  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                             bool IsPairwiseForm,
-                             bool IsUnsigned);
+  int getIntrinsicInstrCost(
+      Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
+      unsigned ScalarizationCostPassed = UINT_MAX,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr);
+  int getIntrinsicInstrCost(
+      Intrinsic::ID IID, Type *RetTy, ArrayRef<Value *> Args, FastMathFlags FMF,
+      unsigned VF = 1, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr);
+  int getMinMaxReductionCost(
+      VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
                        TTI::TargetCostKind CostKind);
 };
@@ -282,7 +290,7 @@ public:
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
   unsigned getMaxInterleaveFactor(unsigned VF);
-  unsigned getCFInstrCost(unsigned Opcode);
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
 };

lib/Target/ARM/ARMTargetTransformInfo.cpp

@@ -80,7 +80,8 @@ bool ARMTTIImpl::shouldFavorPostInc() const {
   return false;
 }
-int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                              TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned Bits = Ty->getPrimitiveSizeInBits();
@@ -123,7 +124,7 @@ int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
 }
 int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                                  Type *Ty) {
+                                  Type *Ty, TTI::TargetCostKind CostKind) {
   // Division by a constant can be turned into multiplication, but only if we
   // know it's constant. So it's not so much that the immediate is cheap (it's
   // not), but that the alternative is worse.
@@ -138,12 +139,14 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
     if (Imm == 255 || Imm == 65535)
       return 0;
     // Conversion to BIC is free, and means we can use ~Imm instead.
-    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
+    return std::min(getIntImmCost(Imm, Ty, CostKind),
+                    getIntImmCost(~Imm, Ty, CostKind));
   }
   if (Opcode == Instruction::Add)
     // Conversion to SUB is free, and means we can use -Imm instead.
-    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
+    return std::min(getIntImmCost(Imm, Ty, CostKind),
+                    getIntImmCost(-Imm, Ty, CostKind));
   if (Opcode == Instruction::ICmp && Imm.isNegative() &&
       Ty->getIntegerBitWidth() == 32) {
@@ -160,10 +163,11 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
   if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
     return 0;
-  return getIntImmCost(Imm, Ty);
+  return getIntImmCost(Imm, Ty, CostKind);
 }
 int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
@@ -187,7 +191,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   EVT DstTy = TLI->getValueType(DL, Dst);
   if (!SrcTy.isSimple() || !DstTy.isSimple())
-    return BaseT::getCastInstrCost(Opcode, Dst, Src);
+    return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
   // The extend of a load is free
   if (I && isa<LoadInst>(I->getOperand(0))) {
@@ -418,7 +422,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                      ? ST->getMVEVectorCostFactor()
                      : 1;
-  return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src);
+  return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
 }
 int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
@@ -458,6 +462,7 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
 }
 int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   TTI::TargetCostKind CostKind,
                                    const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   // On NEON a vector select gets lowered to vbsl.
@@ -485,7 +490,8 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
   int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy()
                      ? ST->getMVEVectorCostFactor()
                      : 1;
-  return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind,
+                                              I);
 }
 int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -681,6 +687,7 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
 }
 int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::TargetCostKind CostKind,
                                        TTI::OperandValueKind Op1Info,
                                        TTI::OperandValueKind Op2Info,
                                        TTI::OperandValueProperties Opd1PropInfo,
@@ -737,7 +744,8 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
     return LT.first * Entry->Cost;
-  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+                                           Op2Info,
                                            Opd1PropInfo, Opd2PropInfo);
   // This is somewhat of a hack. The problem that we are facing is that SROA
@@ -795,7 +803,8 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   // Else this is expand, assume that we need to scalarize this op.
   if (auto *VTy = dyn_cast<VectorType>(Ty)) {
     unsigned Num = VTy->getNumElements();
-    unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+    unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType(),
+                                           CostKind);
     // Return the cost of multiple scalar invocation plus the cost of
     // inserting and extracting the values.
     return BaseT::getScalarizationOverhead(Ty, Args) + Num * Cost;
@@ -806,6 +815,7 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
 int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                 MaybeAlign Alignment, unsigned AddressSpace,
+                                TTI::TargetCostKind CostKind,
                                 const Instruction *I) {
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
@@ -824,8 +834,9 @@ int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
 int ARMTTIImpl::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
-    bool UseMaskForGaps) {
+    unsigned Alignment, unsigned AddressSpace,
+    TTI::TargetCostKind CostKind,
+    bool UseMaskForCond, bool UseMaskForGaps) {
   assert(Factor >= 2 && "Invalid interleave factor");
   assert(isa<VectorType>(VecTy) && "Expect a vector type");
@@ -856,18 +867,19 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(
   }
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace,
+                                           Alignment, AddressSpace, CostKind,
                                            UseMaskForCond, UseMaskForGaps);
 }
 unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                             Value *Ptr, bool VariableMask,
                                             unsigned Alignment,
+                                            TTI::TargetCostKind CostKind,
                                             const Instruction *I) {
   using namespace PatternMatch;
   if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters)
     return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                         Alignment, I);
+                                         Alignment, CostKind, I);
   assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
   VectorType *VTy = cast<VectorType>(DataTy);

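ARM threads CostKind through its recursive immediate queries because the BIC and SUB folds price an immediate as the cheaper of two materializations. The decision itself is independent of the plumbing; a standalone sketch of the pattern, where CostOf is a hypothetical stand-in for getIntImmCost:

// Sketch of the "cheaper alternative" pattern: an AND rewritable as BIC may
// materialize ~Imm instead, so the immediate costs whichever encoding is
// cheaper.
#include "llvm/ADT/APInt.h"
#include <algorithm>
#include <functional>

static int andImmCost(const llvm::APInt &Imm,
                      const std::function<int(const llvm::APInt &)> &CostOf) {
  return std::min(CostOf(Imm), CostOf(~Imm)); // ~Imm is the BIC operand
}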
lib/Target/ARM/ARMTargetTransformInfo.h

@@ -110,9 +110,10 @@ public:
                             Type *Ty);
   using BaseT::getIntImmCost;
-  int getIntImmCost(const APInt &Imm, Type *Ty);
-  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
+  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                        Type *Ty, TTI::TargetCostKind CostKind);
   /// @}
@@ -194,9 +195,11 @@ public:
   }
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
@@ -206,6 +209,7 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -214,17 +218,21 @@ public:
       const Instruction *CxtI = nullptr);
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
+                      unsigned AddressSpace,
+                      TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
                                  unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
-  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                                  bool VariableMask, unsigned Alignment,
-                                  const Instruction *I = nullptr);
+  unsigned getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind,
+      const Instruction *I = nullptr);
   bool isLoweredToCall(const Function *F);
   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,

lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

@@ -127,28 +127,30 @@ unsigned HexagonTTIImpl::getOperandsScalarizationOverhead(
 }
 unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
-      ArrayRef<Type*> Tys) {
-  return BaseT::getCallInstrCost(F, RetTy, Tys);
+      ArrayRef<Type*> Tys, TTI::TargetCostKind CostKind) {
+  return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind);
 }
 unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                                ArrayRef<Value *> Args,
                                                FastMathFlags FMF, unsigned VF,
+                                               TTI::TargetCostKind CostKind,
                                                const Instruction *I) {
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
 }
 unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                                ArrayRef<Type *> Tys,
                                                FastMathFlags FMF,
                                                unsigned ScalarizationCostPassed,
+                                               TTI::TargetCostKind CostKind,
                                                const Instruction *I) {
   if (ID == Intrinsic::bswap) {
     std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
     return LT.first + 2;
   }
   return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, I);
+                                      ScalarizationCostPassed, CostKind, I);
 }
 unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
@@ -159,10 +161,12 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                          MaybeAlign Alignment,
                                          unsigned AddressSpace,
+                                         TTI::TargetCostKind CostKind,
                                          const Instruction *I) {
   assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
   if (Opcode == Instruction::Store)
-    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                  CostKind, I);
   if (Src->isVectorTy()) {
     VectorType *VecTy = cast<VectorType>(Src);
@@ -200,12 +204,15 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
       return (3 - LogA) * Cost * NumLoads;
   }
-  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                CostKind, I);
 }
 unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
-      Type *Src, unsigned Alignment, unsigned AddressSpace) {
-  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+      Type *Src, unsigned Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind) {
+  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                      CostKind);
 }
 unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
@@ -213,38 +220,41 @@ unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
   return 1;
 }
-unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
-      Value *Ptr, bool VariableMask,
-      unsigned Alignment,
+unsigned HexagonTTIImpl::getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind,
       const Instruction *I) {
   return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                       Alignment, I);
+                                       Alignment, CostKind, I);
 }
 unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
       Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-      unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
+      unsigned Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind, bool UseMaskForCond,
       bool UseMaskForGaps) {
   if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                              Alignment, AddressSpace,
+                                             CostKind,
                                              UseMaskForCond, UseMaskForGaps);
   return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
-                         nullptr);
+                         CostKind);
 }
 unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-      Type *CondTy, const Instruction *I) {
+      Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) {
   if (ValTy->isVectorTy()) {
     std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
     if (Opcode == Instruction::FCmp)
       return LT.first + FloatFactor * getTypeNumElements(ValTy);
   }
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
 }
 unsigned HexagonTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
@@ -253,12 +263,12 @@ unsigned HexagonTTIImpl::getArithmeticInstrCost(
     if (LT.second.isFloatingPoint())
       return LT.first + FloatFactor * getTypeNumElements(Ty);
   }
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
 }
 unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
-      Type *SrcTy, const Instruction *I) {
+      Type *SrcTy, TTI::TargetCostKind CostKind, const Instruction *I) {
   if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
     unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
     unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;

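One subtlety in the Hexagon interleaved path above: the old call ended with an explicit nullptr for the trailing Instruction parameter, while the new call passes CostKind in that position and lets I default. A caller-side sketch of the equivalent wide-load query; the alignment and address space values are illustrative, and this assumes the public wrapper mirrors the signatures shown here:

// Sketch: when the indices cover every lane, Hexagon prices an interleaved
// access as a single wide load of the vector type.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

static int interleavedAsWideLoad(const TargetTransformInfo &TTI, Type *VecTy) {
  return TTI.getMemoryOpCost(Instruction::Load, VecTy, MaybeAlign(16),
                             /*AddressSpace=*/0,
                             TargetTransformInfo::TCK_RecipThroughput);
}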
lib/Target/Hexagon/HexagonTargetTransformInfo.h

@@ -105,34 +105,44 @@ public:
                                             bool Insert, bool Extract);
   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                             unsigned VF);
-  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys);
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys,
+                            TTI::TargetCostKind CostKind);
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef<Value *> Args, FastMathFlags FMF,
-                                 unsigned VF, const Instruction *I);
-  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
-                                 unsigned ScalarizationCostPassed = UINT_MAX,
-                                 const Instruction *I = nullptr);
+                                 unsigned VF,
+                                 TTI::TargetCostKind CostKind,
+                                 const Instruction *I);
+  unsigned getIntrinsicInstrCost(
+      Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
+      unsigned ScalarizationCostPassed = UINT_MAX,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr);
   unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
                                      const SCEV *S);
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                            unsigned AddressSpace,
+                           TTI::TargetCostKind CostKind,
                            const Instruction *I = nullptr);
   unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                 unsigned AddressSpace);
+                                 unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                           Type *SubTp);
   unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                   bool VariableMask, unsigned Alignment,
+                                  TTI::TargetCostKind CostKind,
                                   const Instruction *I);
   unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
       unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
-      unsigned AddressSpace, bool UseMaskForCond = false,
-      bool UseMaskForGaps = false);
+      unsigned AddressSpace,
+      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+      bool UseMaskForCond = false, bool UseMaskForGaps = false);
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
-                              const Instruction *I);
+                              TTI::TargetCostKind CostKind,
+                              const Instruction *I = nullptr);
   unsigned getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -140,10 +150,11 @@ public:
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr);
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                            TTI::TargetCostKind CostKind,
                             const Instruction *I = nullptr);
   unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
-  unsigned getCFInstrCost(unsigned Opcode) {
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
     return 1;
   }

lib/Target/Lanai/LanaiTargetTransformInfo.h

@@ -49,7 +49,7 @@ public:
     return TTI::PSK_Software;
   }
-  int getIntImmCost(const APInt &Imm, Type *Ty) {
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) {
     assert(Ty->isIntegerTy());
     if (Imm == 0)
       return TTI::TCC_Free;
@@ -66,17 +66,19 @@ public:
     return 4 * TTI::TCC_Basic;
   }
-  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) {
-    return getIntImmCost(Imm, Ty);
+  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
+                        TTI::TargetCostKind CostKind) {
+    return getIntImmCost(Imm, Ty, CostKind);
   }
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty) {
-    return getIntImmCost(Imm, Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind) {
+    return getIntImmCost(Imm, Ty, CostKind);
   }
   unsigned getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -87,7 +89,8 @@ public:
     switch (ISD) {
     default:
-      return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                           Opd2Info,
                                            Opd1PropInfo, Opd2PropInfo);
     case ISD::MUL:
     case ISD::SDIV:
@@ -98,7 +101,8 @@ public:
       // instruction cost was arbitrarily chosen to reduce the desirability
       // of emitting arithmetic instructions that are emulated in software.
       // TODO: Investigate the performance impact given specialized lowerings.
-      return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+      return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                                Opd2Info,
                                                 Opd1PropInfo, Opd2PropInfo);
     }
   }

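Lanai keeps its hooks inline in the header and, like most declarations in this patch, gives CostKind a default so existing call sites keep compiling. A self-contained illustration of that migration pattern; the enum and the costs are simplified stand-ins, not the real TTI definitions:

// Stand-in showing why a defaulted kind parameter eases migration:
// legacy callers compile unchanged, new callers opt in explicitly.
#include <cassert>

enum CostKind { RecipThroughput, CodeSize };

static int arithCost(unsigned Opcode, CostKind Kind = RecipThroughput) {
  (void)Opcode;                    // a real hook would switch on the opcode
  return Kind == CodeSize ? 1 : 2; // illustrative values only
}

int main() {
  assert(arithCost(0) == 2);           // legacy call site, default kind
  assert(arithCost(0, CodeSize) == 1); // migrated call site
}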
lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

@@ -112,7 +112,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
 }
 int NVPTXTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
@@ -123,7 +124,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost(
   switch (ISD) {
   default:
-    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                         Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo);
   case ISD::ADD:
   case ISD::MUL:
@@ -136,7 +138,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost(
     if (LT.second.SimpleTy == MVT::i64)
       return 2 * LT.first;
     // Delegate other cases to the basic TTI.
-    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+                                         Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo);
   }
 }

lib/Target/NVPTX/NVPTXTargetTransformInfo.h

@@ -87,6 +87,7 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,

lib/Target/PowerPC/PPCTargetTransformInfo.cpp

@@ -59,9 +59,10 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
   return TTI::PSK_Software;
 }
-int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                              TTI::TargetCostKind CostKind) {
   if (DisablePPCConstHoist)
-    return BaseT::getIntImmCost(Imm, Ty);
+    return BaseT::getIntImmCost(Imm, Ty, CostKind);
   assert(Ty->isIntegerTy());
@@ -89,9 +90,10 @@ int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                    const APInt &Imm, Type *Ty) {
+                                    const APInt &Imm, Type *Ty,
+                                    TTI::TargetCostKind CostKind) {
   if (DisablePPCConstHoist)
-    return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty);
+    return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
   assert(Ty->isIntegerTy());
@@ -119,13 +121,14 @@ int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
       return TTI::TCC_Free;
     break;
   }
-  return PPCTTIImpl::getIntImmCost(Imm, Ty);
+  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
-                                  const APInt &Imm, Type *Ty) {
+                                  const APInt &Imm, Type *Ty,
+                                  TTI::TargetCostKind CostKind) {
   if (DisablePPCConstHoist)
-    return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty);
+    return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
   assert(Ty->isIntegerTy());
@@ -203,7 +206,7 @@ int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
     return TTI::TCC_Free;
   }
-  return PPCTTIImpl::getIntImmCost(Imm, Ty);
+  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 unsigned
@@ -720,6 +723,7 @@ int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
 }
 int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::TargetCostKind CostKind,
                                        TTI::OperandValueKind Op1Info,
                                        TTI::OperandValueKind Op2Info,
                                        TTI::OperandValueProperties Opd1PropInfo,
@@ -729,7 +733,8 @@ int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
   // Fallback to the default implementation.
-  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+                                           Op2Info,
                                            Opd1PropInfo, Opd2PropInfo);
   return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
 }
@@ -749,16 +754,18 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
 }
 int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
-  int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src);
+  int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
   return vectorCostAdjustment(Cost, Opcode, Dst, Src);
 }
 int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   TTI::TargetCostKind CostKind,
                                    const Instruction *I) {
-  int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
   return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
 }
@@ -837,13 +844,15 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
 int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                 MaybeAlign Alignment, unsigned AddressSpace,
+                                TTI::TargetCostKind CostKind,
                                 const Instruction *I) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
   assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
          "Invalid Opcode");
-  int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+  int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                    CostKind);
   Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
   bool IsAltivecType = ST->hasAltivec() &&
@@ -913,11 +922,12 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                            ArrayRef<unsigned> Indices,
                                            unsigned Alignment,
                                            unsigned AddressSpace,
+                                           TTI::TargetCostKind CostKind,
                                            bool UseMaskForCond,
                                            bool UseMaskForGaps) {
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                              UseMaskForCond, UseMaskForGaps);
   assert(isa<VectorType>(VecTy) &&
@@ -928,7 +938,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   // Firstly, the cost of load/store operation.
   int Cost =
-      getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
+      getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
+                      CostKind);
   // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
   // (at least in the sense that there need only be one non-loop-invariant
@@ -943,19 +954,21 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
 unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                            ArrayRef<Value *> Args,
                                            FastMathFlags FMF, unsigned VF,
+                                           TTI::TargetCostKind CostKind,
                                            const Instruction *I) {
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
 }
 unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                            ArrayRef<Type *> Tys,
                                            FastMathFlags FMF,
                                            unsigned ScalarizationCostPassed,
+                                           TTI::TargetCostKind CostKind,
                                            const Instruction *I) {
   if (ID == Intrinsic::bswap && ST->hasP9Vector())
     return TLI->getTypeLegalizationCost(DL, RetTy).first;
   return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, I);
+                                      ScalarizationCostPassed, CostKind, I);
 }
 bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,

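PowerPC forwards the kind into each base-cost call and then applies vectorCostAdjustment on top, so the same operation can now be priced differently per kind. A caller-side sketch comparing two kinds for a single cast; the helper is hypothetical and assumes the public wrapper mirrors the signatures above:

// Sketch: ask whether a cast looks cheaper for size than for throughput,
// the kind of question callers can only pose now that the kind is explicit.
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

static bool castCheaperForSize(const TargetTransformInfo &TTI, unsigned Opcode,
                               Type *Dst, Type *Src) {
  int SizeCost = TTI.getCastInstrCost(Opcode, Dst, Src,
                                      TargetTransformInfo::TCK_CodeSize);
  int TputCost = TTI.getCastInstrCost(Opcode, Dst, Src,
                                      TargetTransformInfo::TCK_RecipThroughput);
  return SizeCost < TputCost;
}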
lib/Target/PowerPC/PPCTargetTransformInfo.h

@@ -45,12 +45,13 @@ public:
   /// @{
   using BaseT::getIntImmCost;
-  int getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty,
+                    TTI::TargetCostKind CostKind);
   int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                        Type *Ty);
+                        Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
                        TTI::TargetCostKind CostKind);
@@ -91,6 +92,7 @@ public:
   int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -99,25 +101,34 @@ public:
       const Instruction *CxtI = nullptr);
   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
+                      unsigned AddressSpace,
+                      TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                  unsigned Factor,
                                  ArrayRef<unsigned> Indices,
                                  unsigned Alignment,
                                  unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
-  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                 ArrayRef<Value *> Args, FastMathFlags FMF,
-                                 unsigned VF, const Instruction *I = nullptr);
+  unsigned getIntrinsicInstrCost(
+      Intrinsic::ID ID, Type *RetTy,
+      ArrayRef<Value *> Args, FastMathFlags FMF,
+      unsigned VF,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr);
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef<Type *> Tys, FastMathFlags FMF,
                                  unsigned ScalarizationCostPassed = UINT_MAX,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                                  const Instruction *I = nullptr);
   /// @}

lib/Target/RISCV/RISCVTargetTransformInfo.cpp

@@ -15,7 +15,8 @@ using namespace llvm;
 #define DEBUG_TYPE "riscvtti"
-int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                                TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy() &&
          "getIntImmCost can only estimate cost of materialising integers");
@@ -30,7 +31,7 @@ int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                                    Type *Ty) {
+                                    Type *Ty, TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy() &&
          "getIntImmCost can only estimate cost of materialising integers");
@@ -78,7 +79,7 @@ int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &
   }
   // Otherwise, use the full materialisation cost.
-  return getIntImmCost(Imm, Ty);
+  return getIntImmCost(Imm, Ty, CostKind);
 }
 // By default, prevent hoisting.
@@ -86,7 +87,8 @@ int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &
 }
 int RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                      const APInt &Imm, Type *Ty) {
+                                      const APInt &Imm, Type *Ty,
+                                      TTI::TargetCostKind CostKind) {
   // Prevent hoisting in unknown cases.
   return TTI::TCC_Free;
 }
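The RISC-V hunks above show the pattern repeated across every backend in this commit: the entry point gains a TTI::TargetCostKind parameter and threads it into each nested cost query instead of letting the callee fall back to its own default. A condensed sketch of that pattern, with hypothetical free functions standing in for the member functions (not code from this commit):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Assumed helper: the plain materialisation cost for Imm of type Ty.
    int exampleGetIntImmCost(const APInt &Imm, Type *Ty,
                             TTI::TargetCostKind CostKind);

    int exampleGetIntImmCostInst(unsigned Opcode, unsigned Idx,
                                 const APInt &Imm, Type *Ty,
                                 TTI::TargetCostKind CostKind) {
      if (Opcode == Instruction::PHI) // unknown context: prevent hoisting
        return TTI::TCC_Free;
      // Forward the caller's cost kind instead of re-deriving a default.
      return exampleGetIntImmCost(Imm, Ty, CostKind);
    }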

View File

@@ -41,12 +41,13 @@ public:
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}
-  int getIntImmCost(const APInt &Imm, Type *Ty);
-  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
+  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
+                        TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
 };
 } // end namespace llvm
 #endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H

View File

@@ -30,7 +30,8 @@ using namespace llvm;
 //
 //===----------------------------------------------------------------------===//
-int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                                  TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -63,7 +64,8 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
-                                      const APInt &Imm, Type *Ty) {
+                                      const APInt &Imm, Type *Ty,
+                                      TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -177,11 +179,12 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
     break;
   }
-  return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+  return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                        const APInt &Imm, Type *Ty) {
+                                        const APInt &Imm, Type *Ty,
+                                        TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -226,7 +229,7 @@ int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
     return TTI::TCC_Free;
     break;
   }
-  return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+  return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 TargetTransformInfo::PopcntSupportKind
@@ -258,7 +261,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       }
       if (isa<StoreInst>(&I)) {
         Type *MemAccessTy = I.getOperand(0)->getType();
-        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
+        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0,
+                                     TTI::TCK_RecipThroughput);
       }
     }
@@ -365,7 +369,8 @@ static unsigned getNumVectorRegs(Type *Ty) {
 }
 int SystemZTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
@@ -494,7 +499,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(
     // Return the cost of multiple scalar invocation plus the cost of
     // inserting and extracting the values.
     unsigned ScalarCost =
-        getArithmeticInstrCost(Opcode, Ty->getScalarType());
+        getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
     unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
     // FIXME: VF 2 for these FP operations are currently just as
     // expensive as for VF 4.
@@ -521,7 +526,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(
   }
   // Fallback to the default implementation.
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
 }
@@ -684,6 +689,7 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
 }
 int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                     TTI::TargetCostKind CostKind,
                                      const Instruction *I) {
   unsigned DstScalarBits = Dst->getScalarSizeInBits();
   unsigned SrcScalarBits = Src->getScalarSizeInBits();
@@ -764,7 +770,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       // inserting and extracting the values. Base implementation does not
       // realize float->int gets scalarized.
       unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
-                                             Src->getScalarType());
+                                             Src->getScalarType(), CostKind);
       unsigned TotCost = VF * ScalarCost;
       bool NeedsInserts = true, NeedsExtracts = true;
       // FP128 registers do not get inserted or extracted.
@@ -804,7 +810,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     }
   }
-  return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
 }
 // Scalar i8 / i16 operations will typically be made after first extending
@@ -820,7 +826,9 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {
 }
 int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                       Type *CondTy, const Instruction *I) {
+                                       Type *CondTy,
+                                       TTI::TargetCostKind CostKind,
+                                       const Instruction *I) {
   if (!ValTy->isVectorTy()) {
     switch (Opcode) {
     case Instruction::ICmp: {
@@ -895,7 +903,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     }
   }
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
 }
 int SystemZTTIImpl::
@@ -1009,6 +1017,7 @@ static bool isBswapIntrinsicCall(const Value *V) {
 int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                     MaybeAlign Alignment, unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
                                     const Instruction *I) {
   assert(!Src->isVoidTy() && "Invalid type");
@@ -1077,11 +1086,12 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                                ArrayRef<unsigned> Indices,
                                                unsigned Alignment,
                                                unsigned AddressSpace,
+                                               TTI::TargetCostKind CostKind,
                                                bool UseMaskForCond,
                                                bool UseMaskForGaps) {
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                              UseMaskForCond, UseMaskForGaps);
   assert(isa<VectorType>(VecTy) &&
          "Expect a vector type for interleaved memory op");
@@ -1142,21 +1152,23 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
 int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                           ArrayRef<Value *> Args,
                                           FastMathFlags FMF, unsigned VF,
+                                          TTI::TargetCostKind CostKind,
                                           const Instruction *I) {
   int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
   if (Cost != -1)
     return Cost;
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
 }
 int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                           ArrayRef<Type *> Tys,
                                           FastMathFlags FMF,
                                           unsigned ScalarizationCostPassed,
+                                          TTI::TargetCostKind CostKind,
                                           const Instruction *I) {
   int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
   if (Cost != -1)
     return Cost;
   return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, I);
+                                      ScalarizationCostPassed, CostKind, I);
 }
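The recurring arithmetic in the SystemZ scalarization paths above is worth spelling out: when a vector operation has to be emulated element-wise, the reported cost is the per-element cost, queried with the same CostKind, multiplied by the vector factor, plus the insert/extract overhead. A worked sketch (the numbers are made up for illustration):

    // cost(vector op) = VF * cost(scalar op, CostKind) + scalarization overhead
    unsigned scalarizedCost(unsigned VF, unsigned ScalarCost,
                            unsigned InsertExtractOverhead) {
      return VF * ScalarCost + InsertExtractOverhead;
    }
    // e.g. a 4-lane FP divide with scalar cost 10 and overhead 8:
    //   scalarizedCost(4, 10, 8) == 48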

View File

@@ -38,11 +38,12 @@ public:
   unsigned getInliningThresholdMultiplier() { return 3; }
-  int getIntImmCost(const APInt &Imm, Type *Ty);
-  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
+  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                        Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
@@ -75,6 +76,7 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -88,28 +90,35 @@ public:
   unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
                                          const Instruction *I);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
+                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                  unsigned Factor,
                                  ArrayRef<unsigned> Indices,
                                  unsigned Alignment,
                                  unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                             ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1, const Instruction *I = nullptr);
+                            unsigned VF = 1,
+                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+                            const Instruction *I = nullptr);
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
                             FastMathFlags FMF,
                             unsigned ScalarizationCostPassed = UINT_MAX,
+                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                             const Instruction *I = nullptr);
   /// @}
 };

View File

@@ -44,13 +44,14 @@ unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const {
 }
 unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
   unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
-      Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+      Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
   if (auto *VTy = dyn_cast<VectorType>(Ty)) {
     switch (Opcode) {
@@ -64,7 +65,7 @@ unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
           Opd2Info != TTI::OK_UniformConstantValue)
         Cost = VTy->getNumElements() *
                (TargetTransformInfo::TCC_Basic +
-                getArithmeticInstrCost(Opcode, VTy->getElementType()) +
+                getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
                 TargetTransformInfo::TCC_Basic);
       break;
     }
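The WebAssembly override above illustrates the "base cost first, then adjust" idiom: it asks BasicTTIImplBase for the generic cost (now forwarding CostKind) and only overrides the one case it models better, non-uniform vector shifts, which get scalarized. A hedged standalone rendering of that adjustment (the constants and names are illustrative, not the backend's actual helper):

    // Non-uniform vector shifts are scalarized:
    // one extract + one scalar shift + one insert per lane.
    unsigned adjustedVectorShiftCost(unsigned ScalarShiftCost,
                                     unsigned NumElements,
                                     bool UniformShiftAmount,
                                     unsigned GenericCost) {
      if (UniformShiftAmount)
        return GenericCost; // the generic estimate stands
      const unsigned ExtractCost = 1, InsertCost = 1; // ~TCC_Basic each
      return NumElements * (ExtractCost + ScalarShiftCost + InsertCost);
    }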

View File

@@ -57,6 +57,7 @@ public:
   unsigned getRegisterBitWidth(bool Vector) const;
   unsigned getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,

View File

@@ -170,6 +170,7 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
 }
 int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::TargetCostKind CostKind,
                                        TTI::OperandValueKind Op1Info,
                                        TTI::OperandValueKind Op2Info,
                                        TTI::OperandValueProperties Opd1PropInfo,
@@ -256,20 +257,25 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     // The OperandValue properties may not be the same as that of the previous
     // operation; conservatively assume OP_None.
     int Cost =
-        2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
+        2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
+                                   Op2Info,
                                    TargetTransformInfo::OP_None,
                                    TargetTransformInfo::OP_None);
-    Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
+    Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
+                                   Op2Info,
                                    TargetTransformInfo::OP_None,
                                    TargetTransformInfo::OP_None);
-    Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
+    Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
+                                   Op2Info,
                                    TargetTransformInfo::OP_None,
                                    TargetTransformInfo::OP_None);
     if (ISD == ISD::SREM) {
       // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
-      Cost += getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info);
-      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Op1Info, Op2Info);
+      Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
+                                     Op2Info);
+      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
+                                     Op2Info);
     }
     return Cost;
@@ -277,12 +283,14 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     // Vector unsigned division/remainder will be simplified to shifts/masks.
     if (ISD == ISD::UDIV)
-      return getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
+      return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
+                                    Op1Info, Op2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
     else // UREM
-      return getArithmeticInstrCost(Instruction::And, Ty, Op1Info, Op2Info,
+      return getArithmeticInstrCost(Instruction::And, Ty, CostKind,
+                                    Op1Info, Op2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
   }
@@ -596,7 +604,8 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
        Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
     // On AVX512, a packed v32i16 shift left by a constant build_vector
     // is lowered into a vector multiply (vpmullw).
-    return getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info,
+    return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+                                  Op1Info, Op2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
 }
@@ -608,7 +617,8 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
        Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
     // On AVX2, a packed v16i16 shift left by a constant build_vector
     // is lowered into a vector multiply (vpmullw).
-    return getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info,
+    return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+                                  Op1Info, Op2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
@@ -916,13 +926,13 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   if (LT.second.isVector() && (ISD == ISD::SDIV || ISD == ISD::SREM ||
                                ISD == ISD::UDIV || ISD == ISD::UREM)) {
     int ScalarCost = getArithmeticInstrCost(
-        Opcode, Ty->getScalarType(), Op1Info, Op2Info,
+        Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info,
        TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
     return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
   }
   // Fallback to the default implementation.
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info);
 }
 int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
@@ -1353,6 +1363,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
 }
 int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
@@ -1966,7 +1977,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   // The function getSimpleVT only handles simple value types.
   if (!SrcTy.isSimple() || !DstTy.isSimple())
-    return BaseT::getCastInstrCost(Opcode, Dst, Src);
+    return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
   MVT SimpleSrcTy = SrcTy.getSimpleVT();
   MVT SimpleDstTy = DstTy.getSimpleVT();
@@ -2027,10 +2038,11 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       return Entry->Cost;
   }
-  return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
 }
 int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   TTI::TargetCostKind CostKind,
                                    const Instruction *I) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -2214,7 +2226,7 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
   if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
     return LT.first * (ExtraCost + Entry->Cost);
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
 }
 unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
@@ -2222,6 +2234,7 @@ unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
 int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                       ArrayRef<Type *> Tys, FastMathFlags FMF,
                                       unsigned ScalarizationCostPassed,
+                                      TTI::TargetCostKind CostKind,
                                       const Instruction *I) {
   // Costs should match the codegen from:
   // BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
@@ -2682,12 +2695,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
   }
   return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, I);
+                                      ScalarizationCostPassed, CostKind, I);
 }
 int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                       ArrayRef<Value *> Args, FastMathFlags FMF,
-                                      unsigned VF, const Instruction *I) {
+                                      unsigned VF,
+                                      TTI::TargetCostKind CostKind,
+                                      const Instruction *I) {
   static const CostTblEntry AVX512CostTbl[] = {
     { ISD::ROTL, MVT::v8i64, 1 },
     { ISD::ROTL, MVT::v4i64, 1 },
@@ -2777,7 +2792,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
     return LT.first * Entry->Cost;
   }
-  return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF, I);
+  return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF, CostKind, I);
 }
 int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -2933,6 +2948,7 @@ unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty,
 int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                 MaybeAlign Alignment, unsigned AddressSpace,
+                                TTI::TargetCostKind CostKind,
                                 const Instruction *I) {
   // Handle non-power-of-two vectors such as <3 x float>
   if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
@@ -2953,7 +2969,7 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
     if (!isPowerOf2_32(NumElem)) {
       APInt DemandedElts = APInt::getAllOnesValue(NumElem);
       int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment,
-                                        AddressSpace);
+                                        AddressSpace, CostKind);
       int SplitCost = getScalarizationOverhead(Src, DemandedElts,
                                                Opcode == Instruction::Load,
                                                Opcode == Instruction::Store);
@@ -2979,14 +2995,16 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
 int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
                                       unsigned Alignment,
-                                      unsigned AddressSpace) {
+                                      unsigned AddressSpace,
+                                      TTI::TargetCostKind CostKind) {
   bool IsLoad = (Instruction::Load == Opcode);
   bool IsStore = (Instruction::Store == Opcode);
   VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
   if (!SrcVTy)
     // To calculate scalar take the regular cost, without mask
-    return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace);
+    return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace,
+                           CostKind);
   unsigned NumElem = SrcVTy->getNumElements();
   VectorType *MaskTy =
@@ -2999,14 +3017,16 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     int MaskSplitCost =
         getScalarizationOverhead(MaskTy, DemandedElts, false, true);
     int ScalarCompareCost = getCmpSelInstrCost(
-        Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr);
-    int BranchCost = getCFInstrCost(Instruction::Br);
+        Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr,
+        CostKind);
+    int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
     int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
     int ValueSplitCost =
         getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore);
     int MemopCost =
         NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                         MaybeAlign(Alignment), AddressSpace);
+                                         MaybeAlign(Alignment), AddressSpace,
+                                         CostKind);
     return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
   }
@@ -3061,10 +3081,11 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
 }
 int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
-                                           bool IsPairwise) {
+                                           bool IsPairwise,
+                                           TTI::TargetCostKind CostKind) {
   // Just use the default implementation for pair reductions.
   if (IsPairwise)
-    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise);
+    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise, CostKind);
   // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
   // and make it as the cost.
@@ -3134,7 +3155,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       // Type needs to be split. We need LT.first - 1 arithmetic ops.
       VectorType *SingleOpTy =
           VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
-      ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy);
+      ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
       ArithmeticCost *= LT.first - 1;
     }
@@ -3204,7 +3225,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       // Type needs to be split. We need LT.first - 1 arithmetic ops.
       Type *SingleOpTy =
          VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
-      ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy);
+      ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
       ArithmeticCost *= LT.first - 1;
     }
@@ -3221,7 +3242,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy))
         return ArithmeticCost + Entry->Cost;
-    return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise);
+    return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise,
+                                             CostKind);
   }
   unsigned NumVecElts = ValVTy->getNumElements();
@@ -3230,7 +3252,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
   // Special case power of 2 reductions where the scalar type isn't changed
   // by type legalization.
   if (!isPowerOf2_32(NumVecElts) || ScalarSize != MTy.getScalarSizeInBits())
-    return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise);
+    return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise,
+                                             CostKind);
   unsigned ReductionCost = 0;
@@ -3239,7 +3262,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       MTy.getVectorNumElements() < ValVTy->getNumElements()) {
     // Type needs to be split. We need LT.first - 1 arithmetic ops.
     Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
-    ReductionCost = getArithmeticInstrCost(Opcode, Ty);
+    ReductionCost = getArithmeticInstrCost(Opcode, Ty, CostKind);
     ReductionCost *= LT.first - 1;
     NumVecElts = MTy.getVectorNumElements();
   }
@@ -3279,13 +3302,14 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       auto *ShiftTy = VectorType::get(
          Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size);
       ReductionCost += getArithmeticInstrCost(
-          Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue,
+          Instruction::LShr, ShiftTy, CostKind,
+          TargetTransformInfo::OK_AnyValue,
          TargetTransformInfo::OK_UniformConstantValue,
          TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
     }
     // Add the arithmetic op for this level.
-    ReductionCost += getArithmeticInstrCost(Opcode, Ty);
+    ReductionCost += getArithmeticInstrCost(Opcode, Ty, CostKind);
   }
   // Add the final extract element to the cost.
@@ -3409,16 +3433,19 @@ int X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) {
       CmpOpcode = Instruction::ICmp;
   }
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   // Otherwise fall back to cmp+select.
-  return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
-         getCmpSelInstrCost(Instruction::Select, Ty, CondTy, nullptr);
+  return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
+         getCmpSelInstrCost(Instruction::Select, Ty, CondTy, CostKind);
 }
 int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
-                                       bool IsPairwise, bool IsUnsigned) {
+                                       bool IsPairwise, bool IsUnsigned,
+                                       TTI::TargetCostKind CostKind) {
   // Just use the default implementation for pair reductions.
   if (IsPairwise)
-    return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);
+    return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned,
+                                         CostKind);
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -3534,7 +3561,8 @@ int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
   // by type legalization.
   if (!isPowerOf2_32(ValVTy->getNumElements()) ||
       ScalarSize != MTy.getScalarSizeInBits())
-    return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);
+    return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned,
+                                         CostKind);
   // Now handle reduction with the legal type, taking into account size changes
   // at each level.
@@ -3571,7 +3599,8 @@ int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
       VectorType *ShiftTy = VectorType::get(
          Type::getIntNTy(ValTy->getContext(), Size), 128 / Size);
       MinMaxCost += getArithmeticInstrCost(
-          Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue,
+          Instruction::LShr, ShiftTy, TTI::TCK_RecipThroughput,
+          TargetTransformInfo::OK_AnyValue,
          TargetTransformInfo::OK_UniformConstantValue,
          TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
     }
@@ -3599,7 +3628,8 @@ int X86TTIImpl::getIntImmCost(int64_t Val) {
   return 2 * TTI::TCC_Basic;
 }
-int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+                              TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3634,7 +3664,7 @@ int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
 }
 int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                                  Type *Ty) {
+                                  Type *Ty, TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3721,17 +3751,18 @@ int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
   if (Idx == ImmIdx) {
     int NumConstants = divideCeil(BitSize, 64);
-    int Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
+    int Cost = X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
     return (Cost <= NumConstants * TTI::TCC_Basic)
                ? static_cast<int>(TTI::TCC_Free)
                : Cost;
   }
-  return X86TTIImpl::getIntImmCost(Imm, Ty);
+  return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                    const APInt &Imm, Type *Ty) {
+                                    const APInt &Imm, Type *Ty,
+                                    TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy());
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3762,7 +3793,7 @@ int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
     return TTI::TCC_Free;
     break;
   }
-  return X86TTIImpl::getIntImmCost(Imm, Ty);
+  return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }
 unsigned
@@ -3842,7 +3873,8 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
                             ? ST->getGatherOverhead()
                             : ST->getScatterOverhead();
   return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                           MaybeAlign(Alignment), AddressSpace);
+                                           MaybeAlign(Alignment), AddressSpace,
+                                           TTI::TCK_RecipThroughput);
 }
 /// Return the cost of full scalarization of gather / scatter operation.
@@ -3858,6 +3890,7 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
                                 unsigned AddressSpace) {
   unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
   APInt DemandedElts = APInt::getAllOnesValue(VF);
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   int MaskUnpackCost = 0;
   if (VariableMask) {
@@ -3867,14 +3900,15 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
        getScalarizationOverhead(MaskTy, DemandedElts, false, true);
     int ScalarCompareCost =
        getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()),
-                          nullptr);
-    int BranchCost = getCFInstrCost(Instruction::Br);
+                          nullptr, CostKind);
+    int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
     MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
   }
   // The cost of the scalar loads/stores.
   int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                          MaybeAlign(Alignment), AddressSpace);
+                                          MaybeAlign(Alignment), AddressSpace,
+                                          CostKind);
   int InsertExtractCost = 0;
   if (Opcode == Instruction::Load)
@@ -3892,10 +3926,11 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
 }
 /// Calculate the cost of Gather / Scatter operation
-int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
-                                       Value *Ptr, bool VariableMask,
-                                       unsigned Alignment,
-                                       const Instruction *I = nullptr) {
+int X86TTIImpl::getGatherScatterOpCost(
+    unsigned Opcode, Type *SrcVTy, Value *Ptr, bool VariableMask,
+    unsigned Alignment, TTI::TargetCostKind CostKind,
+    const Instruction *I = nullptr) {
   assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
   unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
   PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
@@ -4177,19 +4212,21 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
                                               ArrayRef<unsigned> Indices,
                                               unsigned Alignment,
                                               unsigned AddressSpace,
+                                              TTI::TargetCostKind CostKind,
                                               bool UseMaskForCond,
                                               bool UseMaskForGaps) {
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);
   // We currently Support only fully-interleaved groups, with no gaps.
   // TODO: Support also strided loads (interleaved-groups with gaps).
   if (Indices.size() && Indices.size() != Factor)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace);
+                                             Alignment, AddressSpace,
+                                             CostKind);
   // VecTy for interleave memop is <VF*Factor x Elt>.
   // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
@@ -4201,7 +4238,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
   // (see MachineValueType.h::getVectorVT()).
   if (!LegalVT.isVector())
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace);
+                                             Alignment, AddressSpace,
+                                             CostKind);
   unsigned VF = cast<VectorType>(VecTy)->getNumElements() / Factor;
   Type *ScalarTy = cast<VectorType>(VecTy)->getElementType();
@@ -4217,13 +4255,15 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
       VectorType::get(cast<VectorType>(VecTy)->getElementType(),
                       LegalVT.getVectorNumElements());
   unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
-                                       MaybeAlign(Alignment), AddressSpace);
+                                       MaybeAlign(Alignment), AddressSpace,
+                                       CostKind);
   VectorType *VT = VectorType::get(ScalarTy, VF);
   EVT ETy = TLI->getValueType(DL, VT);
   if (!ETy.isSimple())
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace);
+                                             Alignment, AddressSpace,
+                                             CostKind);
   // TODO: Complete for other data-types and strides.
   // Each combination of Stride, ElementTy and VF results in a different
@@ -4282,7 +4322,7 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
   }
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace);
+                                           Alignment, AddressSpace, CostKind);
 }
 // Get estimation for interleaved load/store operations and strided load.
@@ -4294,12 +4334,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
                                                 ArrayRef<unsigned> Indices,
                                                 unsigned Alignment,
                                                 unsigned AddressSpace,
+                                                TTI::TargetCostKind CostKind,
                                                 bool UseMaskForCond,
                                                 bool UseMaskForGaps) {
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace,
+                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);
   // VecTy for interleave memop is <VF*Factor x Elt>.
@@ -4318,7 +4359,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
       VectorType::get(cast<VectorType>(VecTy)->getElementType(),
                       LegalVT.getVectorNumElements());
   unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
-                                       MaybeAlign(Alignment), AddressSpace);
+                                       MaybeAlign(Alignment), AddressSpace,
+                                       CostKind);
   unsigned VF = cast<VectorType>(VecTy)->getNumElements() / Factor;
   MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
@@ -4421,6 +4463,7 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                           ArrayRef<unsigned> Indices,
                                           unsigned Alignment,
                                           unsigned AddressSpace,
+                                          TTI::TargetCostKind CostKind,
                                           bool UseMaskForCond,
                                           bool UseMaskForGaps) {
   auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
@@ -4434,14 +4477,14 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   };
   if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
     return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
-                                            Alignment, AddressSpace,
+                                            Alignment, AddressSpace, CostKind,
                                            UseMaskForCond, UseMaskForGaps);
   if (ST->hasAVX2())
     return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
-                                          Alignment, AddressSpace,
+                                          Alignment, AddressSpace, CostKind,
                                          UseMaskForCond, UseMaskForGaps);
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace,
+                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
 }
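One more pattern worth noting from the X86 changes above: helpers whose own signatures did not (yet) grow a CostKind parameter, such as getMinMaxCost and getGSScalarCost, pin a local kind and pass it down explicitly, so the nested queries stop relying on implicit defaults. A hedged sketch of the same shape (the helper itself is hypothetical; the TTI calls match the post-patch signatures shown in this diff):

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    static int minMaxAsCmpSelCost(const TargetTransformInfo &TTIRef,
                                  unsigned CmpOpcode, Type *Ty, Type *CondTy) {
      // No CostKind parameter here yet, so fix one locally and forward it.
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
      return TTIRef.getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
             TTIRef.getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
                                       CostKind);
    }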

View File

@ -119,6 +119,7 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF); unsigned getMaxInterleaveFactor(unsigned VF);
int getArithmeticInstrCost( int getArithmeticInstrCost(
unsigned Opcode, Type *Ty, unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@ -128,66 +129,82 @@ public:
int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp); VectorType *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr); const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr); const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
unsigned getScalarizationOverhead(Type *Ty, const APInt &DemandedElts, unsigned getScalarizationOverhead(Type *Ty, const APInt &DemandedElts,
bool Insert, bool Extract); bool Insert, bool Extract);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr); unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace); unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment, bool VariableMask, unsigned Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I); const Instruction *I);
   int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                 const SCEV *Ptr);
   unsigned getAtomicMemIntrinsicMaxElementSize() const;
-  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> Tys, FastMathFlags FMF,
-                            unsigned ScalarizationCostPassed = UINT_MAX,
-                            const Instruction *I = nullptr);
+  int getIntrinsicInstrCost(
+      Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys,
+      FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr);
   int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                             ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1, const Instruction *I = nullptr);
+                            unsigned VF = 1,
+                            TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+                            const Instruction *I = nullptr);
   int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                 bool IsPairwiseForm);
+                                 bool IsPairwiseForm,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
   int getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
   int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                             bool IsPairwiseForm, bool IsUnsigned);
+                             bool IsPairwiseForm, bool IsUnsigned,
+                             TTI::TargetCostKind CostKind);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                  unsigned Factor, ArrayRef<unsigned> Indices,
                                  unsigned Alignment, unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
   int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
                                        unsigned Factor, ArrayRef<unsigned> Indices,
                                        unsigned Alignment, unsigned AddressSpace,
+                                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                        bool UseMaskForCond = false,
                                        bool UseMaskForGaps = false);
   int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
                                      unsigned Factor, ArrayRef<unsigned> Indices,
                                      unsigned Alignment, unsigned AddressSpace,
+                                     TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                      bool UseMaskForCond = false,
                                      bool UseMaskForGaps = false);
   int getIntImmCost(int64_t);
-  int getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
                        TTI::TargetCostKind);
-  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
+                        TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                      TargetTransformInfo::LSRCost &C2);
   bool canMacroFuseCmp();
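
Each of these target hooks now threads a TTI::TargetCostKind through, with defaults chosen so existing callers keep compiling. As a rough sketch (not part of this commit; the helper name is hypothetical), a cost-aware caller can make the kind explicit instead of relying on the default argument:

// Assumes the usual LLVM headers.
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical helper: query the materialization cost of an immediate
// under an explicit cost kind.
static int immCostFor(const TargetTransformInfo &TTI, const APInt &Imm,
                      Type *Ty, TargetTransformInfo::TargetCostKind Kind) {
  return TTI.getIntImmCost(Imm, Ty, Kind);
}

Passing TCK_SizeAndLatency here matches what the constant-hoisting changes below now do explicitly, while the vectorizer changes further down ask for TCK_RecipThroughput.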

View File

@@ -363,10 +363,12 @@ void ConstantHoistingPass::collectConstantCandidates(
   // instruction and operand index.
   if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))
     Cost = TTI->getIntImmCostIntrin(IntrInst->getIntrinsicID(), Idx,
-                                    ConstInt->getValue(), ConstInt->getType());
+                                    ConstInt->getValue(), ConstInt->getType(),
+                                    TargetTransformInfo::TCK_SizeAndLatency);
   else
     Cost = TTI->getIntImmCostInst(Inst->getOpcode(), Idx, ConstInt->getValue(),
-                                  ConstInt->getType());
+                                  ConstInt->getType(),
+                                  TargetTransformInfo::TCK_SizeAndLatency);
 
   // Ignore cheap integer constants.
   if (Cost > TargetTransformInfo::TCC_Basic) {
@@ -416,7 +418,8 @@ void ConstantHoistingPass::collectConstantCandidates(
   // usually lowered to a load from constant pool. Such operation is unlikely
   // to be cheaper than compute it by <Base + Offset>, which can be lowered to
   // an ADD instruction or folded into Load/Store instruction.
-  int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy);
+  int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy,
+                                    TargetTransformInfo::TCK_SizeAndLatency);
   ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];
   ConstCandMapType::iterator Itr;
   bool Inserted;
@@ -582,7 +585,8 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
     for (auto User : ConstCand->Uses) {
       unsigned Opcode = User.Inst->getOpcode();
       unsigned OpndIdx = User.OpndIdx;
-      Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty);
+      Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty,
+                                     TargetTransformInfo::TCK_SizeAndLatency);
       LLVM_DEBUG(dbgs() << "Cost: " << Cost << "\n");
 
       for (auto C2 = S; C2 != E; ++C2) {
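
Constant hoisting trades the cost of folding an immediate into its user against materializing it once, which is why these call sites pin TCK_SizeAndLatency. A minimal sketch of that pattern, with a hypothetical helper name and the same includes as the earlier sketch:

// An immediate is worth hoisting only if folding it into its user is not
// already cheap in size and latency terms.
static bool isCheapImmediate(const TargetTransformInfo &TTI,
                             const Instruction *Inst, unsigned Idx,
                             const ConstantInt *C) {
  int Cost = TTI.getIntImmCostInst(Inst->getOpcode(), Idx, C->getValue(),
                                   C->getType(),
                                   TargetTransformInfo::TCK_SizeAndLatency);
  return Cost <= TargetTransformInfo::TCC_Basic;
}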

View File

@@ -1990,7 +1990,9 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
            "non noop cast is found during rematerialization");
 
     Type *SrcTy = CI->getOperand(0)->getType();
-    Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, CI);
+    Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
+                                 TargetTransformInfo::TCK_SizeAndLatency,
+                                 CI);
 
   } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
     // Cost of the address calculation
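
Note the new parameter order: the cost kind now sits before the optional instruction pointer. A sketch of the resulting call shape (free function and context assumed, not from this commit):

static int castRematCost(const TargetTransformInfo &TTI, CastInst *CI) {
  return TTI.getCastInstrCost(CI->getOpcode(), CI->getType(),
                              CI->getOperand(0)->getType(),
                              TargetTransformInfo::TCK_SizeAndLatency, CI);
}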

View File

@@ -232,7 +232,8 @@ static bool isSafeAndProfitableToSpeculateAroundPHI(
       continue;
 
     int &MatCost = InsertResult.first->second.MatCost;
-    MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType());
+    MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType(),
+                                TargetTransformInfo::TCK_SizeAndLatency);
     NonFreeMat |= MatCost != TTI.TCC_Free;
   }
   if (!NonFreeMat) {
@@ -283,12 +284,15 @@ static bool isSafeAndProfitableToSpeculateAroundPHI(
       int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
       int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
       if (IID)
-        FoldedCost += TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(),
-                                              IncomingC->getType());
+        FoldedCost +=
+            TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(),
+                                    IncomingC->getType(),
+                                    TargetTransformInfo::TCK_SizeAndLatency);
       else
         FoldedCost +=
             TTI.getIntImmCostInst(UserI->getOpcode(), Idx,
                                   IncomingC->getValue(), IncomingC->getType(),
-                                  IncomingC->getValue(), IncomingC->getType());
+                                  TargetTransformInfo::TCK_SizeAndLatency);
 
       // If we accumulate more folded cost for this incoming constant than
       // materialized cost, then we'll regress any edge with this constant so
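
The comparison this pass performs can be condensed into one sketch (names hypothetical; only the two TTI calls are taken from the diff): a constant is worth speculating only while folding it into its user costs no more than materializing it.

static bool foldingIsProfitable(const TargetTransformInfo &TTI,
                                const Instruction *UserI, unsigned Idx,
                                const ConstantInt *C) {
  auto Kind = TargetTransformInfo::TCK_SizeAndLatency;
  int MatCost = TTI.getIntImmCost(C->getValue(), C->getType(), Kind);
  int FoldedCost = TTI.getIntImmCostInst(UserI->getOpcode(), Idx,
                                         C->getValue(), C->getType(), Kind);
  return FoldedCost <= MatCost;
}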

View File

@@ -3277,7 +3277,8 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
   // to be vectors, so we need to extract individual elements from there,
   // execute VF scalar calls, and then gather the result into the vector return
   // value.
-  unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys);
+  unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys,
+                                                 TTI::TCK_RecipThroughput);
   if (VF == 1)
     return ScalarCallCost;
@@ -3302,7 +3303,8 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
     return Cost;
 
   // If the corresponding vector cost is cheaper, return its cost.
-  unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys);
+  unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys,
+                                                 TTI::TCK_RecipThroughput);
   if (VectorCallCost < Cost) {
     NeedToScalarize = false;
     return VectorCallCost;
@@ -3320,7 +3322,9 @@ unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
     FMF = FPMO->getFastMathFlags();
 
   SmallVector<Value *, 4> Operands(CI->arg_operands());
-  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF, CI);
+  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF,
+                                   TargetTransformInfo::TCK_RecipThroughput,
+                                   CI);
 }
 
 static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
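
The vectorizer consistently asks for reciprocal throughput, since it compares per-iteration execution rates of the scalar and vector plans. A sketch mirroring getVectorIntrinsicCost above (assumes the LLVM headers used by LoopVectorize.cpp; the helper name is hypothetical):

static int intrinsicCostForVF(const TargetTransformInfo &TTI,
                              Intrinsic::ID ID, CallInst *CI,
                              FastMathFlags FMF, unsigned VF) {
  SmallVector<Value *, 4> Operands(CI->arg_operands());
  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF,
                                   TargetTransformInfo::TCK_RecipThroughput,
                                   CI);
}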
@@ -5832,7 +5836,8 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // vectorized loop where the user of it is a vectorized instruction.
   const MaybeAlign Alignment = getLoadStoreAlignment(I);
   Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
-                                   Alignment, AS);
+                                   Alignment, AS,
+                                   TTI::TCK_RecipThroughput);
 
   // Get the overhead of the extractelement and insertelement instructions
   // we might create due to scalarization.
@@ -5860,6 +5865,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
   Value *Ptr = getLoadStorePointerOperand(I);
   unsigned AS = getLoadStoreAddressSpace(I);
   int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+  enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
          "Stride should be 1 or -1 for consecutive memory access");
@@ -5867,9 +5873,11 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
   unsigned Cost = 0;
   if (Legal->isMaskRequired(I))
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
-                                      Alignment ? Alignment->value() : 0, AS);
+                                      Alignment ? Alignment->value() : 0, AS,
+                                      CostKind);
   else
-    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
+    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
+                                CostKind, I);
 
   bool Reverse = ConsecutiveStride < 0;
   if (Reverse)
@@ -5883,16 +5891,19 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
   auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
   const MaybeAlign Alignment = getLoadStoreAlignment(I);
   unsigned AS = getLoadStoreAddressSpace(I);
+  enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   if (isa<LoadInst>(I)) {
     return TTI.getAddressComputationCost(ValTy) +
-           TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
+           TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS,
+                               CostKind) +
            TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
   }
   StoreInst *SI = cast<StoreInst>(I);
 
   bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
   return TTI.getAddressComputationCost(ValTy) +
-         TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
+         TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
+                             CostKind) +
          (isLoopInvariantStoreValue
              ? 0
              : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
@@ -5909,7 +5920,9 @@ unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
   return TTI.getAddressComputationCost(VectorTy) +
          TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
                                     Legal->isMaskRequired(I),
-                                    Alignment ? Alignment->value() : 0, I);
+                                    Alignment ? Alignment->value() : 0,
+                                    TargetTransformInfo::TCK_RecipThroughput,
+                                    I);
 }
 
 unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
@@ -5938,7 +5951,8 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
       Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed();
   unsigned Cost = TTI.getInterleavedMemoryOpCost(
       I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlign().value(), AS, Legal->isMaskRequired(I), UseMaskForGaps);
+      Group->getAlign().value(), AS, TTI::TCK_RecipThroughput,
+      Legal->isMaskRequired(I), UseMaskForGaps);
 
   if (Group->isReverse()) {
     // TODO: Add support for reversed masked interleaved access.
@@ -5960,7 +5974,8 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
     unsigned AS = getLoadStoreAddressSpace(I);
     return TTI.getAddressComputationCost(ValTy) +
-           TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
+           TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS,
+                               TTI::TCK_RecipThroughput, I);
   }
   return getWideningCost(I, VF);
 }
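
These memory-cost helpers all share one shape: hoist a single cost kind into a local and thread it through every query. A condensed sketch under that assumption (types taken as already in scope; not a function from this commit):

static unsigned wideLoadCost(const TargetTransformInfo &TTI, Type *ValTy,
                             MaybeAlign Alignment, unsigned AS) {
  // One kind, reused by every query in the helper.
  TargetTransformInfo::TargetCostKind CostKind =
      TargetTransformInfo::TCK_RecipThroughput;
  return TTI.getAddressComputationCost(ValTy) +
         TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS,
                             CostKind);
}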
@@ -6182,6 +6197,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
   VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);
   auto SE = PSE.getSE();
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
   // TODO: We need to estimate the cost of intrinsic calls.
   switch (I->getOpcode()) {
@@ -6238,7 +6254,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       return (Phi->getNumIncomingValues() - 1) *
              TTI.getCmpSelInstrCost(
                  Instruction::Select, ToVectorTy(Phi->getType(), VF),
-                 ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF));
+                 ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF),
+                 CostKind);
 
     return TTI.getCFInstrCost(Instruction::PHI);
   }
@@ -6260,7 +6277,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       Cost += VF * TTI.getCFInstrCost(Instruction::PHI);
 
     // The cost of the non-predicated instruction.
-    Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy);
+    Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind);
 
     // The cost of insertelement and extractelement instructions needed for
     // scalarization.
@@ -6301,13 +6318,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     SmallVector<const Value *, 4> Operands(I->operand_values());
     unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
     return N * TTI.getArithmeticInstrCost(
-                   I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+                   I->getOpcode(), VectorTy, CostKind,
+                   TargetTransformInfo::OK_AnyValue,
                    Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
   }
   case Instruction::FNeg: {
     unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
     return N * TTI.getArithmeticInstrCost(
-                   I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+                   I->getOpcode(), VectorTy, CostKind,
+                   TargetTransformInfo::OK_AnyValue,
                    TargetTransformInfo::OK_AnyValue,
                    TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
                    I->getOperand(0), I);
@@ -6320,7 +6339,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     if (!ScalarCond)
       CondTy = VectorType::get(CondTy, VF);
 
-    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, I);
+    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
+                                  CostKind, I);
   }
   case Instruction::ICmp:
   case Instruction::FCmp: {
@@ -6329,7 +6349,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
       ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
     VectorTy = ToVectorTy(ValTy, VF);
-    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, I);
+    return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, CostKind,
+                                  I);
   }
   case Instruction::Store:
   case Instruction::Load: {
@@ -6362,7 +6383,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     if (isOptimizableIVTruncate(I, VF)) {
       auto *Trunc = cast<TruncInst>(I);
       return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(),
-                                  Trunc->getSrcTy(), Trunc);
+                                  Trunc->getSrcTy(), CostKind, Trunc);
     }
 
     Type *SrcScalarTy = I->getOperand(0)->getType();
@@ -6388,7 +6409,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     }
 
     unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
-    return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I);
+    return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy,
+                                    CostKind, I);
   }
   case Instruction::Call: {
     bool NeedToScalarize;
@@ -6401,7 +6423,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   default:
     // The cost of executing VF copies of the scalar instruction. This opcode
     // is unknown. Assume that it is the same as 'mul'.
-    return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) +
+    return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
+                                           CostKind) +
           getScalarizationOverhead(I, VF);
  } // end of switch.
 }
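
The default case above prices any opcode the vectorizer cannot classify as VF scalar multiplies plus the cost of moving values in and out of vectors. As a worked sketch (hypothetical helper; the scalarization overhead is passed in rather than recomputed):

static unsigned unknownOpcodeCost(const TargetTransformInfo &TTI,
                                  Type *VectorTy, unsigned VF,
                                  unsigned ScalarizationOverhead) {
  // Unknown opcodes are assumed to cost the same as 'mul', per the
  // comment in the diff above.
  return VF * TTI.getArithmeticInstrCost(
                  Instruction::Mul, VectorTy,
                  TargetTransformInfo::TCK_RecipThroughput) +
         ScalarizationOverhead;
}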

View File

@@ -3259,7 +3259,8 @@ getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI,
         VectorType::get(Arg->getType(), VecTy->getNumElements()));
 
     // If the corresponding vector call is cheaper, return its cost.
-    LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys);
+    LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
+                                    TTI::TCK_RecipThroughput);
   }
   return {IntrinsicCost, LibCost};
 }
@@ -3273,6 +3274,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
   else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))
     ScalarTy = CI->getOperand(0)->getType();
   VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
   // If we have computed a smaller type for the expression, update VecTy so
   // that the costs will be accurate.
@@ -3380,7 +3382,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
                 Ext->getOpcode(), Ext->getType(), VecTy, i);
             // Add back the cost of s|zext which is subtracted separately.
             DeadCost += TTI->getCastInstrCost(
-                Ext->getOpcode(), Ext->getType(), E->getType(), Ext);
+                Ext->getOpcode(), Ext->getType(), E->getType(), CostKind,
+                Ext);
             continue;
           }
         }
@@ -3404,7 +3407,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     case Instruction::BitCast: {
       Type *SrcTy = VL0->getOperand(0)->getType();
       int ScalarEltCost =
-          TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, VL0);
+          TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, CostKind,
+                                VL0);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
@@ -3417,7 +3421,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       // Check if the values are candidates to demote.
       if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
         VecCost = ReuseShuffleCost +
-                  TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, VL0);
+                  TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy,
+                                        CostKind, VL0);
       }
       return VecCost - ScalarCost;
     }
@@ -3426,13 +3431,15 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     case Instruction::Select: {
      // Calculate the cost of this instruction.
      int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
-                                                 Builder.getInt1Ty(), VL0);
+                                                 Builder.getInt1Ty(),
+                                                 CostKind, VL0);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
      VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
-     int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VL0);
+     int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
+                                           CostKind, VL0);
      return ReuseShuffleCost + VecCost - ScalarCost;
    }
    case Instruction::FNeg:
@@ -3493,13 +3500,15 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
      SmallVector<const Value *, 4> Operands(VL0->operand_values());
      int ScalarEltCost = TTI->getArithmeticInstrCost(
-         E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0);
+         E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
+         Operands, VL0);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
      int VecCost = TTI->getArithmeticInstrCost(
-         E->getOpcode(), VecTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0);
+         E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
+         Operands, VL0);
      return ReuseShuffleCost + VecCost - ScalarCost;
    }
    case Instruction::GetElementPtr: {
@@ -3509,26 +3518,30 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
          TargetTransformInfo::OK_UniformConstantValue;
 
      int ScalarEltCost =
-         TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
+         TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, CostKind,
+                                     Op1VK, Op2VK);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
      int VecCost =
-         TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
+         TTI->getArithmeticInstrCost(Instruction::Add, VecTy, CostKind,
+                                     Op1VK, Op2VK);
      return ReuseShuffleCost + VecCost - ScalarCost;
    }
    case Instruction::Load: {
      // Cost of wide load - cost of scalar loads.
      MaybeAlign alignment(cast<LoadInst>(VL0)->getAlignment());
      int ScalarEltCost =
-         TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
+         TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0,
+                              CostKind, VL0);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
      int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
      int VecLdCost =
-         TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0, VL0);
+         TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0,
+                              CostKind, VL0);
      if (!E->ReorderIndices.empty()) {
        // TODO: Merge this shuffle with the ReuseShuffleCost.
        VecLdCost += TTI->getShuffleCost(
@@ -3543,12 +3556,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
          cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
      MaybeAlign Alignment(SI->getAlignment());
      int ScalarEltCost =
-         TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0, VL0);
+         TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
+                              CostKind, VL0);
      if (NeedToShuffleReuses)
        ReuseShuffleCost = -(ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
      int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
-                                          VecTy, Alignment, 0, VL0);
+                                          VecTy, Alignment, 0, CostKind, VL0);
      if (IsReorder) {
        // TODO: Merge this shuffle with the ReuseShuffleCost.
        VecStCost += TTI->getShuffleCost(
@@ -3570,7 +3584,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
        FMF = FPMO->getFastMathFlags();
 
      int ScalarEltCost =
-         TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
+         TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF, 1, CostKind);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
@@ -3596,34 +3610,34 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
      if (NeedToShuffleReuses) {
        for (unsigned Idx : E->ReuseShuffleIndices) {
          Instruction *I = cast<Instruction>(VL[Idx]);
-         ReuseShuffleCost -= TTI->getInstructionCost(
-             I, TargetTransformInfo::TCK_RecipThroughput);
+         ReuseShuffleCost -= TTI->getInstructionCost(I, CostKind);
        }
        for (Value *V : VL) {
          Instruction *I = cast<Instruction>(V);
-         ReuseShuffleCost += TTI->getInstructionCost(
-             I, TargetTransformInfo::TCK_RecipThroughput);
+         ReuseShuffleCost += TTI->getInstructionCost(I, CostKind);
        }
      }
      for (Value *V : VL) {
        Instruction *I = cast<Instruction>(V);
        assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
-       ScalarCost += TTI->getInstructionCost(
-           I, TargetTransformInfo::TCK_RecipThroughput);
+       ScalarCost += TTI->getInstructionCost(I, CostKind);
      }
      // VecCost is equal to sum of the cost of creating 2 vectors
      // and the cost of creating shuffle.
      int VecCost = 0;
      if (Instruction::isBinaryOp(E->getOpcode())) {
-       VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy);
-       VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy);
+       VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
+       VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
+                                              CostKind);
      } else {
        Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
        Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
        VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size());
        VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size());
-       VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty);
-       VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty);
+       VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
+                                       CostKind);
+       VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
+                                        CostKind);
      }
      VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
      return ReuseShuffleCost + VecCost - ScalarCost;
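
The alternate-opcode case above illustrates SLP's costing identity: every scalar and vector query shares one CostKind, and the profit is the vector cost plus shuffles minus the summed scalar costs. A condensed sketch of just that arithmetic (signature hypothetical; only the TTI calls come from the diff):

static int altShuffleProfit(const TargetTransformInfo &TTI,
                            ArrayRef<Value *> VL, VectorType *VecTy,
                            unsigned Opcode, unsigned AltOpcode) {
  TargetTransformInfo::TargetCostKind CostKind =
      TargetTransformInfo::TCK_RecipThroughput;
  int ScalarCost = 0;
  for (Value *V : VL)
    ScalarCost += TTI.getInstructionCost(cast<Instruction>(V), CostKind);
  int VecCost = TTI.getArithmeticInstrCost(Opcode, VecTy, CostKind) +
                TTI.getArithmeticInstrCost(AltOpcode, VecTy, CostKind);
  // Plus the shuffle that blends the two vectorized opcodes.
  VecCost += TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
  return VecCost - ScalarCost;
}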