
[TTI] NFC: Change get[Interleaved]MemoryOpCost to return InstructionCost

This patch migrates the TTI cost interfaces to return an InstructionCost.

See this patch for the introduction of the type: https://reviews.llvm.org/D91174
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D100205
commit 7016294be8 (parent 32a70b87c0)
Author: Sander de Smalen
Date: 2021-01-23 12:14:21 +00:00

16 changed files with 141 additions and 133 deletions
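The patch is NFC, but the type swap is what prepares for later changes: InstructionCost wraps an integer cost together with a validity state, so a cost that cannot be computed (for example, for a type that cannot be legalized) can propagate through sums instead of being silently folded into an int. Below is a minimal stand-in sketch of that behavior, assuming the interface introduced in D91174; the class and member names are illustrative, not the real llvm::InstructionCost.

#include <optional>

// Stand-in for llvm::InstructionCost (introduced in D91174): an integer
// cost plus a validity state. Names here are illustrative only.
class Cost {
  int Value = 0;
  bool Valid = true;

public:
  Cost(int V = 0) : Value(V) {}
  static Cost invalid() {
    Cost C;
    C.Valid = false;
    return C;
  }
  bool isValid() const { return Valid; }
  // Like InstructionCost::getValue(): empty when the cost is invalid.
  std::optional<int> getValue() const {
    return Valid ? std::optional<int>(Value) : std::nullopt;
  }
  Cost operator+(const Cost &RHS) const {
    Cost C(Value + RHS.Value);
    C.Valid = Valid && RHS.Valid; // an invalid operand poisons the sum
    return C;
  }
};

// A hook in the new style: the return type can now say "no sensible cost",
// which a plain int return had no way to signal.
Cost getMemoryOpCost(bool TypeIsLegal) {
  return TypeIsLegal ? Cost(1) : Cost::invalid();
}

int main() {
  Cost Total = getMemoryOpCost(true) + getMemoryOpCost(false);
  return Total.isValid() ? 1 : 0; // invalidity is observable, not a magic number
}

The hunks that follow are then mostly mechanical: signatures change from int/unsigned to InstructionCost, and intermediate cost variables change type to match.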

View File: include/llvm/Analysis/TargetTransformInfo.h

@@ -1129,10 +1129,11 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
/// \return The cost of Load and Store instructions.
int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions.
InstructionCost getMaskedMemoryOpCost(
@@ -1163,7 +1164,7 @@ public:
/// \p AddressSpace is address space of the pointer.
/// \p UseMaskForCond indicates if the memory access is predicated.
/// \p UseMaskForGaps indicates if gaps should be masked.
int getInterleavedMemoryOpCost(
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
@@ -1590,10 +1591,11 @@ public:
const Instruction *I) = 0;
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual InstructionCost
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
@@ -1604,7 +1606,7 @@ public:
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) = 0;
virtual int getInterleavedMemoryOpCost(
virtual InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
@@ -2070,9 +2072,10 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace, TTI::TargetCostKind CostKind,
const Instruction *I) override {
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) override {
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind, I);
}
@@ -2090,12 +2093,10 @@ public:
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
}
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
bool UseMaskForCond,
bool UseMaskForGaps) override {
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) override {
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);

View File: include/llvm/Analysis/TargetTransformInfoImpl.h

@@ -537,9 +537,10 @@ public:
return 1;
}
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace, TTI::TargetCostKind CostKind,
const Instruction *I) const {
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) const {
return 1;
}

View File: include/llvm/CodeGen/BasicTTIImpl.h

@@ -982,10 +982,10 @@ public:
return LT.first;
}
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
assert(!Src->isVoidTy() && "Invalid type");
// Assume types, such as structs, are expensive.
if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
@@ -993,7 +993,7 @@ public:
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
// Assuming that all loads of legal types cost 1.
unsigned Cost = LT.first;
InstructionCost Cost = LT.first;
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;
@@ -1036,7 +1036,7 @@ public:
//
// First, compute the cost of extracting the individual addresses and the
// individual memory operations.
int LoadCost =
InstructionCost LoadCost =
VT->getNumElements() *
(getVectorInstrCost(
Instruction::ExtractElement,
@@ -1071,7 +1071,7 @@ public:
return LoadCost + PackingCost + ConditionalCost;
}
unsigned getInterleavedMemoryOpCost(
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) {
@@ -1186,7 +1186,7 @@ public:
}
if (!UseMaskForCond)
return *Cost.getValue();
return Cost;
Type *I8Type = Type::getInt8Ty(VT->getContext());
auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
@@ -1219,7 +1219,7 @@ public:
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
CostKind);
return *Cost.getValue();
return Cost;
}
/// Get intrinsic cost based on arguments.

View File: lib/Analysis/TargetTransformInfo.cpp

@@ -815,14 +815,13 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
return Cost;
}
int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) const {
InstructionCost TargetTransformInfo::getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind, I);
InstructionCost Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment,
AddressSpace, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -845,11 +844,11 @@ InstructionCost TargetTransformInfo::getGatherScatterOpCost(
return Cost;
}
int TargetTransformInfo::getInterleavedMemoryOpCost(
InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) const {
int Cost = TTIImpl->getInterleavedMemoryOpCost(
InstructionCost Cost = TTIImpl->getInterleavedMemoryOpCost(
Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
assert(Cost >= 0 && "TTI should not produce negative costs!");

View File: lib/Target/AArch64/AArch64TargetTransformInfo.cpp

@@ -931,7 +931,7 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
Optional<unsigned> MaxNumVScale = getMaxVScale();
assert(MaxNumVScale && "Expected valid max vscale value");
unsigned MemOpCost =
InstructionCost MemOpCost =
getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
unsigned MaxNumElementsPerGather =
MaxNumVScale.getValue() * LegalVF.getKnownMinValue();
@@ -942,10 +942,11 @@ bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
}
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return 1;
@@ -991,7 +992,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
return LT.first;
}
int AArch64TTIImpl::getInterleavedMemoryOpCost(
InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
@@ -1018,7 +1019,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(
}
int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
int Cost = 0;
InstructionCost Cost = 0;
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
for (auto *I : Tys) {
if (!I->isVectorTy())
@@ -1028,7 +1029,7 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
}
return Cost;
return *Cost.getValue();
}
unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
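One boundary is visible above: getCostOfKeepingLiveOverCall still returns a plain int, so the accumulated InstructionCost is unwrapped with *Cost.getValue() on return. A hedged sketch of that idiom, again with a simplified stand-in rather than the real class:

#include <cassert>
#include <optional>

// Simplified stand-in: like InstructionCost::getValue(), Val is an
// empty optional when the cost is invalid.
struct Cost {
  std::optional<int> Val{0};
  Cost &operator+=(int V) {
    if (Val)
      *Val += V;
    return *this;
  }
};

// Boundary idiom from the diff: accumulate as Cost, convert to int only
// on return, after checking the cost is valid.
int getCostOfKeepingLiveOverCall() {
  Cost C;
  C += 1; // e.g. store of a live vector register
  C += 1; // e.g. reload after the call
  assert(C.Val.has_value() && "expected a valid cost");
  return *C.Val; // mirrors `return *Cost.getValue();`
}

int main() { return getCostOfKeepingLiveOverCall() == 2 ? 0 : 1; }

The same unwrap appears in the SystemZ unrolling heuristic further down, where the store count feeds integer division.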

View File: lib/Target/AArch64/AArch64TargetTransformInfo.h

@@ -176,10 +176,10 @@ public:
bool IsZeroCmp) const;
bool useNeonVector(const Type *Ty) const;
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
@@ -258,7 +258,7 @@ public:
return BaseT::isLegalNTStore(DataType, Alignment);
}
int getInterleavedMemoryOpCost(
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,

View File: lib/Target/ARM/ARMTargetTransformInfo.cpp

@@ -1382,10 +1382,11 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return BaseCost;
}
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return 1;
@@ -1445,7 +1446,7 @@ ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
return cast<FixedVectorType>(Src)->getNumElements() * 8;
}
int ARMTTIImpl::getInterleavedMemoryOpCost(
InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {

View File: lib/Target/ARM/ARMTargetTransformInfo.h

@@ -226,16 +226,16 @@ public:
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind);
int getInterleavedMemoryOpCost(
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,

View File: lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

@@ -154,11 +154,11 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
return 0;
}
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
@@ -231,7 +231,7 @@ InstructionCost HexagonTTIImpl::getGatherScatterOpCost(
Alignment, CostKind, I);
}
unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(
InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {

View File: lib/Target/Hexagon/HexagonTargetTransformInfo.h

@@ -114,10 +114,10 @@ public:
TTI::TargetCostKind CostKind);
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
const SCEV *S);
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
@@ -129,7 +129,7 @@ public:
Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I);
unsigned getInterleavedMemoryOpCost(
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,

View File: lib/Target/PowerPC/PPCTargetTransformInfo.cpp

@@ -1105,10 +1105,11 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return Cost;
}
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
if (TLI->getValueType(DL, Src, true) == MVT::Other)
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
@@ -1117,8 +1118,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
InstructionCost Cost =
BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;
@@ -1185,7 +1186,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return Cost;
}
int PPCTTIImpl::getInterleavedMemoryOpCost(
InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
@@ -1201,9 +1202,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
// Firstly, the cost of load/store operation.
int Cost =
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
CostKind);
InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
AddressSpace, CostKind);
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant

View File: lib/Target/PowerPC/PPCTargetTransformInfo.h

@@ -121,11 +121,11 @@ public:
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,

View File: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

@@ -246,7 +246,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// Find out if L contains a call, what the machine instruction count
// estimate is, and how many stores there are.
bool HasCall = false;
unsigned NumStores = 0;
InstructionCost NumStores = 0;
for (auto &BB : L->blocks())
for (auto &I : *BB) {
if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
@@ -270,7 +270,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// The z13 processor will run out of store tags if too many stores
// are fed into it too quickly. Therefore make sure there are not
// too many stores in the resulting unrolled loop.
unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);
unsigned const NumStoresVal = *NumStores.getValue();
unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX);
if (HasCall) {
// Only allow full unrolling if loop has any calls.
@@ -1059,10 +1060,11 @@ static bool isBswapIntrinsicCall(const Value *V) {
return false;
}
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type");
// TODO: Handle other cost kinds.
@@ -1129,7 +1131,7 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// needed for using / defining the vector operands. The SystemZ version does
// roughly the same but bases the computations on vector permutations
// instead.
int SystemZTTIImpl::getInterleavedMemoryOpCost(
InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {

View File: lib/Target/SystemZ/SystemZTargetTransformInfo.h

@@ -103,11 +103,12 @@ public:
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,

View File: lib/Target/X86/X86TargetTransformInfo.cpp

@@ -3192,10 +3192,11 @@ unsigned X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
return Cost;
}
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput) {
if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
@@ -3227,8 +3228,8 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// Assume that all other non-power-of-two numbers are scalarized.
if (!isPowerOf2_32(NumElem)) {
APInt DemandedElts = APInt::getAllOnesValue(NumElem);
int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment,
AddressSpace, CostKind);
InstructionCost Cost = BaseT::getMemoryOpCost(
Opcode, VTy->getScalarType(), Alignment, AddressSpace, CostKind);
int SplitCost = getScalarizationOverhead(VTy, DemandedElts,
Opcode == Instruction::Load,
Opcode == Instruction::Store);
@@ -3286,7 +3287,7 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
InstructionCost ValueSplitCost =
getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore);
int MemopCost =
InstructionCost MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace, CostKind);
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
@@ -4116,8 +4117,9 @@ int X86TTIImpl::getScatterOverhead() const {
// Return an average cost of Gather / Scatter instruction, maybe improved later.
// FIXME: Add TargetCostKind support.
int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, const Value *Ptr,
Align Alignment, unsigned AddressSpace) {
InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy,
const Value *Ptr, Align Alignment,
unsigned AddressSpace) {
assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");
unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
@@ -4210,9 +4212,9 @@ InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
}
// The cost of the scalar loads/stores.
int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
MaybeAlign(Alignment), AddressSpace,
CostKind);
InstructionCost MemoryOpCost =
VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
MaybeAlign(Alignment), AddressSpace, CostKind);
int InsertExtractCost = 0;
if (Opcode == Instruction::Load)
@@ -4520,7 +4522,7 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() {
// computing the cost using a generic formula as a function of generic
// shuffles. We therefore use a lookup table instead, filled according to
// the instruction sequences that codegen currently generates.
int X86TTIImpl::getInterleavedMemoryOpCostAVX2(
InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
@@ -4562,9 +4564,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(
// Get the cost of one memory operation.
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
LegalVT.getVectorNumElements());
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
MaybeAlign(Alignment), AddressSpace,
CostKind);
InstructionCost MemOpCost = getMemoryOpCost(
Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind);
auto *VT = FixedVectorType::get(ScalarTy, VF);
EVT ETy = TLI->getValueType(DL, VT);
@@ -4637,7 +4638,7 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(
// \p Indices contains indices for strided load.
// \p Factor - the factor of interleaving.
// AVX-512 provides 3-src shuffles that significantly reduces the cost.
int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
@@ -4661,9 +4662,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
// Get the cost of one memory operation.
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
LegalVT.getVectorNumElements());
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
MaybeAlign(Alignment), AddressSpace,
CostKind);
InstructionCost MemOpCost = getMemoryOpCost(
Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind);
unsigned VF = VecTy->getNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
@@ -4719,8 +4719,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc)
NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
int Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost +
NumOfUnfoldedLoads * MemOpCost + NumOfMoves;
InstructionCost Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost +
NumOfUnfoldedLoads * MemOpCost + NumOfMoves;
return Cost;
}
@@ -4755,12 +4755,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
// The SK_MergeTwoSrc shuffle clobbers one of src operands.
// We need additional instructions to keep sources.
unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
int Cost = NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
NumOfMoves;
InstructionCost Cost =
NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
NumOfMoves;
return Cost;
}
int X86TTIImpl::getInterleavedMemoryOpCost(
InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
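Note how the cost formulas above keep their shape: counts such as NumOfResults and NumOfShufflesPerResult stay unsigned, and only the final sum changes type. This relies on InstructionCost being implicitly constructible from an integer; a small stand-in sketch under that assumption:

// Sketch: cost formulas keep their integer operands; only the result type
// changes. Implicit construction from an integer is assumed, as in the
// real InstructionCost.
struct Cost {
  long Val;
  Cost(long V = 0) : Val(V) {} // implicit on purpose
  friend Cost operator*(unsigned N, Cost C) { return Cost(N * C.Val); }
  friend Cost operator+(Cost A, Cost B) { return Cost(A.Val + B.Val); }
};

int main() {
  unsigned NumOfResults = 2, NumOfShufflesPerResult = 3;
  unsigned NumOfUnfoldedLoads = 2, NumOfMoves = 1;
  Cost ShuffleCost = 1, MemOpCost = 1;
  // Same shape as the AVX512 formula in the hunk above: the unsigned
  // counts convert where they meet a Cost operand.
  Cost Total = NumOfResults * NumOfShufflesPerResult * ShuffleCost +
               NumOfUnfoldedLoads * MemOpCost + NumOfMoves;
  return Total.Val == 9 ? 0 : 1;
}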

View File: lib/Target/X86/X86TargetTransformInfo.h

@@ -140,10 +140,10 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
bool Insert, bool Extract);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
@@ -186,17 +186,17 @@ public:
bool IsPairwiseForm, bool IsUnsigned,
TTI::TargetCostKind CostKind);
int getInterleavedMemoryOpCost(
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
int getInterleavedMemoryOpCostAVX512(
InstructionCost getInterleavedMemoryOpCostAVX512(
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
int getInterleavedMemoryOpCostAVX2(
InstructionCost getInterleavedMemoryOpCostAVX2(
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
@@ -240,8 +240,9 @@ private:
InstructionCost getGSScalarCost(unsigned Opcode, Type *DataTy,
bool VariableMask, Align Alignment,
unsigned AddressSpace);
int getGSVectorCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
Align Alignment, unsigned AddressSpace);
InstructionCost getGSVectorCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, Align Alignment,
unsigned AddressSpace);
int getGatherOverhead() const;
int getScatterOverhead() const;