mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[TTI] NFC: Change get[Interleaved]MemoryOpCost to return InstructionCost
This patch migrates the TTI cost interfaces to return an InstructionCost.
See this patch for the introduction of the type: https://reviews.llvm.org/D91174
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D100205
This commit is contained in:
parent
32a70b87c0
commit
7016294be8
@ -1129,10 +1129,11 @@ public:
|
|||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
|
||||||
|
|
||||||
/// \return The cost of Load and Store instructions.
|
/// \return The cost of Load and Store instructions.
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
InstructionCost
|
||||||
unsigned AddressSpace,
|
getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
|
unsigned AddressSpace,
|
||||||
const Instruction *I = nullptr) const;
|
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
|
||||||
|
const Instruction *I = nullptr) const;
|
||||||
|
|
||||||
/// \return The cost of masked Load and Store instructions.
|
/// \return The cost of masked Load and Store instructions.
|
||||||
InstructionCost getMaskedMemoryOpCost(
|
InstructionCost getMaskedMemoryOpCost(
|
||||||
@ -1163,7 +1164,7 @@ public:
|
|||||||
/// \p AddressSpace is address space of the pointer.
|
/// \p AddressSpace is address space of the pointer.
|
||||||
/// \p UseMaskForCond indicates if the memory access is predicated.
|
/// \p UseMaskForCond indicates if the memory access is predicated.
|
||||||
/// \p UseMaskForGaps indicates if gaps should be masked.
|
/// \p UseMaskForGaps indicates if gaps should be masked.
|
||||||
int getInterleavedMemoryOpCost(
|
InstructionCost getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace,
|
Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
|
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
|
||||||
@ -1590,10 +1591,11 @@ public:
|
|||||||
const Instruction *I) = 0;
|
const Instruction *I) = 0;
|
||||||
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
|
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||||
unsigned Index) = 0;
|
unsigned Index) = 0;
|
||||||
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace,
|
Align Alignment,
|
||||||
TTI::TargetCostKind CostKind,
|
unsigned AddressSpace,
|
||||||
const Instruction *I) = 0;
|
TTI::TargetCostKind CostKind,
|
||||||
|
const Instruction *I) = 0;
|
||||||
virtual InstructionCost
|
virtual InstructionCost
|
||||||
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
||||||
unsigned AddressSpace,
|
unsigned AddressSpace,
|
||||||
@ -1604,7 +1606,7 @@ public:
|
|||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I = nullptr) = 0;
|
const Instruction *I = nullptr) = 0;
|
||||||
|
|
||||||
virtual int getInterleavedMemoryOpCost(
|
virtual InstructionCost getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
||||||
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
|
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
|
||||||
@ -2070,9 +2072,10 @@ public:
|
|||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
|
||||||
return Impl.getVectorInstrCost(Opcode, Val, Index);
|
return Impl.getVectorInstrCost(Opcode, Val, Index);
|
||||||
}
|
}
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
||||||
unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
unsigned AddressSpace,
|
||||||
const Instruction *I) override {
|
TTI::TargetCostKind CostKind,
|
||||||
|
const Instruction *I) override {
|
||||||
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
||||||
CostKind, I);
|
CostKind, I);
|
||||||
}
|
}
|
||||||
@ -2090,12 +2093,10 @@ public:
|
|||||||
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
|
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
|
||||||
Alignment, CostKind, I);
|
Alignment, CostKind, I);
|
||||||
}
|
}
|
||||||
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
|
InstructionCost getInterleavedMemoryOpCost(
|
||||||
ArrayRef<unsigned> Indices, Align Alignment,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
unsigned AddressSpace,
|
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
||||||
TTI::TargetCostKind CostKind,
|
bool UseMaskForCond, bool UseMaskForGaps) override {
|
||||||
bool UseMaskForCond,
|
|
||||||
bool UseMaskForGaps) override {
|
|
||||||
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
|
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
|
||||||
Alignment, AddressSpace, CostKind,
|
Alignment, AddressSpace, CostKind,
|
||||||
UseMaskForCond, UseMaskForGaps);
|
UseMaskForCond, UseMaskForGaps);
|
||||||
|
@ -537,9 +537,10 @@ public:
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
||||||
unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
unsigned AddressSpace,
|
||||||
const Instruction *I) const {
|
TTI::TargetCostKind CostKind,
|
||||||
|
const Instruction *I) const {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -982,10 +982,10 @@ public:
|
|||||||
return LT.first;
|
return LT.first;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace,
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I = nullptr) {
|
const Instruction *I = nullptr) {
|
||||||
assert(!Src->isVoidTy() && "Invalid type");
|
assert(!Src->isVoidTy() && "Invalid type");
|
||||||
// Assume types, such as structs, are expensive.
|
// Assume types, such as structs, are expensive.
|
||||||
if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
|
if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
|
||||||
@ -993,7 +993,7 @@ public:
|
|||||||
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
|
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
|
||||||
|
|
||||||
// Assuming that all loads of legal types cost 1.
|
// Assuming that all loads of legal types cost 1.
|
||||||
unsigned Cost = LT.first;
|
InstructionCost Cost = LT.first;
|
||||||
if (CostKind != TTI::TCK_RecipThroughput)
|
if (CostKind != TTI::TCK_RecipThroughput)
|
||||||
return Cost;
|
return Cost;
|
||||||
|
|
||||||
@ -1036,7 +1036,7 @@ public:
|
|||||||
//
|
//
|
||||||
// First, compute the cost of extracting the individual addresses and the
|
// First, compute the cost of extracting the individual addresses and the
|
||||||
// individual memory operations.
|
// individual memory operations.
|
||||||
int LoadCost =
|
InstructionCost LoadCost =
|
||||||
VT->getNumElements() *
|
VT->getNumElements() *
|
||||||
(getVectorInstrCost(
|
(getVectorInstrCost(
|
||||||
Instruction::ExtractElement,
|
Instruction::ExtractElement,
|
||||||
@ -1071,7 +1071,7 @@ public:
|
|||||||
return LoadCost + PackingCost + ConditionalCost;
|
return LoadCost + PackingCost + ConditionalCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getInterleavedMemoryOpCost(
|
InstructionCost getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
||||||
bool UseMaskForCond = false, bool UseMaskForGaps = false) {
|
bool UseMaskForCond = false, bool UseMaskForGaps = false) {
|
||||||
@ -1186,7 +1186,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!UseMaskForCond)
|
if (!UseMaskForCond)
|
||||||
return *Cost.getValue();
|
return Cost;
|
||||||
|
|
||||||
Type *I8Type = Type::getInt8Ty(VT->getContext());
|
Type *I8Type = Type::getInt8Ty(VT->getContext());
|
||||||
auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
|
auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
|
||||||
@ -1219,7 +1219,7 @@ public:
|
|||||||
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
|
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
|
||||||
CostKind);
|
CostKind);
|
||||||
|
|
||||||
return *Cost.getValue();
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get intrinsic cost based on arguments.
|
/// Get intrinsic cost based on arguments.
|
||||||
|
@ -815,14 +815,13 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
|
|||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
|
InstructionCost TargetTransformInfo::getMemoryOpCost(
|
||||||
Align Alignment, unsigned AddressSpace,
|
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind, const Instruction *I) const {
|
||||||
const Instruction *I) const {
|
|
||||||
assert((I == nullptr || I->getOpcode() == Opcode) &&
|
assert((I == nullptr || I->getOpcode() == Opcode) &&
|
||||||
"Opcode should reflect passed instruction.");
|
"Opcode should reflect passed instruction.");
|
||||||
int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
InstructionCost Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment,
|
||||||
CostKind, I);
|
AddressSpace, CostKind, I);
|
||||||
assert(Cost >= 0 && "TTI should not produce negative costs!");
|
assert(Cost >= 0 && "TTI should not produce negative costs!");
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
@ -845,11 +844,11 @@ InstructionCost TargetTransformInfo::getGatherScatterOpCost(
|
|||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
int TargetTransformInfo::getInterleavedMemoryOpCost(
|
InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
||||||
bool UseMaskForCond, bool UseMaskForGaps) const {
|
bool UseMaskForCond, bool UseMaskForGaps) const {
|
||||||
int Cost = TTIImpl->getInterleavedMemoryOpCost(
|
InstructionCost Cost = TTIImpl->getInterleavedMemoryOpCost(
|
||||||
Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
|
Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
|
||||||
UseMaskForCond, UseMaskForGaps);
|
UseMaskForCond, UseMaskForGaps);
|
||||||
assert(Cost >= 0 && "TTI should not produce negative costs!");
|
assert(Cost >= 0 && "TTI should not produce negative costs!");
|
||||||
|
@ -931,7 +931,7 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
|
|||||||
Optional<unsigned> MaxNumVScale = getMaxVScale();
|
Optional<unsigned> MaxNumVScale = getMaxVScale();
|
||||||
assert(MaxNumVScale && "Expected valid max vscale value");
|
assert(MaxNumVScale && "Expected valid max vscale value");
|
||||||
|
|
||||||
unsigned MemOpCost =
|
InstructionCost MemOpCost =
|
||||||
getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
|
getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
|
||||||
unsigned MaxNumElementsPerGather =
|
unsigned MaxNumElementsPerGather =
|
||||||
MaxNumVScale.getValue() * LegalVF.getKnownMinValue();
|
MaxNumVScale.getValue() * LegalVF.getKnownMinValue();
|
||||||
@ -942,10 +942,11 @@ bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
|
|||||||
return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
|
return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
|
||||||
}
|
}
|
||||||
|
|
||||||
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
|
InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
|
||||||
MaybeAlign Alignment, unsigned AddressSpace,
|
MaybeAlign Alignment,
|
||||||
TTI::TargetCostKind CostKind,
|
unsigned AddressSpace,
|
||||||
const Instruction *I) {
|
TTI::TargetCostKind CostKind,
|
||||||
|
const Instruction *I) {
|
||||||
// TODO: Handle other cost kinds.
|
// TODO: Handle other cost kinds.
|
||||||
if (CostKind != TTI::TCK_RecipThroughput)
|
if (CostKind != TTI::TCK_RecipThroughput)
|
||||||
return 1;
|
return 1;
|
||||||
@ -991,7 +992,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
|
|||||||
return LT.first;
|
return LT.first;
|
||||||
}
|
}
|
||||||
|
|
||||||
int AArch64TTIImpl::getInterleavedMemoryOpCost(
|
InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
||||||
bool UseMaskForCond, bool UseMaskForGaps) {
|
bool UseMaskForCond, bool UseMaskForGaps) {
|
||||||
@ -1018,7 +1019,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(
|
|||||||
}
|
}
|
||||||
|
|
||||||
int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
|
int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
|
||||||
int Cost = 0;
|
InstructionCost Cost = 0;
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
|
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
|
||||||
for (auto *I : Tys) {
|
for (auto *I : Tys) {
|
||||||
if (!I->isVectorTy())
|
if (!I->isVectorTy())
|
||||||
@ -1028,7 +1029,7 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
|
|||||||
Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
|
Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
|
||||||
getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
|
getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
|
||||||
}
|
}
|
||||||
return Cost;
|
return *Cost.getValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
|
unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
|
||||||
|
@ -176,10 +176,10 @@ public:
|
|||||||
bool IsZeroCmp) const;
|
bool IsZeroCmp) const;
|
||||||
bool useNeonVector(const Type *Ty) const;
|
bool useNeonVector(const Type *Ty) const;
|
||||||
|
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace,
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I = nullptr);
|
const Instruction *I = nullptr);
|
||||||
|
|
||||||
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
|
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
|
||||||
|
|
||||||
@ -258,7 +258,7 @@ public:
|
|||||||
return BaseT::isLegalNTStore(DataType, Alignment);
|
return BaseT::isLegalNTStore(DataType, Alignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
int getInterleavedMemoryOpCost(
|
InstructionCost getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace,
|
Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
||||||
|
@ -1382,10 +1382,11 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
|
|||||||
return BaseCost;
|
return BaseCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
MaybeAlign Alignment, unsigned AddressSpace,
|
MaybeAlign Alignment,
|
||||||
TTI::TargetCostKind CostKind,
|
unsigned AddressSpace,
|
||||||
const Instruction *I) {
|
TTI::TargetCostKind CostKind,
|
||||||
|
const Instruction *I) {
|
||||||
// TODO: Handle other cost kinds.
|
// TODO: Handle other cost kinds.
|
||||||
if (CostKind != TTI::TCK_RecipThroughput)
|
if (CostKind != TTI::TCK_RecipThroughput)
|
||||||
return 1;
|
return 1;
|
||||||
@ -1445,7 +1446,7 @@ ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
|||||||
return cast<FixedVectorType>(Src)->getNumElements() * 8;
|
return cast<FixedVectorType>(Src)->getNumElements() * 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ARMTTIImpl::getInterleavedMemoryOpCost(
|
InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
||||||
bool UseMaskForCond, bool UseMaskForGaps) {
|
bool UseMaskForCond, bool UseMaskForGaps) {
|
||||||
|
@ -226,16 +226,16 @@ public:
|
|||||||
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
|
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
|
||||||
const Instruction *CxtI = nullptr);
|
const Instruction *CxtI = nullptr);
|
||||||
|
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace,
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I = nullptr);
|
const Instruction *I = nullptr);
|
||||||
|
|
||||||
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
|
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
Align Alignment, unsigned AddressSpace,
|
Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind);
|
TTI::TargetCostKind CostKind);
|
||||||
|
|
||||||
int getInterleavedMemoryOpCost(
|
InstructionCost getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace,
|
Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
||||||
|
@ -154,11 +154,11 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
MaybeAlign Alignment,
|
MaybeAlign Alignment,
|
||||||
unsigned AddressSpace,
|
unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I) {
|
const Instruction *I) {
|
||||||
assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
|
assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
|
||||||
// TODO: Handle other cost kinds.
|
// TODO: Handle other cost kinds.
|
||||||
if (CostKind != TTI::TCK_RecipThroughput)
|
if (CostKind != TTI::TCK_RecipThroughput)
|
||||||
@ -231,7 +231,7 @@ InstructionCost HexagonTTIImpl::getGatherScatterOpCost(
|
|||||||
Alignment, CostKind, I);
|
Alignment, CostKind, I);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(
|
InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
||||||
bool UseMaskForCond, bool UseMaskForGaps) {
|
bool UseMaskForCond, bool UseMaskForGaps) {
|
||||||
|
@ -114,10 +114,10 @@ public:
|
|||||||
TTI::TargetCostKind CostKind);
|
TTI::TargetCostKind CostKind);
|
||||||
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
|
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
|
||||||
const SCEV *S);
|
const SCEV *S);
|
||||||
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace,
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I = nullptr);
|
const Instruction *I = nullptr);
|
||||||
InstructionCost
|
InstructionCost
|
||||||
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
||||||
unsigned AddressSpace,
|
unsigned AddressSpace,
|
||||||
@ -129,7 +129,7 @@ public:
|
|||||||
Align Alignment,
|
Align Alignment,
|
||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I);
|
const Instruction *I);
|
||||||
unsigned getInterleavedMemoryOpCost(
|
InstructionCost getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace,
|
Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
||||||
|
@ -1105,10 +1105,11 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
|
|||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
MaybeAlign Alignment, unsigned AddressSpace,
|
MaybeAlign Alignment,
|
||||||
TTI::TargetCostKind CostKind,
|
unsigned AddressSpace,
|
||||||
const Instruction *I) {
|
TTI::TargetCostKind CostKind,
|
||||||
|
const Instruction *I) {
|
||||||
if (TLI->getValueType(DL, Src, true) == MVT::Other)
|
if (TLI->getValueType(DL, Src, true) == MVT::Other)
|
||||||
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
||||||
CostKind);
|
CostKind);
|
||||||
@ -1117,8 +1118,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
|||||||
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
|
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
|
||||||
"Invalid Opcode");
|
"Invalid Opcode");
|
||||||
|
|
||||||
int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
InstructionCost Cost =
|
||||||
CostKind);
|
BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
|
||||||
// TODO: Handle other cost kinds.
|
// TODO: Handle other cost kinds.
|
||||||
if (CostKind != TTI::TCK_RecipThroughput)
|
if (CostKind != TTI::TCK_RecipThroughput)
|
||||||
return Cost;
|
return Cost;
|
||||||
@ -1185,7 +1186,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
|||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PPCTTIImpl::getInterleavedMemoryOpCost(
|
InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
||||||
bool UseMaskForCond, bool UseMaskForGaps) {
|
bool UseMaskForCond, bool UseMaskForGaps) {
|
||||||
@ -1201,9 +1202,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(
|
|||||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
|
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
|
||||||
|
|
||||||
// Firstly, the cost of load/store operation.
|
// Firstly, the cost of load/store operation.
|
||||||
int Cost =
|
InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
|
||||||
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
|
AddressSpace, CostKind);
|
||||||
CostKind);
|
|
||||||
|
|
||||||
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
|
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
|
||||||
// (at least in the sense that there need only be one non-loop-invariant
|
// (at least in the sense that there need only be one non-loop-invariant
|
||||||
|
@ -121,11 +121,11 @@ public:
|
|||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I = nullptr);
|
const Instruction *I = nullptr);
|
||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace,
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I = nullptr);
|
const Instruction *I = nullptr);
|
||||||
int getInterleavedMemoryOpCost(
|
InstructionCost getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace,
|
Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
||||||
|
@ -246,7 +246,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||||||
// Find out if L contains a call, what the machine instruction count
|
// Find out if L contains a call, what the machine instruction count
|
||||||
// estimate is, and how many stores there are.
|
// estimate is, and how many stores there are.
|
||||||
bool HasCall = false;
|
bool HasCall = false;
|
||||||
unsigned NumStores = 0;
|
InstructionCost NumStores = 0;
|
||||||
for (auto &BB : L->blocks())
|
for (auto &BB : L->blocks())
|
||||||
for (auto &I : *BB) {
|
for (auto &I : *BB) {
|
||||||
if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
|
if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
|
||||||
@ -270,7 +270,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||||||
// The z13 processor will run out of store tags if too many stores
|
// The z13 processor will run out of store tags if too many stores
|
||||||
// are fed into it too quickly. Therefore make sure there are not
|
// are fed into it too quickly. Therefore make sure there are not
|
||||||
// too many stores in the resulting unrolled loop.
|
// too many stores in the resulting unrolled loop.
|
||||||
unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);
|
unsigned const NumStoresVal = *NumStores.getValue();
|
||||||
|
unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX);
|
||||||
|
|
||||||
if (HasCall) {
|
if (HasCall) {
|
||||||
// Only allow full unrolling if loop has any calls.
|
// Only allow full unrolling if loop has any calls.
|
||||||
@ -1059,10 +1060,11 @@ static bool isBswapIntrinsicCall(const Value *V) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
MaybeAlign Alignment, unsigned AddressSpace,
|
MaybeAlign Alignment,
|
||||||
TTI::TargetCostKind CostKind,
|
unsigned AddressSpace,
|
||||||
const Instruction *I) {
|
TTI::TargetCostKind CostKind,
|
||||||
|
const Instruction *I) {
|
||||||
assert(!Src->isVoidTy() && "Invalid type");
|
assert(!Src->isVoidTy() && "Invalid type");
|
||||||
|
|
||||||
// TODO: Handle other cost kinds.
|
// TODO: Handle other cost kinds.
|
||||||
@ -1129,7 +1131,7 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
|||||||
// needed for using / defining the vector operands. The SystemZ version does
|
// needed for using / defining the vector operands. The SystemZ version does
|
||||||
// roughly the same but bases the computations on vector permutations
|
// roughly the same but bases the computations on vector permutations
|
||||||
// instead.
|
// instead.
|
||||||
int SystemZTTIImpl::getInterleavedMemoryOpCost(
|
InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
||||||
bool UseMaskForCond, bool UseMaskForGaps) {
|
bool UseMaskForCond, bool UseMaskForGaps) {
|
||||||
|
@ -103,11 +103,12 @@ public:
|
|||||||
const Instruction *I = nullptr);
|
const Instruction *I = nullptr);
|
||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
||||||
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
|
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
const Instruction *I = nullptr);
|
TTI::TargetCostKind CostKind,
|
||||||
|
const Instruction *I = nullptr);
|
||||||
|
|
||||||
int getInterleavedMemoryOpCost(
|
InstructionCost getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace,
|
Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
||||||
|
@ -3192,10 +3192,11 @@ unsigned X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
|
|||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
MaybeAlign Alignment, unsigned AddressSpace,
|
MaybeAlign Alignment,
|
||||||
TTI::TargetCostKind CostKind,
|
unsigned AddressSpace,
|
||||||
const Instruction *I) {
|
TTI::TargetCostKind CostKind,
|
||||||
|
const Instruction *I) {
|
||||||
// TODO: Handle other cost kinds.
|
// TODO: Handle other cost kinds.
|
||||||
if (CostKind != TTI::TCK_RecipThroughput) {
|
if (CostKind != TTI::TCK_RecipThroughput) {
|
||||||
if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
|
if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
|
||||||
@ -3227,8 +3228,8 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
|||||||
// Assume that all other non-power-of-two numbers are scalarized.
|
// Assume that all other non-power-of-two numbers are scalarized.
|
||||||
if (!isPowerOf2_32(NumElem)) {
|
if (!isPowerOf2_32(NumElem)) {
|
||||||
APInt DemandedElts = APInt::getAllOnesValue(NumElem);
|
APInt DemandedElts = APInt::getAllOnesValue(NumElem);
|
||||||
int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment,
|
InstructionCost Cost = BaseT::getMemoryOpCost(
|
||||||
AddressSpace, CostKind);
|
Opcode, VTy->getScalarType(), Alignment, AddressSpace, CostKind);
|
||||||
int SplitCost = getScalarizationOverhead(VTy, DemandedElts,
|
int SplitCost = getScalarizationOverhead(VTy, DemandedElts,
|
||||||
Opcode == Instruction::Load,
|
Opcode == Instruction::Load,
|
||||||
Opcode == Instruction::Store);
|
Opcode == Instruction::Store);
|
||||||
@ -3286,7 +3287,7 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
|
|||||||
InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
|
InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
|
||||||
InstructionCost ValueSplitCost =
|
InstructionCost ValueSplitCost =
|
||||||
getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore);
|
getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore);
|
||||||
int MemopCost =
|
InstructionCost MemopCost =
|
||||||
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
||||||
Alignment, AddressSpace, CostKind);
|
Alignment, AddressSpace, CostKind);
|
||||||
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
|
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
|
||||||
@ -4116,8 +4117,9 @@ int X86TTIImpl::getScatterOverhead() const {
|
|||||||
|
|
||||||
// Return an average cost of Gather / Scatter instruction, maybe improved later.
|
// Return an average cost of Gather / Scatter instruction, maybe improved later.
|
||||||
// FIXME: Add TargetCostKind support.
|
// FIXME: Add TargetCostKind support.
|
||||||
int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, const Value *Ptr,
|
InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy,
|
||||||
Align Alignment, unsigned AddressSpace) {
|
const Value *Ptr, Align Alignment,
|
||||||
|
unsigned AddressSpace) {
|
||||||
|
|
||||||
assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");
|
assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");
|
||||||
unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
|
unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
|
||||||
@ -4210,9 +4212,9 @@ InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// The cost of the scalar loads/stores.
|
// The cost of the scalar loads/stores.
|
||||||
int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
InstructionCost MemoryOpCost =
|
||||||
MaybeAlign(Alignment), AddressSpace,
|
VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
||||||
CostKind);
|
MaybeAlign(Alignment), AddressSpace, CostKind);
|
||||||
|
|
||||||
int InsertExtractCost = 0;
|
int InsertExtractCost = 0;
|
||||||
if (Opcode == Instruction::Load)
|
if (Opcode == Instruction::Load)
|
||||||
@ -4520,7 +4522,7 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() {
|
|||||||
// computing the cost using a generic formula as a function of generic
|
// computing the cost using a generic formula as a function of generic
|
||||||
// shuffles. We therefore use a lookup table instead, filled according to
|
// shuffles. We therefore use a lookup table instead, filled according to
|
||||||
// the instruction sequences that codegen currently generates.
|
// the instruction sequences that codegen currently generates.
|
||||||
int X86TTIImpl::getInterleavedMemoryOpCostAVX2(
|
InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
|
||||||
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
|
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
|
||||||
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
|
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
|
TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
|
||||||
@ -4562,9 +4564,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(
|
|||||||
// Get the cost of one memory operation.
|
// Get the cost of one memory operation.
|
||||||
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
|
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
|
||||||
LegalVT.getVectorNumElements());
|
LegalVT.getVectorNumElements());
|
||||||
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
|
InstructionCost MemOpCost = getMemoryOpCost(
|
||||||
MaybeAlign(Alignment), AddressSpace,
|
Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind);
|
||||||
CostKind);
|
|
||||||
|
|
||||||
auto *VT = FixedVectorType::get(ScalarTy, VF);
|
auto *VT = FixedVectorType::get(ScalarTy, VF);
|
||||||
EVT ETy = TLI->getValueType(DL, VT);
|
EVT ETy = TLI->getValueType(DL, VT);
|
||||||
@ -4637,7 +4638,7 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(
|
|||||||
// \p Indices contains indices for strided load.
|
// \p Indices contains indices for strided load.
|
||||||
// \p Factor - the factor of interleaving.
|
// \p Factor - the factor of interleaving.
|
||||||
// AVX-512 provides 3-src shuffles that significantly reduces the cost.
|
// AVX-512 provides 3-src shuffles that significantly reduces the cost.
|
||||||
int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
|
InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
|
||||||
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
|
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
|
||||||
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
|
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
|
TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
|
||||||
@ -4661,9 +4662,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
|
|||||||
// Get the cost of one memory operation.
|
// Get the cost of one memory operation.
|
||||||
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
|
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
|
||||||
LegalVT.getVectorNumElements());
|
LegalVT.getVectorNumElements());
|
||||||
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
|
InstructionCost MemOpCost = getMemoryOpCost(
|
||||||
MaybeAlign(Alignment), AddressSpace,
|
Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind);
|
||||||
CostKind);
|
|
||||||
|
|
||||||
unsigned VF = VecTy->getNumElements() / Factor;
|
unsigned VF = VecTy->getNumElements() / Factor;
|
||||||
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
|
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
|
||||||
@ -4719,8 +4719,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
|
|||||||
if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc)
|
if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc)
|
||||||
NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
|
NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
|
||||||
|
|
||||||
int Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost +
|
InstructionCost Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost +
|
||||||
NumOfUnfoldedLoads * MemOpCost + NumOfMoves;
|
NumOfUnfoldedLoads * MemOpCost + NumOfMoves;
|
||||||
|
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
@ -4755,12 +4755,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
|
|||||||
// The SK_MergeTwoSrc shuffle clobbers one of src operands.
|
// The SK_MergeTwoSrc shuffle clobbers one of src operands.
|
||||||
// We need additional instructions to keep sources.
|
// We need additional instructions to keep sources.
|
||||||
unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
|
unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
|
||||||
int Cost = NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
|
InstructionCost Cost =
|
||||||
NumOfMoves;
|
NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
|
||||||
|
NumOfMoves;
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
int X86TTIImpl::getInterleavedMemoryOpCost(
|
InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
|
||||||
bool UseMaskForCond, bool UseMaskForGaps) {
|
bool UseMaskForCond, bool UseMaskForGaps) {
|
||||||
|
@ -140,10 +140,10 @@ public:
|
|||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
||||||
unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
|
unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
|
||||||
bool Insert, bool Extract);
|
bool Insert, bool Extract);
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace,
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I = nullptr);
|
const Instruction *I = nullptr);
|
||||||
InstructionCost
|
InstructionCost
|
||||||
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
||||||
unsigned AddressSpace,
|
unsigned AddressSpace,
|
||||||
@ -186,17 +186,17 @@ public:
|
|||||||
bool IsPairwiseForm, bool IsUnsigned,
|
bool IsPairwiseForm, bool IsUnsigned,
|
||||||
TTI::TargetCostKind CostKind);
|
TTI::TargetCostKind CostKind);
|
||||||
|
|
||||||
int getInterleavedMemoryOpCost(
|
InstructionCost getInterleavedMemoryOpCost(
|
||||||
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||||
Align Alignment, unsigned AddressSpace,
|
Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
||||||
bool UseMaskForCond = false, bool UseMaskForGaps = false);
|
bool UseMaskForCond = false, bool UseMaskForGaps = false);
|
||||||
int getInterleavedMemoryOpCostAVX512(
|
InstructionCost getInterleavedMemoryOpCostAVX512(
|
||||||
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
|
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
|
||||||
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
|
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
||||||
bool UseMaskForCond = false, bool UseMaskForGaps = false);
|
bool UseMaskForCond = false, bool UseMaskForGaps = false);
|
||||||
int getInterleavedMemoryOpCostAVX2(
|
InstructionCost getInterleavedMemoryOpCostAVX2(
|
||||||
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
|
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
|
||||||
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
|
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
||||||
@ -240,8 +240,9 @@ private:
|
|||||||
InstructionCost getGSScalarCost(unsigned Opcode, Type *DataTy,
|
InstructionCost getGSScalarCost(unsigned Opcode, Type *DataTy,
|
||||||
bool VariableMask, Align Alignment,
|
bool VariableMask, Align Alignment,
|
||||||
unsigned AddressSpace);
|
unsigned AddressSpace);
|
||||||
int getGSVectorCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
|
InstructionCost getGSVectorCost(unsigned Opcode, Type *DataTy,
|
||||||
Align Alignment, unsigned AddressSpace);
|
const Value *Ptr, Align Alignment,
|
||||||
|
unsigned AddressSpace);
|
||||||
|
|
||||||
int getGatherOverhead() const;
|
int getGatherOverhead() const;
|
||||||
int getScatterOverhead() const;
|
int getScatterOverhead() const;
|
||||||
|
Loading…
Reference in New Issue
Block a user