From 7016294be8d84e7d22954963ab32e41325a3a318 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Sat, 23 Jan 2021 12:14:21 +0000 Subject: [PATCH] [TTI] NFC: Change get[Interleaved]MemoryOpCost to return InstructionCost This patch migrates the TTI cost interfaces to return an InstructionCost. See this patch for the introduction of the type: https://reviews.llvm.org/D91174 See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D100205 --- include/llvm/Analysis/TargetTransformInfo.h | 39 +++++++------- .../llvm/Analysis/TargetTransformInfoImpl.h | 7 +-- include/llvm/CodeGen/BasicTTIImpl.h | 18 +++---- lib/Analysis/TargetTransformInfo.cpp | 15 +++--- .../AArch64/AArch64TargetTransformInfo.cpp | 17 ++++--- .../AArch64/AArch64TargetTransformInfo.h | 10 ++-- lib/Target/ARM/ARMTargetTransformInfo.cpp | 11 ++-- lib/Target/ARM/ARMTargetTransformInfo.h | 10 ++-- .../Hexagon/HexagonTargetTransformInfo.cpp | 12 ++--- .../Hexagon/HexagonTargetTransformInfo.h | 10 ++-- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 20 ++++---- lib/Target/PowerPC/PPCTargetTransformInfo.h | 10 ++-- .../SystemZ/SystemZTargetTransformInfo.cpp | 16 +++--- .../SystemZ/SystemZTargetTransformInfo.h | 9 ++-- lib/Target/X86/X86TargetTransformInfo.cpp | 51 ++++++++++--------- lib/Target/X86/X86TargetTransformInfo.h | 19 +++---- 16 files changed, 141 insertions(+), 133 deletions(-) diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 0e56e0c78df..0d837780478 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -1129,10 +1129,11 @@ public: int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; /// \return The cost of Load and Store instructions. - int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, - const Instruction *I = nullptr) const; + InstructionCost + getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; /// \return The cost of masked Load and Store instructions. InstructionCost getMaskedMemoryOpCost( @@ -1163,7 +1164,7 @@ public: /// \p AddressSpace is address space of the pointer. /// \p UseMaskForCond indicates if the memory access is predicated. /// \p UseMaskForGaps indicates if gaps should be masked. - int getInterleavedMemoryOpCost( + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, @@ -1590,10 +1591,11 @@ public: const Instruction *I) = 0; virtual int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; - virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) = 0; + virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) = 0; virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, @@ -1604,7 +1606,7 @@ public: TTI::TargetCostKind CostKind, const Instruction *I = nullptr) = 0; - virtual int getInterleavedMemoryOpCost( + virtual InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; @@ -2070,9 +2072,10 @@ public: int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } - int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, TTI::TargetCostKind CostKind, - const Instruction *I) override { + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) override { return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, I); } @@ -2090,12 +2093,10 @@ public: return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I); } - int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, - ArrayRef Indices, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - bool UseMaskForCond, - bool UseMaskForGaps) override { + InstructionCost getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, + Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, + bool UseMaskForCond, bool UseMaskForGaps) override { return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 16eeeab1f21..dc0983a7f8c 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -537,9 +537,10 @@ public: return 1; } - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, TTI::TargetCostKind CostKind, - const Instruction *I) const { + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) const { return 1; } diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 7b626be5811..c4ebaacd327 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -982,10 +982,10 @@ public: return LT.first; } - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) { + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) { assert(!Src->isVoidTy() && "Invalid type"); // Assume types, such as structs, are expensive. if (getTLI()->getValueType(DL, Src, true) == MVT::Other) @@ -993,7 +993,7 @@ public: std::pair LT = getTLI()->getTypeLegalizationCost(DL, Src); // Assuming that all loads of legal types cost 1. - unsigned Cost = LT.first; + InstructionCost Cost = LT.first; if (CostKind != TTI::TCK_RecipThroughput) return Cost; @@ -1036,7 +1036,7 @@ public: // // First, compute the cost of extracting the individual addresses and the // individual memory operations. - int LoadCost = + InstructionCost LoadCost = VT->getNumElements() * (getVectorInstrCost( Instruction::ExtractElement, @@ -1071,7 +1071,7 @@ public: return LoadCost + PackingCost + ConditionalCost; } - unsigned getInterleavedMemoryOpCost( + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false) { @@ -1186,7 +1186,7 @@ public: } if (!UseMaskForCond) - return *Cost.getValue(); + return Cost; Type *I8Type = Type::getInt8Ty(VT->getContext()); auto *MaskVT = FixedVectorType::get(I8Type, NumElts); @@ -1219,7 +1219,7 @@ public: Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind); - return *Cost.getValue(); + return Cost; } /// Get intrinsic cost based on arguments. diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 0919b2b83d6..29d1c0eac12 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -815,14 +815,13 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, return Cost; } -int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, - Align Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) const { +InstructionCost TargetTransformInfo::getMemoryOpCost( + unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind, I); + InstructionCost Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, + AddressSpace, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -845,11 +844,11 @@ InstructionCost TargetTransformInfo::getGatherScatterOpCost( return Cost; } -int TargetTransformInfo::getInterleavedMemoryOpCost( +InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const { - int Cost = TTIImpl->getInterleavedMemoryOpCost( + InstructionCost Cost = TTIImpl->getInterleavedMemoryOpCost( Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); assert(Cost >= 0 && "TTI should not produce negative costs!"); diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 996c8736ed1..c8eeb246122 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -931,7 +931,7 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost( Optional MaxNumVScale = getMaxVScale(); assert(MaxNumVScale && "Expected valid max vscale value"); - unsigned MemOpCost = + InstructionCost MemOpCost = getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I); unsigned MaxNumElementsPerGather = MaxNumVScale.getValue() * LegalVF.getKnownMinValue(); @@ -942,10 +942,11 @@ bool AArch64TTIImpl::useNeonVector(const Type *Ty) const { return isa(Ty) && !ST->useSVEForFixedLengthVectors(); } -int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, - MaybeAlign Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, + MaybeAlign Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return 1; @@ -991,7 +992,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, return LT.first; } -int AArch64TTIImpl::getInterleavedMemoryOpCost( +InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { @@ -1018,7 +1019,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost( } int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef Tys) { - int Cost = 0; + InstructionCost Cost = 0; TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; for (auto *I : Tys) { if (!I->isVectorTy()) @@ -1028,7 +1029,7 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef Tys) { Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) + getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind); } - return Cost; + return *Cost.getValue(); } unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) { diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 3e77d67d8a2..d04e2b20606 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -176,10 +176,10 @@ public: bool IsZeroCmp) const; bool useNeonVector(const Type *Ty) const; - int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); int getCostOfKeepingLiveOverCall(ArrayRef Tys); @@ -258,7 +258,7 @@ public: return BaseT::isLegalNTStore(DataType, Alignment); } - int getInterleavedMemoryOpCost( + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 6f3de3977f3..16bf5e4fc47 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1382,10 +1382,11 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, return BaseCost; } -int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - MaybeAlign Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return 1; @@ -1445,7 +1446,7 @@ ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, return cast(Src)->getNumElements() * 8; } -int ARMTTIImpl::getInterleavedMemoryOpCost( +InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 46a14f51725..3aee336f85d 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -226,16 +226,16 @@ public: ArrayRef Args = ArrayRef(), const Instruction *CxtI = nullptr); - int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind); - int getInterleavedMemoryOpCost( + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 2ba10a60db8..57efdb8c43b 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -154,11 +154,11 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp, return 0; } -unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - MaybeAlign Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) { assert(Opcode == Instruction::Load || Opcode == Instruction::Store); // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) @@ -231,7 +231,7 @@ InstructionCost HexagonTTIImpl::getGatherScatterOpCost( Alignment, CostKind, I); } -unsigned HexagonTTIImpl::getInterleavedMemoryOpCost( +InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 2faa82742f8..b9077f0f016 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -114,10 +114,10 @@ public: TTI::TargetCostKind CostKind); unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *S); - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, @@ -129,7 +129,7 @@ public: Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I); - unsigned getInterleavedMemoryOpCost( + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index d0280d5b7eb..5d903cf2f2e 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1105,10 +1105,11 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { return Cost; } -int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - MaybeAlign Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) { if (TLI->getValueType(DL, Src, true) == MVT::Other) return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); @@ -1117,8 +1118,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); - int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind); + InstructionCost Cost = + BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return Cost; @@ -1185,7 +1186,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return Cost; } -int PPCTTIImpl::getInterleavedMemoryOpCost( +InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { @@ -1201,9 +1202,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost( std::pair LT = TLI->getTypeLegalizationCost(DL, VecTy); // Firstly, the cost of load/store operation. - int Cost = - getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, - CostKind); + InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), + AddressSpace, CostKind); // PPC, for both Altivec/VSX, support cheap arbitrary permutations // (at least in the sense that there need only be one non-loop-invariant diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 25c30d1b144..5ae842351de 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -121,11 +121,11 @@ public: TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); - int getInterleavedMemoryOpCost( + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 0b0a5ef83b2..0dbdca5035c 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -246,7 +246,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, // Find out if L contains a call, what the machine instruction count // estimate is, and how many stores there are. bool HasCall = false; - unsigned NumStores = 0; + InstructionCost NumStores = 0; for (auto &BB : L->blocks()) for (auto &I : *BB) { if (isa(&I) || isa(&I)) { @@ -270,7 +270,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, // The z13 processor will run out of store tags if too many stores // are fed into it too quickly. Therefore make sure there are not // too many stores in the resulting unrolled loop. - unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX); + unsigned const NumStoresVal = *NumStores.getValue(); + unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX); if (HasCall) { // Only allow full unrolling if loop has any calls. @@ -1059,10 +1060,11 @@ static bool isBswapIntrinsicCall(const Value *V) { return false; } -int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - MaybeAlign Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) { assert(!Src->isVoidTy() && "Invalid type"); // TODO: Handle other cost kinds. @@ -1129,7 +1131,7 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, // needed for using / defining the vector operands. The SystemZ version does // roughly the same but bases the computations on vector permutations // instead. -int SystemZTTIImpl::getInterleavedMemoryOpCost( +InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 8848ac820dd..1ab5b73ce6d 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -103,11 +103,12 @@ public: const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); - int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); - int getInterleavedMemoryOpCost( + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 5f2b1f6f344..c24a094868c 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3192,10 +3192,11 @@ unsigned X86TTIImpl::getScalarizationOverhead(VectorType *Ty, return Cost; } -int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - MaybeAlign Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) { if (auto *SI = dyn_cast_or_null(I)) { @@ -3227,8 +3228,8 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, // Assume that all other non-power-of-two numbers are scalarized. if (!isPowerOf2_32(NumElem)) { APInt DemandedElts = APInt::getAllOnesValue(NumElem); - int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment, - AddressSpace, CostKind); + InstructionCost Cost = BaseT::getMemoryOpCost( + Opcode, VTy->getScalarType(), Alignment, AddressSpace, CostKind); int SplitCost = getScalarizationOverhead(VTy, DemandedElts, Opcode == Instruction::Load, Opcode == Instruction::Store); @@ -3286,7 +3287,7 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment, InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); InstructionCost ValueSplitCost = getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore); - int MemopCost = + InstructionCost MemopCost = NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(), Alignment, AddressSpace, CostKind); return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost; @@ -4116,8 +4117,9 @@ int X86TTIImpl::getScatterOverhead() const { // Return an average cost of Gather / Scatter instruction, maybe improved later. // FIXME: Add TargetCostKind support. -int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, const Value *Ptr, - Align Alignment, unsigned AddressSpace) { +InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, + const Value *Ptr, Align Alignment, + unsigned AddressSpace) { assert(isa(SrcVTy) && "Unexpected type in getGSVectorCost"); unsigned VF = cast(SrcVTy)->getNumElements(); @@ -4210,9 +4212,9 @@ InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, } // The cost of the scalar loads/stores. - int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), - MaybeAlign(Alignment), AddressSpace, - CostKind); + InstructionCost MemoryOpCost = + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), + MaybeAlign(Alignment), AddressSpace, CostKind); int InsertExtractCost = 0; if (Opcode == Instruction::Load) @@ -4520,7 +4522,7 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() { // computing the cost using a generic formula as a function of generic // shuffles. We therefore use a lookup table instead, filled according to // the instruction sequences that codegen currently generates. -int X86TTIImpl::getInterleavedMemoryOpCostAVX2( +InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2( unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { @@ -4562,9 +4564,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2( // Get the cost of one memory operation. auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(), LegalVT.getVectorNumElements()); - unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, - MaybeAlign(Alignment), AddressSpace, - CostKind); + InstructionCost MemOpCost = getMemoryOpCost( + Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind); auto *VT = FixedVectorType::get(ScalarTy, VF); EVT ETy = TLI->getValueType(DL, VT); @@ -4637,7 +4638,7 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2( // \p Indices contains indices for strided load. // \p Factor - the factor of interleaving. // AVX-512 provides 3-src shuffles that significantly reduces the cost. -int X86TTIImpl::getInterleavedMemoryOpCostAVX512( +InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { @@ -4661,9 +4662,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512( // Get the cost of one memory operation. auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(), LegalVT.getVectorNumElements()); - unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, - MaybeAlign(Alignment), AddressSpace, - CostKind); + InstructionCost MemOpCost = getMemoryOpCost( + Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind); unsigned VF = VecTy->getNumElements() / Factor; MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); @@ -4719,8 +4719,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512( if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc) NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2; - int Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost + - NumOfUnfoldedLoads * MemOpCost + NumOfMoves; + InstructionCost Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost + + NumOfUnfoldedLoads * MemOpCost + NumOfMoves; return Cost; } @@ -4755,12 +4755,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512( // The SK_MergeTwoSrc shuffle clobbers one of src operands. // We need additional instructions to keep sources. unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2; - int Cost = NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) + - NumOfMoves; + InstructionCost Cost = + NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) + + NumOfMoves; return Cost; } -int X86TTIImpl::getInterleavedMemoryOpCost( +InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index a3e8db157bc..9d5ca819f87 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -140,10 +140,10 @@ public: int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract); - int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, @@ -186,17 +186,17 @@ public: bool IsPairwiseForm, bool IsUnsigned, TTI::TargetCostKind CostKind); - int getInterleavedMemoryOpCost( + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); - int getInterleavedMemoryOpCostAVX512( + InstructionCost getInterleavedMemoryOpCostAVX512( unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); - int getInterleavedMemoryOpCostAVX2( + InstructionCost getInterleavedMemoryOpCostAVX2( unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, @@ -240,8 +240,9 @@ private: InstructionCost getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, unsigned AddressSpace); - int getGSVectorCost(unsigned Opcode, Type *DataTy, const Value *Ptr, - Align Alignment, unsigned AddressSpace); + InstructionCost getGSVectorCost(unsigned Opcode, Type *DataTy, + const Value *Ptr, Align Alignment, + unsigned AddressSpace); int getGatherOverhead() const; int getScatterOverhead() const;