1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[TTI] NFC: Change getTypeLegalizationCost to return InstructionCost.

This patch migrates the TTI cost interfaces to return an InstructionCost.

See this patch for the introduction of the type: https://reviews.llvm.org/D91174
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html

Reviewed By: sdesmalen, kparzysz

Differential Revision: https://reviews.llvm.org/D101533
This commit is contained in:
Daniil Fukalov 2021-04-29 16:02:51 +03:00
parent 8ceccff3f0
commit 3f292e8925
12 changed files with 128 additions and 101 deletions

View File

@ -357,7 +357,10 @@ public:
}
unsigned getRegUsageForType(Type *Ty) {
return getTLI()->getTypeLegalizationCost(DL, Ty).first;
InstructionCost::CostType Val =
*getTLI()->getTypeLegalizationCost(DL, Ty).first.getValue();
assert(Val >= 0 && "Negative cost!");
return Val;
}
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
@ -750,7 +753,7 @@ public:
Opd1PropInfo, Opd2PropInfo,
Args, CxtI);
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
bool IsFloat = Ty->isFPOrFPVectorTy();
// Assume that floating point arithmetic operations cost twice as much as
@ -852,8 +855,10 @@ public:
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
std::pair<InstructionCost, MVT> SrcLT =
TLI->getTypeLegalizationCost(DL, Src);
std::pair<InstructionCost, MVT> DstLT =
TLI->getTypeLegalizationCost(DL, Dst);
TypeSize SrcSize = SrcLT.second.getSizeInBits();
TypeSize DstSize = DstLT.second.getSizeInBits();
@ -1025,7 +1030,8 @@ public:
if (CondTy->isVectorTy())
ISD = ISD::VSELECT;
}
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
!TLI->isOperationExpand(ISD, LT.second)) {
@ -1055,7 +1061,7 @@ public:
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) {
std::pair<unsigned, MVT> LT =
std::pair<InstructionCost, MVT> LT =
getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
return LT.first;
@ -1069,7 +1075,8 @@ public:
// Assume types, such as structs, are expensive.
if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
return 4;
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
std::pair<InstructionCost, MVT> LT =
getTLI()->getTypeLegalizationCost(DL, Src);
// Assuming that all loads of legal types cost 1.
InstructionCost Cost = LT.first;
@ -1836,10 +1843,11 @@ public:
}
const TargetLoweringBase *TLI = getTLI();
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, RetTy);
SmallVector<unsigned, 2> LegalCost;
SmallVector<unsigned, 2> CustomCost;
SmallVector<InstructionCost, 2> LegalCost;
SmallVector<InstructionCost, 2> CustomCost;
for (unsigned ISD : ISDs) {
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
@ -1945,8 +1953,9 @@ public:
}
unsigned getNumberOfParts(Type *Tp) {
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
return LT.first;
std::pair<InstructionCost, MVT> LT =
getTLI()->getTypeLegalizationCost(DL, Tp);
return *LT.first.getValue();
}
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
@ -2013,7 +2022,7 @@ public:
unsigned NumReduxLevels = Log2_32(NumVecElts);
InstructionCost ArithCost = 0;
InstructionCost ShuffleCost = 0;
std::pair<unsigned, MVT> LT =
std::pair<InstructionCost, MVT> LT =
thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
unsigned LongVectorCount = 0;
unsigned MVTLen =
@ -2069,7 +2078,7 @@ public:
}
InstructionCost MinMaxCost = 0;
InstructionCost ShuffleCost = 0;
std::pair<unsigned, MVT> LT =
std::pair<InstructionCost, MVT> LT =
thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
unsigned LongVectorCount = 0;
unsigned MVTLen =

View File

@ -1815,8 +1815,8 @@ public:
int InstructionOpcodeToISD(unsigned Opcode) const;
/// Estimate the cost of type-legalization and the legalized type.
std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const;
std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const;
/// @}

View File

@ -1807,13 +1807,13 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
llvm_unreachable("Unknown instruction type encountered!");
}
std::pair<int, MVT>
std::pair<InstructionCost, MVT>
TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const {
LLVMContext &C = Ty->getContext();
EVT MTy = getValueType(DL, Ty);
int Cost = 1;
InstructionCost Cost = 1;
// We keep legalizing the type until we find a legal kind. We assume that
// the only operation that costs anything is the split. After splitting
// we need to handle two types.

View File

@ -545,8 +545,10 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
return false;
// Get the total number of vector elements in the legalized types.
unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements();
unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
InstructionCost NumDstEls =
DstTyL.first * DstTyL.second.getVectorMinNumElements();
InstructionCost NumSrcEls =
SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
// Return true if the legalized types have the same number of vector elements
// and the destination element type size is twice that of the source type.
@ -906,7 +908,7 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index != -1U) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
@ -938,7 +940,7 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
Opd2PropInfo, Args, CxtI);
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
// If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
// add in the widening overhead specified by the sub-target. Since the
@ -1536,7 +1538,7 @@ AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
"Both vector needs to be scalable");
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
InstructionCost LegalizationCost = 0;
if (LT.first > 1) {
Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
@ -1558,7 +1560,7 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE(
TTI::TargetCostKind CostKind) {
assert(!IsPairwise && "Cannot be pair wise to continue");
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
InstructionCost LegalizationCost = 0;
if (LT.first > 1) {
Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
@ -1593,7 +1595,7 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
CostKind);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@ -1694,7 +1696,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{ TTI::SK_Reverse, MVT::nxv4i1, 1 },
{ TTI::SK_Reverse, MVT::nxv2i1, 1 },
};
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
}

View File

@ -531,7 +531,7 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
bool IsFloat = Ty->isFPOrFPVectorTy();
// Assume that floating point arithmetic operations cost twice as much as
@ -569,7 +569,7 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
}
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// Because we don't have any legal vector operations, but the legal types, we
@ -775,7 +775,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
unsigned NElts = LT.second.isVector() ?
LT.second.getVectorNumElements() : 1;
@ -857,7 +857,7 @@ GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
OrigTy.getScalarSizeInBits() != 16)
return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise, CostKind);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
return LT.first * getFullRateInstrCost();
}
@ -875,7 +875,7 @@ GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
CostKind);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
return LT.first * getHalfRateInstrCost(CostKind);
}

View File

@ -12257,10 +12257,11 @@ bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
}
std::pair<int, MVT>
std::pair<InstructionCost, MVT>
SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const {
auto Cost = TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> Cost =
TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
auto Size = DL.getTypeSizeInBits(Ty);
// Maximum load or store can handle 8 dwords for scalar and 4 for
// vector ALU. Let's assume anything above 8 dwords is expensive

View File

@ -491,8 +491,8 @@ public:
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const;
std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const;
std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const;
};
} // End namespace llvm

View File

@ -559,7 +559,7 @@ InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_EXTEND, MVT::v2f32, 2},
{ISD::FP_EXTEND, MVT::v4f32, 4}};
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
return AdjustCost(LT.first * Entry->Cost);
}
@ -825,7 +825,7 @@ InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
// Integer cross-lane moves are more expensive than float, which can
// sometimes just be vmovs. Integer moves involve being passed to GPR registers,
// causing more of a delay.
std::pair<unsigned, MVT> LT =
std::pair<InstructionCost, MVT> LT =
getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType());
return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
}
@ -851,7 +851,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// - may require one or more conditional mov (including an IT),
// - can't operate directly on immediates,
// - require live flags, which we can't copy around easily.
int Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
InstructionCost Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
// Possible IT instruction for Thumb2, or more for Thumb1.
++Cost;
@ -928,7 +928,8 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return Entry->Cost;
}
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
return LT.first;
}
@ -952,7 +953,8 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
I);
}
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
int BaseCost = ST->getMVEVectorCostFactor(CostKind);
// There are two types - the input that specifies the type of the compare
// and the output vXi1 type. Because we don't know how the output will be
@ -1156,8 +1158,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry =
CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
@ -1178,8 +1179,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry =
CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
@ -1203,7 +1203,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
@ -1219,8 +1219,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
LT.second))
return LT.first * Entry->Cost *
@ -1228,7 +1227,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
}
if (!Mask.empty()) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (Mask.size() <= LT.second.getVectorNumElements() &&
(isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
isVREVMask(Mask, LT.second, 64)))
@ -1264,7 +1263,7 @@ InstructionCost ARMTTIImpl::getArithmeticInstrCost(
}
}
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->hasNEON()) {
const unsigned FunctionCallDivCost = 20;
@ -1403,7 +1402,7 @@ InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
// Unaligned loads/stores are extremely inefficient.
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
return LT.first * 4;
}
@ -1504,14 +1503,14 @@ InstructionCost ARMTTIImpl::getGatherScatterOpCost(
unsigned NumElems = VTy->getNumElements();
unsigned EltSize = VTy->getScalarSizeInBits();
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
// For now, it is assumed that for the MVE gather instructions the loads are
// all effectively serialised. This means the cost is the scalar cost
// multiplied by the number of elements being loaded. This is possibly very
// conservative, but even so we still end up vectorising loops because the
// cost per iteration for many loops is lower than for scalar loops.
unsigned VectorCost =
InstructionCost VectorCost =
NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
// The scalarization cost should be a lot higher. We use the number of vector
// elements plus the scalarization overhead.
@ -1598,7 +1597,7 @@ ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
CostKind);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
static const CostTblEntry CostTblAdd[]{
{ISD::ADD, MVT::v16i8, 1},
@ -1619,7 +1618,8 @@ ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
EVT ValVT = TLI->getValueType(DL, ValTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) ||
(LT.second == MVT::v8i16 &&
ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) ||
@ -1654,8 +1654,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
break;
Type *VT = ICA.getReturnType();
std::pair<int, MVT> LT =
TLI->getTypeLegalizationCost(DL, VT);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
LT.second == MVT::v16i8) {
// This is a base cost of 1 for the vqadd, plus 3 extract shifts if we
@ -1675,7 +1674,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
break;
Type *VT = ICA.getReturnType();
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
LT.second == MVT::v16i8)
return LT.first * ST->getMVEVectorCostFactor(CostKind);
@ -1686,7 +1685,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
if (!ST->hasMVEFloatOps())
break;
Type *VT = ICA.getReturnType();
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
return LT.first * ST->getMVEVectorCostFactor(CostKind);
break;

View File

@ -143,7 +143,8 @@ InstructionCost
HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
if (ICA.getID() == Intrinsic::bswap) {
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
std::pair<InstructionCost, MVT> LT =
TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
return LT.first + 2;
}
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
@ -251,7 +252,7 @@ InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
TTI::TargetCostKind CostKind,
const Instruction *I) {
if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
if (Opcode == Instruction::FCmp)
return LT.first + FloatFactor * getTypeNumElements(ValTy);
}
@ -271,7 +272,7 @@ InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
Opd2PropInfo, Args, CxtI);
if (Ty->isVectorTy()) {
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
if (LT.second.isFloatingPoint())
return LT.first + FloatFactor * getTypeNumElements(Ty);
}
@ -288,9 +289,12 @@ InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
unsigned Cost = std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
std::pair<InstructionCost, MVT> SrcLT =
TLI.getTypeLegalizationCost(DL, SrcTy);
std::pair<InstructionCost, MVT> DstLT =
TLI.getTypeLegalizationCost(DL, DstTy);
InstructionCost Cost =
std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
// TODO: Allow non-throughput costs that aren't binary.
if (CostKind != TTI::TCK_RecipThroughput)
return Cost == 0 ? 0 : 1;

View File

@ -375,7 +375,7 @@ InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);

View File

@ -328,7 +328,8 @@ InstructionCost PPCTTIImpl::getUserCost(const User *U,
if (U->getType()->isVectorTy()) {
// Instructions that need to be split should cost more.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, U->getType());
return LT.first * BaseT::getUserCost(U, Operands, CostKind);
}
@ -946,7 +947,7 @@ InstructionCost PPCTTIImpl::vectorCostAdjustment(InstructionCost Cost,
if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
return Cost;
std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
std::pair<InstructionCost, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
// If type legalization involves splitting the vector, we don't want to
// double the cost at every step - only the last step.
if (LT1.first != 1 || !LT1.second.isVector())
@ -957,7 +958,7 @@ InstructionCost PPCTTIImpl::vectorCostAdjustment(InstructionCost Cost,
return Cost;
if (Ty2) {
std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
std::pair<InstructionCost, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
if (LT2.first != 1 || !LT2.second.isVector())
return Cost;
}
@ -988,7 +989,7 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask, int Index,
Type *SubTp) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
@ -1113,7 +1114,7 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
@ -1198,7 +1199,7 @@ InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
"Expect a vector type for interleaved memory op");
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
// Firstly, the cost of load/store operation.
InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),

View File

@ -185,7 +185,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
Op2Info, Opd1PropInfo,
Opd2PropInfo, Args, CxtI);
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@ -966,7 +966,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *SubTp) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
Kind = improveShuffleKindFromMask(Kind, Mask);
// Treat Transpose as 2-op shuffles - there's no difference in lowering.
@ -985,7 +985,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
int NumElts = LT.second.getVectorNumElements();
if ((Index % NumElts) == 0)
return 0;
std::pair<int, MVT> SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
std::pair<InstructionCost, MVT> SubLT =
TLI->getTypeLegalizationCost(DL, SubTp);
if (SubLT.second.isVector()) {
int NumSubElts = SubLT.second.getVectorNumElements();
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
@ -1031,7 +1032,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// isn't free, because we need to preserve the rest of the wide vector.
if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) {
int NumElts = LT.second.getVectorNumElements();
std::pair<int, MVT> SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
std::pair<InstructionCost, MVT> SubLT =
TLI->getTypeLegalizationCost(DL, SubTp);
if (SubLT.second.isVector()) {
int NumSubElts = SubLT.second.getVectorNumElements();
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
@ -1091,12 +1093,12 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Number of source vectors after legalization:
unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Number of destination vectors after legalization:
unsigned NumOfDests = LT.first;
InstructionCost NumOfDests = LT.first;
auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(),
LegalVT.getVectorNumElements());
unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy,
None, 0, nullptr);
}
@ -1107,8 +1109,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// For 2-input shuffles, we must account for splitting the 2 inputs into many.
if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
// We assume that source and destination have the same vector type.
int NumOfDests = LT.first;
int NumOfShufflesPerDest = LT.first * 2 - 1;
InstructionCost NumOfDests = LT.first;
InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1;
LT.first = NumOfDests * NumOfShufflesPerDest;
}
@ -2024,8 +2026,9 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD
};
std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
std::pair<InstructionCost, MVT> LTDest =
TLI->getTypeLegalizationCost(DL, Dst);
if (ST->hasSSE2() && !ST->hasAVX()) {
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
@ -2114,7 +2117,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
I);
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
@ -2804,7 +2807,7 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
if (ISD != ISD::DELETED_NODE) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy);
MVT MTy = LT.second;
// Attempt to lookup cost.
@ -2824,7 +2827,8 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return LT.first * Cost;
}
auto adjustTableCost = [](const CostTblEntry &Entry, int LegalizationCost,
auto adjustTableCost = [](const CostTblEntry &Entry,
InstructionCost LegalizationCost,
FastMathFlags FMF) {
// If there are no NANs to deal with, then these are reduced to a
// single MIN** or MAX** instruction instead of the MIN/CMP/SELECT that we
@ -3006,7 +3010,8 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
if (ISD != ISD::DELETED_NODE) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, RetTy);
MVT MTy = LT.second;
// Attempt to lookup cost.
@ -3045,7 +3050,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index != -1U && (Opcode == Instruction::ExtractElement ||
Opcode == Instruction::InsertElement)) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
@ -3133,7 +3138,7 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
// For insertions, a ISD::BUILD_VECTOR style vector initialization can be much
// cheaper than an accumulation of ISD::INSERT_VECTOR_ELT.
if (Insert) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
MVT MScalarTy = LT.second.getScalarType();
if ((MScalarTy == MVT::i16 && ST->hasSSE2()) ||
@ -3157,8 +3162,10 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
// Case#2: inserting into 5th index needs extracti128 + vpinsrd +
// inserti128.
// Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128.
unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * LT.first;
unsigned NumElts = LT.second.getVectorNumElements() * LT.first;
const int CostValue = *LT.first.getValue();
assert(CostValue >= 0 && "Negative cost!");
unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * CostValue;
unsigned NumElts = LT.second.getVectorNumElements() * CostValue;
APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts);
unsigned Scale = NumElts / Num128Lanes;
// We iterate each 128-lane, and check if we need a
@ -3249,7 +3256,8 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
Cost +=
getMemoryOpCost(Opcode, SubTy, Alignment, AddressSpace, CostKind);
std::pair<int, MVT> LST = TLI->getTypeLegalizationCost(DL, SubTy);
std::pair<InstructionCost, MVT> LST =
TLI->getTypeLegalizationCost(DL, SubTy);
if (!LST.second.isVector()) {
APInt DemandedElts =
APInt::getBitsSet(NumElem, NumElemDone, NumElemDone + Factor);
@ -3267,10 +3275,10 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
}
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
// Each load/store unit costs 1.
int Cost = LT.first * 1;
InstructionCost Cost = LT.first * 1;
// This isn't exactly right. We're using slow unaligned 32-byte accesses as a
// proxy for a double-pumped AVX memory interface such as on Sandybridge.
@ -3316,7 +3324,7 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
}
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
auto VT = TLI->getValueType(DL, SrcVTy);
InstructionCost Cost = 0;
if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
@ -3431,7 +3439,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
return Entry->Cost;
}
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
@ -3621,7 +3629,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
bool IsUnsigned) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
MVT MTy = LT.second;
@ -3756,7 +3764,7 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned,
CostKind);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
@ -4184,9 +4192,12 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy,
auto *IndexVTy = FixedVectorType::get(
IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
std::pair<int, MVT> IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy);
std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy);
int SplitFactor = std::max(IdxsLT.first, SrcLT.first);
std::pair<InstructionCost, MVT> IdxsLT =
TLI->getTypeLegalizationCost(DL, IndexVTy);
std::pair<InstructionCost, MVT> SrcLT =
TLI->getTypeLegalizationCost(DL, SrcVTy);
InstructionCost::CostType SplitFactor =
*std::max(IdxsLT.first, SrcLT.first).getValue();
if (SplitFactor > 1) {
// Handle splitting of vector of pointers
auto *SplitSrcTy =
@ -4722,7 +4733,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
Indices.size() ? Indices.size() : Factor;
auto *ResultTy = FixedVectorType::get(VecTy->getElementType(),
VecTy->getNumElements() / Factor);
unsigned NumOfResults =
InstructionCost NumOfResults =
getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
NumOfLoadsInInterleaveGrp;
@ -4738,7 +4749,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
// The SK_MergeTwoSrc shuffle clobbers one of the src operands.
// When we have more than one destination, we need additional instructions
// to keep sources.
unsigned NumOfMoves = 0;
InstructionCost NumOfMoves = 0;
if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc)
NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;