mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[TTI] NFC: Change getTypeLegalizationCost to return InstructionCost.
This patch migrates the TTI cost interfaces to return an InstructionCost.

See this patch for the introduction of the type: https://reviews.llvm.org/D91174
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html

Reviewed By: sdesmalen, kparzysz

Differential Revision: https://reviews.llvm.org/D101533
This commit is contained in:
parent
8ceccff3f0
commit
3f292e8925
@ -357,7 +357,10 @@ public:
|
||||
}
|
||||
|
||||
unsigned getRegUsageForType(Type *Ty) {
|
||||
return getTLI()->getTypeLegalizationCost(DL, Ty).first;
|
||||
InstructionCost::CostType Val =
|
||||
*getTLI()->getTypeLegalizationCost(DL, Ty).first.getValue();
|
||||
assert(Val >= 0 && "Negative cost!");
|
||||
return Val;
|
||||
}
|
||||
|
||||
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
|
||||
@ -750,7 +753,7 @@ public:
|
||||
Opd1PropInfo, Opd2PropInfo,
|
||||
Args, CxtI);
|
||||
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
|
||||
bool IsFloat = Ty->isFPOrFPVectorTy();
|
||||
// Assume that floating point arithmetic operations cost twice as much as
|
||||
@ -852,8 +855,10 @@ public:
|
||||
const TargetLoweringBase *TLI = getTLI();
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
assert(ISD && "Invalid opcode");
|
||||
std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
|
||||
std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
|
||||
std::pair<InstructionCost, MVT> SrcLT =
|
||||
TLI->getTypeLegalizationCost(DL, Src);
|
||||
std::pair<InstructionCost, MVT> DstLT =
|
||||
TLI->getTypeLegalizationCost(DL, Dst);
|
||||
|
||||
TypeSize SrcSize = SrcLT.second.getSizeInBits();
|
||||
TypeSize DstSize = DstLT.second.getSizeInBits();
|
||||
@ -1025,7 +1030,8 @@ public:
|
||||
if (CondTy->isVectorTy())
|
||||
ISD = ISD::VSELECT;
|
||||
}
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
|
||||
if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
|
||||
!TLI->isOperationExpand(ISD, LT.second)) {
|
||||
@ -1055,7 +1061,7 @@ public:
|
||||
|
||||
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||
unsigned Index) {
|
||||
std::pair<unsigned, MVT> LT =
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
|
||||
|
||||
return LT.first;
|
||||
@ -1069,7 +1075,8 @@ public:
|
||||
// Assume types, such as structs, are expensive.
|
||||
if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
|
||||
return 4;
|
||||
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
getTLI()->getTypeLegalizationCost(DL, Src);
|
||||
|
||||
// Assuming that all loads of legal types cost 1.
|
||||
InstructionCost Cost = LT.first;
|
||||
@ -1836,10 +1843,11 @@ public:
|
||||
}
|
||||
|
||||
const TargetLoweringBase *TLI = getTLI();
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
TLI->getTypeLegalizationCost(DL, RetTy);
|
||||
|
||||
SmallVector<unsigned, 2> LegalCost;
|
||||
SmallVector<unsigned, 2> CustomCost;
|
||||
SmallVector<InstructionCost, 2> LegalCost;
|
||||
SmallVector<InstructionCost, 2> CustomCost;
|
||||
for (unsigned ISD : ISDs) {
|
||||
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
|
||||
if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
|
||||
@ -1945,8 +1953,9 @@ public:
|
||||
}
|
||||
|
||||
unsigned getNumberOfParts(Type *Tp) {
|
||||
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
|
||||
return LT.first;
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
getTLI()->getTypeLegalizationCost(DL, Tp);
|
||||
return *LT.first.getValue();
|
||||
}
|
||||
|
||||
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
|
||||
@ -2013,7 +2022,7 @@ public:
|
||||
unsigned NumReduxLevels = Log2_32(NumVecElts);
|
||||
InstructionCost ArithCost = 0;
|
||||
InstructionCost ShuffleCost = 0;
|
||||
std::pair<unsigned, MVT> LT =
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
|
||||
unsigned LongVectorCount = 0;
|
||||
unsigned MVTLen =
|
||||
@ -2069,7 +2078,7 @@ public:
|
||||
}
|
||||
InstructionCost MinMaxCost = 0;
|
||||
InstructionCost ShuffleCost = 0;
|
||||
std::pair<unsigned, MVT> LT =
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
|
||||
unsigned LongVectorCount = 0;
|
||||
unsigned MVTLen =
|
||||
|
@ -1815,8 +1815,8 @@ public:
|
||||
int InstructionOpcodeToISD(unsigned Opcode) const;
|
||||
|
||||
/// Estimate the cost of type-legalization and the legalized type.
|
||||
std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
|
||||
Type *Ty) const;
|
||||
std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
|
||||
Type *Ty) const;
|
||||
|
||||
/// @}
|
||||
|
||||
|
@ -1807,13 +1807,13 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
|
||||
llvm_unreachable("Unknown instruction type encountered!");
|
||||
}
|
||||
|
||||
std::pair<int, MVT>
|
||||
std::pair<InstructionCost, MVT>
|
||||
TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
|
||||
Type *Ty) const {
|
||||
LLVMContext &C = Ty->getContext();
|
||||
EVT MTy = getValueType(DL, Ty);
|
||||
|
||||
int Cost = 1;
|
||||
InstructionCost Cost = 1;
|
||||
// We keep legalizing the type until we find a legal kind. We assume that
|
||||
// the only operation that costs anything is the split. After splitting
|
||||
// we need to handle two types.
|
||||
|
@ -545,8 +545,10 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
|
||||
return false;
|
||||
|
||||
// Get the total number of vector elements in the legalized types.
|
||||
unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements();
|
||||
unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
|
||||
InstructionCost NumDstEls =
|
||||
DstTyL.first * DstTyL.second.getVectorMinNumElements();
|
||||
InstructionCost NumSrcEls =
|
||||
SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
|
||||
|
||||
// Return true if the legalized types have the same number of vector elements
|
||||
// and the destination element type size is twice that of the source type.
|
||||
@ -906,7 +908,7 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||
|
||||
if (Index != -1U) {
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
|
||||
|
||||
// This type is legalized to a scalar type.
|
||||
if (!LT.second.isVector())
|
||||
@ -938,7 +940,7 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
|
||||
Opd2PropInfo, Args, CxtI);
|
||||
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
|
||||
// If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
|
||||
// add in the widening overhead specified by the sub-target. Since the
|
||||
@ -1536,7 +1538,7 @@ AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
|
||||
assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
|
||||
"Both vector needs to be scalable");
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
InstructionCost LegalizationCost = 0;
|
||||
if (LT.first > 1) {
|
||||
Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
|
||||
@ -1558,7 +1560,7 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE(
|
||||
TTI::TargetCostKind CostKind) {
|
||||
assert(!IsPairwise && "Cannot be pair wise to continue");
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
InstructionCost LegalizationCost = 0;
|
||||
if (LT.first > 1) {
|
||||
Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
|
||||
@ -1593,7 +1595,7 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
||||
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
|
||||
CostKind);
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
MVT MTy = LT.second;
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
assert(ISD && "Invalid opcode");
|
||||
@ -1694,7 +1696,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
{ TTI::SK_Reverse, MVT::nxv4i1, 1 },
|
||||
{ TTI::SK_Reverse, MVT::nxv2i1, 1 },
|
||||
};
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
@ -531,7 +531,7 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
assert(ISD && "Invalid opcode");
|
||||
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
|
||||
bool IsFloat = Ty->isFPOrFPVectorTy();
|
||||
// Assume that floating point arithmetic operations cost twice as much as
|
||||
@ -569,7 +569,7 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
|
||||
}
|
||||
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
|
||||
// Because we don't have any legal vector operations, but the legal types, we
|
||||
@ -775,7 +775,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||
}
|
||||
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
||||
|
||||
unsigned NElts = LT.second.isVector() ?
|
||||
LT.second.getVectorNumElements() : 1;
|
||||
@ -857,7 +857,7 @@ GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
|
||||
OrigTy.getScalarSizeInBits() != 16)
|
||||
return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise, CostKind);
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
return LT.first * getFullRateInstrCost();
|
||||
}
|
||||
|
||||
@ -875,7 +875,7 @@ GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
|
||||
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
|
||||
CostKind);
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
return LT.first * getHalfRateInstrCost(CostKind);
|
||||
}
|
||||
|
||||
|
@ -12257,10 +12257,11 @@ bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
|
||||
return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
|
||||
}
|
||||
|
||||
std::pair<int, MVT>
|
||||
std::pair<InstructionCost, MVT>
|
||||
SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
|
||||
Type *Ty) const {
|
||||
auto Cost = TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> Cost =
|
||||
TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
|
||||
auto Size = DL.getTypeSizeInBits(Ty);
|
||||
// Maximum load or store can handle 8 dwords for scalar and 4 for
|
||||
// vector ALU. Let's assume anything above 8 dwords is expensive
|
||||
|
@ -491,8 +491,8 @@ public:
|
||||
const SIRegisterInfo &TRI,
|
||||
SIMachineFunctionInfo &Info) const;
|
||||
|
||||
std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
|
||||
Type *Ty) const;
|
||||
std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
|
||||
Type *Ty) const;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
@ -559,7 +559,7 @@ InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||
{ISD::FP_EXTEND, MVT::v2f32, 2},
|
||||
{ISD::FP_EXTEND, MVT::v4f32, 4}};
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
||||
if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
|
||||
return AdjustCost(LT.first * Entry->Cost);
|
||||
}
|
||||
@ -825,7 +825,7 @@ InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
|
||||
// Integer cross-lane moves are more expensive than float, which can
|
||||
// sometimes just be vmovs. Integer involve being passes to GPR registers,
|
||||
// causing more of a delay.
|
||||
std::pair<unsigned, MVT> LT =
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType());
|
||||
return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
|
||||
}
|
||||
@ -851,7 +851,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
// - may require one or more conditional mov (including an IT),
|
||||
// - can't operate directly on immediates,
|
||||
// - require live flags, which we can't copy around easily.
|
||||
int Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
|
||||
InstructionCost Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
|
||||
|
||||
// Possible IT instruction for Thumb2, or more for Thumb1.
|
||||
++Cost;
|
||||
@ -928,7 +928,8 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
return Entry->Cost;
|
||||
}
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
return LT.first;
|
||||
}
|
||||
|
||||
@ -952,7 +953,8 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
I);
|
||||
}
|
||||
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
int BaseCost = ST->getMVEVectorCostFactor(CostKind);
|
||||
// There are two types - the input that specifies the type of the compare
|
||||
// and the output vXi1 type. Because we don't know how the output will be
|
||||
@ -1156,8 +1158,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
|
||||
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
@ -1178,8 +1179,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
|
||||
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
@ -1203,7 +1203,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
|
||||
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
|
||||
ISD::VECTOR_SHUFFLE, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
@ -1219,8 +1219,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
|
||||
{ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
|
||||
LT.second))
|
||||
return LT.first * Entry->Cost *
|
||||
@ -1228,7 +1227,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
}
|
||||
|
||||
if (!Mask.empty()) {
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
if (Mask.size() <= LT.second.getVectorNumElements() &&
|
||||
(isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
|
||||
isVREVMask(Mask, LT.second, 64)))
|
||||
@ -1264,7 +1263,7 @@ InstructionCost ARMTTIImpl::getArithmeticInstrCost(
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
|
||||
if (ST->hasNEON()) {
|
||||
const unsigned FunctionCallDivCost = 20;
|
||||
@ -1403,7 +1402,7 @@ InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
|
||||
// Unaligned loads/stores are extremely inefficient.
|
||||
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
||||
return LT.first * 4;
|
||||
}
|
||||
|
||||
@ -1504,14 +1503,14 @@ InstructionCost ARMTTIImpl::getGatherScatterOpCost(
|
||||
|
||||
unsigned NumElems = VTy->getNumElements();
|
||||
unsigned EltSize = VTy->getScalarSizeInBits();
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
|
||||
|
||||
// For now, it is assumed that for the MVE gather instructions the loads are
|
||||
// all effectively serialised. This means the cost is the scalar cost
|
||||
// multiplied by the number of elements being loaded. This is possibly very
|
||||
// conservative, but even so we still end up vectorising loops because the
|
||||
// cost per iteration for many loops is lower than for scalar loops.
|
||||
unsigned VectorCost =
|
||||
InstructionCost VectorCost =
|
||||
NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
|
||||
// The scalarization cost should be a lot higher. We use the number of vector
|
||||
// elements plus the scalarization overhead.
|
||||
@ -1598,7 +1597,7 @@ ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
||||
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
|
||||
CostKind);
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
|
||||
static const CostTblEntry CostTblAdd[]{
|
||||
{ISD::ADD, MVT::v16i8, 1},
|
||||
@ -1619,7 +1618,8 @@ ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
|
||||
EVT ValVT = TLI->getValueType(DL, ValTy);
|
||||
EVT ResVT = TLI->getValueType(DL, ResTy);
|
||||
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) ||
|
||||
(LT.second == MVT::v8i16 &&
|
||||
ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) ||
|
||||
@ -1654,8 +1654,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||
break;
|
||||
Type *VT = ICA.getReturnType();
|
||||
|
||||
std::pair<int, MVT> LT =
|
||||
TLI->getTypeLegalizationCost(DL, VT);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
|
||||
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
|
||||
LT.second == MVT::v16i8) {
|
||||
// This is a base cost of 1 for the vqadd, plus 3 extract shifts if we
|
||||
@ -1675,7 +1674,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||
break;
|
||||
Type *VT = ICA.getReturnType();
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
|
||||
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
|
||||
LT.second == MVT::v16i8)
|
||||
return LT.first * ST->getMVEVectorCostFactor(CostKind);
|
||||
@ -1686,7 +1685,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||
if (!ST->hasMVEFloatOps())
|
||||
break;
|
||||
Type *VT = ICA.getReturnType();
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
|
||||
if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
|
||||
return LT.first * ST->getMVEVectorCostFactor(CostKind);
|
||||
break;
|
||||
|
@ -143,7 +143,8 @@ InstructionCost
|
||||
HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||
TTI::TargetCostKind CostKind) {
|
||||
if (ICA.getID() == Intrinsic::bswap) {
|
||||
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
|
||||
return LT.first + 2;
|
||||
}
|
||||
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
|
||||
@ -251,7 +252,7 @@ InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) {
|
||||
if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
|
||||
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
|
||||
if (Opcode == Instruction::FCmp)
|
||||
return LT.first + FloatFactor * getTypeNumElements(ValTy);
|
||||
}
|
||||
@ -271,7 +272,7 @@ InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
|
||||
Opd2PropInfo, Args, CxtI);
|
||||
|
||||
if (Ty->isVectorTy()) {
|
||||
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
|
||||
if (LT.second.isFloatingPoint())
|
||||
return LT.first + FloatFactor * getTypeNumElements(Ty);
|
||||
}
|
||||
@ -288,9 +289,12 @@ InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
|
||||
unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
|
||||
unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
|
||||
|
||||
std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
|
||||
std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
|
||||
unsigned Cost = std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
|
||||
std::pair<InstructionCost, MVT> SrcLT =
|
||||
TLI.getTypeLegalizationCost(DL, SrcTy);
|
||||
std::pair<InstructionCost, MVT> DstLT =
|
||||
TLI.getTypeLegalizationCost(DL, DstTy);
|
||||
InstructionCost Cost =
|
||||
std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
|
||||
// TODO: Allow non-throughput costs that aren't binary.
|
||||
if (CostKind != TTI::TCK_RecipThroughput)
|
||||
return Cost == 0 ? 0 : 1;
|
||||
|
@ -375,7 +375,7 @@ InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
|
||||
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
|
||||
const Instruction *CxtI) {
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
|
||||
|
@ -328,7 +328,8 @@ InstructionCost PPCTTIImpl::getUserCost(const User *U,
|
||||
|
||||
if (U->getType()->isVectorTy()) {
|
||||
// Instructions that need to be split should cost more.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
TLI->getTypeLegalizationCost(DL, U->getType());
|
||||
return LT.first * BaseT::getUserCost(U, Operands, CostKind);
|
||||
}
|
||||
|
||||
@ -946,7 +947,7 @@ InstructionCost PPCTTIImpl::vectorCostAdjustment(InstructionCost Cost,
|
||||
if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
|
||||
return Cost;
|
||||
|
||||
std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
|
||||
std::pair<InstructionCost, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
|
||||
// If type legalization involves splitting the vector, we don't want to
|
||||
// double the cost at every step - only the last step.
|
||||
if (LT1.first != 1 || !LT1.second.isVector())
|
||||
@ -957,7 +958,7 @@ InstructionCost PPCTTIImpl::vectorCostAdjustment(InstructionCost Cost,
|
||||
return Cost;
|
||||
|
||||
if (Ty2) {
|
||||
std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
|
||||
std::pair<InstructionCost, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
|
||||
if (LT2.first != 1 || !LT2.second.isVector())
|
||||
return Cost;
|
||||
}
|
||||
@ -988,7 +989,7 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
|
||||
ArrayRef<int> Mask, int Index,
|
||||
Type *SubTp) {
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
|
||||
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
|
||||
// (at least in the sense that there need only be one non-loop-invariant
|
||||
@ -1113,7 +1114,7 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
||||
CostKind);
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
||||
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
|
||||
"Invalid Opcode");
|
||||
|
||||
@ -1198,7 +1199,7 @@ InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
|
||||
"Expect a vector type for interleaved memory op");
|
||||
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
|
||||
|
||||
// Firstly, the cost of load/store operation.
|
||||
InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
|
||||
|
@ -185,7 +185,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
|
||||
Op2Info, Opd1PropInfo,
|
||||
Opd2PropInfo, Args, CxtI);
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
assert(ISD && "Invalid opcode");
|
||||
@ -966,7 +966,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
VectorType *SubTp) {
|
||||
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
|
||||
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
|
||||
|
||||
Kind = improveShuffleKindFromMask(Kind, Mask);
|
||||
// Treat Transpose as 2-op shuffles - there's no difference in lowering.
|
||||
@ -985,7 +985,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
int NumElts = LT.second.getVectorNumElements();
|
||||
if ((Index % NumElts) == 0)
|
||||
return 0;
|
||||
std::pair<int, MVT> SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
|
||||
std::pair<InstructionCost, MVT> SubLT =
|
||||
TLI->getTypeLegalizationCost(DL, SubTp);
|
||||
if (SubLT.second.isVector()) {
|
||||
int NumSubElts = SubLT.second.getVectorNumElements();
|
||||
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
|
||||
@ -1031,7 +1032,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
// isn't free, because we need to preserve the rest of the wide vector.
|
||||
if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) {
|
||||
int NumElts = LT.second.getVectorNumElements();
|
||||
std::pair<int, MVT> SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
|
||||
std::pair<InstructionCost, MVT> SubLT =
|
||||
TLI->getTypeLegalizationCost(DL, SubTp);
|
||||
if (SubLT.second.isVector()) {
|
||||
int NumSubElts = SubLT.second.getVectorNumElements();
|
||||
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
|
||||
@ -1091,12 +1093,12 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
// Number of source vectors after legalization:
|
||||
unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
|
||||
// Number of destination vectors after legalization:
|
||||
unsigned NumOfDests = LT.first;
|
||||
InstructionCost NumOfDests = LT.first;
|
||||
|
||||
auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(),
|
||||
LegalVT.getVectorNumElements());
|
||||
|
||||
unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
|
||||
InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
|
||||
return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy,
|
||||
None, 0, nullptr);
|
||||
}
|
||||
@ -1107,8 +1109,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
// For 2-input shuffles, we must account for splitting the 2 inputs into many.
|
||||
if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
|
||||
// We assume that source and destination have the same vector type.
|
||||
int NumOfDests = LT.first;
|
||||
int NumOfShufflesPerDest = LT.first * 2 - 1;
|
||||
InstructionCost NumOfDests = LT.first;
|
||||
InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1;
|
||||
LT.first = NumOfDests * NumOfShufflesPerDest;
|
||||
}
|
||||
|
||||
@ -2024,8 +2026,9 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||
{ ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD
|
||||
};
|
||||
|
||||
std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
|
||||
std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
|
||||
std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
|
||||
std::pair<InstructionCost, MVT> LTDest =
|
||||
TLI->getTypeLegalizationCost(DL, Dst);
|
||||
|
||||
if (ST->hasSSE2() && !ST->hasAVX()) {
|
||||
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
|
||||
@ -2114,7 +2117,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
I);
|
||||
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
|
||||
MVT MTy = LT.second;
|
||||
|
||||
@ -2804,7 +2807,7 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||
|
||||
if (ISD != ISD::DELETED_NODE) {
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy);
|
||||
MVT MTy = LT.second;
|
||||
|
||||
// Attempt to lookup cost.
|
||||
@ -2824,7 +2827,8 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||
return LT.first * Cost;
|
||||
}
|
||||
|
||||
auto adjustTableCost = [](const CostTblEntry &Entry, int LegalizationCost,
|
||||
auto adjustTableCost = [](const CostTblEntry &Entry,
|
||||
InstructionCost LegalizationCost,
|
||||
FastMathFlags FMF) {
|
||||
// If there are no NANs to deal with, then these are reduced to a
|
||||
// single MIN** or MAX** instruction instead of the MIN/CMP/SELECT that we
|
||||
@ -3006,7 +3010,8 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||
|
||||
if (ISD != ISD::DELETED_NODE) {
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
||||
std::pair<InstructionCost, MVT> LT =
|
||||
TLI->getTypeLegalizationCost(DL, RetTy);
|
||||
MVT MTy = LT.second;
|
||||
|
||||
// Attempt to lookup cost.
|
||||
@ -3045,7 +3050,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||
if (Index != -1U && (Opcode == Instruction::ExtractElement ||
|
||||
Opcode == Instruction::InsertElement)) {
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
|
||||
|
||||
// This type is legalized to a scalar type.
|
||||
if (!LT.second.isVector())
|
||||
@ -3133,7 +3138,7 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
|
||||
// For insertions, a ISD::BUILD_VECTOR style vector initialization can be much
|
||||
// cheaper than an accumulation of ISD::INSERT_VECTOR_ELT.
|
||||
if (Insert) {
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
MVT MScalarTy = LT.second.getScalarType();
|
||||
|
||||
if ((MScalarTy == MVT::i16 && ST->hasSSE2()) ||
|
||||
@ -3157,8 +3162,10 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
|
||||
// Case#2: inserting into 5th index needs extracti128 + vpinsrd +
|
||||
// inserti128.
|
||||
// Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128.
|
||||
unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * LT.first;
|
||||
unsigned NumElts = LT.second.getVectorNumElements() * LT.first;
|
||||
const int CostValue = *LT.first.getValue();
|
||||
assert(CostValue >= 0 && "Negative cost!");
|
||||
unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * CostValue;
|
||||
unsigned NumElts = LT.second.getVectorNumElements() * CostValue;
|
||||
APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts);
|
||||
unsigned Scale = NumElts / Num128Lanes;
|
||||
// We iterate each 128-lane, and check if we need a
|
||||
@ -3249,7 +3256,8 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
Cost +=
|
||||
getMemoryOpCost(Opcode, SubTy, Alignment, AddressSpace, CostKind);
|
||||
|
||||
std::pair<int, MVT> LST = TLI->getTypeLegalizationCost(DL, SubTy);
|
||||
std::pair<InstructionCost, MVT> LST =
|
||||
TLI->getTypeLegalizationCost(DL, SubTy);
|
||||
if (!LST.second.isVector()) {
|
||||
APInt DemandedElts =
|
||||
APInt::getBitsSet(NumElem, NumElemDone, NumElemDone + Factor);
|
||||
@ -3267,10 +3275,10 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
}
|
||||
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
||||
|
||||
// Each load/store unit costs 1.
|
||||
int Cost = LT.first * 1;
|
||||
InstructionCost Cost = LT.first * 1;
|
||||
|
||||
// This isn't exactly right. We're using slow unaligned 32-byte accesses as a
|
||||
// proxy for a double-pumped AVX memory interface such as on Sandybridge.
|
||||
@ -3316,7 +3324,7 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
|
||||
}
|
||||
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
|
||||
auto VT = TLI->getValueType(DL, SrcVTy);
|
||||
InstructionCost Cost = 0;
|
||||
if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
|
||||
@ -3431,7 +3439,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
||||
return Entry->Cost;
|
||||
}
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
|
||||
MVT MTy = LT.second;
|
||||
|
||||
@ -3621,7 +3629,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
||||
|
||||
InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
|
||||
bool IsUnsigned) {
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
|
||||
MVT MTy = LT.second;
|
||||
|
||||
@ -3756,7 +3764,7 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
|
||||
return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned,
|
||||
CostKind);
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
|
||||
MVT MTy = LT.second;
|
||||
|
||||
@ -4184,9 +4192,12 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy,
|
||||
|
||||
auto *IndexVTy = FixedVectorType::get(
|
||||
IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
|
||||
std::pair<int, MVT> IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy);
|
||||
std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy);
|
||||
int SplitFactor = std::max(IdxsLT.first, SrcLT.first);
|
||||
std::pair<InstructionCost, MVT> IdxsLT =
|
||||
TLI->getTypeLegalizationCost(DL, IndexVTy);
|
||||
std::pair<InstructionCost, MVT> SrcLT =
|
||||
TLI->getTypeLegalizationCost(DL, SrcVTy);
|
||||
InstructionCost::CostType SplitFactor =
|
||||
*std::max(IdxsLT.first, SrcLT.first).getValue();
|
||||
if (SplitFactor > 1) {
|
||||
// Handle splitting of vector of pointers
|
||||
auto *SplitSrcTy =
|
||||
@ -4722,7 +4733,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
|
||||
Indices.size() ? Indices.size() : Factor;
|
||||
auto *ResultTy = FixedVectorType::get(VecTy->getElementType(),
|
||||
VecTy->getNumElements() / Factor);
|
||||
unsigned NumOfResults =
|
||||
InstructionCost NumOfResults =
|
||||
getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
|
||||
NumOfLoadsInInterleaveGrp;
|
||||
|
||||
@ -4738,7 +4749,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
|
||||
// The SK_MergeTwoSrc shuffle clobbers one of src operands.
|
||||
// When we have more than one destination, we need additional instructions
|
||||
// to keep sources.
|
||||
unsigned NumOfMoves = 0;
|
||||
InstructionCost NumOfMoves = 0;
|
||||
if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc)
|
||||
NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user