mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[SampleFDO] Another fix to prevent repeated indirect call promotion in
sample loader pass. In https://reviews.llvm.org/rG5fb65c02ca5e91e7e1a00e0efdb8edc899f3e4b9, to prevent repeated indirect call promotion for the same indirect call and the same target, we used a zero-count value profile entry to indicate that an indirect call has been promoted for a certain target. We removed the PromotedInsns cache in the same patch. However, there was a problem in that patch, described below, and that problem led me to add PromotedInsns back as a mitigation in https://reviews.llvm.org/rG4ffad1fb489f691825d6c7d78e1626de142f26cf. When we get the value profile from metadata by calling getValueProfDataFromInst, we need to specify the maximum possible number of values we expect to read. We used MaxNumPromotions in the last patch, so the maximum number of value entries extracted from metadata is MaxNumPromotions. If we have many values, including zero-count values, when we write the metadata, some of them will be dropped when we read them back because we only read MaxNumPromotions values. That allows repeated indirect call promotion again. We need to make sure that if there are values indicating promoted targets, those values are saved in metadata with higher priority than other values. This patch fixes that problem. We now use -1 instead of 0 to represent the count of a promoted target so that it is easier to sort the values. When we prepare to update the metadata in updateIDTMetaData, we sort the values in descending count order and extract only MaxNumPromotions values to write into metadata. Since -1 is the maximum uint64_t value, if we have MaxNumPromotions or fewer values with a count of -1, they will all be kept in metadata. If we have more than MaxNumPromotions values with a count of -1, we will save at most MaxNumPromotions of them. In that case, we have logic in place in doesHistoryAllowICP to guarantee that no more promotion will happen for the indirect call in the sample loader pass, because it has already been promoted enough. 
With this change, we can now safely remove PromotedInsns. Differential Revision: https://reviews.llvm.org/D97350
This commit is contained in:
parent
38673999a0
commit
ad2a6f2861
@ -253,6 +253,10 @@ void annotateValueSite(Module &M, Instruction &Inst,
|
|||||||
ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
|
ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
|
||||||
InstrProfValueKind ValueKind, uint32_t MaxMDCount);
|
InstrProfValueKind ValueKind, uint32_t MaxMDCount);
|
||||||
|
|
||||||
|
/// Magic number in the value profile data showing a target has been
|
||||||
|
/// promoted for the instruction and shouldn't be promoted again.
|
||||||
|
const uint64_t NOMORE_ICP_MAGICNUM = -1;
|
||||||
|
|
||||||
/// Extract the value profile data from \p Inst which is annotated with
|
/// Extract the value profile data from \p Inst which is annotated with
|
||||||
/// value profile meta data. Return false if there is no value data annotated,
|
/// value profile meta data. Return false if there is no value data annotated,
|
||||||
/// otherwise return true.
|
/// otherwise return true.
|
||||||
@ -261,7 +265,7 @@ bool getValueProfDataFromInst(const Instruction &Inst,
|
|||||||
uint32_t MaxNumValueData,
|
uint32_t MaxNumValueData,
|
||||||
InstrProfValueData ValueData[],
|
InstrProfValueData ValueData[],
|
||||||
uint32_t &ActualNumValueData, uint64_t &TotalC,
|
uint32_t &ActualNumValueData, uint64_t &TotalC,
|
||||||
bool GetZeroCntValue = false);
|
bool GetNoICPValue = false);
|
||||||
|
|
||||||
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
|
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
|
||||||
|
|
||||||
|
@ -45,7 +45,7 @@ static cl::opt<unsigned>
|
|||||||
|
|
||||||
// Set the maximum number of targets to promote for a single indirect-call
|
// Set the maximum number of targets to promote for a single indirect-call
|
||||||
// callsite.
|
// callsite.
|
||||||
cl::opt<unsigned>
|
static cl::opt<unsigned>
|
||||||
MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore,
|
MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore,
|
||||||
cl::desc("Max number of promotions for a single indirect "
|
cl::desc("Max number of promotions for a single indirect "
|
||||||
"call callsite"));
|
"call callsite"));
|
||||||
|
@ -988,7 +988,7 @@ bool getValueProfDataFromInst(const Instruction &Inst,
|
|||||||
uint32_t MaxNumValueData,
|
uint32_t MaxNumValueData,
|
||||||
InstrProfValueData ValueData[],
|
InstrProfValueData ValueData[],
|
||||||
uint32_t &ActualNumValueData, uint64_t &TotalC,
|
uint32_t &ActualNumValueData, uint64_t &TotalC,
|
||||||
bool GetZeroCntValue) {
|
bool GetNoICPValue) {
|
||||||
MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof);
|
MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof);
|
||||||
if (!MD)
|
if (!MD)
|
||||||
return false;
|
return false;
|
||||||
@ -1015,7 +1015,7 @@ bool getValueProfDataFromInst(const Instruction &Inst,
|
|||||||
|
|
||||||
// Get total count
|
// Get total count
|
||||||
ConstantInt *TotalCInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
|
ConstantInt *TotalCInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
|
||||||
if (!TotalCInt && !GetZeroCntValue)
|
if (!TotalCInt)
|
||||||
return false;
|
return false;
|
||||||
TotalC = TotalCInt->getZExtValue();
|
TotalC = TotalCInt->getZExtValue();
|
||||||
|
|
||||||
@ -1027,10 +1027,13 @@ bool getValueProfDataFromInst(const Instruction &Inst,
|
|||||||
ConstantInt *Value = mdconst::dyn_extract<ConstantInt>(MD->getOperand(I));
|
ConstantInt *Value = mdconst::dyn_extract<ConstantInt>(MD->getOperand(I));
|
||||||
ConstantInt *Count =
|
ConstantInt *Count =
|
||||||
mdconst::dyn_extract<ConstantInt>(MD->getOperand(I + 1));
|
mdconst::dyn_extract<ConstantInt>(MD->getOperand(I + 1));
|
||||||
if (!Value || (!Count && !GetZeroCntValue))
|
if (!Value || !Count)
|
||||||
return false;
|
return false;
|
||||||
|
uint64_t CntValue = Count->getZExtValue();
|
||||||
|
if (!GetNoICPValue && (CntValue == NOMORE_ICP_MAGICNUM))
|
||||||
|
continue;
|
||||||
ValueData[ActualNumValueData].Value = Value->getZExtValue();
|
ValueData[ActualNumValueData].Value = Value->getZExtValue();
|
||||||
ValueData[ActualNumValueData].Count = Count->getZExtValue();
|
ValueData[ActualNumValueData].Count = CntValue;
|
||||||
ActualNumValueData++;
|
ActualNumValueData++;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -218,7 +218,11 @@ static cl::opt<std::string> ProfileInlineReplayFile(
|
|||||||
"by inlining from sample profile loader."),
|
"by inlining from sample profile loader."),
|
||||||
cl::Hidden);
|
cl::Hidden);
|
||||||
|
|
||||||
extern cl::opt<unsigned> MaxNumPromotions;
|
static cl::opt<unsigned>
|
||||||
|
MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
|
||||||
|
cl::ZeroOrMore,
|
||||||
|
cl::desc("Max number of promotions for a single indirect "
|
||||||
|
"call callsite in sample profile loader"));
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -364,8 +368,7 @@ protected:
|
|||||||
// Attempt to promote indirect call and also inline the promoted call
|
// Attempt to promote indirect call and also inline the promoted call
|
||||||
bool tryPromoteAndInlineCandidate(
|
bool tryPromoteAndInlineCandidate(
|
||||||
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
|
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
|
||||||
uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
|
uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
|
||||||
SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
|
|
||||||
bool inlineHotFunctions(Function &F,
|
bool inlineHotFunctions(Function &F,
|
||||||
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
|
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
|
||||||
InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
|
InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
|
||||||
@ -696,9 +699,14 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
|
|||||||
return it.first->second;
|
return it.first->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If the profile count for the promotion candidate \p Candidate is 0,
|
/// Check whether the indirect call promotion history of \p Inst allows
|
||||||
/// it means \p Candidate has already been promoted for \p Inst.
|
/// the promotion for \p Candidate.
|
||||||
static bool isPromotedBefore(const Instruction &Inst, StringRef Candidate) {
|
/// If the profile count for the promotion candidate \p Candidate is
|
||||||
|
/// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
|
||||||
|
/// for \p Inst. If we already have at least MaxNumPromotions
|
||||||
|
/// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
|
||||||
|
/// cannot promote for \p Inst anymore.
|
||||||
|
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
|
||||||
uint32_t NumVals = 0;
|
uint32_t NumVals = 0;
|
||||||
uint64_t TotalCount = 0;
|
uint64_t TotalCount = 0;
|
||||||
std::unique_ptr<InstrProfValueData[]> ValueData =
|
std::unique_ptr<InstrProfValueData[]> ValueData =
|
||||||
@ -706,33 +714,55 @@ static bool isPromotedBefore(const Instruction &Inst, StringRef Candidate) {
|
|||||||
bool Valid =
|
bool Valid =
|
||||||
getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
|
getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
|
||||||
ValueData.get(), NumVals, TotalCount, true);
|
ValueData.get(), NumVals, TotalCount, true);
|
||||||
if (Valid) {
|
// No valid value profile so no promoted targets have been recorded
|
||||||
for (uint32_t I = 0; I < NumVals; I++) {
|
// before. Ok to do ICP.
|
||||||
// If the promotion candidate has 0 count in the metadata, it
|
if (!Valid)
|
||||||
// means the candidate has been promoted for this indirect call.
|
return true;
|
||||||
if (ValueData[I].Value == Function::getGUID(Candidate))
|
|
||||||
return ValueData[I].Count == 0;
|
unsigned NumPromoted = 0;
|
||||||
}
|
for (uint32_t I = 0; I < NumVals; I++) {
|
||||||
|
if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
|
||||||
|
// metadata, it means the candidate has been promoted for this
|
||||||
|
// indirect call.
|
||||||
|
if (ValueData[I].Value == Function::getGUID(Candidate))
|
||||||
|
return false;
|
||||||
|
NumPromoted++;
|
||||||
|
// If already have MaxNumPromotions promotion, don't do it anymore.
|
||||||
|
if (NumPromoted == MaxNumPromotions)
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
return false;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Update indirect call target profile metadata for \p Inst. If \p Total
|
/// Update indirect call target profile metadata for \p Inst.
|
||||||
/// is given, set TotalCount of call targets counts to \p Total, otherwise
|
/// Usually \p Sum is the sum of counts of all the targets for \p Inst.
|
||||||
/// keep the original value in metadata.
|
/// If it is 0, it means updateIDTMetaData is used to mark a
|
||||||
|
/// certain target to be promoted already. If it is not zero,
|
||||||
|
/// we expect to use it to update the total count in the value profile.
|
||||||
static void
|
static void
|
||||||
updateIDTMetaData(Instruction &Inst,
|
updateIDTMetaData(Instruction &Inst,
|
||||||
const SmallVectorImpl<InstrProfValueData> &CallTargets,
|
const SmallVectorImpl<InstrProfValueData> &CallTargets,
|
||||||
uint64_t Total = 0) {
|
uint64_t Sum) {
|
||||||
DenseMap<uint64_t, uint64_t> ValueCountMap;
|
assert((Sum != 0 || (CallTargets.size() == 1 &&
|
||||||
|
CallTargets[0].Count == NOMORE_ICP_MAGICNUM)) &&
|
||||||
|
"If sum is 0, assume only one element in CallTargets with count "
|
||||||
|
"being NOMORE_ICP_MAGICNUM");
|
||||||
|
|
||||||
uint32_t NumVals = 0;
|
uint32_t NumVals = 0;
|
||||||
uint64_t TotalCount = 0;
|
// OldSum is the existing total count in the value profile data.
|
||||||
|
// It will be replaced by Sum if Sum is not 0.
|
||||||
|
uint64_t OldSum = 0;
|
||||||
std::unique_ptr<InstrProfValueData[]> ValueData =
|
std::unique_ptr<InstrProfValueData[]> ValueData =
|
||||||
std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
|
std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
|
||||||
bool Valid =
|
bool Valid =
|
||||||
getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
|
getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
|
||||||
ValueData.get(), NumVals, TotalCount, true);
|
ValueData.get(), NumVals, OldSum, true);
|
||||||
|
|
||||||
|
DenseMap<uint64_t, uint64_t> ValueCountMap;
|
||||||
|
// Initialize ValueCountMap with existing value profile data.
|
||||||
if (Valid) {
|
if (Valid) {
|
||||||
for (uint32_t I = 0; I < NumVals; I++)
|
for (uint32_t I = 0; I < NumVals; I++)
|
||||||
ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
|
ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
|
||||||
@ -742,13 +772,24 @@ updateIDTMetaData(Instruction &Inst,
|
|||||||
auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
|
auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
|
||||||
if (Pair.second)
|
if (Pair.second)
|
||||||
continue;
|
continue;
|
||||||
// Update existing profile count of the call target if it is not 0.
|
// Whenever the count is NOMORE_ICP_MAGICNUM for a value, keep it
|
||||||
// If it is 0, the call target has been promoted so keep it as 0.
|
// in the ValueCountMap. If both the count in CallTargets and the
|
||||||
if (Pair.first->second != 0)
|
// count in ValueCountMap is not NOMORE_ICP_MAGICNUM, keep the
|
||||||
|
// count in CallTargets.
|
||||||
|
if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
|
||||||
|
Data.Count == NOMORE_ICP_MAGICNUM) {
|
||||||
|
OldSum -= Pair.first->second;
|
||||||
|
Pair.first->second = NOMORE_ICP_MAGICNUM;
|
||||||
|
} else if (Pair.first->second == NOMORE_ICP_MAGICNUM &&
|
||||||
|
Data.Count != NOMORE_ICP_MAGICNUM) {
|
||||||
|
assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
|
||||||
|
Sum -= Data.Count;
|
||||||
|
} else if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
|
||||||
|
Data.Count != NOMORE_ICP_MAGICNUM) {
|
||||||
|
// Sum will be used in this case. Although the existing count
|
||||||
|
// for the current value in value profile will be overriden,
|
||||||
|
// no need to update OldSum.
|
||||||
Pair.first->second = Data.Count;
|
Pair.first->second = Data.Count;
|
||||||
else {
|
|
||||||
assert(Total >= Data.Count && "Total should be >= Data.Count");
|
|
||||||
Total -= Data.Count;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -757,15 +798,19 @@ updateIDTMetaData(Instruction &Inst,
|
|||||||
NewCallTargets.emplace_back(
|
NewCallTargets.emplace_back(
|
||||||
InstrProfValueData{ValueCount.first, ValueCount.second});
|
InstrProfValueData{ValueCount.first, ValueCount.second});
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::sort(NewCallTargets,
|
llvm::sort(NewCallTargets,
|
||||||
[](const InstrProfValueData &L, const InstrProfValueData &R) {
|
[](const InstrProfValueData &L, const InstrProfValueData &R) {
|
||||||
if (L.Count != R.Count)
|
if (L.Count != R.Count)
|
||||||
return L.Count > R.Count;
|
return L.Count > R.Count;
|
||||||
return L.Value > R.Value;
|
return L.Value > R.Value;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
uint32_t MaxMDCount =
|
||||||
|
std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
|
||||||
annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
|
annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
|
||||||
NewCallTargets, Total ? Total : TotalCount,
|
NewCallTargets, Sum ? Sum : OldSum, IPVK_IndirectCallTarget,
|
||||||
IPVK_IndirectCallTarget, NewCallTargets.size());
|
MaxMDCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Attempt to promote indirect call and also inline the promoted call.
|
/// Attempt to promote indirect call and also inline the promoted call.
|
||||||
@ -773,12 +818,10 @@ updateIDTMetaData(Instruction &Inst,
|
|||||||
/// \param F Caller function.
|
/// \param F Caller function.
|
||||||
/// \param Candidate ICP and inline candidate.
|
/// \param Candidate ICP and inline candidate.
|
||||||
/// \param Sum Sum of target counts for indirect call.
|
/// \param Sum Sum of target counts for indirect call.
|
||||||
/// \param PromotedInsns Map to keep track of indirect call already processed.
|
|
||||||
/// \param InlinedCallSite Output vector for new call sites exposed after
|
/// \param InlinedCallSite Output vector for new call sites exposed after
|
||||||
/// inlining.
|
/// inlining.
|
||||||
bool SampleProfileLoader::tryPromoteAndInlineCandidate(
|
bool SampleProfileLoader::tryPromoteAndInlineCandidate(
|
||||||
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
|
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
|
||||||
DenseSet<Instruction *> &PromotedInsns,
|
|
||||||
SmallVector<CallBase *, 8> *InlinedCallSite) {
|
SmallVector<CallBase *, 8> *InlinedCallSite) {
|
||||||
auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
|
auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
|
||||||
auto R = SymbolMap.find(CalleeFunctionName);
|
auto R = SymbolMap.find(CalleeFunctionName);
|
||||||
@ -786,7 +829,7 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
auto &CI = *Candidate.CallInstr;
|
auto &CI = *Candidate.CallInstr;
|
||||||
if (isPromotedBefore(CI, R->getValue()->getName()))
|
if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const char *Reason = "Callee function not available";
|
const char *Reason = "Callee function not available";
|
||||||
@ -799,11 +842,11 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
|
|||||||
if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
|
if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
|
||||||
R->getValue()->hasFnAttribute("use-sample-profile") &&
|
R->getValue()->hasFnAttribute("use-sample-profile") &&
|
||||||
R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
|
R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
|
||||||
// For promoted target, save 0 count in the value profile metadata so
|
// For promoted target, set its value with NOMORE_ICP_MAGICNUM count
|
||||||
// the target won't be promoted again.
|
// in the value profile metadata so the target won't be promoted again.
|
||||||
SmallVector<InstrProfValueData, 1> SortedCallTargets = {
|
SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
|
||||||
InstrProfValueData{Function::getGUID(R->getValue()->getName()), 0}};
|
Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
|
||||||
updateIDTMetaData(CI, SortedCallTargets);
|
updateIDTMetaData(CI, SortedCallTargets, 0);
|
||||||
|
|
||||||
auto *DI = &pgo::promoteIndirectCall(
|
auto *DI = &pgo::promoteIndirectCall(
|
||||||
CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
|
CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
|
||||||
@ -817,7 +860,6 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
|
|||||||
// be prorated so that the it will reflect the real callsite counts.
|
// be prorated so that the it will reflect the real callsite counts.
|
||||||
setProbeDistributionFactor(CI, Candidate.CallsiteDistribution * Sum /
|
setProbeDistributionFactor(CI, Candidate.CallsiteDistribution * Sum /
|
||||||
SumOrigin);
|
SumOrigin);
|
||||||
PromotedInsns.insert(Candidate.CallInstr);
|
|
||||||
Candidate.CallInstr = DI;
|
Candidate.CallInstr = DI;
|
||||||
if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
|
if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
|
||||||
bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
|
bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
|
||||||
@ -890,8 +932,6 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
|
|||||||
/// \returns True if there is any inline happened.
|
/// \returns True if there is any inline happened.
|
||||||
bool SampleProfileLoader::inlineHotFunctions(
|
bool SampleProfileLoader::inlineHotFunctions(
|
||||||
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
|
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
|
||||||
DenseSet<Instruction *> PromotedInsns;
|
|
||||||
|
|
||||||
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
|
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
|
||||||
// Profile symbol list is ignored when profile-sample-accurate is on.
|
// Profile symbol list is ignored when profile-sample-accurate is on.
|
||||||
assert((!ProfAccForSymsInList ||
|
assert((!ProfAccForSymsInList ||
|
||||||
@ -945,8 +985,6 @@ bool SampleProfileLoader::inlineHotFunctions(
|
|||||||
if (CalledFunction == &F)
|
if (CalledFunction == &F)
|
||||||
continue;
|
continue;
|
||||||
if (I->isIndirectCall()) {
|
if (I->isIndirectCall()) {
|
||||||
if (PromotedInsns.count(I))
|
|
||||||
continue;
|
|
||||||
uint64_t Sum;
|
uint64_t Sum;
|
||||||
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
|
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
|
||||||
uint64_t SumOrigin = Sum;
|
uint64_t SumOrigin = Sum;
|
||||||
@ -959,8 +997,7 @@ bool SampleProfileLoader::inlineHotFunctions(
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
Candidate = {I, FS, FS->getEntrySamples(), 1.0};
|
Candidate = {I, FS, FS->getEntrySamples(), 1.0};
|
||||||
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
|
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
|
||||||
PromotedInsns)) {
|
|
||||||
LocalNotInlinedCallSites.erase(I);
|
LocalNotInlinedCallSites.erase(I);
|
||||||
LocalChanged = true;
|
LocalChanged = true;
|
||||||
}
|
}
|
||||||
@ -1169,7 +1206,6 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
|
|||||||
|
|
||||||
bool SampleProfileLoader::inlineHotFunctionsWithPriority(
|
bool SampleProfileLoader::inlineHotFunctionsWithPriority(
|
||||||
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
|
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
|
||||||
DenseSet<Instruction *> PromotedInsns;
|
|
||||||
assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
|
assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
|
||||||
|
|
||||||
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
|
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
|
||||||
@ -1218,8 +1254,6 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
|
|||||||
if (CalledFunction == &F)
|
if (CalledFunction == &F)
|
||||||
continue;
|
continue;
|
||||||
if (I->isIndirectCall()) {
|
if (I->isIndirectCall()) {
|
||||||
if (PromotedInsns.count(I))
|
|
||||||
continue;
|
|
||||||
uint64_t Sum;
|
uint64_t Sum;
|
||||||
auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
|
auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
|
||||||
uint64_t SumOrigin = Sum;
|
uint64_t SumOrigin = Sum;
|
||||||
@ -1254,7 +1288,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
|
|||||||
Candidate = {I, FS, EntryCountDistributed,
|
Candidate = {I, FS, EntryCountDistributed,
|
||||||
Candidate.CallsiteDistribution};
|
Candidate.CallsiteDistribution};
|
||||||
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
|
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
|
||||||
PromotedInsns, &InlinedCallSites)) {
|
&InlinedCallSites)) {
|
||||||
for (auto *CB : InlinedCallSites) {
|
for (auto *CB : InlinedCallSites) {
|
||||||
if (getInlineCandidate(&NewCandidate, CB))
|
if (getInlineCandidate(&NewCandidate, CB))
|
||||||
CQueue.emplace(NewCandidate);
|
CQueue.emplace(NewCandidate);
|
||||||
@ -1351,6 +1385,8 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
|
|||||||
Sum += NameFS.second.getEntrySamples();
|
Sum += NameFS.second.getEntrySamples();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!Sum)
|
||||||
|
continue;
|
||||||
updateIDTMetaData(I, SortedCallTargets, Sum);
|
updateIDTMetaData(I, SortedCallTargets, Sum);
|
||||||
} else if (!isa<IntrinsicInst>(&I)) {
|
} else if (!isa<IntrinsicInst>(&I)) {
|
||||||
I.setMetadata(LLVMContext::MD_prof,
|
I.setMetadata(LLVMContext::MD_prof,
|
||||||
|
16
test/Transforms/SampleProfile/Inputs/norepeated-icp-2.prof
Normal file
16
test/Transforms/SampleProfile/Inputs/norepeated-icp-2.prof
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
_Z3goov:5860:1
|
||||||
|
1: 5279 _Z3foov:2000 _Z3barv:1000
|
||||||
|
2: 5279 _Z3foov:2000 _Z3barv:1000
|
||||||
|
3: 5279 _Z3foov:2000 _Z3barv:1000
|
||||||
|
1: _Z3hoov:5860
|
||||||
|
1: 5000
|
||||||
|
1: _Z3moov:5860
|
||||||
|
1: 5000
|
||||||
|
2: _Z3hoov:5860
|
||||||
|
1: 5000
|
||||||
|
2: _Z3moov:5860
|
||||||
|
1: 5000
|
||||||
|
3: _Z3hoov:5860
|
||||||
|
1: 5000
|
||||||
|
3: _Z3moov:5860
|
||||||
|
1: 5000
|
@ -202,7 +202,7 @@ attributes #0 = {"use-sample-profile"}
|
|||||||
; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398}
|
; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398}
|
||||||
; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000}
|
; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000}
|
||||||
; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000}
|
; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000}
|
||||||
; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 -6391416044382067764, i64 1000, i64 7476224446746900038, i64 0, i64 925324185419832389, i64 0}
|
; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 7476224446746900038, i64 -1, i64 925324185419832389, i64 -1, i64 -6391416044382067764, i64 1000}
|
||||||
; CHECK: ![[BR3]] = !{!"branch_weights", i32 1, i32 0}
|
; CHECK: ![[BR3]] = !{!"branch_weights", i32 1, i32 0}
|
||||||
!6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0)
|
!6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0)
|
||||||
!7 = !DILocation(line: 7, scope: !6)
|
!7 = !DILocation(line: 7, scope: !6)
|
||||||
|
124
test/Transforms/SampleProfile/norepeated-icp-2.ll
Normal file
124
test/Transforms/SampleProfile/norepeated-icp-2.ll
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
; RUN: opt < %s -sample-profile-icp-max-prom=2 -passes=sample-profile -sample-profile-file=%S/Inputs/norepeated-icp-2.prof -S | FileCheck %s --check-prefix=MAX2
|
||||||
|
; RUN: opt < %s -sample-profile-icp-max-prom=4 -passes=sample-profile -sample-profile-file=%S/Inputs/norepeated-icp-2.prof -S | FileCheck %s --check-prefix=MAX4
|
||||||
|
|
||||||
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
; "moo\n" string constant; no instruction in this file refers to it.
@.str = private unnamed_addr constant [5 x i8] c"moo\0A\00", align 1
|
||||||
|
; Function pointer, initially null. @_Z3hoov stores @_Z3moov into it and
; @_Z3goov loads it before each of its three indirect calls.
@p = dso_local global void ()* null, align 8
|
||||||
|
; Byte flag read with a volatile load in @_Z3hoov.
@cond = dso_local global i8 0, align 1
|
||||||
|
; "moo" string passed to puts in @_Z3moov.
@str = private unnamed_addr constant [4 x i8] c"moo\00", align 1
|
||||||
|
|
||||||
|
; moo(): passes the @str string to puts and returns. Its address is the value
; that @_Z3hoov stores into @p.
; Function Attrs: uwtable mustprogress
|
||||||
|
define dso_local void @_Z3moov() #0 !dbg !7 {
|
||||||
|
entry:
|
||||||
|
%puts = call i32 @puts(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @str, i64 0, i64 0)), !dbg !9
|
||||||
|
ret void, !dbg !10
|
||||||
|
}
|
||||||
|
|
||||||
|
; Variadic printf declaration; none of the functions defined in this file
; call it.
; Function Attrs: nofree nounwind
|
||||||
|
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) #1
|
||||||
|
|
||||||
|
; hoo(): calls hoo_callee() when @cond is non-zero, then stores the address of
; moo() into the global function pointer @p and returns.
; Function Attrs: uwtable mustprogress
|
||||||
|
define dso_local void @_Z3hoov() #0 !dbg !11 {
|
||||||
|
entry:
|
||||||
|
; Volatile read of the flag; the !range node restricts the value to 0 or 1.
%0 = load volatile i8, i8* @cond, align 1, !dbg !12, !range !17
|
||||||
|
%tobool.not = icmp eq i8 %0, 0, !dbg !12
|
||||||
|
; A zero flag skips the call and branches straight to if.end.
br i1 %tobool.not, label %if.end, label %if.then, !dbg !12
|
||||||
|
|
||||||
|
if.then: ; preds = %entry
|
||||||
|
call void @_Z10hoo_calleev(), !dbg !18
|
||||||
|
br label %if.end, !dbg !18
|
||||||
|
|
||||||
|
if.end: ; preds = %if.then, %entry
|
||||||
|
; Both paths end up here: publish moo() through @p.
store void ()* @_Z3moov, void ()** @p, align 8, !dbg !19
|
||||||
|
ret void, !dbg !22
|
||||||
|
}
|
||||||
|
|
||||||
|
; External function (declaration only) called from the if.then block of
; @_Z3hoov.
declare !dbg !23 dso_local void @_Z10hoo_calleev() #2
|
||||||
|
|
||||||
|
; Expected body of @_Z3goov when the promotion limit is 2: a pointer
; comparison against @_Z3hoov must appear ahead of the %t0 call, and no
; comparison against @_Z3hoov or @_Z3moov may appear between the %t0 call and
; the %t2 call.
; MAX2-LABEL: @_Z3goov(
|
||||||
|
; MAX2: icmp eq void ()* {{.*}} @_Z3hoov
|
||||||
|
; MAX2: call void %t0(), {{.*}} !prof ![[PROF_ID1:[0-9]+]]
|
||||||
|
; MAX2-NOT: icmp eq void ()* {{.*}} @_Z3hoov
|
||||||
|
; MAX2-NOT: icmp eq void ()* {{.*}} @_Z3moov
|
||||||
|
; MAX2: call void %t1(), {{.*}} !prof ![[PROF_ID2:[0-9]+]]
|
||||||
|
; MAX2-NOT: icmp eq void ()* {{.*}} @_Z3hoov
|
||||||
|
; MAX2-NOT: icmp eq void ()* {{.*}} @_Z3moov
|
||||||
|
; NOTE(review): the capture name PROF_ID2 is bound a second time on the next
; line, so the later metadata check on PROF_ID2 presumably applies to the id
; captured here - confirm that the %t1 and %t2 calls are meant to share it.
; MAX2: call void %t2(), {{.*}} !prof ![[PROF_ID2:[0-9]+]]
|
||||||
|
; MAX2: ret void
|
||||||
|
; Expected body of @_Z3goov when the promotion limit is 4: comparisons against
; both @_Z3hoov and @_Z3moov must appear ahead of the %t0 call and again ahead
; of the %t1 call, and neither comparison may appear between the %t1 call and
; the %t2 call.
; MAX4-LABEL: @_Z3goov(
|
||||||
|
; MAX4: icmp eq void ()* {{.*}} @_Z3hoov
|
||||||
|
; MAX4: icmp eq void ()* {{.*}} @_Z3moov
|
||||||
|
; MAX4: call void %t0(), {{.*}} !prof ![[PROF_ID3:[0-9]+]]
|
||||||
|
; MAX4: icmp eq void ()* {{.*}} @_Z3hoov
|
||||||
|
; MAX4: icmp eq void ()* {{.*}} @_Z3moov
|
||||||
|
; MAX4: call void %t1(), {{.*}} !prof ![[PROF_ID4:[0-9]+]]
|
||||||
|
; MAX4-NOT: icmp eq void ()* {{.*}} @_Z3hoov
|
||||||
|
; MAX4-NOT: icmp eq void ()* {{.*}} @_Z3moov
|
||||||
|
; MAX4: call void %t2(), {{.*}} !prof ![[PROF_ID5:[0-9]+]]
|
||||||
|
; MAX4: ret void
|
||||||
|
|
||||||
|
; goo(): the function under test. It reloads the function pointer @p three
; times and makes an indirect call through each loaded value. Every call
; already carries "VP" !prof metadata on input (!30, !31, !32), listing 1, 2
; and 4 targets respectively, each with a count of -1. A count of -1 is the
; marker the sample loader uses for a target that has already been promoted.
; Function Attrs: uwtable mustprogress
|
||||||
|
define dso_local void @_Z3goov() #0 !dbg !24 {
|
||||||
|
entry:
|
||||||
|
%t0 = load void ()*, void ()** @p, align 8, !dbg !25
|
||||||
|
; One target recorded in !30.
call void %t0(), !dbg !26, !prof !30
|
||||||
|
%t1 = load void ()*, void ()** @p, align 8, !dbg !25
|
||||||
|
; Two targets recorded in !31.
call void %t1(), !dbg !28, !prof !31
|
||||||
|
%t2 = load void ()*, void ()** @p, align 8, !dbg !25
|
||||||
|
; Four targets recorded in !32.
call void %t2(), !dbg !29, !prof !32
|
||||||
|
ret void, !dbg !27
|
||||||
|
}
|
||||||
|
|
||||||
|
; Expected value-profile nodes after the pass. Under the promotion limit of 2
; each node holds two (target, count) pairs; under the limit of 4 each node
; holds four. Pairs whose count is -1 are listed first; the only pair with an
; ordinary count (2000) is the last one in its node.
; MAX2: ![[PROF_ID1]] = !{!"VP", i32 0, i64 13000, i64 -7701940972712279918, i64 -1, i64 1850239051784516332, i64 -1}
|
||||||
|
; MAX2: ![[PROF_ID2]] = !{!"VP", i32 0, i64 13000, i64 3137940972712279918, i64 -1, i64 1850239051784516332, i64 -1}
|
||||||
|
; MAX4: ![[PROF_ID3]] = !{!"VP", i32 0, i64 13000, i64 -7383239051784516332, i64 -1, i64 -7701940972712279918, i64 -1, i64 1850239051784516332, i64 -1, i64 9191153033785521275, i64 2000}
|
||||||
|
; MAX4: ![[PROF_ID4]] = !{!"VP", i32 0, i64 13000, i64 -7383239051784516332, i64 -1, i64 -7701940972712279918, i64 -1, i64 3137940972712279918, i64 -1, i64 1850239051784516332, i64 -1}
|
||||||
|
; MAX4: ![[PROF_ID5]] = !{!"VP", i32 0, i64 13000, i64 4128940972712279918, i64 -1, i64 3137940972712279918, i64 -1, i64 2132940972712279918, i64 -1, i64 1850239051784516332, i64 -1}
|
||||||
|
|
||||||
|
; puts declaration; its only caller in this file is @_Z3moov.
; Function Attrs: nofree nounwind
|
||||||
|
declare noundef i32 @puts(i8* nocapture noundef readonly) #3
|
||||||
|
|
||||||
|
; Attribute groups. #0 is attached to the three defined functions (@_Z3moov,
; @_Z3hoov, @_Z3goov) and is the only group that carries "use-sample-profile".
; #1, #2 and #3 are attached to the printf, hoo_callee and puts declarations
; respectively.
attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
|
||||||
|
attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||||
|
attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||||
|
attributes #3 = { nofree nounwind }
|
||||||
|
|
||||||
|
; Module-level named metadata: the compile unit (!0), three module flags
; (!3, !4, !5) and the producer identification string (!6).
!llvm.dbg.cu = !{!0}
|
||||||
|
!llvm.module.flags = !{!3, !4, !5}
|
||||||
|
!llvm.ident = !{!6}
|
||||||
|
|
||||||
|
; Compile unit: line-tables-only debug info with debugInfoForProfiling set.
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project.git f8226e6e284e9f199790bdb330f87d71adb5376f)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
|
||||||
|
!1 = !DIFile(filename: "1.cc", directory: "/usr/local/google/home/wmi/workarea/llvm/build/splitprofile")
|
||||||
|
; Empty node shared as the enums list, the subroutine type list and the
; retainedNodes list of the subprograms in this file.
!2 = !{}
|
||||||
|
!3 = !{i32 7, !"Dwarf Version", i32 4}
|
||||||
|
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||||
|
!5 = !{i32 1, !"wchar_size", i32 4}
|
||||||
|
!6 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git f8226e6e284e9f199790bdb330f87d71adb5376f)"}
|
||||||
|
; Debug-info nodes for the functions in this file. Subprograms: !7 (moo),
; !11 (hoo), !23 (hoo_callee, declaration) and !24 (goo); the remaining
; !DILocation nodes are the line/column positions attached to instructions.
!7 = distinct !DISubprogram(name: "moo", linkageName: "_Z3moov", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||||
|
!8 = !DISubroutineType(types: !2)
|
||||||
|
!9 = !DILocation(line: 2, column: 3, scope: !7)
|
||||||
|
!10 = !DILocation(line: 3, column: 1, scope: !7)
|
||||||
|
!11 = distinct !DISubprogram(name: "hoo", linkageName: "_Z3hoov", scope: !1, file: !1, line: 9, type: !8, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||||
|
!12 = !DILocation(line: 10, column: 7, scope: !11)
|
||||||
|
; !13-!16 (and !20-!21 further down) form a type hierarchy rooted at
; "Simple C++ TBAA"; no instruction in this file references them.
!13 = !{!14, !14, i64 0}
|
||||||
|
!14 = !{!"bool", !15, i64 0}
|
||||||
|
!15 = !{!"omnipotent char", !16, i64 0}
|
||||||
|
!16 = !{!"Simple C++ TBAA"}
|
||||||
|
; Range node used by the volatile load of @cond in @_Z3hoov.
!17 = !{i8 0, i8 2}
|
||||||
|
!18 = !DILocation(line: 11, column: 5, scope: !11)
|
||||||
|
!19 = !DILocation(line: 12, column: 5, scope: !11)
|
||||||
|
!20 = !{!21, !21, i64 0}
|
||||||
|
!21 = !{!"any pointer", !15, i64 0}
|
||||||
|
!22 = !DILocation(line: 13, column: 1, scope: !11)
|
||||||
|
!23 = !DISubprogram(name: "hoo_callee", linkageName: "_Z10hoo_calleev", scope: !1, file: !1, line: 5, type: !8, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2)
|
||||||
|
!24 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 15, type: !8, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||||
|
; Locations inside goo: !25 is shared by all three loads of @p; !26, !28 and
; !29 (source lines 16, 17 and 18) are attached to the calls through %t0, %t1
; and %t2 respectively; !27 is the return.
!25 = !DILocation(line: 16, column: 5, scope: !24)
|
||||||
|
!26 = !DILocation(line: 16, column: 3, scope: !24)
|
||||||
|
!27 = !DILocation(line: 19, column: 1, scope: !24)
|
||||||
|
!28 = !DILocation(line: 17, column: 3, scope: !24)
|
||||||
|
!29 = !DILocation(line: 18, column: 3, scope: !24)
|
||||||
|
; Incoming value-profile metadata attached to the three indirect calls in
; @_Z3goov (%t0 -> !30, %t1 -> !31, %t2 -> !32). After the leading "VP", i32 0
; and i64 0 operands, the remaining operands presumably come in
; (target id, count) pairs - confirm against the LLVM value-profile metadata
; format. On that reading every count here is -1, with 1, 2 and 4 targets
; listed respectively.
!30 = !{!"VP", i32 0, i64 0, i64 1850239051784516332, i64 -1}
|
||||||
|
!31 = !{!"VP", i32 0, i64 0, i64 1850239051784516332, i64 -1, i64 3137940972712279918, i64 -1}
|
||||||
|
!32 = !{!"VP", i32 0, i64 0, i64 1850239051784516332, i64 -1, i64 3137940972712279918, i64 -1, i64 2132940972712279918, i64 -1, i64 4128940972712279918, i64 -1}
|
Loading…
Reference in New Issue
Block a user