1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[CSSPGO] Aggregation by the last K context frames for cold profiles

This change adds an option to merge and aggregate cold context profiles by their last K frames instead of by the context-less name. The default, K = 1, merges into the context-less profile as before.

This is for better perf tuning. The more selective merging and trimming will rely on llvm-profgen's preinliner.

Reviewed By: wenlei, hoy

Differential Revision: https://reviews.llvm.org/D104131
This commit is contained in:
wlei 2021-06-11 00:35:45 -07:00
parent 94d12e2de0
commit c4ed78c10b
5 changed files with 86 additions and 18 deletions

View File

@ -430,6 +430,22 @@ public:
return ContextStr.split(" @ ");
}
// Return the trailing part of FullContext that consists of its last K frames.
// Frames are separated by " @ ". K <= 1 yields the context-less name (K == 0
// is clamped, because a zero-frame context would be an empty profile name),
// and a K that is at least the number of frames yields the whole FullContext.
StringRef getContextWithLastKFrames(uint32_t K) {
  if (K <= 1)
    return getNameWithoutContext();

  // Search for the separator as a substring rather than as a character set,
  // so that a '@' or ' ' inside a frame name is never mistaken for a boundary.
  const StringRef Separator = " @ ";
  // Head is the part of the context still in front of the frames kept so far;
  // each iteration peels one more frame off its end.
  StringRef Head = FullContext;
  for (uint32_t Frame = 0; Frame < K; ++Frame) {
    const size_t Pos = Head.rfind(Separator);
    // Fewer than K separators: the context already has at most K frames.
    if (Pos == StringRef::npos)
      return FullContext;
    Head = Head.substr(0, Pos);
  }
  // Skip the leading frames and the separator that follows them.
  return FullContext.substr(Head.size() + Separator.size());
}
// Decode context string for a frame to get function name and location.
// `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
static void decodeContextString(StringRef ContextStr, StringRef &FName,
@ -993,8 +1009,9 @@ public:
: ProfileMap(Profiles){};
// Trim and merge cold context profile when requested.
void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
bool TrimColdContext = true,
bool MergeColdContext = true);
bool TrimColdContext,
bool MergeColdContext,
uint32_t ColdContextFrameLength);
// Canonicalize context profile name and attributes.
void canonicalizeContextProfiles();

View File

@ -324,7 +324,8 @@ std::error_code ProfileSymbolList::read(const uint8_t *Data,
}
void SampleContextTrimmer::trimAndMergeColdContextProfiles(
uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext) {
uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext,
uint32_t ColdContextFrameLength) {
if (!TrimColdContext && !MergeColdContext)
return;
@ -342,21 +343,24 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
ColdProfiles.emplace_back(I.getKey(), &I.second);
}
// Remove the cold profile from ProfileMap and merge them into BaseProfileMap
StringMap<FunctionSamples> BaseProfileMap;
// Remove the cold profile from ProfileMap and merge them into
// MergedProfileMap by the last K frames of context
StringMap<FunctionSamples> MergedProfileMap;
for (const auto &I : ColdProfiles) {
if (MergeColdContext) {
auto Ret = BaseProfileMap.try_emplace(
I.second->getContext().getNameWithoutContext(), FunctionSamples());
FunctionSamples &BaseProfile = Ret.first->second;
BaseProfile.merge(*I.second);
auto Ret = MergedProfileMap.try_emplace(
I.second->getContext().getContextWithLastKFrames(
ColdContextFrameLength),
FunctionSamples());
FunctionSamples &MergedProfile = Ret.first->second;
MergedProfile.merge(*I.second);
}
ProfileMap.erase(I.first);
}
// Merge the base profiles into ProfileMap;
for (const auto &I : BaseProfileMap) {
// Filter the cold base profile
// Move the merged profiles into ProfileMap;
for (const auto &I : MergedProfileMap) {
// Filter the cold merged profile
if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
ProfileMap.find(I.getKey()) == ProfileMap.end())
continue;

View File

@ -10,6 +10,10 @@
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t3 --compress-recursion=-1 --profile-summary-cold-count=10 --csprof-merge-cold-context=0
; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED
; Test --csprof-frame-depth-for-cold-context
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-cold-count=100 --csprof-trim-cold-context=0 --csprof-frame-depth-for-cold-context=2
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH
; CHECK: [fa]:14:4
; CHECK-NEXT: 1: 4
; CHECK-NEXT: 2: 18446744073709551615
@ -56,6 +60,38 @@
; CHECK-UNMERGED-NOT: [fa]
; CHECK-UNMERGED-NOT: [fb]
; CHECK-COLD-CONTEXT-LENGTH: [fb:5 @ fb]:13:4
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 4
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 3
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 4 fb:4
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 1 fa:1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 72617220756
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fb:6 @ fa]:10:3
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 3
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 18446744073709551615
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 3
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 4: 1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 8: 1 fa:1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 120515930909
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:7 @ fb]:6:2
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 2
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 2
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 2 fa:2
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 72617220756
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:8 @ fa]:4:1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 18446744073709551615
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 4: 1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 120515930909
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls

View File

@ -689,7 +689,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
StringRef ProfileSymbolListFile, bool CompressAllSections,
bool UseMD5, bool GenPartialProfile,
bool SampleMergeColdContext, bool SampleTrimColdContext,
FailureMode FailMode) {
// The frame depth is a count, not a flag: it must stay uint32_t so that a
// depth greater than 1 is not collapsed to 1 before reaching the trimmer.
uint32_t SampleColdContextFrameDepth, FailureMode FailMode) {
using namespace sampleprof;
StringMap<FunctionSamples> ProfileMap;
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
@ -758,9 +758,9 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
// Trim and merge cold context profile using cold threshold above;
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(SampleProfColdThreshold,
SampleTrimColdContext,
SampleMergeColdContext);
.trimAndMergeColdContextProfiles(
SampleProfColdThreshold, SampleTrimColdContext,
SampleMergeColdContext, SampleColdContextFrameDepth);
}
auto WriterOrErr =
@ -914,6 +914,10 @@ static int merge_main(int argc, const char *argv[]) {
"sample-trim-cold-context", cl::init(false), cl::Hidden,
cl::desc(
"Trim context sample profiles whose count is below cold threshold"));
// Command-line option -sample-frame-depth-for-cold-context: the number of
// trailing context frames (K) kept when cold context profiles are merged.
// Defaults to 1; the value is passed on to mergeSampleProfile.
cl::opt<uint32_t> SampleColdContextFrameDepth(
"sample-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore,
cl::desc("Keep the last K frames while merging cold profile. 1 means the "
"context-less base profile"));
cl::opt<bool> GenPartialProfile(
"gen-partial-profile", cl::init(false), cl::Hidden,
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
@ -985,7 +989,8 @@ static int merge_main(int argc, const char *argv[]) {
mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
OutputFormat, ProfileSymbolListFile, CompressAllSections,
UseMD5, GenPartialProfile, SampleMergeColdContext,
SampleTrimColdContext, FailureMode);
SampleTrimColdContext, SampleColdContextFrameDepth,
FailureMode);
return 0;
}

View File

@ -43,6 +43,11 @@ static cl::opt<bool> CSProfTrimColdContext(
cl::desc("If the total count of the profile after all merge is done "
"is still smaller than threshold, it will be trimmed."));
// Command-line option -csprof-frame-depth-for-cold-context: the number of
// trailing context frames (K) kept when cold context profiles are merged.
// Defaults to 1; CSProfileGenerator::postProcessProfiles passes the value to
// SampleContextTrimmer::trimAndMergeColdContextProfiles.
static cl::opt<uint32_t> CSProfColdContextFrameDepth(
"csprof-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore,
cl::desc("Keep the last K frames while merging cold profile. 1 means the "
"context-less base profile"));
extern cl::opt<int> ProfileSummaryCutoffCold;
using namespace llvm;
@ -401,7 +406,8 @@ void CSProfileGenerator::postProcessProfiles() {
// Trim and merge cold context profile using cold threshold above;
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(
ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
CSProfColdContextFrameDepth);
}
void CSProfileGenerator::computeSummaryAndThreshold() {