mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[CSSPGO] Aggregation by the last K context frames for cold profiles
This change provides an option to merge and aggregate cold contexts by their last K frames instead of by the context-less name. The default, K = 1, keeps only the context-less name. This allows better performance tuning. More selective merging and trimming will rely on llvm-profgen's preinliner. Reviewed By: wenlei, hoy. Differential Revision: https://reviews.llvm.org/D104131
This commit is contained in:
parent
94d12e2de0
commit
c4ed78c10b
@ -430,6 +430,22 @@ public:
|
||||
return ContextStr.split(" @ ");
|
||||
}
|
||||
|
||||
// Reconstruct a new context with the last k frames, return the context-less
|
||||
// name if K = 1
|
||||
StringRef getContextWithLastKFrames(uint32_t K) {
|
||||
if (K == 1)
|
||||
return getNameWithoutContext();
|
||||
|
||||
size_t I = FullContext.size();
|
||||
while (K--) {
|
||||
I = FullContext.find_last_of(" @ ", I);
|
||||
if (I == StringRef::npos)
|
||||
return FullContext;
|
||||
I -= 2;
|
||||
}
|
||||
return FullContext.slice(I + 3, StringRef::npos);
|
||||
}
|
||||
|
||||
// Decode context string for a frame to get function name and location.
|
||||
// `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
|
||||
static void decodeContextString(StringRef ContextStr, StringRef &FName,
|
||||
@ -993,8 +1009,9 @@ public:
|
||||
: ProfileMap(Profiles){};
|
||||
// Trim and merge cold context profile when requested.
|
||||
void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
|
||||
bool TrimColdContext = true,
|
||||
bool MergeColdContext = true);
|
||||
bool TrimColdContext,
|
||||
bool MergeColdContext,
|
||||
uint32_t ColdContextFrameLength);
|
||||
// Canonicalize context profile name and attributes.
|
||||
void canonicalizeContextProfiles();
|
||||
|
||||
|
@ -324,7 +324,8 @@ std::error_code ProfileSymbolList::read(const uint8_t *Data,
|
||||
}
|
||||
|
||||
void SampleContextTrimmer::trimAndMergeColdContextProfiles(
|
||||
uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext) {
|
||||
uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext,
|
||||
uint32_t ColdContextFrameLength) {
|
||||
if (!TrimColdContext && !MergeColdContext)
|
||||
return;
|
||||
|
||||
@ -342,21 +343,24 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
|
||||
ColdProfiles.emplace_back(I.getKey(), &I.second);
|
||||
}
|
||||
|
||||
// Remove the cold profile from ProfileMap and merge them into BaseProileMap
|
||||
StringMap<FunctionSamples> BaseProfileMap;
|
||||
// Remove the cold profile from ProfileMap and merge them into
|
||||
// MergedProfileMap by the last K frames of context
|
||||
StringMap<FunctionSamples> MergedProfileMap;
|
||||
for (const auto &I : ColdProfiles) {
|
||||
if (MergeColdContext) {
|
||||
auto Ret = BaseProfileMap.try_emplace(
|
||||
I.second->getContext().getNameWithoutContext(), FunctionSamples());
|
||||
FunctionSamples &BaseProfile = Ret.first->second;
|
||||
BaseProfile.merge(*I.second);
|
||||
auto Ret = MergedProfileMap.try_emplace(
|
||||
I.second->getContext().getContextWithLastKFrames(
|
||||
ColdContextFrameLength),
|
||||
FunctionSamples());
|
||||
FunctionSamples &MergedProfile = Ret.first->second;
|
||||
MergedProfile.merge(*I.second);
|
||||
}
|
||||
ProfileMap.erase(I.first);
|
||||
}
|
||||
|
||||
// Merge the base profiles into ProfileMap;
|
||||
for (const auto &I : BaseProfileMap) {
|
||||
// Filter the cold base profile
|
||||
// Move the merged profiles into ProfileMap;
|
||||
for (const auto &I : MergedProfileMap) {
|
||||
// Filter the cold merged profile
|
||||
if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
|
||||
ProfileMap.find(I.getKey()) == ProfileMap.end())
|
||||
continue;
|
||||
|
@ -10,6 +10,10 @@
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t3 --compress-recursion=-1 --profile-summary-cold-count=10 --csprof-merge-cold-context=0
|
||||
; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED
|
||||
|
||||
; Test --csprof-frame-depth-for-cold-context
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-cold-count=100 --csprof-trim-cold-context=0 --csprof-frame-depth-for-cold-context=2
|
||||
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH
|
||||
|
||||
; CHECK: [fa]:14:4
|
||||
; CHECK-NEXT: 1: 4
|
||||
; CHECK-NEXT: 2: 18446744073709551615
|
||||
@ -56,6 +60,38 @@
|
||||
; CHECK-UNMERGED-NOT: [fa]
|
||||
; CHECK-UNMERGED-NOT: [fb]
|
||||
|
||||
; CHECK-COLD-CONTEXT-LENGTH: [fb:5 @ fb]:13:4
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 4
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 3
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 4 fb:4
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 1 fa:1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 72617220756
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fb:6 @ fa]:10:3
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 3
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 18446744073709551615
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 3
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 4: 1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 8: 1 fa:1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 120515930909
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:7 @ fb]:6:2
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 2
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 2
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 2 fa:2
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 72617220756
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:8 @ fa]:4:1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 18446744073709551615
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 4: 1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 120515930909
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
|
||||
|
||||
; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
|
||||
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
|
||||
|
@ -689,7 +689,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
|
||||
StringRef ProfileSymbolListFile, bool CompressAllSections,
|
||||
bool UseMD5, bool GenPartialProfile,
|
||||
bool SampleMergeColdContext, bool SampleTrimColdContext,
|
||||
FailureMode FailMode) {
|
||||
bool SampleColdContextFrameDepth, FailureMode FailMode) {
|
||||
using namespace sampleprof;
|
||||
StringMap<FunctionSamples> ProfileMap;
|
||||
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
|
||||
@ -758,9 +758,9 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
|
||||
|
||||
// Trim and merge cold context profile using cold threshold above;
|
||||
SampleContextTrimmer(ProfileMap)
|
||||
.trimAndMergeColdContextProfiles(SampleProfColdThreshold,
|
||||
SampleTrimColdContext,
|
||||
SampleMergeColdContext);
|
||||
.trimAndMergeColdContextProfiles(
|
||||
SampleProfColdThreshold, SampleTrimColdContext,
|
||||
SampleMergeColdContext, SampleColdContextFrameDepth);
|
||||
}
|
||||
|
||||
auto WriterOrErr =
|
||||
@ -914,6 +914,10 @@ static int merge_main(int argc, const char *argv[]) {
|
||||
"sample-trim-cold-context", cl::init(false), cl::Hidden,
|
||||
cl::desc(
|
||||
"Trim context sample profiles whose count is below cold threshold"));
|
||||
cl::opt<uint32_t> SampleColdContextFrameDepth(
|
||||
"sample-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore,
|
||||
cl::desc("Keep the last K frames while merging cold profile. 1 means the "
|
||||
"context-less base profile"));
|
||||
cl::opt<bool> GenPartialProfile(
|
||||
"gen-partial-profile", cl::init(false), cl::Hidden,
|
||||
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
|
||||
@ -985,7 +989,8 @@ static int merge_main(int argc, const char *argv[]) {
|
||||
mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
|
||||
OutputFormat, ProfileSymbolListFile, CompressAllSections,
|
||||
UseMD5, GenPartialProfile, SampleMergeColdContext,
|
||||
SampleTrimColdContext, FailureMode);
|
||||
SampleTrimColdContext, SampleColdContextFrameDepth,
|
||||
FailureMode);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -43,6 +43,11 @@ static cl::opt<bool> CSProfTrimColdContext(
|
||||
cl::desc("If the total count of the profile after all merge is done "
|
||||
"is still smaller than threshold, it will be trimmed."));
|
||||
|
||||
// Command-line knob (default 1) giving the number of trailing context frames
// used as the merge key for cold context profiles. Its value is passed as the
// last argument of SampleContextTrimmer::trimAndMergeColdContextProfiles.
static cl::opt<uint32_t> CSProfColdContextFrameDepth(
    "csprof-frame-depth-for-cold-context",
    cl::desc("Keep the last K frames while merging cold profile. 1 means the "
             "context-less base profile"),
    cl::init(1), cl::ZeroOrMore);
|
||||
|
||||
extern cl::opt<int> ProfileSummaryCutoffCold;
|
||||
|
||||
using namespace llvm;
|
||||
@ -401,7 +406,8 @@ void CSProfileGenerator::postProcessProfiles() {
|
||||
// Trim and merge cold context profile using cold threshold above;
|
||||
SampleContextTrimmer(ProfileMap)
|
||||
.trimAndMergeColdContextProfiles(
|
||||
ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
|
||||
ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
|
||||
CSProfColdContextFrameDepth);
|
||||
}
|
||||
|
||||
void CSProfileGenerator::computeSummaryAndThreshold() {
|
||||
|
Loading…
Reference in New Issue
Block a user