mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[CSSPGO][llvm-profdata] Support trimming cold context when merging profiles
This change adds support for trimming and merging cold contexts when merging CSSPGO profiles using llvm-profdata. This is similar to the context profile trimming in llvm-profgen; however, the flexibility to trim cold contexts after the profile is generated can be useful. Differential Revision: https://reviews.llvm.org/D100528
This commit is contained in:
parent
a2acab380b
commit
e734b4c21b
@ -59,9 +59,6 @@ private:
|
||||
|
||||
public:
|
||||
ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); }
|
||||
ProfileSummaryInfo(std::unique_ptr<ProfileSummary> PSI)
|
||||
: M(nullptr), Summary(std::move(PSI)) {}
|
||||
|
||||
ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default;
|
||||
|
||||
/// If no summary is present, attempt to refresh.
|
||||
|
@ -67,6 +67,8 @@ public:
|
||||
/// Find the summary entry for a desired percentile of counts.
|
||||
static const ProfileSummaryEntry &
|
||||
getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile);
|
||||
static uint64_t getHotCountThreshold(SummaryEntryVector &DS);
|
||||
static uint64_t getColdCountThreshold(SummaryEntryVector &DS);
|
||||
};
|
||||
|
||||
class InstrProfSummaryBuilder final : public ProfileSummaryBuilder {
|
||||
|
@ -23,49 +23,13 @@
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
using namespace llvm;
|
||||
|
||||
// The following two parameters determine the threshold for a count to be
|
||||
// considered hot/cold. These two parameters are percentile values (multiplied
|
||||
// by 10000). If the counts are sorted in descending order, the minimum count to
|
||||
// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
|
||||
// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
|
||||
// threshold for determining cold count (everything <= this threshold is
|
||||
// considered cold).
|
||||
|
||||
static cl::opt<int> ProfileSummaryCutoffHot(
|
||||
"profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
|
||||
cl::desc("A count is hot if it exceeds the minimum count to"
|
||||
" reach this percentile of total counts."));
|
||||
|
||||
static cl::opt<int> ProfileSummaryCutoffCold(
|
||||
"profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
|
||||
cl::desc("A count is cold if it is below the minimum count"
|
||||
" to reach this percentile of total counts."));
|
||||
|
||||
static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
|
||||
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
|
||||
cl::init(15000), cl::ZeroOrMore,
|
||||
cl::desc("The code working set size is considered huge if the number of"
|
||||
" blocks required to reach the -profile-summary-cutoff-hot"
|
||||
" percentile exceeds this count."));
|
||||
|
||||
static cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
|
||||
"profile-summary-large-working-set-size-threshold", cl::Hidden,
|
||||
cl::init(12500), cl::ZeroOrMore,
|
||||
cl::desc("The code working set size is considered large if the number of"
|
||||
" blocks required to reach the -profile-summary-cutoff-hot"
|
||||
" percentile exceeds this count."));
|
||||
|
||||
// The next two options override the counts derived from summary computation and
|
||||
// are useful for debugging purposes.
|
||||
static cl::opt<int> ProfileSummaryHotCount(
|
||||
"profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
|
||||
cl::desc("A fixed hot count that overrides the count derived from"
|
||||
" profile-summary-cutoff-hot"));
|
||||
|
||||
static cl::opt<int> ProfileSummaryColdCount(
|
||||
"profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
|
||||
cl::desc("A fixed cold count that overrides the count derived from"
|
||||
" profile-summary-cutoff-cold"));
|
||||
// Knobs for profile summary based thresholds.
|
||||
extern cl::opt<int> ProfileSummaryCutoffHot;
|
||||
extern cl::opt<int> ProfileSummaryCutoffCold;
|
||||
extern cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold;
|
||||
extern cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold;
|
||||
extern cl::opt<int> ProfileSummaryHotCount;
|
||||
extern cl::opt<int> ProfileSummaryColdCount;
|
||||
|
||||
static cl::opt<bool> PartialProfile(
|
||||
"partial-profile", cl::Hidden, cl::init(false),
|
||||
@ -274,14 +238,10 @@ void ProfileSummaryInfo::computeThresholds() {
|
||||
auto &DetailedSummary = Summary->getDetailedSummary();
|
||||
auto &HotEntry = ProfileSummaryBuilder::getEntryForPercentile(
|
||||
DetailedSummary, ProfileSummaryCutoffHot);
|
||||
HotCountThreshold = HotEntry.MinCount;
|
||||
if (ProfileSummaryHotCount.getNumOccurrences() > 0)
|
||||
HotCountThreshold = ProfileSummaryHotCount;
|
||||
auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
|
||||
DetailedSummary, ProfileSummaryCutoffCold);
|
||||
ColdCountThreshold = ColdEntry.MinCount;
|
||||
if (ProfileSummaryColdCount.getNumOccurrences() > 0)
|
||||
ColdCountThreshold = ProfileSummaryColdCount;
|
||||
HotCountThreshold =
|
||||
ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
|
||||
ColdCountThreshold =
|
||||
ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
|
||||
assert(ColdCountThreshold <= HotCountThreshold &&
|
||||
"Cold count threshold cannot exceed hot count threshold!");
|
||||
if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
|
||||
|
@ -26,6 +26,49 @@ cl::opt<bool> UseContextLessSummary(
|
||||
"profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("Merge context profiles before calculating thresholds."));
|
||||
|
||||
// The following two parameters determine the threshold for a count to be
|
||||
// considered hot/cold. These two parameters are percentile values (multiplied
|
||||
// by 10000). If the counts are sorted in descending order, the minimum count to
|
||||
// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
|
||||
// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
|
||||
// threshold for determining cold count (everything <= this threshold is
|
||||
// considered cold).
|
||||
cl::opt<int> ProfileSummaryCutoffHot(
|
||||
"profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
|
||||
cl::desc("A count is hot if it exceeds the minimum count to"
|
||||
" reach this percentile of total counts."));
|
||||
|
||||
cl::opt<int> ProfileSummaryCutoffCold(
|
||||
"profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
|
||||
cl::desc("A count is cold if it is below the minimum count"
|
||||
" to reach this percentile of total counts."));
|
||||
|
||||
cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
|
||||
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
|
||||
cl::init(15000), cl::ZeroOrMore,
|
||||
cl::desc("The code working set size is considered huge if the number of"
|
||||
" blocks required to reach the -profile-summary-cutoff-hot"
|
||||
" percentile exceeds this count."));
|
||||
|
||||
cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
|
||||
"profile-summary-large-working-set-size-threshold", cl::Hidden,
|
||||
cl::init(12500), cl::ZeroOrMore,
|
||||
cl::desc("The code working set size is considered large if the number of"
|
||||
" blocks required to reach the -profile-summary-cutoff-hot"
|
||||
" percentile exceeds this count."));
|
||||
|
||||
// The next two options override the counts derived from summary computation and
|
||||
// are useful for debugging purposes.
|
||||
cl::opt<int> ProfileSummaryHotCount(
|
||||
"profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
|
||||
cl::desc("A fixed hot count that overrides the count derived from"
|
||||
" profile-summary-cutoff-hot"));
|
||||
|
||||
cl::opt<int> ProfileSummaryColdCount(
|
||||
"profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
|
||||
cl::desc("A fixed cold count that overrides the count derived from"
|
||||
" profile-summary-cutoff-cold"));
|
||||
|
||||
// A set of cutoff values. Each value, when divided by ProfileSummary::Scale
|
||||
// (which is 1000000) is a desired percentile of total counts.
|
||||
static const uint32_t DefaultCutoffsData[] = {
|
||||
@ -113,6 +156,24 @@ void ProfileSummaryBuilder::computeDetailedSummary() {
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the hot count threshold for the detailed summary \p DS.
///
/// The threshold is the MinCount of the summary entry selected by
/// getEntryForPercentile() for the ProfileSummaryCutoffHot percentile. If the
/// -profile-summary-hot-count option was given on the command line, its value
/// is returned instead.
uint64_t ProfileSummaryBuilder::getHotCountThreshold(SummaryEntryVector &DS) {
|
||||
auto &HotEntry =
|
||||
ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot);
|
||||
uint64_t HotCountThreshold = HotEntry.MinCount;
|
||||
// An explicitly specified fixed hot count overrides the derived threshold.
// NOTE(review): the option is a cl::opt<int>, so a negative value would be
// converted to a very large unsigned threshold here -- confirm this is
// acceptable for a debugging-only knob.
if (ProfileSummaryHotCount.getNumOccurrences() > 0)
|
||||
HotCountThreshold = ProfileSummaryHotCount;
|
||||
return HotCountThreshold;
|
||||
}
|
||||
|
||||
/// Compute the cold count threshold for the detailed summary \p DS.
///
/// The threshold is the MinCount of the summary entry selected by
/// getEntryForPercentile() for the ProfileSummaryCutoffCold percentile. If the
/// -profile-summary-cold-count option was given on the command line, its value
/// is returned instead.
uint64_t ProfileSummaryBuilder::getColdCountThreshold(SummaryEntryVector &DS) {
|
||||
auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
|
||||
DS, ProfileSummaryCutoffCold);
|
||||
uint64_t ColdCountThreshold = ColdEntry.MinCount;
|
||||
// An explicitly specified fixed cold count overrides the derived threshold.
// NOTE(review): the option is a cl::opt<int>, so a negative value would be
// converted to a very large unsigned threshold here -- confirm this is
// acceptable for a debugging-only knob.
if (ProfileSummaryColdCount.getNumOccurrences() > 0)
|
||||
ColdCountThreshold = ProfileSummaryColdCount;
|
||||
return ColdCountThreshold;
|
||||
}
|
||||
|
||||
std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
|
||||
computeDetailedSummary();
|
||||
return std::make_unique<ProfileSummary>(
|
||||
|
42
test/tools/llvm-profdata/cs-sample-trimmer.test
Normal file
42
test/tools/llvm-profdata/cs-sample-trimmer.test
Normal file
@ -0,0 +1,42 @@
|
||||
RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext
|
||||
RUN: diff -b %t.proftext %S/Inputs/cs-sample.proftext
|
||||
|
||||
RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -profile-summary-cold-count=500
|
||||
RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-MERGE
|
||||
RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -sample-trim-cold-context -profile-summary-cold-count=500
|
||||
RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-END
|
||||
RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -sample-trim-cold-context -profile-summary-cutoff-cold=990000
|
||||
RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-END
|
||||
|
||||
CHECK-TRIM: [main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11
|
||||
CHECK-TRIM-NEXT: 0: 6
|
||||
CHECK-TRIM-NEXT: 1: 6
|
||||
CHECK-TRIM-NEXT: 3: 287884
|
||||
CHECK-TRIM-NEXT: 4: 287864 _Z3fibi:315608
|
||||
CHECK-TRIM-NEXT: 15: 23
|
||||
CHECK-TRIM-NEXT: !Attributes: 0
|
||||
CHECK-TRIM-NEXT: [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20
|
||||
CHECK-TRIM-NEXT: 0: 15
|
||||
CHECK-TRIM-NEXT: 1: 15
|
||||
CHECK-TRIM-NEXT: 3: 74946
|
||||
CHECK-TRIM-NEXT: 4: 74941 _Z3fibi:82359
|
||||
CHECK-TRIM-NEXT: 10: 23324
|
||||
CHECK-TRIM-NEXT: 11: 23327 _Z3fibi:25228
|
||||
CHECK-TRIM-NEXT: 15: 11
|
||||
CHECK-TRIM-NEXT: !Attributes: 1
|
||||
CHECK-END-NOT: [
|
||||
CHECK-MERGE: [_Z5funcBi]:360:32
|
||||
CHECK-MERGE-NEXT: 0: 32
|
||||
CHECK-MERGE-NEXT: 1: 32 _Z8funcLeafi:20
|
||||
CHECK-MERGE-NEXT: 3: 12
|
||||
CHECK-MERGE-NEXT: !Attributes: 0
|
||||
CHECK-MERGE-NEXT:[main]:308:12
|
||||
CHECK-MERGE-NEXT: 2: 24
|
||||
CHECK-MERGE-NEXT: 3: 28 _Z5funcAi:18
|
||||
CHECK-MERGE-NEXT: 3.1: 28 _Z5funcBi:30
|
||||
CHECK-MERGE-NEXT: !Attributes: 0
|
||||
CHECK-MERGE-NEXT:[_Z5funcAi]:99:11
|
||||
CHECK-MERGE-NEXT: 0: 10
|
||||
CHECK-MERGE-NEXT: 1: 10 _Z8funcLeafi:11
|
||||
CHECK-MERGE-NEXT: 3: 24
|
||||
CHECK-MERGE-NEXT: !Attributes: 0
|
@ -7,7 +7,7 @@
|
||||
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-PREINL
|
||||
|
||||
; Test preinliner threshold that prevents all possible inlining and merges everything into base profile.
|
||||
; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 -sample-profile-hot-inline-threshold=0
|
||||
; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 -sample-profile-cold-inline-threshold=0
|
||||
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL
|
||||
|
||||
; CHECK-DEFAULT: [main:1 @ foo]:309:0
|
||||
|
@ -666,7 +666,9 @@ static void
|
||||
mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
|
||||
StringRef OutputFilename, ProfileFormat OutputFormat,
|
||||
StringRef ProfileSymbolListFile, bool CompressAllSections,
|
||||
bool UseMD5, bool GenPartialProfile, FailureMode FailMode) {
|
||||
bool UseMD5, bool GenPartialProfile,
|
||||
bool SampleMergeColdContext, bool SampleTrimColdContext,
|
||||
FailureMode FailMode) {
|
||||
using namespace sampleprof;
|
||||
StringMap<FunctionSamples> ProfileMap;
|
||||
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
|
||||
@ -723,6 +725,22 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
|
||||
if (ReaderList)
|
||||
WriterList.merge(*ReaderList);
|
||||
}
|
||||
|
||||
if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
|
||||
// Use threshold calculated from profile summary unless specified.
|
||||
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
|
||||
auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
|
||||
uint64_t SampleProfColdThreshold =
|
||||
ProfileSummaryBuilder::getColdCountThreshold(
|
||||
(Summary->getDetailedSummary()));
|
||||
|
||||
// Trim and merge cold context profile using cold threshold above;
|
||||
SampleContextTrimmer(ProfileMap)
|
||||
.trimAndMergeColdContextProfiles(SampleProfColdThreshold,
|
||||
SampleTrimColdContext,
|
||||
SampleMergeColdContext);
|
||||
}
|
||||
|
||||
auto WriterOrErr =
|
||||
SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
|
||||
if (std::error_code EC = WriterOrErr.getError())
|
||||
@ -866,6 +884,14 @@ static int merge_main(int argc, const char *argv[]) {
|
||||
"use-md5", cl::init(false), cl::Hidden,
|
||||
cl::desc("Choose to use MD5 to represent string in name table (only "
|
||||
"meaningful for -extbinary)"));
|
||||
cl::opt<bool> SampleMergeColdContext(
|
||||
"sample-merge-cold-context", cl::init(false), cl::Hidden,
|
||||
cl::desc(
|
||||
"Merge context sample profiles whose count is below cold threshold"));
|
||||
cl::opt<bool> SampleTrimColdContext(
|
||||
"sample-trim-cold-context", cl::init(false), cl::Hidden,
|
||||
cl::desc(
|
||||
"Trim context sample profiles whose count is below cold threshold"));
|
||||
cl::opt<bool> GenPartialProfile(
|
||||
"gen-partial-profile", cl::init(false), cl::Hidden,
|
||||
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
|
||||
@ -936,7 +962,8 @@ static int merge_main(int argc, const char *argv[]) {
|
||||
else
|
||||
mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
|
||||
OutputFormat, ProfileSymbolListFile, CompressAllSections,
|
||||
UseMD5, GenPartialProfile, FailureMode);
|
||||
UseMD5, GenPartialProfile, SampleMergeColdContext,
|
||||
SampleTrimColdContext, FailureMode);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -401,24 +401,25 @@ void CSProfileGenerator::postProcessProfiles() {
|
||||
|
||||
// Run global pre-inliner to adjust/merge context profile based on estimated
|
||||
// inline decisions.
|
||||
CSPreInliner(ProfileMap, PSI->getHotCountThreshold(),
|
||||
PSI->getColdCountThreshold())
|
||||
.run();
|
||||
CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run();
|
||||
|
||||
// Trim and merge cold context profile using cold threshold above;
|
||||
SampleContextTrimmer(ProfileMap)
|
||||
.trimAndMergeColdContextProfiles(
|
||||
CSProfColdThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
|
||||
ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
|
||||
}
|
||||
|
||||
void CSProfileGenerator::computeSummaryAndThreshold() {
|
||||
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
|
||||
auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
|
||||
PSI.reset(new ProfileSummaryInfo(std::move(Summary)));
|
||||
HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
|
||||
(Summary->getDetailedSummary()));
|
||||
ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
|
||||
(Summary->getDetailedSummary()));
|
||||
|
||||
// Use threshold calculated from profile summary unless specified.
|
||||
if (!CSProfColdThreshold.getNumOccurrences()) {
|
||||
CSProfColdThreshold = PSI->getColdCountThreshold();
|
||||
if (CSProfColdThreshold.getNumOccurrences()) {
|
||||
ColdCountThreshold = CSProfColdThreshold;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include "ErrorHandling.h"
|
||||
#include "PerfReader.h"
|
||||
#include "ProfiledBinary.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/ProfileData/SampleProfWriter.h"
|
||||
#include <memory>
|
||||
#include <unordered_set>
|
||||
@ -187,8 +186,9 @@ protected:
|
||||
void write(std::unique_ptr<SampleProfileWriter> Writer,
|
||||
StringMap<FunctionSamples> &ProfileMap) override;
|
||||
|
||||
// Profile summary to answer isHotCount and isColdCount queries.
|
||||
std::unique_ptr<ProfileSummaryInfo> PSI;
|
||||
// Thresholds from profile summary to answer isHotCount/isColdCount queries.
|
||||
uint64_t HotCountThreshold;
|
||||
uint64_t ColdCountThreshold;
|
||||
|
||||
// String table owning context strings created from profile generation.
|
||||
std::unordered_set<std::string> ContextStrings;
|
||||
|
Loading…
Reference in New Issue
Block a user