1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[CSSPGO][llvm-profdata] Support trimming cold context when merging profiles

This change adds support for trimming and merging cold contexts when merging CSSPGO profiles using llvm-profdata. This is similar to the context profile trimming in llvm-profgen; however, the flexibility to trim cold contexts after a profile has been generated can be useful.

Differential Revision: https://reviews.llvm.org/D100528
This commit is contained in:
Wenlei He 2021-04-14 22:53:40 -07:00
parent a2acab380b
commit e734b4c21b
9 changed files with 157 additions and 67 deletions

View File

@ -59,9 +59,6 @@ private:
public:
ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); }
ProfileSummaryInfo(std::unique_ptr<ProfileSummary> PSI)
: M(nullptr), Summary(std::move(PSI)) {}
ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default;
/// If no summary is present, attempt to refresh.

View File

@ -67,6 +67,8 @@ public:
/// Find the summary entry for a desired percentile of counts.
static const ProfileSummaryEntry &
getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile);
static uint64_t getHotCountThreshold(SummaryEntryVector &DS);
static uint64_t getColdCountThreshold(SummaryEntryVector &DS);
};
class InstrProfSummaryBuilder final : public ProfileSummaryBuilder {

View File

@ -23,49 +23,13 @@
#include "llvm/Support/CommandLine.h"
using namespace llvm;
// The following two parameters determine the threshold for a count to be
// considered hot/cold. These two parameters are percentile values (multiplied
// by 10000). If the counts are sorted in descending order, the minimum count to
// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
// threshold for determining cold count (everything <= this threshold is
// considered cold).
static cl::opt<int> ProfileSummaryCutoffHot(
"profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
cl::desc("A count is hot if it exceeds the minimum count to"
" reach this percentile of total counts."));
static cl::opt<int> ProfileSummaryCutoffCold(
"profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
cl::init(15000), cl::ZeroOrMore,
cl::desc("The code working set size is considered huge if the number of"
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
static cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
"profile-summary-large-working-set-size-threshold", cl::Hidden,
cl::init(12500), cl::ZeroOrMore,
cl::desc("The code working set size is considered large if the number of"
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
// The next two options override the counts derived from summary computation and
// are useful for debugging purposes.
static cl::opt<int> ProfileSummaryHotCount(
"profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
cl::desc("A fixed hot count that overrides the count derived from"
" profile-summary-cutoff-hot"));
static cl::opt<int> ProfileSummaryColdCount(
"profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
cl::desc("A fixed cold count that overrides the count derived from"
" profile-summary-cutoff-cold"));
// Knobs for profile summary based thresholds.
extern cl::opt<int> ProfileSummaryCutoffHot;
extern cl::opt<int> ProfileSummaryCutoffCold;
extern cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold;
extern cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold;
extern cl::opt<int> ProfileSummaryHotCount;
extern cl::opt<int> ProfileSummaryColdCount;
static cl::opt<bool> PartialProfile(
"partial-profile", cl::Hidden, cl::init(false),
@ -274,14 +238,10 @@ void ProfileSummaryInfo::computeThresholds() {
auto &DetailedSummary = Summary->getDetailedSummary();
auto &HotEntry = ProfileSummaryBuilder::getEntryForPercentile(
DetailedSummary, ProfileSummaryCutoffHot);
HotCountThreshold = HotEntry.MinCount;
if (ProfileSummaryHotCount.getNumOccurrences() > 0)
HotCountThreshold = ProfileSummaryHotCount;
auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
DetailedSummary, ProfileSummaryCutoffCold);
ColdCountThreshold = ColdEntry.MinCount;
if (ProfileSummaryColdCount.getNumOccurrences() > 0)
ColdCountThreshold = ProfileSummaryColdCount;
HotCountThreshold =
ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
ColdCountThreshold =
ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
assert(ColdCountThreshold <= HotCountThreshold &&
"Cold count threshold cannot exceed hot count threshold!");
if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {

View File

@ -26,6 +26,49 @@ cl::opt<bool> UseContextLessSummary(
"profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::desc("Merge context profiles before calculating thresholds."));
// The following two parameters determine the threshold for a count to be
// considered hot/cold. These two parameters are percentile values (multiplied
// by 10000). If the counts are sorted in descending order, the minimum count to
// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
// threshold for determining cold count (everything <= this threshold is
// considered cold).
cl::opt<int> ProfileSummaryCutoffHot(
"profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
cl::desc("A count is hot if it exceeds the minimum count to"
" reach this percentile of total counts."));
cl::opt<int> ProfileSummaryCutoffCold(
"profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
// Working-set-size knobs. The code working set is classified as huge (or
// large) when the number of blocks needed to reach the
// -profile-summary-cutoff-hot percentile exceeds the respective value below.
cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
cl::init(15000), cl::ZeroOrMore,
cl::desc("The code working set size is considered huge if the number of"
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
"profile-summary-large-working-set-size-threshold", cl::Hidden,
cl::init(12500), cl::ZeroOrMore,
cl::desc("The code working set size is considered large if the number of"
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
// The next two options override the counts derived from summary computation and
// are useful for debugging purposes.
cl::opt<int> ProfileSummaryHotCount(
"profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
cl::desc("A fixed hot count that overrides the count derived from"
" profile-summary-cutoff-hot"));
cl::opt<int> ProfileSummaryColdCount(
"profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
cl::desc("A fixed cold count that overrides the count derived from"
" profile-summary-cutoff-cold"));
// A set of cutoff values. Each value, when divided by ProfileSummary::Scale
// (which is 1000000) is a desired percentile of total counts.
static const uint32_t DefaultCutoffsData[] = {
@ -113,6 +156,24 @@ void ProfileSummaryBuilder::computeDetailedSummary() {
}
}
/// Compute the hot count threshold for the detailed summary \p DS.
///
/// The threshold is the MinCount of the summary entry selected by the
/// -profile-summary-cutoff-hot percentile, unless -profile-summary-hot-count
/// occurred on the command line, in which case that fixed value is returned
/// instead.
uint64_t ProfileSummaryBuilder::getHotCountThreshold(SummaryEntryVector &DS) {
  // The percentile lookup is performed unconditionally, even when the
  // override below ends up replacing its result.
  const uint64_t PercentileMinCount =
      ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot)
          .MinCount;
  const bool HasFixedHotCount = ProfileSummaryHotCount.getNumOccurrences() > 0;
  return HasFixedHotCount ? static_cast<uint64_t>(ProfileSummaryHotCount)
                          : PercentileMinCount;
}
/// Compute the cold count threshold for the detailed summary \p DS.
///
/// The threshold is the MinCount of the summary entry selected by the
/// -profile-summary-cutoff-cold percentile, unless -profile-summary-cold-count
/// occurred on the command line, in which case that fixed value is returned
/// instead.
uint64_t ProfileSummaryBuilder::getColdCountThreshold(SummaryEntryVector &DS) {
  // The percentile lookup is performed unconditionally, even when the
  // override below ends up replacing its result.
  const uint64_t PercentileMinCount =
      ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffCold)
          .MinCount;
  const bool HasFixedColdCount =
      ProfileSummaryColdCount.getNumOccurrences() > 0;
  return HasFixedColdCount ? static_cast<uint64_t>(ProfileSummaryColdCount)
                           : PercentileMinCount;
}
std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
computeDetailedSummary();
return std::make_unique<ProfileSummary>(

View File

@ -0,0 +1,42 @@
RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext
RUN: diff -b %t.proftext %S/Inputs/cs-sample.proftext
RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -profile-summary-cold-count=500
RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-MERGE
RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -sample-trim-cold-context -profile-summary-cold-count=500
RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-END
RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -sample-trim-cold-context -profile-summary-cutoff-cold=990000
RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-END
CHECK-TRIM: [main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11
CHECK-TRIM-NEXT: 0: 6
CHECK-TRIM-NEXT: 1: 6
CHECK-TRIM-NEXT: 3: 287884
CHECK-TRIM-NEXT: 4: 287864 _Z3fibi:315608
CHECK-TRIM-NEXT: 15: 23
CHECK-TRIM-NEXT: !Attributes: 0
CHECK-TRIM-NEXT: [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20
CHECK-TRIM-NEXT: 0: 15
CHECK-TRIM-NEXT: 1: 15
CHECK-TRIM-NEXT: 3: 74946
CHECK-TRIM-NEXT: 4: 74941 _Z3fibi:82359
CHECK-TRIM-NEXT: 10: 23324
CHECK-TRIM-NEXT: 11: 23327 _Z3fibi:25228
CHECK-TRIM-NEXT: 15: 11
CHECK-TRIM-NEXT: !Attributes: 1
CHECK-END-NOT: [
CHECK-MERGE: [_Z5funcBi]:360:32
CHECK-MERGE-NEXT: 0: 32
CHECK-MERGE-NEXT: 1: 32 _Z8funcLeafi:20
CHECK-MERGE-NEXT: 3: 12
CHECK-MERGE-NEXT: !Attributes: 0
CHECK-MERGE-NEXT:[main]:308:12
CHECK-MERGE-NEXT: 2: 24
CHECK-MERGE-NEXT: 3: 28 _Z5funcAi:18
CHECK-MERGE-NEXT: 3.1: 28 _Z5funcBi:30
CHECK-MERGE-NEXT: !Attributes: 0
CHECK-MERGE-NEXT:[_Z5funcAi]:99:11
CHECK-MERGE-NEXT: 0: 10
CHECK-MERGE-NEXT: 1: 10 _Z8funcLeafi:11
CHECK-MERGE-NEXT: 3: 24
CHECK-MERGE-NEXT: !Attributes: 0

View File

@ -7,7 +7,7 @@
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-PREINL
; Test preinliner threshold that prevents all possible inlining and merges everything into base profile.
; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 -sample-profile-hot-inline-threshold=0
; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 -sample-profile-cold-inline-threshold=0
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL
; CHECK-DEFAULT: [main:1 @ foo]:309:0

View File

@ -666,7 +666,9 @@ static void
mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
StringRef OutputFilename, ProfileFormat OutputFormat,
StringRef ProfileSymbolListFile, bool CompressAllSections,
bool UseMD5, bool GenPartialProfile, FailureMode FailMode) {
bool UseMD5, bool GenPartialProfile,
bool SampleMergeColdContext, bool SampleTrimColdContext,
FailureMode FailMode) {
using namespace sampleprof;
StringMap<FunctionSamples> ProfileMap;
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
@ -723,6 +725,22 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
if (ReaderList)
WriterList.merge(*ReaderList);
}
if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
// Use threshold calculated from profile summary unless specified.
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
uint64_t SampleProfColdThreshold =
ProfileSummaryBuilder::getColdCountThreshold(
(Summary->getDetailedSummary()));
// Trim and merge cold context profile using cold threshold above;
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(SampleProfColdThreshold,
SampleTrimColdContext,
SampleMergeColdContext);
}
auto WriterOrErr =
SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
if (std::error_code EC = WriterOrErr.getError())
@ -866,6 +884,14 @@ static int merge_main(int argc, const char *argv[]) {
"use-md5", cl::init(false), cl::Hidden,
cl::desc("Choose to use MD5 to represent string in name table (only "
"meaningful for -extbinary)"));
cl::opt<bool> SampleMergeColdContext(
"sample-merge-cold-context", cl::init(false), cl::Hidden,
cl::desc(
"Merge context sample profiles whose count is below cold threshold"));
cl::opt<bool> SampleTrimColdContext(
"sample-trim-cold-context", cl::init(false), cl::Hidden,
cl::desc(
"Trim context sample profiles whose count is below cold threshold"));
cl::opt<bool> GenPartialProfile(
"gen-partial-profile", cl::init(false), cl::Hidden,
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
@ -936,7 +962,8 @@ static int merge_main(int argc, const char *argv[]) {
else
mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
OutputFormat, ProfileSymbolListFile, CompressAllSections,
UseMD5, GenPartialProfile, FailureMode);
UseMD5, GenPartialProfile, SampleMergeColdContext,
SampleTrimColdContext, FailureMode);
return 0;
}

View File

@ -401,24 +401,25 @@ void CSProfileGenerator::postProcessProfiles() {
// Run global pre-inliner to adjust/merge context profile based on estimated
// inline decisions.
CSPreInliner(ProfileMap, PSI->getHotCountThreshold(),
PSI->getColdCountThreshold())
.run();
CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run();
// Trim and merge cold context profile using cold threshold above;
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(
CSProfColdThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
}
void CSProfileGenerator::computeSummaryAndThreshold() {
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
PSI.reset(new ProfileSummaryInfo(std::move(Summary)));
HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
(Summary->getDetailedSummary()));
ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
(Summary->getDetailedSummary()));
// Use threshold calculated from profile summary unless specified.
if (!CSProfColdThreshold.getNumOccurrences()) {
CSProfColdThreshold = PSI->getColdCountThreshold();
if (CSProfColdThreshold.getNumOccurrences()) {
ColdCountThreshold = CSProfColdThreshold;
}
}

View File

@ -12,7 +12,6 @@
#include "ErrorHandling.h"
#include "PerfReader.h"
#include "ProfiledBinary.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/ProfileData/SampleProfWriter.h"
#include <memory>
#include <unordered_set>
@ -187,8 +186,9 @@ protected:
void write(std::unique_ptr<SampleProfileWriter> Writer,
StringMap<FunctionSamples> &ProfileMap) override;
// Profile summary to answer isHotCount and isColdCount queries.
std::unique_ptr<ProfileSummaryInfo> PSI;
// Thresholds from profile summary to answer isHotCount/isColdCount queries.
uint64_t HotCountThreshold;
uint64_t ColdCountThreshold;
// String table owning context strings created from profile generation.
std::unordered_set<std::string> ContextStrings;