From e734b4c21b5222a1ea9c6ca32f9a917331b72bb1 Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Wed, 14 Apr 2021 22:53:40 -0700 Subject: [PATCH] [CSSPGO][llvm-profdata] Support trimming cold context when merging profiles The change adds support for trimming and merging cold context when merging CSSPGO profiles using llvm-profdata. This is similar to the context profile trimming in llvm-profgen; however, the flexibility to trim cold context after the profile is generated can be useful. Differential Revision: https://reviews.llvm.org/D100528 --- include/llvm/Analysis/ProfileSummaryInfo.h | 3 - include/llvm/ProfileData/ProfileCommon.h | 2 + lib/Analysis/ProfileSummaryInfo.cpp | 62 ++++--------------- lib/ProfileData/ProfileSummaryBuilder.cpp | 61 ++++++++++++++++++ .../llvm-profdata/cs-sample-trimmer.test | 42 +++++++++++++ test/tools/llvm-profgen/cs-preinline.test | 2 +- tools/llvm-profdata/llvm-profdata.cpp | 31 +++++++++- tools/llvm-profgen/ProfileGenerator.cpp | 15 ++--- tools/llvm-profgen/ProfileGenerator.h | 6 +- 9 files changed, 157 insertions(+), 67 deletions(-) create mode 100644 test/tools/llvm-profdata/cs-sample-trimmer.test diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h index e3e6268004f..c95404d96f4 100644 --- a/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/include/llvm/Analysis/ProfileSummaryInfo.h @@ -59,9 +59,6 @@ private: public: ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); } - ProfileSummaryInfo(std::unique_ptr PSI) - : M(nullptr), Summary(std::move(PSI)) {} - ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default; /// If no summary is present, attempt to refresh. diff --git a/include/llvm/ProfileData/ProfileCommon.h b/include/llvm/ProfileData/ProfileCommon.h index 55b94b2e690..f2d9ccc45fd 100644 --- a/include/llvm/ProfileData/ProfileCommon.h +++ b/include/llvm/ProfileData/ProfileCommon.h @@ -67,6 +67,8 @@ public: /// Find the summary entry for a desired percentile of counts. 
static const ProfileSummaryEntry & getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile); + static uint64_t getHotCountThreshold(SummaryEntryVector &DS); + static uint64_t getColdCountThreshold(SummaryEntryVector &DS); }; class InstrProfSummaryBuilder final : public ProfileSummaryBuilder { diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp index 67712c3d818..6dda0bf0a1b 100644 --- a/lib/Analysis/ProfileSummaryInfo.cpp +++ b/lib/Analysis/ProfileSummaryInfo.cpp @@ -23,49 +23,13 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; -// The following two parameters determine the threshold for a count to be -// considered hot/cold. These two parameters are percentile values (multiplied -// by 10000). If the counts are sorted in descending order, the minimum count to -// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count. -// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the -// threshold for determining cold count (everything <= this threshold is -// considered cold). 
- -static cl::opt ProfileSummaryCutoffHot( - "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore, - cl::desc("A count is hot if it exceeds the minimum count to" - " reach this percentile of total counts.")); - -static cl::opt ProfileSummaryCutoffCold( - "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore, - cl::desc("A count is cold if it is below the minimum count" - " to reach this percentile of total counts.")); - -static cl::opt ProfileSummaryHugeWorkingSetSizeThreshold( - "profile-summary-huge-working-set-size-threshold", cl::Hidden, - cl::init(15000), cl::ZeroOrMore, - cl::desc("The code working set size is considered huge if the number of" - " blocks required to reach the -profile-summary-cutoff-hot" - " percentile exceeds this count.")); - -static cl::opt ProfileSummaryLargeWorkingSetSizeThreshold( - "profile-summary-large-working-set-size-threshold", cl::Hidden, - cl::init(12500), cl::ZeroOrMore, - cl::desc("The code working set size is considered large if the number of" - " blocks required to reach the -profile-summary-cutoff-hot" - " percentile exceeds this count.")); - -// The next two options override the counts derived from summary computation and -// are useful for debugging purposes. -static cl::opt ProfileSummaryHotCount( - "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore, - cl::desc("A fixed hot count that overrides the count derived from" - " profile-summary-cutoff-hot")); - -static cl::opt ProfileSummaryColdCount( - "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore, - cl::desc("A fixed cold count that overrides the count derived from" - " profile-summary-cutoff-cold")); +// Knobs for profile summary based thresholds. 
+extern cl::opt ProfileSummaryCutoffHot; +extern cl::opt ProfileSummaryCutoffCold; +extern cl::opt ProfileSummaryHugeWorkingSetSizeThreshold; +extern cl::opt ProfileSummaryLargeWorkingSetSizeThreshold; +extern cl::opt ProfileSummaryHotCount; +extern cl::opt ProfileSummaryColdCount; static cl::opt PartialProfile( "partial-profile", cl::Hidden, cl::init(false), @@ -274,14 +238,10 @@ void ProfileSummaryInfo::computeThresholds() { auto &DetailedSummary = Summary->getDetailedSummary(); auto &HotEntry = ProfileSummaryBuilder::getEntryForPercentile( DetailedSummary, ProfileSummaryCutoffHot); - HotCountThreshold = HotEntry.MinCount; - if (ProfileSummaryHotCount.getNumOccurrences() > 0) - HotCountThreshold = ProfileSummaryHotCount; - auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( - DetailedSummary, ProfileSummaryCutoffCold); - ColdCountThreshold = ColdEntry.MinCount; - if (ProfileSummaryColdCount.getNumOccurrences() > 0) - ColdCountThreshold = ProfileSummaryColdCount; + HotCountThreshold = + ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary); + ColdCountThreshold = + ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary); assert(ColdCountThreshold <= HotCountThreshold && "Cold count threshold cannot exceed hot count threshold!"); if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) { diff --git a/lib/ProfileData/ProfileSummaryBuilder.cpp b/lib/ProfileData/ProfileSummaryBuilder.cpp index b6b4e79d5d2..6def44e4525 100644 --- a/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -26,6 +26,49 @@ cl::opt UseContextLessSummary( "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::desc("Merge context profiles before calculating thresholds.")); +// The following two parameters determine the threshold for a count to be +// considered hot/cold. These two parameters are percentile values (multiplied +// by 10000). 
If the counts are sorted in descending order, the minimum count to +// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count. +// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the +// threshold for determining cold count (everything <= this threshold is +// considered cold). +cl::opt ProfileSummaryCutoffHot( + "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore, + cl::desc("A count is hot if it exceeds the minimum count to" + " reach this percentile of total counts.")); + +cl::opt ProfileSummaryCutoffCold( + "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore, + cl::desc("A count is cold if it is below the minimum count" + " to reach this percentile of total counts.")); + +cl::opt ProfileSummaryHugeWorkingSetSizeThreshold( + "profile-summary-huge-working-set-size-threshold", cl::Hidden, + cl::init(15000), cl::ZeroOrMore, + cl::desc("The code working set size is considered huge if the number of" + " blocks required to reach the -profile-summary-cutoff-hot" + " percentile exceeds this count.")); + +cl::opt ProfileSummaryLargeWorkingSetSizeThreshold( + "profile-summary-large-working-set-size-threshold", cl::Hidden, + cl::init(12500), cl::ZeroOrMore, + cl::desc("The code working set size is considered large if the number of" + " blocks required to reach the -profile-summary-cutoff-hot" + " percentile exceeds this count.")); + +// The next two options override the counts derived from summary computation and +// are useful for debugging purposes. +cl::opt ProfileSummaryHotCount( + "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore, + cl::desc("A fixed hot count that overrides the count derived from" + " profile-summary-cutoff-hot")); + +cl::opt ProfileSummaryColdCount( + "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore, + cl::desc("A fixed cold count that overrides the count derived from" + " profile-summary-cutoff-cold")); + // A set of cutoff values. 
Each value, when divided by ProfileSummary::Scale // (which is 1000000) is a desired percentile of total counts. static const uint32_t DefaultCutoffsData[] = { @@ -113,6 +156,24 @@ void ProfileSummaryBuilder::computeDetailedSummary() { } } +uint64_t ProfileSummaryBuilder::getHotCountThreshold(SummaryEntryVector &DS) { + auto &HotEntry = + ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot); + uint64_t HotCountThreshold = HotEntry.MinCount; + if (ProfileSummaryHotCount.getNumOccurrences() > 0) + HotCountThreshold = ProfileSummaryHotCount; + return HotCountThreshold; +} + +uint64_t ProfileSummaryBuilder::getColdCountThreshold(SummaryEntryVector &DS) { + auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( + DS, ProfileSummaryCutoffCold); + uint64_t ColdCountThreshold = ColdEntry.MinCount; + if (ProfileSummaryColdCount.getNumOccurrences() > 0) + ColdCountThreshold = ProfileSummaryColdCount; + return ColdCountThreshold; +} + std::unique_ptr SampleProfileSummaryBuilder::getSummary() { computeDetailedSummary(); return std::make_unique( diff --git a/test/tools/llvm-profdata/cs-sample-trimmer.test b/test/tools/llvm-profdata/cs-sample-trimmer.test new file mode 100644 index 00000000000..2ab70e83bce --- /dev/null +++ b/test/tools/llvm-profdata/cs-sample-trimmer.test @@ -0,0 +1,42 @@ +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext +RUN: diff -b %t.proftext %S/Inputs/cs-sample.proftext + +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -profile-summary-cold-count=500 +RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-MERGE +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -sample-trim-cold-context -profile-summary-cold-count=500 +RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-END +RUN: llvm-profdata merge 
--sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -sample-trim-cold-context -profile-summary-cutoff-cold=990000 +RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-END + +CHECK-TRIM: [main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11 +CHECK-TRIM-NEXT: 0: 6 +CHECK-TRIM-NEXT: 1: 6 +CHECK-TRIM-NEXT: 3: 287884 +CHECK-TRIM-NEXT: 4: 287864 _Z3fibi:315608 +CHECK-TRIM-NEXT: 15: 23 +CHECK-TRIM-NEXT: !Attributes: 0 +CHECK-TRIM-NEXT: [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 +CHECK-TRIM-NEXT: 0: 15 +CHECK-TRIM-NEXT: 1: 15 +CHECK-TRIM-NEXT: 3: 74946 +CHECK-TRIM-NEXT: 4: 74941 _Z3fibi:82359 +CHECK-TRIM-NEXT: 10: 23324 +CHECK-TRIM-NEXT: 11: 23327 _Z3fibi:25228 +CHECK-TRIM-NEXT: 15: 11 +CHECK-TRIM-NEXT: !Attributes: 1 +CHECK-END-NOT: [ +CHECK-MERGE: [_Z5funcBi]:360:32 +CHECK-MERGE-NEXT: 0: 32 +CHECK-MERGE-NEXT: 1: 32 _Z8funcLeafi:20 +CHECK-MERGE-NEXT: 3: 12 +CHECK-MERGE-NEXT: !Attributes: 0 +CHECK-MERGE-NEXT:[main]:308:12 +CHECK-MERGE-NEXT: 2: 24 +CHECK-MERGE-NEXT: 3: 28 _Z5funcAi:18 +CHECK-MERGE-NEXT: 3.1: 28 _Z5funcBi:30 +CHECK-MERGE-NEXT: !Attributes: 0 +CHECK-MERGE-NEXT:[_Z5funcAi]:99:11 +CHECK-MERGE-NEXT: 0: 10 +CHECK-MERGE-NEXT: 1: 10 _Z8funcLeafi:11 +CHECK-MERGE-NEXT: 3: 24 +CHECK-MERGE-NEXT: !Attributes: 0 diff --git a/test/tools/llvm-profgen/cs-preinline.test b/test/tools/llvm-profgen/cs-preinline.test index e9aa7cbc73a..f1a96bb1c46 100644 --- a/test/tools/llvm-profgen/cs-preinline.test +++ b/test/tools/llvm-profgen/cs-preinline.test @@ -7,7 +7,7 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-PREINL ; Test preinliner threshold that prevents all possible inlining and merges everything into base profile. 
-; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 -sample-profile-hot-inline-threshold=0 +; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 -sample-profile-cold-inline-threshold=0 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL ; CHECK-DEFAULT: [main:1 @ foo]:309:0 diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp index 66b95d1ba1d..f842444101a 100644 --- a/tools/llvm-profdata/llvm-profdata.cpp +++ b/tools/llvm-profdata/llvm-profdata.cpp @@ -666,7 +666,9 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, StringRef ProfileSymbolListFile, bool CompressAllSections, - bool UseMD5, bool GenPartialProfile, FailureMode FailMode) { + bool UseMD5, bool GenPartialProfile, + bool SampleMergeColdContext, bool SampleTrimColdContext, + FailureMode FailMode) { using namespace sampleprof; StringMap ProfileMap; SmallVector, 5> Readers; @@ -723,6 +725,22 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, if (ReaderList) WriterList.merge(*ReaderList); } + + if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) { + // Use threshold calculated from profile summary unless specified. 
+ SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); + auto Summary = Builder.computeSummaryForProfiles(ProfileMap); + uint64_t SampleProfColdThreshold = + ProfileSummaryBuilder::getColdCountThreshold( + (Summary->getDetailedSummary())); + + // Trim and merge cold context profile using cold threshold above; + SampleContextTrimmer(ProfileMap) + .trimAndMergeColdContextProfiles(SampleProfColdThreshold, + SampleTrimColdContext, + SampleMergeColdContext); + } + auto WriterOrErr = SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]); if (std::error_code EC = WriterOrErr.getError()) @@ -866,6 +884,14 @@ static int merge_main(int argc, const char *argv[]) { "use-md5", cl::init(false), cl::Hidden, cl::desc("Choose to use MD5 to represent string in name table (only " "meaningful for -extbinary)")); + cl::opt SampleMergeColdContext( + "sample-merge-cold-context", cl::init(false), cl::Hidden, + cl::desc( + "Merge context sample profiles whose count is below cold threshold")); + cl::opt SampleTrimColdContext( + "sample-trim-cold-context", cl::init(false), cl::Hidden, + cl::desc( + "Trim context sample profiles whose count is below cold threshold")); cl::opt GenPartialProfile( "gen-partial-profile", cl::init(false), cl::Hidden, cl::desc("Generate a partial profile (only meaningful for -extbinary)")); @@ -936,7 +962,8 @@ static int merge_main(int argc, const char *argv[]) { else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, - UseMD5, GenPartialProfile, FailureMode); + UseMD5, GenPartialProfile, SampleMergeColdContext, + SampleTrimColdContext, FailureMode); return 0; } diff --git a/tools/llvm-profgen/ProfileGenerator.cpp b/tools/llvm-profgen/ProfileGenerator.cpp index 0cfcc841968..6bfee2d0132 100644 --- a/tools/llvm-profgen/ProfileGenerator.cpp +++ b/tools/llvm-profgen/ProfileGenerator.cpp @@ -401,24 +401,25 @@ void CSProfileGenerator::postProcessProfiles() { // 
Run global pre-inliner to adjust/merge context profile based on estimated // inline decisions. - CSPreInliner(ProfileMap, PSI->getHotCountThreshold(), - PSI->getColdCountThreshold()) - .run(); + CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run(); // Trim and merge cold context profile using cold threshold above; SampleContextTrimmer(ProfileMap) .trimAndMergeColdContextProfiles( - CSProfColdThreshold, CSProfTrimColdContext, CSProfMergeColdContext); + ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext); } void CSProfileGenerator::computeSummaryAndThreshold() { SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); auto Summary = Builder.computeSummaryForProfiles(ProfileMap); - PSI.reset(new ProfileSummaryInfo(std::move(Summary))); + HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( + (Summary->getDetailedSummary())); + ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( + (Summary->getDetailedSummary())); // Use threshold calculated from profile summary unless specified. - if (!CSProfColdThreshold.getNumOccurrences()) { - CSProfColdThreshold = PSI->getColdCountThreshold(); + if (CSProfColdThreshold.getNumOccurrences()) { + ColdCountThreshold = CSProfColdThreshold; } } diff --git a/tools/llvm-profgen/ProfileGenerator.h b/tools/llvm-profgen/ProfileGenerator.h index b2e2a339a26..66ccf495b33 100644 --- a/tools/llvm-profgen/ProfileGenerator.h +++ b/tools/llvm-profgen/ProfileGenerator.h @@ -12,7 +12,6 @@ #include "ErrorHandling.h" #include "PerfReader.h" #include "ProfiledBinary.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/ProfileData/SampleProfWriter.h" #include #include @@ -187,8 +186,9 @@ protected: void write(std::unique_ptr Writer, StringMap &ProfileMap) override; - // Profile summary to answer isHotCount and isColdCount queries. - std::unique_ptr PSI; + // Thresholds from profile summary to answer isHotCount/isColdCount queries. 
+ uint64_t HotCountThreshold; + uint64_t ColdCountThreshold; // String table owning context strings created from profile generation. std::unordered_set ContextStrings;