1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

Supplement instr profile with sample profile.

PGO profile is usually more precise than sample profile. However, PGO profile
needs to be collected from loadtest and loadtest may not be representative
enough to the production workload. Sample profile collected from production
can be used as a supplement -- for functions cold in loadtest but warm/hot
in production, we can scale up the related function in PGO profile if the
function is warm or hot in sample profile.

The implementation contains changes in compiler side and llvm-profdata side.
Given an instr profile and a sample profile, for a function cold in PGO
profile but warm/hot in sample profile, llvm-profdata will either mark
all the counters in the profile to be -1 or scale up the max count in the
function to be above hot threshold, depending on the zero counter ratio in
the profile. The assumption is if there are too many counters being zero
in the function profile, the profile is more likely to cause harm than good,
then llvm-profdata will mark all the counters to be -1 indicating the
function is hot but the profile is unaccountable. In compiler side, if a
function profile with all -1 counters is seen, the function entry count will
be set to be above hot threshold but its internal profile will be dropped.

In the long run, it may be useful to let compiler support using PGO profile
and sample profile at the same time, but that requires more careful design
and more substantial changes to make two profiles work seamlessly. The patch
here serves as a simple intermediate solution.

Differential Revision: https://reviews.llvm.org/D81981
This commit is contained in:
Wei Mi 2020-07-08 15:19:44 -07:00
parent 9d1bb81744
commit 51d4708437
15 changed files with 488 additions and 26 deletions

View File

@ -161,6 +161,30 @@ OPTIONS
coverage for the optimized target. This option can only be used with
sample-based profile in extbinary format.
.. option:: -supplement-instr-with-sample=path_to_sample_profile
Supplement an instrumentation profile with sample profile. The sample profile
is the input of the flag. Output will be in instrumentation format (only works
with -instr).
.. option:: -zero-counter-threshold=threshold_float_number
For the function which is cold in instr profile but hot in sample profile, if
the ratio of the number of zero counters divided by the the total number of
counters is above the threshold, the profile of the function will be regarded
as being harmful for performance and will be dropped.
.. option:: -instr-prof-cold-threshold=threshold_int_number
User specified cold threshold for instr profile which will override the cold
threshold got from profile summary.
.. option:: -suppl-min-size-threshold=threshold_int_number
If the size of a function is smaller than the threshold, assume it can be
inlined by PGO early inliner and it will not be adjusted based on sample
profile.
EXAMPLES
^^^^^^^^
Basic Usage

View File

@ -678,8 +678,8 @@ struct InstrProfValueSiteRecord {
/// Optionally scale merged counts by \p Weight.
void merge(InstrProfValueSiteRecord &Input, uint64_t Weight,
function_ref<void(instrprof_error)> Warn);
/// Scale up value profile data counts.
void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
/// Scale up value profile data counts by N (Numerator) / D (Denominator).
void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
/// Compute the overlap b/w this record and Input record.
void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind,
@ -753,8 +753,8 @@ struct InstrProfRecord {
function_ref<void(instrprof_error)> Warn);
/// Scale up profile counts (including value profile data) by
/// \p Weight.
void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
/// a factor of (N / D).
void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
/// Sort value profile data (per site) by count.
void sortValueData() {
@ -839,8 +839,8 @@ private:
uint64_t Weight,
function_ref<void(instrprof_error)> Warn);
// Scale up value profile data count.
void scaleValueProfData(uint32_t ValueKind, uint64_t Weight,
// Scale up value profile data count by N (Numerator) / D (Denominator).
void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn);
};

View File

@ -48,6 +48,8 @@ public:
InstrProfWriter(bool Sparse = false, bool InstrEntryBBEnabled = false);
~InstrProfWriter();
StringMap<ProfilingData> &getProfileData() { return FunctionData; }
/// Add function counts for the given function. If there are already counts
/// for this function and the hash and number of counts match, each counter is
/// summed. Optionally scale counts by \p Weight.

View File

@ -625,11 +625,11 @@ void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
}
}
void InstrProfValueSiteRecord::scale(uint64_t Weight,
void InstrProfValueSiteRecord::scale(uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn) {
for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) {
bool Overflowed;
I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed);
I->Count = SaturatingMultiply(I->Count, N, &Overflowed) / D;
if (Overflowed)
Warn(instrprof_error::counter_overflow);
}
@ -678,22 +678,23 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight,
}
void InstrProfRecord::scaleValueProfData(
uint32_t ValueKind, uint64_t Weight,
uint32_t ValueKind, uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn) {
for (auto &R : getValueSitesForKind(ValueKind))
R.scale(Weight, Warn);
R.scale(N, D, Warn);
}
void InstrProfRecord::scale(uint64_t Weight,
void InstrProfRecord::scale(uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn) {
assert(D != 0 && "D cannot be 0");
for (auto &Count : this->Counts) {
bool Overflowed;
Count = SaturatingMultiply(Count, Weight, &Overflowed);
Count = SaturatingMultiply(Count, N, &Overflowed) / D;
if (Overflowed)
Warn(instrprof_error::counter_overflow);
}
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
scaleValueProfData(Kind, Weight, Warn);
scaleValueProfData(Kind, N, D, Warn);
}
// Map indirect call target name hash to name string.

View File

@ -241,7 +241,7 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
// We've never seen a function with this name and hash, add it.
Dest = std::move(I);
if (Weight > 1)
Dest.scale(Weight, MapWarn);
Dest.scale(Weight, 1, MapWarn);
} else {
// We're updating a function we've seen before.
Dest.merge(I, Weight, MapWarn);

View File

@ -119,13 +119,22 @@ std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
}
void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) {
addCount(Count);
NumFunctions++;
// Skip invalid count.
if (Count == (uint64_t)-1)
return;
addCount(Count);
if (Count > MaxFunctionCount)
MaxFunctionCount = Count;
}
void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) {
// Skip invalid count.
if (Count == (uint64_t)-1)
return;
addCount(Count);
if (Count > MaxInternalBlockCount)
MaxInternalBlockCount = Count;

View File

@ -1020,7 +1020,8 @@ public:
FreqAttr(FFA_Normal), IsCS(IsCS) {}
// Read counts for the instrumented BB from profile.
bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
bool &AllMinusOnes);
// Populate the counts for all BBs.
void populateCounters();
@ -1203,7 +1204,8 @@ void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
// Read the profile from ProfileFileName and assign the value to the
// instrumented BB and the edges. This function also updates ProgramMaxCount.
// Return true if the profile are successfully read, and false on errors.
bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
bool &AllMinusOnes) {
auto &Ctx = M->getContext();
Expected<InstrProfRecord> Result =
PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
@ -1246,10 +1248,13 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
AllMinusOnes = (CountFromProfile.size() > 0);
uint64_t ValueSum = 0;
for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
ValueSum += CountFromProfile[I];
if (CountFromProfile[I] != (uint64_t)-1)
AllMinusOnes = false;
}
AllZeros = (ValueSum == 0);
@ -1657,8 +1662,13 @@ static bool annotateAllFunctions(
SplitIndirectBrCriticalEdges(F, BPI, BFI);
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
InstrumentFuncEntry);
// When AllMinusOnes is true, it means the profile for the function
// is unrepresentative and this function is actually hot. Set the
// entry count of the function to be multiple times of hot threshold
// and drop all its internal counters.
bool AllMinusOnes = false;
bool AllZeros = false;
if (!Func.readCounters(PGOReader.get(), AllZeros))
if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
continue;
if (AllZeros) {
F.setEntryCount(ProfileCount(0, Function::PCT_Real));
@ -1666,6 +1676,15 @@ static bool annotateAllFunctions(
ColdFunctions.push_back(&F);
continue;
}
const unsigned MultiplyFactor = 3;
if (AllMinusOnes) {
uint64_t HotThreshold = PSI->getHotCountThreshold();
if (HotThreshold)
F.setEntryCount(
ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
HotFunctions.push_back(&F);
continue;
}
Func.populateCounters();
Func.setBranchWeights();
Func.annotateValueSites();

View File

@ -0,0 +1,12 @@
test_simple_for:4000:4000
1: 1000
2: 1000
3: 1000
4: 1000
moo:10:10
1: 2
2: 2
3: 2
4: 2
5: 2

View File

@ -0,0 +1,15 @@
# :ir is the flag to indicate this is IR level profile.
:ir
test_simple_for
34137660316
2
0
0
foo
2582734
4
1000
270
180
760

View File

@ -0,0 +1,37 @@
; Supplement instr profile suppl-profile.proftext with sample profile
; sample-profile.proftext.
; RUN: llvm-profdata merge -instr -suppl-min-size-threshold=0 \
; RUN: -supplement-instr-with-sample=%p/Inputs/sample-profile.proftext \
; RUN: %S/Inputs/suppl-profile.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Check test_simple_for has a non-zero entry count and doesn't have any other
; prof metadata.
; CHECK: @test_simple_for(i32 %n) {{.*}} !prof ![[ENTRY_COUNT:[0-9]+]]
; CHECK-NOT: !prof !
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 540}
define i32 @test_simple_for(i32 %n) {
entry:
br label %for.cond
for.cond:
%i = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
%sum = phi i32 [ 1, %entry ], [ %inc, %for.inc ]
%cmp = icmp slt i32 %i, %n
br i1 %cmp, label %for.body, label %for.end
for.body:
%inc = add nsw i32 %sum, 1
br label %for.inc
for.inc:
%inc1 = add nsw i32 %i, 1
br label %for.cond
for.end:
ret i32 %sum
}

View File

@ -0,0 +1,25 @@
:ir
foo
7
5
12
13
0
0
0
goo
5
3
0
0
0
moo
9
4
3000
1000
2000
500

View File

@ -0,0 +1,17 @@
foo:2000:2000
1: 2000
goo:3000:1500
1: 1200
2: 800
3: 1000
moo:1000:1000
1: 1000
hoo:50:1
1: 1
2: 2
3: 3
4: 4
5: 5
6: 6
7: 7
8: 8

View File

@ -2,16 +2,14 @@ Tests for overflow when merging instrumented profiles.
1- Merge profile having maximum counts with itself and verify overflow detected and saturation occurred
RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW
RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW
RUN: llvm-profdata show -instr -all-functions -counts %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW
MERGE_OVERFLOW: {{.*}}: overflow: Counter overflow
SHOW_OVERFLOW: Total functions: 1
SHOW_OVERFLOW-NEXT: Maximum function count: 18446744073709551615
SHOW_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615
SHOW_OVERFLOW: Function count: 18446744073709551615
SHOW_OVERFLOW-NEXT: Block counts: [18446744073709551615, 18446744073709551615]
2- Merge profile having maximum counts by itself and verify no overflow
RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_NO_OVERFLOW -allow-empty
RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW
RUN: llvm-profdata show -instr -all-functions -counts %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW
MERGE_NO_OVERFLOW-NOT: {{.*}}: overflow: Counter overflow
SHOW_NO_OVERFLOW: Total functions: 1
SHOW_NO_OVERFLOW-NEXT: Maximum function count: 18446744073709551615
SHOW_NO_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615
SHOW_NO_OVERFLOW: Function count: 18446744073709551615
SHOW_NO_OVERFLOW-NEXT: Block counts: [9223372036854775808, 18446744073709551615]

View File

@ -0,0 +1,102 @@
Some basic tests for supplementing instrumentation profile with sample profile.
Test all of goo's counters will be set to -1.
RUN: llvm-profdata merge \
RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
RUN: -suppl-min-size-threshold=0 %p/Inputs/mix_instr.proftext -o %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX1
MIX1: foo:
MIX1-NEXT: Hash: 0x0000000000000007
MIX1-NEXT: Counters: 5
MIX1-NEXT: Block counts: [12, 13, 0, 0, 0]
MIX1: goo:
MIX1-NEXT: Hash: 0x0000000000000005
MIX1-NEXT: Counters: 3
MIX1-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
MIX1: moo:
MIX1-NEXT: Hash: 0x0000000000000009
MIX1-NEXT: Counters: 4
MIX1-NEXT: Block counts: [3000, 1000, 2000, 500]
Test when the zero counter ratio of foo is higher than zero-counter-threshold.
RUN: llvm-profdata merge \
RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
RUN: -suppl-min-size-threshold=0 -zero-counter-threshold=0.5 \
RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX2
MIX2: foo:
MIX2-NEXT: Hash: 0x0000000000000007
MIX2-NEXT: Counters: 5
MIX2-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615]
MIX2: goo:
MIX2-NEXT: Hash: 0x0000000000000005
MIX2-NEXT: Counters: 3
MIX2-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
MIX2: moo:
MIX2-NEXT: Hash: 0x0000000000000009
MIX2-NEXT: Counters: 4
MIX2-NEXT: Block counts: [3000, 1000, 2000, 500]
Test when the zero counter ratio of foo is lower than zero-counter-threshold.
RUN: llvm-profdata merge \
RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
RUN: -suppl-min-size-threshold=0 -zero-counter-threshold=0.7 \
RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX3
MIX3: foo:
MIX3-NEXT: Hash: 0x0000000000000007
MIX3-NEXT: Counters: 5
MIX3-NEXT: Block counts: [1384, 1500, 0, 0, 0]
MIX3: goo:
MIX3-NEXT: Hash: 0x0000000000000005
MIX3-NEXT: Counters: 3
MIX3-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
MIX3: moo:
MIX3-NEXT: Hash: 0x0000000000000009
MIX3-NEXT: Counters: 4
MIX3-NEXT: Block counts: [3000, 1000, 2000, 500]
Test foo's profile won't be adjusted because its size is smaller
than suppl-min-size-threshold.
RUN: llvm-profdata merge \
RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
RUN: -suppl-min-size-threshold=2 -zero-counter-threshold=0.7 \
RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX4
MIX4: foo:
MIX4-NEXT: Hash: 0x0000000000000007
MIX4-NEXT: Counters: 5
MIX4-NEXT: Block counts: [12, 13, 0, 0, 0]
MIX4: goo:
MIX4-NEXT: Hash: 0x0000000000000005
MIX4-NEXT: Counters: 3
MIX4-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
MIX4: moo:
MIX4-NEXT: Hash: 0x0000000000000009
MIX4-NEXT: Counters: 4
MIX4-NEXT: Block counts: [3000, 1000, 2000, 500]
Test profile summary won't be affected by -1 counter.
RUN: llvm-profdata merge \
RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
RUN: -suppl-min-size-threshold=0 %p/Inputs/mix_instr.proftext -o %t
RUN: llvm-profdata show %t -detailed-summary | FileCheck %s --check-prefix=MIX5
MIX5: Instrumentation level: IR
MIX5-NEXT: Total functions: 3
MIX5-NEXT: Maximum function count: 3000
MIX5-NEXT: Maximum internal block count: 2000
MIX5-NEXT: Total number of blocks: 9
MIX5-NEXT: Total count: 6525
MIX5-NEXT: Detailed summary:
MIX5-NEXT: 3 blocks with count >= 1000 account for 80 percentage of the total counts.
MIX5-NEXT: 3 blocks with count >= 1000 account for 90 percentage of the total counts.
MIX5-NEXT: 4 blocks with count >= 500 account for 95 percentage of the total counts.
MIX5-NEXT: 4 blocks with count >= 500 account for 99 percentage of the total counts.
MIX5-NEXT: 6 blocks with count >= 12 account for 99.9 percentage of the total counts.
MIX5-NEXT: 6 blocks with count >= 12 account for 99.99 percentage of the total counts.
MIX5-NEXT: 6 blocks with count >= 12 account for 99.999 percentage of the total counts.

View File

@ -386,6 +386,172 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
}
/// The profile entry for a function in instrumentation profile.
struct InstrProfileEntry {
uint64_t MaxCount = 0;
float ZeroCounterRatio = 0.0;
InstrProfRecord *ProfRecord;
InstrProfileEntry(InstrProfRecord *Record);
InstrProfileEntry() = default;
};
InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
ProfRecord = Record;
uint64_t CntNum = Record->Counts.size();
uint64_t ZeroCntNum = 0;
for (size_t I = 0; I < CntNum; ++I) {
MaxCount = std::max(MaxCount, Record->Counts[I]);
ZeroCntNum += !Record->Counts[I];
}
ZeroCounterRatio = (float)ZeroCntNum / CntNum;
}
/// Either set all the counters in the instr profile entry \p IFE to -1
/// in order to drop the profile or scale up the counters in \p IFP to
/// be above hot threshold. We use the ratio of zero counters in the
/// profile of a function to decide the profile is helpful or harmful
/// for performance, and to choose whether to scale up or drop it.
static void updateInstrProfileEntry(InstrProfileEntry &IFE,
uint64_t HotInstrThreshold,
float ZeroCounterThreshold) {
InstrProfRecord *ProfRecord = IFE.ProfRecord;
if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
// If all or most of the counters of the function are zero, the
// profile is unaccountable and shuld be dropped. Reset all the
// counters to be -1 and PGO profile-use will drop the profile.
// All counters being -1 also implies that the function is hot so
// PGO profile-use will also set the entry count metadata to be
// above hot threshold.
for (size_t I = 0; I < ProfRecord->Counts.size(); ++I)
ProfRecord->Counts[I] = -1;
return;
}
// Scale up the MaxCount to be multiple times above hot threshold.
const unsigned MultiplyFactor = 3;
uint64_t Numerator = HotInstrThreshold * MultiplyFactor;
uint64_t Denominator = IFE.MaxCount;
ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
warn(toString(make_error<InstrProfError>(E)));
});
}
const uint64_t ColdPercentileIdx = 15;
const uint64_t HotPercentileIdx = 11;
/// Adjust the instr profile in \p WC based on the sample profile in
/// \p Reader.
static void
adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
unsigned InstrProfColdThreshold) {
// Function to its entry in instr profile.
StringMap<InstrProfileEntry> InstrProfileMap;
InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
for (auto &PD : WC->Writer.getProfileData()) {
// Populate IPBuilder.
for (const auto &PDV : PD.getValue()) {
InstrProfRecord Record = PDV.second;
IPBuilder.addRecord(Record);
}
// If a function has multiple entries in instr profile, skip it.
if (PD.getValue().size() != 1)
continue;
// Initialize InstrProfileMap.
InstrProfRecord *R = &PD.getValue().begin()->second;
InstrProfileMap[PD.getKey()] = InstrProfileEntry(R);
}
ProfileSummary InstrPS = *IPBuilder.getSummary();
ProfileSummary SamplePS = Reader->getSummary();
// Compute cold thresholds for instr profile and sample profile.
uint64_t ColdSampleThreshold =
ProfileSummaryBuilder::getEntryForPercentile(
SamplePS.getDetailedSummary(),
ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
.MinCount;
uint64_t HotInstrThreshold =
ProfileSummaryBuilder::getEntryForPercentile(
InstrPS.getDetailedSummary(),
ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
.MinCount;
uint64_t ColdInstrThreshold =
InstrProfColdThreshold
? InstrProfColdThreshold
: ProfileSummaryBuilder::getEntryForPercentile(
InstrPS.getDetailedSummary(),
ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
.MinCount;
// Find hot/warm functions in sample profile which is cold in instr profile
// and adjust the profiles of those functions in the instr profile.
for (const auto &PD : Reader->getProfiles()) {
StringRef FName = PD.getKey();
const sampleprof::FunctionSamples &FS = PD.getValue();
auto It = InstrProfileMap.find(FName);
if (FS.getHeadSamples() > ColdSampleThreshold &&
It != InstrProfileMap.end() &&
It->second.MaxCount <= ColdInstrThreshold &&
FS.getBodySamples().size() >= SupplMinSizeThreshold) {
updateInstrProfileEntry(It->second, HotInstrThreshold,
ZeroCounterThreshold);
}
}
}
/// The main function to supplement instr profile with sample profile.
/// \Inputs contains the instr profile. \p SampleFilename specifies the
/// sample profile. \p OutputFilename specifies the output profile name.
/// \p OutputFormat specifies the output profile format. \p OutputSparse
/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
/// specifies the minimal size for the functions whose profile will be
/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
/// a function contains too many zero counters and whether its profile
/// should be dropped. \p InstrProfColdThreshold is the user specified
/// cold threshold which will override the cold threshold got from the
/// instr profile summary.
static void supplementInstrProfile(
const WeightedFileVector &Inputs, StringRef SampleFilename,
StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse,
unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
unsigned InstrProfColdThreshold) {
if (OutputFilename.compare("-") == 0)
exitWithError("Cannot write indexed profdata format to stdout.");
if (Inputs.size() != 1)
exitWithError("Expect one input to be an instr profile.");
if (Inputs[0].Weight != 1)
exitWithError("Expect instr profile doesn't have weight.");
StringRef InstrFilename = Inputs[0].Filename;
// Read sample profile.
LLVMContext Context;
auto ReaderOrErr =
sampleprof::SampleProfileReader::create(SampleFilename.str(), Context);
if (std::error_code EC = ReaderOrErr.getError())
exitWithErrorCode(EC, SampleFilename);
auto Reader = std::move(ReaderOrErr.get());
if (std::error_code EC = Reader->read())
exitWithErrorCode(EC, SampleFilename);
// Read instr profile.
std::mutex ErrorLock;
SmallSet<instrprof_error, 4> WriterErrorCodes;
auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
WriterErrorCodes);
loadInput(Inputs[0], nullptr, WC.get());
if (WC->Errors.size() > 0)
exitWithError(std::move(WC->Errors[0].first), InstrFilename);
adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
InstrProfColdThreshold);
writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
}
/// Make a copy of the given function samples with all symbol names remapped
/// by the provided symbol remapper.
static sampleprof::FunctionSamples
@ -680,6 +846,28 @@ static int merge_main(int argc, const char *argv[]) {
cl::opt<bool> GenPartialProfile(
"gen-partial-profile", cl::init(false), cl::Hidden,
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
cl::opt<std::string> SupplInstrWithSample(
"supplement-instr-with-sample", cl::init(""), cl::Hidden,
cl::desc("Supplement an instr profile with sample profile, to correct "
"the profile unrepresentativeness issue. The sample "
"profile is the input of the flag. Output will be in instr "
"format (The flag only works with -instr)"));
cl::opt<float> ZeroCounterThreshold(
"zero-counter-threshold", cl::init(0.7), cl::Hidden,
cl::desc("For the function which is cold in instr profile but hot in "
"sample profile, if the ratio of the number of zero counters "
"divided by the the total number of counters is above the "
"threshold, the profile of the function will be regarded as "
"being harmful for performance and will be dropped. "));
cl::opt<unsigned> SupplMinSizeThreshold(
"suppl-min-size-threshold", cl::init(10), cl::Hidden,
cl::desc("If the size of a function is smaller than the threshold, "
"assume it can be inlined by PGO early inliner and it won't "
"be adjusted based on sample profile. "));
cl::opt<unsigned> InstrProfColdThreshold(
"instr-prof-cold-threshold", cl::init(0), cl::Hidden,
cl::desc("User specified cold threshold for instr profile which will "
"override the cold threshold got from profile summary. "));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
@ -708,6 +896,17 @@ static int merge_main(int argc, const char *argv[]) {
if (!RemappingFile.empty())
Remapper = SymbolRemapper::create(RemappingFile);
if (!SupplInstrWithSample.empty()) {
if (ProfileKind != instr)
exitWithError(
"-supplement-instr-with-sample can only work with -instr. ");
supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename,
OutputFormat, OutputSparse, SupplMinSizeThreshold,
ZeroCounterThreshold, InstrProfColdThreshold);
return 0;
}
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename,
OutputFormat, OutputSparse, NumThreads, FailureMode);
@ -904,6 +1103,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
uint64_t FuncMax = 0;
uint64_t FuncSum = 0;
for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
if (Func.Counts[I] == (uint64_t)-1)
continue;
FuncMax = std::max(FuncMax, Func.Counts[I]);
FuncSum += Func.Counts[I];
}