mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
Supplement instr profile with sample profile.
PGO profile is usually more precise than sample profile. However, PGO profile needs to be collected from loadtest and loadtest may not be representative enough to the production workload. Sample profile collected from production can be used as a supplement -- for functions cold in loadtest but warm/hot in production, we can scale up the related function in PGO profile if the function is warm or hot in sample profile. The implementation contains changes in compiler side and llvm-profdata side. Given an instr profile and a sample profile, for a function cold in PGO profile but warm/hot in sample profile, llvm-profdata will either mark all the counters in the profile to be -1 or scale up the max count in the function to be above hot threshold, depending on the zero counter ratio in the profile. The assumption is if there are too many counters being zero in the function profile, the profile is more likely to cause harm than good, then llvm-profdata will mark all the counters to be -1 indicating the function is hot but the profile is unaccountable. In compiler side, if a function profile with all -1 counters is seen, the function entry count will be set to be above hot threshold but its internal profile will be dropped. In the long run, it may be useful to let compiler support using PGO profile and sample profile at the same time, but that requires more careful design and more substantial changes to make two profiles work seamlessly. The patch here serves as a simple intermediate solution. Differential Revision: https://reviews.llvm.org/D81981
This commit is contained in:
parent
9d1bb81744
commit
51d4708437
@ -161,6 +161,30 @@ OPTIONS
|
||||
coverage for the optimized target. This option can only be used with
|
||||
sample-based profile in extbinary format.
|
||||
|
||||
.. option:: -supplement-instr-with-sample=path_to_sample_profile
|
||||
|
||||
Supplement an instrumentation profile with sample profile. The sample profile
|
||||
is the input of the flag. Output will be in instrumentation format (only works
|
||||
with -instr).
|
||||
|
||||
.. option:: -zero-counter-threshold=threshold_float_number
|
||||
|
||||
For the function which is cold in instr profile but hot in sample profile, if
|
||||
the ratio of the number of zero counters divided by the the total number of
|
||||
counters is above the threshold, the profile of the function will be regarded
|
||||
as being harmful for performance and will be dropped.
|
||||
|
||||
.. option:: -instr-prof-cold-threshold=threshold_int_number
|
||||
|
||||
User specified cold threshold for instr profile which will override the cold
|
||||
threshold got from profile summary.
|
||||
|
||||
.. option:: -suppl-min-size-threshold=threshold_int_number
|
||||
|
||||
If the size of a function is smaller than the threshold, assume it can be
|
||||
inlined by PGO early inliner and it will not be adjusted based on sample
|
||||
profile.
|
||||
|
||||
EXAMPLES
|
||||
^^^^^^^^
|
||||
Basic Usage
|
||||
|
@ -678,8 +678,8 @@ struct InstrProfValueSiteRecord {
|
||||
/// Optionally scale merged counts by \p Weight.
|
||||
void merge(InstrProfValueSiteRecord &Input, uint64_t Weight,
|
||||
function_ref<void(instrprof_error)> Warn);
|
||||
/// Scale up value profile data counts.
|
||||
void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
|
||||
/// Scale up value profile data counts by N (Numerator) / D (Denominator).
|
||||
void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
|
||||
|
||||
/// Compute the overlap b/w this record and Input record.
|
||||
void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind,
|
||||
@ -753,8 +753,8 @@ struct InstrProfRecord {
|
||||
function_ref<void(instrprof_error)> Warn);
|
||||
|
||||
/// Scale up profile counts (including value profile data) by
|
||||
/// \p Weight.
|
||||
void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
|
||||
/// a factor of (N / D).
|
||||
void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
|
||||
|
||||
/// Sort value profile data (per site) by count.
|
||||
void sortValueData() {
|
||||
@ -839,8 +839,8 @@ private:
|
||||
uint64_t Weight,
|
||||
function_ref<void(instrprof_error)> Warn);
|
||||
|
||||
// Scale up value profile data count.
|
||||
void scaleValueProfData(uint32_t ValueKind, uint64_t Weight,
|
||||
// Scale up value profile data count by N (Numerator) / D (Denominator).
|
||||
void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D,
|
||||
function_ref<void(instrprof_error)> Warn);
|
||||
};
|
||||
|
||||
|
@ -48,6 +48,8 @@ public:
|
||||
InstrProfWriter(bool Sparse = false, bool InstrEntryBBEnabled = false);
|
||||
~InstrProfWriter();
|
||||
|
||||
StringMap<ProfilingData> &getProfileData() { return FunctionData; }
|
||||
|
||||
/// Add function counts for the given function. If there are already counts
|
||||
/// for this function and the hash and number of counts match, each counter is
|
||||
/// summed. Optionally scale counts by \p Weight.
|
||||
|
@ -625,11 +625,11 @@ void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
|
||||
}
|
||||
}
|
||||
|
||||
void InstrProfValueSiteRecord::scale(uint64_t Weight,
|
||||
void InstrProfValueSiteRecord::scale(uint64_t N, uint64_t D,
|
||||
function_ref<void(instrprof_error)> Warn) {
|
||||
for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) {
|
||||
bool Overflowed;
|
||||
I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed);
|
||||
I->Count = SaturatingMultiply(I->Count, N, &Overflowed) / D;
|
||||
if (Overflowed)
|
||||
Warn(instrprof_error::counter_overflow);
|
||||
}
|
||||
@ -678,22 +678,23 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight,
|
||||
}
|
||||
|
||||
void InstrProfRecord::scaleValueProfData(
|
||||
uint32_t ValueKind, uint64_t Weight,
|
||||
uint32_t ValueKind, uint64_t N, uint64_t D,
|
||||
function_ref<void(instrprof_error)> Warn) {
|
||||
for (auto &R : getValueSitesForKind(ValueKind))
|
||||
R.scale(Weight, Warn);
|
||||
R.scale(N, D, Warn);
|
||||
}
|
||||
|
||||
void InstrProfRecord::scale(uint64_t Weight,
|
||||
void InstrProfRecord::scale(uint64_t N, uint64_t D,
|
||||
function_ref<void(instrprof_error)> Warn) {
|
||||
assert(D != 0 && "D cannot be 0");
|
||||
for (auto &Count : this->Counts) {
|
||||
bool Overflowed;
|
||||
Count = SaturatingMultiply(Count, Weight, &Overflowed);
|
||||
Count = SaturatingMultiply(Count, N, &Overflowed) / D;
|
||||
if (Overflowed)
|
||||
Warn(instrprof_error::counter_overflow);
|
||||
}
|
||||
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
|
||||
scaleValueProfData(Kind, Weight, Warn);
|
||||
scaleValueProfData(Kind, N, D, Warn);
|
||||
}
|
||||
|
||||
// Map indirect call target name hash to name string.
|
||||
|
@ -241,7 +241,7 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
|
||||
// We've never seen a function with this name and hash, add it.
|
||||
Dest = std::move(I);
|
||||
if (Weight > 1)
|
||||
Dest.scale(Weight, MapWarn);
|
||||
Dest.scale(Weight, 1, MapWarn);
|
||||
} else {
|
||||
// We're updating a function we've seen before.
|
||||
Dest.merge(I, Weight, MapWarn);
|
||||
|
@ -119,13 +119,22 @@ std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
|
||||
}
|
||||
|
||||
void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) {
|
||||
addCount(Count);
|
||||
NumFunctions++;
|
||||
|
||||
// Skip invalid count.
|
||||
if (Count == (uint64_t)-1)
|
||||
return;
|
||||
|
||||
addCount(Count);
|
||||
if (Count > MaxFunctionCount)
|
||||
MaxFunctionCount = Count;
|
||||
}
|
||||
|
||||
void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) {
|
||||
// Skip invalid count.
|
||||
if (Count == (uint64_t)-1)
|
||||
return;
|
||||
|
||||
addCount(Count);
|
||||
if (Count > MaxInternalBlockCount)
|
||||
MaxInternalBlockCount = Count;
|
||||
|
@ -1020,7 +1020,8 @@ public:
|
||||
FreqAttr(FFA_Normal), IsCS(IsCS) {}
|
||||
|
||||
// Read counts for the instrumented BB from profile.
|
||||
bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
|
||||
bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
|
||||
bool &AllMinusOnes);
|
||||
|
||||
// Populate the counts for all BBs.
|
||||
void populateCounters();
|
||||
@ -1203,7 +1204,8 @@ void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
|
||||
// Read the profile from ProfileFileName and assign the value to the
|
||||
// instrumented BB and the edges. This function also updates ProgramMaxCount.
|
||||
// Return true if the profile are successfully read, and false on errors.
|
||||
bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
|
||||
bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
|
||||
bool &AllMinusOnes) {
|
||||
auto &Ctx = M->getContext();
|
||||
Expected<InstrProfRecord> Result =
|
||||
PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
|
||||
@ -1246,10 +1248,13 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
|
||||
|
||||
IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
|
||||
LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
|
||||
AllMinusOnes = (CountFromProfile.size() > 0);
|
||||
uint64_t ValueSum = 0;
|
||||
for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
|
||||
LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
|
||||
ValueSum += CountFromProfile[I];
|
||||
if (CountFromProfile[I] != (uint64_t)-1)
|
||||
AllMinusOnes = false;
|
||||
}
|
||||
AllZeros = (ValueSum == 0);
|
||||
|
||||
@ -1657,8 +1662,13 @@ static bool annotateAllFunctions(
|
||||
SplitIndirectBrCriticalEdges(F, BPI, BFI);
|
||||
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
|
||||
InstrumentFuncEntry);
|
||||
// When AllMinusOnes is true, it means the profile for the function
|
||||
// is unrepresentative and this function is actually hot. Set the
|
||||
// entry count of the function to be multiple times of hot threshold
|
||||
// and drop all its internal counters.
|
||||
bool AllMinusOnes = false;
|
||||
bool AllZeros = false;
|
||||
if (!Func.readCounters(PGOReader.get(), AllZeros))
|
||||
if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
|
||||
continue;
|
||||
if (AllZeros) {
|
||||
F.setEntryCount(ProfileCount(0, Function::PCT_Real));
|
||||
@ -1666,6 +1676,15 @@ static bool annotateAllFunctions(
|
||||
ColdFunctions.push_back(&F);
|
||||
continue;
|
||||
}
|
||||
const unsigned MultiplyFactor = 3;
|
||||
if (AllMinusOnes) {
|
||||
uint64_t HotThreshold = PSI->getHotCountThreshold();
|
||||
if (HotThreshold)
|
||||
F.setEntryCount(
|
||||
ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
|
||||
HotFunctions.push_back(&F);
|
||||
continue;
|
||||
}
|
||||
Func.populateCounters();
|
||||
Func.setBranchWeights();
|
||||
Func.annotateValueSites();
|
||||
|
12
test/Transforms/PGOProfile/Inputs/sample-profile.proftext
Normal file
12
test/Transforms/PGOProfile/Inputs/sample-profile.proftext
Normal file
@ -0,0 +1,12 @@
|
||||
test_simple_for:4000:4000
|
||||
1: 1000
|
||||
2: 1000
|
||||
3: 1000
|
||||
4: 1000
|
||||
|
||||
moo:10:10
|
||||
1: 2
|
||||
2: 2
|
||||
3: 2
|
||||
4: 2
|
||||
5: 2
|
15
test/Transforms/PGOProfile/Inputs/suppl-profile.proftext
Normal file
15
test/Transforms/PGOProfile/Inputs/suppl-profile.proftext
Normal file
@ -0,0 +1,15 @@
|
||||
# :ir is the flag to indicate this is IR level profile.
|
||||
:ir
|
||||
test_simple_for
|
||||
34137660316
|
||||
2
|
||||
0
|
||||
0
|
||||
|
||||
foo
|
||||
2582734
|
||||
4
|
||||
1000
|
||||
270
|
||||
180
|
||||
760
|
37
test/Transforms/PGOProfile/suppl-profile.ll
Normal file
37
test/Transforms/PGOProfile/suppl-profile.ll
Normal file
@ -0,0 +1,37 @@
|
||||
; Supplement instr profile suppl-profile.proftext with sample profile
|
||||
; sample-profile.proftext.
|
||||
; RUN: llvm-profdata merge -instr -suppl-min-size-threshold=0 \
|
||||
; RUN: -supplement-instr-with-sample=%p/Inputs/sample-profile.proftext \
|
||||
; RUN: %S/Inputs/suppl-profile.proftext -o %t.profdata
|
||||
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s
|
||||
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Check test_simple_for has a non-zero entry count and doesn't have any other
|
||||
; prof metadata.
|
||||
; CHECK: @test_simple_for(i32 %n) {{.*}} !prof ![[ENTRY_COUNT:[0-9]+]]
|
||||
; CHECK-NOT: !prof !
|
||||
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 540}
|
||||
define i32 @test_simple_for(i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond:
|
||||
%i = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
|
||||
%sum = phi i32 [ 1, %entry ], [ %inc, %for.inc ]
|
||||
%cmp = icmp slt i32 %i, %n
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body:
|
||||
%inc = add nsw i32 %sum, 1
|
||||
br label %for.inc
|
||||
|
||||
for.inc:
|
||||
%inc1 = add nsw i32 %i, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end:
|
||||
ret i32 %sum
|
||||
}
|
25
test/tools/llvm-profdata/Inputs/mix_instr.proftext
Normal file
25
test/tools/llvm-profdata/Inputs/mix_instr.proftext
Normal file
@ -0,0 +1,25 @@
|
||||
:ir
|
||||
foo
|
||||
7
|
||||
5
|
||||
12
|
||||
13
|
||||
0
|
||||
0
|
||||
0
|
||||
|
||||
goo
|
||||
5
|
||||
3
|
||||
0
|
||||
0
|
||||
0
|
||||
|
||||
moo
|
||||
9
|
||||
4
|
||||
3000
|
||||
1000
|
||||
2000
|
||||
500
|
||||
|
17
test/tools/llvm-profdata/Inputs/mix_sample.proftext
Normal file
17
test/tools/llvm-profdata/Inputs/mix_sample.proftext
Normal file
@ -0,0 +1,17 @@
|
||||
foo:2000:2000
|
||||
1: 2000
|
||||
goo:3000:1500
|
||||
1: 1200
|
||||
2: 800
|
||||
3: 1000
|
||||
moo:1000:1000
|
||||
1: 1000
|
||||
hoo:50:1
|
||||
1: 1
|
||||
2: 2
|
||||
3: 3
|
||||
4: 4
|
||||
5: 5
|
||||
6: 6
|
||||
7: 7
|
||||
8: 8
|
@ -2,16 +2,14 @@ Tests for overflow when merging instrumented profiles.
|
||||
|
||||
1- Merge profile having maximum counts with itself and verify overflow detected and saturation occurred
|
||||
RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW
|
||||
RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW
|
||||
RUN: llvm-profdata show -instr -all-functions -counts %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW
|
||||
MERGE_OVERFLOW: {{.*}}: overflow: Counter overflow
|
||||
SHOW_OVERFLOW: Total functions: 1
|
||||
SHOW_OVERFLOW-NEXT: Maximum function count: 18446744073709551615
|
||||
SHOW_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615
|
||||
SHOW_OVERFLOW: Function count: 18446744073709551615
|
||||
SHOW_OVERFLOW-NEXT: Block counts: [18446744073709551615, 18446744073709551615]
|
||||
|
||||
2- Merge profile having maximum counts by itself and verify no overflow
|
||||
RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_NO_OVERFLOW -allow-empty
|
||||
RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW
|
||||
RUN: llvm-profdata show -instr -all-functions -counts %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW
|
||||
MERGE_NO_OVERFLOW-NOT: {{.*}}: overflow: Counter overflow
|
||||
SHOW_NO_OVERFLOW: Total functions: 1
|
||||
SHOW_NO_OVERFLOW-NEXT: Maximum function count: 18446744073709551615
|
||||
SHOW_NO_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615
|
||||
SHOW_NO_OVERFLOW: Function count: 18446744073709551615
|
||||
SHOW_NO_OVERFLOW-NEXT: Block counts: [9223372036854775808, 18446744073709551615]
|
||||
|
102
test/tools/llvm-profdata/suppl-instr-with-sample.test
Normal file
102
test/tools/llvm-profdata/suppl-instr-with-sample.test
Normal file
@ -0,0 +1,102 @@
|
||||
Some basic tests for supplementing instrumentation profile with sample profile.
|
||||
|
||||
Test all of goo's counters will be set to -1.
|
||||
RUN: llvm-profdata merge \
|
||||
RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
|
||||
RUN: -suppl-min-size-threshold=0 %p/Inputs/mix_instr.proftext -o %t
|
||||
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX1
|
||||
|
||||
MIX1: foo:
|
||||
MIX1-NEXT: Hash: 0x0000000000000007
|
||||
MIX1-NEXT: Counters: 5
|
||||
MIX1-NEXT: Block counts: [12, 13, 0, 0, 0]
|
||||
MIX1: goo:
|
||||
MIX1-NEXT: Hash: 0x0000000000000005
|
||||
MIX1-NEXT: Counters: 3
|
||||
MIX1-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
|
||||
MIX1: moo:
|
||||
MIX1-NEXT: Hash: 0x0000000000000009
|
||||
MIX1-NEXT: Counters: 4
|
||||
MIX1-NEXT: Block counts: [3000, 1000, 2000, 500]
|
||||
|
||||
Test when the zero counter ratio of foo is higher than zero-counter-threshold.
|
||||
RUN: llvm-profdata merge \
|
||||
RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
|
||||
RUN: -suppl-min-size-threshold=0 -zero-counter-threshold=0.5 \
|
||||
RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t
|
||||
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX2
|
||||
|
||||
MIX2: foo:
|
||||
MIX2-NEXT: Hash: 0x0000000000000007
|
||||
MIX2-NEXT: Counters: 5
|
||||
MIX2-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615]
|
||||
MIX2: goo:
|
||||
MIX2-NEXT: Hash: 0x0000000000000005
|
||||
MIX2-NEXT: Counters: 3
|
||||
MIX2-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
|
||||
MIX2: moo:
|
||||
MIX2-NEXT: Hash: 0x0000000000000009
|
||||
MIX2-NEXT: Counters: 4
|
||||
MIX2-NEXT: Block counts: [3000, 1000, 2000, 500]
|
||||
|
||||
Test when the zero counter ratio of foo is lower than zero-counter-threshold.
|
||||
RUN: llvm-profdata merge \
|
||||
RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
|
||||
RUN: -suppl-min-size-threshold=0 -zero-counter-threshold=0.7 \
|
||||
RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t
|
||||
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX3
|
||||
|
||||
MIX3: foo:
|
||||
MIX3-NEXT: Hash: 0x0000000000000007
|
||||
MIX3-NEXT: Counters: 5
|
||||
MIX3-NEXT: Block counts: [1384, 1500, 0, 0, 0]
|
||||
MIX3: goo:
|
||||
MIX3-NEXT: Hash: 0x0000000000000005
|
||||
MIX3-NEXT: Counters: 3
|
||||
MIX3-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
|
||||
MIX3: moo:
|
||||
MIX3-NEXT: Hash: 0x0000000000000009
|
||||
MIX3-NEXT: Counters: 4
|
||||
MIX3-NEXT: Block counts: [3000, 1000, 2000, 500]
|
||||
|
||||
Test foo's profile won't be adjusted because its size is smaller
|
||||
than suppl-min-size-threshold.
|
||||
RUN: llvm-profdata merge \
|
||||
RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
|
||||
RUN: -suppl-min-size-threshold=2 -zero-counter-threshold=0.7 \
|
||||
RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t
|
||||
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX4
|
||||
|
||||
MIX4: foo:
|
||||
MIX4-NEXT: Hash: 0x0000000000000007
|
||||
MIX4-NEXT: Counters: 5
|
||||
MIX4-NEXT: Block counts: [12, 13, 0, 0, 0]
|
||||
MIX4: goo:
|
||||
MIX4-NEXT: Hash: 0x0000000000000005
|
||||
MIX4-NEXT: Counters: 3
|
||||
MIX4-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
|
||||
MIX4: moo:
|
||||
MIX4-NEXT: Hash: 0x0000000000000009
|
||||
MIX4-NEXT: Counters: 4
|
||||
MIX4-NEXT: Block counts: [3000, 1000, 2000, 500]
|
||||
|
||||
Test profile summary won't be affected by -1 counter.
|
||||
RUN: llvm-profdata merge \
|
||||
RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
|
||||
RUN: -suppl-min-size-threshold=0 %p/Inputs/mix_instr.proftext -o %t
|
||||
RUN: llvm-profdata show %t -detailed-summary | FileCheck %s --check-prefix=MIX5
|
||||
|
||||
MIX5: Instrumentation level: IR
|
||||
MIX5-NEXT: Total functions: 3
|
||||
MIX5-NEXT: Maximum function count: 3000
|
||||
MIX5-NEXT: Maximum internal block count: 2000
|
||||
MIX5-NEXT: Total number of blocks: 9
|
||||
MIX5-NEXT: Total count: 6525
|
||||
MIX5-NEXT: Detailed summary:
|
||||
MIX5-NEXT: 3 blocks with count >= 1000 account for 80 percentage of the total counts.
|
||||
MIX5-NEXT: 3 blocks with count >= 1000 account for 90 percentage of the total counts.
|
||||
MIX5-NEXT: 4 blocks with count >= 500 account for 95 percentage of the total counts.
|
||||
MIX5-NEXT: 4 blocks with count >= 500 account for 99 percentage of the total counts.
|
||||
MIX5-NEXT: 6 blocks with count >= 12 account for 99.9 percentage of the total counts.
|
||||
MIX5-NEXT: 6 blocks with count >= 12 account for 99.99 percentage of the total counts.
|
||||
MIX5-NEXT: 6 blocks with count >= 12 account for 99.999 percentage of the total counts.
|
@ -386,6 +386,172 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
|
||||
writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
|
||||
}
|
||||
|
||||
/// The profile entry for a function in instrumentation profile.
|
||||
struct InstrProfileEntry {
|
||||
uint64_t MaxCount = 0;
|
||||
float ZeroCounterRatio = 0.0;
|
||||
InstrProfRecord *ProfRecord;
|
||||
InstrProfileEntry(InstrProfRecord *Record);
|
||||
InstrProfileEntry() = default;
|
||||
};
|
||||
|
||||
InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
|
||||
ProfRecord = Record;
|
||||
uint64_t CntNum = Record->Counts.size();
|
||||
uint64_t ZeroCntNum = 0;
|
||||
for (size_t I = 0; I < CntNum; ++I) {
|
||||
MaxCount = std::max(MaxCount, Record->Counts[I]);
|
||||
ZeroCntNum += !Record->Counts[I];
|
||||
}
|
||||
ZeroCounterRatio = (float)ZeroCntNum / CntNum;
|
||||
}
|
||||
|
||||
/// Either set all the counters in the instr profile entry \p IFE to -1
|
||||
/// in order to drop the profile or scale up the counters in \p IFP to
|
||||
/// be above hot threshold. We use the ratio of zero counters in the
|
||||
/// profile of a function to decide the profile is helpful or harmful
|
||||
/// for performance, and to choose whether to scale up or drop it.
|
||||
static void updateInstrProfileEntry(InstrProfileEntry &IFE,
|
||||
uint64_t HotInstrThreshold,
|
||||
float ZeroCounterThreshold) {
|
||||
InstrProfRecord *ProfRecord = IFE.ProfRecord;
|
||||
if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
|
||||
// If all or most of the counters of the function are zero, the
|
||||
// profile is unaccountable and shuld be dropped. Reset all the
|
||||
// counters to be -1 and PGO profile-use will drop the profile.
|
||||
// All counters being -1 also implies that the function is hot so
|
||||
// PGO profile-use will also set the entry count metadata to be
|
||||
// above hot threshold.
|
||||
for (size_t I = 0; I < ProfRecord->Counts.size(); ++I)
|
||||
ProfRecord->Counts[I] = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
// Scale up the MaxCount to be multiple times above hot threshold.
|
||||
const unsigned MultiplyFactor = 3;
|
||||
uint64_t Numerator = HotInstrThreshold * MultiplyFactor;
|
||||
uint64_t Denominator = IFE.MaxCount;
|
||||
ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
|
||||
warn(toString(make_error<InstrProfError>(E)));
|
||||
});
|
||||
}
|
||||
|
||||
const uint64_t ColdPercentileIdx = 15;
|
||||
const uint64_t HotPercentileIdx = 11;
|
||||
|
||||
/// Adjust the instr profile in \p WC based on the sample profile in
|
||||
/// \p Reader.
|
||||
static void
|
||||
adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
|
||||
std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
|
||||
unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
|
||||
unsigned InstrProfColdThreshold) {
|
||||
// Function to its entry in instr profile.
|
||||
StringMap<InstrProfileEntry> InstrProfileMap;
|
||||
InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
|
||||
for (auto &PD : WC->Writer.getProfileData()) {
|
||||
// Populate IPBuilder.
|
||||
for (const auto &PDV : PD.getValue()) {
|
||||
InstrProfRecord Record = PDV.second;
|
||||
IPBuilder.addRecord(Record);
|
||||
}
|
||||
|
||||
// If a function has multiple entries in instr profile, skip it.
|
||||
if (PD.getValue().size() != 1)
|
||||
continue;
|
||||
|
||||
// Initialize InstrProfileMap.
|
||||
InstrProfRecord *R = &PD.getValue().begin()->second;
|
||||
InstrProfileMap[PD.getKey()] = InstrProfileEntry(R);
|
||||
}
|
||||
|
||||
ProfileSummary InstrPS = *IPBuilder.getSummary();
|
||||
ProfileSummary SamplePS = Reader->getSummary();
|
||||
|
||||
// Compute cold thresholds for instr profile and sample profile.
|
||||
uint64_t ColdSampleThreshold =
|
||||
ProfileSummaryBuilder::getEntryForPercentile(
|
||||
SamplePS.getDetailedSummary(),
|
||||
ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
|
||||
.MinCount;
|
||||
uint64_t HotInstrThreshold =
|
||||
ProfileSummaryBuilder::getEntryForPercentile(
|
||||
InstrPS.getDetailedSummary(),
|
||||
ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
|
||||
.MinCount;
|
||||
uint64_t ColdInstrThreshold =
|
||||
InstrProfColdThreshold
|
||||
? InstrProfColdThreshold
|
||||
: ProfileSummaryBuilder::getEntryForPercentile(
|
||||
InstrPS.getDetailedSummary(),
|
||||
ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
|
||||
.MinCount;
|
||||
|
||||
// Find hot/warm functions in sample profile which is cold in instr profile
|
||||
// and adjust the profiles of those functions in the instr profile.
|
||||
for (const auto &PD : Reader->getProfiles()) {
|
||||
StringRef FName = PD.getKey();
|
||||
const sampleprof::FunctionSamples &FS = PD.getValue();
|
||||
auto It = InstrProfileMap.find(FName);
|
||||
if (FS.getHeadSamples() > ColdSampleThreshold &&
|
||||
It != InstrProfileMap.end() &&
|
||||
It->second.MaxCount <= ColdInstrThreshold &&
|
||||
FS.getBodySamples().size() >= SupplMinSizeThreshold) {
|
||||
updateInstrProfileEntry(It->second, HotInstrThreshold,
|
||||
ZeroCounterThreshold);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The main function to supplement instr profile with sample profile.
|
||||
/// \Inputs contains the instr profile. \p SampleFilename specifies the
|
||||
/// sample profile. \p OutputFilename specifies the output profile name.
|
||||
/// \p OutputFormat specifies the output profile format. \p OutputSparse
|
||||
/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
|
||||
/// specifies the minimal size for the functions whose profile will be
|
||||
/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
|
||||
/// a function contains too many zero counters and whether its profile
|
||||
/// should be dropped. \p InstrProfColdThreshold is the user specified
|
||||
/// cold threshold which will override the cold threshold got from the
|
||||
/// instr profile summary.
|
||||
static void supplementInstrProfile(
|
||||
const WeightedFileVector &Inputs, StringRef SampleFilename,
|
||||
StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse,
|
||||
unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
|
||||
unsigned InstrProfColdThreshold) {
|
||||
if (OutputFilename.compare("-") == 0)
|
||||
exitWithError("Cannot write indexed profdata format to stdout.");
|
||||
if (Inputs.size() != 1)
|
||||
exitWithError("Expect one input to be an instr profile.");
|
||||
if (Inputs[0].Weight != 1)
|
||||
exitWithError("Expect instr profile doesn't have weight.");
|
||||
|
||||
StringRef InstrFilename = Inputs[0].Filename;
|
||||
|
||||
// Read sample profile.
|
||||
LLVMContext Context;
|
||||
auto ReaderOrErr =
|
||||
sampleprof::SampleProfileReader::create(SampleFilename.str(), Context);
|
||||
if (std::error_code EC = ReaderOrErr.getError())
|
||||
exitWithErrorCode(EC, SampleFilename);
|
||||
auto Reader = std::move(ReaderOrErr.get());
|
||||
if (std::error_code EC = Reader->read())
|
||||
exitWithErrorCode(EC, SampleFilename);
|
||||
|
||||
// Read instr profile.
|
||||
std::mutex ErrorLock;
|
||||
SmallSet<instrprof_error, 4> WriterErrorCodes;
|
||||
auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
|
||||
WriterErrorCodes);
|
||||
loadInput(Inputs[0], nullptr, WC.get());
|
||||
if (WC->Errors.size() > 0)
|
||||
exitWithError(std::move(WC->Errors[0].first), InstrFilename);
|
||||
|
||||
adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
|
||||
InstrProfColdThreshold);
|
||||
writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
|
||||
}
|
||||
|
||||
/// Make a copy of the given function samples with all symbol names remapped
|
||||
/// by the provided symbol remapper.
|
||||
static sampleprof::FunctionSamples
|
||||
@ -680,6 +846,28 @@ static int merge_main(int argc, const char *argv[]) {
|
||||
cl::opt<bool> GenPartialProfile(
|
||||
"gen-partial-profile", cl::init(false), cl::Hidden,
|
||||
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
|
||||
cl::opt<std::string> SupplInstrWithSample(
|
||||
"supplement-instr-with-sample", cl::init(""), cl::Hidden,
|
||||
cl::desc("Supplement an instr profile with sample profile, to correct "
|
||||
"the profile unrepresentativeness issue. The sample "
|
||||
"profile is the input of the flag. Output will be in instr "
|
||||
"format (The flag only works with -instr)"));
|
||||
cl::opt<float> ZeroCounterThreshold(
|
||||
"zero-counter-threshold", cl::init(0.7), cl::Hidden,
|
||||
cl::desc("For the function which is cold in instr profile but hot in "
|
||||
"sample profile, if the ratio of the number of zero counters "
|
||||
"divided by the the total number of counters is above the "
|
||||
"threshold, the profile of the function will be regarded as "
|
||||
"being harmful for performance and will be dropped. "));
|
||||
cl::opt<unsigned> SupplMinSizeThreshold(
|
||||
"suppl-min-size-threshold", cl::init(10), cl::Hidden,
|
||||
cl::desc("If the size of a function is smaller than the threshold, "
|
||||
"assume it can be inlined by PGO early inliner and it won't "
|
||||
"be adjusted based on sample profile. "));
|
||||
cl::opt<unsigned> InstrProfColdThreshold(
|
||||
"instr-prof-cold-threshold", cl::init(0), cl::Hidden,
|
||||
cl::desc("User specified cold threshold for instr profile which will "
|
||||
"override the cold threshold got from profile summary. "));
|
||||
|
||||
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
|
||||
|
||||
@ -708,6 +896,17 @@ static int merge_main(int argc, const char *argv[]) {
|
||||
if (!RemappingFile.empty())
|
||||
Remapper = SymbolRemapper::create(RemappingFile);
|
||||
|
||||
if (!SupplInstrWithSample.empty()) {
|
||||
if (ProfileKind != instr)
|
||||
exitWithError(
|
||||
"-supplement-instr-with-sample can only work with -instr. ");
|
||||
|
||||
supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename,
|
||||
OutputFormat, OutputSparse, SupplMinSizeThreshold,
|
||||
ZeroCounterThreshold, InstrProfColdThreshold);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ProfileKind == instr)
|
||||
mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename,
|
||||
OutputFormat, OutputSparse, NumThreads, FailureMode);
|
||||
@ -904,6 +1103,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
|
||||
uint64_t FuncMax = 0;
|
||||
uint64_t FuncSum = 0;
|
||||
for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
|
||||
if (Func.Counts[I] == (uint64_t)-1)
|
||||
continue;
|
||||
FuncMax = std::max(FuncMax, Func.Counts[I]);
|
||||
FuncSum += Func.Counts[I];
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user