1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

Revert "[CSSPGO][llvm-profgen] Compress recursive cycles in calling context"

This reverts commit 0609f257dc2e2c3e4c7cd30fe2ffd520117e706b.
This commit is contained in:
wlei 2021-02-03 21:17:21 -08:00
parent 924cc19d25
commit a12b3252a9
16 changed files with 71 additions and 491 deletions

View File

@ -1,4 +0,0 @@
PERF_RECORD_MMAP2 3019402/3019402: [0x400000(0x1000) @ 0 00:1d 265650677 1451231]: r-xp recursion-compression-noprobe.perfbin
4007e1
0x4007d6/0x4007e1/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x400795/0x4007b0/P/-/-/0 0x40079c/0x400790/P/-/-/0 0x400801/0x400770/P/-/-/0 0x400698/0x400801/P/-/-/0 0x400673/0x400696/P/-/-/0

View File

@ -1,23 +0,0 @@
PERF_RECORD_MMAP2 3367317/3367317: [0x201000(0x1000) @ 0 00:1d 238458915 1121070]: r-xp recursion-compression-pseudoprobe.perfbin
2017db
2017ba
2017e5
2017ba
2017e5
2017d9
2017ba
2017b0
2017b0
2017b0
2017b0
2017b0
2017b0
2017b0
2017b0
2017e5
2017d9
201847
7fcb072a67c3
5541f689495641d7
0x2017cd/0x2017db/P/-/-/0 0x2017b5/0x2017c0/P/-/-/0 0x2017a7/0x2017b2/P/-/-/0 0x2017e0/0x2017a0/P/-/-/0 0x2017cd/0x2017db/P/-/-/0 0x2017b5/0x2017c0/P/-/-/0 0x2017a7/0x2017b2/P/-/-/0 0x2017e0/0x2017a0/P/-/-/0 0x2017cd/0x2017db/P/-/-/0 0x2017d4/0x2017c0/P/-/-/0 0x2017b5/0x2017c0/P/-/-/0 0x2017a7/0x2017b2/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0

View File

@ -1,65 +0,0 @@
; Firstly test uncompression(--compress-recursion=0)
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0
; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t
; RUN: FileCheck %s --input-file %t
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 2: 13 fb:11
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:12:0
; CHECK-UNCOMPRESS: 1: 11
; CHECK-UNCOMPRESS: 2: 1 fa:1
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:3:0
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 2: 2 fb:1
; CHECK-UNCOMPRESS:[main:1 @ foo]:3:0
; CHECK-UNCOMPRESS: 2: 1
; CHECK-UNCOMPRESS: 3: 2 fa:1
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:1:0
; CHECK-UNCOMPRESS: 4: 1
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0
; CHECK-UNCOMPRESS: 2: 1 fa:1
; CHECK: [main:1 @ foo:3 @ fa]:14:0
; CHECK: 1: 1
; CHECK: 2: 13 fb:11
; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:12:0
; CHECK: 1: 11
; CHECK: 2: 1 fa:1
; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:4:0
; CHECK: 1: 1
; CHECK: 2: 2 fb:1
; CHECK: 4: 1
; CHECK: [main:1 @ foo]:3:0
; CHECK: 2: 1
; CHECK: 3: 2 fa:1
; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:0:0
; original code:
; clang -O3 -g test.c -o a.out
#include <stdio.h>
int fb(int n) {
if(n > 10) return fb(n / 2);
return fa(n - 1);
}
int fa(int n) {
if(n < 2) return n;
if(n % 2) return fb(n - 1);
return fa(n - 1);
}
void foo() {
int s, i = 0;
while (i++ < 10000)
s += fa(i);
printf("sum is %d\n", s);
}
int main() {
foo();
return 0;
}

View File

@ -1,169 +0,0 @@
; Firstly test uncompression(--compress-recursion=0)
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0
; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
; RUN: FileCheck %s --input-file %t
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 3: 1
; CHECK-UNCOMPRESS: 4: 1
; CHECK-UNCOMPRESS: 7: 1 fb:1
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 3: 1
; CHECK-UNCOMPRESS: 4: 1
; CHECK-UNCOMPRESS: 7: 1 fb:1
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 3: 1
; CHECK-UNCOMPRESS: 5: 1
; CHECK-UNCOMPRESS: 8: 1 fa:1
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 3: 1
; CHECK-UNCOMPRESS: 6: 1 fa:1
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 3: 1
; CHECK-UNCOMPRESS: 6: 1 fa:1
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 3: 1
; CHECK-UNCOMPRESS: 6: 1 fa:1
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 2: 1
; CHECK-UNCOMPRESS: 5: 1 fb:1
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 2: 1
; CHECK-UNCOMPRESS: 5: 1 fb:1
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 2: 1
; CHECK-UNCOMPRESS: 5: 1 fb:1
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb:6 @ fa]:2:1
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 3: 1
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:1:0
; CHECK-UNCOMPRESS: 5: 1 fb:1
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4
; CHECK: 1: 4
; CHECK: 2: 3
; CHECK: 3: 1
; CEHCK: 5: 4 fb:4
; CHECK: 6: 1 fa:1
; CHECK !CFGChecksum: 72617220756
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:6:2
; CHECK: 1: 2
; CHECK: 3: 2
; CHECK: 4: 1
; CHECK: 7: 1 fb:1
; CHECK: !CFGChecksum: 120515930909
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
; CHECK: 1: 1
; CHECK: 3: 1
; CHECK: 4: 1
; CHECK: 7: 1 fb:1
; CHECK: !CFGChecksum: 120515930909
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa]:4:1
; CHECK: 1: 1
; CHECK: 3: 1
; CHECK: 5: 1
; CHECK: 8: 1 fa:1
; CHECK: !CFGChecksum: 120515930909
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
; CHECK: 1: 1
; CHECK: 3: 1
; CHECK: 6: 1 fa:1
; CHECK: !CFGChecksum: 72617220756
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
; CHECK: 1: 1
; CHECK: 3: 1
; CHECK: 6: 1 fa:1
; CHECK: !CFGChecksum: 72617220756
; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Range Counter:
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5
; CHECK-UNWINDER: (7a0, 7a7): 1
; CHECK-UNWINDER: (7a0, 7ab): 3
; CHECK-UNWINDER: (7b2, 7b5): 1
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6
; CHECK-UNWINDER: (7c0, 7d4): 1
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8
; CHECK-UNWINDER: (7c0, 7cd): 1
; CHECK-UNWINDER: (7db, 7e0): 1
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7
; CHECK-UNWINDER: (7a0, 7a7): 1
; CHECK-UNWINDER: (7b2, 7b5): 1
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6
; CHECK-UNWINDER: (7c0, 7cd): 2
; CHECK-UNWINDER: (7db, 7e0): 1
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7
; CHECK-UNWINDER: (7a0, 7a7): 1
; CHECK-UNWINDER: (7b2, 7b5): 1
; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Branch Counter:
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5
; CHECK-UNWINDER: (7a7, 7b2): 1
; CHECK-UNWINDER: (7ab, 7a0): 4
; CHECK-UNWINDER: (7b5, 7c0): 1
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6
; CHECK-UNWINDER: (7d4, 7c0): 1
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8
; CHECK-UNWINDER: (7cd, 7db): 1
; CHECK-UNWINDER: (7e0, 7a0): 1
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7
; CHECK-UNWINDER: (7a7, 7b2): 1
; CHECK-UNWINDER: (7b5, 7c0): 1
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6
; CHECK-UNWINDER: (7cd, 7db): 2
; CHECK-UNWINDER: (7e0, 7a0): 1
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7
; CHECK-UNWINDER: (7a7, 7b2): 1
; CHECK-UNWINDER: (7b5, 7c0): 1
; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
; -g test.c -o a.out
#include <stdio.h>
int fb(int n) {
if(n > 10) return fb(n / 2);
return fa(n - 1);
}
int fa(int n) {
if(n < 2) return n;
if(n % 2) return fb(n - 1);
return fa(n - 1);
}
void foo() {
int s, i = 0;
while (i++ < 10000)
s += fa(i);
printf("sum is %d\n", s);
}
int main() {
foo();
return 0;
}

View File

@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
#include "PerfReader.h"
#include "ProfileGenerator.h"
static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden,
cl::init(false), cl::ZeroOrMore,
@ -125,8 +124,6 @@ VirtualUnwinder::getOrCreateCounterForProbe(const ProfiledBinary *Binary,
ProbeBasedKey->Probes.emplace_back(CallProbe);
}
}
CSProfileGenerator::compressRecursionContext<const PseudoProbe *>(
ProbeBasedKey->Probes);
ProbeBasedKey->genHashCode();
Hashable<ContextKey> ContextId(ProbeBasedKey);
auto Ret = CtxCounterMap->emplace(ContextId, SampleCounter());

View File

@ -22,22 +22,12 @@ static cl::opt<SampleProfileFormat> OutputFormat(
clEnumValN(SPF_GCC, "gcc",
"GCC encoding (only meaningful for -sample)")));
static cl::opt<int32_t, true> RecursionCompression(
"compress-recursion",
cl::desc("Compressing recursion by deduplicating adjacent frame "
"sequences up to the specified size. -1 means no size limit."),
cl::Hidden,
cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
using namespace llvm;
using namespace sampleprof;
namespace llvm {
namespace sampleprof {
// Initialize the MaxCompressionSize to -1 which means no size limit
int32_t CSProfileGenerator::MaxCompressionSize = -1;
static bool
usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
return BinarySampleCounters.size() &&
@ -329,16 +319,26 @@ void CSProfileGenerator::populateInferredFunctionSamples() {
}
}
// Helper function to extract context prefix string stack
// Extract context stack for reusing, leaf context stack will
// be added compressed while looking up function profile
static void
extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
const SmallVectorImpl<const PseudoProbe *> &Probes,
ProfiledBinary *Binary) {
// Helper function to extract context prefix
// PrefixContextId is the context id string except for the leaf probe's
// context, the final ContextId will be:
// ContextId = PrefixContextId + LeafContextId;
// Remind that the string in ContextStrStack is in callee-caller order
// So process the string vector reversely
static std::string
extractPrefixContextId(const SmallVector<const PseudoProbe *, 16> &Probes,
ProfiledBinary *Binary) {
SmallVector<std::string, 16> ContextStrStack;
for (const auto *P : Probes) {
Binary->getInlineContextForProbe(P, ContextStrStack, true);
}
std::ostringstream OContextStr;
for (auto &CxtStr : ContextStrStack) {
if (OContextStr.str().size())
OContextStr << " @ ";
OContextStr << CxtStr;
}
return OContextStr.str();
}
void PseudoProbeCSProfileGenerator::generateProfile() {
@ -350,15 +350,15 @@ void PseudoProbeCSProfileGenerator::generateProfile() {
for (const auto &CI : BI.second) {
const ProbeBasedCtxKey *CtxKey =
dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
SmallVector<std::string, 16> ContextStrStack;
extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
std::string PrefixContextId =
extractPrefixContextId(CtxKey->Probes, Binary);
// Fill in function body samples from probes, also infer caller's samples
// from callee's probe
populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
populateBodySamplesWithProbes(CI.second.RangeCounter, PrefixContextId,
Binary);
// Fill in boundary samples for a call probe
populateBoundarySamplesWithProbes(CI.second.BranchCounter,
ContextStrStack, Binary);
PrefixContextId, Binary);
}
}
}
@ -403,8 +403,8 @@ void PseudoProbeCSProfileGenerator::extractProbesFromRange(
}
void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
const RangeSample &RangeCounter,
SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
const RangeSample &RangeCounter, StringRef PrefixContextId,
ProfiledBinary *Binary) {
ProbeCounterMap ProbeCounter;
// Extract the top frame probes by looking up each address among the range in
// the Address2ProbeMap
@ -413,7 +413,7 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
const PseudoProbe *Probe = PI.first;
uint64_t Count = PI.second;
FunctionSamples &FunctionProfile =
getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
getFunctionProfileForLeafProbe(PrefixContextId, Probe, Binary);
FunctionProfile.addBodySamples(Probe->Index, 0, Count);
FunctionProfile.addTotalSamples(Count);
@ -446,8 +446,8 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
}
void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
const BranchSample &BranchCounter,
SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
const BranchSample &BranchCounter, StringRef PrefixContextId,
ProfiledBinary *Binary) {
for (auto BI : BranchCounter) {
uint64_t SourceOffset = BI.first.first;
uint64_t TargetOffset = BI.first.second;
@ -457,7 +457,7 @@ void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
if (CallProbe == nullptr)
continue;
FunctionSamples &FunctionProfile =
getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
getFunctionProfileForLeafProbe(PrefixContextId, CallProbe, Binary);
FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
FunctionProfile.addTotalSamples(Count);
StringRef CalleeName = FunctionSamples::getCanonicalFnName(
@ -470,26 +470,25 @@ void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
}
FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
SmallVectorImpl<std::string> &ContextStrStack,
StringRef PrefixContextId, SmallVector<std::string, 16> &LeafInlinedContext,
const PseudoProbeFuncDesc *LeafFuncDesc) {
assert(ContextStrStack.size() && "Profile context must have the leaf frame");
// Compress the context string except for the leaf frame
std::string LeafFrame = ContextStrStack.back();
ContextStrStack.pop_back();
CSProfileGenerator::compressRecursionContext(ContextStrStack);
assert(LeafInlinedContext.size() &&
"Profile context must have the leaf frame");
std::ostringstream OContextStr;
for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
OContextStr << PrefixContextId.str();
for (uint32_t I = 0; I < LeafInlinedContext.size() - 1; I++) {
if (OContextStr.str().size())
OContextStr << " @ ";
OContextStr << ContextStrStack[I];
OContextStr << LeafInlinedContext[I];
}
// For leaf inlined context with the top frame, we should strip off the top
// frame's probe id, like:
// Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
if (OContextStr.str().size())
OContextStr << " @ ";
OContextStr << StringRef(LeafFrame).split(":").first.str();
StringRef LeafLoc = LeafInlinedContext.back();
OContextStr << LeafLoc.split(":").first.str();
FunctionSamples &FunctionProile =
getFunctionProfileForContext(OContextStr.str());
@ -498,18 +497,17 @@ FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
}
FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe,
StringRef PrefixContextId, const PseudoProbe *LeafProbe,
ProfiledBinary *Binary) {
// Explicitly copy the context for appending the leaf context
SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
ContextStrStack.end());
Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy);
SmallVector<std::string, 16> LeafInlinedContext;
Binary->getInlineContextForProbe(LeafProbe, LeafInlinedContext);
// Note that the context from probe doesn't include leaf frame,
// hence we need to retrieve and append the leaf frame.
const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
ContextStrStackCopy.emplace_back(FuncDesc->FuncName + ":" +
Twine(LeafProbe->Index).str());
return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc);
LeafInlinedContext.emplace_back(FuncDesc->FuncName + ":" +
Twine(LeafProbe->Index).str());
return getFunctionProfileForLeafProbe(PrefixContextId, LeafInlinedContext,
FuncDesc);
}
} // end namespace sampleprof

View File

@ -50,7 +50,6 @@ protected:
*/
void findDisjointRanges(RangeSample &DisjointRanges,
const RangeSample &Ranges);
// Used by SampleProfileWriter
StringMap<FunctionSamples> ProfileMap;
};
@ -92,110 +91,6 @@ public:
populateInferredFunctionSamples();
}
// Remove adjacent repeated context sequences up to a given sequence length,
// -1 means no size limit. Note that repeated sequences are identified based
// on the exact call site, this is finer granularity than function recursion.
template <typename T>
static void compressRecursionContext(SmallVectorImpl<T> &Context,
int32_t CSize = MaxCompressionSize) {
uint32_t I = 1;
uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
uint32_t MaxDedupSize =
CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
auto BeginIter = Context.begin();
// Use an in-place algorithm to save memory copy
// End indicates the end location of current iteration's data
uint32_t End = 0;
// Deduplicate from length 1 to the max possible size of a repeated
// sequence.
while (I <= MaxDedupSize) {
// This is a linear algorithm that deduplicates adjacent repeated
// sequences of size I. The deduplication detection runs on a sliding
// window whose size is 2*I and it keeps sliding the window to deduplicate
// the data inside. Once duplication is detected, deduplicate it by
// skipping the right half part of the window, otherwise just copy back
// the new one by appending them at the back of End pointer(for the next
// iteration).
//
// For example:
// Input: [a1, a2, b1, b2]
// (Added index to distinguish the same char, the origin is [a, a, b,
// b], the size of the dedup window is 2(I = 1) at the beginning)
//
// 1) The initial status is a dummy window[null, a1], then just copy the
// right half of the window(End = 0), then slide the window.
// Result: [a1], a2, b1, b2 (End points to the element right before ],
// after ] is the data of the previous iteration)
//
// 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
// the window i.e the duplication happen. Only slide the window.
// Result: [a1], a2, b1, b2
//
// 3) Next window is [a2, b1], copy the right half of the window(b1 is
// new) to the End and slide the window.
// Result: [a1, b1], b1, b2
//
// 4) Next window is [b1, b2], same to 2), skip b2.
// Result: [a1, b1], b1, b2
// After resize, it will be [a, b]
// Use pointers like below to do comparison inside the window
// [a b c a b c]
// | | | | |
// LeftBoundary Left Right Left+I Right+I
// A duplication found if Left < LeftBoundry.
int32_t Right = I - 1;
End = I;
int32_t LeftBoundary = 0;
while (Right + I < Context.size()) {
// To avoids scanning a part of a sequence repeatedly, it finds out
// the common suffix of two hald in the window. The common suffix will
// serve as the common prefix of next possible pair of duplicate
// sequences. The non-common part will be ignored and never scanned
// again.
// For example.
// Input: [a, b1], c1, b2, c2
// I = 2
//
// 1) For the window [a, b1, c1, b2], non-common-suffix for the right
// part is 'c1', copy it and only slide the window 1 step.
// Result: [a, b1, c1], b2, c2
//
// 2) Next window is [b1, c1, b2, c2], so duplication happen.
// Result after resize: [a, b, c]
int32_t Left = Right;
while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
// Find the longest suffix inside the window. When stops, Left points
// at the diverging point in the current sequence.
Left--;
}
bool DuplicationFound = (Left < LeftBoundary);
// Don't need to recheck the data before Right
LeftBoundary = Right + 1;
if (DuplicationFound) {
// Duplication found, skip right half of the window.
Right += I;
} else {
// Copy the non-common-suffix part of the adjacent sequence.
std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
BeginIter + End);
End += Left + I - Right;
// Only slide the window by the size of non-common-suffix
Right = Left + I;
}
}
// Don't forget the remaining part that's not scanned.
std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
End += Context.size() - Right - 1;
I++;
Context.resize(End);
}
}
protected:
// Lookup or create FunctionSamples for the context
FunctionSamples &getFunctionProfileForContext(StringRef ContextId);
@ -214,11 +109,6 @@ private:
const BranchSample &BranchCounters,
ProfiledBinary *Binary);
void populateInferredFunctionSamples();
public:
// Deduplicate adjacent repeated context sequences up to a given sequence
// length. -1 means no size limit.
static int32_t MaxCompressionSize;
};
using ProbeCounterMap = std::unordered_map<const PseudoProbe *, uint64_t>;
@ -237,23 +127,22 @@ private:
ProbeCounterMap &ProbeCounter,
ProfiledBinary *Binary);
// Fill in function body samples from probes
void
populateBodySamplesWithProbes(const RangeSample &RangeCounter,
SmallVectorImpl<std::string> &ContextStrStack,
ProfiledBinary *Binary);
void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
StringRef PrefixContextId,
ProfiledBinary *Binary);
// Fill in boundary samples for a call probe
void populateBoundarySamplesWithProbes(
const BranchSample &BranchCounter,
SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary);
void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
StringRef PrefixContextId,
ProfiledBinary *Binary);
// Helper function to get FunctionSamples for the leaf inlined context
FunctionSamples &
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
const PseudoProbeFuncDesc *LeafFuncDesc);
FunctionSamples &getFunctionProfileForLeafProbe(
StringRef PrefixContextId,
SmallVector<std::string, 16> &LeafInlinedContext,
const PseudoProbeFuncDesc *LeafFuncDesc);
// Helper function to get FunctionSamples for the leaf probe
FunctionSamples &
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
const PseudoProbe *LeafProbe,
ProfiledBinary *Binary);
FunctionSamples &getFunctionProfileForLeafProbe(StringRef PrefixContextId,
const PseudoProbe *LeafProbe,
ProfiledBinary *Binary);
};
} // end namespace sampleprof

View File

@ -8,7 +8,6 @@
#include "ProfiledBinary.h"
#include "ErrorHandling.h"
#include "ProfileGenerator.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/CommandLine.h"
@ -129,7 +128,7 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
std::string
ProfiledBinary::getExpandedContextStr(const std::list<uint64_t> &Stack) const {
std::string ContextStr;
SmallVector<std::string, 16> ContextVec;
SmallVector<std::string, 8> ContextVec;
// Process from frame root to leaf
for (auto Iter = Stack.rbegin(); Iter != Stack.rend(); Iter++) {
uint64_t Offset = virtualAddrToOffset(*Iter);
@ -140,22 +139,21 @@ ProfiledBinary::getExpandedContextStr(const std::list<uint64_t> &Stack) const {
}
assert(ContextVec.size() && "Context length should be at least 1");
// Compress the context string except for the leaf frame
std::string LeafFrame = ContextVec.back();
ContextVec.pop_back();
CSProfileGenerator::compressRecursionContext<std::string>(ContextVec);
std::ostringstream OContextStr;
for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) {
if (OContextStr.str().size()) {
OContextStr << " @ ";
}
OContextStr << ContextVec[I];
if (I == ContextVec.size() - 1) {
// Only keep the function name for the leaf frame
StringRef Ref(ContextVec[I]);
OContextStr << Ref.split(":").first.str();
} else {
OContextStr << ContextVec[I];
}
}
// Only keep the function name for the leaf frame
if (OContextStr.str().size())
OContextStr << " @ ";
OContextStr << StringRef(LeafFrame).split(":").first.str();
return OContextStr.str();
}

View File

@ -243,7 +243,7 @@ public:
}
void
getInlineContextForProbe(const PseudoProbe *Probe,
SmallVectorImpl<std::string> &InlineContextStack,
SmallVector<std::string, 16> &InlineContextStack,
bool IncludeLeaf = false) const {
return ProbeDecoder.getInlineContextForProbe(Probe, InlineContextStack,
IncludeLeaf);

View File

@ -34,7 +34,7 @@ void PseudoProbeFuncDesc::print(raw_ostream &OS) {
OS << "Hash: " << FuncHash << "\n";
}
void PseudoProbe::getInlineContext(SmallVectorImpl<std::string> &ContextStack,
void PseudoProbe::getInlineContext(SmallVector<std::string, 16> &ContextStack,
const GUIDProbeFunctionMap &GUID2FuncMAP,
bool ShowName) const {
uint32_t Begin = ContextStack.size();
@ -320,7 +320,7 @@ PseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const {
}
void PseudoProbeDecoder::getInlineContextForProbe(
const PseudoProbe *Probe, SmallVectorImpl<std::string> &InlineContextStack,
const PseudoProbe *Probe, SmallVector<std::string, 16> &InlineContextStack,
bool IncludeLeaf) const {
Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap, true);
if (!IncludeLeaf)

View File

@ -138,7 +138,7 @@ struct PseudoProbe {
// Get the inlined context by traversing current inline tree backwards,
// each tree node has its InlineSite which is taken as the context.
// \p ContextStack is populated in root to leaf order
void getInlineContext(SmallVectorImpl<std::string> &ContextStack,
void getInlineContext(SmallVector<std::string, 16> &ContextStack,
const GUIDProbeFunctionMap &GUID2FuncMAP,
bool ShowName) const;
// Helper function to get the string from context stack
@ -214,7 +214,7 @@ public:
// IncludeLeaf = false, Output: [main:1, foo:2]
void
getInlineContextForProbe(const PseudoProbe *Probe,
SmallVectorImpl<std::string> &InlineContextStack,
SmallVector<std::string, 16> &InlineContextStack,
bool IncludeLeaf) const;
const AddressProbesMap &getAddress2ProbesMap() const {

View File

@ -7,4 +7,4 @@ endif()
add_subdirectory(
llvm-exegesis
)
add_subdirectory(llvm-profgen)

View File

@ -1,5 +0,0 @@
add_llvm_unittest(LLVMProfgenTests
ContextCompressionTest.cpp
)
target_link_libraries(LLVMProfgenTests PRIVATE LLVMTestingSupport)

View File

@ -1,36 +0,0 @@
//===-- ContextCompressionTest.cpp -------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../tools/llvm-profgen/ProfileGenerator.h"
#include "gtest/gtest.h"
using namespace llvm;
using namespace sampleprof;
TEST(TestCompression, TestNoSizeLimit1) {
SmallVector<std::string, 16> Context = {"a", "b", "c", "a", "b", "c"};
SmallVector<std::string, 16> Expect = {"a", "b", "c"};
CSProfileGenerator::compressRecursionContext(Context, -1);
EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin()));
}
TEST(TestCompression, TestNoSizeLimit2) {
SmallVector<std::string, 16> Context = {"m", "a", "a", "b", "c", "a",
"b", "c", "b", "c", "d"};
SmallVector<std::string, 16> Expect = {"m", "a", "b", "c", "d"};
CSProfileGenerator::compressRecursionContext(Context, -1);
EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin()));
}
TEST(TestCompression, TestMaxDedupSize) {
SmallVector<std::string, 16> Context = {"m", "a", "a", "b", "c", "a",
"b", "c", "b", "c", "d"};
SmallVector<std::string, 16> Expect = {"m", "a", "b", "c",
"a", "b", "c", "d"};
CSProfileGenerator::compressRecursionContext(Context, 2);
EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin()));
}