
[CSSPGO][llvm-profgen] Aggregate samples on call frame trie to speed up profile generation

For CS profile generation, the process of call stack unwinding is time-consuming, since for each LBR entry we need linear time to generate the context (hashing, compression, string concatenation). This change speeds that up by grouping all the call frames within one LBR sample into a trie and aggregating the results (sample counters) on it, deferring context compression and string generation to the end of unwinding.

Specifically, it uses `StackLeaf` as the top frame on the stack and manipulates it (popping or pushing a trie node) dynamically during virtual unwinding, so that a raw sample can simply be recorded on the leaf node; the path from root to leaf represents its calling context. At the end, it traverses the trie and generates the contexts on the fly.
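To make the mechanism concrete, here is a minimal, self-contained sketch of the idea (hypothetical, simplified names such as `FrameTrieNode`, `Unwinder`, and `collect`; the actual change uses `UnwindState::ProfiledFrame` with separate range and branch sample vectors):

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Hypothetical trie node; the real change uses UnwindState::ProfiledFrame.
struct FrameTrieNode {
  uint64_t Address = 0;
  FrameTrieNode *Parent = nullptr;
  uint64_t SampleCount = 0; // samples aggregated for this exact context
  std::map<uint64_t, std::unique_ptr<FrameTrieNode>> Children;

  FrameTrieNode *getOrCreateChild(uint64_t Addr) {
    auto &Slot = Children[Addr];
    if (!Slot) {
      Slot = std::make_unique<FrameTrieNode>();
      Slot->Address = Addr;
      Slot->Parent = this;
    }
    return Slot.get();
  }
};

struct Unwinder {
  FrameTrieNode Root;          // dummy root, Address == 0
  FrameTrieNode *Leaf = &Root; // current top frame (the "stack leaf")

  void pushFrame(uint64_t Addr) { Leaf = Leaf->getOrCreateChild(Addr); }
  void popFrame() { Leaf = Leaf->Parent; }
  // O(1) during unwinding: no hashing/compression/concatenation here.
  void recordSample(uint64_t Repeat) { Leaf->SampleCount += Repeat; }

  // Deferred context generation: a single DFS builds each context string
  // once, no matter how many raw samples landed on the node.
  void collect(FrameTrieNode *Node, std::vector<uint64_t> &Path,
               std::map<std::string, uint64_t> &Profile) {
    if (Node->Address)
      Path.push_back(Node->Address);
    if (Node->SampleCount) {
      std::string Context; // the root-to-leaf path is the calling context
      for (uint64_t Addr : Path)
        Context += (Context.empty() ? "" : " @ ") + std::to_string(Addr);
      Profile[Context] += Node->SampleCount;
    }
    for (auto &Child : Node->Children)
      collect(Child.second.get(), Path, Profile);
    if (Node->Address)
      Path.pop_back();
  }
};

int main() {
  Unwinder U;
  // Two samples sharing the 0x400 -> 0x500 prefix reuse the same nodes.
  U.pushFrame(0x400);
  U.pushFrame(0x500);
  U.recordSample(3); // 3 samples for context {0x400, 0x500}
  U.pushFrame(0x600);
  U.recordSample(2); // 2 samples for context {0x400, 0x500, 0x600}
  std::map<std::string, uint64_t> Profile;
  std::vector<uint64_t> Path;
  U.collect(&U.Root, Path, Profile);
  for (const auto &KV : Profile) // addresses printed in decimal
    std::cout << KV.first << ": " << KV.second << "\n";
  return 0;
}
```

With this shape, hashing, compression, and string concatenation run once per distinct context during the final DFS, instead of once per LBR sample during unwinding.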

Results:
Our internal branch shows about a 5x speed-up on some large workloads in the SPEC06 benchmark.

Differential Revision: https://reviews.llvm.org/D94110
wlei 2021-01-11 12:47:22 -08:00
parent b659297be9
commit 425d60e18c
4 changed files with 232 additions and 108 deletions


@@ -28,11 +28,12 @@ void VirtualUnwinder::unwindCall(UnwindState &State) {
// 2nd frame is in prolog/epilog. In the future, we will switch to a
// pro/epi tracker (DWARF CFI) for a precise check.
uint64_t Source = State.getCurrentLBRSource();
auto Iter = State.CallStack.begin();
if (State.CallStack.size() == 1 || *(++Iter) != Source) {
State.CallStack.front() = Source;
auto *ParentFrame = State.getParentFrame();
if (ParentFrame == State.getDummyRootPtr() ||
ParentFrame->Address != Source) {
State.switchToFrame(Source);
} else {
State.CallStack.pop_front();
State.popFrame();
}
State.InstPtr.update(Source);
}
@@ -41,26 +42,29 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
InstructionPointer &IP = State.InstPtr;
uint64_t Target = State.getCurrentLBRTarget();
uint64_t End = IP.Address;
if (State.getBinary()->usePseudoProbes()) {
if (Binary->usePseudoProbes()) {
// We don't need the top frame probe since it should be extracted
// from the range.
// The outcome of the virtual unwinding with pseudo probes is a
// map from a context key to the address range being unwound.
// This means basically linear unwinding is not needed for pseudo
// probes. The range will be simply recorded here and will be
// converted to a list of pseudo probes to report in ProfileGenerator.
recordRangeCount(Target, End, State, Repeat);
State.getParentFrame()->recordRangeCount(Target, End, Repeat);
} else {
// Unwind linear execution part
uint64_t LeafAddr = State.CurrentLeafFrame->Address;
while (IP.Address >= Target) {
uint64_t PrevIP = IP.Address;
IP.backward();
// Break into segments for implicit call/return due to inlining
bool SameInlinee =
State.getBinary()->inlineContextEqual(PrevIP, IP.Address);
bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address);
if (!SameInlinee || PrevIP == Target) {
recordRangeCount(PrevIP, End, State, Repeat);
State.switchToFrame(LeafAddr);
State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat);
End = IP.Address;
}
State.CallStack.front() = IP.Address;
LeafAddr = IP.Address;
}
}
}
@@ -68,9 +72,9 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
void VirtualUnwinder::unwindReturn(UnwindState &State) {
// Add extra frame as we unwind through the return
const LBREntry &LBR = State.getCurrentLBR();
uint64_t CallAddr = State.getBinary()->getCallAddrFromFrameAddr(LBR.Target);
State.CallStack.front() = CallAddr;
State.CallStack.push_front(LBR.Source);
uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target);
State.switchToFrame(CallAddr);
State.pushFrame(LBR.Source);
State.InstPtr.update(LBR.Source);
}
@@ -78,79 +82,100 @@ void VirtualUnwinder::unwindBranchWithinFrame(UnwindState &State) {
// TODO: Tolerate tail calls for now, as we may see tail calls from libraries.
// This is only for intra-function branches, excluding tail calls.
uint64_t Source = State.getCurrentLBRSource();
State.CallStack.front() = Source;
State.switchToFrame(Source);
State.InstPtr.update(Source);
}
SampleCounter &
VirtualUnwinder::getOrCreateCounter(const ProfiledBinary *Binary,
std::list<uint64_t> &CallStack) {
if (Binary->usePseudoProbes()) {
return getOrCreateCounterForProbe(Binary, CallStack);
}
std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
std::shared_ptr<StringBasedCtxKey> KeyStr =
std::make_shared<StringBasedCtxKey>();
KeyStr->Context = Binary->getExpandedContextStr(CallStack);
KeyStr->Context = Binary->getExpandedContextStr(Stack);
KeyStr->genHashCode();
auto Ret =
CtxCounterMap->emplace(Hashable<ContextKey>(KeyStr), SampleCounter());
return Ret.first->second;
return KeyStr;
}
SampleCounter &
VirtualUnwinder::getOrCreateCounterForProbe(const ProfiledBinary *Binary,
std::list<uint64_t> &CallStack) {
std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() {
std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey =
std::make_shared<ProbeBasedCtxKey>();
if (CallStack.size() > 1) {
// We don't need the top frame probe since it should be extracted
// from the range.
// The top of stack is an instruction from the function where
// the LBR address range physically resides. Strip it since
// the function is not a part of the call context. We also
// don't need its inline context since the probes being unwound
// come with an inline context all the way back to the uninlined
// function in their prefix tree.
auto Iter = CallStack.rbegin();
auto EndT = std::prev(CallStack.rend());
for (; Iter != EndT; Iter++) {
uint64_t Address = *Iter;
const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Address);
// We may not find a probe for a merged or external callsite.
// Callsite merging may cause the loss of original probe IDs.
// Cutting off the context from here since the inliner will
// not know how to consume a context with unknown callsites.
if (!CallProbe)
break;
for (auto CallProbe : Stack) {
ProbeBasedKey->Probes.emplace_back(CallProbe);
}
}
CSProfileGenerator::compressRecursionContext<const PseudoProbe *>(
ProbeBasedKey->Probes);
ProbeBasedKey->genHashCode();
Hashable<ContextKey> ContextId(ProbeBasedKey);
auto Ret = CtxCounterMap->emplace(ContextId, SampleCounter());
return Ret.first->second;
return ProbeBasedKey;
}
void VirtualUnwinder::recordRangeCount(uint64_t Start, uint64_t End,
UnwindState &State, uint64_t Repeat) {
uint64_t StartOffset = State.getBinary()->virtualAddrToOffset(Start);
uint64_t EndOffset = State.getBinary()->virtualAddrToOffset(End);
SampleCounter &SCounter =
getOrCreateCounter(State.getBinary(), State.CallStack);
SCounter.recordRangeCount(StartOffset, EndOffset, Repeat);
template <typename T>
void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
T &Stack) {
if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty())
return;
std::shared_ptr<ContextKey> Key = Stack.getContextKey();
auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter());
SampleCounter &SCounter = Ret.first->second;
for (auto &Item : Cur->RangeSamples) {
uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item));
}
for (auto &Item : Cur->BranchSamples) {
uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item));
}
}
template <typename T>
void VirtualUnwinder::collectSamplesFromFrameTrie(
UnwindState::ProfiledFrame *Cur, T &Stack) {
if (!Cur->isDummyRoot()) {
if (!Stack.pushFrame(Cur)) {
// Process truncated context
for (const auto &Item : Cur->Children) {
// Start a new traversal ignoring its bottom context
collectSamplesFromFrameTrie(Item.second.get());
}
return;
}
}
collectSamplesFromFrame(Cur, Stack);
// Process child frames
for (const auto &Item : Cur->Children) {
collectSamplesFromFrameTrie(Item.second.get(), Stack);
}
// Recover the call stack
Stack.popFrame();
}
void VirtualUnwinder::collectSamplesFromFrameTrie(
UnwindState::ProfiledFrame *Cur) {
if (Binary->usePseudoProbes()) {
ProbeStack Stack(Binary);
collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack);
} else {
FrameStack Stack(Binary);
collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
}
}
void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
UnwindState &State, uint64_t Repeat) {
if (Branch.IsArtificial)
return;
uint64_t SourceOffset = State.getBinary()->virtualAddrToOffset(Branch.Source);
uint64_t TargetOffset = State.getBinary()->virtualAddrToOffset(Branch.Target);
SampleCounter &SCounter =
getOrCreateCounter(State.getBinary(), State.CallStack);
SCounter.recordBranchCount(SourceOffset, TargetOffset, Repeat);
if (Binary->usePseudoProbes()) {
// Same as recordRangeCount: we don't need the top frame probe since we will
// extract it from the branch's source address
State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target,
Repeat);
} else {
State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target,
Repeat);
}
}
bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
@@ -199,6 +224,8 @@ bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
// Record `branch` with calling context after unwinding.
recordBranchCount(Branch, State, Repeat);
}
// As samples are aggregated on the trie, record them into the counter map
collectSamplesFromFrameTrie(State.getDummyRootPtr());
return true;
}
@@ -325,7 +352,8 @@ void PerfReader::printUnwinderOutput() {
void PerfReader::unwindSamples() {
for (const auto &Item : AggregatedSamples) {
const HybridSample *Sample = dyn_cast<HybridSample>(Item.first.getPtr());
VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary]);
VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary],
Sample->Binary);
Unwinder.unwind(Sample, Item.second);
}
@@ -334,7 +362,7 @@ void PerfReader::unwindSamples() {
}
bool PerfReader::extractLBRStack(TraceStream &TraceIt,
SmallVector<LBREntry, 16> &LBRStack,
SmallVectorImpl<LBREntry> &LBRStack,
ProfiledBinary *Binary) {
// The raw format of LBR stack is like:
// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
@@ -398,7 +426,7 @@ bool PerfReader::extractLBRStack(TraceStream &TraceIt,
}
bool PerfReader::extractCallstack(TraceStream &TraceIt,
std::list<uint64_t> &CallStack) {
SmallVectorImpl<uint64_t> &CallStack) {
// The raw format of call stack is like:
// 4005dc # leaf frame
// 400634


@@ -133,7 +133,7 @@ struct HybridSample : public PerfSample {
// Profiled binary that the current frame address belongs to
ProfiledBinary *Binary;
// Call stack recorded in FILO (leaf to root) order
std::list<uint64_t> CallStack;
SmallVector<uint64_t, 16> CallStack;
// LBR stack recorded in FIFO order
SmallVector<LBREntry, 16> LBRStack;
@@ -147,7 +147,7 @@ struct HybridSample : public PerfSample {
const HybridSample *Other = dyn_cast<HybridSample>(K);
if (Other->Binary != Binary)
return false;
const std::list<uint64_t> &OtherCallStack = Other->CallStack;
const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
if (CallStack.size() != OtherCallStack.size() ||
@@ -193,14 +193,40 @@ using AggregatedCounter =
std::unordered_map<Hashable<PerfSample>, uint64_t,
Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
// The state for the unwinder. It doesn't hold the data but only keeps
// pointers/indices into the data. While unwinding, the call stack is changed
// dynamically and will be recorded as the context of the sample.
struct UnwindState {
// Profiled binary that the current frame address belongs to
const ProfiledBinary *Binary;
// TODO: switch to use trie for call stack
std::list<uint64_t> CallStack;
// Call stack trie node
struct ProfiledFrame {
const uint64_t Address = 0;
ProfiledFrame *Parent;
SampleVector RangeSamples;
SampleVector BranchSamples;
std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
: Address(Addr), Parent(P) {}
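// Return the child frame at Address, creating it on first use.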
ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
assert(Address && "Address can't be zero!");
auto Ret = Children.emplace(
Address, std::make_unique<ProfiledFrame>(Address, this));
return Ret.first->second.get();
}
void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
}
void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
}
bool isDummyRoot() { return Address == 0; }
};
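// Dummy root of the call frame trie; CurrentLeafFrame always points at the
// top frame (stack leaf) during unwinding.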
ProfiledFrame DummyTrieRoot;
ProfiledFrame *CurrentLeafFrame;
// Used to fall through the LBR stack
uint32_t LBRIndex = 0;
// Reference to HybridSample.LBRStack
@@ -208,19 +234,20 @@ struct UnwindState {
// Used to iterate the address range
InstructionPointer InstPtr;
UnwindState(const HybridSample *Sample)
: Binary(Sample->Binary), CallStack(Sample->CallStack),
LBRStack(Sample->LBRStack),
InstPtr(Sample->Binary, Sample->CallStack.front()) {}
: Binary(Sample->Binary), LBRStack(Sample->LBRStack),
InstPtr(Sample->Binary, Sample->CallStack.front()) {
initFrameTrie(Sample->CallStack);
}
bool validateInitialState() {
uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
uint64_t StackLeaf = CallStack.front();
uint64_t LeafAddr = CurrentLeafFrame->Address;
// When we take a stack sample, ideally the sampling distance between the
// leaf IP of stack and the last LBR target shouldn't be very large.
// Use a heuristic size (0x100) to filter out broken records.
if (StackLeaf < LBRLeaf || StackLeaf >= LBRLeaf + 0x100) {
if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
WithColor::warning() << "Bogus trace: stack tip = "
<< format("%#010x", StackLeaf)
<< format("%#010x", LeafAddr)
<< ", LBR tip = " << format("%#010x\n", LBRLeaf);
return false;
}
@@ -228,19 +255,40 @@ }
}
void checkStateConsistency() {
assert(InstPtr.Address == CallStack.front() &&
assert(InstPtr.Address == CurrentLeafFrame->Address &&
"IP should align with context leaf");
}
std::string getExpandedContextStr() const {
return Binary->getExpandedContextStr(CallStack);
}
const ProfiledBinary *getBinary() const { return Binary; }
bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
void advanceLBR() { LBRIndex++; }
ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
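// Enter the child frame at Address (creating it if needed) and make it the
// new leaf.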
void pushFrame(uint64_t Address) {
CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
}
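// Switch the leaf to its sibling at Address, keeping the parent context;
// no-op if the leaf is already at Address.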
void switchToFrame(uint64_t Address) {
if (CurrentLeafFrame->Address == Address)
return;
CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
}
void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
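// Seed the trie with the sampled call stack, walking from root to leaf.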
void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
ProfiledFrame *Cur = &DummyTrieRoot;
for (auto Address : reverse(CallStack)) {
Cur = Cur->getOrCreateChildFrame(Address);
}
CurrentLeafFrame = Cur;
}
ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
};
// Base class for sample counter key with context
@@ -330,6 +378,56 @@ using ContextSampleCounterMap =
std::unordered_map<Hashable<ContextKey>, SampleCounter,
Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
struct FrameStack {
SmallVector<uint64_t, 16> Stack;
const ProfiledBinary *Binary;
FrameStack(const ProfiledBinary *B) : Binary(B) {}
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
Stack.push_back(Cur->Address);
return true;
}
void popFrame() {
if (!Stack.empty())
Stack.pop_back();
}
std::shared_ptr<StringBasedCtxKey> getContextKey();
};
struct ProbeStack {
SmallVector<const PseudoProbe *, 16> Stack;
const ProfiledBinary *Binary;
ProbeStack(const ProfiledBinary *B) : Binary(B) {}
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Cur->Address);
// We may not find a probe for a merged or external callsite.
// Callsite merging may cause the loss of original probe IDs.
// Cutting off the context from here since the inliner will
// not know how to consume a context with unknown callsites.
if (!CallProbe)
return false;
Stack.push_back(CallProbe);
return true;
}
void popFrame() {
if (!Stack.empty())
Stack.pop_back();
}
// Use a pseudo probe based context key to get the sample counter.
// A context stands for a call path from 'main' to an uninlined
// callee with all inline frames recovered on that path. The probes
// belonging to that call path are the probes either originating from
// the callee or from any functions inlined into the callee. Since
// decoded pseudo probes are organized in a trie, the tree path from
// the trie root (which is the uninlined callee) to the probe node
// forms an inline context.
// Here we use a list of probe pointers as the context key to speed up
// aggregation, and the final context string will be generated in
// ProfileGenerator.
std::shared_ptr<ProbeBasedCtxKey> getContextKey();
};
/*
As in hybrid sample we have a group of LBRs and the most recent sampling call
stack, we can walk through those LBRs to infer more call stacks which would be
@@ -351,47 +449,43 @@ range as sample counter for further CS profile generation.
*/
class VirtualUnwinder {
public:
VirtualUnwinder(ContextSampleCounterMap *Counter) : CtxCounterMap(Counter) {}
VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
: CtxCounterMap(Counter), Binary(B) {}
bool unwind(const HybridSample *Sample, uint64_t Repeat);
private:
bool isCallState(UnwindState &State) const {
// The tail call frame is always missing here in the stack sample; we will
// use a specific tail call tracker to infer it.
return State.getBinary()->addressIsCall(State.getCurrentLBRSource());
return Binary->addressIsCall(State.getCurrentLBRSource());
}
bool isReturnState(UnwindState &State) const {
// Simply check addressIsReturn, as ret is always reliable, both for
// regular call and tail call.
return State.getBinary()->addressIsReturn(State.getCurrentLBRSource());
return Binary->addressIsReturn(State.getCurrentLBRSource());
}
void unwindCall(UnwindState &State);
void unwindLinear(UnwindState &State, uint64_t Repeat);
void unwindReturn(UnwindState &State);
void unwindBranchWithinFrame(UnwindState &State);
bool unwind(const HybridSample *Sample, uint64_t Repeat);
template <typename T>
void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
// Collect the samples on each trie node by DFS traversal
template <typename T>
void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
uint64_t Repeat);
void recordBranchCount(const LBREntry &Branch, UnwindState &State,
uint64_t Repeat);
SampleCounter &getOrCreateCounter(const ProfiledBinary *Binary,
std::list<uint64_t> &CallStack);
// Use a pseudo probe based context key to get the sample counter.
// A context stands for a call path from 'main' to an uninlined
// callee with all inline frames recovered on that path. The probes
// belonging to that call path are the probes either originating from
// the callee or from any functions inlined into the callee. Since
// decoded pseudo probes are organized in a trie, the tree path from
// the trie root (which is the uninlined callee) to the probe node
// forms an inline context.
// Here we use a list of probe pointers as the context key to speed up
// aggregation, and the final context string will be generated in
// ProfileGenerator.
SampleCounter &getOrCreateCounterForProbe(const ProfiledBinary *Binary,
std::list<uint64_t> &CallStack);
private:
ContextSampleCounterMap *CtxCounterMap;
// Profiled binary that the current frame address belongs to
const ProfiledBinary *Binary;
};
// Filename to binary map
@@ -457,10 +551,11 @@ private:
// Parse the hybrid sample including the call and LBR line
void parseHybridSample(TraceStream &TraceIt);
// Extract call stack from the perf trace lines
bool extractCallstack(TraceStream &TraceIt, std::list<uint64_t> &CallStack);
bool extractCallstack(TraceStream &TraceIt,
SmallVectorImpl<uint64_t> &CallStack);
// Extract LBR stack from one perf trace line
bool extractLBRStack(TraceStream &TraceIt,
SmallVector<LBREntry, 16> &LBRStack,
SmallVectorImpl<LBREntry> &LBRStack,
ProfiledBinary *Binary);
void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames);
// Post process the profile after trace aggregation, we will do simple range


@@ -126,13 +126,13 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
Context2.begin(), Context2.begin() + Context2.size() - 1);
}
std::string
ProfiledBinary::getExpandedContextStr(const std::list<uint64_t> &Stack) const {
std::string ProfiledBinary::getExpandedContextStr(
const SmallVectorImpl<uint64_t> &Stack) const {
std::string ContextStr;
SmallVector<std::string, 16> ContextVec;
// Process from frame root to leaf
for (auto Iter = Stack.rbegin(); Iter != Stack.rend(); Iter++) {
uint64_t Offset = virtualAddrToOffset(*Iter);
for (auto Address : Stack) {
uint64_t Offset = virtualAddrToOffset(Address);
const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset);
for (const auto &Loc : ExpandedContext) {
ContextVec.push_back(getCallSite(Loc));


@@ -236,7 +236,8 @@ public:
// Get the context string of the current stack with inline context filled in.
// It will search the disassembling info stored in Offset2LocStackMap. This is
// used as the key of the function sample map
std::string getExpandedContextStr(const std::list<uint64_t> &stack) const;
std::string
getExpandedContextStr(const SmallVectorImpl<uint64_t> &Stack) const;
const PseudoProbe *getCallProbeForAddr(uint64_t Address) const {
return ProbeDecoder.getCallProbeForAddr(Address);