mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 10:32:48 +02:00
Revert "[CSSPGO][llvm-profgen] Aggregate samples on call frame trie to speed up profile generation"
This reverts commit 1714ad2336293f351b15dd4b518f9e8618ec38f2.
This commit is contained in:
parent
b7bd8d62b5
commit
924cc19d25
@ -28,12 +28,11 @@ void VirtualUnwinder::unwindCall(UnwindState &State) {
|
||||
// 2nd frame is in prolog/epilog. In the future, we will switch to
|
||||
// pro/epi tracker(Dwarf CFI) for the precise check.
|
||||
uint64_t Source = State.getCurrentLBRSource();
|
||||
auto *ParentFrame = State.getParentFrame();
|
||||
if (ParentFrame == State.getDummyRootPtr() ||
|
||||
ParentFrame->Address != Source) {
|
||||
State.switchToFrame(Source);
|
||||
auto Iter = State.CallStack.begin();
|
||||
if (State.CallStack.size() == 1 || *(++Iter) != Source) {
|
||||
State.CallStack.front() = Source;
|
||||
} else {
|
||||
State.popFrame();
|
||||
State.CallStack.pop_front();
|
||||
}
|
||||
State.InstPtr.update(Source);
|
||||
}
|
||||
@ -42,29 +41,26 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
|
||||
InstructionPointer &IP = State.InstPtr;
|
||||
uint64_t Target = State.getCurrentLBRTarget();
|
||||
uint64_t End = IP.Address;
|
||||
if (Binary->usePseudoProbes()) {
|
||||
// We don't need the top frame probe since it should be extracted
|
||||
// from the range.
|
||||
if (State.getBinary()->usePseudoProbes()) {
|
||||
// The outcome of the virtual unwinding with pseudo probes is a
|
||||
// map from a context key to the address range being unwound.
|
||||
// This means basically linear unwinding is not needed for pseudo
|
||||
// probes. The range will be simply recorded here and will be
|
||||
// converted to a list of pseudo probes to report in ProfileGenerator.
|
||||
State.getParentFrame()->recordRangeCount(Target, End, Repeat);
|
||||
recordRangeCount(Target, End, State, Repeat);
|
||||
} else {
|
||||
// Unwind linear execution part
|
||||
uint64_t LeafAddr = State.CurrentLeafFrame->Address;
|
||||
while (IP.Address >= Target) {
|
||||
uint64_t PrevIP = IP.Address;
|
||||
IP.backward();
|
||||
// Break into segments for implicit call/return due to inlining
|
||||
bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address);
|
||||
bool SameInlinee =
|
||||
State.getBinary()->inlineContextEqual(PrevIP, IP.Address);
|
||||
if (!SameInlinee || PrevIP == Target) {
|
||||
State.switchToFrame(LeafAddr);
|
||||
State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat);
|
||||
recordRangeCount(PrevIP, End, State, Repeat);
|
||||
End = IP.Address;
|
||||
}
|
||||
LeafAddr = IP.Address;
|
||||
State.CallStack.front() = IP.Address;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -72,9 +68,9 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
|
||||
void VirtualUnwinder::unwindReturn(UnwindState &State) {
|
||||
// Add extra frame as we unwind through the return
|
||||
const LBREntry &LBR = State.getCurrentLBR();
|
||||
uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target);
|
||||
State.switchToFrame(CallAddr);
|
||||
State.pushFrame(LBR.Source);
|
||||
uint64_t CallAddr = State.getBinary()->getCallAddrFromFrameAddr(LBR.Target);
|
||||
State.CallStack.front() = CallAddr;
|
||||
State.CallStack.push_front(LBR.Source);
|
||||
State.InstPtr.update(LBR.Source);
|
||||
}
|
||||
|
||||
@ -82,100 +78,79 @@ void VirtualUnwinder::unwindBranchWithinFrame(UnwindState &State) {
|
||||
// TODO: Tolerate tail call for now, as we may see tail call from libraries.
|
||||
// This is only for intra function branches, excluding tail calls.
|
||||
uint64_t Source = State.getCurrentLBRSource();
|
||||
State.switchToFrame(Source);
|
||||
State.CallStack.front() = Source;
|
||||
State.InstPtr.update(Source);
|
||||
}
|
||||
|
||||
std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
|
||||
SampleCounter &
|
||||
VirtualUnwinder::getOrCreateCounter(const ProfiledBinary *Binary,
|
||||
std::list<uint64_t> &CallStack) {
|
||||
if (Binary->usePseudoProbes()) {
|
||||
return getOrCreateCounterForProbe(Binary, CallStack);
|
||||
}
|
||||
std::shared_ptr<StringBasedCtxKey> KeyStr =
|
||||
std::make_shared<StringBasedCtxKey>();
|
||||
KeyStr->Context = Binary->getExpandedContextStr(Stack);
|
||||
KeyStr->Context = Binary->getExpandedContextStr(CallStack);
|
||||
KeyStr->genHashCode();
|
||||
return KeyStr;
|
||||
auto Ret =
|
||||
CtxCounterMap->emplace(Hashable<ContextKey>(KeyStr), SampleCounter());
|
||||
return Ret.first->second;
|
||||
}
|
||||
|
||||
std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() {
|
||||
SampleCounter &
|
||||
VirtualUnwinder::getOrCreateCounterForProbe(const ProfiledBinary *Binary,
|
||||
std::list<uint64_t> &CallStack) {
|
||||
std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey =
|
||||
std::make_shared<ProbeBasedCtxKey>();
|
||||
for (auto CallProbe : Stack) {
|
||||
ProbeBasedKey->Probes.emplace_back(CallProbe);
|
||||
if (CallStack.size() > 1) {
|
||||
// We don't need the top frame probe since it should be extracted
|
||||
// from the range.
|
||||
// The top of stack is an instruction from the function where
|
||||
// the LBR address range physically resides. Strip it since
|
||||
// the function is not a part of the call context. We also
|
||||
// don't need its inline context since the probes being unwound
|
||||
// come with an inline context all the way back to the uninlined
|
||||
// function in their prefix tree.
|
||||
auto Iter = CallStack.rbegin();
|
||||
auto EndT = std::prev(CallStack.rend());
|
||||
for (; Iter != EndT; Iter++) {
|
||||
uint64_t Address = *Iter;
|
||||
const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Address);
|
||||
// We may not find a probe for a merged or external callsite.
|
||||
// Callsite merging may cause the loss of original probe IDs.
|
||||
// Cutting off the context from here since the inliner will
|
||||
// not know how to consume a context with unknown callsites.
|
||||
if (!CallProbe)
|
||||
break;
|
||||
ProbeBasedKey->Probes.emplace_back(CallProbe);
|
||||
}
|
||||
}
|
||||
CSProfileGenerator::compressRecursionContext<const PseudoProbe *>(
|
||||
ProbeBasedKey->Probes);
|
||||
ProbeBasedKey->genHashCode();
|
||||
return ProbeBasedKey;
|
||||
Hashable<ContextKey> ContextId(ProbeBasedKey);
|
||||
auto Ret = CtxCounterMap->emplace(ContextId, SampleCounter());
|
||||
return Ret.first->second;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
|
||||
T &Stack) {
|
||||
if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty())
|
||||
return;
|
||||
|
||||
std::shared_ptr<ContextKey> Key = Stack.getContextKey();
|
||||
auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter());
|
||||
SampleCounter &SCounter = Ret.first->second;
|
||||
for (auto &Item : Cur->RangeSamples) {
|
||||
uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
|
||||
uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
|
||||
SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item));
|
||||
}
|
||||
|
||||
for (auto &Item : Cur->BranchSamples) {
|
||||
uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
|
||||
uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
|
||||
SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void VirtualUnwinder::collectSamplesFromFrameTrie(
|
||||
UnwindState::ProfiledFrame *Cur, T &Stack) {
|
||||
if (!Cur->isDummyRoot()) {
|
||||
if (!Stack.pushFrame(Cur)) {
|
||||
// Process truncated context
|
||||
for (const auto &Item : Cur->Children) {
|
||||
// Start a new traversal ignoring its bottom context
|
||||
collectSamplesFromFrameTrie(Item.second.get());
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
collectSamplesFromFrame(Cur, Stack);
|
||||
// Process children frame
|
||||
for (const auto &Item : Cur->Children) {
|
||||
collectSamplesFromFrameTrie(Item.second.get(), Stack);
|
||||
}
|
||||
// Recover the call stack
|
||||
Stack.popFrame();
|
||||
}
|
||||
|
||||
void VirtualUnwinder::collectSamplesFromFrameTrie(
|
||||
UnwindState::ProfiledFrame *Cur) {
|
||||
if (Binary->usePseudoProbes()) {
|
||||
ProbeStack Stack(Binary);
|
||||
collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack);
|
||||
} else {
|
||||
FrameStack Stack(Binary);
|
||||
collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
|
||||
}
|
||||
void VirtualUnwinder::recordRangeCount(uint64_t Start, uint64_t End,
|
||||
UnwindState &State, uint64_t Repeat) {
|
||||
uint64_t StartOffset = State.getBinary()->virtualAddrToOffset(Start);
|
||||
uint64_t EndOffset = State.getBinary()->virtualAddrToOffset(End);
|
||||
SampleCounter &SCounter =
|
||||
getOrCreateCounter(State.getBinary(), State.CallStack);
|
||||
SCounter.recordRangeCount(StartOffset, EndOffset, Repeat);
|
||||
}
|
||||
|
||||
void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
|
||||
UnwindState &State, uint64_t Repeat) {
|
||||
if (Branch.IsArtificial)
|
||||
return;
|
||||
|
||||
if (Binary->usePseudoProbes()) {
|
||||
// Same as recordRangeCount, we don't need the top frame probe since we will
|
||||
// extract it from branch's source address
|
||||
State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target,
|
||||
Repeat);
|
||||
} else {
|
||||
State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target,
|
||||
Repeat);
|
||||
}
|
||||
uint64_t SourceOffset = State.getBinary()->virtualAddrToOffset(Branch.Source);
|
||||
uint64_t TargetOffset = State.getBinary()->virtualAddrToOffset(Branch.Target);
|
||||
SampleCounter &SCounter =
|
||||
getOrCreateCounter(State.getBinary(), State.CallStack);
|
||||
SCounter.recordBranchCount(SourceOffset, TargetOffset, Repeat);
|
||||
}
|
||||
|
||||
bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
|
||||
@ -224,8 +199,6 @@ bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
|
||||
// Record `branch` with calling context after unwinding.
|
||||
recordBranchCount(Branch, State, Repeat);
|
||||
}
|
||||
// As samples are aggregated on trie, record them into counter map
|
||||
collectSamplesFromFrameTrie(State.getDummyRootPtr());
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -352,8 +325,7 @@ void PerfReader::printUnwinderOutput() {
|
||||
void PerfReader::unwindSamples() {
|
||||
for (const auto &Item : AggregatedSamples) {
|
||||
const HybridSample *Sample = dyn_cast<HybridSample>(Item.first.getPtr());
|
||||
VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary],
|
||||
Sample->Binary);
|
||||
VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary]);
|
||||
Unwinder.unwind(Sample, Item.second);
|
||||
}
|
||||
|
||||
@ -362,7 +334,7 @@ void PerfReader::unwindSamples() {
|
||||
}
|
||||
|
||||
bool PerfReader::extractLBRStack(TraceStream &TraceIt,
|
||||
SmallVectorImpl<LBREntry> &LBRStack,
|
||||
SmallVector<LBREntry, 16> &LBRStack,
|
||||
ProfiledBinary *Binary) {
|
||||
// The raw format of LBR stack is like:
|
||||
// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
|
||||
@ -426,7 +398,7 @@ bool PerfReader::extractLBRStack(TraceStream &TraceIt,
|
||||
}
|
||||
|
||||
bool PerfReader::extractCallstack(TraceStream &TraceIt,
|
||||
SmallVectorImpl<uint64_t> &CallStack) {
|
||||
std::list<uint64_t> &CallStack) {
|
||||
// The raw format of call stack is like:
|
||||
// 4005dc # leaf frame
|
||||
// 400634
|
||||
|
@ -133,7 +133,7 @@ struct HybridSample : public PerfSample {
|
||||
// Profiled binary that current frame address belongs to
|
||||
ProfiledBinary *Binary;
|
||||
// Call stack recorded in FILO(leaf to root) order
|
||||
SmallVector<uint64_t, 16> CallStack;
|
||||
std::list<uint64_t> CallStack;
|
||||
// LBR stack recorded in FIFO order
|
||||
SmallVector<LBREntry, 16> LBRStack;
|
||||
|
||||
@ -147,7 +147,7 @@ struct HybridSample : public PerfSample {
|
||||
const HybridSample *Other = dyn_cast<HybridSample>(K);
|
||||
if (Other->Binary != Binary)
|
||||
return false;
|
||||
const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
|
||||
const std::list<uint64_t> &OtherCallStack = Other->CallStack;
|
||||
const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
|
||||
|
||||
if (CallStack.size() != OtherCallStack.size() ||
|
||||
@ -193,40 +193,14 @@ using AggregatedCounter =
|
||||
std::unordered_map<Hashable<PerfSample>, uint64_t,
|
||||
Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
|
||||
|
||||
using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
|
||||
// The state for the unwinder, it doesn't hold the data but only keeps the
|
||||
// pointer/index of the data. While unwinding, the CallStack is changed
|
||||
// dynamically and will be recorded as the context of the sample
|
||||
struct UnwindState {
|
||||
// Profiled binary that current frame address belongs to
|
||||
const ProfiledBinary *Binary;
|
||||
// Call stack trie node
|
||||
struct ProfiledFrame {
|
||||
const uint64_t Address = 0;
|
||||
ProfiledFrame *Parent;
|
||||
SampleVector RangeSamples;
|
||||
SampleVector BranchSamples;
|
||||
std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
|
||||
|
||||
ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
|
||||
: Address(Addr), Parent(P) {}
|
||||
ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
|
||||
assert(Address && "Address can't be zero!");
|
||||
auto Ret = Children.emplace(
|
||||
Address, std::make_unique<ProfiledFrame>(Address, this));
|
||||
return Ret.first->second.get();
|
||||
}
|
||||
void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
|
||||
RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
|
||||
}
|
||||
void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
|
||||
BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
|
||||
}
|
||||
bool isDummyRoot() { return Address == 0; }
|
||||
};
|
||||
|
||||
ProfiledFrame DummyTrieRoot;
|
||||
ProfiledFrame *CurrentLeafFrame;
|
||||
// TODO: switch to use trie for call stack
|
||||
std::list<uint64_t> CallStack;
|
||||
// Used to fall through the LBR stack
|
||||
uint32_t LBRIndex = 0;
|
||||
// Reference to HybridSample.LBRStack
|
||||
@ -234,20 +208,19 @@ struct UnwindState {
|
||||
// Used to iterate the address range
|
||||
InstructionPointer InstPtr;
|
||||
UnwindState(const HybridSample *Sample)
|
||||
: Binary(Sample->Binary), LBRStack(Sample->LBRStack),
|
||||
InstPtr(Sample->Binary, Sample->CallStack.front()) {
|
||||
initFrameTrie(Sample->CallStack);
|
||||
}
|
||||
: Binary(Sample->Binary), CallStack(Sample->CallStack),
|
||||
LBRStack(Sample->LBRStack),
|
||||
InstPtr(Sample->Binary, Sample->CallStack.front()) {}
|
||||
|
||||
bool validateInitialState() {
|
||||
uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
|
||||
uint64_t LeafAddr = CurrentLeafFrame->Address;
|
||||
uint64_t StackLeaf = CallStack.front();
|
||||
// When we take a stack sample, ideally the sampling distance between the
|
||||
// leaf IP of stack and the last LBR target shouldn't be very large.
|
||||
// Use a heuristic size (0x100) to filter out broken records.
|
||||
if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
|
||||
if (StackLeaf < LBRLeaf || StackLeaf >= LBRLeaf + 0x100) {
|
||||
WithColor::warning() << "Bogus trace: stack tip = "
|
||||
<< format("%#010x", LeafAddr)
|
||||
<< format("%#010x", StackLeaf)
|
||||
<< ", LBR tip = " << format("%#010x\n", LBRLeaf);
|
||||
return false;
|
||||
}
|
||||
@ -255,40 +228,19 @@ struct UnwindState {
|
||||
}
|
||||
|
||||
void checkStateConsistency() {
|
||||
assert(InstPtr.Address == CurrentLeafFrame->Address &&
|
||||
assert(InstPtr.Address == CallStack.front() &&
|
||||
"IP should align with context leaf");
|
||||
}
|
||||
|
||||
std::string getExpandedContextStr() const {
|
||||
return Binary->getExpandedContextStr(CallStack);
|
||||
}
|
||||
const ProfiledBinary *getBinary() const { return Binary; }
|
||||
bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
|
||||
uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
|
||||
uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
|
||||
const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
|
||||
void advanceLBR() { LBRIndex++; }
|
||||
|
||||
ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
|
||||
|
||||
void pushFrame(uint64_t Address) {
|
||||
CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
|
||||
}
|
||||
|
||||
void switchToFrame(uint64_t Address) {
|
||||
if (CurrentLeafFrame->Address == Address)
|
||||
return;
|
||||
CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
|
||||
}
|
||||
|
||||
void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
|
||||
|
||||
void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
|
||||
ProfiledFrame *Cur = &DummyTrieRoot;
|
||||
for (auto Address : reverse(CallStack)) {
|
||||
Cur = Cur->getOrCreateChildFrame(Address);
|
||||
}
|
||||
CurrentLeafFrame = Cur;
|
||||
}
|
||||
|
||||
ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
|
||||
};
|
||||
|
||||
// Base class for sample counter key with context
|
||||
@ -378,56 +330,6 @@ using ContextSampleCounterMap =
|
||||
std::unordered_map<Hashable<ContextKey>, SampleCounter,
|
||||
Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
|
||||
|
||||
struct FrameStack {
|
||||
SmallVector<uint64_t, 16> Stack;
|
||||
const ProfiledBinary *Binary;
|
||||
FrameStack(const ProfiledBinary *B) : Binary(B) {}
|
||||
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
|
||||
Stack.push_back(Cur->Address);
|
||||
return true;
|
||||
}
|
||||
|
||||
void popFrame() {
|
||||
if (!Stack.empty())
|
||||
Stack.pop_back();
|
||||
}
|
||||
std::shared_ptr<StringBasedCtxKey> getContextKey();
|
||||
};
|
||||
|
||||
struct ProbeStack {
|
||||
SmallVector<const PseudoProbe *, 16> Stack;
|
||||
const ProfiledBinary *Binary;
|
||||
ProbeStack(const ProfiledBinary *B) : Binary(B) {}
|
||||
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
|
||||
const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Cur->Address);
|
||||
// We may not find a probe for a merged or external callsite.
|
||||
// Callsite merging may cause the loss of original probe IDs.
|
||||
// Cutting off the context from here since the inliner will
|
||||
// not know how to consume a context with unknown callsites.
|
||||
if (!CallProbe)
|
||||
return false;
|
||||
Stack.push_back(CallProbe);
|
||||
return true;
|
||||
}
|
||||
|
||||
void popFrame() {
|
||||
if (!Stack.empty())
|
||||
Stack.pop_back();
|
||||
}
|
||||
// Use pseudo probe based context key to get the sample counter
|
||||
// A context stands for a call path from 'main' to an uninlined
|
||||
// callee with all inline frames recovered on that path. The probes
|
||||
// belonging to that call path are the probes either originated from
|
||||
// the callee or from any functions inlined into the callee. Since
|
||||
// pseudo probes are organized in a tri-tree style after decoded,
|
||||
// the tree path from the tri-tree root (which is the uninlined
|
||||
// callee) to the probe node forms an inline context.
|
||||
// Here we use a list of probe(pointer) as the context key to speed up
|
||||
// aggregation and the final context string will be generated in
|
||||
// ProfileGenerator
|
||||
std::shared_ptr<ProbeBasedCtxKey> getContextKey();
|
||||
};
|
||||
|
||||
/*
|
||||
As in hybrid sample we have a group of LBRs and the most recent sampling call
|
||||
stack, we can walk through those LBRs to infer more call stacks which would be
|
||||
@ -449,43 +351,47 @@ range as sample counter for further CS profile generation.
|
||||
*/
|
||||
class VirtualUnwinder {
|
||||
public:
|
||||
VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
|
||||
: CtxCounterMap(Counter), Binary(B) {}
|
||||
bool unwind(const HybridSample *Sample, uint64_t Repeat);
|
||||
VirtualUnwinder(ContextSampleCounterMap *Counter) : CtxCounterMap(Counter) {}
|
||||
|
||||
private:
|
||||
bool isCallState(UnwindState &State) const {
|
||||
// The tail call frame is always missing here in stack sample, we will
|
||||
// use a specific tail call tracker to infer it.
|
||||
return Binary->addressIsCall(State.getCurrentLBRSource());
|
||||
return State.getBinary()->addressIsCall(State.getCurrentLBRSource());
|
||||
}
|
||||
|
||||
bool isReturnState(UnwindState &State) const {
|
||||
// Simply check addressIsReturn, as ret is always reliable, both for
|
||||
// regular call and tail call.
|
||||
return Binary->addressIsReturn(State.getCurrentLBRSource());
|
||||
return State.getBinary()->addressIsReturn(State.getCurrentLBRSource());
|
||||
}
|
||||
|
||||
void unwindCall(UnwindState &State);
|
||||
void unwindLinear(UnwindState &State, uint64_t Repeat);
|
||||
void unwindReturn(UnwindState &State);
|
||||
void unwindBranchWithinFrame(UnwindState &State);
|
||||
|
||||
template <typename T>
|
||||
void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
|
||||
// Collect each samples on trie node by DFS traversal
|
||||
template <typename T>
|
||||
void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
|
||||
void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
|
||||
|
||||
bool unwind(const HybridSample *Sample, uint64_t Repeat);
|
||||
void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
|
||||
uint64_t Repeat);
|
||||
void recordBranchCount(const LBREntry &Branch, UnwindState &State,
|
||||
uint64_t Repeat);
|
||||
SampleCounter &getOrCreateCounter(const ProfiledBinary *Binary,
|
||||
std::list<uint64_t> &CallStack);
|
||||
// Use pseudo probe based context key to get the sample counter
|
||||
// A context stands for a call path from 'main' to an uninlined
|
||||
// callee with all inline frames recovered on that path. The probes
|
||||
// belonging to that call path are the probes either originated from
|
||||
// the callee or from any functions inlined into the callee. Since
|
||||
// pseudo probes are organized in a tri-tree style after decoded,
|
||||
// the tree path from the tri-tree root (which is the uninlined
|
||||
// callee) to the probe node forms an inline context.
|
||||
// Here we use a list of probe(pointer) as the context key to speed up
|
||||
// aggregation and the final context string will be generated in
|
||||
// ProfileGenerator
|
||||
SampleCounter &getOrCreateCounterForProbe(const ProfiledBinary *Binary,
|
||||
std::list<uint64_t> &CallStack);
|
||||
|
||||
private:
|
||||
ContextSampleCounterMap *CtxCounterMap;
|
||||
// Profiled binary that current frame address belongs to
|
||||
const ProfiledBinary *Binary;
|
||||
};
|
||||
|
||||
// Filename to binary map
|
||||
@ -551,11 +457,10 @@ private:
|
||||
// Parse the hybrid sample including the call and LBR line
|
||||
void parseHybridSample(TraceStream &TraceIt);
|
||||
// Extract call stack from the perf trace lines
|
||||
bool extractCallstack(TraceStream &TraceIt,
|
||||
SmallVectorImpl<uint64_t> &CallStack);
|
||||
bool extractCallstack(TraceStream &TraceIt, std::list<uint64_t> &CallStack);
|
||||
// Extract LBR stack from one perf trace line
|
||||
bool extractLBRStack(TraceStream &TraceIt,
|
||||
SmallVectorImpl<LBREntry> &LBRStack,
|
||||
SmallVector<LBREntry, 16> &LBRStack,
|
||||
ProfiledBinary *Binary);
|
||||
void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames);
|
||||
// Post process the profile after trace aggregation, we will do simple range
|
||||
|
@ -126,13 +126,13 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
|
||||
Context2.begin(), Context2.begin() + Context2.size() - 1);
|
||||
}
|
||||
|
||||
std::string ProfiledBinary::getExpandedContextStr(
|
||||
const SmallVectorImpl<uint64_t> &Stack) const {
|
||||
std::string
|
||||
ProfiledBinary::getExpandedContextStr(const std::list<uint64_t> &Stack) const {
|
||||
std::string ContextStr;
|
||||
SmallVector<std::string, 16> ContextVec;
|
||||
// Process from frame root to leaf
|
||||
for (auto Address : Stack) {
|
||||
uint64_t Offset = virtualAddrToOffset(Address);
|
||||
for (auto Iter = Stack.rbegin(); Iter != Stack.rend(); Iter++) {
|
||||
uint64_t Offset = virtualAddrToOffset(*Iter);
|
||||
const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset);
|
||||
for (const auto &Loc : ExpandedContext) {
|
||||
ContextVec.push_back(getCallSite(Loc));
|
||||
|
@ -236,8 +236,7 @@ public:
|
||||
// Get the context string of the current stack with inline context filled in.
|
||||
// It will search the disassembling info stored in Offset2LocStackMap. This is
|
||||
// used as the key of function sample map
|
||||
std::string
|
||||
getExpandedContextStr(const SmallVectorImpl<uint64_t> &Stack) const;
|
||||
std::string getExpandedContextStr(const std::list<uint64_t> &stack) const;
|
||||
|
||||
const PseudoProbe *getCallProbeForAddr(uint64_t Address) const {
|
||||
return ProbeDecoder.getCallProbeForAddr(Address);
|
||||
|
Loading…
Reference in New Issue
Block a user