1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00

[CSSPGO][llvm-profgen] Filter out the instructions without location info for symbolizer

It appears some instructions doesn't have the debug location info and the symbolizer will return an empty call stack for them which will cause some crash later in profile unwinding. Actually we do not record the sample info for them, so this change just filter out those instruction.

As those instruction would appears at the begin and end of the instruction list, without them we need to add the boundary check for IP `advance` and `backward`.

Also for pseudo probe based profile, we actually don't need the symbolized location info, so here just change to use an empty stack for it. This could save half of the binary loading time.

Differential Revision: https://reviews.llvm.org/D96434
This commit is contained in:
wlei 2021-02-10 10:04:39 -08:00
parent fe7a083c4c
commit be4f05ba0b
7 changed files with 58 additions and 36 deletions

View File

@ -1,12 +1,12 @@
; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
; RUN: FileCheck %s --input-file %t ; RUN: FileCheck %s --input-file %t
; CHECK:[main:1 @ foo]:44:0 ; CHECK:[main:1 @ foo]:309:0
; CHECK: 2.1: 14 ; CHECK: 2.1: 14
; CHECK: 3: 15 ; CHECK: 3: 15
; CHECK: 3.1: 14 bar:14 ; CHECK: 3.1: 14 bar:14
; CHECK: 3.2: 1 ; CHECK: 3.2: 1
; CHECK:[main:1 @ foo:3.1 @ bar]:14:0 ; CHECK:[main:1 @ foo:3.1 @ bar]:84:0
; CHECK: 1: 14 ; CHECK: 1: 14
; CHECK-UNWINDER: Binary(inline-cs-noprobe.perfbin)'s Range Counter: ; CHECK-UNWINDER: Binary(inline-cs-noprobe.perfbin)'s Range Counter:

View File

@ -1,15 +1,15 @@
; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
; RUN: FileCheck %s --input-file %t ; RUN: FileCheck %s --input-file %t
; CHECK:[main:1 @ foo:3 @ bar]:12:3 ; CHECK:[main:1 @ foo]:54:0
; CHECK: 2: 3
; CHECK: 3: 3 bar:3
; CHECK:[main:1 @ foo:3 @ bar]:50:3
; CHECK: 0: 3 ; CHECK: 0: 3
; CHECK: 1: 3 ; CHECK: 1: 3
; CHECK: 2: 2 ; CHECK: 2: 2
; CHECK: 4: 1 ; CHECK: 4: 1
; CHECK: 5: 3 ; CHECK: 5: 3
; CHECK:[main:1 @ foo]:6:0
; CHECK: 2: 3
; CHECK: 3: 3 bar:3
; CHECK-UNWINDER: Binary(noinline-cs-noprobe.perfbin)'s Range Counter: ; CHECK-UNWINDER: Binary(noinline-cs-noprobe.perfbin)'s Range Counter:
; CHECK-UNWINDER: main:1 @ foo ; CHECK-UNWINDER: main:1 @ foo

View File

@ -4,38 +4,38 @@
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --csprof-cold-thres=0 ; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --csprof-cold-thres=0
; RUN: FileCheck %s --input-file %t ; RUN: FileCheck %s --input-file %t
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0 ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:48:0
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 2: 13 fb:11
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:12:0
; CHECK-UNCOMPRESS: 1: 11 ; CHECK-UNCOMPRESS: 1: 11
; CHECK-UNCOMPRESS: 2: 1 fa:1 ; CHECK-UNCOMPRESS: 2: 1 fa:1
; CHECK-UNCOMPRESS:[main:1 @ foo]:3:0 ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:24:0
; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 2: 13 fb:11
; CHECK-UNCOMPRESS:[main:1 @ foo]:7:0
; CHECK-UNCOMPRESS: 2: 1 ; CHECK-UNCOMPRESS: 2: 1
; CHECK-UNCOMPRESS: 3: 2 fa:1 ; CHECK-UNCOMPRESS: 3: 2 fa:1
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:3:0 ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:7:0
; CHECK-UNCOMPRESS: 1: 1 ; CHECK-UNCOMPRESS: 1: 1
; CHECK-UNCOMPRESS: 2: 2 fb:1 ; CHECK-UNCOMPRESS: 2: 2 fb:1
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0 ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:2:0
; CHECK-UNCOMPRESS: 2: 1 fa:1 ; CHECK-UNCOMPRESS: 2: 1 fa:1
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:1:0 ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:2:0
; CHECK-UNCOMPRESS: 4: 1 ; CHECK-UNCOMPRESS: 4: 1
; CHECK: [main:1 @ foo:3 @ fa]:14:0 ; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:48:0
; CHECK: 1: 1
; CHECK: 2: 13 fb:11
; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:12:0
; CHECK: 1: 11 ; CHECK: 1: 11
; CHECK: 2: 1 fa:1 ; CHECK: 2: 1 fa:1
; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:4:0 ; CHECK: [main:1 @ foo:3 @ fa]:24:0
; CHECK: 1: 1
; CHECK: 2: 13 fb:11
; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:9:0
; CHECK: 1: 1 ; CHECK: 1: 1
; CHECK: 2: 2 fb:1 ; CHECK: 2: 2 fb:1
; CHECK: 4: 1 ; CHECK: 4: 1
; CHECK: [main:1 @ foo]:3:0 ; CHECK: [main:1 @ foo]:7:0
; CHECK: 2: 1 ; CHECK: 2: 1
; CHECK: 3: 2 fa:1 ; CHECK: 3: 2 fa:1
; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:0:0 ; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0
; original code: ; original code:

View File

@ -93,6 +93,8 @@ std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
std::shared_ptr<StringBasedCtxKey> KeyStr = std::shared_ptr<StringBasedCtxKey> KeyStr =
std::make_shared<StringBasedCtxKey>(); std::make_shared<StringBasedCtxKey>();
KeyStr->Context = Binary->getExpandedContextStr(Stack); KeyStr->Context = Binary->getExpandedContextStr(Stack);
if (KeyStr->Context.empty())
return nullptr;
KeyStr->genHashCode(); KeyStr->genHashCode();
return KeyStr; return KeyStr;
} }
@ -116,6 +118,8 @@ void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
return; return;
std::shared_ptr<ContextKey> Key = Stack.getContextKey(); std::shared_ptr<ContextKey> Key = Stack.getContextKey();
if (Key == nullptr)
return;
auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter()); auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter());
SampleCounter &SCounter = Ret.first->second; SampleCounter &SCounter = Ret.first->second;
for (auto &Item : Cur->RangeSamples) { for (auto &Item : Cur->RangeSamples) {

View File

@ -212,7 +212,6 @@ void CSProfileGenerator::updateBodySamplesforFunctionProfile(
FunctionProfile.addBodySamples(LeafLoc.second.LineOffset, FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
LeafLoc.second.Discriminator, LeafLoc.second.Discriminator,
Count - PreviousCount); Count - PreviousCount);
FunctionProfile.addTotalSamples(Count - PreviousCount);
} }
} }
@ -242,9 +241,13 @@ void CSProfileGenerator::populateFunctionBodySamples(
while (IP.Address <= RangeEnd) { while (IP.Address <= RangeEnd) {
uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
const FrameLocation &LeafLoc = Binary->getInlineLeafFrameLoc(Offset); auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
// Recording body sample for this specific context if (LeafLoc.hasValue()) {
updateBodySamplesforFunctionProfile(FunctionProfile, LeafLoc, Count); // Recording body sample for this specific context
updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
}
// Accumulate total sample count even it's a line with invalid debug info
FunctionProfile.addTotalSamples(Count);
// Move to next IP within the range // Move to next IP within the range
IP.advance(); IP.advance();
} }
@ -266,9 +269,11 @@ void CSProfileGenerator::populateFunctionBoundarySamples(
continue; continue;
// Record called target sample and its count // Record called target sample and its count
const FrameLocation &LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
FunctionProfile.addCalledTargetSamples(LeafLoc.second.LineOffset, if (!LeafLoc.hasValue())
LeafLoc.second.Discriminator, continue;
FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
LeafLoc->second.Discriminator,
CalleeName, Count); CalleeName, Count);
// Record head sample for called target(callee) // Record head sample for called target(callee)
@ -277,7 +282,7 @@ void CSProfileGenerator::populateFunctionBoundarySamples(
OCalleeCtxStr << ContextId.rsplit(" @ ").first.str(); OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
OCalleeCtxStr << " @ "; OCalleeCtxStr << " @ ";
} }
OCalleeCtxStr << getCallSite(LeafLoc) << " @ " << CalleeName.str(); OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
FunctionSamples &CalleeProfile = FunctionSamples &CalleeProfile =
getFunctionProfileForContext(OCalleeCtxStr.str()); getFunctionProfileForContext(OCalleeCtxStr.str());

View File

@ -119,7 +119,8 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
const FrameLocationStack &Context2 = getFrameLocationStack(Offset2); const FrameLocationStack &Context2 = getFrameLocationStack(Offset2);
if (Context1.size() != Context2.size()) if (Context1.size() != Context2.size())
return false; return false;
if (Context1.empty())
return false;
// The leaf frame contains location within the leaf, and it // The leaf frame contains location within the leaf, and it
// needs to be remove that as it's not part of the calling context // needs to be remove that as it's not part of the calling context
return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1, return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
@ -134,6 +135,10 @@ std::string ProfiledBinary::getExpandedContextStr(
for (auto Address : Stack) { for (auto Address : Stack) {
uint64_t Offset = virtualAddrToOffset(Address); uint64_t Offset = virtualAddrToOffset(Address);
const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset); const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset);
// An instruction without a valid debug line will be ignored by sample
// processing
if (ExpandedContext.empty())
return std::string();
for (const auto &Loc : ExpandedContext) { for (const auto &Loc : ExpandedContext) {
ContextVec.push_back(getCallSite(Loc)); ContextVec.push_back(getCallSite(Loc));
} }
@ -242,9 +247,14 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode()); const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
// Populate a vector of the symbolized callsite at this location // Populate a vector of the symbolized callsite at this location
InstructionPointer IP(this, Offset); // We don't need symbolized info for probe-based profile, just use an empty
Offset2LocStackMap[Offset] = symbolize(IP, true); // stack as an entry to indicate a valid binary offset
FrameLocationStack SymbolizedCallStack;
if (!UsePseudoProbes) {
InstructionPointer IP(this, Offset);
SymbolizedCallStack = symbolize(IP, true);
}
Offset2LocStackMap[Offset] = SymbolizedCallStack;
// Populate address maps. // Populate address maps.
CodeAddrs.push_back(Offset); CodeAddrs.push_back(Offset);
if (MCDesc.isCall()) if (MCDesc.isCall())

View File

@ -11,6 +11,7 @@
#include "CallContext.h" #include "CallContext.h"
#include "PseudoProbe.h" #include "PseudoProbe.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmInfo.h"
@ -225,9 +226,11 @@ public:
return FuncStartAddrMap[Offset]; return FuncStartAddrMap[Offset];
} }
const FrameLocation &getInlineLeafFrameLoc(uint64_t Offset, Optional<const FrameLocation> getInlineLeafFrameLoc(uint64_t Offset) {
bool NameOnly = false) { const auto &Stack = getFrameLocationStack(Offset);
return getFrameLocationStack(Offset).back(); if (Stack.empty())
return {};
return Stack.back();
} }
// Compare two addresses' inline context // Compare two addresses' inline context