[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
//===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file implements the SampleProfileProber transformation.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
|
|
|
|
#include "llvm/ADT/Statistic.h"
|
2020-12-11 21:18:31 +01:00
|
|
|
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
|
|
|
#include "llvm/IR/BasicBlock.h"
|
|
|
|
#include "llvm/IR/CFG.h"
|
|
|
|
#include "llvm/IR/Constant.h"
|
|
|
|
#include "llvm/IR/Constants.h"
|
|
|
|
#include "llvm/IR/DebugInfoMetadata.h"
|
|
|
|
#include "llvm/IR/GlobalValue.h"
|
|
|
|
#include "llvm/IR/GlobalVariable.h"
|
|
|
|
#include "llvm/IR/IRBuilder.h"
|
|
|
|
#include "llvm/IR/Instruction.h"
|
|
|
|
#include "llvm/IR/MDBuilder.h"
|
|
|
|
#include "llvm/ProfileData/SampleProf.h"
|
|
|
|
#include "llvm/Support/CRC.h"
|
2020-12-11 21:18:31 +01:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
#include "llvm/Transforms/Instrumentation.h"
|
|
|
|
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
2020-12-11 21:18:31 +01:00
|
|
|
#include <unordered_set>
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "sample-profile-probe"
|
|
|
|
|
|
|
|
STATISTIC(ArtificialDbgLine,
|
|
|
|
"Number of probes that have an artificial debug line");
|
|
|
|
|
2020-12-11 21:18:31 +01:00
|
|
|
static cl::opt<bool>
|
|
|
|
VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
|
|
|
|
cl::desc("Do pseudo probe verification"));
|
|
|
|
|
|
|
|
static cl::list<std::string> VerifyPseudoProbeFuncList(
|
|
|
|
"verify-pseudo-probe-funcs", cl::Hidden,
|
|
|
|
cl::desc("The option to specify the name of the functions to verify."));
|
|
|
|
|
|
|
|
static cl::opt<bool>
|
|
|
|
UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
|
|
|
|
cl::desc("Update pseudo probe distribution factor"));
|
|
|
|
|
2021-05-13 20:06:44 +02:00
|
|
|
static uint64_t getCallStackHash(const DILocation *DIL) {
|
|
|
|
uint64_t Hash = 0;
|
|
|
|
const DILocation *InlinedAt = DIL ? DIL->getInlinedAt() : nullptr;
|
|
|
|
while (InlinedAt) {
|
|
|
|
Hash ^= MD5Hash(std::to_string(InlinedAt->getLine()));
|
|
|
|
Hash ^= MD5Hash(std::to_string(InlinedAt->getColumn()));
|
|
|
|
const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
|
|
|
|
// Use linkage name for C++ if possible.
|
|
|
|
auto Name = SP->getLinkageName();
|
|
|
|
if (Name.empty())
|
|
|
|
Name = SP->getName();
|
|
|
|
Hash ^= MD5Hash(Name);
|
|
|
|
InlinedAt = InlinedAt->getInlinedAt();
|
|
|
|
}
|
|
|
|
return Hash;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint64_t computeCallStackHash(const Instruction &Inst) {
|
|
|
|
return getCallStackHash(Inst.getDebugLoc());
|
|
|
|
}
|
|
|
|
|
2020-12-11 21:18:31 +01:00
|
|
|
bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
|
|
|
|
// Skip function declaration.
|
|
|
|
if (F->isDeclaration())
|
|
|
|
return false;
|
|
|
|
// Skip function that will not be emitted into object file. The prevailing
|
|
|
|
// defintion will be verified instead.
|
|
|
|
if (F->hasAvailableExternallyLinkage())
|
|
|
|
return false;
|
|
|
|
// Do a name matching.
|
|
|
|
static std::unordered_set<std::string> VerifyFuncNames(
|
|
|
|
VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
|
|
|
|
return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
|
|
|
|
}
|
|
|
|
|
|
|
|
void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
|
|
|
|
if (VerifyPseudoProbe) {
|
|
|
|
PIC.registerAfterPassCallback(
|
|
|
|
[this](StringRef P, Any IR, const PreservedAnalyses &) {
|
|
|
|
this->runAfterPass(P, IR);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Callback to run after each transformation for the new pass manager.
|
|
|
|
void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
|
|
|
|
std::string Banner =
|
|
|
|
"\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
|
|
|
|
dbgs() << Banner;
|
|
|
|
if (any_isa<const Module *>(IR))
|
|
|
|
runAfterPass(any_cast<const Module *>(IR));
|
|
|
|
else if (any_isa<const Function *>(IR))
|
|
|
|
runAfterPass(any_cast<const Function *>(IR));
|
|
|
|
else if (any_isa<const LazyCallGraph::SCC *>(IR))
|
|
|
|
runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
|
|
|
|
else if (any_isa<const Loop *>(IR))
|
|
|
|
runAfterPass(any_cast<const Loop *>(IR));
|
|
|
|
else
|
|
|
|
llvm_unreachable("Unknown IR unit");
|
|
|
|
}
|
|
|
|
|
|
|
|
void PseudoProbeVerifier::runAfterPass(const Module *M) {
|
|
|
|
for (const Function &F : *M)
|
|
|
|
runAfterPass(&F);
|
|
|
|
}
|
|
|
|
|
|
|
|
void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
|
|
|
|
for (const LazyCallGraph::Node &N : *C)
|
|
|
|
runAfterPass(&N.getFunction());
|
|
|
|
}
|
|
|
|
|
|
|
|
void PseudoProbeVerifier::runAfterPass(const Function *F) {
|
|
|
|
if (!shouldVerifyFunction(F))
|
|
|
|
return;
|
|
|
|
ProbeFactorMap ProbeFactors;
|
|
|
|
for (const auto &BB : *F)
|
|
|
|
collectProbeFactors(&BB, ProbeFactors);
|
|
|
|
verifyProbeFactors(F, ProbeFactors);
|
|
|
|
}
|
|
|
|
|
|
|
|
void PseudoProbeVerifier::runAfterPass(const Loop *L) {
|
|
|
|
const Function *F = L->getHeader()->getParent();
|
|
|
|
runAfterPass(F);
|
|
|
|
}
|
|
|
|
|
|
|
|
void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
|
|
|
|
ProbeFactorMap &ProbeFactors) {
|
|
|
|
for (const auto &I : *Block) {
|
2021-05-13 20:06:44 +02:00
|
|
|
if (Optional<PseudoProbe> Probe = extractProbe(I)) {
|
|
|
|
uint64_t Hash = computeCallStackHash(I);
|
|
|
|
ProbeFactors[{Probe->Id, Hash}] += Probe->Factor;
|
|
|
|
}
|
2020-12-11 21:18:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void PseudoProbeVerifier::verifyProbeFactors(
|
|
|
|
const Function *F, const ProbeFactorMap &ProbeFactors) {
|
|
|
|
bool BannerPrinted = false;
|
|
|
|
auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
|
|
|
|
for (const auto &I : ProbeFactors) {
|
|
|
|
float CurProbeFactor = I.second;
|
|
|
|
if (PrevProbeFactors.count(I.first)) {
|
|
|
|
float PrevProbeFactor = PrevProbeFactors[I.first];
|
|
|
|
if (std::abs(CurProbeFactor - PrevProbeFactor) >
|
|
|
|
DistributionFactorVariance) {
|
|
|
|
if (!BannerPrinted) {
|
|
|
|
dbgs() << "Function " << F->getName() << ":\n";
|
|
|
|
BannerPrinted = true;
|
|
|
|
}
|
2021-05-13 20:06:44 +02:00
|
|
|
dbgs() << "Probe " << I.first.first << "\tprevious factor "
|
2020-12-11 21:18:31 +01:00
|
|
|
<< format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
|
|
|
|
<< format("%0.2f", CurProbeFactor) << "\n";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update
|
|
|
|
PrevProbeFactors[I.first] = I.second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-16 21:54:50 +01:00
|
|
|
PseudoProbeManager::PseudoProbeManager(const Module &M) {
|
|
|
|
if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
|
|
|
|
for (const auto *Operand : FuncInfo->operands()) {
|
|
|
|
const auto *MD = cast<MDNode>(Operand);
|
|
|
|
auto GUID =
|
|
|
|
mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
|
|
|
|
auto Hash =
|
|
|
|
mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
|
|
|
|
GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
const PseudoProbeDescriptor *
|
|
|
|
PseudoProbeManager::getDesc(const Function &F) const {
|
|
|
|
auto I = GUIDToProbeDescMap.find(
|
|
|
|
Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
|
|
|
|
return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PseudoProbeManager::moduleIsProbed(const Module &M) const {
|
|
|
|
return M.getNamedMetadata(PseudoProbeDescMetadataName);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PseudoProbeManager::profileIsValid(const Function &F,
|
|
|
|
const FunctionSamples &Samples) const {
|
|
|
|
const auto *Desc = getDesc(F);
|
|
|
|
if (!Desc) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function " << F.getName()
|
|
|
|
<< "\n");
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
|
|
|
|
<< "\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Pseudo probe encoding and emission.
This change implements pseudo probe encoding and emission for CSSPGO. Please see RFC here for more context: https://groups.google.com/g/llvm-dev/c/1p1rdYbL93s
Pseudo probes are in the form of intrinsic calls on IR/MIR but they do not turn into any machine instructions. Instead they are emitted into the binary as a piece of data in standalone sections. The probe-specific sections are not needed to be loaded into memory at execution time, thus they do not incur a runtime overhead.
**ELF object emission**
The binary data to emit are organized as two ELF sections, i.e, the `.pseudo_probe_desc` section and the `.pseudo_probe` section. The `.pseudo_probe_desc` section stores a function descriptor for each function and the `.pseudo_probe` section stores the actual probes, each fo which corresponds to an IR basic block or an IR function callsite. A function descriptor is stored as a module-level metadata during the compilation and is serialized into the object file during object emission.
Both the probe descriptors and pseudo probes can be emitted into a separate ELF section per function to leverage the linker for deduplication. A `.pseudo_probe` section shares the same COMDAT group with the function code so that when the function is dead, the probes are dead and disposed too. On the contrary, a `.pseudo_probe_desc` section has its own COMDAT group. This is because even if a function is dead, its probes may be inlined into other functions and its descriptor is still needed by the profile generation tool.
The format of `.pseudo_probe_desc` section looks like:
```
.section .pseudo_probe_desc,"",@progbits
.quad 6309742469962978389 // Func GUID
.quad 4294967295 // Func Hash
.byte 9 // Length of func name
.ascii "_Z5funcAi" // Func name
.quad 7102633082150537521
.quad 138828622701
.byte 12
.ascii "_Z8funcLeafi"
.quad 446061515086924981
.quad 4294967295
.byte 9
.ascii "_Z5funcBi"
.quad -2016976694713209516
.quad 72617220756
.byte 7
.ascii "_Z3fibi"
```
For each `.pseudoprobe` section, the encoded binary data consists of a single function record corresponding to an outlined function (i.e, a function with a code entry in the `.text` section). A function record has the following format :
```
FUNCTION BODY (one for each outlined function present in the text section)
GUID (uint64)
GUID of the function
NPROBES (ULEB128)
Number of probes originating from this function.
NUM_INLINED_FUNCTIONS (ULEB128)
Number of callees inlined into this function, aka number of
first-level inlinees
PROBE RECORDS
A list of NPROBES entries. Each entry contains:
INDEX (ULEB128)
TYPE (uint4)
0 - block probe, 1 - indirect call, 2 - direct call
ATTRIBUTE (uint3)
reserved
ADDRESS_TYPE (uint1)
0 - code address, 1 - address delta
CODE_ADDRESS (uint64 or ULEB128)
code address or address delta, depending on ADDRESS_TYPE
INLINED FUNCTION RECORDS
A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
callees. Each record contains:
INLINE SITE
GUID of the inlinee (uint64)
ID of the callsite probe (ULEB128)
FUNCTION BODY
A FUNCTION BODY entry describing the inlined function.
```
To support building a context-sensitive profile, probes from inlinees are grouped by their inline contexts. An inline context is logically a call path through which a callee function lands in a caller function. The probe emitter builds an inline tree based on the debug metadata for each outlined function in the form of a trie tree. A tree root is the outlined function. Each tree edge stands for a callsite where inlining happens. Pseudo probes originating from an inlinee function are stored in a tree node and the tree path starting from the root all the way down to the tree node is the inline context of the probes. The emission happens on the whole tree top-down recursively. Probes of a tree node will be emitted altogether with their direct parent edge. Since a pseudo probe corresponds to a real code address, for size savings, the address is encoded as a delta from the previous probe except for the first probe. Variant-sized integer encoding, aka LEB128, is used for address delta and probe index.
**Assembling**
Pseudo probes can be printed as assembly directives alternatively. This allows for good assembly code readability and also provides a view of how optimizations and pseudo probes affect each other, especially helpful for diff time assembly analysis.
A pseudo probe directive has the following operands in order: function GUID, probe index, probe type, probe attributes and inline context. The directive is generated by the compiler and can be parsed by the assembler to form an encoded `.pseudoprobe` section in the object file.
A example assembly looks like:
```
foo2: # @foo2
# %bb.0: # %bb0
pushq %rax
testl %edi, %edi
.pseudoprobe 837061429793323041 1 0 0
je .LBB1_1
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 6 2 0
callq foo
.pseudoprobe 837061429793323041 3 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
.LBB1_1: # %bb1
.pseudoprobe 837061429793323041 5 1 0
callq *%rsi
.pseudoprobe 837061429793323041 2 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
# -- End function
.section .pseudo_probe_desc,"",@progbits
.quad 6699318081062747564
.quad 72617220756
.byte 3
.ascii "foo"
.quad 837061429793323041
.quad 281547593931412
.byte 4
.ascii "foo2"
```
With inlining turned on, the assembly may look different around %bb2 with an inlined probe:
```
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 3 0
.pseudoprobe 6699318081062747564 1 0 @ 837061429793323041:6
.pseudoprobe 837061429793323041 4 0
popq %rax
retq
```
**Disassembling**
We have a disassembling tool (llvm-profgen) that can display disassembly alongside with pseudo probes. So far it only supports ELF executable file.
An example disassembly looks like:
```
00000000002011a0 <foo2>:
2011a0: 50 push rax
2011a1: 85 ff test edi,edi
[Probe]: FUNC: foo2 Index: 1 Type: Block
2011a3: 74 02 je 2011a7 <foo2+0x7>
[Probe]: FUNC: foo2 Index: 3 Type: Block
[Probe]: FUNC: foo2 Index: 4 Type: Block
[Probe]: FUNC: foo Index: 1 Type: Block Inlined: @ foo2:6
2011a5: 58 pop rax
2011a6: c3 ret
[Probe]: FUNC: foo2 Index: 2 Type: Block
2011a7: bf 01 00 00 00 mov edi,0x1
[Probe]: FUNC: foo2 Index: 5 Type: IndirectCall
2011ac: ff d6 call rsi
[Probe]: FUNC: foo2 Index: 4 Type: Block
2011ae: 58 pop rax
2011af: c3 ret
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D91878
2020-12-09 00:37:32 +01:00
|
|
|
SampleProfileProber::SampleProfileProber(Function &Func,
|
|
|
|
const std::string &CurModuleUniqueId)
|
|
|
|
: F(&Func), CurModuleUniqueId(CurModuleUniqueId) {
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
BlockProbeIds.clear();
|
2020-12-02 06:44:06 +01:00
|
|
|
CallProbeIds.clear();
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
|
|
|
|
computeProbeIdForBlocks();
|
2020-12-02 06:44:06 +01:00
|
|
|
computeProbeIdForCallsites();
|
[CSSPGO] Pseudo probe encoding and emission.
This change implements pseudo probe encoding and emission for CSSPGO. Please see RFC here for more context: https://groups.google.com/g/llvm-dev/c/1p1rdYbL93s
Pseudo probes are in the form of intrinsic calls on IR/MIR but they do not turn into any machine instructions. Instead they are emitted into the binary as a piece of data in standalone sections. The probe-specific sections are not needed to be loaded into memory at execution time, thus they do not incur a runtime overhead.
**ELF object emission**
The binary data to emit are organized as two ELF sections, i.e, the `.pseudo_probe_desc` section and the `.pseudo_probe` section. The `.pseudo_probe_desc` section stores a function descriptor for each function and the `.pseudo_probe` section stores the actual probes, each fo which corresponds to an IR basic block or an IR function callsite. A function descriptor is stored as a module-level metadata during the compilation and is serialized into the object file during object emission.
Both the probe descriptors and pseudo probes can be emitted into a separate ELF section per function to leverage the linker for deduplication. A `.pseudo_probe` section shares the same COMDAT group with the function code so that when the function is dead, the probes are dead and disposed too. On the contrary, a `.pseudo_probe_desc` section has its own COMDAT group. This is because even if a function is dead, its probes may be inlined into other functions and its descriptor is still needed by the profile generation tool.
The format of `.pseudo_probe_desc` section looks like:
```
.section .pseudo_probe_desc,"",@progbits
.quad 6309742469962978389 // Func GUID
.quad 4294967295 // Func Hash
.byte 9 // Length of func name
.ascii "_Z5funcAi" // Func name
.quad 7102633082150537521
.quad 138828622701
.byte 12
.ascii "_Z8funcLeafi"
.quad 446061515086924981
.quad 4294967295
.byte 9
.ascii "_Z5funcBi"
.quad -2016976694713209516
.quad 72617220756
.byte 7
.ascii "_Z3fibi"
```
For each `.pseudoprobe` section, the encoded binary data consists of a single function record corresponding to an outlined function (i.e, a function with a code entry in the `.text` section). A function record has the following format :
```
FUNCTION BODY (one for each outlined function present in the text section)
GUID (uint64)
GUID of the function
NPROBES (ULEB128)
Number of probes originating from this function.
NUM_INLINED_FUNCTIONS (ULEB128)
Number of callees inlined into this function, aka number of
first-level inlinees
PROBE RECORDS
A list of NPROBES entries. Each entry contains:
INDEX (ULEB128)
TYPE (uint4)
0 - block probe, 1 - indirect call, 2 - direct call
ATTRIBUTE (uint3)
reserved
ADDRESS_TYPE (uint1)
0 - code address, 1 - address delta
CODE_ADDRESS (uint64 or ULEB128)
code address or address delta, depending on ADDRESS_TYPE
INLINED FUNCTION RECORDS
A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
callees. Each record contains:
INLINE SITE
GUID of the inlinee (uint64)
ID of the callsite probe (ULEB128)
FUNCTION BODY
A FUNCTION BODY entry describing the inlined function.
```
To support building a context-sensitive profile, probes from inlinees are grouped by their inline contexts. An inline context is logically a call path through which a callee function lands in a caller function. The probe emitter builds an inline tree based on the debug metadata for each outlined function in the form of a trie tree. A tree root is the outlined function. Each tree edge stands for a callsite where inlining happens. Pseudo probes originating from an inlinee function are stored in a tree node and the tree path starting from the root all the way down to the tree node is the inline context of the probes. The emission happens on the whole tree top-down recursively. Probes of a tree node will be emitted altogether with their direct parent edge. Since a pseudo probe corresponds to a real code address, for size savings, the address is encoded as a delta from the previous probe except for the first probe. Variant-sized integer encoding, aka LEB128, is used for address delta and probe index.
**Assembling**
Pseudo probes can be printed as assembly directives alternatively. This allows for good assembly code readability and also provides a view of how optimizations and pseudo probes affect each other, especially helpful for diff time assembly analysis.
A pseudo probe directive has the following operands in order: function GUID, probe index, probe type, probe attributes and inline context. The directive is generated by the compiler and can be parsed by the assembler to form an encoded `.pseudoprobe` section in the object file.
A example assembly looks like:
```
foo2: # @foo2
# %bb.0: # %bb0
pushq %rax
testl %edi, %edi
.pseudoprobe 837061429793323041 1 0 0
je .LBB1_1
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 6 2 0
callq foo
.pseudoprobe 837061429793323041 3 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
.LBB1_1: # %bb1
.pseudoprobe 837061429793323041 5 1 0
callq *%rsi
.pseudoprobe 837061429793323041 2 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
# -- End function
.section .pseudo_probe_desc,"",@progbits
.quad 6699318081062747564
.quad 72617220756
.byte 3
.ascii "foo"
.quad 837061429793323041
.quad 281547593931412
.byte 4
.ascii "foo2"
```
With inlining turned on, the assembly may look different around %bb2 with an inlined probe:
```
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 3 0
.pseudoprobe 6699318081062747564 1 0 @ 837061429793323041:6
.pseudoprobe 837061429793323041 4 0
popq %rax
retq
```
**Disassembling**
We have a disassembling tool (llvm-profgen) that can display disassembly alongside with pseudo probes. So far it only supports ELF executable file.
An example disassembly looks like:
```
00000000002011a0 <foo2>:
2011a0: 50 push rax
2011a1: 85 ff test edi,edi
[Probe]: FUNC: foo2 Index: 1 Type: Block
2011a3: 74 02 je 2011a7 <foo2+0x7>
[Probe]: FUNC: foo2 Index: 3 Type: Block
[Probe]: FUNC: foo2 Index: 4 Type: Block
[Probe]: FUNC: foo Index: 1 Type: Block Inlined: @ foo2:6
2011a5: 58 pop rax
2011a6: c3 ret
[Probe]: FUNC: foo2 Index: 2 Type: Block
2011a7: bf 01 00 00 00 mov edi,0x1
[Probe]: FUNC: foo2 Index: 5 Type: IndirectCall
2011ac: ff d6 call rsi
[Probe]: FUNC: foo2 Index: 4 Type: Block
2011ae: 58 pop rax
2011af: c3 ret
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D91878
2020-12-09 00:37:32 +01:00
|
|
|
computeCFGHash();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
|
|
|
|
// value of each BB in the CFG. The higher 32 bits record the number of edges
|
|
|
|
// preceded by the number of indirect calls.
|
|
|
|
// This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
|
|
|
|
void SampleProfileProber::computeCFGHash() {
|
|
|
|
std::vector<uint8_t> Indexes;
|
|
|
|
JamCRC JC;
|
|
|
|
for (auto &BB : *F) {
|
|
|
|
auto *TI = BB.getTerminator();
|
|
|
|
for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
|
|
|
|
auto *Succ = TI->getSuccessor(I);
|
|
|
|
auto Index = getBlockId(Succ);
|
|
|
|
for (int J = 0; J < 4; J++)
|
|
|
|
Indexes.push_back((uint8_t)(Index >> (J * 8)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
JC.update(Indexes);
|
|
|
|
|
|
|
|
FunctionHash = (uint64_t)CallProbeIds.size() << 48 |
|
|
|
|
(uint64_t)Indexes.size() << 32 | JC.getCRC();
|
|
|
|
// Reserve bit 60-63 for other information purpose.
|
|
|
|
FunctionHash &= 0x0FFFFFFFFFFFFFFF;
|
|
|
|
assert(FunctionHash && "Function checksum should not be zero");
|
|
|
|
LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F->getName()
|
|
|
|
<< ":\n"
|
|
|
|
<< " CRC = " << JC.getCRC() << ", Edges = "
|
|
|
|
<< Indexes.size() << ", ICSites = " << CallProbeIds.size()
|
|
|
|
<< ", Hash = " << FunctionHash << "\n");
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void SampleProfileProber::computeProbeIdForBlocks() {
|
|
|
|
for (auto &BB : *F) {
|
|
|
|
BlockProbeIds[&BB] = ++LastProbeId;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-02 06:44:06 +01:00
|
|
|
void SampleProfileProber::computeProbeIdForCallsites() {
|
|
|
|
for (auto &BB : *F) {
|
|
|
|
for (auto &I : BB) {
|
|
|
|
if (!isa<CallBase>(I))
|
|
|
|
continue;
|
|
|
|
if (isa<IntrinsicInst>(&I))
|
|
|
|
continue;
|
|
|
|
CallProbeIds[&I] = ++LastProbeId;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
uint32_t SampleProfileProber::getBlockId(const BasicBlock *BB) const {
|
|
|
|
auto I = BlockProbeIds.find(const_cast<BasicBlock *>(BB));
|
|
|
|
return I == BlockProbeIds.end() ? 0 : I->second;
|
|
|
|
}
|
|
|
|
|
2020-12-02 06:44:06 +01:00
|
|
|
uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const {
|
|
|
|
auto Iter = CallProbeIds.find(const_cast<Instruction *>(Call));
|
|
|
|
return Iter == CallProbeIds.end() ? 0 : Iter->second;
|
|
|
|
}
|
|
|
|
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
|
|
|
|
Module *M = F.getParent();
|
|
|
|
MDBuilder MDB(F.getContext());
|
|
|
|
// Compute a GUID without considering the function's linkage type. This is
|
|
|
|
// fine since function name is the only key in the profile database.
|
|
|
|
uint64_t Guid = Function::getGUID(F.getName());
|
|
|
|
|
2020-12-02 06:44:06 +01:00
|
|
|
// Assign an artificial debug line to a probe that doesn't come with a real
|
|
|
|
// line. A probe not having a debug line will get an incomplete inline
|
|
|
|
// context. This will cause samples collected on the probe to be counted
|
|
|
|
// into the base profile instead of a context profile. The line number
|
|
|
|
// itself is not important though.
|
|
|
|
auto AssignDebugLoc = [&](Instruction *I) {
|
|
|
|
assert((isa<PseudoProbeInst>(I) || isa<CallBase>(I)) &&
|
|
|
|
"Expecting pseudo probe or call instructions");
|
|
|
|
if (!I->getDebugLoc()) {
|
|
|
|
if (auto *SP = F.getSubprogram()) {
|
2020-12-11 21:45:22 +01:00
|
|
|
auto DIL = DILocation::get(SP->getContext(), 0, 0, SP);
|
2020-12-02 06:44:06 +01:00
|
|
|
I->setDebugLoc(DIL);
|
|
|
|
ArtificialDbgLine++;
|
|
|
|
LLVM_DEBUG({
|
|
|
|
dbgs() << "\nIn Function " << F.getName()
|
|
|
|
<< " Probe gets an artificial debug line\n";
|
|
|
|
I->dump();
|
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
// Probe basic blocks.
|
|
|
|
for (auto &I : BlockProbeIds) {
|
|
|
|
BasicBlock *BB = I.first;
|
|
|
|
uint32_t Index = I.second;
|
|
|
|
// Insert a probe before an instruction with a valid debug line number which
|
|
|
|
// will be assigned to the probe. The line number will be used later to
|
|
|
|
// model the inline context when the probe is inlined into other functions.
|
|
|
|
// Debug instructions, phi nodes and lifetime markers do not have an valid
|
|
|
|
// line number. Real instructions generated by optimizations may not come
|
|
|
|
// with a line number either.
|
|
|
|
auto HasValidDbgLine = [](Instruction *J) {
|
|
|
|
return !isa<PHINode>(J) && !isa<DbgInfoIntrinsic>(J) &&
|
|
|
|
!J->isLifetimeStartOrEnd() && J->getDebugLoc();
|
|
|
|
};
|
|
|
|
|
|
|
|
Instruction *J = &*BB->getFirstInsertionPt();
|
|
|
|
while (J != BB->getTerminator() && !HasValidDbgLine(J)) {
|
|
|
|
J = J->getNextNode();
|
|
|
|
}
|
|
|
|
|
|
|
|
IRBuilder<> Builder(J);
|
|
|
|
assert(Builder.GetInsertPoint() != BB->end() &&
|
|
|
|
"Cannot get the probing point");
|
|
|
|
Function *ProbeFn =
|
|
|
|
llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
|
|
|
|
Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
|
2020-12-11 21:18:31 +01:00
|
|
|
Builder.getInt32(0),
|
|
|
|
Builder.getInt64(PseudoProbeFullDistributionFactor)};
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
auto *Probe = Builder.CreateCall(ProbeFn, Args);
|
2020-12-02 06:44:06 +01:00
|
|
|
AssignDebugLoc(Probe);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Probe both direct calls and indirect calls. Direct calls are probed so that
|
|
|
|
// their probe ID can be used as an call site identifier to represent a
|
|
|
|
// calling context.
|
|
|
|
for (auto &I : CallProbeIds) {
|
|
|
|
auto *Call = I.first;
|
|
|
|
uint32_t Index = I.second;
|
|
|
|
uint32_t Type = cast<CallBase>(Call)->getCalledFunction()
|
|
|
|
? (uint32_t)PseudoProbeType::DirectCall
|
|
|
|
: (uint32_t)PseudoProbeType::IndirectCall;
|
|
|
|
AssignDebugLoc(Call);
|
|
|
|
// Levarge the 32-bit discriminator field of debug data to store the ID and
|
|
|
|
// type of a callsite probe. This gets rid of the dependency on plumbing a
|
|
|
|
// customized metadata through the codegen pipeline.
|
2020-12-11 21:18:31 +01:00
|
|
|
uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
|
|
|
|
Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
|
2020-12-02 06:44:06 +01:00
|
|
|
if (auto DIL = Call->getDebugLoc()) {
|
|
|
|
DIL = DIL->cloneWithDiscriminator(V);
|
|
|
|
Call->setDebugLoc(DIL);
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
}
|
|
|
|
}
|
[CSSPGO] Pseudo probe encoding and emission.
This change implements pseudo probe encoding and emission for CSSPGO. Please see RFC here for more context: https://groups.google.com/g/llvm-dev/c/1p1rdYbL93s
Pseudo probes are in the form of intrinsic calls on IR/MIR but they do not turn into any machine instructions. Instead they are emitted into the binary as a piece of data in standalone sections. The probe-specific sections are not needed to be loaded into memory at execution time, thus they do not incur a runtime overhead.
**ELF object emission**
The binary data to emit are organized as two ELF sections, i.e, the `.pseudo_probe_desc` section and the `.pseudo_probe` section. The `.pseudo_probe_desc` section stores a function descriptor for each function and the `.pseudo_probe` section stores the actual probes, each fo which corresponds to an IR basic block or an IR function callsite. A function descriptor is stored as a module-level metadata during the compilation and is serialized into the object file during object emission.
Both the probe descriptors and pseudo probes can be emitted into a separate ELF section per function to leverage the linker for deduplication. A `.pseudo_probe` section shares the same COMDAT group with the function code so that when the function is dead, the probes are dead and disposed too. On the contrary, a `.pseudo_probe_desc` section has its own COMDAT group. This is because even if a function is dead, its probes may be inlined into other functions and its descriptor is still needed by the profile generation tool.
The format of `.pseudo_probe_desc` section looks like:
```
.section .pseudo_probe_desc,"",@progbits
.quad 6309742469962978389 // Func GUID
.quad 4294967295 // Func Hash
.byte 9 // Length of func name
.ascii "_Z5funcAi" // Func name
.quad 7102633082150537521
.quad 138828622701
.byte 12
.ascii "_Z8funcLeafi"
.quad 446061515086924981
.quad 4294967295
.byte 9
.ascii "_Z5funcBi"
.quad -2016976694713209516
.quad 72617220756
.byte 7
.ascii "_Z3fibi"
```
For each `.pseudoprobe` section, the encoded binary data consists of a single function record corresponding to an outlined function (i.e, a function with a code entry in the `.text` section). A function record has the following format :
```
FUNCTION BODY (one for each outlined function present in the text section)
GUID (uint64)
GUID of the function
NPROBES (ULEB128)
Number of probes originating from this function.
NUM_INLINED_FUNCTIONS (ULEB128)
Number of callees inlined into this function, aka number of
first-level inlinees
PROBE RECORDS
A list of NPROBES entries. Each entry contains:
INDEX (ULEB128)
TYPE (uint4)
0 - block probe, 1 - indirect call, 2 - direct call
ATTRIBUTE (uint3)
reserved
ADDRESS_TYPE (uint1)
0 - code address, 1 - address delta
CODE_ADDRESS (uint64 or ULEB128)
code address or address delta, depending on ADDRESS_TYPE
INLINED FUNCTION RECORDS
A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
callees. Each record contains:
INLINE SITE
GUID of the inlinee (uint64)
ID of the callsite probe (ULEB128)
FUNCTION BODY
A FUNCTION BODY entry describing the inlined function.
```
To support building a context-sensitive profile, probes from inlinees are grouped by their inline contexts. An inline context is logically a call path through which a callee function lands in a caller function. The probe emitter builds an inline tree based on the debug metadata for each outlined function in the form of a trie tree. A tree root is the outlined function. Each tree edge stands for a callsite where inlining happens. Pseudo probes originating from an inlinee function are stored in a tree node and the tree path starting from the root all the way down to the tree node is the inline context of the probes. The emission happens on the whole tree top-down recursively. Probes of a tree node will be emitted altogether with their direct parent edge. Since a pseudo probe corresponds to a real code address, for size savings, the address is encoded as a delta from the previous probe except for the first probe. Variant-sized integer encoding, aka LEB128, is used for address delta and probe index.
**Assembling**
Pseudo probes can be printed as assembly directives alternatively. This allows for good assembly code readability and also provides a view of how optimizations and pseudo probes affect each other, especially helpful for diff time assembly analysis.
A pseudo probe directive has the following operands in order: function GUID, probe index, probe type, probe attributes and inline context. The directive is generated by the compiler and can be parsed by the assembler to form an encoded `.pseudoprobe` section in the object file.
A example assembly looks like:
```
foo2: # @foo2
# %bb.0: # %bb0
pushq %rax
testl %edi, %edi
.pseudoprobe 837061429793323041 1 0 0
je .LBB1_1
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 6 2 0
callq foo
.pseudoprobe 837061429793323041 3 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
.LBB1_1: # %bb1
.pseudoprobe 837061429793323041 5 1 0
callq *%rsi
.pseudoprobe 837061429793323041 2 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
# -- End function
.section .pseudo_probe_desc,"",@progbits
.quad 6699318081062747564
.quad 72617220756
.byte 3
.ascii "foo"
.quad 837061429793323041
.quad 281547593931412
.byte 4
.ascii "foo2"
```
With inlining turned on, the assembly may look different around %bb2 with an inlined probe:
```
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 3 0
.pseudoprobe 6699318081062747564 1 0 @ 837061429793323041:6
.pseudoprobe 837061429793323041 4 0
popq %rax
retq
```
**Disassembling**
We have a disassembling tool (llvm-profgen) that can display disassembly alongside with pseudo probes. So far it only supports ELF executable file.
An example disassembly looks like:
```
00000000002011a0 <foo2>:
2011a0: 50 push rax
2011a1: 85 ff test edi,edi
[Probe]: FUNC: foo2 Index: 1 Type: Block
2011a3: 74 02 je 2011a7 <foo2+0x7>
[Probe]: FUNC: foo2 Index: 3 Type: Block
[Probe]: FUNC: foo2 Index: 4 Type: Block
[Probe]: FUNC: foo Index: 1 Type: Block Inlined: @ foo2:6
2011a5: 58 pop rax
2011a6: c3 ret
[Probe]: FUNC: foo2 Index: 2 Type: Block
2011a7: bf 01 00 00 00 mov edi,0x1
[Probe]: FUNC: foo2 Index: 5 Type: IndirectCall
2011ac: ff d6 call rsi
[Probe]: FUNC: foo2 Index: 4 Type: Block
2011ae: 58 pop rax
2011af: c3 ret
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D91878
2020-12-09 00:37:32 +01:00
|
|
|
|
|
|
|
// Create module-level metadata that contains function info necessary to
|
|
|
|
// synthesize probe-based sample counts, which are
|
|
|
|
// - FunctionGUID
|
|
|
|
// - FunctionHash.
|
|
|
|
// - FunctionName
|
|
|
|
auto Hash = getFunctionHash();
|
|
|
|
auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, &F);
|
|
|
|
auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName);
|
|
|
|
assert(NMD && "llvm.pseudo_probe_desc should be pre-created");
|
|
|
|
NMD->addOperand(MD);
|
|
|
|
|
|
|
|
// Preserve a comdat group to hold all probes materialized later. This
|
|
|
|
// allows that when the function is considered dead and removed, the
|
|
|
|
// materialized probes are disposed too.
|
|
|
|
// Imported functions are defined in another module. They do not need
|
|
|
|
// the following handling since same care will be taken for them in their
|
|
|
|
// original module. The pseudo probes inserted into an imported functions
|
|
|
|
// above will naturally not be emitted since the imported function is free
|
|
|
|
// from object emission. However they will be emitted together with the
|
|
|
|
// inliner functions that the imported function is inlined into. We are not
|
|
|
|
// creating a comdat group for an import function since it's useless anyway.
|
|
|
|
if (!F.isDeclarationForLinker()) {
|
|
|
|
if (TM) {
|
|
|
|
auto Triple = TM->getTargetTriple();
|
[SanitizerCoverage] Drop !associated on metadata sections
In SanitizerCoverage, the metadata sections (`__sancov_guards`,
`__sancov_cntrs`, `__sancov_bools`) are referenced by functions. After
inlining, such a `__sancov_*` section can be referenced by more than one
functions, but its sh_link still refers to the original function's section.
(Note: a SHF_LINK_ORDER section referenced by a section other than its linked-to
section violates the invariant.)
If the original function's section is discarded (e.g. LTO internalization +
`ld.lld --gc-sections`), ld.lld may report a `sh_link points to discarded section` error.
This above reasoning means that `!associated` is not appropriate to be called by
an inlinable function. Non-interposable functions are inline candidates, so we
have to drop `!associated`. A `__sancov_pcs` is not referenced by other sections
but is expected to parallel a metadata section, so we have to make sure the two
sections are retained or discarded at the same time. A section group does the
trick. (Note: we have a module ctor, so `getUniqueModuleId` guarantees to
return a non-empty string, and `GetOrCreateFunctionComdat` guarantees to return
non-null.)
For interposable functions, we could keep using `!associated`, but
LTO can change the linkage to `internal` and allow such functions to be inlinable,
so we have to drop `!associated`, too. To not interfere with section
group resolution, we need to use the `noduplicates` variant (section group flag 0).
(This allows us to get rid of the ModuleID parameter.)
In -fno-pie and -fpie code (mostly dso_local), instrumented interposable
functions have WeakAny/LinkOnceAny linkages, which are rare. So the
section group header overload should be low.
This patch does not change the object file output for COFF (where `!associated` is ignored).
Reviewed By: morehouse, rnk, vitalybuka
Differential Revision: https://reviews.llvm.org/D97430
2021-02-25 20:59:23 +01:00
|
|
|
if (Triple.supportsCOMDAT() && TM->getFunctionSections())
|
|
|
|
getOrCreateFunctionComdat(F, Triple);
|
[CSSPGO] Pseudo probe encoding and emission.
This change implements pseudo probe encoding and emission for CSSPGO. Please see RFC here for more context: https://groups.google.com/g/llvm-dev/c/1p1rdYbL93s
Pseudo probes are in the form of intrinsic calls on IR/MIR but they do not turn into any machine instructions. Instead they are emitted into the binary as a piece of data in standalone sections. The probe-specific sections are not needed to be loaded into memory at execution time, thus they do not incur a runtime overhead.
**ELF object emission**
The binary data to emit are organized as two ELF sections, i.e, the `.pseudo_probe_desc` section and the `.pseudo_probe` section. The `.pseudo_probe_desc` section stores a function descriptor for each function and the `.pseudo_probe` section stores the actual probes, each fo which corresponds to an IR basic block or an IR function callsite. A function descriptor is stored as a module-level metadata during the compilation and is serialized into the object file during object emission.
Both the probe descriptors and pseudo probes can be emitted into a separate ELF section per function to leverage the linker for deduplication. A `.pseudo_probe` section shares the same COMDAT group with the function code so that when the function is dead, the probes are dead and disposed too. On the contrary, a `.pseudo_probe_desc` section has its own COMDAT group. This is because even if a function is dead, its probes may be inlined into other functions and its descriptor is still needed by the profile generation tool.
The format of `.pseudo_probe_desc` section looks like:
```
.section .pseudo_probe_desc,"",@progbits
.quad 6309742469962978389 // Func GUID
.quad 4294967295 // Func Hash
.byte 9 // Length of func name
.ascii "_Z5funcAi" // Func name
.quad 7102633082150537521
.quad 138828622701
.byte 12
.ascii "_Z8funcLeafi"
.quad 446061515086924981
.quad 4294967295
.byte 9
.ascii "_Z5funcBi"
.quad -2016976694713209516
.quad 72617220756
.byte 7
.ascii "_Z3fibi"
```
For each `.pseudoprobe` section, the encoded binary data consists of a single function record corresponding to an outlined function (i.e, a function with a code entry in the `.text` section). A function record has the following format :
```
FUNCTION BODY (one for each outlined function present in the text section)
GUID (uint64)
GUID of the function
NPROBES (ULEB128)
Number of probes originating from this function.
NUM_INLINED_FUNCTIONS (ULEB128)
Number of callees inlined into this function, aka number of
first-level inlinees
PROBE RECORDS
A list of NPROBES entries. Each entry contains:
INDEX (ULEB128)
TYPE (uint4)
0 - block probe, 1 - indirect call, 2 - direct call
ATTRIBUTE (uint3)
reserved
ADDRESS_TYPE (uint1)
0 - code address, 1 - address delta
CODE_ADDRESS (uint64 or ULEB128)
code address or address delta, depending on ADDRESS_TYPE
INLINED FUNCTION RECORDS
A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
callees. Each record contains:
INLINE SITE
GUID of the inlinee (uint64)
ID of the callsite probe (ULEB128)
FUNCTION BODY
A FUNCTION BODY entry describing the inlined function.
```
To support building a context-sensitive profile, probes from inlinees are grouped by their inline contexts. An inline context is logically a call path through which a callee function lands in a caller function. The probe emitter builds an inline tree based on the debug metadata for each outlined function in the form of a trie tree. A tree root is the outlined function. Each tree edge stands for a callsite where inlining happens. Pseudo probes originating from an inlinee function are stored in a tree node and the tree path starting from the root all the way down to the tree node is the inline context of the probes. The emission happens on the whole tree top-down recursively. Probes of a tree node will be emitted altogether with their direct parent edge. Since a pseudo probe corresponds to a real code address, for size savings, the address is encoded as a delta from the previous probe except for the first probe. Variant-sized integer encoding, aka LEB128, is used for address delta and probe index.
**Assembling**
Pseudo probes can be printed as assembly directives alternatively. This allows for good assembly code readability and also provides a view of how optimizations and pseudo probes affect each other, especially helpful for diff time assembly analysis.
A pseudo probe directive has the following operands in order: function GUID, probe index, probe type, probe attributes and inline context. The directive is generated by the compiler and can be parsed by the assembler to form an encoded `.pseudoprobe` section in the object file.
A example assembly looks like:
```
foo2: # @foo2
# %bb.0: # %bb0
pushq %rax
testl %edi, %edi
.pseudoprobe 837061429793323041 1 0 0
je .LBB1_1
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 6 2 0
callq foo
.pseudoprobe 837061429793323041 3 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
.LBB1_1: # %bb1
.pseudoprobe 837061429793323041 5 1 0
callq *%rsi
.pseudoprobe 837061429793323041 2 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
# -- End function
.section .pseudo_probe_desc,"",@progbits
.quad 6699318081062747564
.quad 72617220756
.byte 3
.ascii "foo"
.quad 837061429793323041
.quad 281547593931412
.byte 4
.ascii "foo2"
```
With inlining turned on, the assembly may look different around %bb2 with an inlined probe:
```
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 3 0
.pseudoprobe 6699318081062747564 1 0 @ 837061429793323041:6
.pseudoprobe 837061429793323041 4 0
popq %rax
retq
```
**Disassembling**
We have a disassembling tool (llvm-profgen) that can display disassembly alongside with pseudo probes. So far it only supports ELF executable file.
An example disassembly looks like:
```
00000000002011a0 <foo2>:
2011a0: 50 push rax
2011a1: 85 ff test edi,edi
[Probe]: FUNC: foo2 Index: 1 Type: Block
2011a3: 74 02 je 2011a7 <foo2+0x7>
[Probe]: FUNC: foo2 Index: 3 Type: Block
[Probe]: FUNC: foo2 Index: 4 Type: Block
[Probe]: FUNC: foo Index: 1 Type: Block Inlined: @ foo2:6
2011a5: 58 pop rax
2011a6: c3 ret
[Probe]: FUNC: foo2 Index: 2 Type: Block
2011a7: bf 01 00 00 00 mov edi,0x1
[Probe]: FUNC: foo2 Index: 5 Type: IndirectCall
2011ac: ff d6 call rsi
[Probe]: FUNC: foo2 Index: 4 Type: Block
2011ae: 58 pop rax
2011af: c3 ret
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D91878
2020-12-09 00:37:32 +01:00
|
|
|
}
|
|
|
|
}
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
PreservedAnalyses SampleProfileProbePass::run(Module &M,
|
|
|
|
ModuleAnalysisManager &AM) {
|
[CSSPGO] Pseudo probe encoding and emission.
This change implements pseudo probe encoding and emission for CSSPGO. Please see RFC here for more context: https://groups.google.com/g/llvm-dev/c/1p1rdYbL93s
Pseudo probes are in the form of intrinsic calls on IR/MIR but they do not turn into any machine instructions. Instead they are emitted into the binary as a piece of data in standalone sections. The probe-specific sections are not needed to be loaded into memory at execution time, thus they do not incur a runtime overhead.
**ELF object emission**
The binary data to emit are organized as two ELF sections, i.e, the `.pseudo_probe_desc` section and the `.pseudo_probe` section. The `.pseudo_probe_desc` section stores a function descriptor for each function and the `.pseudo_probe` section stores the actual probes, each fo which corresponds to an IR basic block or an IR function callsite. A function descriptor is stored as a module-level metadata during the compilation and is serialized into the object file during object emission.
Both the probe descriptors and pseudo probes can be emitted into a separate ELF section per function to leverage the linker for deduplication. A `.pseudo_probe` section shares the same COMDAT group with the function code so that when the function is dead, the probes are dead and disposed too. On the contrary, a `.pseudo_probe_desc` section has its own COMDAT group. This is because even if a function is dead, its probes may be inlined into other functions and its descriptor is still needed by the profile generation tool.
The format of `.pseudo_probe_desc` section looks like:
```
.section .pseudo_probe_desc,"",@progbits
.quad 6309742469962978389 // Func GUID
.quad 4294967295 // Func Hash
.byte 9 // Length of func name
.ascii "_Z5funcAi" // Func name
.quad 7102633082150537521
.quad 138828622701
.byte 12
.ascii "_Z8funcLeafi"
.quad 446061515086924981
.quad 4294967295
.byte 9
.ascii "_Z5funcBi"
.quad -2016976694713209516
.quad 72617220756
.byte 7
.ascii "_Z3fibi"
```
For each `.pseudoprobe` section, the encoded binary data consists of a single function record corresponding to an outlined function (i.e, a function with a code entry in the `.text` section). A function record has the following format :
```
FUNCTION BODY (one for each outlined function present in the text section)
GUID (uint64)
GUID of the function
NPROBES (ULEB128)
Number of probes originating from this function.
NUM_INLINED_FUNCTIONS (ULEB128)
Number of callees inlined into this function, aka number of
first-level inlinees
PROBE RECORDS
A list of NPROBES entries. Each entry contains:
INDEX (ULEB128)
TYPE (uint4)
0 - block probe, 1 - indirect call, 2 - direct call
ATTRIBUTE (uint3)
reserved
ADDRESS_TYPE (uint1)
0 - code address, 1 - address delta
CODE_ADDRESS (uint64 or ULEB128)
code address or address delta, depending on ADDRESS_TYPE
INLINED FUNCTION RECORDS
A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
callees. Each record contains:
INLINE SITE
GUID of the inlinee (uint64)
ID of the callsite probe (ULEB128)
FUNCTION BODY
A FUNCTION BODY entry describing the inlined function.
```
To support building a context-sensitive profile, probes from inlinees are grouped by their inline contexts. An inline context is logically a call path through which a callee function lands in a caller function. The probe emitter builds an inline tree based on the debug metadata for each outlined function in the form of a trie tree. A tree root is the outlined function. Each tree edge stands for a callsite where inlining happens. Pseudo probes originating from an inlinee function are stored in a tree node and the tree path starting from the root all the way down to the tree node is the inline context of the probes. The emission happens on the whole tree top-down recursively. Probes of a tree node will be emitted altogether with their direct parent edge. Since a pseudo probe corresponds to a real code address, for size savings, the address is encoded as a delta from the previous probe except for the first probe. Variant-sized integer encoding, aka LEB128, is used for address delta and probe index.
**Assembling**
Pseudo probes can be printed as assembly directives alternatively. This allows for good assembly code readability and also provides a view of how optimizations and pseudo probes affect each other, especially helpful for diff time assembly analysis.
A pseudo probe directive has the following operands in order: function GUID, probe index, probe type, probe attributes and inline context. The directive is generated by the compiler and can be parsed by the assembler to form an encoded `.pseudoprobe` section in the object file.
A example assembly looks like:
```
foo2: # @foo2
# %bb.0: # %bb0
pushq %rax
testl %edi, %edi
.pseudoprobe 837061429793323041 1 0 0
je .LBB1_1
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 6 2 0
callq foo
.pseudoprobe 837061429793323041 3 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
.LBB1_1: # %bb1
.pseudoprobe 837061429793323041 5 1 0
callq *%rsi
.pseudoprobe 837061429793323041 2 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
# -- End function
.section .pseudo_probe_desc,"",@progbits
.quad 6699318081062747564
.quad 72617220756
.byte 3
.ascii "foo"
.quad 837061429793323041
.quad 281547593931412
.byte 4
.ascii "foo2"
```
With inlining turned on, the assembly may look different around %bb2 with an inlined probe:
```
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 3 0
.pseudoprobe 6699318081062747564 1 0 @ 837061429793323041:6
.pseudoprobe 837061429793323041 4 0
popq %rax
retq
```
**Disassembling**
We have a disassembling tool (llvm-profgen) that can display disassembly alongside with pseudo probes. So far it only supports ELF executable file.
An example disassembly looks like:
```
00000000002011a0 <foo2>:
2011a0: 50 push rax
2011a1: 85 ff test edi,edi
[Probe]: FUNC: foo2 Index: 1 Type: Block
2011a3: 74 02 je 2011a7 <foo2+0x7>
[Probe]: FUNC: foo2 Index: 3 Type: Block
[Probe]: FUNC: foo2 Index: 4 Type: Block
[Probe]: FUNC: foo Index: 1 Type: Block Inlined: @ foo2:6
2011a5: 58 pop rax
2011a6: c3 ret
[Probe]: FUNC: foo2 Index: 2 Type: Block
2011a7: bf 01 00 00 00 mov edi,0x1
[Probe]: FUNC: foo2 Index: 5 Type: IndirectCall
2011ac: ff d6 call rsi
[Probe]: FUNC: foo2 Index: 4 Type: Block
2011ae: 58 pop rax
2011af: c3 ret
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D91878
2020-12-09 00:37:32 +01:00
|
|
|
auto ModuleId = getUniqueModuleId(&M);
|
|
|
|
// Create the pseudo probe desc metadata beforehand.
|
|
|
|
// Note that modules with only data but no functions will require this to
|
|
|
|
// be set up so that they will be known as probed later.
|
|
|
|
M.getOrInsertNamedMetadata(PseudoProbeDescMetadataName);
|
|
|
|
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
for (auto &F : M) {
|
|
|
|
if (F.isDeclaration())
|
|
|
|
continue;
|
[CSSPGO] Pseudo probe encoding and emission.
This change implements pseudo probe encoding and emission for CSSPGO. Please see RFC here for more context: https://groups.google.com/g/llvm-dev/c/1p1rdYbL93s
Pseudo probes are in the form of intrinsic calls on IR/MIR but they do not turn into any machine instructions. Instead they are emitted into the binary as a piece of data in standalone sections. The probe-specific sections are not needed to be loaded into memory at execution time, thus they do not incur a runtime overhead.
**ELF object emission**
The binary data to emit are organized as two ELF sections, i.e, the `.pseudo_probe_desc` section and the `.pseudo_probe` section. The `.pseudo_probe_desc` section stores a function descriptor for each function and the `.pseudo_probe` section stores the actual probes, each fo which corresponds to an IR basic block or an IR function callsite. A function descriptor is stored as a module-level metadata during the compilation and is serialized into the object file during object emission.
Both the probe descriptors and pseudo probes can be emitted into a separate ELF section per function to leverage the linker for deduplication. A `.pseudo_probe` section shares the same COMDAT group with the function code so that when the function is dead, the probes are dead and disposed too. On the contrary, a `.pseudo_probe_desc` section has its own COMDAT group. This is because even if a function is dead, its probes may be inlined into other functions and its descriptor is still needed by the profile generation tool.
The format of `.pseudo_probe_desc` section looks like:
```
.section .pseudo_probe_desc,"",@progbits
.quad 6309742469962978389 // Func GUID
.quad 4294967295 // Func Hash
.byte 9 // Length of func name
.ascii "_Z5funcAi" // Func name
.quad 7102633082150537521
.quad 138828622701
.byte 12
.ascii "_Z8funcLeafi"
.quad 446061515086924981
.quad 4294967295
.byte 9
.ascii "_Z5funcBi"
.quad -2016976694713209516
.quad 72617220756
.byte 7
.ascii "_Z3fibi"
```
For each `.pseudoprobe` section, the encoded binary data consists of a single function record corresponding to an outlined function (i.e, a function with a code entry in the `.text` section). A function record has the following format :
```
FUNCTION BODY (one for each outlined function present in the text section)
GUID (uint64)
GUID of the function
NPROBES (ULEB128)
Number of probes originating from this function.
NUM_INLINED_FUNCTIONS (ULEB128)
Number of callees inlined into this function, aka number of
first-level inlinees
PROBE RECORDS
A list of NPROBES entries. Each entry contains:
INDEX (ULEB128)
TYPE (uint4)
0 - block probe, 1 - indirect call, 2 - direct call
ATTRIBUTE (uint3)
reserved
ADDRESS_TYPE (uint1)
0 - code address, 1 - address delta
CODE_ADDRESS (uint64 or ULEB128)
code address or address delta, depending on ADDRESS_TYPE
INLINED FUNCTION RECORDS
A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
callees. Each record contains:
INLINE SITE
GUID of the inlinee (uint64)
ID of the callsite probe (ULEB128)
FUNCTION BODY
A FUNCTION BODY entry describing the inlined function.
```
To support building a context-sensitive profile, probes from inlinees are grouped by their inline contexts. An inline context is logically a call path through which a callee function lands in a caller function. The probe emitter builds an inline tree based on the debug metadata for each outlined function in the form of a trie tree. A tree root is the outlined function. Each tree edge stands for a callsite where inlining happens. Pseudo probes originating from an inlinee function are stored in a tree node and the tree path starting from the root all the way down to the tree node is the inline context of the probes. The emission happens on the whole tree top-down recursively. Probes of a tree node will be emitted altogether with their direct parent edge. Since a pseudo probe corresponds to a real code address, for size savings, the address is encoded as a delta from the previous probe except for the first probe. Variant-sized integer encoding, aka LEB128, is used for address delta and probe index.
**Assembling**
Pseudo probes can be printed as assembly directives alternatively. This allows for good assembly code readability and also provides a view of how optimizations and pseudo probes affect each other, especially helpful for diff time assembly analysis.
A pseudo probe directive has the following operands in order: function GUID, probe index, probe type, probe attributes and inline context. The directive is generated by the compiler and can be parsed by the assembler to form an encoded `.pseudoprobe` section in the object file.
A example assembly looks like:
```
foo2: # @foo2
# %bb.0: # %bb0
pushq %rax
testl %edi, %edi
.pseudoprobe 837061429793323041 1 0 0
je .LBB1_1
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 6 2 0
callq foo
.pseudoprobe 837061429793323041 3 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
.LBB1_1: # %bb1
.pseudoprobe 837061429793323041 5 1 0
callq *%rsi
.pseudoprobe 837061429793323041 2 0 0
.pseudoprobe 837061429793323041 4 0 0
popq %rax
retq
# -- End function
.section .pseudo_probe_desc,"",@progbits
.quad 6699318081062747564
.quad 72617220756
.byte 3
.ascii "foo"
.quad 837061429793323041
.quad 281547593931412
.byte 4
.ascii "foo2"
```
With inlining turned on, the assembly may look different around %bb2 with an inlined probe:
```
# %bb.2: # %bb2
.pseudoprobe 837061429793323041 3 0
.pseudoprobe 6699318081062747564 1 0 @ 837061429793323041:6
.pseudoprobe 837061429793323041 4 0
popq %rax
retq
```
**Disassembling**
We have a disassembling tool (llvm-profgen) that can display disassembly alongside with pseudo probes. So far it only supports ELF executable file.
An example disassembly looks like:
```
00000000002011a0 <foo2>:
2011a0: 50 push rax
2011a1: 85 ff test edi,edi
[Probe]: FUNC: foo2 Index: 1 Type: Block
2011a3: 74 02 je 2011a7 <foo2+0x7>
[Probe]: FUNC: foo2 Index: 3 Type: Block
[Probe]: FUNC: foo2 Index: 4 Type: Block
[Probe]: FUNC: foo Index: 1 Type: Block Inlined: @ foo2:6
2011a5: 58 pop rax
2011a6: c3 ret
[Probe]: FUNC: foo2 Index: 2 Type: Block
2011a7: bf 01 00 00 00 mov edi,0x1
[Probe]: FUNC: foo2 Index: 5 Type: IndirectCall
2011ac: ff d6 call rsi
[Probe]: FUNC: foo2 Index: 4 Type: Block
2011ae: 58 pop rax
2011af: c3 ret
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D91878
2020-12-09 00:37:32 +01:00
|
|
|
SampleProfileProber ProbeManager(F, ModuleId);
|
[CSSPGO] Pseudo probe instrumentation pass
This change introduces a pseudo probe instrumentation pass for block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
Given the following LLVM IR:
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb3
bb2:
br label %bb3
bb3:
ret void
}
```
The instrumented IR will look like below. Note that each llvm.pseudoprobe intrinsic call represents a pseudo probe at a block, of which the first parameter is the GUID of the probe’s owner function and the second parameter is the probe’s ID.
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86499
2020-08-25 02:52:47 +02:00
|
|
|
ProbeManager.instrumentOneFunc(F, TM);
|
|
|
|
}
|
|
|
|
|
|
|
|
return PreservedAnalyses::none();
|
|
|
|
}
|
2020-12-11 21:18:31 +01:00
|
|
|
|
|
|
|
void PseudoProbeUpdatePass::runOnFunction(Function &F,
|
|
|
|
FunctionAnalysisManager &FAM) {
|
|
|
|
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
|
|
|
|
auto BBProfileCount = [&BFI](BasicBlock *BB) {
|
|
|
|
return BFI.getBlockProfileCount(BB)
|
|
|
|
? BFI.getBlockProfileCount(BB).getValue()
|
|
|
|
: 0;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Collect the sum of execution weight for each probe.
|
|
|
|
ProbeFactorMap ProbeFactors;
|
|
|
|
for (auto &Block : F) {
|
|
|
|
for (auto &I : Block) {
|
2021-02-25 09:52:58 +01:00
|
|
|
if (Optional<PseudoProbe> Probe = extractProbe(I)) {
|
2021-06-17 20:09:13 +02:00
|
|
|
uint64_t Hash = computeCallStackHash(I);
|
|
|
|
ProbeFactors[{Probe->Id, Hash}] += BBProfileCount(&Block);
|
2021-02-25 09:52:58 +01:00
|
|
|
}
|
2020-12-11 21:18:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fix up over-counted probes.
|
|
|
|
for (auto &Block : F) {
|
|
|
|
for (auto &I : Block) {
|
|
|
|
if (Optional<PseudoProbe> Probe = extractProbe(I)) {
|
2021-06-17 20:09:13 +02:00
|
|
|
uint64_t Hash = computeCallStackHash(I);
|
|
|
|
float Sum = ProbeFactors[{Probe->Id, Hash}];
|
|
|
|
if (Sum != 0)
|
|
|
|
setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
|
2020-12-11 21:18:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
|
|
|
|
ModuleAnalysisManager &AM) {
|
|
|
|
if (UpdatePseudoProbe) {
|
|
|
|
for (auto &F : M) {
|
|
|
|
if (F.isDeclaration())
|
|
|
|
continue;
|
|
|
|
FunctionAnalysisManager &FAM =
|
|
|
|
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
|
|
|
|
runOnFunction(F, FAM);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return PreservedAnalyses::none();
|
|
|
|
}
|