mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
fd7e7fce85
Use profiled call edges to augment the top-down order. There are cases where the top-down order computed based on the static call graph doesn't reflect real execution order. For example: 1. Incomplete static call graph due to unknown indirect call targets. Adjusting the order by considering indirect call edges from the profile can enable the inlining of indirect call targets by allowing the caller to be processed before them. 2. Mutual call edges in an SCC. The static processing order computed for an SCC may not reflect the call contexts in the context-sensitive profile, thus may cause potential inlining to be overlooked. The function order in one SCC is being adjusted to a top-down order based on the profile to favor more inlining. 3. Transitive indirect call edges due to inlining. When a callee function is inlined into a caller function in LTO prelink, every call edge originating from the callee will be transferred to the caller. If any of the transferred edges is indirect, the original profiled indirect edge, even if considered, would not enforce a top-down order from the caller to the potential indirect call target in LTO postlink since the inlined callee is gone from the static call graph. 4. #3 can happen even for direct call targets, due to functions defined in header files. Header functions, when included into source files, are defined multiple times but only one definition survives due to ODR. Therefore, the LTO prelink inlining done on those dropped definitions can be useless based on a local file scope. More importantly, the inlinee, once fully inlined to a to-be-dropped inliner, will have no profile to consume when its outlined version is compiled. This can lead to a profile-less prelink compilation for the outlined version of the inlinee function which may be called from external modules. While this isn't easy to fix, we rely on the postlink AutoFDO pipeline to optimize the inlinee.
Since the surviving copy of the inliner (defined in headers) can be inlined in its local scope in prelink, it may not exist in the merged IR in postlink, and we'll need the profiled call edges to enforce a top-down order for the rest of the functions. Considering those cases, a profiled call graph completely independent of the static call graph is constructed based on profile data, where function objects are not even needed to handle case #3 and case #4. I'm seeing an average 0.4% perf win out of SPEC2017. For certain benchmarks such as Xalancbmk and GCC, the win is bigger, above 2%. The change is an enhancement to https://reviews.llvm.org/D95988. Reviewed By: wmi, wenlei Differential Revision: https://reviews.llvm.org/D99351
153 lines
6.7 KiB
C++
153 lines
6.7 KiB
C++
//===- Transforms/IPO/SampleContextTracker.h --------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
/// \file
|
|
/// This file provides the interface for context-sensitive profile tracker used
|
|
/// by CSSPGO.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H
|
|
#define LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/StringMap.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/IR/DebugInfoMetadata.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/ProfileData/SampleProf.h"
|
|
#include <list>
|
|
#include <map>
|
|
#include <vector>
|
|
|
|
using namespace llvm;
|
|
using namespace sampleprof;
|
|
|
|
namespace llvm {
|
|
|
|
// Internal trie tree representation used for tracking context tree and sample
|
|
// profiles. The path from root node to a given node represents the context of
|
|
// that nodes' profile.
|
|
class ContextTrieNode {
|
|
public:
|
|
ContextTrieNode(ContextTrieNode *Parent = nullptr,
|
|
StringRef FName = StringRef(),
|
|
FunctionSamples *FSamples = nullptr,
|
|
LineLocation CallLoc = {0, 0})
|
|
: ParentContext(Parent), FuncName(FName), FuncSamples(FSamples),
|
|
CallSiteLoc(CallLoc){};
|
|
ContextTrieNode *getChildContext(const LineLocation &CallSite,
|
|
StringRef CalleeName);
|
|
ContextTrieNode *getHottestChildContext(const LineLocation &CallSite);
|
|
ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite,
|
|
StringRef CalleeName,
|
|
bool AllowCreate = true);
|
|
|
|
ContextTrieNode &moveToChildContext(const LineLocation &CallSite,
|
|
ContextTrieNode &&NodeToMove,
|
|
StringRef ContextStrToRemove,
|
|
bool DeleteNode = true);
|
|
void removeChildContext(const LineLocation &CallSite, StringRef CalleeName);
|
|
std::map<uint32_t, ContextTrieNode> &getAllChildContext();
|
|
StringRef getFuncName() const;
|
|
FunctionSamples *getFunctionSamples() const;
|
|
void setFunctionSamples(FunctionSamples *FSamples);
|
|
LineLocation getCallSiteLoc() const;
|
|
ContextTrieNode *getParentContext() const;
|
|
void setParentContext(ContextTrieNode *Parent);
|
|
void dump();
|
|
|
|
private:
|
|
static uint32_t nodeHash(StringRef ChildName, const LineLocation &Callsite);
|
|
|
|
// Map line+discriminator location to child context
|
|
std::map<uint32_t, ContextTrieNode> AllChildContext;
|
|
|
|
// Link to parent context node
|
|
ContextTrieNode *ParentContext;
|
|
|
|
// Function name for current context
|
|
StringRef FuncName;
|
|
|
|
// Function Samples for current context
|
|
FunctionSamples *FuncSamples;
|
|
|
|
// Callsite location in parent context
|
|
LineLocation CallSiteLoc;
|
|
};
|
|
|
|
// Profile tracker that manages profiles and its associated context. It
|
|
// provides interfaces used by sample profile loader to query context profile or
|
|
// base profile for given function or location; it also manages context tree
|
|
// manipulation that is needed to accommodate inline decisions so we have
|
|
// accurate post-inline profile for functions. Internally context profiles
|
|
// are organized in a trie, with each node representing profile for specific
|
|
// calling context and the context is identified by path from root to the node.
|
|
class SampleContextTracker {
|
|
public:
|
|
using ContextSamplesTy = SmallVector<FunctionSamples *, 16>;
|
|
|
|
SampleContextTracker(StringMap<FunctionSamples> &Profiles);
|
|
// Query context profile for a specific callee with given name at a given
|
|
// call-site. The full context is identified by location of call instruction.
|
|
FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst,
|
|
StringRef CalleeName);
|
|
// Get samples for indirect call targets for call site at given location.
|
|
std::vector<const FunctionSamples *>
|
|
getIndirectCalleeContextSamplesFor(const DILocation *DIL);
|
|
// Query context profile for a given location. The full context
|
|
// is identified by input DILocation.
|
|
FunctionSamples *getContextSamplesFor(const DILocation *DIL);
|
|
// Query context profile for a given sample contxt of a function.
|
|
FunctionSamples *getContextSamplesFor(const SampleContext &Context);
|
|
// Get all context profile for given function.
|
|
ContextSamplesTy &getAllContextSamplesFor(const Function &Func);
|
|
ContextSamplesTy &getAllContextSamplesFor(StringRef Name);
|
|
// Query base profile for a given function. A base profile is a merged view
|
|
// of all context profiles for contexts that are not inlined.
|
|
FunctionSamples *getBaseSamplesFor(const Function &Func,
|
|
bool MergeContext = true);
|
|
// Query base profile for a given function by name.
|
|
FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext = true);
|
|
// Retrieve the context trie node for given profile context
|
|
ContextTrieNode *getContextFor(const SampleContext &Context);
|
|
// Mark a context profile as inlined when function is inlined.
|
|
// This makes sure that inlined context profile will be excluded in
|
|
// function's base profile.
|
|
void markContextSamplesInlined(const FunctionSamples *InlinedSamples);
|
|
ContextTrieNode &getRootContext();
|
|
void promoteMergeContextSamplesTree(const Instruction &Inst,
|
|
StringRef CalleeName);
|
|
// Dump the internal context profile trie.
|
|
void dump();
|
|
|
|
private:
|
|
ContextTrieNode *getContextFor(const DILocation *DIL);
|
|
ContextTrieNode *getCalleeContextFor(const DILocation *DIL,
|
|
StringRef CalleeName);
|
|
ContextTrieNode *getOrCreateContextPath(const SampleContext &Context,
|
|
bool AllowCreate);
|
|
ContextTrieNode *getTopLevelContextNode(StringRef FName);
|
|
ContextTrieNode &addTopLevelContextNode(StringRef FName);
|
|
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo);
|
|
void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode,
|
|
StringRef ContextStrToRemove);
|
|
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
|
|
ContextTrieNode &ToNodeParent,
|
|
StringRef ContextStrToRemove);
|
|
|
|
// Map from function name to context profiles (excluding base profile)
|
|
StringMap<ContextSamplesTy> FuncToCtxtProfiles;
|
|
|
|
// Root node for context trie tree
|
|
ContextTrieNode RootContext;
|
|
};
|
|
|
|
} // end namespace llvm
|
|
#endif // LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H
|