mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[SyntheticCounts] Rewrite the code using only graph traits.
Summary: The intent of this is to allow the code to be used with ThinLTO. In Thinlink phase, a traditional Callgraph can not be computed even though all the necessary information (nodes and edges of a call graph) is available. This is due to the fact that CallGraph class is closely tied to the IR. This patch first extends GraphTraits to add a CallGraphTraits graph. This is then used to implement a version of counts propagation on a generic callgraph. Reviewers: davidxl Subscribers: mehdi_amini, tejohnson, llvm-commits Differential Revision: https://reviews.llvm.org/D42311 llvm-svn: 323475
This commit is contained in:
parent
4cec19ffa9
commit
bd3a1aab59
@ -495,6 +495,56 @@ struct GraphTraits<const CallGraph *> : public GraphTraits<
|
||||
}
|
||||
};
|
||||
|
||||
// FIXME: The traits here are not limited to callgraphs and can be moved
|
||||
// elsewhere including GraphTraits. They are left here because only algorithms
|
||||
// that operate on Callgraphs currently use them. If other algorithms operating
|
||||
// on a general graph need edge traversals, these can be moved.
|
||||
template <class CallGraphType>
|
||||
struct CallGraphTraits : public GraphTraits<CallGraphType> {
|
||||
// Elements to provide:
|
||||
|
||||
// typedef EdgeRef - Type of Edge token in the graph, which should
|
||||
// be cheap to copy.
|
||||
// typedef CallEdgeIteratorType - Type used to iterate over children edges in
|
||||
// graph, dereference to a EdgeRef.
|
||||
|
||||
// static CallEdgeIteratorType call_edge_begin(NodeRef)
|
||||
// static CallEdgeIteratorType call_edge_end (NodeRef)
|
||||
// Return iterators that point to the beginning and ending of the call
|
||||
// edges list for the given callgraph node.
|
||||
//
|
||||
// static NodeRef edge_dest(EdgeRef)
|
||||
// Return the destination node of an edge.
|
||||
|
||||
// If anyone tries to use this class without having an appropriate
|
||||
// specialization, make an error. If you get this error, it's because you
|
||||
// need to include the appropriate specialization of GraphTraits<> for your
|
||||
// graph, or you need to define it for a new graph type. Either that or
|
||||
// your argument to XXX_begin(...) is unknown or needs to have the proper .h
|
||||
// file #include'd.
|
||||
using CallEdgeIteratorType =
|
||||
typename CallGraphType::UnknownCallGraphTypeError;
|
||||
};
|
||||
|
||||
template <class GraphType>
|
||||
iterator_range<typename CallGraphTraits<GraphType>::CallEdgeIteratorType>
|
||||
call_edges(const typename CallGraphTraits<GraphType>::NodeRef &G) {
|
||||
return make_range(CallGraphTraits<GraphType>::call_edge_begin(G),
|
||||
CallGraphTraits<GraphType>::call_edge_end(G));
|
||||
}
|
||||
|
||||
template <>
|
||||
struct CallGraphTraits<const CallGraph *>
|
||||
: public GraphTraits<const CallGraph *> {
|
||||
using EdgeRef = const CallGraphNode::CallRecord &;
|
||||
using CallEdgeIteratorType = CallGraphNode::const_iterator;
|
||||
|
||||
static CallEdgeIteratorType call_edge_begin(NodeRef N) { return N->begin(); }
|
||||
static CallEdgeIteratorType call_edge_end(NodeRef N) { return N->end(); }
|
||||
|
||||
static NodeRef edge_dest(EdgeRef E) { return E.second; }
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_ANALYSIS_CALLGRAPH_H
|
||||
|
@ -15,7 +15,7 @@
|
||||
#define LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
|
||||
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/Support/ScaledNumber.h"
|
||||
|
||||
namespace llvm {
|
||||
@ -23,11 +23,30 @@ namespace llvm {
|
||||
class CallGraph;
|
||||
class Function;
|
||||
|
||||
using Scaled64 = ScaledNumber<uint64_t>;
|
||||
void propagateSyntheticCounts(
|
||||
const CallGraph &CG, function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
|
||||
function_ref<uint64_t(Function *F)> GetCount,
|
||||
function_ref<void(Function *F, uint64_t)> AddToCount);
|
||||
/// Class with methods to propagate synthetic entry counts.
|
||||
///
|
||||
/// This class is templated on the type of the call graph and designed to work
|
||||
/// with the traditional per-module callgraph and the summary callgraphs used in
|
||||
/// ThinLTO. This contains only static methods and alias templates.
|
||||
template <typename CallGraphType> class SyntheticCountsUtils {
|
||||
public:
|
||||
using Scaled64 = ScaledNumber<uint64_t>;
|
||||
using CGT = CallGraphTraits<CallGraphType>;
|
||||
using NodeRef = typename CGT::NodeRef;
|
||||
using EdgeRef = typename CGT::EdgeRef;
|
||||
using SccTy = std::vector<NodeRef>;
|
||||
|
||||
using GetRelBBFreqTy = function_ref<Optional<Scaled64>(EdgeRef)>;
|
||||
using GetCountTy = function_ref<uint64_t(NodeRef)>;
|
||||
using AddCountTy = function_ref<void(NodeRef, uint64_t)>;
|
||||
|
||||
static void propagate(const CallGraphType &CG, GetRelBBFreqTy GetRelBBFreq,
|
||||
GetCountTy GetCount, AddCountTy AddCount);
|
||||
|
||||
private:
|
||||
static void propagateFromSCC(const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq,
|
||||
GetCountTy GetCount, AddCountTy AddCount);
|
||||
};
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
|
@ -23,100 +23,91 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Given a set of functions in an SCC, propagate entry counts to functions
|
||||
// called by the SCC.
|
||||
static void
|
||||
propagateFromSCC(const SmallPtrSetImpl<Function *> &SCCFunctions,
|
||||
function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
|
||||
function_ref<uint64_t(Function *F)> GetCount,
|
||||
function_ref<void(Function *F, uint64_t)> AddToCount) {
|
||||
// Given an SCC, propagate entry counts along the edge of the SCC nodes.
|
||||
template <typename CallGraphType>
|
||||
void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
|
||||
const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount,
|
||||
AddCountTy AddCount) {
|
||||
|
||||
SmallVector<CallSite, 16> CallSites;
|
||||
SmallPtrSet<NodeRef, 8> SCCNodes;
|
||||
SmallVector<std::pair<NodeRef, EdgeRef>, 8> SCCEdges, NonSCCEdges;
|
||||
|
||||
// Gather all callsites in the SCC.
|
||||
auto GatherCallSites = [&]() {
|
||||
for (auto *F : SCCFunctions) {
|
||||
assert(F && !F->isDeclaration());
|
||||
for (auto &I : instructions(F)) {
|
||||
if (auto CS = CallSite(&I)) {
|
||||
CallSites.push_back(CS);
|
||||
}
|
||||
}
|
||||
for (auto &Node : SCC)
|
||||
SCCNodes.insert(Node);
|
||||
|
||||
// Partition the edges coming out of the SCC into those whose destination is
|
||||
// in the SCC and the rest.
|
||||
for (const auto &Node : SCCNodes) {
|
||||
for (auto &E : call_edges<CallGraphType>(Node)) {
|
||||
if (SCCNodes.count(CGT::edge_dest(E)))
|
||||
SCCEdges.emplace_back(Node, E);
|
||||
else
|
||||
NonSCCEdges.emplace_back(Node, E);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
GatherCallSites();
|
||||
|
||||
// Partition callsites so that the callsites that call functions in the same
|
||||
// SCC come first.
|
||||
auto Mid = partition(CallSites, [&](CallSite &CS) {
|
||||
auto *Callee = CS.getCalledFunction();
|
||||
if (Callee)
|
||||
return SCCFunctions.count(Callee);
|
||||
// FIXME: Use the !callees metadata to propagate counts through indirect
|
||||
// calls.
|
||||
return 0U;
|
||||
});
|
||||
|
||||
// For functions in the same SCC, update the counts in two steps:
|
||||
// 1. Compute the additional count for each function by propagating the counts
|
||||
// along all incoming edges to the function that originate from the same SCC
|
||||
// and summing them up.
|
||||
// 2. Add the additional counts to the functions in the SCC.
|
||||
// For nodes in the same SCC, update the counts in two steps:
|
||||
// 1. Compute the additional count for each node by propagating the counts
|
||||
// along all incoming edges to the node that originate from within the same
|
||||
// SCC and summing them up.
|
||||
// 2. Add the additional counts to the nodes in the SCC.
|
||||
// This ensures that the order of
|
||||
// traversal of functions within the SCC doesn't change the final result.
|
||||
// traversal of nodes within the SCC doesn't affect the final result.
|
||||
|
||||
DenseMap<Function *, uint64_t> AdditionalCounts;
|
||||
for (auto It = CallSites.begin(); It != Mid; It++) {
|
||||
auto &CS = *It;
|
||||
auto RelFreq = GetCallSiteRelFreq(CS);
|
||||
Function *Callee = CS.getCalledFunction();
|
||||
Function *Caller = CS.getCaller();
|
||||
DenseMap<NodeRef, uint64_t> AdditionalCounts;
|
||||
for (auto &E : SCCEdges) {
|
||||
auto OptRelFreq = GetRelBBFreq(E.second);
|
||||
if (!OptRelFreq)
|
||||
continue;
|
||||
Scaled64 RelFreq = OptRelFreq.getValue();
|
||||
auto Caller = E.first;
|
||||
auto Callee = CGT::edge_dest(E.second);
|
||||
RelFreq *= Scaled64(GetCount(Caller), 0);
|
||||
uint64_t AdditionalCount = RelFreq.toInt<uint64_t>();
|
||||
AdditionalCounts[Callee] += AdditionalCount;
|
||||
}
|
||||
|
||||
// Update the counts for the functions in the SCC.
|
||||
// Update the counts for the nodes in the SCC.
|
||||
for (auto &Entry : AdditionalCounts)
|
||||
AddToCount(Entry.first, Entry.second);
|
||||
AddCount(Entry.first, Entry.second);
|
||||
|
||||
// Now update the counts for functions not in SCC.
|
||||
for (auto It = Mid; It != CallSites.end(); It++) {
|
||||
auto &CS = *It;
|
||||
auto Weight = GetCallSiteRelFreq(CS);
|
||||
Function *Callee = CS.getCalledFunction();
|
||||
Function *Caller = CS.getCaller();
|
||||
Weight *= Scaled64(GetCount(Caller), 0);
|
||||
AddToCount(Callee, Weight.toInt<uint64_t>());
|
||||
// Now update the counts for nodes outside the SCC.
|
||||
for (auto &E : NonSCCEdges) {
|
||||
auto OptRelFreq = GetRelBBFreq(E.second);
|
||||
if (!OptRelFreq)
|
||||
continue;
|
||||
Scaled64 RelFreq = OptRelFreq.getValue();
|
||||
auto Caller = E.first;
|
||||
auto Callee = CGT::edge_dest(E.second);
|
||||
RelFreq *= Scaled64(GetCount(Caller), 0);
|
||||
AddCount(Callee, RelFreq.toInt<uint64_t>());
|
||||
}
|
||||
}
|
||||
|
||||
/// Propgate synthetic entry counts on a callgraph.
|
||||
/// Propgate synthetic entry counts on a callgraph \p CG.
|
||||
///
|
||||
/// This performs a reverse post-order traversal of the callgraph SCC. For each
|
||||
/// SCC, it first propagates the entry counts to the functions within the SCC
|
||||
/// SCC, it first propagates the entry counts to the nodes within the SCC
|
||||
/// through call edges and updates them in one shot. Then the entry counts are
|
||||
/// propagated to functions outside the SCC.
|
||||
void llvm::propagateSyntheticCounts(
|
||||
const CallGraph &CG, function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
|
||||
function_ref<uint64_t(Function *F)> GetCount,
|
||||
function_ref<void(Function *F, uint64_t)> AddToCount) {
|
||||
/// propagated to nodes outside the SCC. This requires \p CallGraphTraits
|
||||
/// to have a specialization for \p CallGraphType.
|
||||
|
||||
SmallVector<SmallPtrSet<Function *, 8>, 16> SCCs;
|
||||
for (auto I = scc_begin(&CG); !I.isAtEnd(); ++I) {
|
||||
auto SCC = *I;
|
||||
template <typename CallGraphType>
|
||||
void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
|
||||
GetRelBBFreqTy GetRelBBFreq,
|
||||
GetCountTy GetCount,
|
||||
AddCountTy AddCount) {
|
||||
std::vector<SccTy> SCCs;
|
||||
|
||||
SmallPtrSet<Function *, 8> SCCFunctions;
|
||||
for (auto *Node : SCC) {
|
||||
Function *F = Node->getFunction();
|
||||
if (F && !F->isDeclaration()) {
|
||||
SCCFunctions.insert(F);
|
||||
}
|
||||
}
|
||||
SCCs.push_back(SCCFunctions);
|
||||
}
|
||||
// Collect all the SCCs.
|
||||
for (auto I = scc_begin(CG); !I.isAtEnd(); ++I)
|
||||
SCCs.push_back(*I);
|
||||
|
||||
for (auto &SCCFunctions : reverse(SCCs))
|
||||
propagateFromSCC(SCCFunctions, GetCallSiteRelFreq, GetCount, AddToCount);
|
||||
// The callgraph-scc needs to be visited in top-down order for propagation.
|
||||
// The scc iterator returns the scc in bottom-up order, so reverse the SCCs
|
||||
// and call propagateFromSCC.
|
||||
for (auto &SCC : reverse(SCCs))
|
||||
propagateFromSCC(SCC, GetRelBBFreq, GetCount, AddCount);
|
||||
}
|
||||
|
||||
template class llvm::SyntheticCountsUtils<const CallGraph *>;
|
||||
|
@ -102,23 +102,34 @@ PreservedAnalyses SyntheticCountsPropagation::run(Module &M,
|
||||
// Set initial entry counts.
|
||||
initializeCounts(M, [&](Function *F, uint64_t Count) { Counts[F] = Count; });
|
||||
|
||||
// Compute the relative block frequency for a callsite. Use scaled numbers
|
||||
// Compute the relative block frequency for a call edge. Use scaled numbers
|
||||
// and not integers since the relative block frequency could be less than 1.
|
||||
auto GetCallSiteRelFreq = [&](CallSite CS) {
|
||||
auto GetCallSiteRelFreq = [&](const CallGraphNode::CallRecord &Edge) {
|
||||
Optional<Scaled64> Res = None;
|
||||
if (!Edge.first)
|
||||
return Res;
|
||||
assert(isa<Instruction>(Edge.first));
|
||||
CallSite CS(cast<Instruction>(Edge.first));
|
||||
Function *Caller = CS.getCaller();
|
||||
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
|
||||
BasicBlock *CSBB = CS.getInstruction()->getParent();
|
||||
Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
|
||||
Scaled64 BBFreq(BFI.getBlockFreq(CSBB).getFrequency(), 0);
|
||||
BBFreq /= EntryFreq;
|
||||
return BBFreq;
|
||||
return Optional<Scaled64>(BBFreq);
|
||||
};
|
||||
|
||||
CallGraph CG(M);
|
||||
// Propgate the entry counts on the callgraph.
|
||||
propagateSyntheticCounts(
|
||||
CG, GetCallSiteRelFreq, [&](Function *F) { return Counts[F]; },
|
||||
[&](Function *F, uint64_t New) { Counts[F] += New; });
|
||||
SyntheticCountsUtils<const CallGraph *>::propagate(
|
||||
&CG, GetCallSiteRelFreq,
|
||||
[&](const CallGraphNode *N) { return Counts[N->getFunction()]; },
|
||||
[&](const CallGraphNode *N, uint64_t New) {
|
||||
auto F = N->getFunction();
|
||||
if (!F || F->isDeclaration())
|
||||
return;
|
||||
Counts[F] += New;
|
||||
});
|
||||
|
||||
// Set the counts as metadata.
|
||||
for (auto Entry : Counts)
|
||||
|
Loading…
Reference in New Issue
Block a user