1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

Add a pass to generate synthetic function entry counts.

Summary:
This pass synthesizes function entry counts by traversing the callgraph
and using the relative block frequencies of the callsites. The intended
use of these counts is in inlining to determine hot/cold callsites in
the absence of profile information.

The pass is split into two files with the code that propagates the
counts in a callgraph in a Utils file. I plan to add support for
propagation in the thinlto link phase and the propagation code will be
shared and hence this split. I did not add support to the old PM since
hot callsite determination in inlining is not possible in old PM
(although we could use hot callee heuristic with synthetic counts in the
old PM it is not worth the effort tuning it)

Reviewers: davidxl, silvas

Subscribers: mgorny, mehdi_amini, llvm-commits

Differential Revision: https://reviews.llvm.org/D41604

llvm-svn: 322110
This commit is contained in:
Easwaran Raman 2018-01-09 19:39:35 +00:00
parent 6abdbd7d39
commit f04207e3b2
17 changed files with 485 additions and 13 deletions

View File

@ -0,0 +1,33 @@
//===- SyntheticCountsUtils.h - utilities for count propagation--*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines utilities for synthetic counts propagation.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
#define LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/Support/ScaledNumber.h"
namespace llvm {
class CallGraph;
class Function;
using Scaled64 = ScaledNumber<uint64_t>;
/// Propagate synthetic function entry counts over the call graph \p CG.
///
/// The counts themselves are owned by the caller and accessed only through
/// the supplied callbacks:
/// \p GetCallSiteRelFreq returns the relative frequency of a callsite as a
/// Scaled64, \p GetCount returns the current count of a function, and
/// \p AddToCount adds the given value to the count of a function.
void propagateSyntheticCounts(
const CallGraph &CG, function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
function_ref<uint64_t(Function *F)> GetCount,
function_ref<void(Function *F, uint64_t)> AddToCount);
} // namespace llvm
#endif

View File

@ -237,10 +237,11 @@ public:
/// \brief Set the entry count for this function.
///
/// Entry count is the number of times this function was executed based on
/// pgo data. \p Imports points to a set of GUIDs that needs to be imported
/// by the function for sample PGO, to enable the same inlines as the
/// profiled optimized binary.
void setEntryCount(uint64_t Count,
/// pgo data. \p Synthetic indicates the count is synthesized by analysis and
/// not from a profile run. \p Imports points to a set of GUIDs that needs to
/// be imported by the function for sample PGO, to enable the same inlines as
/// the profiled optimized binary.
void setEntryCount(uint64_t Count, bool Synthetic = false,
const DenseSet<GlobalValue::GUID> *Imports = nullptr);
/// \brief Get the entry count for this function.

View File

@ -66,10 +66,11 @@ public:
/// Return metadata specifying that a branch or switch is unpredictable.
MDNode *createUnpredictable();
/// Return metadata containing the entry \p Count for a function, and the
/// Return metadata containing the entry \p Count for a function, a boolean
/// \p Synthetic indicating whether the counts were synthesized, and the
/// GUIDs stored in \p Imports that need to be imported for sample PGO, to
/// enable the same inlines as the profiled optimized binary
MDNode *createFunctionEntryCount(uint64_t Count,
MDNode *createFunctionEntryCount(uint64_t Count, bool Synthetic,
const DenseSet<GlobalValue::GUID> *Imports);
/// Return metadata containing the section prefix for a function.

View File

@ -0,0 +1,19 @@
#ifndef LLVM_TRANSFORMS_IPO_SYNTHETIC_COUNTS_PROPAGATION_H
#define LLVM_TRANSFORMS_IPO_SYNTHETIC_COUNTS_PROPAGATION_H
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/ScaledNumber.h"
namespace llvm {
class Function;
class Module;
/// Module pass for the new pass manager (it derives from PassInfoMixin) that
/// generates synthetic function entry counts. The only entry point is run(),
/// which takes the module and the module analysis manager.
class SyntheticCountsPropagation
: public PassInfoMixin<SyntheticCountsPropagation> {
public:
/// Run the pass over \p M, using \p MAM to obtain analyses.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};
} // namespace llvm
#endif

View File

@ -74,6 +74,7 @@ add_llvm_library(LLVMAnalysis
ScalarEvolutionAliasAnalysis.cpp
ScalarEvolutionExpander.cpp
ScalarEvolutionNormalization.cpp
SyntheticCountsUtils.cpp
TargetLibraryInfo.cpp
TargetTransformInfo.cpp
Trace.cpp

View File

@ -0,0 +1,122 @@
//===--- SyntheticCountsUtils.cpp - synthetic counts propagation utils ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines utilities for propagating synthetic counts.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
// Given a set of functions in an SCC, propagate entry counts to functions
// called by the SCC.
static void
propagateFromSCC(const SmallPtrSetImpl<Function *> &SCCFunctions,
function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
function_ref<uint64_t(Function *F)> GetCount,
function_ref<void(Function *F, uint64_t)> AddToCount) {
SmallVector<CallSite, 16> CallSites;
// Gather all callsites in the SCC.
auto GatherCallSites = [&]() {
for (auto *F : SCCFunctions) {
assert(F && !F->isDeclaration());
for (auto &I : instructions(F)) {
if (auto CS = CallSite(&I)) {
CallSites.push_back(CS);
}
}
}
};
GatherCallSites();
// Partition callsites so that the callsites that call functions in the same
// SCC come first.
auto Mid = partition(CallSites, [&](CallSite &CS) {
auto *Callee = CS.getCalledFunction();
if (Callee)
return SCCFunctions.count(Callee);
// FIXME: Use the !callees metadata to propagate counts through indirect
// calls.
return 0U;
});
// For functions in the same SCC, update the counts in two steps:
// 1. Compute the additional count for each function by propagating the counts
// along all incoming edges to the function that originate from the same SCC
// and summing them up.
// 2. Add the additional counts to the functions in the SCC.
// This ensures that the order of
// traversal of functions within the SCC doesn't change the final result.
DenseMap<Function *, uint64_t> AdditionalCounts;
for (auto It = CallSites.begin(); It != Mid; It++) {
auto &CS = *It;
auto RelFreq = GetCallSiteRelFreq(CS);
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
RelFreq *= Scaled64(GetCount(Caller), 0);
uint64_t AdditionalCount = RelFreq.toInt<uint64_t>();
AdditionalCounts[Callee] += AdditionalCount;
}
// Update the counts for the functions in the SCC.
for (auto &Entry : AdditionalCounts)
AddToCount(Entry.first, Entry.second);
// Now update the counts for functions not in SCC.
for (auto It = Mid; It != CallSites.end(); It++) {
auto &CS = *It;
auto Weight = GetCallSiteRelFreq(CS);
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
Weight *= Scaled64(GetCount(Caller), 0);
AddToCount(Callee, Weight.toInt<uint64_t>());
}
}
/// Propgate synthetic entry counts on a callgraph.
///
/// This performs a reverse post-order traversal of the callgraph SCC. For each
/// SCC, it first propagates the entry counts to the functions within the SCC
/// through call edges and updates them in one shot. Then the entry counts are
/// propagated to functions outside the SCC.
void llvm::propagateSyntheticCounts(
const CallGraph &CG, function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
function_ref<uint64_t(Function *F)> GetCount,
function_ref<void(Function *F, uint64_t)> AddToCount) {
SmallVector<SmallPtrSet<Function *, 8>, 16> SCCs;
for (auto I = scc_begin(&CG); !I.isAtEnd(); ++I) {
auto SCC = *I;
SmallPtrSet<Function *, 8> SCCFunctions;
for (auto *Node : SCC) {
Function *F = Node->getFunction();
if (F && !F->isDeclaration()) {
SCCFunctions.insert(F);
}
}
SCCs.push_back(SCCFunctions);
}
for (auto &SCCFunctions : reverse(SCCs))
propagateFromSCC(SCCFunctions, GetCallSiteRelFreq, GetCount, AddToCount);
}

View File

@ -1320,10 +1320,11 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) {
setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit));
}
void Function::setEntryCount(uint64_t Count,
void Function::setEntryCount(uint64_t Count, bool Synthetic,
const DenseSet<GlobalValue::GUID> *S) {
MDBuilder MDB(getContext());
setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count, S));
setMetadata(LLVMContext::MD_prof,
MDB.createFunctionEntryCount(Count, Synthetic, S));
}
Optional<uint64_t> Function::getEntryCount() const {

View File

@ -58,10 +58,14 @@ MDNode *MDBuilder::createUnpredictable() {
}
MDNode *MDBuilder::createFunctionEntryCount(
uint64_t Count, const DenseSet<GlobalValue::GUID> *Imports) {
uint64_t Count, bool Synthetic,
const DenseSet<GlobalValue::GUID> *Imports) {
Type *Int64Ty = Type::getInt64Ty(Context);
SmallVector<Metadata *, 8> Ops;
Ops.push_back(createString("function_entry_count"));
if (Synthetic)
Ops.push_back(createString("synthetic_function_entry_count"));
else
Ops.push_back(createString("function_entry_count"));
Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count)));
if (Imports) {
SmallVector<GlobalValue::GUID, 2> OrderID(Imports->begin(), Imports->end());

View File

@ -1695,8 +1695,11 @@ void Verifier::verifyFunctionMetadata(
"expected string with name of the !prof annotation", MD);
MDString *MDS = cast<MDString>(MD->getOperand(0));
StringRef ProfName = MDS->getString();
Assert(ProfName.equals("function_entry_count"),
"first operand should be 'function_entry_count'", MD);
Assert(ProfName.equals("function_entry_count") ||
ProfName.equals("synthetic_function_entry_count"),
"first operand should be 'function_entry_count'"
" or 'synthetic_function_entry_count'",
MD);
// Check second operand.
Assert(MD->getOperand(1) != nullptr, "second operand should not be null",

View File

@ -80,6 +80,7 @@
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/InstrProfiling.h"
@ -176,6 +177,11 @@ static cl::opt<bool> EnableGVNSink(
"enable-npm-gvn-sink", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
static cl::opt<bool> EnableSyntheticCounts(
"enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
cl::desc("Run synthetic function entry count generation "
"pass"));
static Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
@ -622,6 +628,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
// Synthesize function entry counts for non-PGO compilation.
if (EnableSyntheticCounts && !PGOOpt)
MPM.addPass(SyntheticCountsPropagation());
// Require the GlobalsAA analysis for the module so we can query it within
// the CGSCC pipeline.
MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());

View File

@ -73,6 +73,7 @@ MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
MODULE_PASS("verify", VerifierPass())
#undef MODULE_PASS

View File

@ -29,6 +29,7 @@ add_llvm_library(LLVMipo
SampleProfile.cpp
StripDeadPrototypes.cpp
StripSymbols.cpp
SyntheticCountsPropagation.cpp
ThinLTOBitcodeWriter.cpp
WholeProgramDevirt.cpp

View File

@ -1467,7 +1467,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
// Sets the GUIDs that are inlined in the profiled binary. This is used
// for ThinLink to make correct liveness analysis, and also make the IR
// match the profiled binary before annotation.
F.setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs);
F.setEntryCount(Samples->getHeadSamples() + 1, false, &InlinedGUIDs);
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);

View File

@ -0,0 +1,127 @@
//=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a transformation that synthesizes entry counts for
// functions and attaches !prof metadata to functions with the synthesized
// counts. The presence of !prof metadata with counter name set to
// 'synthetic_function_entry_count' indicates that the value of the counter is
// an estimation of the likely execution count of the function. This transform
// is applied only in non PGO mode as functions get 'real' profile-based
// function entry counts in the PGO mode.
//
// The transformation works by first assigning some initial values to the entry
// counts of all functions and then doing a top-down traversal of the
// callgraph-scc to propagate the counts. For each function the set of callsites
// and their relative block frequency is gathered. The relative block frequency
// is multiplied by the entry count of the caller and added to the callee's
// entry count. For non-trivial SCCs, the new counts are computed from the
// previous counts and updated in one shot.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using Scaled64 = ScaledNumber<uint64_t>;
#define DEBUG_TYPE "synthetic-counts-propagation"
/// Initial synthetic count assigned to functions that do not fall into any of
/// the more specific categories below (default 10).
static cl::opt<int>
InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
cl::ZeroOrMore,
cl::desc("Initial value of synthetic entry count."));
/// Initial synthetic count assigned to inline functions, i.e. functions
/// carrying the alwaysinline or inlinehint attribute (default 15).
static cl::opt<int> InlineSyntheticCount(
"inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore,
cl::desc("Initial synthetic entry count for inline functions."));
/// Initial synthetic count assigned to cold functions, i.e. functions carrying
/// the cold or noinline attribute (default 5).
static cl::opt<int> ColdSyntheticCount(
"cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore,
cl::desc("Initial synthetic entry count for cold functions."));
// Assign initial synthetic entry counts to functions.
//
// Every function in M that has a body is reported exactly once through
// SetCount; declarations are skipped. The categories are checked in priority
// order: inline hints first, then non-escaping local functions, then
// cold/noinline functions, and finally the default.
static void
initializeCounts(Module &M, function_ref<void(Function *, uint64_t)> SetCount) {
  for (Function &F : M) {
    if (F.isDeclaration())
      continue;

    uint64_t InitialCount = InitialSyntheticCount;
    if (F.hasFnAttribute(Attribute::AlwaysInline) ||
        F.hasFnAttribute(Attribute::InlineHint)) {
      // Use a higher value for inline functions to account for the fact that
      // these are usually beneficial to inline.
      InitialCount = InlineSyntheticCount;
    } else if (F.hasLocalLinkage() && !F.hasAddressTaken()) {
      // Local functions without inline hints get counts only through
      // propagation, provided every use is a direct call to them. A use as a
      // call *argument* (or any other non-callee use) means the function may
      // be called indirectly, so hasAddressTaken() is used rather than merely
      // checking that each user is a call or invoke instruction.
      InitialCount = 0;
    } else if (F.hasFnAttribute(Attribute::Cold) ||
               F.hasFnAttribute(Attribute::NoInline)) {
      // Use a lower value for noinline and cold functions.
      InitialCount = ColdSyntheticCount;
    }
    SetCount(&F, InitialCount);
  }
}
/// Synthesize entry counts for the functions defined in \p M and attach them
/// as synthetic function entry count metadata.
///
/// Initial counts are assigned by initializeCounts, propagated along the call
/// graph with propagateSyntheticCounts, and finally written to each function
/// with setEntryCount(Count, /*Synthetic=*/true). All analyses are reported as
/// preserved.
PreservedAnalyses SyntheticCountsPropagation::run(Module &M,
                                                  ModuleAnalysisManager &MAM) {
  FunctionAnalysisManager &FAM =
      MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  DenseMap<Function *, uint64_t> Counts;
  // Set initial entry counts.
  initializeCounts(M, [&](Function *F, uint64_t Count) { Counts[F] = Count; });

  // Compute the relative block frequency for a callsite. Use scaled numbers
  // and not integers since the relative block frequency could be less than 1.
  auto GetCallSiteRelFreq = [&](CallSite CS) {
    Function *Caller = CS.getCaller();
    auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
    BasicBlock *CSBB = CS.getInstruction()->getParent();
    Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
    Scaled64 BBFreq(BFI.getBlockFreq(CSBB).getFrequency(), 0);
    BBFreq /= EntryFreq;
    return BBFreq;
  };

  CallGraph CG(M);
  // Propagate the entry counts on the callgraph.
  propagateSyntheticCounts(
      CG, GetCallSiteRelFreq, [&](Function *F) { return Counts[F]; },
      [&](Function *F, uint64_t New) {
        // Keep Counts restricted to the defined functions seeded by
        // initializeCounts. A null callee (indirect call) must never become a
        // key because every key is dereferenced below, and declarations are
        // deliberately not given an entry count.
        if (!F || F->isDeclaration())
          return;
        Counts[F] += New;
      });

  // Set the counts as metadata.
  for (auto &Entry : Counts)
    Entry.first->setEntryCount(Entry.second, true);
  return PreservedAnalyses::all();
}

View File

@ -0,0 +1,79 @@
; RUN: opt -passes=synthetic-counts-propagation -S < %s | FileCheck %s
; CHECK-LABEL: define void @foo()
; CHECK: !prof ![[COUNT1:[0-9]+]]
define void @foo() {
ret void
}
; CHECK-LABEL: define void @foo_inline() #0
; CHECK: !prof ![[COUNT2:[0-9]+]]
define void @foo_inline() #0 {
ret void
}
; CHECK-LABEL: define void @foo_always_inline() #1
; CHECK: !prof ![[COUNT2]]
define void @foo_always_inline() #1 {
ret void
}
; CHECK-LABEL: define void @foo_cold() #2
; CHECK: !prof ![[COUNT3:[0-9]+]]
define void @foo_cold() #2 {
ret void
}
; CHECK-LABEL: define void @foo_noinline() #3
; CHECK: !prof ![[COUNT3]]
define void @foo_noinline() #3 {
ret void
}
; CHECK-LABEL: define internal void @foo_local()
; CHECK: !prof ![[COUNT4:[0-9]+]]
define internal void @foo_local() {
ret void
}
; CHECK-LABEL: define internal void @foo_local_escaped()
; CHECK: !prof ![[COUNT1]]
define internal void @foo_local_escaped() {
ret void
}
declare void @ext(void ()*)
; @bar passes @foo_local_escaped to @ext as a call argument; it is not the
; callee of this call, so the only use of @foo_local_escaped is a non-call use.
define void @bar() {
call void @ext(void ()* nonnull @foo_local_escaped)
ret void
}
; CHECK-LABEL: define internal void @foo_local_inline() #0
; CHECK: !prof ![[COUNT2]]
define internal void @foo_local_inline() #0 {
ret void
}
; CHECK-LABEL: define internal void @foo_local_cold() #2
; CHECK: !prof ![[COUNT4]]
define internal void @foo_local_cold() #2 {
ret void
}
; CHECK-LABEL: define linkonce void @foo_linkonce()
; CHECK: !prof ![[COUNT1]]
define linkonce void @foo_linkonce() {
ret void
}
; CHECK: ![[COUNT1]] = !{!"synthetic_function_entry_count", i64 10}
; CHECK: ![[COUNT2]] = !{!"synthetic_function_entry_count", i64 15}
; CHECK: ![[COUNT3]] = !{!"synthetic_function_entry_count", i64 5}
; CHECK: ![[COUNT4]] = !{!"synthetic_function_entry_count", i64 0}
attributes #0 = {inlinehint}
attributes #1 = {alwaysinline}
attributes #2 = {cold}
attributes #3 = {noinline}

View File

@ -0,0 +1,50 @@
; RUN: opt -passes=synthetic-counts-propagation -S < %s | FileCheck %s
; CHECK-LABEL: define void @level1a(i32 %n)
; CHECK: !prof ![[COUNT1:[0-9]+]]
define void @level1a(i32 %n) {
entry:
%cmp = icmp sgt i32 %n, 10
br i1 %cmp, label %exit, label %loop
loop:
%i = phi i32 [%n, %entry], [%i1, %loop]
call void @level2a(i32 %n)
%i1 = sub i32 %i, 1
%cmp2 = icmp eq i32 %i1, 0
br i1 %cmp2, label %exit, label %loop, !prof !1
exit:
ret void
}
; CHECK-LABEL: define void @level2a(i32 %n)
; CHECK: !prof ![[COUNT2:[0-9]+]]
define void @level2a(i32 %n) {
call void @level2b(i32 %n)
ret void
}
; CHECK-LABEL: define void @level2b(i32 %n)
; CHECK: !prof ![[COUNT2]]
define void @level2b(i32 %n) {
entry:
call void @level2a(i32 %n)
%cmp = icmp eq i32 %n, 0
br i1 %cmp, label %then, label %else, !prof !2
then:
call void @level3a(i32 %n)
br label %else
else:
ret void
}
; CHECK-LABEL: define internal void @level3a(i32 %n)
; CHECK: !prof ![[COUNT3:[0-9]+]]
define internal void @level3a(i32 %n) {
ret void
}
!1 = !{!"branch_weights", i32 1, i32 99}
!2 = !{!"branch_weights", i32 1, i32 1}
; CHECK: ![[COUNT1]] = !{!"synthetic_function_entry_count", i64 10}
; CHECK: ![[COUNT2]] = !{!"synthetic_function_entry_count", i64 520}
; CHECK: ![[COUNT3]] = !{!"synthetic_function_entry_count", i64 260}

View File

@ -0,0 +1,19 @@
; RUN: opt -passes=synthetic-counts-propagation -S < %s | FileCheck %s
; CHECK-LABEL: define void @foo()
; CHECK: !prof ![[COUNT1:[0-9]+]]
; @foo has no attributes and external linkage, so it starts from the default
; initial count (10). It calls @bar, which calls it back: the two functions
; form a single SCC.
define void @foo() {
call void @bar()
ret void
}
; CHECK-LABEL: define void @bar() #0
; CHECK: !prof ![[COUNT1]]
; @bar is inlinehint (#0), so it starts from the inline initial count (15).
; Both calls sit in their function's entry block (relative frequency 1), and
; counts inside an SCC are updated in one shot from the previous values, so
; each function receives the other's initial count: 10 + 15 = 25 for both.
define void @bar() #0 {
call void @foo()
ret void
}
attributes #0 = {inlinehint}
; CHECK: ![[COUNT1]] = !{!"synthetic_function_entry_count", i64 25}