mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
Add a pass to generate synthetic function entry counts.
Summary: This pass synthesizes function entry counts by traversing the callgraph and using the relative block frequencies of the callsites. The intended use of these counts is in inlining to determine hot/cold callsites in the absence of profile information. The pass is split into two files with the code that propagates the counts in a callgraph in a Utils file. I plan to add support for propagation in the thinlto link phase and the propagation code will be shared and hence this split. I did not add support to the old PM since hot callsite determination in inlining is not possible in old PM (although we could use hot callee heuristic with synthetic counts in the old PM it is not worth the effort tuning it) Reviewers: davidxl, silvas Subscribers: mgorny, mehdi_amini, llvm-commits Differential Revision: https://reviews.llvm.org/D41604 llvm-svn: 322110
This commit is contained in:
parent
6abdbd7d39
commit
f04207e3b2
33
include/llvm/Analysis/SyntheticCountsUtils.h
Normal file
33
include/llvm/Analysis/SyntheticCountsUtils.h
Normal file
@ -0,0 +1,33 @@
|
||||
//===- SyntheticCountsUtils.h - utilities for count propagation--*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines utilities for synthetic counts propagation.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
|
||||
#define LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
|
||||
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/Support/ScaledNumber.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class CallGraph;
|
||||
class Function;
|
||||
|
||||
using Scaled64 = ScaledNumber<uint64_t>;
|
||||
void propagateSyntheticCounts(
|
||||
const CallGraph &CG, function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
|
||||
function_ref<uint64_t(Function *F)> GetCount,
|
||||
function_ref<void(Function *F, uint64_t)> AddToCount);
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
@ -237,10 +237,11 @@ public:
|
||||
/// \brief Set the entry count for this function.
|
||||
///
|
||||
/// Entry count is the number of times this function was executed based on
|
||||
/// pgo data. \p Imports points to a set of GUIDs that needs to be imported
|
||||
/// by the function for sample PGO, to enable the same inlines as the
|
||||
/// profiled optimized binary.
|
||||
void setEntryCount(uint64_t Count,
|
||||
/// pgo data. \p Synthetic indicates the count is synthesized by analysis and
|
||||
/// not from a profile run. \p Imports points to a set of GUIDs that needs to
|
||||
/// be imported by the function for sample PGO, to enable the same inlines as
|
||||
/// the profiled optimized binary.
|
||||
void setEntryCount(uint64_t Count, bool Synthetic = false,
|
||||
const DenseSet<GlobalValue::GUID> *Imports = nullptr);
|
||||
|
||||
/// \brief Get the entry count for this function.
|
||||
|
@ -66,10 +66,11 @@ public:
|
||||
/// Return metadata specifying that a branch or switch is unpredictable.
|
||||
MDNode *createUnpredictable();
|
||||
|
||||
/// Return metadata containing the entry \p Count for a function, and the
|
||||
/// Return metadata containing the entry \p Count for a function, a boolean
|
||||
/// \Synthetic indicating whether the counts were synthetized, and the
|
||||
/// GUIDs stored in \p Imports that need to be imported for sample PGO, to
|
||||
/// enable the same inlines as the profiled optimized binary
|
||||
MDNode *createFunctionEntryCount(uint64_t Count,
|
||||
MDNode *createFunctionEntryCount(uint64_t Count, bool Synthetic,
|
||||
const DenseSet<GlobalValue::GUID> *Imports);
|
||||
|
||||
/// Return metadata containing the section prefix for a function.
|
||||
|
19
include/llvm/Transforms/IPO/SyntheticCountsPropagation.h
Normal file
19
include/llvm/Transforms/IPO/SyntheticCountsPropagation.h
Normal file
@ -0,0 +1,19 @@
|
||||
#ifndef LLVM_TRANSFORMS_IPO_SYNTHETIC_COUNTS_PROPAGATION_H
|
||||
#define LLVM_TRANSFORMS_IPO_SYNTHETIC_COUNTS_PROPAGATION_H
|
||||
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Support/ScaledNumber.h"
|
||||
|
||||
namespace llvm {
|
||||
class Function;
|
||||
class Module;
|
||||
|
||||
class SyntheticCountsPropagation
|
||||
: public PassInfoMixin<SyntheticCountsPropagation> {
|
||||
public:
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
|
||||
};
|
||||
} // namespace llvm
|
||||
#endif
|
@ -74,6 +74,7 @@ add_llvm_library(LLVMAnalysis
|
||||
ScalarEvolutionAliasAnalysis.cpp
|
||||
ScalarEvolutionExpander.cpp
|
||||
ScalarEvolutionNormalization.cpp
|
||||
SyntheticCountsUtils.cpp
|
||||
TargetLibraryInfo.cpp
|
||||
TargetTransformInfo.cpp
|
||||
Trace.cpp
|
||||
|
122
lib/Analysis/SyntheticCountsUtils.cpp
Normal file
122
lib/Analysis/SyntheticCountsUtils.cpp
Normal file
@ -0,0 +1,122 @@
|
||||
//===--- SyntheticCountsUtils.cpp - synthetic counts propagation utils ---===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines utilities for propagating synthetic counts.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/SyntheticCountsUtils.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/SCCIterator.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/InstIterator.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Given a set of functions in an SCC, propagate entry counts to functions
|
||||
// called by the SCC.
|
||||
static void
|
||||
propagateFromSCC(const SmallPtrSetImpl<Function *> &SCCFunctions,
|
||||
function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
|
||||
function_ref<uint64_t(Function *F)> GetCount,
|
||||
function_ref<void(Function *F, uint64_t)> AddToCount) {
|
||||
|
||||
SmallVector<CallSite, 16> CallSites;
|
||||
|
||||
// Gather all callsites in the SCC.
|
||||
auto GatherCallSites = [&]() {
|
||||
for (auto *F : SCCFunctions) {
|
||||
assert(F && !F->isDeclaration());
|
||||
for (auto &I : instructions(F)) {
|
||||
if (auto CS = CallSite(&I)) {
|
||||
CallSites.push_back(CS);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
GatherCallSites();
|
||||
|
||||
// Partition callsites so that the callsites that call functions in the same
|
||||
// SCC come first.
|
||||
auto Mid = partition(CallSites, [&](CallSite &CS) {
|
||||
auto *Callee = CS.getCalledFunction();
|
||||
if (Callee)
|
||||
return SCCFunctions.count(Callee);
|
||||
// FIXME: Use the !callees metadata to propagate counts through indirect
|
||||
// calls.
|
||||
return 0U;
|
||||
});
|
||||
|
||||
// For functions in the same SCC, update the counts in two steps:
|
||||
// 1. Compute the additional count for each function by propagating the counts
|
||||
// along all incoming edges to the function that originate from the same SCC
|
||||
// and summing them up.
|
||||
// 2. Add the additional counts to the functions in the SCC.
|
||||
// This ensures that the order of
|
||||
// traversal of functions within the SCC doesn't change the final result.
|
||||
|
||||
DenseMap<Function *, uint64_t> AdditionalCounts;
|
||||
for (auto It = CallSites.begin(); It != Mid; It++) {
|
||||
auto &CS = *It;
|
||||
auto RelFreq = GetCallSiteRelFreq(CS);
|
||||
Function *Callee = CS.getCalledFunction();
|
||||
Function *Caller = CS.getCaller();
|
||||
RelFreq *= Scaled64(GetCount(Caller), 0);
|
||||
uint64_t AdditionalCount = RelFreq.toInt<uint64_t>();
|
||||
AdditionalCounts[Callee] += AdditionalCount;
|
||||
}
|
||||
|
||||
// Update the counts for the functions in the SCC.
|
||||
for (auto &Entry : AdditionalCounts)
|
||||
AddToCount(Entry.first, Entry.second);
|
||||
|
||||
// Now update the counts for functions not in SCC.
|
||||
for (auto It = Mid; It != CallSites.end(); It++) {
|
||||
auto &CS = *It;
|
||||
auto Weight = GetCallSiteRelFreq(CS);
|
||||
Function *Callee = CS.getCalledFunction();
|
||||
Function *Caller = CS.getCaller();
|
||||
Weight *= Scaled64(GetCount(Caller), 0);
|
||||
AddToCount(Callee, Weight.toInt<uint64_t>());
|
||||
}
|
||||
}
|
||||
|
||||
/// Propgate synthetic entry counts on a callgraph.
|
||||
///
|
||||
/// This performs a reverse post-order traversal of the callgraph SCC. For each
|
||||
/// SCC, it first propagates the entry counts to the functions within the SCC
|
||||
/// through call edges and updates them in one shot. Then the entry counts are
|
||||
/// propagated to functions outside the SCC.
|
||||
void llvm::propagateSyntheticCounts(
|
||||
const CallGraph &CG, function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
|
||||
function_ref<uint64_t(Function *F)> GetCount,
|
||||
function_ref<void(Function *F, uint64_t)> AddToCount) {
|
||||
|
||||
SmallVector<SmallPtrSet<Function *, 8>, 16> SCCs;
|
||||
for (auto I = scc_begin(&CG); !I.isAtEnd(); ++I) {
|
||||
auto SCC = *I;
|
||||
|
||||
SmallPtrSet<Function *, 8> SCCFunctions;
|
||||
for (auto *Node : SCC) {
|
||||
Function *F = Node->getFunction();
|
||||
if (F && !F->isDeclaration()) {
|
||||
SCCFunctions.insert(F);
|
||||
}
|
||||
}
|
||||
SCCs.push_back(SCCFunctions);
|
||||
}
|
||||
|
||||
for (auto &SCCFunctions : reverse(SCCs))
|
||||
propagateFromSCC(SCCFunctions, GetCallSiteRelFreq, GetCount, AddToCount);
|
||||
}
|
@ -1320,10 +1320,11 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) {
|
||||
setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit));
|
||||
}
|
||||
|
||||
void Function::setEntryCount(uint64_t Count,
|
||||
void Function::setEntryCount(uint64_t Count, bool Synthetic,
|
||||
const DenseSet<GlobalValue::GUID> *S) {
|
||||
MDBuilder MDB(getContext());
|
||||
setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count, S));
|
||||
setMetadata(LLVMContext::MD_prof,
|
||||
MDB.createFunctionEntryCount(Count, Synthetic, S));
|
||||
}
|
||||
|
||||
Optional<uint64_t> Function::getEntryCount() const {
|
||||
|
@ -58,10 +58,14 @@ MDNode *MDBuilder::createUnpredictable() {
|
||||
}
|
||||
|
||||
MDNode *MDBuilder::createFunctionEntryCount(
|
||||
uint64_t Count, const DenseSet<GlobalValue::GUID> *Imports) {
|
||||
uint64_t Count, bool Synthetic,
|
||||
const DenseSet<GlobalValue::GUID> *Imports) {
|
||||
Type *Int64Ty = Type::getInt64Ty(Context);
|
||||
SmallVector<Metadata *, 8> Ops;
|
||||
Ops.push_back(createString("function_entry_count"));
|
||||
if (Synthetic)
|
||||
Ops.push_back(createString("synthetic_function_entry_count"));
|
||||
else
|
||||
Ops.push_back(createString("function_entry_count"));
|
||||
Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count)));
|
||||
if (Imports) {
|
||||
SmallVector<GlobalValue::GUID, 2> OrderID(Imports->begin(), Imports->end());
|
||||
|
@ -1695,8 +1695,11 @@ void Verifier::verifyFunctionMetadata(
|
||||
"expected string with name of the !prof annotation", MD);
|
||||
MDString *MDS = cast<MDString>(MD->getOperand(0));
|
||||
StringRef ProfName = MDS->getString();
|
||||
Assert(ProfName.equals("function_entry_count"),
|
||||
"first operand should be 'function_entry_count'", MD);
|
||||
Assert(ProfName.equals("function_entry_count") ||
|
||||
ProfName.equals("synthetic_function_entry_count"),
|
||||
"first operand should be 'function_entry_count'"
|
||||
" or 'synthetic_function_entry_count'",
|
||||
MD);
|
||||
|
||||
// Check second operand.
|
||||
Assert(MD->getOperand(1) != nullptr, "second operand should not be null",
|
||||
|
@ -80,6 +80,7 @@
|
||||
#include "llvm/Transforms/IPO/PartialInlining.h"
|
||||
#include "llvm/Transforms/IPO/SCCP.h"
|
||||
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
|
||||
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
|
||||
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
|
||||
#include "llvm/Transforms/InstCombine/InstCombine.h"
|
||||
#include "llvm/Transforms/InstrProfiling.h"
|
||||
@ -176,6 +177,11 @@ static cl::opt<bool> EnableGVNSink(
|
||||
"enable-npm-gvn-sink", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
|
||||
|
||||
static cl::opt<bool> EnableSyntheticCounts(
|
||||
"enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
|
||||
cl::desc("Run synthetic function entry count generation "
|
||||
"pass"));
|
||||
|
||||
static Regex DefaultAliasRegex(
|
||||
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
|
||||
|
||||
@ -622,6 +628,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
|
||||
MPM.addPass(PGOIndirectCallPromotion(false, false));
|
||||
}
|
||||
|
||||
// Synthesize function entry counts for non-PGO compilation.
|
||||
if (EnableSyntheticCounts && !PGOOpt)
|
||||
MPM.addPass(SyntheticCountsPropagation());
|
||||
|
||||
// Require the GlobalsAA analysis for the module so we can query it within
|
||||
// the CGSCC pipeline.
|
||||
MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
|
||||
|
@ -73,6 +73,7 @@ MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
|
||||
MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
|
||||
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
|
||||
MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
|
||||
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
|
||||
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
|
||||
MODULE_PASS("verify", VerifierPass())
|
||||
#undef MODULE_PASS
|
||||
|
@ -29,6 +29,7 @@ add_llvm_library(LLVMipo
|
||||
SampleProfile.cpp
|
||||
StripDeadPrototypes.cpp
|
||||
StripSymbols.cpp
|
||||
SyntheticCountsPropagation.cpp
|
||||
ThinLTOBitcodeWriter.cpp
|
||||
WholeProgramDevirt.cpp
|
||||
|
||||
|
@ -1467,7 +1467,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
|
||||
// Sets the GUIDs that are inlined in the profiled binary. This is used
|
||||
// for ThinLink to make correct liveness analysis, and also make the IR
|
||||
// match the profiled binary before annotation.
|
||||
F.setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs);
|
||||
F.setEntryCount(Samples->getHeadSamples() + 1, false, &InlinedGUIDs);
|
||||
|
||||
// Compute dominance and loop info needed for propagation.
|
||||
computeDominanceAndLoopInfo(F);
|
||||
|
127
lib/Transforms/IPO/SyntheticCountsPropagation.cpp
Normal file
127
lib/Transforms/IPO/SyntheticCountsPropagation.cpp
Normal file
@ -0,0 +1,127 @@
|
||||
//=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements a transformation that synthesizes entry counts for
|
||||
// functions and attaches !prof metadata to functions with the synthesized
|
||||
// counts. The presence of !prof metadata with counter name set to
|
||||
// 'synthesized_function_entry_count' indicate that the value of the counter is
|
||||
// an estimation of the likely execution count of the function. This transform
|
||||
// is applied only in non PGO mode as functions get 'real' profile-based
|
||||
// function entry counts in the PGO mode.
|
||||
//
|
||||
// The transformation works by first assigning some initial values to the entry
|
||||
// counts of all functions and then doing a top-down traversal of the
|
||||
// callgraph-scc to propagate the counts. For each function the set of callsites
|
||||
// and their relative block frequency is gathered. The relative block frequency
|
||||
// multiplied by the entry count of the caller and added to the callee's entry
|
||||
// count. For non-trivial SCCs, the new counts are computed from the previous
|
||||
// counts and updated in one shot.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/Analysis/SyntheticCountsUtils.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
using Scaled64 = ScaledNumber<uint64_t>;
|
||||
|
||||
#define DEBUG_TYPE "synthetic-counts-propagation"
|
||||
|
||||
/// Initial synthetic count assigned to functions.
|
||||
static cl::opt<int>
|
||||
InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
|
||||
cl::ZeroOrMore,
|
||||
cl::desc("Initial value of synthetic entry count."));
|
||||
|
||||
/// Initial synthetic count assigned to inline functions.
|
||||
static cl::opt<int> InlineSyntheticCount(
|
||||
"inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore,
|
||||
cl::desc("Initial synthetic entry count for inline functions."));
|
||||
|
||||
/// Initial synthetic count assigned to cold functions.
|
||||
static cl::opt<int> ColdSyntheticCount(
|
||||
"cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore,
|
||||
cl::desc("Initial synthetic entry count for cold functions."));
|
||||
|
||||
// Assign initial synthetic entry counts to functions.
|
||||
static void
|
||||
initializeCounts(Module &M, function_ref<void(Function *, uint64_t)> SetCount) {
|
||||
auto MayHaveIndirectCalls = [](Function &F) {
|
||||
for (auto *U : F.users()) {
|
||||
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
for (Function &F : M) {
|
||||
uint64_t InitialCount = InitialSyntheticCount;
|
||||
if (F.isDeclaration())
|
||||
continue;
|
||||
if (F.hasFnAttribute(Attribute::AlwaysInline) ||
|
||||
F.hasFnAttribute(Attribute::InlineHint)) {
|
||||
// Use a higher value for inline functions to account for the fact that
|
||||
// these are usually beneficial to inline.
|
||||
InitialCount = InlineSyntheticCount;
|
||||
} else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) {
|
||||
// Local functions without inline hints get counts only through
|
||||
// propagation.
|
||||
InitialCount = 0;
|
||||
} else if (F.hasFnAttribute(Attribute::Cold) ||
|
||||
F.hasFnAttribute(Attribute::NoInline)) {
|
||||
// Use a lower value for noinline and cold functions.
|
||||
InitialCount = ColdSyntheticCount;
|
||||
}
|
||||
SetCount(&F, InitialCount);
|
||||
}
|
||||
}
|
||||
|
||||
PreservedAnalyses SyntheticCountsPropagation::run(Module &M,
|
||||
ModuleAnalysisManager &MAM) {
|
||||
FunctionAnalysisManager &FAM =
|
||||
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
|
||||
DenseMap<Function *, uint64_t> Counts;
|
||||
// Set initial entry counts.
|
||||
initializeCounts(M, [&](Function *F, uint64_t Count) { Counts[F] = Count; });
|
||||
|
||||
// Compute the relative block frequency for a callsite. Use scaled numbers
|
||||
// and not integers since the relative block frequency could be less than 1.
|
||||
auto GetCallSiteRelFreq = [&](CallSite CS) {
|
||||
Function *Caller = CS.getCaller();
|
||||
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
|
||||
BasicBlock *CSBB = CS.getInstruction()->getParent();
|
||||
Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
|
||||
Scaled64 BBFreq(BFI.getBlockFreq(CSBB).getFrequency(), 0);
|
||||
BBFreq /= EntryFreq;
|
||||
return BBFreq;
|
||||
};
|
||||
|
||||
CallGraph CG(M);
|
||||
// Propgate the entry counts on the callgraph.
|
||||
propagateSyntheticCounts(
|
||||
CG, GetCallSiteRelFreq, [&](Function *F) { return Counts[F]; },
|
||||
[&](Function *F, uint64_t New) { Counts[F] += New; });
|
||||
|
||||
// Set the counts as metadata.
|
||||
for (auto Entry : Counts)
|
||||
Entry.first->setEntryCount(Entry.second, true);
|
||||
|
||||
return PreservedAnalyses::all();
|
||||
}
|
79
test/Transforms/SyntheticCountsPropagation/initial.ll
Normal file
79
test/Transforms/SyntheticCountsPropagation/initial.ll
Normal file
@ -0,0 +1,79 @@
|
||||
; RUN: opt -passes=synthetic-counts-propagation -S < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: define void @foo()
|
||||
; CHECK: !prof ![[COUNT1:[0-9]+]]
|
||||
define void @foo() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define void @foo_inline() #0
|
||||
; CHECK: !prof ![[COUNT2:[0-9]+]]
|
||||
define void @foo_inline() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define void @foo_always_inline() #1
|
||||
; CHECK: !prof ![[COUNT2]]
|
||||
define void @foo_always_inline() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define void @foo_cold() #2
|
||||
; CHECK: !prof ![[COUNT3:[0-9]+]]
|
||||
define void @foo_cold() #2 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define void @foo_noinline() #3
|
||||
; CHECK: !prof ![[COUNT3]]
|
||||
define void @foo_noinline() #3 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define internal void @foo_local()
|
||||
; CHECK: !prof ![[COUNT4:[0-9]+]]
|
||||
define internal void @foo_local() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define internal void @foo_local_escaped()
|
||||
; CHECK: !prof ![[COUNT1]]
|
||||
define internal void @foo_local_escaped() {
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @ext(void ()*)
|
||||
|
||||
define void @bar() {
|
||||
call void @ext(void ()* nonnull @foo_local_escaped)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define internal void @foo_local_inline() #0
|
||||
; CHECK: !prof ![[COUNT2]]
|
||||
define internal void @foo_local_inline() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define internal void @foo_local_cold() #2
|
||||
; CHECK: !prof ![[COUNT4]]
|
||||
define internal void @foo_local_cold() #2 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define linkonce void @foo_linkonce()
|
||||
; CHECK: !prof ![[COUNT1]]
|
||||
define linkonce void @foo_linkonce() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: ![[COUNT1]] = !{!"synthetic_function_entry_count", i64 10}
|
||||
; CHECK: ![[COUNT2]] = !{!"synthetic_function_entry_count", i64 15}
|
||||
; CHECK: ![[COUNT3]] = !{!"synthetic_function_entry_count", i64 5}
|
||||
; CHECK: ![[COUNT4]] = !{!"synthetic_function_entry_count", i64 0}
|
||||
|
||||
attributes #0 = {inlinehint}
|
||||
attributes #1 = {alwaysinline}
|
||||
attributes #2 = {cold}
|
||||
attributes #3 = {noinline}
|
||||
|
50
test/Transforms/SyntheticCountsPropagation/prop.ll
Normal file
50
test/Transforms/SyntheticCountsPropagation/prop.ll
Normal file
@ -0,0 +1,50 @@
|
||||
; RUN: opt -passes=synthetic-counts-propagation -S < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: define void @level1a(i32 %n)
|
||||
; CHECK: !prof ![[COUNT1:[0-9]+]]
|
||||
define void @level1a(i32 %n) {
|
||||
entry:
|
||||
%cmp = icmp sgt i32 %n, 10
|
||||
br i1 %cmp, label %exit, label %loop
|
||||
loop:
|
||||
%i = phi i32 [%n, %entry], [%i1, %loop]
|
||||
call void @level2a(i32 %n)
|
||||
%i1 = sub i32 %i, 1
|
||||
%cmp2 = icmp eq i32 %i1, 0
|
||||
br i1 %cmp2, label %exit, label %loop, !prof !1
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define void @level2a(i32 %n)
|
||||
; CHECK: !prof ![[COUNT2:[0-9]+]]
|
||||
define void @level2a(i32 %n) {
|
||||
call void @level2b(i32 %n)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define void @level2b(i32 %n)
|
||||
; CHECK: !prof ![[COUNT2]]
|
||||
define void @level2b(i32 %n) {
|
||||
entry:
|
||||
call void @level2a(i32 %n)
|
||||
%cmp = icmp eq i32 %n, 0
|
||||
br i1 %cmp, label %then, label %else, !prof !2
|
||||
then:
|
||||
call void @level3a(i32 %n)
|
||||
br label %else
|
||||
else:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define internal void @level3a(i32 %n)
|
||||
; CHECK: !prof ![[COUNT3:[0-9]+]]
|
||||
define internal void @level3a(i32 %n) {
|
||||
ret void
|
||||
}
|
||||
|
||||
!1 = !{!"branch_weights", i32 1, i32 99}
|
||||
!2 = !{!"branch_weights", i32 1, i32 1}
|
||||
; CHECK: ![[COUNT1]] = !{!"synthetic_function_entry_count", i64 10}
|
||||
; CHECK: ![[COUNT2]] = !{!"synthetic_function_entry_count", i64 520}
|
||||
; CHECK: ![[COUNT3]] = !{!"synthetic_function_entry_count", i64 260}
|
19
test/Transforms/SyntheticCountsPropagation/scc.ll
Normal file
19
test/Transforms/SyntheticCountsPropagation/scc.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: opt -passes=synthetic-counts-propagation -S < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: define void @foo()
|
||||
; CHECK: !prof ![[COUNT1:[0-9]+]]
|
||||
define void @foo() {
|
||||
call void @bar()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define void @bar() #0
|
||||
; CHECK: !prof ![[COUNT1]]
|
||||
define void @bar() #0 {
|
||||
call void @foo()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = {inlinehint}
|
||||
|
||||
; CHECK: ![[COUNT1]] = !{!"synthetic_function_entry_count", i64 25}
|
Loading…
Reference in New Issue
Block a user