mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
[ThinLTO] Compute synthetic function entry count
Summary: This patch computes the synthetic function entry count on the whole program callgraph (based on module summary) and writes the entry counts to the summary. After function importing, this count gets attached to the IR as metadata. Since it adds a new field to the summary, this bumps up the version. Reviewers: tejohnson Subscribers: mehdi_amini, inglorion, llvm-commits Differential Revision: https://reviews.llvm.org/D43521 llvm-svn: 349076
This commit is contained in:
parent
8fb4b5dbb4
commit
8482e48d19
@ -501,8 +501,9 @@ public:
|
||||
FunctionSummary::GVFlags(
|
||||
GlobalValue::LinkageTypes::AvailableExternallyLinkage,
|
||||
/*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false),
|
||||
0, FunctionSummary::FFlags{}, std::vector<ValueInfo>(),
|
||||
std::move(Edges), std::vector<GlobalValue::GUID>(),
|
||||
/*InsCount=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
|
||||
std::vector<ValueInfo>(), std::move(Edges),
|
||||
std::vector<GlobalValue::GUID>(),
|
||||
std::vector<FunctionSummary::VFuncId>(),
|
||||
std::vector<FunctionSummary::VFuncId>(),
|
||||
std::vector<FunctionSummary::ConstVCall>(),
|
||||
@ -520,6 +521,11 @@ private:
|
||||
/// Function summary specific flags.
|
||||
FFlags FunFlags;
|
||||
|
||||
/// The synthesized entry count of the function.
|
||||
/// This is only populated during ThinLink phase and remains unused while
|
||||
/// generating per-module summaries.
|
||||
uint64_t EntryCount = 0;
|
||||
|
||||
/// List of <CalleeValueInfo, CalleeInfo> call edge pairs from this function.
|
||||
std::vector<EdgeTy> CallGraphEdgeList;
|
||||
|
||||
@ -527,14 +533,15 @@ private:
|
||||
|
||||
public:
|
||||
FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
|
||||
std::vector<ValueInfo> Refs, std::vector<EdgeTy> CGEdges,
|
||||
uint64_t EntryCount, std::vector<ValueInfo> Refs,
|
||||
std::vector<EdgeTy> CGEdges,
|
||||
std::vector<GlobalValue::GUID> TypeTests,
|
||||
std::vector<VFuncId> TypeTestAssumeVCalls,
|
||||
std::vector<VFuncId> TypeCheckedLoadVCalls,
|
||||
std::vector<ConstVCall> TypeTestAssumeConstVCalls,
|
||||
std::vector<ConstVCall> TypeCheckedLoadConstVCalls)
|
||||
: GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
|
||||
InstCount(NumInsts), FunFlags(FunFlags),
|
||||
InstCount(NumInsts), FunFlags(FunFlags), EntryCount(EntryCount),
|
||||
CallGraphEdgeList(std::move(CGEdges)) {
|
||||
if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() ||
|
||||
!TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() ||
|
||||
@ -559,6 +566,12 @@ public:
|
||||
/// Get the instruction count recorded for this function.
|
||||
unsigned instCount() const { return InstCount; }
|
||||
|
||||
/// Get the synthetic entry count for this function.
|
||||
uint64_t entryCount() const { return EntryCount; }
|
||||
|
||||
/// Set the synthetic entry count for this function.
|
||||
void setEntryCount(uint64_t EC) { EntryCount = EC; }
|
||||
|
||||
/// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
|
||||
ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
|
||||
|
||||
@ -802,6 +815,9 @@ private:
|
||||
/// considered live.
|
||||
bool WithGlobalValueDeadStripping = false;
|
||||
|
||||
/// Indicates that summary-based synthetic entry count propagation has run
|
||||
bool HasSyntheticEntryCounts = false;
|
||||
|
||||
/// Indicates that distributed backend should skip compilation of the
|
||||
/// module. Flag is supposed to be set by distributed ThinLTO indexing
|
||||
/// when it detected that the module is not needed during the final
|
||||
@ -914,6 +930,9 @@ public:
|
||||
WithGlobalValueDeadStripping = true;
|
||||
}
|
||||
|
||||
/// Returns true if summary-based synthetic entry count propagation has run
/// on this index (i.e. the HasSyntheticEntryCounts flag is set).
bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; }
|
||||
/// Record that summary-based synthetic entry count propagation has run on
/// this index. The flag can only be set, never cleared, through this API.
void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; }
|
||||
|
||||
bool skipModuleByDistributedBackend() const {
|
||||
return SkipModuleByDistributedBackend;
|
||||
}
|
||||
@ -1158,6 +1177,7 @@ public:
|
||||
/// GraphTraits definition to build SCC for the index
|
||||
template <> struct GraphTraits<ValueInfo> {
|
||||
typedef ValueInfo NodeRef;
|
||||
using EdgeRef = FunctionSummary::EdgeTy &;
|
||||
|
||||
static NodeRef valueInfoFromEdge(FunctionSummary::EdgeTy &P) {
|
||||
return P.first;
|
||||
@ -1166,6 +1186,8 @@ template <> struct GraphTraits<ValueInfo> {
|
||||
mapped_iterator<std::vector<FunctionSummary::EdgeTy>::iterator,
|
||||
decltype(&valueInfoFromEdge)>;
|
||||
|
||||
using ChildEdgeIteratorType = std::vector<FunctionSummary::EdgeTy>::iterator;
|
||||
|
||||
static NodeRef getEntryNode(ValueInfo V) { return V; }
|
||||
|
||||
static ChildIteratorType child_begin(NodeRef N) {
|
||||
@ -1187,6 +1209,26 @@ template <> struct GraphTraits<ValueInfo> {
|
||||
cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
|
||||
return ChildIteratorType(F->CallGraphEdgeList.end(), &valueInfoFromEdge);
|
||||
}
|
||||
|
||||
/// Iterator to the first outgoing call edge of node \p N.
static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
  // A node with no summaries is treated as an external function and shares
  // the edge list of the FunctionSummary::ExternalNode placeholder.
  if (N.getSummaryList().empty())
    return FunctionSummary::ExternalNode.CallGraphEdgeList.begin();

  // Otherwise use the edges of the first summary's base object.
  auto *Summary =
      cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
  return Summary->CallGraphEdgeList.begin();
}
|
||||
|
||||
/// Past-the-end iterator over the outgoing call edges of node \p N.
static ChildEdgeIteratorType child_edge_end(NodeRef N) {
  // A node with no summaries is treated as an external function and shares
  // the edge list of the FunctionSummary::ExternalNode placeholder.
  if (N.getSummaryList().empty())
    return FunctionSummary::ExternalNode.CallGraphEdgeList.end();

  // Otherwise use the edges of the first summary's base object.
  auto *Summary =
      cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
  return Summary->CallGraphEdgeList.end();
}
|
||||
|
||||
/// Return the destination node of call edge \p E, i.e. the ValueInfo stored
/// as the first member of the edge pair.
static NodeRef edge_dest(EdgeRef E) { return E.first; }
|
||||
};
|
||||
|
||||
template <>
|
||||
|
@ -224,7 +224,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
|
||||
GlobalValueSummary::GVFlags(
|
||||
static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
|
||||
FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal),
|
||||
0, FunctionSummary::FFlags{}, Refs,
|
||||
/*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, Refs,
|
||||
ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests),
|
||||
std::move(FSum.TypeTestAssumeVCalls),
|
||||
std::move(FSum.TypeCheckedLoadVCalls),
|
||||
|
17
include/llvm/LTO/SummaryBasedOptimizations.h
Normal file
17
include/llvm/LTO/SummaryBasedOptimizations.h
Normal file
@ -0,0 +1,17 @@
|
||||
//=- llvm/LTO/SummaryBasedOptimizations.h -Link time optimizations-*- C++ -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
|
||||
#define LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
|
||||
namespace llvm {
|
||||
class ModuleSummaryIndex;
|
||||
/// Compute synthetic function entry counts on the summary index \p Index.
/// The entry counts are stored on the function summaries and the index is
/// flagged as carrying synthetic entry counts. This is a no-op unless the
/// -thinlto-synthesize-entry-counts option is enabled.
void computeSyntheticCounts(ModuleSummaryIndex &Index);
|
||||
|
||||
} // namespace llvm
|
||||
#endif
|
@ -113,6 +113,10 @@ public:
|
||||
bool renameModuleForThinLTO(
|
||||
Module &M, const ModuleSummaryIndex &Index,
|
||||
SetVector<GlobalValue *> *GlobalsToImport = nullptr);
|
||||
|
||||
/// Compute synthetic function entry counts.
|
||||
void computeSyntheticCounts(ModuleSummaryIndex &Index);
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
@ -396,9 +396,9 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
|
||||
// Don't try to import functions with noinline attribute.
|
||||
F.getAttributes().hasFnAttribute(Attribute::NoInline)};
|
||||
auto FuncSummary = llvm::make_unique<FunctionSummary>(
|
||||
Flags, NumInsts, FunFlags, std::move(Refs), CallGraphEdges.takeVector(),
|
||||
TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
|
||||
TypeCheckedLoadVCalls.takeVector(),
|
||||
Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
|
||||
CallGraphEdges.takeVector(), TypeTests.takeVector(),
|
||||
TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
|
||||
TypeTestAssumeConstVCalls.takeVector(),
|
||||
TypeCheckedLoadConstVCalls.takeVector());
|
||||
if (NonRenamableLocal)
|
||||
@ -509,14 +509,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
|
||||
if (Function *F = dyn_cast<Function>(GV)) {
|
||||
std::unique_ptr<FunctionSummary> Summary =
|
||||
llvm::make_unique<FunctionSummary>(
|
||||
GVFlags, 0,
|
||||
GVFlags, /*InstCount=*/0,
|
||||
FunctionSummary::FFlags{
|
||||
F->hasFnAttribute(Attribute::ReadNone),
|
||||
F->hasFnAttribute(Attribute::ReadOnly),
|
||||
F->hasFnAttribute(Attribute::NoRecurse),
|
||||
F->returnDoesNotAlias(),
|
||||
/* NoInline = */ false},
|
||||
ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{},
|
||||
/*EntryCount=*/0, ArrayRef<ValueInfo>{},
|
||||
ArrayRef<FunctionSummary::EdgeTy>{},
|
||||
ArrayRef<GlobalValue::GUID>{},
|
||||
ArrayRef<FunctionSummary::VFuncId>{},
|
||||
ArrayRef<FunctionSummary::VFuncId>{},
|
||||
|
@ -14,12 +14,12 @@
|
||||
#include "llvm/Analysis/SyntheticCountsUtils.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/SCCIterator.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/InstIterator.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/ModuleSummaryIndex.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -29,7 +29,7 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
|
||||
const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount,
|
||||
AddCountTy AddCount) {
|
||||
|
||||
SmallPtrSet<NodeRef, 8> SCCNodes;
|
||||
DenseSet<NodeRef> SCCNodes;
|
||||
SmallVector<std::pair<NodeRef, EdgeRef>, 8> SCCEdges, NonSCCEdges;
|
||||
|
||||
for (auto &Node : SCC)
|
||||
@ -111,3 +111,4 @@ void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
|
||||
}
|
||||
|
||||
template class llvm::SyntheticCountsUtils<const CallGraph *>;
|
||||
template class llvm::SyntheticCountsUtils<ModuleSummaryIndex *>;
|
||||
|
@ -7727,8 +7727,8 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
|
||||
return true;
|
||||
|
||||
auto FS = llvm::make_unique<FunctionSummary>(
|
||||
GVFlags, InstCount, FFlags, std::move(Refs), std::move(Calls),
|
||||
std::move(TypeIdInfo.TypeTests),
|
||||
GVFlags, InstCount, FFlags, /*EntryCount=*/0, std::move(Refs),
|
||||
std::move(Calls), std::move(TypeIdInfo.TypeTests),
|
||||
std::move(TypeIdInfo.TypeTestAssumeVCalls),
|
||||
std::move(TypeIdInfo.TypeCheckedLoadVCalls),
|
||||
std::move(TypeIdInfo.TypeTestAssumeConstVCalls),
|
||||
|
@ -5247,9 +5247,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
|
||||
}
|
||||
const uint64_t Version = Record[0];
|
||||
const bool IsOldProfileFormat = Version == 1;
|
||||
if (Version < 1 || Version > 5)
|
||||
if (Version < 1 || Version > 6)
|
||||
return error("Invalid summary version " + Twine(Version) +
|
||||
", 1, 2, 3, 4 or 5 expected");
|
||||
". Version should be in the range [1-6].");
|
||||
Record.clear();
|
||||
|
||||
// Keep around the last seen summary to be used when we see an optional
|
||||
@ -5303,6 +5303,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
|
||||
// 1 bit: SkipModuleByDistributedBackend flag.
|
||||
if (Flags & 0x2)
|
||||
TheIndex.setSkipModuleByDistributedBackend();
|
||||
// 1 bit: HasSyntheticEntryCounts flag.
|
||||
if (Flags & 0x4)
|
||||
TheIndex.setHasSyntheticEntryCounts();
|
||||
break;
|
||||
}
|
||||
case bitc::FS_VALUE_GUID: { // [valueid, refguid]
|
||||
@ -5358,8 +5361,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
|
||||
IsOldProfileFormat, HasProfile, HasRelBF);
|
||||
setImmutableRefs(Refs, NumImmutableRefs);
|
||||
auto FS = llvm::make_unique<FunctionSummary>(
|
||||
Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
|
||||
std::move(Calls), std::move(PendingTypeTests),
|
||||
Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0,
|
||||
std::move(Refs), std::move(Calls), std::move(PendingTypeTests),
|
||||
std::move(PendingTypeTestAssumeVCalls),
|
||||
std::move(PendingTypeCheckedLoadVCalls),
|
||||
std::move(PendingTypeTestAssumeConstVCalls),
|
||||
@ -5437,18 +5440,25 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
|
||||
uint64_t RawFlags = Record[2];
|
||||
unsigned InstCount = Record[3];
|
||||
uint64_t RawFunFlags = 0;
|
||||
uint64_t EntryCount = 0;
|
||||
unsigned NumRefs = Record[4];
|
||||
unsigned NumImmutableRefs = 0;
|
||||
int RefListStartIndex = 5;
|
||||
|
||||
if (Version >= 4) {
|
||||
RawFunFlags = Record[4];
|
||||
NumRefs = Record[5];
|
||||
RefListStartIndex = 6;
|
||||
size_t NumRefsIndex = 5;
|
||||
if (Version >= 5) {
|
||||
NumImmutableRefs = Record[6];
|
||||
RefListStartIndex = 7;
|
||||
if (Version >= 6) {
|
||||
NumRefsIndex = 6;
|
||||
EntryCount = Record[5];
|
||||
RefListStartIndex = 8;
|
||||
}
|
||||
NumImmutableRefs = Record[RefListStartIndex - 1];
|
||||
}
|
||||
NumRefs = Record[NumRefsIndex];
|
||||
}
|
||||
|
||||
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
|
||||
@ -5464,8 +5474,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
|
||||
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
|
||||
setImmutableRefs(Refs, NumImmutableRefs);
|
||||
auto FS = llvm::make_unique<FunctionSummary>(
|
||||
Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
|
||||
std::move(Edges), std::move(PendingTypeTests),
|
||||
Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount,
|
||||
std::move(Refs), std::move(Edges), std::move(PendingTypeTests),
|
||||
std::move(PendingTypeTestAssumeVCalls),
|
||||
std::move(PendingTypeCheckedLoadVCalls),
|
||||
std::move(PendingTypeTestAssumeConstVCalls),
|
||||
|
@ -3601,7 +3601,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
|
||||
// Current version for the summary.
|
||||
// This is bumped whenever we introduce changes in the way some record are
|
||||
// interpreted, like flags for instance.
|
||||
static const uint64_t INDEX_VERSION = 5;
|
||||
static const uint64_t INDEX_VERSION = 6;
|
||||
|
||||
/// Emit the per-module summary section alongside the rest of
|
||||
/// the module's bitcode.
|
||||
@ -3732,6 +3732,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
|
||||
Flags |= 0x1;
|
||||
if (Index.skipModuleByDistributedBackend())
|
||||
Flags |= 0x2;
|
||||
if (Index.hasSyntheticEntryCounts())
|
||||
Flags |= 0x4;
|
||||
Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
|
||||
|
||||
for (const auto &GVI : valueIds()) {
|
||||
@ -3747,6 +3749,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
|
||||
// numrefs x valueid, n x (valueid)
|
||||
@ -3861,6 +3864,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
|
||||
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
|
||||
NameVals.push_back(FS->instCount());
|
||||
NameVals.push_back(getEncodedFFlags(FS->fflags()));
|
||||
NameVals.push_back(FS->entryCount());
|
||||
|
||||
// Fill in below
|
||||
NameVals.push_back(0); // numrefs
|
||||
NameVals.push_back(0); // immutablerefcnt
|
||||
@ -3875,8 +3880,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
|
||||
ImmutableRefCnt++;
|
||||
Count++;
|
||||
}
|
||||
NameVals[5] = Count;
|
||||
NameVals[6] = ImmutableRefCnt;
|
||||
NameVals[6] = Count;
|
||||
NameVals[7] = ImmutableRefCnt;
|
||||
|
||||
bool HasProfileData = false;
|
||||
for (auto &EI : FS->calls()) {
|
||||
|
@ -4,6 +4,7 @@ add_llvm_library(LLVMLTO
|
||||
LTOBackend.cpp
|
||||
LTOModule.cpp
|
||||
LTOCodeGenerator.cpp
|
||||
SummaryBasedOptimizations.cpp
|
||||
UpdateCompilerUsed.cpp
|
||||
ThinLTOCodeGenerator.cpp
|
||||
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "llvm/IR/Mangler.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/LTO/LTOBackend.h"
|
||||
#include "llvm/LTO/SummaryBasedOptimizations.h"
|
||||
#include "llvm/Linker/IRMover.h"
|
||||
#include "llvm/Object/IRObjectFile.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
@ -42,6 +43,7 @@
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
|
||||
#include "llvm/Transforms/Utils/SplitModule.h"
|
||||
|
||||
#include <set>
|
||||
@ -1170,6 +1172,9 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
|
||||
if (!ModuleToDefinedGVSummaries.count(Mod.first))
|
||||
ModuleToDefinedGVSummaries.try_emplace(Mod.first);
|
||||
|
||||
// Synthesize entry counts for functions in the CombinedIndex.
|
||||
computeSyntheticCounts(ThinLTO.CombinedIndex);
|
||||
|
||||
StringMap<FunctionImporter::ImportMapTy> ImportLists(
|
||||
ThinLTO.ModuleMap.size());
|
||||
StringMap<FunctionImporter::ExportSetTy> ExportLists(
|
||||
|
80
lib/LTO/SummaryBasedOptimizations.cpp
Normal file
80
lib/LTO/SummaryBasedOptimizations.cpp
Normal file
@ -0,0 +1,80 @@
|
||||
//==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements optimizations that are based on the module summaries.
|
||||
// These optimizations are performed during the thinlink phase of the
|
||||
// compilation.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/LTO/SummaryBasedOptimizations.h"
|
||||
#include "llvm/Analysis/SyntheticCountsUtils.h"
|
||||
#include "llvm/IR/ModuleSummaryIndex.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
cl::opt<bool> ThinLTOSynthesizeEntryCounts(
|
||||
"thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden,
|
||||
cl::desc("Synthesize entry counts based on the summary"));
|
||||
|
||||
extern cl::opt<int> InitialSyntheticCount;
|
||||
|
||||
/// Seed the entry count of every callgraph root in \p Index with
/// InitialSyntheticCount.
static void initializeCounts(ModuleSummaryIndex &Index) {
  // calculateCallGraphRoot() hands back a fake node whose callees are the
  // actual roots of the callgraph.
  auto FakeRoot = Index.calculateCallGraphRoot();

  // FIXME: Only the root nodes get an initial count. That is reasonable when
  // a binary is built with ThinLTO, but in a library any non-root node could
  // also be called from outside.
  for (auto &RootEdge : FakeRoot.calls())
    for (auto &Summary : RootEdge.first.getSummaryList())
      cast<FunctionSummary>(Summary->getBaseObject())
          ->setEntryCount(InitialSyntheticCount);
}
|
||||
|
||||
/// Synthesize function entry counts on the combined index: seed the callgraph
/// roots, propagate the counts along call edges, and mark the index as
/// carrying synthetic entry counts.
void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) {
  // The whole computation is opt-in via -thinlto-synthesize-entry-counts.
  if (!ThinLTOSynthesizeEntryCounts)
    return;

  using Scaled64 = ScaledNumber<uint64_t>;

  // Give the callgraph roots their initial count before propagating.
  initializeCounts(Index);

  // Relative frequency of a call site: the edge's RelBlockFreq wrapped in a
  // Scaled64 with scale -CalleeInfo::ScaleShift.
  auto GetCallSiteRelFreq = [](FunctionSummary::EdgeTy &Edge) {
    return Scaled64(Edge.second.RelBlockFreq, -CalleeInfo::ScaleShift);
  };

  // Current entry count of a node, read from its first summary; nodes without
  // any summary report zero.
  auto GetEntryCount = [](ValueInfo V) -> uint64_t {
    auto Summaries = V.getSummaryList();
    if (Summaries.empty())
      return UINT64_C(0);
    return cast<FunctionSummary>(Summaries.front()->getBaseObject())
        ->entryCount();
  };

  // Add New (saturating) to the entry count of every summary of a node; a
  // node without summaries has nothing to update.
  auto AddToEntryCount = [](ValueInfo V, uint64_t New) {
    for (auto &Entry : V.getSummaryList()) {
      auto *Fn = cast<FunctionSummary>(Entry->getBaseObject());
      Fn->setEntryCount(SaturatingAdd(Fn->entryCount(), New));
    }
  };

  // With the roots seeded, the counts still have to flow across the combined
  // callgraph. SyntheticCountsUtils::propagate does that for any callgraph
  // type that specializes GraphTraits.
  SyntheticCountsUtils<ModuleSummaryIndex *>::propagate(
      &Index, GetCallSiteRelFreq, GetEntryCount, AddToEntryCount);
  Index.setHasSyntheticEntryCounts();
}
|
@ -33,6 +33,7 @@
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/IRReader/IRReader.h"
|
||||
#include "llvm/LTO/LTO.h"
|
||||
#include "llvm/LTO/SummaryBasedOptimizations.h"
|
||||
#include "llvm/MC/SubtargetFeature.h"
|
||||
#include "llvm/Object/IRObjectFile.h"
|
||||
#include "llvm/Support/CachePruning.h"
|
||||
@ -883,6 +884,9 @@ void ThinLTOCodeGenerator::run() {
|
||||
// Compute "dead" symbols, we don't want to import/export these!
|
||||
computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols);
|
||||
|
||||
// Synthesize entry counts for functions in the combined index.
|
||||
computeSyntheticCounts(*Index);
|
||||
|
||||
// Collect the import/export lists for all modules from the call-graph in the
|
||||
// combined index.
|
||||
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
|
||||
|
@ -46,7 +46,7 @@ using ProfileCount = Function::ProfileCount;
|
||||
#define DEBUG_TYPE "synthetic-counts-propagation"
|
||||
|
||||
/// Initial synthetic count assigned to functions.
|
||||
static cl::opt<int>
|
||||
cl::opt<int>
|
||||
InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
|
||||
cl::ZeroOrMore,
|
||||
cl::desc("Initial value of synthetic entry count."));
|
||||
|
@ -203,11 +203,26 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
|
||||
|
||||
void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
|
||||
|
||||
// Check the summaries to see if the symbol gets resolved to a known local
|
||||
// definition.
|
||||
ValueInfo VI;
|
||||
if (GV.hasName()) {
|
||||
VI = ImportIndex.getValueInfo(GV.getGUID());
|
||||
// Set synthetic function entry counts.
|
||||
if (VI && ImportIndex.hasSyntheticEntryCounts()) {
  // Only function definitions in this module receive an entry count.
  if (Function *F = dyn_cast<Function>(&GV)) {
    if (!F->isDeclaration()) {
      for (auto &S : VI.getSummaryList()) {
        // dyn_cast<> yields null when the base object is not a
        // FunctionSummary; skip such entries instead of dereferencing a
        // null pointer.
        auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
        if (!FS)
          continue;
        // Attach the count from the summary that belongs to this module.
        if (FS->modulePath() == M.getModuleIdentifier()) {
          F->setEntryCount(Function::ProfileCount(FS->entryCount(),
                                                  Function::PCT_Synthetic));
          break;
        }
      }
    }
  }
}
|
||||
// Check the summaries to see if the symbol gets resolved to a known local
|
||||
// definition.
|
||||
if (VI && VI.isDSOLocal()) {
|
||||
GV.setDSOLocal(true);
|
||||
if (GV.hasDLLImportStorageClass())
|
||||
|
@ -2,7 +2,7 @@
|
||||
; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s
|
||||
|
||||
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
|
||||
; CHECK: <VERSION op0=5/>
|
||||
; CHECK: <VERSION op0=6/>
|
||||
|
||||
|
||||
|
||||
|
@ -33,7 +33,7 @@
|
||||
; COMBINED-NEXT: <VALUE_GUID op0=[[ALIASID:[0-9]+]] op1=-5751648690987223394/>
|
||||
; COMBINED-NEXT: <VALUE_GUID
|
||||
; COMBINED-NEXT: <VALUE_GUID op0=[[ALIASEEID:[0-9]+]] op1=-1039159065113703048/>
|
||||
; COMBINED-NEXT: <COMBINED {{.*}} op7=[[ALIASID]]/>
|
||||
; COMBINED-NEXT: <COMBINED {{.*}} op8=[[ALIASID]]/>
|
||||
; COMBINED-NEXT: <COMBINED {{.*}}
|
||||
; COMBINED-NEXT: <COMBINED_ALIAS {{.*}} op3=[[ALIASEEID]]
|
||||
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK
|
||||
|
@ -30,7 +30,7 @@
|
||||
; COMBINED-NEXT: <COMBINED
|
||||
; See if the call to func is registered, using the expected hotness type.
|
||||
; op9=2 which is hotnessType::None.
|
||||
; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op7=[[FUNCID]] op8=2/>
|
||||
; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op8=[[FUNCID]] op9=2/>
|
||||
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
|
||||
|
||||
; ModuleID = 'thinlto-function-summary-callgraph.ll'
|
||||
|
@ -71,7 +71,7 @@
|
||||
; COMBINED-NEXT: <COMBINED abbrevid=
|
||||
; COMBINED-NEXT: <COMBINED abbrevid=
|
||||
; COMBINED-NEXT: <COMBINED abbrevid=
|
||||
; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op7=[[HOT1:.*]] op8=3 op9=[[COLD:.*]] op10=1 op11=[[HOT2:.*]] op12=3 op13=[[NONE1:.*]] op14=2 op15=[[HOT3:.*]] op16=3 op17=[[NONE2:.*]] op18=2 op19=[[NONE3:.*]] op20=2/>
|
||||
; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op8=[[HOT1:.*]] op9=3 op10=[[COLD:.*]] op11=1 op12=[[HOT2:.*]] op13=3 op14=[[NONE1:.*]] op15=2 op16=[[HOT3:.*]] op17=3 op18=[[NONE2:.*]] op19=2 op20=[[NONE3:.*]] op21=2/>
|
||||
; COMBINED_NEXT: <COMBINED abbrevid=
|
||||
; COMBINED_NEXT: </GLOBALVAL_SUMMARY_BLOCK>
|
||||
|
||||
|
@ -58,7 +58,7 @@
|
||||
; COMBINED-NEXT: <COMBINED abbrevid=
|
||||
; COMBINED-NEXT: <COMBINED abbrevid=
|
||||
; COMBINED-NEXT: <COMBINED abbrevid=
|
||||
; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op7=[[NONE1:.*]] op8=0 op9=[[HOT1:.*]] op10=3 op11=[[COLD1:.*]] op12=1 op13=[[NONE2:.*]] op14=0 op15=[[HOT2:.*]] op16=3 op17=[[COLD2:.*]] op18=1 op19=[[NONE3:.*]] op20=0 op21=[[HOT3:.*]] op22=3 op23=[[COLD3:.*]] op24=1/>
|
||||
; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op8=[[NONE1:.*]] op9=0 op10=[[HOT1:.*]] op11=3 op12=[[COLD1:.*]] op13=1 op14=[[NONE2:.*]] op15=0 op16=[[HOT2:.*]] op17=3 op18=[[COLD2:.*]] op19=1 op20=[[NONE3:.*]] op21=0 op22=[[HOT3:.*]] op23=3 op24=[[COLD3:.*]] op25=1/>
|
||||
; COMBINED_NEXT: <COMBINED abbrevid=
|
||||
; COMBINED_NEXT: </GLOBALVAL_SUMMARY_BLOCK>
|
||||
|
||||
|
@ -33,7 +33,7 @@
|
||||
; COMBINED-NEXT: <VALUE_GUID
|
||||
; COMBINED-NEXT: <COMBINED
|
||||
; See if the call to func is registered.
|
||||
; COMBINED-NEXT: <COMBINED {{.*}} op7=[[FUNCID]]/>
|
||||
; COMBINED-NEXT: <COMBINED {{.*}} op8=[[FUNCID]]/>
|
||||
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
|
||||
|
||||
; ModuleID = 'thinlto-function-summary-callgraph.ll'
|
||||
|
21
test/Bitcode/thinlto-synthetic-count-flag.ll
Normal file
21
test/Bitcode/thinlto-synthetic-count-flag.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; REQUIRES: x86-registered-target
|
||||
; RUN: opt -module-summary %s -o %t.o
|
||||
|
||||
; Ensure synthetic entry count flag is not set on distributed index
|
||||
; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \
|
||||
; RUN: -r %t.o,glob,plx -compute-dead=false
|
||||
; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=NOSYNTHETIC
|
||||
; NOSYNTHETIC: <FLAGS op0=0/>
|
||||
|
||||
; Ensure synthetic entry count flag is set on distributed index
|
||||
; when option used to enable synthetic count propagation
|
||||
; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \
|
||||
; RUN: -r %t.o,glob,plx -thinlto-synthesize-entry-counts \
|
||||
; RUN: -compute-dead=false
|
||||
; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=HASSYNTHETIC
|
||||
; HASSYNTHETIC: <FLAGS op0=4/>
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@glob = global i32 0
|
9
test/ThinLTO/X86/Inputs/function_entry_count.ll
Normal file
9
test/ThinLTO/X86/Inputs/function_entry_count.ll
Normal file
@ -0,0 +1,9 @@
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
declare void @h();
|
||||
|
||||
define void @g() {
|
||||
call void @h();
|
||||
ret void
|
||||
}
|
44
test/ThinLTO/X86/function_entry_count.ll
Normal file
44
test/ThinLTO/X86/function_entry_count.ll
Normal file
@ -0,0 +1,44 @@
|
||||
; RUN: opt -thinlto-bc %s -write-relbf-to-summary -thin-link-bitcode-file=%t1.thinlink.bc -o %t1.bc
|
||||
; RUN: opt -thinlto-bc %p/Inputs/function_entry_count.ll -write-relbf-to-summary -thin-link-bitcode-file=%t2.thinlink.bc -o %t2.bc
|
||||
|
||||
; First perform the thin link on the normal bitcode file.
|
||||
; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t.o -save-temps -thinlto-synthesize-entry-counts \
|
||||
; RUN: -r=%t1.bc,g, \
|
||||
; RUN: -r=%t1.bc,f,px \
|
||||
; RUN: -r=%t1.bc,h,px \
|
||||
; RUN: -r=%t2.bc,h, \
|
||||
; RUN: -r=%t2.bc,g,px
|
||||
; RUN: llvm-dis -o - %t.o.1.3.import.bc | FileCheck %s
|
||||
|
||||
; RUN: llvm-lto -thinlto-action=run -thinlto-synthesize-entry-counts -exported-symbol=f \
|
||||
; RUN: -exported-symbol=g -exported-symbol=h -thinlto-save-temps=%t3. %t1.bc %t2.bc
|
||||
; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s
|
||||
|
||||
; CHECK: define void @h() !prof ![[PROF2:[0-9]+]]
|
||||
; CHECK: define void @f(i32 %n) !prof ![[PROF1:[0-9]+]]
|
||||
; CHECK: define available_externally void @g() !prof ![[PROF2]]
|
||||
; CHECK-DAG: ![[PROF1]] = !{!"synthetic_function_entry_count", i64 10}
|
||||
; CHECK-DAG: ![[PROF2]] = !{!"synthetic_function_entry_count", i64 198}
|
||||
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
declare void @g();
|
||||
|
||||
define void @h() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @f(i32 %n) {
|
||||
entry:
|
||||
%cmp = icmp slt i32 %n, 1
|
||||
br i1 %cmp, label %exit, label %loop
|
||||
loop:
|
||||
%n1 = phi i32 [%n, %entry], [%n2, %loop]
|
||||
call void @g()
|
||||
%n2 = sub i32 %n1, 1
|
||||
%cmp2 = icmp slt i32 %n, 1
|
||||
br i1 %cmp2, label %exit, label %loop
|
||||
exit:
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user