mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
Add an internalization step to the ThinLTOCodeGenerator
Keeping as much as possible internal/private is known to help the optimizer. Let's try to benefit from this in ThinLTO. Note: this is early work, but is enough to build clang (and all the LLVM tools). I still need to write some lit-tests... Differential Revision: http://reviews.llvm.org/D19103 From: Mehdi Amini <mehdi.amini@apple.com> llvm-svn: 267317
This commit is contained in:
parent
ff7f9dafdb
commit
c68b6482c1
@ -200,6 +200,11 @@ public:
|
||||
*/
|
||||
void crossModuleImport(Module &Module, ModuleSummaryIndex &Index);
|
||||
|
||||
/**
 * Perform internalization on \p Module.
 *
 * The definition in this change derives the module's export list from
 * \p Index and keeps external only the globals that are either exported or
 * were registered as preserved symbols; nothing is internalized when no
 * symbol was registered as preserved.
 */
|
||||
void internalize(Module &Module, ModuleSummaryIndex &Index);
|
||||
|
||||
/**
|
||||
* Perform post-importing ThinLTO optimizations.
|
||||
*/
|
||||
|
@ -17,6 +17,8 @@
|
||||
#ifdef HAVE_LLVM_REVISION
|
||||
#include "LLVMLTORevision.h"
|
||||
#endif
|
||||
|
||||
#include "UpdateCompilerUsed.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
|
||||
@ -32,6 +34,7 @@
|
||||
#include "llvm/IRReader/IRReader.h"
|
||||
#include "llvm/Linker/Linker.h"
|
||||
#include "llvm/MC/SubtargetFeature.h"
|
||||
#include "llvm/Object/IRObjectFile.h"
|
||||
#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/CachePruning.h"
|
||||
@ -44,6 +47,7 @@
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
#include "llvm/Transforms/IPO/FunctionImport.h"
|
||||
#include "llvm/Transforms/IPO/Internalize.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#include "llvm/Transforms/ObjCARC.h"
|
||||
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
|
||||
@ -309,6 +313,77 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM) {
|
||||
PM.run(TheModule);
|
||||
}
|
||||
|
||||
// Create a DenseSet of GlobalValue to be used with the Internalizer.
|
||||
static DenseSet<const GlobalValue *> computePreservedSymbolsForModule(
|
||||
Module &TheModule, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
|
||||
const FunctionImporter::ExportSetTy &ExportList) {
|
||||
DenseSet<const GlobalValue *> PreservedGV;
|
||||
if (GUIDPreservedSymbols.empty())
|
||||
// Early exit: internalize is disabled when there is nothing to preserve.
|
||||
return PreservedGV;
|
||||
|
||||
auto AddPreserveGV = [&](const GlobalValue &GV) {
|
||||
auto GUID = GV.getGUID();
|
||||
if (GUIDPreservedSymbols.count(GUID) || ExportList.count(GUID))
|
||||
PreservedGV.insert(&GV);
|
||||
};
|
||||
|
||||
for (auto &GV : TheModule)
|
||||
AddPreserveGV(GV);
|
||||
for (auto &GV : TheModule.globals())
|
||||
AddPreserveGV(GV);
|
||||
for (auto &GV : TheModule.aliases())
|
||||
AddPreserveGV(GV);
|
||||
|
||||
return PreservedGV;
|
||||
}
|
||||
|
||||
// Run internalization on \p TheModule
|
||||
static void
|
||||
doInternalizeModule(Module &TheModule, const TargetMachine &TM,
|
||||
const DenseSet<const GlobalValue *> &PreservedGV) {
|
||||
if (PreservedGV.empty()) {
|
||||
// Be friendly and don't nuke totally the module when the client didn't
|
||||
// supply anything to preserve.
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse inline ASM and collect the list of symbols that are not defined in
|
||||
// the current module.
|
||||
StringSet<> AsmUndefinedRefs;
|
||||
object::IRObjectFile::CollectAsmUndefinedRefs(
|
||||
Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
|
||||
[&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
|
||||
if (Flags & object::BasicSymbolRef::SF_Undefined)
|
||||
AsmUndefinedRefs.insert(Name);
|
||||
});
|
||||
|
||||
// Update the llvm.compiler_used globals to force preserving libcalls and
|
||||
// symbols referenced from asm
|
||||
UpdateCompilerUsed(TheModule, TM, AsmUndefinedRefs);
|
||||
|
||||
// Declare a callback for the internalize pass that will ask for every
|
||||
// candidate GlobalValue if it can be internalized or not.
|
||||
auto MustPreserveGV =
|
||||
[&](const GlobalValue &GV) -> bool { return PreservedGV.count(&GV); };
|
||||
|
||||
llvm::internalizeModule(TheModule, MustPreserveGV);
|
||||
}
|
||||
|
||||
// Convert the PreservedSymbols map from "Name" based to "GUID" based.
|
||||
static DenseSet<GlobalValue::GUID>
|
||||
computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
|
||||
const Triple &TheTriple) {
|
||||
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
|
||||
for (auto &Entry : PreservedSymbols) {
|
||||
StringRef Name = Entry.first();
|
||||
if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
|
||||
Name = Name.drop_front();
|
||||
GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
|
||||
}
|
||||
return GUIDPreservedSymbols;
|
||||
}
|
||||
|
||||
std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
|
||||
TargetMachine &TM) {
|
||||
SmallVector<char, 128> OutputBuffer;
|
||||
@ -395,6 +470,9 @@ public:
|
||||
sys::path::append(EntryPath, CachePath, toHex(Hasher.result()));
|
||||
}
|
||||
|
||||
// Return the path of this entry in the cache, as stored in EntryPath.
|
||||
StringRef getEntryPath() { return EntryPath; }
|
||||
|
||||
// Try loading the buffer for this cache entry.
|
||||
ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
|
||||
if (EntryPath.empty())
|
||||
@ -429,6 +507,8 @@ static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule(
|
||||
Module &TheModule, const ModuleSummaryIndex &Index,
|
||||
StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
|
||||
const FunctionImporter::ImportMapTy &ImportList,
|
||||
const FunctionImporter::ExportSetTy &ExportList,
|
||||
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
|
||||
std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
|
||||
ThinLTOCodeGenerator::CachingOptions CacheOptions, bool DisableCodeGen,
|
||||
StringRef SaveTempsDir, unsigned count) {
|
||||
@ -436,6 +516,13 @@ static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule(
|
||||
// Save temps: after IPO.
|
||||
saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc");
|
||||
|
||||
// Prepare for internalization by computing the set of symbols to preserve.
|
||||
// We need to compute the list of symbols to preserve during internalization
|
||||
// before doing any promotion because after renaming we won't (easily) match
|
||||
// to the original name.
|
||||
auto PreservedGV = computePreservedSymbolsForModule(
|
||||
TheModule, GUIDPreservedSymbols, ExportList);
|
||||
|
||||
// "Benchmark"-like optimization: single-source case
|
||||
bool SingleModule = (ModuleMap.size() == 1);
|
||||
|
||||
@ -449,16 +536,24 @@ static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule(
|
||||
|
||||
// Save temps: after promotion.
|
||||
saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc");
|
||||
}
|
||||
|
||||
// Internalization
|
||||
doInternalizeModule(TheModule, TM, PreservedGV);
|
||||
|
||||
// Save internalized bitcode
|
||||
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.internalized.bc");
|
||||
|
||||
if (!SingleModule) {
|
||||
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
|
||||
|
||||
// Save temps: after cross-module import.
|
||||
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
|
||||
saveTempBitcode(TheModule, SaveTempsDir, count, ".4.imported.bc");
|
||||
}
|
||||
|
||||
optimizeModule(TheModule, TM);
|
||||
|
||||
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc");
|
||||
saveTempBitcode(TheModule, SaveTempsDir, count, ".5.opt.bc");
|
||||
|
||||
if (DisableCodeGen) {
|
||||
// Configured to stop before CodeGen, serialize the bitcode and return.
|
||||
@ -516,7 +611,10 @@ void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
|
||||
}
|
||||
|
||||
// Register \p Name as a symbol that is cross-referenced.
// NOTE(review): this span is a rendered diff hunk without +/- markers; the
// bare CrossReferencedSymbols.insert(Name) directly below is presumably the
// removed pre-change line, leaving PreservedSymbols.insert(Name) as the only
// live statement -- confirm against the actual file.
void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
|
||||
CrossReferencedSymbols.insert(Name);
|
||||
// FIXME: At the moment, we don't take advantage of this extra information,
|
||||
// we're conservatively considering cross-references as preserved.
|
||||
// CrossReferencedSymbols.insert(Name);
|
||||
PreservedSymbols.insert(Name);
|
||||
}
|
||||
|
||||
// TargetMachine factory
|
||||
@ -619,11 +717,44 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
|
||||
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform internalization.
|
||||
*/
|
||||
void ThinLTOCodeGenerator::internalize(Module &TheModule,
|
||||
ModuleSummaryIndex &Index) {
|
||||
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
|
||||
auto ModuleCount = Index.modulePaths().size();
|
||||
auto ModuleIdentifier = TheModule.getModuleIdentifier();
|
||||
|
||||
// Convert the preserved symbols set from string to GUID
|
||||
auto GUIDPreservedSymbols =
|
||||
computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
|
||||
|
||||
// Collect for each module the list of function it defines (GUID -> Summary).
|
||||
StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
|
||||
ModuleToDefinedGVSummaries(ModuleCount);
|
||||
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
|
||||
|
||||
// Generate import/export list
|
||||
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
|
||||
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
|
||||
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
|
||||
ExportLists);
|
||||
auto &ExportList = ExportLists[ModuleIdentifier];
|
||||
|
||||
// Internalization
|
||||
auto PreservedGV = computePreservedSymbolsForModule(
|
||||
TheModule, GUIDPreservedSymbols, ExportList);
|
||||
doInternalizeModule(TheModule, *TMBuilder.create(), PreservedGV);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform post-importing ThinLTO optimizations.
|
||||
*/
|
||||
void ThinLTOCodeGenerator::optimize(Module &TheModule) {
|
||||
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
|
||||
|
||||
// Optimize now
|
||||
optimizeModule(TheModule, *TMBuilder.create());
|
||||
}
|
||||
|
||||
@ -694,10 +825,9 @@ void ThinLTOCodeGenerator::run() {
|
||||
ExportLists);
|
||||
|
||||
// Convert the preserved symbols set from string to GUID, this is needed for
|
||||
// computing the caching.
|
||||
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
|
||||
for (auto &Entry : PreservedSymbols)
|
||||
GUIDPreservedSymbols.insert(GlobalValue::getGUID(Entry.first()));
|
||||
// computing the caching hash and the internalization.
|
||||
auto GUIDPreservedSymbols =
|
||||
computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
|
||||
|
||||
// Parallel optimizer + codegen
|
||||
{
|
||||
@ -714,18 +844,21 @@ void ThinLTOCodeGenerator::run() {
|
||||
// We use a std::map here to be able to have a defined ordering when
|
||||
// producing a hash for the cache entry.
|
||||
std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR;
|
||||
ResolveODR(*Index, ExportList, DefinedFunctions,
|
||||
ModuleIdentifier, ResolvedODR);
|
||||
ResolveODR(*Index, ExportList, DefinedFunctions, ModuleIdentifier,
|
||||
ResolvedODR);
|
||||
|
||||
// The module may be cached, this helps handling it.
|
||||
ModuleCacheEntry CacheEntry(
|
||||
CacheOptions.Path, *Index, ModuleBuffer.getBufferIdentifier(),
|
||||
ImportLists[ModuleBuffer.getBufferIdentifier()],
|
||||
ExportLists[ModuleBuffer.getBufferIdentifier()], ResolvedODR,
|
||||
DefinedFunctions, GUIDPreservedSymbols);
|
||||
ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
|
||||
ImportLists[ModuleIdentifier], ExportList,
|
||||
ResolvedODR, DefinedFunctions,
|
||||
GUIDPreservedSymbols);
|
||||
|
||||
{
|
||||
auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
|
||||
DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
|
||||
<< CacheEntry.getEntryPath() << "' for buffer " << count
|
||||
<< " " << ModuleIdentifier << "\n");
|
||||
|
||||
if (ErrOrBuffer) {
|
||||
// Cache Hit!
|
||||
ProducedBinaries[count] = std::move(ErrOrBuffer.get());
|
||||
@ -741,14 +874,14 @@ void ThinLTOCodeGenerator::run() {
|
||||
auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
|
||||
|
||||
// Save temps: original file.
|
||||
if (!SaveTempsDir.empty()) {
|
||||
saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
|
||||
}
|
||||
saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
|
||||
|
||||
auto &ImportList = ImportLists[ModuleIdentifier];
|
||||
// Run the main process now, and generates a binary
|
||||
auto OutputBuffer = ProcessThinLTOModule(
|
||||
*TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
|
||||
ResolvedODR, CacheOptions, DisableCodeGen, SaveTempsDir, count);
|
||||
ExportList, GUIDPreservedSymbols, ResolvedODR, CacheOptions,
|
||||
DisableCodeGen, SaveTempsDir, count);
|
||||
|
||||
CacheEntry.write(*OutputBuffer);
|
||||
ProducedBinaries[count] = std::move(OutputBuffer);
|
||||
|
19
test/ThinLTO/X86/internalize.ll
Normal file
19
test/ThinLTO/X86/internalize.ll
Normal file
@ -0,0 +1,19 @@
|
||||
;; RUN: opt -module-summary %s -o %t1.bc
|
||||
; RUN: llvm-lto -thinlto-action=thinlink -o %t.index.bc %t1.bc
|
||||
; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=REGULAR
|
||||
; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - --exported-symbol=foo | llvm-dis -o - | FileCheck %s --check-prefix=INTERNALIZE
|
||||
|
||||
; REGULAR: define void @foo
|
||||
; REGULAR: define void @bar
|
||||
; INTERNALIZE: define void @foo
|
||||
; INTERNALIZE: define internal void @bar
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.11.0"
|
||||
|
||||
define void @foo() {
|
||||
ret void
|
||||
}
|
||||
define void @bar() {
|
||||
ret void
|
||||
}
|
@ -68,6 +68,7 @@ enum ThinLTOModes {
|
||||
THINLINK,
|
||||
THINPROMOTE,
|
||||
THINIMPORT,
|
||||
THININTERNALIZE,
|
||||
THINOPT,
|
||||
THINCODEGEN,
|
||||
THINALL
|
||||
@ -84,6 +85,9 @@ cl::opt<ThinLTOModes> ThinLTOMode(
|
||||
clEnumValN(THINIMPORT, "import", "Perform both promotion and "
|
||||
"cross-module importing (requires "
|
||||
"-thinlto-index)."),
|
||||
clEnumValN(THININTERNALIZE, "internalize",
|
||||
"Perform internalization driven by -exported-symbol "
|
||||
"(requires -thinlto-index)."),
|
||||
clEnumValN(THINOPT, "optimize", "Perform ThinLTO optimizations."),
|
||||
clEnumValN(THINCODEGEN, "codegen", "CodeGen (expected to match llc)"),
|
||||
clEnumValN(THINALL, "run", "Perform ThinLTO end-to-end"),
|
||||
@ -105,10 +109,10 @@ static cl::opt<std::string> OutputFilename("o", cl::init(""),
|
||||
cl::desc("Override output filename"),
|
||||
cl::value_desc("filename"));
|
||||
|
||||
static cl::list<std::string>
|
||||
ExportedSymbols("exported-symbol",
|
||||
cl::desc("Symbol to export from the resulting object file"),
|
||||
cl::ZeroOrMore);
|
||||
static cl::list<std::string> ExportedSymbols(
|
||||
"exported-symbol",
|
||||
cl::desc("List of symbols to export from the resulting object file"),
|
||||
cl::ZeroOrMore);
|
||||
|
||||
static cl::list<std::string>
|
||||
DSOSymbols("dso-symbol",
|
||||
@ -329,6 +333,10 @@ public:
|
||||
ThinLTOProcessing(const TargetOptions &Options) {
|
||||
ThinGenerator.setCodePICModel(RelocModel);
|
||||
ThinGenerator.setTargetOptions(Options);
|
||||
|
||||
// Add all the exported symbols to the table of symbols to preserve.
|
||||
for (unsigned i = 0; i < ExportedSymbols.size(); ++i)
|
||||
ThinGenerator.preserveSymbol(ExportedSymbols[i]);
|
||||
}
|
||||
|
||||
void run() {
|
||||
@ -339,6 +347,8 @@ public:
|
||||
return promote();
|
||||
case THINIMPORT:
|
||||
return import();
|
||||
case THININTERNALIZE:
|
||||
return internalize();
|
||||
case THINOPT:
|
||||
return optimize();
|
||||
case THINCODEGEN:
|
||||
@ -432,6 +442,37 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
void internalize() {
|
||||
if (InputFilenames.size() != 1 && !OutputFilename.empty())
|
||||
report_fatal_error("Can't handle a single output filename and multiple "
|
||||
"input files, do not provide an output filename and "
|
||||
"the output files will be suffixed from the input "
|
||||
"ones.");
|
||||
|
||||
if (ExportedSymbols.empty())
|
||||
errs() << "Warning: -internalize will not perform without "
|
||||
"-exported-symbol\n";
|
||||
|
||||
auto Index = loadCombinedIndex();
|
||||
auto InputBuffers = loadAllFilesForIndex(*Index);
|
||||
for (auto &MemBuffer : InputBuffers)
|
||||
ThinGenerator.addModule(MemBuffer->getBufferIdentifier(),
|
||||
MemBuffer->getBuffer());
|
||||
|
||||
for (auto &Filename : InputFilenames) {
|
||||
LLVMContext Ctx;
|
||||
auto TheModule = loadModule(Filename, Ctx);
|
||||
|
||||
ThinGenerator.internalize(*TheModule, *Index);
|
||||
|
||||
std::string OutputName = OutputFilename;
|
||||
if (OutputName.empty()) {
|
||||
OutputName = Filename + ".thinlto.internalized.bc";
|
||||
}
|
||||
writeModuleToFile(*TheModule, OutputName);
|
||||
}
|
||||
}
|
||||
|
||||
void optimize() {
|
||||
if (InputFilenames.size() != 1 && !OutputFilename.empty())
|
||||
report_fatal_error("Can't handle a single output filename and multiple "
|
||||
|
Loading…
Reference in New Issue
Block a user