1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

Add an internalization step to the ThinLTOCodeGenerator

Keeping as much as possible internal/private is
known to help the optimizer. Let's try to benefit from
this in ThinLTO.
Note: this is early work, but is enough to build clang (and
all the LLVM tools). I still need to write some lit-tests...

Differential Revision: http://reviews.llvm.org/D19103

From: Mehdi Amini <mehdi.amini@apple.com>
llvm-svn: 267317
This commit is contained in:
Mehdi Amini 2016-04-24 03:18:01 +00:00
parent ff7f9dafdb
commit c68b6482c1
4 changed files with 220 additions and 22 deletions

View File

@ -200,6 +200,11 @@ public:
*/
void crossModuleImport(Module &Module, ModuleSummaryIndex &Index);
/**
* Perform internalization.
*/
void internalize(Module &Module, ModuleSummaryIndex &Index);
/**
* Perform post-importing ThinLTO optimizations.
*/

View File

@ -17,6 +17,8 @@
#ifdef HAVE_LLVM_REVISION
#include "LLVMLTORevision.h"
#endif
#include "UpdateCompilerUsed.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
@ -32,6 +34,7 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CachePruning.h"
@ -44,6 +47,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
@ -309,6 +313,77 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM) {
PM.run(TheModule);
}
// Create a DenseSet of GlobalValue to be used with the Internalizer.
static DenseSet<const GlobalValue *> computePreservedSymbolsForModule(
Module &TheModule, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
const FunctionImporter::ExportSetTy &ExportList) {
DenseSet<const GlobalValue *> PreservedGV;
if (GUIDPreservedSymbols.empty())
// Early exit: internalize is disabled when there is nothing to preserve.
return PreservedGV;
auto AddPreserveGV = [&](const GlobalValue &GV) {
auto GUID = GV.getGUID();
if (GUIDPreservedSymbols.count(GUID) || ExportList.count(GUID))
PreservedGV.insert(&GV);
};
for (auto &GV : TheModule)
AddPreserveGV(GV);
for (auto &GV : TheModule.globals())
AddPreserveGV(GV);
for (auto &GV : TheModule.aliases())
AddPreserveGV(GV);
return PreservedGV;
}
// Run internalization on \p TheModule
static void
doInternalizeModule(Module &TheModule, const TargetMachine &TM,
const DenseSet<const GlobalValue *> &PreservedGV) {
if (PreservedGV.empty()) {
// Be friendly and don't nuke totally the module when the client didn't
// supply anything to preserve.
return;
}
// Parse inline ASM and collect the list of symbols that are not defined in
// the current module.
StringSet<> AsmUndefinedRefs;
object::IRObjectFile::CollectAsmUndefinedRefs(
Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
[&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
if (Flags & object::BasicSymbolRef::SF_Undefined)
AsmUndefinedRefs.insert(Name);
});
// Update the llvm.compiler_used globals to force preserving libcalls and
// symbols referenced from asm
UpdateCompilerUsed(TheModule, TM, AsmUndefinedRefs);
// Declare a callback for the internalize pass that will ask for every
// candidate GlobalValue if it can be internalized or not.
auto MustPreserveGV =
[&](const GlobalValue &GV) -> bool { return PreservedGV.count(&GV); };
llvm::internalizeModule(TheModule, MustPreserveGV);
}
// Convert the PreservedSymbols map from "Name" based to "GUID" based.
static DenseSet<GlobalValue::GUID>
computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
const Triple &TheTriple) {
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
for (auto &Entry : PreservedSymbols) {
StringRef Name = Entry.first();
if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
Name = Name.drop_front();
GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
}
return GUIDPreservedSymbols;
}
std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
TargetMachine &TM) {
SmallVector<char, 128> OutputBuffer;
@ -395,6 +470,9 @@ public:
sys::path::append(EntryPath, CachePath, toHex(Hasher.result()));
}
// Access the path to this entry in the cache.
StringRef getEntryPath() { return EntryPath; }
// Try loading the buffer for this cache entry.
ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
if (EntryPath.empty())
@ -429,6 +507,8 @@ static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule(
Module &TheModule, const ModuleSummaryIndex &Index,
StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
ThinLTOCodeGenerator::CachingOptions CacheOptions, bool DisableCodeGen,
StringRef SaveTempsDir, unsigned count) {
@ -436,6 +516,13 @@ static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule(
// Save temps: after IPO.
saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc");
// Prepare for internalization by computing the set of symbols to preserve.
// We need to compute the list of symbols to preserve during internalization
// before doing any promotion because after renaming we won't (easily) match
// to the original name.
auto PreservedGV = computePreservedSymbolsForModule(
TheModule, GUIDPreservedSymbols, ExportList);
// "Benchmark"-like optimization: single-source case
bool SingleModule = (ModuleMap.size() == 1);
@ -449,16 +536,24 @@ static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule(
// Save temps: after promotion.
saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc");
}
// Internalization
doInternalizeModule(TheModule, TM, PreservedGV);
// Save internalized bitcode
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.internalized.bc");
if (!SingleModule) {
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
// Save temps: after cross-module import.
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
saveTempBitcode(TheModule, SaveTempsDir, count, ".4.imported.bc");
}
optimizeModule(TheModule, TM);
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc");
saveTempBitcode(TheModule, SaveTempsDir, count, ".5.opt.bc");
if (DisableCodeGen) {
// Configured to stop before CodeGen, serialize the bitcode and return.
@ -516,7 +611,10 @@ void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
}
void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
CrossReferencedSymbols.insert(Name);
// FIXME: At the moment, we don't take advantage of this extra information,
// we're conservatively considering cross-references as preserved.
// CrossReferencedSymbols.insert(Name);
PreservedSymbols.insert(Name);
}
// TargetMachine factory
@ -619,11 +717,44 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
}
/**
* Perform internalization.
*/
void ThinLTOCodeGenerator::internalize(Module &TheModule,
ModuleSummaryIndex &Index) {
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
auto ModuleCount = Index.modulePaths().size();
auto ModuleIdentifier = TheModule.getModuleIdentifier();
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols =
computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
// Collect for each module the list of function it defines (GUID -> Summary).
StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
ExportLists);
auto &ExportList = ExportLists[ModuleIdentifier];
// Internalization
auto PreservedGV = computePreservedSymbolsForModule(
TheModule, GUIDPreservedSymbols, ExportList);
doInternalizeModule(TheModule, *TMBuilder.create(), PreservedGV);
}
/**
* Perform post-importing ThinLTO optimizations.
*/
void ThinLTOCodeGenerator::optimize(Module &TheModule) {
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
// Optimize now
optimizeModule(TheModule, *TMBuilder.create());
}
@ -694,10 +825,9 @@ void ThinLTOCodeGenerator::run() {
ExportLists);
// Convert the preserved symbols set from string to GUID, this is needed for
// computing the caching.
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
for (auto &Entry : PreservedSymbols)
GUIDPreservedSymbols.insert(GlobalValue::getGUID(Entry.first()));
// computing the caching hash and the internalization.
auto GUIDPreservedSymbols =
computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
// Parallel optimizer + codegen
{
@ -714,18 +844,21 @@ void ThinLTOCodeGenerator::run() {
// We use a std::map here to be able to have a defined ordering when
// producing a hash for the cache entry.
std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR;
ResolveODR(*Index, ExportList, DefinedFunctions,
ModuleIdentifier, ResolvedODR);
ResolveODR(*Index, ExportList, DefinedFunctions, ModuleIdentifier,
ResolvedODR);
// The module may be cached, this helps handling it.
ModuleCacheEntry CacheEntry(
CacheOptions.Path, *Index, ModuleBuffer.getBufferIdentifier(),
ImportLists[ModuleBuffer.getBufferIdentifier()],
ExportLists[ModuleBuffer.getBufferIdentifier()], ResolvedODR,
DefinedFunctions, GUIDPreservedSymbols);
ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
ImportLists[ModuleIdentifier], ExportList,
ResolvedODR, DefinedFunctions,
GUIDPreservedSymbols);
{
auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
<< CacheEntry.getEntryPath() << "' for buffer " << count
<< " " << ModuleIdentifier << "\n");
if (ErrOrBuffer) {
// Cache Hit!
ProducedBinaries[count] = std::move(ErrOrBuffer.get());
@ -741,14 +874,14 @@ void ThinLTOCodeGenerator::run() {
auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
// Save temps: original file.
if (!SaveTempsDir.empty()) {
saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
}
saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
auto &ImportList = ImportLists[ModuleIdentifier];
// Run the main process now, and generates a binary
auto OutputBuffer = ProcessThinLTOModule(
*TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
ResolvedODR, CacheOptions, DisableCodeGen, SaveTempsDir, count);
ExportList, GUIDPreservedSymbols, ResolvedODR, CacheOptions,
DisableCodeGen, SaveTempsDir, count);
CacheEntry.write(*OutputBuffer);
ProducedBinaries[count] = std::move(OutputBuffer);

View File

@ -0,0 +1,19 @@
;; RUN: opt -module-summary %s -o %t1.bc
; RUN: llvm-lto -thinlto-action=thinlink -o %t.index.bc %t1.bc
; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=REGULAR
; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - --exported-symbol=foo | llvm-dis -o - | FileCheck %s --check-prefix=INTERNALIZE
; REGULAR: define void @foo
; REGULAR: define void @bar
; INTERNALIZE: define void @foo
; INTERNALIZE: define internal void @bar
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"
define void @foo() {
ret void
}
define void @bar() {
ret void
}

View File

@ -68,6 +68,7 @@ enum ThinLTOModes {
THINLINK,
THINPROMOTE,
THINIMPORT,
THININTERNALIZE,
THINOPT,
THINCODEGEN,
THINALL
@ -84,6 +85,9 @@ cl::opt<ThinLTOModes> ThinLTOMode(
clEnumValN(THINIMPORT, "import", "Perform both promotion and "
"cross-module importing (requires "
"-thinlto-index)."),
clEnumValN(THININTERNALIZE, "internalize",
"Perform internalization driven by -exported-symbol "
"(requires -thinlto-index)."),
clEnumValN(THINOPT, "optimize", "Perform ThinLTO optimizations."),
clEnumValN(THINCODEGEN, "codegen", "CodeGen (expected to match llc)"),
clEnumValN(THINALL, "run", "Perform ThinLTO end-to-end"),
@ -105,10 +109,10 @@ static cl::opt<std::string> OutputFilename("o", cl::init(""),
cl::desc("Override output filename"),
cl::value_desc("filename"));
static cl::list<std::string>
ExportedSymbols("exported-symbol",
cl::desc("Symbol to export from the resulting object file"),
cl::ZeroOrMore);
static cl::list<std::string> ExportedSymbols(
"exported-symbol",
cl::desc("List of symbols to export from the resulting object file"),
cl::ZeroOrMore);
static cl::list<std::string>
DSOSymbols("dso-symbol",
@ -329,6 +333,10 @@ public:
ThinLTOProcessing(const TargetOptions &Options) {
ThinGenerator.setCodePICModel(RelocModel);
ThinGenerator.setTargetOptions(Options);
// Add all the exported symbols to the table of symbols to preserve.
for (unsigned i = 0; i < ExportedSymbols.size(); ++i)
ThinGenerator.preserveSymbol(ExportedSymbols[i]);
}
void run() {
@ -339,6 +347,8 @@ public:
return promote();
case THINIMPORT:
return import();
case THININTERNALIZE:
return internalize();
case THINOPT:
return optimize();
case THINCODEGEN:
@ -432,6 +442,37 @@ private:
}
}
void internalize() {
if (InputFilenames.size() != 1 && !OutputFilename.empty())
report_fatal_error("Can't handle a single output filename and multiple "
"input files, do not provide an output filename and "
"the output files will be suffixed from the input "
"ones.");
if (ExportedSymbols.empty())
errs() << "Warning: -internalize will not perform without "
"-exported-symbol\n";
auto Index = loadCombinedIndex();
auto InputBuffers = loadAllFilesForIndex(*Index);
for (auto &MemBuffer : InputBuffers)
ThinGenerator.addModule(MemBuffer->getBufferIdentifier(),
MemBuffer->getBuffer());
for (auto &Filename : InputFilenames) {
LLVMContext Ctx;
auto TheModule = loadModule(Filename, Ctx);
ThinGenerator.internalize(*TheModule, *Index);
std::string OutputName = OutputFilename;
if (OutputName.empty()) {
OutputName = Filename + ".thinlto.internalized.bc";
}
writeModuleToFile(*TheModule, OutputName);
}
}
void optimize() {
if (InputFilenames.size() != 1 && !OutputFilename.empty())
report_fatal_error("Can't handle a single output filename and multiple "