2016-12-16 01:26:30 +01:00
|
|
|
//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-06-01 03:02:12 +02:00
|
|
|
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
|
2017-02-14 04:42:38 +01:00
|
|
|
#include "llvm/Analysis/BasicAliasAnalysis.h"
|
2016-12-16 01:26:30 +01:00
|
|
|
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
|
2017-05-10 20:52:16 +02:00
|
|
|
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
2016-12-16 01:26:30 +01:00
|
|
|
#include "llvm/Analysis/TypeMetadataUtils.h"
|
|
|
|
#include "llvm/Bitcode/BitcodeWriter.h"
|
|
|
|
#include "llvm/IR/Constants.h"
|
2017-02-08 21:44:00 +01:00
|
|
|
#include "llvm/IR/DebugInfo.h"
|
2016-12-16 01:26:30 +01:00
|
|
|
#include "llvm/IR/Intrinsics.h"
|
|
|
|
#include "llvm/IR/Module.h"
|
|
|
|
#include "llvm/IR/PassManager.h"
|
|
|
|
#include "llvm/Pass.h"
|
|
|
|
#include "llvm/Support/ScopedPrinter.h"
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include "llvm/Transforms/IPO.h"
|
2017-02-14 04:42:38 +01:00
|
|
|
#include "llvm/Transforms/IPO/FunctionAttrs.h"
|
2016-12-16 01:26:30 +01:00
|
|
|
#include "llvm/Transforms/Utils/Cloning.h"
|
2017-04-27 22:27:27 +02:00
|
|
|
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
2016-12-16 01:26:30 +01:00
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Promote each local-linkage entity defined by ExportM and used by ImportM by
|
|
|
|
// changing visibility and appending the given ModuleId.
|
2017-06-16 02:18:29 +02:00
|
|
|
void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
|
|
|
|
SetVector<GlobalValue *> &PromoteExtra) {
|
2017-04-12 03:43:07 +02:00
|
|
|
DenseMap<const Comdat *, Comdat *> RenamedComdats;
|
2017-03-31 01:43:08 +02:00
|
|
|
for (auto &ExportGV : ExportM.global_values()) {
|
2016-12-16 01:26:30 +01:00
|
|
|
if (!ExportGV.hasLocalLinkage())
|
2017-03-31 01:43:08 +02:00
|
|
|
continue;
|
2016-12-16 01:26:30 +01:00
|
|
|
|
2017-04-12 03:43:07 +02:00
|
|
|
auto Name = ExportGV.getName();
|
2017-12-01 00:05:52 +01:00
|
|
|
GlobalValue *ImportGV = nullptr;
|
|
|
|
if (!PromoteExtra.count(&ExportGV)) {
|
|
|
|
ImportGV = ImportM.getNamedValue(Name);
|
|
|
|
if (!ImportGV)
|
|
|
|
continue;
|
|
|
|
ImportGV->removeDeadConstantUsers();
|
|
|
|
if (ImportGV->use_empty()) {
|
|
|
|
ImportGV->eraseFromParent();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2016-12-16 01:26:30 +01:00
|
|
|
|
2017-04-12 03:43:07 +02:00
|
|
|
std::string NewName = (Name + ModuleId).str();
|
|
|
|
|
|
|
|
if (const auto *C = ExportGV.getComdat())
|
|
|
|
if (C->getName() == Name)
|
|
|
|
RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));
|
2016-12-16 01:26:30 +01:00
|
|
|
|
|
|
|
ExportGV.setName(NewName);
|
|
|
|
ExportGV.setLinkage(GlobalValue::ExternalLinkage);
|
|
|
|
ExportGV.setVisibility(GlobalValue::HiddenVisibility);
|
|
|
|
|
2017-06-16 02:18:29 +02:00
|
|
|
if (ImportGV) {
|
|
|
|
ImportGV->setName(NewName);
|
|
|
|
ImportGV->setVisibility(GlobalValue::HiddenVisibility);
|
|
|
|
}
|
2017-03-31 01:43:08 +02:00
|
|
|
}
|
2017-04-12 03:43:07 +02:00
|
|
|
|
|
|
|
if (!RenamedComdats.empty())
|
|
|
|
for (auto &GO : ExportM.global_objects())
|
|
|
|
if (auto *C = GO.getComdat()) {
|
|
|
|
auto Replacement = RenamedComdats.find(C);
|
|
|
|
if (Replacement != RenamedComdats.end())
|
|
|
|
GO.setComdat(Replacement->second);
|
|
|
|
}
|
2016-12-16 01:26:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Promote all internal (i.e. distinct) type ids used by the module by replacing
|
|
|
|
// them with external type ids formed using the module id.
|
|
|
|
//
|
|
|
|
// Note that this needs to be done before we clone the module because each clone
|
|
|
|
// will receive its own set of distinct metadata nodes.
|
|
|
|
void promoteTypeIds(Module &M, StringRef ModuleId) {
|
|
|
|
DenseMap<Metadata *, Metadata *> LocalToGlobal;
|
|
|
|
auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
|
|
|
|
Metadata *MD =
|
|
|
|
cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
|
|
|
|
|
|
|
|
if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
|
|
|
|
Metadata *&GlobalMD = LocalToGlobal[MD];
|
|
|
|
if (!GlobalMD) {
|
2017-12-28 17:58:54 +01:00
|
|
|
std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();
|
2016-12-16 01:26:30 +01:00
|
|
|
GlobalMD = MDString::get(M.getContext(), NewName);
|
|
|
|
}
|
|
|
|
|
|
|
|
CI->setArgOperand(ArgNo,
|
|
|
|
MetadataAsValue::get(M.getContext(), GlobalMD));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
if (Function *TypeTestFunc =
|
|
|
|
M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
|
|
|
|
for (const Use &U : TypeTestFunc->uses()) {
|
|
|
|
auto CI = cast<CallInst>(U.getUser());
|
|
|
|
ExternalizeTypeId(CI, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Function *TypeCheckedLoadFunc =
|
|
|
|
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
|
|
|
|
for (const Use &U : TypeCheckedLoadFunc->uses()) {
|
|
|
|
auto CI = cast<CallInst>(U.getUser());
|
|
|
|
ExternalizeTypeId(CI, 2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (GlobalObject &GO : M.global_objects()) {
|
|
|
|
SmallVector<MDNode *, 1> MDs;
|
|
|
|
GO.getMetadata(LLVMContext::MD_type, MDs);
|
|
|
|
|
|
|
|
GO.eraseMetadata(LLVMContext::MD_type);
|
|
|
|
for (auto MD : MDs) {
|
|
|
|
auto I = LocalToGlobal.find(MD->getOperand(1));
|
|
|
|
if (I == LocalToGlobal.end()) {
|
|
|
|
GO.addMetadata(LLVMContext::MD_type, *MD);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
GO.addMetadata(
|
|
|
|
LLVMContext::MD_type,
|
|
|
|
*MDNode::get(M.getContext(),
|
|
|
|
ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Drop unused globals, and drop type information from function declarations.
|
|
|
|
// FIXME: If we made functions typeless then there would be no need to do this.
|
|
|
|
void simplifyExternals(Module &M) {
|
|
|
|
FunctionType *EmptyFT =
|
|
|
|
FunctionType::get(Type::getVoidTy(M.getContext()), false);
|
|
|
|
|
|
|
|
for (auto I = M.begin(), E = M.end(); I != E;) {
|
|
|
|
Function &F = *I++;
|
|
|
|
if (F.isDeclaration() && F.use_empty()) {
|
|
|
|
F.eraseFromParent();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2017-07-19 19:54:29 +02:00
|
|
|
if (!F.isDeclaration() || F.getFunctionType() == EmptyFT ||
|
|
|
|
// Changing the type of an intrinsic may invalidate the IR.
|
|
|
|
F.getName().startswith("llvm."))
|
2016-12-16 01:26:30 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
Function *NewF =
|
|
|
|
Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
|
|
|
|
NewF->setVisibility(F.getVisibility());
|
|
|
|
NewF->takeName(&F);
|
|
|
|
F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
|
|
|
|
F.eraseFromParent();
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
|
|
|
|
GlobalVariable &GV = *I++;
|
|
|
|
if (GV.isDeclaration() && GV.use_empty()) {
|
|
|
|
GV.eraseFromParent();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void filterModule(
|
2017-01-13 15:39:03 +01:00
|
|
|
Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
|
2016-12-16 01:26:30 +01:00
|
|
|
for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
|
|
|
|
I != E;) {
|
|
|
|
GlobalAlias *GA = &*I++;
|
|
|
|
if (ShouldKeepDefinition(GA))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
GlobalObject *GO;
|
2017-04-05 02:42:07 +02:00
|
|
|
if (GA->getValueType()->isFunctionTy())
|
2016-12-16 01:26:30 +01:00
|
|
|
GO = Function::Create(cast<FunctionType>(GA->getValueType()),
|
|
|
|
GlobalValue::ExternalLinkage, "", M);
|
|
|
|
else
|
|
|
|
GO = new GlobalVariable(
|
|
|
|
*M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
|
2017-05-11 10:53:00 +02:00
|
|
|
nullptr, "", nullptr,
|
2016-12-16 01:26:30 +01:00
|
|
|
GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
|
|
|
|
GO->takeName(GA);
|
|
|
|
GA->replaceAllUsesWith(GO);
|
|
|
|
GA->eraseFromParent();
|
|
|
|
}
|
2017-04-05 02:42:07 +02:00
|
|
|
|
|
|
|
for (Function &F : *M) {
|
|
|
|
if (ShouldKeepDefinition(&F))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
F.deleteBody();
|
|
|
|
F.setComdat(nullptr);
|
|
|
|
F.clearMetadata();
|
|
|
|
}
|
|
|
|
|
|
|
|
for (GlobalVariable &GV : M->globals()) {
|
|
|
|
if (ShouldKeepDefinition(&GV))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
GV.setInitializer(nullptr);
|
|
|
|
GV.setLinkage(GlobalValue::ExternalLinkage);
|
|
|
|
GV.setComdat(nullptr);
|
|
|
|
GV.clearMetadata();
|
|
|
|
}
|
2016-12-16 01:26:30 +01:00
|
|
|
}
|
|
|
|
|
2017-02-14 04:42:38 +01:00
|
|
|
// Walks the constant C and invokes Fn on every Function reached through its
// operands. Global values other than functions are not descended into.
void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
  auto *Callee = dyn_cast<Function>(C);
  if (Callee) {
    Fn(Callee);
    return;
  }

  // Only non-global constants are searched; any other global value ends the
  // walk along this path.
  if (!isa<GlobalValue>(C)) {
    for (Value *Operand : C->operands())
      forEachVirtualFunction(cast<Constant>(Operand), Fn);
  }
}
|
|
|
|
|
2016-12-16 01:26:30 +01:00
|
|
|
// If it's possible to split M into regular and thin LTO parts, do so and write
|
|
|
|
// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
|
|
|
|
// regular LTO bitcode file to OS.
|
2017-02-14 04:42:38 +01:00
|
|
|
void splitAndWriteThinLTOBitcode(
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
raw_ostream &OS, raw_ostream *ThinLinkOS,
|
|
|
|
function_ref<AAResults &(Function &)> AARGetter, Module &M) {
|
2017-04-27 22:27:27 +02:00
|
|
|
std::string ModuleId = getUniqueModuleId(&M);
|
2016-12-16 01:26:30 +01:00
|
|
|
if (ModuleId.empty()) {
|
|
|
|
// We couldn't generate a module ID for this module, just write it out as a
|
|
|
|
// regular LTO module.
|
|
|
|
WriteBitcodeToFile(&M, OS);
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
if (ThinLinkOS)
|
|
|
|
// We don't have a ThinLTO part, but still write the module to the
|
|
|
|
// ThinLinkOS if requested so that the expected output file is produced.
|
|
|
|
WriteBitcodeToFile(&M, *ThinLinkOS);
|
2016-12-16 01:26:30 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
promoteTypeIds(M, ModuleId);
|
|
|
|
|
2017-02-14 04:42:38 +01:00
|
|
|
// Returns whether a global has attached type metadata. Such globals may
|
|
|
|
// participate in CFI or whole-program devirtualization, so they need to
|
|
|
|
// appear in the merged module instead of the thin LTO module.
|
|
|
|
auto HasTypeMetadata = [&](const GlobalObject *GO) {
|
2016-12-16 01:26:30 +01:00
|
|
|
SmallVector<MDNode *, 1> MDs;
|
2017-02-14 04:42:38 +01:00
|
|
|
GO->getMetadata(LLVMContext::MD_type, MDs);
|
2016-12-16 01:26:30 +01:00
|
|
|
return !MDs.empty();
|
|
|
|
};
|
|
|
|
|
2017-02-14 04:42:38 +01:00
|
|
|
// Collect the set of virtual functions that are eligible for virtual constant
|
|
|
|
// propagation. Each eligible function must not access memory, must return
|
|
|
|
// an integer of width <=64 bits, must take at least one argument, must not
|
|
|
|
// use its first argument (assumed to be "this") and all arguments other than
|
|
|
|
// the first one must be of <=64 bit integer type.
|
|
|
|
//
|
|
|
|
// Note that we test whether this copy of the function is readnone, rather
|
|
|
|
// than testing function attributes, which must hold for any copy of the
|
|
|
|
// function, even a less optimized version substituted at link time. This is
|
|
|
|
// sound because the virtual constant propagation optimizations effectively
|
|
|
|
// inline all implementations of the virtual function into each call site,
|
|
|
|
// rather than using function attributes to perform local optimization.
|
|
|
|
std::set<const Function *> EligibleVirtualFns;
|
2017-04-12 03:43:07 +02:00
|
|
|
// If any member of a comdat lives in MergedM, put all members of that
|
|
|
|
// comdat in MergedM to keep the comdat together.
|
|
|
|
DenseSet<const Comdat *> MergedMComdats;
|
2017-02-14 04:42:38 +01:00
|
|
|
for (GlobalVariable &GV : M.globals())
|
2017-04-12 03:43:07 +02:00
|
|
|
if (HasTypeMetadata(&GV)) {
|
|
|
|
if (const auto *C = GV.getComdat())
|
|
|
|
MergedMComdats.insert(C);
|
2017-02-14 04:42:38 +01:00
|
|
|
forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
|
|
|
|
auto *RT = dyn_cast<IntegerType>(F->getReturnType());
|
|
|
|
if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
|
|
|
|
!F->arg_begin()->use_empty())
|
|
|
|
return;
|
|
|
|
for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
|
|
|
|
auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
|
|
|
|
if (!ArgT || ArgT->getBitWidth() > 64)
|
|
|
|
return;
|
|
|
|
}
|
2017-07-11 07:39:20 +02:00
|
|
|
if (!F->isDeclaration() &&
|
|
|
|
computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
|
2017-02-14 04:42:38 +01:00
|
|
|
EligibleVirtualFns.insert(F);
|
|
|
|
});
|
2017-04-12 03:43:07 +02:00
|
|
|
}
|
2017-02-14 04:42:38 +01:00
|
|
|
|
2016-12-16 01:26:30 +01:00
|
|
|
ValueToValueMapTy VMap;
|
2017-02-14 04:42:38 +01:00
|
|
|
std::unique_ptr<Module> MergedM(
|
|
|
|
CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
|
2017-04-12 03:43:07 +02:00
|
|
|
if (const auto *C = GV->getComdat())
|
|
|
|
if (MergedMComdats.count(C))
|
|
|
|
return true;
|
2017-02-14 04:42:38 +01:00
|
|
|
if (auto *F = dyn_cast<Function>(GV))
|
|
|
|
return EligibleVirtualFns.count(F);
|
|
|
|
if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
|
|
|
|
return HasTypeMetadata(GVar);
|
|
|
|
return false;
|
|
|
|
}));
|
2017-02-08 21:44:00 +01:00
|
|
|
StripDebugInfo(*MergedM);
|
2016-12-16 01:26:30 +01:00
|
|
|
|
2017-02-14 04:42:38 +01:00
|
|
|
for (Function &F : *MergedM)
|
|
|
|
if (!F.isDeclaration()) {
|
|
|
|
// Reset the linkage of all functions eligible for virtual constant
|
|
|
|
// propagation. The canonical definitions live in the thin LTO module so
|
|
|
|
// that they can be imported.
|
|
|
|
F.setLinkage(GlobalValue::AvailableExternallyLinkage);
|
|
|
|
F.setComdat(nullptr);
|
|
|
|
}
|
|
|
|
|
2017-06-16 02:18:29 +02:00
|
|
|
SetVector<GlobalValue *> CfiFunctions;
|
|
|
|
for (auto &F : M)
|
|
|
|
if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F))
|
|
|
|
CfiFunctions.insert(&F);
|
|
|
|
|
2017-04-12 03:43:07 +02:00
|
|
|
// Remove all globals with type metadata, globals with comdats that live in
|
|
|
|
// MergedM, and aliases pointing to such globals from the thin LTO module.
|
2017-02-14 04:42:38 +01:00
|
|
|
filterModule(&M, [&](const GlobalValue *GV) {
|
|
|
|
if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
|
2017-04-12 03:43:07 +02:00
|
|
|
if (HasTypeMetadata(GVar))
|
|
|
|
return false;
|
|
|
|
if (const auto *C = GV->getComdat())
|
|
|
|
if (MergedMComdats.count(C))
|
|
|
|
return false;
|
2017-02-14 04:42:38 +01:00
|
|
|
return true;
|
|
|
|
});
|
2016-12-16 01:26:30 +01:00
|
|
|
|
2017-06-16 02:18:29 +02:00
|
|
|
promoteInternals(*MergedM, M, ModuleId, CfiFunctions);
|
|
|
|
promoteInternals(M, *MergedM, ModuleId, CfiFunctions);
|
|
|
|
|
|
|
|
SmallVector<MDNode *, 8> CfiFunctionMDs;
|
|
|
|
for (auto V : CfiFunctions) {
|
|
|
|
Function &F = *cast<Function>(V);
|
|
|
|
SmallVector<MDNode *, 2> Types;
|
|
|
|
F.getMetadata(LLVMContext::MD_type, Types);
|
|
|
|
|
|
|
|
auto &Ctx = MergedM->getContext();
|
|
|
|
SmallVector<Metadata *, 4> Elts;
|
|
|
|
Elts.push_back(MDString::get(Ctx, F.getName()));
|
|
|
|
CfiFunctionLinkage Linkage;
|
|
|
|
if (!F.isDeclarationForLinker())
|
|
|
|
Linkage = CFL_Definition;
|
|
|
|
else if (F.isWeakForLinker())
|
|
|
|
Linkage = CFL_WeakDeclaration;
|
|
|
|
else
|
|
|
|
Linkage = CFL_Declaration;
|
|
|
|
Elts.push_back(ConstantAsMetadata::get(
|
|
|
|
llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
|
|
|
|
for (auto Type : Types)
|
|
|
|
Elts.push_back(Type);
|
|
|
|
CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
|
|
|
|
}
|
2016-12-16 01:26:30 +01:00
|
|
|
|
2017-06-16 02:18:29 +02:00
|
|
|
if(!CfiFunctionMDs.empty()) {
|
|
|
|
NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions");
|
|
|
|
for (auto MD : CfiFunctionMDs)
|
|
|
|
NMD->addOperand(MD);
|
|
|
|
}
|
2016-12-16 01:26:30 +01:00
|
|
|
|
2017-06-16 02:18:29 +02:00
|
|
|
simplifyExternals(*MergedM);
|
2016-12-16 01:26:30 +01:00
|
|
|
|
|
|
|
// FIXME: Try to re-use BSI and PFI from the original module here.
|
2017-05-10 20:52:16 +02:00
|
|
|
ProfileSummaryInfo PSI(M);
|
|
|
|
ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
|
2016-12-16 01:26:30 +01:00
|
|
|
|
2017-06-09 01:01:49 +02:00
|
|
|
// Mark the merged module as requiring full LTO. We still want an index for
|
|
|
|
// it though, so that it can participate in summary-based dead stripping.
|
|
|
|
MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
|
|
|
|
ModuleSummaryIndex MergedMIndex =
|
|
|
|
buildModuleSummaryIndex(*MergedM, nullptr, &PSI);
|
|
|
|
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
SmallVector<char, 0> Buffer;
|
2016-12-16 01:26:30 +01:00
|
|
|
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
BitcodeWriter W(Buffer);
|
|
|
|
// Save the module hash produced for the full bitcode, which will
|
|
|
|
// be used in the backends, and use that in the minimized bitcode
|
|
|
|
// produced for the full link.
|
|
|
|
ModuleHash ModHash = {{0}};
|
|
|
|
W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
|
|
|
|
/*GenerateHash=*/true, &ModHash);
|
2017-06-09 01:01:49 +02:00
|
|
|
W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
|
|
|
|
&MergedMIndex);
|
2017-06-28 01:50:11 +02:00
|
|
|
W.writeSymtab();
|
2017-04-17 19:51:36 +02:00
|
|
|
W.writeStrtab();
|
2016-12-16 01:26:30 +01:00
|
|
|
OS << Buffer;
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
|
ThinLTO Minimized Bitcode File Size Reduction
Summary: Currently the ThinLTO minimized bitcode file only strips the debug info, but there is still a lot of information in the minimized bitcode file that will not be used by the thin linker. In this patch, most of the extra information is stripped to reduce the size of the minimized bitcode file. Now only ModuleVersion, ModuleInfo, ModuleGlobalValueSummary, ModuleHash, Symtab and Strtab are left. The minimized bitcode file size is now reduced to 15%-30% of the size of the debug-info-stripped bitcode file.
Reviewers: danielcdh, tejohnson, pcc
Reviewed By: pcc
Subscribers: mehdi_amini, aprantl, inglorion, eraman, llvm-commits
Differential Revision: https://reviews.llvm.org/D35334
llvm-svn: 308760
2017-07-21 19:25:20 +02:00
|
|
|
// If a minimized bitcode module was requested for the thin link, only
|
|
|
|
// the information that is needed by thin link will be written in the
|
|
|
|
// given OS (the merged module will be written as usual).
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
if (ThinLinkOS) {
|
|
|
|
Buffer.clear();
|
|
|
|
BitcodeWriter W2(Buffer);
|
|
|
|
StripDebugInfo(M);
|
ThinLTO Minimized Bitcode File Size Reduction
Summary: Currently the ThinLTO minimized bitcode file only strips the debug info, but there is still a lot of information in the minimized bitcode file that will not be used by the thin linker. In this patch, most of the extra information is stripped to reduce the size of the minimized bitcode file. Now only ModuleVersion, ModuleInfo, ModuleGlobalValueSummary, ModuleHash, Symtab and Strtab are left. The minimized bitcode file size is now reduced to 15%-30% of the debug-info-stripped bitcode file size.
Reviewers: danielcdh, tejohnson, pcc
Reviewed By: pcc
Subscribers: mehdi_amini, aprantl, inglorion, eraman, llvm-commits
Differential Revision: https://reviews.llvm.org/D35334
llvm-svn: 308760
2017-07-21 19:25:20 +02:00
|
|
|
W2.writeThinLinkBitcode(&M, Index, ModHash);
|
2017-06-09 01:01:49 +02:00
|
|
|
W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
|
|
|
|
&MergedMIndex);
|
2017-06-28 01:50:11 +02:00
|
|
|
W2.writeSymtab();
|
2017-04-17 19:51:36 +02:00
|
|
|
W2.writeStrtab();
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
*ThinLinkOS << Buffer;
|
|
|
|
}
|
2016-12-16 01:26:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Returns whether this module needs to be split because it uses type metadata.
|
|
|
|
// Reports whether any global object in M carries !type metadata, in which
// case the module has to be split before it is written.
bool requiresSplit(Module &M) {
  for (auto &Obj : M.global_objects()) {
    // Collect this object's !type attachments; one is enough to decide.
    SmallVector<MDNode *, 1> TypeMDs;
    Obj.getMetadata(LLVMContext::MD_type, TypeMDs);
    if (TypeMDs.empty())
      continue;
    return true;
  }

  return false;
}
|
|
|
|
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
|
2017-02-14 04:42:38 +01:00
|
|
|
function_ref<AAResults &(Function &)> AARGetter,
|
|
|
|
Module &M, const ModuleSummaryIndex *Index) {
|
2016-12-16 01:26:30 +01:00
|
|
|
// See if this module has any type metadata. If so, we need to split it.
|
|
|
|
if (requiresSplit(M))
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
|
2016-12-16 01:26:30 +01:00
|
|
|
|
|
|
|
// Otherwise we can just write it out as a regular module.
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
|
|
|
|
// Save the module hash produced for the full bitcode, which will
|
|
|
|
// be used in the backends, and use that in the minimized bitcode
|
|
|
|
// produced for the full link.
|
|
|
|
ModuleHash ModHash = {{0}};
|
2016-12-16 01:26:30 +01:00
|
|
|
WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
/*GenerateHash=*/true, &ModHash);
|
ThinLTO Minimized Bitcode File Size Reduction
Summary: Currently the ThinLTO minimized bitcode file only strip the debug info, but there is still a lot of information in the minimized bit code file that will be not used for thin linker. In this patch, most of the extra information is striped to reduce the minimized bitcode file. Now only ModuleVersion, ModuleInfo, ModuleGlobalValueSummary, ModuleHash, Symtab and Strtab are left. Now the minimized bitcode file size is reduced to 15%-30% of the debug info stripped bitcode file size.
Reviewers: danielcdh, tejohnson, pcc
Reviewed By: pcc
Subscribers: mehdi_amini, aprantl, inglorion, eraman, llvm-commits
Differential Revision: https://reviews.llvm.org/D35334
llvm-svn: 308760
2017-07-21 19:25:20 +02:00
|
|
|
// If a minimized bitcode module was requested for the thin link, only
|
|
|
|
// the information that is needed by thin link will be written in the
|
|
|
|
// given OS.
|
|
|
|
if (ThinLinkOS && Index)
|
|
|
|
WriteThinLinkBitcodeToFile(&M, *ThinLinkOS, *Index, ModHash);
|
2016-12-16 01:26:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
class WriteThinLTOBitcode : public ModulePass {
|
|
|
|
raw_ostream &OS; // raw_ostream to print on
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
// The output stream on which to emit a minimized module for use
|
|
|
|
// just in the thin link, if requested.
|
|
|
|
raw_ostream *ThinLinkOS;
|
2016-12-16 01:26:30 +01:00
|
|
|
|
|
|
|
public:
|
|
|
|
static char ID; // Pass identification, replacement for typeid
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
|
2016-12-16 01:26:30 +01:00
|
|
|
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
|
|
|
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
|
|
|
|
: ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
|
2016-12-16 01:26:30 +01:00
|
|
|
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
|
|
|
|
|
|
|
StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
|
|
|
|
|
|
|
|
bool runOnModule(Module &M) override {
|
|
|
|
const ModuleSummaryIndex *Index =
|
|
|
|
&(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
|
2016-12-16 01:26:30 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
|
|
AU.setPreservesAll();
|
2017-02-14 04:42:38 +01:00
|
|
|
AU.addRequired<AssumptionCacheTracker>();
|
2016-12-16 01:26:30 +01:00
|
|
|
AU.addRequired<ModuleSummaryIndexWrapperPass>();
|
2017-02-14 04:42:38 +01:00
|
|
|
AU.addRequired<TargetLibraryInfoWrapperPass>();
|
2016-12-16 01:26:30 +01:00
|
|
|
}
|
|
|
|
};
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
|
|
char WriteThinLTOBitcode::ID = 0;
|
|
|
|
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
|
|
|
|
"Write ThinLTO Bitcode", false, true)
|
2017-02-14 04:42:38 +01:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
|
2016-12-16 01:26:30 +01:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
|
2017-02-14 04:42:38 +01:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
|
2016-12-16 01:26:30 +01:00
|
|
|
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
|
|
|
|
"Write ThinLTO Bitcode", false, true)
|
|
|
|
|
[ThinLTO] Add support for emitting minimized bitcode for thin link
Summary:
The cumulative size of the bitcode files for a very large application
can be huge, particularly with -g. In a distributed build environment,
all of these files must be sent to the remote build node that performs
the thin link step, and this can exceed size limits.
The thin link actually only needs the summary along with a bitcode
symbol table. Until we have a proper bitcode symbol table, simply
stripping the debug metadata results in significant size reduction.
Add support for an option to additionally emit minimized bitcode
modules, just for use in the thin link step, which for now just strips
all debug metadata. I plan to add a cc1 option so this can be invoked
easily during the compile step.
However, care must be taken to ensure that these minimized thin link
bitcode files produce the same index as with the original bitcode files,
as these original bitcode files will be used in the backends.
Specifically:
1) The module hash used for caching is typically produced by hashing the
written bitcode, and we want to include the hash that would correspond
to the original bitcode file. This is because we want to ensure that
changes in the stripped portions affect caching. Added plumbing to emit
the same module hash in the minimized thin link bitcode file.
2) The module paths in the index are constructed from the module ID of
each thin linked bitcode, and typically is automatically generated from
the input file path. This is the path used for finding the modules to
import from, and obviously we need this to point to the original bitcode
files. Added gold-plugin support to take a suffix replacement during the
thin link that is used to override the identifier on the MemoryBufferRef
constructed from the loaded thin link bitcode file. The assumption is
that the build system can specify that the minimized bitcode file has a
name that is similar but uses a different suffix (e.g. out.thinlink.bc
instead of out.o).
Added various tests to ensure that we get identical index files out of
the thin link step.
Reviewers: mehdi_amini, pcc
Subscribers: Prazek, llvm-commits
Differential Revision: https://reviews.llvm.org/D31027
llvm-svn: 298638
2017-03-23 20:47:39 +01:00
|
|
|
// Creates a legacy WriteThinLTOBitcode pass that writes to Str and, when
// ThinLinkOS is non-null, is also given that stream for the minimized
// thin-link module. The pass is allocated with new and returned as a raw
// pointer.
ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
                                                raw_ostream *ThinLinkOS) {
  ModulePass *Writer = new WriteThinLTOBitcode(Str, ThinLinkOS);
  return Writer;
}
|
2017-06-01 03:02:12 +02:00
|
|
|
|
|
|
|
// New pass manager entry point: writes M through writeThinLTOBitcode() and
// reports all analyses as preserved.
PreservedAnalyses
llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &FuncAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  // Alias analysis results are looked up per function, on demand.
  auto GetAA = [&FuncAM](Function &Fn) -> AAResults & {
    return FuncAM.getResult<AAManager>(Fn);
  };

  const ModuleSummaryIndex &Summary =
      AM.getResult<ModuleSummaryIndexAnalysis>(M);
  writeThinLTOBitcode(OS, ThinLinkOS, GetAA, M, &Summary);

  return PreservedAnalyses::all();
}
|