mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
CodeGen: Introduce splitCodeGen and teach LTOCodeGenerator to use it.
llvm::splitCodeGen is a function that implements the core of parallel LTO code generation. It uses llvm::SplitModule to split the module into linkable partitions and spawns one code generation thread per partition. The function produces multiple object files which can be linked in the usual way. This has been threaded through to LTOCodeGenerator (and llvm-lto for testing purposes). Separate patches will add parallel LTO support to the gold plugin and lld. Differential Revision: http://reviews.llvm.org/D12260 llvm-svn: 246236
This commit is contained in:
parent
1f54bd38ce
commit
b346721555
41
include/llvm/CodeGen/ParallelCG.h
Normal file
41
include/llvm/CodeGen/ParallelCG.h
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
//===-- llvm/CodeGen/ParallelCG.h - Parallel code generation ----*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This header declares functions that can be used for parallel code generation.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_CODEGEN_PARALLELCG_H
|
||||||
|
#define LLVM_CODEGEN_PARALLELCG_H
|
||||||
|
|
||||||
|
#include "llvm/ADT/ArrayRef.h"
|
||||||
|
#include "llvm/Support/CodeGen.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class Module;
|
||||||
|
class TargetOptions;
|
||||||
|
class raw_pwrite_stream;
|
||||||
|
|
||||||
|
/// Split M into OSs.size() partitions, and generate code for each. Writes
|
||||||
|
/// OSs.size() object files to the output streams in OSs. The resulting object
|
||||||
|
/// files if linked together are intended to be equivalent to the single object
|
||||||
|
/// file that would have been code generated from M.
|
||||||
|
///
|
||||||
|
/// \returns M if OSs.size() == 1, otherwise returns std::unique_ptr<Module>().
|
||||||
|
std::unique_ptr<Module>
|
||||||
|
splitCodeGen(std::unique_ptr<Module> M, ArrayRef<raw_pwrite_stream *> OSs,
|
||||||
|
StringRef CPU, StringRef Features, const TargetOptions &Options,
|
||||||
|
Reloc::Model RM = Reloc::Default,
|
||||||
|
CodeModel::Model CM = CodeModel::Default,
|
||||||
|
CodeGenOpt::Level OL = CodeGenOpt::Default);
|
||||||
|
|
||||||
|
} // namespace llvm
|
||||||
|
|
||||||
|
#endif
|
@ -133,6 +133,12 @@ struct LTOCodeGenerator {
|
|||||||
// if the compilation was not successful.
|
// if the compilation was not successful.
|
||||||
std::unique_ptr<MemoryBuffer> compileOptimized(std::string &errMsg);
|
std::unique_ptr<MemoryBuffer> compileOptimized(std::string &errMsg);
|
||||||
|
|
||||||
|
// Compile the merged optimized module into out.size() object files each
|
||||||
|
// representing a linkable partition of the module. If out contains more than
|
||||||
|
// one element, code generation is done in parallel with out.size() threads.
|
||||||
|
// Object files will be written to members of out. Returns true on success.
|
||||||
|
bool compileOptimized(ArrayRef<raw_pwrite_stream *> out, std::string &errMsg);
|
||||||
|
|
||||||
void setDiagnosticHandler(lto_diagnostic_handler_t, void *);
|
void setDiagnosticHandler(lto_diagnostic_handler_t, void *);
|
||||||
|
|
||||||
LLVMContext &getContext() { return Context; }
|
LLVMContext &getContext() { return Context; }
|
||||||
@ -140,7 +146,6 @@ struct LTOCodeGenerator {
|
|||||||
private:
|
private:
|
||||||
void initializeLTOPasses();
|
void initializeLTOPasses();
|
||||||
|
|
||||||
bool compileOptimized(raw_pwrite_stream &out, std::string &errMsg);
|
|
||||||
bool compileOptimizedToFile(const char **name, std::string &errMsg);
|
bool compileOptimizedToFile(const char **name, std::string &errMsg);
|
||||||
void applyScopeRestrictions();
|
void applyScopeRestrictions();
|
||||||
void applyRestriction(GlobalValue &GV, ArrayRef<StringRef> Libcalls,
|
void applyRestriction(GlobalValue &GV, ArrayRef<StringRef> Libcalls,
|
||||||
|
@ -80,6 +80,7 @@ add_llvm_library(LLVMCodeGen
|
|||||||
OptimizePHIs.cpp
|
OptimizePHIs.cpp
|
||||||
PHIElimination.cpp
|
PHIElimination.cpp
|
||||||
PHIEliminationUtils.cpp
|
PHIEliminationUtils.cpp
|
||||||
|
ParallelCG.cpp
|
||||||
Passes.cpp
|
Passes.cpp
|
||||||
PeepholeOptimizer.cpp
|
PeepholeOptimizer.cpp
|
||||||
PostRASchedulerList.cpp
|
PostRASchedulerList.cpp
|
||||||
|
95
lib/CodeGen/ParallelCG.cpp
Normal file
95
lib/CodeGen/ParallelCG.cpp
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
//===-- ParallelCG.cpp ----------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file defines functions that can be used for parallel code generation.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "llvm/CodeGen/ParallelCG.h"
|
||||||
|
#include "llvm/Bitcode/ReaderWriter.h"
|
||||||
|
#include "llvm/IR/LLVMContext.h"
|
||||||
|
#include "llvm/IR/LegacyPassManager.h"
|
||||||
|
#include "llvm/IR/Module.h"
|
||||||
|
#include "llvm/Support/ErrorOr.h"
|
||||||
|
#include "llvm/Support/MemoryBuffer.h"
|
||||||
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
|
#include "llvm/Support/thread.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
#include "llvm/Transforms/Utils/SplitModule.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Emit an object file for \p M to \p OS.
///
/// A TargetMachine is created from \p TheTarget for M's target triple using
/// \p CPU, \p Features, \p Options, \p RM, \p CM and \p OL. An object-file
/// emission pipeline is then added to a legacy pass manager and run over
/// \p M. Failure to create the target machine or to set up the pipeline is
/// reported through report_fatal_error.
static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
                    const Target *TheTarget, StringRef CPU, StringRef Features,
                    const TargetOptions &Options, Reloc::Model RM,
                    CodeModel::Model CM, CodeGenOpt::Level OL) {
  std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine(
      M->getTargetTriple(), CPU, Features, Options, RM, CM, OL));
  // createTargetMachine can hand back a null pointer (e.g. when the target
  // registers no TargetMachine constructor). Report that explicitly instead of
  // dereferencing null below.
  if (!TM)
    report_fatal_error("Failed to create target machine");

  legacy::PassManager CodeGenPasses;
  // A true result from addPassesToEmitFile is treated as a setup failure.
  if (TM->addPassesToEmitFile(CodeGenPasses, OS,
                              TargetMachine::CGFT_ObjectFile))
    report_fatal_error("Failed to setup codegen");
  CodeGenPasses.run(*M);
}
|
||||||
|
|
||||||
|
std::unique_ptr<Module>
|
||||||
|
llvm::splitCodeGen(std::unique_ptr<Module> M,
|
||||||
|
ArrayRef<llvm::raw_pwrite_stream *> OSs, StringRef CPU,
|
||||||
|
StringRef Features, const TargetOptions &Options,
|
||||||
|
Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
|
||||||
|
StringRef TripleStr = M->getTargetTriple();
|
||||||
|
std::string ErrMsg;
|
||||||
|
const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
|
||||||
|
if (!TheTarget)
|
||||||
|
report_fatal_error(Twine("Target not found: ") + ErrMsg);
|
||||||
|
|
||||||
|
if (OSs.size() == 1) {
|
||||||
|
codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM,
|
||||||
|
OL);
|
||||||
|
return M;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::thread> Threads;
|
||||||
|
SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr<Module> MPart) {
|
||||||
|
// We want to clone the module in a new context to multi-thread the codegen.
|
||||||
|
// We do it by serializing partition modules to bitcode (while still on the
|
||||||
|
// main thread, in order to avoid data races) and spinning up new threads
|
||||||
|
// which deserialize the partitions into separate contexts.
|
||||||
|
// FIXME: Provide a more direct way to do this in LLVM.
|
||||||
|
SmallVector<char, 0> BC;
|
||||||
|
raw_svector_ostream BCOS(BC);
|
||||||
|
WriteBitcodeToFile(MPart.get(), BCOS);
|
||||||
|
|
||||||
|
llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()];
|
||||||
|
Threads.emplace_back(
|
||||||
|
[TheTarget, CPU, Features, Options, RM, CM, OL,
|
||||||
|
ThreadOS](const SmallVector<char, 0> &BC) {
|
||||||
|
LLVMContext Ctx;
|
||||||
|
ErrorOr<std::unique_ptr<Module>> MOrErr =
|
||||||
|
parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()),
|
||||||
|
"<split-module>"),
|
||||||
|
Ctx);
|
||||||
|
if (!MOrErr)
|
||||||
|
report_fatal_error("Failed to read bitcode");
|
||||||
|
std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
|
||||||
|
|
||||||
|
codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features,
|
||||||
|
Options, RM, CM, OL);
|
||||||
|
},
|
||||||
|
// Pass BC using std::move to ensure that it get moved rather than
|
||||||
|
// copied into the thread's context.
|
||||||
|
std::move(BC));
|
||||||
|
});
|
||||||
|
|
||||||
|
for (std::thread &T : Threads)
|
||||||
|
T.join();
|
||||||
|
|
||||||
|
return {};
|
||||||
|
}
|
@ -18,6 +18,7 @@
|
|||||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||||
#include "llvm/Bitcode/ReaderWriter.h"
|
#include "llvm/Bitcode/ReaderWriter.h"
|
||||||
|
#include "llvm/CodeGen/ParallelCG.h"
|
||||||
#include "llvm/CodeGen/RuntimeLibcalls.h"
|
#include "llvm/CodeGen/RuntimeLibcalls.h"
|
||||||
#include "llvm/Config/config.h"
|
#include "llvm/Config/config.h"
|
||||||
#include "llvm/IR/Constants.h"
|
#include "llvm/IR/Constants.h"
|
||||||
@ -218,7 +219,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **name,
|
|||||||
// generate object file
|
// generate object file
|
||||||
tool_output_file objFile(Filename.c_str(), FD);
|
tool_output_file objFile(Filename.c_str(), FD);
|
||||||
|
|
||||||
bool genResult = compileOptimized(objFile.os(), errMsg);
|
bool genResult = compileOptimized(&objFile.os(), errMsg);
|
||||||
objFile.os().close();
|
objFile.os().close();
|
||||||
if (objFile.os().has_error()) {
|
if (objFile.os().has_error()) {
|
||||||
objFile.os().clear_error();
|
objFile.os().clear_error();
|
||||||
@ -495,25 +496,26 @@ bool LTOCodeGenerator::optimize(bool DisableInline,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out,
|
bool LTOCodeGenerator::compileOptimized(ArrayRef<raw_pwrite_stream *> out,
|
||||||
std::string &errMsg) {
|
std::string &errMsg) {
|
||||||
if (!this->determineTarget(errMsg))
|
if (!this->determineTarget(errMsg))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
legacy::PassManager codeGenPasses;
|
legacy::PassManager preCodeGenPasses;
|
||||||
|
|
||||||
// If the bitcode files contain ARC code and were compiled with optimization,
|
// If the bitcode files contain ARC code and were compiled with optimization,
|
||||||
// the ObjCARCContractPass must be run, so do it unconditionally here.
|
// the ObjCARCContractPass must be run, so do it unconditionally here.
|
||||||
codeGenPasses.add(createObjCARCContractPass());
|
preCodeGenPasses.add(createObjCARCContractPass());
|
||||||
|
preCodeGenPasses.run(*MergedModule);
|
||||||
|
|
||||||
if (TargetMach->addPassesToEmitFile(codeGenPasses, out,
|
// Do code generation. We need to preserve the module in case the client calls
|
||||||
TargetMachine::CGFT_ObjectFile)) {
|
// writeMergedModules() after compilation, but we only need to allow this at
|
||||||
errMsg = "target file type not supported";
|
// parallelism level 1. This is achieved by having splitCodeGen return the
|
||||||
return false;
|
// original module at parallelism level 1 which we then assign back to
|
||||||
}
|
// MergedModule.
|
||||||
|
MergedModule =
|
||||||
// Run the code generator, and write object file
|
splitCodeGen(std::move(MergedModule), out, MCpu, FeatureStr, Options,
|
||||||
codeGenPasses.run(*MergedModule);
|
RelocModel, CodeModel::Default, CGOptLevel);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
22
test/LTO/X86/parallel.ll
Normal file
22
test/LTO/X86/parallel.ll
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
; RUN: llvm-as -o %t.bc %s
|
||||||
|
; RUN: llvm-lto -exported-symbol=foo -exported-symbol=bar -j2 -o %t.o %t.bc
|
||||||
|
; RUN: llvm-nm %t.o.0 | FileCheck --check-prefix=CHECK0 %s
|
||||||
|
; RUN: llvm-nm %t.o.1 | FileCheck --check-prefix=CHECK1 %s
|
||||||
|
|
||||||
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
; CHECK0-NOT: bar
|
||||||
|
; CHECK0: T foo
|
||||||
|
; CHECK0-NOT: bar
|
||||||
|
define void @foo() {
|
||||||
|
call void @bar()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK1-NOT: foo
|
||||||
|
; CHECK1: T bar
|
||||||
|
; CHECK1-NOT: foo
|
||||||
|
define void @bar() {
|
||||||
|
call void @foo()
|
||||||
|
ret void
|
||||||
|
}
|
@ -22,7 +22,9 @@
|
|||||||
#include "llvm/Support/PrettyStackTrace.h"
|
#include "llvm/Support/PrettyStackTrace.h"
|
||||||
#include "llvm/Support/Signals.h"
|
#include "llvm/Support/Signals.h"
|
||||||
#include "llvm/Support/TargetSelect.h"
|
#include "llvm/Support/TargetSelect.h"
|
||||||
|
#include "llvm/Support/ToolOutputFile.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
#include <list>
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
@ -77,6 +79,9 @@ static cl::opt<bool> SetMergedModule(
|
|||||||
"set-merged-module", cl::init(false),
|
"set-merged-module", cl::init(false),
|
||||||
cl::desc("Use the first input module as the merged module"));
|
cl::desc("Use the first input module as the merged module"));
|
||||||
|
|
||||||
|
static cl::opt<unsigned> Parallelism("j", cl::Prefix, cl::init(1),
|
||||||
|
cl::desc("Number of backend threads"));
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
struct ModuleInfo {
|
struct ModuleInfo {
|
||||||
std::vector<bool> CanBeHidden;
|
std::vector<bool> CanBeHidden;
|
||||||
@ -240,24 +245,41 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
if (!OutputFilename.empty()) {
|
if (!OutputFilename.empty()) {
|
||||||
std::string ErrorInfo;
|
std::string ErrorInfo;
|
||||||
std::unique_ptr<MemoryBuffer> Code = CodeGen.compile(
|
if (!CodeGen.optimize(DisableInline, DisableGVNLoadPRE,
|
||||||
DisableInline, DisableGVNLoadPRE, DisableLTOVectorization, ErrorInfo);
|
DisableLTOVectorization, ErrorInfo)) {
|
||||||
if (!Code) {
|
errs() << argv[0] << ": error optimizing the code: " << ErrorInfo << "\n";
|
||||||
errs() << argv[0]
|
|
||||||
<< ": error compiling the code: " << ErrorInfo << "\n";
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::error_code EC;
|
std::list<tool_output_file> OSs;
|
||||||
raw_fd_ostream FileStream(OutputFilename, EC, sys::fs::F_None);
|
std::vector<raw_pwrite_stream *> OSPtrs;
|
||||||
if (EC) {
|
for (unsigned I = 0; I != Parallelism; ++I) {
|
||||||
errs() << argv[0] << ": error opening the file '" << OutputFilename
|
std::string PartFilename = OutputFilename;
|
||||||
<< "': " << EC.message() << "\n";
|
if (Parallelism != 1)
|
||||||
|
PartFilename += "." + utostr(I);
|
||||||
|
std::error_code EC;
|
||||||
|
OSs.emplace_back(PartFilename, EC, sys::fs::F_None);
|
||||||
|
if (EC) {
|
||||||
|
errs() << argv[0] << ": error opening the file '" << PartFilename
|
||||||
|
<< "': " << EC.message() << "\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
OSPtrs.push_back(&OSs.back().os());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!CodeGen.compileOptimized(OSPtrs, ErrorInfo)) {
|
||||||
|
errs() << argv[0] << ": error compiling the code: " << ErrorInfo << "\n";
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
FileStream.write(Code->getBufferStart(), Code->getBufferSize());
|
for (tool_output_file &OS : OSs)
|
||||||
|
OS.keep();
|
||||||
} else {
|
} else {
|
||||||
|
if (Parallelism != 1) {
|
||||||
|
errs() << argv[0] << ": -j must be specified together with -o\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
std::string ErrorInfo;
|
std::string ErrorInfo;
|
||||||
const char *OutputName = nullptr;
|
const char *OutputName = nullptr;
|
||||||
if (!CodeGen.compile_to_file(&OutputName, DisableInline,
|
if (!CodeGen.compile_to_file(&OutputName, DisableInline,
|
||||||
|
Loading…
Reference in New Issue
Block a user