mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
Add MIR-level outlining pass
This is a patch for the outliner described in the RFC at: http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html The outliner is a code-size reduction pass which works by finding repeated sequences of instructions in a program, and replacing them with calls to functions. This is useful to people working in low-memory environments, where sacrificing performance for space is acceptable. This adds an interprocedural outliner directly before printing assembly. For reference on how this would work, this patch also includes X86 target hooks and an X86 test. The outliner is run like so: clang -mno-red-zone -mllvm -enable-machine-outliner file.c Patch by Jessica Paquette <jpaquette@apple.com>! rdar://29166825 Differential Revision: https://reviews.llvm.org/D26872 llvm-svn: 296418
This commit is contained in:
parent
330a189992
commit
00b30110fb
@ -402,6 +402,10 @@ namespace llvm {
|
||||
|
||||
/// This pass frees the memory occupied by the MachineFunction.
|
||||
FunctionPass *createFreeMachineFunctionPass();
|
||||
|
||||
/// This pass performs outlining on machine instructions directly before
|
||||
/// printing assembly.
|
||||
ModulePass *createMachineOutlinerPass();
|
||||
} // End llvm namespace
|
||||
|
||||
/// Target machine pass initializer for passes with dependencies. Use with
|
||||
|
@ -236,6 +236,7 @@ void initializeMachineLICMPass(PassRegistry&);
|
||||
void initializeMachineLoopInfoPass(PassRegistry&);
|
||||
void initializeMachineModuleInfoPass(PassRegistry&);
|
||||
void initializeMachineOptimizationRemarkEmitterPassPass(PassRegistry&);
|
||||
void initializeMachineOutlinerPass(PassRegistry&);
|
||||
void initializeMachinePipelinerPass(PassRegistry&);
|
||||
void initializeMachinePostDominatorTreePass(PassRegistry&);
|
||||
void initializeMachineRegionInfoPassPass(PassRegistry&);
|
||||
|
@ -1508,6 +1508,63 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \brief Return how many instructions would be saved by outlining a
|
||||
/// sequence containing \p SequenceSize instructions that appears
|
||||
/// \p Occurrences times in a module.
///
/// The base implementation only reports an error through llvm_unreachable,
/// so a target is expected to override this hook before it is called.
|
||||
virtual unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences)
|
||||
const {
|
||||
llvm_unreachable(
|
||||
"Target didn't implement TargetInstrInfo::getOutliningBenefit!");
|
||||
}
|
||||
|
||||
/// Represents how an instruction should be mapped by the outliner. This is
/// the classification returned by getOutliningType().
|
||||
/// \p Legal instructions are those which are safe to outline.
|
||||
/// \p Illegal instructions are those which cannot be outlined.
|
||||
/// \p Invisible instructions are instructions which can be outlined, but
|
||||
/// shouldn't actually impact the outlining result.
|
||||
enum MachineOutlinerInstrType {Legal, Illegal, Invisible};
|
||||
|
||||
/// Return the outliner classification of \p MI. The result is not a boolean:
/// it is one of the MachineOutlinerInstrType values (Legal, Illegal or
/// Invisible).
///
/// The base implementation only reports an error through llvm_unreachable,
/// so a target is expected to override this hook before it is called.
|
||||
virtual MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const {
|
||||
llvm_unreachable(
|
||||
"Target didn't implement TargetInstrInfo::getOutliningType!");
|
||||
}
|
||||
|
||||
/// Insert a custom epilogue for outlined functions.
|
||||
/// This may be empty, in which case no epilogue or return statement will be
|
||||
/// emitted.
///
/// The base implementation only reports an error through llvm_unreachable,
/// so a target is expected to override this hook (an override may have an
/// empty body) before it is called.
|
||||
virtual void insertOutlinerEpilogue(MachineBasicBlock &MBB,
|
||||
MachineFunction &MF) const {
|
||||
llvm_unreachable(
|
||||
"Target didn't implement TargetInstrInfo::insertOutlinerEpilogue!");
|
||||
}
|
||||
|
||||
/// Insert a call to an outlined function into the program.
|
||||
/// Returns an iterator to the spot where we inserted the call. This must be
|
||||
/// implemented by the target.
///
/// The base implementation only reports an error through llvm_unreachable.
/// NOTE(review): \p It is taken by non-const reference, so an override may
/// update the caller's iterator as well as returning one - confirm against
/// the target implementation.
|
||||
virtual MachineBasicBlock::iterator
|
||||
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &It, MachineFunction &MF)
|
||||
const {
|
||||
llvm_unreachable(
|
||||
"Target didn't implement TargetInstrInfo::insertOutlinedCall!");
|
||||
}
|
||||
|
||||
/// Insert a custom prologue for outlined functions.
|
||||
/// This may be empty, in which case no prologue will be emitted.
///
/// The base implementation only reports an error through llvm_unreachable,
/// so a target is expected to override this hook (an override may have an
/// empty body) before it is called.
|
||||
virtual void insertOutlinerPrologue(MachineBasicBlock &MBB,
|
||||
MachineFunction &MF) const {
|
||||
llvm_unreachable(
|
||||
"Target didn't implement TargetInstrInfo::insertOutlinerPrologue!");
|
||||
}
|
||||
|
||||
/// Return true if the function can safely be outlined from.
|
||||
/// There is no usable default: the base implementation only reports an error
/// through llvm_unreachable, so the criterion is chosen by the target's
/// override. For example, the X86 override requires the function to carry
/// the NoRedZone attribute, i.e. to have no red zone.
|
||||
virtual bool isFunctionSafeToOutlineFrom(MachineFunction &F) const {
|
||||
llvm_unreachable("Target didn't implement "
|
||||
"TargetInstrInfo::isFunctionSafeToOutlineFrom!");
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
|
||||
unsigned CatchRetOpcode;
|
||||
|
@ -74,6 +74,7 @@ add_llvm_library(LLVMCodeGen
|
||||
MachineModuleInfo.cpp
|
||||
MachineModuleInfoImpls.cpp
|
||||
MachineOptimizationRemarkEmitter.cpp
|
||||
MachineOutliner.cpp
|
||||
MachinePassRegistry.cpp
|
||||
MachinePipeliner.cpp
|
||||
MachinePostDominators.cpp
|
||||
|
@ -57,6 +57,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
|
||||
initializeMachineLoopInfoPass(Registry);
|
||||
initializeMachineModuleInfoPass(Registry);
|
||||
initializeMachineOptimizationRemarkEmitterPassPass(Registry);
|
||||
initializeMachineOutlinerPass(Registry);
|
||||
initializeMachinePipelinerPass(Registry);
|
||||
initializeMachinePostDominatorTreePass(Registry);
|
||||
initializeMachineRegionInfoPassPass(Registry);
|
||||
|
1399
lib/CodeGen/MachineOutliner.cpp
Normal file
1399
lib/CodeGen/MachineOutliner.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -92,6 +92,9 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
|
||||
cl::desc("Verify generated machine code"),
|
||||
cl::init(false),
|
||||
cl::ZeroOrMore);
|
||||
static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner",
|
||||
cl::Hidden,
|
||||
cl::desc("Enable machine outliner"));
|
||||
|
||||
static cl::opt<std::string>
|
||||
PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
|
||||
@ -674,6 +677,9 @@ void TargetPassConfig::addMachinePasses() {
|
||||
addPass(&XRayInstrumentationID, false);
|
||||
addPass(&PatchableFunctionID, false);
|
||||
|
||||
if (EnableMachineOutliner)
|
||||
PM->add(createMachineOutlinerPass());
|
||||
|
||||
AddingMachinePasses = false;
|
||||
}
|
||||
|
||||
|
@ -10383,3 +10383,83 @@ namespace {
|
||||
char LDTLSCleanup::ID = 0;
|
||||
FunctionPass*
|
||||
llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
|
||||
|
||||
unsigned X86InstrInfo::getOutliningBenefit(size_t SequenceSize,
|
||||
size_t Occurrences) const {
|
||||
unsigned NotOutlinedSize = SequenceSize * Occurrences;
|
||||
|
||||
// Sequence appears once in outlined function (Sequence.size())
|
||||
// One return instruction (+1)
|
||||
// One call per occurrence (Occurrences)
|
||||
unsigned OutlinedSize = (SequenceSize + 1) + Occurrences;
|
||||
|
||||
// Return the number of instructions saved by outlining this sequence.
|
||||
return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
|
||||
}
|
||||
|
||||
/// Decide whether \p MF may be used as a source of outlining candidates.
///
/// \returns true only when the underlying IR function carries the NoRedZone
/// attribute.
bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
  const auto *IRFunction = MF.getFunction();
  return IRFunction->hasFnAttribute(Attribute::NoRedZone);
}
|
||||
|
||||
/// Classify \p MI for the machine outliner.
///
/// \returns Invisible for debug values; Illegal for returns, terminators,
/// position markers, anything that touches RSP or RIP, and instructions with
/// constant-pool, jump-table, CFI, frame or target index operands; Legal for
/// everything else.
X86GenInstrInfo::MachineOutlinerInstrType
X86InstrInfo::getOutliningType(MachineInstr &MI) const {

  // Don't allow debug values to impact outlining type. This is decided before
  // any legality check: a debug value may itself refer to RSP/RIP or carry a
  // frame index operand, and classifying it as Illegal for that reason would
  // let debug info change which sequences get outlined.
  if (MI.isDebugValue() || MI.isIndirectDebugValue())
    return MachineOutlinerInstrType::Invisible;

  // Don't outline returns or basic block terminators.
  if (MI.isReturn() || MI.isTerminator())
    return MachineOutlinerInstrType::Illegal;

  // Don't outline anything that modifies or reads from the stack pointer.
  //
  // FIXME: There are instructions which are being manually built without
  // explicit uses/defs so we also have to check the MCInstrDesc. We should be
  // able to remove the extra checks once those are fixed up. For example,
  // sometimes we might get something like %RAX<def> = POP64r 1. This won't be
  // caught by modifiesRegister or readsRegister even though the instruction
  // really ought to be formed so that modifiesRegister/readsRegister would
  // catch it.
  if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) ||
      MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) ||
      MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
    return MachineOutlinerInstrType::Illegal;

  // Likewise reject anything that reads RIP, or whose instruction description
  // implicitly uses or defines it.
  if (MI.readsRegister(X86::RIP, &RI) ||
      MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) ||
      MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP))
    return MachineOutlinerInstrType::Illegal;

  // Position markers are never outlined.
  if (MI.isPosition())
    return MachineOutlinerInstrType::Illegal;

  // Reject instructions with constant-pool, jump-table, CFI, frame or target
  // index operands.
  // NOTE(review): presumably because such indices are only meaningful inside
  // the function they came from - confirm.
  for (const MachineOperand &MOP : MI.operands())
    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
        MOP.isTargetIndex())
      return MachineOutlinerInstrType::Illegal;

  return MachineOutlinerInstrType::Legal;
}
|
||||
|
||||
/// Terminate an outlined function: build a RETQ with an empty debug location
/// in \p MF and place it at the end of \p MBB.
void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
                                          MachineFunction &MF) const {
  MachineInstr *ReturnInstr = BuildMI(MF, DebugLoc(), get(X86::RETQ));
  MachineBasicBlock::iterator BlockEnd = MBB.end();
  MBB.insert(BlockEnd, ReturnInstr);
}
|
||||
|
||||
/// Outlined functions get no prologue on X86, so this hook deliberately
/// leaves \p MBB and \p MF untouched.
void X86InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
                                          MachineFunction &MF) const {
  // Intentionally empty.
}
|
||||
|
||||
/// Insert a call to an outlined function into \p MBB at \p It.
///
/// The callee is the global in \p M whose name equals the name of \p MF; the
/// call is a CALL64pcrel32 with an empty debug location.
///
/// \returns an iterator to the inserted call; \p It is updated to the same
/// position.
MachineBasicBlock::iterator
X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &It,
                                 MachineFunction &MF) const {
  // Look the callee up by the name of the function passed in as MF.
  auto *Callee = M.getNamedValue(MF.getName());

  MachineInstr *CallInstr =
      BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32)).addGlobalAddress(Callee);

  It = MBB.insert(It, CallInstr);
  return It;
}
|
||||
|
@ -545,6 +545,27 @@ public:
|
||||
|
||||
bool isTailCall(const MachineInstr &Inst) const override;
|
||||
|
||||
unsigned getOutliningBenefit(size_t SequenceSize,
|
||||
size_t Occurrences) const override;
|
||||
|
||||
bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
|
||||
|
||||
llvm::X86GenInstrInfo::MachineOutlinerInstrType
|
||||
getOutliningType(MachineInstr &MI) const override;
|
||||
|
||||
bool isFixablePostOutline(MachineInstr &MI) const;
|
||||
|
||||
void insertOutlinerEpilogue(MachineBasicBlock &MBB,
|
||||
MachineFunction &MF) const override;
|
||||
|
||||
void insertOutlinerPrologue(MachineBasicBlock &MBB,
|
||||
MachineFunction &MF) const override;
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &It,
|
||||
MachineFunction &MF) const override;
|
||||
|
||||
protected:
|
||||
/// Commutes the operands in the given instruction by changing the operands
|
||||
/// order and/or changing the instruction's opcode and/or the immediate value
|
||||
|
75
test/CodeGen/X86/machine-outliner-debuginfo.ll
Normal file
75
test/CodeGen/X86/machine-outliner-debuginfo.ll
Normal file
@ -0,0 +1,75 @@
|
||||
; RUN: llc -enable-machine-outliner -mtriple=x86_64-apple-darwin < %s | FileCheck %s
|
||||
|
||||
@x = global i32 0, align 4, !dbg !0
|
||||
|
||||
define i32 @main() #0 !dbg !11 {
|
||||
; CHECK-LABEL: _main:
|
||||
%1 = alloca i32, align 4
|
||||
%2 = alloca i32, align 4
|
||||
%3 = alloca i32, align 4
|
||||
%4 = alloca i32, align 4
|
||||
%5 = alloca i32, align 4
|
||||
; There is a debug value in the middle of this section, make sure debug values are ignored.
|
||||
; CHECK: callq l_OUTLINED_FUNCTION_0
|
||||
store i32 1, i32* %2, align 4
|
||||
store i32 2, i32* %3, align 4
|
||||
store i32 3, i32* %4, align 4
|
||||
call void @llvm.dbg.value(metadata i32 10, i64 0, metadata !15, metadata !16), !dbg !17
|
||||
store i32 4, i32* %5, align 4
|
||||
store i32 0, i32* @x, align 4, !dbg !24
|
||||
; This is the same sequence of instructions without a debug value. It should be outlined
|
||||
; in the same way.
|
||||
; CHECK: callq l_OUTLINED_FUNCTION_0
|
||||
store i32 1, i32* %2, align 4
|
||||
store i32 2, i32* %3, align 4
|
||||
store i32 3, i32* %4, align 4
|
||||
store i32 4, i32* %5, align 4
|
||||
store i32 1, i32* @x, align 4, !dbg !14
|
||||
ret i32 0, !dbg !25
|
||||
}
|
||||
|
||||
; CHECK-LABEL: l_OUTLINED_FUNCTION_0:
|
||||
; CHECK-NOT: .loc {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{^(is_stmt)}}
|
||||
; CHECK-NOT: ##DEBUG_VALUE: main:{{[a-z]}} <- {{[0-9]+}}
|
||||
; CHECK: movl $1, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: movl $2, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: movl $3, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: movl $4, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
||||
|
||||
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
|
||||
|
||||
attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="true" }
|
||||
|
||||
!llvm.dbg.cu = !{!2}
|
||||
!llvm.module.flags = !{!7, !8, !9}
|
||||
!llvm.ident = !{!10}
|
||||
|
||||
!0 = !DIGlobalVariableExpression(var: !1)
|
||||
!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 2, type: !6, isLocal: false, isDefinition: true)
|
||||
!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
|
||||
!3 = !DIFile(filename: "debug-test.c", directory: "dir")
|
||||
!4 = !{}
|
||||
!5 = !{!0}
|
||||
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
||||
!7 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!8 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!9 = !{i32 1, !"PIC Level", i32 2}
|
||||
!10 = !{!"clang version 5.0.0"}
|
||||
!11 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 4, type: !12, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !2, variables: !4)
|
||||
!12 = !DISubroutineType(types: !13)
|
||||
!13 = !{!6}
|
||||
!14 = !DILocation(line: 7, column: 4, scope: !11)
|
||||
!15 = !DILocalVariable(name: "a", scope: !11, file: !3, line: 5, type: !6)
|
||||
!16 = !DIExpression()
|
||||
!17 = !DILocation(line: 5, column: 6, scope: !11)
|
||||
!18 = !DILocalVariable(name: "b", scope: !11, file: !3, line: 5, type: !6)
|
||||
!19 = !DILocation(line: 5, column: 9, scope: !11)
|
||||
!20 = !DILocalVariable(name: "c", scope: !11, file: !3, line: 5, type: !6)
|
||||
!21 = !DILocation(line: 5, column: 12, scope: !11)
|
||||
!22 = !DILocalVariable(name: "d", scope: !11, file: !3, line: 5, type: !6)
|
||||
!23 = !DILocation(line: 5, column: 15, scope: !11)
|
||||
!24 = !DILocation(line: 14, column: 4, scope: !11)
|
||||
!25 = !DILocation(line: 21, column: 2, scope: !11)
|
110
test/CodeGen/X86/machine-outliner.ll
Normal file
110
test/CodeGen/X86/machine-outliner.ll
Normal file
@ -0,0 +1,110 @@
|
||||
; RUN: llc -enable-machine-outliner -mtriple=x86_64-apple-darwin < %s | FileCheck %s
|
||||
|
||||
@x = global i32 0, align 4
|
||||
|
||||
define i32 @check_boundaries() #0 {
|
||||
; CHECK-LABEL: _check_boundaries:
|
||||
%1 = alloca i32, align 4
|
||||
%2 = alloca i32, align 4
|
||||
%3 = alloca i32, align 4
|
||||
%4 = alloca i32, align 4
|
||||
%5 = alloca i32, align 4
|
||||
store i32 0, i32* %1, align 4
|
||||
store i32 0, i32* %2, align 4
|
||||
%6 = load i32, i32* %2, align 4
|
||||
%7 = icmp ne i32 %6, 0
|
||||
br i1 %7, label %9, label %8
|
||||
|
||||
; CHECK: callq l_OUTLINED_FUNCTION_1
|
||||
; CHECK: cmpl $0, -{{[0-9]+}}(%rbp)
|
||||
store i32 1, i32* %2, align 4
|
||||
store i32 2, i32* %3, align 4
|
||||
store i32 3, i32* %4, align 4
|
||||
store i32 4, i32* %5, align 4
|
||||
br label %10
|
||||
|
||||
store i32 1, i32* %4, align 4
|
||||
br label %10
|
||||
|
||||
%11 = load i32, i32* %2, align 4
|
||||
%12 = icmp ne i32 %11, 0
|
||||
br i1 %12, label %14, label %13
|
||||
|
||||
; CHECK: callq l_OUTLINED_FUNCTION_1
|
||||
store i32 1, i32* %2, align 4
|
||||
store i32 2, i32* %3, align 4
|
||||
store i32 3, i32* %4, align 4
|
||||
store i32 4, i32* %5, align 4
|
||||
br label %15
|
||||
|
||||
store i32 1, i32* %4, align 4
|
||||
br label %15
|
||||
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @empty_1() #0 {
|
||||
; CHECK-LABEL: _empty_1:
|
||||
; CHECK-NOT: callq l_OUTLINED_FUNCTION_{{[0-9]+}}
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
define i32 @empty_2() #0 {
|
||||
; CHECK-LABEL: _empty_2
|
||||
; CHECK-NOT: callq l_OUTLINED_FUNCTION_{{[0-9]+}}
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
define i32 @no_empty_outlining() #0 {
|
||||
; CHECK-LABEL: _no_empty_outlining:
|
||||
%1 = alloca i32, align 4
|
||||
store i32 0, i32* %1, align 4
|
||||
; CHECK-NOT: callq l_OUTLINED_FUNCTION_{{[0-9]+}}
|
||||
%2 = call i32 @empty_1() #1
|
||||
%3 = call i32 @empty_2() #1
|
||||
%4 = call i32 @empty_1() #1
|
||||
%5 = call i32 @empty_2() #1
|
||||
%6 = call i32 @empty_1() #1
|
||||
%7 = call i32 @empty_2() #1
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @main() #0 {
|
||||
; CHECK-LABEL: _main:
|
||||
%1 = alloca i32, align 4
|
||||
%2 = alloca i32, align 4
|
||||
%3 = alloca i32, align 4
|
||||
%4 = alloca i32, align 4
|
||||
%5 = alloca i32, align 4
|
||||
|
||||
store i32 0, i32* %1, align 4
|
||||
store i32 0, i32* @x, align 4
|
||||
; CHECK: callq l_OUTLINED_FUNCTION_0
|
||||
store i32 1, i32* %2, align 4
|
||||
store i32 2, i32* %3, align 4
|
||||
store i32 3, i32* %4, align 4
|
||||
store i32 4, i32* %5, align 4
|
||||
store i32 1, i32* @x, align 4
|
||||
; CHECK: callq l_OUTLINED_FUNCTION_0
|
||||
store i32 1, i32* %2, align 4
|
||||
store i32 2, i32* %3, align 4
|
||||
store i32 3, i32* %4, align 4
|
||||
store i32 4, i32* %5, align 4
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="true" }
|
||||
|
||||
; CHECK-LABEL: l_OUTLINED_FUNCTION_0:
|
||||
; CHECK: movl $1, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: movl $2, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: movl $3, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: movl $4, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
; CHECK-LABEL: l_OUTLINED_FUNCTION_1:
|
||||
; CHECK: movl $1, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: movl $2, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: movl $3, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: movl $4, -{{[0-9]+}}(%rbp)
|
||||
; CHECK-NEXT: retq
|
Loading…
Reference in New Issue
Block a user