mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
69b963173e
We introduce a codegen optimization pass which splits functions into hot and cold parts. This pass leverages the basic block sections feature recently introduced in LLVM from the Propeller project. The pass targets functions with profile coverage, identifies cold blocks and moves them to a separate section. The linker groups all cold blocks across functions together, decreasing fragmentation and improving icache and itlb utilization. We evaluated the Machine Function Splitter pass on clang bootstrap and SPECInt 2017. For clang bootstrap we observe a mean 2.33% runtime improvement with a ~32% reduction in itlb and stlb misses. Additionally, L1 icache misses reduced by 9.5% while L2 instruction misses reduced by 20%. For SPECInt we report the change in IntRate the C/C++ benchmarks. All benchmarks apart from mcf and x264 improve, on average by 0.6% with the max for deepsjeng at 1.6%. Benchmark % Change 500.perlbench_r 0.78 502.gcc_r 0.82 505.mcf_r -0.30 520.omnetpp_r 0.18 523.xalancbmk_r 0.37 525.x264_r -0.46 531.deepsjeng_r 1.61 541.leela_r 0.83 557.xz_r 0.15 Differential Revision: https://reviews.llvm.org/D85368
219 lines
5.9 KiB
LLVM
219 lines
5.9 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefix=MFS-DEFAULTS
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefix=MFS-OPTS1
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefix=MFS-OPTS2
|
|
|
|
define void @foo1(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
|
|
;; Check that cold block is moved to .text.unlikely.
|
|
; MFS-DEFAULTS-LABEL: foo1
|
|
; MFS-DEFAULTS: .section .text.unlikely.foo1
|
|
; MFS-DEFAULTS-NEXT: foo1.cold:
|
|
; MFS-DEFAULTS-NOT: callq bar
|
|
; MFS-DEFAULTS-NEXT: callq baz
|
|
br i1 %0, label %2, label %4, !prof !17
|
|
|
|
2: ; preds = %1
|
|
%3 = call i32 @bar()
|
|
br label %6
|
|
|
|
4: ; preds = %1
|
|
%5 = call i32 @baz()
|
|
br label %6
|
|
|
|
6: ; preds = %4, %2
|
|
%7 = tail call i32 @qux()
|
|
ret void
|
|
}
|
|
|
|
define void @foo2(i1 zeroext %0) nounwind !prof !23 !section_prefix !16 {
|
|
;; Check that function marked unlikely is not split.
|
|
; MFS-DEFAULTS-LABEL: foo2
|
|
; MFS-DEFAULTS-NOT: foo2.cold:
|
|
br i1 %0, label %2, label %4, !prof !17
|
|
|
|
2: ; preds = %1
|
|
%3 = call i32 @bar()
|
|
br label %6
|
|
|
|
4: ; preds = %1
|
|
%5 = call i32 @baz()
|
|
br label %6
|
|
|
|
6: ; preds = %4, %2
|
|
%7 = tail call i32 @qux()
|
|
ret void
|
|
}
|
|
|
|
define void @foo3(i1 zeroext %0) nounwind !section_prefix !15 {
|
|
;; Check that function without profile data is not split.
|
|
; MFS-DEFAULTS-LABEL: foo3
|
|
; MFS-DEFAULTS-NOT: foo3.cold:
|
|
br i1 %0, label %2, label %4
|
|
|
|
2: ; preds = %1
|
|
%3 = call i32 @bar()
|
|
br label %6
|
|
|
|
4: ; preds = %1
|
|
%5 = call i32 @baz()
|
|
br label %6
|
|
|
|
6: ; preds = %4, %2
|
|
%7 = tail call i32 @qux()
|
|
ret void
|
|
}
|
|
|
|
define void @foo4(i1 zeroext %0, i1 zeroext %1) nounwind !prof !20 {
|
|
;; Check that count threshold works.
|
|
; MFS-OPTS1-LABEL: foo4
|
|
; MFS-OPTS1: .section .text.unlikely.foo4
|
|
; MFS-OPTS1-NEXT: foo4.cold:
|
|
; MFS-OPTS1-NOT: callq bar
|
|
; MFS-OPTS1-NOT: callq baz
|
|
; MFS-OPTS1-NEXT: callq bam
|
|
br i1 %0, label %3, label %7, !prof !18
|
|
|
|
3:
|
|
%4 = call i32 @bar()
|
|
br label %7
|
|
|
|
5:
|
|
%6 = call i32 @baz()
|
|
br label %7
|
|
|
|
7:
|
|
br i1 %1, label %8, label %10, !prof !19
|
|
|
|
8:
|
|
%9 = call i32 @bam()
|
|
br label %12
|
|
|
|
10:
|
|
%11 = call i32 @baz()
|
|
br label %12
|
|
|
|
12:
|
|
%13 = tail call i32 @qux()
|
|
ret void
|
|
}
|
|
|
|
define void @foo5(i1 zeroext %0, i1 zeroext %1) nounwind !prof !20 {
|
|
;; Check that profile summary info cutoff works.
|
|
; MFS-OPTS2-LABEL: foo5
|
|
; MFS-OPTS2: .section .text.unlikely.foo5
|
|
; MFS-OPTS2-NEXT: foo5.cold:
|
|
; MFS-OPTS2-NOT: callq bar
|
|
; MFS-OPTS2-NOT: callq baz
|
|
; MFS-OPTS2-NEXT: callq bam
|
|
br i1 %0, label %3, label %7, !prof !21
|
|
|
|
3:
|
|
%4 = call i32 @bar()
|
|
br label %7
|
|
|
|
5:
|
|
%6 = call i32 @baz()
|
|
br label %7
|
|
|
|
7:
|
|
br i1 %1, label %8, label %10, !prof !22
|
|
|
|
8:
|
|
%9 = call i32 @bam()
|
|
br label %12
|
|
|
|
10:
|
|
%11 = call i32 @baz()
|
|
br label %12
|
|
|
|
12:
|
|
%13 = call i32 @qux()
|
|
ret void
|
|
}
|
|
|
|
define void @foo6(i1 zeroext %0) nounwind section "nosplit" !prof !14 {
|
|
;; Check that function with section attribute is not split.
|
|
; MFS-DEFAULTS-LABEL: foo6
|
|
; MFS-DEFAULTS-NOT: foo6.cold:
|
|
br i1 %0, label %2, label %4, !prof !17
|
|
|
|
2: ; preds = %1
|
|
%3 = call i32 @bar()
|
|
br label %6
|
|
|
|
4: ; preds = %1
|
|
%5 = call i32 @baz()
|
|
br label %6
|
|
|
|
6: ; preds = %4, %2
|
|
%7 = tail call i32 @qux()
|
|
ret void
|
|
}
|
|
|
|
define i32 @foo7(i1 zeroext %0) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !14 {
|
|
;; Check that cold ehpads are not split out.
|
|
; MFS-DEFAULTS-LABEL: foo7
|
|
; MFS-DEFAULTS: .section .text.unlikely.foo7,"ax",@progbits
|
|
; MFS-DEFAULTS-NEXT: foo7.cold:
|
|
; MFS-DEFAULTS-NOT: callq _Unwind_Resume
|
|
; MFS-DEFAULTS: callq baz
|
|
entry:
|
|
invoke void @_Z1fv()
|
|
to label %try.cont unwind label %lpad
|
|
|
|
lpad:
|
|
%1 = landingpad { i8*, i32 }
|
|
cleanup
|
|
catch i8* bitcast (i8** @_ZTIi to i8*)
|
|
resume { i8*, i32 } %1
|
|
|
|
try.cont:
|
|
br i1 %0, label %2, label %4, !prof !17
|
|
|
|
2: ; preds = try.cont
|
|
%3 = call i32 @bar()
|
|
br label %6
|
|
|
|
4: ; preds = %1
|
|
%5 = call i32 @baz()
|
|
br label %6
|
|
|
|
6: ; preds = %4, %2
|
|
%7 = tail call i32 @qux()
|
|
ret i32 %7
|
|
}
|
|
|
|
declare i32 @bar()
|
|
declare i32 @baz()
|
|
declare i32 @bam()
|
|
declare i32 @qux()
|
|
declare void @_Z1fv()
|
|
declare i32 @__gxx_personality_v0(...)
|
|
|
|
@_ZTIi = external constant i8*
|
|
|
|
!llvm.module.flags = !{!0}
|
|
!0 = !{i32 1, !"ProfileSummary", !1}
|
|
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
|
!2 = !{!"ProfileFormat", !"InstrProf"}
|
|
!3 = !{!"TotalCount", i64 10000}
|
|
!4 = !{!"MaxCount", i64 10}
|
|
!5 = !{!"MaxInternalCount", i64 1}
|
|
!6 = !{!"MaxFunctionCount", i64 1000}
|
|
!7 = !{!"NumCounts", i64 3}
|
|
!8 = !{!"NumFunctions", i64 5}
|
|
!9 = !{!"DetailedSummary", !10}
|
|
!10 = !{!11, !12, !13}
|
|
!11 = !{i32 10000, i64 100, i32 1}
|
|
!12 = !{i32 999900, i64 100, i32 1}
|
|
!13 = !{i32 999999, i64 1, i32 2}
|
|
!14 = !{!"function_entry_count", i64 7000}
|
|
!15 = !{!"function_section_prefix", !".hot"}
|
|
!16 = !{!"function_section_prefix", !".unlikely"}
|
|
!17 = !{!"branch_weights", i32 7000, i32 0}
|
|
!18 = !{!"branch_weights", i32 3000, i32 4000}
|
|
!19 = !{!"branch_weights", i32 1000, i32 6000}
|
|
!20 = !{!"function_entry_count", i64 10000}
|
|
!21 = !{!"branch_weights", i32 6000, i32 4000}
|
|
!22 = !{!"branch_weights", i32 80, i32 9920}
|
|
!23 = !{!"function_entry_count", i64 7}
|