mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
1f3e158bbf
by default. sample-profile-top-down-load is an internal option that enables top-down ordering of inlining and profile annotation in the sample profile load pass. It was found to be beneficial for profile annotation, and recently we found it can also solve a build-time issue. Suppose function A has many callsites in function B. In the last release binary, where the sample profile was collected, the outline copy of A is large because many other functions were inlined into A. Although all the callsites of A in B were inlined, every inlined body was small (A was inlined into B before other functions were inlined into A), so there was no build-time issue in the last release. In an optimized build using the sample profile collected from the last release, without top-down inlining, we saw a case where A became very large because of inlining, and then multiple callsites of A were inlined into B, which led to a huge B that caused a significant build-time issue in addition to the profile annotation issue. To solve that problem, the patch enables the flag sample-profile-top-down-load by default. sample-profile-top-down-load performs better when it is enabled together with sample-profile-merge-inlinee, so this patch also enables sample-profile-merge-inlinee by default. Differential Revision: https://reviews.llvm.org/D82919
126 lines
5.3 KiB
LLVM
126 lines
5.3 KiB
LLVM
; Note that this needs the new pass manager for now. Passing `-sample-profile-top-down-load` to the legacy pass manager is a no-op.
|
|
|
|
; Test we aren't doing specialization for inlining with default source order
|
|
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-top-down-load=false -S | FileCheck -check-prefix=DEFAULT %s
|
|
|
|
; Test we specialize based on call path with context-sensitive profile while inlining with '-sample-profile-top-down-load'
|
|
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load=true -S | FileCheck -check-prefix=TOPDOWN %s
|
|
|
|
|
|
; Format string passed to printf at the end of @main: "sum is %d" followed by a
; newline (\0A) and the terminating NUL, 11 bytes in total.
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
|
|
|
|
; i32 @_Z3sumii(i32 %x, i32 %y)  --  "sum" in the debug info (!6).
; Returns %x + %y. It also calls @_Z3subii with the same two arguments and
; discards the result, which gives this function a call site of its own.
; Every instruction that carries a location uses !8.
define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 {
entry:
  ; Spill both arguments into stack slots.
  %x.slot = alloca i32, align 4
  %y.slot = alloca i32, align 4
  store i32 %x, i32* %x.slot, align 4
  store i32 %y, i32* %y.slot, align 4

  ; total = x + y
  %lhs = load i32, i32* %x.slot, align 4, !dbg !8
  %rhs = load i32, i32* %y.slot, align 4, !dbg !8
  %total = add nsw i32 %lhs, %rhs, !dbg !8

  ; Call sub(x, y); the returned value is never used.
  %sub.lhs = load i32, i32* %x.slot, align 4, !dbg !8
  %sub.rhs = load i32, i32* %y.slot, align 4, !dbg !8
  %sub.ignored = call i32 @_Z3subii(i32 %sub.lhs, i32 %sub.rhs), !dbg !8

  ret i32 %total, !dbg !8
}
|
|
|
|
; i32 @_Z3subii(i32 %x, i32 %y)  --  "sub" in the debug info (!9).
; Returns %x - %y. The arithmetic is located at !10 and the return at !11.
define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 {
entry:
  ; Spill both arguments into stack slots.
  %x.slot = alloca i32, align 4
  %y.slot = alloca i32, align 4
  store i32 %x, i32* %x.slot, align 4
  store i32 %y, i32* %y.slot, align 4

  ; diff = x - y
  %minuend = load i32, i32* %x.slot, align 4, !dbg !10
  %subtrahend = load i32, i32* %y.slot, align 4, !dbg !10
  %diff = sub nsw i32 %minuend, %subtrahend, !dbg !10

  ret i32 %diff, !dbg !11
}
|
|
|
|
; i32 @main()
; Runs a counting loop over %i. Each iteration either replaces %s with
; sum(%i, %s) or, when %i equals 100, resets %s to 30. After the loop it
; prints %s through printf using @.str and returns 0.
; NOTE(review): %s has no store before the loop, so the first load of it reads
; an uninitialized stack slot. Left untouched because this is test input.
define i32 @main() #0 !dbg !12 {
entry:
  %ret.slot = alloca i32, align 4
  %s.slot = alloca i32, align 4
  %i.slot = alloca i32, align 4
  store i32 0, i32* %ret.slot
  store i32 0, i32* %i.slot, align 4, !dbg !13
  br label %loop.cond, !dbg !14

loop.cond:                                        ; preds = %loop.latch, %entry
  ; Post-increment test: the comparison uses the value of i from before the bump.
  %i.old = load i32, i32* %i.slot, align 4, !dbg !15
  %i.next = add nsw i32 %i.old, 1, !dbg !15
  store i32 %i.next, i32* %i.slot, align 4, !dbg !15
  %keep.going = icmp slt i32 %i.old, 400000000, !dbg !15
  br i1 %keep.going, label %loop.body, label %loop.exit, !dbg !15

loop.body:                                        ; preds = %loop.cond
  ; Branch on the freshly incremented i.
  %i.cur = load i32, i32* %i.slot, align 4, !dbg !17
  %not.100 = icmp ne i32 %i.cur, 100, !dbg !17
  br i1 %not.100, label %accumulate, label %reset, !dbg !17

accumulate:                                       ; preds = %loop.body
  ; s = sum(i, s)
  %arg.i = load i32, i32* %i.slot, align 4, !dbg !19
  %arg.s = load i32, i32* %s.slot, align 4, !dbg !19
  %sum.result = call i32 @_Z3sumii(i32 %arg.i, i32 %arg.s), !dbg !19
  store i32 %sum.result, i32* %s.slot, align 4, !dbg !19
  br label %loop.latch, !dbg !19

reset:                                            ; preds = %loop.body
  ; s = 30 (this branch carries no debug location)
  store i32 30, i32* %s.slot, align 4, !dbg !21
  br label %loop.latch

loop.latch:                                       ; preds = %reset, %accumulate
  br label %loop.cond, !dbg !23

loop.exit:                                        ; preds = %loop.cond
  %s.final = load i32, i32* %s.slot, align 4, !dbg !25
  %printed = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %s.final), !dbg !25
  ret i32 0, !dbg !26
}
|
|
|
|
; External variadic printf; called from @main with the @.str format string.
declare i32 @printf(i8*, ...)
|
|
|
|
; Attribute group #0 is attached to @_Z3sumii, @_Z3subii and @main and carries
; the single string attribute "use-sample-profile".
; NOTE(review): presumably this is what opts a function into sample-profile
; loading -- confirm against the SampleProfile pass.
attributes #0 = { "use-sample-profile" }
|
|
|
|
; Named module-level metadata: the compile-unit list, the module flags and the
; producer ident string.
!llvm.dbg.cu = !{!0}
|
|
!llvm.module.flags = !{!3, !4}
|
|
!llvm.ident = !{!5}
|
|
|
|
; !0 is the single compile unit (C++, emissionKind NoDebug), !1 its source file
; and !2 the shared empty tuple.
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
|
|
!1 = !DIFile(filename: "calls.cc", directory: ".")
|
|
!2 = !{}
|
|
; Module flags: Dwarf Version 4 and Debug Info Version 3.
!3 = !{i32 2, !"Dwarf Version", i32 4}
|
|
!4 = !{i32 1, !"Debug Info Version", i32 3}
|
|
!5 = !{!"clang version 3.5 "}
|
|
; @_Z3sumii is described by !6 ("sum", line 3); all of its located
; instructions use !8 (line 4).
!6 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
|
|
!7 = !DISubroutineType(types: !2)
|
|
!8 = !DILocation(line: 4, scope: !6)
|
|
; @_Z3subii is described by !9 ("sub", line 20); its arithmetic uses !10
; (line 20) and its ret uses !11 (line 21).
!9 = distinct !DISubprogram(name: "sub", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
|
|
!10 = !DILocation(line: 20, scope: !9)
|
|
!11 = !DILocation(line: 21, scope: !9)
|
|
; @main is described by !12 (line 7); the nodes below are its source locations.
!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
|
|
!13 = !DILocation(line: 8, scope: !12)
|
|
!14 = !DILocation(line: 9, scope: !12)
|
|
; !16, !20, !22 and !24 are DILexicalBlockFile scopes carrying discriminators
; 2, 2, 4 and 6, so locations that share source line 9 or 10 differ by
; discriminator.
!15 = !DILocation(line: 9, scope: !16)
|
|
!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2)
|
|
!17 = !DILocation(line: 10, scope: !18)
|
|
!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10)
|
|
; !19 is the location of the @_Z3sumii call site in @main (line 10,
; discriminator 2).
!19 = !DILocation(line: 10, scope: !20)
|
|
!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2)
|
|
!21 = !DILocation(line: 10, scope: !22)
|
|
!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4)
|
|
!23 = !DILocation(line: 10, scope: !24)
|
|
!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6)
|
|
!25 = !DILocation(line: 11, scope: !12)
|
|
!26 = !DILocation(line: 12, scope: !12)
|
|
|
|
|
|
; Expectations for the first opt invocation (top-down loading disabled): once
; @_Z3sumii has been matched, no direct call to @_Z3subii may appear in the
; output, neither before nor after the @main() match.
; DEFAULT: @_Z3sumii
|
|
; DEFAULT-NOT: call i32 @_Z3subii
|
|
; DEFAULT: @main()
|
|
; DEFAULT-NOT: call i32 @_Z3subii
|
|
|
|
; Expectations for the second opt invocation (top-down loading and inlinee
; merging enabled): no call to @_Z3subii between the @_Z3sumii and @main()
; matches, but one must still be present after the @main() match.
; TOPDOWN: @_Z3sumii
|
|
; TOPDOWN-NOT: call i32 @_Z3subii
|
|
; TOPDOWN: @main()
|
|
; TOPDOWN: call i32 @_Z3subii
|