mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
ba7bb5fc7d
Sample profile loader can be run in both LTO prelink and postlink. Currently the counts annoation in postilnk doesn't fully overwrite what's done in prelink. I'm adding a switch (`-overwrite-existing-weights=1`) to enable a full overwrite, which includes: 1. Clear old metadata for calls when their parent block has a zero count. This could be caused by prelink code duplication. 2. Clear indirect call metadata if somehow all the rest targets have a sum of zero count. 3. Overwrite branch weight for basic blocks. With a CS profile, I was seeing #1 and #2 help reduce code size by preventing post-sample ICP and CGSCC inliner working on obsolete metadata, which come from a partial global inlining in prelink. It's not expected to work well for non-CS case with a less-accurate post-inline count quality. It's worth calling out that some prelink optimizations can damage counts quality in an irreversible way. One example is the loop rotate optimization. Due to lack of exact loop entry count (profiling can only give loop iteration count and loop exit count), moving one iteration out of the loop body leaves the rest iteration count unknown. We had to turn off prelink loop rotate to achieve a better postlink counts quality. A even better postlink counts quality can be archived by turning off prelink CGSCC inlining which is not context-sensitive. Reviewed By: wenlei, wmi Differential Revision: https://reviews.llvm.org/D102537
68 lines
3.1 KiB
LLVM
68 lines
3.1 KiB
LLVM
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile.prof -pass-remarks=sample-profile -S | FileCheck %s
|
|
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile.prof -pass-remarks=sample-profile -overwrite-existing-weights=1 -S | FileCheck %s -check-prefix=OVW
|
|
|
|
define dso_local i32 @foo(i32 %x, void (i32)* %f) #0 !dbg !4 !prof !10 {
|
|
entry:
|
|
%retval = alloca i32, align 4
|
|
%x.addr = alloca i32, align 4
|
|
store i32 %x, i32* %x.addr, align 4
|
|
%0 = load i32, i32* %x.addr, align 4
|
|
%cmp = icmp eq i32 %0, 0
|
|
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1)
|
|
br i1 %cmp, label %if.then, label %if.else, !prof !11
|
|
|
|
if.then:
|
|
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1)
|
|
; CHECK: call {{.*}}, !dbg ![[#]], !prof ![[#PROF:]]
|
|
; OVW: call {{.*}}, !dbg ![[#]], !prof ![[#PROF:]]
|
|
call void %f(i32 1), !dbg !13, !prof !16
|
|
store i32 1, i32* %retval, align 4
|
|
br label %return
|
|
|
|
if.else:
|
|
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 0)
|
|
; CHECK: call {{.*}}, !dbg ![[#]], !prof ![[#PROF]]
|
|
;; The block should have a 0 weight. Check the profile metadata is dropped.
|
|
; OVW-NOT: call {{.*}}, !dbg ![[#]], !prof
|
|
call void %f(i32 2), !dbg !15, !prof !16
|
|
store i32 2, i32* %retval, align 4
|
|
br label %return
|
|
|
|
return:
|
|
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1)
|
|
%1 = load i32, i32* %retval, align 4
|
|
ret i32 %1
|
|
}
|
|
|
|
; CHECK: ![[#PROF]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
|
|
; OVW: ![[#PROF]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
|
|
|
|
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
|
|
|
|
attributes #0 = {"use-sample-profile"}
|
|
|
|
!llvm.module.flags = !{!0, !1}
|
|
!llvm.pseudo_probe_desc = !{!2}
|
|
|
|
!0 = !{i32 7, !"Dwarf Version", i32 4}
|
|
!1 = !{i32 2, !"Debug Info Version", i32 3}
|
|
!2 = !{i64 6699318081062747564, i64 563022570642068, !"foo", null}
|
|
!4 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 9, type: !6, scopeLine: 9, spFlags: DISPFlagDefinition, unit: !9)
|
|
!5 = !DIFile(filename: "test.cpp", directory: "test")
|
|
!6 = !DISubroutineType(types: !7)
|
|
!7 = !{!8, !8}
|
|
!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
|
!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
|
|
!10 = !{!"function_entry_count", i64 14}
|
|
!11 = !{!"branch_weights", i32 100, i32 0}
|
|
;; A discriminator of 186646575 which is 0x6f80057 in hexdecimal, stands for an indirect call probe
|
|
;; with an index of 5 and probe factor of 1.0.
|
|
!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646575)
|
|
!13 = distinct !DILocation(line: 10, column: 11, scope: !12)
|
|
;; A discriminator of 134217775 which is 0x6f80057 in hexdecimal, stands for an indirect call probe
|
|
;; with an index of 5 and probe factor of 0.
|
|
!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217775)
|
|
!15 = distinct !DILocation(line: 10, column: 11, scope: !14)
|
|
!16 = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
|
|
|