mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
3594bb4c1b
Sample re-annotation is required in LTO time to achieve a reasonable post-inline profile quality. However, we have seen that such LTO-time re-annotation degrades profile quality. This is mainly caused by preLTO code duplication that is done by passes such as loop unrolling, jump threading, indirect call promotion etc, where samples corresponding to a source location are aggregated multiple times due to the duplicates. In this change we are introducing a concept of distribution factor for pseudo probes so that samples can be distributed for duplicated probes scaled by a factor. We hope that optimizations duplicating code maintain the branch frequency information (BFI) well, since probe distribution factors are calculated based on it. Distribution factors are updated at the end of preLTO pipeline to reflect an estimated portion of the real execution count. This change also introduces a pseudo probe verifier that can be run after each IR pass to detect duplicated pseudo probes. A saturated distribution factor stands for 1.0. A pseudo probe will carry a factor with a value ranging from 0.0 to 1.0. A 64-bit integral distribution factor field that represents [0.0, 1.0] is associated with each block probe. Unfortunately this cannot be done for callsite probes due to the size limitation of a 32-bit Dwarf discriminator. A 7-bit distribution factor is used instead. Changes are also needed to the sample profile inliner to deal with prorated callsite counts. Call sites duplicated by PreLTO passes, when later on inlined in LTO time, should have the callee's probes prorated based on the Prelink-computed distribution factors. The distribution factors should also be taken into account when computing hotness for inline candidates. Also, indirect call promotion results in multiple callsites. The original samples should be distributed across them. This is fixed by adjusting the callsites' distribution factors. Reviewed By: wmi Differential Revision: https://reviews.llvm.org/D93264
46 lines
1.6 KiB
LLVM
46 lines
1.6 KiB
LLVM
; RUN: opt < %s -passes='pseudo-probe,sample-profile,jump-threading,pseudo-probe-update' -sample-profile-file=%S/Inputs/pseudo-probe-update.prof -S | FileCheck %s
|
|
|
|
;; External functions called from @foo: @f1 in block T1, @f2 in block F1 and
;; @f3 in block T2. Only declarations are provided here; no bodies are defined
;; in this test.
declare i32 @f1()
|
|
declare i32 @f2()
|
|
declare void @f3()
|
|
|
|
|
|
;; This tests that the branch in 'merge' can be cloned up into T1.
|
|
;; @foo: entry branches on %cond to T1 or F1; both fall into Merge, which
;; branches on a phi of %cond3 (from T1) and %cond2 (from F1) to T2 or F2.
;; The FileCheck directives below expect pseudo probe 4 to be emitted in both
;; T1 and F1, each copy carrying a non-saturated distribution factor, while
;; probes 1, 2, 3, 5 and 6 keep the saturated factor (i64 -1, which stands
;; for 1.0).
define i32 @foo(i1 %cond, i1 %cond2) #0 {
|
|
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
|
|
br i1 %cond, label %T1, label %F1
|
|
T1:
|
|
; CHECK: %v1 = call i32 @f1(), !prof ![[#PROF1:]]
|
|
%v1 = call i32 @f1()
|
|
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
|
|
;; The distribution factor -8513881372706734080 (read as an unsigned 64-bit
;; fraction of the saturated value) stands for 53.85%, which is 7/(6+7); 7 and
;; 6 are the branch weights expected on the @f1 and @f2 calls.
|
|
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
|
|
%cond3 = icmp eq i32 %v1, 412
|
|
br label %Merge
|
|
F1:
|
|
; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]]
|
|
%v2 = call i32 @f2()
|
|
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
|
|
;; The distribution factor 8513881922462547968 stands for 46.15%, which is
;; 6/(6+7); together with the 53.85% factor in T1 the two copies of probe 4
;; sum to roughly 100%.
|
|
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968)
|
|
br label %Merge
|
|
Merge:
|
|
|
|
;; %A selects the branch condition per predecessor: %cond3 when coming from
;; T1, %cond2 when coming from F1.
%A = phi i1 [%cond3, %T1], [%cond2, %F1]
|
|
;; %B is the value returned by both T2 and F2: %v1 from T1, %v2 from F1.
%B = phi i32 [%v1, %T1], [%v2, %F1]
|
|
br i1 %A, label %T2, label %F2
|
|
T2:
|
|
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
|
|
call void @f3()
|
|
ret i32 %B
|
|
F2:
|
|
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
|
|
ret i32 %B
|
|
}
|
|
|
|
;; Expected branch-weight metadata for the nodes captured as PROF1 (on the @f1
;; call in T1) and PROF2 (on the @f2 call in F1). The weights 7 and 6 are the
;; inputs to the 7/(6+7) and 6/(6+7) distribution factors checked in @foo.
; CHECK: ![[#PROF1]] = !{!"branch_weights", i32 7}
|
|
; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6}
|
|
|
|
;; Attribute group #0 is referenced by the definition of @foo.
;; NOTE(review): presumably "use-sample-profile" is what opts @foo into
;; annotation by the sample-profile pass named in the RUN line; confirm.
attributes #0 = {"use-sample-profile"}
|
|
|