1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/Transforms/SampleProfile/pseudo-probe-verify.ll
Hongtao Yu 3594bb4c1b [CSSPGO] Introducing distribution factor for pseudo probe.
Sample re-annotation is required in LTO time to achieve a reasonable post-inline profile quality. However, we have seen that such LTO-time re-annotation degrades profile quality. This is mainly caused by preLTO code duplication that is done by passes such as loop unrolling, jump threading, indirect call promotion etc, where samples corresponding to a source location are aggregated multiple times due to the duplicates. In this change we are introducing a concept of distribution factor for pseudo probes so that samples can be distributed for duplicated probes scaled by a factor. We hope that optimizations duplicating code well-maintain the branch frequency information (BFI) based on which probe distribution factors are calculated. Distribution factors are updated at the end of preLTO pipeline to reflect an estimated portion of the real execution count.

This change also introduces a pseudo probe verifier that can be run after each IR passes to detect duplicated pseudo probes.

A saturated distribution factor stands for 1.0. A pesudo probe will carry a factor with the value ranged from 0.0 to 1.0. A 64-bit integral distribution factor field that represents [0.0, 1.0] is associated to each block probe. Unfortunately this cannot be done for callsite probes due to the size limitation of a 32-bit Dwarf discriminator. A 7-bit distribution factor is used instead.

Changes are also needed to the sample profile inliner to deal with prorated callsite counts. Call sites duplicated by PreLTO passes, when later on inlined in LTO time, should have the callees’s probe prorated based on the Prelink-computed distribution factors. The distribution factors should also be taken into account when computing hotness for inline candidates. Also, Indirect call promotion results in multiple callisites. The original samples should be distributed across them. This is fixed by adjusting the callisites' distribution factors.

Reviewed By: wmi

Differential Revision: https://reviews.llvm.org/D93264
2021-02-02 11:55:01 -08:00

78 lines
3.4 KiB
LLVM

; REQUIRES: x86_64-linux
; RUN: opt < %s -passes='pseudo-probe,loop-unroll-full' -verify-pseudo-probe -S -o %t 2>&1 | FileCheck %s --check-prefix=VERIFY
; RUN: FileCheck %s < %t
; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass ***
; VERIFY: Function foo:
; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00
; VERIFY-DAG: Probe 4 previous factor 1.00 current factor 5.00
declare void @foo2() nounwind
define void @foo(i32 %x) {
bb:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
%tmp = alloca [5 x i32*], align 16
br label %bb7.preheader
bb3.loopexit:
%spec.select.lcssa = phi i32 [ %spec.select, %bb10 ]
%tmp5.not = icmp eq i32 %spec.select.lcssa, 0
br i1 %tmp5.not, label %bb24, label %bb7.preheader
bb7.preheader:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
%tmp1.06 = phi i32 [ 5, %bb ], [ %spec.select.lcssa, %bb3.loopexit ]
br label %bb10
bb10:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
%indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ]
%tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ]
%tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %indvars.iv
%tmp14 = load i32*, i32** %tmp13, align 8
%tmp15.not = icmp ne i32* %tmp14, null
%tmp18 = sext i1 %tmp15.not to i32
%spec.select = add nsw i32 %tmp1.14, %tmp18
call void @foo2(), !dbg !12
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, 5
br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13
bb24:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
ret void
}
;; A discriminator of 186646583 which is 0xb200037 in hexdecimal, stands for a direct call probe
;; with an index of 6 and a scale of -1%.
; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]])
; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2)
!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, retainedNodes: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.9.0"}
!12 = !DILocation(line: 2, column: 20, scope: !4)
!13 = distinct !{!13, !14}
!14 = !{!"llvm.loop.unroll.full"}