1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[AutoFDO] Avoid merging inlinee samples multiple times

A function call can be replicated by optimizations like loop unroll and jump threading and the replicates end up sharing the sample nested callee profile. Therefore when it comes to merging samples for uninlined callees in the sample profile inliner, a callee profile can be merged multiple times which will cause an assert to fire.

This change avoids merging same callee profile for duplicate callsites by filtering out callee profiles with a non-zero head sample count.

Reviewed By: wenlei, wmi

Differential Revision: https://reviews.llvm.org/D84997
This commit is contained in:
Hongtao Yu 2020-07-30 18:22:50 -07:00
parent e5aed69370
commit de5226d60a
2 changed files with 99 additions and 9 deletions

View File

@ -1106,16 +1106,26 @@ bool SampleProfileLoader::inlineHotFunctions(
}
if (ProfileMergeInlinee) {
// Use entry samples as head samples during the merge, as inlinees
// don't have head samples.
assert(FS->getHeadSamples() == 0 && "Expect 0 head sample for inlinee");
const_cast<FunctionSamples *>(FS)->addHeadSamples(FS->getEntrySamples());
// A function call can be replicated by optimizations like callsite
// splitting or jump threading and the replicates end up sharing the
// sample nested callee profile instead of slicing the original inlinee's
// profile. We want to do merge exactly once by filtering out callee
// profiles with a non-zero head sample count.
if (FS->getHeadSamples() == 0) {
// Use entry samples as head samples during the merge, as inlinees
// don't have head samples.
const_cast<FunctionSamples *>(FS)->addHeadSamples(
FS->getEntrySamples());
// Note that we have to do the merge right after processing function.
// This allows OutlineFS's profile to be used for annotation during
// top-down processing of functions' annotation.
FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
OutlineFS->merge(*FS);
// Note that we have to do the merge right after processing function.
// This allows OutlineFS's profile to be used for annotation during
// top-down processing of functions' annotation.
FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
OutlineFS->merge(*FS);
} else
assert(FS->getHeadSamples() == FS->getEntrySamples() &&
"Expect same head and entry sample counts for profiles already "
"merged.");
} else {
auto pair =
notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});

View File

@ -0,0 +1,80 @@
;; Test we merge non-inlined profile only once with '-sample-profile-merge-inlinee'
; RUN: opt < %s -passes='function(callsite-splitting),sample-profile' -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -S | FileCheck %s
%struct.bitmap = type { i32, %struct.bitmap* }
; CHECK-LABEL: @main
define void @main(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt) #0 !dbg !6 {
entry:
br label %Top
Top:
%tobool1 = icmp eq %struct.bitmap* %a_elt, null
br i1 %tobool1, label %CallSiteBB, label %NextCond
NextCond:
%cmp = icmp ne %struct.bitmap* %b_elt, null
br i1 %cmp, label %CallSiteBB, label %End
CallSiteBB:
%p = phi i1 [0, %Top], [%c, %NextCond]
;; The call site is replicated by callsite-splitting pass and they end up share the same sample profile
; CHECK: call void @_Z3sumii(%struct.bitmap* null, %struct.bitmap* null, %struct.bitmap* %b_elt, i1 false)
; CHECK: call void @_Z3sumii(%struct.bitmap* nonnull %a_elt, %struct.bitmap* nonnull %a_elt, %struct.bitmap* nonnull %b_elt, i1 %c)
call void @_Z3sumii(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %p), !dbg !8
br label %End
End:
ret void
}
define void @_Z3sumii(%struct.bitmap* %dst_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %c) #0 !dbg !12 {
entry:
%tobool = icmp ne %struct.bitmap* %a_elt, null
%tobool1 = icmp ne %struct.bitmap* %b_elt, null
%or.cond = and i1 %tobool, %tobool1, !dbg !13
br i1 %or.cond, label %Cond, label %Big
Cond:
%cmp = icmp eq %struct.bitmap* %dst_elt, %a_elt, !dbg !14
br i1 %cmp, label %Small, label %Big, !dbg !15
Small:
br label %End
Big:
br label %End
End:
ret void
}
attributes #0 = { "use-sample-profile" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "calls.cc", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 1, !"Debug Info Version", i32 3}
!5 = !{!"clang version 3.5 "}
!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!7 = !DISubroutineType(types: !2)
!8 = !DILocation(line: 10, scope: !9)
!9 = !DILexicalBlockFile(scope: !10, file: !1, discriminator: 2)
!10 = distinct !DILexicalBlock(scope: !6, file: !1, line: 10)
!11 = !DILocation(line: 12, scope: !6)
!12 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!13 = !DILocation(line: 4, scope: !12)
!14 = !DILocation(line: 5, scope: !12)
!15 = !DILocation(line: 6, scope: !12)
;; Check the profile of funciton sum is only merged once though the original callsite is replicted.
; CHECK: name: "sum"
; CHECK-NEXT: {!"function_entry_count", i64 46}
; CHECK: !{!"branch_weights", i32 11, i32 37}
; CHECK: !{!"branch_weights", i32 11, i32 1}