1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 10:32:48 +02:00

[CSSPGO][llvm-profgen] Report samples for untrackable frames.

Fixing an issue where samples collected for an untrackable frame are not reported. An untrackable frame refers to a frame whose caller is untrackable due to missing debug info or pseudo probe. Though the frame is connected to its parent frame through the frame pointer chain at runtime, the compiler cannot build the connection without debug info or pseudo probe. In such a case we just need to report the untrackable frame as the base frame, together with all of its child frames.

With more samples reported I'm seeing this improves the performance of an internal benchmark by 2.5%.

Reviewed By: wenlei, wlei

Differential Revision: https://reviews.llvm.org/D102961
This commit is contained in:
Hongtao Yu 2021-05-21 17:44:56 -07:00
parent 6d2f5ef7fe
commit 9a044d4e9b
6 changed files with 250 additions and 4 deletions

View File

@ -0,0 +1,178 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
; Function Attrs: nounwind readnone uwtable willreturn
; bar(x, y): returns x + y when x % 3 == 0 and x - y otherwise. The choice is
; made with a select rather than a branch, so the whole body is one block.
; Every llvm.pseudoprobe call here uses the GUID that the
; !llvm.pseudo_probe_desc entry !7 associates with the name "bar".
define dso_local i32 @bar(i32 %x, i32 %y) local_unnamed_addr #0 !dbg !10 {
entry:
call void @llvm.dbg.value(metadata i32 %x, metadata !15, metadata !DIExpression()), !dbg !17
call void @llvm.dbg.value(metadata i32 %y, metadata !16, metadata !DIExpression()), !dbg !17
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !18
; %tobool.not is true when x is a multiple of 3 (signed remainder).
%rem = srem i32 %x, 3, !dbg !20
%tobool.not = icmp eq i32 %rem, 0, !dbg !20
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 2, i32 2, i64 -1), !dbg !21
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 3, i32 2, i64 -1), !dbg !23
; %0 = -y; pick +y or -y, then add x to form the return value.
%0 = sub i32 0, %y, !dbg !24
%retval.0.p = select i1 %tobool.not, i32 %y, i32 %0, !dbg !24
%retval.0 = add i32 %retval.0.p, %x, !dbg !24
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 4, i32 0, i64 -1), !dbg !25
ret i32 %retval.0, !dbg !25
}
; Function Attrs: noinline nounwind uwtable
; foo(): runs a counted loop with i = 1 .. 16000000 and an accumulator s that
; starts at 0, then prints the final s with printf using the @.str format
; ("sum is %d\n").
;   - i % 91 != 0: s becomes (i % 3 == 0) ? i + s : i - s. This is the body of
;     bar(i, s) inlined into foo; the debug locations in %if.then carry
;     inlinedAt: !42 and the probes there use bar's GUID.
;   - i % 91 == 0: s becomes s + 30.
; Probes that use GUID 6699318081062747564 belong to "foo" (see !8).
define dso_local void @foo() local_unnamed_addr #1 !dbg !26 {
entry:
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg !32
call void @llvm.dbg.value(metadata i32 0, metadata !30, metadata !DIExpression()), !dbg !33
call void @llvm.dbg.value(metadata i32 0, metadata !31, metadata !DIExpression()), !dbg !33
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg !34
call void @llvm.dbg.value(metadata i32 1, metadata !31, metadata !DIExpression()), !dbg !33
br label %while.body, !dbg !35
while.body: ; preds = %entry, %if.end
; Loop header: %inc8 is the current i (1 on entry), %s.07 is the current s.
%inc8 = phi i32 [ 1, %entry ], [ %inc, %if.end ]
%s.07 = phi i32 [ 0, %entry ], [ %s.1, %if.end ]
call void @llvm.dbg.value(metadata i32 %s.07, metadata !30, metadata !DIExpression()), !dbg !33
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg !36
; Multiples of 91 take %if.else; every other i takes %if.then.
%rem = urem i32 %inc8, 91, !dbg !38
%tobool.not = icmp eq i32 %rem, 0, !dbg !38
br i1 %tobool.not, label %if.else, label %if.then, !dbg !39
if.then: ; preds = %while.body
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !40
; Inlined copy of bar(i, s): same arithmetic and the same four bar probes.
call void @llvm.dbg.value(metadata i32 %inc8, metadata !15, metadata !DIExpression()) #6, !dbg !41
call void @llvm.dbg.value(metadata i32 %s.07, metadata !16, metadata !DIExpression()) #6, !dbg !41
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1) #6, !dbg !44
%rem.i = urem i32 %inc8, 3, !dbg !45
%tobool.not.i = icmp eq i32 %rem.i, 0, !dbg !45
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 2, i32 2, i64 -1) #6, !dbg !46
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 3, i32 2, i64 -1) #6, !dbg !47
%0 = sub i32 0, %s.07, !dbg !48
%retval.0.p.i = select i1 %tobool.not.i, i32 %s.07, i32 %0, !dbg !48
%retval.0.i = add i32 %retval.0.p.i, %inc8, !dbg !48
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 4, i32 0, i64 -1) #6, !dbg !49
call void @llvm.dbg.value(metadata i32 %retval.0.i, metadata !30, metadata !DIExpression()), !dbg !33
br label %if.end, !dbg !50
if.else: ; preds = %while.body
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 5, i32 0, i64 -1), !dbg !51
%add = add nsw i32 %s.07, 30, !dbg !51
call void @llvm.dbg.value(metadata i32 %add, metadata !30, metadata !DIExpression()), !dbg !33
br label %if.end
if.end: ; preds = %if.else, %if.then
; Merge the updated s from whichever arm ran, then advance i.
%s.1 = phi i32 [ %retval.0.i, %if.then ], [ %add, %if.else ], !dbg !52
call void @llvm.dbg.value(metadata i32 %s.1, metadata !30, metadata !DIExpression()), !dbg !33
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1), !dbg !35
call void @llvm.dbg.value(metadata i32 %inc8, metadata !31, metadata !DIExpression()), !dbg !33
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg !34
%inc = add nuw nsw i32 %inc8, 1, !dbg !34
call void @llvm.dbg.value(metadata i32 %inc, metadata !31, metadata !DIExpression()), !dbg !33
; Leave the loop once the incremented counter reaches 16000001.
%exitcond.not = icmp eq i32 %inc, 16000001, !dbg !53
br i1 %exitcond.not, label %while.end, label %while.body, !dbg !35, !llvm.loop !54
while.end: ; preds = %if.end
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 7, i32 0, i64 -1), !dbg !57
%call1 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i32 %s.1), !dbg !58
ret void, !dbg !60
}
; Function Attrs: nofree nounwind
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr #2
; Function Attrs: nounwind uwtable
; main(): emits its single pseudo probe (the GUID that !9 associates with
; "main"), calls foo(), and returns 0.
define dso_local i32 @main() local_unnamed_addr #3 !dbg !61 {
entry:
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !64
call void @foo(), !dbg !65
ret i32 0, !dbg !67
}
; Function Attrs: inaccessiblememonly nounwind willreturn
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #4
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata) #5
attributes #0 = { nounwind readnone uwtable willreturn "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noinline nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nofree nounwind "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { inaccessiblememonly nounwind willreturn }
attributes #5 = { nofree nosync nounwind readnone speculatable willreturn }
attributes #6 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}
!llvm.pseudo_probe_desc = !{!7, !8, !9}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "test.c", directory: "test")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{!"clang version 12.0.0"}
!7 = !{i64 -2012135647395072713, i64 72617220756, !"bar", null}
!8 = !{i64 6699318081062747564, i64 563088904013236, !"foo", null}
!9 = !{i64 -2624081020897602054, i64 281479271677951, !"main", null}
!10 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !11, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !14)
!11 = !DISubroutineType(types: !12)
!12 = !{!13, !13, !13}
!13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!14 = !{!15, !16}
!15 = !DILocalVariable(name: "x", arg: 1, scope: !10, file: !1, line: 3, type: !13)
!16 = !DILocalVariable(name: "y", arg: 2, scope: !10, file: !1, line: 3, type: !13)
!17 = !DILocation(line: 0, scope: !10)
!18 = !DILocation(line: 4, column: 9, scope: !19)
!19 = distinct !DILexicalBlock(scope: !10, file: !1, line: 4, column: 9)
!20 = !DILocation(line: 4, column: 11, scope: !19)
!21 = !DILocation(line: 5, column: 18, scope: !22)
!22 = distinct !DILexicalBlock(scope: !19, file: !1, line: 4, column: 16)
!23 = !DILocation(line: 7, column: 14, scope: !10)
!24 = !DILocation(line: 4, column: 9, scope: !10)
!25 = !DILocation(line: 8, column: 1, scope: !10)
!26 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 11, type: !27, scopeLine: 11, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !29)
!27 = !DISubroutineType(types: !28)
!28 = !{null}
!29 = !{!30, !31}
!30 = !DILocalVariable(name: "s", scope: !26, file: !1, line: 12, type: !13)
!31 = !DILocalVariable(name: "i", scope: !26, file: !1, line: 12, type: !13)
!32 = !DILocation(line: 12, column: 5, scope: !26)
!33 = !DILocation(line: 0, scope: !26)
!34 = !DILocation(line: 13, column: 15, scope: !26)
!35 = !DILocation(line: 13, column: 7, scope: !26)
!36 = !DILocation(line: 14, column: 17, scope: !37)
!37 = distinct !DILexicalBlock(scope: !26, file: !1, line: 14, column: 17)
!38 = !DILocation(line: 14, column: 19, scope: !37)
!39 = !DILocation(line: 14, column: 17, scope: !26)
!40 = !DILocation(line: 14, column: 33, scope: !37)
!41 = !DILocation(line: 0, scope: !10, inlinedAt: !42)
!42 = distinct !DILocation(line: 14, column: 29, scope: !43)
!43 = !DILexicalBlockFile(scope: !37, file: !1, discriminator: 186646599)
!44 = !DILocation(line: 4, column: 9, scope: !19, inlinedAt: !42)
!45 = !DILocation(line: 4, column: 11, scope: !19, inlinedAt: !42)
!46 = !DILocation(line: 5, column: 18, scope: !22, inlinedAt: !42)
!47 = !DILocation(line: 7, column: 14, scope: !10, inlinedAt: !42)
!48 = !DILocation(line: 4, column: 9, scope: !10, inlinedAt: !42)
!49 = !DILocation(line: 8, column: 1, scope: !10, inlinedAt: !42)
!50 = !DILocation(line: 14, column: 25, scope: !37)
!51 = !DILocation(line: 14, column: 47, scope: !37)
!52 = !DILocation(line: 0, scope: !37)
!53 = !DILocation(line: 13, column: 18, scope: !26)
!54 = distinct !{!54, !35, !55, !56}
!55 = !DILocation(line: 14, column: 50, scope: !26)
!56 = !{!"llvm.loop.mustprogress"}
!57 = !DILocation(line: 15, column: 31, scope: !26)
!58 = !DILocation(line: 15, column: 9, scope: !59)
!59 = !DILexicalBlockFile(scope: !26, file: !1, discriminator: 186646607)
!60 = !DILocation(line: 16, column: 1, scope: !26)
!61 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 18, type: !62, scopeLine: 18, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!62 = !DISubroutineType(types: !63)
!63 = !{!13}
!64 = !DILocation(line: 19, column: 5, scope: !61)
!65 = !DILocation(line: 19, column: 5, scope: !66)
!66 = !DILexicalBlockFile(scope: !61, file: !1, discriminator: 7)
!67 = !DILocation(line: 20, column: 7, scope: !61)

Binary file not shown.

View File

@ -0,0 +1,5 @@
PERF_RECORD_MMAP2 1664112/1664112: [0x400000(0x1000) @ 0 08:11 806256818 82060973]: r-xp truncated-pseudoprobe.perfbin
40057d
4005b9
7f67469af555
0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0

View File

@ -0,0 +1,22 @@
; RUN: llvm-profgen --perfscript=%S/Inputs/truncated-pseudoprobe.perfscript --binary=%S/Inputs/truncated-pseudoprobe.perfbin --output=%t
; RUN: FileCheck %s --input-file %t
; CHECK: [foo]:75:0
; CHECK-NEXT: 2: 15
; CHECK-NEXT: 3: 15
; CHECK-NEXT: 4: 15
; CHECK-NEXT: 6: 15
; CHECK-NEXT: 8: 15 bar:15
; CHECK-NEXT: !CFGChecksum: 563088904013236
; CHECK-NEXT: !Attributes: 0
; CHECK: [foo:8 @ bar]:30:15
; CHECK-NEXT: 1: 15
; CHECK-NEXT: 2: 18446744073709551615
; CHECK-NEXT: 3: 18446744073709551615
; CHECK-NEXT: 4: 15
; CHECK-NEXT: !CFGChecksum: 72617220756
; CHECK-NEXT: !Attributes: 1
; truncated-pseudoprobe.perfbin is from the following compile commands:
; llc -pseudo-probe-for-profiling truncated-pseudoprobe.ll -filetype=obj -o truncated-pseudoprobe.o
; clang truncated-pseudoprobe.o -o truncated-pseudoprobe.perfbin

View File

@ -143,9 +143,11 @@ void VirtualUnwinder::collectSamplesFromFrameTrie(
if (!Cur->isDummyRoot()) {
if (!Stack.pushFrame(Cur)) {
// Process truncated context
// Start a new traversal ignoring its bottom context
T EmptyStack(Binary);
collectSamplesFromFrame(Cur, EmptyStack);
for (const auto &Item : Cur->Children) {
// Start a new traversal ignoring its bottom context
collectSamplesFromFrameTrie(Item.second.get());
collectSamplesFromFrameTrie(Item.second.get(), EmptyStack);
}
return;
}

View File

@ -75,8 +75,33 @@ struct LBREntry {
bool IsArtificial = false;
LBREntry(uint64_t S, uint64_t T, bool I)
: Source(S), Target(T), IsArtificial(I) {}
#ifndef NDEBUG
// Debug helper: writes "from <Source> to <Target>" to dbgs(), followed by
// " Artificial" when the entry is flagged as artificial. No newline is
// emitted; the caller is expected to terminate the line.
void print() const {
  // Source and Target are initialized from uint64_t constructor arguments,
  // so use the 64-bit conversion; a plain "%x" expects an unsigned int and
  // does not match the argument width.
  dbgs() << "from " << format("%#010" PRIx64, Source) << " to "
         << format("%#010" PRIx64, Target);
  if (IsArtificial)
    dbgs() << " Artificial";
}
#endif
};
#ifndef NDEBUG
static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
for (size_t I = 0; I < LBRStack.size(); I++) {
dbgs() << "[" << I << "] ";
LBRStack[I].print();
dbgs() << "\n";
}
}
// Debug helper: dumps every call stack address to dbgs(), one per line, as
// "[index] 0x<hex address>".
static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
  for (size_t I = 0; I < CallStack.size(); I++) {
    // The elements are uint64_t, so the conversion must be the 64-bit one;
    // "%x" expects an unsigned int and does not match the argument width.
    dbgs() << "[" << I << "] " << format("%#010" PRIx64, CallStack[I]) << "\n";
  }
}
#endif
// Hash interface for generic data of type T
// Data should implement a \fn getHashCode and a \fn isEqual
// Currently getHashCode is non-virtual to avoid the overhead of calling vtable,
@ -185,6 +210,15 @@ struct HybridSample : public PerfSample {
}
HashCode = Hash;
}
#ifndef NDEBUG
// Debug helper: dumps this sample to dbgs() as an "LBR stack" section
// followed by a "Call stack" section. Marked `used` so the definition is
// kept even when nothing in the program calls it.
__attribute__((used)) void print() const {
  auto &OS = dbgs();

  OS << "LBR stack\n";
  printLBRStack(LBRStack);

  OS << "Call stack\n";
  printCallStack(CallStack);
}
#endif
};
// After parsing the sample, we record the samples by aggregating them
@ -224,6 +258,7 @@ struct UnwindState {
BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
}
bool isDummyRoot() { return Address == 0; }
bool isLeafFrame() { return Children.empty(); }
};
ProfiledFrame DummyTrieRoot;
@ -406,8 +441,13 @@ struct ProbeStack {
// Callsite merging may cause the loss of original probe IDs.
// Cutting off the context from here since the inliner will
// not know how to consume a context with unknown callsites.
if (!CallProbe)
if (!CallProbe) {
if (!Cur->isLeafFrame())
WithColor::warning()
<< "Untracked frame at " << format("%" PRIx64, Cur->Address)
<< " due to missing call probe\n";
return false;
}
Stack.push_back(CallProbe);
return true;
}
@ -608,7 +648,6 @@ private:
BinaryMap BinaryTable;
AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup.
private:
BinarySampleCounterMap BinarySampleCounters;
// Samples with the repeating time generated by the perf reader
AggregatedCounter AggregatedSamples;