diff --git a/docs/CommandGuide/llvm-dwarfdump.rst b/docs/CommandGuide/llvm-dwarfdump.rst index 9d204ffe6f8..a3b62664cbe 100644 --- a/docs/CommandGuide/llvm-dwarfdump.rst +++ b/docs/CommandGuide/llvm-dwarfdump.rst @@ -84,6 +84,11 @@ OPTIONS Only recurse to a maximum depth of <N> when dumping debug info entries. +.. option:: --statistics + + Collect debug info quality metrics and print the results + as machine-readable single-line JSON output. + .. option:: -x, --regex Treat any strings as regular expressions when searching diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index 6c238df284a..1d1eb601a33 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -902,6 +902,13 @@ auto partition(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range)) { return std::partition(std::begin(Range), std::end(Range), P); } +/// Provide wrappers to std::lower_bound which take ranges instead of having to +/// pass begin/end explicitly. \p I is the value to search for. +template <typename R, typename ForwardIt> +auto lower_bound(R &&Range, ForwardIt I) -> decltype(std::begin(Range)) { + return std::lower_bound(std::begin(Range), std::end(Range), I); +} + /// \brief Given a range of type R, iterate the entire range and return a /// SmallVector with elements of the vector. This is useful, for example, /// when you want to iterate a range and then sort the results.
diff --git a/test/tools/llvm-dwarfdump/X86/statistics.ll b/test/tools/llvm-dwarfdump/X86/statistics.ll new file mode 100644 index 00000000000..bc955453716 --- /dev/null +++ b/test/tools/llvm-dwarfdump/X86/statistics.ll @@ -0,0 +1,134 @@ +; RUN: llc -O0 %s -o - -filetype=obj \ +; RUN: | llvm-dwarfdump -statistics - | FileCheck %s + +; int GlobalConst = 42; +; int Global; +; +; struct S { +; static const int constant = 24; +; } s; +; +; int __attribute__((always_inline)) square(int i) { return i * i; } +; int cube(int i) { +; int squared = square(i); +; return squared*i; +; } + +; GlobalConst,Global,s,s.constant,square::i,cube::i,cube::squared +; CHECK: "unique source variables":7 +; +1 extra inline i. +; CHECK: "source variables":8 +; -1 square::i +; CHECK: "variables with location":7 +; CHECK: "scope bytes total":[[BYTES:[0-9]+]] +; Because of the dbg.value in the middle of the function, the pc range coverage +; must be below 100%. +; CHECK-NOT: "scope bytes covered":0 +; CHECK-NOT: "scope bytes covered":[[BYTES]] +; CHECK: "scope bytes covered": + +; ModuleID = '/tmp/quality.cpp' +source_filename = "/tmp/quality.cpp" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +%struct.S = type { i8 } + +@GlobalConst = global i32 42, align 4, !dbg !0 +@Global = global i32 0, align 4, !dbg !6 +@s = global %struct.S zeroinitializer, align 1, !dbg !9 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define i32 @_Z6squarei(i32 %i) #0 !dbg !20 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + ; Modified to lose debug info for i here.
+ call void @llvm.dbg.declare(metadata i32* undef, metadata !23, metadata !24), !dbg !25 + %0 = load i32, i32* %i.addr, align 4, !dbg !26 + %1 = load i32, i32* %i.addr, align 4, !dbg !27 + %mul = mul nsw i32 %0, %1, !dbg !28 + ret i32 %mul, !dbg !29 +} + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +; Function Attrs: noinline nounwind optnone ssp uwtable +define i32 @_Z4cubei(i32 %i) #2 !dbg !30 { +entry: + %i.addr.i = alloca i32, align 4 + call void @llvm.dbg.declare(metadata i32* %i.addr.i, metadata !23, metadata !24), !dbg !31 + %i.addr = alloca i32, align 4 + %squared = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !33, metadata !24), !dbg !34 + %0 = load i32, i32* %i.addr, align 4, !dbg !37 + store i32 %0, i32* %i.addr.i, align 4 + %1 = load i32, i32* %i.addr.i, align 4, !dbg !38 + %2 = load i32, i32* %i.addr.i, align 4, !dbg !39 + %mul.i = mul nsw i32 %1, %2, !dbg !40 + ; Modified to cover only about 50% of the lexical scope. 
+ call void @llvm.dbg.value(metadata i32 %mul.i, metadata !35, metadata !24), !dbg !36 + store i32 %mul.i, i32* %squared, align 4, !dbg !36 + %3 = load i32, i32* %squared, align 4, !dbg !41 + call void @llvm.dbg.value(metadata i32 %3, metadata !35, metadata !24), !dbg !36 + %4 = load i32, i32* %i.addr, align 4, !dbg !42 + %mul = mul nsw i32 %3, %4, !dbg !43 + ret i32 %mul, !dbg !44 +} + +attributes #0 = { alwaysinline nounwind ssp uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { noinline nounwind optnone ssp uwtable } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!15, !16, !17, !18} +!llvm.ident = !{!19} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "GlobalConst", scope: !2, file: !3, line: 1, type: !8, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 6.0.0 (trunk 310529) (llvm/trunk 310534)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) +!3 = !DIFile(filename: "/tmp/quality.cpp", directory: "/Volumes/Data/llvm") +!4 = !{} +!5 = !{!0, !6, !9} +!6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression()) +!7 = distinct !DIGlobalVariable(name: "Global", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true) +!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!9 = !DIGlobalVariableExpression(var: !10, expr: !DIExpression()) +!10 = distinct !DIGlobalVariable(name: "s", scope: !2, file: !3, line: 6, type: !11, isLocal: false, isDefinition: true) +!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !3, line: 4, size: 8, elements: !12, identifier: "_ZTS1S") +!12 = !{!13} +!13 = !DIDerivedType(tag: DW_TAG_member, name: "constant", scope: !11, file: !3, line: 5, baseType: !14, flags: DIFlagStaticMember, extraData: i32 24) +!14 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !8) +!15 = 
!{i32 2, !"Dwarf Version", i32 4} +!16 = !{i32 2, !"Debug Info Version", i32 3} +!17 = !{i32 1, !"wchar_size", i32 4} +!18 = !{i32 7, !"PIC Level", i32 2} +!19 = !{!"clang version 6.0.0 (trunk 310529) (llvm/trunk 310534)"} +!20 = distinct !DISubprogram(name: "square", linkageName: "_Z6squarei", scope: !3, file: !3, line: 8, type: !21, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, unit: !2, variables: !4) +!21 = !DISubroutineType(types: !22) +!22 = !{!8, !8} +!23 = !DILocalVariable(name: "i", arg: 1, scope: !20, file: !3, line: 8, type: !8) +!24 = !DIExpression() +!25 = !DILocation(line: 8, column: 47, scope: !20) +!26 = !DILocation(line: 8, column: 59, scope: !20) +!27 = !DILocation(line: 8, column: 63, scope: !20) +!28 = !DILocation(line: 8, column: 61, scope: !20) +!29 = !DILocation(line: 8, column: 52, scope: !20) +!30 = distinct !DISubprogram(name: "cube", linkageName: "_Z4cubei", scope: !3, file: !3, line: 9, type: !21, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !2, variables: !4) +!31 = !DILocation(line: 8, column: 47, scope: !20, inlinedAt: !32) +!32 = distinct !DILocation(line: 10, column: 17, scope: !30) +!33 = !DILocalVariable(name: "i", arg: 1, scope: !30, file: !3, line: 9, type: !8) +!34 = !DILocation(line: 9, column: 14, scope: !30) +!35 = !DILocalVariable(name: "squared", scope: !30, file: !3, line: 10, type: !8) +!36 = !DILocation(line: 10, column: 7, scope: !30) +!37 = !DILocation(line: 10, column: 24, scope: !30) +!38 = !DILocation(line: 8, column: 59, scope: !20, inlinedAt: !32) +!39 = !DILocation(line: 8, column: 63, scope: !20, inlinedAt: !32) +!40 = !DILocation(line: 8, column: 61, scope: !20, inlinedAt: !32) +!41 = !DILocation(line: 11, column: 10, scope: !30) +!42 = !DILocation(line: 11, column: 18, scope: !30) +!43 = !DILocation(line: 11, column: 17, scope: !30) +!44 = !DILocation(line: 11, column: 3, scope: !30) diff --git 
a/test/tools/llvm-dwarfdump/cmdline.test b/test/tools/llvm-dwarfdump/cmdline.test index 7a34a2d1ef7..de17cf9033a 100644 --- a/test/tools/llvm-dwarfdump/cmdline.test +++ b/test/tools/llvm-dwarfdump/cmdline.test @@ -14,6 +14,7 @@ HELP: -regex HELP: -show-children HELP: -show-form HELP: -show-parents +HELP: -statistics HELP: -summarize-types HELP-NOT: -reverse-iterate diff --git a/tools/llvm-dwarfdump/CMakeLists.txt b/tools/llvm-dwarfdump/CMakeLists.txt index 23fee30bfa4..77620e0faaf 100644 --- a/tools/llvm-dwarfdump/CMakeLists.txt +++ b/tools/llvm-dwarfdump/CMakeLists.txt @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_tool(llvm-dwarfdump + Statistics.cpp llvm-dwarfdump.cpp ) diff --git a/tools/llvm-dwarfdump/Statistics.cpp b/tools/llvm-dwarfdump/Statistics.cpp new file mode 100644 index 00000000000..9a7454a5262 --- /dev/null +++ b/tools/llvm-dwarfdump/Statistics.cpp @@ -0,0 +1,239 @@ +#include "llvm/ADT/DenseMap.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" +#include "llvm/Object/ObjectFile.h" + +#define DEBUG_TYPE "dwarfdump" +using namespace llvm; +using namespace object; + +/// Holds statistics for one function (or other entity that has a PC range and +/// contains variables, such as a compile unit). +struct PerFunctionStats { + /// Number of concrete (out-of-line or inlined) instances of this function. + unsigned NumFnInlined = 0; + /// Number of variables with location across all inlined instances. + unsigned TotalVarWithLoc = 0; + /// Number of constants with location across all inlined instances. + unsigned ConstantMembers = 0; + /// Canonical DIE offsets of all variables in this function. + SmallDenseSet<uint32_t, 4> VarsInFunction; + /// Compile units also cover a PC range, but have this flag set to false. + bool IsFunction = false; +}; + +/// Holds accumulated global statistics about local variables. +struct GlobalStats { + /// Total number of PC range bytes covered by DW_AT_locations.
+ uint64_t ScopeBytesCovered = 0; + /// Total number of PC range bytes in each variable's enclosing scope, + /// starting from the first definition of the variable. + uint64_t ScopeBytesFromFirstDefinition = 0; +}; + +/// Extract the low pc from a Die. +static uint64_t getLowPC(DWARFDie Die) { + if (Die.getAddressRanges().size()) + return Die.getAddressRanges()[0].LowPC; + return dwarf::toAddress(Die.find(dwarf::DW_AT_low_pc), 0); +} + +/// Collect debug info quality metrics for one DIE. +static void collectStatsForDie(DWARFDie Die, std::string Prefix, + uint64_t ScopeLowPC, uint64_t BytesInScope, + StringMap<PerFunctionStats> &FnStatMap, + GlobalStats &GlobalStats) { + bool HasLoc = false; + uint64_t BytesCovered = 0; + uint64_t OffsetToFirstDefinition = 0; + if (Die.find(dwarf::DW_AT_const_value)) { + // This catches constant members *and* variables. + HasLoc = true; + BytesCovered = BytesInScope; + } else if (Die.getTag() == dwarf::DW_TAG_variable || + Die.getTag() == dwarf::DW_TAG_formal_parameter) { + // Handle variables and function arguments. + auto FormValue = Die.find(dwarf::DW_AT_location); + HasLoc = FormValue.hasValue(); + if (HasLoc) { + // Get PC coverage. + if (auto DebugLocOffset = FormValue->getAsSectionOffset()) { + auto *DebugLoc = Die.getDwarfUnit()->getContext().getDebugLoc(); + if (auto List = DebugLoc->getLocationListAtOffset(*DebugLocOffset)) { + for (const auto &Entry : List->Entries) + BytesCovered += Entry.End - Entry.Begin; + if (List->Entries.size()) { + uint64_t FirstDef = List->Entries[0].Begin; + uint64_t UnitOfs = getLowPC(Die.getDwarfUnit()->getUnitDIE()); + // Ranges sometimes start before the lexical scope. + if (UnitOfs + FirstDef >= ScopeLowPC) + OffsetToFirstDefinition = UnitOfs + FirstDef - ScopeLowPC; + // Or even after it. Count that as a failure. + if (OffsetToFirstDefinition > BytesInScope) + OffsetToFirstDefinition = 0; + } + } + assert(BytesInScope); + } else { + // Assume the entire range is covered by a single location.
+ BytesCovered = BytesInScope; + } + } + } else { + // Not a variable or constant member. + return; + } + + // Collect PC range coverage data. + auto &FnStats = FnStatMap[Prefix]; + if (DWARFDie D = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) + Die = D; + // This is a unique ID for the variable inside the current object file. + unsigned CanonicalDieOffset = Die.getOffset(); + FnStats.VarsInFunction.insert(CanonicalDieOffset); + if (BytesInScope) { + FnStats.TotalVarWithLoc += (unsigned)HasLoc; + // Adjust for the fact the variables often start their lifetime in the + // middle of the scope. + BytesInScope -= OffsetToFirstDefinition; + // Turns out we have a lot of ranges that extend past the lexical scope. + GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered); + GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope; + assert(GlobalStats.ScopeBytesCovered <= + GlobalStats.ScopeBytesFromFirstDefinition); + } else { + FnStats.ConstantMembers++; + } +} + +/// Recursively collect debug info quality metrics. +static void collectStatsRecursive(DWARFDie Die, std::string Prefix, + uint64_t ScopeLowPC, uint64_t BytesInScope, + StringMap<PerFunctionStats> &FnStatMap, + GlobalStats &GlobalStats) { + // Handle any kind of lexical scope. + if (Die.getTag() == dwarf::DW_TAG_subprogram || + Die.getTag() == dwarf::DW_TAG_inlined_subroutine || + Die.getTag() == dwarf::DW_TAG_lexical_block) { + // Ignore forward declarations. + if (Die.find(dwarf::DW_AT_declaration)) + return; + + // Count the function. + if (Die.getTag() != dwarf::DW_TAG_lexical_block) { + StringRef Name = Die.getName(DINameKind::LinkageName); + if (Name.empty()) + Name = Die.getName(DINameKind::ShortName); + Prefix = Name; + // Skip over abstract origins. + if (Die.find(dwarf::DW_AT_inline)) + return; + // We've seen an (inlined) instance of this function. + auto &FnStats = FnStatMap[Name]; + FnStats.NumFnInlined++; + FnStats.IsFunction = true; + } + + // PC Ranges.
+ auto Ranges = Die.getAddressRanges(); + uint64_t BytesInThisScope = 0; + for (auto Range : Ranges) + BytesInThisScope += Range.HighPC - Range.LowPC; + ScopeLowPC = getLowPC(Die); + + if (BytesInThisScope) + BytesInScope = BytesInThisScope; + } else { + // Not a scope, visit the Die itself. It could be a variable. + collectStatsForDie(Die, Prefix, ScopeLowPC, BytesInScope, FnStatMap, + GlobalStats); + } + + // Traverse children. + DWARFDie Child = Die.getFirstChild(); + while (Child) { + collectStatsRecursive(Child, Prefix, ScopeLowPC, BytesInScope, FnStatMap, + GlobalStats); + Child = Child.getSibling(); + } +} + +/// Print one datum of the single-line JSON output; string values are escaped. +/// \{ +static void printDatum(raw_ostream &OS, const char *Key, StringRef Value) { + OS << ",\"" << Key << "\":\""; + OS.write_escaped(Value) << '"'; + DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); +} +static void printDatum(raw_ostream &OS, const char *Key, uint64_t Value) { + OS << ",\"" << Key << "\":" << Value; + DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); +} +/// \} + +/// Collect debug info quality metrics for an entire DIContext. +/// +/// Do the impossible and reduce the quality of the debug info down to a few +/// numbers. The idea is to condense the data into numbers that can be tracked +/// over time to identify trends in newer compiler versions and gauge the effect +/// of particular optimizations. The raw numbers themselves are not particularly +/// useful, only the delta between compiling the same program with different +/// compilers is.
+bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, + Twine Filename, raw_ostream &OS) { + StringRef FormatName = Obj.getFileFormatName(); + GlobalStats GlobalStats; + StringMap<PerFunctionStats> Statistics; + for (const auto &CU : DICtx.compile_units()) + if (DWARFDie CUDie = CU->getUnitDIE(false)) + collectStatsRecursive(CUDie, "/", 0, 0, Statistics, GlobalStats); + + /// The version number should be increased every time the algorithm is changed + /// (including bug fixes). New metrics may be added without increasing the + /// version. + unsigned Version = 1; + unsigned VarTotal = 0; + unsigned VarUnique = 0; + unsigned VarWithLoc = 0; + unsigned NumFunctions = 0; + unsigned NumInlinedFunctions = 0; + for (auto &Entry : Statistics) { + PerFunctionStats &Stats = Entry.getValue(); + unsigned TotalVars = Stats.VarsInFunction.size() * Stats.NumFnInlined; + unsigned Constants = Stats.ConstantMembers; + VarWithLoc += Stats.TotalVarWithLoc + Constants; + VarTotal += TotalVars + Constants; + VarUnique += Stats.VarsInFunction.size(); + DEBUG(for (auto V : Stats.VarsInFunction) + llvm::dbgs() << Entry.getKey() << ": " << V << "\n"); + NumFunctions += Stats.IsFunction; + NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined; + } + + // Print summary.
+ OS.SetBufferSize(1024); + OS << "{\"version\":\"" << Version << '"'; + DEBUG(llvm::dbgs() << "Variable location quality metrics\n"; + llvm::dbgs() << "---------------------------------\n"); + printDatum(OS, "file", Filename.str()); + printDatum(OS, "format", FormatName); + printDatum(OS, "source functions", NumFunctions); + printDatum(OS, "inlined functions", NumInlinedFunctions); + printDatum(OS, "unique source variables", VarUnique); + printDatum(OS, "source variables", VarTotal); + printDatum(OS, "variables with location", VarWithLoc); + printDatum(OS, "scope bytes total", + GlobalStats.ScopeBytesFromFirstDefinition); + printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered); + OS << "}\n"; + DEBUG( + llvm::dbgs() << "Total Availability: " + << (int)std::round((VarWithLoc * 100.0) / VarTotal) << "%\n"; + llvm::dbgs() << "PC Ranges covered: " + << (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) / + GlobalStats.ScopeBytesFromFirstDefinition) + << "%\n"); + return true; +} diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index 9e6fc773139..0bd6ca73b1b 100644 --- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -201,6 +201,10 @@ static opt<bool> SummarizeTypes("summarize-types", desc("Abbreviate the description of type unit entries"), cat(DwarfDumpCategory)); +static opt<bool> + Statistics("statistics", + desc("Emit JSON-formatted debug info quality metrics."), + cat(DwarfDumpCategory)); static opt<bool> Verify("verify", desc("Verify the DWARF debug info"), cat(DwarfDumpCategory)); static opt<bool> Quiet("quiet", desc("Use with -verify to not emit to STDOUT."), @@ -301,6 +305,9 @@ static void filterByName(const StringSet<> &Names, } } +bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, + Twine Filename, raw_ostream &OS); + static bool dumpObjectFile(ObjectFile &Obj, DWARFContext &DICtx, Twine Filename, raw_ostream &OS) {
logAllUnhandledErrors(DICtx.loadRegisterInfo(Obj), errs(), @@ -536,7 +543,16 @@ int main(int argc, char **argv) { return handleFile(Object, verifyObjectFile, OS); })) exit(1); - } else + } else if (Statistics) { + // Process every input, but exit non-zero if any of them failed, matching + // the -verify path above. + bool Success = true; + for (auto Object : Objects) + if (!handleFile(Object, collectStatsForObjectFile, OS)) + Success = false; + if (!Success) + exit(1); + } else for (auto Object : Objects) handleFile(Object, dumpObjectFile, OS);