From b646654f66b6b1038cc9cb3383bf5e475900bdd9 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Tue, 15 Nov 2016 01:23:06 +0000 Subject: [PATCH] Improve DWARF parsing speed by improving DWARFAbbreviationDeclaration This patch gets a DWARF parsing speed improvement by having DWARFAbbreviationDeclaration instances know if they have a fixed byte size. If an abbreviation has a fixed byte size that can be calculated given a DWARFUnit, then parsing a DIE becomes two steps: parse ULEB128 abbrev code, and then add constant size to the offset. This patch also adds a fixed byte size to each DWARFAbbreviationDeclaration::AttributeSpec so that attributes can quickly skip their values if needed without the need to look up the fixed form size. Notable improvements: - DWARFAbbreviationDeclaration::findAttributeIndex() now returns an Optional<uint32_t> instead of a uint32_t and we no longer have to look for the magic -1U return value - Optional<uint32_t> DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute attr) const; - DWARFAbbreviationDeclaration now has a getAttributeValue() function that extracts an attribute value given a DIE offset that takes advantage of the DWARFAbbreviationDeclaration::AttributeSpec::ByteSize - bool DWARFAbbreviationDeclaration::getAttributeValue(const uint32_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U, DWARFFormValue &FormValue) const; - A DWARFAbbreviationDeclaration instance can return a fixed byte size for itself so DWARF parsing is faster: - Optional<size_t> DWARFAbbreviationDeclaration::getFixedAttributesByteSize(const DWARFUnit &U) const; - Any functions that used to take a "const DWARFUnit *U" that would crash if U was NULL now take a "const DWARFUnit &U" and are only called with a valid DWARFUnit Differential Revision: https://reviews.llvm.org/D26567 llvm-svn: 286924 --- .../DWARF/DWARFAbbreviationDeclaration.h | 74 +++++++++++- .../DebugInfo/DWARF/DWARFDebugInfoEntry.h | 5 +- include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 1 + 
.../DWARF/DWARFAbbreviationDeclaration.cpp | 109 +++++++++++++++++- lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 56 +++++---- lib/DebugInfo/DWARF/DWARFUnit.cpp | 3 +- tools/dsymutil/DwarfLinker.cpp | 15 +-- 7 files changed, 215 insertions(+), 48 deletions(-) diff --git a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index c9cba3b930d..fd28766db74 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -10,20 +10,34 @@ #ifndef LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H #define LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Dwarf.h" - namespace llvm { +class DWARFUnit; +class DWARFFormValue; class raw_ostream; class DWARFAbbreviationDeclaration { public: struct AttributeSpec { - AttributeSpec(dwarf::Attribute A, dwarf::Form F) : Attr(A), Form(F) {} + AttributeSpec(dwarf::Attribute A, dwarf::Form F, Optional S) + : Attr(A), Form(F), ByteSize(S) {} dwarf::Attribute Attr; dwarf::Form Form; + /// If ByteSize has a value, then it contains the fixed size in bytes for + /// the Form in this object. If ByteSize doesn't have a value, then the + /// byte size of Form either varies according to the DWARFUnit that it is + /// contained in or the value size varies and must be decoded from the + /// debug information in order to determine its size. + Optional ByteSize; + /// Get the fixed byte size of this Form if possible. This function might + /// use the DWARFUnit to calculate the size of the Form, like for + /// DW_AT_address and DW_AT_ref_addr, so this isn't just an accessor for + /// the ByteSize member. 
+ Optional getByteSize(const DWARFUnit &U) const; }; typedef SmallVector AttributeSpecVector; @@ -46,18 +60,70 @@ public: return dwarf::Form(0); } - uint32_t findAttributeIndex(dwarf::Attribute attr) const; + /// Get the index of the specified attribute. + /// + /// Searches the this abbreviation declaration for the index of the specified + /// attribute. + /// + /// \param Attr DWARF attribute to search for. + /// \returns Optional index of the attribute if found, None otherwise. + Optional findAttributeIndex(dwarf::Attribute attr) const; + + /// Extract a DWARF form value from a DIE specified by DIE offset. + /// + /// Extract an attribute value for a DWARFUnit given the DIE offset and the + /// attribute. + /// + /// \param DIEOffset the DIE offset that points to the ULEB128 abbreviation + /// code in the .debug_info data. + /// \param Attr DWARF attribute to search for. + /// \param U the DWARFUnit the contains the DIE. + /// \param FormValue the form value that will be filled in. + /// \returns true if the attribute was extracted into \p FormValue. + bool getAttributeValue(const uint32_t DIEOffset, const dwarf::Attribute Attr, + const DWARFUnit &U, DWARFFormValue &FormValue) const; bool extract(DataExtractor Data, uint32_t* OffsetPtr); void dump(raw_ostream &OS) const; + // Return an optional byte size of all attribute data in this abbreviation + // if a constant byte size can be calculated given a DWARFUnit. This allows + // DWARF parsing to be faster as many DWARF DIEs have a fixed byte size. + Optional getFixedAttributesByteSize(const DWARFUnit &U) const; + private: void clear(); + /// A helper structure that can quickly determine the size in bytes of an + /// abbreviation declaration. + struct FixedSizeInfo { + /// The fixed byte size for fixed size forms. + uint16_t NumBytes; + /// Number of DW_FORM_address forms in this abbrevation declaration. + uint8_t NumAddrs; + /// Number of DW_FORM_ref_addr forms in this abbrevation declaration. 
+ uint8_t NumRefAddrs; + /// Number of 4 byte in DWARF32 and 8 byte in DWARF64 forms. + uint8_t NumDwarfOffsets; + /// Constructor + FixedSizeInfo() + : NumBytes(0), NumAddrs(0), NumRefAddrs(0), NumDwarfOffsets(0) {} + /// Calculate the fixed size in bytes given a DWARFUnit. + /// + /// \param U the DWARFUnit to use when determing the byte size. + /// \returns the size in bytes for all attribute data in this abbreviation. + /// The returned size does not include bytes for the ULEB128 abbreviation + /// code + size_t getByteSize(const DWARFUnit &U) const; + }; + uint32_t Code; dwarf::Tag Tag; + uint8_t CodeByteSize; bool HasChildren; - AttributeSpecVector AttributeSpecs; + /// If this abbreviation has a fixed byte size then FixedAttributeSize member + /// variable below will have a value. + Optional FixedAttributeSize; }; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h index 9cfa3e12041..4ec67a49719 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -47,7 +47,10 @@ public: /// Extracts a debug info entry, which is a child of a given unit, /// starting at a given offset. If DIE can't be extracted, returns false and /// doesn't change OffsetPtr. - bool extractFast(const DWARFUnit *U, uint32_t *OffsetPtr); + bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr); + /// High performance extraction should use this call. + bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr, + const DataExtractor &DebugInfoData, uint32_t UEndOffset); uint32_t getTag() const { return AbbrevDecl ? 
AbbrevDecl->getTag() : 0; } bool isNULL() const { return AbbrevDecl == nullptr; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 6a4019919ab..a6228b531ce 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -56,6 +56,7 @@ private: public: DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F), U(nullptr) {} dwarf::Form getForm() const { return Form; } + void setForm(dwarf::Form F) { Form = F; } bool isFormClass(FormClass FC) const; const DWARFUnit *getUnit() const { return U; } void dump(raw_ostream &OS) const; diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index 637d12ddd43..638830ee568 100644 --- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -17,8 +19,10 @@ using namespace dwarf; void DWARFAbbreviationDeclaration::clear() { Code = 0; Tag = DW_TAG_null; + CodeByteSize = 0; HasChildren = false; AttributeSpecs.clear(); + FixedAttributeSize.reset(); } DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() { @@ -29,10 +33,12 @@ bool DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint32_t* OffsetPtr) { clear(); + const uint32_t Offset = *OffsetPtr; Code = Data.getULEB128(OffsetPtr); if (Code == 0) { return false; } + CodeByteSize = *OffsetPtr - Offset; Tag = static_cast(Data.getULEB128(OffsetPtr)); if (Tag == DW_TAG_null) { clear(); @@ -40,12 +46,52 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data, } uint8_t ChildrenByte 
= Data.getU8(OffsetPtr); HasChildren = (ChildrenByte == DW_CHILDREN_yes); + // Assign a value to our optional FixedAttributeSize member variable. If + // this member variable still has a value after the while loop below, then + // all attribute data in this abbreviation declaration has a fixed byte size. + FixedAttributeSize = FixedSizeInfo(); + // Read all of the abbreviation attributes and forms. while (true) { auto A = static_cast(Data.getULEB128(OffsetPtr)); auto F = static_cast
(Data.getULEB128(OffsetPtr)); if (A && F) { - AttributeSpecs.push_back(AttributeSpec(A, F)); + auto FixedFormByteSize = DWARFFormValue::getFixedByteSize(F); + AttributeSpecs.push_back(AttributeSpec(A, F, FixedFormByteSize)); + // If this abbrevation still has a fixed byte size, then update the + // FixedAttributeSize as needed. + if (FixedAttributeSize) { + if (FixedFormByteSize) + FixedAttributeSize->NumBytes += *FixedFormByteSize; + else { + switch (F) { + case DW_FORM_addr: + ++FixedAttributeSize->NumAddrs; + break; + + case DW_FORM_ref_addr: + ++FixedAttributeSize->NumRefAddrs; + break; + + case DW_FORM_strp: + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + case DW_FORM_line_strp: + case DW_FORM_sec_offset: + case DW_FORM_strp_sup: + case DW_FORM_ref_sup: + ++FixedAttributeSize->NumDwarfOffsets; + break; + + default: + // Indicate we no longer have a fixed byte size for this + // abbreviation by clearing the FixedAttributeSize optional value + // so it doesn't have a value. + FixedAttributeSize.reset(); + break; + } + } + } } else if (A == 0 && F == 0) { // We successfully reached the end of this abbreviation declaration // since both attribute and form are zero. 
@@ -88,11 +134,64 @@ void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const { OS << '\n'; } -uint32_t -DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute attr) const { +Optional +DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const { for (uint32_t i = 0, e = AttributeSpecs.size(); i != e; ++i) { - if (AttributeSpecs[i].Attr == attr) + if (AttributeSpecs[i].Attr == Attr) return i; } - return -1U; + return None; +} + +bool DWARFAbbreviationDeclaration::getAttributeValue( + const uint32_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U, + DWARFFormValue &FormValue) const { + Optional MatchAttrIndex = findAttributeIndex(Attr); + if (!MatchAttrIndex) + return false; + + auto DebugInfoData = U.getDebugInfoExtractor(); + + // Add the byte size of ULEB that for the abbrev Code so we can start + // skipping the attribute data. + uint32_t Offset = DIEOffset + CodeByteSize; + uint32_t AttrIndex = 0; + for (const auto &Spec : AttributeSpecs) { + if (*MatchAttrIndex == AttrIndex) { + // We have arrived at the attribute to extract, extract if from Offset. + FormValue.setForm(Spec.Form); + return FormValue.extractValue(DebugInfoData, &Offset, &U); + } + // March Offset along until we get to the attribute we want. + if (Optional FixedSize = Spec.getByteSize(U)) + Offset += *FixedSize; + else + DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset, &U); + ++AttrIndex; + } + return false; +} + +size_t DWARFAbbreviationDeclaration::FixedSizeInfo::getByteSize( + const DWARFUnit &U) const { + size_t ByteSize = NumBytes; + if (NumAddrs) + ByteSize += NumAddrs * U.getAddressByteSize(); + if (NumRefAddrs) + ByteSize += NumRefAddrs * U.getRefAddrByteSize(); + if (NumDwarfOffsets) + ByteSize += NumDwarfOffsets * U.getDwarfOffsetByteSize(); + return ByteSize; +} + +Optional DWARFAbbreviationDeclaration::AttributeSpec::getByteSize( + const DWARFUnit &U) const { + return ByteSize ? 
ByteSize : DWARFFormValue::getFixedByteSize(Form, &U); +} + +Optional DWARFAbbreviationDeclaration::getFixedAttributesByteSize( + const DWARFUnit &U) const { + if (FixedAttributeSize) + return FixedAttributeSize->getByteSize(U); + return None; } diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp index 7890bf0368a..d11c5ae7133 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp @@ -30,7 +30,7 @@ static const DWARFUnit *findUnitAndExtractFast(DWARFDebugInfoEntryMinimal &DIE, const DWARFUnit *Unit, uint32_t *Offset) { Unit = Unit->getUnitSection().getUnitForOffset(*Offset); - return (Unit && DIE.extractFast(Unit, Offset)) ? Unit : nullptr; + return (Unit && DIE.extractFast(*Unit, Offset)) ? Unit : nullptr; } void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, DWARFUnit *u, @@ -183,11 +183,17 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS, OS << ")\n"; } -bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U, +bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr) { + DataExtractor DebugInfoData = U.getDebugInfoExtractor(); + const uint32_t UEndOffset = U.getNextUnitOffset(); + return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset); +} +bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit &U, + uint32_t *OffsetPtr, + const DataExtractor &DebugInfoData, + uint32_t UEndOffset) { Offset = *OffsetPtr; - DataExtractor DebugInfoData = U->getDebugInfoExtractor(); - uint32_t UEndOffset = U->getNextUnitOffset(); if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset)) return false; uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr); @@ -196,21 +202,29 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U, AbbrevDecl = nullptr; return true; } - AbbrevDecl = U->getAbbreviations()->getAbbreviationDeclaration(AbbrCode); + AbbrevDecl = 
U.getAbbreviations()->getAbbreviationDeclaration(AbbrCode); if (nullptr == AbbrevDecl) { // Restore the original offset. *OffsetPtr = Offset; return false; } + // See if all attributes in this DIE have fixed byte sizes. If so, we can + // just add this size to the offset to skip to the next DIE. + if (Optional FixedSize = AbbrevDecl->getFixedAttributesByteSize(U)) { + *OffsetPtr += *FixedSize; + return true; + } // Skip all data in the .debug_info for the attributes for (const auto &AttrSpec : AbbrevDecl->attributes()) { - auto Form = AttrSpec.Form; - - if (Optional FixedSize = DWARFFormValue::getFixedByteSize(Form, U)) + // Check if this attribute has a fixed byte size. + if (Optional FixedSize = AttrSpec.getByteSize(U)) { + // Attribute byte size if fixed, just add the size to the offset. *OffsetPtr += *FixedSize; - else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U)) { - // Restore the original offset. + } else if (!DWARFFormValue::skipValue(AttrSpec.Form, DebugInfoData, + OffsetPtr, &U)) { + // We failed to skip this attribute's value, restore the original offset + // and return the failure status. *OffsetPtr = Offset; return false; } @@ -230,27 +244,9 @@ bool DWARFDebugInfoEntryMinimal::isSubroutineDIE() const { bool DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFUnit *U, dwarf::Attribute Attr, DWARFFormValue &FormValue) const { - if (!AbbrevDecl) + if (!AbbrevDecl || !U) return false; - - uint32_t AttrIdx = AbbrevDecl->findAttributeIndex(Attr); - if (AttrIdx == -1U) - return false; - - DataExtractor DebugInfoData = U->getDebugInfoExtractor(); - uint32_t DebugInfoOffset = getOffset(); - - // Skip the abbreviation code so we are at the data for the attributes - DebugInfoData.getULEB128(&DebugInfoOffset); - - // Skip preceding attribute values. 
- for (uint32_t i = 0; i < AttrIdx; ++i) { - DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(i), - DebugInfoData, &DebugInfoOffset, U); - } - - FormValue = DWARFFormValue(AbbrevDecl->getFormByIndex(AttrIdx)); - return FormValue.extractValue(DebugInfoData, &DebugInfoOffset, U); + return AbbrevDecl->getAttributeValue(Offset, Attr, *U, FormValue); } const char *DWARFDebugInfoEntryMinimal::getAttributeValueAsString( diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp index 7fb59983dc5..85ef3e42935 100644 --- a/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -196,10 +196,11 @@ void DWARFUnit::extractDIEsToVector( uint32_t DIEOffset = Offset + getHeaderSize(); uint32_t NextCUOffset = getNextUnitOffset(); DWARFDebugInfoEntryMinimal DIE; + DataExtractor DebugInfoData = getDebugInfoExtractor(); uint32_t Depth = 0; bool IsCUDie = true; - while (DIEOffset < NextCUOffset && DIE.extractFast(this, &DIEOffset)) { + while (DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset)) { if (IsCUDie) { if (AppendCUDie) Dies.push_back(DIE); diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp index aa769058c72..ef4f8a940d3 100644 --- a/tools/dsymutil/DwarfLinker.cpp +++ b/tools/dsymutil/DwarfLinker.cpp @@ -2082,20 +2082,21 @@ unsigned DwarfLinker::shouldKeepVariableDIE(RelocationManager &RelocMgr, // Global variables with constant value can always be kept. 
if (!(Flags & TF_InFunctionScope) && - Abbrev->findAttributeIndex(dwarf::DW_AT_const_value) != -1U) { + Abbrev->findAttributeIndex(dwarf::DW_AT_const_value)) { MyInfo.InDebugMap = true; return Flags | TF_Keep; } - uint32_t LocationIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_location); - if (LocationIdx == -1U) + Optional LocationIdx = + Abbrev->findAttributeIndex(dwarf::DW_AT_location); + if (!LocationIdx) return Flags; uint32_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode()); const DWARFUnit &OrigUnit = Unit.getOrigUnit(); uint32_t LocationOffset, LocationEndOffset; std::tie(LocationOffset, LocationEndOffset) = - getAttributeOffsets(Abbrev, LocationIdx, Offset, OrigUnit); + getAttributeOffsets(Abbrev, *LocationIdx, Offset, OrigUnit); // See if there is a relocation to a valid debug map entry inside // this variable's location. The order is important here. We want to @@ -2122,15 +2123,15 @@ unsigned DwarfLinker::shouldKeepSubprogramDIE( Flags |= TF_InFunctionScope; - uint32_t LowPcIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_low_pc); - if (LowPcIdx == -1U) + Optional LowPcIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_low_pc); + if (!LowPcIdx) return Flags; uint32_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode()); const DWARFUnit &OrigUnit = Unit.getOrigUnit(); uint32_t LowPcOffset, LowPcEndOffset; std::tie(LowPcOffset, LowPcEndOffset) = - getAttributeOffsets(Abbrev, LowPcIdx, Offset, OrigUnit); + getAttributeOffsets(Abbrev, *LowPcIdx, Offset, OrigUnit); uint64_t LowPc = DIE.getAttributeValueAsAddress(&OrigUnit, dwarf::DW_AT_low_pc, -1ULL);