From 48fecb2d18451d5f779c0808dfa899a560d8bf54 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Mon, 16 Mar 2020 09:53:49 -0700 Subject: [PATCH] [NativeSession] Implement NativeSession::findSymbolByAddress. Summary: This implements searching for function symbols and public symbols by address. More specifically, -Implements NativeSession::findSymbolByAddress for function symbols and public symbols. I think data symbols are also searched for, but that isn't implemented in this patch. -Adds classes for NativeFunctionSymbol and NativePublicSymbol -Adds a '-use-native-pdb-reader' option to llvm-symbolizer, for testing purposes. Reviewers: rnk, amccarth, labath Subscribers: mgorny, hiraditya, MaskRay, rupprecht, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D79269 --- include/llvm/DebugInfo/PDB/DIA/DIASession.h | 12 +- include/llvm/DebugInfo/PDB/IPDBSession.h | 11 +- .../PDB/Native/NativeFunctionSymbol.h | 45 +++++ .../DebugInfo/PDB/Native/NativePublicSymbol.h | 44 +++++ .../llvm/DebugInfo/PDB/Native/NativeSession.h | 14 +- .../llvm/DebugInfo/PDB/Native/SymbolCache.h | 24 +++ include/llvm/DebugInfo/Symbolize/Symbolize.h | 1 + lib/DebugInfo/PDB/CMakeLists.txt | 2 + lib/DebugInfo/PDB/DIA/DIASession.cpp | 8 +- .../PDB/Native/NativeFunctionSymbol.cpp | 57 ++++++ .../PDB/Native/NativePublicSymbol.cpp | 52 ++++++ lib/DebugInfo/PDB/Native/NativeSession.cpp | 48 ++++-- lib/DebugInfo/PDB/Native/SymbolCache.cpp | 163 +++++++++++++++++- lib/DebugInfo/Symbolize/Symbolize.cpp | 7 +- .../tools/llvm-symbolizer/pdb/pdb-native.test | 39 +++++ tools/llvm-symbolizer/llvm-symbolizer.cpp | 5 + unittests/DebugInfo/PDB/NativeSessionTest.cpp | 10 +- unittests/DebugInfo/PDB/PDBApiTest.cpp | 12 +- .../secondary/llvm/lib/DebugInfo/PDB/BUILD.gn | 2 + 19 files changed, 512 insertions(+), 44 deletions(-) create mode 100644 include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h create mode 100644 include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h create mode 100644 
lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp create mode 100644 lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp create mode 100644 test/tools/llvm-symbolizer/pdb/pdb-native.test diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASession.h b/include/llvm/DebugInfo/PDB/DIA/DIASession.h index 6f62e6061f5..09ab9e2861c 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIASession.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIASession.h @@ -38,13 +38,13 @@ public: bool addressForRVA(uint32_t RVA, uint32_t &Section, uint32_t &Offset) const override; - std::unique_ptr - findSymbolByAddress(uint64_t Address, PDB_SymType Type) const override; + std::unique_ptr findSymbolByAddress(uint64_t Address, + PDB_SymType Type) override; std::unique_ptr findSymbolByRVA(uint32_t RVA, - PDB_SymType Type) const override; - std::unique_ptr - findSymbolBySectOffset(uint32_t Section, uint32_t Offset, - PDB_SymType Type) const override; + PDB_SymType Type) override; + std::unique_ptr findSymbolBySectOffset(uint32_t Section, + uint32_t Offset, + PDB_SymType Type) override; std::unique_ptr findLineNumbers(const PDBSymbolCompiland &Compiland, diff --git a/include/llvm/DebugInfo/PDB/IPDBSession.h b/include/llvm/DebugInfo/PDB/IPDBSession.h index aa8d9c76d63..7e38654c655 100644 --- a/include/llvm/DebugInfo/PDB/IPDBSession.h +++ b/include/llvm/DebugInfo/PDB/IPDBSession.h @@ -42,13 +42,12 @@ public: return unique_dyn_cast_or_null(getSymbolById(SymbolId)); } + virtual std::unique_ptr findSymbolByAddress(uint64_t Address, + PDB_SymType Type) = 0; + virtual std::unique_ptr findSymbolByRVA(uint32_t RVA, + PDB_SymType Type) = 0; virtual std::unique_ptr - findSymbolByAddress(uint64_t Address, PDB_SymType Type) const = 0; - virtual std::unique_ptr - findSymbolByRVA(uint32_t RVA, PDB_SymType Type) const = 0; - virtual std::unique_ptr - findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, - PDB_SymType Type) const = 0; + findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, PDB_SymType Type) = 0; virtual 
std::unique_ptr findLineNumbers(const PDBSymbolCompiland &Compiland, diff --git a/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h new file mode 100644 index 00000000000..4adf89f0d69 --- /dev/null +++ b/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h @@ -0,0 +1,45 @@ +//===- NativeFunctionSymbol.h - info about function symbols -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEFUNCTIONSYMBOL_H +#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEFUNCTIONSYMBOL_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" + +namespace llvm { +namespace pdb { + +class NativeFunctionSymbol : public NativeRawSymbol { +public: + NativeFunctionSymbol(NativeSession &Session, SymIndexId Id, + const codeview::ProcSym &Sym); + + ~NativeFunctionSymbol() override; + + void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields, + PdbSymbolIdField RecurseIdFields) const override; + + uint32_t getAddressOffset() const override; + uint32_t getAddressSection() const override; + std::string getName() const override; + PDB_SymType getSymTag() const override; + uint64_t getLength() const override; + uint32_t getRelativeVirtualAddress() const override; + uint64_t getVirtualAddress() const override; + +protected: + const codeview::ProcSym Sym; +}; + +} // namespace pdb +} // namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVEFUNCTIONSYMBOL_H diff --git a/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h 
b/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h new file mode 100644 index 00000000000..0a1451530f1 --- /dev/null +++ b/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h @@ -0,0 +1,44 @@ +//===- NativePublicSymbol.h - info about public symbols ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEPUBLICSYMBOL_H +#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEPUBLICSYMBOL_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" + +namespace llvm { +namespace pdb { + +class NativePublicSymbol : public NativeRawSymbol { +public: + NativePublicSymbol(NativeSession &Session, SymIndexId Id, + const codeview::PublicSym32 &Sym); + + ~NativePublicSymbol() override; + + void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields, + PdbSymbolIdField RecurseIdFields) const override; + + uint32_t getAddressOffset() const override; + uint32_t getAddressSection() const override; + std::string getName() const override; + PDB_SymType getSymTag() const override; + uint32_t getRelativeVirtualAddress() const override; + uint64_t getVirtualAddress() const override; + +protected: + const codeview::PublicSym32 Sym; +}; + +} // namespace pdb +} // namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVEPUBLICSYMBOL_H diff --git a/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/include/llvm/DebugInfo/PDB/Native/NativeSession.h index 26b1992a03d..342e63599e6 100644 --- a/include/llvm/DebugInfo/PDB/Native/NativeSession.h +++ b/include/llvm/DebugInfo/PDB/Native/NativeSession.h @@ -54,13 +54,13 @@ public: bool 
addressForRVA(uint32_t RVA, uint32_t &Section, uint32_t &Offset) const override; - std::unique_ptr - findSymbolByAddress(uint64_t Address, PDB_SymType Type) const override; + std::unique_ptr findSymbolByAddress(uint64_t Address, + PDB_SymType Type) override; std::unique_ptr findSymbolByRVA(uint32_t RVA, - PDB_SymType Type) const override; - std::unique_ptr - findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, - PDB_SymType Type) const override; + PDB_SymType Type) override; + std::unique_ptr findSymbolBySectOffset(uint32_t Sect, + uint32_t Offset, + PDB_SymType Type) override; std::unique_ptr findLineNumbers(const PDBSymbolCompiland &Compiland, @@ -108,6 +108,8 @@ public: NativeExeSymbol &getNativeGlobalScope() const; SymbolCache &getSymbolCache() { return Cache; } const SymbolCache &getSymbolCache() const { return Cache; } + uint32_t getRVAFromSectOffset(uint32_t Section, uint32_t Offset) const; + uint64_t getVAFromSectOffset(uint32_t Section, uint32_t Offset) const; private: void initializeExeSymbol(); diff --git a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h index bf9eb717f98..33428bfbf42 100644 --- a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h +++ b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h @@ -10,6 +10,8 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLCACHE_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" @@ -51,6 +53,16 @@ class SymbolCache { /// Map from global symbol offset to SymIndexId. DenseMap GlobalOffsetToSymbolId; + /// Map from segment and code offset to SymIndexId. + DenseMap, SymIndexId> AddressToFunctionSymId; + DenseMap, SymIndexId> AddressToPublicSymId; + + /// Map from virtual address to module index. 
+ using IMap = + IntervalMap>; + IMap::Allocator IMapAllocator; + IMap AddrToModuleIndex; + SymIndexId createSymbolPlaceholder() { SymIndexId Id = Cache.size(); Cache.push_back(nullptr); @@ -77,6 +89,15 @@ class SymbolCache { SymIndexId createSimpleType(codeview::TypeIndex TI, codeview::ModifierOptions Mods); + std::unique_ptr findFunctionSymbolBySectOffset(uint32_t Sect, + uint32_t Offset); + std::unique_ptr findPublicSymbolBySectOffset(uint32_t Sect, + uint32_t Offset); + + void parseSectionContribs(); + Optional getModuleIndexForAddr(uint32_t Sect, + uint32_t Offset) const; + public: SymbolCache(NativeSession &Session, DbiStream *Dbi); @@ -127,6 +148,9 @@ public: SymIndexId getOrCreateGlobalSymbolByOffset(uint32_t Offset); + std::unique_ptr + findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, PDB_SymType Type); + std::unique_ptr getOrCreateCompiland(uint32_t Index); uint32_t getNumCompilands() const; diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h index b0972ed1158..085e4bb4ccb 100644 --- a/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -43,6 +43,7 @@ public: bool Demangle = true; bool RelativeAddresses = false; bool UntagAddresses = false; + bool UseNativePDBReader = false; std::string DefaultArch; std::vector DsymHints; std::string FallbackDebugPath; diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt index 320ca78b525..1eda97a06a5 100644 --- a/lib/DebugInfo/PDB/CMakeLists.txt +++ b/lib/DebugInfo/PDB/CMakeLists.txt @@ -55,6 +55,8 @@ add_pdb_impl_folder(Native Native/NativeEnumModules.cpp Native/NativeEnumTypes.cpp Native/NativeExeSymbol.cpp + Native/NativeFunctionSymbol.cpp + Native/NativePublicSymbol.cpp Native/NativeRawSymbol.cpp Native/NativeSymbolEnumerator.cpp Native/NativeTypeArray.cpp diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp index 64ffa776bbd..2729e323696 100644 --- 
a/lib/DebugInfo/PDB/DIA/DIASession.cpp +++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp @@ -189,8 +189,8 @@ DIASession::getSymbolById(SymIndexId SymbolId) const { return PDBSymbol::create(*this, std::move(RawSymbol)); } -std::unique_ptr -DIASession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { +std::unique_ptr DIASession::findSymbolByAddress(uint64_t Address, + PDB_SymType Type) { enum SymTagEnum EnumVal = static_cast(Type); CComPtr Symbol; @@ -207,7 +207,7 @@ DIASession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { } std::unique_ptr DIASession::findSymbolByRVA(uint32_t RVA, - PDB_SymType Type) const { + PDB_SymType Type) { enum SymTagEnum EnumVal = static_cast(Type); CComPtr Symbol; @@ -220,7 +220,7 @@ std::unique_ptr DIASession::findSymbolByRVA(uint32_t RVA, std::unique_ptr DIASession::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, - PDB_SymType Type) const { + PDB_SymType Type) { enum SymTagEnum EnumVal = static_cast(Type); CComPtr Symbol; diff --git a/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp new file mode 100644 index 00000000000..2537daa7493 --- /dev/null +++ b/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp @@ -0,0 +1,57 @@ +//===- NativeFunctionSymbol.cpp - info about function symbols----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h" + +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +NativeFunctionSymbol::NativeFunctionSymbol(NativeSession &Session, + SymIndexId Id, + const codeview::ProcSym &Sym) + : NativeRawSymbol(Session, PDB_SymType::Data, Id), Sym(Sym) {} + +NativeFunctionSymbol::~NativeFunctionSymbol() {} + +void NativeFunctionSymbol::dump(raw_ostream &OS, int Indent, + PdbSymbolIdField ShowIdFields, + PdbSymbolIdField RecurseIdFields) const { + NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields); + dumpSymbolField(OS, "name", getName(), Indent); + dumpSymbolField(OS, "length", getLength(), Indent); + dumpSymbolField(OS, "offset", getAddressOffset(), Indent); + dumpSymbolField(OS, "section", getAddressSection(), Indent); +} + +uint32_t NativeFunctionSymbol::getAddressOffset() const { + return Sym.CodeOffset; +} + +uint32_t NativeFunctionSymbol::getAddressSection() const { return Sym.Segment; } +std::string NativeFunctionSymbol::getName() const { + return std::string(Sym.Name); +} + +PDB_SymType NativeFunctionSymbol::getSymTag() const { + return PDB_SymType::Function; +} + +uint64_t NativeFunctionSymbol::getLength() const { return Sym.CodeSize; } + +uint32_t NativeFunctionSymbol::getRelativeVirtualAddress() const { + return Session.getRVAFromSectOffset(Sym.Segment, Sym.CodeOffset); +} + +uint64_t NativeFunctionSymbol::getVirtualAddress() const { + return Session.getVAFromSectOffset(Sym.Segment, Sym.CodeOffset); +} diff --git a/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp b/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp new file mode 100644 index 
00000000000..7086af7e67a --- /dev/null +++ b/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp @@ -0,0 +1,52 @@ +//===- NativePublicSymbol.cpp - info about public symbols -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h" + +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +NativePublicSymbol::NativePublicSymbol(NativeSession &Session, SymIndexId Id, + const codeview::PublicSym32 &Sym) + : NativeRawSymbol(Session, PDB_SymType::Data, Id), Sym(Sym) {} + +NativePublicSymbol::~NativePublicSymbol() {} + +void NativePublicSymbol::dump(raw_ostream &OS, int Indent, + PdbSymbolIdField ShowIdFields, + PdbSymbolIdField RecurseIdFields) const { + NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields); + dumpSymbolField(OS, "name", getName(), Indent); + dumpSymbolField(OS, "offset", getAddressOffset(), Indent); + dumpSymbolField(OS, "section", getAddressSection(), Indent); +} + +uint32_t NativePublicSymbol::getAddressOffset() const { return Sym.Offset; } + +uint32_t NativePublicSymbol::getAddressSection() const { return Sym.Segment; } + +std::string NativePublicSymbol::getName() const { + return std::string(Sym.Name); +} + +PDB_SymType NativePublicSymbol::getSymTag() const { + return PDB_SymType::PublicSymbol; +} + +uint32_t NativePublicSymbol::getRelativeVirtualAddress() const { + return Session.getRVAFromSectOffset(Sym.Segment, Sym.Offset); +} + +uint64_t NativePublicSymbol::getVirtualAddress() const { + return 
Session.getVAFromSectOffset(Sym.Segment, Sym.Offset); +} diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp index 9a9254f4302..043d2361af5 100644 --- a/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -43,6 +43,7 @@ using namespace llvm; using namespace llvm::msf; using namespace llvm::pdb; +using namespace llvm::codeview; static DbiStream *getDbiStreamPtr(PDBFile &File) { Expected DbiS = File.getPDBDbiStream(); @@ -210,13 +211,13 @@ bool NativeSession::addressForVA(uint64_t VA, uint32_t &Section, bool NativeSession::addressForRVA(uint32_t RVA, uint32_t &Section, uint32_t &Offset) const { + Section = 0; + Offset = 0; + auto Dbi = Pdb->getPDBDbiStream(); if (!Dbi) return false; - Section = 0; - Offset = 0; - if ((int32_t)RVA < 0) return true; @@ -231,19 +232,25 @@ bool NativeSession::addressForRVA(uint32_t RVA, uint32_t &Section, } std::unique_ptr -NativeSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { - return nullptr; +NativeSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) { + uint32_t Section; + uint32_t Offset; + addressForVA(Address, Section, Offset); + return findSymbolBySectOffset(Section, Offset, Type); } -std::unique_ptr -NativeSession::findSymbolByRVA(uint32_t RVA, PDB_SymType Type) const { - return nullptr; +std::unique_ptr NativeSession::findSymbolByRVA(uint32_t RVA, + PDB_SymType Type) { + uint32_t Section; + uint32_t Offset; + addressForRVA(RVA, Section, Offset); + return findSymbolBySectOffset(Section, Offset, Type); } std::unique_ptr NativeSession::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, - PDB_SymType Type) const { - return nullptr; + PDB_SymType Type) { + return Cache.findSymbolBySectOffset(Sect, Offset, Type); } std::unique_ptr @@ -352,3 +359,24 @@ NativeExeSymbol &NativeSession::getNativeGlobalScope() const { return Cache.getNativeSymbolById(ExeSymbol); } + +uint32_t 
NativeSession::getRVAFromSectOffset(uint32_t Section, + uint32_t Offset) const { + if (Section <= 0) + return 0; + + auto Dbi = getDbiStreamPtr(*Pdb); + if (!Dbi) + return 0; + + uint32_t MaxSection = Dbi->getSectionHeaders().size(); + if (Section > MaxSection + 1) + Section = MaxSection + 1; + auto &Sec = Dbi->getSectionHeaders()[Section - 1]; + return Sec.VirtualAddress + Offset; +} + +uint64_t NativeSession::getVAFromSectOffset(uint32_t Section, + uint32_t Offset) const { + return LoadAddress + getRVAFromSectOffset(Section, Offset); +} diff --git a/lib/DebugInfo/PDB/Native/SymbolCache.cpp b/lib/DebugInfo/PDB/Native/SymbolCache.cpp index 5cdd628312f..554eea06dc6 100644 --- a/lib/DebugInfo/PDB/Native/SymbolCache.cpp +++ b/lib/DebugInfo/PDB/Native/SymbolCache.cpp @@ -1,13 +1,18 @@ #include "llvm/DebugInfo/PDB/Native/SymbolCache.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" +#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h" +#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h" +#include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h" +#include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/NativeTypeArray.h" @@ -19,6 +24,7 @@ #include "llvm/DebugInfo/PDB/Native/NativeTypeUDT.h" #include "llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStream.h" #include 
"llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" @@ -62,7 +68,7 @@ static const struct BuiltinTypeEntry { }; SymbolCache::SymbolCache(NativeSession &Session, DbiStream *Dbi) - : Session(Session), Dbi(Dbi) { + : Session(Session), Dbi(Dbi), AddrToModuleIndex(IMapAllocator) { // Id 0 is reserved for the invalid symbol. Cache.push_back(nullptr); @@ -281,6 +287,123 @@ SymIndexId SymbolCache::getOrCreateGlobalSymbolByOffset(uint32_t Offset) { return Id; } +std::unique_ptr +SymbolCache::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, + PDB_SymType Type) { + if (AddrToModuleIndex.empty()) + parseSectionContribs(); + + switch (Type) { + case PDB_SymType::Function: + return findFunctionSymbolBySectOffset(Sect, Offset); + case PDB_SymType::PublicSymbol: + return findPublicSymbolBySectOffset(Sect, Offset); + case PDB_SymType::None: { + // FIXME: Implement for PDB_SymType::Data. + if (auto Sym = findFunctionSymbolBySectOffset(Sect, Offset)) + return Sym; + return nullptr; + } + default: + return nullptr; + } +} + +std::unique_ptr +SymbolCache::findFunctionSymbolBySectOffset(uint32_t Sect, uint32_t Offset) { + auto Iter = AddressToFunctionSymId.find({Sect, Offset}); + if (Iter != AddressToFunctionSymId.end()) + return getSymbolById(Iter->second); + + if (!Dbi) + return nullptr; + + auto Modi = getModuleIndexForAddr(Sect, Offset); + if (!Modi) + return nullptr; + + DbiModuleDescriptor ModDesc = Dbi->modules().getModuleDescriptor(*Modi); + uint16_t StreamIndex = ModDesc.getModuleStreamIndex(); + if (StreamIndex == kInvalidStreamIndex) + return nullptr; + auto ModStreamData = Session.getPDBFile().createIndexedStream(StreamIndex); + ModuleDebugStreamRef ModS(ModDesc, std::move(ModStreamData)); + if (auto EC = ModS.reload()) { + consumeError(std::move(EC)); + return nullptr; + } + + // Search for the symbol in this module. 
+ CVSymbolArray Syms = ModS.getSymbolArray(); + for (auto I = Syms.begin(), E = Syms.end(); I != E; ++I) { + if (I->kind() != S_LPROC32 && I->kind() != S_GPROC32) + continue; + auto PS = cantFail(SymbolDeserializer::deserializeAs(*I)); + if (Sect == PS.Segment && Offset >= PS.CodeOffset && + Offset < PS.CodeOffset + PS.CodeSize) { + SymIndexId Id = createSymbol(PS); + return getSymbolById(Id); + } + + // Jump to the end of this ProcSym. + I = Syms.at(PS.End); + } + return nullptr; +} + +std::unique_ptr +SymbolCache::findPublicSymbolBySectOffset(uint32_t Sect, uint32_t Offset) { + auto Iter = AddressToPublicSymId.find({Sect, Offset}); + if (Iter != AddressToPublicSymId.end()) + return getSymbolById(Iter->second); + + auto Publics = Session.getPDBFile().getPDBPublicsStream(); + if (!Publics) + return nullptr; + + auto ExpectedSyms = Session.getPDBFile().getPDBSymbolStream(); + if (!ExpectedSyms) + return nullptr; + BinaryStreamRef SymStream = + ExpectedSyms->getSymbolArray().getUnderlyingStream(); + + // Use binary search to find the first public symbol with an address greater + // than or equal to Sect, Offset. 
+ auto AddrMap = Publics->getAddressMap(); + auto First = AddrMap.begin(); + auto It = AddrMap.begin(); + size_t Count = AddrMap.size(); + size_t Half; + while (Count > 0) { + It = First; + Half = Count / 2; + It += Half; + Expected Sym = readSymbolFromStream(SymStream, *It); + if (!Sym) { + consumeError(Sym.takeError()); + return nullptr; + } + + auto PS = + cantFail(SymbolDeserializer::deserializeAs(Sym.get())); + if (PS.Segment < Sect || (PS.Segment == Sect && PS.Offset <= Offset)) { + First = ++It; + Count -= Half + 1; + } else + Count = Half; + } + --It; + + Expected Sym = readSymbolFromStream(SymStream, *It); + if (!Sym) { + consumeError(Sym.takeError()); + return nullptr; + } + auto PS = cantFail(SymbolDeserializer::deserializeAs(Sym.get())); + SymIndexId Id = createSymbol(PS); + return getSymbolById(Id); +} + std::unique_ptr SymbolCache::getOrCreateCompiland(uint32_t Index) { if (!Dbi) @@ -297,3 +420,41 @@ SymbolCache::getOrCreateCompiland(uint32_t Index) { return Session.getConcreteSymbolById(Compilands[Index]); } + +void SymbolCache::parseSectionContribs() { + if (!Dbi) + return; + + class Visitor : public ISectionContribVisitor { + NativeSession &Session; + IMap &AddrMap; + + public: + Visitor(NativeSession &Session, IMap &AddrMap) + : Session(Session), AddrMap(AddrMap) {} + void visit(const SectionContrib &C) override { + if (C.Size == 0) + return; + + uint64_t VA = Session.getVAFromSectOffset(C.ISect, C.Off); + uint64_t End = VA + C.Size; + + // Ignore overlapping sections based on the assumption that a valid + // PDB file should not have overlaps. 
+ if (!AddrMap.overlaps(VA, End)) + AddrMap.insert(VA, End, C.Imod); + } + void visit(const SectionContrib2 &C) override { visit(C.Base); } + }; + + Visitor V(Session, AddrToModuleIndex); + Dbi->visitSectionContributions(V); +} + +Optional SymbolCache::getModuleIndexForAddr(uint32_t Sect, + uint32_t Offset) const { + auto Iter = AddrToModuleIndex.find(Session.getVAFromSectOffset(Sect, Offset)); + if (Iter == AddrToModuleIndex.end()) + return None; + return Iter.value(); +} diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp index 768c306dfe3..ff017b07801 100644 --- a/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -555,8 +555,11 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) { using namespace pdb; std::unique_ptr Session; - if (auto Err = loadDataForEXE(PDB_ReaderType::DIA, - Objects.first->getFileName(), Session)) { + PDB_ReaderType ReaderType = Opts.UseNativePDBReader + ? 
PDB_ReaderType::Native + : PDB_ReaderType::DIA; + if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(), + Session)) { Modules.emplace(ModuleName, std::unique_ptr()); // Return along the PDB filename to provide more context return createFileError(PDBFileName, std::move(Err)); diff --git a/test/tools/llvm-symbolizer/pdb/pdb-native.test b/test/tools/llvm-symbolizer/pdb/pdb-native.test new file mode 100644 index 00000000000..29a770e402e --- /dev/null +++ b/test/tools/llvm-symbolizer/pdb/pdb-native.test @@ -0,0 +1,39 @@ +RUN: echo 0x401380 > %t.input +RUN: echo 0x401390 >> %t.input +RUN: echo 0x4013A0 >> %t.input +RUN: echo 0x4013C0 >> %t.input +RUN: echo 0x4013D0 >> %t.input +RUN: echo 0x4013E0 >> %t.input +RUN: echo 0x4013F0 >> %t.input +RUN: echo 0x401420 >> %t.input +RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -use-native-pdb-reader < %t.input \ +RUN: | FileCheck %s +RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false -use-native-pdb-reader < %t.input \ +RUN: | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE + +Subtract ImageBase from all the offsets and run the test again with +--relative-address. + +RUN: %python -c 'import sys;print("\n".join([hex(int(x, 16) - 0x400000) for x in sys.stdin]))' < %t.input \ +RUN: | llvm-symbolizer -obj="%p/Inputs/test.exe" -use-native-pdb-reader -demangle=false --relative-address \ +RUN: | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE + +Currently only finding function/public symbol names is implemented. 
+ +CHECK: foo(void) +CHECK: {{^private_symbol$}} +CHECK: {{^main}} +CHECK: {{^foo_cdecl$}} +CHECK: {{^foo_stdcall$}} +CHECK: {{^foo_fastcall$}} +CHECK: {{^foo_vectorcall$}} +CHECK: NS::Foo::bar(void) + +CHECK-NO-DEMANGLE: ?foo@@YAXXZ +CHECK-NO-DEMANGLE: private_symbol +CHECK-NO-DEMANGLE: _main +CHECK-NO-DEMANGLE: _foo_cdecl +CHECK-NO-DEMANGLE: _foo_stdcall@0 +CHECK-NO-DEMANGLE: @foo_fastcall@0 +CHECK-NO-DEMANGLE: foo_vectorcall@@0 +CHECK-NO-DEMANGLE: ?bar@Foo@NS@@QAEXXZ diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp index bb282a52b33..6a702c64a10 100644 --- a/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -163,6 +163,10 @@ static cl::opt clEnumValN(DIPrinter::OutputStyle::GNU, "GNU", "GNU addr2line style"))); +static cl::opt + ClUseNativePDBReader("use-native-pdb-reader", cl::init(0), + cl::desc("Use native PDB functionality")); + static cl::extrahelp HelpResponse("\nPass @FILE as argument to read options from FILE.\n"); @@ -313,6 +317,7 @@ int main(int argc, char **argv) { Opts.FallbackDebugPath = ClFallbackDebugPath; Opts.DWPName = ClDwpName; Opts.DebugFileDirectory = ClDebugFileDirectory; + Opts.UseNativePDBReader = ClUseNativePDBReader; Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath; // If both --basenames and --relativenames are specified then pick the last // one. 
diff --git a/unittests/DebugInfo/PDB/NativeSessionTest.cpp b/unittests/DebugInfo/PDB/NativeSessionTest.cpp index c0606315c96..002dd6579a4 100644 --- a/unittests/DebugInfo/PDB/NativeSessionTest.cpp +++ b/unittests/DebugInfo/PDB/NativeSessionTest.cpp @@ -9,6 +9,8 @@ #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDB.h" +#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" +#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h" #include "llvm/Support/Path.h" #include "llvm/Testing/Support/Error.h" @@ -30,9 +32,11 @@ static std::string getExePath() { TEST(NativeSessionTest, TestCreateFromExe) { std::unique_ptr S; - // Tests that the PDB file can be found if it is in the same directory as the - // executable. - Error E = pdb::loadDataForEXE(PDB_ReaderType::Native, getExePath(), S); + std::string ExePath = getExePath(); + Expected PdbPath = NativeSession::searchForPdb({ExePath}); + ASSERT_TRUE((bool)PdbPath); + + Error E = NativeSession::createFromPdbPath(PdbPath.get(), S); ASSERT_THAT_ERROR(std::move(E), Succeeded()); } diff --git a/unittests/DebugInfo/PDB/PDBApiTest.cpp b/unittests/DebugInfo/PDB/PDBApiTest.cpp index f48e8379046..eb3b696acde 100644 --- a/unittests/DebugInfo/PDB/PDBApiTest.cpp +++ b/unittests/DebugInfo/PDB/PDBApiTest.cpp @@ -82,17 +82,17 @@ class MockSession : public IPDBSession { uint32_t &Offset) const override { return false; } - std::unique_ptr - findSymbolByAddress(uint64_t Address, PDB_SymType Type) const override { + std::unique_ptr findSymbolByAddress(uint64_t Address, + PDB_SymType Type) override { return nullptr; } std::unique_ptr findSymbolByRVA(uint32_t RVA, - PDB_SymType Type) const override { + PDB_SymType Type) override { return nullptr; } - std::unique_ptr - findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, - PDB_SymType Type) const override { + std::unique_ptr findSymbolBySectOffset(uint32_t Sect, + uint32_t Offset, + PDB_SymType Type) override { return nullptr; 
} std::unique_ptr diff --git a/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn b/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn index d38b2bb214c..a11ace50151 100644 --- a/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn +++ b/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn @@ -33,6 +33,8 @@ static_library("PDB") { "Native/NativeEnumModules.cpp", "Native/NativeEnumTypes.cpp", "Native/NativeExeSymbol.cpp", + "Native/NativeFunctionSymbol.cpp", + "Native/NativePublicSymbol.cpp", "Native/NativeRawSymbol.cpp", "Native/NativeSession.cpp", "Native/NativeSymbolEnumerator.cpp",