diff --git a/include/llvm/DebugInfo/GSYM/FileEntry.h b/include/llvm/DebugInfo/GSYM/FileEntry.h
new file mode 100644
index 00000000000..7d0fb3eaec7
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/FileEntry.h
@@ -0,0 +1,69 @@
+//===- FileEntry.h ----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H
+#define LLVM_DEBUGINFO_GSYM_FILEENTRY_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
+#include <functional> // NOTE(review): header names restored by guess; confirm
+#include <stdint.h>
+#include <utility>
+
+namespace llvm {
+namespace gsym {
+
+/// Files in GSYM are contained in FileEntry structs where we split the
+/// directory and basename into two different strings in the string
+/// table. This allows paths to share common directory and filename
+/// strings and saves space.
+struct FileEntry {
+
+  /// Offsets in the string table.
+  /// @{
+  uint32_t Dir = 0;
+  uint32_t Base = 0;
+  /// @}
+
+  FileEntry() = default;
+  FileEntry(uint32_t D, uint32_t B) : Dir(D), Base(B) {}
+
+  // Implement operator== so that FileEntry can be used as key in
+  // unordered containers.
+  bool operator==(const FileEntry &RHS) const {
+    return Base == RHS.Base && Dir == RHS.Dir;
+  }
+  bool operator!=(const FileEntry &RHS) const {
+    return Base != RHS.Base || Dir != RHS.Dir;
+  }
+};
+
+} // namespace gsym
+
+/// Allows gsym::FileEntry to be used as a key in an llvm::DenseMap.
+template <> struct DenseMapInfo<gsym::FileEntry> {
+  static inline gsym::FileEntry getEmptyKey() {
+    const auto key = DenseMapInfo<uint32_t>::getEmptyKey();
+    return gsym::FileEntry(key, key);
+  }
+  static inline gsym::FileEntry getTombstoneKey() {
+    const auto key = DenseMapInfo<uint32_t>::getTombstoneKey();
+    return gsym::FileEntry(key, key);
+  }
+  static unsigned getHashValue(const gsym::FileEntry &Val) {
+    return llvm::hash_combine(DenseMapInfo<uint32_t>::getHashValue(Val.Dir),
+                              DenseMapInfo<uint32_t>::getHashValue(Val.Base));
+  }
+  static bool isEqual(const gsym::FileEntry &LHS, const gsym::FileEntry &RHS) {
+    return LHS == RHS;
+  }
+};
+
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H
diff --git a/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/include/llvm/DebugInfo/GSYM/FunctionInfo.h
new file mode 100644
index 00000000000..7c4819cfe47
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -0,0 +1,107 @@
+//===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
+#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
+
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/DebugInfo/GSYM/StringTable.h"
+#include <stdint.h> // NOTE(review): header names restored by guess; confirm
+#include <vector>
+
+namespace llvm {
+class raw_ostream;
+namespace gsym {
+
+/// Function information in GSYM files encodes information for one
+/// contiguous address range. The name of the function is encoded as
+/// a string table offset and allows multiple functions with the same
+/// name to share the name string in the string table. Line tables are
+/// stored in a sorted vector of gsym::LineEntry objects and are split
+/// into line tables for each function. If a function has a discontiguous
+/// range, it will be split into two gsym::FunctionInfo objects. If the
+/// function has inline functions, the information will be encoded in
+/// the "Inline" member, see gsym::InlineInfo for more information.
+struct FunctionInfo {
+  AddressRange Range;
+  uint32_t Name; ///< Offset of the function name in the string table.
+  std::vector<LineEntry> Lines;
+  InlineInfo Inline;
+
+  FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
+      : Range(Addr, Addr + Size), Name(N) {}
+
+  /// Returns whether we have something else than range and name. When
+  /// converting information from a symbol table and from debug info, we
+  /// might end up with multiple FunctionInfo objects for the same range
+  /// and we need to be able to tell which one is the better object to use.
+  bool hasRichInfo() const {
+    return !Lines.empty() || Inline.isValid();
+  }
+
+  /// Address and size can be zero and there can be no line entries for a
+  /// symbol so the only indication this entry is valid is if the name is
+  /// not zero. This can happen when extracting information from symbol
+  /// tables that do not encode symbol sizes. In that case only the
+  /// address and name will be filled in.
+  bool isValid() const {
+    return Name != 0;
+  }
+
+  uint64_t startAddress() const { return Range.startAddress(); }
+  uint64_t endAddress() const { return Range.endAddress(); }
+  uint64_t size() const { return Range.size(); }
+  void setStartAddress(uint64_t Addr) { Range.setStartAddress(Addr); }
+  void setEndAddress(uint64_t Addr) { Range.setEndAddress(Addr); }
+  void setSize(uint64_t Size) { Range.setSize(Size); }
+
+  void clear() {
+    Range.clear();
+    Name = 0;
+    Lines.clear();
+    Inline.clear();
+  }
+};
+
+inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
+  return LHS.Range == RHS.Range && LHS.Name == RHS.Name &&
+         LHS.Lines == RHS.Lines && LHS.Inline == RHS.Inline;
+}
+inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
+  return !(LHS == RHS);
+}
+/// This sorting will order things consistently by address range first, but then
+/// followed by inlining being valid and line tables. We might end up with a
+/// FunctionInfo from debug info that will have the same range as one from the
+/// symbol table, but we want to quickly be able to sort and use the best version
+/// when creating the final GSYM file.
+inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
+  // First sort by address range
+  if (LHS.Range != RHS.Range)
+    return LHS.Range < RHS.Range;
+
+  // Then sort entries without valid inline info before ones that have it
+  if (LHS.Inline.isValid() != RHS.Inline.isValid())
+    return RHS.Inline.isValid();
+
+  // If the number of lines is the same, then compare line table entries
+  if (LHS.Lines.size() == RHS.Lines.size())
+    return LHS.Lines < RHS.Lines;
+  // Then sort by number of line table entries (more is better)
+  return LHS.Lines.size() < RHS.Lines.size();
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
diff --git a/include/llvm/DebugInfo/GSYM/InlineInfo.h b/include/llvm/DebugInfo/GSYM/InlineInfo.h
new file mode 100644
index 00000000000..c639bf21e81
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -0,0 +1,77 @@
+//===- InlineInfo.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H
+#define LLVM_DEBUGINFO_GSYM_INLINEINFO_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include <stdint.h> // NOTE(review): header names restored by guess; confirm
+#include <vector>
+
+
+namespace llvm {
+class raw_ostream;
+
+namespace gsym {
+
+/// Inline information stores the name of the inline function along with
+/// an array of address ranges. It also stores the call file and call line
+/// that called this inline function. This allows us to unwind inline call
+/// stacks back to the inline or concrete function that called this
+/// function. Inlined functions contained in this function are stored in the
+/// "Children" variable. All address ranges must be sorted and all address
+/// ranges of all children must be contained in the ranges of this function.
+/// Any clients that encode information will need to ensure the ranges are
+/// all contained correctly or lookups could fail. All ranges in these objects
+/// must be contained in the top level FunctionInfo address ranges as well.
+struct InlineInfo {
+
+  uint32_t Name; ///< Offset of the inline function name in the string table.
+  uint32_t CallFile; ///< 1 based file index in the file table.
+  uint32_t CallLine; ///< Source line number.
+  AddressRanges Ranges;
+  std::vector<InlineInfo> Children;
+  InlineInfo() : Name(0), CallFile(0), CallLine(0) {}
+  void clear() {
+    Name = 0;
+    CallFile = 0;
+    CallLine = 0;
+    Ranges.clear();
+    Children.clear();
+  }
+  bool isValid() const { return !Ranges.empty(); }
+  typedef std::vector<const InlineInfo *> InlineArray;
+  /// Lookup an address in the InlineInfo object
+  ///
+  /// This function is used to symbolicate an inline call stack and can
+  /// turn one address in the program into one or more inline call stacks
+  /// and have the stack trace show the original call site from
+  /// non-inlined code.
+  ///
+  /// \param Addr the address to lookup
+  ///
+  /// \returns the inline call stack for \p Addr as an array of pointers to
+  /// InlineInfo objects, innermost inline function first, or llvm::None if
+  /// no named InlineInfo contains \p Addr.
+  llvm::Optional<InlineArray> getInlineStack(uint64_t Addr) const;
+};
+
+inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) {
+  return LHS.Name == RHS.Name && LHS.CallFile == RHS.CallFile &&
+         LHS.CallLine == RHS.CallLine && LHS.Ranges == RHS.Ranges &&
+         LHS.Children == RHS.Children;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const InlineInfo &FI);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H
diff --git a/include/llvm/DebugInfo/GSYM/LineEntry.h b/include/llvm/DebugInfo/GSYM/LineEntry.h
new file mode 100644
index 00000000000..d3ed23361e9
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/LineEntry.h
@@ -0,0 +1,48 @@
+//===- LineEntry.h ----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H
+#define LLVM_DEBUGINFO_GSYM_LINEENTRY_H
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+
+namespace llvm {
+namespace gsym {
+
+/// Line entries are used to encode the line tables in FunctionInfo objects.
+/// They are stored as a sorted vector of these objects and store the
+/// address, file and line of the line table row for a given address. The
+/// size of a line table entry is calculated by looking at the next entry
+/// in the FunctionInfo's vector of entries.
+struct LineEntry {
+  uint64_t Addr; ///< Start address of this line entry.
+  uint32_t File; ///< 1 based index of file in FileTable
+  uint32_t Line; ///< Source line number.
+  LineEntry(uint64_t A = 0, uint32_t F = 0, uint32_t L = 0)
+      : Addr(A), File(F), Line(L) {}
+  bool isValid() const { return File != 0; }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const LineEntry &LE) {
+  return OS << "addr=" << HEX64(LE.Addr) << ", file=" << format("%3u", LE.File)
+            << ", line=" << format("%3u", LE.Line);
+}
+
+inline bool operator==(const LineEntry &LHS, const LineEntry &RHS) {
+  return LHS.Addr == RHS.Addr && LHS.File == RHS.File && LHS.Line == RHS.Line;
+}
+inline bool operator!=(const LineEntry &LHS, const LineEntry &RHS) {
+  return !(LHS == RHS);
+}
+inline bool operator<(const LineEntry &LHS, const LineEntry &RHS) {
+  return LHS.Addr < RHS.Addr;
+}
+} // namespace gsym
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H
diff --git a/include/llvm/DebugInfo/GSYM/Range.h b/include/llvm/DebugInfo/GSYM/Range.h
new file mode 100644
index 00000000000..888e352586f
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/Range.h
@@ -0,0 +1,123 @@
+//===- Range.h --------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_RANGE_H
+#define LLVM_DEBUGINFO_GSYM_RANGE_H
+
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <stdint.h> // NOTE(review): header names restored by guess; confirm
+#include <vector>
+
+#define HEX8(v) llvm::format_hex(v, 4)
+#define HEX16(v) llvm::format_hex(v, 6)
+#define HEX32(v) llvm::format_hex(v, 10)
+#define HEX64(v) llvm::format_hex(v, 18)
+
+namespace llvm {
+class raw_ostream;
+
+namespace gsym {
+
+/// A class that represents an address range. The range is specified using
+/// a start and an end address.
+class AddressRange {
+  uint64_t Start;
+  uint64_t End;
+public:
+  AddressRange(uint64_t S = 0, uint64_t E = 0) : Start(S), End(E) {}
+  /// Access to the size must use the size() accessor to ensure the correct
+  /// answer. This allows an AddressRange to be constructed with invalid
+  /// address ranges where the end address is less than the start address
+  /// either because it was not set, or because of incorrect data.
+  uint64_t size() const { return Start < End ? End - Start : 0; }
+  void setStartAddress(uint64_t Addr) { Start = Addr; }
+  void setEndAddress(uint64_t Addr) { End = Addr; }
+  void setSize(uint64_t Size) { End = Start + Size; }
+  uint64_t startAddress() const { return Start; }
+  /// Returns Start + size(), so an invalid range whose stored end address is
+  /// less than its start address reports an end address equal to Start.
+  uint64_t endAddress() const { return Start + size(); }
+  void clear() {
+    Start = 0;
+    End = 0;
+  }
+  bool contains(uint64_t Addr) const { return Start <= Addr && Addr < endAddress(); }
+  bool isContiguousWith(const AddressRange &R) const {
+    return (Start <= R.endAddress()) && (endAddress() >= R.Start);
+  }
+  bool intersects(const AddressRange &R) const {
+    return (Start < R.endAddress()) && (endAddress() > R.Start);
+  }
+  /// If R overlaps this range, grow this range to cover both and return
+  /// true; otherwise leave this range unchanged and return false.
+  bool intersect(const AddressRange &R) {
+    if (intersects(R)) {
+      Start = std::min(Start, R.Start);
+      End = std::max(endAddress(), R.endAddress());
+      return true;
+    }
+    return false;
+  }
+};
+
+inline bool operator==(const AddressRange &LHS, const AddressRange &RHS) {
+  return LHS.startAddress() == RHS.startAddress() && LHS.endAddress() == RHS.endAddress();
+}
+inline bool operator!=(const AddressRange &LHS, const AddressRange &RHS) {
+  return LHS.startAddress() != RHS.startAddress() || LHS.endAddress() != RHS.endAddress();
+}
+inline bool operator<(const AddressRange &LHS, const AddressRange &RHS) {
+  if (LHS.startAddress() == RHS.startAddress())
+    return LHS.endAddress() < RHS.endAddress();
+  return LHS.startAddress() < RHS.startAddress();
+}
+inline bool operator<(const AddressRange &LHS, uint64_t Addr) {
+  return LHS.startAddress() < Addr;
+}
+inline bool operator<(uint64_t Addr, const AddressRange &RHS) {
+  return Addr < RHS.startAddress();
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
+
+/// The AddressRanges class helps normalize address range collections.
+/// This class keeps a sorted vector of AddressRange objects and can perform
+/// insertions and searches efficiently. The address ranges are always sorted
+/// and never contain any invalid or empty address ranges. This allows us to
+/// emit address ranges into the GSYM file efficiently. Intersecting address
+/// ranges are combined during insertion so that we can emit the most compact
+/// representation for address ranges when writing to disk.
+class AddressRanges {
+protected:
+  typedef std::vector<AddressRange> Collection;
+  Collection Ranges;
+public:
+  void clear() { Ranges.clear(); }
+  bool empty() const { return Ranges.empty(); }
+  bool contains(uint64_t Addr) const;
+  void insert(const AddressRange &R);
+  size_t size() const { return Ranges.size(); }
+  bool operator==(const AddressRanges &RHS) const {
+    return Ranges == RHS.Ranges;
+  }
+  const AddressRange &operator[](size_t i) const {
+    assert(i < Ranges.size());
+    return Ranges[i];
+  }
+  Collection::const_iterator begin() const { return Ranges.begin(); }
+  Collection::const_iterator end() const { return Ranges.end(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_RANGE_H
diff --git a/include/llvm/DebugInfo/GSYM/StringTable.h b/include/llvm/DebugInfo/GSYM/StringTable.h
new file mode 100644
index 00000000000..96ab9e59b70
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/StringTable.h
@@ -0,0 +1,61 @@
+//===- StringTable.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
+#define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include <stdint.h> // NOTE(review): header names restored by guess; confirm
+#include <string>
+
+
+namespace llvm {
+namespace gsym {
+
+/// String tables in GSYM files are required to start with an empty
+/// string at offset zero. Strings must be UTF8 NULL terminated strings.
+struct StringTable {
+  StringRef Data;
+  StringTable() : Data() {}
+  StringTable(StringRef D) : Data(D) {}
+  StringRef operator[](size_t Offset) const {
+    // getString() takes a 32 bit offset. Reject anything larger instead of
+    // letting it be truncated to a different, possibly valid, offset.
+    const uint32_t Offset32 = static_cast<uint32_t>(Offset);
+    if (Offset32 != Offset)
+      return StringRef();
+    return getString(Offset32);
+  }
+  StringRef getString(uint32_t Offset) const {
+    if (Offset < Data.size()) {
+      auto End = Data.find('\0', Offset);
+      return Data.substr(Offset, End - Offset);
+    }
+    return StringRef();
+  }
+  void clear() { Data = StringRef(); }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const StringTable &S) {
+  OS << "String table:\n";
+  uint32_t Offset = 0;
+  const size_t Size = S.Data.size();
+  while (Offset < Size) {
+    StringRef Str = S.getString(Offset);
+    OS << HEX32(Offset) << ": \"" << Str << "\"\n";
+    Offset += Str.size() + 1;
+  }
+  return OS;
+}
+
+} // namespace gsym
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt
index e7be0a0617d..6355ba08d03 100644
--- a/lib/DebugInfo/CMakeLists.txt
+++ b/lib/DebugInfo/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_subdirectory(DWARF)
+add_subdirectory(GSYM)
 add_subdirectory(MSF)
 add_subdirectory(CodeView)
 add_subdirectory(PDB)
diff --git a/lib/DebugInfo/GSYM/CMakeLists.txt b/lib/DebugInfo/GSYM/CMakeLists.txt
new file mode 100644
index 00000000000..fc37d560e61
--- /dev/null
+++ b/lib/DebugInfo/GSYM/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_llvm_library(LLVMDebugInfoGSYM
+  FunctionInfo.cpp
+  InlineInfo.cpp
+  Range.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/GSYM
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo
+  )
diff --git a/lib/DebugInfo/GSYM/FunctionInfo.cpp b/lib/DebugInfo/GSYM/FunctionInfo.cpp
new file mode 100644
index 00000000000..bd0c8bc51f5
--- /dev/null
+++ b/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -0,0 +1,23 @@
+//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+
+using namespace llvm;
+using namespace gsym;
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
+  OS << '[' << HEX64(FI.Range.startAddress()) << '-'
+     << HEX64(FI.Range.endAddress()) << "): "
+     << "Name=" << HEX32(FI.Name) << '\n';
+  for (const auto &Line : FI.Lines)
+    OS << Line << '\n';
+  OS << FI.Inline;
+  return OS;
+}
diff --git a/lib/DebugInfo/GSYM/InlineInfo.cpp b/lib/DebugInfo/GSYM/InlineInfo.cpp
new file mode 100644
index 00000000000..78e6ed46274
--- /dev/null
+++ b/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -0,0 +1,59 @@
+//===- InlineInfo.cpp -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include <algorithm> // NOTE(review): header names restored by guess; confirm
+#include <inttypes.h>
+
+using namespace llvm;
+using namespace gsym;
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const InlineInfo &II) {
+  if (!II.isValid())
+    return OS;
+  bool First = true;
+  for (const auto &Range : II.Ranges) {
+    if (First)
+      First = false;
+    else
+      OS << ' ';
+    OS << Range;
+  }
+  OS << " Name = " << HEX32(II.Name) << ", CallFile = " << II.CallFile
+     << ", CallLine = " << II.CallLine << '\n';
+  for (const auto &Child : II.Children)
+    OS << Child;
+  return OS;
+}
+
+/// Collect the named InlineInfo objects containing \p Addr, innermost first.
+static bool getInlineStackHelper(const InlineInfo &II, uint64_t Addr,
+                                 std::vector<const InlineInfo *> &InlineStack) {
+  if (II.Ranges.contains(Addr)) {
+    // If this is the top level that represents the concrete function,
+    // there will be no name and nothing is added to the inline stack.
+    // Otherwise we have found an inline function that we need to insert.
+    if (II.Name != 0)
+      InlineStack.insert(InlineStack.begin(), &II);
+    for (const auto &Child : II.Children) {
+      if (::getInlineStackHelper(Child, Addr, InlineStack))
+        break;
+    }
+    return !InlineStack.empty();
+  }
+  return false;
+}
+
+llvm::Optional<InlineInfo::InlineArray> InlineInfo::getInlineStack(uint64_t Addr) const {
+  InlineArray Result;
+  if (getInlineStackHelper(*this, Addr, Result))
+    return Result;
+  return llvm::None;
+}
diff --git a/lib/DebugInfo/GSYM/Range.cpp b/lib/DebugInfo/GSYM/Range.cpp
new file mode 100644
index 00000000000..fdf4c2e6542
--- /dev/null
+++ b/lib/DebugInfo/GSYM/Range.cpp
@@ -0,0 +1,71 @@
+//===- Range.cpp ------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include <algorithm> // NOTE(review): header names restored by guess; confirm
+#include <inttypes.h>
+
+using namespace llvm;
+using namespace gsym;
+
+/// Insert \p Range, growing an existing range instead if they intersect.
+void AddressRanges::insert(const AddressRange &Range) {
+  // Empty or invalid ranges (size() == 0) are never stored.
+  if (Range.size() == 0)
+    return;
+
+  // Check if an existing range intersects with this range, and if so,
+  // grow the intersecting ranges instead of adding a new one.
+  auto Begin = Ranges.begin();
+  auto End = Ranges.end();
+  const auto Iter = std::upper_bound(Begin, End, Range);
+  if (Iter != Begin) {
+    auto PrevIter = Iter - 1;
+    // If the previous range intersects with "Range" they will be combined.
+    if (PrevIter->intersect(Range)) {
+      // Now check if the previous range intersects with the next range since
+      // the previous range was combined. If so, combine them and remove the
+      // next range. There is no next range when Iter is the end iterator.
+      if (Iter != End && PrevIter->intersect(*Iter))
+        Ranges.erase(Iter);
+      return;
+    }
+  }
+  // If the next range intersects with "Range", combine them and return.
+  if (Iter != End && Iter->intersect(Range))
+    return;
+  Ranges.insert(Iter, Range);
+}
+
+bool AddressRanges::contains(uint64_t Addr) const {
+  if (Ranges.empty())
+    return false;
+  auto Begin = Ranges.begin();
+  auto Pos = std::upper_bound(Begin, Ranges.end(), Addr);
+  if (Pos == Begin)
+    return false;
+  --Pos;
+  return Pos->contains(Addr);
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) {
+  return OS << '[' << HEX64(R.startAddress()) << " - " << HEX64(R.endAddress())
+            << ")";
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRanges &AR) {
+  size_t Size = AR.size();
+  for (size_t I=0; I
+// NOTE(review): text is missing from this copy at this point (the rest of
+// the function above and the start of the unit test file). Restore it from
+// the original patch.
+#include
+#include
+
+using namespace llvm;
+using namespace gsym;
+
+TEST(GSYMTest, TestFileEntry) {
+  // Make sure default constructed GSYM FileEntry has zeroes in the
+  // directory and basename string table indexes.
+  FileEntry empty1;
+  FileEntry empty2;
+  EXPECT_EQ(empty1.Dir, 0u);
+  EXPECT_EQ(empty1.Base, 0u);
+  // Verify equality operator works
+  FileEntry a1(10,30);
+  FileEntry a2(10,30);
+  FileEntry b(10,40);
+  EXPECT_EQ(empty1, empty2);
+  EXPECT_EQ(a1, a2);
+  EXPECT_NE(a1, b);
+  EXPECT_NE(a1, empty1);
+  // Test we can use llvm::gsym::FileEntry in llvm::DenseMap.
+  DenseMap<FileEntry, uint32_t> EntryToIndex;
+  constexpr uint32_t Index1 = 1;
+  constexpr uint32_t Index2 = 2;
+  auto R = EntryToIndex.insert(std::make_pair(a1, Index1));
+  EXPECT_TRUE(R.second);
+  EXPECT_EQ(R.first->second, Index1);
+  R = EntryToIndex.insert(std::make_pair(a1, Index1));
+  EXPECT_FALSE(R.second);
+  EXPECT_EQ(R.first->second, Index1);
+  R = EntryToIndex.insert(std::make_pair(b, Index2));
+  EXPECT_TRUE(R.second);
+  EXPECT_EQ(R.first->second, Index2);
+  // Inserting an existing key must fail and keep the value stored first.
+  R = EntryToIndex.insert(std::make_pair(a1, Index2));
+  EXPECT_FALSE(R.second);
+  EXPECT_EQ(R.first->second, Index1);
+}
+
+
+TEST(GSYMTest, TestFunctionInfo) {
+  // Test GSYM FunctionInfo structs and functionality.
+  FunctionInfo invalid;
+  EXPECT_FALSE(invalid.isValid());
+  EXPECT_FALSE(invalid.hasRichInfo());
+  const uint64_t StartAddr = 0x1000;
+  const uint64_t EndAddr = 0x1100;
+  const uint64_t Size = EndAddr - StartAddr;
+  const uint32_t NameOffset = 30;
+  FunctionInfo FI(StartAddr, Size, NameOffset);
+  EXPECT_TRUE(FI.isValid());
+  EXPECT_FALSE(FI.hasRichInfo());
+  EXPECT_EQ(FI.startAddress(), StartAddr);
+  EXPECT_EQ(FI.endAddress(), EndAddr);
+  EXPECT_EQ(FI.size(), Size);
+  const uint32_t FileIdx = 1;
+  const uint32_t Line = 12;
+  FI.Lines.push_back(LineEntry(StartAddr,FileIdx,Line));
+  EXPECT_TRUE(FI.hasRichInfo());
+  FI.clear();
+  EXPECT_FALSE(FI.isValid());
+  EXPECT_FALSE(FI.hasRichInfo());
+
+  FunctionInfo A1(0x1000, 0x100, NameOffset);
+  FunctionInfo A2(0x1000, 0x100, NameOffset);
+  FunctionInfo B;
+  // Check == operator
+  EXPECT_EQ(A1, A2);
+  // Make sure things are not equal if they only differ by start address.
+  B = A2;
+  B.setStartAddress(0x2000);
+  EXPECT_NE(B, A2);
+  // Make sure things are not equal if they only differ by size.
+  B = A2;
+  B.setSize(0x101);
+  EXPECT_NE(B, A2);
+  // Make sure things are not equal if they only differ by name.
+  B = A2;
+  B.Name = 60;
+  EXPECT_NE(B, A2);
+  // Check < operator.
+  // Check less than where address differs.
+  B = A2;
+  B.setStartAddress(A2.startAddress() + 0x1000);
+  EXPECT_LT(A1, B);
+
+  // We use the < operator to take a variety of different FunctionInfo
+  // structs from a variety of sources: symtab, debug info, runtime info
+  // and we sort them and want the sorting to allow us to quickly get the
+  // best version of a function info.
+  FunctionInfo FISymtab(StartAddr, Size, NameOffset);
+  FunctionInfo FIWithLines(StartAddr, Size, NameOffset);
+  FIWithLines.Lines.push_back(LineEntry(StartAddr,FileIdx,Line));
+  // Test that a FunctionInfo with just a name and size is less than one
+  // that has name, size and any number of line table entries
+  EXPECT_LT(FISymtab, FIWithLines);
+
+  FunctionInfo FIWithLinesAndInline = FIWithLines;
+  FIWithLinesAndInline.Inline.Ranges.insert(AddressRange(StartAddr, StartAddr + 0x10));
+  // Test that a FunctionInfo with name, size, and line entries is less than
+  // the same one with valid inline info
+  EXPECT_LT(FIWithLines, FIWithLinesAndInline);
+
+  // Test if we have an entry with lines and one with more lines for the same
+  // range, the ones with more lines is greater than the one with less.
+  FunctionInfo FIWithMoreLines = FIWithLines;
+  FIWithMoreLines.Lines.push_back(LineEntry(StartAddr,FileIdx,Line+5));
+  EXPECT_LT(FIWithLines, FIWithMoreLines);
+
+  // Test that if we have the same number of lines we compare the line entries
+  // in the FunctionInfo.Lines vector.
+  FunctionInfo FIWithLinesWithHigherAddress = FIWithLines;
+  FIWithLinesWithHigherAddress.Lines[0].Addr += 0x10;
+  EXPECT_LT(FIWithLines, FIWithLinesWithHigherAddress);
+}
+
+TEST(GSYMTest, TestInlineInfo) {
+  // Test InlineInfo structs.
+  InlineInfo II;
+  EXPECT_FALSE(II.isValid());
+  II.Ranges.insert(AddressRange(0x1000,0x2000));
+  // Make sure InlineInfo is valid with just an address range since
+  // top level InlineInfo objects have ranges with no name, call file
+  // or call line
+  EXPECT_TRUE(II.isValid());
+  // Make sure InlineInfo isn't valid after being cleared.
+  II.clear();
+  EXPECT_FALSE(II.isValid());
+
+  // Create an InlineInfo that contains the following data. The
+  // indentation of the address range indicates the parent child
+  // relationships of the InlineInfo objects:
+  //
+  // Variable    Range and values
+  // =========== ====================================================
+  // Root        [0x100-0x200) (no name, file, or line)
+  // Inline1       [0x150-0x160) Name = 1, File = 1, Line = 11
+  // Inline1Sub1     [0x152-0x155) Name = 2, File = 2, Line = 22
+  // Inline1Sub2     [0x157-0x158) Name = 3, File = 3, Line = 33
+  InlineInfo Root;
+  Root.Ranges.insert(AddressRange(0x100,0x200));
+  InlineInfo Inline1;
+  Inline1.Ranges.insert(AddressRange(0x150,0x160));
+  Inline1.Name = 1;
+  Inline1.CallFile = 1;
+  Inline1.CallLine = 11;
+  InlineInfo Inline1Sub1;
+  Inline1Sub1.Ranges.insert(AddressRange(0x152, 0x155));
+  Inline1Sub1.Name = 2;
+  Inline1Sub1.CallFile = 2;
+  Inline1Sub1.CallLine = 22;
+  InlineInfo Inline1Sub2;
+  Inline1Sub2.Ranges.insert(AddressRange(0x157,0x158));
+  Inline1Sub2.Name = 3;
+  Inline1Sub2.CallFile = 3;
+  Inline1Sub2.CallLine = 33;
+  Inline1.Children.push_back(Inline1Sub1);
+  Inline1.Children.push_back(Inline1Sub2);
+  Root.Children.push_back(Inline1);
+
+  // Make sure an address that is out of range won't match
+  EXPECT_FALSE(Root.getInlineStack(0x50));
+
+  // Verify that we get no inline stacks for addresses out of [0x100-0x200)
+  EXPECT_FALSE(Root.getInlineStack(Root.Ranges[0].startAddress()-1));
+  EXPECT_FALSE(Root.getInlineStack(Root.Ranges[0].endAddress()));
+
+  // Verify we get no inline stack entries for addresses that are in
+  // [0x100-0x200) but not in [0x150-0x160)
+  EXPECT_FALSE(Root.getInlineStack(Inline1.Ranges[0].startAddress()-1));
+  EXPECT_FALSE(Root.getInlineStack(Inline1.Ranges[0].endAddress()));
+
+
+  // Verify we get one inline stack entry for addresses that are in
+  // [0x150-0x160) but not in [0x152-0x155) or [0x157-0x158)
+  auto InlineInfos = Root.getInlineStack(Inline1.Ranges[0].startAddress());
+  ASSERT_TRUE(InlineInfos);
+  ASSERT_EQ(InlineInfos->size(), 1u);
+  ASSERT_EQ(*InlineInfos->at(0), Inline1);
+  InlineInfos = Root.getInlineStack(Inline1.Ranges[0].endAddress()-1);
+  ASSERT_TRUE(InlineInfos);
+  ASSERT_EQ(InlineInfos->size(), 1u);
+  ASSERT_EQ(*InlineInfos->at(0), Inline1);
+
+  // Verify we get two inline stack entries for addresses that are in
+  // [0x152-0x155)
+  InlineInfos = Root.getInlineStack(Inline1Sub1.Ranges[0].startAddress());
+  ASSERT_TRUE(InlineInfos);
+  ASSERT_EQ(InlineInfos->size(), 2u);
+  ASSERT_EQ(*InlineInfos->at(0), Inline1Sub1);
+  ASSERT_EQ(*InlineInfos->at(1), Inline1);
+  InlineInfos = Root.getInlineStack(Inline1Sub1.Ranges[0].endAddress()-1);
+  ASSERT_TRUE(InlineInfos);
+  ASSERT_EQ(InlineInfos->size(), 2u);
+  ASSERT_EQ(*InlineInfos->at(0), Inline1Sub1);
+  ASSERT_EQ(*InlineInfos->at(1), Inline1);
+
+  // Verify we get two inline stack entries for addresses that are in
+  // [0x157-0x158)
+  InlineInfos = Root.getInlineStack(Inline1Sub2.Ranges[0].startAddress());
+  ASSERT_TRUE(InlineInfos);
+  ASSERT_EQ(InlineInfos->size(), 2u);
+  ASSERT_EQ(*InlineInfos->at(0), Inline1Sub2);
+  ASSERT_EQ(*InlineInfos->at(1), Inline1);
+  InlineInfos = Root.getInlineStack(Inline1Sub2.Ranges[0].endAddress()-1);
+  ASSERT_TRUE(InlineInfos);
+  ASSERT_EQ(InlineInfos->size(), 2u);
+  ASSERT_EQ(*InlineInfos->at(0), Inline1Sub2);
+  ASSERT_EQ(*InlineInfos->at(1), Inline1);
+}
+
+TEST(GSYMTest, TestLineEntry) {
+  // Test llvm::gsym::LineEntry structs.
+  const uint64_t ValidAddr = 0x1000;
+  const uint32_t InvalidFileIdx = 0;
+  const uint32_t ValidFileIdx = 1;
+  const uint32_t ValidLine = 5;
+
+  LineEntry Invalid;
+  EXPECT_FALSE(Invalid.isValid());
+  // Make sure that an entry is invalid if it has a bad file index.
+  LineEntry BadFile(ValidAddr, InvalidFileIdx, ValidLine);
+  EXPECT_FALSE(BadFile.isValid());
+  // Test operators
+  LineEntry E1(ValidAddr, ValidFileIdx, ValidLine);
+  LineEntry E2(ValidAddr, ValidFileIdx, ValidLine);
+  LineEntry DifferentAddr(ValidAddr+1, ValidFileIdx, ValidLine);
+  LineEntry DifferentFile(ValidAddr, ValidFileIdx+1, ValidLine);
+  LineEntry DifferentLine(ValidAddr, ValidFileIdx, ValidLine+1);
+  EXPECT_TRUE(E1.isValid());
+  EXPECT_EQ(E1, E2);
+  EXPECT_NE(E1, DifferentAddr);
+  EXPECT_NE(E1, DifferentFile);
+  EXPECT_NE(E1, DifferentLine);
+  EXPECT_LT(E1, DifferentAddr);
+}
+
+TEST(GSYMTest, TestRanges) {
+  // test llvm::gsym::AddressRange.
+  const uint64_t StartAddr = 0x1000;
+  const uint64_t EndAddr = 0x2000;
+  // Verify constructor and API to ensure it takes start and end address.
+  const AddressRange Range(StartAddr, EndAddr);
+  EXPECT_EQ(Range.startAddress(), StartAddr);
+  EXPECT_EQ(Range.endAddress(), EndAddr);
+  EXPECT_EQ(Range.size(), EndAddr-StartAddr);
+
+  // Verify llvm::gsym::AddressRange::contains().
+ EXPECT_FALSE(Range.contains(0)); + EXPECT_FALSE(Range.contains(StartAddr-1)); + EXPECT_TRUE(Range.contains(StartAddr)); + EXPECT_TRUE(Range.contains(EndAddr-1)); + EXPECT_FALSE(Range.contains(EndAddr)); + EXPECT_FALSE(Range.contains(UINT64_MAX)); + + const AddressRange RangeSame(StartAddr, EndAddr); + const AddressRange RangeDifferentStart(StartAddr+1, EndAddr); + const AddressRange RangeDifferentEnd(StartAddr, EndAddr+1); + const AddressRange RangeDifferentStartEnd(StartAddr+1, EndAddr+1); + // Test == and != with values that are the same + EXPECT_EQ(Range, RangeSame); + EXPECT_FALSE(Range != RangeSame); + // Test == and != with values that are the different + EXPECT_NE(Range, RangeDifferentStart); + EXPECT_NE(Range, RangeDifferentEnd); + EXPECT_NE(Range, RangeDifferentStartEnd); + EXPECT_FALSE(Range == RangeDifferentStart); + EXPECT_FALSE(Range == RangeDifferentEnd); + EXPECT_FALSE(Range == RangeDifferentStartEnd); + + // Test "bool operator<(const AddressRange &, const AddressRange &)". + EXPECT_FALSE(Range < RangeSame); + EXPECT_FALSE(RangeSame < Range); + EXPECT_LT(Range, RangeDifferentStart); + EXPECT_LT(Range, RangeDifferentEnd); + EXPECT_LT(Range, RangeDifferentStartEnd); + // Test "bool operator<(const AddressRange &, uint64_t)" + EXPECT_LT(Range, StartAddr + 1); + // Test "bool operator<(uint64_t, const AddressRange &)" + EXPECT_LT(StartAddr - 1, Range); + + // Verify llvm::gsym::AddressRange::isContiguousWith() and + // llvm::gsym::AddressRange::intersects(). 
+ const AddressRange EndsBeforeRangeStart(0, StartAddr-1); + const AddressRange EndsAtRangeStart(0, StartAddr); + const AddressRange OverlapsRangeStart(StartAddr-1, StartAddr+1); + const AddressRange InsideRange(StartAddr+1, EndAddr-1); + const AddressRange OverlapsRangeEnd(EndAddr-1, EndAddr+1); + const AddressRange StartsAtRangeEnd(EndAddr, EndAddr+0x100); + const AddressRange StartsAfterRangeEnd(EndAddr+1, EndAddr+0x100); + + EXPECT_FALSE(Range.isContiguousWith(EndsBeforeRangeStart)); + EXPECT_TRUE(Range.isContiguousWith(EndsAtRangeStart)); + EXPECT_TRUE(Range.isContiguousWith(OverlapsRangeStart)); + EXPECT_TRUE(Range.isContiguousWith(InsideRange)); + EXPECT_TRUE(Range.isContiguousWith(OverlapsRangeEnd)); + EXPECT_TRUE(Range.isContiguousWith(StartsAtRangeEnd)); + EXPECT_FALSE(Range.isContiguousWith(StartsAfterRangeEnd)); + + EXPECT_FALSE(Range.intersects(EndsBeforeRangeStart)); + EXPECT_FALSE(Range.intersects(EndsAtRangeStart)); + EXPECT_TRUE(Range.intersects(OverlapsRangeStart)); + EXPECT_TRUE(Range.intersects(InsideRange)); + EXPECT_TRUE(Range.intersects(OverlapsRangeEnd)); + EXPECT_FALSE(Range.intersects(StartsAtRangeEnd)); + EXPECT_FALSE(Range.intersects(StartsAfterRangeEnd)); + + // Test the functions that maintain GSYM address ranges: + // "bool AddressRange::contains(uint64_t Addr) const;" + // "void AddressRanges::insert(const AddressRange &R);" + AddressRanges Ranges; + Ranges.insert(AddressRange(0x1000, 0x2000)); + Ranges.insert(AddressRange(0x2000, 0x3000)); + Ranges.insert(AddressRange(0x4000, 0x5000)); + + EXPECT_FALSE(Ranges.contains(0)); + EXPECT_FALSE(Ranges.contains(0x1000-1)); + EXPECT_TRUE(Ranges.contains(0x1000)); + EXPECT_TRUE(Ranges.contains(0x2000)); + EXPECT_TRUE(Ranges.contains(0x4000)); + EXPECT_TRUE(Ranges.contains(0x2000-1)); + EXPECT_TRUE(Ranges.contains(0x3000-1)); + EXPECT_FALSE(Ranges.contains(0x3000+1)); + EXPECT_TRUE(Ranges.contains(0x5000-1)); + EXPECT_FALSE(Ranges.contains(0x5000+1)); + 
EXPECT_FALSE(Ranges.contains(UINT64_MAX)); + + // Verify that intersecting ranges get combined + Ranges.clear(); + Ranges.insert(AddressRange(0x1100, 0x1F00)); + // Verify a wholy contained range that is added doesn't do anything. + Ranges.insert(AddressRange(0x1500, 0x1F00)); + EXPECT_EQ(Ranges.size(), 1u); + EXPECT_EQ(Ranges[0], AddressRange(0x1100, 0x1F00)); + + // Verify a range that starts before and intersects gets combined. + Ranges.insert(AddressRange(0x1000, Ranges[0].startAddress()+1)); + EXPECT_EQ(Ranges.size(), 1u); + EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x1F00)); + + // Verify a range that starts inside and extends ranges gets combined. + Ranges.insert(AddressRange(Ranges[0].endAddress()-1, 0x2000)); + EXPECT_EQ(Ranges.size(), 1u); + EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x2000)); + + // Verify that adjacent ranges don't get combined + Ranges.insert(AddressRange(0x2000, 0x3000)); + EXPECT_EQ(Ranges.size(), 2u); + EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x2000)); + EXPECT_EQ(Ranges[1], AddressRange(0x2000, 0x3000)); + // Verify if we add an address range that intersects two ranges + // that they get combined + Ranges.insert(AddressRange(Ranges[0].endAddress()-1, + Ranges[1].startAddress()+1)); + EXPECT_EQ(Ranges.size(), 1u); + EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x3000)); + + +} + +TEST(GSYMTest, TestStringTable) { + StringTable StrTab(StringRef("\0Hello\0World\0", 13)); + // Test extracting strings from a string table. + EXPECT_EQ(StrTab.getString(0), ""); + EXPECT_EQ(StrTab.getString(1), "Hello"); + EXPECT_EQ(StrTab.getString(7), "World"); + EXPECT_EQ(StrTab.getString(8), "orld"); + // Test pointing to last NULL terminator gets empty string. + EXPECT_EQ(StrTab.getString(12), ""); + // Test pointing to past end gets empty string. + EXPECT_EQ(StrTab.getString(13), ""); +}