From a5e9c530d606b0d5f3d4dea3d8356b1ee5804252 Mon Sep 17 00:00:00 2001
From: Zachary Turner <zturner@google.com>
Date: Tue, 3 May 2016 22:18:17 +0000
Subject: [PATCH] Move CodeViewTypeStream to DebugInfo/CodeView

Ability to parse codeview type streams is also needed by
DebugInfoPDB for parsing PDBs, so moving this into a library
gives us this option.  Since DebugInfoPDB had already hand
rolled some code to do this, that code is now converted over
to using this common abstraction.

Differential Revision: http://reviews.llvm.org/D19887
Reviewed By: dblaikie, amccarth

llvm-svn: 268454
---
 include/llvm/DebugInfo/CodeView/TypeStream.h | 130 ++++++++++++++++
 include/llvm/DebugInfo/PDB/Raw/TpiStream.h   |  10 +-
 lib/DebugInfo/PDB/Raw/TpiStream.cpp          |  25 +--
 tools/llvm-pdbdump/llvm-pdbdump.cpp          |  27 ++--
 tools/llvm-readobj/COFFDumper.cpp            | 151 ++-----------------
 5 files changed, 160 insertions(+), 183 deletions(-)
 create mode 100644 include/llvm/DebugInfo/CodeView/TypeStream.h
diff --git a/include/llvm/DebugInfo/CodeView/TypeStream.h b/include/llvm/DebugInfo/CodeView/TypeStream.h
new file mode 100644
index 00000000000..99e2af6dc7f
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/TypeStream.h
@@ -0,0 +1,130 @@
+//===- TypeStream.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESTREAM_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPESTREAM_H
+
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorOr.h"
+
+#include <stdint.h>
+
+namespace llvm {
+namespace codeview {
+
+/// Consumes sizeof(T) bytes from the given byte sequence. Returns an error if
+/// there are not enough bytes remaining. Reinterprets the consumed bytes as a
+/// T object and points 'Res' at them.
+template <typename T>
+inline std::error_code consumeObject(StringRef &Data, const T *&Res) {
+  if (Data.size() < sizeof(*Res))
+    return object::object_error::parse_failed;
+  Res = reinterpret_cast<const T *>(Data.data());
+  Data = Data.drop_front(sizeof(*Res));
+  return std::error_code();
+}
+
+inline std::error_code consumeUInt32(StringRef &Data, uint32_t &Res) {
+  const support::ulittle32_t *IntPtr;
+  if (auto EC = consumeObject(Data, IntPtr))
+    return EC;
+  Res = *IntPtr;
+  return std::error_code();
+}
+
+// A const input iterator over the records of a CodeView type stream.
+class TypeIterator {
+public:
+  struct TypeRecord {
+    std::size_t Length;
+    TypeLeafKind Leaf;
+    StringRef LeafData;
+  };
+
+  explicit TypeIterator(const StringRef &SectionData)
+      : Data(SectionData), AtEnd(false) {
+    next(); // Prime the pump
+  }
+
+  TypeIterator() : AtEnd(true) {}
+
+  // For iterators to compare equal, they must both point at the same record
+  // in the same data stream, or they must both be at the end of a stream.
+  friend bool operator==(const TypeIterator &lhs, const TypeIterator &rhs) {
+    return (lhs.Data.begin() == rhs.Data.begin()) || (lhs.AtEnd && rhs.AtEnd);
+  }
+
+  friend bool operator!=(const TypeIterator &lhs, const TypeIterator &rhs) {
+    return !(lhs == rhs);
+  }
+
+  const TypeRecord &operator*() const {
+    assert(!AtEnd);
+    return Current;
+  }
+
+  const TypeRecord *operator->() const {
+    assert(!AtEnd);
+    return &Current;
+  }
+
+  TypeIterator &operator++() {
+    next();
+    return *this;
+  }
+
+  TypeIterator operator++(int) {
+    TypeIterator Original = *this;
+    ++*this;
+    return Original;
+  }
+
+private:
+  // Decodes the next record into Current. Sets AtEnd once no well-formed
+  // record remains, so a truncated stream cannot stall iteration.
+  void next() {
+    assert(!AtEnd && "Attempted to advance more than one past the last rec");
+    const TypeRecordPrefix *Rec;
+    // Stop on exhausted data, a truncated prefix, or a length too small to
+    // cover the leaf kind (which would underflow the subtraction below).
+    if (Data.empty() || consumeObject(Data, Rec) ||
+        Rec->Len < 2) {
+      AtEnd = true;
+      return;
+    }
+
+    Current.Length = Rec->Len;
+    Current.Leaf = static_cast<TypeLeafKind>(uint16_t(Rec->Leaf));
+    Current.LeafData = Data.substr(0, Current.Length - 2);
+
+    // The next record starts immediately after this one.
+    Data = Data.drop_front(Current.LeafData.size());
+
+    // FIXME: The stream contains LF_PAD bytes that we need to ignore, but those
+    // are typically included in LeafData. We may need to call skipPadding() if
+    // we ever find a record that doesn't count those bytes.
+  }
+
+  StringRef Data;
+  TypeRecord Current;
+  bool AtEnd;
+};
+
+inline iterator_range<TypeIterator> makeTypeRange(StringRef Data) {
+  return make_range(TypeIterator(Data), TypeIterator());
+}
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/PDB/Raw/TpiStream.h b/include/llvm/DebugInfo/PDB/Raw/TpiStream.h
index f40ef373882..1da9c657219 100644
--- a/include/llvm/DebugInfo/PDB/Raw/TpiStream.h
+++ b/include/llvm/DebugInfo/PDB/Raw/TpiStream.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H
 #define LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H
 
+#include "llvm/DebugInfo/CodeView/TypeStream.h"
 #include "llvm/DebugInfo/PDB/PDBTypes.h"
 #include "llvm/DebugInfo/PDB/Raw/ByteStream.h"
 #include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
@@ -25,12 +26,6 @@ class TpiStream {
   struct HeaderInfo;
 
 public:
-  struct HashedTypeRecord {
-    uint32_t Hash;
-    codeview::TypeLeafKind Kind;
-    ArrayRef<uint8_t> Record;
-  };
-
   TpiStream(PDBFile &File);
   ~TpiStream();
   std::error_code reload();
@@ -41,7 +36,7 @@ public:
   uint32_t TypeIndexEnd() const;
   uint32_t NumTypeRecords() const;
 
-  ArrayRef<HashedTypeRecord> records() const;
+  iterator_range<codeview::TypeIterator> types() const;
 
 private:
   PDBFile &Pdb;
@@ -53,7 +48,6 @@ private:
   ByteStream HashValuesBuffer;
   ByteStream HashAdjBuffer;
 
-  std::vector<HashedTypeRecord> TypeRecords;
   std::unique_ptr<HeaderInfo> Header;
 };
 }
diff --git a/lib/DebugInfo/PDB/Raw/TpiStream.cpp b/lib/DebugInfo/PDB/Raw/TpiStream.cpp
index 7ee4c60789b..703ae5d7183 100644
--- a/lib/DebugInfo/PDB/Raw/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Raw/TpiStream.cpp
@@ -88,25 +88,6 @@ std::error_code TpiStream::reload() {
 
   // The actual type records themselves come from this stream
   RecordsBuffer.initialize(Reader, Header->TypeRecordBytes);
-  TypeRecords.resize(TypeIndexEnd() - ::MinTypeIndex);
-  StreamReader RecordsReader(RecordsBuffer);
-  for (uint32_t I = TypeIndexBegin(); I < TypeIndexEnd(); ++I) {
-    HashedTypeRecord &Record = TypeRecords[I - ::MinTypeIndex];
-    codeview::TypeRecordPrefix Prefix;
-    if (auto EC = RecordsReader.readObject(&Prefix))
-      return EC;
-
-    Record.Kind =
-        static_cast<codeview::TypeLeafKind>(static_cast<uint16_t>(Prefix.Leaf));
-
-    // Since we read this entire buffer into a ByteStream, we are guaranteed
-    // that the entire buffer is contiguous (i.e. there's no longer a chance
-    // that it splits across a page boundary.  So we can request a reference
-    // directly into the stream buffer to avoid unnecessary memory copies.
-    uint32_t RecordSize = Prefix.Len - sizeof(Prefix.Leaf);
-    if (auto EC = RecordsReader.getArrayRef(Record.Record, RecordSize))
-      return EC;
-  }
 
   // Hash indices, hash values, etc come from the hash stream.
   MappedBlockStream HS(Header->HashStreamIndex, Pdb);
@@ -136,8 +117,6 @@ uint32_t TpiStream::NumTypeRecords() const {
   return TypeIndexEnd() - TypeIndexBegin();
 }
 
-ArrayRef<TpiStream::HashedTypeRecord> TpiStream::records() const {
-  const HashedTypeRecord *Begin =
-      &TypeRecords[TypeIndexBegin() - ::MinTypeIndex];
-  return ArrayRef<HashedTypeRecord>(Begin, NumTypeRecords());
+iterator_range<codeview::TypeIterator> TpiStream::types() const {
+  return codeview::makeTypeRange(RecordsBuffer.str());
 }
diff --git a/tools/llvm-pdbdump/llvm-pdbdump.cpp b/tools/llvm-pdbdump/llvm-pdbdump.cpp
index b58ed8f143e..8fb1d87d14f 100644
--- a/tools/llvm-pdbdump/llvm-pdbdump.cpp
+++ b/tools/llvm-pdbdump/llvm-pdbdump.cpp
@@ -149,27 +149,24 @@ cl::opt<bool> NoEnumDefs("no-enum-definitions",
                          cl::cat(FilterCategory));
 }
 
-static void dumpBytes(raw_ostream &S, ArrayRef<uint8_t> Bytes,
-                      uint32_t BytesPerRow, uint32_t Indent) {
+static void dumpBytes(raw_ostream &S, StringRef Bytes, uint32_t BytesPerRow,
+                      uint32_t Indent) {
   S << "[";
-  uint32_t I = 0;
 
-  uint32_t BytesRemaining = Bytes.size();
-  while (BytesRemaining > 0) {
-    uint32_t BytesThisLine = std::min(BytesRemaining, BytesPerRow);
-    for (size_t L = 0; L < BytesThisLine; ++L, ++I) {
-      S << format_hex_no_prefix(Bytes[I], 2, true);
-      if (L + 1 < BytesThisLine)
+  while (!Bytes.empty()) {
+    uint32_t BytesThisLine = std::min<size_t>(Bytes.size(), BytesPerRow);
+    while (BytesThisLine > 0) {
+      S << format_hex_no_prefix(uint8_t(Bytes.front()), 2, true);
+      Bytes = Bytes.drop_front();
+      if (--BytesThisLine > 0)
         S << ' ';
     }
-    BytesRemaining -= BytesThisLine;
-    if (BytesRemaining > 0) {
+    if (!Bytes.empty()) {
       S << '\n';
       S.indent(Indent);
     }
   }
   S << ']';
-  S.flush();
 }
 
 static void dumpStructure(RawSession &RS) {
@@ -321,10 +318,10 @@ static void dumpStructure(RawSession &RS) {
   TpiStream &Tpi = File.getPDBTpiStream();
   outs() << "TPI Version: " << Tpi.getTpiVersion() << '\n';
   outs() << "Record count: " << Tpi.NumTypeRecords() << '\n';
-  for (auto &Record : Tpi.records()) {
-    outs().indent(2) << "Kind: 0x" << Record.Kind;
+  for (auto &Type : Tpi.types()) {
+    outs().indent(2) << "Kind: 0x" << Type.Leaf;
     outs().indent(2) << "Bytes: ";
-    dumpBytes(outs(), Record.Record, 16, 24);
+    dumpBytes(outs(), Type.LeafData, 16, 24);
     outs() << '\n';
   }
 }
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index e8acc1acea7..08d180c85e2 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -28,6 +28,7 @@
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeStream.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/COFF.h"
@@ -958,26 +959,6 @@ void COFFDumper::printCodeViewDebugInfo() {
   }
 }
 
-/// Consumes sizeof(T) bytes from the given byte sequence. Returns an error if
-/// there are not enough bytes remaining. Reinterprets the consumed bytes as a
-/// T object and points 'Res' at them.
-template <typename T>
-static std::error_code consumeObject(StringRef &Data, const T *&Res) {
-  if (Data.size() < sizeof(*Res))
-    return object_error::parse_failed;
-  Res = reinterpret_cast<const T *>(Data.data());
-  Data = Data.drop_front(sizeof(*Res));
-  return std::error_code();
-}
-
-static std::error_code consumeUInt32(StringRef &Data, uint32_t &Res) {
-  const ulittle32_t *IntPtr;
-  if (auto EC = consumeObject(Data, IntPtr))
-    return EC;
-  Res = *IntPtr;
-  return std::error_code();
-}
-
 void COFFDumper::initializeFileAndStringTables(StringRef Data) {
   while (!Data.empty() && (CVFileChecksumTable.data() == nullptr ||
                            CVStringTable.data() == nullptr)) {
@@ -1964,113 +1945,6 @@ static StringRef getLeafTypeName(TypeLeafKind LT) {
   return "UnknownLeaf";
 }
 
-// A const input iterator interface to the CodeView type stream.
-class CodeViewTypeIterator {
-public:
-  struct TypeRecord {
-    std::size_t Length;
-    TypeLeafKind Leaf;
-    StringRef LeafData;
-  };
-
-  explicit CodeViewTypeIterator(const StringRef &SectionData)
-      : Data(SectionData), AtEnd(false) {
-    if (Data.size() >= 4) {
-      Magic = *reinterpret_cast<const ulittle32_t *>(Data.data());
-      Data = Data.drop_front(4);
-    }
-    next(); // Prime the pump
-  }
-
-  CodeViewTypeIterator() : AtEnd(true) {}
-
-  // For iterators to compare equal, they must both point at the same record
-  // in the same data stream, or they must both be at the end of a stream.
-  friend bool operator==(const CodeViewTypeIterator &lhs,
-                         const CodeViewTypeIterator &rhs);
-
-  friend bool operator!=(const CodeViewTypeIterator &lhs,
-                         const CodeViewTypeIterator &rhs);
-
-  unsigned getMagic() const { return Magic; }
-
-  const TypeRecord &operator*() const {
-    assert(!AtEnd);
-    return Current;
-  }
-
-  const TypeRecord *operator->() const {
-    assert(!AtEnd);
-    return &Current;
-  }
-
-  CodeViewTypeIterator operator++() {
-    next();
-    return *this;
-  }
-
-  CodeViewTypeIterator operator++(int) {
-    CodeViewTypeIterator Original = *this;
-    ++*this;
-    return Original;
-  }
-
-private:
-  void next() {
-    assert(!AtEnd && "Attempted to advance more than one past the last rec");
-    if (Data.empty()) {
-      // We've advanced past the last record.
-      AtEnd = true;
-      return;
-    }
-
-    const TypeRecordPrefix *Rec;
-    if (consumeObject(Data, Rec))
-      return;
-    Current.Length = Rec->Len;
-    Current.Leaf = static_cast<TypeLeafKind>(uint16_t(Rec->Leaf));
-    Current.LeafData = Data.substr(0, Current.Length - 2);
-
-    // The next record starts immediately after this one.
-    Data = Data.drop_front(Current.LeafData.size());
-
-    // FIXME: The stream contains LF_PAD bytes that we need to ignore, but those
-    // are typically included in LeafData. We may need to call skipPadding() if
-    // we ever find a record that doesn't count those bytes.
-
-    return;
-  }
-
-  StringRef Data;
-  unsigned Magic = 0;
-  TypeRecord Current;
-  bool AtEnd;
-};
-
-bool operator==(const CodeViewTypeIterator &lhs,
-                const CodeViewTypeIterator &rhs) {
-  return (lhs.Data.begin() == rhs.Data.begin()) || (lhs.AtEnd && rhs.AtEnd);
-}
-
-bool operator!=(const CodeViewTypeIterator &lhs,
-                const CodeViewTypeIterator &rhs) {
-  return !(lhs == rhs);
-}
-
-struct CodeViewTypeStream {
-  CodeViewTypeIterator begin;
-  CodeViewTypeIterator end;
-  unsigned Magic;
-};
-
-CodeViewTypeStream CreateCodeViewTypeIter(const StringRef &Data) {
-  CodeViewTypeStream Stream;
-  Stream.begin = CodeViewTypeIterator(Data);
-  Stream.end   = CodeViewTypeIterator();
-  Stream.Magic = Stream.begin.getMagic();
-
-  return Stream;
-}
 
 void COFFDumper::printCodeViewTypeSection(StringRef SectionName,
                                           const SectionRef &Section) {
@@ -2081,31 +1955,34 @@ void COFFDumper::printCodeViewTypeSection(StringRef SectionName,
   error(Section.getContents(Data));
   if (opts::CodeViewSubsectionBytes)
     W.printBinaryBlock("Data", Data);
-
   CVTD.dump(Data);
 }
 
 void CVTypeDumper::dump(StringRef Data) {
-  CodeViewTypeStream Stream = CreateCodeViewTypeIter(Data);
-  W.printHex("Magic", Stream.Magic);
+  uint32_t Magic;
+  if (consumeUInt32(Data, Magic))
+    return;
+  if (Magic != COFF::DEBUG_SECTION_MAGIC)
+    return;
 
-  for (auto Iter = Stream.begin; Iter != Stream.end; ++Iter) {
-    StringRef LeafData = Iter->LeafData;
+  W.printHex("Magic", Magic);
+  for (const auto &Record : makeTypeRange(Data)) {
+    StringRef LeafData = Record.LeafData;
 
     // Find the name of this leaf type.
-    StringRef LeafName = getLeafTypeName(Iter->Leaf);
+    StringRef LeafName = getLeafTypeName(Record.Leaf);
     DictScope S(W, LeafName);
     unsigned NextTypeIndex = 0x1000 + CVUDTNames.size();
-    W.printEnum("TypeLeafKind", unsigned(Iter->Leaf),
+    W.printEnum("TypeLeafKind", unsigned(Record.Leaf),
                 makeArrayRef(LeafTypeNames));
     W.printHex("TypeIndex", NextTypeIndex);
 
     // Fill this in inside the switch to get something in CVUDTNames.
     StringRef Name;
 
-    switch (Iter->Leaf) {
+    switch (Record.Leaf) {
     default: {
-      W.printHex("Size", Iter->Length);
+      W.printHex("Size", Record.Length);
       break;
     }
 
@@ -2121,7 +1998,7 @@ void CVTypeDumper::dump(StringRef Data) {
     }
 
     case LF_FIELDLIST: {
-      W.printHex("Size", Iter->Length);
+      W.printHex("Size", Record.Length);
       // FieldList has no fixed prefix that can be described with a struct. All
       // the bytes must be interpreted as more records.
       printCodeViewFieldList(LeafData);