[PDB] Emit index/offset pairs for TPI and IPI streams

Summary: This lets PDB readers look up type record data by type index in O(log n) time. It also makes `cvdump -t` work on PDBs produced by LLD. cvdump will not dump a PDB that doesn't have an index-to-offset table. The table is sorted by type index, and has an entry every 8KB. Looking up a type record by index is a binary search of this table, followed by a scan of at most 8KB. Reviewers: ruiu, zturner, inglorion Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D31636 llvm-svn: 299958
2024-11-24 11:42:57 +01:00 · 2017-04-11 16:26:15 +00:00 · 2017-04-11 16:26:15 +00:00 · ce574c8dd0
commit ce574c8dd0
parent 8663bd080f
4 changed files with 128 additions and 21 deletions
--- a/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
@ -13,6 +13,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/BinaryByteStream.h"
 #include "llvm/Support/BinaryItemStream.h"
@ -63,6 +64,7 @@ public:

 private:
  uint32_t calculateHashBufferSize() const;
+  uint32_t calculateIndexOffsetSize() const;
  Error finalize();

  msf::MSFBuilder &Msf;
@ -73,6 +75,7 @@ private:
  Optional<PdbRaw_TpiVer> VerHeader;
  std::vector<ArrayRef<uint8_t>> TypeRecords;
  std::vector<uint32_t> TypeHashes;
+  std::vector<TypeIndexOffset> TypeIndexOffsets;
  uint32_t HashStreamIndex = kInvalidStreamIndex;
  std::unique_ptr<BinaryByteStream> HashValueStream;

--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@ -95,9 +95,10 @@ Error TpiStream::reload() {
        Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex);
    BinaryStreamReader HSR(*HS);

+    // There should be a hash value for every type record, or no hashes at all.
    uint32_t NumHashValues =
        Header->HashValueBuffer.Length / sizeof(ulittle32_t);
-    if (NumHashValues != NumTypeRecords())
+    if (NumHashValues != NumTypeRecords() && NumHashValues != 0)
      return make_error<RawError>(
          raw_error_code::corrupt_file,
          "TPI hash count does not match with the number of type records.");
@ -124,8 +125,9 @@ Error TpiStream::reload() {

    // TPI hash table is a parallel array for the type records.
    // Verify that the hash values match with type records.
-    if (auto EC = verifyHashValues())
-      return EC;
+    if (NumHashValues > 0)
+      if (auto EC = verifyHashValues())
+        return EC;
  }

  return Error::success();
--- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@ -45,7 +45,17 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) {

 void TpiStreamBuilder::addTypeRecord(ArrayRef<uint8_t> Record,
                                     Optional<uint32_t> Hash) {
-  TypeRecordBytes += Record.size();
+  // If we just crossed an 8KB threshold, add a type index offset.
+  size_t NewSize = TypeRecordBytes + Record.size();
+  constexpr size_t EightKB = 8 * 1024;
+  if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) {
+    TypeIndexOffsets.push_back(
+        {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex +
+                             TypeRecords.size()),
+         ulittle32_t(TypeRecordBytes)});
+  }
+  TypeRecordBytes = NewSize;
+
  TypeRecords.push_back(Record);
  if (Hash)
    TypeHashes.push_back(*Hash);
@ -58,7 +68,6 @@ Error TpiStreamBuilder::finalize() {
  TpiStreamHeader *H = Allocator.Allocate<TpiStreamHeader>();

  uint32_t Count = TypeRecords.size();
-  uint32_t HashBufferSize = calculateHashBufferSize();

  H->Version = *VerHeader;
  H->HeaderSize = sizeof(TpiStreamHeader);
@ -75,11 +84,15 @@ Error TpiStreamBuilder::finalize() {
  // the `HashStreamIndex` field of the `TpiStreamHeader`.  Therefore, the data
  // begins at offset 0 of this independent stream.
  H->HashValueBuffer.Off = 0;
-  H->HashValueBuffer.Length = HashBufferSize;
+  H->HashValueBuffer.Length = calculateHashBufferSize();
+
+  // We never write any adjustments into our PDBs, so this is usually some
+  // offset with zero length.
  H->HashAdjBuffer.Off = H->HashValueBuffer.Off + H->HashValueBuffer.Length;
  H->HashAdjBuffer.Length = 0;
+
  H->IndexOffsetBuffer.Off = H->HashAdjBuffer.Off + H->HashAdjBuffer.Length;
-  H->IndexOffsetBuffer.Length = 0;
+  H->IndexOffsetBuffer.Length = calculateIndexOffsetSize();

  Header = H;
  return Error::success();
@ -90,34 +103,42 @@ uint32_t TpiStreamBuilder::calculateSerializedLength() {
 }

 uint32_t TpiStreamBuilder::calculateHashBufferSize() const {
-  assert(TypeHashes.size() == TypeHashes.size() &&
+  assert((TypeRecords.size() == TypeHashes.size() || TypeHashes.empty()) &&
         "either all or no type records should have hashes");
  return TypeHashes.size() * sizeof(ulittle32_t);
 }

+uint32_t TpiStreamBuilder::calculateIndexOffsetSize() const {
+  return TypeIndexOffsets.size() * sizeof(TypeIndexOffset);
+}
+
 Error TpiStreamBuilder::finalizeMsfLayout() {
  uint32_t Length = calculateSerializedLength();
  if (auto EC = Msf.setStreamSize(Idx, Length))
    return EC;

-  uint32_t HashBufferSize = calculateHashBufferSize();
+  uint32_t HashStreamSize =
+      calculateHashBufferSize() + calculateIndexOffsetSize();

-  if (HashBufferSize == 0)
+  if (HashStreamSize == 0)
    return Error::success();

-  auto ExpectedIndex = Msf.addStream(HashBufferSize);
+  auto ExpectedIndex = Msf.addStream(HashStreamSize);
  if (!ExpectedIndex)
    return ExpectedIndex.takeError();
  HashStreamIndex = *ExpectedIndex;
-  ulittle32_t *H = Allocator.Allocate<ulittle32_t>(TypeHashes.size());
-  MutableArrayRef<ulittle32_t> HashBuffer(H, TypeHashes.size());
-  for (uint32_t I = 0; I < TypeHashes.size(); ++I) {
-    HashBuffer[I] = TypeHashes[I] % MinTpiHashBuckets;
+  if (!TypeHashes.empty()) {
+    ulittle32_t *H = Allocator.Allocate<ulittle32_t>(TypeHashes.size());
+    MutableArrayRef<ulittle32_t> HashBuffer(H, TypeHashes.size());
+    for (uint32_t I = 0; I < TypeHashes.size(); ++I) {
+      HashBuffer[I] = TypeHashes[I] % MinTpiHashBuckets;
+    }
+    ArrayRef<uint8_t> Bytes(
+        reinterpret_cast<const uint8_t *>(HashBuffer.data()),
+        calculateHashBufferSize());
+    HashValueStream =
+        llvm::make_unique<BinaryByteStream>(Bytes, llvm::support::little);
  }
-  ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(HashBuffer.data()),
-                          HashBufferSize);
-  HashValueStream =
-      llvm::make_unique<BinaryByteStream>(Bytes, llvm::support::little);
  return Error::success();
 }

@ -141,8 +162,15 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
    auto HVS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
                                                              HashStreamIndex);
    BinaryStreamWriter HW(*HVS);
-    if (auto EC = HW.writeStreamRef(*HashValueStream))
-      return EC;
+    if (HashValueStream) {
+      if (auto EC = HW.writeStreamRef(*HashValueStream))
+        return EC;
+    }
+
+    for (auto &IndexOffset : TypeIndexOffsets) {
+      if (auto EC = HW.writeObject(IndexOffset))
+        return EC;
+    }
  }

  return Error::success();
--- a/test/DebugInfo/PDB/pdb-yaml-types.test
+++ b/test/DebugInfo/PDB/pdb-yaml-types.test
@ -0,0 +1,74 @@
+RUN: llvm-pdbdump pdb2yaml -tpi-stream %p/Inputs/big-read.pdb > %t.yaml
+RUN: FileCheck -check-prefix=YAML %s < %t.yaml
+RUN: llvm-pdbdump yaml2pdb %t.yaml -pdb %t.pdb
+RUN: llvm-pdbdump raw -tpi-records %t.pdb | FileCheck %s --check-prefix=PDB
+
+Only verify the beginning of the type stream.
+
+YAML:      TpiStream:       
+YAML-NEXT:   Version:         VC80
+YAML-NEXT:   Records:         
+YAML-NEXT:     - Kind:            LF_ARGLIST
+YAML-NEXT:       ArgList:         
+YAML-NEXT:         ArgIndices:      [  ]
+YAML-NEXT:     - Kind:            LF_PROCEDURE
+YAML-NEXT:       Procedure:       
+YAML-NEXT:         ReturnType:      3
+YAML-NEXT:         CallConv:        NearC
+YAML-NEXT:         Options:         [ None ]
+YAML-NEXT:         ParameterCount:  0
+YAML-NEXT:         ArgumentList:    4096
+YAML-NEXT:     - Kind:            LF_PROCEDURE
+YAML-NEXT:       Procedure:       
+YAML-NEXT:         ReturnType:      116
+YAML-NEXT:         CallConv:        NearC
+YAML-NEXT:         Options:         [ None ]
+YAML-NEXT:         ParameterCount:  0
+YAML-NEXT:         ArgumentList:    4096
+
+This test mostly checks that we include the type index offset
+table, and eventually hash codes. The type index offsets should be similar to
+those already present in big-read.pdb.
+
+PDB:      Type Info Stream (TPI) {
+PDB-NEXT:   TPI Version: 20040203
+PDB-NEXT:   Record count: 728
+PDB-NEXT:   Records [
+PDB-NEXT:     {
+PDB-NEXT:       ArgList (0x1000) {
+PDB-NEXT:         TypeLeafKind: LF_ARGLIST (0x1201)
+PDB-NEXT:         NumArgs: 0
+PDB-NEXT:         Arguments [
+PDB-NEXT:         ]
+PDB-NEXT:       }
+PDB-NEXT:     }
+PDB-NEXT:     {
+PDB-NEXT:       Procedure (0x1001) {
+PDB-NEXT:         TypeLeafKind: LF_PROCEDURE (0x1008)
+PDB-NEXT:         ReturnType: void (0x3)
+PDB-NEXT:         CallingConvention: NearC (0x0)
+PDB-NEXT:         FunctionOptions [ (0x0)
+PDB-NEXT:         ]
+PDB-NEXT:         NumParameters: 0
+PDB-NEXT:         ArgListType: () (0x1000)
+PDB-NEXT:       }
+PDB-NEXT:     }
+PDB-NEXT:     {
+PDB-NEXT:       Procedure (0x1002) {
+PDB-NEXT:         TypeLeafKind: LF_PROCEDURE (0x1008)
+PDB-NEXT:         ReturnType: int (0x74)
+PDB-NEXT:         CallingConvention: NearC (0x0)
+PDB-NEXT:         FunctionOptions [ (0x0)
+PDB-NEXT:         ]
+PDB-NEXT:         NumParameters: 0
+PDB-NEXT:         ArgListType: () (0x1000)
+PDB-NEXT:       }
+PDB-NEXT:     }
+...
+PDB:          TypeIndexOffsets [
+PDB-NEXT:       Index: 0x1000, Offset: 0
+PDB-NEXT:       Index: 0x106c, Offset: 8,116
+PDB-NEXT:       Index: 0x1118, Offset: 16,372
+PDB-NEXT:       Index: 0x11df, Offset: 24,564
+PDB-NEXT:       Index: 0x128e, Offset: 32,752
+PDB-NEXT:     ]