2017-01-20 00:31:24 +01:00
|
|
|
//===- llvm/unittest/DebugInfo/PDB/HashTableTest.cpp ----------------------===//
|
|
|
|
//
|
2019-01-19 09:50:56 +01:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2017-01-20 00:31:24 +01:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-01-25 23:48:57 +01:00
|
|
|
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
|
2018-03-15 18:38:26 +01:00
|
|
|
|
|
|
|
#include "llvm/DebugInfo/PDB/Native/Hash.h"
|
Fix emission of PDB string table.
This was originally reported as a bug with the symptom being "cvdump
crashes when printing an LLD-linked PDB that has an S_FILESTATIC record
in it". After some additional investigation, I determined that this was
a symptom of a larger problem, and in fact the real problem was in the
way we emitted the global PDB string table. As evidence of this, you can
take any lld-generated PDB, run cvdump -stringtable on it, and it would
return no results.
My hypothesis was that cvdump could not *find* the string table to begin
with. Normally it would do this by looking in the "named stream map",
finding the string /names, and using its value as the stream index. If
this lookup fails, then cvdump would fail to load the string table.
To test this hypothesis, I looked at the name stream map generated by a
link.exe PDB, and I emitted exactly those bytes into an LLD-generated
PDB. Suddenly, cvdump could read our string table!
This code has always been hacky and we knew there was something we
didn't understand. After all, there were some comments to the effect of
"we have to emit strings in a specific order, otherwise things don't
work". The key to fixing this was finally understanding this.
The way it works is that it makes use of a generic serializable hash map
that maps integers to other integers. In this case, the "key" is the
offset into a buffer, and the value is the stream number. If you index
into the buffer at the offset specified by a given key, you find the
name. The underlying cause of all these problems is that we were using
the identity function for the hash. i.e. if a string's offset in the
buffer was 12, the hash value was 12. Instead, we need to hash the
string *at that offset*. There is an additional catch, in that we have
to compute the hash as a uint32 and then truncate it to uint16.
Making this work is a little bit annoying, because we use the same hash
table in other places as well, and normally just using the identity
function for the hash function is actually what's desired. I'm not
totally happy with the template goo I came up with, but it works in any
case.
The reason we never found this bug through our own testing is because we
were building a /parallel/ hash table (in the form of an
llvm::StringMap<>) and doing all of our lookups and "real" hash table
work against that. I deleted all of that code and now everything goes
through the real hash table. Then, to test it, I added a unit test which
adds 7 strings and queries the associated values. I test every possible
insertion order permutation of these 7 strings, to verify that it really
does work as expected.
Differential Revision: https://reviews.llvm.org/D43326
llvm-svn: 325386
2018-02-16 21:46:04 +01:00
|
|
|
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
|
2018-03-15 18:38:26 +01:00
|
|
|
#include "llvm/Support/Allocator.h"
|
2017-03-02 21:52:51 +01:00
|
|
|
#include "llvm/Support/BinaryByteStream.h"
|
|
|
|
#include "llvm/Support/BinaryStreamReader.h"
|
|
|
|
#include "llvm/Support/BinaryStreamWriter.h"
|
2018-03-15 18:38:26 +01:00
|
|
|
#include "llvm/Support/StringSaver.h"
|
2017-06-14 18:41:50 +02:00
|
|
|
#include "llvm/Testing/Support/Error.h"
|
|
|
|
|
|
|
|
#include "gtest/gtest.h"
|
2017-01-20 00:31:24 +01:00
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace llvm::pdb;
|
2017-02-28 01:04:07 +01:00
|
|
|
using namespace llvm::support;
|
2017-01-20 00:31:24 +01:00
|
|
|
|
|
|
|
namespace {
|
2018-03-15 18:38:26 +01:00
|
|
|
|
2019-07-13 01:30:55 +02:00
|
|
|
// Trivial hash traits: the lookup key, storage key, and hash value are all
// the same uint32_t. Gives tests deterministic, transparent bucket placement.
struct IdentityHashTraits {
  // The hash of a key is the key itself.
  uint32_t hashLookupKey(uint32_t N) const { return N; }
  // Storage and lookup keys share a representation, so both conversions are
  // no-ops.
  uint32_t storageKeyToLookupKey(uint32_t N) const { return N; }
  uint32_t lookupKeyToStorageKey(uint32_t N) { return N; }
};
|
|
|
|
|
|
|
|
template <class T = uint32_t>
|
|
|
|
class HashTableInternals : public HashTable<T> {
|
2017-01-20 00:31:24 +01:00
|
|
|
public:
|
2019-07-13 01:30:55 +02:00
|
|
|
using HashTable<T>::Buckets;
|
|
|
|
using HashTable<T>::Present;
|
|
|
|
using HashTable<T>::Deleted;
|
2017-01-20 00:31:24 +01:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert a single key/value pair and verify size, lookup, and retrieval.
TEST(HashTableTest, TestSimple) {
  HashTableInternals<> Map;
  EXPECT_GT(Map.capacity(), 0u);
  EXPECT_EQ(0u, Map.size());

  IdentityHashTraits IdTraits;
  Map.set_as(3u, 7, IdTraits);

  // Exactly one entry, findable and holding the value we stored.
  EXPECT_EQ(1u, Map.size());
  ASSERT_NE(Map.end(), Map.find_as(3u, IdTraits));
  EXPECT_EQ(7u, Map.get(3u, IdTraits));
}
|
|
|
|
|
|
|
|
// Insert two keys chosen to land in the same bucket and verify both survive.
TEST(HashTableTest, TestCollision) {
  HashTableInternals<> Map;
  EXPECT_GT(Map.capacity(), 0u);
  EXPECT_EQ(0u, Map.size());

  // This leans on implementation details: the second key is derived from the
  // first via the table's capacity so the two should collide in bucket
  // selection (presumably keys reduce modulo capacity -- confirm against
  // HashTable's probing code).
  uint32_t KeyA = Map.capacity() + 1;
  uint32_t KeyB = 2 * KeyA;

  IdentityHashTraits IdTraits;
  Map.set_as(KeyA, 7, IdTraits);
  Map.set_as(KeyB, 12, IdTraits);

  // Both entries must coexist despite the collision.
  EXPECT_EQ(2u, Map.size());
  ASSERT_NE(Map.end(), Map.find_as(KeyA, IdTraits));
  ASSERT_NE(Map.end(), Map.find_as(KeyB, IdTraits));

  EXPECT_EQ(7u, Map.get(KeyA, IdTraits));
  EXPECT_EQ(12u, Map.get(KeyB, IdTraits));
}
|
|
|
|
|
|
|
|
// NOTE(review): despite its name, this test exercises no removal -- it only
// inserts two pairs and reads them back. Confirm whether a remove API still
// exists on HashTable and should be covered here.
TEST(HashTableTest, TestRemove) {
  HashTableInternals<> Map;
  EXPECT_GT(Map.capacity(), 0u);
  EXPECT_EQ(0u, Map.size());

  IdentityHashTraits IdTraits;
  Map.set_as(1u, 2, IdTraits);
  Map.set_as(3u, 4, IdTraits);

  // Two distinct entries, both findable with their stored values.
  EXPECT_EQ(2u, Map.size());
  ASSERT_NE(Map.end(), Map.find_as(1u, IdTraits));
  ASSERT_NE(Map.end(), Map.find_as(3u, IdTraits));

  EXPECT_EQ(2u, Map.get(1u, IdTraits));
  EXPECT_EQ(4u, Map.get(3u, IdTraits));
}
|
|
|
|
|
|
|
|
// Probing scans forward for the first free slot, and a slot may already be
// occupied by an entry with a *different* hash. Verify lookups still find
// every value when a probe chain crosses such a slot.
TEST(HashTableTest, TestCollisionAfterMultipleProbes) {
  HashTableInternals<> Map;
  EXPECT_GT(Map.capacity(), 0u);
  EXPECT_EQ(0u, Map.size());

  // K1 and K3 are derived from the capacity so they contend for slots; K2
  // sits immediately after K1, occupying the slot K3's probe would try first.
  uint32_t K1 = Map.capacity() + 1;
  uint32_t K2 = K1 + 1;
  uint32_t K3 = 2 * K1;

  IdentityHashTraits IdTraits;
  Map.set_as(K1, 7, IdTraits);
  Map.set_as(K2, 11, IdTraits);
  Map.set_as(K3, 13, IdTraits);

  // All three entries must be present and retrievable.
  EXPECT_EQ(3u, Map.size());
  ASSERT_NE(Map.end(), Map.find_as(K1, IdTraits));
  ASSERT_NE(Map.end(), Map.find_as(K2, IdTraits));
  ASSERT_NE(Map.end(), Map.find_as(K3, IdTraits));

  EXPECT_EQ(7u, Map.get(K1, IdTraits));
  EXPECT_EQ(11u, Map.get(K2, IdTraits));
  EXPECT_EQ(13u, Map.get(K3, IdTraits));
}
|
|
|
|
|
|
|
|
// Insert `capacity` items -- enough, regardless of the load factor, to force
// at least one grow. Afterwards the size must match what was inserted, the
// capacity must have increased, and every original item must still be
// retrievable.
TEST(HashTableTest, Grow) {
  HashTableInternals<> Map;
  IdentityHashTraits IdTraits;
  uint32_t OldCapacity = Map.capacity();

  // Keys and values are simple affine functions of the index so the
  // verification loop below can recompute them.
  for (uint32_t Idx = 0; Idx < OldCapacity; ++Idx)
    Map.set_as(OldCapacity + Idx * 2 + 1, Idx * 2 + 3, IdTraits);

  EXPECT_EQ(OldCapacity, Map.size());
  EXPECT_GT(Map.capacity(), OldCapacity);

  for (uint32_t Idx = 0; Idx < OldCapacity; ++Idx) {
    uint32_t Key = OldCapacity + Idx * 2 + 1;
    ASSERT_NE(Map.end(), Map.find_as(Key, IdTraits));
    EXPECT_EQ(Idx * 2 + 3, Map.get(Key, IdTraits));
  }
}
|
|
|
|
|
|
|
|
// Round-trip a fully populated table through commit() and load(), and verify
// the reloaded table's internal state is identical to the original's.
TEST(HashTableTest, Serialization) {
  HashTableInternals<> Original;
  IdentityHashTraits IdTraits;
  uint32_t Cap = Original.capacity();
  for (uint32_t Idx = 0; Idx < Cap; ++Idx)
    Original.set_as(Cap + Idx * 2 + 1, Idx * 2 + 3, IdTraits);

  std::vector<uint8_t> Buffer(Original.calculateSerializedLength());
  MutableBinaryByteStream Stream(Buffer, little);
  BinaryStreamWriter Writer(Stream);
  EXPECT_THAT_ERROR(Original.commit(Writer), Succeeded());
  // commit() must emit exactly the number of bytes predicted by
  // calculateSerializedLength().
  EXPECT_EQ(Buffer.size(), Writer.getOffset());

  HashTableInternals<> RoundTripped;
  BinaryStreamReader Reader(Stream);
  EXPECT_THAT_ERROR(RoundTripped.load(Reader), Succeeded());
  // load() must consume exactly the bytes that were written.
  EXPECT_EQ(Buffer.size(), Reader.getOffset());

  // Size, capacity, and the raw bucket/presence/deletion state must all
  // survive the round trip unchanged.
  EXPECT_EQ(Original.size(), RoundTripped.size());
  EXPECT_EQ(Original.capacity(), RoundTripped.capacity());
  EXPECT_EQ(Original.Buckets, RoundTripped.Buckets);
  EXPECT_EQ(Original.Present, RoundTripped.Present);
  EXPECT_EQ(Original.Deleted, RoundTripped.Deleted);
}
|
Fix emission of PDB string table.
This was originally reported as a bug with the symptom being "cvdump
crashes when printing an LLD-linked PDB that has an S_FILESTATIC record
in it". After some additional investigation, I determined that this was
a symptom of a larger problem, and in fact the real problem was in the
way we emitted the global PDB string table. As evidence of this, you can
take any lld-generated PDB, run cvdump -stringtable on it, and it would
return no results.
My hypothesis was that cvdump could not *find* the string table to begin
with. Normally it would do this by looking in the "named stream map",
finding the string /names, and using its value as the stream index. If
this lookup fails, then cvdump would fail to load the string table.
To test this hypothesis, I looked at the name stream map generated by a
link.exe PDB, and I emitted exactly those bytes into an LLD-generated
PDB. Suddenly, cvdump could read our string table!
This code has always been hacky and we knew there was something we
didn't understand. After all, there were some comments to the effect of
"we have to emit strings in a specific order, otherwise things don't
work". The key to fixing this was finally understanding this.
The way it works is that it makes use of a generic serializable hash map
that maps integers to other integers. In this case, the "key" is the
offset into a buffer, and the value is the stream number. If you index
into the buffer at the offset specified by a given key, you find the
name. The underlying cause of all these problems is that we were using
the identity function for the hash. i.e. if a string's offset in the
buffer was 12, the hash value was 12. Instead, we need to hash the
string *at that offset*. There is an additional catch, in that we have
to compute the hash as a uint32 and then truncate it to uint16.
Making this work is a little bit annoying, because we use the same hash
table in other places as well, and normally just using the identity
function for the hash function is actually what's desired. I'm not
totally happy with the template goo I came up with, but it works in any
case.
The reason we never found this bug through our own testing is because we
were building a /parallel/ hash table (in the form of an
llvm::StringMap<>) and doing all of our lookups and "real" hash table
work against that. I deleted all of that code and now everything goes
through the real hash table. Then, to test it, I added a unit test which
adds 7 strings and queries the associated values. I test every possible
insertion order permutation of these 7 strings, to verify that it really
does work as expected.
Differential Revision: https://reviews.llvm.org/D43326
llvm-svn: 325386
2018-02-16 21:46:04 +01:00
|
|
|
|
|
|
|
TEST(HashTableTest, NamedStreamMap) {
|
|
|
|
std::vector<StringRef> Streams = {"One", "Two", "Three", "Four",
|
|
|
|
"Five", "Six", "Seven"};
|
|
|
|
StringMap<uint32_t> ExpectedIndices;
|
|
|
|
for (uint32_t I = 0; I < Streams.size(); ++I)
|
|
|
|
ExpectedIndices[Streams[I]] = I + 1;
|
|
|
|
|
|
|
|
// To verify the hash table actually works, we want to verify that insertion
|
|
|
|
// order doesn't matter. So try inserting in every possible order of 7 items.
|
|
|
|
do {
|
|
|
|
NamedStreamMap NSM;
|
|
|
|
for (StringRef S : Streams)
|
|
|
|
NSM.set(S, ExpectedIndices[S]);
|
|
|
|
|
|
|
|
EXPECT_EQ(Streams.size(), NSM.size());
|
|
|
|
|
|
|
|
uint32_t N;
|
|
|
|
EXPECT_TRUE(NSM.get("One", N));
|
2018-02-16 23:46:45 +01:00
|
|
|
EXPECT_EQ(1U, N);
|
Fix emission of PDB string table.
This was originally reported as a bug with the symptom being "cvdump
crashes when printing an LLD-linked PDB that has an S_FILESTATIC record
in it". After some additional investigation, I determined that this was
a symptom of a larger problem, and in fact the real problem was in the
way we emitted the global PDB string table. As evidence of this, you can
take any lld-generated PDB, run cvdump -stringtable on it, and it would
return no results.
My hypothesis was that cvdump could not *find* the string table to begin
with. Normally it would do this by looking in the "named stream map",
finding the string /names, and using its value as the stream index. If
this lookup fails, then cvdump would fail to load the string table.
To test this hypothesis, I looked at the name stream map generated by a
link.exe PDB, and I emitted exactly those bytes into an LLD-generated
PDB. Suddenly, cvdump could read our string table!
This code has always been hacky and we knew there was something we
didn't understand. After all, there were some comments to the effect of
"we have to emit strings in a specific order, otherwise things don't
work". The key to fixing this was finally understanding this.
The way it works is that it makes use of a generic serializable hash map
that maps integers to other integers. In this case, the "key" is the
offset into a buffer, and the value is the stream number. If you index
into the buffer at the offset specified by a given key, you find the
name. The underlying cause of all these problems is that we were using
the identity function for the hash. i.e. if a string's offset in the
buffer was 12, the hash value was 12. Instead, we need to hash the
string *at that offset*. There is an additional catch, in that we have
to compute the hash as a uint32 and then truncate it to uint16.
Making this work is a little bit annoying, because we use the same hash
table in other places as well, and normally just using the identity
function for the hash function is actually what's desired. I'm not
totally happy with the template goo I came up with, but it works in any
case.
The reason we never found this bug through our own testing is because we
were building a /parallel/ hash table (in the form of an
llvm::StringMap<>) and doing all of our lookups and "real" hash table
work against that. I deleted all of that code and now everything goes
through the real hash table. Then, to test it, I added a unit test which
adds 7 strings and queries the associated values. I test every possible
insertion order permutation of these 7 strings, to verify that it really
does work as expected.
Differential Revision: https://reviews.llvm.org/D43326
llvm-svn: 325386
2018-02-16 21:46:04 +01:00
|
|
|
|
|
|
|
EXPECT_TRUE(NSM.get("Two", N));
|
2018-02-16 23:46:45 +01:00
|
|
|
EXPECT_EQ(2U, N);
|
Fix emission of PDB string table.
This was originally reported as a bug with the symptom being "cvdump
crashes when printing an LLD-linked PDB that has an S_FILESTATIC record
in it". After some additional investigation, I determined that this was
a symptom of a larger problem, and in fact the real problem was in the
way we emitted the global PDB string table. As evidence of this, you can
take any lld-generated PDB, run cvdump -stringtable on it, and it would
return no results.
My hypothesis was that cvdump could not *find* the string table to begin
with. Normally it would do this by looking in the "named stream map",
finding the string /names, and using its value as the stream index. If
this lookup fails, then cvdump would fail to load the string table.
To test this hypothesis, I looked at the name stream map generated by a
link.exe PDB, and I emitted exactly those bytes into an LLD-generated
PDB. Suddenly, cvdump could read our string table!
This code has always been hacky and we knew there was something we
didn't understand. After all, there were some comments to the effect of
"we have to emit strings in a specific order, otherwise things don't
work". The key to fixing this was finally understanding this.
The way it works is that it makes use of a generic serializable hash map
that maps integers to other integers. In this case, the "key" is the
offset into a buffer, and the value is the stream number. If you index
into the buffer at the offset specified by a given key, you find the
name. The underlying cause of all these problems is that we were using
the identity function for the hash. i.e. if a string's offset in the
buffer was 12, the hash value was 12. Instead, we need to hash the
string *at that offset*. There is an additional catch, in that we have
to compute the hash as a uint32 and then truncate it to uint16.
Making this work is a little bit annoying, because we use the same hash
table in other places as well, and normally just using the identity
function for the hash function is actually what's desired. I'm not
totally happy with the template goo I came up with, but it works in any
case.
The reason we never found this bug through our own testing is because we
were building a /parallel/ hash table (in the form of an
llvm::StringMap<>) and doing all of our lookups and "real" hash table
work against that. I deleted all of that code and now everything goes
through the real hash table. Then, to test it, I added a unit test which
adds 7 strings and queries the associated values. I test every possible
insertion order permutation of these 7 strings, to verify that it really
does work as expected.
Differential Revision: https://reviews.llvm.org/D43326
llvm-svn: 325386
2018-02-16 21:46:04 +01:00
|
|
|
|
|
|
|
EXPECT_TRUE(NSM.get("Three", N));
|
2018-02-16 23:46:45 +01:00
|
|
|
EXPECT_EQ(3U, N);
|
Fix emission of PDB string table.
This was originally reported as a bug with the symptom being "cvdump
crashes when printing an LLD-linked PDB that has an S_FILESTATIC record
in it". After some additional investigation, I determined that this was
a symptom of a larger problem, and in fact the real problem was in the
way we emitted the global PDB string table. As evidence of this, you can
take any lld-generated PDB, run cvdump -stringtable on it, and it would
return no results.
My hypothesis was that cvdump could not *find* the string table to begin
with. Normally it would do this by looking in the "named stream map",
finding the string /names, and using its value as the stream index. If
this lookup fails, then cvdump would fail to load the string table.
To test this hypothesis, I looked at the name stream map generated by a
link.exe PDB, and I emitted exactly those bytes into an LLD-generated
PDB. Suddenly, cvdump could read our string table!
This code has always been hacky and we knew there was something we
didn't understand. After all, there were some comments to the effect of
"we have to emit strings in a specific order, otherwise things don't
work". The key to fixing this was finally understanding this.
The way it works is that it makes use of a generic serializable hash map
that maps integers to other integers. In this case, the "key" is the
offset into a buffer, and the value is the stream number. If you index
into the buffer at the offset specified by a given key, you find the
name. The underlying cause of all these problems is that we were using
the identity function for the hash. i.e. if a string's offset in the
buffer was 12, the hash value was 12. Instead, we need to hash the
string *at that offset*. There is an additional catch, in that we have
to compute the hash as a uint32 and then truncate it to uint16.
Making this work is a little bit annoying, because we use the same hash
table in other places as well, and normally just using the identity
function for the hash function is actually what's desired. I'm not
totally happy with the template goo I came up with, but it works in any
case.
The reason we never found this bug through our own testing is because we
were building a /parallel/ hash table (in the form of an
llvm::StringMap<>) and doing all of our lookups and "real" hash table
work against that. I deleted all of that code and now everything goes
through the real hash table. Then, to test it, I added a unit test which
adds 7 strings and queries the associated values. I test every possible
insertion order permutation of these 7 strings, to verify that it really
does work as expected.
Differential Revision: https://reviews.llvm.org/D43326
llvm-svn: 325386
2018-02-16 21:46:04 +01:00
|
|
|
|
|
|
|
EXPECT_TRUE(NSM.get("Four", N));
|
2018-02-16 23:46:45 +01:00
|
|
|
EXPECT_EQ(4U, N);
|
Fix emission of PDB string table.
This was originally reported as a bug with the symptom being "cvdump
crashes when printing an LLD-linked PDB that has an S_FILESTATIC record
in it". After some additional investigation, I determined that this was
a symptom of a larger problem, and in fact the real problem was in the
way we emitted the global PDB string table. As evidence of this, you can
take any lld-generated PDB, run cvdump -stringtable on it, and it would
return no results.
My hypothesis was that cvdump could not *find* the string table to begin
with. Normally it would do this by looking in the "named stream map",
finding the string /names, and using its value as the stream index. If
this lookup fails, then cvdump would fail to load the string table.
To test this hypothesis, I looked at the name stream map generated by a
link.exe PDB, and I emitted exactly those bytes into an LLD-generated
PDB. Suddenly, cvdump could read our string table!
This code has always been hacky and we knew there was something we
didn't understand. After all, there were some comments to the effect of
"we have to emit strings in a specific order, otherwise things don't
work". The key to fixing this was finally understanding this.
The way it works is that it makes use of a generic serializable hash map
that maps integers to other integers. In this case, the "key" is the
offset into a buffer, and the value is the stream number. If you index
into the buffer at the offset specified by a given key, you find the
name. The underlying cause of all these problems is that we were using
the identity function for the hash. i.e. if a string's offset in the
buffer was 12, the hash value was 12. Instead, we need to hash the
string *at that offset*. There is an additional catch, in that we have
to compute the hash as a uint32 and then truncate it to uint16.
Making this work is a little bit annoying, because we use the same hash
table in other places as well, and normally just using the identity
function for the hash function is actually what's desired. I'm not
totally happy with the template goo I came up with, but it works in any
case.
The reason we never found this bug through our own testing is because we
were building a /parallel/ hash table (in the form of an
llvm::StringMap<>) and doing all of our lookups and "real" hash table
work against that. I deleted all of that code and now everything goes
through the real hash table. Then, to test it, I added a unit test which
adds 7 strings and queries the associated values. I test every possible
insertion order permutation of these 7 strings, to verify that it really
does work as expected.
Differential Revision: https://reviews.llvm.org/D43326
llvm-svn: 325386
2018-02-16 21:46:04 +01:00
|
|
|
|
|
|
|
EXPECT_TRUE(NSM.get("Five", N));
|
2018-02-16 23:46:45 +01:00
|
|
|
EXPECT_EQ(5U, N);
|
Fix emission of PDB string table.
This was originally reported as a bug with the symptom being "cvdump
crashes when printing an LLD-linked PDB that has an S_FILESTATIC record
in it". After some additional investigation, I determined that this was
a symptom of a larger problem, and in fact the real problem was in the
way we emitted the global PDB string table. As evidence of this, you can
take any lld-generated PDB, run cvdump -stringtable on it, and it would
return no results.
My hypothesis was that cvdump could not *find* the string table to begin
with. Normally it would do this by looking in the "named stream map",
finding the string /names, and using its value as the stream index. If
this lookup fails, then cvdump would fail to load the string table.
To test this hypothesis, I looked at the name stream map generated by a
link.exe PDB, and I emitted exactly those bytes into an LLD-generated
PDB. Suddenly, cvdump could read our string table!
This code has always been hacky and we knew there was something we
didn't understand. After all, there were some comments to the effect of
"we have to emit strings in a specific order, otherwise things don't
work". The key to fixing this was finally understanding this.
The way it works is that it makes use of a generic serializable hash map
that maps integers to other integers. In this case, the "key" is the
offset into a buffer, and the value is the stream number. If you index
into the buffer at the offset specified by a given key, you find the
name. The underlying cause of all these problems is that we were using
the identity function for the hash. i.e. if a string's offset in the
buffer was 12, the hash value was 12. Instead, we need to hash the
string *at that offset*. There is an additional catch, in that we have
to compute the hash as a uint32 and then truncate it to uint16.
Making this work is a little bit annoying, because we use the same hash
table in other places as well, and normally just using the identity
function for the hash function is actually what's desired. I'm not
totally happy with the template goo I came up with, but it works in any
case.
The reason we never found this bug through our own testing is because we
were building a /parallel/ hash table (in the form of an
llvm::StringMap<>) and doing all of our lookups and "real" hash table
work against that. I deleted all of that code and now everything goes
through the real hash table. Then, to test it, I added a unit test which
adds 7 strings and queries the associated values. I test every possible
insertion order permutation of these 7 strings, to verify that it really
does work as expected.
Differential Revision: https://reviews.llvm.org/D43326
llvm-svn: 325386
2018-02-16 21:46:04 +01:00
|
|
|
|
|
|
|
EXPECT_TRUE(NSM.get("Six", N));
|
2018-02-16 23:46:45 +01:00
|
|
|
EXPECT_EQ(6U, N);
|
Fix emission of PDB string table.
This was originally reported as a bug with the symptom being "cvdump
crashes when printing an LLD-linked PDB that has an S_FILESTATIC record
in it". After some additional investigation, I determined that this was
a symptom of a larger problem, and in fact the real problem was in the
way we emitted the global PDB string table. As evidence of this, you can
take any lld-generated PDB, run cvdump -stringtable on it, and it would
return no results.
My hypothesis was that cvdump could not *find* the string table to begin
with. Normally it would do this by looking in the "named stream map",
finding the string /names, and using its value as the stream index. If
this lookup fails, then cvdump would fail to load the string table.
To test this hypothesis, I looked at the name stream map generated by a
link.exe PDB, and I emitted exactly those bytes into an LLD-generated
PDB. Suddenly, cvdump could read our string table!
This code has always been hacky and we knew there was something we
didn't understand. After all, there were some comments to the effect of
"we have to emit strings in a specific order, otherwise things don't
work". The key to fixing this was finally understanding this.
The way it works is that it makes use of a generic serializable hash map
that maps integers to other integers. In this case, the "key" is the
offset into a buffer, and the value is the stream number. If you index
into the buffer at the offset specified by a given key, you find the
name. The underlying cause of all these problems is that we were using
the identity function for the hash. i.e. if a string's offset in the
buffer was 12, the hash value was 12. Instead, we need to hash the
string *at that offset*. There is an additional catch, in that we have
to compute the hash as a uint32 and then truncate it to uint16.
Making this work is a little bit annoying, because we use the same hash
table in other places as well, and normally just using the identity
function for the hash function is actually what's desired. I'm not
totally happy with the template goo I came up with, but it works in any
case.
The reason we never found this bug through our own testing is because we
were building a /parallel/ hash table (in the form of an
llvm::StringMap<>) and doing all of our lookups and "real" hash table
work against that. I deleted all of that code and now everything goes
through the real hash table. Then, to test it, I added a unit test which
adds 7 strings and queries the associated values. I test every possible
insertion order permutation of these 7 strings, to verify that it really
does work as expected.
Differential Revision: https://reviews.llvm.org/D43326
llvm-svn: 325386
2018-02-16 21:46:04 +01:00
|
|
|
|
|
|
|
EXPECT_TRUE(NSM.get("Seven", N));
|
2018-02-16 23:46:45 +01:00
|
|
|
EXPECT_EQ(7U, N);
|
Fix emission of PDB string table.
This was originally reported as a bug with the symptom being "cvdump
crashes when printing an LLD-linked PDB that has an S_FILESTATIC record
in it". After some additional investigation, I determined that this was
a symptom of a larger problem, and in fact the real problem was in the
way we emitted the global PDB string table. As evidence of this, you can
take any lld-generated PDB, run cvdump -stringtable on it, and it would
return no results.
My hypothesis was that cvdump could not *find* the string table to begin
with. Normally it would do this by looking in the "named stream map",
finding the string /names, and using its value as the stream index. If
this lookup fails, then cvdump would fail to load the string table.
To test this hypothesis, I looked at the name stream map generated by a
link.exe PDB, and I emitted exactly those bytes into an LLD-generated
PDB. Suddenly, cvdump could read our string table!
This code has always been hacky and we knew there was something we
didn't understand. After all, there were some comments to the effect of
"we have to emit strings in a specific order, otherwise things don't
work". The key to fixing this was finally understanding this.
The way it works is that it makes use of a generic serializable hash map
that maps integers to other integers. In this case, the "key" is the
offset into a buffer, and the value is the stream number. If you index
into the buffer at the offset specified by a given key, you find the
name. The underlying cause of all these problems is that we were using
the identity function for the hash. i.e. if a string's offset in the
buffer was 12, the hash value was 12. Instead, we need to hash the
string *at that offset*. There is an additional catch, in that we have
to compute the hash as a uint32 and then truncate it to uint16.
Making this work is a little bit annoying, because we use the same hash
table in other places as well, and normally just using the identity
function for the hash function is actually what's desired. I'm not
totally happy with the template goo I came up with, but it works in any
case.
The reason we never found this bug through our own testing is because we
were building a /parallel/ hash table (in the form of an
llvm::StringMap<>) and doing all of our lookups and "real" hash table
work against that. I deleted all of that code and now everything goes
through the real hash table. Then, to test it, I added a unit test which
adds 7 strings and queries the associated values. I test every possible
insertion order permutation of these 7 strings, to verify that it really
does work as expected.
Differential Revision: https://reviews.llvm.org/D43326
llvm-svn: 325386
2018-02-16 21:46:04 +01:00
|
|
|
} while (std::next_permutation(Streams.begin(), Streams.end()));
|
|
|
|
}
|
2018-03-15 18:38:26 +01:00
|
|
|
|
|
|
|
// Simple two-field aggregate used as a non-trivial value type by the hash
// table round-trip test below.  Equality is member-wise over both fields.
struct FooBar {
  uint32_t X;
  uint32_t Y;

  // Two FooBars compare equal iff both corresponding fields match.
  bool operator==(const FooBar &RHS) const {
    if (X != RHS.X)
      return false;
    return Y == RHS.Y;
  }
};
|
2018-03-15 18:38:26 +01:00
|
|
|
|
2019-07-13 01:30:55 +02:00
|
|
|
struct FooBarHashTraits {
|
2018-03-15 18:38:26 +01:00
|
|
|
std::vector<char> Buffer;
|
|
|
|
|
2019-07-13 01:30:55 +02:00
|
|
|
FooBarHashTraits() { Buffer.push_back(0); }
|
2018-03-15 18:38:26 +01:00
|
|
|
|
|
|
|
uint32_t hashLookupKey(StringRef S) const {
|
|
|
|
return llvm::pdb::hashStringV1(S);
|
|
|
|
}
|
|
|
|
|
|
|
|
StringRef storageKeyToLookupKey(uint32_t N) const {
|
|
|
|
if (N >= Buffer.size())
|
|
|
|
return StringRef();
|
|
|
|
|
|
|
|
return StringRef(Buffer.data() + N);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t lookupKeyToStorageKey(StringRef S) {
|
|
|
|
uint32_t N = Buffer.size();
|
|
|
|
Buffer.insert(Buffer.end(), S.begin(), S.end());
|
|
|
|
Buffer.push_back('\0');
|
|
|
|
return N;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Round-trip a hash table whose value type is a multi-field struct: fill
// the table up to its initial capacity, serialize it to a byte buffer,
// deserialize that buffer into a fresh table, and verify the reloaded
// table's contents and internal state match the original exactly.
TEST(HashTableTest, NonTrivialValueType) {
  HashTableInternals<FooBar> Table;
  FooBarHashTraits Traits;
  const uint32_t Count = Table.capacity();
  for (uint32_t Idx = 0; Idx < Count; ++Idx) {
    FooBar Value;
    Value.X = Idx;
    Value.Y = Idx + 1;
    Table.set_as(utostr(Idx), Value, Traits);
  }

  std::vector<uint8_t> Bytes(Table.calculateSerializedLength());
  MutableBinaryByteStream ByteStream(Bytes, little);
  BinaryStreamWriter Writer(ByteStream);
  EXPECT_THAT_ERROR(Table.commit(Writer), Succeeded());
  // Committing must consume precisely the byte count computed up front.
  EXPECT_EQ(Bytes.size(), Writer.getOffset());

  HashTableInternals<FooBar> Loaded;
  BinaryStreamReader Reader(ByteStream);
  EXPECT_THAT_ERROR(Loaded.load(Reader), Succeeded());
  // Loading must consume precisely the bytes that were written.
  EXPECT_EQ(Bytes.size(), Reader.getOffset());

  // The reloaded table must be indistinguishable from the original,
  // including its internal bucket and presence/deletion bookkeeping.
  EXPECT_EQ(Table.size(), Loaded.size());
  EXPECT_EQ(Table.capacity(), Loaded.capacity());
  EXPECT_EQ(Table.Buckets, Loaded.Buckets);
  EXPECT_EQ(Table.Present, Loaded.Present);
  EXPECT_EQ(Table.Deleted, Loaded.Deleted);
}
|