Re-land: "[Support] Replace HashString with djbHash."

This patch removes the HashString function from StringExtraces and replaces its uses with calls to djbHash from DJB.h. This change is *almost* NFC. While the algorithm is identical, the djbHash implementation in StringExtras used 0 as its default seed while the implementation in DJB uses 5381. The latter has been shown to result in less collisions and improved avalanching and is used by the DWARF accelerator tables. Because some test were implicitly relying on the hash order, I've reverted to using zero as a seed for the following two files: lld/include/lld/Core/SymbolTable.h llvm/lib/Support/StringMap.cpp Differential revision: https://reviews.llvm.org/D43615 llvm-svn: 326091
2024-11-22 10:42:39 +01:00 · 2018-02-26 15:16:42 +00:00 · 2018-02-26 15:16:42 +00:00 · e115cbbdd0
commit e115cbbdd0
parent 3506872ca3
2 changed files with 20 additions and 32 deletions
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@ -232,19 +232,6 @@ void SplitString(StringRef Source,
                 SmallVectorImpl<StringRef> &OutFragments,
                 StringRef Delimiters = " \t\n\v\f\r");

-/// HashString - Hash function for strings.
-///
-/// This is the Bernstein hash function.
-//
-// FIXME: Investigate whether a modified bernstein hash function performs
-// better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
-//   X*33+c -> X*33^c
-inline unsigned HashString(StringRef Str, unsigned Result = 0) {
-  for (StringRef::size_type i = 0, e = Str.size(); i != e; ++i)
-    Result = Result * 33 + (unsigned char)Str[i];
-  return Result;
-}
-
 /// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th).
 inline StringRef getOrdinalSuffix(unsigned Val) {
  // It is critically important that we do this perfectly for
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@ -14,6 +14,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>

@ -32,7 +33,7 @@ static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {

 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
  ItemSize = itemSize;
-  
+
  // If a size is specified, initialize the table with that many buckets.
  if (InitSize) {
    // The table will grow when the number of entries reach 3/4 of the number of
@ -41,7 +42,7 @@ StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
    init(getMinBucketToReserveForEntries(InitSize));
    return;
  }
-  
+
  // Otherwise, initialize it with zero buckets to avoid the allocation.
  TheTable = nullptr;
  NumBuckets = 0;
@ -56,7 +57,7 @@ void StringMapImpl::init(unsigned InitSize) {
  unsigned NewNumBuckets = InitSize ? InitSize : 16;
  NumItems = 0;
  NumTombstones = 0;
-  
+
  TheTable = static_cast<StringMapEntryBase **>(
      std::calloc(NewNumBuckets+1,
                  sizeof(StringMapEntryBase **) + sizeof(unsigned)));
@ -82,7 +83,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
    init(16);
    HTSize = NumBuckets;
  }
-  unsigned FullHashValue = HashString(Name);
+  unsigned FullHashValue = djbHash(Name, 0);
  unsigned BucketNo = FullHashValue & (HTSize-1);
  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);

@ -98,11 +99,11 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
        HashTable[FirstTombstone] = FullHashValue;
        return FirstTombstone;
      }
-      
+
      HashTable[BucketNo] = FullHashValue;
      return BucketNo;
    }
-    
+
    if (BucketItem == getTombstoneVal()) {
      // Skip over tombstones.  However, remember the first one we see.
      if (FirstTombstone == -1) FirstTombstone = BucketNo;
@ -111,7 +112,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
      // case here is that we are only looking at the buckets (for item info
      // being non-null and for the full hash value) not at the items.  This
      // is important for cache locality.
-      
+
      // Do the comparison like this because Name isn't necessarily
      // null-terminated!
      char *ItemStr = (char*)BucketItem+ItemSize;
@ -120,10 +121,10 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
        return BucketNo;
      }
    }
-    
+
    // Okay, we didn't find the item.  Probe to the next bucket.
    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-    
+
    // Use quadratic probing, it has fewer clumping artifacts than linear
    // probing and has good cache behavior in the common case.
    ++ProbeAmt;
@ -136,7 +137,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
 int StringMapImpl::FindKey(StringRef Key) const {
  unsigned HTSize = NumBuckets;
  if (HTSize == 0) return -1;  // Really empty table?
-  unsigned FullHashValue = HashString(Key);
+  unsigned FullHashValue = djbHash(Key, 0);
  unsigned BucketNo = FullHashValue & (HTSize-1);
  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);

@ -146,7 +147,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
    // If we found an empty bucket, this key isn't in the table yet, return.
    if (LLVM_LIKELY(!BucketItem))
      return -1;
-    
+
    if (BucketItem == getTombstoneVal()) {
      // Ignore tombstones.
    } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
@ -154,7 +155,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
      // case here is that we are only looking at the buckets (for item info
      // being non-null and for the full hash value) not at the items.  This
      // is important for cache locality.
-      
+
      // Do the comparison like this because NameStart isn't necessarily
      // null-terminated!
      char *ItemStr = (char*)BucketItem+ItemSize;
@ -163,10 +164,10 @@ int StringMapImpl::FindKey(StringRef Key) const {
        return BucketNo;
      }
    }
-    
+
    // Okay, we didn't find the item.  Probe to the next bucket.
    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-    
+
    // Use quadratic probing, it has fewer clumping artifacts than linear
    // probing and has good cache behavior in the common case.
    ++ProbeAmt;
@ -187,7 +188,7 @@ void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
 StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
  int Bucket = FindKey(Key);
  if (Bucket == -1) return nullptr;
-  
+
  StringMapEntryBase *Result = TheTable[Bucket];
  TheTable[Bucket] = getTombstoneVal();
  --NumItems;
@ -241,13 +242,13 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
          NewBucketNo = NewBucket;
        continue;
      }
-      
+
      // Otherwise probe for a spot.
      unsigned ProbeSize = 1;
      do {
        NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
      } while (NewTableArray[NewBucket]);
-      
+
      // Finally found a slot.  Fill it in.
      NewTableArray[NewBucket] = Bucket;
      NewHashArray[NewBucket] = FullHash;
@ -255,9 +256,9 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
        NewBucketNo = NewBucket;
    }
  }
-  
+
  free(TheTable);
-  
+
  TheTable = NewTableArray;
  NumBuckets = NewSize;
  NumTombstones = 0;