[Support] Replace HashString with djbHash.

This removes the HashString function from StringExtraces and replaces its uses with calls to djbHash from DJB.h This is *almost* NFC. While the algorithm is identical, the djbHash implementation in StringExtras used 0 as its seed while the implementation in DJB uses 5381. The latter has been shown to result in less collisions and improved avalanching. https://reviews.llvm.org/D43615 (cherry picked from commit 77f7f965bc9499a9ae768a296ca5a1f7347d1d2c) llvm-svn: 326081
2024-11-25 04:02:41 +01:00 · 2018-02-26 11:30:13 +00:00 · 2018-02-26 11:30:13 +00:00 · e8b48cad10
commit e8b48cad10
parent c3fa426be7
2 changed files with 20 additions and 32 deletions
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@ -232,19 +232,6 @@ void SplitString(StringRef Source,
                 SmallVectorImpl<StringRef> &OutFragments,
                 StringRef Delimiters = " \t\n\v\f\r");

-/// HashString - Hash function for strings.
-///
-/// This is the Bernstein hash function.
-//
-// FIXME: Investigate whether a modified bernstein hash function performs
-// better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
-//   X*33+c -> X*33^c
-inline unsigned HashString(StringRef Str, unsigned Result = 0) {
-  for (StringRef::size_type i = 0, e = Str.size(); i != e; ++i)
-    Result = Result * 33 + (unsigned char)Str[i];
-  return Result;
-}
-
 /// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th).
 inline StringRef getOrdinalSuffix(unsigned Val) {
  // It is critically important that we do this perfectly for
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@ -14,6 +14,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>

@ -32,7 +33,7 @@ static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {

 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
  ItemSize = itemSize;
-  
+
  // If a size is specified, initialize the table with that many buckets.
  if (InitSize) {
    // The table will grow when the number of entries reach 3/4 of the number of
@ -41,7 +42,7 @@ StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
    init(getMinBucketToReserveForEntries(InitSize));
    return;
  }
-  
+
  // Otherwise, initialize it with zero buckets to avoid the allocation.
  TheTable = nullptr;
  NumBuckets = 0;
@ -56,7 +57,7 @@ void StringMapImpl::init(unsigned InitSize) {
  unsigned NewNumBuckets = InitSize ? InitSize : 16;
  NumItems = 0;
  NumTombstones = 0;
-  
+
  TheTable = static_cast<StringMapEntryBase **>(
      std::calloc(NewNumBuckets+1,
                  sizeof(StringMapEntryBase **) + sizeof(unsigned)));
@ -82,7 +83,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
    init(16);
    HTSize = NumBuckets;
  }
-  unsigned FullHashValue = HashString(Name);
+  unsigned FullHashValue = djbHash(Name);
  unsigned BucketNo = FullHashValue & (HTSize-1);
  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);

@ -98,11 +99,11 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
        HashTable[FirstTombstone] = FullHashValue;
        return FirstTombstone;
      }
-      
+
      HashTable[BucketNo] = FullHashValue;
      return BucketNo;
    }
-    
+
    if (BucketItem == getTombstoneVal()) {
      // Skip over tombstones.  However, remember the first one we see.
      if (FirstTombstone == -1) FirstTombstone = BucketNo;
@ -111,7 +112,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
      // case here is that we are only looking at the buckets (for item info
      // being non-null and for the full hash value) not at the items.  This
      // is important for cache locality.
-      
+
      // Do the comparison like this because Name isn't necessarily
      // null-terminated!
      char *ItemStr = (char*)BucketItem+ItemSize;
@ -120,10 +121,10 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
        return BucketNo;
      }
    }
-    
+
    // Okay, we didn't find the item.  Probe to the next bucket.
    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-    
+
    // Use quadratic probing, it has fewer clumping artifacts than linear
    // probing and has good cache behavior in the common case.
    ++ProbeAmt;
@ -136,7 +137,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
 int StringMapImpl::FindKey(StringRef Key) const {
  unsigned HTSize = NumBuckets;
  if (HTSize == 0) return -1;  // Really empty table?
-  unsigned FullHashValue = HashString(Key);
+  unsigned FullHashValue = djbHash(Key);
  unsigned BucketNo = FullHashValue & (HTSize-1);
  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);

@ -146,7 +147,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
    // If we found an empty bucket, this key isn't in the table yet, return.
    if (LLVM_LIKELY(!BucketItem))
      return -1;
-    
+
    if (BucketItem == getTombstoneVal()) {
      // Ignore tombstones.
    } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
@ -154,7 +155,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
      // case here is that we are only looking at the buckets (for item info
      // being non-null and for the full hash value) not at the items.  This
      // is important for cache locality.
-      
+
      // Do the comparison like this because NameStart isn't necessarily
      // null-terminated!
      char *ItemStr = (char*)BucketItem+ItemSize;
@ -163,10 +164,10 @@ int StringMapImpl::FindKey(StringRef Key) const {
        return BucketNo;
      }
    }
-    
+
    // Okay, we didn't find the item.  Probe to the next bucket.
    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-    
+
    // Use quadratic probing, it has fewer clumping artifacts than linear
    // probing and has good cache behavior in the common case.
    ++ProbeAmt;
@ -187,7 +188,7 @@ void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
 StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
  int Bucket = FindKey(Key);
  if (Bucket == -1) return nullptr;
-  
+
  StringMapEntryBase *Result = TheTable[Bucket];
  TheTable[Bucket] = getTombstoneVal();
  --NumItems;
@ -241,13 +242,13 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
          NewBucketNo = NewBucket;
        continue;
      }
-      
+
      // Otherwise probe for a spot.
      unsigned ProbeSize = 1;
      do {
        NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
      } while (NewTableArray[NewBucket]);
-      
+
      // Finally found a slot.  Fill it in.
      NewTableArray[NewBucket] = Bucket;
      NewHashArray[NewBucket] = FullHash;
@ -255,9 +256,9 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
        NewBucketNo = NewBucket;
    }
  }
-  
+
  free(TheTable);
-  
+
  TheTable = NewTableArray;
  NumBuckets = NewSize;
  NumTombstones = 0;