
Make token hashes fit within a 32-bit integer

The staticNetFilteringEngine uses token hashes to store and
look up filters in Map objects.

Before this commit, the tokens were encoded into token hashes
as JS numbers (not exceeding MAX_SAFE_INTEGER) using at most
the first 8 characters of the token.

With this commit, token hashes are restricted to fit into
32-bit integers, and are derived from at most the first 7
characters. This improves filter look-up performance as per
the built-in benchmark().
Raymond Hill 2019-04-28 10:15:15 -04:00
parent 510cda0bc5
commit ac58b8e688
GPG Key ID: 25E1490B761470C2
2 changed files with 18 additions and 25 deletions
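
For illustration, here is a minimal standalone sketch of the new
hashing scheme described above, using the 37-character token
alphabet visible in the second file's diff below; the names and
the worked value are local to this sketch, not the exact uBlock
Origin source.

// Sketch: 32-bit token hash from at most the first 7 characters.
const chars = '0123456789%abcdefghijklmnopqrstuvwxyz';
const validTokenChars = new Uint8Array(128);
for ( let i = 0; i < chars.length; i++ ) {
    validTokenChars[chars.charCodeAt(i)] = i + 1;
}

const tokenHashFromString = s => {
    if ( s.length === 0 ) { return 0xF0000000; } // emptyTokenHash in the diff
    let th = validTokenChars[s.charCodeAt(0)];
    // Each step shifts by 4 bits and XORs in the next character value,
    // so at most 7 characters always stay within a 32-bit integer.
    for ( let i = 1; i !== 7 && i !== s.length; i++ ) {
        th = th << 4 ^ validTokenChars[s.charCodeAt(i)];
    }
    return th;
};

console.log(tokenHashFromString('ads').toString(16)); // "cee"

The old scheme instead accumulated th = th * 64 + v over up to the
first 8 characters of an extended 39-character alphabet, hence the
need for the full MAX_SAFE_INTEGER range.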

View File

@@ -137,7 +137,7 @@ const µBlock = (function() { // jshint ignore:line
     // Read-only
     systemSettings: {
-        compiledMagic: 12,  // Increase when compiled format changes
+        compiledMagic: 13,  // Increase when compiled format changes
         selfieMagic: 13     // Increase when selfie format changes
     },

View File

@@ -49,17 +49,15 @@
             this._validTokenChars[this._chars.charCodeAt(i)] = i + 1;
         }
-        this._charsEx = '0123456789%abcdefghijklmnopqrstuvwxyz*.';
-        this._validTokenCharsEx = new Uint8Array(128);
-        for ( let i = 0, n = this._charsEx.length; i < n; i++ ) {
-            this._validTokenCharsEx[this._charsEx.charCodeAt(i)] = i + 1;
-        }
-        this.dotTokenHash = this.tokenHashFromString('.');
-        this.anyTokenHash = this.tokenHashFromString('..');
-        this.anyHTTPSTokenHash = this.tokenHashFromString('..https');
-        this.anyHTTPTokenHash = this.tokenHashFromString('..http');
-        this.noTokenHash = this.tokenHashFromString('*');
+        // Four upper bits of token hash are reserved for built-in predefined
+        // token hashes, which should never end up being used when tokenizing
+        // any arbitrary string.
+        this.dotTokenHash = 0x10000000;
+        this.anyTokenHash = 0x20000000;
+        this.anyHTTPSTokenHash = 0x30000000;
+        this.anyHTTPTokenHash = 0x40000000;
+        this.noTokenHash = 0x50000000;
+        this.emptyTokenHash = 0xF0000000;
         this._urlIn = '';
         this._urlOut = '';
@@ -110,23 +108,18 @@
     tokenHashFromString(s) {
         const l = s.length;
-        if ( l === 0 ) { return 0; }
-        const vtc = this._validTokenCharsEx;
+        if ( l === 0 ) { return this.emptyTokenHash; }
+        const vtc = this._validTokenChars;
         let th = vtc[s.charCodeAt(0)];
-        for ( let i = 1; i !== 8 && i !== l; i++ ) {
-            th = th * 64 + vtc[s.charCodeAt(i)];
+        for ( let i = 1; i !== 7 && i !== l; i++ ) {
+            th = th << 4 ^ vtc[s.charCodeAt(i)];
         }
         return th;
     }
     stringFromTokenHash(th) {
         if ( th === 0 ) { return ''; }
-        let s = '';
-        while ( th > 0 ) {
-            s = `${this._charsEx.charAt((th & 0b111111)-1)}${s}`;
-            th /= 64;
-        }
-        return s;
+        return th.toString(16);
     }
     toSelfie() {
@@ -153,7 +146,7 @@
         const tokens = this._tokens;
         let url = this._urlOut;
         let l = url.length;
-        if ( l === 0 ) { return 0; }
+        if ( l === 0 ) { return this.emptyTokenHash; }
         if ( l > 2048 ) {
             url = url.slice(0, 2048);
             l = 2048;
@@ -172,8 +165,8 @@
             if ( i === l ) { break; }
             v = vtc[url.charCodeAt(i++)];
             if ( v === 0 ) { break; }
-            if ( n === 8 ) { continue; }
-            th = th * 64 + v;
+            if ( n === 7 ) { continue; }
+            th = th << 4 ^ v;
             n += 1;
         }
         if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) {
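
A side note on the last hunk: the knownTokens lookup folds the
32-bit token hash into a 16-bit table index. Operator precedence
makes the expression (th & 0xFFFF) ^ (th >>> 16). A tiny sketch,
continuing the illustrative value from the example above:

const th = 0xCEE;                       // hash of 'ads' from the earlier sketch
const index = th & 0xFFFF ^ th >>> 16;  // (0x0CEE) ^ (0x0000) === 0x0CEE
console.log(index.toString(16));        // "cee"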