1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-11-07 03:12:33 +01:00

Order HNTrie nodes alphabetically to allow for early bailout

This commit implements the alphabetical ordering of HNTrie
nodes, so as to make it possible to bail out early at
HNTrie.matches() time.

Contrary to what I expected, there is no performance gain
observed to HNTrie.matches() as per benchmarks -- I find
the results perplexing.

Because of this I will revert this commit immediately.
The purpose of this commit is to record the changes so
that I can bring them back to life in the future whenever
I want to investigate further.
This commit is contained in:
Raymond Hill 2019-04-30 06:47:54 -04:00
parent a73024ad7b
commit f5f9e05071
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2

View File

@ -181,12 +181,14 @@ const HNTrieContainer = class {
if ( ineedle === 0 ) { return -1; }
ineedle -= 1;
let c = this.buf[ineedle];
let v, i0;
let v, i0, diff;
// find first segment with a first-character match
for (;;) {
v = this.buf32[icell+2];
i0 = char0 + (v & 0x00FFFFFF);
if ( this.buf[i0] === c ) { break; }
diff = this.buf[i0] - c;
if ( diff === 0 ) { break; }
if ( diff > 0 ) { return -1; }
icell = this.buf32[icell+0];
if ( icell === 0 ) { return -1; }
}
@ -254,7 +256,7 @@ const HNTrieContainer = class {
}
//
const char0 = this.buf32[HNTRIE_CHAR0_SLOT];
let inext;
let iprevious = iroot, diff, inext;
// find a matching cell: move down
for (;;) {
const vseg = this.buf32[icell+2];
@ -263,21 +265,38 @@ const HNTrieContainer = class {
// remainder is at label boundary? if yes, no need to add
// the rest since the shortest match is always reported
if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
iprevious = icell;
icell = this.buf32[icell+1];
continue;
}
let isegchar0 = char0 + (vseg & 0x00FFFFFF);
// if first character is no match, move to next descendant
if ( this.buf[isegchar0] !== this.buf[lhnchar-1] ) {
diff = this.buf[isegchar0] - this.buf[lhnchar-1];
// current cell character greater than target character: insert
// a new node before current cell
if ( diff > 0 ) {
const inew = this.addCell(icell, 0, this.addSegment(lhnchar));
if ( icell === this.buf32[iprevious+0] ) {
this.buf32[iprevious+0] = inew;
} else /* if ( icell === this.buf32[iprevious+1] ) */ {
this.buf32[iprevious+1] = inew;
}
return 1;
}
// current cell character lesser than target character: keep
// looking
if ( diff !== 0 ) {
inext = this.buf32[icell+0];
// no match found: insert a new leaf descendant
if ( inext === 0 ) {
this.buf32[icell+0] = this.addCell(0, 0, this.addSegment(lhnchar));
return 1;
}
iprevious = icell;
icell = inext;
continue;
}
// 1st character was tested
// current cell character matches: 1st character was tested
let isegchar = 1;
lhnchar -= 1;
// find 1st mismatch in rest of segment
@ -286,7 +305,8 @@ const HNTrieContainer = class {
for (;;) {
if ( isegchar === lsegchar ) { break; }
if ( lhnchar === 0 ) { break; }
if ( this.buf[isegchar0+isegchar] !== this.buf[lhnchar-1] ) { break; }
diff = this.buf[isegchar0+isegchar] - this.buf[lhnchar-1];
if ( diff !== 0 ) { break; }
isegchar += 1;
lhnchar -= 1;
}
@ -304,6 +324,7 @@ const HNTrieContainer = class {
// needle remainder: yes
else {
if ( inext !== 0 ) {
iprevious = icell;
icell = inext;
continue;
}
@ -326,14 +347,19 @@ const HNTrieContainer = class {
this.buf32[icell+1],
lsegchar - isegchar << 24 | isegchar0 + isegchar
);
this.buf32[icell+1] = inext;
// needle remainder: no = need boundary cell
if ( lhnchar === 0 ) {
this.buf32[icell+1] = this.addCell(0, inext, 0);
}
// needle remainder: yes = need new cell for remaining characters
else {
this.buf32[inext+0] = this.addCell(0, 0, this.addSegment(lhnchar));
const isegment = this.addSegment(lhnchar);
if ( diff > 0 ) {
this.buf32[icell+1] = this.addCell(inext, 0, isegment);
} else {
this.buf32[icell+1] = inext;
this.buf32[inext+0] = this.addCell(0, 0, isegment);
}
}
}
return 1;