1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-10-04 16:47:15 +02:00

Store domain= option into trie container's character buffer

Because the trie is created lazily, in order to speed up
launch time, the `domain=` option used to be stored in the
`filterRefs` array until it was moved into the trie.

This commit instead stores the `domain=` option into the trie
container's character buffer.
This commit is contained in:
Raymond Hill 2021-12-18 12:53:09 -05:00
parent 47680c775d
commit e45d851f66
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
3 changed files with 107 additions and 57 deletions

View File

@ -175,7 +175,7 @@ const µBlock = { // jshint ignore:line
// Read-only // Read-only
systemSettings: { systemSettings: {
compiledMagic: 44, // Increase when compiled format changes compiledMagic: 44, // Increase when compiled format changes
selfieMagic: 44, // Increase when selfie format changes selfieMagic: 45, // Increase when selfie format changes
}, },
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -223,7 +223,7 @@ class HNTrieContainer {
return -1; return -1;
} }
createTrie(hostnames = undefined) { createTrie() {
// grow buffer if needed // grow buffer if needed
if ( (this.buf32[CHAR0_SLOT] - this.buf32[TRIE1_SLOT]) < 12 ) { if ( (this.buf32[CHAR0_SLOT] - this.buf32[TRIE1_SLOT]) < 12 ) {
this.growBuf(12, 0); this.growBuf(12, 0);
@ -233,12 +233,41 @@ class HNTrieContainer {
this.buf32[iroot+0] = 0; this.buf32[iroot+0] = 0;
this.buf32[iroot+1] = 0; this.buf32[iroot+1] = 0;
this.buf32[iroot+2] = 0; this.buf32[iroot+2] = 0;
if ( hostnames !== undefined ) { return iroot;
for ( const hn of hostnames ) { }
this.setNeedle(hn).add(iroot);
createTrieFromIterable(hostnames) {
const itrie = this.createTrie();
for ( const hn of hostnames ) {
if ( hn === '' ) { continue; }
this.setNeedle(hn).add(itrie);
}
return itrie;
}
// Create a new trie and fill it with the hostnames found in a `|`-separated
// list which is read directly from the character buffer.
//   i: position of the list, relative to the offset held in buf32[CHAR0_SLOT]
//   n: length of the list
// Returns the value obtained from createTrie().
createTrieFromStoredDomainOpt(i, n) {
const itrie = this.createTrie();
const jend = i + n;
let j = i, offset = 0, k = 0, c = 0;
while ( j !== jend ) {
// Re-read on every pass -- presumably because add() may grow the buffer and
// move the character segment; TODO confirm against growBuf().
offset = this.buf32[CHAR0_SLOT]; // Important
k = 0;
// Copy one hostname into buf[0..k), stopping at '|' or at the end of the list.
for (;;) {
if ( j === jend ) { break; }
c = this.buf[offset+j];
j += 1;
if ( c === 0x7C /* '|' */ ) { break; }
// Once 255 bytes have been copied, further bytes are consumed but not
// stored: slot 255 is used below to hold the length.
if ( k === 255 ) { continue; }
this.buf[k] = c;
k += 1;
}
// Empty entries (k === 0) are not added to the trie.
if ( k !== 0 ) {
this.buf[255] = k;
this.add(itrie);
} }
} }
// buf[0..k) was written without going through setNeedle(), so the cached
// needle string is presumably stale and is cleared here -- TODO confirm.
return iroot; this.needle = ''; // Important
return itrie;
} }
dumpTrie(iroot) { dumpTrie(iroot) {
@ -304,6 +333,12 @@ class HNTrieContainer {
}; };
} }
// TODO:
// Rework code to add from a string already present in the character
// buffer, i.e. not having to go through setNeedle() when adding a new
// hostname to a trie. This will require much work though, and probably
// changing the order in which string segments are stored in the
// character buffer.
addJS(iroot) { addJS(iroot) {
let lhnchar = this.buf[255]; let lhnchar = this.buf[255];
if ( lhnchar === 0 ) { return 0; } if ( lhnchar === 0 ) { return 0; }
@ -488,6 +523,31 @@ class HNTrieContainer {
return textDecoder.decode(this.buf.subarray(offset, offset + n)); return textDecoder.decode(this.buf.subarray(offset, offset + n));
} }
storeDomainOpt(s) {
let n = s.length;
if ( n === this.lastStoredLen && s === this.lastStored ) {
return this.lastStoredIndex;
}
this.lastStored = s;
this.lastStoredLen = n;
if ( (this.buf.length - this.buf32[CHAR1_SLOT]) < n ) {
this.growBuf(0, n);
}
const offset = this.buf32[CHAR1_SLOT];
this.buf32[CHAR1_SLOT] = offset + n;
const buf8 = this.buf;
for ( let i = 0; i < n; i++ ) {
buf8[offset+i] = s.charCodeAt(i);
}
return (this.lastStoredIndex = offset - this.buf32[CHAR0_SLOT]);
}
extractDomainOpt(i, n) {
const textDecoder = new TextDecoder();
const offset = this.buf32[CHAR0_SLOT] + i;
return textDecoder.decode(this.buf.subarray(offset, offset + n));
}
matchesHostname(hn, i, n) { matchesHostname(hn, i, n) {
this.setNeedle(hn); this.setNeedle(hn);
const buf8 = this.buf; const buf8 = this.buf;

View File

@ -1200,10 +1200,6 @@ class DomainOptIterator {
// A helper instance to reuse throughout // A helper instance to reuse throughout
const domainOptIterator = new DomainOptIterator(''); const domainOptIterator = new DomainOptIterator('');
// Return the `|`-separated list `domainOpt` with its entries sorted using the
// default string ordering, so that equivalent lists compare equal.
const domainOptNormalizer = domainOpt => {
    const entries = domainOpt.split('|');
    entries.sort();
    return entries.join('|');
};
/******************************************************************************/ /******************************************************************************/
// The optimal class is picked according to the content of the `domain=` // The optimal class is picked according to the content of the `domain=`
@ -1367,15 +1363,10 @@ registerFilterClass(FilterOriginMiss);
/******************************************************************************/ /******************************************************************************/
const FilterOriginHitSet = class { const FilterOriginHitSet = class {
// The `domainOpt` value may be in either the allocated refs or the trie,
// never in both at the same time.
static getDomainOpt(idata) { static getDomainOpt(idata) {
const itrie = filterData[idata+2]; return origHNTrieContainer.extractDomainOpt(
if ( itrie === 0 ) { filterData[idata+1],
return filterRefs[filterData[idata+4]].domainOpt; filterData[idata+2]
}
return domainOptNormalizer(
Array.from(origHNTrieContainer.trieIterator(itrie)).join('|')
); );
} }
@ -1384,13 +1375,13 @@ const FilterOriginHitSet = class {
} }
static match(idata) { static match(idata) {
const refs = filterRefs[filterData[idata+4]]; const refs = filterRefs[filterData[idata+6]];
if ( $docHostname === refs.$last ) { if ( $docHostname === refs.$last ) {
return filterData[idata+3] !== -1; return filterData[idata+5] !== -1;
} }
refs.$last = $docHostname; refs.$last = $docHostname;
const which = filterData[idata+1]; const which = filterData[idata+3];
const itrie = filterData[idata+2] || this.toTrie(idata); const itrie = filterData[idata+4] || this.toTrie(idata);
let lastResult = -1; let lastResult = -1;
if ( (which & 0b01) !== 0 ) { if ( (which & 0b01) !== 0 ) {
lastResult = origHNTrieContainer lastResult = origHNTrieContainer
@ -1402,16 +1393,19 @@ const FilterOriginHitSet = class {
.setNeedle($docEntity.compute()) .setNeedle($docEntity.compute())
.matches(itrie); .matches(itrie);
} }
return (filterData[idata+3] = lastResult) !== -1; return (filterData[idata+5] = lastResult) !== -1;
} }
static create(domainOpt, which = 0b11) { static create(domainOpt, which = 0b11) {
const idata = filterDataAllocLen(5); const idata = filterDataAllocLen(7);
filterData[idata+0] = FilterOriginHitSet.fid; filterData[idata+0] = FilterOriginHitSet.fid;
filterData[idata+1] = which; filterData[idata+1] = origHNTrieContainer.storeDomainOpt(domainOpt);
filterData[idata+2] = 0; // itrie filterData[idata+2] = domainOpt.length;
filterData[idata+3] = -1; // $lastResult filterData[idata+3] = which;
filterData[idata+4] = filterRefAdd({ domainOpt, $last: '' }); filterData[idata+4] = 0; // itrie
filterData[idata+5] = -1; // $lastResult
filterData[idata+6] = filterRefAdd({ $last: '' });
this.toTrie(idata);
return idata; return idata;
} }
@ -1424,28 +1418,26 @@ const FilterOriginHitSet = class {
} }
static fromCompiled(args) { static fromCompiled(args) {
const idata = filterDataAllocLen(5); const idata = filterDataAllocLen(7);
filterData[idata+0] = args[0]; // fid filterData[idata+0] = args[0]; // fid
filterData[idata+1] = args[2]; // which filterData[idata+1] = origHNTrieContainer.storeDomainOpt(args[1]);
filterData[idata+2] = 0; // itrie filterData[idata+2] = args[1].length;
filterData[idata+3] = -1; // $lastResult filterData[idata+3] = args[2]; // which
filterData[idata+4] = filterRefAdd({ domainOpt: args[1], $last: '' }); filterData[idata+4] = 0; // itrie
filterData[idata+5] = -1; // $lastResult
filterData[idata+6] = filterRefAdd({ $last: '' });
return idata; return idata;
} }
static toTrie(idata) { static toTrie(idata) {
const refs = filterRefs[filterData[idata+4]]; const itrie = filterData[idata+4] =
const itrie = filterData[idata+2] = origHNTrieContainer.createTrie( origHNTrieContainer.createTrieFromStoredDomainOpt(
domainOptIterator.reset(refs.domainOpt) filterData[idata+1],
); filterData[idata+2]
refs.domainOpt = ''; );
return itrie; return itrie;
} }
static getTrie(idata) {
return filterData[idata+2];
}
static keyFromArgs(args) { static keyFromArgs(args) {
return args[1]; return args[1];
} }
@ -1455,7 +1447,7 @@ const FilterOriginHitSet = class {
} }
static dumpInfo(idata) { static dumpInfo(idata) {
return `0b${filterData[idata+1].toString(2)} ${this.getDomainOpt(idata)}`; return `0b${filterData[idata+3].toString(2)} ${this.getDomainOpt(idata)}`;
} }
}; };
@ -1863,7 +1855,7 @@ const FilterHostnameDict = class {
const itrie = filterData[idata+1]; const itrie = filterData[idata+1];
if ( itrie !== 0 ) { return itrie; } if ( itrie !== 0 ) { return itrie; }
const hostnames = filterRefs[filterData[idata+3]]; const hostnames = filterRefs[filterData[idata+3]];
filterData[idata+1] = destHNTrieContainer.createTrie(hostnames); filterData[idata+1] = destHNTrieContainer.createTrieFromIterable(hostnames);
filterRefs[filterData[idata+3]] = null; filterRefs[filterData[idata+3]] = null;
return filterData[idata+1]; return filterData[idata+1];
} }
@ -1908,11 +1900,9 @@ const FilterDenyAllow = class {
} }
static fromCompiled(args) { static fromCompiled(args) {
const itrie = destHNTrieContainer.createTrie(); const itrie = destHNTrieContainer.createTrieFromIterable(
for ( const hn of domainOptIterator.reset(args[1]) ) { domainOptIterator.reset(args[1])
if ( hn === '' ) { continue; } );
destHNTrieContainer.setNeedle(hn).add(itrie);
}
const idata = filterDataAllocLen(3); const idata = filterDataAllocLen(3);
filterData[idata+0] = args[0]; // fid filterData[idata+0] = args[0]; // fid
filterData[idata+1] = itrie; // itrie filterData[idata+1] = itrie; // itrie
@ -3503,7 +3493,7 @@ FilterCompiler.prototype.FILTER_UNSUPPORTED = 2;
const FilterContainer = function() { const FilterContainer = function() {
this.compilerVersion = '6'; this.compilerVersion = '6';
this.selfieVersion = '6'; this.selfieVersion = '7';
this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH; this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH;
this.optimizeTaskId = undefined; this.optimizeTaskId = undefined;
@ -4579,7 +4569,7 @@ FilterContainer.prototype.dump = function() {
const out = []; const out = [];
const toOutput = (depth, line) => { const toOutput = (depth, line, out) => {
out.push(`${' '.repeat(depth*2)}${line}`); out.push(`${' '.repeat(depth*2)}${line}`);
}; };
@ -4588,7 +4578,7 @@ FilterContainer.prototype.dump = function() {
const fc = filterGetClass(idata); const fc = filterGetClass(idata);
fcCounts.set(fc.name, (fcCounts.get(fc.name) || 0) + 1); fcCounts.set(fc.name, (fcCounts.get(fc.name) || 0) + 1);
const info = filterDumpInfo(idata) || ''; const info = filterDumpInfo(idata) || '';
toOutput(depth, info !== '' ? `${fc.name}: ${info}` : fc.name); toOutput(depth, info !== '' ? `${fc.name}: ${info}` : fc.name, out);
switch ( fc ) { switch ( fc ) {
case FilterBucket: case FilterBucket:
case FilterCompositeAll: case FilterCompositeAll:
@ -4618,7 +4608,7 @@ FilterContainer.prototype.dump = function() {
const realms = new Map([ const realms = new Map([
[ BlockAction, 'block' ], [ BlockAction, 'block' ],
[ BlockImportant, 'block-important' ], [ BlockImportant, 'block-important' ],
[ AllowAction, 'allow' ], [ AllowAction, 'unblock' ],
[ ModifyAction, 'modify' ], [ ModifyAction, 'modify' ],
]); ]);
const partyness = new Map([ const partyness = new Map([
@ -4627,9 +4617,9 @@ FilterContainer.prototype.dump = function() {
[ ThirdParty, '3rd-party' ], [ ThirdParty, '3rd-party' ],
]); ]);
for ( const [ realmBits, realmName ] of realms ) { for ( const [ realmBits, realmName ] of realms ) {
toOutput(1, `+ realm: ${realmName}`); toOutput(1, `+ realm: ${realmName}`, out);
for ( const [ partyBits, partyName ] of partyness ) { for ( const [ partyBits, partyName ] of partyness ) {
toOutput(2, `+ party: ${partyName}`); toOutput(2, `+ party: ${partyName}`, out);
const processedTypeBits = new Set(); const processedTypeBits = new Set();
for ( const typeName in typeNameToTypeValue ) { for ( const typeName in typeNameToTypeValue ) {
const typeBits = typeNameToTypeValue[typeName]; const typeBits = typeNameToTypeValue[typeName];
@ -4639,13 +4629,13 @@ FilterContainer.prototype.dump = function() {
const ibucket = this.bitsToBucketIndices[bits]; const ibucket = this.bitsToBucketIndices[bits];
if ( ibucket === 0 ) { continue; } if ( ibucket === 0 ) { continue; }
const thCount = this.buckets[ibucket].size; const thCount = this.buckets[ibucket].size;
toOutput(3, `+ type: ${typeName} (${thCount})`); toOutput(3, `+ type: ${typeName} (${thCount})`, out);
for ( const [ th, iunit ] of this.buckets[ibucket] ) { for ( const [ th, iunit ] of this.buckets[ibucket] ) {
thCounts.add(th); thCounts.add(th);
const ths = thConstants.has(th) const ths = thConstants.has(th)
? thConstants.get(th) ? thConstants.get(th)
: `0x${th.toString(16)}`; : `0x${th.toString(16)}`;
toOutput(4, `+ th: ${ths}`); toOutput(4, `+ th: ${ths}`, out);
dumpUnit(iunit, out, 5); dumpUnit(iunit, out, 5);
} }
} }