1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-15 15:32:28 +02:00

Code review of static network filtering engine

- Convert this.categories Map() into an array;

- Fix case of potentially using an invalid Uint32Array
  (regression from latest changes)
This commit is contained in:
Raymond Hill 2020-11-08 13:50:36 -05:00
parent 96bfe3c9a7
commit 50da6706a4
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2

View File

@ -42,6 +42,7 @@ const urlTokenizer = µb.urlTokenizer;
// | | +------- bit 3- 4: party [0-3] // | | +------- bit 3- 4: party [0-3]
// | +--------- bit 5- 9: type [0-31] // | +--------- bit 5- 9: type [0-31]
// +-------------- bit 10-15: unused // +-------------- bit 10-15: unused
const CategoryCount = 1 << 0xa; // shift left to first unused bit
const ActionBitsMask = 0b0000000011; const ActionBitsMask = 0b0000000011;
const TypeBitsMask = 0b1111100000; const TypeBitsMask = 0b1111100000;
@ -312,12 +313,13 @@ const filterSequenceBufferResize = function(newSize) {
/******************************************************************************/ /******************************************************************************/
const bidiTrieMatchExtra = function(l, r, ix) { const bidiTrieMatchExtra = function(l, r, ix) {
const sequences = filterSequences;
for (;;) { for (;;) {
$patternMatchLeft = l; $patternMatchLeft = l;
$patternMatchRight = r; $patternMatchRight = r;
const iu = filterSequences[ix+0]; const iu = sequences[ix+0];
if ( filterUnits[iu].match() ) { return iu; } if ( filterUnits[iu].match() ) { return iu; }
ix = filterSequences[ix+1]; ix = sequences[ix+1];
if ( ix === 0 ) { break; } if ( ix === 0 ) { break; }
} }
return 0; return 0;
@ -2113,24 +2115,25 @@ const FilterBucket = class extends FilterCollection {
} }
optimizePatternTests() { optimizePatternTests() {
const sequences = filterSequences; // Important: do not locally cache filterSequences, its value can
// change when addUnitToTrie() is called.
let n = 0; let n = 0;
let i = this.i; let i = this.i;
do { do {
if ( filterUnits[sequences[i+0]].isBidiTrieable ) { n += 1; } if ( filterUnits[filterSequences[i+0]].isBidiTrieable ) { n += 1; }
i = sequences[i+1]; i = filterSequences[i+1];
} while ( i !== 0 && n < 3 ); } while ( i !== 0 && n < 3 );
if ( n < 3 ) { return; } if ( n < 3 ) { return; }
const ftrie = new FilterPlainTrie(); const ftrie = new FilterPlainTrie();
i = this.i; i = this.i;
let iprev = 0; let iprev = 0;
for (;;) { for (;;) {
const iunit = sequences[i+0]; const iunit = filterSequences[i+0];
const inext = sequences[i+1]; const inext = filterSequences[i+1];
if ( filterUnits[iunit].isBidiTrieable ) { if ( filterUnits[iunit].isBidiTrieable ) {
ftrie.addUnitToTrie(iunit); ftrie.addUnitToTrie(iunit);
if ( iprev !== 0 ) { if ( iprev !== 0 ) {
sequences[iprev+1] = inext; filterSequences[iprev+1] = inext;
} else { } else {
this.i = inext; this.i = inext;
} }
@ -2152,20 +2155,21 @@ const FilterBucket = class extends FilterCollection {
if ( candidateCount === 0 ) { return true; } if ( candidateCount === 0 ) { return true; }
}); });
if ( shouldPreTest !== true ) { return; } if ( shouldPreTest !== true ) { return; }
const sequences = filterSequences; // Important: do not locally cache filterSequences, its value can
// change when unshift() is called.
const bucket = new FilterBucketOfOriginHits(); const bucket = new FilterBucketOfOriginHits();
const domainOpts = []; const domainOpts = [];
let i = this.i; let i = this.i;
let iprev = 0; let iprev = 0;
for (;;) { for (;;) {
const iunit = sequences[i+0]; const iunit = filterSequences[i+0];
const inext = sequences[i+1]; const inext = filterSequences[i+1];
const f = filterUnits[iunit]; const f = filterUnits[iunit];
if ( f.hasOriginHit ) { if ( f.hasOriginHit ) {
domainOpts.push(f.domainOpt); domainOpts.push(f.domainOpt);
bucket.unshift(iunit); bucket.unshift(iunit);
if ( iprev !== 0 ) { if ( iprev !== 0 ) {
sequences[iprev+1] = inext; filterSequences[iprev+1] = inext;
} else { } else {
this.i = inext; this.i = inext;
} }
@ -2834,6 +2838,16 @@ const FilterContainer = function() {
this.anyHTTPSTokenHash = urlTokenizer.anyHTTPSTokenHash; this.anyHTTPSTokenHash = urlTokenizer.anyHTTPSTokenHash;
this.anyHTTPTokenHash = urlTokenizer.anyHTTPTokenHash; this.anyHTTPTokenHash = urlTokenizer.anyHTTPTokenHash;
this.optimizeTimerId = undefined; this.optimizeTimerId = undefined;
// As long as CategoryCount is reasonably low, we will use an array to
// store buckets using category bits as index. If ever CategoryCount
// becomes too large, we can just go back to using a Map.
this.categories = (( ) => {
const out = [];
for ( let i = 0; i < CategoryCount; i++ ) { out[i] = undefined; }
return out;
})();
this.reset(); this.reset();
}; };
@ -2856,7 +2870,7 @@ FilterContainer.prototype.reset = function() {
this.discardedCount = 0; this.discardedCount = 0;
this.goodFilters = new Set(); this.goodFilters = new Set();
this.badFilters = new Set(); this.badFilters = new Set();
this.categories = new Map(); this.categories.fill(undefined);
urlTokenizer.resetKnownTokens(); urlTokenizer.resetKnownTokens();
@ -2902,10 +2916,10 @@ FilterContainer.prototype.freeze = function() {
const tokenHash = args[1]; const tokenHash = args[1];
const fdata = args[2]; const fdata = args[2];
let bucket = this.categories.get(bits); let bucket = this.categories[bits];
if ( bucket === undefined ) { if ( bucket === undefined ) {
bucket = new Map(); bucket = new Map();
this.categories.set(bits, bucket); this.categories[bits] = bucket;
} }
let iunit = bucket.get(tokenHash); let iunit = bucket.get(tokenHash);
@ -2974,13 +2988,15 @@ FilterContainer.prototype.freeze = function() {
// readiness when no valid selfie is available. // readiness when no valid selfie is available.
this.optimizeTimerId = self.requestIdleCallback(( ) => { this.optimizeTimerId = self.requestIdleCallback(( ) => {
this.optimizeTimerId = undefined; this.optimizeTimerId = undefined;
for ( const [ catBits, bucket ] of this.categories ) { for ( let bits = 0, n = this.categories.length; bits < n; bits++ ) {
const bucket = this.categories[bits];
if ( bucket === undefined ) { continue; }
for ( const [ th, iunit ] of bucket ) { for ( const [ th, iunit ] of bucket ) {
const f = filterUnits[iunit]; const f = filterUnits[iunit];
if ( f instanceof FilterBucket === false ) { continue; } if ( f instanceof FilterBucket === false ) { continue; }
const optimizeBits = const optimizeBits =
(th === this.noTokenHash) || (th === this.noTokenHash) ||
(catBits & ActionBitsMask) === ModifyAction (bits & ActionBitsMask) === ModifyAction
? 0b10 ? 0b10
: 0b01; : 0b01;
const g = f.optimize(optimizeBits); const g = f.optimize(optimizeBits);
@ -3001,8 +3017,10 @@ FilterContainer.prototype.freeze = function() {
FilterContainer.prototype.toSelfie = function(path) { FilterContainer.prototype.toSelfie = function(path) {
const categoriesToSelfie = ( ) => { const categoriesToSelfie = ( ) => {
const selfie = []; const selfie = [];
for ( const [ catBits, bucket ] of this.categories ) { for ( let bits = 0, n = this.categories.length; bits < n; bits++ ) {
selfie.push([ catBits, Array.from(bucket) ]); const bucket = this.categories[bits];
if ( bucket === undefined ) { continue; }
selfie.push([ bits, Array.from(bucket) ]);
} }
return selfie; return selfie;
}; };
@ -3104,7 +3122,7 @@ FilterContainer.prototype.fromSelfie = function(path) {
} }
} }
for ( const [ catBits, bucket ] of selfie.categories ) { for ( const [ catBits, bucket ] of selfie.categories ) {
this.categories.set(catBits, new Map(bucket)); this.categories[catBits] = new Map(bucket);
} }
return true; return true;
}), }),
@ -3336,15 +3354,15 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
const catBits10 = ModifyAction | partyBits; const catBits10 = ModifyAction | partyBits;
const catBits11 = ModifyAction | typeBits | partyBits; const catBits11 = ModifyAction | typeBits | partyBits;
const bucket00 = this.categories.get(catBits00); const bucket00 = this.categories[catBits00];
const bucket01 = typeBits !== 0 const bucket01 = typeBits !== 0
? this.categories.get(catBits01) ? this.categories[catBits01]
: undefined; : undefined;
const bucket10 = partyBits !== 0 const bucket10 = partyBits !== 0
? this.categories.get(catBits10) ? this.categories[catBits10]
: undefined; : undefined;
const bucket11 = typeBits !== 0 && partyBits !== 0 const bucket11 = typeBits !== 0 && partyBits !== 0
? this.categories.get(catBits11) ? this.categories[catBits11]
: undefined; : undefined;
if ( if (
@ -3500,16 +3518,16 @@ FilterContainer.prototype.realmMatchString = function(
const catBits11 = realmBits | typeBits | partyBits; const catBits11 = realmBits | typeBits | partyBits;
const bucket00 = exactType === 0 const bucket00 = exactType === 0
? this.categories.get(catBits00) ? this.categories[catBits00]
: undefined; : undefined;
const bucket01 = exactType !== 0 || typeBits !== 0 const bucket01 = exactType !== 0 || typeBits !== 0
? this.categories.get(catBits01) ? this.categories[catBits01]
: undefined; : undefined;
const bucket10 = exactType === 0 && partyBits !== 0 const bucket10 = exactType === 0 && partyBits !== 0
? this.categories.get(catBits10) ? this.categories[catBits10]
: undefined; : undefined;
const bucket11 = (exactType !== 0 || typeBits !== 0) && partyBits !== 0 const bucket11 = (exactType !== 0 || typeBits !== 0) && partyBits !== 0
? this.categories.get(catBits11) ? this.categories[catBits11]
: undefined; : undefined;
if ( if (
@ -4002,7 +4020,9 @@ FilterContainer.prototype.test = function(docURL, type, url) {
FilterContainer.prototype.bucketHistogram = function() { FilterContainer.prototype.bucketHistogram = function() {
const results = []; const results = [];
for ( const [ bits, category ] of this.categories ) { for ( let bits = 0, n = this.categories.length; bits < n; bits++ ) {
const category = this.categories[bits];
if ( category === undefined ) { continue; }
for ( const [ th, iunit ] of category ) { for ( const [ th, iunit ] of category ) {
const token = urlTokenizer.stringFromTokenHash(th); const token = urlTokenizer.stringFromTokenHash(th);
const f = filterUnits[iunit]; const f = filterUnits[iunit];