From b265f2644d7c5eb353b5fc123e67f9419f1c998d Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Fri, 6 Nov 2020 12:04:03 -0500 Subject: [PATCH] Coalesce origin hit filters into their own bucket Performance-related work. There are a fair number of filters in uBO's own filter lists which can't be tokenized. The majority of those filters also declare a `domain=` option, examples: *$script,redirect-rule=noopjs,domain=... *$script,3p,domain=...,denyallow=... *$frame,3p,domain=... Such filters can be found in uBO's asset viewer using the following search expression: /^\*?\$[^\n]*?domain=/ Some filter buckets will contain many of those filters; for instance, one of the buckets holding untokenizable `redirect=` filters has over 170 entries, all of which must be visited when collating all matching `redirect=` filters. When a bucket contains many such filters, I found that it's worth extracting all the non-negated hostname values from `domain=` options into a single hntrie and performing a pre-test at match() time to find out whether the current origin of a network request matches any one of the collected hostnames, so as to avoid iterating through all the filters. Since, for the vast majority of network requests, there is rarely a match() against filters with a `domain=` option, this pre-test saves a good amount of work, and this is measurable with the built-in benchmark. 
--- src/js/background.js | 4 +- src/js/static-net-filtering.js | 318 +++++++++++++++++++++++---------- 2 files changed, 228 insertions(+), 94 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index 1c86e1470..456156e85 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -139,8 +139,8 @@ const µBlock = (( ) => { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 31, // Increase when compiled format changes - selfieMagic: 31, // Increase when selfie format changes + compiledMagic: 32, // Increase when compiled format changes + selfieMagic: 32, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 2b76d124f..e07ebcffc 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -358,6 +358,12 @@ const filterFromCtor = function(ctor, ...args) { return iunit; }; +const filterUnitFromFilter = function(f) { + const iunit = filterUnits.length; + filterUnits.push(f); + return iunit; +}; + const filterUnitFromCompiled = function(args) { const ctor = filterClasses[args[0]]; const keygen = ctor.keyFromArgs; @@ -1237,6 +1243,10 @@ const FilterOriginHit = class { this.n = n; } + get domainOpt() { + return filterOrigin.trieContainer.extractHostname(this.i, this.n); + } + match() { return filterOrigin.trieContainer.matchesHostname( $docHostname, @@ -1250,11 +1260,7 @@ const FilterOriginHit = class { } logData(details) { - details.domains.push(this.getHostname()); - } - - getHostname() { - return filterOrigin.trieContainer.extractHostname(this.i, this.n); + details.domains.push(this.domainOpt); } static compile(hostname) { @@ -1273,6 +1279,8 @@ const FilterOriginHit = class { } }; +FilterOriginHit.prototype.hasOriginHit = true; + registerFilterClass(FilterOriginHit); /******************************************************************************/ @@ -1283,7 
+1291,7 @@ const FilterOriginMiss = class extends FilterOriginHit { } logData(details) { - details.domains.push(`~${this.getHostname()}`); + details.domains.push(`~${this.domainOpt}`); } static compile(hostname) { @@ -1302,6 +1310,8 @@ const FilterOriginMiss = class extends FilterOriginHit { } }; +FilterOriginMiss.prototype.hasOriginHit = false; + registerFilterClass(FilterOriginMiss); /******************************************************************************/ @@ -1354,6 +1364,8 @@ const FilterOriginHitSet = class { } }; +FilterOriginHitSet.prototype.hasOriginHit = true; + registerFilterClass(FilterOriginHitSet); /******************************************************************************/ @@ -1386,6 +1398,8 @@ const FilterOriginMissSet = class extends FilterOriginHitSet { } }; +FilterOriginMissSet.prototype.hasOriginHit = false; + registerFilterClass(FilterOriginMissSet); /******************************************************************************/ @@ -1556,7 +1570,7 @@ const FilterModifierResult = class { const FilterCollection = class { constructor(i = 0) { - this.i = i | 0; + this.i = i; } get size() { @@ -1570,9 +1584,11 @@ const FilterCollection = class { this.i = filterSequenceAdd(iunit, j); } - shift() { + shift(drop = false) { const sequences = filterSequences; - filterUnits[sequences[this.i+0]] = null; + if ( drop ) { + filterUnits[sequences[this.i+0]] = null; + } this.i = sequences[this.i+1]; } @@ -1618,8 +1634,9 @@ const FilterCollection = class { return new ctor(i0, args[1].length); } - static fromSelfie(ctor, args) { - return new ctor(args[1]); + static fromSelfie(args, bucket) { + bucket.i = args[1]; + return bucket; } }; @@ -1645,8 +1662,11 @@ const FilterCompositeAny = class extends FilterCollection { return FilterCollection.fromCompiled(FilterCompositeAny, args); } - static fromSelfie(args) { - return FilterCollection.fromSelfie(FilterCompositeAny, args); + static fromSelfie(args, bucket) { + if ( bucket === undefined ) { + bucket = new 
FilterCompositeAny(); + } + return super.fromSelfie(args, bucket); } }; @@ -1694,9 +1714,26 @@ const FilterCompositeAll = class extends FilterCollection { return filterUnits[filterSequences[this.i]].isBidiTrieable === true; } + get hasOriginHit() { + return this.forEach(iunit => { + if ( filterUnits[iunit].hasOriginHit === true ) { + return true; + } + }); + } + + get domainOpt() { + return this.forEach(iunit => { + const f = filterUnits[iunit]; + if ( f.hasOriginHit === true ) { + return f.domainOpt; + } + }); + } + toBidiTrie() { const details = filterUnits[filterSequences[this.i]].toBidiTrie(); - this.shift(); + this.shift(true); return details; } @@ -1708,8 +1745,11 @@ const FilterCompositeAll = class extends FilterCollection { return FilterCollection.fromCompiled(FilterCompositeAll, args); } - static fromSelfie(args) { - return FilterCollection.fromSelfie(FilterCompositeAll, args); + static fromSelfie(args, bucket) { + if ( bucket === undefined ) { + bucket = new FilterCompositeAll(); + } + return super.fromSelfie(args, bucket); } }; @@ -1925,7 +1965,10 @@ registerFilterClass(FilterHTTPJustOrigin); const FilterPlainTrie = class { constructor(trie) { - this.plainTrie = trie; + this.plainTrie = trie !== undefined + ? trie + : bidiTrie.createOne(); + this.$matchedUnit = 0; } match() { @@ -1949,6 +1992,37 @@ const FilterPlainTrie = class { } } + addUnitToTrie(iunit) { + const f = filterUnits[iunit]; + const trieDetails = f.toBidiTrie(); + const id = this.plainTrie.add( + trieDetails.i, + trieDetails.n, + trieDetails.itok + ); + // No point storing a pattern with conditions if the bidi-trie already + // contain a pattern with no conditions. + const ix = this.plainTrie.getExtra(id); + if ( ix === 1 ) { + filterUnits[iunit] = null; + return; + } + // If the newly stored pattern has no condition, short-circuit existing + // ones since they will always be short-circuited by the condition-less + // pattern. 
+ if ( f instanceof FilterPatternPlain ) { + this.plainTrie.setExtra(id, 1); + filterUnits[iunit] = null; + return; + } + // FilterCompositeAll is assumed here, i.e. with conditions. + if ( f.n === 1 ) { + filterUnits[iunit] = null; + iunit = filterSequences[f.i]; + } + this.plainTrie.setExtra(id, filterSequenceAdd(iunit, ix)); + } + toSelfie() { return [ this.fid, bidiTrie.compileOne(this.plainTrie) ]; } @@ -1958,27 +2032,27 @@ const FilterPlainTrie = class { } }; -FilterPlainTrie.prototype.$matchedUnit = 0; - registerFilterClass(FilterPlainTrie); /******************************************************************************/ const FilterBucket = class extends FilterCollection { + constructor(n = 0) { + super(); + this.n = n; + this.$matchedUnit = 0; + } + + get size() { + return this.n; + } + match() { - if ( this.plainTrie !== null ) { - if ( this.plainTrie.matches($tokenBeg, this) !== 0 ) { - this.$matchedTrie = true; - this.$matchedUnit = this.plainTrie.$iu; - return true; - } - } const sequences = filterSequences; const units = filterUnits; let i = this.i; while ( i !== 0 ) { if ( units[sequences[i+0]].match() ) { - this.$matchedTrie = false; this.$matchedUnit = sequences[i+0]; return true; } @@ -1998,108 +2072,154 @@ const FilterBucket = class extends FilterCollection { } } + unshift(iunit) { + super.unshift(iunit); + this.n += 1; + } + + shift() { + super.shift(); + this.n -= 1; + } + logData(details) { - if ( this.$matchedTrie ) { - const s = $requestURL.slice(this.plainTrie.$l, this.plainTrie.$r); - details.pattern.push(s); - details.regex.push(restrFromPlainPattern(s)); - } - if ( this.$matchedUnit !== -1 ) { - filterUnits[this.$matchedUnit].logData(details); - } + filterUnits[this.$matchedUnit].logData(details); } toSelfie() { - const selfie = super.toSelfie(); - if ( this.plainTrie !== null ) { - selfie.push(bidiTrie.compileOne(this.plainTrie)); + return [ this.fid, this.n, super.toSelfie() ]; + } + + static fromSelfie(args, bucket) { + if ( bucket 
=== undefined ) { + bucket = new FilterBucket(args[1]); } - return selfie; + return super.fromSelfie(args[2], bucket); } optimize() { + if ( this.n >= 3 ) { + const f = this.optimizePatternTests(); + if ( f !== undefined ) { + if ( this.i === 0 ) { return f; } + this.unshift(filterUnitFromFilter(f)); + } + } + if ( this.n >= 10 ) { + const f = this.optimizeOriginHitTests(); + if ( f !== undefined ) { + if ( this.i === 0 ) { return f; } + this.unshift(filterUnitFromFilter(f)); + } + } + } + + optimizePatternTests() { const units = filterUnits; + const sequences = filterSequences; let n = 0; let i = this.i; do { - if ( units[filterSequences[i+0]].isBidiTrieable ) { n += 1; } - i = filterSequences[i+1]; + if ( units[sequences[i+0]].isBidiTrieable ) { n += 1; } + i = sequences[i+1]; } while ( i !== 0 && n < 3 ); if ( n < 3 ) { return; } - if ( this.plainTrie === null ) { - this.plainTrie = bidiTrie.createOne(); - } + const ftrie = new FilterPlainTrie(); i = this.i; let iprev = 0; for (;;) { - const iunit = filterSequences[i+0]; - const inext = filterSequences[i+1]; + const iunit = sequences[i+0]; + const inext = sequences[i+1]; if ( units[iunit].isBidiTrieable ) { - this._addToTrie(iunit); + ftrie.addUnitToTrie(iunit); if ( iprev !== 0 ) { - filterSequences[iprev+1] = inext; + sequences[iprev+1] = inext; } else { this.i = inext; } + this.n -= 1; } else { iprev = i; } if ( inext === 0 ) { break; } i = inext; } - if ( this.i === 0 ) { - return new FilterPlainTrie(this.plainTrie); - } + return ftrie; } - _addToTrie(iunit) { - const f = filterUnits[iunit]; - const trieDetails = f.toBidiTrie(); - const id = this.plainTrie.add( - trieDetails.i, - trieDetails.n, - trieDetails.itok - ); - // No point storing a pattern with conditions if the bidi-trie already - // contain a pattern with no conditions. 
- let ix = this.plainTrie.getExtra(id); - if ( ix === 1 ) { - filterUnits[iunit] = null; - return; - } - // If the newly stored pattern has no condition, shortcut existing - // ones since they will always be short-circuited by the - // condition-less pattern. - if ( f instanceof FilterPatternPlain ) { - this.plainTrie.setExtra(id, 1); - filterUnits[iunit] = null; - return; - } - // FilterCompositeAll is assumed here, i.e. with conditions. - if ( f.n === 1 ) { - filterUnits[iunit] = null; - iunit = filterSequences[f.i]; - } - this.plainTrie.setExtra(id, filterSequenceAdd(iunit, ix)); - } - - static fromSelfie(args) { - const bucket = FilterCollection.fromSelfie(FilterBucket, args); - if ( args.length > 2 && Array.isArray(args[2]) ) { - bucket.plainTrie = bidiTrie.createOne(args[2]); + optimizeOriginHitTests() { + const units = filterUnits; + let candidateCount = -10; + const shouldPreTest = this.forEach(iunit => { + if ( units[iunit].hasOriginHit !== true ) { return; } + candidateCount += 1; + if ( candidateCount === 0 ) { return true; } + }); + if ( shouldPreTest !== true ) { return; } + const sequences = filterSequences; + const bucket = new FilterBucketOfOriginHits(); + const domainOpts = []; + let i = this.i; + let iprev = 0; + for (;;) { + const iunit = sequences[i+0]; + const inext = sequences[i+1]; + const f = units[iunit]; + if ( f.hasOriginHit ) { + domainOpts.push(f.domainOpt); + bucket.unshift(iunit); + if ( iprev !== 0 ) { + sequences[iprev+1] = inext; + } else { + this.i = inext; + } + this.n -= 1; + } else { + iprev = i; + } + if ( inext === 0 ) { break; } + i = inext; } + bucket.originTestUnit = + filterFromCtor(FilterOriginHitSet, domainOpts.join('|')); return bucket; } }; -FilterBucket.prototype.plainTrie = null; -FilterBucket.prototype.$matchedUnit = 0; -FilterBucket.prototype.$matchedTrie = false; - registerFilterClass(FilterBucket); /******************************************************************************/ +const FilterBucketOfOriginHits = 
class extends FilterBucket { + constructor(i = 0) { + super(); + this.originTestUnit = i; + } + + match() { + return filterUnits[this.originTestUnit].match() && super.match(); + } + + matchAndFetchModifiers(env) { + if ( filterUnits[this.originTestUnit].match() ) { + super.matchAndFetchModifiers(env); + } + } + + toSelfie() { + return [ this.fid, this.originTestUnit, super.toSelfie() ]; + } + + static fromSelfie(args) { + const bucket = new FilterBucketOfOriginHits(args[1]); + return super.fromSelfie(args[2], bucket); + } +}; + +registerFilterClass(FilterBucketOfOriginHits); + +/******************************************************************************/ + const FILTER_UNITS_MIN = filterUnits.length; const FILTER_SEQUENCES_MIN = filterSequenceWritePtr; @@ -2438,7 +2558,13 @@ const FilterParser = class { this.typeBits &= ~this.notTypes; if ( this.typeBits === 0 ) { return false; } } - + // CSP directives implicitly apply only to document/subdocument. + if ( this.modifyType === parser.OPTTokenCsp ) { + if ( this.typeBits === 0 ) { + this.parseTypeOption(parser.OPTTokenDoc, false); + this.parseTypeOption(parser.OPTTokenFrame, false); + } + } // https://github.com/gorhill/uBlock/issues/2283 // Abort if type is only for unsupported types, otherwise // toggle off `unsupported` bit. @@ -2596,7 +2722,7 @@ const FilterParser = class { ) { continue; } - if ( token.startsWith('b') ) { + if ( token.charCodeAt(0) === 0x62 /* 'b' */ ) { const match = /\\+$/.exec(prefix); if ( match !== null && (match[0].length & 1) !== 0 ) { prefix += 'b'; @@ -3212,6 +3338,13 @@ FilterContainer.prototype.matchAndFetchModifiers = function( ? 
this.categories.get(catBits11) : undefined; + if ( + bucket00 === undefined && bucket01 === undefined && + bucket10 === undefined && bucket11 === undefined + ) { + return; + } + const results = []; const env = { modifier: vAPI.StaticFilteringParser.netOptionTokenIds.get(modifierType) || 0, @@ -3763,6 +3896,7 @@ FilterContainer.prototype.benchmark = async function(action, target) { fctxt.setURL(request.url); fctxt.setDocOriginFromURL(request.frameUrl); fctxt.setType(request.cpt); + this.redirectURL = undefined; const r = this.matchString(fctxt); matchCount += 1; if ( recorded !== undefined ) { recorded.push(r); }