From f060bb53827051d46c6773ae339ca0c8620b968b Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Fri, 15 May 2020 11:00:16 -0400 Subject: [PATCH] Replace FilterPlainHostname with composite filter FilterPlainHostname, an atomic filter unit, has been removed and is being replaced with a composite filter made of a pattern filter and a filter which tests hostname boundaries. Doing so enables filters formerly represented by FilterPlainHostname to now be represented as a plain pattern, and thus to be potentially stored in a bidi-trie. Comparing the new filter histogram with the previous one: FilterPatternPlain 24612 26432 1820 FilterComposite 17656 17125 -531 FilterPlainTrie Content 12977 13519 542 FilterPlainHostname 2904 0 -2904 FilterBucket 2121 1961 -160 FilterPlainTrie 1418 1578 160 Which means: - An extra 542 patterns could be stored in bidi-tries - There are 531 fewer composite filters needed - An extra 160 buckets could be aggregated into 160 bidi-tries Memory-wise, it's a marginal gain (as per Chromium's JavaScript VM instance figure) -- i.e. not worth talking about. CPU-wise, no measurable difference. The benefit is that, in my view, this slightly simplifies the static network filtering code base conceptually. 
--- src/js/background.js | 2 +- src/js/static-net-filtering.js | 198 ++++++++++++++------------------- 2 files changed, 84 insertions(+), 116 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index 433ecedf4..5d82da6ad 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -135,7 +135,7 @@ const µBlock = (( ) => { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 26, // Increase when compiled format changes + compiledMagic: 27, // Increase when compiled format changes selfieMagic: 26, // Increase when selfie format changes }, diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 8c75be414..b56720886 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -864,45 +864,7 @@ registerFilterClass(FilterPatternGeneric); /******************************************************************************/ -const FilterPlainHostname = class { - constructor(s) { - this.s = s; - } - - match() { - if ( $requestHostname.endsWith(this.s) === false ) { return false; } - const offset = $requestHostname.length - this.s.length; - return offset === 0 || - $requestHostname.charCodeAt(offset - 1) === 0x2E /* '.' 
*/; - } - - logData(details) { - details.pattern.push('||', this.s, '^'); - details.regex.push(restrFromPlainPattern(this.s), restrSeparator); - } - - toSelfie() { - return [ this.fid, this.s ]; - } - - static compile(details) { - return [ FilterPlainHostname.fid, details.f ]; - } - - static fromCompiled(args) { - return new FilterPlainHostname(args[1]); - } - - static fromSelfie(args) { - return new FilterPlainHostname(args[1]); - } -}; - -registerFilterClass(FilterPlainHostname); - -/******************************************************************************/ - -const FilterAnchorHn = class { +const FilterAnchorHnLeft = class { constructor() { this.lastLen = 0; this.lastBeg = -1; @@ -952,6 +914,41 @@ const FilterAnchorHn = class { return [ this.fid ]; } + static compile() { + return [ FilterAnchorHnLeft.fid ]; + } + + static fromCompiled() { + return new FilterAnchorHnLeft(); + } + + static fromSelfie() { + return new FilterAnchorHnLeft(); + } + + static keyFromArgs() { + } +}; + +registerFilterClass(FilterAnchorHnLeft); + +/******************************************************************************/ + +const FilterAnchorHn = class extends FilterAnchorHnLeft { + match() { + return super.match() && this.lastEnd === $patternMatchRight; + } + + logData(details) { + super.logData(details); + details.pattern.push('^'); + details.regex.push(restrSeparator); + } + + toSelfie() { + return [ this.fid ]; + } + static compile() { return [ FilterAnchorHn.fid ]; } @@ -2468,7 +2465,7 @@ const FilterParser = class { // hostname-anchored if ( s.startsWith('||') ) { - this.anchor |= 0x4; + this.anchor |= 0b100; s = s.slice(2); // convert hostname to punycode if needed @@ -3018,35 +3015,28 @@ FilterContainer.prototype.compile = function(raw, writer) { const units = []; - // Pattern - if ( parsed.isPureHostname ) { - parsed.anchor = 0; - units.push(FilterPlainHostname.compile(parsed)); - } else if ( parsed.isJustOrigin() ) { + // Special pattern/option cases: + // - 
`*$domain=...` + // - `|http://$domain=...` + // - `|https://$domain=...` + if ( parsed.isJustOrigin() ) { const hostnames = parsed.domainOpt.split('|'); if ( parsed.f === '*' ) { parsed.tokenHash = this.anyTokenHash; - for ( const hn of hostnames ) { - this.compileToAtomicFilter(parsed, hn, writer); - } - return true; - } - if ( parsed.f.startsWith('https') ) { + } else if /* 'https:' */ ( parsed.f.startsWith('https') ) { parsed.tokenHash = this.anyHTTPSTokenHash; - for ( const hn of hostnames ) { - this.compileToAtomicFilter(parsed, hn, writer); - } - return true; + } else /* 'http:' */ { + parsed.tokenHash = this.anyHTTPTokenHash; } - parsed.tokenHash = this.anyHTTPTokenHash; for ( const hn of hostnames ) { this.compileToAtomicFilter(parsed, hn, writer); } return true; - } else { - filterPattern.compile(parsed, units); } + // Pattern + filterPattern.compile(parsed, units); + // Type // EXPERIMENT: $requestTypeBit //if ( (parsed.typeBits & allNetworkTypesBits) !== 0 ) { @@ -3056,7 +3046,11 @@ FilterContainer.prototype.compile = function(raw, writer) { // Anchor if ( (parsed.anchor & 0b100) !== 0 ) { - units.push(FilterAnchorHn.compile()); + if ( parsed.isPureHostname ) { + units.push(FilterAnchorHn.compile()); + } else { + units.push(FilterAnchorHnLeft.compile()); + } } else if ( (parsed.anchor & 0b010) !== 0 ) { units.push(FilterAnchorLeft.compile()); } @@ -3668,63 +3662,37 @@ FilterContainer.prototype.bucketHistogram = function() { With default filter lists: - As of 2019-04-25: + As of 2020-05-15: - {"FilterPlainHnAnchored" => 11078} - {"FilterPlainPrefix1" => 7195} - {"FilterPrefix1Trie" => 5720} - {"FilterOriginHit" => 3561} - {"FilterWildcard2HnAnchored" => 2943} - {"FilterPair" => 2391} - {"FilterBucket" => 1922} - {"FilterWildcard1HnAnchored" => 1910} - {"FilterHnAnchoredTrie" => 1586} - {"FilterPlainHostname" => 1391} - {"FilterOriginHitSet" => 1155} - {"FilterPlain" => 634} - {"FilterWildcard1" => 423} - {"FilterGenericHnAnchored" => 389} - 
{"FilterOriginMiss" => 302} - {"FilterGeneric" => 163} - {"FilterOriginMissSet" => 150} - {"FilterRegex" => 124} - {"FilterPlainRightAnchored" => 110} - {"FilterGenericHnAndRightAnchored" => 95} - {"FilterHostnameDict" => 59} - {"FilterPlainLeftAnchored" => 30} - {"FilterJustOrigin" => 22} - {"FilterHTTPJustOrigin" => 19} - {"FilterHTTPSJustOrigin" => 18} - {"FilterExactMatch" => 5} - {"FilterOriginMixedSet" => 3} - - As of 2019-10-21: - - "FilterPatternPlain" => 27542} - "FilterComposite" => 17249} - "FilterPlainTrie" => 13235} - "FilterAnchorHn" => 11938} - "FilterPatternRightEx" => 4446} - "FilterOriginHit" => 4435} - "FilterBucket" => 3833} - "FilterPatternRight" => 3426} - "FilterPlainHostname" => 2786} - "FilterOriginHitSet" => 1433} - "FilterDataHolder" => 666} - "FilterPatternGeneric" => 548} - "FilterOriginMiss" => 441} - "FilterOriginMissSet" => 208} - "FilterTrailingSeparator" => 188} - "FilterRegex" => 181} - "FilterPatternLeft" => 172} - "FilterAnchorRight" => 100} - "FilterPatternLeftEx" => 82} - "FilterHostnameDict" => 60} - "FilterAnchorLeft" => 50} - "FilterJustOrigin" => 24} - "FilterHTTPJustOrigin" => 18} - "FilterTrue" => 17} - "FilterHTTPSJustOrigin" => 17} + "FilterHostnameDict" Content => 60772} + "FilterPatternPlain" => 26432} + "FilterComposite" => 17125} + "FilterPlainTrie Content" => 13519} + "FilterAnchorHnLeft" => 11931} + "FilterOriginHit" => 5524} + "FilterPatternRight" => 3376} + "FilterPatternRightEx" => 3130} + "FilterBucket" => 1961} + "FilterPlainTrie" => 1578} + "FilterOriginHitSet" => 1475} + "FilterAnchorHn" => 1453} + "FilterOriginMiss" => 730} + "FilterPatternGeneric" => 601} + "FilterDataHolder" => 404} + "FilterOriginMissSet" => 316} + "FilterTrailingSeparator" => 235} + "FilterAnchorRight" => 174} + "FilterPatternLeft" => 164} + "FilterRegex" => 125} + "FilterPatternLeftEx" => 68} + "FilterHostnameDict" => 62} + "FilterAnchorLeft" => 51} + "FilterJustOrigin" => 25} + "FilterTrue" => 18} + "FilterHTTPSJustOrigin" => 16} + 
"FilterHTTPJustOrigin" => 16} + "FilterType" => 0} + "FilterDenyAllow" => 0} */