1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-15 07:22:28 +02:00

Replace FilterPlainHostname with composite filter

FilterPlainHostname, an atomic filter unit, has been
removed and is being replaced with a composite filter
made of a pattern filter and a filter which tests
hostname boundaries.

Doing so enables filters formerly being represented
by FilterPlainHostname to be now represented as a
plain pattern, and thus to be potentially stored in
a bidi-trie.

Comparing the new filter histogram with the previous
one:

FilterPatternPlain      24612   26432    1820
FilterComposite         17656   17125    -531
FilterPlainTrie Content 12977   13519     542
FilterPlainHostname      2904       0   -2904
FilterBucket             2121    1961    -160
FilterPlainTrie          1418    1578     160

Which means:
- An extra 542 patterns could be stored in bidi-tries
- There are 531 fewer composite filters needed
- An extra 160 buckets could be aggregated into 160
  bidi-tries

Memory-wise, it's a marginal gain (as per Chromium's
JavaScript VM instance figure) -- i.e. not worth
talking about. CPU-wise, no measurable difference.

The benefit is that, in my view, this slightly
simplifies the static network filtering code base
conceptually.
This commit is contained in:
Raymond Hill 2020-05-15 11:00:16 -04:00
parent edc55034d7
commit f060bb5382
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
2 changed files with 84 additions and 116 deletions

View File

@ -135,7 +135,7 @@ const µBlock = (( ) => { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 26, // Increase when compiled format changes
compiledMagic: 27, // Increase when compiled format changes
selfieMagic: 26, // Increase when selfie format changes
},

View File

@ -864,45 +864,7 @@ registerFilterClass(FilterPatternGeneric);
/******************************************************************************/
// Filter unit holding a plain hostname `s`. It matches when the current
// request hostname (read from `$requestHostname`) is exactly `s`, or ends
// with `s` immediately preceded by a dot.
const FilterPlainHostname = class {
    // s: the hostname this filter unit tests against.
    constructor(s) {
        this.s = s;
    }

    // Returns true when `$requestHostname` equals `this.s`, or ends with
    // `this.s` right after a '.' label separator.
    match() {
        const hostname = $requestHostname;
        const needle = this.s;
        if ( hostname.endsWith(needle) === false ) { return false; }
        const boundary = hostname.length - needle.length;
        // Same length: the hostname is exactly the needle.
        if ( boundary === 0 ) { return true; }
        // Otherwise the needle must start at a label boundary.
        return hostname.charCodeAt(boundary - 1) === 0x2E /* '.' */;
    }

    // Appends the `||hostname^` representation of this unit to
    // `details.pattern`, and its regex counterparts to `details.regex`.
    logData(details) {
        const hn = this.s;
        details.pattern.push('||', hn, '^');
        details.regex.push(restrFromPlainPattern(hn), restrSeparator);
    }

    // Serialized form: [ fid, hostname ].
    // NOTE(review): `fid` is not assigned in this class -- presumably set
    // by registerFilterClass(); confirm.
    toSelfie() {
        return [ this.fid, this.s ];
    }

    // Compiled form built from the `f` property of `details`.
    static compile(details) {
        return [ FilterPlainHostname.fid, details.f ];
    }

    // Rebuilds an instance from its compiled form; the hostname is args[1].
    static fromCompiled(args) {
        const hn = args[1];
        return new FilterPlainHostname(hn);
    }

    // Rebuilds an instance from its selfie form; the hostname is args[1].
    static fromSelfie(args) {
        const hn = args[1];
        return new FilterPlainHostname(hn);
    }
};
registerFilterClass(FilterPlainHostname);
/******************************************************************************/
const FilterAnchorHn = class {
const FilterAnchorHnLeft = class {
constructor() {
this.lastLen = 0;
this.lastBeg = -1;
@ -952,6 +914,41 @@ const FilterAnchorHn = class {
return [ this.fid ];
}
static compile() {
return [ FilterAnchorHnLeft.fid ];
}
static fromCompiled() {
return new FilterAnchorHnLeft();
}
static fromSelfie() {
return new FilterAnchorHnLeft();
}
static keyFromArgs() {
}
};
registerFilterClass(FilterAnchorHnLeft);
/******************************************************************************/
const FilterAnchorHn = class extends FilterAnchorHnLeft {
match() {
return super.match() && this.lastEnd === $patternMatchRight;
}
logData(details) {
super.logData(details);
details.pattern.push('^');
details.regex.push(restrSeparator);
}
toSelfie() {
return [ this.fid ];
}
static compile() {
return [ FilterAnchorHn.fid ];
}
@ -2468,7 +2465,7 @@ const FilterParser = class {
// hostname-anchored
if ( s.startsWith('||') ) {
this.anchor |= 0x4;
this.anchor |= 0b100;
s = s.slice(2);
// convert hostname to punycode if needed
@ -3018,35 +3015,28 @@ FilterContainer.prototype.compile = function(raw, writer) {
const units = [];
// Pattern
if ( parsed.isPureHostname ) {
parsed.anchor = 0;
units.push(FilterPlainHostname.compile(parsed));
} else if ( parsed.isJustOrigin() ) {
// Special pattern/option cases:
// - `*$domain=...`
// - `|http://$domain=...`
// - `|https://$domain=...`
if ( parsed.isJustOrigin() ) {
const hostnames = parsed.domainOpt.split('|');
if ( parsed.f === '*' ) {
parsed.tokenHash = this.anyTokenHash;
for ( const hn of hostnames ) {
this.compileToAtomicFilter(parsed, hn, writer);
}
return true;
}
if ( parsed.f.startsWith('https') ) {
} else if /* 'https:' */ ( parsed.f.startsWith('https') ) {
parsed.tokenHash = this.anyHTTPSTokenHash;
for ( const hn of hostnames ) {
this.compileToAtomicFilter(parsed, hn, writer);
}
return true;
} else /* 'http:' */ {
parsed.tokenHash = this.anyHTTPTokenHash;
}
parsed.tokenHash = this.anyHTTPTokenHash;
for ( const hn of hostnames ) {
this.compileToAtomicFilter(parsed, hn, writer);
}
return true;
} else {
filterPattern.compile(parsed, units);
}
// Pattern
filterPattern.compile(parsed, units);
// Type
// EXPERIMENT: $requestTypeBit
//if ( (parsed.typeBits & allNetworkTypesBits) !== 0 ) {
@ -3056,7 +3046,11 @@ FilterContainer.prototype.compile = function(raw, writer) {
// Anchor
if ( (parsed.anchor & 0b100) !== 0 ) {
units.push(FilterAnchorHn.compile());
if ( parsed.isPureHostname ) {
units.push(FilterAnchorHn.compile());
} else {
units.push(FilterAnchorHnLeft.compile());
}
} else if ( (parsed.anchor & 0b010) !== 0 ) {
units.push(FilterAnchorLeft.compile());
}
@ -3668,63 +3662,37 @@ FilterContainer.prototype.bucketHistogram = function() {
With default filter lists:
As of 2019-04-25:
As of 2020-05-15:
{"FilterPlainHnAnchored" => 11078}
{"FilterPlainPrefix1" => 7195}
{"FilterPrefix1Trie" => 5720}
{"FilterOriginHit" => 3561}
{"FilterWildcard2HnAnchored" => 2943}
{"FilterPair" => 2391}
{"FilterBucket" => 1922}
{"FilterWildcard1HnAnchored" => 1910}
{"FilterHnAnchoredTrie" => 1586}
{"FilterPlainHostname" => 1391}
{"FilterOriginHitSet" => 1155}
{"FilterPlain" => 634}
{"FilterWildcard1" => 423}
{"FilterGenericHnAnchored" => 389}
{"FilterOriginMiss" => 302}
{"FilterGeneric" => 163}
{"FilterOriginMissSet" => 150}
{"FilterRegex" => 124}
{"FilterPlainRightAnchored" => 110}
{"FilterGenericHnAndRightAnchored" => 95}
{"FilterHostnameDict" => 59}
{"FilterPlainLeftAnchored" => 30}
{"FilterJustOrigin" => 22}
{"FilterHTTPJustOrigin" => 19}
{"FilterHTTPSJustOrigin" => 18}
{"FilterExactMatch" => 5}
{"FilterOriginMixedSet" => 3}
As of 2019-10-21:
"FilterPatternPlain" => 27542}
"FilterComposite" => 17249}
"FilterPlainTrie" => 13235}
"FilterAnchorHn" => 11938}
"FilterPatternRightEx" => 4446}
"FilterOriginHit" => 4435}
"FilterBucket" => 3833}
"FilterPatternRight" => 3426}
"FilterPlainHostname" => 2786}
"FilterOriginHitSet" => 1433}
"FilterDataHolder" => 666}
"FilterPatternGeneric" => 548}
"FilterOriginMiss" => 441}
"FilterOriginMissSet" => 208}
"FilterTrailingSeparator" => 188}
"FilterRegex" => 181}
"FilterPatternLeft" => 172}
"FilterAnchorRight" => 100}
"FilterPatternLeftEx" => 82}
"FilterHostnameDict" => 60}
"FilterAnchorLeft" => 50}
"FilterJustOrigin" => 24}
"FilterHTTPJustOrigin" => 18}
"FilterTrue" => 17}
"FilterHTTPSJustOrigin" => 17}
"FilterHostnameDict" Content => 60772}
"FilterPatternPlain" => 26432}
"FilterComposite" => 17125}
"FilterPlainTrie Content" => 13519}
"FilterAnchorHnLeft" => 11931}
"FilterOriginHit" => 5524}
"FilterPatternRight" => 3376}
"FilterPatternRightEx" => 3130}
"FilterBucket" => 1961}
"FilterPlainTrie" => 1578}
"FilterOriginHitSet" => 1475}
"FilterAnchorHn" => 1453}
"FilterOriginMiss" => 730}
"FilterPatternGeneric" => 601}
"FilterDataHolder" => 404}
"FilterOriginMissSet" => 316}
"FilterTrailingSeparator" => 235}
"FilterAnchorRight" => 174}
"FilterPatternLeft" => 164}
"FilterRegex" => 125}
"FilterPatternLeftEx" => 68}
"FilterHostnameDict" => 62}
"FilterAnchorLeft" => 51}
"FilterJustOrigin" => 25}
"FilterTrue" => 18}
"FilterHTTPSJustOrigin" => 16}
"FilterHTTPJustOrigin" => 16}
"FilterType" => 0}
"FilterDenyAllow" => 0}
*/