mirror of
https://github.com/gorhill/uBlock.git
synced 2024-09-15 07:22:28 +02:00
Replace FilterPlainHostname with composite filter
FilterPlainHostname, an atomic filter unit, has been removed and replaced with a composite filter made of a pattern filter and a filter that tests hostname boundaries. Doing so enables filters formerly represented by FilterPlainHostname to now be represented as a plain pattern, and thus to potentially be stored in a bidi-trie.

Comparing the new filter histogram with the previous one (columns: previous count, new count, difference):

FilterPatternPlain         24612   26432    1820
FilterComposite            17656   17125    -531
FilterPlainTrie Content    12977   13519     542
FilterPlainHostname         2904       0   -2904
FilterBucket                2121    1961    -160
FilterPlainTrie             1418    1578     160

Which means:
- An extra 542 patterns could be stored in bidi-tries
- There are 531 fewer composite filters needed
- An extra 160 buckets could be aggregated into 160 bidi-tries

Memory-wise, it's a marginal gain (as per Chromium's JavaScript VM instance figure), i.e. not worth talking about. CPU-wise, there is no measurable difference. The benefit is that, in my view, this slightly simplifies the static network filtering code base conceptually.
This commit is contained in:
parent
edc55034d7
commit
f060bb5382
@ -135,7 +135,7 @@ const µBlock = (( ) => { // jshint ignore:line
|
||||
|
||||
// Read-only
|
||||
systemSettings: {
|
||||
compiledMagic: 26, // Increase when compiled format changes
|
||||
compiledMagic: 27, // Increase when compiled format changes
|
||||
selfieMagic: 26, // Increase when selfie format changes
|
||||
},
|
||||
|
||||
|
@ -864,45 +864,7 @@ registerFilterClass(FilterPatternGeneric);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterPlainHostname = class {
|
||||
constructor(s) {
|
||||
this.s = s;
|
||||
}
|
||||
|
||||
match() {
|
||||
if ( $requestHostname.endsWith(this.s) === false ) { return false; }
|
||||
const offset = $requestHostname.length - this.s.length;
|
||||
return offset === 0 ||
|
||||
$requestHostname.charCodeAt(offset - 1) === 0x2E /* '.' */;
|
||||
}
|
||||
|
||||
logData(details) {
|
||||
details.pattern.push('||', this.s, '^');
|
||||
details.regex.push(restrFromPlainPattern(this.s), restrSeparator);
|
||||
}
|
||||
|
||||
toSelfie() {
|
||||
return [ this.fid, this.s ];
|
||||
}
|
||||
|
||||
static compile(details) {
|
||||
return [ FilterPlainHostname.fid, details.f ];
|
||||
}
|
||||
|
||||
static fromCompiled(args) {
|
||||
return new FilterPlainHostname(args[1]);
|
||||
}
|
||||
|
||||
static fromSelfie(args) {
|
||||
return new FilterPlainHostname(args[1]);
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterPlainHostname);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterAnchorHn = class {
|
||||
const FilterAnchorHnLeft = class {
|
||||
constructor() {
|
||||
this.lastLen = 0;
|
||||
this.lastBeg = -1;
|
||||
@ -952,6 +914,41 @@ const FilterAnchorHn = class {
|
||||
return [ this.fid ];
|
||||
}
|
||||
|
||||
static compile() {
|
||||
return [ FilterAnchorHnLeft.fid ];
|
||||
}
|
||||
|
||||
static fromCompiled() {
|
||||
return new FilterAnchorHnLeft();
|
||||
}
|
||||
|
||||
static fromSelfie() {
|
||||
return new FilterAnchorHnLeft();
|
||||
}
|
||||
|
||||
static keyFromArgs() {
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterAnchorHnLeft);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterAnchorHn = class extends FilterAnchorHnLeft {
|
||||
match() {
|
||||
return super.match() && this.lastEnd === $patternMatchRight;
|
||||
}
|
||||
|
||||
logData(details) {
|
||||
super.logData(details);
|
||||
details.pattern.push('^');
|
||||
details.regex.push(restrSeparator);
|
||||
}
|
||||
|
||||
toSelfie() {
|
||||
return [ this.fid ];
|
||||
}
|
||||
|
||||
static compile() {
|
||||
return [ FilterAnchorHn.fid ];
|
||||
}
|
||||
@ -2468,7 +2465,7 @@ const FilterParser = class {
|
||||
|
||||
// hostname-anchored
|
||||
if ( s.startsWith('||') ) {
|
||||
this.anchor |= 0x4;
|
||||
this.anchor |= 0b100;
|
||||
s = s.slice(2);
|
||||
|
||||
// convert hostname to punycode if needed
|
||||
@ -3018,35 +3015,28 @@ FilterContainer.prototype.compile = function(raw, writer) {
|
||||
|
||||
const units = [];
|
||||
|
||||
// Pattern
|
||||
if ( parsed.isPureHostname ) {
|
||||
parsed.anchor = 0;
|
||||
units.push(FilterPlainHostname.compile(parsed));
|
||||
} else if ( parsed.isJustOrigin() ) {
|
||||
// Special pattern/option cases:
|
||||
// - `*$domain=...`
|
||||
// - `|http://$domain=...`
|
||||
// - `|https://$domain=...`
|
||||
if ( parsed.isJustOrigin() ) {
|
||||
const hostnames = parsed.domainOpt.split('|');
|
||||
if ( parsed.f === '*' ) {
|
||||
parsed.tokenHash = this.anyTokenHash;
|
||||
for ( const hn of hostnames ) {
|
||||
this.compileToAtomicFilter(parsed, hn, writer);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if ( parsed.f.startsWith('https') ) {
|
||||
} else if /* 'https:' */ ( parsed.f.startsWith('https') ) {
|
||||
parsed.tokenHash = this.anyHTTPSTokenHash;
|
||||
for ( const hn of hostnames ) {
|
||||
this.compileToAtomicFilter(parsed, hn, writer);
|
||||
}
|
||||
return true;
|
||||
} else /* 'http:' */ {
|
||||
parsed.tokenHash = this.anyHTTPTokenHash;
|
||||
}
|
||||
parsed.tokenHash = this.anyHTTPTokenHash;
|
||||
for ( const hn of hostnames ) {
|
||||
this.compileToAtomicFilter(parsed, hn, writer);
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
filterPattern.compile(parsed, units);
|
||||
}
|
||||
|
||||
// Pattern
|
||||
filterPattern.compile(parsed, units);
|
||||
|
||||
// Type
|
||||
// EXPERIMENT: $requestTypeBit
|
||||
//if ( (parsed.typeBits & allNetworkTypesBits) !== 0 ) {
|
||||
@ -3056,7 +3046,11 @@ FilterContainer.prototype.compile = function(raw, writer) {
|
||||
|
||||
// Anchor
|
||||
if ( (parsed.anchor & 0b100) !== 0 ) {
|
||||
units.push(FilterAnchorHn.compile());
|
||||
if ( parsed.isPureHostname ) {
|
||||
units.push(FilterAnchorHn.compile());
|
||||
} else {
|
||||
units.push(FilterAnchorHnLeft.compile());
|
||||
}
|
||||
} else if ( (parsed.anchor & 0b010) !== 0 ) {
|
||||
units.push(FilterAnchorLeft.compile());
|
||||
}
|
||||
@ -3668,63 +3662,37 @@ FilterContainer.prototype.bucketHistogram = function() {
|
||||
|
||||
With default filter lists:
|
||||
|
||||
As of 2019-04-25:
|
||||
As of 2020-05-15:
|
||||
|
||||
{"FilterPlainHnAnchored" => 11078}
|
||||
{"FilterPlainPrefix1" => 7195}
|
||||
{"FilterPrefix1Trie" => 5720}
|
||||
{"FilterOriginHit" => 3561}
|
||||
{"FilterWildcard2HnAnchored" => 2943}
|
||||
{"FilterPair" => 2391}
|
||||
{"FilterBucket" => 1922}
|
||||
{"FilterWildcard1HnAnchored" => 1910}
|
||||
{"FilterHnAnchoredTrie" => 1586}
|
||||
{"FilterPlainHostname" => 1391}
|
||||
{"FilterOriginHitSet" => 1155}
|
||||
{"FilterPlain" => 634}
|
||||
{"FilterWildcard1" => 423}
|
||||
{"FilterGenericHnAnchored" => 389}
|
||||
{"FilterOriginMiss" => 302}
|
||||
{"FilterGeneric" => 163}
|
||||
{"FilterOriginMissSet" => 150}
|
||||
{"FilterRegex" => 124}
|
||||
{"FilterPlainRightAnchored" => 110}
|
||||
{"FilterGenericHnAndRightAnchored" => 95}
|
||||
{"FilterHostnameDict" => 59}
|
||||
{"FilterPlainLeftAnchored" => 30}
|
||||
{"FilterJustOrigin" => 22}
|
||||
{"FilterHTTPJustOrigin" => 19}
|
||||
{"FilterHTTPSJustOrigin" => 18}
|
||||
{"FilterExactMatch" => 5}
|
||||
{"FilterOriginMixedSet" => 3}
|
||||
|
||||
As of 2019-10-21:
|
||||
|
||||
"FilterPatternPlain" => 27542}
|
||||
"FilterComposite" => 17249}
|
||||
"FilterPlainTrie" => 13235}
|
||||
"FilterAnchorHn" => 11938}
|
||||
"FilterPatternRightEx" => 4446}
|
||||
"FilterOriginHit" => 4435}
|
||||
"FilterBucket" => 3833}
|
||||
"FilterPatternRight" => 3426}
|
||||
"FilterPlainHostname" => 2786}
|
||||
"FilterOriginHitSet" => 1433}
|
||||
"FilterDataHolder" => 666}
|
||||
"FilterPatternGeneric" => 548}
|
||||
"FilterOriginMiss" => 441}
|
||||
"FilterOriginMissSet" => 208}
|
||||
"FilterTrailingSeparator" => 188}
|
||||
"FilterRegex" => 181}
|
||||
"FilterPatternLeft" => 172}
|
||||
"FilterAnchorRight" => 100}
|
||||
"FilterPatternLeftEx" => 82}
|
||||
"FilterHostnameDict" => 60}
|
||||
"FilterAnchorLeft" => 50}
|
||||
"FilterJustOrigin" => 24}
|
||||
"FilterHTTPJustOrigin" => 18}
|
||||
"FilterTrue" => 17}
|
||||
"FilterHTTPSJustOrigin" => 17}
|
||||
"FilterHostnameDict" Content => 60772}
|
||||
"FilterPatternPlain" => 26432}
|
||||
"FilterComposite" => 17125}
|
||||
"FilterPlainTrie Content" => 13519}
|
||||
"FilterAnchorHnLeft" => 11931}
|
||||
"FilterOriginHit" => 5524}
|
||||
"FilterPatternRight" => 3376}
|
||||
"FilterPatternRightEx" => 3130}
|
||||
"FilterBucket" => 1961}
|
||||
"FilterPlainTrie" => 1578}
|
||||
"FilterOriginHitSet" => 1475}
|
||||
"FilterAnchorHn" => 1453}
|
||||
"FilterOriginMiss" => 730}
|
||||
"FilterPatternGeneric" => 601}
|
||||
"FilterDataHolder" => 404}
|
||||
"FilterOriginMissSet" => 316}
|
||||
"FilterTrailingSeparator" => 235}
|
||||
"FilterAnchorRight" => 174}
|
||||
"FilterPatternLeft" => 164}
|
||||
"FilterRegex" => 125}
|
||||
"FilterPatternLeftEx" => 68}
|
||||
"FilterHostnameDict" => 62}
|
||||
"FilterAnchorLeft" => 51}
|
||||
"FilterJustOrigin" => 25}
|
||||
"FilterTrue" => 18}
|
||||
"FilterHTTPSJustOrigin" => 16}
|
||||
"FilterHTTPJustOrigin" => 16}
|
||||
"FilterType" => 0}
|
||||
"FilterDenyAllow" => 0}
|
||||
|
||||
*/
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user