1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-11-09 12:22:33 +01:00

code review for static extended filtering, notably:

- use domain-derived integer hash to store filters

- remove code meant for firefox/legacy

- properly handle subdomains of entity-based filters
This commit is contained in:
Raymond Hill 2018-09-09 08:10:09 -04:00
parent 4682a33121
commit 06fe7e6871
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
5 changed files with 160 additions and 179 deletions

View File

@ -139,7 +139,7 @@ var µBlock = (function() { // jshint ignore:line
// Read-only // Read-only
systemSettings: { systemSettings: {
compiledMagic: 4, // Increase when compiled format changes compiledMagic: 5, // Increase when compiled format changes
selfieMagic: 4 // Increase when selfie format changes selfieMagic: 4 // Increase when selfie format changes
}, },

View File

@ -338,51 +338,6 @@ SelectorCacheEntry.prototype = {
/******************************************************************************/ /******************************************************************************/
/******************************************************************************/ /******************************************************************************/
// HHHHHHHHHHHH0000
// |           |
// |           |
// |           +-- bit  3-0: reserved: bit 0 = exception
// |                                   bit 1 = procedural
// +-------------- bit 15-4: FNV
let makeHash = function(token) {
    // Sampled variant of FNV32a:
    //   http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
    // Instead of consuming every character, eight positions spread across
    // the token are mixed in; that sampling scheme is custom to uBlock.
    const len = token.length;
    const half = len >> 1;
    const quarter = len >> 2;
    const eighth = len >> 3;
    const samplePositions = [
        0,
        eighth,
        quarter,
        quarter + eighth,
        half,
        half + eighth,
        half + quarter,
        len - 1
    ];
    // Start from the 32-bit FNV offset basis.
    let hval = 0x811c9dc5;
    for ( const pos of samplePositions ) {
        // An out-of-range position yields NaN, which XORs as 0.
        hval ^= token.charCodeAt(pos);
        // Shift-and-add equivalent of multiplying by 0x01000193 modulo 2^32.
        hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
        hval >>>= 0;
    }
    // Keep bits 15-4 only; the low four bits are left clear for flags.
    return hval & 0xFFF0;
};
/******************************************************************************/
/******************************************************************************/
// Cosmetic filter family tree: // Cosmetic filter family tree:
// //
// Generic // Generic
@ -769,25 +724,19 @@ FilterContainer.prototype.compileSpecificSelector = function(
let compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix); let compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
if ( compiled === undefined ) { return; } if ( compiled === undefined ) { return; }
// https://github.com/chrisaljoudi/uBlock/issues/188 let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hostname);
// If not a real domain as per PSL, assign a synthetic one
let hash; // Exception?
if ( hostname.endsWith('.*') === false ) {
let domain = this.µburi.domainFromHostnameNoCache(hostname);
hash = domain !== '' ? makeHash(domain) : 0;
} else {
hash = makeHash(hostname);
}
if ( unhide === 1 ) { if ( unhide === 1 ) {
hash |= 0b01; hash |= 0b0001;
} }
writer.push([ // Procedural?
8, if ( compiled.charCodeAt(0) === 0x7B ) {
compiled.charCodeAt(0) !== 0x7B ? hash : hash | 0b10, hash |= 0b0010;
hostname, }
compiled
]); writer.push([ 8, hash, hostname, compiled ]);
}; };
/******************************************************************************/ /******************************************************************************/
@ -1268,8 +1217,9 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
if ( options.noCosmeticFiltering !== true ) { if ( options.noCosmeticFiltering !== true ) {
let entity = request.entity, let entity = request.entity,
domainHash = makeHash(request.domain), domainHash = µb.staticExtFilteringEngine.makeHash(request.domain),
entityHash = entity !== '' ? makeHash(entity) : undefined; entityHash = µb.staticExtFilteringEngine.makeHash(entity),
bucket;
// Exception cosmetic filters: prime with generic exception filters. // Exception cosmetic filters: prime with generic exception filters.
let exceptionSet = this.setRegister0; let exceptionSet = this.setRegister0;
@ -1278,32 +1228,34 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
exceptionSet.add(exception); exceptionSet.add(exception);
} }
// Specific exception cosmetic filters. // Specific exception cosmetic filters.
let bucket = this.specificFilters.get(domainHash | 0b01); if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0001);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet); bucket.retrieve(hostname, exceptionSet);
} }
bucket = this.specificFilters.get(domainHash | 0b11); bucket = this.specificFilters.get(domainHash | 0b0011);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet); bucket.retrieve(hostname, exceptionSet);
} }
}
// Specific entity-based exception cosmetic filters. // Specific entity-based exception cosmetic filters.
if ( entityHash !== undefined ) { if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b01); bucket = this.specificFilters.get(entityHash | 0b0001);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(entity, exceptionSet); bucket.retrieve(entity, exceptionSet);
} }
bucket = this.specificFilters.get(entityHash | 0b11); bucket = this.specificFilters.get(entityHash | 0b0011);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(entity, exceptionSet); bucket.retrieve(entity, exceptionSet);
} }
} }
// Special bucket for those filters without a valid // Special bucket for those filters without a valid
// domain name as per PSL. // domain name as per PSL.
bucket = this.specificFilters.get(0b01); bucket = this.specificFilters.get(0 | 0b0001);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet); bucket.retrieve(hostname, exceptionSet);
} }
bucket = this.specificFilters.get(0b11); bucket = this.specificFilters.get(0 | 0b0011);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet); bucket.retrieve(hostname, exceptionSet);
} }
@ -1317,20 +1269,23 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// slightly content script code. // slightly content script code.
let specificSet = this.setRegister1; let specificSet = this.setRegister1;
// Specific cosmetic filters. // Specific cosmetic filters.
bucket = this.specificFilters.get(domainHash | 0b00); if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0000);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(hostname, specificSet); bucket.retrieve(hostname, specificSet);
} }
}
// Specific entity-based cosmetic filters. // Specific entity-based cosmetic filters.
if ( entityHash !== undefined ) { if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b00); bucket = this.specificFilters.get(entityHash | 0b0000);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(entity, specificSet); bucket.retrieve(entity, specificSet);
} }
} }
// https://github.com/chrisaljoudi/uBlock/issues/188 // https://github.com/chrisaljoudi/uBlock/issues/188
// Special bucket for those filters without a valid domain name as per PSL // Special bucket for those filters without a valid domain name
bucket = this.specificFilters.get(0b00); // as per PSL
bucket = this.specificFilters.get(0 | 0b0000);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(hostname, specificSet); bucket.retrieve(hostname, specificSet);
} }
@ -1346,20 +1301,23 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// Procedural cosmetic filters. // Procedural cosmetic filters.
let proceduralSet = this.setRegister2; let proceduralSet = this.setRegister2;
// Specific cosmetic filters. // Specific cosmetic filters.
bucket = this.specificFilters.get(domainHash | 0b10); if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0010);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(hostname, proceduralSet); bucket.retrieve(hostname, proceduralSet);
} }
}
// Specific entity-based cosmetic filters. // Specific entity-based cosmetic filters.
if ( entityHash !== undefined ) { if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b10); bucket = this.specificFilters.get(entityHash | 0b0010);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(entity, proceduralSet); bucket.retrieve(entity, proceduralSet);
} }
} }
// https://github.com/chrisaljoudi/uBlock/issues/188 // https://github.com/chrisaljoudi/uBlock/issues/188
// Special bucket for those filters without a valid domain name as per PSL // Special bucket for those filters without a valid domain name
bucket = this.specificFilters.get(0b10); // as per PSL
bucket = this.specificFilters.get(0 | 0b0010);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
bucket.retrieve(hostname, proceduralSet); bucket.retrieve(hostname, proceduralSet);
} }

View File

@ -226,7 +226,7 @@
}; };
api.compile = function(parsed, writer) { api.compile = function(parsed, writer) {
var selector = parsed.suffix.slice(1).trim(), let selector = parsed.suffix.slice(1).trim(),
compiled = µb.staticExtFilteringEngine.compileSelector(selector); compiled = µb.staticExtFilteringEngine.compileSelector(selector);
if ( compiled === undefined ) { return; } if ( compiled === undefined ) { return; }
@ -235,13 +235,16 @@
// TODO: Mind negated hostnames, they are currently discarded. // TODO: Mind negated hostnames, they are currently discarded.
for ( var hostname of parsed.hostnames ) { for ( let hn of parsed.hostnames ) {
if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) { continue; } if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) { continue; }
var domain = µb.URI.domainFromHostname(hostname); let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hn);
if ( parsed.exception ) {
hash |= 0b0001;
}
writer.push([ writer.push([
compiled.charCodeAt(0) !== 0x7B /* '{' */ ? 64 : 65, compiled.charCodeAt(0) !== 0x7B /* '{' */ ? 64 : 65,
parsed.exception ? '!' + domain : domain, hash,
hostname, hn,
compiled compiled
]); ]);
} }
@ -249,7 +252,7 @@
api.fromCompiledContent = function(reader) { api.fromCompiledContent = function(reader) {
// Don't bother loading filters if stream filtering is not supported. // Don't bother loading filters if stream filtering is not supported.
//if ( µb.canFilterResponseBody === false ) { return; } if ( µb.canFilterResponseBody === false ) { return; }
// 1002 = html filtering // 1002 = html filtering
reader.select(1002); reader.select(1002);
@ -272,7 +275,7 @@
}; };
api.retrieve = function(request) { api.retrieve = function(request) {
var hostname = request.hostname; let hostname = request.hostname;
// https://github.com/gorhill/uBlock/issues/2835 // https://github.com/gorhill/uBlock/issues/2835
// Do not filter if the site is under an `allow` rule. // Do not filter if the site is under an `allow` rule.
@ -283,12 +286,16 @@
return; return;
} }
var out = []; let out = [];
if ( request.domain !== '' ) { let domainHash = µb.staticExtFilteringEngine.makeHash(request.domain);
filterDB.retrieve(request.domain, hostname, out); if ( domainHash !== 0 ) {
filterDB.retrieve(request.entity, request.entity, out); filterDB.retrieve(domainHash, hostname, out);
} }
filterDB.retrieve('', hostname, out); let entityHash = µb.staticExtFilteringEngine.makeHash(request.entity);
if ( entityHash !== 0 ) {
filterDB.retrieve(entityHash, request.entity, out);
}
filterDB.retrieve(0, hostname, out);
// TODO: handle exceptions. // TODO: handle exceptions.
@ -326,53 +333,6 @@
pselectors.clear(); pselectors.clear();
}; };
// TODO: The following methods are useful only to legacy Firefox. They can
// be removed once support for legacy Firefox is dropped. The only concern
// at this point is for the code to work, not to be efficient.
// Only `script:has-text` selectors are considered.
api.retrieveScriptTagHostnames = function() {
    // Collect the hostname of every type-65 entry in `filterDB` whose parsed
    // selector consists of exactly one task, that task being a two-element
    // `:has-text` tuple.
    const hostnames = new Set();
    for ( const details of filterDB ) {
        if ( details.type !== 65 ) { continue; }
        const tasks = JSON.parse(details.selector).tasks;
        if ( tasks.length !== 1 ) { continue; }
        const task = tasks[0];
        if ( task.length !== 2 ) { continue; }
        if ( task[0] !== ':has-text' ) { continue; }
        hostnames.add(details.hostname);
    }
    // No match: return `undefined` rather than an empty array.
    if ( hostnames.size === 0 ) { return; }
    return Array.from(hostnames);
};
api.retrieveScriptTagRegex = function(domain, hostname) {
    // Look up the entries applying to this hostname/domain pair; the entity
    // is derived from the domain.
    const entries = api.retrieve({
        hostname: hostname,
        domain: domain,
        entity: µb.URI.entityFromDomain(domain)
    });
    if ( entries === undefined ) { return; }
    // Keep the argument of each type-65 entry whose parsed selector is a
    // single two-element `:has-text` task.
    const needles = new Set();
    for ( const details of entries ) {
        if ( details.type !== 65 ) { continue; }
        const tasks = JSON.parse(details.selector).tasks;
        if ( tasks.length !== 1 ) { continue; }
        const task = tasks[0];
        if ( task.length !== 2 ) { continue; }
        if ( task[0] !== ':has-text' ) { continue; }
        needles.add(task[1]);
    }
    // No match: return `undefined` rather than an empty string.
    if ( needles.size === 0 ) { return; }
    // Distinct arguments are joined as one `|`-separated alternation.
    return Array.from(needles).join('|');
};
Object.defineProperties(api, { Object.defineProperties(api, {
acceptedCount: { acceptedCount: {
get: function() { get: function() {

View File

@ -244,7 +244,7 @@
if ( parsed.hostnames.length === 0 ) { if ( parsed.hostnames.length === 0 ) {
if ( parsed.exception ) { if ( parsed.exception ) {
writer.push([ 32, '!', '', parsed.suffix ]); writer.push([ 32, 0 | 0b0001, '', parsed.suffix ]);
} }
return; return;
} }
@ -253,21 +253,19 @@
// Ignore instances of exception filter with negated hostnames, // Ignore instances of exception filter with negated hostnames,
// because there is no way to create an exception to an exception. // because there is no way to create an exception to an exception.
let µburi = µb.URI; for ( let hn of parsed.hostnames ) {
let negated = hn.charCodeAt(0) === 0x7E /* '~' */;
for ( let hostname of parsed.hostnames ) {
let negated = hostname.charCodeAt(0) === 0x7E /* '~' */;
if ( negated ) { if ( negated ) {
hostname = hostname.slice(1); hn = hn.slice(1);
} }
let hash = µburi.domainFromHostname(hostname); let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hn);
if ( parsed.exception ) { if ( parsed.exception ) {
if ( negated ) { continue; } if ( negated ) { continue; }
hash = '!' + hash; hash |= 0b0001;
} else if ( negated ) { } else if ( negated ) {
hash = '!' + hash; hash |= 0b0001;
} }
writer.push([ 32, hash, hostname, parsed.suffix ]); writer.push([ 32, hash, hn, parsed.suffix ]);
} }
}; };
@ -301,10 +299,10 @@
if ( scriptletDB.size === 0 ) { return; } if ( scriptletDB.size === 0 ) { return; }
if ( µb.hiddenSettings.ignoreScriptInjectFilters ) { return; } if ( µb.hiddenSettings.ignoreScriptInjectFilters ) { return; }
var reng = µb.redirectEngine; let reng = µb.redirectEngine;
if ( !reng ) { return; } if ( !reng ) { return; }
var hostname = request.hostname; let hostname = request.hostname;
// https://github.com/gorhill/uBlock/issues/2835 // https://github.com/gorhill/uBlock/issues/2835
// Do not inject scriptlets if the site is under an `allow` rule. // Do not inject scriptlets if the site is under an `allow` rule.
@ -320,7 +318,7 @@
// https://github.com/gorhill/uBlock/issues/1954 // https://github.com/gorhill/uBlock/issues/1954
// Implicit // Implicit
var hn = hostname; let hn = hostname;
for (;;) { for (;;) {
lookupScriptlet(hn + '.js', reng, scriptletsRegister); lookupScriptlet(hn + '.js', reng, scriptletsRegister);
if ( hn === domain ) { break; } if ( hn === domain ) { break; }
@ -334,11 +332,15 @@
// Explicit // Explicit
let entries = []; let entries = [];
if ( domain !== '' ) { let domainHash = µb.staticExtFilteringEngine.makeHash(domain);
scriptletDB.retrieve(domain, hostname, entries); if ( domainHash !== 0 ) {
scriptletDB.retrieve(entity, entity, entries); scriptletDB.retrieve(domainHash, hostname, entries);
} }
scriptletDB.retrieve('', hostname, entries); let entityHash = µb.staticExtFilteringEngine.makeHash(entity);
if ( entityHash !== 0 ) {
scriptletDB.retrieve(entityHash, entity, entries);
}
scriptletDB.retrieve(0, hostname, entries);
for ( let entry of entries ) { for ( let entry of entries ) {
lookupScriptlet(entry.token, reng, scriptletsRegister); lookupScriptlet(entry.token, reng, scriptletsRegister);
} }
@ -347,11 +349,13 @@
// Collect exception filters. // Collect exception filters.
entries = []; entries = [];
if ( domain !== '' ) { if ( domainHash !== 0 ) {
scriptletDB.retrieve('!' + domain, hostname, entries); scriptletDB.retrieve(domainHash | 0b0001, hostname, entries);
scriptletDB.retrieve('!' + entity, entity, entries);
} }
scriptletDB.retrieve('!', hostname, entries); if ( entityHash !== 0 ) {
scriptletDB.retrieve(entityHash | 0b0001, entity, entries);
}
scriptletDB.retrieve(0 | 0b0001, hostname, entries);
for ( let entry of entries ) { for ( let entry of entries ) {
exceptionsRegister.add(entry.token); exceptionsRegister.add(entry.token);
} }

View File

@ -405,7 +405,7 @@
api.HostnameBasedDB.prototype = { api.HostnameBasedDB.prototype = {
add: function(hash, entry) { add: function(hash, entry) {
var bucket = this.db.get(hash); let bucket = this.db.get(hash);
if ( bucket === undefined ) { if ( bucket === undefined ) {
this.db.set(hash, entry); this.db.set(hash, entry);
} else if ( Array.isArray(bucket) ) { } else if ( Array.isArray(bucket) ) {
@ -420,16 +420,21 @@
this.size = 0; this.size = 0;
}, },
retrieve: function(hash, hostname, out) { retrieve: function(hash, hostname, out) {
var bucket = this.db.get(hash); let bucket = this.db.get(hash);
if ( bucket === undefined ) { return; } if ( bucket === undefined ) { return; }
if ( Array.isArray(bucket) === false ) { if ( Array.isArray(bucket) === false ) {
if ( hostname.endsWith(bucket.hostname) ) { out.push(bucket); } bucket = [ bucket ];
return; }
for ( let entry of bucket ) {
if ( hostname.endsWith(entry.hostname) === false ) { continue; }
let i = hostname.length - entry.hostname.length;
if (
i === 0 ||
i === hostname.length ||
hostname.charCodeAt(i-1) === 0x2E /* '.' */
) {
out.push(entry);
} }
var i = bucket.length;
while ( i-- ) {
var entry = bucket[i];
if ( hostname.endsWith(entry.hostname) ) { out.push(entry); }
} }
}, },
toSelfie: function() { toSelfie: function() {
@ -484,6 +489,60 @@
resetParsed(parsed); resetParsed(parsed);
}; };
// HHHHHHHHHHHH0000
// | |
// | |
// | +-- bit 3-0: reserved
// +------ bit 15-4: FNV
api.makeHash = function(token) {
    // Sampled variant of FNV32a:
    //   http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
    // Instead of consuming every character, eight positions spread across
    // the token are mixed in; that sampling scheme is custom to uBlock.
    const len = token.length;
    // The empty string always maps to the reserved value 0.
    if ( len === 0 ) { return 0; }
    const half = len >> 1;
    const quarter = len >> 2;
    const eighth = len >> 3;
    const samplePositions = [
        0,
        eighth,
        quarter,
        quarter + eighth,
        half,
        half + eighth,
        half + quarter,
        len - 1
    ];
    // Start from the 32-bit FNV offset basis.
    let hval = 0x811c9dc5;
    for ( const pos of samplePositions ) {
        hval ^= token.charCodeAt(pos);
        // Shift-and-add equivalent of multiplying by 0x01000193 modulo 2^32.
        hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
        hval >>>= 0;
    }
    // Keep bits 15-4 only; the low four bits are reserved.
    hval &= 0xFFF0;
    // 0 is reserved for the empty string, so a non-empty token which would
    // hash to 0 is remapped to 0xfff0.
    return hval === 0 ? 0xfff0 : hval;
};
api.compileHostnameToHash = function(hostname) {
    // Regular hostname: hash whatever domain µb.URI reports for it.
    if ( hostname.endsWith('.*') === false ) {
        return api.makeHash(µb.URI.domainFromHostnameNoCache(hostname));
    }
    // Hostname ending with `.*`: hash only the last label together with the
    // trailing `.*`. The search starts just before the final `.*` so that
    // the dot of the suffix itself is skipped.
    const dot = hostname.lastIndexOf('.', hostname.length - 3);
    const key = dot === -1 ? hostname : hostname.slice(dot + 1);
    return api.makeHash(key);
};
// https://github.com/chrisaljoudi/uBlock/issues/1004 // https://github.com/chrisaljoudi/uBlock/issues/1004
// Detect and report invalid CSS selectors. // Detect and report invalid CSS selectors.