1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-07-05 11:37:01 +02:00

Various code review related to extended filtering

Bring latest changes to procedural cosmetic filtering to uBOL.

Fix procedural filtering used in HTML filters.

Standardize the quick hash algorithm used throughout on DJB2
(except that the initialization step is skipped):
- http://www.cse.yorku.ca/~oz/hash.html#djb2
This commit is contained in:
Raymond Hill 2022-12-13 10:23:51 -05:00
parent 5ad2c34212
commit b603e9e81e
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
10 changed files with 194 additions and 128 deletions

View File

@ -50,15 +50,17 @@ let lastDomChange = Date.now();
/******************************************************************************/
// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/
// http://www.cse.yorku.ca/~oz/hash.html#djb2
// Must mirror dnrRulesetFromRawLists's version
const hashFromStr = (type, s) => {
const len = s.length;
const step = len + 7 >>> 3;
let hash = type;
for ( let i = 0; i < len; i += step ) {
hash = (hash << 5) - hash + s.charCodeAt(i) | 0;
}
return hash & 0x00FFFFFF;
let hash = (type << 5) + type ^ len;
for ( let i = 0; i < len; i += step ) {
hash = (hash << 5) + hash ^ s.charCodeAt(i);
}
return hash & 0xFF_FFFF;
};
/******************************************************************************/

View File

@ -52,6 +52,16 @@ const nonVisualElements = {
style: true,
};
// Build a RegExp from a filter argument.
//   s:     either a regex literal written as `/source/` or `/source/i`, or a
//          plain string to be matched literally (case-insensitively).
//   exact: when true, a plain string must match the whole input rather than
//          a substring; this has no effect on regex literals.
// An empty string yields a pattern which matches any input.
const regexFromString = (s, exact = false) => {
    if ( s === '' ) { return /^/; }
    const literal = /^\/(.+)\/([i]?)$/.exec(s);
    if ( literal === null ) {
        // Plain string: neutralize all regex metacharacters.
        const escaped = s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const source = exact ? `^${escaped}$` : escaped;
        return new RegExp(source, 'i');
    }
    const [ , source, flags ] = literal;
    return new RegExp(source, flags !== '' ? flags : undefined);
};
/******************************************************************************/
// 'P' stands for 'Procedural'
@ -79,11 +89,7 @@ class PSelectorVoidTask extends PSelectorTask {
class PSelectorHasTextTask extends PSelectorTask {
constructor(task) {
super();
let arg0 = task[1], arg1;
if ( Array.isArray(task[1]) ) {
arg1 = arg0[1]; arg0 = arg0[0];
}
this.needle = new RegExp(arg0, arg1);
this.needle = regexFromString(task[1]);
}
transpose(node, output) {
if ( this.needle.test(node.textContent) ) {
@ -113,6 +119,24 @@ PSelectorIfNotTask.prototype.target = false;
/******************************************************************************/
// Procedural operator task which keeps a node when at least one of its
// attributes has a name matching `task[1].attr` and a value matching
// `task[1].value`. Both patterns are compiled through regexFromString() with
// `exact` set to true.
class PSelectorMatchesAttrTask extends PSelectorTask {
    constructor(task) {
        super();
        this.reAttr = regexFromString(task[1].attr, true);
        this.reValue = regexFromString(task[1].value, true);
    }
    // Append `node` to `output` -- at most once -- if any of its attributes
    // satisfies both the name and the value patterns.
    transpose(node, output) {
        for ( const attr of node.getAttributeNames() ) {
            if ( this.reAttr.test(attr) === false ) { continue; }
            if ( this.reValue.test(node.getAttribute(attr)) === false ) { continue; }
            output.push(node);
            // One matching attribute is enough: stop here so that the node
            // is not emitted again for each further matching attribute.
            break;
        }
    }
}
/******************************************************************************/
class PSelectorMatchesCSSTask extends PSelectorTask {
constructor(task) {
super();
@ -168,11 +192,7 @@ class PSelectorMatchesMediaTask extends PSelectorTask {
class PSelectorMatchesPathTask extends PSelectorTask {
constructor(task) {
super();
let arg0 = task[1], arg1;
if ( Array.isArray(task[1]) ) {
arg1 = arg0[1]; arg0 = arg0[0];
}
this.needle = new RegExp(arg0, arg1);
this.needle = regexFromString(task[1]);
}
transpose(node, output) {
if ( this.needle.test(self.location.pathname + self.location.search) ) {
@ -442,6 +462,7 @@ PSelector.prototype.operatorToTaskMap = new Map([
[ 'has-text', PSelectorHasTextTask ],
[ 'if', PSelectorIfTask ],
[ 'if-not', PSelectorIfNotTask ],
[ 'matches-attr', PSelectorMatchesAttrTask ],
[ 'matches-css', PSelectorMatchesCSSTask ],
[ 'matches-css-after', PSelectorMatchesCSSAfterTask ],
[ 'matches-css-before', PSelectorMatchesCSSBeforeTask ],
@ -459,13 +480,13 @@ PSelector.prototype.operatorToTaskMap = new Map([
/******************************************************************************/
class PSelectorRoot extends PSelector {
constructor(o, styleToken) {
constructor(o) {
super(o);
this.budget = 200; // I arbitrary picked a 1/5 second
this.raw = o.raw;
this.cost = 0;
this.lastAllowanceTime = 0;
this.styleToken = styleToken;
this.action = o.action;
}
prime(input) {
try {
@ -485,6 +506,7 @@ class ProceduralFilterer {
this.styleTokenMap = new Map();
this.styledNodes = new Set();
this.timer = undefined;
this.hideStyle = 'display:none!important;';
this.addSelectors(selectors);
// Important: commit now (do not go through onDOMChanged) to be sure
// first pass is going to happen asap.
@ -493,21 +515,24 @@ class ProceduralFilterer {
addSelectors() {
for ( const selector of selectors ) {
let style, styleToken;
if ( selector.action === undefined ) {
style = 'display:none!important;';
} else if ( selector.action[0] === 'style' ) {
style = selector.action[1];
}
if ( style !== undefined ) {
styleToken = this.styleTokenFromStyle(style);
}
const pselector = new PSelectorRoot(selector, styleToken);
const pselector = new PSelectorRoot(selector);
this.primeProceduralSelector(pselector);
this.selectors.push(pselector);
}
this.onDOMChanged();
}
// This allows potentially expensive initialization steps to be performed
// before the filters are ready to be applied.
primeProceduralSelector(pselector) {
if ( pselector.action === undefined ) {
this.styleTokenFromStyle(this.hideStyle);
} else if ( pselector.action[0] === 'style' ) {
this.styleTokenFromStyle(pselector.action[1]);
}
return pselector;
}
uBOL_commitNow() {
// https://github.com/uBlockOrigin/uBlock-issues/issues/341
// Be ready to unhide nodes which no longer matches any of
@ -534,10 +559,10 @@ class ProceduralFilterer {
}
t0 = t1;
if ( nodes.length === 0 ) { continue; }
this.styleNodes(nodes, pselector.styleToken);
this.processNodes(nodes, pselector.action);
}
this.unstyleNodes(toUnstyle);
this.unprocessNodes(toUnstyle);
}
styleTokenFromStyle(style) {
@ -552,22 +577,60 @@ class ProceduralFilterer {
return styleToken;
}
styleNodes(nodes, styleToken) {
if ( styleToken === undefined ) {
processNodes(nodes, action) {
const op = action && action[0] || '';
const arg = op !== '' ? action[1] : '';
switch ( op ) {
case '':
/* fall through */
case 'style': {
const styleToken = this.styleTokenFromStyle(
arg === '' ? this.hideStyle : arg
);
for ( const node of nodes ) {
node.setAttribute(this.masterToken, '');
node.setAttribute(styleToken, '');
this.styledNodes.add(node);
}
break;
}
case 'remove': {
for ( const node of nodes ) {
node.remove();
node.textContent = '';
}
return;
break;
}
for ( const node of nodes ) {
node.setAttribute(this.masterToken, '');
node.setAttribute(styleToken, '');
this.styledNodes.add(node);
case 'remove-attr': {
const reAttr = regexFromString(arg, true);
for ( const node of nodes ) {
for ( const name of node.getAttributeNames() ) {
if ( reAttr.test(name) === false ) { continue; }
node.removeAttribute(name);
}
}
break;
}
case 'remove-class': {
const reClass = regexFromString(arg, true);
for ( const node of nodes ) {
const cl = node.classList;
for ( const name of cl.values() ) {
if ( reClass.test(name) === false ) { continue; }
cl.remove(name);
}
}
break;
}
default:
break;
}
}
unstyleNodes(nodes) {
// TODO: Current assumption is one style per hit element. Could be an
// issue if an element has multiple styling and one styling is
// brought back. Possibly too rare to care about this for now.
unprocessNodes(nodes) {
for ( const node of nodes ) {
if ( this.styledNodes.has(node) ) { continue; }
node.removeAttribute(this.masterToken);

View File

@ -176,8 +176,8 @@ const µBlock = { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 49, // Increase when compiled format changes
selfieMagic: 49, // Increase when selfie format changes
compiledMagic: 50, // Increase when compiled format changes
selfieMagic: 50, // Increase when selfie format changes
},
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -362,27 +362,6 @@ class PSelectorXpathTask extends PSelectorTask {
class PSelector {
constructor(o) {
if ( PSelector.prototype.operatorToTaskMap === undefined ) {
PSelector.prototype.operatorToTaskMap = new Map([
[ 'has', PSelectorIfTask ],
[ 'has-text', PSelectorHasTextTask ],
[ 'if', PSelectorIfTask ],
[ 'if-not', PSelectorIfNotTask ],
[ 'matches-attr', PSelectorMatchesAttrTask ],
[ 'matches-css', PSelectorMatchesCSSTask ],
[ 'matches-css-after', PSelectorMatchesCSSAfterTask ],
[ 'matches-css-before', PSelectorMatchesCSSBeforeTask ],
[ 'matches-media', PSelectorMatchesMediaTask ],
[ 'matches-path', PSelectorMatchesPathTask ],
[ 'min-text-length', PSelectorMinTextLengthTask ],
[ 'not', PSelectorIfNotTask ],
[ 'others', PSelectorOthersTask ],
[ 'spath', PSelectorSpathTask ],
[ 'upward', PSelectorUpwardTask ],
[ 'watch-attr', PSelectorWatchAttrs ],
[ 'xpath', PSelectorXpathTask ],
]);
}
this.raw = o.raw;
this.selector = o.selector;
this.tasks = [];
@ -392,7 +371,6 @@ class PSelector {
const ctor = this.operatorToTaskMap.get(task[0]) || PSelectorVoidTask;
tasks.push(new ctor(task));
}
// Initialize only after all tasks have been successfully instantiated
this.tasks = tasks;
}
prime(input) {
@ -436,7 +414,25 @@ class PSelector {
return false;
}
}
PSelector.prototype.operatorToTaskMap = undefined;
// Lookup table, shared by all PSelector instances through the prototype,
// mapping a procedural operator name to the task class implementing it.
// The PSelector constructor falls back to PSelectorVoidTask for operator
// names not found here.
// Aliases: 'has' and 'if' map to the same class, as do 'not' and 'if-not'.
PSelector.prototype.operatorToTaskMap = new Map([
[ 'has', PSelectorIfTask ],
[ 'has-text', PSelectorHasTextTask ],
[ 'if', PSelectorIfTask ],
[ 'if-not', PSelectorIfNotTask ],
[ 'matches-attr', PSelectorMatchesAttrTask ],
[ 'matches-css', PSelectorMatchesCSSTask ],
[ 'matches-css-after', PSelectorMatchesCSSAfterTask ],
[ 'matches-css-before', PSelectorMatchesCSSBeforeTask ],
[ 'matches-media', PSelectorMatchesMediaTask ],
[ 'matches-path', PSelectorMatchesPathTask ],
[ 'min-text-length', PSelectorMinTextLengthTask ],
[ 'not', PSelectorIfNotTask ],
[ 'others', PSelectorOthersTask ],
[ 'spath', PSelectorSpathTask ],
[ 'upward', PSelectorUpwardTask ],
[ 'watch-attr', PSelectorWatchAttrs ],
[ 'xpath', PSelectorXpathTask ],
]);
class PSelectorRoot extends PSelector {
constructor(o) {

View File

@ -948,14 +948,14 @@ vAPI.DOMFilterer = class {
// vAPI.domSurveyor
{
// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/
// http://www.cse.yorku.ca/~oz/hash.html#djb2
// Must mirror cosmetic filtering compiler's version
const hashFromStr = (type, s) => {
const len = s.length;
const step = len + 7 >>> 3;
let hash = (type << 5) - type + (len & 0xFF) | 0;
let hash = (type << 5) + type ^ len;
for ( let i = 0; i < len; i += step ) {
hash = (hash << 5) - hash + s.charCodeAt(i) | 0;
hash = (hash << 5) + hash ^ s.charCodeAt(i);
}
return hash & 0xFFFFFF;
};

View File

@ -152,17 +152,17 @@ SelectorCacheEntry.junkyard = [];
/******************************************************************************/
/******************************************************************************/
// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/
// http://www.cse.yorku.ca/~oz/hash.html#djb2
// Must mirror content script surveyor's version
const hashFromStr = (type, s) => {
const len = s.length;
const step = len + 7 >>> 3;
let hash = (type << 5) - type + (len & 0xFF) | 0;
for ( let i = 0; i < len; i += step ) {
hash = (hash << 5) - hash + s.charCodeAt(i) | 0;
}
return hash & 0xFFFFFF;
let hash = (type << 5) + type ^ len;
for ( let i = 0; i < len; i += step ) {
hash = (hash << 5) + hash ^ s.charCodeAt(i);
}
return hash & 0xFFFFFF;
};
// https://github.com/gorhill/uBlock/issues/1668

View File

@ -56,20 +56,33 @@ const htmlFilteringEngine = {
},
};
const PSelectorHasTextTask = class {
// Build a RegExp from a filter argument.
//   s:     either a regex literal written as `/source/` or `/source/i`, or a
//          plain string to be matched literally (case-insensitively).
//   exact: when true, a plain string must match the whole input rather than
//          a substring; this has no effect on regex literals.
// An empty string yields a pattern which matches any input.
const regexFromString = (s, exact = false) => {
    if ( s === '' ) { return /^/; }
    const literal = /^\/(.+)\/([i]?)$/.exec(s);
    if ( literal === null ) {
        // Plain string: neutralize all regex metacharacters.
        const escaped = s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const source = exact ? `^${escaped}$` : escaped;
        return new RegExp(source, 'i');
    }
    const [ , source, flags ] = literal;
    return new RegExp(source, flags !== '' ? flags : undefined);
};
class PSelectorVoidTask {
constructor(task) {
let arg0 = task[1], arg1;
if ( Array.isArray(task[1]) ) {
arg1 = arg0[1]; arg0 = arg0[0];
}
this.needle = new RegExp(arg0, arg1);
console.info(`[uBO] HTML filtering: :${task[0]}() operator is not supported`);
}
transpose() {
}
}
class PSelectorHasTextTask {
constructor(task) {
this.needle = regexFromString(task[1]);
}
transpose(node, output) {
if ( this.needle.test(node.textContent) ) {
output.push(node);
}
}
};
}
const PSelectorIfTask = class {
constructor(task) {
@ -80,17 +93,14 @@ const PSelectorIfTask = class {
output.push(node);
}
}
get invalid() {
return this.pselector.invalid;
}
};
PSelectorIfTask.prototype.target = true;
const PSelectorIfNotTask = class extends PSelectorIfTask {
};
class PSelectorIfNotTask extends PSelectorIfTask {
}
PSelectorIfNotTask.prototype.target = false;
const PSelectorMinTextLengthTask = class {
class PSelectorMinTextLengthTask {
constructor(task) {
this.min = task[1];
}
@ -99,9 +109,9 @@ const PSelectorMinTextLengthTask = class {
output.push(node);
}
}
};
}
const PSelectorSpathTask = class {
class PSelectorSpathTask {
constructor(task) {
this.spath = task[1];
this.nth = /^(?:\s*[+~]|:)/.test(this.spath);
@ -132,9 +142,9 @@ const PSelectorSpathTask = class {
`:scope > :nth-child(${pos})${selector}`
);
}
};
}
const PSelectorUpwardTask = class {
class PSelectorUpwardTask {
constructor(task) {
const arg = task[1];
if ( typeof arg === 'number' ) {
@ -160,11 +170,11 @@ const PSelectorUpwardTask = class {
}
output.push(node);
}
};
}
PSelectorUpwardTask.prototype.i = 0;
PSelectorUpwardTask.prototype.s = '';
const PSelectorXpathTask = class {
class PSelectorXpathTask {
constructor(task) {
this.xpe = task[1];
}
@ -184,25 +194,17 @@ const PSelectorXpathTask = class {
}
}
}
};
}
const PSelector = class {
class PSelector {
constructor(o) {
this.raw = o.raw;
this.selector = o.selector;
this.tasks = [];
if ( !o.tasks ) { return; }
for ( const task of o.tasks ) {
const ctor = this.operatorToTaskMap.get(task[0]);
if ( ctor === undefined ) {
this.invalid = true;
break;
}
const ctor = this.operatorToTaskMap.get(task[0]) || PSelectorVoidTask;
const pselector = new ctor(task);
if ( pselector instanceof PSelectorIfTask && pselector.invalid ) {
this.invalid = true;
break;
}
this.tasks.push(pselector);
}
}
@ -215,7 +217,6 @@ const PSelector = class {
return Array.from(root.querySelectorAll(this.selector));
}
exec(input) {
if ( this.invalid ) { return []; }
let nodes = this.prime(input);
for ( const task of this.tasks ) {
if ( nodes.length === 0 ) { break; }
@ -228,7 +229,6 @@ const PSelector = class {
return nodes;
}
test(input) {
if ( this.invalid ) { return false; }
const nodes = this.prime(input);
for ( const node of nodes ) {
let output = [ node ];
@ -244,7 +244,7 @@ const PSelector = class {
}
return false;
}
};
}
PSelector.prototype.operatorToTaskMap = new Map([
[ 'has', PSelectorIfTask ],
[ 'has-text', PSelectorHasTextTask ],
@ -257,9 +257,8 @@ PSelector.prototype.operatorToTaskMap = new Map([
[ 'upward', PSelectorUpwardTask ],
[ 'xpath', PSelectorXpathTask ],
]);
PSelector.prototype.invalid = false;
const logOne = function(details, exception, selector) {
function logOne(details, exception, selector) {
µb.filteringContext
.duplicate()
.fromTabId(details.tabId)
@ -272,9 +271,9 @@ const logOne = function(details, exception, selector) {
raw: `${exception === 0 ? '##' : '#@#'}^${selector}`
})
.toLogger();
};
}
const applyProceduralSelector = function(details, selector) {
function applyProceduralSelector(details, selector) {
let pselector = pselectors.get(selector);
if ( pselector === undefined ) {
pselector = new PSelector(JSON.parse(selector));
@ -290,9 +289,9 @@ const applyProceduralSelector = function(details, selector) {
logOne(details, 0, pselector.raw);
}
return modified;
};
}
const applyCSSSelector = function(details, selector) {
function applyCSSSelector(details, selector) {
const nodes = docRegister.querySelectorAll(selector);
let modified = false;
for ( const node of nodes ) {
@ -303,7 +302,7 @@ const applyCSSSelector = function(details, selector) {
logOne(details, 0, selector);
}
return modified;
};
}
htmlFilteringEngine.reset = function() {
filterDB.clear();

View File

@ -34,16 +34,17 @@ import {
/******************************************************************************/
// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/
// http://www.cse.yorku.ca/~oz/hash.html#djb2
// Must mirror content script surveyor's version
const hashFromStr = (type, s) => {
const len = s.length;
const step = len + 7 >>> 3;
let hash = type;
for ( let i = 0; i < len; i += step ) {
hash = (hash << 5) - hash + s.charCodeAt(i) | 0;
}
return hash & 0x00FFFFFF;
let hash = (type << 5) + type ^ len;
for ( let i = 0; i < len; i += step ) {
hash = (hash << 5) + hash ^ s.charCodeAt(i);
}
return hash & 0xFFFFFF;
};
/******************************************************************************/

View File

@ -2131,17 +2131,17 @@ Parser.prototype.proceduralOperatorTokens = new Map([
[ 'has-text', 0b01 ],
[ 'if', 0b00 ],
[ 'if-not', 0b00 ],
[ 'matches-attr', 0b01 ],
[ 'matches-attr', 0b11 ],
[ 'matches-css', 0b11 ],
[ 'matches-media', 0b11 ],
[ 'matches-path', 0b11 ],
[ 'min-text-length', 0b01 ],
[ 'not', 0b01 ],
[ 'nth-ancestor', 0b00 ],
[ 'others', 0b01 ],
[ 'others', 0b11 ],
[ 'remove', 0b11 ],
[ 'remove-attr', 0b01 ],
[ 'remove-class', 0b01 ],
[ 'remove-attr', 0b11 ],
[ 'remove-class', 0b11 ],
[ 'style', 0b11 ],
[ 'upward', 0b01 ],
[ 'watch-attr', 0b11 ],

View File

@ -2688,6 +2688,9 @@ registerFilterClass(FilterOnHeaders);
// Benchmark for string-based tokens vs. safe-integer token values:
// https://gorhill.github.io/obj-vs-set-vs-map/tokenize-to-str-vs-to-int.html
// http://www.cse.yorku.ca/~oz/hash.html#djb2
// Use above algorithm to generate token hash.
const urlTokenizer = new (class {
constructor() {
this._chars = '0123456789%abcdefghijklmnopqrstuvwxyz';
@ -2728,7 +2731,7 @@ const urlTokenizer = new (class {
}
addKnownToken(th) {
this.knownTokens[th & 0xFFFF ^ th >>> 16] = 1;
this.knownTokens[th & 0xFFFF] = 1;
}
// Tokenize on demand.
@ -2762,15 +2765,17 @@ const urlTokenizer = new (class {
return this._hasQuery > 0;
}
// http://www.cse.yorku.ca/~oz/hash.html#djb2
tokenHashFromString(s) {
const l = s.length;
if ( l === 0 ) { return EMPTY_TOKEN_HASH; }
const vtc = this._validTokenChars;
let th = vtc[s.charCodeAt(0)];
for ( let i = 1; i !== 7 /* MAX_TOKEN_LENGTH */ && i !== l; i++ ) {
th = th << 4 ^ vtc[s.charCodeAt(i)];
th = (th << 5) + th ^ vtc[s.charCodeAt(i)];
}
return th;
return th & 0xFFFFFFF;
}
stringFromTokenHash(th) {
@ -2831,11 +2836,11 @@ const urlTokenizer = new (class {
break;
}
if ( n === 7 /* MAX_TOKEN_LENGTH */ ) { continue; }
th = th << 4 ^ v;
th = (th << 5) + th ^ v;
n += 1;
}
if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) {
tokens[j+0] = th;
if ( knownTokens[th & 0xFFFF] !== 0 ) {
tokens[j+0] = th & 0xFFFFFFF;
tokens[j+1] = ti;
j += 2;
}