From b603e9e81ea46fb9e7ea5e1eb3eee82191c82219 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Tue, 13 Dec 2022 10:23:51 -0500 Subject: [PATCH] Various code review related to extended filtering Bring latest changes to procedural cosmetic filtering to uBOL. Fix procedural filtering used in HTML filters. Standardize quick hash algorithm used throughout to DJB2 (except that initialization step is skipped): - http://www.cse.yorku.ca/~oz/hash.html#djb2 --- .../mv3/extension/js/scripting/css-generic.js | 14 +- .../extension/js/scripting/css-procedural.js | 127 +++++++++++++----- src/js/background.js | 4 +- src/js/contentscript-extra.js | 42 +++--- src/js/contentscript.js | 6 +- src/js/cosmetic-filtering.js | 12 +- src/js/html-filtering.js | 79 ++++++----- src/js/static-dnr-filtering.js | 13 +- src/js/static-filtering-parser.js | 8 +- src/js/static-net-filtering.js | 17 ++- 10 files changed, 194 insertions(+), 128 deletions(-) diff --git a/platform/mv3/extension/js/scripting/css-generic.js b/platform/mv3/extension/js/scripting/css-generic.js index a7e089c79..dd78824d9 100644 --- a/platform/mv3/extension/js/scripting/css-generic.js +++ b/platform/mv3/extension/js/scripting/css-generic.js @@ -50,15 +50,17 @@ let lastDomChange = Date.now(); /******************************************************************************/ -// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/ +// http://www.cse.yorku.ca/~oz/hash.html#djb2 +// Must mirror dnrRulesetFromRawLists's version + const hashFromStr = (type, s) => { const len = s.length; const step = len + 7 >>> 3; - let hash = type; - for ( let i = 0; i < len; i += step ) { - hash = (hash << 5) - hash + s.charCodeAt(i) | 0; - } - return hash & 0x00FFFFFF; + let hash = (type << 5) + type ^ len; + for ( let i = 0; i < len; i += step ) { + hash = (hash << 5) + hash ^ s.charCodeAt(i); + } + return hash & 0xFF_FFFF; }; /******************************************************************************/ diff --git a/platform/mv3/extension/js/scripting/css-procedural.js b/platform/mv3/extension/js/scripting/css-procedural.js index 93ca36d53..364bd41f4 100644 --- a/platform/mv3/extension/js/scripting/css-procedural.js +++ b/platform/mv3/extension/js/scripting/css-procedural.js @@ -52,6 +52,16 @@ const nonVisualElements = { style: true, }; +const regexFromString = (s, exact = false) => { + if ( s === '' ) { return /^/; } + const match = /^\/(.+)\/([i]?)$/.exec(s); + if ( match !== null ) { + return new RegExp(match[1], match[2] || undefined); + } + const reStr = s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return new RegExp(exact ? `^${reStr}$` : reStr, 'i'); +}; + /******************************************************************************/ // 'P' stands for 'Procedural' @@ -79,11 +89,7 @@ class PSelectorVoidTask extends PSelectorTask { class PSelectorHasTextTask extends PSelectorTask { constructor(task) { super(); - let arg0 = task[1], arg1; - if ( Array.isArray(task[1]) ) { - arg1 = arg0[1]; arg0 = arg0[0]; - } - this.needle = new RegExp(arg0, arg1); + this.needle = regexFromString(task[1]); } transpose(node, output) { if ( this.needle.test(node.textContent) ) { @@ -113,6 +119,24 @@ PSelectorIfNotTask.prototype.target = false; /******************************************************************************/ +class PSelectorMatchesAttrTask extends PSelectorTask { + constructor(task) { + super(); + this.reAttr = regexFromString(task[1].attr, true); + this.reValue = regexFromString(task[1].value, true); + } + transpose(node, output) { + const attrs = node.getAttributeNames(); + for ( const attr of attrs ) { + if ( this.reAttr.test(attr) === false ) { continue; } + if ( this.reValue.test(node.getAttribute(attr)) === false ) { continue; } + output.push(node); + } + } +} + +/******************************************************************************/ + class PSelectorMatchesCSSTask extends PSelectorTask { constructor(task) { super(); @@ -168,11 +192,7 @@ class PSelectorMatchesMediaTask extends PSelectorTask { class PSelectorMatchesPathTask extends PSelectorTask { constructor(task) { super(); - let arg0 = task[1], arg1; - if ( Array.isArray(task[1]) ) { - arg1 = arg0[1]; arg0 = arg0[0]; - } - this.needle = new RegExp(arg0, arg1); + this.needle = regexFromString(task[1]); } transpose(node, output) { if ( this.needle.test(self.location.pathname + self.location.search) ) { @@ -442,6 +462,7 @@ PSelector.prototype.operatorToTaskMap = new Map([ [ 'has-text', PSelectorHasTextTask ], [ 'if', PSelectorIfTask ], [ 'if-not', PSelectorIfNotTask ], + [ 'matches-attr', PSelectorMatchesAttrTask ], [ 'matches-css', PSelectorMatchesCSSTask ], [ 'matches-css-after', PSelectorMatchesCSSAfterTask ], [ 'matches-css-before', PSelectorMatchesCSSBeforeTask ], @@ -459,13 +480,13 @@ PSelector.prototype.operatorToTaskMap = new Map([ /******************************************************************************/ class PSelectorRoot extends PSelector { - constructor(o, styleToken) { + constructor(o) { super(o); this.budget = 200; // I arbitrary picked a 1/5 second this.raw = o.raw; this.cost = 0; this.lastAllowanceTime = 0; - this.styleToken = styleToken; + this.action = o.action; } prime(input) { try { @@ -485,6 +506,7 @@ class ProceduralFilterer { this.styleTokenMap = new Map(); this.styledNodes = new Set(); this.timer = undefined; + this.hideStyle = 'display:none!important;'; this.addSelectors(selectors); // Important: commit now (do not go through onDOMChanged) to be sure // first pass is going to happen asap. @@ -493,21 +515,24 @@ class ProceduralFilterer { addSelectors() { for ( const selector of selectors ) { - let style, styleToken; - if ( selector.action === undefined ) { - style = 'display:none!important;'; - } else if ( selector.action[0] === 'style' ) { - style = selector.action[1]; - } - if ( style !== undefined ) { - styleToken = this.styleTokenFromStyle(style); - } - const pselector = new PSelectorRoot(selector, styleToken); + const pselector = new PSelectorRoot(selector); + this.primeProceduralSelector(pselector); this.selectors.push(pselector); } this.onDOMChanged(); } + // This allows to perform potentially expensive initialization steps + // before the filters are ready to be applied. + primeProceduralSelector(pselector) { + if ( pselector.action === undefined ) { + this.styleTokenFromStyle(this.hideStyle); + } else if ( pselector.action[0] === 'style' ) { + this.styleTokenFromStyle(pselector.action[1]); + } + return pselector; + } + uBOL_commitNow() { // https://github.com/uBlockOrigin/uBlock-issues/issues/341 // Be ready to unhide nodes which no longer matches any of @@ -534,10 +559,10 @@ class ProceduralFilterer { } t0 = t1; if ( nodes.length === 0 ) { continue; } - this.styleNodes(nodes, pselector.styleToken); + this.processNodes(nodes, pselector.action); } - this.unstyleNodes(toUnstyle); + this.unprocessNodes(toUnstyle); } styleTokenFromStyle(style) { @@ -552,22 +577,60 @@ class ProceduralFilterer { return styleToken; } - styleNodes(nodes, styleToken) { - if ( styleToken === undefined ) { + processNodes(nodes, action) { + const op = action && action[0] || ''; + const arg = op !== '' ? action[1] : ''; + switch ( op ) { + case '': + /* fall through */ + case 'style': { + const styleToken = this.styleTokenFromStyle( + arg === '' ? this.hideStyle : arg + ); + for ( const node of nodes ) { + node.setAttribute(this.masterToken, ''); + node.setAttribute(styleToken, ''); + this.styledNodes.add(node); + } + break; + } + case 'remove': { for ( const node of nodes ) { node.remove(); node.textContent = ''; } - return; + break; } - for ( const node of nodes ) { - node.setAttribute(this.masterToken, ''); - node.setAttribute(styleToken, ''); - this.styledNodes.add(node); + case 'remove-attr': { + const reAttr = regexFromString(arg, true); + for ( const node of nodes ) { + for ( const name of node.getAttributeNames() ) { + if ( reAttr.test(name) === false ) { continue; } + node.removeAttribute(name); + } + } + break; + } + case 'remove-class': { + const reClass = regexFromString(arg, true); + for ( const node of nodes ) { + const cl = node.classList; + for ( const name of cl.values() ) { + if ( reClass.test(name) === false ) { continue; } + cl.remove(name); + } + } + break; + } + default: + break; } } - unstyleNodes(nodes) { + // TODO: Current assumption is one style per hit element. Could be an + // issue if an element has multiple styling and one styling is + // brought back. Possibly too rare to care about this for now. + unprocessNodes(nodes) { for ( const node of nodes ) { if ( this.styledNodes.has(node) ) { continue; } node.removeAttribute(this.masterToken); diff --git a/src/js/background.js b/src/js/background.js index 69fbcccff..a381fac6c 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -176,8 +176,8 @@ const µBlock = { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 49, // Increase when compiled format changes - selfieMagic: 49, // Increase when selfie format changes + compiledMagic: 50, // Increase when compiled format changes + selfieMagic: 50, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/contentscript-extra.js b/src/js/contentscript-extra.js index e438e5902..0185a29e7 100644 --- a/src/js/contentscript-extra.js +++ b/src/js/contentscript-extra.js @@ -362,27 +362,6 @@ class PSelectorXpathTask extends PSelectorTask { class PSelector { constructor(o) { - if ( PSelector.prototype.operatorToTaskMap === undefined ) { - PSelector.prototype.operatorToTaskMap = new Map([ - [ 'has', PSelectorIfTask ], - [ 'has-text', PSelectorHasTextTask ], - [ 'if', PSelectorIfTask ], - [ 'if-not', PSelectorIfNotTask ], - [ 'matches-attr', PSelectorMatchesAttrTask ], - [ 'matches-css', PSelectorMatchesCSSTask ], - [ 'matches-css-after', PSelectorMatchesCSSAfterTask ], - [ 'matches-css-before', PSelectorMatchesCSSBeforeTask ], - [ 'matches-media', PSelectorMatchesMediaTask ], - [ 'matches-path', PSelectorMatchesPathTask ], - [ 'min-text-length', PSelectorMinTextLengthTask ], - [ 'not', PSelectorIfNotTask ], - [ 'others', PSelectorOthersTask ], - [ 'spath', PSelectorSpathTask ], - [ 'upward', PSelectorUpwardTask ], - [ 'watch-attr', PSelectorWatchAttrs ], - [ 'xpath', PSelectorXpathTask ], - ]); - } this.raw = o.raw; this.selector = o.selector; this.tasks = []; @@ -392,7 +371,6 @@ class PSelector { const ctor = this.operatorToTaskMap.get(task[0]) || PSelectorVoidTask; tasks.push(new ctor(task)); } - // Initialize only after all tasks have been successfully instantiated this.tasks = tasks; } prime(input) { @@ -436,7 +414,25 @@ class PSelector { return false; } } -PSelector.prototype.operatorToTaskMap = undefined; +PSelector.prototype.operatorToTaskMap = new Map([ + [ 'has', PSelectorIfTask ], + [ 'has-text', PSelectorHasTextTask ], + [ 'if', PSelectorIfTask ], + [ 'if-not', PSelectorIfNotTask ], + [ 'matches-attr', PSelectorMatchesAttrTask ], + [ 'matches-css', PSelectorMatchesCSSTask ], + [ 'matches-css-after', PSelectorMatchesCSSAfterTask ], + [ 'matches-css-before', PSelectorMatchesCSSBeforeTask ], + [ 'matches-media', PSelectorMatchesMediaTask ], + [ 'matches-path', PSelectorMatchesPathTask ], + [ 'min-text-length', PSelectorMinTextLengthTask ], + [ 'not', PSelectorIfNotTask ], + [ 'others', PSelectorOthersTask ], + [ 'spath', PSelectorSpathTask ], + [ 'upward', PSelectorUpwardTask ], + [ 'watch-attr', PSelectorWatchAttrs ], + [ 'xpath', PSelectorXpathTask ], +]); class PSelectorRoot extends PSelector { constructor(o) { diff --git a/src/js/contentscript.js b/src/js/contentscript.js index 278f8fabd..cec0b50c3 100644 --- a/src/js/contentscript.js +++ b/src/js/contentscript.js @@ -948,14 +948,14 @@ vAPI.DOMFilterer = class { // vAPI.domSurveyor { - // https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/ + // http://www.cse.yorku.ca/~oz/hash.html#djb2 // Must mirror cosmetic filtering compiler's version const hashFromStr = (type, s) => { const len = s.length; const step = len + 7 >>> 3; - let hash = (type << 5) - type + (len & 0xFF) | 0; + let hash = (type << 5) + type ^ len; for ( let i = 0; i < len; i += step ) { - hash = (hash << 5) - hash + s.charCodeAt(i) | 0; + hash = (hash << 5) + hash ^ s.charCodeAt(i); } return hash & 0xFFFFFF; }; diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index d5b97d78e..d473371db 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -152,17 +152,17 @@ SelectorCacheEntry.junkyard = []; /******************************************************************************/ /******************************************************************************/ -// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/ +// http://www.cse.yorku.ca/~oz/hash.html#djb2 // Must mirror content script surveyor's version const hashFromStr = (type, s) => { const len = s.length; const step = len + 7 >>> 3; - let hash = (type << 5) - type + (len & 0xFF) | 0; - for ( let i = 0; i < len; i += step ) { - hash = (hash << 5) - hash + s.charCodeAt(i) | 0; - } - return hash & 0xFFFFFF; + let hash = (type << 5) + type ^ len; + for ( let i = 0; i < len; i += step ) { + hash = (hash << 5) + hash ^ s.charCodeAt(i); + } + return hash & 0xFFFFFF; }; // https://github.com/gorhill/uBlock/issues/1668 diff --git a/src/js/html-filtering.js b/src/js/html-filtering.js index d70346ad9..c9dc8a8f0 100644 --- a/src/js/html-filtering.js +++ b/src/js/html-filtering.js @@ -56,20 +56,33 @@ const htmlFilteringEngine = { }, }; -const PSelectorHasTextTask = class { +const regexFromString = (s, exact = false) => { + if ( s === '' ) { return /^/; } + const match = /^\/(.+)\/([i]?)$/.exec(s); + if ( match !== null ) { + return new RegExp(match[1], match[2] || undefined); + } + const reStr = s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return new RegExp(exact ? `^${reStr}$` : reStr, 'i'); +}; + +class PSelectorVoidTask { constructor(task) { - let arg0 = task[1], arg1; - if ( Array.isArray(task[1]) ) { - arg1 = arg0[1]; arg0 = arg0[0]; - } - this.needle = new RegExp(arg0, arg1); + console.info(`[uBO] HTML filtering: :${task[0]}() operator is not supported`); + } + transpose() { + } +} +class PSelectorHasTextTask { + constructor(task) { + this.needle = regexFromString(task[1]); } transpose(node, output) { if ( this.needle.test(node.textContent) ) { output.push(node); } } -}; +} const PSelectorIfTask = class { constructor(task) { @@ -80,17 +93,14 @@ const PSelectorIfTask = class { output.push(node); } } - get invalid() { - return this.pselector.invalid; - } }; PSelectorIfTask.prototype.target = true; -const PSelectorIfNotTask = class extends PSelectorIfTask { -}; +class PSelectorIfNotTask extends PSelectorIfTask { +} PSelectorIfNotTask.prototype.target = false; -const PSelectorMinTextLengthTask = class { +class PSelectorMinTextLengthTask { constructor(task) { this.min = task[1]; } @@ -99,9 +109,9 @@ const PSelectorMinTextLengthTask = class { output.push(node); } } -}; +} -const PSelectorSpathTask = class { +class PSelectorSpathTask { constructor(task) { this.spath = task[1]; this.nth = /^(?:\s*[+~]|:)/.test(this.spath); @@ -132,9 +142,9 @@ const PSelectorSpathTask = class { `:scope > :nth-child(${pos})${selector}` ); } -}; +} -const PSelectorUpwardTask = class { +class PSelectorUpwardTask { constructor(task) { const arg = task[1]; if ( typeof arg === 'number' ) { @@ -160,11 +170,11 @@ const PSelectorUpwardTask = class { } output.push(node); } -}; +} PSelectorUpwardTask.prototype.i = 0; PSelectorUpwardTask.prototype.s = ''; -const PSelectorXpathTask = class { +class PSelectorXpathTask { constructor(task) { this.xpe = task[1]; } @@ -184,25 +194,17 @@ const PSelectorXpathTask = class { } } } -}; +} -const PSelector = class { +class PSelector { constructor(o) { this.raw = o.raw; this.selector = o.selector; this.tasks = []; if ( !o.tasks ) { return; } for ( const task of o.tasks ) { - const ctor = this.operatorToTaskMap.get(task[0]); - if ( ctor === undefined ) { - this.invalid = true; - break; - } + const ctor = this.operatorToTaskMap.get(task[0]) || PSelectorVoidTask; const pselector = new ctor(task); - if ( pselector instanceof PSelectorIfTask && pselector.invalid ) { - this.invalid = true; - break; - } this.tasks.push(pselector); } } @@ -215,7 +217,6 @@ const PSelector = class { return Array.from(root.querySelectorAll(this.selector)); } exec(input) { - if ( this.invalid ) { return []; } let nodes = this.prime(input); for ( const task of this.tasks ) { if ( nodes.length === 0 ) { break; } @@ -228,7 +229,6 @@ const PSelector = class { return nodes; } test(input) { - if ( this.invalid ) { return false; } const nodes = this.prime(input); for ( const node of nodes ) { let output = [ node ]; @@ -244,7 +244,7 @@ const PSelector = class { } return false; } -}; +} PSelector.prototype.operatorToTaskMap = new Map([ [ 'has', PSelectorIfTask ], [ 'has-text', PSelectorHasTextTask ], @@ -257,9 +257,8 @@ PSelector.prototype.operatorToTaskMap = new Map([ [ 'upward', PSelectorUpwardTask ], [ 'xpath', PSelectorXpathTask ], ]); -PSelector.prototype.invalid = false; -const logOne = function(details, exception, selector) { +function logOne(details, exception, selector) { µb.filteringContext .duplicate() .fromTabId(details.tabId) @@ -272,9 +271,9 @@ const logOne = function(details, exception, selector) { raw: `${exception === 0 ? '##' : '#@#'}^${selector}` }) .toLogger(); -}; +} -const applyProceduralSelector = function(details, selector) { +function applyProceduralSelector(details, selector) { let pselector = pselectors.get(selector); if ( pselector === undefined ) { pselector = new PSelector(JSON.parse(selector)); @@ -290,9 +289,9 @@ const applyProceduralSelector = function(details, selector) { logOne(details, 0, pselector.raw); } return modified; -}; +} -const applyCSSSelector = function(details, selector) { +function applyCSSSelector(details, selector) { const nodes = docRegister.querySelectorAll(selector); let modified = false; for ( const node of nodes ) { @@ -303,7 +302,7 @@ const applyCSSSelector = function(details, selector) { logOne(details, 0, selector); } return modified; -}; +} htmlFilteringEngine.reset = function() { filterDB.clear(); diff --git a/src/js/static-dnr-filtering.js b/src/js/static-dnr-filtering.js index 19aa36901..bc58ae471 100644 --- a/src/js/static-dnr-filtering.js +++ b/src/js/static-dnr-filtering.js @@ -34,16 +34,17 @@ import { /******************************************************************************/ -// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/ +// http://www.cse.yorku.ca/~oz/hash.html#djb2 +// Must mirror content script surveyor's version const hashFromStr = (type, s) => { const len = s.length; const step = len + 7 >>> 3; - let hash = type; - for ( let i = 0; i < len; i += step ) { - hash = (hash << 5) - hash + s.charCodeAt(i) | 0; - } - return hash & 0x00FFFFFF; + let hash = (type << 5) + type ^ len; + for ( let i = 0; i < len; i += step ) { + hash = (hash << 5) + hash ^ s.charCodeAt(i); + } + return hash & 0xFFFFFF; }; /******************************************************************************/ diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index ae0642d1e..1f9ee265c 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -2131,17 +2131,17 @@ Parser.prototype.proceduralOperatorTokens = new Map([ [ 'has-text', 0b01 ], [ 'if', 0b00 ], [ 'if-not', 0b00 ], - [ 'matches-attr', 0b01 ], + [ 'matches-attr', 0b11 ], [ 'matches-css', 0b11 ], [ 'matches-media', 0b11 ], [ 'matches-path', 0b11 ], [ 'min-text-length', 0b01 ], [ 'not', 0b01 ], [ 'nth-ancestor', 0b00 ], - [ 'others', 0b01 ], + [ 'others', 0b11 ], [ 'remove', 0b11 ], - [ 'remove-attr', 0b01 ], - [ 'remove-class', 0b01 ], + [ 'remove-attr', 0b11 ], + [ 'remove-class', 0b11 ], [ 'style', 0b11 ], [ 'upward', 0b01 ], [ 'watch-attr', 0b11 ], diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index b1742d1d0..a81fb4646 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -2688,6 +2688,9 @@ registerFilterClass(FilterOnHeaders); // Benchmark for string-based tokens vs. safe-integer token values: // https://gorhill.github.io/obj-vs-set-vs-map/tokenize-to-str-vs-to-int.html +// http://www.cse.yorku.ca/~oz/hash.html#djb2 +// Use above algorithm to generate token hash. + const urlTokenizer = new (class { constructor() { this._chars = '0123456789%abcdefghijklmnopqrstuvwxyz'; @@ -2728,7 +2731,7 @@ const urlTokenizer = new (class { } addKnownToken(th) { - this.knownTokens[th & 0xFFFF ^ th >>> 16] = 1; + this.knownTokens[th & 0xFFFF] = 1; } // Tokenize on demand. @@ -2762,15 +2765,17 @@ const urlTokenizer = new (class { return this._hasQuery > 0; } + // http://www.cse.yorku.ca/~oz/hash.html#djb2 + tokenHashFromString(s) { const l = s.length; if ( l === 0 ) { return EMPTY_TOKEN_HASH; } const vtc = this._validTokenChars; let th = vtc[s.charCodeAt(0)]; for ( let i = 1; i !== 7 /* MAX_TOKEN_LENGTH */ && i !== l; i++ ) { - th = th << 4 ^ vtc[s.charCodeAt(i)]; + th = (th << 5) + th ^ vtc[s.charCodeAt(i)]; } - return th; + return th & 0xFFFFFFF; } stringFromTokenHash(th) { @@ -2831,11 +2836,11 @@ const urlTokenizer = new (class { break; } if ( n === 7 /* MAX_TOKEN_LENGTH */ ) { continue; } - th = th << 4 ^ v; + th = (th << 5) + th ^ v; n += 1; } - if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) { - tokens[j+0] = th; + if ( knownTokens[th & 0xFFFF] !== 0 ) { + tokens[j+0] = th & 0xFFFFFFF; tokens[j+1] = ti; j += 2; }