diff --git a/platform/mv3/extension/js/scripting/css-generic.js b/platform/mv3/extension/js/scripting/css-generic.js index a7e089c79..dd78824d9 100644 --- a/platform/mv3/extension/js/scripting/css-generic.js +++ b/platform/mv3/extension/js/scripting/css-generic.js @@ -50,15 +50,17 @@ let lastDomChange = Date.now(); /******************************************************************************/ -// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/ +// http://www.cse.yorku.ca/~oz/hash.html#djb2 +// Must mirror dnrRulesetFromRawLists's version + const hashFromStr = (type, s) => { const len = s.length; const step = len + 7 >>> 3; - let hash = type; - for ( let i = 0; i < len; i += step ) { - hash = (hash << 5) - hash + s.charCodeAt(i) | 0; - } - return hash & 0x00FFFFFF; + let hash = (type << 5) + type ^ len; + for ( let i = 0; i < len; i += step ) { + hash = (hash << 5) + hash ^ s.charCodeAt(i); + } + return hash & 0xFF_FFFF; }; /******************************************************************************/ diff --git a/platform/mv3/extension/js/scripting/css-procedural.js b/platform/mv3/extension/js/scripting/css-procedural.js index 93ca36d53..364bd41f4 100644 --- a/platform/mv3/extension/js/scripting/css-procedural.js +++ b/platform/mv3/extension/js/scripting/css-procedural.js @@ -52,6 +52,16 @@ const nonVisualElements = { style: true, }; +const regexFromString = (s, exact = false) => { + if ( s === '' ) { return /^/; } + const match = /^\/(.+)\/([i]?)$/.exec(s); + if ( match !== null ) { + return new RegExp(match[1], match[2] || undefined); + } + const reStr = s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return new RegExp(exact ? `^${reStr}$` : reStr, 'i'); +}; + /******************************************************************************/ // 'P' stands for 'Procedural' @@ -79,11 +89,7 @@ class PSelectorVoidTask extends PSelectorTask { class PSelectorHasTextTask extends PSelectorTask { constructor(task) { super(); - let arg0 = task[1], arg1; - if ( Array.isArray(task[1]) ) { - arg1 = arg0[1]; arg0 = arg0[0]; - } - this.needle = new RegExp(arg0, arg1); + this.needle = regexFromString(task[1]); } transpose(node, output) { if ( this.needle.test(node.textContent) ) { @@ -113,6 +119,24 @@ PSelectorIfNotTask.prototype.target = false; /******************************************************************************/ +class PSelectorMatchesAttrTask extends PSelectorTask { + constructor(task) { + super(); + this.reAttr = regexFromString(task[1].attr, true); + this.reValue = regexFromString(task[1].value, true); + } + transpose(node, output) { + const attrs = node.getAttributeNames(); + for ( const attr of attrs ) { + if ( this.reAttr.test(attr) === false ) { continue; } + if ( this.reValue.test(node.getAttribute(attr)) === false ) { continue; } + output.push(node); + } + } +} + +/******************************************************************************/ + class PSelectorMatchesCSSTask extends PSelectorTask { constructor(task) { super(); @@ -168,11 +192,7 @@ class PSelectorMatchesMediaTask extends PSelectorTask { class PSelectorMatchesPathTask extends PSelectorTask { constructor(task) { super(); - let arg0 = task[1], arg1; - if ( Array.isArray(task[1]) ) { - arg1 = arg0[1]; arg0 = arg0[0]; - } - this.needle = new RegExp(arg0, arg1); + this.needle = regexFromString(task[1]); } transpose(node, output) { if ( this.needle.test(self.location.pathname + self.location.search) ) { @@ -442,6 +462,7 @@ PSelector.prototype.operatorToTaskMap = new Map([ [ 'has-text', PSelectorHasTextTask ], [ 'if', PSelectorIfTask ], [ 'if-not', PSelectorIfNotTask ], + [ 'matches-attr', PSelectorMatchesAttrTask ], [ 'matches-css', PSelectorMatchesCSSTask ], [ 'matches-css-after', PSelectorMatchesCSSAfterTask ], [ 'matches-css-before', PSelectorMatchesCSSBeforeTask ], @@ -459,13 +480,13 @@ PSelector.prototype.operatorToTaskMap = new Map([ /******************************************************************************/ class PSelectorRoot extends PSelector { - constructor(o, styleToken) { + constructor(o) { super(o); this.budget = 200; // I arbitrary picked a 1/5 second this.raw = o.raw; this.cost = 0; this.lastAllowanceTime = 0; - this.styleToken = styleToken; + this.action = o.action; } prime(input) { try { @@ -485,6 +506,7 @@ class ProceduralFilterer { this.styleTokenMap = new Map(); this.styledNodes = new Set(); this.timer = undefined; + this.hideStyle = 'display:none!important;'; this.addSelectors(selectors); // Important: commit now (do not go through onDOMChanged) to be sure // first pass is going to happen asap. @@ -493,21 +515,24 @@ class ProceduralFilterer { addSelectors() { for ( const selector of selectors ) { - let style, styleToken; - if ( selector.action === undefined ) { - style = 'display:none!important;'; - } else if ( selector.action[0] === 'style' ) { - style = selector.action[1]; - } - if ( style !== undefined ) { - styleToken = this.styleTokenFromStyle(style); - } - const pselector = new PSelectorRoot(selector, styleToken); + const pselector = new PSelectorRoot(selector); + this.primeProceduralSelector(pselector); this.selectors.push(pselector); } this.onDOMChanged(); } + // This allows to perform potentially expensive initialization steps + // before the filters are ready to be applied. + primeProceduralSelector(pselector) { + if ( pselector.action === undefined ) { + this.styleTokenFromStyle(this.hideStyle); + } else if ( pselector.action[0] === 'style' ) { + this.styleTokenFromStyle(pselector.action[1]); + } + return pselector; + } + uBOL_commitNow() { // https://github.com/uBlockOrigin/uBlock-issues/issues/341 // Be ready to unhide nodes which no longer matches any of @@ -534,10 +559,10 @@ class ProceduralFilterer { } t0 = t1; if ( nodes.length === 0 ) { continue; } - this.styleNodes(nodes, pselector.styleToken); + this.processNodes(nodes, pselector.action); } - this.unstyleNodes(toUnstyle); + this.unprocessNodes(toUnstyle); } styleTokenFromStyle(style) { @@ -552,22 +577,60 @@ class ProceduralFilterer { return styleToken; } - styleNodes(nodes, styleToken) { - if ( styleToken === undefined ) { + processNodes(nodes, action) { + const op = action && action[0] || ''; + const arg = op !== '' ? action[1] : ''; + switch ( op ) { + case '': + /* fall through */ + case 'style': { + const styleToken = this.styleTokenFromStyle( + arg === '' ? this.hideStyle : arg + ); + for ( const node of nodes ) { + node.setAttribute(this.masterToken, ''); + node.setAttribute(styleToken, ''); + this.styledNodes.add(node); + } + break; + } + case 'remove': { for ( const node of nodes ) { node.remove(); node.textContent = ''; } - return; + break; } - for ( const node of nodes ) { - node.setAttribute(this.masterToken, ''); - node.setAttribute(styleToken, ''); - this.styledNodes.add(node); + case 'remove-attr': { + const reAttr = regexFromString(arg, true); + for ( const node of nodes ) { + for ( const name of node.getAttributeNames() ) { + if ( reAttr.test(name) === false ) { continue; } + node.removeAttribute(name); + } + } + break; + } + case 'remove-class': { + const reClass = regexFromString(arg, true); + for ( const node of nodes ) { + const cl = node.classList; + for ( const name of cl.values() ) { + if ( reClass.test(name) === false ) { continue; } + cl.remove(name); + } + } + break; + } + default: + break; } } - unstyleNodes(nodes) { + // TODO: Current assumption is one style per hit element. Could be an + // issue if an element has multiple styling and one styling is + // brought back. Possibly too rare to care about this for now. + unprocessNodes(nodes) { for ( const node of nodes ) { if ( this.styledNodes.has(node) ) { continue; } node.removeAttribute(this.masterToken); diff --git a/src/js/background.js b/src/js/background.js index 69fbcccff..a381fac6c 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -176,8 +176,8 @@ const µBlock = { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 49, // Increase when compiled format changes - selfieMagic: 49, // Increase when selfie format changes + compiledMagic: 50, // Increase when compiled format changes + selfieMagic: 50, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/contentscript-extra.js b/src/js/contentscript-extra.js index e438e5902..0185a29e7 100644 --- a/src/js/contentscript-extra.js +++ b/src/js/contentscript-extra.js @@ -362,27 +362,6 @@ class PSelectorXpathTask extends PSelectorTask { class PSelector { constructor(o) { - if ( PSelector.prototype.operatorToTaskMap === undefined ) { - PSelector.prototype.operatorToTaskMap = new Map([ - [ 'has', PSelectorIfTask ], - [ 'has-text', PSelectorHasTextTask ], - [ 'if', PSelectorIfTask ], - [ 'if-not', PSelectorIfNotTask ], - [ 'matches-attr', PSelectorMatchesAttrTask ], - [ 'matches-css', PSelectorMatchesCSSTask ], - [ 'matches-css-after', PSelectorMatchesCSSAfterTask ], - [ 'matches-css-before', PSelectorMatchesCSSBeforeTask ], - [ 'matches-media', PSelectorMatchesMediaTask ], - [ 'matches-path', PSelectorMatchesPathTask ], - [ 'min-text-length', PSelectorMinTextLengthTask ], - [ 'not', PSelectorIfNotTask ], - [ 'others', PSelectorOthersTask ], - [ 'spath', PSelectorSpathTask ], - [ 'upward', PSelectorUpwardTask ], - [ 'watch-attr', PSelectorWatchAttrs ], - [ 'xpath', PSelectorXpathTask ], - ]); - } this.raw = o.raw; this.selector = o.selector; this.tasks = []; @@ -392,7 +371,6 @@ class PSelector { const ctor = this.operatorToTaskMap.get(task[0]) || PSelectorVoidTask; tasks.push(new ctor(task)); } - // Initialize only after all tasks have been successfully instantiated this.tasks = tasks; } prime(input) { @@ -436,7 +414,25 @@ class PSelector { return false; } } -PSelector.prototype.operatorToTaskMap = undefined; +PSelector.prototype.operatorToTaskMap = new Map([ + [ 'has', PSelectorIfTask ], + [ 'has-text', PSelectorHasTextTask ], + [ 'if', PSelectorIfTask ], + [ 'if-not', PSelectorIfNotTask ], + [ 'matches-attr', PSelectorMatchesAttrTask ], + [ 'matches-css', PSelectorMatchesCSSTask ], + [ 'matches-css-after', PSelectorMatchesCSSAfterTask ], + [ 'matches-css-before', PSelectorMatchesCSSBeforeTask ], + [ 'matches-media', PSelectorMatchesMediaTask ], + [ 'matches-path', PSelectorMatchesPathTask ], + [ 'min-text-length', PSelectorMinTextLengthTask ], + [ 'not', PSelectorIfNotTask ], + [ 'others', PSelectorOthersTask ], + [ 'spath', PSelectorSpathTask ], + [ 'upward', PSelectorUpwardTask ], + [ 'watch-attr', PSelectorWatchAttrs ], + [ 'xpath', PSelectorXpathTask ], +]); class PSelectorRoot extends PSelector { constructor(o) { diff --git a/src/js/contentscript.js b/src/js/contentscript.js index 278f8fabd..cec0b50c3 100644 --- a/src/js/contentscript.js +++ b/src/js/contentscript.js @@ -948,14 +948,14 @@ vAPI.DOMFilterer = class { // vAPI.domSurveyor { - // https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/ + // http://www.cse.yorku.ca/~oz/hash.html#djb2 // Must mirror cosmetic filtering compiler's version const hashFromStr = (type, s) => { const len = s.length; const step = len + 7 >>> 3; - let hash = (type << 5) - type + (len & 0xFF) | 0; + let hash = (type << 5) + type ^ len; for ( let i = 0; i < len; i += step ) { - hash = (hash << 5) - hash + s.charCodeAt(i) | 0; + hash = (hash << 5) + hash ^ s.charCodeAt(i); } return hash & 0xFFFFFF; }; diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index d5b97d78e..d473371db 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -152,17 +152,17 @@ SelectorCacheEntry.junkyard = []; /******************************************************************************/ /******************************************************************************/ -// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/ +// http://www.cse.yorku.ca/~oz/hash.html#djb2 // Must mirror content script surveyor's version const hashFromStr = (type, s) => { const len = s.length; const step = len + 7 >>> 3; - let hash = (type << 5) - type + (len & 0xFF) | 0; - for ( let i = 0; i < len; i += step ) { - hash = (hash << 5) - hash + s.charCodeAt(i) | 0; - } - return hash & 0xFFFFFF; + let hash = (type << 5) + type ^ len; + for ( let i = 0; i < len; i += step ) { + hash = (hash << 5) + hash ^ s.charCodeAt(i); + } + return hash & 0xFFFFFF; }; // https://github.com/gorhill/uBlock/issues/1668 diff --git a/src/js/html-filtering.js b/src/js/html-filtering.js index d70346ad9..c9dc8a8f0 100644 --- a/src/js/html-filtering.js +++ b/src/js/html-filtering.js @@ -56,20 +56,33 @@ const htmlFilteringEngine = { }, }; -const PSelectorHasTextTask = class { +const regexFromString = (s, exact = false) => { + if ( s === '' ) { return /^/; } + const match = /^\/(.+)\/([i]?)$/.exec(s); + if ( match !== null ) { + return new RegExp(match[1], match[2] || undefined); + } + const reStr = s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return new RegExp(exact ? `^${reStr}$` : reStr, 'i'); +}; + +class PSelectorVoidTask { constructor(task) { - let arg0 = task[1], arg1; - if ( Array.isArray(task[1]) ) { - arg1 = arg0[1]; arg0 = arg0[0]; - } - this.needle = new RegExp(arg0, arg1); + console.info(`[uBO] HTML filtering: :${task[0]}() operator is not supported`); + } + transpose() { + } +} +class PSelectorHasTextTask { + constructor(task) { + this.needle = regexFromString(task[1]); } transpose(node, output) { if ( this.needle.test(node.textContent) ) { output.push(node); } } -}; +} const PSelectorIfTask = class { constructor(task) { @@ -80,17 +93,14 @@ const PSelectorIfTask = class { output.push(node); } } - get invalid() { - return this.pselector.invalid; - } }; PSelectorIfTask.prototype.target = true; -const PSelectorIfNotTask = class extends PSelectorIfTask { -}; +class PSelectorIfNotTask extends PSelectorIfTask { +} PSelectorIfNotTask.prototype.target = false; -const PSelectorMinTextLengthTask = class { +class PSelectorMinTextLengthTask { constructor(task) { this.min = task[1]; } @@ -99,9 +109,9 @@ const PSelectorMinTextLengthTask = class { output.push(node); } } -}; +} -const PSelectorSpathTask = class { +class PSelectorSpathTask { constructor(task) { this.spath = task[1]; this.nth = /^(?:\s*[+~]|:)/.test(this.spath); @@ -132,9 +142,9 @@ const PSelectorSpathTask = class { `:scope > :nth-child(${pos})${selector}` ); } -}; +} -const PSelectorUpwardTask = class { +class PSelectorUpwardTask { constructor(task) { const arg = task[1]; if ( typeof arg === 'number' ) { @@ -160,11 +170,11 @@ const PSelectorUpwardTask = class { } output.push(node); } -}; +} PSelectorUpwardTask.prototype.i = 0; PSelectorUpwardTask.prototype.s = ''; -const PSelectorXpathTask = class { +class PSelectorXpathTask { constructor(task) { this.xpe = task[1]; } @@ -184,25 +194,17 @@ const PSelectorXpathTask = class { } } } -}; +} -const PSelector = class { +class PSelector { constructor(o) { this.raw = o.raw; this.selector = o.selector; this.tasks = []; if ( !o.tasks ) { return; } for ( const task of o.tasks ) { - const ctor = this.operatorToTaskMap.get(task[0]); - if ( ctor === undefined ) { - this.invalid = true; - break; - } + const ctor = this.operatorToTaskMap.get(task[0]) || PSelectorVoidTask; const pselector = new ctor(task); - if ( pselector instanceof PSelectorIfTask && pselector.invalid ) { - this.invalid = true; - break; - } this.tasks.push(pselector); } } @@ -215,7 +217,6 @@ const PSelector = class { return Array.from(root.querySelectorAll(this.selector)); } exec(input) { - if ( this.invalid ) { return []; } let nodes = this.prime(input); for ( const task of this.tasks ) { if ( nodes.length === 0 ) { break; } @@ -228,7 +229,6 @@ const PSelector = class { return nodes; } test(input) { - if ( this.invalid ) { return false; } const nodes = this.prime(input); for ( const node of nodes ) { let output = [ node ]; @@ -244,7 +244,7 @@ const PSelector = class { } return false; } -}; +} PSelector.prototype.operatorToTaskMap = new Map([ [ 'has', PSelectorIfTask ], [ 'has-text', PSelectorHasTextTask ], @@ -257,9 +257,8 @@ PSelector.prototype.operatorToTaskMap = new Map([ [ 'upward', PSelectorUpwardTask ], [ 'xpath', PSelectorXpathTask ], ]); -PSelector.prototype.invalid = false; -const logOne = function(details, exception, selector) { +function logOne(details, exception, selector) { µb.filteringContext .duplicate() .fromTabId(details.tabId) @@ -272,9 +271,9 @@ const logOne = function(details, exception, selector) { raw: `${exception === 0 ? '##' : '#@#'}^${selector}` }) .toLogger(); -}; +} -const applyProceduralSelector = function(details, selector) { +function applyProceduralSelector(details, selector) { let pselector = pselectors.get(selector); if ( pselector === undefined ) { pselector = new PSelector(JSON.parse(selector)); @@ -290,9 +289,9 @@ const applyProceduralSelector = function(details, selector) { logOne(details, 0, pselector.raw); } return modified; -}; +} -const applyCSSSelector = function(details, selector) { +function applyCSSSelector(details, selector) { const nodes = docRegister.querySelectorAll(selector); let modified = false; for ( const node of nodes ) { @@ -303,7 +302,7 @@ const applyCSSSelector = function(details, selector) { logOne(details, 0, selector); } return modified; -}; +} htmlFilteringEngine.reset = function() { filterDB.clear(); diff --git a/src/js/static-dnr-filtering.js b/src/js/static-dnr-filtering.js index 19aa36901..bc58ae471 100644 --- a/src/js/static-dnr-filtering.js +++ b/src/js/static-dnr-filtering.js @@ -34,16 +34,17 @@ import { /******************************************************************************/ -// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/ +// http://www.cse.yorku.ca/~oz/hash.html#djb2 +// Must mirror content script surveyor's version const hashFromStr = (type, s) => { const len = s.length; const step = len + 7 >>> 3; - let hash = type; - for ( let i = 0; i < len; i += step ) { - hash = (hash << 5) - hash + s.charCodeAt(i) | 0; - } - return hash & 0x00FFFFFF; + let hash = (type << 5) + type ^ len; + for ( let i = 0; i < len; i += step ) { + hash = (hash << 5) + hash ^ s.charCodeAt(i); + } + return hash & 0xFFFFFF; }; /******************************************************************************/ diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index ae0642d1e..1f9ee265c 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -2131,17 +2131,17 @@ Parser.prototype.proceduralOperatorTokens = new Map([ [ 'has-text', 0b01 ], [ 'if', 0b00 ], [ 'if-not', 0b00 ], - [ 'matches-attr', 0b01 ], + [ 'matches-attr', 0b11 ], [ 'matches-css', 0b11 ], [ 'matches-media', 0b11 ], [ 'matches-path', 0b11 ], [ 'min-text-length', 0b01 ], [ 'not', 0b01 ], [ 'nth-ancestor', 0b00 ], - [ 'others', 0b01 ], + [ 'others', 0b11 ], [ 'remove', 0b11 ], - [ 'remove-attr', 0b01 ], - [ 'remove-class', 0b01 ], + [ 'remove-attr', 0b11 ], + [ 'remove-class', 0b11 ], [ 'style', 0b11 ], [ 'upward', 0b01 ], [ 'watch-attr', 0b11 ], diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index b1742d1d0..a81fb4646 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -2688,6 +2688,9 @@ registerFilterClass(FilterOnHeaders); // Benchmark for string-based tokens vs. safe-integer token values: // https://gorhill.github.io/obj-vs-set-vs-map/tokenize-to-str-vs-to-int.html +// http://www.cse.yorku.ca/~oz/hash.html#djb2 +// Use above algorithm to generate token hash. + const urlTokenizer = new (class { constructor() { this._chars = '0123456789%abcdefghijklmnopqrstuvwxyz'; @@ -2728,7 +2731,7 @@ const urlTokenizer = new (class { } addKnownToken(th) { - this.knownTokens[th & 0xFFFF ^ th >>> 16] = 1; + this.knownTokens[th & 0xFFFF] = 1; } // Tokenize on demand. @@ -2762,15 +2765,17 @@ const urlTokenizer = new (class { return this._hasQuery > 0; } + // http://www.cse.yorku.ca/~oz/hash.html#djb2 + tokenHashFromString(s) { const l = s.length; if ( l === 0 ) { return EMPTY_TOKEN_HASH; } const vtc = this._validTokenChars; let th = vtc[s.charCodeAt(0)]; for ( let i = 1; i !== 7 /* MAX_TOKEN_LENGTH */ && i !== l; i++ ) { - th = th << 4 ^ vtc[s.charCodeAt(i)]; + th = (th << 5) + th ^ vtc[s.charCodeAt(i)]; } - return th; + return th & 0xFFFFFFF; } stringFromTokenHash(th) { @@ -2831,11 +2836,11 @@ const urlTokenizer = new (class { break; } if ( n === 7 /* MAX_TOKEN_LENGTH */ ) { continue; } - th = th << 4 ^ v; + th = (th << 5) + th ^ v; n += 1; } - if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) { - tokens[j+0] = th; + if ( knownTokens[th & 0xFFFF] !== 0 ) { + tokens[j+0] = th & 0xFFFFFFF; tokens[j+1] = ti; j += 2; }