From d1895d4749b76de5c06ed6545ac2a4a3cd1724a4 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Sun, 29 Nov 2020 11:02:40 -0500 Subject: [PATCH] Another round of fine-tuning `queryprune=` syntax Related discussions: - https://github.com/uBlockOrigin/uBlock-issues/issues/1356#issuecomment-732411286 - https://github.com/AdguardTeam/CoreLibs/issues/1384 Changes: Negation character is `~` (instead of `!`). Drop special anchor character `|` -- leading `|` will be supported until no such filter is present in uBO's own filter lists. For example, instances of `queryprune=|ad` will have to be replaced with `queryprune=/^ad/` (or `queryprune=ad` if the name of the parameter to remove is exactly `ad`). Align semantics with those of AdGuard's `removeparam=`, except that specifying multiple `|`-separated names is not supported. --- src/js/background.js | 4 +- src/js/static-filtering-parser.js | 118 ++++++++++++++++++++---------- src/js/static-net-filtering.js | 93 +++++++++++------------ 3 files changed, 123 insertions(+), 92 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index e913b9ff1..af21783a2 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -140,8 +140,8 @@ const µBlock = (( ) => { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 36, // Increase when compiled format changes - selfieMagic: 36, // Increase when selfie format changes + compiledMagic: 37, // Increase when compiled format changes + selfieMagic: 37, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index 8504abf57..9337a627a 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -389,14 +389,24 @@ const Parser = class { } // If the pattern is not a regex, there might be options. 
+ // + // The character `$` is deemed to be an option anchor if and only if + // all the following conditions are fulfilled: + // - `$` is not the last character in the filter + // - The character following `$` is either comma, alphanumeric, or `~`. if ( patternIsRegex === false ) { let optionsBits = 0; - let i = this.optionsAnchorSpan.i; + let i = this.optionsAnchorSpan.i - 3; for (;;) { i -= 3; if ( i < islice ) { break; } const bits = this.slices[i]; - if ( hasBits(bits, BITDollar) ) { break; } + if ( + hasBits(bits, BITDollar) && + hasBits(this.slices[i+3], BITAlphaNum | BITComma | BITTilde) + ) { + break; + } optionsBits |= bits; } if ( i >= islice ) { @@ -1151,6 +1161,41 @@ const Parser = class { BITFlavorError | BITFlavorUnsupported | BITFlavorIgnore ); } + static parseQueryPruneValue(arg) { + let s = arg; + if ( s === '*' ) { return { all: true }; } + const out = { }; + out.not = s.charCodeAt(0) === 0x7E /* '~' */; + if ( out.not ) { + s = s.slice(1); + } + const match = /^\/(.+)\/(i)?$/.exec(s); + if ( match !== null ) { + try { + out.re = new RegExp(match[1], match[2] || ''); + } + catch(ex) { + out.bad = true; + } + return out; + } + // TODO: remove once no longer used in filter lists + if ( s.startsWith('|') ) { + try { + out.re = new RegExp('^' + s.slice(1), 'i'); + } catch(ex) { + out.bad = true; + } + return out; + } + // Multiple values not supported (because very inefficient) + if ( s.includes('|') ) { + out.bad = true; + return out; + } + out.name = s; + return out; + } }; /******************************************************************************/ @@ -1926,20 +1971,21 @@ const OPTTokenInlineScript = 23; const OPTTokenMatchCase = 24; const OPTTokenMedia = 25; const OPTTokenMp4 = 26; -const OPTTokenObject = 27; -const OPTTokenOther = 28; -const OPTTokenPing = 29; -const OPTTokenPopunder = 30; -const OPTTokenPopup = 31; -const OPTTokenRedirect = 32; -const OPTTokenRedirectRule = 33; -const OPTTokenQueryprune = 34; -const OPTTokenScript = 35; 
-const OPTTokenShide = 36; -const OPTTokenXhr = 37; -const OPTTokenWebrtc = 38; -const OPTTokenWebsocket = 39; -const OPTTokenCount = 40; +const OPTTokenNoop = 27; +const OPTTokenObject = 28; +const OPTTokenOther = 29; +const OPTTokenPing = 30; +const OPTTokenPopunder = 31; +const OPTTokenPopup = 32; +const OPTTokenRedirect = 33; +const OPTTokenRedirectRule = 34; +const OPTTokenQueryprune = 35; +const OPTTokenScript = 36; +const OPTTokenShide = 37; +const OPTTokenXhr = 38; +const OPTTokenWebrtc = 39; +const OPTTokenWebsocket = 40; +const OPTTokenCount = 41; //const OPTPerOptionMask = 0x0000ff00; const OPTCanNegate = 1 << 8; @@ -2025,6 +2071,7 @@ Parser.prototype.OPTTokenInvalid = OPTTokenInvalid; Parser.prototype.OPTTokenMatchCase = OPTTokenMatchCase; Parser.prototype.OPTTokenMedia = OPTTokenMedia; Parser.prototype.OPTTokenMp4 = OPTTokenMp4; +Parser.prototype.OPTTokenNoop = OPTTokenNoop; Parser.prototype.OPTTokenObject = OPTTokenObject; Parser.prototype.OPTTokenOther = OPTTokenOther; Parser.prototype.OPTTokenPing = OPTTokenPing; @@ -2087,6 +2134,7 @@ const netOptionTokenDescriptors = new Map([ [ 'match-case', OPTTokenMatchCase ], [ 'media', OPTTokenMedia | OPTCanNegate | OPTNetworkType | OPTModifiableType | OPTRedirectableType | OPTNonCspableType ], [ 'mp4', OPTTokenMp4 | OPTNetworkType | OPTBlockOnly | OPTModifierType ], + [ '_', OPTTokenNoop ], [ 'object', OPTTokenObject | OPTCanNegate | OPTNetworkType | OPTModifiableType | OPTRedirectableType | OPTNonCspableType ], [ 'object-subrequest', OPTTokenObject | OPTCanNegate | OPTNetworkType | OPTModifiableType | OPTRedirectableType | OPTNonCspableType ], [ 'other', OPTTokenOther | OPTCanNegate | OPTNetworkType | OPTModifiableType | OPTRedirectableType | OPTNonCspableType ], @@ -2144,6 +2192,7 @@ Parser.netOptionTokenIds = new Map([ [ 'match-case', OPTTokenMatchCase ], [ 'media', OPTTokenMedia ], [ 'mp4', OPTTokenMp4 ], + [ '_', OPTTokenNoop ], [ 'object', OPTTokenObject ], [ 'object-subrequest', OPTTokenObject ], [ 
'other', OPTTokenOther ], @@ -2191,6 +2240,7 @@ Parser.netOptionTokenNames = new Map([ [ OPTTokenMatchCase, 'match-case' ], [ OPTTokenMedia, 'media' ], [ OPTTokenMp4, 'mp4' ], + [ OPTTokenNoop, '_' ], [ OPTTokenObject, 'object' ], [ OPTTokenOther, 'other' ], [ OPTTokenPing, 'ping' ], @@ -2434,10 +2484,20 @@ const NetOptionsIterator = class { if ( this.interactive ) { this.parser.errorSlices(optSlices[i+1], optSlices[i+5]); } - } else if ( this.validateQueryPruneArg(i) === false ) { - optSlices[i] = OPTTokenInvalid; - if ( this.interactive ) { - this.parser.errorSlices(optSlices[i+4], optSlices[i+5]); + } else { + const val = this.parser.strFromSlices( + optSlices[i+4], + optSlices[i+5] - 3 + ); + const r = Parser.parseQueryPruneValue(val); + if ( r.bad ) { + optSlices[i] = OPTTokenInvalid; + if ( this.interactive ) { + this.parser.errorSlices( + optSlices[i+4], + optSlices[i+5] + ); + } } } } @@ -2501,24 +2561,6 @@ const NetOptionsIterator = class { this.readPtr = i + 6; return this; } - validateQueryPruneArg(i) { - let val = this.parser.strFromSlices( - this.optSlices[i+4], - this.optSlices[i+5] - 3 - ); - if ( val === '*' ) { return true; } - if ( val.charCodeAt(0) === 0x21 /* '!' 
*/ ) { - val = val.slice(1); - } - if ( val.startsWith('|') ) { val = `^${val.slice(1)}`; } - if ( val.endsWith('|') ) { val = `${val.slice(0,-1)}$`; } - try { - void new RegExp(val); - } catch(ex) { - return false; - } - return true; - } }; /******************************************************************************/ diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 7a58c3775..d1145d106 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -2967,6 +2967,8 @@ const FilterParser = class { } this.optionUnitBits |= this.REDIRECT_BIT; break; + case this.parser.OPTTokenNoop: + break; case this.parser.OPTTokenQueryprune: if ( this.parseModifierOption(id, val) === false ) { return false; @@ -3232,33 +3234,20 @@ const FilterParser = class { makePatternFromQuerypruneValue() { let pattern = this.modifyValue; - if ( pattern === '*' || pattern.charCodeAt(0) === 0x21 /* '!' */ ) { + if ( pattern === '*' || pattern.charCodeAt(0) === 0x7E /* '~' */ ) { return false; } - if ( /^\w+$/.test(pattern) ) { - this.pattern = `${pattern}=`; - return true; - } - const reRegex = /^\/(.+)\/i?$/; - if ( reRegex.test(pattern) ) { - pattern = reRegex.exec(pattern)[1]; + const match = /^\/(.+)\/i?$/.exec(pattern); + if ( match !== null ) { + pattern = match[1]; + this.isRegex = true; + } else if ( pattern.startsWith('|') ) { + pattern = '\\b' + pattern.slice(1); + this.isRegex = true; } else { - let prefix = '', suffix = ''; - if ( pattern.startsWith('|') ) { - pattern = pattern.slice(1); - prefix = '\\b'; - } - if ( pattern.endsWith('|') ) { - pattern = pattern.slice(0, -1); - suffix = '\\b'; - } - if ( pattern.indexOf('|') !== -1 ) { - pattern = `(?:${pattern})`; - } - pattern = prefix + pattern + suffix; + pattern = encodeURIComponent(pattern).toLowerCase() + '='; } this.pattern = pattern; - this.isRegex = true; return true; } @@ -4323,24 +4312,42 @@ FilterContainer.prototype.filterQuery = function(fctxt) { const params = new 
Map(new self.URLSearchParams(url.slice(qpos + 1))); const out = []; for ( const directive of directives ) { + if ( params.size === 0 ) { break; } const modifier = directive.modifier; const isException = (directive.bits & AllowAction) !== 0; if ( isException && modifier.value === '' ) { out.push(directive); break; } - if ( modifier.cache === undefined ) { - this.parseFilterPruneValue(modifier); + const { all, bad, name, not, re } = this.parseFilterPruneValue(modifier); + if ( bad ) { continue; } + if ( all ) { + if ( isException === false ) { params.clear(); } + out.push(directive); + break; } - const { all, not, re } = modifier.cache; - let filtered = false; - for ( const [ key, value ] of params ) { - if ( all !== true && re.test(`${key}=${value}`) === not ) { + if ( name !== undefined ) { + const value = params.get(name); + if ( not === false ) { + if ( value !== undefined ) { + if ( isException === false ) { params.delete(name); } + out.push(directive); + } continue; } - if ( isException === false ) { - params.delete(key); + if ( value !== undefined ) { params.delete(name); } + if ( params.size !== 0 ) { + if ( isException === false ) { params.clear(); } + out.push(directive); } + if ( value !== undefined ) { params.set(name, value); } + continue; + } + if ( re === undefined ) { continue; } + let filtered = false; + for ( const [ key, value ] of params ) { + if ( re.test(`${key}=${value}`) === not ) { continue; } + if ( isException === false ) { params.delete(key); } filtered = true; } if ( filtered ) { @@ -4358,29 +4365,11 @@ FilterContainer.prototype.filterQuery = function(fctxt) { }; FilterContainer.prototype.parseFilterPruneValue = function(modifier) { - const cache = {}; - const reRegex = /^\/(.+)\/i?$/; - let retext = modifier.value; - if ( retext === '*' ) { - cache.all = true; - } else { - cache.not = retext.charCodeAt(0) === 0x21 /* '!' 
*/; - if ( cache.not ) { retext = retext.slice(1); } - if ( /^\w+$/.test(retext) ) { - retext = `^${retext}=`; - } else if ( reRegex.test(retext) ) { - retext = reRegex.exec(retext)[1]; - } else { - if ( retext.startsWith('|') ) { retext = `^${retext.slice(1)}`; } - if ( retext.endsWith('|') ) { retext = `${retext.slice(0,-1)}$`; } - } - try { - cache.re = new RegExp(retext, 'i'); - } catch(ex) { - cache.re = /.^/; - } + if ( modifier.cache === undefined ) { + modifier.cache = + vAPI.StaticFilteringParser.parseQueryPruneValue(modifier.value); } - modifier.cache = cache; + return modifier.cache; }; /******************************************************************************/