From 194354cd5d776f99c10381102afea13e5d018756 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Thu, 29 Jun 2023 14:50:42 -0400 Subject: [PATCH] Add support for logical expressions to `!#if` directive Reference: https://adguard.com/kb/general/ad-filtering/create-own-filters/#conditions-directive This commit should make uBO fully compatible with the `!#if` directives found throughout AdGuard's filter lists. Additionally, added the new `!#else` directive for convenience to filter list authors: !#if cap_html_filtering example.com##^script:has-text(fakeAd) !#else example.com##+js(rmnt, script, fakeAd) !#endif --- src/js/codemirror/ubo-static-filtering.js | 19 ++--- src/js/messaging.js | 3 +- src/js/static-filtering-parser.js | 92 +++++++++++++++++------ 3 files changed, 74 insertions(+), 40 deletions(-) diff --git a/src/js/codemirror/ubo-static-filtering.js b/src/js/codemirror/ubo-static-filtering.js index 31cf7c552..d84018b9a 100644 --- a/src/js/codemirror/ubo-static-filtering.js +++ b/src/js/codemirror/ubo-static-filtering.js @@ -32,7 +32,7 @@ import { dom, qs$ } from '../dom.js'; const redirectNames = new Map(); const scriptletNames = new Map(); -const preparseDirectiveTokens = new Map(); +const preparseDirectiveEnv = []; const preparseDirectiveHints = []; const originHints = []; let hintHelperRegistered = false; @@ -88,15 +88,9 @@ CodeMirror.defineMode('ubo-static-filtering', function() { case sfp.NODE_TYPE_PREPARSE_DIRECTIVE_VALUE: return 'directive'; case sfp.NODE_TYPE_PREPARSE_DIRECTIVE_IF_VALUE: { - if ( preparseDirectiveTokens.size === 0 ) { - return 'positive strong'; - } const raw = astParser.getNodeString(currentWalkerNode); - const not = raw.startsWith('!'); - const token = not ? raw.slice(1) : raw; - return not === preparseDirectiveTokens.get(token) - ? 'negative strong' - : 'positive strong'; + const state = sfp.utils.preparser.evaluateExpr(raw, preparseDirectiveEnv); + return state ? 'positive strong' : 'negative strong'; } case sfp.NODE_TYPE_EXT_OPTIONS_ANCHOR: return astParser.getFlags(sfp.AST_FLAG_IS_EXCEPTION) @@ -287,10 +281,9 @@ CodeMirror.defineMode('ubo-static-filtering', function() { } } } - if ( Array.isArray(details.preparseDirectiveTokens)) { - details.preparseDirectiveTokens.forEach(([ a, b ]) => { - preparseDirectiveTokens.set(a, b); - }); + if ( Array.isArray(details.preparseDirectiveEnv)) { + preparseDirectiveEnv.length = 0; + preparseDirectiveEnv.push(...details.preparseDirectiveEnv); } if ( Array.isArray(details.preparseDirectiveHints)) { preparseDirectiveHints.push(...details.preparseDirectiveHints); diff --git a/src/js/messaging.js b/src/js/messaging.js index d060d84dc..5df6cd95a 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -1593,8 +1593,7 @@ const onMessage = function(request, sender, callback) { response = {}; if ( (request.hintUpdateToken || 0) === 0 ) { response.redirectResources = redirectEngine.getResourceDetails(); - response.preparseDirectiveTokens = - sfp.utils.preparser.getTokens(vAPI.webextFlavor.env); + response.preparseDirectiveEnv = vAPI.webextFlavor.env.slice(); response.preparseDirectiveHints = sfp.utils.preparser.getHints(); response.expertMode = µb.hiddenSettings.filterAuthorMode; diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index cc70571b7..fb0fcc641 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -780,7 +780,7 @@ export class AstFilterParser { this.reBadHostnameChars = /[\x00-\x24\x26-\x29\x2b\x2c\x2f\x3b-\x40\x5c\x5e\x60\x7b-\x7f]/; this.reIsEntity = /^[^*]+\.\*$/; this.rePreparseDirectiveIf = /^!#if /; - this.rePreparseDirectiveAny = /^!#(?:endif|if |include )/; + this.rePreparseDirectiveAny = /^!#(?:else|endif|if |include )/; this.reURL = /\bhttps?:\/\/\S+/; this.reHasPatternSpecialChars = /[\*\^]/; this.rePatternAllSpecialChars = /[\*\^]+|[^\x00-\x7f]+/g; @@ -1122,10 +1122,7 @@ export class AstFilterParser { this.linkRight(head, next); if ( type === NODE_TYPE_PREPARSE_DIRECTIVE_IF_VALUE ) { const rawToken = this.getNodeString(next).trim(); - const token = rawToken.charCodeAt(0) === 0x21 /* ! */ - ? rawToken.slice(1) - : rawToken; - if ( preparserIfTokens.has(token) === false ) { + if ( utils.preparser.evaluateExpr(rawToken) === undefined ) { this.addNodeFlags(next, NODE_FLAG_ERROR); this.addFlags(AST_FLAG_HAS_ERROR); this.astError = AST_ERROR_IF_TOKEN_UNKNOWN; @@ -4137,43 +4134,88 @@ export const utils = (( ) => { } }; + // Useful reference: + // https://adguard.com/kb/general/ad-filtering/create-own-filters/#conditions-directive + class preparser { + static evaluateExprToken(token, env = []) { + const not = token.charCodeAt(0) === 0x21 /* ! */; + if ( not ) { token = token.slice(1); } + const state = preparserTokens.get(token); + if ( state === undefined ) { return; } + return state === 'false' && not || env.includes(state) !== not; + } + + static evaluateExpr(expr, env = []) { + if ( expr.startsWith('(') && expr.endsWith(')') ) { + expr = expr.slice(1, -1); + } + const matches = Array.from(expr.matchAll(/(?:(?:&&|\|\|)\s+)?\S+/g)); + if ( matches.length === 0 ) { return; } + if ( matches[0][0].startsWith('|') || matches[0][0].startsWith('&') ) { return; } + let result = this.evaluateExprToken(matches[0][0], env); + for ( let i = 1; i < matches.length; i++ ) { + const parts = matches[i][0].split(/ +/); + if ( parts.length !== 2 ) { return; } + const state = this.evaluateExprToken(parts[1], env); + if ( state === undefined ) { return; } + if ( parts[0] === '||' ) { + result = result || state; + } else if ( parts[0] === '&&' ) { + result = result && state; + } else { + return; + } + } + return result; + } + // This method returns an array of indices, corresponding to position in // the content string which should alternatively be parsed and discarded. static splitter(content, env = []) { - const reIf = /^!#(if|endif)\b([^\n]*)(?:[\n\r]+|$)/gm; + const reIf = /^!#(if|else|endif)\b([^\n]*)(?:[\n\r]+|$)/gm; const stack = []; - const shouldDiscard = ( ) => stack.some(v => v); const parts = [ 0 ]; let discard = false; + const shouldDiscard = ( ) => stack.some(v => v); + + const begif = (startDiscard, match) => { + if ( discard === false && startDiscard ) { + parts.push(match.index); + discard = true; + } + stack.push(startDiscard); + }; + + const endif = match => { + stack.pop(); + const stopDiscard = shouldDiscard() === false; + if ( discard && stopDiscard ) { + parts.push(match.index + match[0].length); + discard = false; + } + }; + for (;;) { const match = reIf.exec(content); if ( match === null ) { break; } switch ( match[1] ) { case 'if': { - let expr = match[2].trim(); - const target = expr.charCodeAt(0) === 0x21 /* '!' */; - if ( target ) { expr = expr.slice(1); } - const token = preparserTokens.get(expr); - const startDiscard = - token === 'false' && target === false || - token !== undefined && env.includes(token) === target; - if ( discard === false && startDiscard ) { - parts.push(match.index); - discard = true; - } - stack.push(startDiscard); + const startDiscard = this.evaluateExpr(match[2].trim(), env) === false; + begif(startDiscard, match); + break; + } + case 'else': { + if ( stack.length === 0 ) { break; } + const startDiscard = stack[stack.length-1] === false; + endif(match); + begif(startDiscard, match); break; } case 'endif': { - stack.pop(); - const stopDiscard = shouldDiscard() === false; - if ( discard && stopDiscard ) { - parts.push(match.index + match[0].length); - discard = false; - } + endif(match); break; } default: