From 81498474d6d440b032681aa9952d593749b39efb Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Tue, 31 Jan 2023 14:15:13 -0500 Subject: [PATCH] Add support for regex-based values as target domain for static extended filters Related discussion: - https://github.com/uBlockOrigin/uBlock-issues/discussions/2234 Example of usage: /img[a-z]{3,5}\.buzz/##+js(nowoif) Use sparingly, when no other solution is practical from a maintenance point of view -- keeping in mind that uBO has to iterate through all the regex-based values, unlike plain hostname or entity-based values which are mere lookups. Related commit: - https://github.com/gorhill/uBlock/commit/b1de8d3fe48755da58268ba86dccd5d76940b613 --- src/js/cosmetic-filtering.js | 26 +++++++------ src/js/html-filtering.js | 16 ++------ src/js/reverselookup-worker.js | 11 ++++-- src/js/static-ext-filtering-db.js | 61 +++++++++++++++++++++++-------- src/js/static-filtering-parser.js | 5 +-- 5 files changed, 73 insertions(+), 46 deletions(-) diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index 41640654f..ee8487714 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -811,31 +811,33 @@ FilterContainer.prototype.retrieveSpecificSelectors = function( } // Retrieve filters with a non-empty hostname + const retrieveSets = [ specificSet, exceptionSet, proceduralSet, exceptionSet ]; + const discardSets = [ dummySet, exceptionSet ]; this.specificFilters.retrieve( hostname, - options.noSpecificCosmeticFiltering !== true - ? [ specificSet, exceptionSet, proceduralSet, exceptionSet ] - : [ dummySet, exceptionSet ], + options.noSpecificCosmeticFiltering ? discardSets : retrieveSets, 1 ); - // Retrieve filters with an empty hostname + // Retrieve filters with a regex-based hostname value this.specificFilters.retrieve( hostname, - options.noGenericCosmeticFiltering !== true - ? 
[ specificSet, exceptionSet, proceduralSet, exceptionSet ] - : [ dummySet, exceptionSet ], - 2 + options.noSpecificCosmeticFiltering ? discardSets : retrieveSets, + 3 ); - // Retrieve filters with a non-empty entity + // Retrieve filters with a entity-based hostname value if ( request.entity !== '' ) { this.specificFilters.retrieve( `${hostname.slice(0, -request.domain.length)}${request.entity}`, - options.noSpecificCosmeticFiltering !== true - ? [ specificSet, exceptionSet, proceduralSet, exceptionSet ] - : [ dummySet, exceptionSet ], + options.noSpecificCosmeticFiltering ? discardSets : retrieveSets, 1 ); } + // Retrieve filters with an empty hostname + this.specificFilters.retrieve( + hostname, + options.noGenericCosmeticFiltering ? discardSets : retrieveSets, + 2 + ); if ( exceptionSet.size !== 0 ) { out.exceptionFilters = Array.from(exceptionSet); diff --git a/src/js/html-filtering.js b/src/js/html-filtering.js index 4b81c5ae3..6c388c47c 100644 --- a/src/js/html-filtering.js +++ b/src/js/html-filtering.js @@ -27,7 +27,6 @@ import logger from './logger.js'; import µb from './background.js'; import { sessionFirewall } from './filtering-engines.js'; import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js'; -import * as sfp from './static-filtering-parser.js'; /******************************************************************************/ @@ -315,9 +314,6 @@ htmlFilteringEngine.freeze = function() { htmlFilteringEngine.compile = function(parser, writer) { const isException = parser.isException(); - const root = parser.getBranchFromType(sfp.NODE_TYPE_EXT_PATTERN_HTML); - const headerName = parser.getNodeString(root); - const { raw, compiled } = parser.result; if ( compiled === undefined ) { const who = writer.properties.get('name') || '?'; @@ -380,19 +376,13 @@ htmlFilteringEngine.retrieve = function(details) { const plains = new Set(); const procedurals = new Set(); const exceptions = new Set(); + const retrieveSets = [ plains, exceptions, 
procedurals, exceptions ]; - filterDB.retrieve( - hostname, - [ plains, exceptions, procedurals, exceptions ] - ); + filterDB.retrieve(hostname, retrieveSets); const entity = details.entity !== '' ? `${hostname.slice(0, -details.domain.length)}${details.entity}` : '*'; - filterDB.retrieve( - entity, - [ plains, exceptions, procedurals, exceptions ], - 1 - ); + filterDB.retrieve(entity, retrieveSets, 1); if ( plains.size === 0 && procedurals.size === 0 ) { return; } diff --git a/src/js/reverselookup-worker.js b/src/js/reverselookup-worker.js index bc59931ce..516971a86 100644 --- a/src/js/reverselookup-worker.js +++ b/src/js/reverselookup-worker.js @@ -150,9 +150,14 @@ const fromExtendedFilter = function(details) { } const hostnameMatches = hn => { - return hn === '' || - reHostname.test(hn) || - reEntity !== undefined && reEntity.test(hn); + if ( hn === '' ) { return true; } + if ( hn.charCodeAt(0) === 0x2F /* / */ ) { + return (new RegExp(hn.slice(1,-1))).test(hostname); + } + if ( reHostname.test(hn) ) { return true; } + if ( reEntity === undefined ) { return false; } + if ( reEntity.test(hn) ) { return true; } + return false; }; const response = Object.create(null); diff --git a/src/js/static-ext-filtering-db.js b/src/js/static-ext-filtering-db.js index 4f3696cfc..3a0309e7d 100644 --- a/src/js/static-ext-filtering-db.js +++ b/src/js/static-ext-filtering-db.js @@ -29,6 +29,8 @@ const StaticExtFilteringHostnameDB = class { this.timer = undefined; this.strToIdMap = new Map(); this.hostnameToSlotIdMap = new Map(); + this.regexToSlotIdMap = new Map(); + this.regexMap = new Map(); // Array of integer pairs this.hostnameSlots = []; // Array of strings (selectors and pseudo-selectors) @@ -51,9 +53,16 @@ const StaticExtFilteringHostnameDB = class { } } const strId = iStr << this.nBits | bits; - let iHn = this.hostnameToSlotIdMap.get(hn); + const hnIsNotRegex = hn.charCodeAt(0) !== 0x2F /* / */; + let iHn = hnIsNotRegex + ? 
this.hostnameToSlotIdMap.get(hn) + : this.regexToSlotIdMap.get(hn); if ( iHn === undefined ) { - this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length); + if ( hnIsNotRegex ) { + this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length); + } else { + this.regexToSlotIdMap.set(hn, this.hostnameSlots.length); + } this.hostnameSlots.push(strId, 0); return; } @@ -67,9 +76,11 @@ const StaticExtFilteringHostnameDB = class { clear() { this.hostnameToSlotIdMap.clear(); + this.regexToSlotIdMap.clear(); this.hostnameSlots.length = 0; this.strSlots.length = 0; this.strToIdMap.clear(); + this.regexMap.clear(); this.size = 0; } @@ -92,39 +103,55 @@ const StaticExtFilteringHostnameDB = class { ); } - // modifiers = 1: return only specific items - // modifiers = 2: return only generic items + // modifiers = 0: all items + // modifiers = 1: only specific items + // modifiers = 2: only generic items + // modifiers = 3: only regex-based items // retrieve(hostname, out, modifiers = 0) { - if ( modifiers === 2 ) { - hostname = ''; - } + let hn = hostname; + if ( modifiers === 2 ) { hn = ''; } const mask = out.length - 1; // out.length must be power of two for (;;) { - let iHn = this.hostnameToSlotIdMap.get(hostname); + let iHn = this.hostnameToSlotIdMap.get(hn); if ( iHn !== undefined ) { do { const strId = this.hostnameSlots[iHn+0]; - out[strId & mask].add( - this.strSlots[strId >>> this.nBits] - ); + out[strId & mask].add(this.strSlots[strId >>> this.nBits]); iHn = this.hostnameSlots[iHn+1]; } while ( iHn !== 0 ); } - if ( hostname === '' ) { break; } - const pos = hostname.indexOf('.'); + if ( hn === '' ) { break; } + const pos = hn.indexOf('.'); if ( pos === -1 ) { if ( modifiers === 1 ) { break; } - hostname = ''; + hn = ''; } else { - hostname = hostname.slice(pos + 1); + hn = hn.slice(pos + 1); } } + if ( modifiers !== 0 && modifiers !== 3 ) { return; } + // TODO: consider using a combined regex to test once for whether + // iterating is worth it. 
+ for ( const restr of this.regexToSlotIdMap.keys() ) { + let re = this.regexMap.get(restr); + if ( re === undefined ) { + this.regexMap.set(restr, (re = new RegExp(restr.slice(1,-1)))); + } + if ( re.test(hostname) === false ) { continue; } + let iHn = this.regexToSlotIdMap.get(restr); + do { + const strId = this.hostnameSlots[iHn+0]; + out[strId & mask].add(this.strSlots[strId >>> this.nBits]); + iHn = this.hostnameSlots[iHn+1]; + } while ( iHn !== 0 ); + } } toSelfie() { return { hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap), + regexToSlotIdMap: Array.from(this.regexToSlotIdMap), hostnameSlots: this.hostnameSlots, strSlots: this.strSlots, size: this.size @@ -134,6 +161,10 @@ const StaticExtFilteringHostnameDB = class { fromSelfie(selfie) { if ( selfie === undefined ) { return; } this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap); + // Regex-based lookup available in uBO 1.47.0 and above + if ( Array.isArray(selfie.regexToSlotIdMap) ) { + this.regexToSlotIdMap = new Map(selfie.regexToSlotIdMap); + } this.hostnameSlots = selfie.hostnameSlots; this.strSlots = selfie.strSlots; this.size = selfie.size; diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index 63bd112b2..f069bb4d2 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -1066,8 +1066,7 @@ export class AstFilterParser { realBad = true; break; case NODE_TYPE_NET_OPTION_NAME_WEBRTC: - bad = true; - realBad = isNegated || hasValue; + realBad = true; break; case NODE_TYPE_NET_PATTERN: realBad = this.hasOptions() === false && @@ -1784,7 +1783,7 @@ export class AstFilterParser { ); this.addFlags(AST_FLAG_HAS_OPTIONS); this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next); - this.linkDown(next, this.parseDomainList(next, ',', 0b01110)); + this.linkDown(next, this.parseDomainList(next, ',', 0b11110)); prev = this.linkRight(prev, next); } next = this.allocTypedNode(