From c3bc2c741d61db3e99b313835c2ae34a4a008359 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Sun, 15 Mar 2020 12:23:25 -0400 Subject: [PATCH] Add support for `cname` type and `denyallow` option This concerns the static network filtering engine. Related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/943 * * * New static network filter type: `cname` By default, network requests which are the result of resolving a canonical name are subject to filtering. This filtering can be bypassed by creating exception filters using the `cname` option. For example: @@*$cname The filter above tells the network filtering engine to except network requests which fulfill all the following conditions: - network request is blocked - network request is that of an unaliased hostname Filter list authors are discouraged from using exception filters of `cname` type, unless there is no other practical solution such that maintenance burden becomes the greater issue. Of course, such exception filters should be as narrow as possible, i.e. apply to a specific domain, etc. * * * New static network filter option: `denyallow` The purpose of `denyallow` is to bring default-deny/allow-exceptionally ability into the static network filtering arsenal. Example of usage: *$3p,script, \ denyallow=x.com|y.com \ domain=a.com|b.com The above filter tells the network filtering engine that when the context is `a.com` or `b.com`, block all 3rd-party scripts except those from `x.com` and `y.com`. Essentially, the new `denyallow` option makes it easier to implement default-deny/allow-exceptionally in static filter lists, whereas before this had to be done with unwieldy regular expressions[1], or through a mix of broadly blocking filters along with exception filters[2]. 
[1] https://hg.adblockplus.org/ruadlist/rev/f362910bc9a0 [2] Typically filters which pattern are of the form `|http*://` --- src/js/background.js | 4 +- src/js/filtering-context.js | 6 +- src/js/messaging.js | 8 +-- src/js/pagestore.js | 77 +++++++++++++++++++---- src/js/reverselookup.js | 8 +-- src/js/static-net-filtering.js | 110 ++++++++++++++++++++++++++++----- 6 files changed, 172 insertions(+), 41 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index 46b1d523e..22a57e588 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -130,8 +130,8 @@ const µBlock = (( ) => { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 23, // Increase when compiled format changes - selfieMagic: 25, // Increase when selfie format changes + compiledMagic: 26, // Increase when compiled format changes + selfieMagic: 26, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/filtering-context.js b/src/js/filtering-context.js index 5ae123ec9..962372305 100644 --- a/src/js/filtering-context.js +++ b/src/js/filtering-context.js @@ -81,11 +81,9 @@ this.setDocOriginFromURL(details.documentUrl); } else { const pageStore = µBlock.pageStoreFromTabId(this.tabId); - const docStore = pageStore && pageStore.frames.get(this.docId); + const docStore = pageStore && pageStore.getFrame(this.docId); if ( docStore ) { - this.docOrigin = undefined; - this.docHostname = docStore.pageHostname; - this.docDomain = docStore.pageDomain; + this.setDocOriginFromURL(docStore.rawURL); } else { this.setDocOrigin(this.tabOrigin); } diff --git a/src/js/messaging.js b/src/js/messaging.js index b3004aa38..9cda182ea 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -532,8 +532,8 @@ const retrieveContentScriptParameters = function(senderDetails, request) { // `generichide` must be evaluated in the frame context. 
if ( noCosmeticFiltering === false ) { const genericHide = - µb.staticNetFilteringEngine.matchStringElementHide( - 'generic', + µb.staticNetFilteringEngine.matchStringReverse( + 'generichide', request.url ); response.noGenericCosmeticFiltering = genericHide === 2; @@ -559,8 +559,8 @@ const retrieveContentScriptParameters = function(senderDetails, request) { // Add support for `specifichide`. if ( noCosmeticFiltering === false ) { const specificHide = - µb.staticNetFilteringEngine.matchStringElementHide( - 'specific', + µb.staticNetFilteringEngine.matchStringReverse( + 'specifichide', request.url ); response.noSpecificCosmeticFiltering = specificHide === 2; diff --git a/src/js/pagestore.js b/src/js/pagestore.js index 615515a72..7853b30a3 100644 --- a/src/js/pagestore.js +++ b/src/js/pagestore.js @@ -151,10 +151,7 @@ NetFilteringResultCache.prototype.shelfLife = 15000; /******************************************************************************/ -// Frame stores are used solely to associate a URL with a frame id. The -// name `pageHostname` is used because of historical reasons. A more -// appropriate name is `frameHostname` -- something to do in a future -// refactoring. +// Frame stores are used solely to associate a URL with a frame id. 
// To mitigate memory churning const frameStoreJunkyard = []; @@ -166,15 +163,19 @@ const FrameStore = class { } init(frameURL) { - const µburi = µb.URI; - this.pageHostname = µburi.hostnameFromURI(frameURL); - this.pageDomain = - µburi.domainFromHostname(this.pageHostname) || this.pageHostname; + this.exceptCname = undefined; + this.rawURL = frameURL; + if ( frameURL !== undefined ) { + this.hostname = vAPI.hostnameFromURI(frameURL); + this.domain = + vAPI.domainFromHostname(this.hostname) || this.hostname; + } return this; } dispose() { - this.pageHostname = this.pageDomain = ''; + this.exceptCname = undefined; + this.rawURL = this.hostname = this.domain = ''; if ( frameStoreJunkyard.length < frameStoreJunkyardMax ) { frameStoreJunkyard.push(this); } @@ -239,7 +240,6 @@ const PageStore = class { this.rawURL = tabContext.rawURL; this.hostnameToCountMap = new Map(); this.contentLastModified = 0; - this.frames = new Map(); this.logData = undefined; this.perLoadBlockedRequestCount = 0; this.perLoadAllowedRequestCount = 0; @@ -250,6 +250,9 @@ const PageStore = class { this.internalRedirectionCount = 0; this.extraData.clear(); + this.frames = new Map(); + this.setFrame(0, tabContext.rawURL); + // The current filtering context is cloned because: // - We may be called with or without the current context having been // initialized. @@ -303,6 +306,7 @@ const PageStore = class { // As part of https://github.com/chrisaljoudi/uBlock/issues/405 // URL changed, force a re-evaluation of filtering switch this.rawURL = tabContext.rawURL; + this.setFrame(0, this.rawURL); return this; } @@ -541,9 +545,22 @@ const PageStore = class { // Static filtering has lowest precedence. 
if ( result === 0 || result === 3 ) { - result = µb.staticNetFilteringEngine.matchString(fctxt); - if ( result !== 0 && µb.logger.enabled ) { - fctxt.filter = µb.staticNetFilteringEngine.toLogData(); + const snfe = µb.staticNetFilteringEngine; + result = snfe.matchString(fctxt); + if ( result !== 0 ) { + if ( µb.logger.enabled ) { + fctxt.filter = snfe.toLogData(); + } + // https://github.com/uBlockOrigin/uBlock-issues/issues/943 + // Blanket-except blocked aliased canonical hostnames? + if ( + result === 1 && + fctxt.aliasURL !== undefined && + snfe.isBlockImportant() === false && + this.shouldExceptCname(fctxt) + ) { + return 2; + } } } @@ -646,6 +663,40 @@ const PageStore = class { return 1; } + shouldExceptCname(fctxt) { + let exceptCname; + let frameStore; + if ( fctxt.docId !== undefined ) { + frameStore = this.getFrame(fctxt.docId); + if ( frameStore instanceof Object ) { + exceptCname = frameStore.exceptCname; + } + } + if ( exceptCname === undefined ) { + const result = µb.staticNetFilteringEngine.matchStringReverse( + 'cname', + frameStore instanceof Object + ? frameStore.rawURL + : fctxt.getDocOrigin() + ); + if ( result === 2 ) { + exceptCname = µb.logger.enabled + ? 
µb.staticNetFilteringEngine.toLogData() + : true; + } else { + exceptCname = false; + } + if ( frameStore instanceof Object ) { + frameStore.exceptCname = exceptCname; + } + } + if ( exceptCname === false ) { return false; } + if ( exceptCname instanceof Object ) { + fctxt.setFilter(exceptCname); + } + return true; + } + getBlockedResources(request, response) { const normalURL = µb.normalizePageURL(this.tabId, request.frameURL); const resources = request.resources; diff --git a/src/js/reverselookup.js b/src/js/reverselookup.js index 468b534ab..1bda3f676 100644 --- a/src/js/reverselookup.js +++ b/src/js/reverselookup.js @@ -172,13 +172,13 @@ const fromCosmeticFilter = async function(details) { domain: µBlock.URI.domainFromHostname(hostname), hostname: hostname, ignoreGeneric: - µBlock.staticNetFilteringEngine.matchStringElementHide( - 'generic', + µBlock.staticNetFilteringEngine.matchStringReverse( + 'generichide', details.url ) === 2, ignoreSpecific: - µBlock.staticNetFilteringEngine.matchStringElementHide( - 'specific', + µBlock.staticNetFilteringEngine.matchStringReverse( + 'specifichide', details.url ) === 2, rawFilter: details.rawFilter diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index a44b32ae8..b1d2ad9da 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -75,10 +75,11 @@ const typeNameToTypeValue = { 'specifichide': 16 << 4, 'inline-font': 17 << 4, 'inline-script': 18 << 4, - 'data': 19 << 4, // special: a generic data holder - 'redirect': 20 << 4, - 'webrtc': 21 << 4, - 'unsupported': 22 << 4, + 'cname': 19 << 4, + 'data': 20 << 4, // special: a generic data holder + 'redirect': 21 << 4, + 'webrtc': 22 << 4, + 'unsupported': 23 << 4, }; const otherTypeBitValue = typeNameToTypeValue.other; @@ -119,10 +120,11 @@ const typeValueToTypeName = { 16: 'specifichide', 17: 'inline-font', 18: 'inline-script', - 19: 'data', - 20: 'redirect', - 21: 'webrtc', - 22: 'unsupported', + 19: 'cname', + 20: 
'data', + 21: 'redirect', + 22: 'webrtc', + 23: 'unsupported', }; // https://github.com/gorhill/uBlock/issues/1493 @@ -130,6 +132,7 @@ const typeValueToTypeName = { const toNormalizedType = { 'all': 'all', 'beacon': 'ping', + 'cname': 'cname', 'css': 'stylesheet', 'data': 'data', 'doc': 'main_frame', @@ -220,8 +223,16 @@ const toLogDataInternal = function(categoryBits, tokenHash, iunit) { const pattern = []; const regex = []; const options = []; + const denyallow = []; const domains = []; - const logData = { pattern, regex, domains, options, isRegex: false }; + const logData = { + pattern, + regex, + denyallow, + domains, + options, + isRegex: false, + }; filterUnits[iunit].logData(logData); if ( categoryBits & 0x002 ) { logData.options.unshift('important'); @@ -246,6 +257,9 @@ const toLogDataInternal = function(categoryBits, tokenHash, iunit) { if ( categoryBits & 0x001 ) { raw = '@@' + raw; } + if ( denyallow.length !== 0 ) { + options.push(`denyallow=${denyallow.join('|')}`); + } if ( domains.length !== 0 ) { options.push(`domain=${domains.join('|')}`); } @@ -273,6 +287,10 @@ const isSeparatorChar = c => (charClassMap[c] & CHAR_CLASS_SEPARATOR) !== 0; /******************************************************************************/ +// TODO: Unify [ string instance, string usage instance ] pairs + +/******************************************************************************/ + let filterUnits = [ null ]; let filterSequences = new Uint32Array(131072); @@ -1697,6 +1715,50 @@ registerFilterClass(FilterHostnameDict); /******************************************************************************/ +const FilterDenyAllow = class { + constructor(s, trieArgs) { + this.s = s; + this.hndict = FilterHostnameDict.trieContainer.createOne(trieArgs); + } + + match() { + return this.hndict.matches($requestHostname) === -1; + } + + logData(details) { + details.denyallow.push(this.s); + } + + toSelfie() { + return [ + this.fid, + this.s, + 
FilterHostnameDict.trieContainer.compileOne(this.hndict), + ]; + } + + static compile(details) { + return [ FilterDenyAllow.fid, details.denyallow ]; + } + + static unitFromCompiled(args) { + const f = new FilterDenyAllow(args[1]); + for ( const hn of args[1].split('|') ) { + if ( hn === '' ) { continue; } + f.hndict.add(hn); + } + return filterUnits.push(f) - 1; + } + + static fromSelfie(args) { + return new FilterDenyAllow(...args.slice(1)); + } +}; + +registerFilterClass(FilterDenyAllow); + +/******************************************************************************/ + // Dictionary of hostnames for filters which only purpose is to match // the document origin. @@ -2126,6 +2188,7 @@ const FilterParser = class { this.party = AnyParty; this.fopts = ''; this.domainOpt = ''; + this.denyallow = ''; this.isPureHostname = false; this.isRegex = false; this.raw = ''; @@ -2184,7 +2247,7 @@ const FilterParser = class { } } - parseDomainOption(s) { + parseHostnameList(s) { if ( this.reHasUnicode.test(s) ) { const hostnames = s.split('|'); let i = hostnames.length; @@ -2222,13 +2285,21 @@ const FilterParser = class { // Detect and discard filter if domain option contains nonsensical // characters. 
if ( opt.startsWith('domain=') ) { - this.domainOpt = this.parseDomainOption(opt.slice(7)); + this.domainOpt = this.parseHostnameList(opt.slice(7)); if ( this.domainOpt === '' ) { this.unsupported = true; break; } continue; } + if ( opt.startsWith('denyallow=') ) { + this.denyallow = this.parseHostnameList(opt.slice(10)); + if ( this.denyallow === '' ) { + this.unsupported = true; + break; + } + continue; + } if ( opt === 'important' ) { this.important = Important; continue; @@ -2374,7 +2445,7 @@ const FilterParser = class { this.unsupported = true; return this; } - this.parseOptions(s.slice(pos + 1)); + this.parseOptions(s.slice(pos + 1).trim()); if ( this.unsupported ) { return this; } s = s.slice(0, pos); } @@ -3015,6 +3086,11 @@ FilterContainer.prototype.compile = function(raw, writer) { ); } + // Deny-allow + if ( parsed.denyallow !== '' ) { + units.push(FilterDenyAllow.compile(parsed)); + } + // Data if ( parsed.dataType !== undefined ) { units.push(FilterDataHolder.compile(parsed)); @@ -3334,8 +3410,8 @@ FilterContainer.prototype.realmMatchString = function( // https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/ // Add support for `specifichide`. -FilterContainer.prototype.matchStringElementHide = function(type, url) { - const typeBits = typeNameToTypeValue[`${type}hide`] | 0x80000000; +FilterContainer.prototype.matchStringReverse = function(type, url) { + const typeBits = typeNameToTypeValue[type] | 0x80000000; // Prime tokenizer: we get a normalized URL in return. 
$requestURL = urlTokenizer.setURL(url); @@ -3425,6 +3501,12 @@ FilterContainer.prototype.toLogData = function() { /******************************************************************************/ +FilterContainer.prototype.isBlockImportant = function() { + return (this.$catbits & BlockImportant) === BlockImportant; +}; + +/******************************************************************************/ + FilterContainer.prototype.getFilterCount = function() { return this.acceptedCount - this.discardedCount; };