From 8f98622374ca36e69c60878979583b55615d4dd9 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Thu, 23 Sep 2021 09:38:27 -0400 Subject: [PATCH] Refactor implementation of `~[type]` filter options Related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/1730 A new filter unit, FilterNotType, is introduced to enforce negated filter type options. Before this commit, there was no actual negated types in the static network filtering engine, as a negated type was internally converted to non-negated types at compile time. As a result, the logger would never output a matching filter with its original negated type options. This commit no longer causes an internal conversion to take place at compile time, but explicitly enforce negated types at match time, and as a result the logger will from now on output matching filter with their original negated type options. --- src/js/background.js | 4 +- src/js/static-net-filtering.js | 112 ++++++++++++++++++++++++--------- 2 files changed, 86 insertions(+), 30 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index 2858770a2..5243356be 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -154,8 +154,8 @@ const µBlock = { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 38, // Increase when compiled format changes - selfieMagic: 38, // Increase when selfie format changes + compiledMagic: 39, // Increase when compiled format changes + selfieMagic: 39, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 6f137ed12..f3fee435d 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -135,8 +135,6 @@ const allTypesBits = 1 << (typeNameToTypeValue['main_frame'] >>> TypeBitsOffset) - 1 | 1 << (typeNameToTypeValue['inline-font'] >>> TypeBitsOffset) - 1 | 1 << (typeNameToTypeValue['inline-script'] >>> TypeBitsOffset) - 1; -const documentTypeBit = - 1 << (typeNameToTypeValue['main_frame'] >>> TypeBitsOffset) - 1; const unsupportedTypeBit = 1 << (typeNameToTypeValue['unsupported'] >>> TypeBitsOffset) - 1; @@ -161,6 +159,8 @@ const typeValueToTypeName = [ 'inline-font', 'inline-script', 'cname', + '', + '', 'webrtc', 'unsupported', ]; @@ -186,6 +186,7 @@ const EMPTY_TOKEN_HASH = 0xF0000000; // See the following as short-lived registers, used during evaluation. They are // valid until the next evaluation. +let $requestTypeValue = 0; let $requestURL = ''; let $requestURLRaw = ''; let $requestHostname = ''; @@ -1271,6 +1272,57 @@ registerFilterClass(FilterRegex); /******************************************************************************/ +// stylesheet: 1 => bit 0 +// image: 2 => bit 1 +// object: 3 => bit 2 +// script: 4 => bit 3 +// ... + +const FilterNotType = class { + constructor(notTypeBits) { + this.notTypeBits = notTypeBits; + } + + match() { + return $requestTypeValue !== 0 && + (this.notTypeBits & (1 << ($requestTypeValue - 1))) === 0; + } + + logData(details) { + let bits = this.notTypeBits; + for ( let i = 1; bits !== 0 && i < typeValueToTypeName.length; i++ ) { + const bit = 1 << (i - 1); + if ( (bits & bit) === 0 ) { continue; } + bits &= ~bit; + details.options.push(`~${typeValueToTypeName[i]}`); + } + } + + toSelfie() { + return [ this.fid, this.notTypeBits ]; + } + + static compile(details) { + return [ FilterNotType.fid, details.notTypeBits ]; + } + + static fromCompiled(args) { + return new FilterNotType(args[1]); + } + + static fromSelfie(args) { + return new FilterNotType(args[1]); + } + + static keyFromArgs(args) { + return `${args[1]}`; + } +}; + +registerFilterClass(FilterNotType); + +/******************************************************************************/ + // A helper class to parse `domain=` option. const DomainOptIterator = class { @@ -2908,8 +2960,10 @@ class FilterCompiler { : allTypesBits; if ( not ) { this.notTypeBits |= typeBit; + this.typeBits &= ~typeBit; } else { this.typeBits |= typeBit; + this.notTypeBits &= ~typeBit; } } @@ -3085,14 +3139,8 @@ class FilterCompiler { // - no network type is present -- i.e. all network types are // implicitly toggled on if ( this.notTypeBits !== 0 ) { - if ( - (this.notTypeBits & allNetworkTypesBits) !== 0 || - (this.typeBits & allNetworkTypesBits) === 0 - ) { - this.typeBits |= allNetworkTypesBits; - } this.typeBits &= ~this.notTypeBits; - if ( this.typeBits === 0 ) { return false; } + this.optionUnitBits |= this.NOT_TYPE_BIT; } // CSP directives implicitly apply only to document/subdocument. @@ -3434,6 +3482,11 @@ class FilterCompiler { units.push(FilterAnchorRight.compile()); } + // Not types + if ( this.notTypeBits !== 0 ) { + units.push(FilterNotType.compile(this)); + } + // Strict partiness if ( this.strictParty !== 0 ) { units.push(FilterStrictParty.compile(this)); @@ -3500,14 +3553,11 @@ class FilterCompiler { return; } // If all network types are set, create a typeless filter - if ( - (typeBits & allNetworkTypesBits) === allNetworkTypesBits && - (this.notTypeBits & documentTypeBit) === 0 - ) { + if ( (typeBits & allNetworkTypesBits) === allNetworkTypesBits ) { writer.push([ catBits, this.tokenHash, fdata ]); typeBits &= ~allNetworkTypesBits; + if ( typeBits === 0 ) { return; } } - // One filter per specific types let bitOffset = 1; do { @@ -3531,6 +3581,7 @@ FilterCompiler.prototype.STRICT_PARTY_BIT = 0b00001000; FilterCompiler.prototype.CSP_BIT = 0b00010000; FilterCompiler.prototype.QUERYPRUNE_BIT = 0b00100000; FilterCompiler.prototype.REDIRECT_BIT = 0b01000000; +FilterCompiler.prototype.NOT_TYPE_BIT = 0b10000000; FilterCompiler.prototype.FILTER_OK = 0; FilterCompiler.prototype.FILTER_INVALID = 1; @@ -3896,14 +3947,16 @@ FilterContainer.prototype.matchAndFetchModifiers = function( fctxt, modifierType ) { + const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue; + $requestURL = urlTokenizer.setURL(fctxt.url); $requestURLRaw = fctxt.url; $docHostname = fctxt.getDocHostname(); $docDomain = fctxt.getDocDomain(); $docEntity.reset(); $requestHostname = fctxt.getHostname(); + $requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset; - const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue; const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; const catBits00 = ModifyAction; @@ -4192,6 +4245,7 @@ FilterContainer.prototype.matchRequestReverse = function(type, url) { // Prime tokenizer: we get a normalized URL in return. $requestURL = urlTokenizer.setURL(url); $requestURLRaw = url; + $requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset; this.$filterUnit = 0; // These registers will be used by various filters @@ -4234,17 +4288,17 @@ FilterContainer.prototype.matchRequestReverse = function(type, url) { * @returns {integer} 0=no match, 1=block, 2=allow (exeption) */ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) { - let typeValue = typeNameToTypeValue[fctxt.type]; + let typeBits = typeNameToTypeValue[fctxt.type]; if ( modifiers === 0 ) { - if ( typeValue === undefined ) { - typeValue = otherTypeBitValue; - } else if ( typeValue === 0 || typeValue > otherTypeBitValue ) { + if ( typeBits === undefined ) { + typeBits = otherTypeBitValue; + } else if ( typeBits === 0 || typeBits > otherTypeBitValue ) { modifiers |= 0b0001; } } if ( (modifiers & 0b0001) !== 0 ) { - if ( typeValue === undefined ) { return 0; } - typeValue |= 0x80000000; + if ( typeBits === undefined ) { return 0; } + typeBits |= 0x80000000; } const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; @@ -4259,13 +4313,14 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) { $docDomain = fctxt.getDocDomain(); $docEntity.reset(); $requestHostname = fctxt.getHostname(); + $requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset; // Evaluate block realm before allow realm, and allow realm before // block-important realm, i.e. by order of likelihood of a match. - const r = this.realmMatchString(BlockAction, typeValue, partyBits); + const r = this.realmMatchString(BlockAction, typeBits, partyBits); if ( r || (modifiers & 0b0010) !== 0 ) { - if ( this.realmMatchString(AllowAction, typeValue, partyBits) ) { - if ( this.realmMatchString(BlockImportant, typeValue, partyBits) ) { + if ( this.realmMatchString(AllowAction, typeBits, partyBits) ) { + if ( this.realmMatchString(BlockImportant, typeBits, partyBits) ) { return 1; } return 2; @@ -4278,7 +4333,7 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) { /******************************************************************************/ FilterContainer.prototype.matchHeaders = function(fctxt, headers) { - const typeValue = typeNameToTypeValue[fctxt.type] || otherTypeBitValue; + const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue; const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; // Prime tokenizer: we get a normalized URL in return. @@ -4291,13 +4346,14 @@ FilterContainer.prototype.matchHeaders = function(fctxt, headers) { $docDomain = fctxt.getDocDomain(); $docEntity.reset(); $requestHostname = fctxt.getHostname(); + $requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset; $httpHeaders.init(headers); let r = 0; - if ( this.realmMatchString(HEADERS | BlockImportant, typeValue, partyBits) ) { + if ( this.realmMatchString(HEADERS | BlockImportant, typeBits, partyBits) ) { r = 1; - } else if ( this.realmMatchString(HEADERS | BlockAction, typeValue, partyBits) ) { - r = this.realmMatchString(HEADERS | AllowAction, typeValue, partyBits) + } else if ( this.realmMatchString(HEADERS | BlockAction, typeBits, partyBits) ) { + r = this.realmMatchString(HEADERS | AllowAction, typeBits, partyBits) ? 2 : 1; }