1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-11-02 00:42:45 +01:00

Refactor implementation of ~[type] filter options

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/1730

A new filter unit, FilterNotType, is introduced to enforce
negated filter type options.

Before this commit, there was no actual negated types in the
static network filtering engine, as a negated type was internally
converted to non-negated types at compile time. As a result,
the logger would never output a matching filter with its original
negated type options.

This commit no longer causes an internal conversion to take place
at compile time, but explicitly enforce negated types at match time,
and as a result the logger will from now on output matching filter
with their original negated type options.
This commit is contained in:
Raymond Hill 2021-09-23 09:38:27 -04:00
parent f49c4e254b
commit 8f98622374
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
2 changed files with 86 additions and 30 deletions

View File

@ -154,8 +154,8 @@ const µBlock = { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 38, // Increase when compiled format changes
selfieMagic: 38, // Increase when selfie format changes
compiledMagic: 39, // Increase when compiled format changes
selfieMagic: 39, // Increase when selfie format changes
},
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -135,8 +135,6 @@ const allTypesBits =
1 << (typeNameToTypeValue['main_frame'] >>> TypeBitsOffset) - 1 |
1 << (typeNameToTypeValue['inline-font'] >>> TypeBitsOffset) - 1 |
1 << (typeNameToTypeValue['inline-script'] >>> TypeBitsOffset) - 1;
const documentTypeBit =
1 << (typeNameToTypeValue['main_frame'] >>> TypeBitsOffset) - 1;
const unsupportedTypeBit =
1 << (typeNameToTypeValue['unsupported'] >>> TypeBitsOffset) - 1;
@ -161,6 +159,8 @@ const typeValueToTypeName = [
'inline-font',
'inline-script',
'cname',
'',
'',
'webrtc',
'unsupported',
];
@ -186,6 +186,7 @@ const EMPTY_TOKEN_HASH = 0xF0000000;
// See the following as short-lived registers, used during evaluation. They are
// valid until the next evaluation.
let $requestTypeValue = 0;
let $requestURL = '';
let $requestURLRaw = '';
let $requestHostname = '';
@ -1271,6 +1272,57 @@ registerFilterClass(FilterRegex);
/******************************************************************************/
// stylesheet: 1 => bit 0
// image: 2 => bit 1
// object: 3 => bit 2
// script: 4 => bit 3
// ...
const FilterNotType = class {
constructor(notTypeBits) {
this.notTypeBits = notTypeBits;
}
match() {
return $requestTypeValue !== 0 &&
(this.notTypeBits & (1 << ($requestTypeValue - 1))) === 0;
}
logData(details) {
let bits = this.notTypeBits;
for ( let i = 1; bits !== 0 && i < typeValueToTypeName.length; i++ ) {
const bit = 1 << (i - 1);
if ( (bits & bit) === 0 ) { continue; }
bits &= ~bit;
details.options.push(`~${typeValueToTypeName[i]}`);
}
}
toSelfie() {
return [ this.fid, this.notTypeBits ];
}
static compile(details) {
return [ FilterNotType.fid, details.notTypeBits ];
}
static fromCompiled(args) {
return new FilterNotType(args[1]);
}
static fromSelfie(args) {
return new FilterNotType(args[1]);
}
static keyFromArgs(args) {
return `${args[1]}`;
}
};
registerFilterClass(FilterNotType);
/******************************************************************************/
// A helper class to parse `domain=` option.
const DomainOptIterator = class {
@ -2908,8 +2960,10 @@ class FilterCompiler {
: allTypesBits;
if ( not ) {
this.notTypeBits |= typeBit;
this.typeBits &= ~typeBit;
} else {
this.typeBits |= typeBit;
this.notTypeBits &= ~typeBit;
}
}
@ -3085,14 +3139,8 @@ class FilterCompiler {
// - no network type is present -- i.e. all network types are
// implicitly toggled on
if ( this.notTypeBits !== 0 ) {
if (
(this.notTypeBits & allNetworkTypesBits) !== 0 ||
(this.typeBits & allNetworkTypesBits) === 0
) {
this.typeBits |= allNetworkTypesBits;
}
this.typeBits &= ~this.notTypeBits;
if ( this.typeBits === 0 ) { return false; }
this.optionUnitBits |= this.NOT_TYPE_BIT;
}
// CSP directives implicitly apply only to document/subdocument.
@ -3434,6 +3482,11 @@ class FilterCompiler {
units.push(FilterAnchorRight.compile());
}
// Not types
if ( this.notTypeBits !== 0 ) {
units.push(FilterNotType.compile(this));
}
// Strict partiness
if ( this.strictParty !== 0 ) {
units.push(FilterStrictParty.compile(this));
@ -3500,14 +3553,11 @@ class FilterCompiler {
return;
}
// If all network types are set, create a typeless filter
if (
(typeBits & allNetworkTypesBits) === allNetworkTypesBits &&
(this.notTypeBits & documentTypeBit) === 0
) {
if ( (typeBits & allNetworkTypesBits) === allNetworkTypesBits ) {
writer.push([ catBits, this.tokenHash, fdata ]);
typeBits &= ~allNetworkTypesBits;
if ( typeBits === 0 ) { return; }
}
// One filter per specific types
let bitOffset = 1;
do {
@ -3531,6 +3581,7 @@ FilterCompiler.prototype.STRICT_PARTY_BIT = 0b00001000;
FilterCompiler.prototype.CSP_BIT = 0b00010000;
FilterCompiler.prototype.QUERYPRUNE_BIT = 0b00100000;
FilterCompiler.prototype.REDIRECT_BIT = 0b01000000;
FilterCompiler.prototype.NOT_TYPE_BIT = 0b10000000;
FilterCompiler.prototype.FILTER_OK = 0;
FilterCompiler.prototype.FILTER_INVALID = 1;
@ -3896,14 +3947,16 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
fctxt,
modifierType
) {
const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue;
$requestURL = urlTokenizer.setURL(fctxt.url);
$requestURLRaw = fctxt.url;
$docHostname = fctxt.getDocHostname();
$docDomain = fctxt.getDocDomain();
$docEntity.reset();
$requestHostname = fctxt.getHostname();
$requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset;
const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue;
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;
const catBits00 = ModifyAction;
@ -4192,6 +4245,7 @@ FilterContainer.prototype.matchRequestReverse = function(type, url) {
// Prime tokenizer: we get a normalized URL in return.
$requestURL = urlTokenizer.setURL(url);
$requestURLRaw = url;
$requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset;
this.$filterUnit = 0;
// These registers will be used by various filters
@ -4234,17 +4288,17 @@ FilterContainer.prototype.matchRequestReverse = function(type, url) {
* @returns {integer} 0=no match, 1=block, 2=allow (exeption)
*/
FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) {
let typeValue = typeNameToTypeValue[fctxt.type];
let typeBits = typeNameToTypeValue[fctxt.type];
if ( modifiers === 0 ) {
if ( typeValue === undefined ) {
typeValue = otherTypeBitValue;
} else if ( typeValue === 0 || typeValue > otherTypeBitValue ) {
if ( typeBits === undefined ) {
typeBits = otherTypeBitValue;
} else if ( typeBits === 0 || typeBits > otherTypeBitValue ) {
modifiers |= 0b0001;
}
}
if ( (modifiers & 0b0001) !== 0 ) {
if ( typeValue === undefined ) { return 0; }
typeValue |= 0x80000000;
if ( typeBits === undefined ) { return 0; }
typeBits |= 0x80000000;
}
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;
@ -4259,13 +4313,14 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) {
$docDomain = fctxt.getDocDomain();
$docEntity.reset();
$requestHostname = fctxt.getHostname();
$requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset;
// Evaluate block realm before allow realm, and allow realm before
// block-important realm, i.e. by order of likelihood of a match.
const r = this.realmMatchString(BlockAction, typeValue, partyBits);
const r = this.realmMatchString(BlockAction, typeBits, partyBits);
if ( r || (modifiers & 0b0010) !== 0 ) {
if ( this.realmMatchString(AllowAction, typeValue, partyBits) ) {
if ( this.realmMatchString(BlockImportant, typeValue, partyBits) ) {
if ( this.realmMatchString(AllowAction, typeBits, partyBits) ) {
if ( this.realmMatchString(BlockImportant, typeBits, partyBits) ) {
return 1;
}
return 2;
@ -4278,7 +4333,7 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) {
/******************************************************************************/
FilterContainer.prototype.matchHeaders = function(fctxt, headers) {
const typeValue = typeNameToTypeValue[fctxt.type] || otherTypeBitValue;
const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue;
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;
// Prime tokenizer: we get a normalized URL in return.
@ -4291,13 +4346,14 @@ FilterContainer.prototype.matchHeaders = function(fctxt, headers) {
$docDomain = fctxt.getDocDomain();
$docEntity.reset();
$requestHostname = fctxt.getHostname();
$requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset;
$httpHeaders.init(headers);
let r = 0;
if ( this.realmMatchString(HEADERS | BlockImportant, typeValue, partyBits) ) {
if ( this.realmMatchString(HEADERS | BlockImportant, typeBits, partyBits) ) {
r = 1;
} else if ( this.realmMatchString(HEADERS | BlockAction, typeValue, partyBits) ) {
r = this.realmMatchString(HEADERS | AllowAction, typeValue, partyBits)
} else if ( this.realmMatchString(HEADERS | BlockAction, typeBits, partyBits) ) {
r = this.realmMatchString(HEADERS | AllowAction, typeBits, partyBits)
? 2
: 1;
}