1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-10-06 09:37:12 +02:00

Implement network filter option replace=

Reference documentation:
https://adguard.com/kb/general/ad-filtering/create-own-filters/#replace-modifier

This is a network filter option which can only be loaded from a
trusted source.

Since this filter is about modifying the response body, it currently
only works in Firefox.

As discussed with filter list maintainers.
This commit is contained in:
Raymond Hill 2023-11-03 18:59:33 -04:00
parent aeff955667
commit 7c3e060c01
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
7 changed files with 543 additions and 450 deletions

View File

@ -184,8 +184,8 @@ const µBlock = { // jshint ignore:line
// Read-only // Read-only
systemSettings: { systemSettings: {
compiledMagic: 56, // Increase when compiled format changes compiledMagic: 57, // Increase when compiled format changes
selfieMagic: 56, // Increase when selfie format changes selfieMagic: 57, // Increase when selfie format changes
}, },
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -174,6 +174,7 @@ const loadBenchmarkDataset = (( ) => {
let removeparamCount = 0; let removeparamCount = 0;
let cspCount = 0; let cspCount = 0;
let permissionsCount = 0; let permissionsCount = 0;
let replaceCount = 0;
for ( let i = 0; i < requests.length; i++ ) { for ( let i = 0; i < requests.length; i++ ) {
const request = requests[i]; const request = requests[i];
fctxt.setURL(request.url); fctxt.setURL(request.url);
@ -202,6 +203,9 @@ const loadBenchmarkDataset = (( ) => {
} }
} }
staticNetFilteringEngine.matchHeaders(fctxt, []); staticNetFilteringEngine.matchHeaders(fctxt, []);
if ( staticNetFilteringEngine.matchAndFetchModifiers(fctxt, 'replace') ) {
replaceCount += 1;
}
} else if ( redirectEngine !== undefined ) { } else if ( redirectEngine !== undefined ) {
if ( staticNetFilteringEngine.redirectRequest(redirectEngine, fctxt) ) { if ( staticNetFilteringEngine.redirectRequest(redirectEngine, fctxt) ) {
redirectCount += 1; redirectCount += 1;
@ -222,6 +226,7 @@ const loadBenchmarkDataset = (( ) => {
`\tremoveparam=: ${removeparamCount}`, `\tremoveparam=: ${removeparamCount}`,
`\tcsp=: ${cspCount}`, `\tcsp=: ${cspCount}`,
`\tpermissions=: ${permissionsCount}`, `\tpermissions=: ${permissionsCount}`,
`\treplace=: ${replaceCount}`,
]; ];
const s = output.join('\n'); const s = output.join('\n');
console.info(s); console.info(s);

View File

@ -175,6 +175,7 @@ const FilteringContext = class {
fromFilteringContext(other) { fromFilteringContext(other) {
this.realm = other.realm; this.realm = other.realm;
this.id = other.id;
this.type = other.type; this.type = other.type;
this.method = other.method; this.method = other.method;
this.url = other.url; this.url = other.url;

View File

@ -429,15 +429,15 @@ htmlFilteringEngine.retrieve = function(details) {
} }
}; };
htmlFilteringEngine.apply = function(doc, details) { htmlFilteringEngine.apply = function(doc, details, selectors) {
docRegister = doc; docRegister = doc;
let modified = false; let modified = false;
for ( const selector of details.selectors.plains ) { for ( const selector of selectors.plains ) {
if ( applyCSSSelector(details, selector) ) { if ( applyCSSSelector(details, selector) ) {
modified = true; modified = true;
} }
} }
for ( const selector of details.selectors.procedurals ) { for ( const selector of selectors.procedurals ) {
if ( applyProceduralSelector(details, selector) ) { if ( applyProceduralSelector(details, selector) ) {
modified = true; modified = true;
} }

View File

@ -187,6 +187,7 @@ export const NODE_TYPE_NET_OPTION_NAME_POPUP = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REDIRECT = iota++; export const NODE_TYPE_NET_OPTION_NAME_REDIRECT = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE = iota++; export const NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM = iota++; export const NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REPLACE = iota++;
export const NODE_TYPE_NET_OPTION_NAME_SCRIPT = iota++; export const NODE_TYPE_NET_OPTION_NAME_SCRIPT = iota++;
export const NODE_TYPE_NET_OPTION_NAME_SHIDE = iota++; export const NODE_TYPE_NET_OPTION_NAME_SHIDE = iota++;
export const NODE_TYPE_NET_OPTION_NAME_TO = iota++; export const NODE_TYPE_NET_OPTION_NAME_TO = iota++;
@ -265,6 +266,7 @@ export const nodeTypeFromOptionName = new Map([
/* synonym */ [ 'rewrite', NODE_TYPE_NET_OPTION_NAME_REDIRECT ], /* synonym */ [ 'rewrite', NODE_TYPE_NET_OPTION_NAME_REDIRECT ],
[ 'redirect-rule', NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE ], [ 'redirect-rule', NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE ],
[ 'removeparam', NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM ], [ 'removeparam', NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM ],
[ 'replace', NODE_TYPE_NET_OPTION_NAME_REPLACE ],
/* synonym */ [ 'queryprune', NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM ], /* synonym */ [ 'queryprune', NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM ],
[ 'script', NODE_TYPE_NET_OPTION_NAME_SCRIPT ], [ 'script', NODE_TYPE_NET_OPTION_NAME_SCRIPT ],
[ 'shide', NODE_TYPE_NET_OPTION_NAME_SHIDE ], [ 'shide', NODE_TYPE_NET_OPTION_NAME_SHIDE ],
@ -597,9 +599,14 @@ const exCharCodeAt = (s, i) => {
return pos >= 0 ? s.charCodeAt(pos) : -1; return pos >= 0 ? s.charCodeAt(pos) : -1;
}; };
// Build a global regex which matches an escaped occurrence of `c`, i.e. `c`
// immediately preceded by a backslash which is not itself escaped (the
// backslash follows an even count of backslashes). Whatever precedes the
// escaping backslash is captured as group 1, so that callers can unescape
// with a replacement string of the form `$1<c>`.
const toEscapedCharRegex = c => {
    // Neutralize regex metacharacters so that `c` is matched literally.
    const literal = c.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // (start-of-string or non-backslash)(pairs of backslashes) + one backslash
    const reEscapePrefix = /((?:^|[^\\])(?:\\\\)*)\\/;
    return new RegExp(reEscapePrefix.source + literal, 'g');
};
/******************************************************************************/ /******************************************************************************/
class argListParser { class ArgListParser {
constructor(separatorChar = ',', mustQuote = false) { constructor(separatorChar = ',', mustQuote = false) {
this.separatorChar = this.actualSeparatorChar = separatorChar; this.separatorChar = this.actualSeparatorChar = separatorChar;
this.separatorCode = this.actualSeparatorCode = separatorChar.charCodeAt(0); this.separatorCode = this.actualSeparatorCode = separatorChar.charCodeAt(0);
@ -612,10 +619,10 @@ class argListParser {
this.reWhitespaceStart = /^\s+/; this.reWhitespaceStart = /^\s+/;
this.reWhitespaceEnd = /\s+$/; this.reWhitespaceEnd = /\s+$/;
this.reOddTrailingEscape = /(?:^|[^\\])(?:\\\\)*\\$/; this.reOddTrailingEscape = /(?:^|[^\\])(?:\\\\)*\\$/;
this.reEscapedDoubleQuote = /((?:^|[^\\])(?:\\\\)*)\\"/g; this.reEscapedDoubleQuote = toEscapedCharRegex('"');
this.reEscapedSingleQuote = /((?:^|[^\\])(?:\\\\)*)\\'/g; this.reEscapedSingleQuote = toEscapedCharRegex("'");
this.reEscapedBacktick = /((?:^|[^\\])(?:\\\\)*)\\`/g; this.reEscapedBacktick = toEscapedCharRegex('`');
this.reEscapedSeparator = new RegExp(`((?:^|[^\\\\])(?:\\\\\\\\)*)\\\\${this.separatorChar}`, 'g'); this.reEscapedSeparator = toEscapedCharRegex(this.separatorChar);
this.unescapedSeparator = `$1${this.separatorChar}`; this.unescapedSeparator = `$1${this.separatorChar}`;
} }
nextArg(pattern, beg = 0) { nextArg(pattern, beg = 0) {
@ -871,7 +878,7 @@ export class AstFilterParser {
this.rePlainEntity = /^(?:[\da-z][\da-z_-]*\.)+\*$/; this.rePlainEntity = /^(?:[\da-z][\da-z_-]*\.)+\*$/;
this.reHostsSink = /^[\w%.:\[\]-]+\s+/; this.reHostsSink = /^[\w%.:\[\]-]+\s+/;
this.reHostsRedirect = /(?:0\.0\.0\.0|broadcasthost|local|localhost(?:\.localdomain)?|ip6-\w+)(?:[^\w.-]|$)/; this.reHostsRedirect = /(?:0\.0\.0\.0|broadcasthost|local|localhost(?:\.localdomain)?|ip6-\w+)(?:[^\w.-]|$)/;
this.reNetOptionComma = /,(?!\d*\})/g; this.reNetOptionComma = /,(?:~?[13a-z-]+(?:=.*?)?|_+)(?:,|$)/;
this.rePointlessLeftAnchor = /^\|\|?\*+/; this.rePointlessLeftAnchor = /^\|\|?\*+/;
this.reIsTokenChar = /^[%0-9A-Za-z]/; this.reIsTokenChar = /^[%0-9A-Za-z]/;
this.rePointlessLeadingWildcards = /^(\*+)[^%0-9A-Za-z\u{a0}-\u{10FFFF}]/u; this.rePointlessLeadingWildcards = /^(\*+)[^%0-9A-Za-z\u{a0}-\u{10FFFF}]/u;
@ -898,7 +905,7 @@ export class AstFilterParser {
this.reGoodRegexToken = /[^\x01%0-9A-Za-z][%0-9A-Za-z]{7,}|[^\x01%0-9A-Za-z][%0-9A-Za-z]{1,6}[^\x01%0-9A-Za-z]/; this.reGoodRegexToken = /[^\x01%0-9A-Za-z][%0-9A-Za-z]{7,}|[^\x01%0-9A-Za-z][%0-9A-Za-z]{1,6}[^\x01%0-9A-Za-z]/;
this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/; this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/;
this.reNoopOption = /^_+$/; this.reNoopOption = /^_+$/;
this.scriptletArgListParser = new argListParser(','); this.scriptletArgListParser = new ArgListParser(',');
} }
parse(raw) { parse(raw) {
@ -1414,6 +1421,7 @@ export class AstFilterParser {
break; break;
case NODE_TYPE_NET_OPTION_NAME_REDIRECT: case NODE_TYPE_NET_OPTION_NAME_REDIRECT:
case NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE: case NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE:
case NODE_TYPE_NET_OPTION_NAME_REPLACE:
case NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM: case NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
realBad = isNegated || (isException || hasValue) === false || realBad = isNegated || (isException || hasValue) === false ||
modifierType !== 0; modifierType !== 0;
@ -1474,6 +1482,20 @@ export class AstFilterParser {
realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount; realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount;
break; break;
} }
case NODE_TYPE_NET_OPTION_NAME_REPLACE: {
realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount;
if ( realBad ) { break; }
if ( this.options.trustedSource !== true ) {
this.astError = AST_ERROR_UNTRUSTED_SOURCE;
realBad = true;
break;
}
if ( this.interactive ) {
const value = this.getNetOptionValue(NODE_TYPE_NET_OPTION_NAME_REPLACE);
realBad = parseReplaceValue(value) === undefined;
}
break;
}
case NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM: case NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount; realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount;
if ( realBad ) { break; } if ( realBad ) { break; }
@ -1959,9 +1981,8 @@ export class AstFilterParser {
} }
endOfNetOption(s, beg) { endOfNetOption(s, beg) {
this.reNetOptionComma.lastIndex = beg; const match = this.reNetOptionComma.exec(s.slice(beg));
const match = this.reNetOptionComma.exec(s); return match !== null ? beg + match.index : s.length;
return match !== null ? match.index : s.length;
} }
parseNetOption(parent) { parseNetOption(parent) {
@ -2975,6 +2996,39 @@ export function parseHeaderValue(arg) {
return out; return out;
} }
// https://adguard.com/kb/general/ad-filtering/create-own-filters/#replace-modifier
// Parse the value of a `replace=` option, expected to be of the form
// `/pattern/replacement/flags`.
//
// Returns `{ re, replacement }`, where `re` is the RegExp built from the
// pattern and flags. Returns `undefined` when the value does not start with
// `/`, when no separator follows the replacement, or when the RegExp cannot
// be constructed.
export function parseReplaceValue(s) {
    if ( s.charCodeAt(0) !== 0x2F /* / */ ) { return; }
    const parser = new ArgListParser('/');
    // Extract the argument last located by the parser, normalized when the
    // parser flagged it as needing a transform.
    const currentArg = ( ) => {
        const raw = s.slice(parser.argBeg, parser.argEnd);
        return parser.transform ? parser.normalizeArg(raw) : raw;
    };
    // Unescape `\$` and `\,`. In a replacement string, `$$` yields a single
    // literal `$`, hence `dollarSign` decides how many `$` are emitted.
    const unescape = (arg, dollarSign) => arg
        .replace(parseReplaceValue.reEscapedDollarSign, dollarSign)
        .replace(parseReplaceValue.reEscapedComma, '$1,');
    // First argument: the pattern, starting right after the leading `/`.
    parser.nextArg(s, 1);
    const pattern = unescape(currentArg(), '$1$$$');
    // Second argument: the replacement, which must be closed by a separator.
    parser.nextArg(s, parser.separatorEnd);
    if ( parser.separatorEnd === parser.separatorBeg ) { return; }
    const replacement = unescape(currentArg(), '$1$$');
    // Whatever follows the last separator is used as the regex flags.
    const flags = s.slice(parser.separatorEnd);
    try {
        return { re: new RegExp(pattern, flags), replacement };
    } catch(_) {
        // Invalid pattern or flags: fall through, `undefined` is returned.
    }
}
parseReplaceValue.reEscapedDollarSign = toEscapedCharRegex('$');
parseReplaceValue.reEscapedComma = toEscapedCharRegex(',');
/******************************************************************************/ /******************************************************************************/
export const netOptionTokenDescriptors = new Map([ export const netOptionTokenDescriptors = new Map([
@ -3025,6 +3079,7 @@ export const netOptionTokenDescriptors = new Map([
/* synonym */ [ 'rewrite', { mustAssign: true } ], /* synonym */ [ 'rewrite', { mustAssign: true } ],
[ 'redirect-rule', { mustAssign: true } ], [ 'redirect-rule', { mustAssign: true } ],
[ 'removeparam', { } ], [ 'removeparam', { } ],
[ 'replace', { mustAssign: true } ],
/* synonym */ [ 'queryprune', { } ], /* synonym */ [ 'queryprune', { } ],
[ 'script', { canNegate: true } ], [ 'script', { canNegate: true } ],
[ 'shide', { } ], [ 'shide', { } ],

View File

@ -69,23 +69,29 @@ const keyvalStore = typeof vAPI !== 'undefined'
// |+-------------- bit 10: headers-based filters // |+-------------- bit 10: headers-based filters
// +--------------- bit 11-15: unused // +--------------- bit 11-15: unused
const CategoryCount = 1 << 0xb; // shift left to first unused bit
const RealmBitsMask = 0b00000000111; const RealmBitsMask = 0b00000000111;
const ActionBitsMask = 0b00000000011; const ActionBitsMask = 0b00000000011;
const TypeBitsMask = 0b01111100000; const TypeBitsMask = 0b01111100000;
const TypeBitsOffset = 5; const TypeBitsOffset = 5;
const BlockAction = 0b00000000000; const BLOCK_REALM = 0b00000000000000000;
const AllowAction = 0b00000000001; const ALLOW_REALM = 0b00000000000000001;
const Important = 0b00000000010; const IMPORTANT_REALM = 0b00000000000000010;
const BlockImportant = BlockAction | Important; const BLOCKIMPORTANT_REALM = BLOCK_REALM | IMPORTANT_REALM;
const ModifyAction = 0b00000000100; const ANYPARTY_REALM = 0b00000000000000000;
const AnyParty = 0b00000000000; const FIRSTPARTY_REALM = 0b00000000000001000;
const FirstParty = 0b00000001000; const THIRDPARTY_REALM = 0b00000000000010000;
const ThirdParty = 0b00000010000; const ALLPARTIES_REALM = FIRSTPARTY_REALM | THIRDPARTY_REALM;
const AllParties = 0b00000011000; const HEADERS_REALM = 0b00000010000000000;
const HEADERS = 0b10000000000; const REDIRECT_REALM = 0b00000100000000000;
const REMOVEPARAM_REALM = 0b00001000000000000;
const CSP_REALM = 0b00010000000000000;
const PERMISSIONS_REALM = 0b00100000000000000;
const URLTRANSFORM_REALM = 0b01000000000000000;
const REPLACE_REALM = 0b10000000000000000;
const MODIFY_REALMS = REDIRECT_REALM | CSP_REALM |
REMOVEPARAM_REALM | PERMISSIONS_REALM |
URLTRANSFORM_REALM | REPLACE_REALM;
const typeNameToTypeValue = { const typeNameToTypeValue = {
'no_type': 0 << TypeBitsOffset, 'no_type': 0 << TypeBitsOffset,
@ -186,6 +192,17 @@ const MODIFIER_TYPE_REMOVEPARAM = 3;
const MODIFIER_TYPE_CSP = 4; const MODIFIER_TYPE_CSP = 4;
const MODIFIER_TYPE_PERMISSIONS = 5; const MODIFIER_TYPE_PERMISSIONS = 5;
const MODIFIER_TYPE_URLTRANSFORM = 6; const MODIFIER_TYPE_URLTRANSFORM = 6;
const MODIFIER_TYPE_REPLACE = 7;
// Lookup table from a MODIFIER_TYPE_* value to its *_REALM bit. Note that
// both the redirect and redirect-rule modifier types map to REDIRECT_REALM.
const modifierBitsFromType = new Map([
[ MODIFIER_TYPE_REDIRECT, REDIRECT_REALM ],
[ MODIFIER_TYPE_REDIRECTRULE, REDIRECT_REALM ],
[ MODIFIER_TYPE_REMOVEPARAM, REMOVEPARAM_REALM ],
[ MODIFIER_TYPE_CSP, CSP_REALM ],
[ MODIFIER_TYPE_PERMISSIONS, PERMISSIONS_REALM ],
[ MODIFIER_TYPE_URLTRANSFORM, URLTRANSFORM_REALM ],
[ MODIFIER_TYPE_REPLACE, REPLACE_REALM ],
]);
const modifierTypeFromName = new Map([ const modifierTypeFromName = new Map([
[ 'redirect', MODIFIER_TYPE_REDIRECT ], [ 'redirect', MODIFIER_TYPE_REDIRECT ],
@ -194,6 +211,7 @@ const modifierTypeFromName = new Map([
[ 'csp', MODIFIER_TYPE_CSP ], [ 'csp', MODIFIER_TYPE_CSP ],
[ 'permissions', MODIFIER_TYPE_PERMISSIONS ], [ 'permissions', MODIFIER_TYPE_PERMISSIONS ],
[ 'urltransform', MODIFIER_TYPE_URLTRANSFORM ], [ 'urltransform', MODIFIER_TYPE_URLTRANSFORM ],
[ 'replace', MODIFIER_TYPE_REPLACE ],
]); ]);
const modifierNameFromType = new Map([ const modifierNameFromType = new Map([
@ -203,6 +221,7 @@ const modifierNameFromType = new Map([
[ MODIFIER_TYPE_CSP, 'csp' ], [ MODIFIER_TYPE_CSP, 'csp' ],
[ MODIFIER_TYPE_PERMISSIONS, 'permissions' ], [ MODIFIER_TYPE_PERMISSIONS, 'permissions' ],
[ MODIFIER_TYPE_URLTRANSFORM, 'urltransform' ], [ MODIFIER_TYPE_URLTRANSFORM, 'urltransform' ],
[ MODIFIER_TYPE_REPLACE, 'replace' ],
]); ]);
//const typeValueFromCatBits = catBits => (catBits >>> TypeBitsOffset) & 0b11111; //const typeValueFromCatBits = catBits => (catBits >>> TypeBitsOffset) & 0b11111;
@ -339,7 +358,7 @@ class LogData {
this.raw = this.regex = ''; this.raw = this.regex = '';
return; return;
} }
this.result = (categoryBits & AllowAction) === 0 ? 1 : 2; this.result = (categoryBits & ALLOW_REALM) === 0 ? 1 : 2;
const pattern = []; const pattern = [];
const regex = []; const regex = [];
const options = []; const options = [];
@ -356,9 +375,9 @@ class LogData {
isRegex: false, isRegex: false,
}; };
filterLogData(iunit, logData); filterLogData(iunit, logData);
if ( (categoryBits & ThirdParty) !== 0 ) { if ( (categoryBits & THIRDPARTY_REALM) !== 0 ) {
logData.options.unshift('3p'); logData.options.unshift('3p');
} else if ( (categoryBits & FirstParty) !== 0 ) { } else if ( (categoryBits & FIRSTPARTY_REALM) !== 0 ) {
logData.options.unshift('1p'); logData.options.unshift('1p');
} }
const type = categoryBits & TypeBitsMask; const type = categoryBits & TypeBitsMask;
@ -373,7 +392,7 @@ class LogData {
) { ) {
raw += '*'; raw += '*';
} }
if ( (categoryBits & AllowAction) !== 0 ) { if ( (categoryBits & ALLOW_REALM) !== 0 ) {
raw = '@@' + raw; raw = '@@' + raw;
} }
if ( denyallow.length !== 0 ) { if ( denyallow.length !== 0 ) {
@ -2171,7 +2190,7 @@ class FilterModifierResult {
} }
get result() { get result() {
return (this.bits & AllowAction) === 0 ? 1 : 2; return (this.bits & ALLOW_REALM) === 0 ? 1 : 2;
} }
get value() { get value() {
@ -3188,6 +3207,7 @@ class FilterCompiler {
[ sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE, MODIFIER_TYPE_REDIRECTRULE ], [ sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE, MODIFIER_TYPE_REDIRECTRULE ],
[ sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM, MODIFIER_TYPE_REMOVEPARAM ], [ sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM, MODIFIER_TYPE_REMOVEPARAM ],
[ sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM, MODIFIER_TYPE_URLTRANSFORM ], [ sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM, MODIFIER_TYPE_URLTRANSFORM ],
[ sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE, MODIFIER_TYPE_REPLACE ],
]); ]);
// These top 100 "bad tokens" are collated using the "miss" histogram // These top 100 "bad tokens" are collated using the "miss" histogram
// from tokenHistograms(). The "score" is their occurrence among the // from tokenHistograms(). The "score" is their occurrence among the
@ -3299,7 +3319,7 @@ class FilterCompiler {
} }
reset() { reset() {
this.action = BlockAction; this.action = BLOCK_REALM;
// anchor: bit vector // anchor: bit vector
// 0000 (0x0): no anchoring // 0000 (0x0): no anchoring
// 0001 (0x1): anchored to the end of the URL. // 0001 (0x1): anchored to the end of the URL.
@ -3314,7 +3334,7 @@ class FilterCompiler {
this.modifyValue = undefined; this.modifyValue = undefined;
this.pattern = ''; this.pattern = '';
this.patternMatchCase = false; this.patternMatchCase = false;
this.party = AnyParty; this.party = ANYPARTY_REALM;
this.optionUnitBits = 0; this.optionUnitBits = 0;
this.fromDomainOpt = ''; this.fromDomainOpt = '';
this.toDomainOpt = ''; this.toDomainOpt = '';
@ -3395,7 +3415,7 @@ class FilterCompiler {
if ( not ) { if ( not ) {
firstParty = !firstParty; firstParty = !firstParty;
} }
this.party |= firstParty ? FirstParty : ThirdParty; this.party |= firstParty ? FIRSTPARTY_REALM : THIRDPARTY_REALM;
} }
processHostnameList(iter, out = []) { processHostnameList(iter, out = []) {
@ -3421,7 +3441,7 @@ class FilterCompiler {
processCspOption(value) { processCspOption(value) {
this.modifyType = MODIFIER_TYPE_CSP; this.modifyType = MODIFIER_TYPE_CSP;
this.modifyValue = value || ''; this.modifyValue = value || '';
this.optionUnitBits |= this.CSP_BIT; this.optionUnitBits |= MODIFY_BIT;
return true; return true;
} }
@ -3435,7 +3455,7 @@ class FilterCompiler {
parser.getNetFilterDenyallowOptionIterator(), parser.getNetFilterDenyallowOptionIterator(),
); );
if ( this.denyallowOpt === '' ) { return false; } if ( this.denyallowOpt === '' ) { return false; }
this.optionUnitBits |= this.DENYALLOW_BIT; this.optionUnitBits |= DENYALLOW_BIT;
break; break;
case sfp.NODE_TYPE_NET_OPTION_NAME_FROM: case sfp.NODE_TYPE_NET_OPTION_NAME_FROM:
this.fromDomainOpt = this.processHostnameList( this.fromDomainOpt = this.processHostnameList(
@ -3443,58 +3463,44 @@ class FilterCompiler {
this.fromDomainOptList this.fromDomainOptList
); );
if ( this.fromDomainOpt === '' ) { return false; } if ( this.fromDomainOpt === '' ) { return false; }
this.optionUnitBits |= this.FROM_BIT; this.optionUnitBits |= FROM_BIT;
break; break;
case sfp.NODE_TYPE_NET_OPTION_NAME_HEADER: { case sfp.NODE_TYPE_NET_OPTION_NAME_HEADER: {
this.headerOpt = parser.getNetOptionValue(id) || ''; this.headerOpt = parser.getNetOptionValue(id) || '';
this.optionUnitBits |= this.HEADER_BIT; this.optionUnitBits |= HEADER_BIT;
break; break;
} }
case sfp.NODE_TYPE_NET_OPTION_NAME_METHOD: case sfp.NODE_TYPE_NET_OPTION_NAME_METHOD:
this.processMethodOption(parser.getNetOptionValue(id)); this.processMethodOption(parser.getNetOptionValue(id));
this.optionUnitBits |= this.METHOD_BIT; this.optionUnitBits |= METHOD_BIT;
break; break;
case sfp.NODE_TYPE_NET_OPTION_NAME_PERMISSIONS: case sfp.NODE_TYPE_NET_OPTION_NAME_PERMISSIONS:
case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE:
case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM:
case sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE:
case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) { if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) {
return false; return false;
} }
this.optionUnitBits |= this.PERMISSIONS_BIT; this.optionUnitBits |= MODIFY_BIT;
break; break;
case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT: { case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT: {
const actualId = this.action === AllowAction const actualId = this.action === ALLOW_REALM
? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE ? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE
: id; : id;
if ( this.processModifierOption(actualId, parser.getNetOptionValue(id)) === false ) { if ( this.processModifierOption(actualId, parser.getNetOptionValue(id)) === false ) {
return false; return false;
} }
this.optionUnitBits |= this.REDIRECT_BIT; this.optionUnitBits |= MODIFY_BIT;
break; break;
} }
case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE:
if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) {
return false;
}
this.optionUnitBits |= this.REDIRECT_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM:
if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) {
return false;
}
this.optionUnitBits |= this.REMOVEPARAM_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_TO: case sfp.NODE_TYPE_NET_OPTION_NAME_TO:
this.toDomainOpt = this.processHostnameList( this.toDomainOpt = this.processHostnameList(
parser.getNetFilterToOptionIterator(), parser.getNetFilterToOptionIterator(),
this.toDomainOptList this.toDomainOptList
); );
if ( this.toDomainOpt === '' ) { return false; } if ( this.toDomainOpt === '' ) { return false; }
this.optionUnitBits |= this.TO_BIT; this.optionUnitBits |= TO_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) {
return false;
}
this.optionUnitBits |= this.REDIRECT_BIT;
break; break;
default: default:
break; break;
@ -3511,7 +3517,7 @@ class FilterCompiler {
} }
if ( parser.isException() ) { if ( parser.isException() ) {
this.action = AllowAction; this.action = ALLOW_REALM;
} }
if ( parser.isLeftHnAnchored() ) { if ( parser.isLeftHnAnchored() ) {
@ -3539,14 +3545,14 @@ class FilterCompiler {
break; break;
case sfp.NODE_TYPE_NET_OPTION_NAME_STRICT1P: case sfp.NODE_TYPE_NET_OPTION_NAME_STRICT1P:
this.strictParty = this.strictParty === -1 ? 0 : 1; this.strictParty = this.strictParty === -1 ? 0 : 1;
this.optionUnitBits |= this.STRICT_PARTY_BIT; this.optionUnitBits |= STRICT_PARTY_BIT;
break; break;
case sfp.NODE_TYPE_NET_OPTION_NAME_3P: case sfp.NODE_TYPE_NET_OPTION_NAME_3P:
this.processPartyOption(false, parser.isNegatedOption(type)); this.processPartyOption(false, parser.isNegatedOption(type));
break; break;
case sfp.NODE_TYPE_NET_OPTION_NAME_STRICT3P: case sfp.NODE_TYPE_NET_OPTION_NAME_STRICT3P:
this.strictParty = this.strictParty === 1 ? 0 : -1; this.strictParty = this.strictParty === 1 ? 0 : -1;
this.optionUnitBits |= this.STRICT_PARTY_BIT; this.optionUnitBits |= STRICT_PARTY_BIT;
break; break;
case sfp.NODE_TYPE_NET_OPTION_NAME_ALL: case sfp.NODE_TYPE_NET_OPTION_NAME_ALL:
this.processTypeOption(-1); this.processTypeOption(-1);
@ -3586,6 +3592,7 @@ class FilterCompiler {
case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT: case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT:
case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE: case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE:
case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM: case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM:
case sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE:
case sfp.NODE_TYPE_NET_OPTION_NAME_TO: case sfp.NODE_TYPE_NET_OPTION_NAME_TO:
case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM: case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
if ( this.processOptionWithValue(parser, type) === false ) { if ( this.processOptionWithValue(parser, type) === false ) {
@ -3599,31 +3606,31 @@ class FilterCompiler {
break; break;
} }
case sfp.NODE_TYPE_NET_OPTION_NAME_EMPTY: { case sfp.NODE_TYPE_NET_OPTION_NAME_EMPTY: {
const id = this.action === AllowAction const id = this.action === ALLOW_REALM
? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE ? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE
: sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT; : sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT;
if ( this.processModifierOption(id, 'empty') === false ) { if ( this.processModifierOption(id, 'empty') === false ) {
return this.FILTER_INVALID; return this.FILTER_INVALID;
} }
this.optionUnitBits |= this.REDIRECT_BIT; this.optionUnitBits |= MODIFY_BIT;
break; break;
} }
case sfp.NODE_TYPE_NET_OPTION_NAME_IMPORTANT: case sfp.NODE_TYPE_NET_OPTION_NAME_IMPORTANT:
this.optionUnitBits |= this.IMPORTANT_BIT; this.optionUnitBits |= IMPORTANT_BIT;
this.action = BlockImportant; this.action = BLOCKIMPORTANT_REALM;
break; break;
case sfp.NODE_TYPE_NET_OPTION_NAME_MATCHCASE: case sfp.NODE_TYPE_NET_OPTION_NAME_MATCHCASE:
this.patternMatchCase = true; this.patternMatchCase = true;
break; break;
case sfp.NODE_TYPE_NET_OPTION_NAME_MP4: { case sfp.NODE_TYPE_NET_OPTION_NAME_MP4: {
const id = this.action === AllowAction const id = this.action === ALLOW_REALM
? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE ? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE
: sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT; : sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT;
if ( this.processModifierOption(id, 'noopmp4-1s') === false ) { if ( this.processModifierOption(id, 'noopmp4-1s') === false ) {
return this.FILTER_INVALID; return this.FILTER_INVALID;
} }
this.processTypeOption(sfp.NODE_TYPE_NET_OPTION_NAME_MEDIA, false); this.processTypeOption(sfp.NODE_TYPE_NET_OPTION_NAME_MEDIA, false);
this.optionUnitBits |= this.REDIRECT_BIT; this.optionUnitBits |= MODIFY_BIT;
break; break;
} }
default: default:
@ -3631,8 +3638,8 @@ class FilterCompiler {
} }
} }
if ( this.party === AllParties ) { if ( this.party === ALLPARTIES_REALM ) {
this.party = AnyParty; this.party = ANYPARTY_REALM;
} }
// Negated network types? Toggle on all network type bits. // Negated network types? Toggle on all network type bits.
@ -3649,7 +3656,7 @@ class FilterCompiler {
} else { } else {
this.typeBits &= ~this.notTypeBits; this.typeBits &= ~this.notTypeBits;
} }
this.optionUnitBits |= this.NOT_TYPE_BIT; this.optionUnitBits |= NOT_TYPE_BIT;
} }
// CSP/permissions options implicitly apply only to // CSP/permissions options implicitly apply only to
@ -3813,7 +3820,7 @@ class FilterCompiler {
} }
isJustOrigin() { isJustOrigin() {
if ( this.optionUnitBits !== this.FROM_BIT ) { return false; } if ( this.optionUnitBits !== FROM_BIT ) { return false; }
if ( this.isRegex ) { return false; } if ( this.isRegex ) { return false; }
if ( /[\/~]/.test(this.fromDomainOpt) ) { return false; } if ( /[\/~]/.test(this.fromDomainOpt) ) { return false; }
if ( this.pattern === '*' ) { return true; } if ( this.pattern === '*' ) { return true; }
@ -3857,7 +3864,7 @@ class FilterCompiler {
if ( parser.options.toDNR !== true ) { if ( parser.options.toDNR !== true ) {
const parsedBlock = this.clone(); const parsedBlock = this.clone();
parsedBlock.modifyType = undefined; parsedBlock.modifyType = undefined;
parsedBlock.optionUnitBits &= ~this.REDIRECT_BIT; parsedBlock.optionUnitBits &= ~MODIFY_BIT;
parsedBlock.compileToFilter(writer); parsedBlock.compileToFilter(writer);
} }
} }
@ -3958,14 +3965,14 @@ class FilterCompiler {
// Header // Header
if ( this.headerOpt !== undefined ) { if ( this.headerOpt !== undefined ) {
units.push(FilterOnHeaders.compile(this)); units.push(FilterOnHeaders.compile(this));
this.action |= HEADERS; this.action |= HEADERS_REALM;
} }
// Important // Important
// //
// IMPORTANT: must always appear at the end of the sequence, so as to // IMPORTANT: must always appear at the end of the sequence, so as to
// ensure $isBlockImportant is set only for matching filters. // ensure $isBlockImportant is set only for matching filters.
if ( (this.optionUnitBits & this.IMPORTANT_BIT) !== 0 ) { if ( (this.optionUnitBits & IMPORTANT_BIT) !== 0 ) {
units.push(FilterImportant.compile()); units.push(FilterImportant.compile());
} }
@ -3974,7 +3981,8 @@ class FilterCompiler {
// IMPORTANT: the modifier unit MUST always appear first in a sequence // IMPORTANT: the modifier unit MUST always appear first in a sequence
if ( this.modifyType !== undefined ) { if ( this.modifyType !== undefined ) {
units.unshift(FilterModifier.compile(this)); units.unshift(FilterModifier.compile(this));
this.action = (this.action & ~ActionBitsMask) | ModifyAction; this.action = (this.action & ~ActionBitsMask) |
modifierBitsFromType.get(this.modifyType);
} }
this.compileToAtomicFilter( this.compileToAtomicFilter(
@ -4047,18 +4055,16 @@ class FilterCompiler {
} }
} }
FilterCompiler.prototype.FROM_BIT = 0b000000000001; // These are to quickly test whether a filter is composite
FilterCompiler.prototype.TO_BIT = 0b000000000010; const FROM_BIT = 0b000000001;
FilterCompiler.prototype.DENYALLOW_BIT = 0b000000000100; const TO_BIT = 0b000000010;
FilterCompiler.prototype.HEADER_BIT = 0b000000001000; const DENYALLOW_BIT = 0b000000100;
FilterCompiler.prototype.STRICT_PARTY_BIT = 0b000000010000; const HEADER_BIT = 0b000001000;
FilterCompiler.prototype.CSP_BIT = 0b000000100000; const STRICT_PARTY_BIT = 0b000010000;
FilterCompiler.prototype.REMOVEPARAM_BIT = 0b000001000000; const MODIFY_BIT = 0b000100000;
FilterCompiler.prototype.REDIRECT_BIT = 0b000010000000; const NOT_TYPE_BIT = 0b001000000;
FilterCompiler.prototype.NOT_TYPE_BIT = 0b000100000000; const IMPORTANT_BIT = 0b010000000;
FilterCompiler.prototype.IMPORTANT_BIT = 0b001000000000; const METHOD_BIT = 0b100000000;
FilterCompiler.prototype.METHOD_BIT = 0b010000000000;
FilterCompiler.prototype.PERMISSIONS_BIT = 0b100000000000;
FilterCompiler.prototype.FILTER_OK = 0; FilterCompiler.prototype.FILTER_OK = 0;
FilterCompiler.prototype.FILTER_INVALID = 1; FilterCompiler.prototype.FILTER_INVALID = 1;
@ -4068,16 +4074,15 @@ FilterCompiler.prototype.FILTER_UNSUPPORTED = 2;
/******************************************************************************/ /******************************************************************************/
const FilterContainer = function() { const FilterContainer = function() {
this.compilerVersion = '8'; this.compilerVersion = '10';
this.selfieVersion = '9'; this.selfieVersion = '10';
this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH; this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH;
this.optimizeTaskId = undefined; this.optimizeTaskId = undefined;
// As long as CategoryCount is reasonably low, we will use an array to // As long as CategoryCount is reasonably low, we will use an array to
// store buckets using category bits as index. If ever CategoryCount // store buckets using category bits as index. If ever CategoryCount
// becomes too large, we can just go back to using a Map. // becomes too large, we can just go back to using a Map.
this.bitsToBucketIndices = JSON.parse(`[${'0,'.repeat(CategoryCount-1)}0]`); this.bitsToBucket = new Map();
this.buckets = [ new Map() ];
this.goodFilters = new Set(); this.goodFilters = new Set();
this.badFilters = new Set(); this.badFilters = new Set();
this.unitsToOptimize = []; this.unitsToOptimize = [];
@ -4105,8 +4110,7 @@ FilterContainer.prototype.reset = function() {
this.goodFilters.clear(); this.goodFilters.clear();
this.badFilters.clear(); this.badFilters.clear();
this.unitsToOptimize.length = 0; this.unitsToOptimize.length = 0;
this.bitsToBucketIndices.fill(0); this.bitsToBucket.clear();
this.buckets.length = 1;
urlTokenizer.resetKnownTokens(); urlTokenizer.resetKnownTokens();
@ -4145,16 +4149,14 @@ FilterContainer.prototype.freeze = function() {
const args = unserialize(line); const args = unserialize(line);
const bits = args[0]; const bits = args[0];
let ibucket = this.bitsToBucketIndices[bits]; const bucket = this.bitsToBucket.get(bits) || (new Map());
if ( ibucket === 0 ) { if ( bucket.size === 0 ) {
ibucket = this.bitsToBucketIndices[bits] = this.buckets.length; this.bitsToBucket.set(bits, bucket);
this.buckets.push(new Map());
} }
const tokenHash = args[1]; const tokenHash = args[1];
const fdata = args[2]; const fdata = args[2];
const bucket = this.buckets[ibucket];
let iunit = bucket.get(tokenHash) || 0; let iunit = bucket.get(tokenHash) || 0;
if ( tokenHash === DOT_TOKEN_HASH ) { if ( tokenHash === DOT_TOKEN_HASH ) {
@ -4204,9 +4206,9 @@ FilterContainer.prototype.freeze = function() {
// the block-important realm should be checked when and only when // the block-important realm should be checked when and only when
// there is a matched exception filter, which important filters are // there is a matched exception filter, which important filters are
// meant to override. // meant to override.
if ( (bits & ActionBitsMask) === BlockImportant ) { if ( (bits & ActionBitsMask) === BLOCKIMPORTANT_REALM ) {
this.addFilterUnit( this.addFilterUnit(
bits & ~Important, bits & ~IMPORTANT_REALM,
tokenHash, tokenHash,
filterFromCompiled(fdata) filterFromCompiled(fdata)
); );
@ -4348,14 +4350,14 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
} }
const realms = new Map([ const realms = new Map([
[ BlockAction, 'block' ], [ BLOCK_REALM, 'block' ],
[ AllowAction, 'allow' ], [ ALLOW_REALM, 'allow' ],
[ ModifyAction, 'modify' ], [ MODIFY_REALMS, 'modify' ],
]); ]);
const partyness = new Map([ const partyness = new Map([
[ AnyParty, '' ], [ ANYPARTY_REALM, '' ],
[ FirstParty, 'firstParty' ], [ FIRSTPARTY_REALM, 'firstParty' ],
[ ThirdParty, 'thirdParty' ], [ THIRDPARTY_REALM, 'thirdParty' ],
]); ]);
const types = new Set([ const types = new Set([
'no_type', 'no_type',
@ -4403,7 +4405,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
// Collect generichide filters // Collect generichide filters
const generichideExclusions = []; const generichideExclusions = [];
{ {
const bucket = buckets.get(AllowAction | typeNameToTypeValue['generichide']); const bucket = buckets.get(ALLOW_REALM | typeNameToTypeValue['generichide']);
if ( bucket ) { if ( bucket ) {
for ( const rules of bucket.values() ) { for ( const rules of bucket.values() ) {
for ( const rule of rules ) { for ( const rule of rules ) {
@ -4460,7 +4462,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
operation: 'append', operation: 'append',
value: rule.__modifierValue, value: rule.__modifierValue,
}]; }];
if ( rule.__modifierAction === AllowAction ) { if ( rule.__modifierAction === ALLOW_REALM ) {
dnrAddRuleError(rule, 'Unsupported modifier exception'); dnrAddRuleError(rule, 'Unsupported modifier exception');
} }
break; break;
@ -4471,7 +4473,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
operation: 'append', operation: 'append',
value: rule.__modifierValue.split('|').join(', '), value: rule.__modifierValue.split('|').join(', '),
}]; }];
if ( rule.__modifierAction === AllowAction ) { if ( rule.__modifierAction === ALLOW_REALM ) {
dnrAddRuleError(rule, 'Unsupported modifier exception'); dnrAddRuleError(rule, 'Unsupported modifier exception');
} }
break; break;
@ -4489,7 +4491,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
if ( rule.__modifierValue !== '' && resource === undefined ) { if ( rule.__modifierValue !== '' && resource === undefined ) {
dnrAddRuleError(rule, `Unpatchable redirect filter: ${rule.__modifierValue}`); dnrAddRuleError(rule, `Unpatchable redirect filter: ${rule.__modifierValue}`);
} }
if ( rule.__modifierAction !== AllowAction ) { if ( rule.__modifierAction !== ALLOW_REALM ) {
const extensionPath = resource || token; const extensionPath = resource || token;
rule.action.type = 'redirect'; rule.action.type = 'redirect';
rule.action.redirect = { extensionPath }; rule.action.redirect = { extensionPath };
@ -4534,14 +4536,14 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
'xmlhttprequest', 'xmlhttprequest',
]; ];
} }
if ( rule.__modifierAction === AllowAction ) { if ( rule.__modifierAction === ALLOW_REALM ) {
dnrAddRuleError(rule, 'Unsupported modifier exception'); dnrAddRuleError(rule, 'Unsupported modifier exception');
} }
break; break;
case 'urltransform': { case 'urltransform': {
const path = rule.__modifierValue; const path = rule.__modifierValue;
let priority = rule.priority || 1; let priority = rule.priority || 1;
if ( rule.__modifierAction !== AllowAction ) { if ( rule.__modifierAction !== ALLOW_REALM ) {
const transform = { path }; const transform = { path };
rule.action.type = 'redirect'; rule.action.type = 'redirect';
rule.action.redirect = { transform }; rule.action.redirect = { transform };
@ -4574,12 +4576,10 @@ FilterContainer.prototype.addFilterUnit = function(
tokenHash, tokenHash,
inewunit inewunit
) { ) {
let ibucket = this.bitsToBucketIndices[bits]; const bucket = this.bitsToBucket.get(bits) || (new Map());
if ( ibucket === 0 ) { if ( bucket.size === 0 ) {
ibucket = this.bitsToBucketIndices[bits] = this.buckets.length; this.bitsToBucket.set(bits, bucket);
this.buckets.push(new Map());
} }
const bucket = this.buckets[ibucket];
const istoredunit = bucket.get(tokenHash) || 0; const istoredunit = bucket.get(tokenHash) || 0;
if ( istoredunit === 0 ) { if ( istoredunit === 0 ) {
bucket.set(tokenHash, inewunit); bucket.set(tokenHash, inewunit);
@ -4614,7 +4614,7 @@ FilterContainer.prototype.optimize = function(throttle = 0) {
const t0 = Date.now(); const t0 = Date.now();
while ( this.unitsToOptimize.length !== 0 ) { while ( this.unitsToOptimize.length !== 0 ) {
const { bits, tokenHash } = this.unitsToOptimize.pop(); const { bits, tokenHash } = this.unitsToOptimize.pop();
const bucket = this.buckets[this.bitsToBucketIndices[bits]]; const bucket = this.bitsToBucket.get(bits);
const iunit = bucket.get(tokenHash); const iunit = bucket.get(tokenHash);
const fc = filterGetClass(iunit); const fc = filterGetClass(iunit);
switch ( fc ) { switch ( fc ) {
@ -4623,7 +4623,7 @@ FilterContainer.prototype.optimize = function(throttle = 0) {
break; break;
case FilterBucket: { case FilterBucket: {
const optimizeBits = const optimizeBits =
(tokenHash === NO_TOKEN_HASH) || (bits & ModifyAction) !== 0 (tokenHash === NO_TOKEN_HASH) || (bits & MODIFY_REALMS) !== 0
? 0b10 ? 0b10
: 0b01; : 0b01;
const inewunit = FilterBucket.optimize(iunit, optimizeBits); const inewunit = FilterBucket.optimize(iunit, optimizeBits);
@ -4658,14 +4658,6 @@ FilterContainer.prototype.toSelfie = async function(storage, path) {
if ( typeof storage !== 'object' || storage === null ) { return; } if ( typeof storage !== 'object' || storage === null ) { return; }
if ( typeof storage.put !== 'function' ) { return; } if ( typeof storage.put !== 'function' ) { return; }
const bucketsToSelfie = ( ) => {
const selfie = [];
for ( const bucket of this.buckets ) {
selfie.push(Array.from(bucket));
}
return selfie;
};
bidiTrieOptimize(true); bidiTrieOptimize(true);
keyvalStore.setItem( keyvalStore.setItem(
'SNFE.origHNTrieContainer.trieDetails', 'SNFE.origHNTrieContainer.trieDetails',
@ -4700,8 +4692,10 @@ FilterContainer.prototype.toSelfie = async function(storage, path) {
processedFilterCount: this.processedFilterCount, processedFilterCount: this.processedFilterCount,
acceptedCount: this.acceptedCount, acceptedCount: this.acceptedCount,
discardedCount: this.discardedCount, discardedCount: this.discardedCount,
bitsToBucketIndices: this.bitsToBucketIndices, bitsToBucket: Array.from(this.bitsToBucket).map(kv => {
buckets: bucketsToSelfie(), kv[1] = Array.from(kv[1]);
return kv;
}),
urlTokenizer: urlTokenizer.toSelfie(), urlTokenizer: urlTokenizer.toSelfie(),
}) })
) )
@ -4750,12 +4744,6 @@ FilterContainer.prototype.fromSelfie = async function(storage, path) {
if ( results.slice(1).every(v => v === true) === false ) { return false; } if ( results.slice(1).every(v => v === true) === false ) { return false; }
const bucketsFromSelfie = selfie => {
for ( let i = 0; i < selfie.length; i++ ) {
this.buckets[i] = new Map(selfie[i]);
}
};
const details = results[0]; const details = results[0];
if ( typeof details !== 'object' || details === null ) { return false; } if ( typeof details !== 'object' || details === null ) { return false; }
if ( typeof details.content !== 'string' ) { return false; } if ( typeof details.content !== 'string' ) { return false; }
@ -4770,8 +4758,10 @@ FilterContainer.prototype.fromSelfie = async function(storage, path) {
this.processedFilterCount = selfie.processedFilterCount; this.processedFilterCount = selfie.processedFilterCount;
this.acceptedCount = selfie.acceptedCount; this.acceptedCount = selfie.acceptedCount;
this.discardedCount = selfie.discardedCount; this.discardedCount = selfie.discardedCount;
this.bitsToBucketIndices = selfie.bitsToBucketIndices; this.bitsToBucket = new Map(selfie.bitsToBucket.map(kv => {
bucketsFromSelfie(selfie.buckets); kv[1] = new Map(kv[1]);
return kv;
}));
urlTokenizer.fromSelfie(selfie.urlTokenizer); urlTokenizer.fromSelfie(selfie.urlTokenizer);
// If this point is never reached, it means the internal state is // If this point is never reached, it means the internal state is
@ -4837,38 +4827,37 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
$requestMethodBit = fctxt.method || 0; $requestMethodBit = fctxt.method || 0;
$requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset; $requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset;
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; const modifierType = modifierTypeFromName.get(modifierName);
const modifierBits = modifierBitsFromType.get(modifierType);
const catBits00 = ModifyAction; const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM;
const catBits01 = ModifyAction | typeBits;
const catBits10 = ModifyAction | partyBits;
const catBits11 = ModifyAction | typeBits | partyBits;
const ibucket00 = this.bitsToBucketIndices[catBits00]; const catBits00 = modifierBits;
const ibucket01 = typeBits !== 0 ? this.bitsToBucketIndices[catBits01] const catBits01 = modifierBits | typeBits;
: 0; const catBits10 = modifierBits | partyBits;
const ibucket10 = partyBits !== 0 const catBits11 = modifierBits | typeBits | partyBits;
? this.bitsToBucketIndices[catBits10]
: 0; const bucket00 = this.bitsToBucket.get(catBits00);
const ibucket11 = typeBits !== 0 && partyBits !== 0 const bucket01 = typeBits !== 0
? this.bitsToBucketIndices[catBits11] ? this.bitsToBucket.get(catBits01)
: 0; : undefined;
const bucket10 = partyBits !== 0
? this.bitsToBucket.get(catBits10)
: undefined;
const bucket11 = typeBits !== 0 && partyBits !== 0
? this.bitsToBucket.get(catBits11)
: undefined;
if ( if (
ibucket00 === 0 && ibucket01 === 0 && bucket00 === undefined && bucket01 === undefined &&
ibucket10 === 0 && ibucket11 === 0 bucket10 === undefined && bucket11 === undefined
) { ) {
return; return;
} }
const bucket00 = this.buckets[ibucket00];
const bucket01 = this.buckets[ibucket01];
const bucket10 = this.buckets[ibucket10];
const bucket11 = this.buckets[ibucket11];
const results = []; const results = [];
const env = { const env = {
type: modifierTypeFromName.get(modifierName) || 0, type: modifierType || 0,
bits: 0, bits: 0,
th: 0, th: 0,
iunit: 0, iunit: 0,
@ -4884,28 +4873,28 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
env.th = th; env.th = th;
$tokenBeg = tokenHashes[i+1]; $tokenBeg = tokenHashes[i+1];
if ( if (
(ibucket00 !== 0) && (bucket00 !== undefined) &&
(iunit = bucket00.get(th) || 0) !== 0 (iunit = bucket00.get(th) || 0) !== 0
) { ) {
env.bits = catBits00; env.iunit = iunit; env.bits = catBits00; env.iunit = iunit;
filterMatchAndFetchModifiers(iunit, env); filterMatchAndFetchModifiers(iunit, env);
} }
if ( if (
(ibucket01 !== 0) && (bucket01 !== undefined) &&
(iunit = bucket01.get(th) || 0) !== 0 (iunit = bucket01.get(th) || 0) !== 0
) { ) {
env.bits = catBits01; env.iunit = iunit; env.bits = catBits01; env.iunit = iunit;
filterMatchAndFetchModifiers(iunit, env); filterMatchAndFetchModifiers(iunit, env);
} }
if ( if (
(ibucket10 !== 0) && (bucket10 !== undefined) &&
(iunit = bucket10.get(th) || 0) !== 0 (iunit = bucket10.get(th) || 0) !== 0
) { ) {
env.bits = catBits10; env.iunit = iunit; env.bits = catBits10; env.iunit = iunit;
filterMatchAndFetchModifiers(iunit, env); filterMatchAndFetchModifiers(iunit, env);
} }
if ( if (
(ibucket11 !== 0) && (bucket11 !== undefined) &&
(iunit = bucket11.get(th) || 0) !== 0 (iunit = bucket11.get(th) || 0) !== 0
) { ) {
env.bits = catBits11; env.iunit = iunit; env.bits = catBits11; env.iunit = iunit;
@ -4921,7 +4910,7 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
// occurrences. // occurrences.
if ( results.length === 1 ) { if ( results.length === 1 ) {
const result = results[0]; const result = results[0];
if ( (result.bits & AllowAction) !== 0 ) { return; } if ( (result.bits & ALLOW_REALM) !== 0 ) { return; }
return [ result ]; return [ result ];
} }
@ -4932,9 +4921,9 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
for ( const result of results ) { for ( const result of results ) {
const actionBits = result.bits & ActionBitsMask; const actionBits = result.bits & ActionBitsMask;
const modifyValue = result.value; const modifyValue = result.value;
if ( actionBits === BlockImportant ) { if ( actionBits === BLOCKIMPORTANT_REALM ) {
toAddImportant.set(modifyValue, result); toAddImportant.set(modifyValue, result);
} else if ( actionBits === BlockAction ) { } else if ( actionBits === BLOCK_REALM ) {
toAdd.set(modifyValue, result); toAdd.set(modifyValue, result);
} else { } else {
toRemove.set(modifyValue, result); toRemove.set(modifyValue, result);
@ -5014,55 +5003,50 @@ FilterContainer.prototype.realmMatchString = function(
const catBits10 = realmBits | partyBits; const catBits10 = realmBits | partyBits;
const catBits11 = realmBits | typeBits | partyBits; const catBits11 = realmBits | typeBits | partyBits;
const ibucket00 = exactType === 0 const bucket00 = exactType === 0
? this.bitsToBucketIndices[catBits00] ? this.bitsToBucket.get(catBits00)
: 0; : undefined;
const ibucket01 = exactType !== 0 || typeBits !== 0 const bucket01 = exactType !== 0 || typeBits !== 0
? this.bitsToBucketIndices[catBits01] ? this.bitsToBucket.get(catBits01)
: 0; : undefined;
const ibucket10 = exactType === 0 && partyBits !== 0 const bucket10 = exactType === 0 && partyBits !== 0
? this.bitsToBucketIndices[catBits10] ? this.bitsToBucket.get(catBits10)
: 0; : undefined;
const ibucket11 = (exactType !== 0 || typeBits !== 0) && partyBits !== 0 const bucket11 = (exactType !== 0 || typeBits !== 0) && partyBits !== 0
? this.bitsToBucketIndices[catBits11] ? this.bitsToBucket.get(catBits11)
: 0; : undefined;
if ( if (
ibucket00 === 0 && ibucket01 === 0 && bucket00 === undefined && bucket01 === undefined &&
ibucket10 === 0 && ibucket11 === 0 bucket10 === undefined && bucket11 === undefined
) { ) {
return false; return false;
} }
const bucket00 = this.buckets[ibucket00];
const bucket01 = this.buckets[ibucket01];
const bucket10 = this.buckets[ibucket10];
const bucket11 = this.buckets[ibucket11];
let catBits = 0, iunit = 0; let catBits = 0, iunit = 0;
// Pure hostname-based filters // Pure hostname-based filters
let tokenHash = DOT_TOKEN_HASH; let tokenHash = DOT_TOKEN_HASH;
if ( if (
(ibucket00 !== 0) && (bucket00 !== undefined) &&
(iunit = bucket00.get(DOT_TOKEN_HASH) || 0) !== 0 && (iunit = bucket00.get(DOT_TOKEN_HASH) || 0) !== 0 &&
(filterMatch(iunit) === true) (filterMatch(iunit) === true)
) { ) {
catBits = catBits00; catBits = catBits00;
} else if ( } else if (
(ibucket01 !== 0) && (bucket01 !== undefined) &&
(iunit = bucket01.get(DOT_TOKEN_HASH) || 0) !== 0 && (iunit = bucket01.get(DOT_TOKEN_HASH) || 0) !== 0 &&
(filterMatch(iunit) === true) (filterMatch(iunit) === true)
) { ) {
catBits = catBits01; catBits = catBits01;
} else if ( } else if (
(ibucket10 !== 0) && (bucket10 !== undefined) &&
(iunit = bucket10.get(DOT_TOKEN_HASH) || 0) !== 0 && (iunit = bucket10.get(DOT_TOKEN_HASH) || 0) !== 0 &&
(filterMatch(iunit) === true) (filterMatch(iunit) === true)
) { ) {
catBits = catBits10; catBits = catBits10;
} else if ( } else if (
(ibucket11 !== 0) && (bucket11 !== undefined) &&
(iunit = bucket11.get(DOT_TOKEN_HASH) || 0) !== 0 && (iunit = bucket11.get(DOT_TOKEN_HASH) || 0) !== 0 &&
(filterMatch(iunit) === true) (filterMatch(iunit) === true)
) { ) {
@ -5077,7 +5061,7 @@ FilterContainer.prototype.realmMatchString = function(
if ( tokenHash === INVALID_TOKEN_HASH ) { return false; } if ( tokenHash === INVALID_TOKEN_HASH ) { return false; }
$tokenBeg = tokenHashes[i+1]; $tokenBeg = tokenHashes[i+1];
if ( if (
(ibucket00 !== 0) && (bucket00 !== undefined) &&
(iunit = bucket00.get(tokenHash) || 0) !== 0 && (iunit = bucket00.get(tokenHash) || 0) !== 0 &&
(filterMatch(iunit) === true) (filterMatch(iunit) === true)
) { ) {
@ -5085,7 +5069,7 @@ FilterContainer.prototype.realmMatchString = function(
break; break;
} }
if ( if (
(ibucket01 !== 0) && (bucket01 !== undefined) &&
(iunit = bucket01.get(tokenHash) || 0) !== 0 && (iunit = bucket01.get(tokenHash) || 0) !== 0 &&
(filterMatch(iunit) === true) (filterMatch(iunit) === true)
) { ) {
@ -5093,7 +5077,7 @@ FilterContainer.prototype.realmMatchString = function(
break; break;
} }
if ( if (
(ibucket10 !== 0) && (bucket10 !== undefined) &&
(iunit = bucket10.get(tokenHash) || 0) !== 0 && (iunit = bucket10.get(tokenHash) || 0) !== 0 &&
(filterMatch(iunit) === true) (filterMatch(iunit) === true)
) { ) {
@ -5101,7 +5085,7 @@ FilterContainer.prototype.realmMatchString = function(
break; break;
} }
if ( if (
(ibucket11 !== 0) && (bucket11 !== undefined) &&
(iunit = bucket11.get(tokenHash) || 0) !== 0 && (iunit = bucket11.get(tokenHash) || 0) !== 0 &&
(filterMatch(iunit) === true) (filterMatch(iunit) === true)
) { ) {
@ -5147,9 +5131,9 @@ FilterContainer.prototype.matchRequestReverse = function(type, url) {
$docDomain = domainFromHostname($docHostname); $docDomain = domainFromHostname($docHostname);
// Exception filters // Exception filters
if ( this.realmMatchString(AllowAction, typeBits, FirstParty) ) { if ( this.realmMatchString(ALLOW_REALM, typeBits, FIRSTPARTY_REALM) ) {
// Important block filters. // Important block filters.
if ( this.realmMatchString(BlockImportant, typeBits, FirstParty) ) { if ( this.realmMatchString(BLOCKIMPORTANT_REALM, typeBits, FIRSTPARTY_REALM) ) {
return 1; return 1;
} }
return 2; return 2;
@ -5194,7 +5178,7 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) {
typeBits |= 0x80000000; typeBits |= 0x80000000;
} }
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM;
// Prime tokenizer: we get a normalized URL in return. // Prime tokenizer: we get a normalized URL in return.
$requestURL = urlTokenizer.setURL(fctxt.url); $requestURL = urlTokenizer.setURL(fctxt.url);
@ -5211,11 +5195,11 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) {
// Evaluate block realm before allow realm, and allow realm before // Evaluate block realm before allow realm, and allow realm before
// block-important realm, i.e. by order of likelihood of a match. // block-important realm, i.e. by order of likelihood of a match.
const r = this.realmMatchString(BlockAction, typeBits, partyBits); const r = this.realmMatchString(BLOCK_REALM, typeBits, partyBits);
if ( r || (modifiers & 0b0010) !== 0 ) { if ( r || (modifiers & 0b0010) !== 0 ) {
if ( $isBlockImportant ) { return 1; } if ( $isBlockImportant ) { return 1; }
if ( this.realmMatchString(AllowAction, typeBits, partyBits) ) { if ( this.realmMatchString(ALLOW_REALM, typeBits, partyBits) ) {
if ( this.realmMatchString(BlockImportant, typeBits, partyBits) ) { if ( this.realmMatchString(BLOCKIMPORTANT_REALM, typeBits, partyBits) ) {
return 1; return 1;
} }
return 2; return 2;
@ -5229,7 +5213,7 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) {
FilterContainer.prototype.matchHeaders = function(fctxt, headers) { FilterContainer.prototype.matchHeaders = function(fctxt, headers) {
const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue; const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue;
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM;
// Prime tokenizer: we get a normalized URL in return. // Prime tokenizer: we get a normalized URL in return.
$requestURL = urlTokenizer.setURL(fctxt.url); $requestURL = urlTokenizer.setURL(fctxt.url);
@ -5245,13 +5229,13 @@ FilterContainer.prototype.matchHeaders = function(fctxt, headers) {
$httpHeaders.init(headers); $httpHeaders.init(headers);
let r = 0; let r = 0;
if ( this.realmMatchString(HEADERS | BlockAction, typeBits, partyBits) ) { if ( this.realmMatchString(HEADERS_REALM | BLOCK_REALM, typeBits, partyBits) ) {
r = 1; r = 1;
} }
if ( r !== 0 && $isBlockImportant !== true ) { if ( r !== 0 && $isBlockImportant !== true ) {
if ( this.realmMatchString(HEADERS | AllowAction, typeBits, partyBits) ) { if ( this.realmMatchString(HEADERS_REALM | ALLOW_REALM, typeBits, partyBits) ) {
r = 2; r = 2;
if ( this.realmMatchString(HEADERS | BlockImportant, typeBits, partyBits) ) { if ( this.realmMatchString(HEADERS_REALM | BLOCKIMPORTANT_REALM, typeBits, partyBits) ) {
r = 1; r = 1;
} }
} }
@ -5275,7 +5259,7 @@ FilterContainer.prototype.redirectRequest = function(redirectEngine, fctxt) {
} }
// Redirect to highest-ranked directive // Redirect to highest-ranked directive
const directive = directives[highest]; const directive = directives[highest];
if ( (directive.bits & AllowAction) !== 0 ) { return directives; } if ( (directive.bits & ALLOW_REALM) !== 0 ) { return directives; }
const { token } = parseRedirectRequestValue(directive); const { token } = parseRedirectRequestValue(directive);
fctxt.redirectURL = redirectEngine.tokenToURL(fctxt, token); fctxt.redirectURL = redirectEngine.tokenToURL(fctxt, token);
if ( fctxt.redirectURL === undefined ) { return; } if ( fctxt.redirectURL === undefined ) { return; }
@ -5286,7 +5270,7 @@ FilterContainer.prototype.transformRequest = function(fctxt) {
const directives = this.matchAndFetchModifiers(fctxt, 'urltransform'); const directives = this.matchAndFetchModifiers(fctxt, 'urltransform');
if ( directives === undefined ) { return; } if ( directives === undefined ) { return; }
const directive = directives[directives.length-1]; const directive = directives[directives.length-1];
if ( (directive.bits & AllowAction) !== 0 ) { return directives; } if ( (directive.bits & ALLOW_REALM) !== 0 ) { return directives; }
const redirectURL = new URL(fctxt.url); const redirectURL = new URL(fctxt.url);
if ( directive.value === redirectURL.pathname ) { return; } if ( directive.value === redirectURL.pathname ) { return; }
redirectURL.pathname = directive.value; redirectURL.pathname = directive.value;
@ -5309,10 +5293,10 @@ function compareRedirectRequests(redirectEngine, a, b) {
parseRedirectRequestValue(b); parseRedirectRequestValue(b);
if ( redirectEngine.hasToken(btok) === false ) { return 1; } if ( redirectEngine.hasToken(btok) === false ) { return 1; }
if ( abits !== bbits ) { if ( abits !== bbits ) {
if ( (abits & Important) !== 0 ) { return 1; } if ( (abits & IMPORTANT_REALM) !== 0 ) { return 1; }
if ( (bbits & Important) !== 0 ) { return -1; } if ( (bbits & IMPORTANT_REALM) !== 0 ) { return -1; }
if ( (abits & AllowAction) !== 0 ) { return -1; } if ( (abits & ALLOW_REALM) !== 0 ) { return -1; }
if ( (bbits & AllowAction) !== 0 ) { return 1; } if ( (bbits & ALLOW_REALM) !== 0 ) { return 1; }
} }
return aint - bint; return aint - bint;
} }
@ -5348,7 +5332,7 @@ FilterContainer.prototype.filterQuery = function(fctxt) {
const out = []; const out = [];
for ( const directive of directives ) { for ( const directive of directives ) {
if ( params.size === 0 ) { break; } if ( params.size === 0 ) { break; }
const isException = (directive.bits & AllowAction) !== 0; const isException = (directive.bits & ALLOW_REALM) !== 0;
if ( isException && directive.value === '' ) { if ( isException && directive.value === '' ) {
out.push(directive); out.push(directive);
break; break;
@ -5471,10 +5455,8 @@ FilterContainer.prototype.test = async function(docURL, type, url) {
FilterContainer.prototype.bucketHistogram = function() { FilterContainer.prototype.bucketHistogram = function() {
const results = []; const results = [];
for ( let bits = 0; bits < this.bitsToBucketIndices.length; bits++ ) { for ( const [ bits, bucket ] of this.bitsToBucket ) {
const ibucket = this.bitsToBucketIndices[bits]; for ( const [ th, iunit ] of bucket ) {
if ( ibucket === 0 ) { continue; }
for ( const [ th, iunit ] of this.buckets[ibucket] ) {
const token = urlTokenizer.stringFromTokenHash(th); const token = urlTokenizer.stringFromTokenHash(th);
const fc = filterGetClass(iunit); const fc = filterGetClass(iunit);
const count = fc.getCount !== undefined ? fc.getCount(iunit) : 1; const count = fc.getCount !== undefined ? fc.getCount(iunit) : 1;
@ -5553,15 +5535,20 @@ FilterContainer.prototype.dump = function() {
const thCounts = new Set(); const thCounts = new Set();
const realms = new Map([ const realms = new Map([
[ BlockAction, 'block' ], [ BLOCK_REALM, 'block' ],
[ BlockImportant, 'block-important' ], [ BLOCKIMPORTANT_REALM, 'block-important' ],
[ AllowAction, 'unblock' ], [ ALLOW_REALM, 'unblock' ],
[ ModifyAction, 'modify' ], [ REDIRECT_REALM, 'redirect' ],
[ REMOVEPARAM_REALM, 'removeparam' ],
[ CSP_REALM, 'csp' ],
[ PERMISSIONS_REALM, 'permissions' ],
[ URLTRANSFORM_REALM, 'urltransform' ],
[ REPLACE_REALM, 'replace' ],
]); ]);
const partyness = new Map([ const partyness = new Map([
[ AnyParty, 'any-party' ], [ ANYPARTY_REALM, 'any-party' ],
[ FirstParty, '1st-party' ], [ FIRSTPARTY_REALM, '1st-party' ],
[ ThirdParty, '3rd-party' ], [ THIRDPARTY_REALM, '3rd-party' ],
]); ]);
for ( const [ realmBits, realmName ] of realms ) { for ( const [ realmBits, realmName ] of realms ) {
toOutput(1, `+ realm: ${realmName}`); toOutput(1, `+ realm: ${realmName}`);
@ -5573,11 +5560,11 @@ FilterContainer.prototype.dump = function() {
if ( processedTypeBits.has(typeBits) ) { continue; } if ( processedTypeBits.has(typeBits) ) { continue; }
processedTypeBits.add(typeBits); processedTypeBits.add(typeBits);
const bits = realmBits | partyBits | typeBits; const bits = realmBits | partyBits | typeBits;
const ibucket = this.bitsToBucketIndices[bits]; const bucket = this.bitsToBucket.get(bits);
if ( ibucket === 0 ) { continue; } if ( bucket === undefined ) { continue; }
const thCount = this.buckets[ibucket].size; const thCount = bucket.size;
toOutput(3, `+ type: ${typeName} (${thCount})`); toOutput(3, `+ type: ${typeName} (${thCount})`);
for ( const [ th, iunit ] of this.buckets[ibucket] ) { for ( const [ th, iunit ] of bucket) {
thCounts.add(th); thCounts.add(th);
const ths = thConstants.has(th) const ths = thConstants.has(th)
? thConstants.get(th) ? thConstants.get(th)

View File

@ -32,6 +32,7 @@ import scriptletFilteringEngine from './scriptlet-filtering.js';
import staticNetFilteringEngine from './static-net-filtering.js'; import staticNetFilteringEngine from './static-net-filtering.js';
import textEncode from './text-encode.js'; import textEncode from './text-encode.js';
import µb from './background.js'; import µb from './background.js';
import * as sfp from './static-filtering-parser.js';
import { import {
sessionFirewall, sessionFirewall,
@ -483,11 +484,10 @@ const onBeforeBehindTheSceneRequest = function(fctxt) {
const onHeadersReceived = function(details) { const onHeadersReceived = function(details) {
// https://github.com/uBlockOrigin/uBlock-issues/issues/610 // https://github.com/uBlockOrigin/uBlock-issues/issues/610
// Process behind-the-scene requests in a special way. // Process behind-the-scene requests in a special way.
if ( if ( details.tabId < 0 ) {
details.tabId < 0 && if ( normalizeBehindTheSceneResponseHeaders(details) === false ) {
normalizeBehindTheSceneResponseHeaders(details) === false return;
) { }
return;
} }
const fctxt = µb.filteringContext.fromWebrequestDetails(details); const fctxt = µb.filteringContext.fromWebrequestDetails(details);
@ -524,8 +524,6 @@ const onHeadersReceived = function(details) {
} }
} }
if ( isRootDoc === false && fctxt.itype !== fctxt.SUB_FRAME ) { return; }
// https://github.com/gorhill/uBlock/issues/2813 // https://github.com/gorhill/uBlock/issues/2813
// Disable the blocking of large media elements if the document is itself // Disable the blocking of large media elements if the document is itself
// a media element: the resource was not prevented from loading so no // a media element: the resource was not prevented from loading so no
@ -539,10 +537,29 @@ const onHeadersReceived = function(details) {
} }
} }
// At this point we have a HTML document. const bodyFilterSession = bodyFilterer.canFilter(fctxt, details);
if ( bodyFilterSession !== undefined ) {
const filteredHTML = // `replace=` filter option
µb.canFilterResponseData && filterDocument(fctxt, details) === true; const replaceDirectives =
staticNetFilteringEngine.matchAndFetchModifiers(fctxt, 'replace');
if ( replaceDirectives ) {
bodyFilterSession.addJob({
fn: textResponseFilterer,
args: [ replaceDirectives ],
});
}
// html filtering
if ( isRootDoc || fctxt.itype === fctxt.SUB_FRAME ) {
const selectors = htmlFilteringEngine.retrieve(bodyFilterSession);
if ( selectors ) {
bodyFilterSession.addJob({
fn: htmlResponseFilterer,
args: [ selectors ],
});
}
}
bodyFilterSession.launch();
}
let modifiedHeaders = false; let modifiedHeaders = false;
if ( httpheaderFilteringEngine.apply(fctxt, responseHeaders) === true ) { if ( httpheaderFilteringEngine.apply(fctxt, responseHeaders) === true ) {
@ -551,7 +568,6 @@ const onHeadersReceived = function(details) {
if ( injectCSP(fctxt, pageStore, responseHeaders) === true ) { if ( injectCSP(fctxt, pageStore, responseHeaders) === true ) {
modifiedHeaders = true; modifiedHeaders = true;
} }
if ( injectPP(fctxt, pageStore, responseHeaders) === true ) { if ( injectPP(fctxt, pageStore, responseHeaders) === true ) {
modifiedHeaders = true; modifiedHeaders = true;
} }
@ -562,7 +578,7 @@ const onHeadersReceived = function(details) {
// https://github.com/uBlockOrigin/uBlock-issues/issues/229 // https://github.com/uBlockOrigin/uBlock-issues/issues/229
// Use `no-cache` instead of `no-cache, no-store, must-revalidate`, this // Use `no-cache` instead of `no-cache, no-store, must-revalidate`, this
// allows Firefox's offline mode to work as expected. // allows Firefox's offline mode to work as expected.
if ( (filteredHTML || modifiedHeaders) && dontCacheResponseHeaders ) { if ( modifiedHeaders && dontCacheResponseHeaders ) {
const cacheControl = µb.hiddenSettings.cacheControlForFirefox1376932; const cacheControl = µb.hiddenSettings.cacheControlForFirefox1376932;
if ( cacheControl !== 'unset' ) { if ( cacheControl !== 'unset' ) {
let i = headerIndexFromName('cache-control', responseHeaders); let i = headerIndexFromName('cache-control', responseHeaders);
@ -601,272 +617,301 @@ const normalizeBehindTheSceneResponseHeaders = function(details) {
return true; return true;
}; };
/******************************************************************************/
/**
 * Body-filtering job realizing the `replace=` network filter option.
 *
 * Each directive is expected to carry a `refs` object holding the raw option
 * `value` plus a `$cache` slot:
 *   - `null`      => not parsed yet; parsed here through
 *                    `sfp.parseReplaceValue()` and stored back in `$cache`.
 *   - `undefined` => nothing usable (presumably an invalid value -- confirm
 *                    against `parseReplaceValue`); the directive is skipped.
 *   - otherwise   => an object exposing `re` and `replacement`.
 *
 * Directives are applied in order, each one operating on the text produced by
 * the previous ones.
 *
 * @param {object} session - Body filtering session; must provide
 *   `getString()` / `setString()` and the chainable `setRealm()`,
 *   `pushFilters()`, `toLogger()` logging helpers.
 * @param {Iterable<object>} directives - Matched `replace=` directives.
 * @returns {boolean} `true` if at least one directive matched the body text
 *   (and thus the text was rewritten), `false` otherwise.
 */
function textResponseFilterer(session, directives) {
    const applied = [];
    for ( const directive of directives ) {
        if ( directive.refs instanceof Object === false ) { continue; }
        const { refs } = directive;
        if ( refs.$cache === null ) {
            refs.$cache = sfp.parseReplaceValue(refs.value);
        }
        const cache = refs.$cache;
        if ( cache === undefined ) { continue; }
        const { re, replacement } = cache;
        const text = session.getString();
        // The regex instance is cached and reused across responses: make
        // sure a leftover `lastIndex` (global/sticky regexes are stateful)
        // can never cause a false negative or a partial replacement.
        re.lastIndex = 0;
        if ( re.test(text) !== true ) { continue; }
        re.lastIndex = 0;
        session.setString(text.replace(re, replacement));
        applied.push(directive);
    }
    // An array is always truthy: test its length, otherwise an empty list of
    // filters is reported to the logger for every inspected response.
    if ( applied.length === 0 ) { return false; }
    if ( logger.enabled ) {
        session.setRealm('network')
               .pushFilters(applied.map(a => a.logData()))
               .toLogger();
    }
    return true;
}
/******************************************************************************/
/**
 * Body-filtering job realizing HTML filtering.
 *
 * Parses the session's current text into a document using the session's
 * mime type, lets `htmlFilteringEngine.apply()` act on that document, then
 * serializes the document back into the session's text.
 *
 * The `DOMParser` / `XMLSerializer` instances are created on first use and
 * kept as static properties of this function for reuse.
 *
 * @param {object} session - Body filtering session; must provide
 *   `getString()`, `setString()` and a `mime` property.
 * @param {*} [selectors] - HTML filters to apply. When `undefined`, the
 *   document is merely parsed and re-serialized.
 * @returns {boolean} `false` if `htmlFilteringEngine.apply()` did not return
 *   `true` (the session's text is left untouched), `true` otherwise.
 */
function htmlResponseFilterer(session, selectors) {
    if ( htmlResponseFilterer.domParser === null ) {
        htmlResponseFilterer.domParser = new DOMParser();
        htmlResponseFilterer.xmlSerializer = new XMLSerializer();
    }
    const { domParser, xmlSerializer } = htmlResponseFilterer;
    const doc = domParser.parseFromString(session.getString(), session.mime);
    if ( selectors !== undefined ) {
        if ( htmlFilteringEngine.apply(doc, session, selectors) !== true ) {
            return false;
        }
    }
    // https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
    // The doctype, when present, is followed by exactly one newline. Plain
    // concatenation is used so that no stray blank line is emitted after the
    // doctype, nor a leading newline when the document has no doctype.
    const doctypeStr = doc.doctype instanceof Object
        ? xmlSerializer.serializeToString(doc.doctype) + '\n'
        : '';
    session.setString(doctypeStr + doc.documentElement.outerHTML);
    return true;
}
htmlResponseFilterer.domParser = null;
htmlResponseFilterer.xmlSerializer = null;
/******************************************************************************* /*******************************************************************************
The response body filterer is responsible for: The response body filterer is responsible for:
- Realize static network filter option `replace=`
- HTML filtering - HTML filtering
In the spirit of efficiency, the response body filterer works this way:
If:
- HTML filtering: no.
Then:
No response body filtering is initiated.
If:
- HTML filtering: yes.
Then:
Assemble all response body data into a single buffer. Once all the
response data has been received, create a document from it. Then:
- Remove all DOM elements matching HTML filters.
Then serialize the resulting modified document as the new response
body.
**/ **/
const filterDocument = (( ) => { const bodyFilterer = (( ) => {
const filterers = new Map(); const sessions = new Map();
let domParser, xmlSerializer,
utf8TextDecoder, textDecoder, textEncoder;
const textDecode = function(encoding, buffer) {
if (
textDecoder !== undefined &&
textDecoder.encoding !== encoding
) {
textDecoder = undefined;
}
if ( textDecoder === undefined ) {
textDecoder = new TextDecoder(encoding);
}
return textDecoder.decode(buffer);
};
const reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i; const reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i;
const reContentTypeCharset = /charset=['"]?([^'" ]+)/i; const reContentTypeCharset = /charset=['"]?([^'" ]+)/i;
const otherValidMimes = new Set([
'application/javascript',
'application/json',
'application/xml',
'application/xhtml+xml',
]);
let textDecoder, textEncoder;
const mimeFromContentType = function(contentType) { const mimeFromContentType = contentType => {
const match = reContentTypeDocument.exec(contentType); const match = reContentTypeDocument.exec(contentType);
if ( match !== null ) { if ( match === null ) { return; }
return match[0].toLowerCase(); return match[0].toLowerCase();
}
}; };
const charsetFromContentType = function(contentType) { const charsetFromContentType = contentType => {
const match = reContentTypeCharset.exec(contentType); const match = reContentTypeCharset.exec(contentType);
if ( match !== null ) { if ( match === null ) { return; }
return match[1].toLowerCase(); return match[1].toLowerCase();
}
}; };
const charsetFromDoc = function(doc) { const charsetFromStream = bytes => {
let meta = doc.querySelector('meta[charset]'); if ( bytes.length < 3 ) { return; }
if ( meta !== null ) { if ( bytes[0] === 0xEF && bytes[1] === 0xBB && bytes[2] === 0xBF ) {
return meta.getAttribute('charset').toLowerCase(); return 'utf-8';
} }
meta = doc.querySelector( let i = -1;
'meta[http-equiv="content-type" i][content]' while ( i < 65536 ) {
); i += 1;
if ( meta !== null ) { /* c */ if ( bytes[i+0] !== 0x63 ) { continue; }
return charsetFromContentType(meta.getAttribute('content')); /* h */ if ( bytes[i+1] !== 0x68 ) { continue; }
/* a */ if ( bytes[i+2] !== 0x61 ) { continue; }
/* r */ if ( bytes[i+3] !== 0x72 ) { continue; }
/* s */ if ( bytes[i+4] !== 0x73 ) { continue; }
/* e */ if ( bytes[i+5] !== 0x65 ) { continue; }
/* t */ if ( bytes[i+6] !== 0x74 ) { continue; }
break;
} }
if ( (i - 40) >= 65536 ) { return; }
i += 8;
// find first alpha character
let j = 0;
while ( j < 8 ) {
j += 1;
const c = bytes[i+j];
if ( c >= 0x41 && c <= 0x5A ) { break; }
if ( c >= 0x61 && c <= 0x7A ) { break; }
j += 1;
}
if ( j === 8 ) { return; }
i += j;
// Collect characters until first non charset-name-character
const chars = [];
j = 0;
while ( j < 24 ) {
const c = bytes[i+j];
if ( c < 0x2D ) { break; }
if ( c > 0x2D && c < 0x30 ) { break; }
if ( c > 0x39 && c < 0x41 ) { break; }
if ( c > 0x5A && c < 0x61 ) { break; }
if ( c > 0x7A ) { break; }
chars.push(c);
j += 1;
}
if ( j === 20 ) { return; }
return String.fromCharCode(...chars).toLowerCase();
}; };
const streamClose = function(filterer, buffer) { const streamClose = (session, buffer) => {
if ( buffer !== undefined ) { if ( buffer !== undefined ) {
filterer.stream.write(buffer); session.stream.write(buffer);
} else if ( filterer.buffer !== undefined ) { } else if ( session.buffer !== undefined ) {
filterer.stream.write(filterer.buffer); session.stream.write(session.buffer);
} }
filterer.stream.close(); session.stream.close();
}; };
const onStreamData = function(ev) { const onStreamData = function(ev) {
const filterer = filterers.get(this); const session = sessions.get(this);
if ( filterer === undefined ) { if ( session === undefined ) {
this.write(ev.data); this.write(ev.data);
this.disconnect(); this.disconnect();
return; return;
} }
if ( if ( this.status !== 'transferringdata' ) {
this.status !== 'transferringdata' && if ( this.status !== 'finishedtransferringdata' ) {
this.status !== 'finishedtransferringdata' sessions.delete(this);
) { this.disconnect();
filterers.delete(this); return;
this.disconnect(); }
return;
} }
// TODO: if ( session.buffer === null ) {
// - Possibly improve buffer growth, if benchmarking shows it's worth session.buffer = new Uint8Array(ev.data);
// it.
// - Also evaluate whether keeping a list of buffers and then decoding
// them in sequence using TextDecoder's "stream" option is more
// efficient. Can the data buffers be safely kept around for later
// use?
// - Informal, quick benchmarks seem to show most of the overhead is
// from calling TextDecoder.decode() and TextEncoder.encode(), and if
// confirmed, there is nothing which can be done uBO-side to reduce
// overhead.
if ( filterer.buffer === null ) {
filterer.buffer = new Uint8Array(ev.data);
return; return;
} }
const buffer = new Uint8Array( const buffer = new Uint8Array(
filterer.buffer.byteLength + session.buffer.byteLength + ev.data.byteLength
ev.data.byteLength
); );
buffer.set(filterer.buffer); buffer.set(session.buffer);
buffer.set(new Uint8Array(ev.data), filterer.buffer.byteLength); buffer.set(new Uint8Array(ev.data), session.buffer.byteLength);
filterer.buffer = buffer; session.buffer = buffer;
}; };
const onStreamStop = function() { const onStreamStop = function() {
const filterer = filterers.get(this); const session = sessions.get(this);
filterers.delete(this); sessions.delete(this);
if ( filterer === undefined || filterer.buffer === null ) { if ( session === undefined || session.buffer === null ) {
this.close(); this.close();
return; return;
} }
if ( this.status !== 'finishedtransferringdata' ) { return; } if ( this.status !== 'finishedtransferringdata' ) { return; }
if ( domParser === undefined ) { // If encoding is still unknown, try to extract from stream data
domParser = new DOMParser(); if ( session.charset === undefined ) {
xmlSerializer = new XMLSerializer(); const charsetFound = charsetFromStream(session.buffer);
} if ( charsetFound === undefined ) { return streamClose(session); }
if ( textEncoder === undefined ) { const charsetUsed = textEncode.normalizeCharset(charsetFound);
textEncoder = new TextEncoder(); if ( charsetUsed === undefined ) { return streamClose(session); }
} session.charset = charsetUsed;
let doc;
// If stream encoding is still unknnown, try to extract from document.
let charsetFound = filterer.charset,
charsetUsed = charsetFound;
if ( charsetFound === undefined ) {
if ( utf8TextDecoder === undefined ) {
utf8TextDecoder = new TextDecoder();
}
doc = domParser.parseFromString(
utf8TextDecoder.decode(filterer.buffer.slice(0, 1024)),
filterer.mime
);
charsetFound = charsetFromDoc(doc);
charsetUsed = textEncode.normalizeCharset(charsetFound);
if ( charsetUsed === undefined ) {
return streamClose(filterer);
}
}
doc = domParser.parseFromString(
textDecode(charsetUsed, filterer.buffer),
filterer.mime
);
// https://github.com/gorhill/uBlock/issues/3507
// In case of no explicit charset found, try to find one again, but
// this time with the whole document parsed.
if ( charsetFound === undefined ) {
charsetFound = textEncode.normalizeCharset(charsetFromDoc(doc));
if ( charsetFound !== charsetUsed ) {
if ( charsetFound === undefined ) {
return streamClose(filterer);
}
charsetUsed = charsetFound;
doc = domParser.parseFromString(
textDecode(charsetFound, filterer.buffer),
filterer.mime
);
}
} }
let modified = false; let modified = false;
if ( filterer.selectors !== undefined ) { while ( session.jobs.length !== 0 ) {
if ( htmlFilteringEngine.apply(doc, filterer) ) { const job = session.jobs.shift();
modified = true; modified = job.fn(session, ...job.args) || modified;
} }
if ( modified !== true ) { return streamClose(session); }
if ( textEncoder === undefined ) {
textEncoder = new TextEncoder();
}
let encodedStream = textEncoder.encode(session.str);
if ( session.charset !== 'utf-8' ) {
encodedStream = textEncode.encode(session.charset, encodedStream);
} }
if ( modified === false ) { streamClose(session, encodedStream);
return streamClose(filterer);
}
// https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
const doctypeStr = doc.doctype instanceof Object ?
xmlSerializer.serializeToString(doc.doctype) + '\n' :
'';
// https://github.com/gorhill/uBlock/issues/3391
let encodedStream = textEncoder.encode(
doctypeStr +
doc.documentElement.outerHTML
);
if ( charsetUsed !== 'utf-8' ) {
encodedStream = textEncode.encode(
charsetUsed,
encodedStream
);
}
streamClose(filterer, encodedStream);
}; };
const onStreamError = function() { const onStreamError = function() {
filterers.delete(this); sessions.delete(this);
}; };
return function(fctxt, extras) { return class Session extends µb.FilteringContext {
// https://github.com/gorhill/uBlock/issues/3478 constructor(fctxt, details, mime, charset) {
const statusCode = extras.statusCode || 0; super(fctxt);
if ( statusCode !== 0 && (statusCode < 200 || statusCode >= 300) ) { this.entity = entityFromDomain(this.getDomain());
return; this.stream = null;
this.buffer = null;
this.mime = mime;
this.charset = charset;
this.str = null;
this.jobs = [];
} }
getString() {
const hostname = fctxt.getHostname(); if ( this.str !== null ) { return this.str; }
if ( hostname === '' ) { return; } if ( textDecoder !== undefined ) {
if ( textDecoder.encoding !== this.charset ) {
const domain = fctxt.getDomain(); textDecoder = undefined;
}
const request = {
stream: undefined,
tabId: fctxt.tabId,
url: fctxt.url,
hostname: hostname,
domain: domain,
entity: entityFromDomain(domain),
selectors: undefined,
buffer: null,
mime: 'text/html',
charset: undefined
};
request.selectors = htmlFilteringEngine.retrieve(request);
if ( request.selectors === undefined ) { return; }
const headers = extras.responseHeaders;
const contentType = headerValueFromName('content-type', headers);
if ( contentType !== '' ) {
request.mime = mimeFromContentType(contentType);
if ( request.mime === undefined ) { return; }
let charset = charsetFromContentType(contentType);
if ( charset !== undefined ) {
charset = textEncode.normalizeCharset(charset);
if ( charset === undefined ) { return; }
request.charset = charset;
} }
if ( textDecoder === undefined ) {
textDecoder = new TextDecoder(this.charset);
}
this.str = textDecoder.decode(this.buffer);
return this.str;
} }
// https://bugzilla.mozilla.org/show_bug.cgi?id=1426789 setString(s) {
const disposition = headerValueFromName('content-disposition', headers); this.str = s;
if ( disposition !== '' && disposition.startsWith('inline') === false ) { return; } }
addJob(job) {
this.jobs.push(job);
}
launch() {
if ( this.jobs.length === 0 ) { return; }
this.stream = browser.webRequest.filterResponseData(this.id);
this.stream.ondata = onStreamData;
this.stream.onstop = onStreamStop;
this.stream.onerror = onStreamError;
sessions.set(this.stream, this);
return true;
}
static canFilter(fctxt, details) {
if ( µb.canFilterResponseData !== true ) { return; }
const stream = request.stream = // https://github.com/gorhill/uBlock/issues/3478
browser.webRequest.filterResponseData(extras.requestId); const statusCode = details.statusCode || 0;
stream.ondata = onStreamData; if ( statusCode !== 0 && (statusCode < 200 || statusCode >= 300) ) {
stream.onstop = onStreamStop; return;
stream.onerror = onStreamError; }
filterers.set(stream, request);
return true; const hostname = fctxt.getHostname();
if ( hostname === '' ) { return; }
// https://bugzilla.mozilla.org/show_bug.cgi?id=1426789
const headers = details.responseHeaders;
const disposition = headerValueFromName('content-disposition', headers);
if ( disposition !== '' && disposition.startsWith('inline') === false ) {
return;
}
const contentType = headerValueFromName('content-type', headers);
let mime, charset;
if ( contentType !== '' ) {
mime = mimeFromContentType(contentType);
if ( mime === undefined ) { return; }
charset = charsetFromContentType(contentType);
if ( charset !== undefined ) {
charset = textEncode.normalizeCharset(charset);
if ( charset === undefined ) { return; }
}
}
if ( mime.startsWith('text/') === false ) {
if ( otherValidMimes.has(mime) === false ) { return; }
}
return new Session(fctxt, details, mime, charset);
}
}; };
})(); })();