1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-15 15:32:28 +02:00

Expand parser's ability to process static extended filtering

This commit moves some of the parsing logic of static
extended filtering into the static filtering parser; this
allows better syntax highlighting and creation-time
error-catching for cosmetic, HTML, and scriptlet filters.
This commit is contained in:
Raymond Hill 2020-06-13 08:48:56 -04:00
parent 681bd70116
commit 1a082e0581
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
10 changed files with 879 additions and 862 deletions

View File

@ -32,9 +32,12 @@
text-decoration-style: solid;
text-decoration-line: underline;
}
.cm-s-default .cm-error {
color: inherit;
}
.cm-s-default .cm-error,
.CodeMirror-linebackground.error {
background-color: #ff000018;
background-color: #ff000016;
text-decoration: underline red;
text-underline-position: under;
}

View File

@ -29,54 +29,35 @@ CodeMirror.defineMode("ubo-static-filtering", function() {
let parserSlot = 0;
let netOptionValueMode = false;
const colorSpan = function(stream) {
if ( parser.category === parser.CATNone || parser.shouldIgnore() ) {
stream.skipToEnd();
return 'comment';
}
if ( parser.category === parser.CATComment ) {
stream.skipToEnd();
return reDirective.test(stream.string)
? 'variable strong'
: 'comment';
}
if ( (parser.slices[parserSlot] & parser.BITIgnore) !== 0 ) {
const colorExtSpan = function(stream) {
if ( parserSlot < parser.optionsAnchorSpan.i ) {
const style = (parser.slices[parserSlot] & parser.BITComma) === 0
? 'string-2'
: 'def';
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return 'comment';
return style;
}
if ( (parser.slices[parserSlot] & parser.BITError) !== 0 ) {
if (
parserSlot >= parser.optionsAnchorSpan.i &&
parserSlot < parser.patternSpan.i
) {
const style = (parser.flavorBits & parser.BITFlavorException) !== 0
? 'tag'
: 'def';
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return 'error';
return `${style} strong`;
}
if ( parser.category === parser.CATStaticExtFilter ) {
if ( parserSlot < parser.optionsAnchorSpan.i ) {
const style = (parser.slices[parserSlot] & parser.BITComma) === 0
? 'string-2'
: 'def';
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return style;
}
if (
parserSlot >= parser.optionsAnchorSpan.i &&
parserSlot < parser.patternSpan.i
) {
const style = (parser.flavorBits & parser.BITFlavorException) !== 0
? 'tag'
: 'def';
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return `${style} strong`;
}
if ( parserSlot >= parser.patternSpan.i ) {
stream.skipToEnd();
return 'variable';
}
if ( parserSlot >= parser.patternSpan.i ) {
stream.skipToEnd();
return '';
return 'variable';
}
stream.skipToEnd();
return '';
};
const colorNetSpan = function(stream) {
if ( parserSlot < parser.exceptionSpan.i ) {
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
@ -165,6 +146,37 @@ CodeMirror.defineMode("ubo-static-filtering", function() {
return '';
};
const colorSpan = function(stream) {
if ( parser.category === parser.CATNone || parser.shouldIgnore() ) {
stream.skipToEnd();
return 'comment';
}
if ( parser.category === parser.CATComment ) {
stream.skipToEnd();
return reDirective.test(stream.string)
? 'variable strong'
: 'comment';
}
if ( (parser.slices[parserSlot] & parser.BITIgnore) !== 0 ) {
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return 'comment';
}
if ( (parser.slices[parserSlot] & parser.BITError) !== 0 ) {
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return 'error';
}
if ( parser.category === parser.CATStaticExtFilter ) {
return colorExtSpan(stream);
}
if ( parser.category === parser.CATStaticNetFilter ) {
return colorNetSpan(stream);
}
stream.skipToEnd();
return null;
};
return {
token: function(stream) {
if ( stream.sol() ) {

View File

@ -205,13 +205,7 @@ const FilterContainer = function() {
this.specificFilters = new µb.staticExtFilteringEngine.HostnameBasedDB(2);
// temporary filters
this.sessionFilterDB = new (
class extends µb.staticExtFilteringEngine.SessionDB {
compile(s) {
return µb.staticExtFilteringEngine.compileSelector(s);
}
}
)();
this.sessionFilterDB = new µb.staticExtFilteringEngine.SessionDB();
// low generic cosmetic filters, organized by id/class then simple/complex.
this.lowlyGeneric = Object.create(null);
@ -351,14 +345,12 @@ FilterContainer.prototype.keyFromSelector = function(selector) {
/******************************************************************************/
FilterContainer.prototype.compile = function(parsed, writer) {
FilterContainer.prototype.compile = function(parser, writer) {
// 1000 = cosmetic filtering
writer.select(1000);
const hostnames = parsed.hostnames;
let i = hostnames.length;
if ( i === 0 ) {
this.compileGenericSelector(parsed, writer);
if ( parser.hasOptions() === false ) {
this.compileGenericSelector(parser, writer);
return true;
}
@ -366,15 +358,15 @@ FilterContainer.prototype.compile = function(parsed, writer) {
// Negated hostname means the filter applies to all non-negated hostnames
// of same filter OR globally if there is no non-negated hostnames.
let applyGlobally = true;
while ( i-- ) {
const hostname = hostnames[i];
if ( hostname.startsWith('~') === false ) {
for ( const { hn, not, bad } of parser.extOptions() ) {
if ( bad ) { continue; }
if ( not === false ) {
applyGlobally = false;
}
this.compileSpecificSelector(hostname, parsed, writer);
this.compileSpecificSelector(parser, hn, not, writer);
}
if ( applyGlobally ) {
this.compileGenericSelector(parsed, writer);
this.compileGenericSelector(parser, writer);
}
return true;
@ -382,22 +374,31 @@ FilterContainer.prototype.compile = function(parsed, writer) {
/******************************************************************************/
FilterContainer.prototype.compileGenericSelector = function(parsed, writer) {
if ( parsed.exception === false ) {
this.compileGenericHideSelector(parsed, writer);
FilterContainer.prototype.compileGenericSelector = function(parser, writer) {
if ( parser.isException() ) {
this.compileGenericUnhideSelector(parser, writer);
} else {
this.compileGenericUnhideSelector(parsed, writer);
this.compileGenericHideSelector(parser, writer);
}
};
/******************************************************************************/
FilterContainer.prototype.compileGenericHideSelector = function(
parsed,
parser,
writer
) {
const selector = parsed.suffix;
const type = selector.charCodeAt(0);
const { raw, compiled, pseudoclass } = parser.result;
if ( compiled === undefined ) {
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
realm: 'message',
type: 'error',
text: `Invalid generic cosmetic filter in ${who}: ${raw}`
});
}
const type = compiled.charCodeAt(0);
let key;
// Simple selector-based CSS rule: no need to test for whether the
@ -406,21 +407,19 @@ FilterContainer.prototype.compileGenericHideSelector = function(
// - ###ad-bigbox
// - ##.ads-bigbox
if ( type === 0x23 /* '#' */ ) {
key = this.keyFromSelector(selector);
if ( key === selector ) {
key = this.keyFromSelector(compiled);
if ( key === compiled ) {
writer.push([ 0, key.slice(1) ]);
return;
}
} else if ( type === 0x2E /* '.' */ ) {
key = this.keyFromSelector(selector);
if ( key === selector ) {
key = this.keyFromSelector(compiled);
if ( key === compiled ) {
writer.push([ 2, key.slice(1) ]);
return;
}
}
const compiled = µb.staticExtFilteringEngine.compileSelector(selector);
// Invalid cosmetic filter, possible reasons:
// - Bad syntax
// - Procedural filters (can't be generic): the compiled version of
@ -431,19 +430,15 @@ FilterContainer.prototype.compileGenericHideSelector = function(
// https://github.com/uBlockOrigin/uBlock-issues/issues/131
// Support generic procedural filters as per advanced settings.
// TODO: prevent double compilation.
if (
compiled === undefined ||
compiled !== selector &&
µb.staticExtFilteringEngine.compileSelector.pseudoclass === -1
) {
if ( compiled !== raw && pseudoclass === false ) {
if ( µb.hiddenSettings.allowGenericProceduralFilters === true ) {
return this.compileSpecificSelector('', parsed, writer);
return this.compileSpecificSelector(parser, '', false, writer);
}
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
realm: 'message',
type: 'error',
text: `Invalid generic cosmetic filter in ${who}: ##${selector}`
text: `Invalid generic cosmetic filter in ${who}: ##${raw}`
});
return;
}
@ -455,7 +450,7 @@ FilterContainer.prototype.compileGenericHideSelector = function(
writer.push([
type === 0x23 /* '#' */ ? 1 : 3,
key.slice(1),
selector
compiled
]);
return;
}
@ -463,13 +458,13 @@ FilterContainer.prototype.compileGenericHideSelector = function(
// https://github.com/gorhill/uBlock/issues/909
// Anything which contains a plain id/class selector can be classified
// as a low generic cosmetic filter.
const matches = this.rePlainSelectorEx.exec(selector);
const matches = this.rePlainSelectorEx.exec(compiled);
if ( matches !== null ) {
const key = matches[1] || matches[2];
writer.push([
key.charCodeAt(0) === 0x23 /* '#' */ ? 1 : 3,
key.slice(1),
selector
compiled
]);
return;
}
@ -479,27 +474,27 @@ FilterContainer.prototype.compileGenericHideSelector = function(
// For efficiency purpose, we will distinguish between simple and complex
// selectors.
if ( this.reSimpleHighGeneric.test(selector) ) {
writer.push([ 4 /* simple */, selector ]);
if ( this.reSimpleHighGeneric.test(compiled) ) {
writer.push([ 4 /* simple */, compiled ]);
} else {
writer.push([ 5 /* complex */, selector ]);
writer.push([ 5 /* complex */, compiled ]);
}
};
/******************************************************************************/
FilterContainer.prototype.compileGenericUnhideSelector = function(
parsed,
parser,
writer
) {
// Procedural cosmetic filters are acceptable as generic exception filters.
const compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
const { raw, compiled } = parser.result;
if ( compiled === undefined ) {
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
realm: 'message',
type: 'error',
text: `Invalid cosmetic filter in ${who}: #@#${parsed.suffix}`
text: `Invalid cosmetic filter in ${who}: #@#${raw}`
});
return;
}
@ -516,28 +511,25 @@ FilterContainer.prototype.compileGenericUnhideSelector = function(
/******************************************************************************/
FilterContainer.prototype.compileSpecificSelector = function(
parser,
hostname,
parsed,
not,
writer
) {
// https://github.com/chrisaljoudi/uBlock/issues/145
let unhide = parsed.exception ? 1 : 0;
if ( hostname.startsWith('~') ) {
hostname = hostname.slice(1);
unhide ^= 1;
}
const compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
const { raw, compiled, exception } = parser.result;
if ( compiled === undefined ) {
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
realm: 'message',
type: 'error',
text: `Invalid cosmetic filter in ${who}: ##${parsed.suffix}`
text: `Invalid cosmetic filter in ${who}: ##${raw}`
});
return;
}
// https://github.com/chrisaljoudi/uBlock/issues/145
let unhide = exception ? 1 : 0;
if ( not ) { unhide ^= 1; }
let kind = 0;
if ( unhide === 1 ) {

View File

@ -29,13 +29,7 @@
const duplicates = new Set();
const filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(2);
const sessionFilterDB = new (
class extends µb.staticExtFilteringEngine.SessionDB {
compile(s) {
return µb.staticExtFilteringEngine.compileSelector(s.slice(1));
}
}
)();
const sessionFilterDB = new µb.staticExtFilteringEngine.SessionDB();
let acceptedCount = 0;
let discardedCount = 0;
@ -298,15 +292,14 @@
filterDB.collectGarbage();
};
api.compile = function(parsed, writer) {
const selector = parsed.suffix.slice(1).trim();
const compiled = µb.staticExtFilteringEngine.compileSelector(selector);
api.compile = function(parser, writer) {
const { raw, compiled, exception } = parser.result;
if ( compiled === undefined ) {
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
realm: 'message',
type: 'error',
text: `Invalid HTML filter in ${who}: ##${selector}`
text: `Invalid HTML filter in ${who}: ##${raw}`
});
return;
}
@ -316,10 +309,9 @@
// TODO: Mind negated hostnames, they are currently discarded.
for ( const hn of parsed.hostnames ) {
if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) { continue; }
for ( const { hn } of parser.extOptions() ) {
let kind = 0;
if ( parsed.exception ) {
if ( exception ) {
kind |= 0b01;
}
if ( compiled.charCodeAt(0) === 0x7B /* '{' */ ) {

View File

@ -750,11 +750,14 @@ const onMessage = function(request, sender, callback) {
let response;
switch ( request.what ) {
case 'compileCosmeticFilterSelector':
response = µb.staticExtFilteringEngine.compileSelector(
request.selector
);
case 'compileCosmeticFilterSelector': {
const parser = new vAPI.StaticFilteringParser();
parser.analyze(request.selector);
if ( (parser.flavorBits & parser.BITFlavorExtCosmetic) !== 0 ) {
response = parser.result.compiled;
}
break;
}
// https://github.com/gorhill/uBlock/issues/3497
// This needs to be removed once issue is fixed.
@ -1302,20 +1305,19 @@ const getURLFilteringData = function(details) {
};
const compileTemporaryException = function(filter) {
const match = /#@?#/.exec(filter);
if ( match === null ) { return; }
let selector = filter.slice(match.index + match[0].length).trim();
const parser = new vAPI.StaticFilteringParser();
parser.analyze(filter);
if ( parser.shouldDiscard() ) { return {}; }
let selector = parser.result.compiled;
let session;
if ( selector.startsWith('+js') ) {
if ( (parser.flavorBits & parser.BITFlavorExtScriptlet) !== 0 ) {
session = µb.scriptletFilteringEngine.getSession();
} else if ( (parser.flavorBits & parser.BITFlavorExtHTML) !== 0 ) {
session = µb.htmlFilteringEngine.getSession();
} else {
if ( selector.startsWith('^') ) {
session = µb.htmlFilteringEngine.getSession();
} else {
session = µb.cosmeticFilteringEngine.getSession();
}
session = µb.cosmeticFilteringEngine.getSession();
}
return { session, selector: session.compile(selector) };
return { session, selector };
};
const toggleTemporaryException = function(details) {

View File

@ -30,13 +30,7 @@
const reEscapeScriptArg = /[\\'"]/g;
const scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(1);
const sessionScriptletDB = new (
class extends µb.staticExtFilteringEngine.SessionDB {
compile(s) {
return s.slice(4, -1).trim();
}
}
)();
const sessionScriptletDB = new µb.staticExtFilteringEngine.SessionDB();
let acceptedCount = 0;
let discardedCount = 0;
@ -177,6 +171,7 @@
};
})();
// TODO: Probably should move this into StaticFilteringParser
const normalizeRawFilter = function(rawFilter) {
let rawToken = rawFilter.slice(4, -1);
let rawEnd = rawToken.length;
@ -288,20 +283,19 @@
scriptletDB.collectGarbage();
};
api.compile = function(parsed, writer) {
api.compile = function(parser, writer) {
// 1001 = scriptlet injection
writer.select(1001);
// Only exception filters are allowed to be global.
const normalized = normalizeRawFilter(parsed.suffix);
const { raw, exception } = parser.result;
const normalized = normalizeRawFilter(raw);
// Tokenless is meaningful only for exception filters.
if ( normalized === '+js()' && parsed.exception === false ) {
return;
}
if ( normalized === '+js()' && exception === false ) { return; }
if ( parsed.hostnames.length === 0 ) {
if ( parsed.exception ) {
if ( parser.hasOptions() === false ) {
if ( exception ) {
writer.push([ 32, '', 1, normalized ]);
}
return;
@ -311,16 +305,12 @@
// Ignore instances of exception filter with negated hostnames,
// because there is no way to create an exception to an exception.
for ( let hn of parsed.hostnames ) {
const negated = hn.charCodeAt(0) === 0x7E /* '~' */;
if ( negated ) {
hn = hn.slice(1);
}
for ( const { hn, not } of parser.extOptions() ) {
let kind = 0;
if ( parsed.exception ) {
if ( negated ) { continue; }
if ( exception ) {
if ( not ) { continue; }
kind |= 1;
} else if ( negated ) {
} else if ( not ) {
kind |= 1;
}
writer.push([ 32, hn, kind, normalized ]);

View File

@ -778,8 +778,7 @@ const filterToDOMInterface = (( ) => {
callback(lastResultset);
return;
}
const selector = filter.slice(2);
lastResultset = fromPlainCosmeticFilter(selector);
lastResultset = fromPlainCosmeticFilter(filter.slice(2));
if ( lastResultset ) {
if ( previewing ) { apply(); }
callback(lastResultset);
@ -788,7 +787,7 @@ const filterToDOMInterface = (( ) => {
// Procedural cosmetic filter
const response = await vAPI.messaging.send('elementPicker', {
what: 'compileCosmeticFilterSelector',
selector,
selector: filter,
});
lastResultset = fromCompiledCosmeticFilter(response);
if ( previewing ) { apply(); }

View File

@ -19,8 +19,6 @@
Home: https://github.com/gorhill/uBlock
*/
/* global punycode */
'use strict';
/*******************************************************************************
@ -52,517 +50,6 @@
µBlock.staticExtFilteringEngine = (( ) => {
const µb = µBlock;
const reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/;
const emptyArray = [];
const parsed = {
exception: false,
hostnames: [],
suffix: ''
};
// To be called to ensure no big parent string of a string slice is
// left into memory after parsing filter lists is over.
const resetParsed = function() {
parsed.hostnames = [];
parsed.suffix = '';
};
const cssPseudoSelector = (( ) => {
const rePseudo = /:(?::?after|:?before|:[a-z][a-z-]*[a-z])$/;
return function(s) {
if ( s.lastIndexOf(':') === -1 ) { return -1; }
const match = rePseudo.exec(s);
return match !== null ? match.index : -1;
};
})();
// Return value:
// 0b00 (0) = not a valid CSS selector
// 0b01 (1) = valid CSS selector, without pseudo-element
// 0b11 (3) = valid CSS selector, with pseudo element
const cssSelectorType = (( ) => {
const div = document.createElement('div');
// Keep in mind:
// https://github.com/gorhill/uBlock/issues/693
// https://github.com/gorhill/uBlock/issues/1955
// https://github.com/gorhill/uBlock/issues/3111
// Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817
// is fixed.
let matchFn;
try {
div.matches(':scope');
matchFn = div.matches.bind(div);
} catch (ex) {
matchFn = div.querySelector.bind(div);
}
// Quick regex-based validation -- most cosmetic filters are of the
// simple form and in such case a regex is much faster.
const reSimple = /^[#.][A-Za-z_][\w-]*$/;
return s => {
if ( reSimple.test(s) ) { return 1; }
const pos = cssPseudoSelector(s);
if ( pos !== -1 ) {
return cssSelectorType(s.slice(0, pos)) === 1 ? 3 : 0;
}
try {
matchFn(`${s}, ${s}:not(#foo)`);
} catch (ex) {
return 0;
}
return 1;
};
})();
const isBadRegex = function(s) {
try {
void new RegExp(s);
} catch (ex) {
isBadRegex.message = ex.toString();
return true;
}
return false;
};
const translateAdguardCSSInjectionFilter = function(suffix) {
const matches = /^([^{]+)\{([^}]+)\}\s*$/.exec(suffix);
if ( matches === null ) { return ''; }
const selector = matches[1].trim();
const style = matches[2].trim();
// Special style directive `remove: true` is converted into a
// `:remove()` operator.
if ( /^\s*remove:\s*true[; ]*$/.test(style) ) {
return `${selector}:remove()`;
}
// For some reasons, many of Adguard's plain cosmetic filters are
// "disguised" as style-based cosmetic filters: convert such filters
// to plain cosmetic filters.
return /display\s*:\s*none\s*!important;?$/.test(style)
? selector
: `${selector}:style(${style})`;
};
const hostnamesFromPrefix = function(parser) {
const hostnames = [];
const hasUnicode = parser.optionHasUnicode();
for ( let { hn, not } of parser.options() ) {
hn = hn.trim();
if ( hn.length === 0 ) { continue; }
if ( hasUnicode ) {
hn = punycode.toASCII(hn);
}
hostnames.push(not ? `~${hn}` : hn);
}
return hostnames;
};
const compileProceduralSelector = (( ) => {
const reProceduralOperator = new RegExp([
'^(?:',
[
'-abp-contains',
'-abp-has',
'contains',
'has',
'has-text',
'if',
'if-not',
'matches-css',
'matches-css-after',
'matches-css-before',
'min-text-length',
'not',
'nth-ancestor',
'remove',
'style',
'upward',
'watch-attr',
'watch-attrs',
'xpath'
].join('|'),
')\\('
].join(''));
const reEatBackslashes = /\\([()])/g;
const reEscapeRegex = /[.*+?^${}()|[\]\\]/g;
const reNeedScope = /^\s*>/;
const reIsDanglingSelector = /[+>~\s]\s*$/;
const reIsSiblingSelector = /^\s*[+~]/;
const regexToRawValue = new Map();
let lastProceduralSelector = '',
lastProceduralSelectorCompiled;
// When dealing with literal text, we must first eat _some_
// backslash characters.
const compileText = function(s) {
const match = reParseRegexLiteral.exec(s);
let regexDetails;
if ( match !== null ) {
regexDetails = match[1];
if ( isBadRegex(regexDetails) ) { return; }
if ( match[2] ) {
regexDetails = [ regexDetails, match[2] ];
}
} else {
regexDetails = s.replace(reEatBackslashes, '$1')
.replace(reEscapeRegex, '\\$&');
regexToRawValue.set(regexDetails, s);
}
return regexDetails;
};
const compileCSSDeclaration = function(s) {
const pos = s.indexOf(':');
if ( pos === -1 ) { return; }
const name = s.slice(0, pos).trim();
const value = s.slice(pos + 1).trim();
const match = reParseRegexLiteral.exec(value);
let regexDetails;
if ( match !== null ) {
regexDetails = match[1];
if ( isBadRegex(regexDetails) ) { return; }
if ( match[2] ) {
regexDetails = [ regexDetails, match[2] ];
}
} else {
regexDetails = '^' + value.replace(reEscapeRegex, '\\$&') + '$';
regexToRawValue.set(regexDetails, value);
}
return { name: name, value: regexDetails };
};
const compileConditionalSelector = function(s) {
// https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277
// Prepend `:scope ` if needed.
if ( reNeedScope.test(s) ) {
s = `:scope ${s}`;
}
return compile(s);
};
const compileInteger = function(s, min = 0, max = 0x7FFFFFFF) {
if ( /^\d+$/.test(s) === false ) { return; }
const n = parseInt(s, 10);
if ( n < min || n >= max ) { return; }
return n;
};
const compileNotSelector = function(s) {
// https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588
// Reject instances of :not() filters for which the argument is
// a valid CSS selector, otherwise we would be adversely
// changing the behavior of CSS4's :not().
if ( cssSelectorType(s) === 0 ) {
return compileConditionalSelector(s);
}
};
const compileUpwardArgument = function(s) {
const i = compileInteger(s, 1, 256);
if ( i !== undefined ) { return i; }
if ( cssSelectorType(s) === 1 ) { return s; }
};
const compileRemoveSelector = function(s) {
if ( s === '' ) { return s; }
};
const compileSpathExpression = function(s) {
if ( cssSelectorType('*' + s) === 1 ) {
return s;
}
};
const compileStyleProperties = (( ) => {
let div;
// https://github.com/uBlockOrigin/uBlock-issues/issues/668
return function(s) {
if ( /url\(|\\/i.test(s) ) { return; }
if ( div === undefined ) {
div = document.createElement('div');
}
div.style.cssText = s;
if ( div.style.cssText === '' ) { return; }
div.style.cssText = '';
return s;
};
})();
const compileAttrList = function(s) {
const attrs = s.split('\s*,\s*');
const out = [];
for ( const attr of attrs ) {
if ( attr !== '' ) {
out.push(attr);
}
}
return out;
};
const compileXpathExpression = function(s) {
try {
document.createExpression(s, null);
} catch (e) {
return;
}
return s;
};
// https://github.com/gorhill/uBlock/issues/2793
const normalizedOperators = new Map([
[ ':-abp-contains', ':has-text' ],
[ ':-abp-has', ':has' ],
[ ':contains', ':has-text' ],
[ ':nth-ancestor', ':upward' ],
[ ':watch-attrs', ':watch-attr' ],
]);
const compileArgument = new Map([
[ ':has', compileConditionalSelector ],
[ ':has-text', compileText ],
[ ':if', compileConditionalSelector ],
[ ':if-not', compileConditionalSelector ],
[ ':matches-css', compileCSSDeclaration ],
[ ':matches-css-after', compileCSSDeclaration ],
[ ':matches-css-before', compileCSSDeclaration ],
[ ':min-text-length', compileInteger ],
[ ':not', compileNotSelector ],
[ ':remove', compileRemoveSelector ],
[ ':spath', compileSpathExpression ],
[ ':style', compileStyleProperties ],
[ ':upward', compileUpwardArgument ],
[ ':watch-attr', compileAttrList ],
[ ':xpath', compileXpathExpression ],
]);
const actionOperators = new Set([
':remove',
':style',
]);
// https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387
// Normalize (somewhat) the stringified version of procedural
// cosmetic filters -- this increase the likelihood of detecting
// duplicates given that uBO is able to understand syntax specific
// to other blockers.
// The normalized string version is what is reported in the logger,
// by design.
const decompile = function(compiled) {
const tasks = compiled.tasks;
if ( Array.isArray(tasks) === false ) {
return compiled.selector;
}
const raw = [ compiled.selector ];
let value;
for ( const task of tasks ) {
switch ( task[0] ) {
case ':has':
case ':if':
raw.push(`:has(${decompile(task[1])})`);
break;
case ':has-text':
if ( Array.isArray(task[1]) ) {
value = `/${task[1][0]}/${task[1][1]}`;
} else {
value = regexToRawValue.get(task[1]);
if ( value === undefined ) {
value = `/${task[1]}/`;
}
}
raw.push(`:has-text(${value})`);
break;
case ':matches-css':
case ':matches-css-after':
case ':matches-css-before':
if ( Array.isArray(task[1].value) ) {
value = `/${task[1].value[0]}/${task[1].value[1]}`;
} else {
value = regexToRawValue.get(task[1].value);
if ( value === undefined ) {
value = `/${task[1].value}/`;
}
}
raw.push(`${task[0]}(${task[1].name}: ${value})`);
break;
case ':not':
case ':if-not':
raw.push(`:not(${decompile(task[1])})`);
break;
case ':spath':
raw.push(task[1]);
break;
case ':min-text-length':
case ':remove':
case ':style':
case ':upward':
case ':watch-attr':
case ':xpath':
raw.push(`${task[0]}(${task[1]})`);
break;
}
}
return raw.join('');
};
const compile = function(raw, root = false) {
if ( raw === '' ) { return; }
const tasks = [];
const n = raw.length;
let prefix = '';
let i = 0;
let opPrefixBeg = 0;
let action;
for (;;) {
let c, match;
// Advance to next operator.
while ( i < n ) {
c = raw.charCodeAt(i++);
if ( c === 0x3A /* ':' */ ) {
match = reProceduralOperator.exec(raw.slice(i));
if ( match !== null ) { break; }
}
}
if ( i === n ) { break; }
const opNameBeg = i - 1;
const opNameEnd = i + match[0].length - 1;
i += match[0].length;
// Find end of argument: first balanced closing parenthesis.
// Note: unbalanced parenthesis can be used in a regex literal
// when they are escaped using `\`.
// TODO: need to handle quoted parentheses.
let pcnt = 1;
while ( i < n ) {
c = raw.charCodeAt(i++);
if ( c === 0x5C /* '\\' */ ) {
if ( i < n ) { i += 1; }
} else if ( c === 0x28 /* '(' */ ) {
pcnt +=1 ;
} else if ( c === 0x29 /* ')' */ ) {
pcnt -= 1;
if ( pcnt === 0 ) { break; }
}
}
// Unbalanced parenthesis? An unbalanced parenthesis is fine
// as long as the last character is a closing parenthesis.
if ( pcnt !== 0 && c !== 0x29 ) { return; }
// https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588
// Maybe that one operator is a valid CSS selector and if so,
// then consider it to be part of the prefix.
if ( cssSelectorType(raw.slice(opNameBeg, i)) === 1 ) {
continue;
}
// Extract and remember operator details.
let operator = raw.slice(opNameBeg, opNameEnd);
operator = normalizedOperators.get(operator) || operator;
// Action operator can only be used as trailing operator in the
// root task list.
// Per-operator arguments validation
const args = compileArgument.get(operator)(
raw.slice(opNameEnd + 1, i - 1)
);
if ( args === undefined ) { return; }
if ( opPrefixBeg === 0 ) {
prefix = raw.slice(0, opNameBeg);
} else if ( opNameBeg !== opPrefixBeg ) {
if ( action !== undefined ) { return; }
const spath = compileSpathExpression(
raw.slice(opPrefixBeg, opNameBeg)
);
if ( spath === undefined ) { return; }
tasks.push([ ':spath', spath ]);
}
if ( action !== undefined ) { return; }
tasks.push([ operator, args ]);
if ( actionOperators.has(operator) ) {
if ( root === false ) { return; }
action = operator.slice(1);
}
opPrefixBeg = i;
if ( i === n ) { break; }
}
// No task found: then we have a CSS selector.
// At least one task found: nothing should be left to parse.
if ( tasks.length === 0 ) {
prefix = raw;
} else if ( opPrefixBeg < n ) {
if ( action !== undefined ) { return; }
const spath = compileSpathExpression(raw.slice(opPrefixBeg));
if ( spath === undefined ) { return; }
tasks.push([ ':spath', spath ]);
}
// https://github.com/NanoAdblocker/NanoCore/issues/1#issuecomment-354394894
// https://www.reddit.com/r/uBlockOrigin/comments/c6iem5/
// Convert sibling-selector prefix into :spath operator, but
// only if context is not the root.
if ( prefix !== '' ) {
if ( reIsDanglingSelector.test(prefix) ) { prefix += '*'; }
if ( cssSelectorType(prefix) === 0 ) {
if (
root ||
reIsSiblingSelector.test(prefix) === false ||
compileSpathExpression(prefix) === undefined
) {
return;
}
tasks.unshift([ ':spath', prefix ]);
prefix = '';
}
}
const out = { selector: prefix };
if ( tasks.length !== 0 ) {
out.tasks = tasks;
}
// Expose action to take in root descriptor.
//
// https://github.com/uBlockOrigin/uBlock-issues/issues/961
// https://github.com/uBlockOrigin/uBlock-issues/issues/382
// For the time being, `style` action can't be used in a
// procedural selector.
if ( action !== undefined ) {
if ( tasks.length > 1 && action === 'style' ) { return; }
out.action = action;
}
// Pseudo-selectors are valid only when used in a root task list.
if ( prefix !== '' ) {
const pos = cssPseudoSelector(prefix);
if ( pos !== -1 ) {
if ( root === false ) { return; }
out.pseudo = pos;
}
}
return out;
};
const entryPoint = function(raw) {
if ( raw === lastProceduralSelector ) {
return lastProceduralSelectorCompiled;
}
lastProceduralSelector = raw;
let compiled = compile(raw, true);
if ( compiled !== undefined ) {
compiled.raw = decompile(compiled);
}
lastProceduralSelectorCompiled = compiled;
return compiled;
};
entryPoint.reset = function() {
regexToRawValue.clear();
lastProceduralSelector = '';
lastProceduralSelectorCompiled = undefined;
};
return entryPoint;
})();
//--------------------------------------------------------------------------
// Public API
@ -750,156 +237,40 @@
//--------------------------------------------------------------------------
api.reset = function() {
compileProceduralSelector.reset();
µb.cosmeticFilteringEngine.reset();
µb.scriptletFilteringEngine.reset();
µb.htmlFilteringEngine.reset();
resetParsed(parsed);
};
api.freeze = function() {
compileProceduralSelector.reset();
µb.cosmeticFilteringEngine.freeze();
µb.scriptletFilteringEngine.freeze();
µb.htmlFilteringEngine.freeze();
resetParsed(parsed);
};
// https://github.com/chrisaljoudi/uBlock/issues/1004
// Detect and report invalid CSS selectors.
// Discard new ABP's `-abp-properties` directive until it is
// implemented (if ever). Unlikely, see:
// https://github.com/gorhill/uBlock/issues/1752
// https://github.com/gorhill/uBlock/issues/2624
// Convert Adguard's `-ext-has='...'` into uBO's `:has(...)`.
// https://github.com/uBlockOrigin/uBlock-issues/issues/89
// Do not discard unknown pseudo-elements.
api.compileSelector = (( ) => {
const reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/;
const reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/;
const normalizedExtendedSyntaxOperators = new Map([
[ 'contains', ':has-text' ],
[ 'has', ':has' ],
[ 'matches-css', ':matches-css' ],
[ 'matches-css-after', ':matches-css-after' ],
[ 'matches-css-before', ':matches-css-before' ],
]);
const entryPoint = function(raw) {
entryPoint.pseudoclass = -1;
const extendedSyntax = reExtendedSyntax.test(raw);
if ( cssSelectorType(raw) === 1 && extendedSyntax === false ) {
return raw;
}
// We rarely reach this point -- majority of selectors are plain
// CSS selectors.
// Supported Adguard/ABP advanced selector syntax: will translate
// into uBO's syntax before further processing.
// Mind unsupported advanced selector syntax, such as ABP's
// `-abp-properties`.
// Note: extended selector syntax has been deprecated in ABP, in
// favor of the procedural one (i.e. `:operator(...)`).
// See https://issues.adblockplus.org/ticket/5287
if ( extendedSyntax ) {
let matches;
while ( (matches = reExtendedSyntaxParser.exec(raw)) !== null ) {
const operator = normalizedExtendedSyntaxOperators.get(matches[1]);
if ( operator === undefined ) { return; }
raw = raw.slice(0, matches.index) +
operator + '(' + matches[3] + ')' +
raw.slice(matches.index + matches[0].length);
}
return entryPoint(raw);
}
// Procedural selector?
const compiled = compileProceduralSelector(raw);
if ( compiled === undefined ) { return; }
if ( compiled.pseudo !== undefined ) {
entryPoint.pseudoclass = compiled.pseudo;
}
return JSON.stringify(compiled);
};
entryPoint.pseudoclass = -1;
return entryPoint;
})();
api.compile = function(parser, writer) {
if ( parser.category !== parser.CATStaticExtFilter ) { return false; }
// Adguard's scriptlet injection: not supported.
if ( (parser.flavorBits & parser.BITFlavorUnsupported) !== 0 ) {
return true;
}
// Extract the selector.
let suffix = parser.strFromSpan(parser.patternSpan);
if ( suffix.length === 0 ) { return false; }
parsed.suffix = suffix;
// https://github.com/gorhill/uBlock/issues/952
// Find out whether we are dealing with an Adguard-specific cosmetic
// filter, and if so, translate it if supported, or discard it if not
// supported.
// We have an Adguard/ABP cosmetic filter if and only if the
// character is `$`, `%` or `?`, otherwise it's not a cosmetic
// filter.
// Adguard's style injection: translate to uBO's format.
if ( (parser.flavorBits & parser.BITFlavorExtStyle) !== 0 ) {
suffix = translateAdguardCSSInjectionFilter(suffix);
if ( suffix === '' ) { return true; }
parsed.suffix = suffix;
}
// Exception filter?
parsed.exception = parser.isException();
// Extract the hostname(s), punycode if required.
if ( parser.hasOptions() ) {
parsed.hostnames = hostnamesFromPrefix(parser);
} else {
parsed.hostnames = emptyArray;
}
// Backward compatibility with deprecated syntax.
if ( suffix.startsWith('script:') ) {
if ( suffix.startsWith('script:inject') ) {
suffix = parsed.suffix = '+js' + suffix.slice(13);
} else if ( suffix.startsWith('script:contains') ) {
suffix = parsed.suffix = '^script:has-text' + suffix.slice(15);
}
}
const c0 = suffix.charCodeAt(0);
// New shorter syntax for scriptlet injection engine.
if ( c0 === 0x2B /* '+' */ && suffix.startsWith('+js') ) {
µb.scriptletFilteringEngine.compile(parsed, writer);
// Scriptlet injection
if ( (parser.flavorBits & parser.BITFlavorExtScriptlet) !== 0 ) {
µb.scriptletFilteringEngine.compile(parser, writer);
return true;
}
// HTML filtering engine.
// HTML filtering
// TODO: evaluate converting Adguard's `$$` syntax into uBO's HTML
// filtering syntax.
if ( c0 === 0x5E /* '^' */ ) {
µb.htmlFilteringEngine.compile(parsed, writer);
if ( (parser.flavorBits & parser.BITFlavorExtHTML) !== 0 ) {
µb.htmlFilteringEngine.compile(parser, writer);
return true;
}
// Cosmetic filtering engine.
µb.cosmeticFilteringEngine.compile(parsed, writer);
// Cosmetic filtering
µb.cosmeticFilteringEngine.compile(parser, writer);
return true;
};

View File

@ -105,6 +105,13 @@ const Parser = class {
this.reIsLocalhostRedirect = /(?:0\.0\.0\.0|(?:broadcast|local)host|local|ip6-\w+)\b/;
this.reHostname = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+/;
this.punycoder = new URL(self.location);
// TODO: reuse for network filtering analysis
this.result = {
exception: false,
raw: '',
compiled: '',
pseudoclass: false,
};
this.reset();
}
@ -206,6 +213,7 @@ const Parser = class {
this.patternSpan.i = from + 3;
this.patternSpan.l = this.rightSpaceSpan.i - this.patternSpan.i;
this.category = CATStaticExtFilter;
this.analyzeExtPattern();
return;
}
let flavorBits = 0;
@ -256,13 +264,55 @@ const Parser = class {
this.patternSpan.l = this.rightSpaceSpan.i - to;
this.flavorBits = flavorBits;
this.category = CATStaticExtFilter;
this.analyzeExtPattern();
}
// Use in syntax highlighting contexts
analyzeExtPattern() {
this.result.exception = this.isException();
this.result.compiled = undefined;
this.result.pseudoclass = false;
let selector = this.strFromSpan(this.patternSpan);
if ( selector === '' ) {
this.flavorBits |= BITFlavorUnsupported;
this.result.raw = '';
return;
}
const { i } = this.patternSpan;
// ##+js(...)
if (
hasBits(this.slices[i], BITPlus) &&
selector.startsWith('+js(') && selector.endsWith(')')
) {
this.flavorBits |= BITFlavorExtScriptlet;
this.result.raw = selector;
this.result.compiled = selector.slice(4, -1);
return;
}
// ##^...
if ( hasBits(this.slices[i], BITCaret) ) {
this.flavorBits |= BITFlavorExtHTML;
selector = selector.slice(1);
}
// ##...
else {
this.flavorBits |= BITFlavorExtCosmetic;
}
this.result.raw = selector;
if ( this.compileSelector(selector, this.result) === false ) {
this.flavorBits |= BITFlavorUnsupported;
}
}
// Use in syntax highlighting contexts
analyzeExtExtra() {
const { i, l } = this.optionsSpan;
if ( l === 0 ) { return; }
this.analyzeDomainList(i, i + l, BITComma, true);
if ( this.hasOptions() ) {
const { i, l } = this.optionsSpan;
this.analyzeDomainList(i, i + l, BITComma, 0b11);
}
if ( hasBits(this.flavorBits, BITFlavorUnsupported) ) {
this.markSpan(this.patternSpan, BITError);
}
}
// Static network filters are all of the form:
@ -569,13 +619,13 @@ const Parser = class {
this.netOptionsIterator.init();
}
analyzeDomainList(from, to, bitSeparator, canEntity) {
analyzeDomainList(from, to, bitSeparator, optionBits) {
if ( from >= to ) { return; }
let beg = from;
while ( beg < to ) {
let end = this.skipUntil(beg, to, bitSeparator);
if ( end === -1 ) { end = to; }
if ( this.analyzeDomain(beg, end, canEntity) === false ) {
if ( this.analyzeDomain(beg, end, optionBits) === false ) {
this.markSlices(beg, end, BITError);
}
beg = end + 3;
@ -586,15 +636,29 @@ const Parser = class {
}
}
analyzeDomain(from, to, canEntity) {
// bits:
// 0: can use entity-based hostnames
// 1: can use single wildcard
analyzeDomain(from, to, optionBits) {
const { slices } = this;
const len = to - from;
let len = to - from;
if ( len === 0 ) { return false; }
if ( hasBits(slices[from], BITTilde) ) {
if ( canEntity === false || slices[from+2] > 1 ) { return false; }
const not = hasBits(slices[from], BITTilde);
if ( not ) {
if ( (optionBits & 0b01) === 0 || slices[from+2] > 1 ) { return false; }
from += 3;
len -= 3;
}
if ( len === 0 ) { return false; }
// One slice only, check for single asterisk
if (
len === 3 &&
not === false &&
(optionBits & 0b10) !== 0 &&
hasBits(slices[from], BITAsterisk)
) {
return slices[from+2] === 1;
}
// First slice must be regex-equivalent of `\w`
if ( hasNoBits(slices[from], BITRegexWord | BITUnicode) ) { return false; }
// Last slice
@ -602,7 +666,7 @@ const Parser = class {
const last = to - 3;
if ( hasBits(slices[last], BITAsterisk) ) {
if (
canEntity === false ||
(optionBits & 0b01) === 0 ||
len < 9 ||
slices[last+2] > 1 ||
hasNoBits(slices[last-3], BITPeriod)
@ -618,7 +682,9 @@ const Parser = class {
for ( let i = from + 3; i < to - 3; i += 3 ) {
const bits = slices[i];
if ( hasNoBits(bits, BITHostname) ) { return false; }
if ( hasBits(bits, BITPeriod) && slices[i+2] > 1 ) { return false; }
if ( hasBits(bits, BITPeriod) && slices[i+2] > 1 ) {
return false;
}
if (
hasBits(bits, BITDash) && (
hasNoBits(slices[i-3], BITRegexWord | BITUnicode) ||
@ -786,6 +852,16 @@ const Parser = class {
return this.optionsSpan.l !== 0;
}
getPattern() {
if ( this.pattern !== '' ) { return this.pattern; }
const { i, l } = this.patternSpan;
if ( l === 0 ) { return ''; }
let beg = this.slices[i+1];
let end = this.slices[i+l+1];
this.pattern = this.raw.slice(beg, end);
return this.pattern;
}
getNetPattern() {
if ( this.pattern !== '' ) { return this.pattern; }
const { i, l } = this.patternSpan;
@ -909,13 +985,12 @@ const Parser = class {
return hasBits(this.optionsBits, BITUnicode);
}
options() {
if ( this.category === CATStaticNetFilter ) {
return this.netOptionsIterator;
} else if ( this.category === CATStaticExtFilter ) {
return this.extOptionsIterator;
}
return [];
netOptions() {
return this.netOptionsIterator;
}
extOptions() {
return this.extOptionsIterator;
}
patternTokens() {
@ -972,13 +1047,582 @@ const Parser = class {
hasError() {
return hasBits(this.flavorBits, BITFlavorError);
}
shouldDiscard() {
return hasBits(
this.flavorBits,
BITFlavorError | BITFlavorUnsupported | BITFlavorIgnore
);
}
};
/******************************************************************************/
// https://github.com/chrisaljoudi/uBlock/issues/1004
// Detect and report invalid CSS selectors.
// Discard new ABP's `-abp-properties` directive until it is
// implemented (if ever). Unlikely, see:
// https://github.com/gorhill/uBlock/issues/1752
// https://github.com/gorhill/uBlock/issues/2624
// Convert Adguard's `-ext-has='...'` into uBO's `:has(...)`.
// https://github.com/uBlockOrigin/uBlock-issues/issues/89
// Do not discard unknown pseudo-elements.
Parser.prototype.compileSelector = (( ) => {
const reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/;
const reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/;
const reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/;
const translateAdguardCSSInjectionFilter = function(suffix) {
const matches = /^([^{]+)\{([^}]+)\}\s*$/.exec(suffix);
if ( matches === null ) { return ''; }
const selector = matches[1].trim();
const style = matches[2].trim();
// Special style directive `remove: true` is converted into a
// `:remove()` operator.
if ( /^\s*remove:\s*true[; ]*$/.test(style) ) {
return `${selector}:remove()`;
}
// For some reasons, many of Adguard's plain cosmetic filters are
// "disguised" as style-based cosmetic filters: convert such filters
// to plain cosmetic filters.
return /display\s*:\s*none\s*!important;?$/.test(style)
? selector
: `${selector}:style(${style})`;
};
const normalizedExtendedSyntaxOperators = new Map([
[ 'contains', ':has-text' ],
[ 'has', ':has' ],
[ 'matches-css', ':matches-css' ],
[ 'matches-css-after', ':matches-css-after' ],
[ 'matches-css-before', ':matches-css-before' ],
]);
// Return value:
// 0b00 (0) = not a valid CSS selector
// 0b01 (1) = valid CSS selector, without pseudo-element
// 0b11 (3) = valid CSS selector, with pseudo element
const cssSelectorType = (( ) => {
// Quick regex-based validation -- most cosmetic filters are of the
// simple form and in such case a regex is much faster.
const reSimple = /^[#.][A-Za-z_][\w-]*$/;
const div = document.createElement('div');
// Keep in mind:
// https://github.com/gorhill/uBlock/issues/693
// https://github.com/gorhill/uBlock/issues/1955
// https://github.com/gorhill/uBlock/issues/3111
// Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817
// is fixed.
return s => {
if ( reSimple.test(s) ) { return 1; }
const pos = cssPseudoSelector(s);
if ( pos !== -1 ) {
return cssSelectorType(s.slice(0, pos)) === 1 ? 3 : 0;
}
try {
div.matches(`${s}, ${s}:not(#foo)`);
} catch (ex) {
return 0;
}
return 1;
};
})();
const cssPseudoSelector = (( ) => {
const rePseudo = /:(?::?after|:?before|:[a-z][a-z-]*[a-z])$/;
return function(s) {
if ( s.lastIndexOf(':') === -1 ) { return -1; }
const match = rePseudo.exec(s);
return match !== null ? match.index : -1;
};
})();
const compileProceduralSelector = (( ) => {
const reProceduralOperator = new RegExp([
'^(?:',
[
'-abp-contains',
'-abp-has',
'contains',
'has',
'has-text',
'if',
'if-not',
'matches-css',
'matches-css-after',
'matches-css-before',
'min-text-length',
'not',
'nth-ancestor',
'remove',
'style',
'upward',
'watch-attr',
'watch-attrs',
'xpath'
].join('|'),
')\\('
].join(''));
const reEatBackslashes = /\\([()])/g;
const reEscapeRegex = /[.*+?^${}()|[\]\\]/g;
const reNeedScope = /^\s*>/;
const reIsDanglingSelector = /[+>~\s]\s*$/;
const reIsSiblingSelector = /^\s*[+~]/;
const regexToRawValue = new Map();
const isBadRegex = function(s) {
try {
void new RegExp(s);
} catch (ex) {
isBadRegex.message = ex.toString();
return true;
}
return false;
};
// When dealing with literal text, we must first eat _some_
// backslash characters.
const compileText = function(s) {
const match = reParseRegexLiteral.exec(s);
let regexDetails;
if ( match !== null ) {
regexDetails = match[1];
if ( isBadRegex(regexDetails) ) { return; }
if ( match[2] ) {
regexDetails = [ regexDetails, match[2] ];
}
} else {
regexDetails = s.replace(reEatBackslashes, '$1')
.replace(reEscapeRegex, '\\$&');
regexToRawValue.set(regexDetails, s);
}
return regexDetails;
};
const compileCSSDeclaration = function(s) {
const pos = s.indexOf(':');
if ( pos === -1 ) { return; }
const name = s.slice(0, pos).trim();
const value = s.slice(pos + 1).trim();
const match = reParseRegexLiteral.exec(value);
let regexDetails;
if ( match !== null ) {
regexDetails = match[1];
if ( isBadRegex(regexDetails) ) { return; }
if ( match[2] ) {
regexDetails = [ regexDetails, match[2] ];
}
} else {
regexDetails = '^' + value.replace(reEscapeRegex, '\\$&') + '$';
regexToRawValue.set(regexDetails, value);
}
return { name: name, value: regexDetails };
};
const compileConditionalSelector = function(s) {
// https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277
// Prepend `:scope ` if needed.
if ( reNeedScope.test(s) ) {
s = `:scope ${s}`;
}
return compile(s);
};
const compileInteger = function(s, min = 0, max = 0x7FFFFFFF) {
if ( /^\d+$/.test(s) === false ) { return; }
const n = parseInt(s, 10);
if ( n < min || n >= max ) { return; }
return n;
};
const compileNotSelector = function(s) {
// https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588
// Reject instances of :not() filters for which the argument is
// a valid CSS selector, otherwise we would be adversely
// changing the behavior of CSS4's :not().
if ( cssSelectorType(s) === 0 ) {
return compileConditionalSelector(s);
}
};
const compileUpwardArgument = function(s) {
const i = compileInteger(s, 1, 256);
if ( i !== undefined ) { return i; }
if ( cssSelectorType(s) === 1 ) { return s; }
};
const compileRemoveSelector = function(s) {
if ( s === '' ) { return s; }
};
const compileSpathExpression = function(s) {
if ( cssSelectorType('*' + s) === 1 ) {
return s;
}
};
const compileStyleProperties = (( ) => {
let div;
// https://github.com/uBlockOrigin/uBlock-issues/issues/668
return function(s) {
if ( /url\(|\\/i.test(s) ) { return; }
if ( div === undefined ) {
div = document.createElement('div');
}
div.style.cssText = s;
if ( div.style.cssText === '' ) { return; }
div.style.cssText = '';
return s;
};
})();
const compileAttrList = function(s) {
const attrs = s.split('\s*,\s*');
const out = [];
for ( const attr of attrs ) {
if ( attr !== '' ) {
out.push(attr);
}
}
return out;
};
const compileXpathExpression = function(s) {
try {
document.createExpression(s, null);
} catch (e) {
return;
}
return s;
};
// https://github.com/gorhill/uBlock/issues/2793
const normalizedOperators = new Map([
[ ':-abp-contains', ':has-text' ],
[ ':-abp-has', ':has' ],
[ ':contains', ':has-text' ],
[ ':nth-ancestor', ':upward' ],
[ ':watch-attrs', ':watch-attr' ],
]);
const compileArgument = new Map([
[ ':has', compileConditionalSelector ],
[ ':has-text', compileText ],
[ ':if', compileConditionalSelector ],
[ ':if-not', compileConditionalSelector ],
[ ':matches-css', compileCSSDeclaration ],
[ ':matches-css-after', compileCSSDeclaration ],
[ ':matches-css-before', compileCSSDeclaration ],
[ ':min-text-length', compileInteger ],
[ ':not', compileNotSelector ],
[ ':remove', compileRemoveSelector ],
[ ':spath', compileSpathExpression ],
[ ':style', compileStyleProperties ],
[ ':upward', compileUpwardArgument ],
[ ':watch-attr', compileAttrList ],
[ ':xpath', compileXpathExpression ],
]);
const actionOperators = new Set([
':remove',
':style',
]);
// https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387
// Normalize (somewhat) the stringified version of procedural
// cosmetic filters -- this increase the likelihood of detecting
// duplicates given that uBO is able to understand syntax specific
// to other blockers.
// The normalized string version is what is reported in the logger,
// by design.
const decompile = function(compiled) {
const tasks = compiled.tasks;
if ( Array.isArray(tasks) === false ) {
return compiled.selector;
}
const raw = [ compiled.selector ];
let value;
for ( const task of tasks ) {
switch ( task[0] ) {
case ':has':
case ':if':
raw.push(`:has(${decompile(task[1])})`);
break;
case ':has-text':
if ( Array.isArray(task[1]) ) {
value = `/${task[1][0]}/${task[1][1]}`;
} else {
value = regexToRawValue.get(task[1]);
if ( value === undefined ) {
value = `/${task[1]}/`;
}
}
raw.push(`:has-text(${value})`);
break;
case ':matches-css':
case ':matches-css-after':
case ':matches-css-before':
if ( Array.isArray(task[1].value) ) {
value = `/${task[1].value[0]}/${task[1].value[1]}`;
} else {
value = regexToRawValue.get(task[1].value);
if ( value === undefined ) {
value = `/${task[1].value}/`;
}
}
raw.push(`${task[0]}(${task[1].name}: ${value})`);
break;
case ':not':
case ':if-not':
raw.push(`:not(${decompile(task[1])})`);
break;
case ':spath':
raw.push(task[1]);
break;
case ':min-text-length':
case ':remove':
case ':style':
case ':upward':
case ':watch-attr':
case ':xpath':
raw.push(`${task[0]}(${task[1]})`);
break;
}
}
return raw.join('');
};
const compile = function(raw, root = false) {
if ( raw === '' ) { return; }
const tasks = [];
const n = raw.length;
let prefix = '';
let i = 0;
let opPrefixBeg = 0;
let action;
// TODO: use slices instead of charCodeAt()
for (;;) {
let c, match;
// Advance to next operator.
while ( i < n ) {
c = raw.charCodeAt(i++);
if ( c === 0x3A /* ':' */ ) {
match = reProceduralOperator.exec(raw.slice(i));
if ( match !== null ) { break; }
}
}
if ( i === n ) { break; }
const opNameBeg = i - 1;
const opNameEnd = i + match[0].length - 1;
i += match[0].length;
// Find end of argument: first balanced closing parenthesis.
// Note: unbalanced parenthesis can be used in a regex literal
// when they are escaped using `\`.
// TODO: need to handle quoted parentheses.
let pcnt = 1;
while ( i < n ) {
c = raw.charCodeAt(i++);
if ( c === 0x5C /* '\\' */ ) {
if ( i < n ) { i += 1; }
} else if ( c === 0x28 /* '(' */ ) {
pcnt +=1 ;
} else if ( c === 0x29 /* ')' */ ) {
pcnt -= 1;
if ( pcnt === 0 ) { break; }
}
}
// Unbalanced parenthesis? An unbalanced parenthesis is fine
// as long as the last character is a closing parenthesis.
if ( pcnt !== 0 && c !== 0x29 ) { return; }
// https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588
// Maybe that one operator is a valid CSS selector and if so,
// then consider it to be part of the prefix.
if ( cssSelectorType(raw.slice(opNameBeg, i)) === 1 ) {
continue;
}
// Extract and remember operator details.
let operator = raw.slice(opNameBeg, opNameEnd);
operator = normalizedOperators.get(operator) || operator;
// Action operator can only be used as trailing operator in the
// root task list.
// Per-operator arguments validation
const args = compileArgument.get(operator)(
raw.slice(opNameEnd + 1, i - 1)
);
if ( args === undefined ) { return; }
if ( opPrefixBeg === 0 ) {
prefix = raw.slice(0, opNameBeg);
} else if ( opNameBeg !== opPrefixBeg ) {
if ( action !== undefined ) { return; }
const spath = compileSpathExpression(
raw.slice(opPrefixBeg, opNameBeg)
);
if ( spath === undefined ) { return; }
tasks.push([ ':spath', spath ]);
}
if ( action !== undefined ) { return; }
tasks.push([ operator, args ]);
if ( actionOperators.has(operator) ) {
if ( root === false ) { return; }
action = operator.slice(1);
}
opPrefixBeg = i;
if ( i === n ) { break; }
}
// No task found: then we have a CSS selector.
// At least one task found: nothing should be left to parse.
if ( tasks.length === 0 ) {
prefix = raw;
} else if ( opPrefixBeg < n ) {
if ( action !== undefined ) { return; }
const spath = compileSpathExpression(raw.slice(opPrefixBeg));
if ( spath === undefined ) { return; }
tasks.push([ ':spath', spath ]);
}
// https://github.com/NanoAdblocker/NanoCore/issues/1#issuecomment-354394894
// https://www.reddit.com/r/uBlockOrigin/comments/c6iem5/
// Convert sibling-selector prefix into :spath operator, but
// only if context is not the root.
if ( prefix !== '' ) {
if ( reIsDanglingSelector.test(prefix) && tasks.length !== 0 ) {
prefix += ' *';
}
if ( cssSelectorType(prefix) === 0 ) {
if (
root ||
reIsSiblingSelector.test(prefix) === false ||
compileSpathExpression(prefix) === undefined
) {
return;
}
tasks.unshift([ ':spath', prefix ]);
prefix = '';
}
}
const out = { selector: prefix };
if ( tasks.length !== 0 ) {
out.tasks = tasks;
}
// Expose action to take in root descriptor.
//
// https://github.com/uBlockOrigin/uBlock-issues/issues/961
// https://github.com/uBlockOrigin/uBlock-issues/issues/382
// For the time being, `style` action can't be used in a
// procedural selector.
if ( action !== undefined ) {
if ( tasks.length > 1 && action === 'style' ) { return; }
out.action = action;
}
// Pseudo-selectors are valid only when used in a root task list.
if ( prefix !== '' ) {
const pos = cssPseudoSelector(prefix);
if ( pos !== -1 ) {
if ( root === false ) { return; }
out.pseudo = pos;
}
}
return out;
};
const entryPoint = function(raw) {
const compiled = compile(raw, true);
if ( compiled !== undefined ) {
compiled.raw = decompile(compiled);
}
return compiled;
};
entryPoint.reset = function() {
regexToRawValue.clear();
};
return entryPoint;
})();
const entryPoint = function(raw, out) {
// https://github.com/gorhill/uBlock/issues/952
// Find out whether we are dealing with an Adguard-specific cosmetic
// filter, and if so, translate it if supported, or discard it if not
// supported.
// We have an Adguard/ABP cosmetic filter if and only if the
// character is `$`, `%` or `?`, otherwise it's not a cosmetic
// filter.
// Adguard's style injection: translate to uBO's format.
if ( hasBits(this.flavorBits, BITFlavorExtStyle) ) {
raw = translateAdguardCSSInjectionFilter(raw);
if ( raw === '' ) { return false; }
out.raw = raw;
}
let extendedSyntax = false;
const selectorType = cssSelectorType(raw);
if ( selectorType !== 0 ) {
extendedSyntax = reExtendedSyntax.test(raw);
if ( extendedSyntax === false ) {
out.pseudoclass = selectorType === 3;
out.compiled = raw;
return true;
}
}
// We rarely reach this point -- majority of selectors are plain
// CSS selectors.
// Supported Adguard/ABP advanced selector syntax: will translate
// into uBO's syntax before further processing.
// Mind unsupported advanced selector syntax, such as ABP's
// `-abp-properties`.
// Note: extended selector syntax has been deprecated in ABP, in
// favor of the procedural one (i.e. `:operator(...)`).
// See https://issues.adblockplus.org/ticket/5287
if ( extendedSyntax ) {
let matches;
while ( (matches = reExtendedSyntaxParser.exec(raw)) !== null ) {
const operator = normalizedExtendedSyntaxOperators.get(matches[1]);
if ( operator === undefined ) { return false; }
raw = raw.slice(0, matches.index) +
operator + '(' + matches[3] + ')' +
raw.slice(matches.index + matches[0].length);
}
return entryPoint.call(this, raw, out);
}
// Procedural selector?
const compiled = compileProceduralSelector(raw);
if ( compiled === undefined ) { return false; }
if ( compiled.pseudo !== undefined ) {
out.pseudoclass = compiled.pseudo;
}
out.compiled = JSON.stringify(compiled);
return true;
};
return entryPoint;
})();
/******************************************************************************/
const hasNoBits = (v, bits) => (v & bits) === 0;
const hasBits = (v, bits) => (v & bits) !== 0;
const hasNotAllBits = (v, bits) => (v & bits) !== bits;
//const hasAllBits = (v, bits) => (v & bits) === bits;
/******************************************************************************/
@ -987,42 +1631,45 @@ const CATStaticExtFilter = 1;
const CATStaticNetFilter = 2;
const CATComment = 3;
const BITSpace = 1 << 0;
const BITGlyph = 1 << 1;
const BITExclamation = 1 << 2;
const BITHash = 1 << 3;
const BITDollar = 1 << 4;
const BITPercent = 1 << 5;
const BITParen = 1 << 6;
const BITAsterisk = 1 << 7;
const BITComma = 1 << 8;
const BITDash = 1 << 9;
const BITPeriod = 1 << 10;
const BITSlash = 1 << 11;
const BITNum = 1 << 12;
const BITEqual = 1 << 13;
const BITQuestion = 1 << 14;
const BITAt = 1 << 15;
const BITAlpha = 1 << 16;
const BITUppercase = 1 << 17;
const BITSquareBracket = 1 << 18;
const BITBackslash = 1 << 19;
const BITCaret = 1 << 20;
const BITUnderscore = 1 << 21;
const BITBrace = 1 << 22;
const BITPipe = 1 << 23;
const BITTilde = 1 << 24;
const BITClosing = 1 << 28;
const BITUnicode = 1 << 29;
const BITIgnore = 1 << 30;
const BITError = 1 << 31;
const BITSpace = 1 << 0;
const BITGlyph = 1 << 1;
const BITExclamation = 1 << 2;
const BITHash = 1 << 3;
const BITDollar = 1 << 4;
const BITPercent = 1 << 5;
const BITParen = 1 << 6;
const BITAsterisk = 1 << 7;
const BITPlus = 1 << 8;
const BITComma = 1 << 9;
const BITDash = 1 << 10;
const BITPeriod = 1 << 11;
const BITSlash = 1 << 12;
const BITNum = 1 << 13;
const BITEqual = 1 << 14;
const BITQuestion = 1 << 15;
const BITAt = 1 << 16;
const BITAlpha = 1 << 17;
const BITUppercase = 1 << 18;
const BITSquareBracket = 1 << 19;
const BITBackslash = 1 << 20;
const BITCaret = 1 << 21;
const BITUnderscore = 1 << 22;
const BITBrace = 1 << 23;
const BITPipe = 1 << 24;
const BITTilde = 1 << 25;
const BITOpening = 1 << 27;
const BITClosing = 1 << 28;
const BITUnicode = 1 << 29;
// TODO: separate from character bits into a new slice slot.
const BITIgnore = 1 << 30;
const BITError = 1 << 31;
const BITAll = 0xFFFFFFFF;
const BITAlphaNum = BITNum | BITAlpha;
const BITRegexWord = BITAlphaNum | BITUnderscore;
const BITHostname = BITNum | BITAlpha | BITUppercase | BITDash | BITPeriod | BITUnderscore | BITUnicode;
const BITPatternToken = BITNum | BITAlpha | BITPercent;
const BITLineComment = BITExclamation | BITHash | BITSquareBracket;
const BITAll = 0xFFFFFFFF;
const BITAlphaNum = BITNum | BITAlpha;
const BITRegexWord = BITAlphaNum | BITUnderscore;
const BITHostname = BITNum | BITAlpha | BITUppercase | BITDash | BITPeriod | BITUnderscore | BITUnicode;
const BITPatternToken = BITNum | BITAlpha | BITPercent;
const BITLineComment = BITExclamation | BITHash | BITSquareBracket;
// Important: it is expected that lines passed to the parser have been
// trimmed of new line characters. Given this, any newline characters found
@ -1044,10 +1691,10 @@ const charDescBits = [
/* 0x25 % */ BITPercent,
/* 0x26 & */ BITGlyph,
/* 0x27 ' */ BITGlyph,
/* 0x28 ( */ BITParen,
/* 0x28 ( */ BITParen | BITOpening,
/* 0x29 ) */ BITParen | BITClosing,
/* 0x2A * */ BITAsterisk,
/* 0x2B + */ BITGlyph,
/* 0x2B + */ BITPlus,
/* 0x2C , */ BITComma,
/* 0x2D - */ BITDash,
/* 0x2E . */ BITPeriod,
@ -1095,7 +1742,7 @@ const charDescBits = [
/* 0x58 X */ BITAlpha | BITUppercase,
/* 0x59 Y */ BITAlpha | BITUppercase,
/* 0x5A Z */ BITAlpha | BITUppercase,
/* 0x5B [ */ BITSquareBracket,
/* 0x5B [ */ BITSquareBracket | BITOpening,
/* 0x5C \ */ BITBackslash,
/* 0x5D ] */ BITSquareBracket | BITClosing,
/* 0x5E ^ */ BITCaret,
@ -1127,7 +1774,7 @@ const charDescBits = [
/* 0x78 x */ BITAlpha,
/* 0x79 y */ BITAlpha,
/* 0x7A z */ BITAlpha,
/* 0x7B { */ BITBrace,
/* 0x7B { */ BITBrace | BITOpening,
/* 0x7C | */ BITPipe,
/* 0x7D } */ BITBrace | BITClosing,
/* 0x7E ~ */ BITTilde,
@ -1143,6 +1790,9 @@ const BITFlavorNetRightHnAnchor = 1 << 5;
const BITFlavorNetSpaceInPattern = 1 << 6;
const BITFlavorExtStyle = 1 << 7;
const BITFlavorExtStrong = 1 << 8;
const BITFlavorExtCosmetic = 1 << 9;
const BITFlavorExtScriptlet = 1 << 10;
const BITFlavorExtHTML = 1 << 11;
const BITFlavorIgnore = 1 << 29;
const BITFlavorUnsupported = 1 << 30;
const BITFlavorError = 1 << 31;
@ -1229,6 +1879,10 @@ Parser.prototype.BITAll = BITAll;
Parser.prototype.BITFlavorException = BITFlavorException;
Parser.prototype.BITFlavorExtStyle = BITFlavorExtStyle;
Parser.prototype.BITFlavorExtStrong = BITFlavorExtStrong;
Parser.prototype.BITFlavorExtCosmetic = BITFlavorExtCosmetic;
Parser.prototype.BITFlavorExtScriptlet = BITFlavorExtScriptlet;
Parser.prototype.BITFlavorExtHTML = BITFlavorExtHTML;
Parser.prototype.BITFlavorIgnore = BITFlavorIgnore;
Parser.prototype.BITFlavorUnsupported = BITFlavorUnsupported;
Parser.prototype.BITFlavorError = BITFlavorError;
@ -1298,6 +1952,9 @@ const NetOptionsIterator = class {
this.value = undefined;
this.done = true;
}
[Symbol.iterator]() {
return this.init();
}
init() {
this.readPtr = this.writePtr = 0;
this.done = this.parser.optionsSpan.l === 0;
@ -1415,7 +2072,7 @@ const NetOptionsIterator = class {
if ( this.interactive && hasBits(descriptor, OPTDomainList) ) {
this.parser.analyzeDomainList(
lval + 3, i, BITPipe,
(descriptor & 0xFF) === OPTTokenDomain
(descriptor & 0xFF) === OPTTokenDomain ? 0b01 : 0b00
);
}
} else {
@ -1480,9 +2137,6 @@ const NetOptionsIterator = class {
this.readPtr = i + 6;
return this;
}
[Symbol.iterator]() {
return this.init();
}
};
const netOptionTokens = new Map([
@ -1547,6 +2201,9 @@ const PatternTokenIterator = class {
}
[Symbol.iterator]() {
const { i, l } = this.parser.patternSpan;
if ( l === 0 ) {
return this.end();
}
this.l = i;
this.r = i + l;
this.i = i;
@ -1605,16 +2262,18 @@ const ExtOptionsIterator = class {
this.value = undefined;
this.done = true;
}
init() {
[Symbol.iterator]() {
const { i, l } = this.parser.optionsSpan;
this.l = i;
this.r = i + l;
this.done = false;
this.value = {
hn: undefined,
not: false,
bad: false,
};
if ( l === 0 ) {
this.l = this.r = 0;
this.done = true;
this.value = undefined;
} else {
this.l = i;
this.r = i + l;
this.done = false;
this.value = { hn: undefined, not: false, bad: false };
}
return this;
}
next() {
@ -1655,9 +2314,6 @@ const ExtOptionsIterator = class {
this.l = i;
return this;
}
[Symbol.iterator]() {
return this.init();
}
};
/******************************************************************************/

View File

@ -2308,7 +2308,7 @@ const FilterParser = class {
}
parseOptions(parser) {
for ( let { id, val, not } of parser.options() ) {
for ( let { id, val, not } of parser.netOptions() ) {
switch ( id ) {
case parser.OPTToken3p:
this.parsePartyOption(false, not);