1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-01 08:39:37 +02:00

Add a new static filtering parser

A new standalone static filtering parser is introduced,
vAPI.StaticFilteringParser. Its purpose is to parse
a line of text into a representation suitable for
compiling filters. It can additionally serve for
syntax highlighting purposes.

As a side effect, this solves:
- https://github.com/uBlockOrigin/uBlock-issues/issues/1038

This is a first draft; there is more work left to do
to further perfect the implementation and extend its
capabilities, especially those useful to assist filter
authors.

For the time being, this commit breaks line-continuation
syntax highlighting -- which was already flaky prior to
this commit anyway.
This commit is contained in:
Raymond Hill 2020-06-04 07:18:54 -04:00
parent e8c8fab8c8
commit 01b1ed9a98
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
10 changed files with 1917 additions and 568 deletions

View File

@ -54,6 +54,7 @@
<script src="js/i18n.js"></script> <script src="js/i18n.js"></script>
<script src="js/dashboard-common.js"></script> <script src="js/dashboard-common.js"></script>
<script src="js/cloud-ui.js"></script> <script src="js/cloud-ui.js"></script>
<script src="js/static-filtering-parser.js"></script>
<script src="js/1p-filters.js"></script> <script src="js/1p-filters.js"></script>
</body> </body>

View File

@ -45,6 +45,7 @@ body {
<script src="js/udom.js"></script> <script src="js/udom.js"></script>
<script src="js/i18n.js"></script> <script src="js/i18n.js"></script>
<script src="js/dashboard-common.js"></script> <script src="js/dashboard-common.js"></script>
<script src="js/static-filtering-parser.js"></script>
<script src="js/asset-viewer.js"></script> <script src="js/asset-viewer.js"></script>
</body> </body>

View File

@ -26,8 +26,9 @@
<script src="js/filtering-context.js"></script> <script src="js/filtering-context.js"></script>
<script src="js/redirect-engine.js"></script> <script src="js/redirect-engine.js"></script>
<script src="js/dynamic-net-filtering.js"></script> <script src="js/dynamic-net-filtering.js"></script>
<script src="js/static-net-filtering.js"></script>
<script src="js/url-net-filtering.js"></script> <script src="js/url-net-filtering.js"></script>
<script src="js/static-filtering-parser.js"></script>
<script src="js/static-net-filtering.js"></script>
<script src="js/static-ext-filtering.js"></script> <script src="js/static-ext-filtering.js"></script>
<script src="js/cosmetic-filtering.js"></script> <script src="js/cosmetic-filtering.js"></script>
<script src="js/scriptlet-filtering.js"></script> <script src="js/scriptlet-filtering.js"></script>

View File

@ -22,7 +22,17 @@
word-break: break-all; word-break: break-all;
} }
/* CodeMirror theme overrides */
.cm-s-default .cm-string-2 { color: #a30; }
.cm-s-default .cm-comment { color: #777; } .cm-s-default .cm-comment { color: #777; }
.cm-s-default .cm-keyword { color: #90b; }
.cm-s-default .cm-error,
.CodeMirror-linebackground.error {
background-color: #ff000018;
text-decoration: underline red;
text-underline-position: under;
}
.cm-directive { color: #333; font-weight: bold; } .cm-directive { color: #333; font-weight: bold; }
.cm-staticext { color: #008; } .cm-staticext { color: #008; }
.cm-staticnetBlock { color: #800; } .cm-staticnetBlock { color: #800; }

View File

@ -24,117 +24,155 @@
'use strict'; 'use strict';
CodeMirror.defineMode("ubo-static-filtering", function() { CodeMirror.defineMode("ubo-static-filtering", function() {
const reDirective = /^\s*!#(?:if|endif|include)\b/; const parser = new vAPI.StaticFilteringParser(true);
const reComment1 = /^\s*!/; const reDirective = /^!#(?:if|endif|include)\b/;
const reComment2 = /^\s*#/; let parserSlot = 0;
const reExt = /(#@?(?:\$\??|\?)?#)(?!##)/; let netOptionValueMode = false;
const reNet = /^\s*(?:@@)?.*(?:(\$)(?:[^$]+)?)?$/;
let lineStyle = null;
let anchorOptPos = null;
const lines = [];
let iLine = 0;
const lineFromLineBuffer = function() { const colorSpan = function(stream) {
return lines.length === 1 if ( parser.category === parser.CATNone || parser.shouldIgnore() ) {
? lines[0] stream.skipToEnd();
: lines.filter(a => a.replace(/^\s*|\s+\\$/g, '')).join(''); return 'comment';
};
const parseExtFilter = function() {
lineStyle = 'staticext';
for ( let i = 0; i < lines.length; i++ ) {
const match = reExt.exec(lines[i]);
if ( match === null ) { continue; }
anchorOptPos = { y: i, x: match.index, l: match[1].length };
break;
} }
}; if ( parser.category === parser.CATComment ) {
stream.skipToEnd();
const parseNetFilter = function() { return reDirective.test(stream.string)
lineStyle = lineFromLineBuffer().startsWith('@@') ? 'variable strong'
? 'staticnetAllow' : 'comment';
: 'staticnetBlock';
let i = lines.length;
while ( i-- ) {
const pos = lines[i].lastIndexOf('$');
if ( pos === -1 ) { continue; }
anchorOptPos = { y: i, x: pos, l: 1 };
break;
} }
}; if ( (parser.slices[parserSlot] & parser.BITIgnore) !== 0 ) {
stream.pos += parser.slices[parserSlot+2];
const highlight = function(stream) { parserSlot += 3;
if ( anchorOptPos !== null && iLine === anchorOptPos.y ) { return 'comment';
if ( stream.pos === anchorOptPos.x ) { }
stream.pos += anchorOptPos.l; if ( (parser.slices[parserSlot] & parser.BITError) !== 0 ) {
return `${lineStyle} staticOpt`; stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return 'error';
}
if ( parser.category === parser.CATStaticExtFilter ) {
if ( parserSlot < parser.optionsAnchorSpan.i ) {
const style = (parser.slices[parserSlot] & parser.BITComma) === 0
? 'string-2'
: 'def';
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return style;
} }
if ( stream.pos < anchorOptPos.x ) { if (
stream.pos = anchorOptPos.x; parserSlot >= parser.optionsAnchorSpan.i &&
return lineStyle; parserSlot < parser.patternSpan.i
) {
const style = (parser.flavorBits & parser.BITFlavorException) !== 0
? 'tag'
: 'def';
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return `${style} strong`;
} }
if ( parserSlot >= parser.patternSpan.i ) {
stream.skipToEnd();
return 'variable';
}
stream.skipToEnd();
return '';
}
if ( parserSlot < parser.exceptionSpan.i ) {
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return '';
}
if (
parserSlot === parser.exceptionSpan.i &&
parser.exceptionSpan.l !== 0
) {
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return 'tag strong';
}
if (
parserSlot === parser.patternLeftAnchorSpan.i &&
parser.patternLeftAnchorSpan.l !== 0 ||
parserSlot === parser.patternRightAnchorSpan.i &&
parser.patternRightAnchorSpan.l !== 0
) {
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return 'keyword strong';
}
if (
parserSlot >= parser.patternSpan.i &&
parserSlot < parser.patternRightAnchorSpan.i
) {
if ( (parser.slices[parserSlot] & (parser.BITAsterisk | parser.BITCaret)) !== 0 ) {
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return 'keyword strong';
}
const nextSlot = parser.skipUntil(
parserSlot,
parser.patternRightAnchorSpan.i,
parser.BITAsterisk | parser.BITCaret
);
stream.pos = parser.slices[nextSlot+1];
parserSlot = nextSlot;
return 'variable';
}
if (
parserSlot === parser.optionsAnchorSpan.i &&
parser.optionsAnchorSpan.l !== 0
) {
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return 'def strong';
}
if (
parserSlot >= parser.optionsSpan.i &&
parser.optionsSpan.l !== 0
) {
const bits = parser.slices[parserSlot];
let style;
if ( (bits & parser.BITComma) !== 0 ) {
style = 'def strong';
netOptionValueMode = false;
} else if ( (bits & parser.BITTilde) !== 0 ) {
style = 'keyword strong';
} else if ( (bits & parser.BITPipe) !== 0 ) {
style = 'def';
} else if ( netOptionValueMode ) {
style = 'string-2';
} else if ( (bits & parser.BITEqual) !== 0 ) {
netOptionValueMode = true;
}
stream.pos += parser.slices[parserSlot+2];
parserSlot += 3;
return style || 'def';
}
if (
parserSlot >= parser.commentSpan.i &&
parser.commentSpan.l !== 0
) {
stream.skipToEnd();
return 'comment';
} }
stream.skipToEnd(); stream.skipToEnd();
return lineStyle; return '';
};
const parseMultiLine = function() {
anchorOptPos = null;
const line = lineFromLineBuffer();
if ( reDirective.test(line) ) {
lineStyle = 'directive';
return;
}
if ( reComment1.test(line) ) {
lineStyle = 'comment';
return;
}
if ( line.indexOf('#') !== -1 ) {
if ( reExt.test(line) ) {
return parseExtFilter();
}
if ( reComment2.test(line) ) {
lineStyle = 'comment';
return;
}
}
if ( reNet.test(line) ) {
return parseNetFilter();
}
lineStyle = null;
}; };
return { return {
startState: function() {
},
token: function(stream) { token: function(stream) {
if ( iLine === lines.length || stream.string !== lines[iLine] ) { if ( stream.sol() ) {
iLine = 0; parser.analyze(stream.string);
parser.analyzeExtra(stream.string);
parserSlot = 0;
netOptionValueMode = false;
} }
if ( iLine === 0 ) { let style = colorSpan(stream);
if ( lines.length > 1 ) { if ( (parser.flavorBits & parser.BITFlavorError) !== 0 ) {
lines.length = 1; style += ' line-background-error';
}
let line = stream.string;
lines[0] = line;
if ( line.endsWith(' \\') ) {
do {
line = stream.lookAhead(lines.length);
if (
line === undefined ||
line.startsWith(' ') === false
) { break; }
lines.push(line);
} while ( line.endsWith(' \\') );
}
parseMultiLine();
} }
const style = highlight(stream); style = style.trim();
if ( stream.eol() ) { return style !== '' ? style : null;
iLine += 1;
}
return style;
}, },
}; };
}); });

View File

@ -135,7 +135,10 @@ const fromNetFilter = async function(rawFilter) {
const µb = µBlock; const µb = µBlock;
const writer = new µb.CompiledLineIO.Writer(); const writer = new µb.CompiledLineIO.Writer();
if ( µb.staticNetFilteringEngine.compile(rawFilter, writer) === false ) { const parser = new vAPI.StaticFilteringParser();
parser.analyze(rawFilter);
if ( µb.staticNetFilteringEngine.compile(parser, writer) === false ) {
return; return;
} }

View File

@ -52,7 +52,6 @@
µBlock.staticExtFilteringEngine = (( ) => { µBlock.staticExtFilteringEngine = (( ) => {
const µb = µBlock; const µb = µBlock;
const reHasUnicode = /[^\x00-\x7F]/;
const reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/; const reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/;
const emptyArray = []; const emptyArray = [];
const parsed = { const parsed = {
@ -142,23 +141,16 @@
: `${selector}:style(${style})`; : `${selector}:style(${style})`;
}; };
const hostnamesFromPrefix = function(s) { const hostnamesFromPrefix = function(parser) {
const hostnames = []; const hostnames = [];
const hasUnicode = reHasUnicode.test(s); const hasUnicode = parser.optionHasUnicode();
let beg = 0; for ( let { hn, not } of parser.options() ) {
while ( beg < s.length ) { hn = hn.trim();
let end = s.indexOf(',', beg); if ( hn.length === 0 ) { continue; }
if ( end === -1 ) { end = s.length; } if ( hasUnicode ) {
let hostname = s.slice(beg, end).trim(); hn = punycode.toASCII(hn);
if ( hostname.length !== 0 ) {
if ( hasUnicode ) {
hostname = hostname.charCodeAt(0) === 0x7E /* '~' */
? '~' + punycode.toASCII(hostname.slice(1))
: punycode.toASCII(hostname);
}
hostnames.push(hostname);
} }
beg = end + 1; hostnames.push(not ? `~${hn}` : hn);
} }
return hostnames; return hostnames;
}; };
@ -844,34 +836,16 @@
return entryPoint; return entryPoint;
})(); })();
api.compile = function(raw, writer) { api.compile = function(parser, writer) {
let lpos = raw.indexOf('#'); if ( parser.category !== parser.CATStaticExtFilter ) { return false; }
if ( lpos === -1 ) { return false; }
let rpos = lpos + 1;
if ( raw.charCodeAt(rpos) !== 0x23 /* '#' */ ) {
rpos = raw.indexOf('#', rpos + 1);
if ( rpos === -1 ) { return false; }
}
// https://github.com/AdguardTeam/AdguardFilters/commit/4fe02d73cee6 // Adguard's scriptlet injection: not supported.
// AdGuard also uses `$?` to force inline-based style rather than if ( (parser.flavorBits & parser.BITFlavorUnsupported) !== 0 ) {
// stylesheet-based style. return true;
// Coarse-check that the anchor is valid.
// `##`: l === 1
// `#@#`, `#$#`, `#%#`, `#?#`: l === 2
// `#@$#`, `#@%#`, `#@?#`, `#$?#`: l === 3
// `#@$?#`: l === 4
const anchorLen = rpos - lpos;
if ( anchorLen > 4 ) { return false; }
if (
anchorLen > 1 &&
/^@?(?:\$\??|%|\?)?$/.test(raw.slice(lpos + 1, rpos)) === false
) {
return false;
} }
// Extract the selector. // Extract the selector.
let suffix = raw.slice(rpos + 1).trim(); let suffix = parser.strFromSpan(parser.patternSpan);
if ( suffix.length === 0 ) { return false; } if ( suffix.length === 0 ) { return false; }
parsed.suffix = suffix; parsed.suffix = suffix;
@ -882,29 +856,21 @@
// We have an Adguard/ABP cosmetic filter if and only if the // We have an Adguard/ABP cosmetic filter if and only if the
// character is `$`, `%` or `?`, otherwise it's not a cosmetic // character is `$`, `%` or `?`, otherwise it's not a cosmetic
// filter. // filter.
let cCode = raw.charCodeAt(rpos - 1); // Adguard's style injection: translate to uBO's format.
if ( cCode !== 0x23 /* '#' */ && cCode !== 0x40 /* '@' */ ) { if ( (parser.flavorBits & parser.BITFlavorExtStyle) !== 0 ) {
// Adguard's scriptlet injection: not supported. suffix = translateAdguardCSSInjectionFilter(suffix);
if ( cCode === 0x25 /* '%' */ ) { return true; } if ( suffix === '' ) { return true; }
if ( cCode === 0x3F /* '?' */ && anchorLen > 2 ) { parsed.suffix = suffix;
cCode = raw.charCodeAt(rpos - 2);
}
// Adguard's style injection: translate to uBO's format.
if ( cCode === 0x24 /* '$' */ ) {
suffix = translateAdguardCSSInjectionFilter(suffix);
if ( suffix === '' ) { return true; }
parsed.suffix = suffix;
}
} }
// Exception filter? // Exception filter?
parsed.exception = raw.charCodeAt(lpos + 1) === 0x40 /* '@' */; parsed.exception = parser.isException();
// Extract the hostname(s), punycode if required. // Extract the hostname(s), punycode if required.
if ( lpos === 0 ) { if ( parser.hasOptions() ) {
parsed.hostnames = emptyArray; parsed.hostnames = hostnamesFromPrefix(parser);
} else { } else {
parsed.hostnames = hostnamesFromPrefix(raw.slice(0, lpos)); parsed.hostnames = emptyArray;
} }
// Backward compatibility with deprecated syntax. // Backward compatibility with deprecated syntax.

File diff suppressed because it is too large Load Diff

View File

@ -84,13 +84,16 @@ const typeNameToTypeValue = {
const otherTypeBitValue = typeNameToTypeValue.other; const otherTypeBitValue = typeNameToTypeValue.other;
const bitFromType = type =>
1 << ((typeNameToTypeValue[type] >>> 4) - 1);
// All network request types to bitmap // All network request types to bitmap
// bring origin to 0 (from 4 -- see typeNameToTypeValue) // bring origin to 0 (from 4 -- see typeNameToTypeValue)
// left-shift 1 by the above-calculated value // left-shift 1 by the above-calculated value
// subtract 1 to set all type bits // subtract 1 to set all type bits
const allNetworkTypesBits = const allNetworkTypesBits =
(1 << (otherTypeBitValue >>> 4)) - 1; (1 << (otherTypeBitValue >>> 4)) - 1;
const allTypesBits = const allTypesBits =
allNetworkTypesBits | allNetworkTypesBits |
1 << (typeNameToTypeValue['popup'] >>> 4) - 1 | 1 << (typeNameToTypeValue['popup'] >>> 4) - 1 |
@ -127,42 +130,6 @@ const typeValueToTypeName = {
23: 'unsupported', 23: 'unsupported',
}; };
// https://github.com/gorhill/uBlock/issues/1493
// Transpose `ping` into `other` for now.
const toNormalizedType = {
'all': 'all',
'beacon': 'ping',
'cname': 'cname',
'css': 'stylesheet',
'data': 'data',
'doc': 'main_frame',
'document': 'main_frame',
'font': 'font',
'frame': 'sub_frame',
'genericblock': 'unsupported',
'generichide': 'generichide',
'ghide': 'generichide',
'image': 'image',
'inline-font': 'inline-font',
'inline-script': 'inline-script',
'media': 'media',
'object': 'object',
'object-subrequest': 'object',
'other': 'other',
'ping': 'ping',
'popunder': 'popunder',
'popup': 'popup',
'script': 'script',
'specifichide': 'specifichide',
'shide': 'specifichide',
'stylesheet': 'stylesheet',
'subdocument': 'sub_frame',
'xhr': 'xmlhttprequest',
'xmlhttprequest': 'xmlhttprequest',
'webrtc': 'unsupported',
'websocket': 'websocket',
};
const typeValueFromCatBits = catBits => (catBits >>> 4) & 0b11111; const typeValueFromCatBits = catBits => (catBits >>> 4) & 0b11111;
/******************************************************************************/ /******************************************************************************/
@ -409,7 +376,7 @@ const filterPattern = {
units.push(FilterRegex.compile(parsed)); units.push(FilterRegex.compile(parsed));
return; return;
} }
const pattern = parsed.f; const pattern = parsed.pattern;
if ( pattern === '*' ) { if ( pattern === '*' ) {
units.push(FilterTrue.compile()); units.push(FilterTrue.compile());
return; return;
@ -439,27 +406,27 @@ const filterPattern = {
hasCaretCombo ? parsed.firstCaretPos : parsed.firstWildcardPos hasCaretCombo ? parsed.firstCaretPos : parsed.firstWildcardPos
); );
if ( parsed.tokenBeg < parsed.firstWildcardPos ) { if ( parsed.tokenBeg < parsed.firstWildcardPos ) {
parsed.f = sleft; parsed.pattern = sleft;
units.push(FilterPatternPlain.compile(parsed)); units.push(FilterPatternPlain.compile(parsed));
parsed.f = sright; parsed.pattern = sright;
units.push(FilterPatternRight.compile(parsed, hasCaretCombo)); units.push(FilterPatternRight.compile(parsed, hasCaretCombo));
return; return;
} }
// parsed.tokenBeg > parsed.firstWildcardPos // parsed.tokenBeg > parsed.firstWildcardPos
parsed.f = sright; parsed.pattern = sright;
parsed.tokenBeg -= parsed.firstWildcardPos + 1; parsed.tokenBeg -= parsed.firstWildcardPos + 1;
units.push(FilterPatternPlain.compile(parsed)); units.push(FilterPatternPlain.compile(parsed));
parsed.f = sleft; parsed.pattern = sleft;
units.push(FilterPatternLeft.compile(parsed, hasCaretCombo)); units.push(FilterPatternLeft.compile(parsed, hasCaretCombo));
}, },
compileGeneric: function(parsed, units) { compileGeneric: function(parsed, units) {
const pattern = parsed.f; const pattern = parsed.pattern;
// Optimize special case: plain pattern with trailing caret // Optimize special case: plain pattern with trailing caret
if ( if (
parsed.firstWildcardPos === -1 && parsed.firstWildcardPos === -1 &&
parsed.firstCaretPos === (pattern.length - 1) parsed.firstCaretPos === (pattern.length - 1)
) { ) {
parsed.f = pattern.slice(0, -1); parsed.pattern = pattern.slice(0, -1);
units.push(FilterPatternPlain.compile(parsed)); units.push(FilterPatternPlain.compile(parsed));
units.push(FilterTrailingSeparator.compile()); units.push(FilterTrailingSeparator.compile());
return; return;
@ -479,10 +446,10 @@ const filterPattern = {
// if ( c === 0x2A /* '*' */ || c === 0x5E /* '^' */ ) { break; } // if ( c === 0x2A /* '*' */ || c === 0x5E /* '^' */ ) { break; }
// right += 1; // right += 1;
//} //}
//parsed.f = pattern.slice(left, right); //parsed.pattern = pattern.slice(left, right);
//parsed.tokenBeg -= left; //parsed.tokenBeg -= left;
//units.push(FilterPatternPlain.compile(parsed)); //units.push(FilterPatternPlain.compile(parsed));
//parsed.f = pattern; //parsed.pattern = pattern;
units.push(FilterPatternGeneric.compile(parsed)); units.push(FilterPatternGeneric.compile(parsed));
}, },
}; };
@ -565,7 +532,7 @@ const FilterPatternPlain = class {
} }
static compile(details) { static compile(details) {
return [ FilterPatternPlain.fid, details.f, details.tokenBeg ]; return [ FilterPatternPlain.fid, details.pattern, details.tokenBeg ];
} }
static fromCompiled(args) { static fromCompiled(args) {
@ -678,7 +645,7 @@ const FilterPatternLeft = class {
static compile(details, ex) { static compile(details, ex) {
return [ return [
ex ? FilterPatternLeftEx.fid : FilterPatternLeft.fid, ex ? FilterPatternLeftEx.fid : FilterPatternLeft.fid,
details.f details.pattern
]; ];
} }
@ -762,7 +729,7 @@ const FilterPatternRight = class {
static compile(details, ex) { static compile(details, ex) {
return [ return [
ex ? FilterPatternRightEx.fid : FilterPatternRight.fid, ex ? FilterPatternRightEx.fid : FilterPatternRight.fid,
details.f details.pattern
]; ];
} }
@ -853,7 +820,7 @@ const FilterPatternGeneric = class {
static compile(details) { static compile(details) {
const anchor = details.anchor; const anchor = details.anchor;
details.anchor = 0; details.anchor = 0;
return [ FilterPatternGeneric.fid, details.f, anchor ]; return [ FilterPatternGeneric.fid, details.pattern, anchor ];
} }
static fromCompiled(args) { static fromCompiled(args) {
@ -1115,7 +1082,7 @@ const FilterRegex = class {
} }
static compile(details) { static compile(details) {
return [ FilterRegex.fid, details.f ]; return [ FilterRegex.fid, details.pattern ];
} }
static fromCompiled(args) { static fromCompiled(args) {
@ -2101,25 +2068,42 @@ const FILTER_SEQUENCES_MIN = filterSequenceWritePtr;
/******************************************************************************/ /******************************************************************************/
const FilterParser = class { const FilterParser = class {
constructor() { constructor(parser) {
this.cantWebsocket = vAPI.cantWebsocket; this.cantWebsocket = vAPI.cantWebsocket;
this.domainOpt = ''; this.domainOpt = '';
this.noTokenHash = urlTokenizer.noTokenHash; this.noTokenHash = urlTokenizer.noTokenHash;
this.reBadDomainOptChars = /[+?^${}()[\]\\]/; this.reBadDomainOptChars = /[+?^${}()[\]\\]/;
this.reHostnameRule1 = /^\w[\w.-]*[a-z]$/i;
this.reHostnameRule2 = /^\w[\w.-]*[a-z]\^?$/i;
this.reCanTrimCarets1 = /^[^*]*$/;
this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/;
this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/; this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
this.reHasUnicode = /[^\x00-\x7F]/; this.reHasUnicode = /[^\x00-\x7F]/;
this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/; this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/;
this.reGoodToken = /[%0-9a-z]{1,}/g;
this.reSeparator = /[\/^]/;
this.reRegexToken = /[%0-9A-Za-z]{2,}/g; this.reRegexToken = /[%0-9A-Za-z]{2,}/g;
this.reRegexTokenAbort = /[([]/; this.reRegexTokenAbort = /[([]/;
this.reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/; this.reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/;
this.reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*.]|$)/; this.reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*.]|$)/;
this.reGoodToken = /[%0-9a-z]{1,}/g;
this.tokenIdToNormalizedType = new Map([
[ parser.OPTTokenCname, bitFromType('cname') ],
[ parser.OPTTokenCss, bitFromType('stylesheet') ],
[ parser.OPTTokenDoc, bitFromType('main_frame') ],
[ parser.OPTTokenFont, bitFromType('font') ],
[ parser.OPTTokenFrame, bitFromType('sub_frame') ],
[ parser.OPTTokenGenericblock, bitFromType('unsupported') ],
[ parser.OPTTokenGhide, bitFromType('generichide') ],
[ parser.OPTTokenImage, bitFromType('image') ],
[ parser.OPTTokenInlineFont, bitFromType('inline-font') ],
[ parser.OPTTokenInlineScript, bitFromType('inline-script') ],
[ parser.OPTTokenMedia, bitFromType('media') ],
[ parser.OPTTokenObject, bitFromType('object') ],
[ parser.OPTTokenOther, bitFromType('other') ],
[ parser.OPTTokenPing, bitFromType('ping') ],
[ parser.OPTTokenPopunder, bitFromType('popunder') ],
[ parser.OPTTokenPopup, bitFromType('popup') ],
[ parser.OPTTokenScript, bitFromType('script') ],
[ parser.OPTTokenShide, bitFromType('specifichide') ],
[ parser.OPTTokenXhr, bitFromType('xmlhttprequest') ],
[ parser.OPTTokenWebrtc, bitFromType('unsupported') ],
[ parser.OPTTokenWebsocket, bitFromType('websocket') ],
]);
// These top 100 "bad tokens" are collated using the "miss" histogram // These top 100 "bad tokens" are collated using the "miss" histogram
// from tokenHistograms(). The "score" is their occurrence among the // from tokenHistograms(). The "score" is their occurrence among the
// 200K+ URLs used in the benchmark and executed against default // 200K+ URLs used in the benchmark and executed against default
@ -2224,7 +2208,7 @@ const FilterParser = class {
[ 'scripts',1446 ], [ 'scripts',1446 ],
[ 'twitter',1440 ], [ 'twitter',1440 ],
[ 'crop',1431 ], [ 'crop',1431 ],
[ 'new',1412] [ 'new',1412],
]); ]);
this.maxTokenLen = urlTokenizer.MAX_TOKEN_LENGTH; this.maxTokenLen = urlTokenizer.MAX_TOKEN_LENGTH;
this.reset(); this.reset();
@ -2244,16 +2228,14 @@ const FilterParser = class {
this.dataType = undefined; this.dataType = undefined;
this.data = undefined; this.data = undefined;
this.invalid = false; this.invalid = false;
this.f = ''; this.pattern = '';
this.firstParty = false; this.firstParty = false;
this.thirdParty = false; this.thirdParty = false;
this.party = AnyParty; this.party = AnyParty;
this.fopts = '';
this.domainOpt = ''; this.domainOpt = '';
this.denyallow = ''; this.denyallow = '';
this.isPureHostname = false; this.isPureHostname = false;
this.isRegex = false; this.isRegex = false;
this.raw = '';
this.redirect = 0; this.redirect = 0;
this.token = '*'; this.token = '*';
this.tokenHash = this.noTokenHash; this.tokenHash = this.noTokenHash;
@ -2278,16 +2260,12 @@ const FilterParser = class {
return ''; return '';
} }
bitFromType(type) {
return 1 << ((typeNameToTypeValue[type] >>> 4) - 1);
}
// https://github.com/chrisaljoudi/uBlock/issues/589 // https://github.com/chrisaljoudi/uBlock/issues/589
// Be ready to handle multiple negated types // Be ready to handle multiple negated types
parseTypeOption(raw, not) { parseTypeOption(id, not) {
const typeBit = raw !== 'all' const typeBit = id !== -1
? this.bitFromType(toNormalizedType[raw]) ? this.tokenIdToNormalizedType.get(id)
: allTypesBits; : allTypesBits;
if ( not ) { if ( not ) {
this.notTypes |= typeBit; this.notTypes |= typeBit;
@ -2309,8 +2287,8 @@ const FilterParser = class {
} }
} }
parseHostnameList(s) { parseHostnameList(parser, s) {
if ( this.reHasUnicode.test(s) ) { if ( parser.optionHasUnicode() ) {
const hostnames = s.split('|'); const hostnames = s.split('|');
let i = hostnames.length; let i = hostnames.length;
while ( i-- ) { while ( i-- ) {
@ -2320,106 +2298,74 @@ const FilterParser = class {
} }
s = hostnames.join('|'); s = hostnames.join('|');
} }
// TODO: revisit
if ( this.reBadDomainOptChars.test(s) ) { return ''; } if ( this.reBadDomainOptChars.test(s) ) { return ''; }
return s; return s;
} }
parseOptions(s) { parseOptions(parser) {
this.fopts = s; for ( let { id, val, not, bad } of parser.options() ) {
for ( let opt of s.split(/\s*,\s*/) ) { if ( bad ) { return false; }
const not = opt.startsWith('~'); switch ( id ) {
if ( not ) { case parser.OPTToken3p:
opt = opt.slice(1);
}
if ( opt === 'third-party' || opt === '3p' ) {
this.parsePartyOption(false, not); this.parsePartyOption(false, not);
continue; break;
} case parser.OPTToken1p:
if ( opt === 'first-party' || opt === '1p' ) {
this.parsePartyOption(true, not); this.parsePartyOption(true, not);
continue; break;
} case parser.OPTTokenAll:
if ( toNormalizedType.hasOwnProperty(opt) ) { this.parseTypeOption(-1);
this.parseTypeOption(opt, not); break;
continue;
}
// https://github.com/gorhill/uBlock/issues/2294
// Detect and discard filter if domain option contains nonsensical
// characters.
if ( opt.startsWith('domain=') ) {
this.domainOpt = this.parseHostnameList(opt.slice(7));
if ( this.domainOpt === '' ) {
this.unsupported = true;
break;
}
continue;
}
if ( opt.startsWith('denyallow=') ) {
this.denyallow = this.parseHostnameList(opt.slice(10));
if ( this.denyallow === '' ) {
this.unsupported = true;
break;
}
continue;
}
if ( opt === 'important' ) {
this.important = Important;
continue;
}
if ( /^redirect(?:-rule)?=/.test(opt) ) {
if ( this.redirect !== 0 ) {
this.unsupported = true;
break;
}
this.redirect = opt.charCodeAt(8) === 0x3D /* '=' */ ? 1 : 2;
continue;
}
if (
opt.startsWith('csp=') &&
opt.length > 4 &&
this.reBadCSP.test(opt) === false
) {
this.parseTypeOption('data', not);
this.dataType = 'csp';
this.data = opt.slice(4).trim();
continue;
}
if ( opt === 'csp' && this.action === AllowAction ) {
this.parseTypeOption('data', not);
this.dataType = 'csp';
this.data = '';
continue;
}
// Used by Adguard:
// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#empty-modifier
if ( opt === 'empty' || opt === 'mp4' ) {
if ( this.redirect !== 0 ) {
this.unsupported = true;
break;
}
this.redirect = 1;
continue;
}
// https://github.com/uBlockOrigin/uAssets/issues/192 // https://github.com/uBlockOrigin/uAssets/issues/192
if ( opt === 'badfilter' ) { case parser.OPTTokenBadfilter:
this.badFilter = true; this.badFilter = true;
continue; break;
} case parser.OPTTokenCsp:
this.typeBits = bitFromType('data');
this.dataType = 'csp';
if ( val !== undefined ) {
if ( this.reBadCSP.test(val) ) { return false; }
this.data = val;
} else if ( this.action === AllowAction ) {
this.data = '';
}
break;
// https://github.com/gorhill/uBlock/issues/2294
// Detect and discard filter if domain option contains nonsensical
// characters.
case parser.OPTTokenDomain:
this.domainOpt = this.parseHostnameList(parser, val);
if ( this.domainOpt === '' ) { return false; }
break;
case parser.OPTTokenDenyAllow:
this.denyallow = this.parseHostnameList(parser, val);
if ( this.denyallow === '' ) { return false; }
break;
// https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/ // https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/
// Add support for `elemhide`. Rarely used but it happens. // Add support for `elemhide`. Rarely used but it happens.
if ( opt === 'elemhide' || opt === 'ehide' ) { case parser.OPTTokenEhide:
this.parseTypeOption('specifichide', not); this.parseTypeOption(parser.OPTTokenShide, not);
this.parseTypeOption('generichide', not); this.parseTypeOption(parser.OPTTokenGhide, not);
continue; break;
case parser.OPTTokenImportant:
this.important = Important;
break;
// Used by Adguard:
// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#empty-modifier
case parser.OPTTokenEmpty:
case parser.OPTTokenMp4:
case parser.OPTTokenRedirect:
case parser.OPTTokenRedirectRule:
if ( this.redirect !== 0 ) { return false; }
this.redirect = id === parser.OPTTokenRedirectRule ? 2 : 1;
break;
default:
if ( this.tokenIdToNormalizedType.has(id) === false ) {
return false;
}
this.parseTypeOption(id, not);
break;
} }
// Unrecognized filter option: ignore whole filter.
this.unsupported = true;
break;
}
// Redirect rules can't be exception filters.
if ( this.redirect !== 0 && this.action !== BlockAction ) {
this.unsupported = true;
} }
// Negated network types? Toggle on all network type bits. // Negated network types? Toggle on all network type bits.
@ -2429,9 +2375,7 @@ const FilterParser = class {
} }
if ( this.notTypes !== 0 ) { if ( this.notTypes !== 0 ) {
this.typeBits &= ~this.notTypes; this.typeBits &= ~this.notTypes;
if ( this.typeBits === 0 ) { if ( this.typeBits === 0 ) { return false; }
this.unsupported = true;
}
} }
// https://github.com/gorhill/uBlock/issues/2283 // https://github.com/gorhill/uBlock/issues/2283
@ -2439,193 +2383,99 @@ const FilterParser = class {
// toggle off `unsupported` bit. // toggle off `unsupported` bit.
if ( this.typeBits & unsupportedTypeBit ) { if ( this.typeBits & unsupportedTypeBit ) {
this.typeBits &= ~unsupportedTypeBit; this.typeBits &= ~unsupportedTypeBit;
if ( this.typeBits === 0 ) { if ( this.typeBits === 0 ) { return false; }
this.unsupported = true;
}
} }
return true;
} }
// TODO: use charCodeAt where possible. parse(parser) {
parse(raw) {
// important! // important!
this.reset(); this.reset();
let s = this.raw = raw.trim(); if ( parser.hasError() ) {
if ( s.length === 0 ) {
this.invalid = true; this.invalid = true;
return this; return this;
} }
// Filters which are a single alphanumeric character are discarded // Filters which pattern is a single character other than `*` and have
// as unsupported. // no narrowing options are discarded as invalid.
if ( s.length === 1 && /[0-9a-z]/i.test(s) ) { if ( parser.patternIsDubious() ) {
this.unsupported = true; this.invalid = true;
return this; return this;
} }
// plain hostname? (from HOSTS file)
if ( this.reHostnameRule1.test(s) ) {
this.f = s.toLowerCase();
this.isPureHostname = true;
this.anchor |= 0b100;
return this;
}
// element hiding filter?
let pos = s.indexOf('#');
if ( pos !== -1 ) {
const c = s.charAt(pos + 1);
if ( c === '#' || c === '@' ) {
console.error('static-net-filtering.js > unexpected cosmetic filters');
this.invalid = true;
return this;
}
}
// block or allow filter? // block or allow filter?
// Important: this must be executed before parsing options // Important: this must be executed before parsing options
if ( s.startsWith('@@') ) { if ( parser.isException() ) {
this.action = AllowAction; this.action = AllowAction;
s = s.slice(2);
} }
// options this.isPureHostname = parser.patternIsPlainHostname();
// https://github.com/gorhill/uBlock/issues/842
// - ensure we are not dealing with a regex-based filter.
// - lookup the last occurrence of `$`.
if (
s.charCodeAt(0) !== 0x2F /* '/' */ ||
s.charCodeAt(s.length - 1) !== 0x2F /* '/' */
) {
pos = s.lastIndexOf('$');
if ( pos !== -1 ) {
// https://github.com/gorhill/uBlock/issues/952
// Discard Adguard-specific `$$` filters.
if ( s.indexOf('$$') !== -1 ) {
this.unsupported = true;
return this;
}
this.parseOptions(s.slice(pos + 1).trim());
if ( this.unsupported ) { return this; }
s = s.slice(0, pos);
}
}
// regex? // Plain hostname? (from HOSTS file)
if ( if ( this.isPureHostname && parser.hasOptions() === false ) {
s.length > 2 && this.pattern = parser.patternToLowercase();
s.charCodeAt(0) === 0x2F /* '/' */ && this.anchor |= 0b100;
s.charCodeAt(s.length - 1) === 0x2F /* '/' */
) {
this.isRegex = true;
this.f = s.slice(1, -1);
// https://github.com/gorhill/uBlock/issues/1246
// If the filter is valid, use the corrected version of the
// source string -- this ensures reverse-lookup will work fine.
this.f = this.normalizeRegexSource(this.f);
if ( this.f === '' ) {
this.unsupported = true;
}
return this; return this;
} }
// hostname-anchored // options
if ( s.startsWith('||') ) { if ( parser.hasOptions() && this.parseOptions(parser) === false ) {
this.anchor |= 0b100;
s = s.slice(2);
// convert hostname to punycode if needed
// https://github.com/gorhill/uBlock/issues/2599
if ( this.reHasUnicode.test(s) ) {
const matches = this.reIsolateHostname.exec(s);
if ( matches ) {
s = (matches[1] !== undefined ? matches[1] : '') +
punycode.toASCII(matches[2]) +
matches[3];
}
}
// https://github.com/chrisaljoudi/uBlock/issues/1096
if ( s.startsWith('^') ) {
this.unsupported = true;
return this;
}
// plain hostname? (from ABP filter list)
// https://github.com/gorhill/uBlock/issues/1757
// A filter can't be a pure-hostname one if there is a domain or
// csp option present.
if ( this.reHostnameRule2.test(s) ) {
if ( s.charCodeAt(s.length - 1) === 0x5E /* '^' */ ) {
s = s.slice(0, -1);
}
this.f = s.toLowerCase();
this.isPureHostname = true;
return this;
}
}
// left-anchored
else if ( s.startsWith('|') ) {
this.anchor |= 0x2;
s = s.slice(1);
}
// right-anchored
if ( s.endsWith('|') ) {
this.anchor |= 0x1;
s = s.slice(0, -1);
}
// https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
// Remove pointless leading *.
// https://github.com/gorhill/uBlock/issues/3034
// We can remove anchoring if we need to match all at the start.
if ( s.startsWith('*') ) {
s = s.replace(/^\*+([^%0-9a-z])/i, '$1');
this.anchor &= ~0x6;
}
// Remove pointless trailing *
// https://github.com/gorhill/uBlock/issues/3034
// We can remove anchoring if we need to match all at the end.
if ( s.endsWith('*') ) {
s = s.replace(/([^%0-9a-z])\*+$/i, '$1');
this.anchor &= ~0x1;
}
// nothing left?
if ( s === '' ) {
s = '*';
}
// TODO: remove once redirect rules with `*/*` pattern are no longer
// used.
else if ( this.redirect !== 0 && s === '/' ) {
s = '*';
}
// https://github.com/gorhill/uBlock/issues/1047
// Hostname-anchored makes no sense if matching all requests.
if ( s === '*' ) {
this.anchor = 0;
}
this.firstWildcardPos = s.indexOf('*');
if ( this.firstWildcardPos !== -1 ) {
this.secondWildcardPos = s.indexOf('*', this.firstWildcardPos + 1);
}
this.firstCaretPos = s.indexOf('^');
if ( this.firstCaretPos !== -1 ) {
this.secondCaretPos = s.indexOf('^', this.firstCaretPos + 1);
}
if ( s.length > 1024 ) {
this.unsupported = true; this.unsupported = true;
return this; return this;
} }
this.f = s.toLowerCase(); // regex?
if ( parser.patternIsRegex() ) {
this.isRegex = true;
// https://github.com/gorhill/uBlock/issues/1246
// If the filter is valid, use the corrected version of the
// source string -- this ensures reverse-lookup will work fine.
this.pattern = this.normalizeRegexSource(parser.getPattern());
if ( this.pattern === '' ) {
this.unsupported = true;
}
return this;
}
let pattern;
if ( parser.patternIsMatchAll() ) {
pattern = '*';
} else {
pattern = parser.patternToLowercase();
}
if ( parser.patternIsLeftHostnameAnchored() ) {
this.anchor |= 0b100;
} else if ( parser.patternIsLeftAnchored() ) {
this.anchor |= 0b010;
}
if ( parser.patternIsRightAnchored() ) {
this.anchor |= 0b001;
}
if ( parser.patternHasWildcard() ) {
this.firstWildcardPos = pattern.indexOf('*');
if ( this.firstWildcardPos !== -1 ) {
this.secondWildcardPos =
pattern.indexOf('*', this.firstWildcardPos + 1);
}
}
if ( parser.patternHasCaret() ) {
this.firstCaretPos = pattern.indexOf('^');
if ( this.firstCaretPos !== -1 ) {
this.secondCaretPos =
pattern.indexOf('^', this.firstCaretPos + 1);
}
}
if ( pattern.length > 1024 ) {
this.unsupported = true;
return this;
}
this.pattern = pattern;
return this; return this;
} }
@ -2635,41 +2485,24 @@ const FilterParser = class {
// are not good. Avoid if possible. This has a significant positive // are not good. Avoid if possible. This has a significant positive
// impact on performance. // impact on performance.
makeToken() { makeToken(parser) {
if ( this.isRegex ) { if ( this.isRegex ) {
this.extractTokenFromRegex(); return this.extractTokenFromRegex();
return;
} }
if ( this.f === '*' ) { return; } const match = this.findGoodToken(parser);
const matches = this.findGoodToken(); if ( match === null ) { return; }
if ( matches === null ) { return; } this.token = match.token;
this.token = matches[0];
this.tokenHash = urlTokenizer.tokenHashFromString(this.token); this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
this.tokenBeg = matches.index; this.tokenBeg = match.pos;
} }
findGoodToken() { // Note: a one-char token is better than a documented bad token.
this.reGoodToken.lastIndex = 0; findGoodToken(parser) {
const s = this.f;
let bestMatch = null; let bestMatch = null;
let bestBadness = 0; let bestBadness = 0;
let match; for ( const match of parser.patternTokens() ) {
while ( (match = this.reGoodToken.exec(s)) !== null ) { const badness = match.token.length > 1
const token = match[0]; ? this.badTokens.get(match.token) || 0
// https://github.com/gorhill/uBlock/issues/997
// Ignore token if preceded by wildcard.
const pos = match.index;
if (
pos !== 0 &&
s.charCodeAt(pos - 1) === 0x2A /* '*' */ ||
token.length < this.maxTokenLen &&
s.charCodeAt(pos + token.length) === 0x2A /* '*' */
) {
continue;
}
// A one-char token is better than a documented bad token.
const badness = token.length > 1
? this.badTokens.get(token) || 0
: 1; : 1;
if ( badness === 0 ) { return match; } if ( badness === 0 ) { return match; }
if ( bestBadness === 0 || badness < bestBadness ) { if ( bestBadness === 0 || badness < bestBadness ) {
@ -2685,7 +2518,7 @@ const FilterParser = class {
// a regex-based filter. // a regex-based filter.
extractTokenFromRegex() { extractTokenFromRegex() {
this.reRegexToken.lastIndex = 0; this.reRegexToken.lastIndex = 0;
const s = this.f; const s = this.pattern;
let matches; let matches;
while ( (matches = this.reRegexToken.exec(s)) !== null ) { while ( (matches = this.reRegexToken.exec(s)) !== null ) {
const prefix = s.slice(0, matches.index); const prefix = s.slice(0, matches.index);
@ -2712,9 +2545,9 @@ const FilterParser = class {
this.dataType === undefined && this.dataType === undefined &&
this.denyallow === '' && this.denyallow === '' &&
this.domainOpt !== '' && ( this.domainOpt !== '' && (
this.f === '*' || ( this.pattern === '*' || (
this.anchor === 0b010 && this.anchor === 0b010 &&
/^(?:http[s*]?:(?:\/\/)?)$/.test(this.f) /^(?:http[s*]?:(?:\/\/)?)$/.test(this.pattern)
) )
) && ) &&
this.domainOpt.indexOf('~') === -1; this.domainOpt.indexOf('~') === -1;
@ -2778,15 +2611,15 @@ FilterParser.parse = (( ) => {
ttlTimer = vAPI.setTimeout(ttlProcess, 10007); ttlTimer = vAPI.setTimeout(ttlProcess, 10007);
}; };
return s => { return p => {
if ( parser === undefined ) { if ( parser === undefined ) {
parser = new FilterParser(); parser = new FilterParser(p);
} }
last = Date.now(); last = Date.now();
if ( ttlTimer === undefined ) { if ( ttlTimer === undefined ) {
ttlTimer = vAPI.setTimeout(ttlProcess, 10007); ttlTimer = vAPI.setTimeout(ttlProcess, 10007);
} }
return parser.parse(s); return parser.parse(p);
}; };
})(); })();
@ -3072,10 +2905,10 @@ FilterContainer.prototype.fromSelfie = function(path) {
/******************************************************************************/ /******************************************************************************/
FilterContainer.prototype.compile = function(raw, writer) { FilterContainer.prototype.compile = function(parser, writer) {
// ORDER OF TESTS IS IMPORTANT! // ORDER OF TESTS IS IMPORTANT!
const parsed = FilterParser.parse(raw); const parsed = FilterParser.parse(parser);
// Ignore non-static network filters // Ignore non-static network filters
if ( parsed.invalid ) { return false; } if ( parsed.invalid ) { return false; }
@ -3086,20 +2919,20 @@ FilterContainer.prototype.compile = function(raw, writer) {
µb.logger.writeOne({ µb.logger.writeOne({
realm: 'message', realm: 'message',
type: 'error', type: 'error',
text: `Invalid network filter in ${who}: ${raw}` text: `Invalid network filter in ${who}: ${parser.raw}`
}); });
return false; return false;
} }
// Redirect rule // Redirect rule
if ( parsed.redirect !== 0 ) { if ( parsed.redirect !== 0 ) {
const result = this.compileRedirectRule(parsed, writer); const result = this.compileRedirectRule(parser.raw, parsed.badFilter, writer);
if ( result === false ) { if ( result === false ) {
const who = writer.properties.get('assetKey') || '?'; const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({ µb.logger.writeOne({
realm: 'message', realm: 'message',
type: 'error', type: 'error',
text: `Invalid redirect rule in ${who}: ${raw}` text: `Invalid redirect rule in ${who}: ${parser.raw}`
}); });
return false; return false;
} }
@ -3116,11 +2949,13 @@ FilterContainer.prototype.compile = function(raw, writer) {
parsed.dataType === undefined parsed.dataType === undefined
) { ) {
parsed.tokenHash = this.dotTokenHash; parsed.tokenHash = this.dotTokenHash;
this.compileToAtomicFilter(parsed, parsed.f, writer); this.compileToAtomicFilter(parsed, parsed.pattern, writer);
return true; return true;
} }
parsed.makeToken(); if ( parser.patternIsMatchAll() === false ) {
parsed.makeToken(parser);
}
// Special pattern/option cases: // Special pattern/option cases:
// - `*$domain=...` // - `*$domain=...`
@ -3131,9 +2966,9 @@ FilterContainer.prototype.compile = function(raw, writer) {
// are entries in the `domain=` option. // are entries in the `domain=` option.
if ( parsed.isJustOrigin() ) { if ( parsed.isJustOrigin() ) {
const tokenHash = parsed.tokenHash; const tokenHash = parsed.tokenHash;
if ( parsed.f === '*' || parsed.f.startsWith('http*') ) { if ( parsed.pattern === '*' || parsed.pattern.startsWith('http*') ) {
parsed.tokenHash = this.anyTokenHash; parsed.tokenHash = this.anyTokenHash;
} else if /* 'https:' */ ( parsed.f.startsWith('https') ) { } else if /* 'https:' */ ( parsed.pattern.startsWith('https') ) {
parsed.tokenHash = this.anyHTTPSTokenHash; parsed.tokenHash = this.anyHTTPSTokenHash;
} else /* 'http:' */ { } else /* 'http:' */ {
parsed.tokenHash = this.anyHTTPTokenHash; parsed.tokenHash = this.anyHTTPTokenHash;
@ -3251,10 +3086,10 @@ FilterContainer.prototype.compileToAtomicFilter = function(
/******************************************************************************/ /******************************************************************************/
FilterContainer.prototype.compileRedirectRule = function(parsed, writer) { FilterContainer.prototype.compileRedirectRule = function(raw, badFilter, writer) {
const redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw); const redirects = µb.redirectEngine.compileRuleFromStaticFilter(raw);
if ( Array.isArray(redirects) === false ) { return false; } if ( Array.isArray(redirects) === false ) { return false; }
writer.select(parsed.badFilter ? 1 : 0); writer.select(badFilter ? 1 : 0);
const type = typeNameToTypeValue.redirect; const type = typeNameToTypeValue.redirect;
for ( const redirect of redirects ) { for ( const redirect of redirects ) {
writer.push([ type, redirect ]); writer.push([ type, redirect ]);

View File

@ -799,60 +799,36 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
// https://adblockplus.org/en/filters // https://adblockplus.org/en/filters
const staticNetFilteringEngine = this.staticNetFilteringEngine; const staticNetFilteringEngine = this.staticNetFilteringEngine;
const staticExtFilteringEngine = this.staticExtFilteringEngine; const staticExtFilteringEngine = this.staticExtFilteringEngine;
const reIsWhitespaceChar = /\s/;
const reMaybeLocalIp = /^[\d:f]/;
const reIsLocalhostRedirect = /\s+(?:0\.0\.0\.0|broadcasthost|localhost|local|ip6-\w+)\b/;
const reLocalIp = /^(?:(0\.0\.0\.)?0|127\.0\.0\.1|::1?|fe80::1%lo0)\s+/;
const lineIter = new this.LineIterator(this.processDirectives(rawText)); const lineIter = new this.LineIterator(this.processDirectives(rawText));
const parser = new vAPI.StaticFilteringParser();
parser.setMaxTokenLength(this.urlTokenizer.MAX_TOKEN_LENGTH);
while ( lineIter.eot() === false ) { while ( lineIter.eot() === false ) {
let line = lineIter.next().trim(); let line = lineIter.next();
if ( line.length === 0 ) { continue; }
while ( line.endsWith(' \\') ) { while ( line.endsWith(' \\') ) {
if ( lineIter.peek(4) !== ' ' ) { break; } if ( lineIter.peek(4) !== ' ' ) { break; }
line = line.slice(0, -2).trim() + lineIter.next().trim(); line = line.slice(0, -2).trim() + lineIter.next().trim();
} }
// Strip comments parser.analyze(line);
const c = line.charAt(0);
if ( c === '!' || c === '[' ) { continue; }
// Parse or skip cosmetic filters if ( parser.shouldIgnore() ) { continue; }
// All cosmetic filters are caught here
if ( staticExtFilteringEngine.compile(line, writer) ) { continue; }
// Whatever else is next can be assumed to not be a cosmetic filter if ( parser.category === parser.CATStaticExtFilter ) {
staticExtFilteringEngine.compile(parser, writer);
// Most comments start in first column continue;
if ( c === '#' ) { continue; }
// Catch comments somewhere on the line
// Remove:
// ... #blah blah blah
// ... # blah blah blah
// Don't remove:
// ...#blah blah blah
// because some ABP filters use the `#` character (URL fragment)
const pos = line.indexOf('#');
if ( pos !== -1 && reIsWhitespaceChar.test(line.charAt(pos - 1)) ) {
line = line.slice(0, pos).trim();
} }
// https://github.com/gorhill/httpswitchboard/issues/15 if ( parser.category !== parser.CATStaticNetFilter ) { continue; }
// Ensure localhost et al. don't end up in the ubiquitous blacklist.
// With hosts files, we need to remove local IP redirection // https://github.com/gorhill/uBlock/issues/2599
if ( reMaybeLocalIp.test(c) ) { // convert hostname to punycode if needed
// Ignore hosts file redirect configuration if ( parser.patternHasUnicode() ) {
// 127.0.0.1 localhost parser.toPunycode();
// 255.255.255.255 broadcasthost
if ( reIsLocalhostRedirect.test(line) ) { continue; }
line = line.replace(reLocalIp, '').trim();
} }
staticNetFilteringEngine.compile(parser, writer);
if ( line.length === 0 ) { continue; }
staticNetFilteringEngine.compile(line, writer);
} }
return writer.toString(); return writer.toString();