From 01b1ed9a982965378d732ab0cb4bcd68727fe910 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Thu, 4 Jun 2020 07:18:54 -0400 Subject: [PATCH] Add a new static filtering parser A new standalone static filtering parser is introduced, vAPI.StaticFilteringParser. Its purpose is to parse a line of text into a representation suitable for compiling filters. It can additionally serve syntax highlighting purposes. As a side effect, this solves: - https://github.com/uBlockOrigin/uBlock-issues/issues/1038 This is a first draft, there is more work left to do to further perfect the implementation and extend its capabilities, especially those useful to assist filter authors. For the time being, this commit breaks line-continuation syntax highlighting -- which was already flaky prior to this commit anyway. --- src/1p-filters.html | 1 + src/asset-viewer.html | 1 + src/background.html | 3 +- src/css/codemirror.css | 10 + src/js/codemirror/ubo-static-filtering.js | 234 ++-- src/js/reverselookup.js | 5 +- src/js/static-ext-filtering.js | 80 +- src/js/static-filtering-parser.js | 1518 +++++++++++++++++++++ src/js/static-net-filtering.js | 577 +++----- src/js/storage.js | 56 +- 10 files changed, 1917 insertions(+), 568 deletions(-) create mode 100644 src/js/static-filtering-parser.js diff --git a/src/1p-filters.html b/src/1p-filters.html index 27a2c81d9..fbb016f57 100644 --- a/src/1p-filters.html +++ b/src/1p-filters.html @@ -54,6 +54,7 @@ + diff --git a/src/asset-viewer.html b/src/asset-viewer.html index e4aedc4d8..b54ec865b 100644 --- a/src/asset-viewer.html +++ b/src/asset-viewer.html @@ -45,6 +45,7 @@ body { + diff --git a/src/background.html b/src/background.html index 8db736597..54a8e82d7 100644 --- a/src/background.html +++ b/src/background.html @@ -26,8 +26,9 @@ - + + diff --git a/src/css/codemirror.css b/src/css/codemirror.css index 5d3eafdc7..eda06a225 100644 --- a/src/css/codemirror.css +++ b/src/css/codemirror.css @@ -22,7 +22,17 @@ word-break: break-all; } +/* 
CodeMirror theme overrides */ +.cm-s-default .cm-string-2 { color: #a30; } .cm-s-default .cm-comment { color: #777; } +.cm-s-default .cm-keyword { color: #90b; } +.cm-s-default .cm-error, +.CodeMirror-linebackground.error { + background-color: #ff000018; + text-decoration: underline red; + text-underline-position: under; + } + .cm-directive { color: #333; font-weight: bold; } .cm-staticext { color: #008; } .cm-staticnetBlock { color: #800; } diff --git a/src/js/codemirror/ubo-static-filtering.js b/src/js/codemirror/ubo-static-filtering.js index 08bea84a3..6b2173dd0 100644 --- a/src/js/codemirror/ubo-static-filtering.js +++ b/src/js/codemirror/ubo-static-filtering.js @@ -24,117 +24,155 @@ 'use strict'; CodeMirror.defineMode("ubo-static-filtering", function() { - const reDirective = /^\s*!#(?:if|endif|include)\b/; - const reComment1 = /^\s*!/; - const reComment2 = /^\s*#/; - const reExt = /(#@?(?:\$\??|\?)?#)(?!##)/; - const reNet = /^\s*(?:@@)?.*(?:(\$)(?:[^$]+)?)?$/; - let lineStyle = null; - let anchorOptPos = null; - - const lines = []; - let iLine = 0; + const parser = new vAPI.StaticFilteringParser(true); + const reDirective = /^!#(?:if|endif|include)\b/; + let parserSlot = 0; + let netOptionValueMode = false; - const lineFromLineBuffer = function() { - return lines.length === 1 - ? lines[0] - : lines.filter(a => a.replace(/^\s*|\s+\\$/g, '')).join(''); - }; - - const parseExtFilter = function() { - lineStyle = 'staticext'; - for ( let i = 0; i < lines.length; i++ ) { - const match = reExt.exec(lines[i]); - if ( match === null ) { continue; } - anchorOptPos = { y: i, x: match.index, l: match[1].length }; - break; + const colorSpan = function(stream) { + if ( parser.category === parser.CATNone || parser.shouldIgnore() ) { + stream.skipToEnd(); + return 'comment'; } - }; - - const parseNetFilter = function() { - lineStyle = lineFromLineBuffer().startsWith('@@') - ? 
'staticnetAllow' - : 'staticnetBlock'; - let i = lines.length; - while ( i-- ) { - const pos = lines[i].lastIndexOf('$'); - if ( pos === -1 ) { continue; } - anchorOptPos = { y: i, x: pos, l: 1 }; - break; + if ( parser.category === parser.CATComment ) { + stream.skipToEnd(); + return reDirective.test(stream.string) + ? 'variable strong' + : 'comment'; } - }; - - const highlight = function(stream) { - if ( anchorOptPos !== null && iLine === anchorOptPos.y ) { - if ( stream.pos === anchorOptPos.x ) { - stream.pos += anchorOptPos.l; - return `${lineStyle} staticOpt`; + if ( (parser.slices[parserSlot] & parser.BITIgnore) !== 0 ) { + stream.pos += parser.slices[parserSlot+2]; + parserSlot += 3; + return 'comment'; + } + if ( (parser.slices[parserSlot] & parser.BITError) !== 0 ) { + stream.pos += parser.slices[parserSlot+2]; + parserSlot += 3; + return 'error'; + } + if ( parser.category === parser.CATStaticExtFilter ) { + if ( parserSlot < parser.optionsAnchorSpan.i ) { + const style = (parser.slices[parserSlot] & parser.BITComma) === 0 + ? 'string-2' + : 'def'; + stream.pos += parser.slices[parserSlot+2]; + parserSlot += 3; + return style; } - if ( stream.pos < anchorOptPos.x ) { - stream.pos = anchorOptPos.x; - return lineStyle; + if ( + parserSlot >= parser.optionsAnchorSpan.i && + parserSlot < parser.patternSpan.i + ) { + const style = (parser.flavorBits & parser.BITFlavorException) !== 0 + ? 
'tag' + : 'def'; + stream.pos += parser.slices[parserSlot+2]; + parserSlot += 3; + return `${style} strong`; } + if ( parserSlot >= parser.patternSpan.i ) { + stream.skipToEnd(); + return 'variable'; + } + stream.skipToEnd(); + return ''; + } + if ( parserSlot < parser.exceptionSpan.i ) { + stream.pos += parser.slices[parserSlot+2]; + parserSlot += 3; + return ''; + } + if ( + parserSlot === parser.exceptionSpan.i && + parser.exceptionSpan.l !== 0 + ) { + stream.pos += parser.slices[parserSlot+2]; + parserSlot += 3; + return 'tag strong'; + } + if ( + parserSlot === parser.patternLeftAnchorSpan.i && + parser.patternLeftAnchorSpan.l !== 0 || + parserSlot === parser.patternRightAnchorSpan.i && + parser.patternRightAnchorSpan.l !== 0 + ) { + stream.pos += parser.slices[parserSlot+2]; + parserSlot += 3; + return 'keyword strong'; + } + if ( + parserSlot >= parser.patternSpan.i && + parserSlot < parser.patternRightAnchorSpan.i + ) { + if ( (parser.slices[parserSlot] & (parser.BITAsterisk | parser.BITCaret)) !== 0 ) { + stream.pos += parser.slices[parserSlot+2]; + parserSlot += 3; + return 'keyword strong'; + } + const nextSlot = parser.skipUntil( + parserSlot, + parser.patternRightAnchorSpan.i, + parser.BITAsterisk | parser.BITCaret + ); + stream.pos = parser.slices[nextSlot+1]; + parserSlot = nextSlot; + return 'variable'; + } + if ( + parserSlot === parser.optionsAnchorSpan.i && + parser.optionsAnchorSpan.l !== 0 + ) { + stream.pos += parser.slices[parserSlot+2]; + parserSlot += 3; + return 'def strong'; + } + if ( + parserSlot >= parser.optionsSpan.i && + parser.optionsSpan.l !== 0 + ) { + const bits = parser.slices[parserSlot]; + let style; + if ( (bits & parser.BITComma) !== 0 ) { + style = 'def strong'; + netOptionValueMode = false; + } else if ( (bits & parser.BITTilde) !== 0 ) { + style = 'keyword strong'; + } else if ( (bits & parser.BITPipe) !== 0 ) { + style = 'def'; + } else if ( netOptionValueMode ) { + style = 'string-2'; + } else if ( (bits & 
parser.BITEqual) !== 0 ) { + netOptionValueMode = true; + } + stream.pos += parser.slices[parserSlot+2]; + parserSlot += 3; + return style || 'def'; + } + if ( + parserSlot >= parser.commentSpan.i && + parser.commentSpan.l !== 0 + ) { + stream.skipToEnd(); + return 'comment'; } stream.skipToEnd(); - return lineStyle; - }; - - const parseMultiLine = function() { - anchorOptPos = null; - const line = lineFromLineBuffer(); - if ( reDirective.test(line) ) { - lineStyle = 'directive'; - return; - } - if ( reComment1.test(line) ) { - lineStyle = 'comment'; - return; - } - if ( line.indexOf('#') !== -1 ) { - if ( reExt.test(line) ) { - return parseExtFilter(); - } - if ( reComment2.test(line) ) { - lineStyle = 'comment'; - return; - } - } - if ( reNet.test(line) ) { - return parseNetFilter(); - } - lineStyle = null; + return ''; }; return { - startState: function() { - }, token: function(stream) { - if ( iLine === lines.length || stream.string !== lines[iLine] ) { - iLine = 0; + if ( stream.sol() ) { + parser.analyze(stream.string); + parser.analyzeExtra(stream.string); + parserSlot = 0; + netOptionValueMode = false; } - if ( iLine === 0 ) { - if ( lines.length > 1 ) { - lines.length = 1; - } - let line = stream.string; - lines[0] = line; - if ( line.endsWith(' \\') ) { - do { - line = stream.lookAhead(lines.length); - if ( - line === undefined || - line.startsWith(' ') === false - ) { break; } - lines.push(line); - } while ( line.endsWith(' \\') ); - } - parseMultiLine(); + let style = colorSpan(stream); + if ( (parser.flavorBits & parser.BITFlavorError) !== 0 ) { + style += ' line-background-error'; } - const style = highlight(stream); - if ( stream.eol() ) { - iLine += 1; - } - return style; + style = style.trim(); + return style !== '' ? 
style : null; }, }; }); diff --git a/src/js/reverselookup.js b/src/js/reverselookup.js index 1bda3f676..2b6e59f36 100644 --- a/src/js/reverselookup.js +++ b/src/js/reverselookup.js @@ -135,7 +135,10 @@ const fromNetFilter = async function(rawFilter) { const µb = µBlock; const writer = new µb.CompiledLineIO.Writer(); - if ( µb.staticNetFilteringEngine.compile(rawFilter, writer) === false ) { + const parser = new vAPI.StaticFilteringParser(); + parser.analyze(rawFilter); + + if ( µb.staticNetFilteringEngine.compile(parser, writer) === false ) { return; } diff --git a/src/js/static-ext-filtering.js b/src/js/static-ext-filtering.js index 43806e761..1b1533d95 100644 --- a/src/js/static-ext-filtering.js +++ b/src/js/static-ext-filtering.js @@ -52,7 +52,6 @@ µBlock.staticExtFilteringEngine = (( ) => { const µb = µBlock; - const reHasUnicode = /[^\x00-\x7F]/; const reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/; const emptyArray = []; const parsed = { @@ -142,23 +141,16 @@ : `${selector}:style(${style})`; }; - const hostnamesFromPrefix = function(s) { + const hostnamesFromPrefix = function(parser) { const hostnames = []; - const hasUnicode = reHasUnicode.test(s); - let beg = 0; - while ( beg < s.length ) { - let end = s.indexOf(',', beg); - if ( end === -1 ) { end = s.length; } - let hostname = s.slice(beg, end).trim(); - if ( hostname.length !== 0 ) { - if ( hasUnicode ) { - hostname = hostname.charCodeAt(0) === 0x7E /* '~' */ - ? '~' + punycode.toASCII(hostname.slice(1)) - : punycode.toASCII(hostname); - } - hostnames.push(hostname); + const hasUnicode = parser.optionHasUnicode(); + for ( let { hn, not } of parser.options() ) { + hn = hn.trim(); + if ( hn.length === 0 ) { continue; } + if ( hasUnicode ) { + hn = punycode.toASCII(hn); } - beg = end + 1; + hostnames.push(not ? 
`~${hn}` : hn); } return hostnames; }; @@ -844,34 +836,16 @@ return entryPoint; })(); - api.compile = function(raw, writer) { - let lpos = raw.indexOf('#'); - if ( lpos === -1 ) { return false; } - let rpos = lpos + 1; - if ( raw.charCodeAt(rpos) !== 0x23 /* '#' */ ) { - rpos = raw.indexOf('#', rpos + 1); - if ( rpos === -1 ) { return false; } - } + api.compile = function(parser, writer) { + if ( parser.category !== parser.CATStaticExtFilter ) { return false; } - // https://github.com/AdguardTeam/AdguardFilters/commit/4fe02d73cee6 - // AdGuard also uses `$?` to force inline-based style rather than - // stylesheet-based style. - // Coarse-check that the anchor is valid. - // `##`: l === 1 - // `#@#`, `#$#`, `#%#`, `#?#`: l === 2 - // `#@$#`, `#@%#`, `#@?#`, `#$?#`: l === 3 - // `#@$?#`: l === 4 - const anchorLen = rpos - lpos; - if ( anchorLen > 4 ) { return false; } - if ( - anchorLen > 1 && - /^@?(?:\$\??|%|\?)?$/.test(raw.slice(lpos + 1, rpos)) === false - ) { - return false; + // Adguard's scriptlet injection: not supported. + if ( (parser.flavorBits & parser.BITFlavorUnsupported) !== 0 ) { + return true; } // Extract the selector. - let suffix = raw.slice(rpos + 1).trim(); + let suffix = parser.strFromSpan(parser.patternSpan); if ( suffix.length === 0 ) { return false; } parsed.suffix = suffix; @@ -882,29 +856,21 @@ // We have an Adguard/ABP cosmetic filter if and only if the // character is `$`, `%` or `?`, otherwise it's not a cosmetic // filter. - let cCode = raw.charCodeAt(rpos - 1); - if ( cCode !== 0x23 /* '#' */ && cCode !== 0x40 /* '@' */ ) { - // Adguard's scriptlet injection: not supported. - if ( cCode === 0x25 /* '%' */ ) { return true; } - if ( cCode === 0x3F /* '?' */ && anchorLen > 2 ) { - cCode = raw.charCodeAt(rpos - 2); - } - // Adguard's style injection: translate to uBO's format. 
- if ( cCode === 0x24 /* '$' */ ) { - suffix = translateAdguardCSSInjectionFilter(suffix); - if ( suffix === '' ) { return true; } - parsed.suffix = suffix; - } + // Adguard's style injection: translate to uBO's format. + if ( (parser.flavorBits & parser.BITFlavorExtStyle) !== 0 ) { + suffix = translateAdguardCSSInjectionFilter(suffix); + if ( suffix === '' ) { return true; } + parsed.suffix = suffix; } // Exception filter? - parsed.exception = raw.charCodeAt(lpos + 1) === 0x40 /* '@' */; + parsed.exception = parser.isException(); // Extract the hostname(s), punycode if required. - if ( lpos === 0 ) { - parsed.hostnames = emptyArray; + if ( parser.hasOptions() ) { + parsed.hostnames = hostnamesFromPrefix(parser); } else { - parsed.hostnames = hostnamesFromPrefix(raw.slice(0, lpos)); + parsed.hostnames = emptyArray; } // Backward compatibility with deprecated syntax. diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js new file mode 100644 index 000000000..bb65f3b69 --- /dev/null +++ b/src/js/static-filtering-parser.js @@ -0,0 +1,1518 @@ +/******************************************************************************* + + uBlock Origin - a browser extension to block requests. + Copyright (C) 2020-present Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. 
+ + Home: https://github.com/gorhill/uBlock +*/ + +/* global punycode */ + +'use strict'; + +/******************************************************************************* + + The goal is for the static filtering parser to avoid external + dependencies[1] to other code in the project. + + [1] Except unavoidable ones, such as punycode. + + Roughly, this is how things work: each input string (passed to analyze()) + is decomposed into a minimal set of distinct slices. Each slice is a + triplet of integers consisting of: + + - a bit vector describing the characters inside the slice + - an index of where in the original string the slice starts + - a length for the number of characters in the slice + + Slice descriptors are all flatly stored in an array of integers so as to + avoid the need for a secondary data structure. Example: + + raw string: toto.com + toto . com + | | | + slices: [ 65536, 0, 4, 1024, 4, 1, 65536, 5, 3 ] + ^ ^ ^ + | | | + | | +---- number of characters + | +---- index in raw string + +---- bit vector + + Thus the `toto.com` string is described by + three slices, encoded into nine integers. + + Once a string has been encoded into slices, the parser will only work + with those slices in order to parse the filter represented by the + string, rather than performing string operations on the original string. + The result is that parsing is essentially number-crunching operations + rather than string operations, for the most part (potentially opening + the door for WASM code in the future to parse static filters). + + The array used to hold the slices is reused across string analysis, in + order to eliminate memory churning. + + Above the slices, there are various span objects used to describe + consecutive sequences of slices and which are filled in as a result + of parsing. 
+ +**/ + +{ +// >>>>> start of local scope + +/******************************************************************************/ + +const Parser = class { + constructor(interactive = false) { + this.interactive = interactive; + this.raw = ''; + this.rawEnd = 0; + this.slices = []; + this.leftSpaceSpan = new Span(); + this.exceptionSpan = new Span(); + this.patternLeftAnchorSpan = new Span(); + this.patternSpan = new Span(); + this.patternRightAnchorSpan = new Span(); + this.optionsAnchorSpan = new Span(); + this.optionsSpan = new Span(); + this.commentSpan = new Span(); + this.rightSpaceSpan = new Span(); + this.eolSpan = new Span(); + this.spans = [ + this.leftSpaceSpan, + this.exceptionSpan, + this.patternLeftAnchorSpan, + this.patternSpan, + this.patternRightAnchorSpan, + this.optionsAnchorSpan, + this.optionsSpan, + this.commentSpan, + this.rightSpaceSpan, + this.eolSpan, + ]; + this.patternTokenIterator = new PatternTokenIterator(this); + this.netOptionsIterator = new NetOptionsIterator(this); + this.extOptionsIterator = new ExtOptionsIterator(this); + this.maxTokenLength = Number.MAX_SAFE_INTEGER; + this.reIsLocalhostRedirect = /(?:0\.0\.0\.0|(?:broadcast|local)host|local|ip6-\w+)\b/; + this.reset(); + } + + reset() { + this.rawPos = 0; + this.sliceWritePtr = 0; + this.category = CATNone; + this.allBits = 0; // bits found in any slices + this.patternBits = 0; // bits found in any pattern slices + this.optionsBits = 0; // bits found in any option slices + this.flavorBits = 0; + for ( const span of this.spans ) { span.reset(); } + this.pattern = ''; + } + + analyze(raw) { + this.slice(raw); + let slot = this.leftSpaceSpan.l; + if ( slot === this.rightSpaceSpan.i ) { return; } + + // test for `!`, `#`, or `[` + if ( hasBits(this.slices[slot], BITLineComment) ) { + // static extended filter? 
+ if ( hasBits(this.slices[slot], BITHash) ) { + this.analyzeExt(slot); + if ( this.category === CATStaticExtFilter ) { return; } + } + // if not `#`, no ambiguity + this.category = CATComment; + return; + } + + // assume no inline comment + this.commentSpan.i = this.rightSpaceSpan.i; + + // extended filtering with options? + if ( hasBits(this.allBits, BITHash) ) { + let hashSlot = this.findFirstMatch(slot, BITHash); + if ( hashSlot !== -1 ) { + this.analyzeExt(hashSlot); + if ( this.category === CATStaticExtFilter ) { return; } + // inline comment? (a space followed by a hash) + if ( (this.allBits & BITSpace) !== 0 ) { + for (;;) { + if ( hasBits(this.slices[hashSlot-3], BITSpace) ) { + this.commentSpan.i = hashSlot-3; + this.commentSpan.l = this.rightSpaceSpan.i - hashSlot; + break; + } + hashSlot = this.findFirstMatch(hashSlot + 6, BITHash); + if ( hashSlot === -1 ) { break; } + } + } + } + } + // assume network filtering + this.analyzeNet(); + } + + // Use in syntax highlighting contexts + analyzeExtra() { + if ( this.category === CATStaticExtFilter ) { + this.analyzeExtExtra(); + } else if ( this.category === CATStaticNetFilter ) { + this.analyzeNetExtra(); + } + } + + // Static extended filters are all of the form: + // + // 1. options (optional): a comma-separated list of hostnames + // 2. anchor: regex equivalent => /^#@?[\$\??|%|\?)?#$/ + // 3. pattern + // + // Return true if a valid extended filter is found, otherwise false. + // When a valid extended filter is found: + // optionsSpan: first slot which contains options + // optionsAnchorSpan: first slot to anchor + // patternSpan: first slot to pattern + analyzeExt(from) { + let end = this.rightSpaceSpan.i; + // Number of consecutive #s. + const l = this.slices[from+2]; + // More than 3 #s is likely to be a comment in a hosts file. + if ( l > 3 ) { return; } + if ( l !== 1 ) { + // If a space immediately follows 2 #s, assume a comment. 
+ if ( l === 2 ) { + if ( from+3 === end || hasBits(this.slices[from+3], BITSpace) ) { + return; + } + } else /* l === 3 */ { + this.splitSlot(from, 2); + end = this.rightSpaceSpan.i; + } + this.optionsSpan.i = this.leftSpaceSpan.i + this.leftSpaceSpan.l; + this.optionsSpan.l = from - this.optionsSpan.i; + this.optionsAnchorSpan.i = from; + this.optionsAnchorSpan.l = 3; + this.patternSpan.i = from + 3; + this.patternSpan.l = this.rightSpaceSpan.i - this.patternSpan.i; + this.category = CATStaticExtFilter; + return; + } + let flavorBits = 0; + let to = from + 3; + if ( to === end ) { return; } + // #@... + // ^ + if ( hasBits(this.slices[to], BITAt) ) { + if ( this.slices[to+2] !== 1 ) { return; } + flavorBits |= BITFlavorException; + to += 3; if ( to === end ) { return; } + } + // #$... + // ^ + if ( hasBits(this.slices[to], BITDollar) ) { + if ( this.slices[to+2] !== 1 ) { return; } + flavorBits |= BITFlavorExtStyle; + to += 3; if ( to === end ) { return; } + // #$?... + // ^ + if ( hasBits(this.slices[to], BITQuestion) ) { + if ( this.slices[to+2] !== 1 ) { return; } + flavorBits |= BITFlavorExtStrong; + to += 3; if ( to === end ) { return; } + } + } + // #[%?]... + // ^^ + else if ( hasBits(this.slices[to], BITPercent | BITQuestion) ) { + if ( this.slices[to+2] !== 1 ) { return; } + flavorBits |= hasBits(this.slices[to], BITQuestion) + ? BITFlavorExtStrong + : BITFlavorUnsupported; + to += 3; if ( to === end ) { return; } + } + // ##... 
+ // ^ + if ( hasNoBits(this.slices[to], BITHash) ) { return; } + if ( this.slices[to+2] > 1 ) { + this.splitSlot(to, 1); + } + to += 3; + this.optionsSpan.i = this.leftSpaceSpan.i + this.leftSpaceSpan.l; + this.optionsSpan.l = from - this.optionsSpan.i; + this.optionsAnchorSpan.i = from; + this.optionsAnchorSpan.l = to - this.optionsAnchorSpan.i; + this.patternSpan.i = to; + this.patternSpan.l = this.rightSpaceSpan.i - to; + this.flavorBits = flavorBits; + this.category = CATStaticExtFilter; + } + + // Use in syntax highlighting contexts + analyzeExtExtra() { + const { i, l } = this.optionsSpan; + if ( l === 0 ) { return; } + this.analyzeDomainList(i, i + l, BITComma, true); + } + + // Static network filters are all of the form: + // + // 1. exception declarator (optional): `@@` + // 2. left-hand pattern anchor (optional): `||` or `|` + // 3. pattern: a valid pattern, one of + // a regex, starting and ending with `/` + // a sequence of characters with optional wildcard characters + // wildcard `*` : regex equivalent => /./ + // wildcard `^` : regex equivalent => /[^%.0-9a-z_-]|$/ + // 4. right-hand anchor (optional): `|` + // 5. options declarator (optional): `$` + // options: one or more options + // 6. inline comment (optional): ` #` + // + // When a valid static filter is found: + // exceptionSpan: first slice of exception declarator + // patternLeftAnchorSpan: first slice to left-hand pattern anchor + // patternSpan: all slices belonging to pattern + // patternRightAnchorSpan: first slice to right-hand pattern anchor + // optionsAnchorSpan: first slice to options anchor + // optionsSpan: first slice to options + analyzeNet() { + let islice = this.leftSpaceSpan.i; + + // Assume no exception + this.exceptionSpan.i = this.leftSpaceSpan.l; + // Exception? + if ( + islice < this.commentSpan.i && + hasBits(this.slices[islice], BITAt) + ) { + const l = this.slices[islice+2]; + // @@@*, ... => @@, @*, ... 
+ if ( l >= 2 ) { + if ( l > 2 ) { + this.splitSlot(islice, 2); + } + this.exceptionSpan.l = 3; + islice += 3; + this.flavorBits |= BITFlavorException; + } + } + + // Assume no options + this.optionsAnchorSpan.i = this.optionsSpan.i = this.commentSpan.i; + + // Assume all is part of pattern + this.patternSpan.i = islice; + this.patternSpan.l = this.optionsAnchorSpan.i - islice; + + let patternStartIsRegex = + islice < this.optionsAnchorSpan.i && + hasBits(this.slices[islice], BITSlash); + + let patternIsRegex = patternStartIsRegex && ( + this.patternSpan.l === 3 && this.slices[this.patternSpan.i+2] > 2 || + hasBits(this.slices[this.optionsAnchorSpan.i-3], BITSlash) + ); + + // If the pattern is not a regex, there might be options. + if ( patternIsRegex === false ) { + let optionsBits = 0; + let i = this.optionsAnchorSpan.i; + for (;;) { + i -= 3; + if ( i < islice ) { break; } + const bits = this.slices[i]; + if ( hasBits(bits, BITDollar) ) { break; } + optionsBits |= bits; + } + if ( i >= islice ) { + const l = this.slices[i+2]; + if ( l > 1 ) { + // https://github.com/gorhill/uBlock/issues/952 + // AdGuard-specific `$$` filters => unsupported. + if ( this.findFirstOdd(0, BITHostname | BITComma | BITAsterisk) === i ) { + if ( this.interactive ) { + this.markSlices(i, i+3, BITError); + } + this.allBits |= BITError; + this.flavorBits |= BITFlavorError; + } else { + this.splitSlot(i, l - 1); + i += 3; + } + } + this.patternSpan.l = i - this.patternSpan.i; + this.optionsAnchorSpan.i = i; + this.optionsAnchorSpan.l = 3; + i += 3; + this.optionsSpan.i = i; + this.optionsSpan.l = this.commentSpan.i - i; + this.optionsBits = optionsBits; + patternIsRegex = patternStartIsRegex && ( + this.patternSpan.l === 3 && this.slices[this.patternSpan.i+2] > 2 || + hasBits(this.slices[this.optionsAnchorSpan.i-3], BITSlash) + ); + } + } + + // If the pattern is a regex, remember this. 
+ if ( patternIsRegex ) { + this.flavorBits |= BITFlavorNetRegex; + } + + // Refine by processing pattern anchors. + // + // Assume no anchors. + this.patternLeftAnchorSpan.i = this.patternSpan.i; + this.patternRightAnchorSpan.i = this.optionsAnchorSpan.i; + // Not a regex, there might be anchors. + if ( patternIsRegex === false ) { + // Left anchor? + // `|`: anchor to start of URL + // `||`: anchor to left of a hostname label + if ( + this.patternSpan.l !== 0 && + hasBits(this.slices[this.patternSpan.i], BITPipe) + ) { + this.patternLeftAnchorSpan.l = 3; + const l = this.slices[this.patternSpan.i+2]; + // |||*, ... => ||, |*, ... + if ( l > 2 ) { + this.splitSlot(this.patternSpan.i, 2); + } else { + this.patternSpan.l -= 3; + } + this.patternSpan.i += 3; + this.flavorBits |= l === 1 + ? BITFlavorNetLeftURLAnchor + : BITFlavorNetLeftHnAnchor; + } + // Right anchor? + // `|`: anchor to end of URL + // `^`: anchor to end of hostname, when other conditions are + // fulfilled: + // the pattern is hostname-anchored on the left + // the pattern is made only of hostname characters + if ( this.patternSpan.l !== 0 ) { + const lastPatternSlice = this.patternSpan.l > 3 + ? 
this.patternRightAnchorSpan.i - 3 + : this.patternSpan.i; + const bits = this.slices[lastPatternSlice]; + if ( (bits & BITPipe) !== 0 ) { + this.patternRightAnchorSpan.i = lastPatternSlice; + this.patternRightAnchorSpan.l = 3; + const l = this.slices[this.patternRightAnchorSpan.i+2]; + // ..., ||* => ..., |*, | + if ( l > 1 ) { + this.splitSlot(this.patternRightAnchorSpan.i, l - 1); + this.patternRightAnchorSpan.i += 3; + } else { + this.patternSpan.l -= 3; + } + this.flavorBits |= BITFlavorNetRightURLAnchor; + } else if ( + hasBits(bits, BITCaret) && + this.slices[lastPatternSlice+2] === 1 && + hasBits(this.flavorBits, BITFlavorNetLeftHnAnchor) && + this.skipUntilNot( + this.patternSpan.i, + lastPatternSlice, + BITHostname + ) === lastPatternSlice + ) { + this.patternRightAnchorSpan.i = lastPatternSlice; + this.patternRightAnchorSpan.l = 3; + this.patternSpan.l -= 3; + this.flavorBits |= BITFlavorNetRightHnAnchor; + } + } + } + + // Collate useful pattern bits information for further use. + // + // https://github.com/gorhill/httpswitchboard/issues/15 + // When parsing a hosts file, ensure localhost et al. don't end up + // in the pattern. To accomplish this we establish the rule that + // if a pattern contains space characters, the pattern will be only + // the part following the last space occurrence. 
+ { + const { i, l } = this.patternSpan; + let j = l; + for (;;) { + if ( j === 0 ) { break; } + j -= 3; + const bits = this.slices[i+j]; + if ( hasBits(bits, BITSpace) ) { break; } + this.patternBits |= bits; + } + if ( j !== 0 ) { + this.patternSpan.i += j + 3; + this.patternSpan.l -= j + 3; + if ( this.reIsLocalhostRedirect.test(this.getPattern()) ) { + this.flavorBits |= BITFlavorIgnore; + } + if ( this.interactive ) { + this.markSlices(0, this.patternSpan.i, BITIgnore); + } + } + } + + // Pointless wildcards and anchoring: + // - Eliminate leading wildcard not followed by a pattern token slice + // - Eliminate trailing wildcard not preceded by a pattern token slice + // - Eliminate pattern anchoring when irrelevant + // + // Leading wildcard history: + // https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448 + // Remove pointless leading *. + // https://github.com/gorhill/uBlock/issues/3034 + // We can remove anchoring if we need to match all at the start. + // + // Trailing wildcard history: + // https://github.com/gorhill/uBlock/issues/3034 + // We can remove anchoring if we need to match all at the end. + { + let { i, l } = this.patternSpan; + // Pointless leading wildcard + if ( + l > 3 && + hasBits(this.slices[i], BITAsterisk) && + hasNoBits(this.slices[i+3], BITPatternToken) + ) { + this.slices[i] |= BITIgnore; + i += 3; l -= 3; + this.patternSpan.i = i; + this.patternSpan.l = l; + // We can ignore left-hand pattern anchor + if ( this.patternLeftAnchorSpan.l !== 0 ) { + this.slices[this.patternLeftAnchorSpan.i] |= BITIgnore; + this.flavorBits &= ~BITFlavorNetLeftAnchor; + } + } + // Pointless trailing wildcard + if ( + l > 3 && + hasBits(this.slices[i+l-3], BITAsterisk) && + hasNoBits(this.slices[i+l-6], BITPatternToken) + ) { + // Ignore only if the pattern would not end up looking like + // a regex. 
+ if ( + hasNoBits(this.slices[i], BITSlash) || + hasNoBits(this.slices[i+l-6], BITSlash) + ) { + this.slices[i+l-3] |= BITIgnore; + } + l -= 3; + this.patternSpan.l = l; + // We can ignore right-hand pattern anchor + if ( this.patternRightAnchorSpan.l !== 0 ) { + this.slices[this.patternRightAnchorSpan.i] |= BITIgnore; + this.flavorBits &= ~BITFlavorNetRightAnchor; + } + } + // Pointless left-hand pattern anchoring + if ( + ( + l === 0 || + l !== 0 && hasBits(this.slices[i], BITAsterisk) + ) && + hasBits(this.flavorBits, BITFlavorNetLeftAnchor) + ) { + this.slices[this.patternLeftAnchorSpan.i] |= BITIgnore; + this.flavorBits &= ~BITFlavorNetLeftAnchor; + } + // Pointless right-hand pattern anchoring + if ( + ( + l === 0 || + l !== 0 && hasBits(this.slices[i+l-3], BITAsterisk) + ) && + hasBits(this.flavorBits, BITFlavorNetRightAnchor) + ) { + this.slices[this.patternRightAnchorSpan.i] |= BITIgnore; + this.flavorBits &= ~BITFlavorNetRightAnchor; + } + } + + this.category = CATStaticNetFilter; + } + + analyzeNetExtra() { + for ( const _ of this.options() ) { void _; } + } + + analyzeDomainList(from, to, bitSeparator, canEntity) { + if ( from >= to ) { return; } + let beg = from; + while ( beg < to ) { + let end = this.skipUntil(beg, to, bitSeparator); + if ( end === -1 ) { end = to; } + if ( this.analyzeDomain(beg, end, canEntity) === false ) { + this.markSlices(beg, end, BITError); + } + beg = end + 3; + } + // Dangling separator at the end? 
+ if ( hasBits(this.slices[to-3], bitSeparator) ) { + this.markSlices(to - 3, to, BITError); + } + + } + + analyzeDomain(from, to, canEntity) { + const { slices } = this; + const len = to - from; + if ( len === 0 ) { return false; } + if ( hasBits(slices[from], BITTilde) ) { + if ( canEntity === false || slices[from+2] > 1 ) { return false; } + from += 3; + } + if ( len === 0 ) { return false; } + // First slice must be regex-equivalent of `\w` + if ( hasNoBits(slices[from], BITRegexWord | BITUnicode) ) { return false; } + // Last slice + if ( len > 3 ) { + const last = to - 3; + if ( hasBits(slices[last], BITAsterisk) ) { + if ( + canEntity === false || + len < 9 || + slices[last+2] > 1 || + hasNoBits(slices[last-3], BITPeriod) + ) { + return false; + } + } else if ( hasNoBits(slices[to-3], BITAlphaNum | BITUnicode) ) { + return false; + } + } + // Middle slices + if ( len > 6 ) { + for ( let i = from + 3; i < to - 3; i += 3 ) { + const bits = slices[i]; + if ( hasNoBits(bits, BITHostname) ) { return false; } + if ( hasBits(bits, BITPeriod) && slices[i+2] > 1 ) { return false; } + if ( + hasBits(bits, BITDash) && ( + hasNoBits(slices[i-3], BITRegexWord | BITUnicode) || + hasNoBits(slices[i+3], BITRegexWord | BITUnicode) + ) + ) { + return false; + } + } + } + return true; + } + + slice(raw) { + this.reset(); + this.raw = raw; + this.rawEnd = raw.length; + if ( this.rawEnd === 0 ) { return; } + // All unicode characters are allowed in hostname + const unicodeBits = BITUnicode | BITAlpha; + // Create raw slices + const slices = this.slices; + let ptr = this.sliceWritePtr; + let c = raw.charCodeAt(0); + let aBits = c < 0x80 ? charDescBits[c] : unicodeBits; + slices[ptr+0] = aBits; + slices[ptr+1] = 0; + ptr += 2; + let allBits = aBits; + let i = 0, j = 1; + while ( j < this.rawEnd ) { + c = raw.charCodeAt(j); + const bBits = c < 0x80 ? 
charDescBits[c] : unicodeBits; + if ( bBits !== aBits ) { + slices[ptr+0] = j - i; + slices[ptr+1] = bBits; + slices[ptr+2] = j; + ptr += 3; + allBits |= bBits; + aBits = bBits; + i = j; + } + j += 1; + } + slices[ptr+0] = j - i; + ptr += 1; + // End-of-line slice + this.eolSpan.i = ptr; + slices[ptr+0] = 0; + slices[ptr+1] = this.rawEnd; + slices[ptr+2] = 0; + ptr += 3; + // Trim left + if ( (slices[0] & BITSpace) !== 0 ) { + this.leftSpaceSpan.l = 3; + } else { + this.leftSpaceSpan.l = 0; + } + // Trim right + const lastSlice = this.eolSpan.i - 3; + if ( + (lastSlice > this.leftSpaceSpan.i) && + (slices[lastSlice] & BITSpace) !== 0 + ) { + this.rightSpaceSpan.i = lastSlice; + this.rightSpaceSpan.l = 3; + } else { + this.rightSpaceSpan.i = this.eolSpan.i; + this.rightSpaceSpan.l = 0; + } + // Quit cleanly + this.sliceWritePtr = ptr; + this.allBits = allBits; + } + + splitSlot(slot, l) { + this.sliceWritePtr += 3; + if ( this.sliceWritePtr > this.slices.length ) { + this.slices.push(0, 0, 0); + } + this.slices.copyWithin(slot + 3, slot, this.sliceWritePtr - 3); + this.slices[slot+3+1] = this.slices[slot+1] + l; + this.slices[slot+3+2] = this.slices[slot+2] - l; + this.slices[slot+2] = l; + for ( const span of this.spans ) { + if ( span.i > slot ) { + span.i += 3; + } + } + } + + markSlices(beg, end, bits) { + while ( beg < end ) { + this.slices[beg] |= bits; + beg += 3; + } + } + + unmarkSlices(beg, end, bits) { + while ( beg < end ) { + this.slices[beg] &= ~bits; + beg += 3; + } + } + + findFirstMatch(from, bits) { + let to = from; + while ( to < this.sliceWritePtr ) { + if ( (this.slices[to] & bits) !== 0 ) { return to; } + to += 3; + } + return -1; + } + + findFirstOdd(from, bits) { + let to = from; + while ( to < this.sliceWritePtr ) { + if ( (this.slices[to] & bits) === 0 ) { return to; } + to += 3; + } + return -1; + } + + skipUntil(from, to, bits) { + let i = from + 3; + for (;;) { + if ( i === to || (this.slices[i] & bits) !== 0 ) { break; } + i += 3; + } + 
return i; + } + + skipUntilNot(from, to, bits) { + let i = from + 3; + for (;;) { + if ( i === to || (this.slices[i] & bits) === 0 ) { break; } + i += 3; + } + return i; + } + + strFromSlices(from, to) { + return this.raw.slice( + this.slices[from+1], + this.slices[to+1] + this.slices[to+2] + ); + } + + strFromSpan(span) { + if ( span.l === 0 ) { return ''; } + const beg = span.i; + return this.strFromSlices(beg, beg + span.l - 1); + } + + isBlank() { + return this.allBits === BITSpace; + } + + hasOptions() { + return this.optionsSpan.l !== 0; + } + + getPattern() { + if ( this.pattern !== '' ) { return this.pattern; } + const { i, l } = this.patternSpan; + if ( l === 0 ) { return ''; } + let beg = this.slices[i+1]; + let end = this.slices[i+l+1]; + if ( hasBits(this.flavorBits, BITFlavorNetRegex) ) { + beg += 1; end -= 1; + } + this.pattern = this.raw.slice(beg, end); + return this.pattern; + } + + // https://github.com/chrisaljoudi/uBlock/issues/1096 + // Examples of dubious filter content: + // - Single character other than `*` wildcard + patternIsDubious() { + return this.patternSpan.l === 3 && + this.patternBits !== BITAsterisk && + this.optionsSpan.l === 0; + } + + patternIsMatchAll() { + const { l } = this.patternSpan; + return l === 0 || + l === 3 && hasBits(this.patternBits, BITAsterisk); + } + + patternIsPlainHostname() { + if ( + hasBits(this.patternBits, ~BITHostname) || ( + hasBits(this.flavorBits, BITFlavorNetAnchor) && + hasNotAllBits(this.flavorBits, BITFlavorNetHnAnchor) + ) + ) { + return false; + } + const { i, l } = this.patternSpan; + return hasBits(this.slices[i], BITAlphaNum) && + hasBits(this.slices[i+l-3], BITAlphaNum); + } + + patternIsLeftHostnameAnchored() { + return hasBits(this.flavorBits, BITFlavorNetLeftHnAnchor); + } + + patternIsRightHostnameAnchored() { + return hasBits(this.flavorBits, BITFlavorNetRightHnAnchor); + } + + patternIsLeftAnchored() { + return hasBits(this.flavorBits, BITFlavorNetLeftURLAnchor); + } + + 
patternIsRightAnchored() { + return hasBits(this.flavorBits, BITFlavorNetRightURLAnchor); + } + + patternIsRegex() { + return (this.flavorBits & BITFlavorNetRegex) !== 0; + } + + patternHasWildcard() { + return hasBits(this.patternBits, BITAsterisk); + } + + patternHasCaret() { + return hasBits(this.patternBits, BITCaret); + } + + patternHasUnicode() { + return hasBits(this.patternBits, BITUnicode); + } + + patternHasUppercase() { + return hasBits(this.patternBits, BITUppercase); + } + + patternToLowercase() { + const hasUpper = this.patternHasUppercase(); + if ( hasUpper === false && this.pattern !== '' ) { + return this.pattern; + } + const { i, l } = this.patternSpan; + if ( l === 0 ) { return ''; } + const beg = this.slices[i+1]; + const end = this.slices[i+l+1]; + this.pattern = this.pattern || this.raw.slice(beg, end); + if ( hasUpper === false ) { return this.pattern; } + this.pattern = this.pattern.toLowerCase(); + this.raw = this.raw.slice(0, beg) + + this.pattern + + this.raw.slice(end); + this.unmarkSlices(i, i+l, BITUppercase); + this.patternBits &= ~BITUppercase; + return this.pattern; + } + + patternHasSpace() { + return hasBits(this.flavorBits, BITFlavorNetSpaceInPattern); + } + + patternHasLeadingWildcard() { + if ( hasBits(this.patternBits, BITAsterisk) === false ) { + return false; + } + const { i, l } = this.patternSpan; + return l !== 0 && hasBits(this.slices[i], BITAsterisk); + } + + patternHasTrailingWildcard() { + if ( hasBits(this.patternBits, BITAsterisk) === false ) { + return false; + } + const { i, l } = this.patternSpan; + return l !== 0 && hasBits(this.slices[i+l-1], BITAsterisk); + } + + optionHasUnicode() { + return hasBits(this.optionsBits, BITUnicode); + } + + options() { + if ( this.category === CATStaticNetFilter ) { + return this.netOptionsIterator; + } else if ( this.category === CATStaticExtFilter ) { + return this.extOptionsIterator; + } + return []; + } + + patternTokens() { + if ( this.category === CATStaticNetFilter ) { + 
return this.patternTokenIterator; + } + return []; + } + + setMaxTokenLength(l) { + this.maxTokenLength = l; + } + + hasUnicode() { + return hasBits(this.allBits, BITUnicode); + } + + toLowerCase() { + if ( hasBits(this.allBits, BITUppercase) ) { + this.raw = this.raw.toLowerCase(); + } + return this.raw; + } + + // TODO: if there is a need to punycode, we force a re-analysis post- + // punycode conversion. We could avoid the re-analysis by substituting + // the original pattern slices with the post-punycode ones, but it's + // not trivial work and given how rare this occurs it may not be worth + // worrying about this. + toPunycode() { + if ( this.patternHasUnicode() === false ) { return; } + const { i, l } = this.patternSpan; + if ( l === 0 ) { return; } + const re = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+/; + let pattern = this.getPattern(); + const match = re.exec(this.pattern); + if ( match === null ) { return; } + pattern = punycode.toASCII(match[0]) + + this.pattern.slice(match.index + match[0].length); + const beg = this.slices[i+1]; + const end = this.slices[i+l+1]; + const raw = this.raw.slice(0, beg) + pattern + this.raw.slice(end); + this.analyze(raw); + } + + isException() { + return hasBits(this.flavorBits, BITFlavorException); + } + + shouldIgnore() { + return hasBits(this.flavorBits, BITFlavorIgnore); + } + + hasError() { + return hasBits(this.allBits, BITError); + } +}; + +/******************************************************************************/ + +const CATNone = 0; +const CATStaticExtFilter = 1; +const CATStaticNetFilter = 2; +const CATComment = 3; + +const BITSpace = 1 << 0; +const BITGlyph = 1 << 1; +const BITExclamation = 1 << 2; +const BITHash = 1 << 3; +const BITDollar = 1 << 4; +const BITPercent = 1 << 5; +const BITParen = 1 << 6; +const BITAsterisk = 1 << 7; +const BITComma = 1 << 8; +const BITDash = 1 << 9; +const BITPeriod = 1 << 10; +const BITSlash = 1 << 11; +const BITNum = 1 << 12; +const BITEqual = 1 << 13; 
const BITQuestion = 1 << 14;
const BITAt = 1 << 15;
const BITAlpha = 1 << 16;
const BITUppercase = 1 << 17;
const BITSquareBracket = 1 << 18;
const BITBackslash = 1 << 19;
const BITCaret = 1 << 20;
const BITUnderscore = 1 << 21;
const BITBrace = 1 << 22;
const BITPipe = 1 << 23;
const BITTilde = 1 << 24;
// Set along with the bracket bit for `)`, `]` and `}` in the table below
const BITClosing = 1 << 28;
const BITUnicode = 1 << 29;
const BITIgnore = 1 << 30;
const BITError = 1 << 31;

// Combinations of the bits above
const BITAll = 0xFFFFFFFF;
const BITAlphaNum = BITNum | BITAlpha;
const BITRegexWord = BITAlphaNum | BITUnderscore;
const BITHostname = BITNum | BITAlpha | BITUppercase | BITDash | BITPeriod | BITUnderscore | BITUnicode;
const BITPatternToken = BITNum | BITAlpha | BITPercent;
const BITLineComment = BITExclamation | BITHash | BITSquareBracket;

// Descriptor bits of each character in the range 0x00-0x7F, indexed by
// character code. The position of each entry is significant.
const charDescBits = [
    /* 0x00 - 0x08 */ 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x09   */ BITSpace,
    /* 0x0A - 0x0F */ 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20   */ BITSpace,
    /* 0x21 ! */ BITExclamation,
    /* 0x22 " */ BITGlyph,
    /* 0x23 # */ BITHash,
    /* 0x24 $ */ BITDollar,
    /* 0x25 % */ BITPercent,
    /* 0x26 & */ BITGlyph,
    /* 0x27 ' */ BITGlyph,
    /* 0x28 ( */ BITParen,
    /* 0x29 ) */ BITParen | BITClosing,
    /* 0x2A * */ BITAsterisk,
    /* 0x2B + */ BITGlyph,
    /* 0x2C , */ BITComma,
    /* 0x2D - */ BITDash,
    /* 0x2E . */ BITPeriod,
    /* 0x2F / */ BITSlash,
    /* 0x30 0 */ BITNum,
    /* 0x31 1 */ BITNum,
    /* 0x32 2 */ BITNum,
    /* 0x33 3 */ BITNum,
    /* 0x34 4 */ BITNum,
    /* 0x35 5 */ BITNum,
    /* 0x36 6 */ BITNum,
    /* 0x37 7 */ BITNum,
    /* 0x38 8 */ BITNum,
    /* 0x39 9 */ BITNum,
    /* 0x3A : */ BITGlyph,
    /* 0x3B ; */ BITGlyph,
    /* 0x3C < */ BITGlyph,
    /* 0x3D = */ BITEqual,
    /* 0x3E > */ BITGlyph,
    /* 0x3F ? */ BITQuestion,
    /* 0x40 @ */ BITAt,
    /* 0x41 A */ BITAlpha | BITUppercase,
    /* 0x42 B */ BITAlpha | BITUppercase,
    /* 0x43 C */ BITAlpha | BITUppercase,
    /* 0x44 D */ BITAlpha | BITUppercase,
    /* 0x45 E */ BITAlpha | BITUppercase,
    /* 0x46 F */ BITAlpha | BITUppercase,
    /* 0x47 G */ BITAlpha | BITUppercase,
    /* 0x48 H */ BITAlpha | BITUppercase,
    /* 0x49 I */ BITAlpha | BITUppercase,
    /* 0x4A J */ BITAlpha | BITUppercase,
    /* 0x4B K */ BITAlpha | BITUppercase,
    /* 0x4C L */ BITAlpha | BITUppercase,
    /* 0x4D M */ BITAlpha | BITUppercase,
    /* 0x4E N */ BITAlpha | BITUppercase,
    /* 0x4F O */ BITAlpha | BITUppercase,
    /* 0x50 P */ BITAlpha | BITUppercase,
    /* 0x51 Q */ BITAlpha | BITUppercase,
    /* 0x52 R */ BITAlpha | BITUppercase,
    /* 0x53 S */ BITAlpha | BITUppercase,
    /* 0x54 T */ BITAlpha | BITUppercase,
    /* 0x55 U */ BITAlpha | BITUppercase,
    /* 0x56 V */ BITAlpha | BITUppercase,
    /* 0x57 W */ BITAlpha | BITUppercase,
    /* 0x58 X */ BITAlpha | BITUppercase,
    /* 0x59 Y */ BITAlpha | BITUppercase,
    /* 0x5A Z */ BITAlpha | BITUppercase,
    /* 0x5B [ */ BITSquareBracket,
    /* 0x5C \ */ BITBackslash,
    /* 0x5D ] */ BITSquareBracket | BITClosing,
    /* 0x5E ^ */ BITCaret,
    /* 0x5F _ */ BITUnderscore,
    /* 0x60 ` */ BITGlyph,
    /* 0x61 a */ BITAlpha,
    /* 0x62 b */ BITAlpha,
    /* 0x63 c */ BITAlpha,
    /* 0x64 d */ BITAlpha,
    /* 0x65 e */ BITAlpha,
    /* 0x66 f */ BITAlpha,
    /* 0x67 g */ BITAlpha,
    /* 0x68 h */ BITAlpha,
    /* 0x69 i */ BITAlpha,
    /* 0x6A j */ BITAlpha,
    /* 0x6B k */ BITAlpha,
    /* 0x6C l */ BITAlpha,
    /* 0x6D m */ BITAlpha,
    /* 0x6E n */ BITAlpha,
    /* 0x6F o */ BITAlpha,
    /* 0x70 p */ BITAlpha,
    /* 0x71 q */ BITAlpha,
    /* 0x72 r */ BITAlpha,
    /* 0x73 s */ BITAlpha,
    /* 0x74 t */ BITAlpha,
    /* 0x75 u */ BITAlpha,
    /* 0x76 v */ BITAlpha,
    /* 0x77 w */ BITAlpha,
    /* 0x78 x */ BITAlpha,
    /* 0x79 y */ BITAlpha,
    /* 0x7A z */ BITAlpha,
    /* 0x7B { */ BITBrace,
    /* 0x7C | */ BITPipe,
    /* 0x7D } */ BITBrace | BITClosing,
    /* 0x7E ~ */ BITTilde,
    /* 0x7F   */ 0,
];

// Flavor bits: properties of a filter as a whole
const BITFlavorException = 1 << 0;
const BITFlavorNetRegex = 1 << 1;
const BITFlavorNetLeftURLAnchor = 1 << 2;
const BITFlavorNetRightURLAnchor = 1 << 3;
const BITFlavorNetLeftHnAnchor = 1 << 4;
const BITFlavorNetRightHnAnchor = 1 << 5;
const BITFlavorNetSpaceInPattern = 1 << 6;
const BITFlavorExtStyle = 1 << 7;
const BITFlavorExtStrong = 1 << 8;
const BITFlavorIgnore = 1 << 29;
const BITFlavorUnsupported = 1 << 30;
const BITFlavorError = 1 << 31;

// Combinations of the anchor-related flavor bits above
const BITFlavorNetLeftAnchor = BITFlavorNetLeftURLAnchor | BITFlavorNetLeftHnAnchor;
const BITFlavorNetRightAnchor = BITFlavorNetRightURLAnchor | BITFlavorNetRightHnAnchor;
const BITFlavorNetHnAnchor = BITFlavorNetLeftHnAnchor | BITFlavorNetRightHnAnchor;
const BITFlavorNetAnchor = BITFlavorNetLeftAnchor | BITFlavorNetRightAnchor;

// Identifiers of network filter options. All values fit in the lower
// 16 bits, the OPT* property bits further below start at bit 16.
const OPTTokenInvalid = 0;
const OPTToken1p = 1;
const OPTToken3p = 2;
const OPTTokenAll = 3;
const OPTTokenBadfilter = 4;
const OPTTokenCname = 5;
const OPTTokenCsp = 6;
const OPTTokenCss = 7;
const OPTTokenDenyAllow = 8;
const OPTTokenDoc = 9;
const OPTTokenDomain = 10;
const OPTTokenEhide = 11;
const OPTTokenEmpty = 12;
const OPTTokenFont = 13;
const OPTTokenFrame = 14;
const OPTTokenGenericblock = 15;
const OPTTokenGhide = 16;
const OPTTokenImage = 17;
const OPTTokenImportant = 18;
const OPTTokenInlineFont = 19;
const OPTTokenInlineScript = 20;
const OPTTokenMedia = 21;
const OPTTokenMp4 = 22;
const OPTTokenObject = 23;
const OPTTokenOther = 24;
const OPTTokenPing = 25;
const OPTTokenPopunder = 26;
const OPTTokenPopup = 27;
const OPTTokenRedirect = 28;
const OPTTokenRedirectRule = 29;
const OPTTokenScript = 30;
const OPTTokenShide = 31;
const OPTTokenXhr = 32;
const OPTTokenWebrtc = 33;
const OPTTokenWebsocket = 34;

// Option property bits, OR-ed with an option identifier to form the
// descriptor of an option
const OPTCanNegate = 1 << 16;
const OPTBlockOnly = 1 << 17;
const OPTAllowOnly = 1 << 18;
const OPTMustAssign = 1 << 19;
const OPTAllowMayAssign = 1 << 20;
+const OPTDomainList = 1 << 21; +const OPTNotSupported = 1 << 22; + +const hasNoBits = (v, bits) => (v & bits) === 0; +const hasBits = (v, bits) => (v & bits) !== 0; +const hasNotAllBits = (v, bits) => (v & bits) !== bits; + +/******************************************************************************/ + +Parser.prototype.CATNone = CATNone; +Parser.prototype.CATStaticExtFilter = CATStaticExtFilter; +Parser.prototype.CATStaticNetFilter = CATStaticNetFilter; +Parser.prototype.CATComment = CATComment; + +Parser.prototype.BITSpace = BITSpace; +Parser.prototype.BITGlyph = BITGlyph; +Parser.prototype.BITComma = BITComma; +Parser.prototype.BITLineComment = BITLineComment; +Parser.prototype.BITPipe = BITPipe; +Parser.prototype.BITAsterisk = BITAsterisk; +Parser.prototype.BITCaret = BITCaret; +Parser.prototype.BITUppercase = BITUppercase; +Parser.prototype.BITHostname = BITHostname; +Parser.prototype.BITPeriod = BITPeriod; +Parser.prototype.BITDash = BITDash; +Parser.prototype.BITHash = BITHash; +Parser.prototype.BITEqual = BITEqual; +Parser.prototype.BITQuestion = BITQuestion; +Parser.prototype.BITPercent = BITPercent; +Parser.prototype.BITTilde = BITTilde; +Parser.prototype.BITUnicode = BITUnicode; +Parser.prototype.BITIgnore = BITIgnore; +Parser.prototype.BITError = BITError; +Parser.prototype.BITAll = BITAll; + +Parser.prototype.BITFlavorException = BITFlavorException; +Parser.prototype.BITFlavorExtStyle = BITFlavorExtStyle; +Parser.prototype.BITFlavorIgnore = BITFlavorIgnore; +Parser.prototype.BITFlavorUnsupported = BITFlavorUnsupported; +Parser.prototype.BITFlavorError = BITFlavorError; + +Parser.prototype.OPTTokenInvalid = OPTTokenInvalid; +Parser.prototype.OPTTokenAll = OPTTokenAll; +Parser.prototype.OPTTokenBadfilter = OPTTokenBadfilter; +Parser.prototype.OPTTokenCname = OPTTokenCname; +Parser.prototype.OPTTokenCsp = OPTTokenCsp; +Parser.prototype.OPTTokenDenyAllow = OPTTokenDenyAllow; +Parser.prototype.OPTTokenDoc = OPTTokenDoc; +Parser.prototype.OPTTokenDomain 
= OPTTokenDomain; +Parser.prototype.OPTTokenEhide = OPTTokenEhide; +Parser.prototype.OPTTokenEmpty = OPTTokenEmpty; +Parser.prototype.OPTToken1p = OPTToken1p; +Parser.prototype.OPTTokenFont = OPTTokenFont; +Parser.prototype.OPTTokenGenericblock = OPTTokenGenericblock; +Parser.prototype.OPTTokenGhide = OPTTokenGhide; +Parser.prototype.OPTTokenImage = OPTTokenImage; +Parser.prototype.OPTTokenImportant = OPTTokenImportant; +Parser.prototype.OPTTokenInlineFont = OPTTokenInlineFont; +Parser.prototype.OPTTokenInlineScript = OPTTokenInlineScript; +Parser.prototype.OPTTokenMedia = OPTTokenMedia; +Parser.prototype.OPTTokenMp4 = OPTTokenMp4; +Parser.prototype.OPTTokenObject = OPTTokenObject; +Parser.prototype.OPTTokenOther = OPTTokenOther; +Parser.prototype.OPTTokenPing = OPTTokenPing; +Parser.prototype.OPTTokenPopunder = OPTTokenPopunder; +Parser.prototype.OPTTokenPopup = OPTTokenPopup; +Parser.prototype.OPTTokenRedirect = OPTTokenRedirect; +Parser.prototype.OPTTokenRedirectRule = OPTTokenRedirectRule; +Parser.prototype.OPTTokenScript = OPTTokenScript; +Parser.prototype.OPTTokenShide = OPTTokenShide; +Parser.prototype.OPTTokenCss = OPTTokenCss; +Parser.prototype.OPTTokenFrame = OPTTokenFrame; +Parser.prototype.OPTToken3p = OPTToken3p; +Parser.prototype.OPTTokenXhr = OPTTokenXhr; +Parser.prototype.OPTTokenWebrtc = OPTTokenWebrtc; +Parser.prototype.OPTTokenWebsocket = OPTTokenWebsocket; + +/******************************************************************************/ + +const Span = class { + constructor() { + this.reset(); + } + reset() { + this.i = this.l = 0; + } +}; + +/******************************************************************************/ + +const NetOptionsIterator = class { + constructor(parser) { + this.parser = parser; + this.l = this.r = 0; + this.value = undefined; + this.done = true; + } + [Symbol.iterator]() { + const { i, l } = this.parser.optionsSpan; + this.l = i; + this.r = i + l; + this.exception = this.parser.isException(); + this.done = false; + 
this.value = { + id: OPTTokenInvalid, + val: undefined, + not: false, + bad: false, + }; + return this; + } + next() { + if ( this.l === this.r ) { + this.value = undefined; + this.done = true; + return this; + } + const parser = this.parser; + const { slices, interactive } = parser; + const value = this.value; + value.not = value.bad = false; + let i0 = this.l; + let i = i0; + if ( hasBits(slices[i], BITTilde) ) { + if ( slices[i+2] !== 1 ) { + value.bad = true; + if ( interactive ) { slices[i] |= BITError; } + } + value.not = true; + i += 3; + i0 = i; + } + let j = -1; + while ( i < this.r ) { + if ( hasBits(slices[i], BITComma) ) { break; } + if ( j === -1 && hasBits(slices[i], BITEqual) ) { j = i; } + i += 3; + } + const assigned = j !== -1; + if ( assigned ) { + const k = j + 3; + if ( k === i || slices[j+2] > 1 || k === this.r ) { + value.bad = true; + } + value.val = parser.raw.slice(slices[k+1], slices[i+1]); + } else { + value.val = undefined; + j = i; + } + const token = parser.raw.slice(slices[i0+1], slices[j+1]); + const descriptor = netOptionTokens.get(token) || OPTTokenInvalid; + value.id = descriptor & 0xFFFF; + if ( + descriptor === OPTTokenInvalid || + value.not && hasNoBits(descriptor, OPTCanNegate) || + this.exception && hasBits(descriptor, OPTBlockOnly) || + this.exception === false && hasBits(descriptor, OPTAllowOnly) || + assigned && hasNoBits(descriptor, OPTMustAssign) || + assigned === false && hasBits(descriptor, OPTMustAssign) && ( + this.exception === false || + hasNoBits(descriptor, OPTAllowMayAssign) + ) + ) { + value.bad = true; + } else if ( interactive && hasBits(descriptor, OPTDomainList) ) { + parser.analyzeDomainList(j + 3, i, BITPipe, value.id === OPTTokenDomain); + } + if ( i < this.r ) { + if ( interactive && (slices[i+2] !== 1 || (i+3) === this.r) ) { + parser.markSlices(i, i+3, BITError); + } + i += 3; + } + if ( interactive && (value.bad || hasBits(descriptor, OPTNotSupported)) ) { + parser.markSlices(this.l, i, BITError); + 
} + this.l = i; + return this; + } +}; + +const netOptionTokens = new Map([ + [ '1p', OPTToken1p | OPTCanNegate ], [ 'first-party', OPTToken1p | OPTCanNegate ], + [ '3p', OPTToken3p | OPTCanNegate ], [ 'third-party', OPTToken3p | OPTCanNegate ], + [ 'all', OPTTokenAll ], + [ 'badfilter', OPTTokenBadfilter ], + [ 'cname', OPTTokenCname | OPTAllowOnly ], + [ 'csp', OPTTokenCsp | OPTMustAssign | OPTAllowMayAssign ], + [ 'css', OPTTokenCss | OPTCanNegate ], [ 'stylesheet', OPTTokenCss | OPTCanNegate ], + [ 'denyallow', OPTTokenDenyAllow | OPTMustAssign | OPTDomainList ], + [ 'doc', OPTTokenDoc ], [ 'document', OPTTokenDoc ], + [ 'domain', OPTTokenDomain | OPTMustAssign | OPTDomainList ], + [ 'ehide', OPTTokenEhide ], [ 'elemhide', OPTTokenEhide ], + [ 'empty', OPTTokenEmpty | OPTBlockOnly ], + [ 'frame', OPTTokenFrame | OPTCanNegate ], [ 'subdocument', OPTTokenFrame | OPTCanNegate ], + [ 'font', OPTTokenFont | OPTCanNegate ], + [ 'genericblock', OPTTokenGenericblock | OPTNotSupported ], + [ 'ghide', OPTTokenGhide ], [ 'generichide', OPTTokenGhide ], + [ 'image', OPTTokenImage | OPTCanNegate ], + [ 'important', OPTTokenImportant | OPTBlockOnly ], + [ 'inline-font', OPTTokenInlineFont ], + [ 'inline-script', OPTTokenInlineScript ], + [ 'media', OPTTokenMedia | OPTCanNegate ], + [ 'mp4', OPTTokenMp4 ], + [ 'object', OPTTokenObject | OPTCanNegate ], [ 'object-subrequest', OPTTokenObject | OPTCanNegate ], + [ 'other', OPTTokenOther | OPTCanNegate ], + [ 'ping', OPTTokenPing | OPTCanNegate ], [ 'beacon', OPTTokenPing | OPTCanNegate ], + [ 'popunder', OPTTokenPopunder ], + [ 'popup', OPTTokenPopup ], + [ 'redirect', OPTTokenRedirect | OPTMustAssign | OPTBlockOnly ], + [ 'redirect-rule', OPTTokenRedirectRule | OPTMustAssign | OPTBlockOnly ], + [ 'script', OPTTokenScript | OPTCanNegate ], + [ 'shide', OPTTokenShide ], [ 'specifichide', OPTTokenShide ], + [ 'xhr', OPTTokenXhr | OPTCanNegate ], [ 'xmlhttprequest', OPTTokenXhr | OPTCanNegate ], + [ 'webrtc', OPTTokenWebrtc | 
OPTNotSupported ], + [ 'websocket', OPTTokenWebsocket | OPTCanNegate ], +]); + +/******************************************************************************/ + +// https://github.com/gorhill/uBlock/issues/997 +// Ignore token if preceded by wildcard. + +const PatternTokenIterator = class { + constructor(parser) { + this.parser = parser; + this.l = this.r = this.i = 0; + this.value = undefined; + this.done = true; + } + [Symbol.iterator]() { + const { i, l } = this.parser.patternSpan; + this.l = i; + this.r = i + l; + this.i = i; + this.done = false; + this.value = { token: '', pos: 0 }; + return this; + } + end() { + this.value = undefined; + this.done = true; + return this; + } + next() { + const { slices, maxTokenLength } = this.parser; + let { l, r, i, value } = this; + let sl = i, sr = 0; + for (;;) { + for (;;) { + if ( sl >= r ) { return this.end(); } + if ( hasBits(slices[sl], BITPatternToken) ) { break; } + sl += 3; + } + sr = sl + 3; + while ( sr < r && hasBits(slices[sr], BITPatternToken) ) { + sr += 3; + } + if ( + ( + sl === 0 || + hasNoBits(slices[sl-3], BITAsterisk) + ) && + ( + sr === r || + hasNoBits(slices[sr], BITAsterisk) || + (slices[sr+1] - slices[sl+1]) >= maxTokenLength + ) + ) { + break; + } + sl = sr + 3; + } + this.i = sr + 3; + const beg = slices[sl+1]; + value.token = this.parser.raw.slice(beg, slices[sr+1]); + value.pos = beg - slices[l+1]; + return this; + } +}; + +/******************************************************************************/ + +const ExtOptionsIterator = class { + constructor(parser) { + this.parser = parser; + this.l = this.r = 0; + this.value = undefined; + this.done = true; + } + [Symbol.iterator]() { + const { i, l } = this.parser.optionsSpan; + this.l = i; + this.r = i + l; + this.done = false; + this.value = { + hn: undefined, + not: false, + bad: false, + }; + return this; + } + next() { + if ( this.l === this.r ) { + this.value = undefined; + this.done = true; + return this; + } + const parser = 
this.parser; + const { slices, interactive } = parser; + const value = this.value; + value.not = value.bad = false; + let i0 = this.l; + let i = i0; + if ( hasBits(slices[i], BITTilde) ) { + if ( slices[i+2] !== 1 ) { + value.bad = true; + if ( interactive ) { slices[i] |= BITError; } + } + value.not = true; + i += 3; + i0 = i; + } + while ( i < this.r ) { + if ( hasBits(slices[i], BITComma) ) { break; } + i += 3; + } + value.hn = parser.raw.slice(slices[i0+1], slices[i+1]); + if ( i < this.r ) { + if ( interactive && (slices[i+2] !== 1 || (i+3) === this.r) ) { + parser.markSlices(i, i+3, BITError); + } + i += 3; + } + if ( interactive && value.bad ) { + parser.markSlices(this.l, i, BITError); + } + this.l = i; + return this; + } +}; + +/******************************************************************************/ + +if ( vAPI instanceof Object ) { + vAPI.StaticFilteringParser = Parser; +} else { + self.StaticFilteringParser = Parser; +} + +/******************************************************************************/ + +// <<<<< end of local scope +} diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 3e17d3467..78409b06a 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -84,13 +84,16 @@ const typeNameToTypeValue = { const otherTypeBitValue = typeNameToTypeValue.other; +const bitFromType = type => + 1 << ((typeNameToTypeValue[type] >>> 4) - 1); + // All network request types to bitmap // bring origin to 0 (from 4 -- see typeNameToTypeValue) // left-shift 1 by the above-calculated value // subtract 1 to set all type bits const allNetworkTypesBits = (1 << (otherTypeBitValue >>> 4)) - 1; - + const allTypesBits = allNetworkTypesBits | 1 << (typeNameToTypeValue['popup'] >>> 4) - 1 | @@ -127,42 +130,6 @@ const typeValueToTypeName = { 23: 'unsupported', }; -// https://github.com/gorhill/uBlock/issues/1493 -// Transpose `ping` into `other` for now. 
-const toNormalizedType = { - 'all': 'all', - 'beacon': 'ping', - 'cname': 'cname', - 'css': 'stylesheet', - 'data': 'data', - 'doc': 'main_frame', - 'document': 'main_frame', - 'font': 'font', - 'frame': 'sub_frame', - 'genericblock': 'unsupported', - 'generichide': 'generichide', - 'ghide': 'generichide', - 'image': 'image', - 'inline-font': 'inline-font', - 'inline-script': 'inline-script', - 'media': 'media', - 'object': 'object', - 'object-subrequest': 'object', - 'other': 'other', - 'ping': 'ping', - 'popunder': 'popunder', - 'popup': 'popup', - 'script': 'script', - 'specifichide': 'specifichide', - 'shide': 'specifichide', - 'stylesheet': 'stylesheet', - 'subdocument': 'sub_frame', - 'xhr': 'xmlhttprequest', - 'xmlhttprequest': 'xmlhttprequest', - 'webrtc': 'unsupported', - 'websocket': 'websocket', -}; - const typeValueFromCatBits = catBits => (catBits >>> 4) & 0b11111; /******************************************************************************/ @@ -409,7 +376,7 @@ const filterPattern = { units.push(FilterRegex.compile(parsed)); return; } - const pattern = parsed.f; + const pattern = parsed.pattern; if ( pattern === '*' ) { units.push(FilterTrue.compile()); return; @@ -439,27 +406,27 @@ const filterPattern = { hasCaretCombo ? 
parsed.firstCaretPos : parsed.firstWildcardPos ); if ( parsed.tokenBeg < parsed.firstWildcardPos ) { - parsed.f = sleft; + parsed.pattern = sleft; units.push(FilterPatternPlain.compile(parsed)); - parsed.f = sright; + parsed.pattern = sright; units.push(FilterPatternRight.compile(parsed, hasCaretCombo)); return; } // parsed.tokenBeg > parsed.firstWildcardPos - parsed.f = sright; + parsed.pattern = sright; parsed.tokenBeg -= parsed.firstWildcardPos + 1; units.push(FilterPatternPlain.compile(parsed)); - parsed.f = sleft; + parsed.pattern = sleft; units.push(FilterPatternLeft.compile(parsed, hasCaretCombo)); }, compileGeneric: function(parsed, units) { - const pattern = parsed.f; + const pattern = parsed.pattern; // Optimize special case: plain pattern with trailing caret if ( parsed.firstWildcardPos === -1 && parsed.firstCaretPos === (pattern.length - 1) ) { - parsed.f = pattern.slice(0, -1); + parsed.pattern = pattern.slice(0, -1); units.push(FilterPatternPlain.compile(parsed)); units.push(FilterTrailingSeparator.compile()); return; @@ -479,10 +446,10 @@ const filterPattern = { // if ( c === 0x2A /* '*' */ || c === 0x5E /* '^' */ ) { break; } // right += 1; //} - //parsed.f = pattern.slice(left, right); + //parsed.pattern = pattern.slice(left, right); //parsed.tokenBeg -= left; //units.push(FilterPatternPlain.compile(parsed)); - //parsed.f = pattern; + //parsed.pattern = pattern; units.push(FilterPatternGeneric.compile(parsed)); }, }; @@ -565,7 +532,7 @@ const FilterPatternPlain = class { } static compile(details) { - return [ FilterPatternPlain.fid, details.f, details.tokenBeg ]; + return [ FilterPatternPlain.fid, details.pattern, details.tokenBeg ]; } static fromCompiled(args) { @@ -678,7 +645,7 @@ const FilterPatternLeft = class { static compile(details, ex) { return [ ex ? FilterPatternLeftEx.fid : FilterPatternLeft.fid, - details.f + details.pattern ]; } @@ -762,7 +729,7 @@ const FilterPatternRight = class { static compile(details, ex) { return [ ex ? 
FilterPatternRightEx.fid : FilterPatternRight.fid, - details.f + details.pattern ]; } @@ -853,7 +820,7 @@ const FilterPatternGeneric = class { static compile(details) { const anchor = details.anchor; details.anchor = 0; - return [ FilterPatternGeneric.fid, details.f, anchor ]; + return [ FilterPatternGeneric.fid, details.pattern, anchor ]; } static fromCompiled(args) { @@ -1115,7 +1082,7 @@ const FilterRegex = class { } static compile(details) { - return [ FilterRegex.fid, details.f ]; + return [ FilterRegex.fid, details.pattern ]; } static fromCompiled(args) { @@ -2101,25 +2068,42 @@ const FILTER_SEQUENCES_MIN = filterSequenceWritePtr; /******************************************************************************/ const FilterParser = class { - constructor() { + constructor(parser) { this.cantWebsocket = vAPI.cantWebsocket; this.domainOpt = ''; this.noTokenHash = urlTokenizer.noTokenHash; this.reBadDomainOptChars = /[+?^${}()[\]\\]/; - this.reHostnameRule1 = /^\w[\w.-]*[a-z]$/i; - this.reHostnameRule2 = /^\w[\w.-]*[a-z]\^?$/i; - this.reCanTrimCarets1 = /^[^*]*$/; - this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/; this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/; this.reHasUnicode = /[^\x00-\x7F]/; - this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/; this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/; - this.reGoodToken = /[%0-9a-z]{1,}/g; - this.reSeparator = /[\/^]/; this.reRegexToken = /[%0-9A-Za-z]{2,}/g; this.reRegexTokenAbort = /[([]/; this.reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/; this.reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*.]|$)/; + this.reGoodToken = /[%0-9a-z]{1,}/g; + this.tokenIdToNormalizedType = new Map([ + [ parser.OPTTokenCname, bitFromType('cname') ], + [ parser.OPTTokenCss, bitFromType('stylesheet') ], + [ parser.OPTTokenDoc, bitFromType('main_frame') ], + [ parser.OPTTokenFont, bitFromType('font') ], + [ parser.OPTTokenFrame, bitFromType('sub_frame') ], + [ parser.OPTTokenGenericblock, 
bitFromType('unsupported') ], + [ parser.OPTTokenGhide, bitFromType('generichide') ], + [ parser.OPTTokenImage, bitFromType('image') ], + [ parser.OPTTokenInlineFont, bitFromType('inline-font') ], + [ parser.OPTTokenInlineScript, bitFromType('inline-script') ], + [ parser.OPTTokenMedia, bitFromType('media') ], + [ parser.OPTTokenObject, bitFromType('object') ], + [ parser.OPTTokenOther, bitFromType('other') ], + [ parser.OPTTokenPing, bitFromType('ping') ], + [ parser.OPTTokenPopunder, bitFromType('popunder') ], + [ parser.OPTTokenPopup, bitFromType('popup') ], + [ parser.OPTTokenScript, bitFromType('script') ], + [ parser.OPTTokenShide, bitFromType('specifichide') ], + [ parser.OPTTokenXhr, bitFromType('xmlhttprequest') ], + [ parser.OPTTokenWebrtc, bitFromType('unsupported') ], + [ parser.OPTTokenWebsocket, bitFromType('websocket') ], + ]); // These top 100 "bad tokens" are collated using the "miss" histogram // from tokenHistograms(). The "score" is their occurrence among the // 200K+ URLs used in the benchmark and executed against default @@ -2224,7 +2208,7 @@ const FilterParser = class { [ 'scripts',1446 ], [ 'twitter',1440 ], [ 'crop',1431 ], - [ 'new',1412] + [ 'new',1412], ]); this.maxTokenLen = urlTokenizer.MAX_TOKEN_LENGTH; this.reset(); @@ -2244,16 +2228,14 @@ const FilterParser = class { this.dataType = undefined; this.data = undefined; this.invalid = false; - this.f = ''; + this.pattern = ''; this.firstParty = false; this.thirdParty = false; this.party = AnyParty; - this.fopts = ''; this.domainOpt = ''; this.denyallow = ''; this.isPureHostname = false; this.isRegex = false; - this.raw = ''; this.redirect = 0; this.token = '*'; this.tokenHash = this.noTokenHash; @@ -2278,16 +2260,12 @@ const FilterParser = class { return ''; } - bitFromType(type) { - return 1 << ((typeNameToTypeValue[type] >>> 4) - 1); - } - // https://github.com/chrisaljoudi/uBlock/issues/589 // Be ready to handle multiple negated types - parseTypeOption(raw, not) { - const typeBit = 
raw !== 'all' - ? this.bitFromType(toNormalizedType[raw]) + parseTypeOption(id, not) { + const typeBit = id !== -1 + ? this.tokenIdToNormalizedType.get(id) : allTypesBits; if ( not ) { this.notTypes |= typeBit; @@ -2309,8 +2287,8 @@ const FilterParser = class { } } - parseHostnameList(s) { - if ( this.reHasUnicode.test(s) ) { + parseHostnameList(parser, s) { + if ( parser.optionHasUnicode() ) { const hostnames = s.split('|'); let i = hostnames.length; while ( i-- ) { @@ -2320,106 +2298,74 @@ const FilterParser = class { } s = hostnames.join('|'); } + // TODO: revisit if ( this.reBadDomainOptChars.test(s) ) { return ''; } return s; } - parseOptions(s) { - this.fopts = s; - for ( let opt of s.split(/\s*,\s*/) ) { - const not = opt.startsWith('~'); - if ( not ) { - opt = opt.slice(1); - } - if ( opt === 'third-party' || opt === '3p' ) { + parseOptions(parser) { + for ( let { id, val, not, bad } of parser.options() ) { + if ( bad ) { return false; } + switch ( id ) { + case parser.OPTToken3p: this.parsePartyOption(false, not); - continue; - } - if ( opt === 'first-party' || opt === '1p' ) { + break; + case parser.OPTToken1p: this.parsePartyOption(true, not); - continue; - } - if ( toNormalizedType.hasOwnProperty(opt) ) { - this.parseTypeOption(opt, not); - continue; - } - // https://github.com/gorhill/uBlock/issues/2294 - // Detect and discard filter if domain option contains nonsensical - // characters. 
- if ( opt.startsWith('domain=') ) { - this.domainOpt = this.parseHostnameList(opt.slice(7)); - if ( this.domainOpt === '' ) { - this.unsupported = true; - break; - } - continue; - } - if ( opt.startsWith('denyallow=') ) { - this.denyallow = this.parseHostnameList(opt.slice(10)); - if ( this.denyallow === '' ) { - this.unsupported = true; - break; - } - continue; - } - if ( opt === 'important' ) { - this.important = Important; - continue; - } - if ( /^redirect(?:-rule)?=/.test(opt) ) { - if ( this.redirect !== 0 ) { - this.unsupported = true; - break; - } - this.redirect = opt.charCodeAt(8) === 0x3D /* '=' */ ? 1 : 2; - continue; - } - if ( - opt.startsWith('csp=') && - opt.length > 4 && - this.reBadCSP.test(opt) === false - ) { - this.parseTypeOption('data', not); - this.dataType = 'csp'; - this.data = opt.slice(4).trim(); - continue; - } - if ( opt === 'csp' && this.action === AllowAction ) { - this.parseTypeOption('data', not); - this.dataType = 'csp'; - this.data = ''; - continue; - } - // Used by Adguard: - // https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#empty-modifier - if ( opt === 'empty' || opt === 'mp4' ) { - if ( this.redirect !== 0 ) { - this.unsupported = true; - break; - } - this.redirect = 1; - continue; - } + break; + case parser.OPTTokenAll: + this.parseTypeOption(-1); + break; // https://github.com/uBlockOrigin/uAssets/issues/192 - if ( opt === 'badfilter' ) { + case parser.OPTTokenBadfilter: this.badFilter = true; - continue; - } + break; + case parser.OPTTokenCsp: + this.typeBits = bitFromType('data'); + this.dataType = 'csp'; + if ( val !== undefined ) { + if ( this.reBadCSP.test(val) ) { return false; } + this.data = val; + } else if ( this.action === AllowAction ) { + this.data = ''; + } + break; + // https://github.com/gorhill/uBlock/issues/2294 + // Detect and discard filter if domain option contains nonsensical + // characters. 
+ case parser.OPTTokenDomain: + this.domainOpt = this.parseHostnameList(parser, val); + if ( this.domainOpt === '' ) { return false; } + break; + case parser.OPTTokenDenyAllow: + this.denyallow = this.parseHostnameList(parser, val); + if ( this.denyallow === '' ) { return false; } + break; // https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/ // Add support for `elemhide`. Rarely used but it happens. - if ( opt === 'elemhide' || opt === 'ehide' ) { - this.parseTypeOption('specifichide', not); - this.parseTypeOption('generichide', not); - continue; + case parser.OPTTokenEhide: + this.parseTypeOption(parser.OPTTokenShide, not); + this.parseTypeOption(parser.OPTTokenGhide, not); + break; + case parser.OPTTokenImportant: + this.important = Important; + break; + // Used by Adguard: + // https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#empty-modifier + case parser.OPTTokenEmpty: + case parser.OPTTokenMp4: + case parser.OPTTokenRedirect: + case parser.OPTTokenRedirectRule: + if ( this.redirect !== 0 ) { return false; } + this.redirect = id === parser.OPTTokenRedirectRule ? 2 : 1; + break; + default: + if ( this.tokenIdToNormalizedType.has(id) === false ) { + return false; + } + this.parseTypeOption(id, not); + break; } - // Unrecognized filter option: ignore whole filter. - this.unsupported = true; - break; - } - - // Redirect rules can't be exception filters. - if ( this.redirect !== 0 && this.action !== BlockAction ) { - this.unsupported = true; } // Negated network types? Toggle on all network type bits. @@ -2429,9 +2375,7 @@ const FilterParser = class { } if ( this.notTypes !== 0 ) { this.typeBits &= ~this.notTypes; - if ( this.typeBits === 0 ) { - this.unsupported = true; - } + if ( this.typeBits === 0 ) { return false; } } // https://github.com/gorhill/uBlock/issues/2283 @@ -2439,193 +2383,99 @@ const FilterParser = class { // toggle off `unsupported` bit. 
if ( this.typeBits & unsupportedTypeBit ) { this.typeBits &= ~unsupportedTypeBit; - if ( this.typeBits === 0 ) { - this.unsupported = true; - } + if ( this.typeBits === 0 ) { return false; } } + return true; } - // TODO: use charCodeAt where possible. - - parse(raw) { + parse(parser) { // important! this.reset(); - let s = this.raw = raw.trim(); - - if ( s.length === 0 ) { + if ( parser.hasError() ) { this.invalid = true; return this; } - // Filters which are a single alphanumeric character are discarded - // as unsupported. - if ( s.length === 1 && /[0-9a-z]/i.test(s) ) { - this.unsupported = true; + // Filters whose pattern is a single character other than `*` and which + // have no narrowing options are discarded as invalid. + if ( parser.patternIsDubious() ) { + this.invalid = true; return this; } - // plain hostname? (from HOSTS file) - if ( this.reHostnameRule1.test(s) ) { - this.f = s.toLowerCase(); - this.isPureHostname = true; - this.anchor |= 0b100; - return this; - } - - // element hiding filter? - let pos = s.indexOf('#'); - if ( pos !== -1 ) { - const c = s.charAt(pos + 1); - if ( c === '#' || c === '@' ) { - console.error('static-net-filtering.js > unexpected cosmetic filters'); - this.invalid = true; - return this; - } - } - // block or allow filter? // Important: this must be executed before parsing options - if ( s.startsWith('@@') ) { + if ( parser.isException() ) { this.action = AllowAction; - s = s.slice(2); } - // options - // https://github.com/gorhill/uBlock/issues/842 - // - ensure sure we are not dealing with a regex-based filter. - // - lookup the last occurrence of `$`. - if ( - s.charCodeAt(0) !== 0x2F /* '/' */ || - s.charCodeAt(s.length - 1) !== 0x2F /* '/' */ - ) { - pos = s.lastIndexOf('$'); - if ( pos !== -1 ) { - // https://github.com/gorhill/uBlock/issues/952 - // Discard Adguard-specific `$$` filters.
- if ( s.indexOf('$$') !== -1 ) { - this.unsupported = true; - return this; - } - this.parseOptions(s.slice(pos + 1).trim()); - if ( this.unsupported ) { return this; } - s = s.slice(0, pos); - } - } + this.isPureHostname = parser.patternIsPlainHostname(); - // regex? - if ( - s.length > 2 && - s.charCodeAt(0) === 0x2F /* '/' */ && - s.charCodeAt(s.length - 1) === 0x2F /* '/' */ - ) { - this.isRegex = true; - this.f = s.slice(1, -1); - // https://github.com/gorhill/uBlock/issues/1246 - // If the filter is valid, use the corrected version of the - // source string -- this ensure reverse-lookup will work fine. - this.f = this.normalizeRegexSource(this.f); - if ( this.f === '' ) { - this.unsupported = true; - } + // Plain hostname? (from HOSTS file) + if ( this.isPureHostname && parser.hasOptions() === false ) { + this.pattern = parser.patternToLowercase(); + this.anchor |= 0b100; return this; } - // hostname-anchored - if ( s.startsWith('||') ) { - this.anchor |= 0b100; - s = s.slice(2); - - // convert hostname to punycode if needed - // https://github.com/gorhill/uBlock/issues/2599 - if ( this.reHasUnicode.test(s) ) { - const matches = this.reIsolateHostname.exec(s); - if ( matches ) { - s = (matches[1] !== undefined ? matches[1] : '') + - punycode.toASCII(matches[2]) + - matches[3]; - } - } - - // https://github.com/chrisaljoudi/uBlock/issues/1096 - if ( s.startsWith('^') ) { - this.unsupported = true; - return this; - } - - // plain hostname? (from ABP filter list) - // https://github.com/gorhill/uBlock/issues/1757 - // A filter can't be a pure-hostname one if there is a domain or - // csp option present. 
- if ( this.reHostnameRule2.test(s) ) { - if ( s.charCodeAt(s.length - 1) === 0x5E /* '^' */ ) { - s = s.slice(0, -1); - } - this.f = s.toLowerCase(); - this.isPureHostname = true; - return this; - } - } - - // left-anchored - else if ( s.startsWith('|') ) { - this.anchor |= 0x2; - s = s.slice(1); - } - - // right-anchored - if ( s.endsWith('|') ) { - this.anchor |= 0x1; - s = s.slice(0, -1); - } - - // https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448 - // Remove pointless leading *. - // https://github.com/gorhill/uBlock/issues/3034 - // We can remove anchoring if we need to match all at the start. - if ( s.startsWith('*') ) { - s = s.replace(/^\*+([^%0-9a-z])/i, '$1'); - this.anchor &= ~0x6; - } - // Remove pointless trailing * - // https://github.com/gorhill/uBlock/issues/3034 - // We can remove anchoring if we need to match all at the end. - if ( s.endsWith('*') ) { - s = s.replace(/([^%0-9a-z])\*+$/i, '$1'); - this.anchor &= ~0x1; - } - - // nothing left? - if ( s === '' ) { - s = '*'; - } - // TODO: remove once redirect rules with `*/*` pattern are no longer - // used. - else if ( this.redirect !== 0 && s === '/' ) { - s = '*'; - } - - // https://github.com/gorhill/uBlock/issues/1047 - // Hostname-anchored makes no sense if matching all requests. - if ( s === '*' ) { - this.anchor = 0; - } - - this.firstWildcardPos = s.indexOf('*'); - if ( this.firstWildcardPos !== -1 ) { - this.secondWildcardPos = s.indexOf('*', this.firstWildcardPos + 1); - } - this.firstCaretPos = s.indexOf('^'); - if ( this.firstCaretPos !== -1 ) { - this.secondCaretPos = s.indexOf('^', this.firstCaretPos + 1); - } - - if ( s.length > 1024 ) { + // options + if ( parser.hasOptions() && this.parseOptions(parser) === false ) { this.unsupported = true; return this; } - this.f = s.toLowerCase(); + // regex? 
+ if ( parser.patternIsRegex() ) { + this.isRegex = true; + // https://github.com/gorhill/uBlock/issues/1246 + // If the filter is valid, use the corrected version of the + // source string -- this ensures reverse-lookup will work fine. + this.pattern = this.normalizeRegexSource(parser.getPattern()); + if ( this.pattern === '' ) { + this.unsupported = true; + } + return this; + } + + let pattern; + if ( parser.patternIsMatchAll() ) { + pattern = '*'; + } else { + pattern = parser.patternToLowercase(); + } + + if ( parser.patternIsLeftHostnameAnchored() ) { + this.anchor |= 0b100; + } else if ( parser.patternIsLeftAnchored() ) { + this.anchor |= 0b010; + } + if ( parser.patternIsRightAnchored() ) { + this.anchor |= 0b001; + } + + if ( parser.patternHasWildcard() ) { + this.firstWildcardPos = pattern.indexOf('*'); + if ( this.firstWildcardPos !== -1 ) { + this.secondWildcardPos = + pattern.indexOf('*', this.firstWildcardPos + 1); + } + } + + if ( parser.patternHasCaret() ) { + this.firstCaretPos = pattern.indexOf('^'); + if ( this.firstCaretPos !== -1 ) { + this.secondCaretPos = + pattern.indexOf('^', this.firstCaretPos + 1); + } + } + + if ( pattern.length > 1024 ) { + this.unsupported = true; + return this; + } + + this.pattern = pattern; return this; } @@ -2635,41 +2485,24 @@ const FilterParser = class { // are not good. Avoid if possible. This has a significant positive // impact on performance.
- makeToken() { + makeToken(parser) { if ( this.isRegex ) { - this.extractTokenFromRegex(); - return; + return this.extractTokenFromRegex(); } - if ( this.f === '*' ) { return; } - const matches = this.findGoodToken(); - if ( matches === null ) { return; } - this.token = matches[0]; + const match = this.findGoodToken(parser); + if ( match === null ) { return; } + this.token = match.token; this.tokenHash = urlTokenizer.tokenHashFromString(this.token); - this.tokenBeg = matches.index; + this.tokenBeg = match.pos; } - findGoodToken() { - this.reGoodToken.lastIndex = 0; - const s = this.f; + // Note: a one-char token is better than a documented bad token. + findGoodToken(parser) { let bestMatch = null; let bestBadness = 0; - let match; - while ( (match = this.reGoodToken.exec(s)) !== null ) { - const token = match[0]; - // https://github.com/gorhill/uBlock/issues/997 - // Ignore token if preceded by wildcard. - const pos = match.index; - if ( - pos !== 0 && - s.charCodeAt(pos - 1) === 0x2A /* '*' */ || - token.length < this.maxTokenLen && - s.charCodeAt(pos + token.length) === 0x2A /* '*' */ - ) { - continue; - } - // A one-char token is better than a documented bad token. - const badness = token.length > 1 - ? this.badTokens.get(token) || 0 + for ( const match of parser.patternTokens() ) { + const badness = match.token.length > 1 + ? this.badTokens.get(match.token) || 0 : 1; if ( badness === 0 ) { return match; } if ( bestBadness === 0 || badness < bestBadness ) { @@ -2685,7 +2518,7 @@ const FilterParser = class { // a regex-based filter. 
extractTokenFromRegex() { this.reRegexToken.lastIndex = 0; - const s = this.f; + const s = this.pattern; let matches; while ( (matches = this.reRegexToken.exec(s)) !== null ) { const prefix = s.slice(0, matches.index); @@ -2712,9 +2545,9 @@ const FilterParser = class { this.dataType === undefined && this.denyallow === '' && this.domainOpt !== '' && ( - this.f === '*' || ( + this.pattern === '*' || ( this.anchor === 0b010 && - /^(?:http[s*]?:(?:\/\/)?)$/.test(this.f) + /^(?:http[s*]?:(?:\/\/)?)$/.test(this.pattern) ) ) && this.domainOpt.indexOf('~') === -1; @@ -2778,15 +2611,15 @@ FilterParser.parse = (( ) => { ttlTimer = vAPI.setTimeout(ttlProcess, 10007); }; - return s => { + return p => { if ( parser === undefined ) { - parser = new FilterParser(); + parser = new FilterParser(p); } last = Date.now(); if ( ttlTimer === undefined ) { ttlTimer = vAPI.setTimeout(ttlProcess, 10007); } - return parser.parse(s); + return parser.parse(p); }; })(); @@ -3072,10 +2905,10 @@ FilterContainer.prototype.fromSelfie = function(path) { /******************************************************************************/ -FilterContainer.prototype.compile = function(raw, writer) { +FilterContainer.prototype.compile = function(parser, writer) { // ORDER OF TESTS IS IMPORTANT! 
- const parsed = FilterParser.parse(raw); + const parsed = FilterParser.parse(parser); // Ignore non-static network filters if ( parsed.invalid ) { return false; } @@ -3086,20 +2919,20 @@ FilterContainer.prototype.compile = function(raw, writer) { µb.logger.writeOne({ realm: 'message', type: 'error', - text: `Invalid network filter in ${who}: ${raw}` + text: `Invalid network filter in ${who}: ${parser.raw}` }); return false; } // Redirect rule if ( parsed.redirect !== 0 ) { - const result = this.compileRedirectRule(parsed, writer); + const result = this.compileRedirectRule(parser.raw, parsed.badFilter, writer); if ( result === false ) { const who = writer.properties.get('assetKey') || '?'; µb.logger.writeOne({ realm: 'message', type: 'error', - text: `Invalid redirect rule in ${who}: ${raw}` + text: `Invalid redirect rule in ${who}: ${parser.raw}` }); return false; } @@ -3116,11 +2949,13 @@ FilterContainer.prototype.compile = function(raw, writer) { parsed.dataType === undefined ) { parsed.tokenHash = this.dotTokenHash; - this.compileToAtomicFilter(parsed, parsed.f, writer); + this.compileToAtomicFilter(parsed, parsed.pattern, writer); return true; } - parsed.makeToken(); + if ( parser.patternIsMatchAll() === false ) { + parsed.makeToken(parser); + } // Special pattern/option cases: // - `*$domain=...` @@ -3131,9 +2966,9 @@ FilterContainer.prototype.compile = function(raw, writer) { // are entries in the `domain=` option. 
if ( parsed.isJustOrigin() ) { const tokenHash = parsed.tokenHash; - if ( parsed.f === '*' || parsed.f.startsWith('http*') ) { + if ( parsed.pattern === '*' || parsed.pattern.startsWith('http*') ) { parsed.tokenHash = this.anyTokenHash; - } else if /* 'https:' */ ( parsed.f.startsWith('https') ) { + } else if /* 'https:' */ ( parsed.pattern.startsWith('https') ) { parsed.tokenHash = this.anyHTTPSTokenHash; } else /* 'http:' */ { parsed.tokenHash = this.anyHTTPTokenHash; @@ -3251,10 +3086,10 @@ FilterContainer.prototype.compileToAtomicFilter = function( /******************************************************************************/ -FilterContainer.prototype.compileRedirectRule = function(parsed, writer) { - const redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw); +FilterContainer.prototype.compileRedirectRule = function(raw, badFilter, writer) { + const redirects = µb.redirectEngine.compileRuleFromStaticFilter(raw); if ( Array.isArray(redirects) === false ) { return false; } - writer.select(parsed.badFilter ? 1 : 0); + writer.select(badFilter ? 
1 : 0); const type = typeNameToTypeValue.redirect; for ( const redirect of redirects ) { writer.push([ type, redirect ]); diff --git a/src/js/storage.js b/src/js/storage.js index 9672f38f0..905adb19d 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -799,60 +799,36 @@ self.addEventListener('hiddenSettingsChanged', ( ) => { // https://adblockplus.org/en/filters const staticNetFilteringEngine = this.staticNetFilteringEngine; const staticExtFilteringEngine = this.staticExtFilteringEngine; - const reIsWhitespaceChar = /\s/; - const reMaybeLocalIp = /^[\d:f]/; - const reIsLocalhostRedirect = /\s+(?:0\.0\.0\.0|broadcasthost|localhost|local|ip6-\w+)\b/; - const reLocalIp = /^(?:(0\.0\.0\.)?0|127\.0\.0\.1|::1?|fe80::1%lo0)\s+/; const lineIter = new this.LineIterator(this.processDirectives(rawText)); + const parser = new vAPI.StaticFilteringParser(); + + parser.setMaxTokenLength(this.urlTokenizer.MAX_TOKEN_LENGTH); while ( lineIter.eot() === false ) { - let line = lineIter.next().trim(); - if ( line.length === 0 ) { continue; } + let line = lineIter.next(); while ( line.endsWith(' \\') ) { if ( lineIter.peek(4) !== ' ' ) { break; } line = line.slice(0, -2).trim() + lineIter.next().trim(); } - // Strip comments - const c = line.charAt(0); - if ( c === '!' || c === '[' ) { continue; } + parser.analyze(line); - // Parse or skip cosmetic filters - // All cosmetic filters are caught here - if ( staticExtFilteringEngine.compile(line, writer) ) { continue; } + if ( parser.shouldIgnore() ) { continue; } - // Whatever else is next can be assumed to not be a cosmetic filter - - // Most comments start in first column - if ( c === '#' ) { continue; } - - // Catch comments somewhere on the line - // Remove: - // ... #blah blah blah - // ... 
# blah blah blah - // Don't remove: - // ...#blah blah blah - // because some ABP filters uses the `#` character (URL fragment) - const pos = line.indexOf('#'); - if ( pos !== -1 && reIsWhitespaceChar.test(line.charAt(pos - 1)) ) { - line = line.slice(0, pos).trim(); + if ( parser.category === parser.CATStaticExtFilter ) { + staticExtFilteringEngine.compile(parser, writer); + continue; } - // https://github.com/gorhill/httpswitchboard/issues/15 - // Ensure localhost et al. don't end up in the ubiquitous blacklist. - // With hosts files, we need to remove local IP redirection - if ( reMaybeLocalIp.test(c) ) { - // Ignore hosts file redirect configuration - // 127.0.0.1 localhost - // 255.255.255.255 broadcasthost - if ( reIsLocalhostRedirect.test(line) ) { continue; } - line = line.replace(reLocalIp, '').trim(); + if ( parser.category !== parser.CATStaticNetFilter ) { continue; } + + // https://github.com/gorhill/uBlock/issues/2599 + // convert hostname to punycode if needed + if ( parser.patternHasUnicode() ) { + parser.toPunycode(); } - - if ( line.length === 0 ) { continue; } - - staticNetFilteringEngine.compile(line, writer); + staticNetFilteringEngine.compile(parser, writer); } return writer.toString();