1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-11-07 03:12:33 +01:00

Improve validation of hostname in domain= and denyallow options

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/1249

For "exotic" hostname values, the browser's own API will be
used to ultimately validate hostname values.
This commit is contained in:
Raymond Hill 2020-09-18 10:23:02 -04:00
parent 7e906b33c5
commit 3f299ef623
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
2 changed files with 102 additions and 97 deletions

View File

@ -108,6 +108,10 @@ const Parser = class {
this.reHostsSource = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\x7F]+$/; this.reHostsSource = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\x7F]+$/;
this.reUnicodeChar = /[^\x00-\x7F]/; this.reUnicodeChar = /[^\x00-\x7F]/;
this.reUnicodeChars = /[^\x00-\x7F]/g; this.reUnicodeChars = /[^\x00-\x7F]/g;
this.reHostnameLabel = /[^.]+/g;
this.rePlainHostname = /^(?:[\w-]+\.)*[a-z]+$/;
this.rePlainEntity = /^(?:[\w-]+\.)+\*$/;
this.reEntity = /^[^*]+\.\*$/;
this.punycoder = new URL(self.location); this.punycoder = new URL(self.location);
this.selectorCompiler = new this.SelectorCompiler(this); this.selectorCompiler = new this.SelectorCompiler(this);
// TODO: reuse for network filtering analysis // TODO: reuse for network filtering analysis
@ -313,7 +317,7 @@ const Parser = class {
analyzeExtExtra() { analyzeExtExtra() {
if ( this.hasOptions() ) { if ( this.hasOptions() ) {
const { i, len } = this.optionsSpan; const { i, len } = this.optionsSpan;
this.analyzeDomainList(i, i + len, BITComma, 0b11); this.analyzeDomainList(i, i + len, BITComma, 0b1110);
} }
if ( hasBits(this.flavorBits, BITFlavorUnsupported) ) { if ( hasBits(this.flavorBits, BITFlavorUnsupported) ) {
this.markSpan(this.patternSpan, BITError); this.markSpan(this.patternSpan, BITError);
@ -668,66 +672,62 @@ const Parser = class {
} }
} }
// bits: analyzeDomain(from, to, modeBits) {
// 0: can use entity-based hostnames if ( to === from ) { return false; }
// 1: can use single wildcard return this.normalizeHostnameValue(
analyzeDomain(from, to, optionBits) { this.strFromSlices(from, to - 3),
const { slices } = this; modeBits
let len = to - from; ) !== undefined;
if ( len === 0 ) { return false; } }
const not = hasBits(slices[from], BITTilde);
if ( not ) { // Ultimately, let the browser API do the hostname normalization, after
if ( (optionBits & 0b01) === 0 || slices[from+2] > 1 ) { return false; } // making some other trivial checks.
from += 3; //
len -= 3; // modeBits:
// 0: can use wildcard at any position
// 1: can use entity-based hostnames
// 2: can use single wildcard
// 3: can be negated
normalizeHostnameValue(s, modeBits = 0b0000) {
const not = s.charCodeAt(0) === 0x7E /* '~' */;
if ( not && (modeBits & 0b1000) === 0 ) { return; }
let hn = not === false ? s : s.slice(1);
if ( this.rePlainHostname.test(hn) ) { return s; }
const hasWildcard = hn.lastIndexOf('*') !== -1;
if ( hasWildcard ) {
if ( modeBits === 0 ) { return; }
if ( hn.length === 1 ) {
if ( not || (modeBits & 0b0100) === 0 ) { return; }
return s;
}
if ( (modeBits & 0b0010) !== 0 ) {
if ( this.rePlainEntity.test(hn) ) { return s; }
if ( this.reEntity.test(hn) === false ) { return; }
} else if ( (modeBits & 0b0001) === 0 ) {
return;
}
hn = hn.replace(/\*/g, '__asterisk__');
}
this.punycoder.hostname = '_';
try {
this.punycoder.hostname = hn;
hn = this.punycoder.hostname;
} catch (_) {
return;
}
if ( hn === '_' || hn === '' ) { return; }
if ( hasWildcard ) {
hn = this.punycoder.hostname.replace(/__asterisk__/g, '*');
} }
if ( len === 0 ) { return false; }
// One slice only, check for single asterisk
if ( if (
len === 3 && (modeBits & 0b0001) === 0 && (
not === false && hn.charCodeAt(0) === 0x2E /* '.' */ ||
(optionBits & 0b10) !== 0 && hn.charCodeAt(hn.length - 1) === 0x2E /* '.' */
hasBits(slices[from], BITAsterisk) )
) { ) {
return slices[from+2] === 1; return;
} }
// First slice must be regex-equivalent of `\w` return not ? '~' + hn : hn;
if ( hasNoBits(slices[from], BITRegexWord | BITUnicode) ) { return false; }
// Last slice
if ( len > 3 ) {
const last = to - 3;
if ( hasBits(slices[last], BITAsterisk) ) {
if (
(optionBits & 0b01) === 0 ||
len < 9 ||
slices[last+2] > 1 ||
hasNoBits(slices[last-3], BITPeriod)
) {
return false;
}
} else if ( hasNoBits(slices[to-3], BITAlphaNum | BITUnicode) ) {
return false;
}
}
// Middle slices
if ( len > 6 ) {
for ( let i = from + 3; i < to - 3; i += 3 ) {
const bits = slices[i];
if ( hasNoBits(bits, BITHostname) ) { return false; }
if ( hasBits(bits, BITPeriod) && slices[i+2] > 1 ) {
return false;
}
if (
hasBits(bits, BITDash) && (
hasNoBits(slices[i-3], BITRegexWord | BITUnicode) ||
hasNoBits(slices[i+3], BITRegexWord | BITUnicode)
)
) {
return false;
}
}
}
return true;
} }
slice(raw) { slice(raw) {
@ -1081,6 +1081,8 @@ const Parser = class {
// Be ready to deal with non-punycode-able Unicode characters. // Be ready to deal with non-punycode-able Unicode characters.
// https://github.com/uBlockOrigin/uBlock-issues/issues/772 // https://github.com/uBlockOrigin/uBlock-issues/issues/772
// Encode Unicode characters beyond the hostname part. // Encode Unicode characters beyond the hostname part.
// Prepend with '*' character to prevent the browser API from refusing to
// punycode -- this occurs when the extracted label starts with a dash.
toASCII(dryrun = false) { toASCII(dryrun = false) {
if ( this.patternHasUnicode() === false ) { return true; } if ( this.patternHasUnicode() === false ) { return true; }
const { i, len } = this.patternSpan; const { i, len } = this.patternSpan;
@ -1090,16 +1092,14 @@ const Parser = class {
// Punycode hostname part of the pattern. // Punycode hostname part of the pattern.
if ( patternIsRegex === false ) { if ( patternIsRegex === false ) {
const match = this.reHostname.exec(pattern); const match = this.reHostname.exec(pattern);
if ( match === null ) { return true; } if ( match !== null ) {
try { const hn = match[0].replace(this.reHostnameLabel, s => {
this.punycoder.hostname = match[0].replace(/\*/g, '__asterisk__'); if ( this.reUnicodeChar.test(s) === false ) { return s; }
} catch(ex) { if ( s.charCodeAt(0) === 0x2D /* '-' */ ) { s = '*' + s; }
return false; return this.normalizeHostnameValue(s, 0b0001) || s;
});
pattern = hn + pattern.slice(match.index + match[0].length);
} }
const hn = this.punycoder.hostname;
if ( hn === '' ) { return false; }
const punycoded = hn.replace(/__asterisk__/g, '*');
pattern = punycoded + pattern.slice(match.index + match[0].length);
} }
// Percent-encode remaining Unicode characters. // Percent-encode remaining Unicode characters.
if ( this.reUnicodeChar.test(pattern) ) { if ( this.reUnicodeChar.test(pattern) ) {
@ -1755,7 +1755,6 @@ const BITError = 1 << 31;
const BITAll = 0xFFFFFFFF; const BITAll = 0xFFFFFFFF;
const BITAlphaNum = BITNum | BITAlpha; const BITAlphaNum = BITNum | BITAlpha;
const BITRegexWord = BITAlphaNum | BITUnderscore;
const BITHostname = BITNum | BITAlpha | BITUppercase | BITDash | BITPeriod | BITUnderscore | BITUnicode; const BITHostname = BITNum | BITAlpha | BITUppercase | BITDash | BITPeriod | BITUnderscore | BITUnicode;
const BITPatternToken = BITNum | BITAlpha | BITPercent; const BITPatternToken = BITNum | BITAlpha | BITPercent;
const BITLineComment = BITExclamation | BITHash | BITSquareBracket; const BITLineComment = BITExclamation | BITHash | BITSquareBracket;
@ -2226,7 +2225,7 @@ const NetOptionsIterator = class {
if ( this.interactive && hasBits(descriptor, OPTDomainList) ) { if ( this.interactive && hasBits(descriptor, OPTDomainList) ) {
this.parser.analyzeDomainList( this.parser.analyzeDomainList(
lval + 3, i, BITPipe, lval + 3, i, BITPipe,
(descriptor & 0xFF) === OPTTokenDomain ? 0b01 : 0b00 (descriptor & 0xFF) === OPTTokenDomain ? 0b1010 : 0b0000
); );
} }
} else { } else {

View File

@ -20,7 +20,6 @@
*/ */
/* jshint bitwise: false */ /* jshint bitwise: false */
/* global punycode */
'use strict'; 'use strict';
@ -1120,12 +1119,12 @@ const filterOrigin = (( ) => {
this.trieContainer = new µb.HNTrieContainer(); this.trieContainer = new µb.HNTrieContainer();
} }
compile(domainOpt, prepend, units) { compile(domainOptList, prepend, units) {
const hostnameHits = []; const hostnameHits = [];
const hostnameMisses = []; const hostnameMisses = [];
const entityHits = []; const entityHits = [];
const entityMisses = []; const entityMisses = [];
for ( const s of FilterParser.domainOptIterator(domainOpt) ) { for ( const s of domainOptList ) {
const len = s.length; const len = s.length;
const beg = len > 1 && s.charCodeAt(0) === 0x7E ? 1 : 0; const beg = len > 1 && s.charCodeAt(0) === 0x7E ? 1 : 0;
const end = len > 2 && const end = len > 2 &&
@ -1770,7 +1769,7 @@ const FilterDenyAllow = class {
} }
static compile(details) { static compile(details) {
return [ FilterDenyAllow.fid, details.denyallow ]; return [ FilterDenyAllow.fid, details.denyallowOpt ];
} }
static fromCompiled(args) { static fromCompiled(args) {
@ -2074,17 +2073,15 @@ const FILTER_SEQUENCES_MIN = filterSequenceWritePtr;
const FilterParser = class { const FilterParser = class {
constructor(parser) { constructor(parser) {
this.cantWebsocket = vAPI.cantWebsocket; this.cantWebsocket = vAPI.cantWebsocket;
this.domainOpt = '';
this.noTokenHash = urlTokenizer.noTokenHash; this.noTokenHash = urlTokenizer.noTokenHash;
this.reBadDomainOptChars = /[+?^${}()[\]\\]/;
this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/; this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
this.reHasUnicode = /[^\x00-\x7F]/;
this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/; this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/;
this.reRegexToken = /[%0-9A-Za-z]{2,}/g; this.reRegexToken = /[%0-9A-Za-z]{2,}/g;
this.reRegexTokenAbort = /[([]/; this.reRegexTokenAbort = /[([]/;
this.reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/; this.reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/;
this.reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*.]|$)/; this.reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*.]|$)/;
this.reGoodToken = /[%0-9a-z]{1,}/g; this.reGoodToken = /[%0-9a-z]{1,}/g;
this.domainOptList = [];
this.tokenIdToNormalizedType = new Map([ this.tokenIdToNormalizedType = new Map([
[ parser.OPTTokenCname, bitFromType('cname') ], [ parser.OPTTokenCname, bitFromType('cname') ],
[ parser.OPTTokenCss, bitFromType('stylesheet') ], [ parser.OPTTokenCss, bitFromType('stylesheet') ],
@ -2237,7 +2234,7 @@ const FilterParser = class {
this.thirdParty = false; this.thirdParty = false;
this.party = AnyParty; this.party = AnyParty;
this.domainOpt = ''; this.domainOpt = '';
this.denyallow = ''; this.denyallowOpt = '';
this.isPureHostname = false; this.isPureHostname = false;
this.isRegex = false; this.isRegex = false;
this.redirect = 0; this.redirect = 0;
@ -2291,20 +2288,24 @@ const FilterParser = class {
} }
} }
parseHostnameList(parser, s) { parseHostnameList(parser, s, modeBits, out = []) {
if ( parser.optionHasUnicode() ) { let beg = 0;
const hostnames = s.split('|'); let slen = s.length;
let i = hostnames.length; let i = 0;
while ( i-- ) { while ( beg < slen ) {
if ( this.reHasUnicode.test(hostnames[i]) ) { let end = s.indexOf('|', beg);
hostnames[i] = punycode.toASCII(hostnames[i]); if ( end === -1 ) { end = slen; }
} const hn = parser.normalizeHostnameValue(
s.slice(beg, end),
modeBits
);
if ( hn !== undefined ) {
out[i] = hn; i += 1;
} }
s = hostnames.join('|'); beg = end + 1;
} }
// TODO: revisit out.length = i;
if ( this.reBadDomainOptChars.test(s) ) { return ''; } return i === 1 ? out[0] : out.join('|');
return s;
} }
parseOptions(parser) { parseOptions(parser) {
@ -2337,12 +2338,17 @@ const FilterParser = class {
// Detect and discard filter if domain option contains nonsensical // Detect and discard filter if domain option contains nonsensical
// characters. // characters.
case parser.OPTTokenDomain: case parser.OPTTokenDomain:
this.domainOpt = this.parseHostnameList(parser, val); this.domainOpt = this.parseHostnameList(
parser,
val,
0b1010,
this.domainOptList
);
if ( this.domainOpt === '' ) { return false; } if ( this.domainOpt === '' ) { return false; }
break; break;
case parser.OPTTokenDenyAllow: case parser.OPTTokenDenyAllow:
this.denyallow = this.parseHostnameList(parser, val); this.denyallowOpt = this.parseHostnameList(parser, val, 0b0000);
if ( this.denyallow === '' ) { return false; } if ( this.denyallowOpt === '' ) { return false; }
break; break;
// https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/ // https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/
// Add support for `elemhide`. Rarely used but it happens. // Add support for `elemhide`. Rarely used but it happens.
@ -2559,7 +2565,7 @@ const FilterParser = class {
isJustOrigin() { isJustOrigin() {
return this.isRegex === false && return this.isRegex === false &&
this.dataType === undefined && this.dataType === undefined &&
this.denyallow === '' && this.denyallowOpt === '' &&
this.domainOpt !== '' && ( this.domainOpt !== '' && (
this.pattern === '*' || ( this.pattern === '*' || (
this.anchor === 0b010 && this.anchor === 0b010 &&
@ -2961,7 +2967,7 @@ FilterContainer.prototype.compile = function(parser, writer) {
if ( if (
parsed.isPureHostname && parsed.isPureHostname &&
parsed.domainOpt === '' && parsed.domainOpt === '' &&
parsed.denyallow === '' && parsed.denyallowOpt === '' &&
parsed.dataType === undefined parsed.dataType === undefined
) { ) {
parsed.tokenHash = this.dotTokenHash; parsed.tokenHash = this.dotTokenHash;
@ -2990,7 +2996,7 @@ FilterContainer.prototype.compile = function(parser, writer) {
parsed.tokenHash = this.anyHTTPTokenHash; parsed.tokenHash = this.anyHTTPTokenHash;
} }
const entities = []; const entities = [];
for ( const hn of FilterParser.domainOptIterator(parsed.domainOpt) ) { for ( const hn of parsed.domainOptList ) {
if ( parsed.domainIsEntity(hn) === false ) { if ( parsed.domainIsEntity(hn) === false ) {
this.compileToAtomicFilter(parsed, hn, writer); this.compileToAtomicFilter(parsed, hn, writer);
} else { } else {
@ -3004,7 +3010,7 @@ FilterContainer.prototype.compile = function(parser, writer) {
const units = []; const units = [];
filterPattern.compile(parsed, units); filterPattern.compile(parsed, units);
if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); } if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); }
filterOrigin.compile(entity, true, units); filterOrigin.compile([ entity ], true, units);
this.compileToAtomicFilter( this.compileToAtomicFilter(
parsed, FilterCompositeAll.compile(units), writer parsed, FilterCompositeAll.compile(units), writer
); );
@ -3034,14 +3040,14 @@ FilterContainer.prototype.compile = function(parser, writer) {
// Origin // Origin
if ( parsed.domainOpt !== '' ) { if ( parsed.domainOpt !== '' ) {
filterOrigin.compile( filterOrigin.compile(
parsed.domainOpt, parsed.domainOptList,
units.length !== 0 && filterClasses[units[0][0]].isSlow === true, units.length !== 0 && filterClasses[units[0][0]].isSlow === true,
units units
); );
} }
// Deny-allow // Deny-allow
if ( parsed.denyallow !== '' ) { if ( parsed.denyallowOpt !== '' ) {
units.push(FilterDenyAllow.compile(parsed)); units.push(FilterDenyAllow.compile(parsed));
} }