From c6397e3d3006153b9fed7358aa28f9eef94b44f8 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Sun, 28 Jun 2020 08:28:29 -0400 Subject: [PATCH] Fix handling of non-punycodable Unicode characters Related feedback: - https://github.com/uBlockOrigin/uBlock-issues/issues/1118#issuecomment-650730158 --- src/js/static-filtering-parser.js | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index 0c4c7027d..3be3cde4b 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -628,7 +628,12 @@ const Parser = class { catch (ex) { this.markSpan(this.patternSpan, BITError); } - } else if ( this.patternIsDubious() ) { + } else if ( + this.patternIsDubious() || ( + this.patternHasUnicode() && + this.toPunycode(true) === false + ) + ) { this.markSpan(this.patternSpan, BITError); } this.netOptionsIterator.init(); @@ -1040,19 +1045,24 @@ const Parser = class { return this.raw; } - toPunycode() { + // https://github.com/uBlockOrigin/uBlock-issues/issues/1118#issuecomment-650730158 + // Be ready to deal with non-punycode-able Unicode characters. + toPunycode(dryrun = false) { if ( this.patternHasUnicode() === false ) { return true; } const { i, len } = this.patternSpan; if ( len === 0 ) { return true; } let pattern = this.getNetPattern(); const match = this.reHostname.exec(this.pattern); - if ( match === null ) { return; } + if ( match === null ) { return true; } try { this.punycoder.hostname = match[0].replace(/\*/g, '__asterisk__'); } catch(ex) { return false; } - const punycoded = this.punycoder.hostname.replace(/__asterisk__/g, '*'); + const hn = this.punycoder.hostname; + if ( hn === '' ) { return false; } + if ( dryrun ) { return true; } + const punycoded = hn.replace(/__asterisk__/g, '*'); pattern = punycoded + this.pattern.slice(match.index + match[0].length); const beg = this.slices[i+1]; const end = this.slices[i+len+1];