1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-15 07:22:28 +02:00
This commit is contained in:
Raymond Hill 2018-02-18 07:16:10 -05:00
parent c3f9f6b716
commit 21f7d42f54
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2

View File

@ -579,6 +579,19 @@ var filterDocument = (function() {
domParser, xmlSerializer, domParser, xmlSerializer,
utf8TextDecoder, textDecoder, textEncoder; utf8TextDecoder, textDecoder, textEncoder;
// Decode `buffer` to a string using `encoding`.
//
// The shared `textDecoder` instance is kept between calls and reused as long
// as its `encoding` property equals the requested `encoding`; otherwise a new
// TextDecoder is created for the requested encoding and becomes the shared
// instance.
var textDecode = function(encoding, buffer) {
    var cached = textDecoder;
    if ( cached === undefined || cached.encoding !== encoding ) {
        // Drop the stale instance before constructing the new one, so that
        // a throwing constructor leaves no mismatched decoder behind.
        textDecoder = undefined;
        textDecoder = cached = new TextDecoder(encoding);
    }
    return cached.decode(buffer);
};
// Matches a string starting with `text/html` or `application/xhtml+xml`
// (case-insensitive). The `+` must be escaped: left bare it acts as a
// quantifier on the preceding `l`, and the literal `application/xhtml+xml`
// would never match.
var reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i,
    // Captures the value following `charset=`, optionally wrapped in a
    // single or double quote, up to the next quote or space.
    reContentTypeCharset = /charset=['"]?([^'" ]+)/i;
@ -740,36 +753,45 @@ var filterDocument = (function() {
var doc; var doc;
// If stream encoding is still unknown, try to extract from document. // If stream encoding is still unknown, try to extract from document.
if ( filterer.charset === undefined ) { var charsetFound = filterer.charset,
charsetUsed = charsetFound;
if ( charsetFound === undefined ) {
if ( utf8TextDecoder === undefined ) { if ( utf8TextDecoder === undefined ) {
utf8TextDecoder = new TextDecoder(); utf8TextDecoder = new TextDecoder();
} }
doc = domParser.parseFromString( doc = domParser.parseFromString(
utf8TextDecoder.decode(filterer.buffer.slice(0, 4096)), utf8TextDecoder.decode(filterer.buffer.slice(0, 1024)),
'text/html' 'text/html'
); );
filterer.charset = µb.textEncode.normalizeCharset(charsetFromDoc(doc)); charsetFound = charsetFromDoc(doc);
if ( filterer.charset === undefined ) { charsetUsed = µb.textEncode.normalizeCharset(charsetFound);
streamClose(filterer); if ( charsetUsed === undefined ) {
return; return streamClose(filterer);
} }
} }
if (
textDecoder !== undefined &&
textDecoder.encoding !== filterer.charset
) {
textDecoder = undefined;
}
if ( textDecoder === undefined ) {
textDecoder = new TextDecoder(filterer.charset);
}
doc = domParser.parseFromString( doc = domParser.parseFromString(
textDecoder.decode(filterer.buffer), textDecode(charsetUsed, filterer.buffer),
'text/html' 'text/html'
); );
// https://github.com/gorhill/uBlock/issues/3507
// In case of no explicit charset found, try to find one again, but
// this time with the whole document parsed.
if ( charsetFound === undefined ) {
charsetFound = µb.textEncode.normalizeCharset(charsetFromDoc(doc));
if ( charsetFound !== charsetUsed ) {
if ( charsetFound === undefined ) {
return streamClose(filterer);
}
charsetUsed = charsetFound;
doc = domParser.parseFromString(
textDecode(charsetFound, filterer.buffer),
'text/html'
);
}
}
var modified = false; var modified = false;
if ( filterer.selectors !== undefined ) { if ( filterer.selectors !== undefined ) {
if ( µb.htmlFilteringEngine.apply(doc, filterer) ) { if ( µb.htmlFilteringEngine.apply(doc, filterer) ) {
@ -783,8 +805,7 @@ var filterDocument = (function() {
} }
if ( modified === false ) { if ( modified === false ) {
streamClose(filterer); return streamClose(filterer);
return;
} }
// https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353 // https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
@ -797,9 +818,9 @@ var filterDocument = (function() {
doctypeStr + doctypeStr +
doc.documentElement.outerHTML doc.documentElement.outerHTML
); );
if ( filterer.charset !== 'utf-8' ) { if ( charsetUsed !== 'utf-8' ) {
encodedStream = µb.textEncode.encode( encodedStream = µb.textEncode.encode(
filterer.charset, charsetUsed,
encodedStream encodedStream
); );
} }