mirror of
https://github.com/gorhill/uBlock.git
synced 2024-11-07 03:12:33 +01:00
fix #3507
This commit is contained in:
parent
c3f9f6b716
commit
21f7d42f54
@ -579,6 +579,19 @@ var filterDocument = (function() {
|
|||||||
domParser, xmlSerializer,
|
domParser, xmlSerializer,
|
||||||
utf8TextDecoder, textDecoder, textEncoder;
|
utf8TextDecoder, textDecoder, textEncoder;
|
||||||
|
|
||||||
|
// Decode `buffer` into a string using the character encoding `encoding`.
//
// The most recently created TextDecoder instance is kept in `textDecoder`
// and reused as long as the requested encoding matches it; otherwise a new
// instance is created and cached in its place.
//
// encoding: an encoding label accepted by the TextDecoder constructor.
// buffer:   the bytes to decode (anything TextDecoder.decode() accepts).
// Returns the decoded string. The TextDecoder constructor throws a
// RangeError when `encoding` is not a supported label.
var textDecode = function(encoding, buffer) {
    // TextDecoder.encoding is always the lowercased canonical name, so the
    // requested label is lowercased before comparing: this prevents a label
    // such as 'UTF-8' from needlessly discarding a cached 'utf-8' decoder.
    var wanted = typeof encoding === 'string'
        ? encoding.toLowerCase()
        : encoding;
    if ( textDecoder === undefined || textDecoder.encoding !== wanted ) {
        textDecoder = new TextDecoder(encoding);
    }
    return textDecoder.decode(buffer);
};
|
||||||
|
|
||||||
// Matches a content type string denoting an HTML or XHTML document. The `+`
// of `xhtml+xml` is escaped so that it is matched literally instead of
// acting as a quantifier on the preceding `l`.
var reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i,
    // Captures the value following `charset=`, with an optional opening
    // quote skipped, up to the next quote or space.
    reContentTypeCharset = /charset=['"]?([^'" ]+)/i;
|
||||||
|
|
||||||
@ -740,36 +753,45 @@ var filterDocument = (function() {
|
|||||||
var doc;
|
var doc;
|
||||||
|
|
||||||
// If stream encoding is still unknown, try to extract from document.
|
// If stream encoding is still unknown, try to extract from document.
|
||||||
if ( filterer.charset === undefined ) {
|
var charsetFound = filterer.charset,
|
||||||
|
charsetUsed = charsetFound;
|
||||||
|
if ( charsetFound === undefined ) {
|
||||||
if ( utf8TextDecoder === undefined ) {
|
if ( utf8TextDecoder === undefined ) {
|
||||||
utf8TextDecoder = new TextDecoder();
|
utf8TextDecoder = new TextDecoder();
|
||||||
}
|
}
|
||||||
doc = domParser.parseFromString(
|
doc = domParser.parseFromString(
|
||||||
utf8TextDecoder.decode(filterer.buffer.slice(0, 4096)),
|
utf8TextDecoder.decode(filterer.buffer.slice(0, 1024)),
|
||||||
'text/html'
|
'text/html'
|
||||||
);
|
);
|
||||||
filterer.charset = µb.textEncode.normalizeCharset(charsetFromDoc(doc));
|
charsetFound = charsetFromDoc(doc);
|
||||||
if ( filterer.charset === undefined ) {
|
charsetUsed = µb.textEncode.normalizeCharset(charsetFound);
|
||||||
streamClose(filterer);
|
if ( charsetUsed === undefined ) {
|
||||||
return;
|
return streamClose(filterer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (
|
|
||||||
textDecoder !== undefined &&
|
|
||||||
textDecoder.encoding !== filterer.charset
|
|
||||||
) {
|
|
||||||
textDecoder = undefined;
|
|
||||||
}
|
|
||||||
if ( textDecoder === undefined ) {
|
|
||||||
textDecoder = new TextDecoder(filterer.charset);
|
|
||||||
}
|
|
||||||
|
|
||||||
doc = domParser.parseFromString(
|
doc = domParser.parseFromString(
|
||||||
textDecoder.decode(filterer.buffer),
|
textDecode(charsetUsed, filterer.buffer),
|
||||||
'text/html'
|
'text/html'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// https://github.com/gorhill/uBlock/issues/3507
|
||||||
|
// In case of no explicit charset found, try to find one again, but
|
||||||
|
// this time with the whole document parsed.
|
||||||
|
if ( charsetFound === undefined ) {
|
||||||
|
charsetFound = µb.textEncode.normalizeCharset(charsetFromDoc(doc));
|
||||||
|
if ( charsetFound !== charsetUsed ) {
|
||||||
|
if ( charsetFound === undefined ) {
|
||||||
|
return streamClose(filterer);
|
||||||
|
}
|
||||||
|
charsetUsed = charsetFound;
|
||||||
|
doc = domParser.parseFromString(
|
||||||
|
textDecode(charsetFound, filterer.buffer),
|
||||||
|
'text/html'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var modified = false;
|
var modified = false;
|
||||||
if ( filterer.selectors !== undefined ) {
|
if ( filterer.selectors !== undefined ) {
|
||||||
if ( µb.htmlFilteringEngine.apply(doc, filterer) ) {
|
if ( µb.htmlFilteringEngine.apply(doc, filterer) ) {
|
||||||
@ -783,8 +805,7 @@ var filterDocument = (function() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ( modified === false ) {
|
if ( modified === false ) {
|
||||||
streamClose(filterer);
|
return streamClose(filterer);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
|
// https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
|
||||||
@ -797,9 +818,9 @@ var filterDocument = (function() {
|
|||||||
doctypeStr +
|
doctypeStr +
|
||||||
doc.documentElement.outerHTML
|
doc.documentElement.outerHTML
|
||||||
);
|
);
|
||||||
if ( filterer.charset !== 'utf-8' ) {
|
if ( charsetUsed !== 'utf-8' ) {
|
||||||
encodedStream = µb.textEncode.encode(
|
encodedStream = µb.textEncode.encode(
|
||||||
filterer.charset,
|
charsetUsed,
|
||||||
encodedStream
|
encodedStream
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user