diff --git a/platform/webext/vapi-webrequest.js b/platform/webext/vapi-webrequest.js index 2caeb33e7..c2551ffbc 100644 --- a/platform/webext/vapi-webrequest.js +++ b/platform/webext/vapi-webrequest.js @@ -29,7 +29,11 @@ vAPI.net = { onBeforeRequest: {}, onBeforeMaybeSpuriousCSPReport: {}, onHeadersReceived: {}, - nativeCSPReportFiltering: true + nativeCSPReportFiltering: true, + webRequest: browser.webRequest, + canFilterResponseBody: + typeof browser.webRequest === 'object' && + typeof browser.webRequest.filterResponseData === 'function' }; /******************************************************************************/ diff --git a/src/background.html b/src/background.html index d3ccd0499..168e9ae20 100644 --- a/src/background.html +++ b/src/background.html @@ -22,7 +22,10 @@ + + + diff --git a/src/js/assets.js b/src/js/assets.js index 659f83b09..845ef68b4 100644 --- a/src/js/assets.js +++ b/src/js/assets.js @@ -53,11 +53,10 @@ api.removeObserver = function(observer) { }; var fireNotification = function(topic, details) { - var result; + var result, r; for ( var i = 0; i < observers.length; i++ ) { - if ( observers[i](topic, details) === false ) { - result = false; - } + r = observers[i](topic, details); + if ( r !== undefined ) { result = r; } } return result; }; @@ -955,7 +954,7 @@ var updateNext = function() { fireNotification( 'before-asset-updated', { assetKey: assetKey, type: assetEntry.content } - ) !== false + ) === true ) { return assetKey; } diff --git a/src/js/background.js b/src/js/background.js index c1a85de06..3d9a33018 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -95,6 +95,7 @@ var µBlock = (function() { // jshint ignore:line // Features detection. 
privacySettingsSupported: vAPI.browserSettings instanceof Object, cloudStorageSupported: vAPI.cloud instanceof Object, + canFilterResponseBody: vAPI.net.canFilterResponseBody === true, // https://github.com/chrisaljoudi/uBlock/issues/180 // Whitelist directives need to be loaded once the PSL is available @@ -120,8 +121,8 @@ var µBlock = (function() { // jshint ignore:line // read-only systemSettings: { - compiledMagic: 'vrgorlgelgws', - selfieMagic: 'pxpclstriajk' + compiledMagic: 'puuijtkfpspv', + selfieMagic: 'puuijtkfpspv' }, restoreBackupSettings: { diff --git a/src/js/contentscript.js b/src/js/contentscript.js index a231524af..1ba69ad5c 100644 --- a/src/js/contentscript.js +++ b/src/js/contentscript.js @@ -1379,20 +1379,9 @@ vAPI.domSurveyor = (function() { // Library of resources is located at: // https://github.com/gorhill/uBlock/blob/master/assets/ublock/resources.txt - if ( cfeDetails.scripts ) { - // Have the injected script tag remove itself when execution completes: - // to keep DOM as clean as possible. - var text = cfeDetails.scripts + - "\n" + - "(function() {\n" + - " var c = document.currentScript,\n" + - " p = c && c.parentNode;\n" + - " if ( p ) {\n" + - " p.removeChild(c);\n" + - " }\n" + - "})();"; - vAPI.injectScriptlet(document, text); - vAPI.injectedScripts = text; + if ( response.scriptlets ) { + vAPI.injectScriptlet(document, response.scriptlets); + vAPI.injectedScripts = response.scriptlets; } if ( vAPI.domSurveyor instanceof Object ) { @@ -1414,13 +1403,11 @@ vAPI.domSurveyor = (function() { }; // This starts bootstrap process. 
- var url = window.location.href; vAPI.messaging.send( 'contentscript', { what: 'retrieveContentScriptParameters', - pageURL: url, - locationURL: url, + url: window.location.href, isRootFrame: window === window.top }, bootstrapPhase1 diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index 0e21e8c46..2561b8c37 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -19,9 +19,6 @@ Home: https://github.com/gorhill/uBlock */ -/* jshint bitwise: false */ -/* global punycode */ - 'use strict'; /******************************************************************************/ @@ -31,56 +28,6 @@ /******************************************************************************/ var µb = µBlock; - -/******************************************************************************/ - -var isValidCSSSelector = (function() { - var div = document.createElement('div'), - matchesFn; - // Keep in mind: - // https://github.com/gorhill/uBlock/issues/693 - // https://github.com/gorhill/uBlock/issues/1955 - if ( div.matches instanceof Function ) { - matchesFn = div.matches.bind(div); - } else if ( div.mozMatchesSelector instanceof Function ) { - matchesFn = div.mozMatchesSelector.bind(div); - } else if ( div.webkitMatchesSelector instanceof Function ) { - matchesFn = div.webkitMatchesSelector.bind(div); - } else if ( div.msMatchesSelector instanceof Function ) { - matchesFn = div.msMatchesSelector.bind(div); - } else { - matchesFn = div.querySelector.bind(div); - } - // https://github.com/gorhill/uBlock/issues/3111 - // Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817 - // is fixed. 
- try { - matchesFn(':scope'); - } catch (ex) { - matchesFn = div.querySelector.bind(div); - } - return function(s) { - try { - matchesFn(s + ', ' + s + ':not(#foo)'); - } catch (ex) { - return false; - } - return true; - }; -})(); - -var reIsRegexLiteral = /^\/.+\/$/; - -var isBadRegex = function(s) { - try { - void new RegExp(s); - } catch (ex) { - isBadRegex.message = ex.toString(); - return true; - } - return false; -}; - var cosmeticSurveyingMissCountMax = parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) || 15; /******************************************************************************/ @@ -222,174 +169,6 @@ registerFilterClass(FilterBucket); /******************************************************************************/ /******************************************************************************/ -var FilterParser = function() { - this.prefix = this.suffix = ''; - this.unhide = 0; - this.hostnames = []; - this.invalid = false; - this.cosmetic = true; - this.reNeedHostname = /^(?:script:contains|script:inject|.+?:-abp-contains|.+?:-abp-has|.+?:contains|.+?:has|.+?:has-text|.+?:if|.+?:if-not|.+?:matches-css(?:-before|-after)?|.*?:xpath)\(.+\)$/; -}; - -/******************************************************************************/ - -FilterParser.prototype.reset = function() { - this.raw = ''; - this.prefix = this.suffix = ''; - this.unhide = 0; - this.hostnames.length = 0; - this.invalid = false; - this.cosmetic = true; - return this; -}; - -/******************************************************************************/ - -FilterParser.prototype.parse = function(raw) { - // important! - this.reset(); - - this.raw = raw; - - // Find the bounds of the anchor. - var lpos = raw.indexOf('#'); - if ( lpos === -1 ) { - this.cosmetic = false; - return this; - } - var rpos = raw.indexOf('#', lpos + 1); - if ( rpos === -1 ) { - this.cosmetic = false; - return this; - } - - // Coarse-check that the anchor is valid. 
- // `##`: l = 1 - // `#@#`, `#$#`, `#%#`, `#?#`: l = 2 - // `#@$#`, `#@%#`, `#@?#`: l = 3 - if ( (rpos - lpos) > 3 ) { - this.cosmetic = false; - return this; - } - - // Find out type of cosmetic filter. - // Exception filter? - if ( raw.charCodeAt(lpos + 1) === 0x40 /* '@' */ ) { - this.unhide = 1; - } - - // https://github.com/gorhill/uBlock/issues/952 - // Find out whether we are dealing with an Adguard-specific cosmetic - // filter, and if so, translate it if supported, or discard it if not - // supported. - var cCode = raw.charCodeAt(rpos - 1); - if ( cCode !== 0x23 /* '#' */ && cCode !== 0x40 /* '@' */ ) { - // We have an Adguard/ABP cosmetic filter if and only if the character - // is `$`, `%` or `?`, otherwise it's not a cosmetic filter. - if ( - cCode !== 0x24 /* '$' */ && - cCode !== 0x25 /* '%' */ && - cCode !== 0x3F /* '?' */ - ) { - this.cosmetic = false; - return this; - } - // Adguard's scriptlet injection: not supported. - if ( cCode === 0x25 /* '%' */ ) { - this.invalid = true; - return this; - } - // Adguard's style injection: supported, but translate to uBO's format. - if ( cCode === 0x24 /* '$' */ ) { - raw = this.translateAdguardCSSInjectionFilter(raw); - if ( raw === '' ) { - this.invalid = true; - return this; - } - } - rpos = raw.indexOf('#', lpos + 1); - } - - // Extract the hostname(s). - if ( lpos !== 0 ) { - this.prefix = raw.slice(0, lpos); - } - - // Extract the selector. - this.suffix = raw.slice(rpos + 1).trim(); - if ( this.suffix.length === 0 ) { - this.cosmetic = false; - return this; - } - - // 2014-05-23: - // https://github.com/gorhill/httpswitchboard/issues/260 - // Any sequence of `#` longer than one means the line is not a valid - // cosmetic filter. - if ( this.suffix.indexOf('##') !== -1 ) { - this.cosmetic = false; - return this; - } - - // Normalize high-medium selectors: `href` is assumed to imply `a` tag. We - // need to do this here in order to correctly avoid duplicates. 
The test - // is designed to minimize overhead -- this is a low occurrence filter. - if ( this.suffix.startsWith('[href^="', 1) ) { - this.suffix = this.suffix.slice(1); - } - - if ( this.prefix !== '' ) { - this.hostnames = this.prefix.split(/\s*,\s*/); - } - - // For some selectors, it is mandatory to have a hostname or entity: - // ##script:contains(...) - // ##script:inject(...) - // ##.foo:-abp-contains(...) - // ##.foo:-abp-has(...) - // ##.foo:contains(...) - // ##.foo:has(...) - // ##.foo:has-text(...) - // ##.foo:if(...) - // ##.foo:if-not(...) - // ##.foo:matches-css(...) - // ##.foo:matches-css-after(...) - // ##.foo:matches-css-before(...) - // ##:xpath(...) - if ( - this.hostnames.length === 0 && - this.unhide === 0 && - this.reNeedHostname.test(this.suffix) - ) { - this.invalid = true; - return this; - } - - return this; -}; - -/******************************************************************************/ - -// Reference: https://adguard.com/en/filterrules.html#cssInjection - -FilterParser.prototype.translateAdguardCSSInjectionFilter = function(raw) { - var matches = /^([^#]*)#(@?)\$#([^{]+)\{([^}]+)\}$/.exec(raw); - if ( matches === null ) { - return ''; - } - // For now we do not allow generic CSS injections (prolly never). 
- if ( matches[1] === '' && matches[2] !== '@' ) { - return ''; - } - return matches[1] + - '#' + matches[2] + '#' + - matches[3].trim() + - ':style(' + matches[4].trim() + ')'; -}; - -/******************************************************************************/ -/******************************************************************************/ - var SelectorCacheEntry = function() { this.reset(); }; @@ -538,17 +317,11 @@ SelectorCacheEntry.prototype.retrieve = function(type, out) { /******************************************************************************/ /******************************************************************************/ -// Two Unicode characters: -// T0HHHHHHH HHHHHHHHH -// | | | -// | | | -// | | | -// | | +-- bit 8-0 of FNV -// | | -// | +-- bit 15-9 of FNV -// | -// +-- filter type (0=hide 1=unhide) -// +// 0000HHHHHHHHHHHH +// | +// | +// | +// +-- bit 11-0 of FNV var makeHash = function(token) { // Ref: Given a URL, returns a unique 4-character long hash string @@ -607,7 +380,6 @@ var makeHash = function(token) { var FilterContainer = function() { this.noDomainHash = '-'; - this.parser = new FilterParser(); this.reHasUnicode = /[^\x00-\x7F]/; this.rePlainSelector = /^[#.][\w\\-]+/; this.rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/; @@ -615,8 +387,25 @@ var FilterContainer = function() { this.reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g; this.reSimpleHighGeneric1 = /^[a-z]*\[[^[]+]$/; this.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/; - this.reScriptSelector = /^script:(contains|inject)\((.+)\)$/; - this.punycode = punycode; + this.reNeedHostname = new RegExp([ + '^', + '(?:', + [ + 'script:contains', + '.+?:has', + '.+?:has-text', + '.+?:if', + '.+?:if-not', + '.+?:matches-css(?:-before|-after)?', + '.*?:xpath', + '.+?:-abp-contains', // ABP-specific for `:has-text` + '.+?:-abp-has', // ABP-specific for `:if` + '.+?:contains' // Adguard-specific for `:has-text` + ].join('|'), + ')', + '\\(.+\\)', + '$' + 
].join('')); this.selectorCache = new Map(); this.selectorCachePruneDelay = 10 * 60 * 1000; // 10 minutes @@ -630,6 +419,9 @@ var FilterContainer = function() { // generic exception filters this.genericDonthideSet = new Set(); + // TODO: Think about reusing µb.staticExtFilteringEngine.HostnameBasedDB + // for both specific and procedural filters. This would require some + // refactoring. // hostname, entity-based filters this.specificFilters = new Map(); this.proceduralFilters = new Map(); @@ -664,9 +456,6 @@ var FilterContainer = function() { mru: new µb.MRUCache(16) }; - this.userScripts = new Map(); - this.userScriptCache = new µb.MRUCache(32); - // Short-lived: content is valid only during one function call. These // is to prevent repeated allocation/deallocation overheads -- the // constructors/destructors of javascript Set/Map is assumed to be costlier @@ -684,7 +473,6 @@ var FilterContainer = function() { // Reset all, thus reducing to a minimum memory footprint of the context. FilterContainer.prototype.reset = function() { - this.parser.reset(); this.µburi = µb.URI; this.frozen = false; this.acceptedCount = 0; @@ -720,12 +508,6 @@ FilterContainer.prototype.reset = function() { this.highlyGeneric.complex.dict.clear(); this.highlyGeneric.complex.str = ''; this.highlyGeneric.complex.mru.reset(); - - this.scriptTagFilters = {}; - this.scriptTagFilterCount = 0; - - this.userScripts.clear(); - this.userScriptCache.reset(); }; /******************************************************************************/ @@ -759,345 +541,9 @@ FilterContainer.prototype.freeze = function() { this.highlyGeneric.simple.str = µb.arrayFrom(this.highlyGeneric.simple.dict).join(',\n'); this.highlyGeneric.complex.str = µb.arrayFrom(this.highlyGeneric.complex.dict).join(',\n'); - this.parser.reset(); - this.compileSelector.reset(); - this.compileProceduralSelector.reset(); this.frozen = true; }; -/******************************************************************************/ - -// 
https://github.com/chrisaljoudi/uBlock/issues/1004 -// Detect and report invalid CSS selectors. - -// Discard new ABP's `-abp-properties` directive until it is -// implemented (if ever). Unlikely, see: -// https://github.com/gorhill/uBlock/issues/1752 - -// https://github.com/gorhill/uBlock/issues/2624 -// Convert Adguard's `-ext-has='...'` into uBO's `:has(...)`. - -FilterContainer.prototype.compileSelector = (function() { - var reAfterBeforeSelector = /^(.+?)(::?after|::?before)$/, - reStyleSelector = /^(.+?):style\((.+?)\)$/, - reStyleBad = /url\([^)]+\)/, - reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/, - reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/, - div = document.createElement('div'); - - var normalizedExtendedSyntaxOperators = new Map([ - [ 'contains', ':has-text' ], - [ 'has', ':if' ], - [ 'matches-css', ':matches-css' ], - [ 'matches-css-after', ':matches-css-after' ], - [ 'matches-css-before', ':matches-css-before' ], - ]); - - var isValidStyleProperty = function(cssText) { - if ( reStyleBad.test(cssText) ) { return false; } - div.style.cssText = cssText; - if ( div.style.cssText === '' ) { return false; } - div.style.cssText = ''; - return true; - }; - - var entryPoint = function(raw) { - var extendedSyntax = reExtendedSyntax.test(raw); - if ( isValidCSSSelector(raw) && extendedSyntax === false ) { - return raw; - } - - // We rarely reach this point -- majority of selectors are plain - // CSS selectors. - - var matches, operator; - - // Supported Adguard/ABP advanced selector syntax: will translate into - // uBO's syntax before further processing. - // Mind unsupported advanced selector syntax, such as ABP's - // `-abp-properties`. - // Note: extended selector syntax has been deprecated in ABP, in favor - // of the procedural one (i.e. `:operator(...)`). 
See - // https://issues.adblockplus.org/ticket/5287 - if ( extendedSyntax ) { - while ( (matches = reExtendedSyntaxParser.exec(raw)) !== null ) { - operator = normalizedExtendedSyntaxOperators.get(matches[1]); - if ( operator === undefined ) { return; } - raw = raw.slice(0, matches.index) + - operator + '(' + matches[3] + ')' + - raw.slice(matches.index + matches[0].length); - } - return this.compileSelector(raw); - } - - var selector = raw, - pseudoclass, style; - - // `:style` selector? - if ( (matches = reStyleSelector.exec(selector)) !== null ) { - selector = matches[1]; - style = matches[2]; - } - - // https://github.com/gorhill/uBlock/issues/2448 - // :after- or :before-based selector? - if ( (matches = reAfterBeforeSelector.exec(selector)) ) { - selector = matches[1]; - pseudoclass = matches[2]; - } - - if ( style !== undefined || pseudoclass !== undefined ) { - if ( isValidCSSSelector(selector) === false ) { - return; - } - if ( pseudoclass !== undefined ) { - selector += pseudoclass; - } - if ( style !== undefined ) { - if ( isValidStyleProperty(style) === false ) { return; } - return JSON.stringify({ - raw: raw, - style: [ selector, style ] - }); - } - return JSON.stringify({ - raw: raw, - pseudoclass: true - }); - } - - // `script:` filter? - if ( (matches = this.reScriptSelector.exec(raw)) !== null ) { - // :inject - if ( matches[1] === 'inject' ) { - return raw; - } - // :contains - if ( - reIsRegexLiteral.test(matches[2]) === false || - isBadRegex(matches[2].slice(1, -1)) === false - ) { - return raw; - } - } - - // Procedural selector? 
- var compiled; - if ( (compiled = this.compileProceduralSelector(raw)) ) { - return compiled; - } - - µb.logger.writeOne('', 'error', 'Cosmetic filtering – invalid filter: ' + raw); - }; - - entryPoint.reset = function() { - }; - - return entryPoint; -})(); - -/******************************************************************************/ - -FilterContainer.prototype.compileProceduralSelector = (function() { - var reOperatorParser = /(:(?:-abp-contains|-abp-has|contains|has|has-text|if|if-not|matches-css|matches-css-after|matches-css-before|xpath))\(.+\)$/, - reFirstParentheses = /^\(*/, - reLastParentheses = /\)*$/, - reEscapeRegex = /[.*+?^${}()|[\]\\]/g, - reNeedScope = /^\s*[+>~]/; - - var lastProceduralSelector = '', - lastProceduralSelectorCompiled, - regexToRawValue = new Map(); - - var compileCSSSelector = function(s) { - // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277 - // Prepend `:scope ` if needed. - if ( reNeedScope.test(s) ) { - s = ':scope ' + s; - } - if ( isValidCSSSelector(s) ) { - return s; - } - }; - - var compileText = function(s) { - var reText; - if ( reIsRegexLiteral.test(s) ) { - reText = s.slice(1, -1); - if ( isBadRegex(reText) ) { return; } - } else { - reText = s.replace(reEscapeRegex, '\\$&'); - regexToRawValue.set(reText, s); - } - return reText; - }; - - var compileCSSDeclaration = function(s) { - var name, value, reText, - pos = s.indexOf(':'); - if ( pos === -1 ) { return; } - name = s.slice(0, pos).trim(); - value = s.slice(pos + 1).trim(); - if ( reIsRegexLiteral.test(value) ) { - reText = value.slice(1, -1); - if ( isBadRegex(reText) ) { return; } - } else { - reText = '^' + value.replace(reEscapeRegex, '\\$&') + '$'; - regexToRawValue.set(reText, value); - } - return { name: name, value: reText }; - }; - - var compileConditionalSelector = function(s) { - // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277 - // Prepend `:scope ` if needed. 
- if ( reNeedScope.test(s) ) { - s = ':scope ' + s; - } - return compile(s); - }; - - var compileXpathExpression = function(s) { - var dummy; - try { - dummy = document.createExpression(s, null) instanceof XPathExpression; - } catch (e) { - return; - } - return s; - }; - - // https://github.com/gorhill/uBlock/issues/2793 - var normalizedOperators = new Map([ - [ ':-abp-contains', ':has-text' ], - [ ':-abp-has', ':if' ], - [ ':contains', ':has-text' ] - ]); - - var compileArgument = new Map([ - [ ':has', compileCSSSelector ], - [ ':has-text', compileText ], - [ ':if', compileConditionalSelector ], - [ ':if-not', compileConditionalSelector ], - [ ':matches-css', compileCSSDeclaration ], - [ ':matches-css-after', compileCSSDeclaration ], - [ ':matches-css-before', compileCSSDeclaration ], - [ ':xpath', compileXpathExpression ] - ]); - - // https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387 - // - Normalize (somewhat) the stringified version of procedural cosmetic - // filters -- this increase the likelihood of detecting duplicates given - // that uBO is able to understand syntax specific to other blockers. - // The normalized string version is what is reported in the logger, by - // design. 
- var decompile = function(compiled) { - var raw = [ compiled.selector ], - tasks = compiled.tasks, - value; - if ( Array.isArray(tasks) ) { - for ( var i = 0, n = tasks.length, task; i < n; i++ ) { - task = tasks[i]; - switch ( task[0] ) { - case ':has': - case ':xpath': - raw.push(task[0], '(', task[1], ')'); - break; - case ':has-text': - value = regexToRawValue.get(task[1]); - if ( value === undefined ) { - value = '/' + task[1] + '/'; - } - raw.push(task[0], '(', value, ')'); - break; - case ':matches-css': - case ':matches-css-after': - case ':matches-css-before': - value = regexToRawValue.get(task[1].value); - if ( value === undefined ) { - value = '/' + task[1].value + '/'; - } - raw.push(task[0], '(', task[1].name, ': ', value, ')'); - break; - case ':if': - case ':if-not': - raw.push(task[0], '(', decompile(task[1]), ')'); - break; - } - } - } - return raw.join(''); - }; - - var compile = function(raw) { - var matches = reOperatorParser.exec(raw); - if ( matches === null ) { - if ( isValidCSSSelector(raw) ) { return { selector: raw }; } - return; - } - var tasks = [], - firstOperand = raw.slice(0, matches.index), - currentOperator = matches[1], - selector = raw.slice(matches.index + currentOperator.length), - currentArgument = '', nextOperand, nextOperator, - depth = 0, opening, closing; - if ( firstOperand !== '' && isValidCSSSelector(firstOperand) === false ) { return; } - for (;;) { - matches = reOperatorParser.exec(selector); - if ( matches !== null ) { - nextOperand = selector.slice(0, matches.index); - nextOperator = matches[1]; - } else { - nextOperand = selector; - nextOperator = ''; - } - opening = reFirstParentheses.exec(nextOperand)[0].length; - closing = reLastParentheses.exec(nextOperand)[0].length; - if ( opening > closing ) { - if ( depth === 0 ) { currentArgument = ''; } - depth += 1; - } else if ( closing > opening && depth > 0 ) { - depth -= 1; - if ( depth === 0 ) { nextOperand = currentArgument + nextOperand; } - } - if ( depth !== 0 ) 
{ - currentArgument += nextOperand + nextOperator; - } else { - currentOperator = normalizedOperators.get(currentOperator) || currentOperator; - currentArgument = compileArgument.get(currentOperator)(nextOperand.slice(1, -1)); - if ( currentArgument === undefined ) { return; } - tasks.push([ currentOperator, currentArgument ]); - currentOperator = nextOperator; - } - if ( nextOperator === '' ) { break; } - selector = selector.slice(matches.index + nextOperator.length); - } - if ( tasks.length === 0 || depth !== 0 ) { return; } - return { selector: firstOperand, tasks: tasks }; - }; - - var entryPoint = function(raw) { - if ( raw === lastProceduralSelector ) { - return lastProceduralSelectorCompiled; - } - lastProceduralSelector = raw; - var compiled = compile(raw); - if ( compiled !== undefined ) { - compiled.raw = decompile(compiled); - compiled = JSON.stringify(compiled); - } - lastProceduralSelectorCompiled = compiled; - return compiled; - }; - - entryPoint.reset = function() { - regexToRawValue = new Map(); - lastProceduralSelector = ''; - lastProceduralSelectorCompiled = undefined; - }; - - return entryPoint; -})(); /******************************************************************************/ @@ -1135,17 +581,12 @@ FilterContainer.prototype.keyFromSelector = function(selector) { /******************************************************************************/ -FilterContainer.prototype.compile = function(s, writer) { - var parsed = this.parser.parse(s); - if ( parsed.cosmetic === false ) { - return false; - } - if ( parsed.invalid ) { - return true; - } +FilterContainer.prototype.compile = function(parsed, writer) { + // 1000 = cosmetic filtering + writer.select(1000); - var hostnames = parsed.hostnames; - var i = hostnames.length; + var hostnames = parsed.hostnames, + i = hostnames.length; if ( i === 0 ) { this.compileGenericSelector(parsed, writer); return true; @@ -1155,9 +596,8 @@ FilterContainer.prototype.compile = function(s, writer) { // Negated 
hostname means the filter applies to all non-negated hostnames // of same filter OR globally if there is no non-negated hostnames. var applyGlobally = true; - var hostname; while ( i-- ) { - hostname = hostnames[i]; + var hostname = hostnames[i]; if ( hostname.startsWith('~') === false ) { applyGlobally = false; } @@ -1173,7 +613,7 @@ FilterContainer.prototype.compile = function(s, writer) { /******************************************************************************/ FilterContainer.prototype.compileGenericSelector = function(parsed, writer) { - if ( parsed.unhide === 0 ) { + if ( parsed.exception === false ) { this.compileGenericHideSelector(parsed, writer); } else { this.compileGenericUnhideSelector(parsed, writer); @@ -1183,6 +623,20 @@ FilterContainer.prototype.compileGenericSelector = function(parsed, writer) { /******************************************************************************/ FilterContainer.prototype.compileGenericHideSelector = function(parsed, writer) { + // For some selectors, it is mandatory to have a hostname or entity: + // ##.foo:-abp-contains(...) + // ##.foo:-abp-has(...) + // ##.foo:contains(...) + // ##.foo:has(...) + // ##.foo:has-text(...) + // ##.foo:if(...) + // ##.foo:if-not(...) + // ##.foo:matches-css(...) + // ##.foo:matches-css-after(...) + // ##.foo:matches-css-before(...) + // ##:xpath(...) + if ( this.reNeedHostname.test(parsed.suffix) ) { return; } + var selector = parsed.suffix, type = selector.charCodeAt(0), key; @@ -1198,7 +652,7 @@ FilterContainer.prototype.compileGenericHideSelector = function(parsed, writer) return; } // Complex selector-based CSS rule. - if ( this.compileSelector(selector) !== undefined ) { + if ( µb.staticExtFilteringEngine.compileSelector(selector) !== undefined ) { writer.push([ 1 /* lg+ */, key.slice(1), selector ]); } return; @@ -1215,13 +669,13 @@ FilterContainer.prototype.compileGenericHideSelector = function(parsed, writer) return; } // Complex selector-based CSS rule.
- if ( this.compileSelector(selector) !== undefined ) { + if ( µb.staticExtFilteringEngine.compileSelector(selector) !== undefined ) { writer.push([ 3 /* lg+ */, key.slice(1), selector ]); } return; } - var compiled = this.compileSelector(selector); + var compiled = µb.staticExtFilteringEngine.compileSelector(selector); if ( compiled === undefined ) { return; } // TODO: Detect and error on procedural cosmetic filters. @@ -1259,18 +713,12 @@ FilterContainer.prototype.compileGenericHideSelector = function(parsed, writer) /******************************************************************************/ -FilterContainer.prototype.compileGenericUnhideSelector = function(parsed, writer) { - var selector = parsed.suffix; - - // script:contains(...) - // script:inject(...) - if ( this.reScriptSelector.test(selector) ) { - writer.push([ 6 /* js */, '!', '', selector ]); - return; - } - +FilterContainer.prototype.compileGenericUnhideSelector = function( + parsed, + writer +) { // Procedural cosmetic filters are acceptable as generic exception filters. - var compiled = this.compileSelector(selector); + var compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix); if ( compiled === undefined ) { return; } // https://github.com/chrisaljoudi/uBlock/issues/497 @@ -1281,37 +729,24 @@ FilterContainer.prototype.compileGenericUnhideSelector = function(parsed, writer /******************************************************************************/ -FilterContainer.prototype.compileHostnameSelector = function(hostname, parsed, writer) { +FilterContainer.prototype.compileHostnameSelector = function( + hostname, + parsed, + writer +) { // https://github.com/chrisaljoudi/uBlock/issues/145 - var unhide = parsed.unhide; + var unhide = parsed.exception ? 
1 : 0; if ( hostname.startsWith('~') ) { hostname = hostname.slice(1); unhide ^= 1; } - // punycode if needed - if ( this.reHasUnicode.test(hostname) ) { - hostname = this.punycode.toASCII(hostname); - } - - var selector = parsed.suffix, - domain = this.µburi.domainFromHostname(hostname), - hash; - - // script:contains(...) - // script:inject(...) - if ( this.reScriptSelector.test(selector) ) { - hash = domain !== '' ? domain : this.noDomainHash; - if ( unhide ) { - hash = '!' + hash; - } - writer.push([ 6 /* js */, hash, hostname, selector ]); - return; - } - - var compiled = this.compileSelector(selector); + var compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix); if ( compiled === undefined ) { return; } + var domain = this.µburi.domainFromHostname(hostname), + hash; + // https://github.com/chrisaljoudi/uBlock/issues/188 // If not a real domain as per PSL, assign a synthetic one if ( hostname.endsWith('.*') === false ) { @@ -1319,7 +754,7 @@ FilterContainer.prototype.compileHostnameSelector = function(hostname, parsed, w } else { hash = makeHash(hostname); } - if ( unhide ) { + if ( unhide === 1 ) { hash = '!' 
+ hash; } @@ -1336,23 +771,22 @@ FilterContainer.prototype.compileHostnameSelector = function(hostname, parsed, w /******************************************************************************/ -FilterContainer.prototype.fromCompiledContent = function( - reader, - skipGenericCosmetic, - skipCosmetic -) { - if ( skipCosmetic ) { +FilterContainer.prototype.fromCompiledContent = function(reader, options) { + if ( options.skipCosmetic ) { this.skipCompiledContent(reader); return; } - if ( skipGenericCosmetic ) { + if ( options.skipGenericCosmetic ) { this.skipGenericCompiledContent(reader); return; } var fingerprint, args, db, filter, bucket; - while ( reader.next() === true ) { + // 1000 = cosmetic filtering + reader.select(1000); + + while ( reader.next() ) { this.acceptedCount += 1; fingerprint = reader.fingerprint(); if ( this.duplicateBuster.has(fingerprint) ) { @@ -1410,12 +844,6 @@ FilterContainer.prototype.fromCompiledContent = function( this.highlyGeneric.complex.dict.add(args[1]); break; - // js, hash, example.com, script:contains(...) - // js, hash, example.com, script:inject(...) - case 6: - this.createScriptFilter(args); - break; - // https://github.com/chrisaljoudi/uBlock/issues/497 // Generic exception filters: expected to be a rare occurrence. // #@#.tweet @@ -1451,7 +879,10 @@ FilterContainer.prototype.fromCompiledContent = function( FilterContainer.prototype.skipGenericCompiledContent = function(reader) { var fingerprint, args, db, filter, bucket; - while ( reader.next() === true ) { + // 1000 = cosmetic filtering + reader.select(1000); + + while ( reader.next() ) { this.acceptedCount += 1; fingerprint = reader.fingerprint(); if ( this.duplicateBuster.has(fingerprint) ) { @@ -1463,13 +894,6 @@ FilterContainer.prototype.skipGenericCompiledContent = function(reader) { switch ( args[0] ) { - // js, hash, example.com, script:contains(...) - // js, hash, example.com, script:inject(...) 
- case 6: - this.duplicateBuster.add(fingerprint); - this.createScriptFilter(args); - break; - // https://github.com/chrisaljoudi/uBlock/issues/497 // Generic exception filters: expected to be a rare occurrence. case 7: @@ -1504,268 +928,17 @@ FilterContainer.prototype.skipGenericCompiledContent = function(reader) { /******************************************************************************/ FilterContainer.prototype.skipCompiledContent = function(reader) { - var fingerprint, args; + // 1000 = cosmetic filtering + reader.select(1000); - while ( reader.next() === true ) { + while ( reader.next() ) { this.acceptedCount += 1; - - args = reader.args(); - - // js, hash, example.com, script:contains(...) - // js, hash, example.com, script:inject(...) - if ( args[0] === 6 ) { - fingerprint = reader.fingerprint(); - if ( this.duplicateBuster.has(fingerprint) === false ) { - this.duplicateBuster.add(fingerprint); - this.createScriptFilter(args); - } - continue; - } - this.discardedCount += 1; } }; /******************************************************************************/ -FilterContainer.prototype.createScriptFilter = function(args) { - if ( args[3].startsWith('script:inject') ) { - return this.createUserScriptRule(args); - } - if ( args[3].startsWith('script:contains') ) { - return this.createScriptTagFilter(args); - } -}; - -/******************************************************************************/ - -// 0123456789012345678901 -// script:contains(token) -// ^ ^ -// 16 -1 - -FilterContainer.prototype.createScriptTagFilter = function(args) { - var hostname = args[2], - token = args[3].slice(16, -1); - token = token.startsWith('/') && token.endsWith('/') - ? 
token.slice(1, -1) - : token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - - if ( this.scriptTagFilters.hasOwnProperty(hostname) ) { - this.scriptTagFilters[hostname] += '|' + token; - } else { - this.scriptTagFilters[hostname] = token; - } - - this.scriptTagFilterCount += 1; -}; - -/******************************************************************************/ - -FilterContainer.prototype.retrieveScriptTagHostnames = function() { - return Object.keys(this.scriptTagFilters); -}; - -/******************************************************************************/ - -FilterContainer.prototype.retrieveScriptTagRegex = function(domain, hostname) { - if ( this.scriptTagFilterCount === 0 ) { - return; - } - var out = [], hn = hostname, pos; - - // Hostname-based - for (;;) { - if ( this.scriptTagFilters.hasOwnProperty(hn) ) { - out.push(this.scriptTagFilters[hn]); - } - if ( hn === domain ) { - break; - } - pos = hn.indexOf('.'); - if ( pos === -1 ) { - break; - } - hn = hn.slice(pos + 1); - } - - // Entity-based - pos = domain.indexOf('.'); - if ( pos !== -1 ) { - hn = domain.slice(0, pos) + '.*'; - if ( this.scriptTagFilters.hasOwnProperty(hn) ) { - out.push(this.scriptTagFilters[hn]); - } - } - if ( out.length !== 0 ) { - return out.join('|'); - } -}; - -/******************************************************************************/ - -// userScripts{hash} => FilterHostname | FilterBucket - -FilterContainer.prototype.createUserScriptRule = function(args) { - var hash = args[1], - filter = new FilterHostname(args[3].slice(14, -1), args[2]); - var bucket = this.userScripts.get(hash); - if ( bucket === undefined ) { - this.userScripts.set(hash, filter); - } else if ( bucket instanceof FilterBucket ) { - bucket.add(filter); - } else { - this.userScripts.set(hash, new FilterBucket(bucket, filter)); - } -}; - -/******************************************************************************/ - -// https://github.com/gorhill/uBlock/issues/1954 - -// 01234567890123456789 -// 
script:inject(token[, arg[, ...]]) -// ^ ^ -// 14 -1 - -FilterContainer.prototype.retrieveUserScripts = function( - domain, - hostname, - details -) { - if ( this.userScripts.size === 0 ) { return; } - if ( µb.hiddenSettings.ignoreScriptInjectFilters === true ) { return; } - - var reng = µb.redirectEngine; - if ( !reng ) { return; } - - this.mapRegister0.clear(); - - var toInject = this.mapRegister0, - pos = domain.indexOf('.'), - entity = pos !== -1 ? domain.slice(0, pos) + '.*' : ''; - - // Implicit - var hn = hostname; - for (;;) { - this._lookupUserScript(hn + '.js', reng, toInject); - if ( hn === domain ) { break; } - pos = hn.indexOf('.'); - if ( pos === -1 ) { break; } - hn = hn.slice(pos + 1); - } - if ( entity !== '' ) { - this._lookupUserScript(entity + '.js', reng, toInject); - } - - // Explicit (hash is domain). - var selectors = new Set(), - bucket; - if ( (bucket = this.userScripts.get(domain)) ) { - bucket.retrieve(hostname, selectors); - } - if ( entity !== '' && (bucket = this.userScripts.get(entity)) ) { - bucket.retrieve(entity, selectors); - } - for ( var selector of selectors ) { - this._lookupUserScript(selector, reng, toInject); - } - - if ( toInject.size === 0 ) { return; } - - // https://github.com/gorhill/uBlock/issues/2835 - // Do not inject scriptlets if the site is under an `allow` rule. - if ( - µb.userSettings.advancedUserEnabled === true && - µb.sessionFirewall.evaluateCellZY(hostname, hostname, '*') === 2 - ) { - return; - } - - // Exceptions should be rare, so we check for exception only if there are - // scriptlets returned. - var exceptions = new Set(); - if ( (bucket = this.userScripts.get('!' + domain)) ) { - bucket.retrieve(hostname, exceptions); - } - if ( entity !== '' && (bucket = this.userScripts.get('!' + entity)) ) { - bucket.retrieve(hostname, exceptions); - } - - // Return an array of scriptlets, and log results if needed. - var out = [], - logger = µb.logger.isEnabled() ? 
µb.logger : null, - isException; - - for ( var entry of toInject ) { - if ( (isException = exceptions.has(entry[0])) === false ) { - out.push(entry[1]); - } - if ( logger === null ) { continue; } - logger.writeOne( - details.tabId, - 'cosmetic', - { - source: 'cosmetic', - raw: (isException ? '#@#' : '##') + 'script:inject(' + entry[0] + ')' - }, - 'dom', - details.locationURL, - null, - hostname - ); - } - - return out.join('\n'); -}; - -FilterContainer.prototype._lookupUserScript = function(raw, reng, toInject) { - if ( toInject.has(raw) ) { return; } - if ( this.userScriptCache.resetTime < reng.modifyTime ) { - this.userScriptCache.reset(); - } - var content = this.userScriptCache.lookup(raw); - if ( content === undefined ) { - var token, args, - pos = raw.indexOf(','); - if ( pos === -1 ) { - token = raw; - } else { - token = raw.slice(0, pos).trim(); - args = raw.slice(pos + 1).trim(); - } - content = reng.resourceContentFromName(token, 'application/javascript'); - if ( !content ) { return; } - if ( args ) { - content = this._fillupUserScript(content, args); - if ( !content ) { return; } - } - this.userScriptCache.add(raw, content); - } - toInject.set(raw, content); -}; - -// Fill template placeholders. 
Return falsy if: -// - At least one argument contains anything else than /\w/ and `.` - -FilterContainer.prototype._fillupUserScript = function(content, args) { - var i = 1, - pos, arg; - while ( args !== '' ) { - pos = args.indexOf(','); - if ( pos === -1 ) { pos = args.length; } - arg = args.slice(0, pos).trim().replace(this._reEscapeScriptArg, '\\$&'); - content = content.replace('{{' + i + '}}', arg); - args = args.slice(pos + 1).trim(); - i++; - } - return content; -}; - -FilterContainer.prototype._reEscapeScriptArg = /[\\'"]/g; - -/******************************************************************************/ - FilterContainer.prototype.toSelfie = function() { var selfieFromMap = function(map) { var selfie = []; @@ -1788,10 +961,7 @@ FilterContainer.prototype.toSelfie = function() { lowlyGenericCCL: µb.arrayFrom(this.lowlyGeneric.cl.complex), highSimpleGenericHideArray: µb.arrayFrom(this.highlyGeneric.simple.dict), highComplexGenericHideArray: µb.arrayFrom(this.highlyGeneric.complex.dict), - genericDonthideArray: µb.arrayFrom(this.genericDonthideSet), - scriptTagFilters: this.scriptTagFilters, - scriptTagFilterCount: this.scriptTagFilterCount, - userScripts: selfieFromMap(this.userScripts) + genericDonthideArray: µb.arrayFrom(this.genericDonthideSet) }; }; @@ -1823,9 +993,6 @@ FilterContainer.prototype.fromSelfie = function(selfie) { this.highlyGeneric.complex.dict = new Set(selfie.highComplexGenericHideArray); this.highlyGeneric.complex.str = selfie.highComplexGenericHideArray.join(',\n'); this.genericDonthideSet = new Set(selfie.genericDonthideArray); - this.scriptTagFilters = selfie.scriptTagFilters; - this.scriptTagFilterCount = selfie.scriptTagFilterCount; - this.userScripts = mapFromSelfie(selfie.userScripts); this.frozen = true; }; @@ -2055,14 +1222,10 @@ FilterContainer.prototype.retrieveDomainSelectors = function( request, options ) { - if ( !request.locationURL ) { return; } - //console.time('cosmeticFilteringEngine.retrieveDomainSelectors'); - var 
hostname = this.µburi.hostnameFromURI(request.locationURL), - domain = this.µburi.domainFromHostname(hostname) || hostname, - pos = domain.indexOf('.'), - entity = pos === -1 ? '' : domain.slice(0, pos - domain.length) + '.*', + var hostname = request.hostname, + entity = request.entity, cacheEntry = this.selectorCache.get(hostname), entry; @@ -2076,8 +1239,7 @@ FilterContainer.prototype.retrieveDomainSelectors = function( var out = { ready: this.frozen, hostname: hostname, - domain: domain, - entity: entity, + domain: request.domain, declarativeFilters: [], exceptionFilters: [], hideNodeAttr: this.randomAlphaToken(), @@ -2087,12 +1249,11 @@ FilterContainer.prototype.retrieveDomainSelectors = function( injectedHideFilters: '', networkFilters: '', noDOMSurveying: this.hasGenericHide === false, - proceduralFilters: [], - scripts: undefined + proceduralFilters: [] }; if ( options.noCosmeticFiltering !== true ) { - var domainHash = makeHash(domain), + var domainHash = makeHash(request.domain), entityHash = entity !== '' ? makeHash(entity) : undefined, exception, bucket; @@ -2228,9 +1389,6 @@ FilterContainer.prototype.retrieveDomainSelectors = function( this.setRegister2.clear(); } - // Scriptlet injection. - out.scripts = this.retrieveUserScripts(domain, hostname, request); - // CSS selectors for collapsible blocked elements if ( cacheEntry ) { var networkFilters = []; diff --git a/src/js/html-filtering.js b/src/js/html-filtering.js new file mode 100644 index 000000000..890c874d9 --- /dev/null +++ b/src/js/html-filtering.js @@ -0,0 +1,357 @@ +/******************************************************************************* + + uBlock Origin - a browser extension to block requests. + Copyright (C) 2017 Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. + + Home: https://github.com/gorhill/uBlock +*/ + +'use strict'; + +/******************************************************************************/ + +µBlock.htmlFilteringEngine = (function() { + var api = {}; + + var µb = µBlock, + filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(), + pselectors = new Map(), + duplicates = new Set(), + docRegister, loggerRegister; + + var PSelectorHasTask = function(task) { + this.selector = task[1]; + }; + PSelectorHasTask.prototype.exec = function(input) { + var output = []; + for ( var node of input ) { + if ( node.querySelector(this.selector) !== null ) { + output.push(node); + } + } + return output; + }; + + var PSelectorHasTextTask = function(task) { + this.needle = new RegExp(task[1]); + }; + PSelectorHasTextTask.prototype.exec = function(input) { + var output = []; + for ( var node of input ) { + if ( this.needle.test(node.textContent) ) { + output.push(node); + } + } + return output; + }; + + var PSelectorIfTask = function(task) { + this.pselector = new PSelector(task[1]); + }; + PSelectorIfTask.prototype.target = true; + PSelectorIfTask.prototype.exec = function(input) { + var output = []; + for ( var node of input ) { + if ( this.pselector.test(node) === this.target ) { + output.push(node); + } + } + return output; + }; + + var PSelectorIfNotTask = function(task) { + PSelectorIfTask.call(this, task); + this.target = false; + }; + PSelectorIfNotTask.prototype = Object.create(PSelectorIfTask.prototype); + PSelectorIfNotTask.prototype.constructor = PSelectorIfNotTask; + + var PSelectorXpathTask = function(task) { + this.xpe = task[1]; 
+ }; + PSelectorXpathTask.prototype.exec = function(input) { + var output = [], + xpe = docRegister.createExpression(this.xpe, null), + xpr = null; + for ( var node of input ) { + xpr = xpe.evaluate( + node, + XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, + xpr + ); + var j = xpr.snapshotLength; + while ( j-- ) { + node = xpr.snapshotItem(j); + if ( node.nodeType === 1 ) { + output.push(node); + } + } + } + return output; + }; + + var PSelector = function(o) { + if ( PSelector.prototype.operatorToTaskMap === undefined ) { + PSelector.prototype.operatorToTaskMap = new Map([ + [ ':has', PSelectorHasTask ], + [ ':has-text', PSelectorHasTextTask ], + [ ':if', PSelectorIfTask ], + [ ':if-not', PSelectorIfNotTask ], + [ ':xpath', PSelectorXpathTask ] + ]); + } + this.invalid = false; + this.raw = o.raw; + this.selector = o.selector; + this.tasks = []; + var tasks = o.tasks; + if ( !tasks ) { return; } + for ( var task of tasks ) { + var ctor = this.operatorToTaskMap.get(task[0]); + if ( ctor === undefined ) { + this.invalid = true; + break; + } + this.tasks.push(new ctor(task)); + } + }; + PSelector.prototype.operatorToTaskMap = undefined; + PSelector.prototype.prime = function(input) { + var root = input || docRegister; + if ( this.selector !== '' ) { + return root.querySelectorAll(this.selector); + } + return [ root ]; + }; + PSelector.prototype.exec = function(input) { + if ( this.invalid ) { return; } + var nodes = this.prime(input); + for ( var task of this.tasks ) { + if ( nodes.length === 0 ) { break; } + nodes = task.exec(nodes); + } + return nodes; + }; + + var logOne = function(details, selector) { + loggerRegister.writeOne( + details.tabId, + 'cosmetic', + { source: 'cosmetic', raw: '##^' + selector }, + 'dom', + details.url, + null, + details.hostname + ); + }; + + var applyProceduralSelector = function(details, selector) { + var pselector = pselectors.get(selector); + if ( pselector === undefined ) { + pselector = new PSelector(JSON.parse(selector)); + 
pselectors.set(selector, pselector); + } + var nodes = pselector.exec(), + i = nodes.length, + modified = false; + while ( i-- ) { + var node = nodes[i]; + if ( node.parentNode !== null ) { + node.parentNode.removeChild(node); + modified = true; + } + } + if ( modified && loggerRegister.isEnabled() ) { + logOne(details, pselector.raw); + } + return modified; + }; + + var applyCSSSelector = function(details, selector) { + var nodes = docRegister.querySelectorAll(selector), + i = nodes.length, + modified = false; + while ( i-- ) { + var node = nodes[i]; + if ( node.parentNode !== null ) { + node.parentNode.removeChild(node); + modified = true; + } + } + if ( modified && loggerRegister.isEnabled() ) { + logOne(details, selector); + } + return modified; + }; + + api.reset = function() { + filterDB.clear(); + pselectors.clear(); + duplicates.clear(); + }; + + api.freeze = function() { + duplicates.clear(); + }; + + api.compile = function(parsed, writer) { + var selector = parsed.suffix.slice(1).trim(), + compiled = µb.staticExtFilteringEngine.compileSelector(selector); + if ( compiled === undefined ) { return; } + + // 1002 = html filtering + writer.select(1002); + + // TODO: Mind negated hostnames, they are currently discarded. + + for ( var hostname of parsed.hostnames ) { + if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) { continue; } + var domain = µb.URI.domainFromHostname(hostname); + writer.push([ + compiled.charCodeAt(0) !== 0x7B /* '{' */ ? 64 : 65, + parsed.exception ? '!' + domain : domain, + hostname, + compiled + ]); + } + }; + + api.fromCompiledContent = function(reader) { + // Don't bother loading filters if stream filtering is not supported. 
+ //if ( µb.canFilterResponseBody === false ) { return; } + + // 1002 = html filtering + reader.select(1002); + + while ( reader.next() ) { + var fingerprint = reader.fingerprint(); + if ( duplicates.has(fingerprint) ) { continue; } + duplicates.add(fingerprint); + var args = reader.args(); + filterDB.add(args[1], { + type: args[0], + hostname: args[2], + selector: args[3] + }); + } + }; + + api.retrieve = function(request) { + var hostname = request.hostname; + + // https://github.com/gorhill/uBlock/issues/2835 + // Do not filter if the site is under an `allow` rule. + if ( + µb.userSettings.advancedUserEnabled && + µb.sessionFirewall.evaluateCellZY(hostname, hostname, '*') === 2 + ) { + return; + } + + var out = []; + if ( request.domain !== '' ) { + filterDB.retrieve(request.domain, hostname, out); + filterDB.retrieve(request.entity, request.entity, out); + } + filterDB.retrieve('', hostname, out); + + // TODO: handle exceptions. + + if ( out.length !== 0 ) { + return out; + } + }; + + api.apply = function(doc, details) { + docRegister = doc; + loggerRegister = µb.logger; + var modified = false; + for ( var entry of details.selectors ) { + if ( entry.type === 64 ) { + if ( applyCSSSelector(details, entry.selector) ) { + modified = true; + } + } else { + if ( applyProceduralSelector(details, entry.selector) ) { + modified = true; + } + } + } + + docRegister = loggerRegister = undefined; + return modified; + }; + + api.toSelfie = function() { + return filterDB.toSelfie(); + }; + + api.fromSelfie = function(selfie) { + filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(selfie); + pselectors.clear(); + }; + + // TODO: The following methods are useful only to legacy Firefox. They can be + // removed once support for legacy Firefox is dropped. The only care + // at this point is for the code to work, not to be efficient. + // Only `script:has-text` selectors are considered.
+ + api.retrieveScriptTagHostnames = function() { + var out = new Set(); + for ( var entry of filterDB ) { + if ( entry.type !== 65 ) { continue; } + var o = JSON.parse(entry.selector); + if ( + o.tasks.length === 1 && + o.tasks[0].length === 2 && + o.tasks[0][0] === ':has-text' + ) { + out.add(entry.hostname); + } + } + if ( out.size !== 0 ) { + return Array.from(out); + } + }; + + api.retrieveScriptTagRegex = function(domain, hostname) { + var entries = api.retrieve({ + hostname: hostname, + domain: domain, + entity: µb.URI.entityFromDomain(domain) + }); + if ( entries === undefined ) { return; } + var out = new Set(); + for ( var entry of entries ) { + if ( entry.type !== 65 ) { continue; } + var o = JSON.parse(entry.selector); + if ( + o.tasks.length === 1 && + o.tasks[0].length === 2 && + o.tasks[0][0] === ':has-text' + ) { + out.add(o.tasks[0][1]); + } + } + if ( out.size !== 0 ) { + return Array.from(out).join('|'); + } + }; + + return api; +})(); + +/******************************************************************************/ diff --git a/src/js/messaging.js b/src/js/messaging.js index 40ccaee61..51c91606c 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -102,7 +102,7 @@ var onMessage = function(request, sender, callback) { break; case 'compileCosmeticFilterSelector': - response = µb.cosmeticFilteringEngine.compileSelector(request.selector); + response = µb.staticExtFilteringEngine.compileSelector(request.selector); break; case 'cosmeticFiltersInjected': @@ -465,7 +465,7 @@ var onMessage = function(request, sender, callback) { var µb = µBlock, response, tabId, frameId, - pageStore; + pageStore = null; if ( sender && sender.tab ) { tabId = sender.tab.id; @@ -491,21 +491,33 @@ var onMessage = function(request, sender, callback) { break; case 'retrieveContentScriptParameters': - if ( pageStore && pageStore.getNetFilteringSwitch() ) { - response = { - collapseBlocked: µb.userSettings.collapseBlocked, - noCosmeticFiltering: 
pageStore.noCosmeticFiltering === true, - noGenericCosmeticFiltering: - pageStore.noGenericCosmeticFiltering === true - }; - request.tabId = tabId; - request.frameId = frameId; - response.specificCosmeticFilters = - µb.cosmeticFilteringEngine - .retrieveDomainSelectors(request, response); - if ( request.isRootFrame && µb.logger.isEnabled() ) { - µb.logCosmeticFilters(tabId); - } + if ( + pageStore === null || + pageStore.getNetFilteringSwitch() === false || + !request.url + ) { + break; + } + response = { + collapseBlocked: µb.userSettings.collapseBlocked, + noCosmeticFiltering: pageStore.noCosmeticFiltering === true, + noGenericCosmeticFiltering: + pageStore.noGenericCosmeticFiltering === true + }; + request.tabId = tabId; + request.frameId = frameId; + request.hostname = µb.URI.hostnameFromURI(request.url); + request.domain = µb.URI.domainFromHostname(request.hostname); + request.entity = µb.URI.entityFromDomain(request.domain); + response.specificCosmeticFilters = + µb.cosmeticFilteringEngine.retrieveDomainSelectors(request, response); + // If response body filtering is supported, then the scriptlets have + // already been injected.
+ if ( µb.canFilterResponseBody === false ) { + response.scriptlets = µb.scriptletFilteringEngine.retrieve(request); + } + if ( request.isRootFrame && µb.logger.isEnabled() ) { + µb.logCosmeticFilters(tabId); } break; diff --git a/src/js/reverselookup-worker.js b/src/js/reverselookup-worker.js index 7b2662c87..2073f9b8c 100644 --- a/src/js/reverselookup-worker.js +++ b/src/js/reverselookup-worker.js @@ -26,7 +26,26 @@ /******************************************************************************/ var listEntries = Object.create(null), - filterClassSeparator = '\n/* end of network - start of cosmetic */\n'; + reBlockStart = /^#block-start-(\d+)\n/gm; + +/******************************************************************************/ + +var extractBlocks = function(content, begId, endId) { + reBlockStart.lastIndex = 0; + var out = []; + var match = reBlockStart.exec(content); + while ( match !== null ) { + var beg = match.index + match[0].length; + var blockId = parseInt(match[1], 10); + if ( blockId >= begId && blockId < endId ) { + var end = content.indexOf('#block-end-' + match[1], beg); + out.push(content.slice(beg, end)); + reBlockStart.lastIndex = end; + } + match = reBlockStart.exec(content); + } + return out.join('\n'); +}; /******************************************************************************/ @@ -34,13 +53,11 @@ var fromNetFilter = function(details) { var lists = [], compiledFilter = details.compiledFilter, entry, content, pos, notFound; + for ( var assetKey in listEntries ) { entry = listEntries[assetKey]; if ( entry === undefined ) { continue; } - content = entry.content.slice( - 0, - entry.content.indexOf(filterClassSeparator) - ); + content = extractBlocks(entry.content, 0, 1000); pos = 0; for (;;) { pos = content.indexOf(compiledFilter, pos); @@ -96,7 +113,7 @@ var fromNetFilter = function(details) { // the various compiled versions. 
var fromCosmeticFilter = function(details) { - var match = /^#@?#/.exec(details.rawFilter), + var match = /^#@?#\^?/.exec(details.rawFilter), prefix = match[0], selector = details.rawFilter.slice(prefix.length); @@ -138,15 +155,14 @@ var fromCosmeticFilter = function(details) { } var response = Object.create(null), - assetKey, entry, content, found, beg, end, fargs; + assetKey, entry, content, + found, beg, end, + fargs, isProcedural; for ( assetKey in listEntries ) { entry = listEntries[assetKey]; if ( entry === undefined ) { continue; } - content = entry.content.slice( - entry.content.indexOf(filterClassSeparator) + - filterClassSeparator.length - ); + content = extractBlocks(entry.content, 1000, 2000); found = undefined; while ( (match = reNeedle.exec(content)) !== null ) { beg = content.lastIndexOf('\n', match.index); @@ -194,12 +210,15 @@ var fromCosmeticFilter = function(details) { found = prefix + selector; } break; - case 6: case 8: case 9: + case 32: + case 64: + case 65: + isProcedural = fargs[3].charCodeAt(0) === 0x7B; if ( - fargs[0] !== 9 && fargs[3] !== selector || - fargs[0] === 9 && JSON.parse(fargs[3]).raw !== selector + isProcedural === false && fargs[3] !== selector || + isProcedural && JSON.parse(fargs[3]).raw !== selector ) { break; } diff --git a/src/js/rpcreceiver.js b/src/js/rpcreceiver.js index 681d0f7d0..86adbcb59 100644 --- a/src/js/rpcreceiver.js +++ b/src/js/rpcreceiver.js @@ -35,22 +35,21 @@ if ( typeof vAPI.rpcReceiver !== 'object' ) { vAPI.rpcReceiver.getScriptTagHostnames = function() { var µb = µBlock; - var cfe = µb.cosmeticFilteringEngine; - if ( !cfe ) { return; } - return cfe.retrieveScriptTagHostnames(); + if ( µb.htmlFilteringEngine ) { + return µb.htmlFilteringEngine.retrieveScriptTagHostnames(); + } }; /******************************************************************************/ vAPI.rpcReceiver.getScriptTagFilters = function(details) { var µb = µBlock; - var cfe = µb.cosmeticFilteringEngine; - if ( !cfe ) { return; } + 
if ( !µb.htmlFilteringEngine ) { return; } // Fetching the script tag filters first: assuming it is faster than // checking whether the site is whitelisted. var hostname = details.frameHostname; - var r = cfe.retrieveScriptTagRegex( - µb.URI.domainFromHostname(hostname) || hostname, + var r = µb.htmlFilteringEngine.retrieveScriptTagRegex( + µb.URI.domainFromHostname(hostname), hostname ); // https://github.com/gorhill/uBlock/issues/838 diff --git a/src/js/scriptlet-filtering.js b/src/js/scriptlet-filtering.js new file mode 100644 index 000000000..b10066cf1 --- /dev/null +++ b/src/js/scriptlet-filtering.js @@ -0,0 +1,270 @@ +/******************************************************************************* + + uBlock Origin - a browser extension to block requests. + Copyright (C) 2017 Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. 
+ + Home: https://github.com/gorhill/uBlock +*/ + +'use strict'; + +/******************************************************************************/ + +µBlock.scriptletFilteringEngine = (function() { + var api = {}; + + var µb = µBlock, + scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(), + duplicates = new Set(), + scriptletCache = new µb.MRUCache(32), + exceptionsRegister = new Set(), + scriptletsRegister = new Map(), + reEscapeScriptArg = /[\\'"]/g; + + var scriptletRemover = [ + '(function() {', + ' var c = document.currentScript, p = c && c.parentNode;', + ' if ( p ) { p.removeChild(c); }', + '})();' + ].join('\n'); + + + var lookupScriptlet = function(raw, reng, toInject) { + if ( toInject.has(raw) ) { return; } + if ( scriptletCache.resetTime < reng.modifyTime ) { + scriptletCache.reset(); + } + var content = scriptletCache.lookup(raw); + if ( content === undefined ) { + var token, args, + pos = raw.indexOf(','); + if ( pos === -1 ) { + token = raw; + } else { + token = raw.slice(0, pos).trim(); + args = raw.slice(pos + 1).trim(); + } + content = reng.resourceContentFromName(token, 'application/javascript'); + if ( !content ) { return; } + if ( args ) { + content = patchScriptlet(content, args); + if ( !content ) { return; } + } + scriptletCache.add(raw, content); + } + toInject.set(raw, content); + }; + + // Fill template placeholders. Return falsy if: + // - At least one argument contains anything else than /\w/ and `.` + + var patchScriptlet = function(content, args) { + var i = 1, + pos, arg; + while ( args !== '' ) { + pos = args.indexOf(','); + if ( pos === -1 ) { pos = args.length; } + arg = args.slice(0, pos).trim().replace(reEscapeScriptArg, '\\$&'); + content = content.replace('{{' + i + '}}', arg); + args = args.slice(pos + 1).trim(); + i++; + } + return content; + }; + + var logOne = function(isException, token, details) { + µb.logger.writeOne( + details.tabId, + 'cosmetic', + { + source: 'cosmetic', + raw: (isException ? 
'#@#' : '##') + 'script:inject(' + token + ')' + }, + 'dom', + details.url, + null, + details.hostname + ); + }; + + api.reset = function() { + scriptletDB.clear(); + duplicates.clear(); + }; + + api.freeze = function() { + duplicates.clear(); + }; + + api.compile = function(parsed, writer) { + // 1001 = scriptlet injection + writer.select(1001); + + // Only exception filters are allowed to be global. + + if ( parsed.hostnames.length === 0 ) { + if ( parsed.exception ) { + writer.push([ 32, '!', '', parsed.suffix ]); + } + return; + } + + // https://github.com/gorhill/uBlock/issues/3375 + // Ignore instances of exception filter with negated hostnames, + // because there is no way to create an exception to an exception. + + var µburi = µb.URI; + + for ( var hostname of parsed.hostnames ) { + var negated = hostname.charCodeAt(0) === 0x7E /* '~' */; + if ( negated ) { + hostname = hostname.slice(1); + } + var hash = µburi.domainFromHostname(hostname); + if ( parsed.exception ) { + if ( negated ) { continue; } + hash = '!' + hash; + } else if ( negated ) { + hash = '!' 
+ hash; + } + writer.push([ 32, hash, hostname, parsed.suffix ]); + } + }; + + // 01234567890123456789 + // script:inject(token[, arg[, ...]]) + // ^ ^ + // 14 -1 + + api.fromCompiledContent = function(reader) { + // 1001 = scriptlet injection + reader.select(1001); + + while ( reader.next() ) { + var fingerprint = reader.fingerprint(); + if ( duplicates.has(fingerprint) ) { continue; } + duplicates.add(fingerprint); + var args = reader.args(); + if ( args.length < 4 ) { continue; } + scriptletDB.add( + args[1], + { hostname: args[2], token: args[3].slice(14, -1) } + ); + } + }; + + api.retrieve = function(request) { + if ( scriptletDB.size === 0 ) { return; } + if ( µb.hiddenSettings.ignoreScriptInjectFilters ) { return; } + + var reng = µb.redirectEngine; + if ( !reng ) { return; } + + var hostname = request.hostname; + + // https://github.com/gorhill/uBlock/issues/2835 + // Do not inject scriptlets if the site is under an `allow` rule. + if ( + µb.userSettings.advancedUserEnabled && + µb.sessionFirewall.evaluateCellZY(hostname, hostname, '*') === 2 + ) { + return; + } + + var domain = request.domain, + entity = request.entity, + entries, entry; + + // https://github.com/gorhill/uBlock/issues/1954 + // Implicit + var hn = hostname; + for (;;) { + lookupScriptlet(hn + '.js', reng, scriptletsRegister); + if ( hn === domain ) { break; } + var pos = hn.indexOf('.'); + if ( pos === -1 ) { break; } + hn = hn.slice(pos + 1); + } + if ( entity !== '' ) { + lookupScriptlet(entity + '.js', reng, scriptletsRegister); + } + + // Explicit + entries = []; + if ( domain !== '' ) { + scriptletDB.retrieve(domain, hostname, entries); + scriptletDB.retrieve(entity, entity, entries); + } + scriptletDB.retrieve('', hostname, entries); + for ( entry of entries ) { + lookupScriptlet(entry.token, reng, scriptletsRegister); + } + + if ( scriptletsRegister.size === 0 ) { return; } + + // Collect exception filters. + entries = []; + if ( domain !== '' ) { + scriptletDB.retrieve('!' 
+ domain, hostname, entries); + scriptletDB.retrieve('!' + entity, entity, entries); + } + scriptletDB.retrieve('!', hostname, entries); + for ( entry of entries ) { + exceptionsRegister.add(entry.token); + } + + // Return an array of scriptlets, and log results if needed. + var out = [], + logger = µb.logger.isEnabled() ? µb.logger : null, + isException; + for ( entry of scriptletsRegister ) { + if ( (isException = exceptionsRegister.has(entry[0])) === false ) { + out.push(entry[1]); + } + if ( logger !== null ) { + logOne(isException, entry[0], request); + } + } + + scriptletsRegister.clear(); + exceptionsRegister.clear(); + + if ( out.length === 0 ) { return; } + + out.push(scriptletRemover); + + return out.join('\n'); + }; + + api.apply = function(doc, details) { + var script = doc.createElement('script'); + script.textContent = details.scriptlets; + doc.head.insertBefore(script, doc.head.firstChild); + return true; + }; + + api.toSelfie = function() { + return scriptletDB.toSelfie(); + }; + + api.fromSelfie = function(selfie) { + scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(selfie); + }; + + return api; +})(); + +/******************************************************************************/ diff --git a/src/js/start.js b/src/js/start.js index f28280cf7..57f340812 100644 --- a/src/js/start.js +++ b/src/js/start.js @@ -39,7 +39,7 @@ vAPI.app.onShutdown = function() { µb.staticFilteringReverseLookup.shutdown(); µb.assets.updateStop(); µb.staticNetFilteringEngine.reset(); - µb.cosmeticFilteringEngine.reset(); + µb.staticExtFilteringEngine.reset(); µb.sessionFirewall.reset(); µb.permanentFirewall.reset(); µb.permanentFirewall.reset(); @@ -139,7 +139,7 @@ var onSelfieReady = function(selfie) { µb.availableFilterLists = selfie.availableFilterLists; µb.staticNetFilteringEngine.fromSelfie(selfie.staticNetFilteringEngine); µb.redirectEngine.fromSelfie(selfie.redirectEngine); - µb.cosmeticFilteringEngine.fromSelfie(selfie.cosmeticFilteringEngine); + 
µb.staticExtFilteringEngine.fromSelfie(selfie.staticExtFilteringEngine); return true; }; diff --git a/src/js/static-ext-filtering.js b/src/js/static-ext-filtering.js new file mode 100644 index 000000000..da5224c9e --- /dev/null +++ b/src/js/static-ext-filtering.js @@ -0,0 +1,680 @@ +/******************************************************************************* + + uBlock Origin - a browser extension to block requests. + Copyright (C) 2017 Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. + + Home: https://github.com/gorhill/uBlock +*/ + +/* global punycode */ + +'use strict'; + +/******************************************************************************* + + All static extended filters are of the form: + + field 1: one hostname, or a list of comma-separated hostnames + field 2: `##` or `#@#` + field 3: selector + + The purpose of the static extended filtering engine is to coarse-parse and + dispatch to appropriate specialized filtering engines. There are currently + three specialized filtering engines: + + - cosmetic filtering (aka "element hiding" in Adblock Plus) + - scriptlet injection: selector starts with `script:inject` + - html filtering: selector starts with `^` + + Depending on the specialized filtering engine, field 1 may or may not be + optional. 
+ + The static extended filtering engine also offers parsing capabilities which + are available to all other specialized fitlering engines. For example, + cosmetic and html filtering can ask the extended filtering engine to + compile/validate selectors. + +**/ + +µBlock.staticExtFilteringEngine = (function() { + var µb = µBlock, + reHostnameSeparator = /\s*,\s*/, + reHasUnicode = /[^\x00-\x7F]/, + reIsRegexLiteral = /^\/.+\/$/, + emptyArray = [], + parsed = { + hostnames: [], + exception: false, + suffix: '' + }; + + var isValidCSSSelector = (function() { + var div = document.createElement('div'), + matchesFn; + // Keep in mind: + // https://github.com/gorhill/uBlock/issues/693 + // https://github.com/gorhill/uBlock/issues/1955 + if ( div.matches instanceof Function ) { + matchesFn = div.matches.bind(div); + } else if ( div.mozMatchesSelector instanceof Function ) { + matchesFn = div.mozMatchesSelector.bind(div); + } else if ( div.webkitMatchesSelector instanceof Function ) { + matchesFn = div.webkitMatchesSelector.bind(div); + } else if ( div.msMatchesSelector instanceof Function ) { + matchesFn = div.msMatchesSelector.bind(div); + } else { + matchesFn = div.querySelector.bind(div); + } + // https://github.com/gorhill/uBlock/issues/3111 + // Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817 + // is fixed. 
+ try { + matchesFn(':scope'); + } catch (ex) { + matchesFn = div.querySelector.bind(div); + } + return function(s) { + try { + matchesFn(s + ', ' + s + ':not(#foo)'); + } catch (ex) { + return false; + } + return true; + }; + })(); + + + var isBadRegex = function(s) { + try { + void new RegExp(s); + } catch (ex) { + isBadRegex.message = ex.toString(); + return true; + } + return false; + }; + + var translateAdguardCSSInjectionFilter = function(suffix) { + var matches = /^([^{]+)\{([^}]+)\}$/.exec(suffix); + if ( matches === null ) { return ''; } + return matches[1].trim() + ':style(' + matches[2].trim() + ')'; + }; + + var toASCIIHostname = function(hostname) { + if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) { + return '~' + punycode.toASCII(hostname.slice(1)); + } + return punycode.toASCII(hostname); + }; + + var compileProceduralSelector = (function() { + var reOperatorParser = new RegExp([ + '(:(?:', + [ + '-abp-contains', + '-abp-has', + 'contains', + 'has', + 'has-text', + 'if', + 'if-not', + 'matches-css', + 'matches-css-after', + 'matches-css-before', + 'xpath' + ].join('|'), + '))\\(.+\\)$' + ].join('')); + + var reFirstParentheses = /^\(*/, + reLastParentheses = /\)*$/, + reEscapeRegex = /[.*+?^${}()|[\]\\]/g, + reNeedScope = /^\s*[+>~]/; + + var lastProceduralSelector = '', + lastProceduralSelectorCompiled, + regexToRawValue = new Map(); + + var compileCSSSelector = function(s) { + // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277 + // Prepend `:scope ` if needed. 
+ if ( reNeedScope.test(s) ) { + s = ':scope ' + s; + } + if ( isValidCSSSelector(s) ) { + return s; + } + }; + + var compileText = function(s) { + var reText; + if ( reIsRegexLiteral.test(s) ) { + reText = s.slice(1, -1); + if ( isBadRegex(reText) ) { return; } + } else { + reText = s.replace(reEscapeRegex, '\\$&'); + regexToRawValue.set(reText, s); + } + return reText; + }; + + var compileCSSDeclaration = function(s) { + var name, value, reText, + pos = s.indexOf(':'); + if ( pos === -1 ) { return; } + name = s.slice(0, pos).trim(); + value = s.slice(pos + 1).trim(); + if ( reIsRegexLiteral.test(value) ) { + reText = value.slice(1, -1); + if ( isBadRegex(reText) ) { return; } + } else { + reText = '^' + value.replace(reEscapeRegex, '\\$&') + '$'; + regexToRawValue.set(reText, value); + } + return { name: name, value: reText }; + }; + + var compileConditionalSelector = function(s) { + // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277 + // Prepend `:scope ` if needed. 
+ if ( reNeedScope.test(s) ) { + s = ':scope ' + s; + } + return compile(s); + }; + + var compileXpathExpression = function(s) { + try { + document.createExpression(s, null); + } catch (e) { + return; + } + return s; + }; + + // https://github.com/gorhill/uBlock/issues/2793 + var normalizedOperators = new Map([ + [ ':-abp-contains', ':has-text' ], + [ ':-abp-has', ':if' ], + [ ':contains', ':has-text' ] + ]); + + var compileArgument = new Map([ + [ ':has', compileCSSSelector ], + [ ':has-text', compileText ], + [ ':if', compileConditionalSelector ], + [ ':if-not', compileConditionalSelector ], + [ ':matches-css', compileCSSDeclaration ], + [ ':matches-css-after', compileCSSDeclaration ], + [ ':matches-css-before', compileCSSDeclaration ], + [ ':xpath', compileXpathExpression ] + ]); + + // https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387 + // Normalize (somewhat) the stringified version of procedural + // cosmetic filters -- this increase the likelihood of detecting + // duplicates given that uBO is able to understand syntax specific + // to other blockers. + // The normalized string version is what is reported in the logger, + // by design. 
+ var decompile = function(compiled) { + var raw = [ compiled.selector ], + tasks = compiled.tasks, + value; + if ( Array.isArray(tasks) ) { + for ( var i = 0, n = tasks.length, task; i < n; i++ ) { + task = tasks[i]; + switch ( task[0] ) { + case ':has': + case ':xpath': + raw.push(task[0], '(', task[1], ')'); + break; + case ':has-text': + value = regexToRawValue.get(task[1]); + if ( value === undefined ) { + value = '/' + task[1] + '/'; + } + raw.push(task[0], '(', value, ')'); + break; + case ':matches-css': + case ':matches-css-after': + case ':matches-css-before': + value = regexToRawValue.get(task[1].value); + if ( value === undefined ) { + value = '/' + task[1].value + '/'; + } + raw.push(task[0], '(', task[1].name, ': ', value, ')'); + break; + case ':if': + case ':if-not': + raw.push(task[0], '(', decompile(task[1]), ')'); + break; + } + } + } + return raw.join(''); + }; + + var compile = function(raw) { + var matches = reOperatorParser.exec(raw); + if ( matches === null ) { + if ( isValidCSSSelector(raw) ) { return { selector: raw }; } + return; + } + var tasks = [], + firstOperand = raw.slice(0, matches.index), + currentOperator = matches[1], + selector = raw.slice(matches.index + currentOperator.length), + currentArgument = '', nextOperand, nextOperator, + depth = 0, opening, closing; + if ( + firstOperand !== '' && + isValidCSSSelector(firstOperand) === false + ) { + return; + } + for (;;) { + matches = reOperatorParser.exec(selector); + if ( matches !== null ) { + nextOperand = selector.slice(0, matches.index); + nextOperator = matches[1]; + } else { + nextOperand = selector; + nextOperator = ''; + } + opening = reFirstParentheses.exec(nextOperand)[0].length; + closing = reLastParentheses.exec(nextOperand)[0].length; + if ( opening > closing ) { + if ( depth === 0 ) { currentArgument = ''; } + depth += 1; + } else if ( closing > opening && depth > 0 ) { + depth -= 1; + if ( depth === 0 ) { + nextOperand = currentArgument + nextOperand; + } + } + if ( 
depth !== 0 ) { + currentArgument += nextOperand + nextOperator; + } else { + currentOperator = + normalizedOperators.get(currentOperator) || + currentOperator; + currentArgument = + compileArgument.get(currentOperator)( + nextOperand.slice(1, -1) + ); + if ( currentArgument === undefined ) { return; } + tasks.push([ currentOperator, currentArgument ]); + currentOperator = nextOperator; + } + if ( nextOperator === '' ) { break; } + selector = selector.slice(matches.index + nextOperator.length); + } + if ( tasks.length === 0 || depth !== 0 ) { return; } + return { selector: firstOperand, tasks: tasks }; + }; + + var entryPoint = function(raw) { + if ( raw === lastProceduralSelector ) { + return lastProceduralSelectorCompiled; + } + lastProceduralSelector = raw; + var compiled = compile(raw); + if ( compiled !== undefined ) { + compiled.raw = decompile(compiled); + compiled = JSON.stringify(compiled); + } + lastProceduralSelectorCompiled = compiled; + return compiled; + }; + + entryPoint.reset = function() { + regexToRawValue = new Map(); + lastProceduralSelector = ''; + lastProceduralSelectorCompiled = undefined; + }; + + return entryPoint; + })(); + + //-------------------------------------------------------------------------- + // Public API + //-------------------------------------------------------------------------- + + var api = {}; + + //-------------------------------------------------------------------------- + // Public classes + //-------------------------------------------------------------------------- + + api.HostnameBasedDB = function(selfie) { + if ( selfie !== undefined ) { + this.db = new Map(selfie.map); + this.size = selfie.size; + } else { + this.db = new Map(); + this.size = 0; + } + }; + + api.HostnameBasedDB.prototype = { + add: function(hash, entry) { + var bucket = this.db.get(hash); + if ( bucket === undefined ) { + this.db.set(hash, entry); + } else if ( Array.isArray(bucket) ) { + bucket.push(entry); + } else { + this.db.set(hash, [ 
bucket, entry ]); + } + this.size += 1; + }, + clear: function() { + this.db.clear(); + this.size = 0; + }, + retrieve: function(hash, hostname, out) { + var bucket = this.db.get(hash); + if ( bucket === undefined ) { return; } + if ( Array.isArray(bucket) === false ) { + if ( hostname.endsWith(bucket.hostname) ) { out.push(bucket); } + return; + } + var i = bucket.length; + while ( i-- ) { + var entry = bucket[i]; + if ( hostname.endsWith(entry.hostname) ) { out.push(entry); } + } + }, + toSelfie: function() { + return { + map: Array.from(this.db), + size: this.size + }; + } + }; + + api.HostnameBasedDB.prototype[Symbol.iterator] = (function() { + var Iter = function(db) { + this.mapIter = db.values(); + this.arrayIter = undefined; + }; + Iter.prototype.next = function() { + var result; + if ( this.arrayIter !== undefined ) { + result = this.arrayIter.next(); + if ( result.done === false ) { return result; } + this.arrayIter = undefined; + } + result = this.mapIter.next(); + if ( result.done || Array.isArray(result.value) === false ) { + return result; + } + this.arrayIter = result.value[Symbol.iterator](); + return this.arrayIter.next(); // array should never be empty + }; + return function() { + return new Iter(this.db); + }; + })(); + + //-------------------------------------------------------------------------- + // Public methods + //-------------------------------------------------------------------------- + + api.reset = function() { + compileProceduralSelector.reset(); + µb.cosmeticFilteringEngine.reset(); + µb.scriptletFilteringEngine.reset(); + µb.htmlFilteringEngine.reset(); + }; + + api.freeze = function() { + compileProceduralSelector.reset(); + µb.cosmeticFilteringEngine.freeze(); + µb.scriptletFilteringEngine.freeze(); + µb.htmlFilteringEngine.freeze(); + }; + + // https://github.com/chrisaljoudi/uBlock/issues/1004 + // Detect and report invalid CSS selectors. 
+ + // Discard new ABP's `-abp-properties` directive until it is + // implemented (if ever). Unlikely, see: + // https://github.com/gorhill/uBlock/issues/1752 + + // https://github.com/gorhill/uBlock/issues/2624 + // Convert Adguard's `-ext-has='...'` into uBO's `:has(...)`. + + api.compileSelector = (function() { + var reAfterBeforeSelector = /^(.+?)(::?after|::?before)$/, + reStyleSelector = /^(.+?):style\((.+?)\)$/, + reStyleBad = /url\([^)]+\)/, + reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/, + reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/, + div = document.createElement('div'); + + var normalizedExtendedSyntaxOperators = new Map([ + [ 'contains', ':has-text' ], + [ 'has', ':if' ], + [ 'matches-css', ':matches-css' ], + [ 'matches-css-after', ':matches-css-after' ], + [ 'matches-css-before', ':matches-css-before' ], + ]); + + var isValidStyleProperty = function(cssText) { + if ( reStyleBad.test(cssText) ) { return false; } + div.style.cssText = cssText; + if ( div.style.cssText === '' ) { return false; } + div.style.cssText = ''; + return true; + }; + + var entryPoint = function(raw) { + var extendedSyntax = reExtendedSyntax.test(raw); + if ( isValidCSSSelector(raw) && extendedSyntax === false ) { + return raw; + } + + // We rarely reach this point -- majority of selectors are plain + // CSS selectors. + + var matches, operator; + + // Supported Adguard/ABP advanced selector syntax: will translate into + // uBO's syntax before further processing. + // Mind unsupported advanced selector syntax, such as ABP's + // `-abp-properties`. + // Note: extended selector syntax has been deprecated in ABP, in favor + // of the procedural one (i.e. `:operator(...)`). 
See + // https://issues.adblockplus.org/ticket/5287 + if ( extendedSyntax ) { + while ( (matches = reExtendedSyntaxParser.exec(raw)) !== null ) { + operator = normalizedExtendedSyntaxOperators.get(matches[1]); + if ( operator === undefined ) { return; } + raw = raw.slice(0, matches.index) + + operator + '(' + matches[3] + ')' + + raw.slice(matches.index + matches[0].length); + } + return entryPoint(raw); + } + + var selector = raw, + pseudoclass, style; + + // `:style` selector? + if ( (matches = reStyleSelector.exec(selector)) !== null ) { + selector = matches[1]; + style = matches[2]; + } + + // https://github.com/gorhill/uBlock/issues/2448 + // :after- or :before-based selector? + if ( (matches = reAfterBeforeSelector.exec(selector)) ) { + selector = matches[1]; + pseudoclass = matches[2]; + } + + if ( style !== undefined || pseudoclass !== undefined ) { + if ( isValidCSSSelector(selector) === false ) { + return; + } + if ( pseudoclass !== undefined ) { + selector += pseudoclass; + } + if ( style !== undefined ) { + if ( isValidStyleProperty(style) === false ) { return; } + return JSON.stringify({ + raw: raw, + style: [ selector, style ] + }); + } + return JSON.stringify({ + raw: raw, + pseudoclass: true + }); + } + + // Procedural selector? + var compiled; + if ( (compiled = compileProceduralSelector(raw)) ) { + return compiled; + } + + µb.logger.writeOne( + '', + 'error', + 'Cosmetic filtering – invalid filter: ' + raw + ); + }; + + return entryPoint; + })(); + + api.compile = function(raw, writer) { + var lpos = raw.indexOf('#'); + if ( lpos === -1 ) { return false; } + var rpos = lpos + 1; + if ( raw.charCodeAt(rpos) !== 0x23 /* '#' */ ) { + rpos = raw.indexOf('#', rpos + 1); + if ( rpos === -1 ) { return false; } + } + + // Coarse-check that the anchor is valid. + // `##`: l = 1 + // `#@#`, `#$#`, `#%#`, `#?#`: l = 2 + // `#@$#`, `#@%#`, `#@?#`: l = 3 + if ( (rpos - lpos) > 3 ) { return false; } + + // Extract the selector. 
+        var suffix = parsed.suffix = raw.slice(rpos + 1).trim();
+        if ( suffix.length === 0 ) { return false; }
+
+        // https://github.com/gorhill/uBlock/issues/952
+        // Find out whether we are dealing with an Adguard-specific cosmetic
+        // filter, and if so, translate it if supported, or discard it if not
+        // supported.
+        // We have an Adguard/ABP cosmetic filter if and only if the
+        // character is `$`, `%` or `?`, otherwise it's not a cosmetic
+        // filter.
+        var cCode = raw.charCodeAt(rpos - 1);
+        if ( cCode !== 0x23 /* '#' */ && cCode !== 0x40 /* '@' */ ) {
+            // Adguard's scriptlet injection: not supported.
+            if ( cCode === 0x25 /* '%' */ ) { return true; }
+            // Not a known extended filter.
+            if ( cCode !== 0x24 /* '$' */ && cCode !== 0x3F /* '?' */ ) {
+                return false;
+            }
+            // Adguard's style injection: translate to uBO's format.
+            if ( cCode === 0x24 /* '$' */ ) {
+                suffix = translateAdguardCSSInjectionFilter(suffix);
+                if ( suffix === '' ) { return true; }
+                // The specialized engines are handed `parsed`, not the local
+                // `suffix`: keep both in sync, as is done for the
+                // `script:contains` conversion further down.
+                parsed.suffix = suffix;
+            }
+        }
+
+        // Exception filter?
+        parsed.exception = raw.charCodeAt(lpos + 1) === 0x40 /* '@' */;
+
+        // Extract the hostname(s), punycode if required.
+        if ( lpos === 0 ) {
+            parsed.hostnames = emptyArray;
+        } else {
+            var prefix = raw.slice(0, lpos);
+            parsed.hostnames = prefix.split(reHostnameSeparator);
+            if ( reHasUnicode.test(prefix) ) {
+                // Convert each hostname individually: `parsed.hostnames` must
+                // remain an array, like the `emptyArray`/`split()` values
+                // assigned above. Assigning the converted string to
+                // `parsed.hostnames` itself would leave a single string (the
+                // last hostname) in place of the list.
+                parsed.hostnames = parsed.hostnames.map(function(hostname) {
+                    return toASCIIHostname(hostname);
+                });
+            }
+        }
+
+        if ( suffix.startsWith('script:') ) {
+            // Scriptlet injection engine.
+            if ( suffix.startsWith('script:inject') ) {
+                µb.scriptletFilteringEngine.compile(parsed, writer);
+                return true;
+            }
+            // Script tag filtering: courtesy-conversion to HTML filtering.
+            if ( parsed.suffix.startsWith('script:contains') ) {
+                console.info(
+                    'uBO: ##script:contains(...) is deprecated, ' +
+                    'converting to ##^script:has-text(...)'
+                );
+                suffix = parsed.suffix = suffix.replace(
+                    /^script:contains/,
+                    '^script:has-text'
+                );
+            }
+        }
+
+        // HTML filtering engine.
+ // TODO: evaluate converting Adguard's `$$` syntax into uBO's HTML + // filtering syntax. + if ( suffix.charCodeAt(0) === 0x5E /* '^' */ ) { + µb.htmlFilteringEngine.compile(parsed, writer); + return true; + } + + // Cosmetic filtering engine. + µb.cosmeticFilteringEngine.compile(parsed, writer); + return true; + }; + + api.fromCompiledContent = function(reader, options) { + µb.cosmeticFilteringEngine.fromCompiledContent(reader, options); + µb.scriptletFilteringEngine.fromCompiledContent(reader, options); + µb.htmlFilteringEngine.fromCompiledContent(reader, options); + }; + + api.toSelfie = function() { + return { + cosmetic: µb.cosmeticFilteringEngine.toSelfie(), + scriptlets: µb.scriptletFilteringEngine.toSelfie(), + html: µb.htmlFilteringEngine.toSelfie() + + }; + }; + + api.fromSelfie = function(selfie) { + µb.cosmeticFilteringEngine.fromSelfie(selfie.cosmetic); + µb.scriptletFilteringEngine.fromSelfie(selfie.scriptlets); + µb.htmlFilteringEngine.fromSelfie(selfie.html); + }; + + return api; +})(); + +/******************************************************************************/ diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index c32281520..d228282ee 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -2116,6 +2116,9 @@ FilterContainer.prototype.compile = function(raw, writer) { return false; } + // 0 = network filters + writer.select(0); + // Pure hostnames, use more efficient dictionary lookup // https://github.com/chrisaljoudi/uBlock/issues/665 // Create a dict keyed on request type etc. 
@@ -2268,6 +2271,9 @@ FilterContainer.prototype.fromCompiledContent = function(reader) { args, bits, bucket, entry, tokenHash, fdata, fingerprint; + // 0 = network filters + reader.select(0); + while ( reader.next() === true ) { args = reader.args(); bits = args[0]; diff --git a/src/js/storage.js b/src/js/storage.js index 256e80d15..d8dfaf1d0 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -347,7 +347,7 @@ vAPI.storage.set({ 'availableFilterLists': µb.availableFilterLists }); µb.staticNetFilteringEngine.freeze(); µb.redirectEngine.freeze(); - µb.cosmeticFilteringEngine.freeze(); + µb.staticExtFilteringEngine.freeze(); µb.selfieManager.destroy(); }; @@ -543,7 +543,7 @@ var onDone = function() { µb.staticNetFilteringEngine.freeze(); - µb.cosmeticFilteringEngine.freeze(); + µb.staticExtFilteringEngine.freeze(); µb.redirectEngine.freeze(); vAPI.storage.set({ 'availableFilterLists': µb.availableFilterLists }); @@ -586,7 +586,7 @@ µb.availableFilterLists = lists; µb.redirectEngine.reset(); - µb.cosmeticFilteringEngine.reset(); + µb.staticExtFilteringEngine.reset(); µb.staticNetFilteringEngine.reset(); µb.selfieManager.destroy(); µb.staticFilteringReverseLookup.resetLists(); @@ -703,23 +703,22 @@ /******************************************************************************/ µBlock.compileFilters = function(rawText) { - var networkFilters = new this.CompiledLineWriter(), - cosmeticFilters = new this.CompiledLineWriter(); + var writer = new this.CompiledLineWriter(); // Useful references: // https://adblockplus.org/en/filter-cheatsheet // https://adblockplus.org/en/filters var staticNetFilteringEngine = this.staticNetFilteringEngine, - cosmeticFilteringEngine = this.cosmeticFilteringEngine, + staticExtFilteringEngine = this.staticExtFilteringEngine, reIsWhitespaceChar = /\s/, reMaybeLocalIp = /^[\d:f]/, - reIsLocalhostRedirect = /\s+(?:broadcasthost|local|localhost|localhost\.localdomain)(?=\s|$)/, + reIsLocalhostRedirect = 
/\s+(?:broadcasthost|local|localhost|localhost\.localdomain)\b/, reLocalIp = /^(?:0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)/, - line, lineRaw, c, pos, + line, c, pos, lineIter = new this.LineIterator(rawText); while ( lineIter.eot() === false ) { - line = lineRaw = lineIter.next().trim(); + line = lineIter.next().trim(); // rhill 2014-04-18: The trim is important here, as without it there // could be a lingering `\r` which would cause problems in the @@ -733,9 +732,7 @@ // Parse or skip cosmetic filters // All cosmetic filters are caught here - if ( cosmeticFilteringEngine.compile(line, cosmeticFilters) ) { - continue; - } + if ( staticExtFilteringEngine.compile(line, writer) ) { continue; } // Whatever else is next can be assumed to not be a cosmetic filter @@ -767,12 +764,10 @@ if ( line.length === 0 ) { continue; } - staticNetFilteringEngine.compile(line, networkFilters); + staticNetFilteringEngine.compile(line, writer); } - return networkFilters.toString() + - '\n/* end of network - start of cosmetic */\n' + - cosmeticFilters.toString(); + return writer.toString(); }; /******************************************************************************/ @@ -783,15 +778,12 @@ µBlock.applyCompiledFilters = function(rawText, firstparty) { if ( rawText === '' ) { return; } - var separator = '\n/* end of network - start of cosmetic */\n', - pos = rawText.indexOf(separator), - reader = new this.CompiledLineReader(rawText.slice(0, pos)); + var reader = new this.CompiledLineReader(rawText); this.staticNetFilteringEngine.fromCompiledContent(reader); - this.cosmeticFilteringEngine.fromCompiledContent( - reader.reset(rawText.slice(pos + separator.length)), - this.userSettings.ignoreGenericCosmeticFilters, - !firstparty && !this.userSettings.parseAllABPHideFilters - ); + this.staticExtFilteringEngine.fromCompiledContent(reader, { + skipGenericCosmetic: this.userSettings.ignoreGenericCosmeticFilters, + skipCosmetic: !firstparty && !this.userSettings.parseAllABPHideFilters + }); }; 
/******************************************************************************/ @@ -885,7 +877,7 @@ availableFilterLists: this.availableFilterLists, staticNetFilteringEngine: this.staticNetFilteringEngine.toSelfie(), redirectEngine: this.redirectEngine.toSelfie(), - cosmeticFilteringEngine: this.cosmeticFilteringEngine.toSelfie() + staticExtFilteringEngine: this.staticExtFilteringEngine.toSelfie() }; vAPI.cacheStorage.set({ selfie: selfie }); }.bind(µBlock); @@ -1068,7 +1060,7 @@ this.availableFilterLists.hasOwnProperty(details.assetKey) === false || this.selectedFilterLists.indexOf(details.assetKey) === -1 ) { - return false; + return; } } // https://github.com/gorhill/uBlock/issues/2594 @@ -1077,10 +1069,10 @@ this.hiddenSettings.ignoreRedirectFilters === true && this.hiddenSettings.ignoreScriptInjectFilters === true ) { - return false; + return; } } - return; + return true; } // Compile the list while we have the raw version in memory diff --git a/src/js/traffic.js b/src/js/traffic.js index eaea84158..1ff0c01d0 100644 --- a/src/js/traffic.js +++ b/src/js/traffic.js @@ -480,9 +480,10 @@ onBeforeMaybeSpuriousCSPReport.textDecoder = undefined; /******************************************************************************/ // To handle: -// - inline script tags -// - websockets -// - media elements larger than n kB +// - Media elements larger than n kB +// - Scriptlet injection (requires ability to modify response body) +// - HTML filtering (requires ability to modify response body) +// - CSP injection var onHeadersReceived = function(details) { // Do not interfere with behind-the-scene requests. 
@@ -490,15 +491,17 @@ var onHeadersReceived = function(details) { if ( vAPI.isBehindTheSceneTabId(tabId) ) { return; } var µb = µBlock, - requestType = details.type; + requestType = details.type, + isRootDoc = requestType === 'main_frame', + isDoc = isRootDoc || requestType === 'sub_frame'; - if ( requestType === 'main_frame' ) { + if ( isRootDoc ) { µb.tabContextManager.push(tabId, details.url); } var pageStore = µb.pageStoreFromTabId(tabId); if ( pageStore === null ) { - if ( requestType !== 'main_frame' ) { return; } + if ( isRootDoc === false ) { return; } pageStore = µb.bindTabToPageStats(tabId, 'beforeRequest'); } if ( pageStore.getNetFilteringSwitch() === false ) { return; } @@ -507,24 +510,283 @@ var onHeadersReceived = function(details) { return foilLargeMediaElement(pageStore, details); } + if ( isDoc && µb.canFilterResponseBody ) { + filterDocument(details); + } + // https://github.com/gorhill/uBlock/issues/2813 // Disable the blocking of large media elements if the document is itself // a media element: the resource was not prevented from loading so no // point to further block large media elements for the current document. - if ( requestType === 'main_frame' ) { + if ( isRootDoc ) { if ( reMediaContentTypes.test(headerValueFromName('content-type', details.responseHeaders)) ) { pageStore.allowLargeMediaElementsUntil = Date.now() + 86400000; } return injectCSP(pageStore, details); } - if ( requestType === 'sub_frame' ) { + if ( isDoc ) { return injectCSP(pageStore, details); } }; var reMediaContentTypes = /^(?:audio|image|video)\//; +/******************************************************************************* + + The response body filterer is responsible for: + + - Scriptlet filtering + - HTML filtering + + In the spirit of efficiency, the response body filterer works this way: + + If: + - HTML filtering: no. + - Scriptlet filtering: no. + Then: + No response body filtering is initiated. + + If: + - HTML filtering: no. + - Scriptlet filtering: yes. 
+    Then:
+        Inject scriptlets before first chunk of response body data reported
+        then immediately disconnect response body data listener.
+
+    If:
+        - HTML filtering: yes.
+        - Scriptlet filtering: no/yes.
+    Then:
+        Assemble all response body data into a single buffer. Once all the
+        response data has been received, create a document from it. Then:
+        - Inject scriptlets in the resulting DOM.
+        - Remove all DOM elements matching HTML filters.
+        Then serialize the resulting modified document as the new response
+        body.
+
+    This way, the overhead is minimal for when only scriptlets need to be
+    injected.
+
+    If the platform does not support response body filtering, the scriptlets
+    will be injected the old way, through the content script.
+
+**/
+
+var filterDocument = (function() {
+    var µb = µBlock,
+        filterers = new Map(),
+        // Matches the leading DOCTYPE declaration. Case-insensitive because
+        // HTML accepts `<!DOCTYPE`, `<!doctype`, etc.
+        reDoctype = /^\s*<!doctype[^>]+?>/i,
+        reJustASCII = /^[\x00-\x7E]*$/,
+        domParser, xmlSerializer,
+        textDecoderCharset, textDecoder, textEncoder;
+
+    // Fast path, tried on the first chunk of response data only: when there
+    // are scriptlets to inject, no HTML filters to apply and no non-utf-8
+    // charset to deal with, write the scriptlets right after the DOCTYPE and
+    // disconnect from the stream.
+    // Returns true if the job was completed this way, false if the caller
+    // must fall back to buffering the whole response.
+    var streamJobDone = function(filterer, responseBytes) {
+        if (
+            filterer.scriptlets === undefined ||
+            filterer.selectors !== undefined ||
+            filterer.charset !== undefined
+        ) {
+            return false;
+        }
+        if ( textDecoder === undefined ) {
+            textDecoder = new TextDecoder();
+        }
+        // We need to insert after DOCTYPE, or else the browser may fall into
+        // quirks mode.
+        var responseStr = textDecoder.decode(responseBytes);
+        var match = reDoctype.exec(responseStr);
+        if ( match === null ) { return false; }
+        filterers.delete(filterer.stream);
+        if ( textEncoder === undefined ) {
+            textEncoder = new TextEncoder();
+        }
+        var beforeByteLength = match.index + match[0].length;
+        var beforeBytes = reJustASCII.test(match[0]) ?
+            new Uint8Array(responseBytes, 0, beforeByteLength) :
+            textEncoder.encode(responseStr.slice(0, beforeByteLength));
+        filterer.stream.write(beforeBytes);
+        // The scriptlets are injected as an inline script element placed
+        // immediately after the DOCTYPE.
+        filterer.stream.write(
+            textEncoder.encode('<script>' + filterer.scriptlets + '</script>')
+        );
+        filterer.stream.write(
+            new Uint8Array(responseBytes, beforeBytes.byteLength)
+        );
+        filterer.stream.disconnect();
+        return true;
+    };
+
+    var streamClose = function(filterer, buffer) {
+        if ( buffer !== undefined ) {
+            filterer.stream.write(buffer);
+        } else if ( filterer.buffer !== undefined ) {
+            filterer.stream.write(filterer.buffer);
+        }
+        filterer.stream.close();
+    };
+
+    var onStreamData = function(ev) {
+        var filterer = filterers.get(this);
+        if ( filterer === undefined ) {
+            this.write(ev.data);
+            this.disconnect();
+            return;
+        }
+        if (
+            this.status !== 'transferringdata' &&
+            this.status !== 'finishedtransferringdata'
+        ) {
+            filterers.delete(this);
+            this.disconnect();
+            return;
+        }
+        // TODO: possibly improve buffer growth, if benchmarking shows it's
+        // worth it.
+        if ( filterer.buffer === null ) {
+            if ( streamJobDone(filterer, ev.data) ) { return; }
+            filterer.buffer = new Uint8Array(ev.data);
+            return;
+        }
+        var buffer = new Uint8Array(
+            filterer.buffer.byteLength +
+            ev.data.byteLength
+        );
+        buffer.set(filterer.buffer);
+        buffer.set(new Uint8Array(ev.data), filterer.buffer.byteLength);
+        filterer.buffer = buffer;
+    };
+
+    var onStreamStop = function() {
+        var filterer = filterers.get(this);
+        filterers.delete(this);
+        if ( filterer === undefined || filterer.buffer === null ) {
+            this.close();
+            return;
+        }
+        if ( this.status !== 'finishedtransferringdata' ) { return; }
+
+        if ( domParser === undefined ) {
+            domParser = new DOMParser();
+            xmlSerializer = new XMLSerializer();
+        }
+        if ( textEncoder === undefined ) {
+            textEncoder = new TextEncoder();
+        }
+
+        // In case of unknown charset, assume utf-8.
+ if ( filterer.charset !== textDecoderCharset ) { + textDecoder = undefined; + } + if ( textDecoder === undefined ) { + try { + textDecoder = new TextDecoder(filterer.charset); + textDecoderCharset = filterer.charset; + } catch(ex) { + textDecoder = new TextDecoder(); + textDecoderCharset = undefined; + } + } + + var doc = domParser.parseFromString( + textDecoder.decode(filterer.buffer), + 'text/html' + ); + + var modified = false; + if ( filterer.selectors !== undefined ) { + if ( µb.htmlFilteringEngine.apply(doc, filterer) ) { + modified = true; + } + } + if ( filterer.scriptlets !== undefined ) { + if ( µb.scriptletFilteringEngine.apply(doc, filterer) ) { + modified = true; + } + } + + if ( modified === false ) { + streamClose(filterer); + return; + } + + // If the charset of the document was not utf-8, we need to change it + // to utf-8. + if ( textDecoderCharset !== undefined ) { + var meta = doc.createElement('meta'); + meta.setAttribute('charset', 'utf-8'); + doc.head.insertBefore(meta, doc.head.firstChild); + } + + // https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353 + var doctypeStr = doc.doctype instanceof Object ? 
+ xmlSerializer.serializeToString(doc.doctype) + '\n' : + ''; + + streamClose( + filterer, + textEncoder.encode(doctypeStr + doc.documentElement.outerHTML) + ); + }; + + var onStreamError = function() { + filterers.delete(this); + }; + + return function(details) { + var hostname = µb.URI.hostnameFromURI(details.url); + if ( hostname === '' ) { return; } + + var domain = µb.URI.domainFromHostname(hostname); + + var request = { + stream: undefined, + tabId: details.tabId, + url: details.url, + hostname: hostname, + domain: domain, + entity: µb.URI.entityFromDomain(domain), + selectors: undefined, + scriptlets: undefined, + buffer: null, + charset: undefined + }; + request.selectors = µb.htmlFilteringEngine.retrieve(request); + request.scriptlets = µb.scriptletFilteringEngine.retrieve(request); + + if ( + request.selectors === undefined && + request.scriptlets === undefined + ) { + return; + } + + var headers = details.responseHeaders, + contentType = headerValueFromName('content-type', headers); + if ( contentType !== '' ) { + if ( reContentTypeDocument.test(contentType) === false ) { return; } + var match = reContentTypeCharset.exec(contentType); + if ( match !== null ) { + var charset = match[1].toLowerCase(); + if ( charset !== 'utf-8' ) { + request.charset = charset; + } + } + } + // https://bugzilla.mozilla.org/show_bug.cgi?id=1426789 + if ( headerValueFromName('content-disposition', headers) ) { return; } + + var stream = request.stream = + vAPI.net.webRequest.filterResponseData(details.requestId); + stream.ondata = onStreamData; + stream.onstop = onStreamStop; + stream.onerror = onStreamError; + filterers.set(stream, request); + }; +})(); + +var reContentTypeDocument = /^(?:text\/html|application\/xhtml+xml)/i; +var reContentTypeCharset = /charset=['"]?([^'" ]+)/i; + /******************************************************************************/ var injectCSP = function(pageStore, details) { diff --git a/src/js/uritools.js b/src/js/uritools.js index 
eaca7731d..a988af4e0 100644 --- a/src/js/uritools.js +++ b/src/js/uritools.js @@ -1,7 +1,7 @@ /******************************************************************************* uBlock Origin - a browser extension to block requests. - Copyright (C) 2014-2016 Raymond Hill + Copyright (C) 2014-2017 Raymond Hill This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -309,6 +309,13 @@ var psl = publicSuffixList; /******************************************************************************/ +URI.entityFromDomain = function(domain) { /* Returns the part of `domain` before its first '.' followed by '.*' (e.g. "example.com" -> "example.*"), or '' when `domain` contains no '.'. */ + var pos = domain.indexOf('.'); + return pos !== -1 ? domain.slice(0, pos) + '.*' : ''; +}; + +/******************************************************************************/ + URI.pathFromURI = function(uri) { var matches = rePathFromURI.exec(uri); return matches !== null ? matches[1] : ''; diff --git a/src/js/utils.js b/src/js/utils.js index 709937524..918a49b1b 100644 --- a/src/js/utils.js +++ b/src/js/utils.js @@ -225,7 +225,9 @@ /******************************************************************************/ µBlock.CompiledLineWriter = function() { - this.output = []; + this.blockId = undefined; + this.block = undefined; /* array receiving push() output; stays undefined until select() is called */ + this.blocks = new Map(); this.stringifier = JSON.stringify; }; @@ -235,46 +237,82 @@ µBlock.CompiledLineWriter.prototype = { push: function(args) { - this.output[this.output.length] = this.stringifier(args); + this.block[this.block.length] = this.stringifier(args); + }, + select: function(blockId) { /* Makes `blockId` the block push() appends to, creating its array on first use. */ + if ( blockId === this.blockId ) { return; } + this.blockId = blockId; + this.block = this.blocks.get(blockId); + if ( this.block === undefined ) { + this.blocks.set(blockId, (this.block = [])); + } }, toString: function() { - return this.output.join('\n'); + var result = []; /* each non-empty block is emitted between #block-start-<id> and #block-end-<id> marker lines */ + for ( var entry of this.blocks ) { + if ( entry[1].length === 0 ) { continue; } + result.push( + '#block-start-' + entry[0], + entry[1].join('\n'), + 
'#block-end-' + entry[0] + ); + } + return result.join('\n'); } }; -µBlock.CompiledLineReader = function(raw) { - this.reset(raw); +/******************************************************************************/ + +µBlock.CompiledLineReader = function(raw, blockId) { /* Splits `raw` into the blocks delimited by #block-start-<id> / #block-end-<id> markers, stored by numeric id; when `blockId` is given that block is selected for reading. */ + this.block = ''; + this.len = 0; + this.offset = 0; + this.line = ''; this.parser = JSON.parse; + this.blocks = new Map(); + var reBlockStart = /^#block-start-(\d+)\n/gm, + match = reBlockStart.exec(raw), + beg, end; + while ( match !== null ) { + beg = match.index + match[0].length; + end = raw.indexOf('#block-end-' + match[1], beg); + if ( end === -1 ) { break; } /* unterminated block: a -1 lastIndex is clamped to 0 by exec(), which would rescan from the start and never terminate */ + this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end)); + reBlockStart.lastIndex = end; + match = reBlockStart.exec(raw); + } + if ( blockId !== undefined ) { + this.select(blockId); + } }; µBlock.CompiledLineReader.prototype = { - reset: function(raw) { - this.input = raw; - this.len = raw.length; - this.offset = 0; - this.s = ''; - return this; - }, next: function() { if ( this.offset === this.len ) { - this.s = ''; + this.line = ''; return false; } - var pos = this.input.indexOf('\n', this.offset); + var pos = this.block.indexOf('\n', this.offset); if ( pos !== -1 ) { - this.s = this.input.slice(this.offset, pos); + this.line = this.block.slice(this.offset, pos); this.offset = pos + 1; } else { - this.s = this.input.slice(this.offset); + this.line = this.block.slice(this.offset); this.offset = this.len; } return true; }, + select: function(blockId) { /* Points next() at the start of block `blockId`, or at an empty block when that id is not present. */ + this.block = this.blocks.get(blockId) || ''; + this.len = this.block.length; + this.offset = 0; + return this; + }, fingerprint: function() { - return this.s; + return this.line; }, args: function() { - return this.parser(this.s); + return this.parser(this.line); } };