1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-11-06 10:52:38 +01:00
uBlock/src/js/reverselookup.js

224 lines
6.2 KiB
JavaScript
Raw Normal View History

/*******************************************************************************
uBlock Origin - a comprehensive, efficient content blocker
Copyright (C) 2015-present Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
'use strict';
/******************************************************************************/
import staticNetFilteringEngine from './static-net-filtering.js';
import µb from './background.js';
import { CompiledListWriter } from './static-filtering-io.js';
import { i18n$ } from './i18n.js';
import * as sfp from './static-filtering-parser.js';
import {
domainFromHostname,
hostnameFromURI,
} from './uri-utils.js';
/******************************************************************************/
// Id assigned to the next request sent to the worker; incremented on use.
let messageId = 1;

// Resolver functions of requests still awaiting a reply, keyed by request id.
const pendingResponses = new Map();

// The reverse lookup worker, or null when no worker is currently alive.
let worker = null;

// True when the filter lists must be sent to the worker before it is queried.
let needLists = true;
// Handle a message coming from the worker: look up the resolver registered
// under the message id, unregister it, and call it with the worker's response.
//
// e: the message event; `e.data` is expected to carry `id` and `response`.
const onWorkerMessage = function(e) {
    const { id, response } = e.data;
    const resolver = pendingResponses.get(id);
    // A reply for which nothing is registered (unknown id, or a request
    // already settled by stopWorker) is ignored rather than throwing on an
    // attempt to call `undefined`.
    if ( resolver === undefined ) { return; }
    pendingResponses.delete(id);
    resolver(response);
};
// Shut the worker down. The time-to-live timer is always disarmed; when a
// worker exists it is terminated, the filter lists are flagged as needing to
// be sent again, and every pending request is settled with no value.
const stopWorker = () => {
    workerTTLTimer.off();
    if ( worker !== null ) {
        worker.terminate();
        worker = null;
        needLists = true;
        // Nobody will ever answer these requests: release the callers.
        pendingResponses.forEach(settle => { settle(); });
        pendingResponses.clear();
    }
};
// Timer object created with stopWorker as its callback. initWorker() re-arms
// it through offon() on every use, and stopWorker() disarms it through off().
const workerTTLTimer = vAPI.defer.create(stopWorker);
/*
    Commit note (2024-02-26 22:50:11 +01:00): Redesign cache storage

    In uBO, the "cache storage" is used to save resources which can be safely
    discarded, though at the cost of having to fetch or recompute them again.

    Extension storage (browser.storage.local) is now always used as cache
    storage backend. This has always been the default for Chromium-based
    browsers. For Firefox-based browsers, IndexedDB was used as backend for
    cache storage, with fallback to extension storage when using Firefox in
    private mode by default.

    Extension storage is reliable since it works in all contexts, though it
    may not be the most performant one. To speed up loading of resources from
    extension storage, uBO will now make use of Cache API storage, which will
    mirror content of key assets saved to extension storage. Typically loading
    resources from Cache API is faster than loading the same resources from
    the extension storage.

    Only resources which must be loaded in memory as fast as possible will
    make use of the Cache API storage layered on top of the extension storage.
    Compiled filter lists and memory snapshot of filtering engines (aka
    "selfies") will be mirrored to the Cache API storage, since these must be
    loaded into memory as fast as possible, and reloading filter lists from
    their compiled counterpart is a common operation.

    This new design makes it now seamless to work in permanent private mode
    for Firefox-based browsers, since extension storage now always contains
    cache-related assets. Support for IndexedDB is removed for the time being,
    except to support migration of cached assets the first time uBO runs with
    the new cache storage design.

    In order to easily support all choices of storage, a new serializer has
    been introduced, which is capable of serializing/deserializing
    structure-cloneable data to/from a JS string. Because of this new
    serializer, JS data structures can be stored directly from their native
    representation, and deserialized directly to their native representation
    from uBO's point of view, since the serialization occurs (if needed) only
    at the storage interface level.

    This new serializer simplifies many code paths where data structures such
    as Set, Map, TypedArray, RegExp, etc. had to be converted in a disparate
    manner to be able to persist them to extension storage. The new serializer
    supports workers and LZ4 compression. These can be configured through
    advanced settings.

    With this new layered design, it's possible to introduce more storage
    layers if measured as beneficial (i.e. maybe browser.storage.session)

    References:
    - https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/storage/local
    - https://developer.mozilla.org/en-US/docs/Web/API/Cache
    - https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm
*/
// Delay handed to workerTTLTimer.offon() by initWorker(). Presumably `min`
// stands for minutes (initWorker mentions a shutdown "after n minutes") --
// confirm against vAPI.defer.
const workerTTL = { min: 1.5 };
// Promise for the most recent transfer of filter lists to the worker. Kept so
// that callers arriving while a transfer is still in progress wait for that
// transfer instead of querying a worker holding only part of the lists.
let listsTransferPromise = Promise.resolve();

// Ensure the worker exists, re-arm its time-to-live timer and, when flagged by
// `needLists`, send it the compiled content of every filter list not marked
// as `off`.
//
// Returns a promise which settles once the current transfer of filter lists
// (if any) is complete. When the transfer fails, the promise rejects and
// `needLists` is set again so that a later call retries.
const initWorker = function() {
    if ( worker === null ) {
        worker = new Worker('js/reverselookup-worker.js');
        worker.onmessage = onWorkerMessage;
    }

    // The worker will be shutdown after n minutes without being used.
    workerTTLTimer.offon(workerTTL);

    if ( needLists === false ) {
        return listsTransferPromise;
    }
    needLists = false;

    const entries = new Map();
    for ( const [ listKey, entry ] of Object.entries(µb.availableFilterLists) ) {
        if ( entry.off === true ) { continue; }
        entries.set(listKey, {
            title: listKey !== µb.userFiltersPath ?
                entry.title :
                i18n$('1pPageName'),
            supportURL: entry.supportURL || ''
        });
    }
    if ( entries.size === 0 ) {
        listsTransferPromise = Promise.resolve();
        return listsTransferPromise;
    }

    // The worker this transfer is meant for. It may be terminated, or replaced
    // by a new one, while the lists are still being loaded.
    const target = worker;

    const onListLoaded = function(details) {
        // Lists loaded for a worker which is gone are discarded: stopWorker
        // has set `needLists`, so the next call starts a fresh transfer.
        if ( worker !== target ) { return; }
        const entry = entries.get(details.assetKey);
        // https://github.com/gorhill/uBlock/issues/536
        // Use assetKey when there is no filter list title.
        target.postMessage({
            what: 'setList',
            details: {
                assetKey: details.assetKey,
                title: entry.title || details.assetKey,
                supportURL: entry.supportURL,
                content: details.content
            }
        });
    };

    const transfers = Array.from(entries.keys(), listKey =>
        µb.getCompiledFilterList(listKey).then(onListLoaded)
    );
    listsTransferPromise = Promise.all(transfers).catch(reason => {
        // Do not leave the worker permanently without its lists.
        needLists = true;
        throw reason;
    });
    return listsTransferPromise;
};
// Ask the worker to reverse-lookup a static network filter.
//
// rawFilter: the filter as a string.
//
// The filter is parsed and compiled first; the last compiled unit written by
// the compiler is what gets sent to the worker, along with the raw filter.
//
// Returns a promise resolving to the response of the worker to the
// 'fromNetFilter' request. It resolves to undefined when `rawFilter` is not a
// non-empty string, when the filter cannot be compiled, or when the worker is
// stopped before it replies.
const fromNetFilter = async function(rawFilter) {
    if ( typeof rawFilter !== 'string' || rawFilter === '' ) { return; }

    const writer = new CompiledListWriter();
    const parser = new sfp.AstFilterParser({
        trustedSource: true,
        maxTokenLength: staticNetFilteringEngine.MAX_TOKEN_LENGTH,
        nativeCssHas: vAPI.webextFlavor.env.includes('native_css_has'),
    });
    parser.parse(rawFilter);

    const compiler = staticNetFilteringEngine.createCompiler();
    if ( compiler.compile(parser, writer) === false ) { return; }

    await initWorker();

    const id = messageId++;
    // Register the resolver before posting so that it is in place no matter
    // how quickly the reply comes back.
    const response = new Promise(resolve => {
        pendingResponses.set(id, resolve);
    });
    worker.postMessage({
        what: 'fromNetFilter',
        id,
        compiledFilter: writer.last(),
        rawFilter,
    });
    return response;
};
// Ask the worker to reverse-lookup an extended filter (cosmetic, scriptlet,
// etc.) in the context of a given page.
//
// details.rawFilter: the filter as a string.
// details.url: URL of the page; its hostname and domain are sent along, as
//     well as whether 'generichide' / 'specifichide' reverse matching for
//     that URL yields 2 (presumably meaning an exception applies -- confirm
//     against staticNetFilteringEngine.matchRequestReverse).
//
// For scriptlet filters, the JSON-serialized scriptlet arguments are sent as
// `compiled`; otherwise `compiled` is left undefined.
//
// Returns a promise resolving to the response of the worker to the
// 'fromExtendedFilter' request. It resolves to undefined when
// `details.rawFilter` is not a non-empty string, or when the worker is stopped
// before it replies.
const fromExtendedFilter = async function(details) {
    if (
        typeof details.rawFilter !== 'string' ||
        details.rawFilter === ''
    ) {
        return;
    }

    await initWorker();

    const id = messageId++;
    const hostname = hostnameFromURI(details.url);

    const parser = new sfp.AstFilterParser({
        trustedSource: true,
        nativeCssHas: vAPI.webextFlavor.env.includes('native_css_has'),
    });
    parser.parse(details.rawFilter);
    let compiled;
    if ( parser.isScriptletFilter() ) {
        compiled = JSON.stringify(parser.getScriptletArgs());
    }

    // Register the resolver before posting so that it is in place no matter
    // how quickly the reply comes back.
    const response = new Promise(resolve => {
        pendingResponses.set(id, resolve);
    });
    worker.postMessage({
        what: 'fromExtendedFilter',
        id,
        domain: domainFromHostname(hostname),
        hostname,
        ignoreGeneric:
            staticNetFilteringEngine.matchRequestReverse(
                'generichide',
                details.url
            ) === 2,
        ignoreSpecific:
            staticNetFilteringEngine.matchRequestReverse(
                'specifichide',
                details.url
            ) === 2,
        rawFilter: details.rawFilter,
        compiled,
    });
    return response;
};
// Flag the filter lists as possibly changed: they will be sent again the next
// time the worker is initialized, and a live worker is notified right away
// with a 'resetLists' message.
const resetLists = () => {
    needLists = true;
    if ( worker !== null ) {
        worker.postMessage({ what: 'resetLists' });
    }
};
/******************************************************************************/
// Public interface of this module, exported as the default export.
const staticFilteringReverseLookup = {
    // Reverse lookup of a static network filter, given as a string.
    fromNetFilter,
    // Reverse lookup of an extended filter, given as { rawFilter, url }.
    fromExtendedFilter,
    // Signal that filter lists may have changed.
    resetLists,
    // Terminate the worker and settle all pending requests.
    shutdown: stopWorker
};
export default staticFilteringReverseLookup;
/******************************************************************************/