1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-11-17 07:52:42 +01:00
uBlock/src/js/storage.js

1364 lines
46 KiB
JavaScript
Raw Normal View History

2014-06-24 00:42:43 +02:00
/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2014-present Raymond Hill
2014-06-24 00:42:43 +02:00
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
/* global punycode, publicSuffixList */
'use strict';
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Compute the number of bytes currently used by the extension's storage.
//
// The figure is the sum of whatever numeric values are reported by
// vAPI.storage.getBytesInUse() and navigator.storage.estimate().usage. When
// neither source reports a number, the result is `undefined`.
//
// The result is also stored in µBlock.storageUsed.
µBlock.getBytesInUse = async function() {
    // Not all platforms implement vAPI.storage.getBytesInUse().
    const pending = [
        vAPI.storage.getBytesInUse instanceof Function
            ? vAPI.storage.getBytesInUse(null)
            : undefined
    ];

    const canEstimate =
        navigator.storage instanceof Object &&
        navigator.storage.estimate instanceof Function;
    if ( canEstimate ) {
        pending.push(navigator.storage.estimate());
    }

    const [ extensionBytes, estimate ] = await Promise.all(pending);

    // Collect all candidate figures, only the numeric ones are summed.
    const counts = [ extensionBytes ];
    if ( pending.length > 1 && estimate instanceof Object ) {
        counts.push(estimate.usage);
    }

    let total;
    for ( const count of counts ) {
        if ( typeof count !== 'number' ) { continue; }
        total = (total === undefined ? 0 : total) + count;
    }

    µBlock.storageUsed = total;
    return total;
};
/******************************************************************************/
// Persist µBlock.localSettings to storage.
//
// As a side effect of creating this method, a recurring timer is started:
// every four minutes, the local settings are saved if they were modified
// since the last time they were saved.
µBlock.saveLocalSettings = (( ) => {
    const checkInterval = 4 * 60 * 1000;

    function saveIfModified() {
        const µb = µBlock;
        if ( µb.localSettingsLastSaved < µb.localSettingsLastModified ) {
            µb.saveLocalSettings();
        }
        // Re-arm the timer for the next check.
        vAPI.setTimeout(saveIfModified, checkInterval);
    }

    vAPI.setTimeout(saveIfModified, checkInterval);

    // Returns whatever vAPI.storage.set() returns.
    return function() {
        this.localSettingsLastSaved = Date.now();
        return vAPI.storage.set(this.localSettings);
    };
})();
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Write the current user settings to storage. Nothing is returned, the
// write is fire-and-forget.
µBlock.saveUserSettings = function() {
    const storage = vAPI.storage;
    storage.set(this.userSettings);
};
/******************************************************************************/
// Load the hidden (advanced) settings from storage into this.hiddenSettings.
//
// A stored value is applied only if its name is a known default setting and
// its type matches the type of the default value. Once done, the
// 'hiddenSettingsChanged' DOM event is fired -- unless storage returned
// something which is not an object, in which case nothing happens.
µBlock.loadHiddenSettings = async function() {
    const bin = await vAPI.storage.get('hiddenSettings');
    if ( bin instanceof Object === false ) { return; }

    const stored = bin.hiddenSettings;
    if ( stored instanceof Object ) {
        const defaults = this.hiddenSettingsDefault;
        for ( const name in defaults ) {
            if ( defaults.hasOwnProperty(name) === false ) { continue; }
            if ( stored.hasOwnProperty(name) === false ) { continue; }
            if ( typeof stored[name] !== typeof defaults[name] ) { continue; }
            this.hiddenSettings[name] = stored[name];
        }
        // A boolean `suspendTabsUntilReady` is converted to its string
        // equivalent: true => 'yes', false => 'unset'.
        const current = this.hiddenSettings;
        if ( typeof current.suspendTabsUntilReady === 'boolean' ) {
            current.suspendTabsUntilReady = current.suspendTabsUntilReady
                ? 'yes'
                : 'unset';
        }
    }

    this.fireDOMEvent('hiddenSettingsChanged');
};
// Save the hidden settings to storage.
//
// Only the settings whose value differs from the default value are saved.
// This way, when a default value changes in a future version, the new default
// applies to every setting the user did not modify.
µBlock.saveHiddenSettings = function() {
    const modified = {};
    for ( const name in this.hiddenSettings ) {
        if ( this.hiddenSettings.hasOwnProperty(name) === false ) { continue; }
        const value = this.hiddenSettings[name];
        if ( value === this.hiddenSettingsDefault[name] ) { continue; }
        modified[name] = value;
    }
    vAPI.storage.set({ hiddenSettings: modified });
};
Add ability to uncloak CNAME records Related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/780 New webext permission added: `dns`, which purpose is to allow an extension to fetch the DNS record of specific hostnames, reference documentation: https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/dns The webext API `dns` is available in Firefox 60+ only. The new API will enable uBO to "uncloak" the actual hostname used in network requests. The ability is currently disabled by default for now -- this is only a first commit related to the above issue to allow advanced users to immediately use the new ability. Four advanced settings have been created to control the uncloaking of actual hostnames: cnameAliasList: a space-separated list of hostnames. Default value: unset => empty list. Special value: * => all hostnames. A space-separated list of hostnames => this tells uBO to "uncloak" the hostnames in the list will. cnameIgnoreList: a space-separated list of hostnames. Default value: unset => empty list. Special value: * => all hostnames. A space-separated list of hostnames => this tells uBO to NOT re-run the network request through uBO's filtering engine with the CNAME hostname. This is useful to exclude commonly used actual hostnames from being re-run through uBO's filtering engine, so as to avoid pointless overhead. cnameIgnore1stParty: boolean. Default value: true. Whether uBO should ignore to re-run a network request through the filtering engine when the CNAME hostname is 1st-party to the alias hostname. cnameMaxTTL: number of minutes. Default value: 120. This tells uBO to clear its CNAME cache after the specified time. For efficiency purpose, uBO will cache alias=>CNAME associations for reuse so as to reduce calls to `browser.dns.resolve`. All the associations will be cleared after the specified time to ensure the map does not grow too large and too ensure uBO uses up to date CNAME information.
2019-11-19 18:05:33 +01:00
// Whenever the hidden settings change, propagate the console log level to
// the logger, and the CNAME-related settings to the network layer.
self.addEventListener('hiddenSettingsChanged', ( ) => {
    const {
        consoleLogLevel,
        cnameIgnoreList,
        cnameIgnore1stParty,
        cnameIgnoreExceptions,
        cnameIgnoreRootDocument,
        cnameMaxTTL,
        cnameReplayFullURL,
        cnameUncloak,
        cnameUncloakProxied,
    } = µBlock.hiddenSettings;

    self.log.verbosity = consoleLogLevel;

    vAPI.net.setOptions({
        cnameIgnoreList,
        cnameIgnore1stParty,
        cnameIgnoreExceptions,
        cnameIgnoreRootDocument,
        cnameMaxTTL,
        cnameReplayFullURL,
        cnameUncloak,
        cnameUncloakProxied,
    });
});
2018-02-21 19:29:36 +01:00
/******************************************************************************/
// Parse the text representation of hidden settings into a settings object.
//
// raw: text made of lines, each of the form `name value`.
//
// Returns a new object which starts as a copy of this.hiddenSettingsDefault,
// and in which every valid `name value` line overrides the default value:
// - unknown names are ignored;
// - the value is trimmed, then converted according to the type of the
//   default value:
//   - boolean: only `true` and `false` are accepted, anything else leaves
//     the current value untouched;
//   - string: the trimmed value is used as is;
//   - number: parsed as a base-10 integer, falling back to the default value
//     when the value is not a number.
µBlock.hiddenSettingsFromString = function(raw) {
    const out = Object.assign({}, this.hiddenSettingsDefault);
    const lineIter = new this.LineIterator(raw);
    while ( lineIter.eot() === false ) {
        const matches = /^\s*(\S+)\s+(.+)$/.exec(lineIter.next());
        if ( matches === null ) { continue; }
        const name = matches[1];
        if ( out.hasOwnProperty(name) === false ) { continue; }
        // Trim once for all types: the capture group may include trailing
        // whitespace, which would otherwise cause `true`/`false` to not be
        // recognized.
        const value = matches[2].trim();
        switch ( typeof out[name] ) {
        case 'boolean':
            if ( value === 'true' ) {
                out[name] = true;
            } else if ( value === 'false' ) {
                out[name] = false;
            }
            break;
        case 'string':
            out[name] = value;
            break;
        case 'number': {
            const parsed = parseInt(value, 10);
            out[name] = Number.isNaN(parsed)
                ? this.hiddenSettingsDefault[name]
                : parsed;
            break;
        }
        default:
            break;
        }
    }
    return out;
};
// Serialize the hidden settings to text: one `name value` line per setting,
// with the lines sorted by setting name.
µBlock.stringFromHiddenSettings = function() {
    return Object.keys(this.hiddenSettings)
        .sort()
        .map(name => name + ' ' + this.hiddenSettings[name])
        .join('\n');
};
/******************************************************************************/
// Save the text representation of the permanent dynamic filtering rules to
// storage, under the key `dynamicFilteringString`.
µBlock.savePermanentFirewallRules = function() {
    const bin = {
        dynamicFilteringString: this.permanentFirewall.toString()
    };
    vAPI.storage.set(bin);
};
/******************************************************************************/
2015-05-21 20:15:17 +02:00
// Save the text representation of the permanent URL filtering rules to
// storage, under the key `urlFilteringString`.
µBlock.savePermanentURLFilteringRules = function() {
    const bin = {
        urlFilteringString: this.permanentURLFiltering.toString()
    };
    vAPI.storage.set(bin);
};
/******************************************************************************/
2015-03-27 18:00:55 +01:00
// Save the text representation of the permanent hostname switches to
// storage, under the key `hostnameSwitchesString`.
µBlock.saveHostnameSwitches = function() {
    const bin = {
        hostnameSwitchesString: this.permanentSwitches.toString()
    };
    vAPI.storage.set(bin);
};
/******************************************************************************/
// Save the whitelist to storage as an array, under the key `netWhitelist`,
// and record the time at which the whitelist was last modified.
µBlock.saveWhitelist = function() {
    const bin = {
        netWhitelist: this.arrayFromWhitelist(this.netWhitelist)
    };
    vAPI.storage.set(bin);
    this.netWhitelistModifyTime = Date.now();
};
/*******************************************************************************
TODO(seamless migration):
The code related to 'remoteBlacklists' can be removed when I am confident
all users have moved to a version of uBO which no longer depends on
the property 'remoteBlacklists', i.e. v1.11 and beyond.
**/
// Load the set of selected filter lists from storage into
// this.selectedFilterLists.
//
// When there is no valid stored selection, a default selection is computed
// from the assets metadata and saved.
µBlock.loadSelectedFilterLists = async function() {
    const bin = await vAPI.storage.get('selectedFilterLists');
    const hasStoredSelection =
        bin instanceof Object &&
        Array.isArray(bin.selectedFilterLists);

    if ( hasStoredSelection === false ) {
        // https://github.com/gorhill/uBlock/issues/747
        //   Select default filter lists if first-time launch.
        const lists = await this.assets.metadata();
        const defaultKeys = this.autoSelectRegionalFilterLists(lists);
        this.saveSelectedFilterLists(defaultKeys);
        return;
    }

    this.selectedFilterLists = bin.selectedFilterLists;
};
// Set and persist the selection of filter lists.
//
// newKeys: array of list keys to select.
// append: when true, the currently selected keys are kept in the selection,
//   after the new ones.
//
// Duplicates are removed. Any previously selected list which is not part of
// the resulting selection is removed through this.removeFilterList().
//
// Returns whatever vAPI.storage.set() returns.
µBlock.saveSelectedFilterLists = function(newKeys, append = false) {
    const previousKeys = this.selectedFilterLists.slice();
    const wantedKeys = new Set(
        append ? newKeys.concat(previousKeys) : newKeys
    );

    // Purge unused filter lists from cache.
    for ( const key of previousKeys ) {
        if ( wantedKeys.has(key) ) { continue; }
        this.removeFilterList(key);
    }

    const selected = Array.from(wantedKeys);
    this.selectedFilterLists = selected;
    return vAPI.storage.set({ selectedFilterLists: selected });
};
/******************************************************************************/
// Apply changes to the selection of filter lists, and to the set of imported
// filter lists.
//
// details.toSelect: optional array of list keys to select. The keys replace
//   the current selection, unless details.merge is truthy in which case they
//   are added to the current selection.
// details.toRemove: optional array of list keys to unselect and to remove,
//   including from the text listing the imported filter lists.
// details.toImport: optional text containing the URLs of filter lists to
//   import. The imported lists are also selected.
//
// The text listing the imported filter lists is saved only if it changed, the
// resulting selection is always saved.
µBlock.applyFilterListSelection = function(details) {
    let selectedKeys = new Set(this.selectedFilterLists);
    let externalLists = this.userSettings.externalLists;

    // Filter lists to select
    if ( Array.isArray(details.toSelect) ) {
        if ( details.merge ) {
            for ( const key of details.toSelect ) {
                selectedKeys.add(key);
            }
        } else {
            selectedKeys = new Set(details.toSelect);
        }
    }

    // Imported filter lists to remove
    if ( Array.isArray(details.toRemove) ) {
        // Remove every line exactly matching `url` from `text`.
        const withoutURL = (text, url) => {
            const reLine = new RegExp(
                '(^|\\n)' + this.escapeRegex(url) + '(\\n|$)',
                'g'
            );
            return text.replace(reLine, '\n').trim();
        };
        for ( const assetKey of details.toRemove ) {
            selectedKeys.delete(assetKey);
            externalLists = withoutURL(externalLists, assetKey);
            this.removeFilterList(assetKey);
        }
    }

    // Filter lists to import
    if ( typeof details.toImport === 'string' ) {
        // https://github.com/gorhill/uBlock/issues/1181
        //   Try mapping the URL of an imported filter list to the assetKey
        //   of an existing stock list.
        const assetKeyFromURL = url => {
            const needle = url.replace(/^https?:/, '');
            const assets = this.availableFilterLists;
            for ( const assetKey in assets ) {
                const { content, contentURL } = assets[assetKey];
                if ( content !== 'filters' ) { continue; }
                if ( typeof contentURL === 'string' ) {
                    if ( contentURL.endsWith(needle) ) { return assetKey; }
                    continue;
                }
                if ( Array.isArray(contentURL) === false ) { continue; }
                for ( const candidate of contentURL ) {
                    if ( candidate.endsWith(needle) ) { return assetKey; }
                }
            }
            // No stock list matches: the URL is its own key.
            return url;
        };

        const importedSet = new Set(
            this.listKeysFromCustomFilterLists(externalLists)
        );
        const toImportSet = new Set(
            this.listKeysFromCustomFilterLists(details.toImport)
        );
        for ( const urlKey of toImportSet ) {
            if ( importedSet.has(urlKey) ) { continue; }
            const assetKey = assetKeyFromURL(urlKey);
            // Only URLs which do not map to a stock list are imported.
            if ( assetKey === urlKey ) {
                importedSet.add(urlKey);
            }
            selectedKeys.add(assetKey);
        }
        externalLists = Array.from(importedSet).sort().join('\n');
    }

    if ( externalLists !== this.userSettings.externalLists ) {
        this.userSettings.externalLists = externalLists;
        vAPI.storage.set({ externalLists: externalLists });
    }

    this.saveSelectedFilterLists(Array.from(selectedKeys));
};
/******************************************************************************/
// Extract the list keys (URLs) from the text listing imported filter lists.
//
// raw: text, one candidate URL per line.
//
// Each line is trimmed. Lines starting with `!` or `#` are ignored, as are
// lines which do not start with a `scheme://...` pattern. Returns an array of
// the remaining lines, without duplicates, in order of first appearance.
µBlock.listKeysFromCustomFilterLists = function(raw) {
    const reComment = /^[!#]/;
    const reURL = /^[a-z-]+:\/\/\S+/;
    const keys = new Set();
    const lineIter = new this.LineIterator(raw);
    while ( lineIter.eot() === false ) {
        const candidate = lineIter.next().trim();
        if ( reComment.test(candidate) ) { continue; }
        if ( reURL.test(candidate) === false ) { continue; }
        keys.add(candidate);
    }
    return Array.from(keys);
};
/******************************************************************************/
// Save the user filters.
//
// content: the whole text of the user filters.
//
// The compiled version of the user filters is removed. Returns whatever
// this.assets.put() returns.
µBlock.saveUserFilters = function(content) {
    // https://github.com/gorhill/uBlock/issues/1022
    //   Be sure to end with an empty line.
    const trimmed = content.trim();
    const normalized = trimmed !== '' ? trimmed + '\n' : trimmed;
    this.removeCompiledFilterList(this.userFiltersPath);
    return this.assets.put(this.userFiltersPath, normalized);
};
// Fetch the user filters. Returns whatever this.assets.get() returns for
// the user filters asset.
µBlock.loadUserFilters = function() {
    const assetKey = this.userFiltersPath;
    return this.assets.get(assetKey);
};
// Append filters to the user filters, and apply them to the filtering
// engines without having to reload all the filter lists.
//
// filters: text, the filter(s) to append.
// options: optional object:
//   - autoComment: when strictly true, and when the hidden setting
//     `autoCommentFilterTemplate` contains a `{{` placeholder, a comment
//     line built from the template is inserted before the filters;
//   - origin: the value substituted for `{{origin}}` in the template;
//   - killCache: when truthy, browser.webRequest.handlerBehaviorChanged()
//     is called once the filters are applied.
//
// Nothing is done if `filters` is empty once trimmed, or if the current user
// filters could not be loaded.
µBlock.appendUserFilters = async function(filters, options) {
    filters = filters.trim();
    if ( filters.length === 0 ) { return; }

    // `options` is optional: normalize it once so that every option is
    // consistently read from an object.
    const opts = options instanceof Object ? options : {};

    // https://github.com/uBlockOrigin/uBlock-issues/issues/372
    //   Auto comment using user-defined template.
    let comment = '';
    if (
        opts.autoComment === true &&
        this.hiddenSettings.autoCommentFilterTemplate.indexOf('{{') !== -1
    ) {
        const d = new Date();
        comment =
            '! ' +
            this.hiddenSettings.autoCommentFilterTemplate
                .replace('{{date}}', d.toLocaleDateString())
                .replace('{{time}}', d.toLocaleTimeString())
                .replace('{{origin}}', opts.origin);
    }

    const details = await this.loadUserFilters();
    if ( details.error ) { return; }

    // The comment, if any, will be applied if and only if it is different
    // from the last comment found in the user filter list.
    if ( comment !== '' ) {
        const pos = details.content.lastIndexOf(comment);
        if (
            pos === -1 ||
            details.content.indexOf('\n!', pos + 1) !== -1
        ) {
            filters = '\n' + comment + '\n' + filters;
        }
    }

    // https://github.com/chrisaljoudi/uBlock/issues/976
    //   If we reached this point, the filter quite probably needs to be
    //   added for sure: do not try to be too smart, trying to avoid
    //   duplicates at this point may lead to more issues.
    await this.saveUserFilters(details.content.trim() + '\n' + filters);

    const compiledFilters = this.compileFilters(
        filters,
        { assetKey: this.userFiltersPath }
    );

    // Snapshot the counters before applying the new filters, in order to
    // compute by how much the counts of the user filters entry changed.
    const snfe = this.staticNetFilteringEngine;
    const cfe = this.cosmeticFilteringEngine;
    const acceptedCount = snfe.acceptedCount + cfe.acceptedCount;
    const discardedCount = snfe.discardedCount + cfe.discardedCount;
    this.applyCompiledFilters(compiledFilters, true);
    const entry = this.availableFilterLists[this.userFiltersPath];
    const deltaEntryCount =
        snfe.acceptedCount +
        cfe.acceptedCount - acceptedCount;
    const deltaEntryUsedCount =
        deltaEntryCount -
        (snfe.discardedCount + cfe.discardedCount - discardedCount);
    entry.entryCount += deltaEntryCount;
    entry.entryUsedCount += deltaEntryUsedCount;
    vAPI.storage.set({ 'availableFilterLists': this.availableFilterLists });

    this.staticNetFilteringEngine.freeze();
    this.redirectEngine.freeze();
    this.staticExtFilteringEngine.freeze();
    this.selfieManager.destroy();

    // https://www.reddit.com/r/uBlockOrigin/comments/cj7g7m/
    // https://www.reddit.com/r/uBlockOrigin/comments/cnq0bi/
    if ( opts.killCache ) {
        browser.webRequest.handlerBehaviorChanged();
    }
};
// Create user filters from `details.filters`, with `details` itself used as
// the options passed to appendUserFilters(). The result of the append
// operation is not waited for.
µBlock.createUserFilters = function(details) {
    const { filters } = details;
    this.appendUserFilters(filters, details);
    // https://github.com/gorhill/uBlock/issues/1786
    const { cosmeticFilteringEngine } = this;
    cosmeticFilteringEngine.removeFromSelectorCache(details.pageDomain);
};
2014-07-13 02:32:44 +02:00
/******************************************************************************/
// Compute the default selection of filter lists.
//
// lists: object mapping list keys to list entries.
//
// The selection always starts with the user filters. A list is then selected
// when its `off` property is not strictly true, or when it matches the
// environment according to this.listMatchesEnvironment() -- in which case
// its `off` property is also set to false.
//
// Returns the array of selected list keys.
µBlock.autoSelectRegionalFilterLists = function(lists) {
    const selected = [ this.userFiltersPath ];
    for ( const key in lists ) {
        if ( lists.hasOwnProperty(key) === false ) { continue; }
        const list = lists[key];
        if ( list.off === true ) {
            if ( !this.listMatchesEnvironment(list) ) { continue; }
            list.off = false;
        }
        selected.push(key);
    }
    return selected;
};
2015-02-25 22:51:04 +01:00
/******************************************************************************/
// Build the dictionary of all available filter lists.
//
// The dictionary is assembled from:
// - the user filters;
// - the imported lists, as per this.userSettings.externalLists;
// - the assets whose content is 'filters', as per this.assets.metadata().
//
// The `off` property of each entry reflects whether the list is part of
// this.selectedFilterLists. Run-time data found in the previously saved
// dictionary (entry counts, title) is reused when available.
//
// Returns the new dictionary.
µBlock.getAvailableLists = async function() {
    const newAvailableLists = {};
    let oldAvailableLists = {};

    // Create the entry of an imported list.
    const customEntry = (url, title) => ({
        content: 'filters',
        contentURL: url,
        external: true,
        group: 'custom',
        submitter: 'user',
        title: title
    });

    // User filter list.
    newAvailableLists[this.userFiltersPath] = {
        group: 'user',
        title: vAPI.i18n('1pPageName')
    };

    // Custom filter lists.
    const importedListKeys = this.listKeysFromCustomFilterLists(
        this.userSettings.externalLists
    );
    for ( const listKey of importedListKeys ) {
        const entry = customEntry(listKey, '');
        newAvailableLists[listKey] = entry;
        this.assets.registerAssetSource(listKey, entry);
    }

    // Convert a no longer existing stock list into an imported list.
    const customListFromStockList = assetKey => {
        const oldEntry = oldAvailableLists[assetKey];
        if ( oldEntry === undefined || oldEntry.off === true ) { return; }
        const { contentURL } = oldEntry;
        // When there are many URLs, the first one is used.
        const listURL = Array.isArray(contentURL) ? contentURL[0] : contentURL;
        const newEntry = customEntry(listURL, oldEntry.title || '');
        newAvailableLists[listURL] = newEntry;
        this.assets.registerAssetSource(listURL, newEntry);
        importedListKeys.push(listURL);
        this.userSettings.externalLists =
            (this.userSettings.externalLists + '\n' + listURL).trim();
        vAPI.storage.set({ externalLists: this.userSettings.externalLists });
        this.saveSelectedFilterLists([ listURL ], true);
    };

    // Load previously saved available lists -- these contains data
    // computed at run-time, we will reuse this data if possible.
    const [ bin, entries ] = await Promise.all([
        vAPI.storage.get('availableFilterLists'),
        this.assets.metadata(),
    ]);

    oldAvailableLists = bin && bin.availableFilterLists || {};

    for ( const assetKey in entries ) {
        if ( entries.hasOwnProperty(assetKey) === false ) { continue; }
        const entry = entries[assetKey];
        if ( entry.content !== 'filters' ) { continue; }
        newAvailableLists[assetKey] = Object.assign({}, entry);
    }

    // Load set of currently selected filter lists.
    const selectedKeys = new Set(this.selectedFilterLists);
    for ( const [ listKey, entry ] of Object.entries(newAvailableLists) ) {
        entry.off = selectedKeys.has(listKey) === false;
    }

    // Final steps:
    // - reuse existing list metadata if any;
    // - unregister unreferenced imported filter lists if any.

    // Reuse existing metadata.
    for ( const assetKey in oldAvailableLists ) {
        const oldEntry = oldAvailableLists[assetKey];
        const newEntry = newAvailableLists[assetKey];
        // List no longer exists. If a stock list, try to convert to
        // imported list if it was selected.
        if ( newEntry === undefined ) {
            this.removeFilterList(assetKey);
            if ( assetKey.includes('://') === false ) {
                customListFromStockList(assetKey);
            }
            continue;
        }
        if ( oldEntry.entryCount !== undefined ) {
            newEntry.entryCount = oldEntry.entryCount;
        }
        if ( oldEntry.entryUsedCount !== undefined ) {
            newEntry.entryUsedCount = oldEntry.entryUsedCount;
        }
        // This may happen if the list name was pulled from the list
        // content.
        // https://github.com/chrisaljoudi/uBlock/issues/982
        //   There is no guarantee the title was successfully extracted from
        //   the list content.
        const canReuseTitle =
            newEntry.title === '' &&
            typeof oldEntry.title === 'string' &&
            oldEntry.title !== '';
        if ( canReuseTitle ) {
            newEntry.title = oldEntry.title;
        }
    }

    // Remove unreferenced imported filter lists.
    for ( const assetKey in newAvailableLists ) {
        const newEntry = newAvailableLists[assetKey];
        if ( newEntry.submitter !== 'user' ) { continue; }
        if ( importedListKeys.includes(assetKey) ) { continue; }
        delete newAvailableLists[assetKey];
        this.assets.unregisterAssetSource(assetKey);
        this.removeFilterList(assetKey);
    }

    return newAvailableLists;
};
/******************************************************************************/
// Load all the selected filter lists into the filtering engines.
//
// While a load operation is pending, further calls return the promise of the
// pending operation rather than start a new one. The network layer is
// suspended for the duration of the load operation.
µBlock.loadFilterLists = (( ) => {
    // Keys of the lists loaded by the current/last load operation.
    const loadedListKeys = [];
    let loadingPromise;
    let t0 = 0;

    // Last step: freeze the engines, resume the network layer, persist the
    // list details, and let everybody know the filtering data changed.
    const finalize = µb => {
        log.info(`loadFilterLists() took ${Date.now()-t0} ms`);
        µb.staticNetFilteringEngine.freeze();
        µb.staticExtFilteringEngine.freeze();
        µb.redirectEngine.freeze();
        vAPI.net.unsuspend();
        vAPI.storage.set({ 'availableFilterLists': µb.availableFilterLists });
        vAPI.messaging.broadcast({
            what: 'staticFilteringDataChanged',
            parseCosmeticFilters: µb.userSettings.parseAllABPHideFilters,
            ignoreGenericCosmeticFilters: µb.userSettings.ignoreGenericCosmeticFilters,
            listKeys: loadedListKeys
        });
        µb.selfieManager.destroy();
        µb.lz4Codec.relinquish();
        µb.compiledFormatChanged = false;
        loadingPromise = undefined;
    };

    // Feed the compiled content of one list to the engines, and derive the
    // entry counts of that list from how the engines' counters changed.
    const applyList = (µb, assetKey, compiled) => {
        const snfe = µb.staticNetFilteringEngine;
        const sxfe = µb.staticExtFilteringEngine;
        const acceptedBefore = snfe.acceptedCount + sxfe.acceptedCount;
        const discardedBefore = snfe.discardedCount + sxfe.discardedCount;
        µb.applyCompiledFilters(compiled, assetKey === µb.userFiltersPath);
        if ( µb.availableFilterLists.hasOwnProperty(assetKey) ) {
            const entry = µb.availableFilterLists[assetKey];
            entry.entryCount =
                snfe.acceptedCount + sxfe.acceptedCount - acceptedBefore;
            entry.entryUsedCount =
                entry.entryCount -
                (snfe.discardedCount + sxfe.discardedCount - discardedBefore);
        }
        loadedListKeys.push(assetKey);
    };

    // Reset the engines, then load every list which is not turned off.
    const loadAll = (µb, lists) => {
        µb.availableFilterLists = lists;

        vAPI.net.suspend();
        µb.redirectEngine.reset();
        µb.staticExtFilteringEngine.reset();
        µb.staticNetFilteringEngine.reset();
        µb.selfieManager.destroy();
        µb.staticFilteringReverseLookup.resetLists();

        // We need to build a complete list of assets to pull first: this is
        // because it *may* happens that some load operations are synchronous:
        // This happens for assets which do not exist, or assets with no
        // content.
        const toLoad = [];
        for ( const assetKey in lists ) {
            if ( lists.hasOwnProperty(assetKey) === false ) { continue; }
            if ( lists[assetKey].off ) { continue; }
            const loading = µb.getCompiledFilterList(assetKey).then(details => {
                applyList(µb, details.assetKey, details.content);
            });
            toLoad.push(loading);
        }
        return Promise.all(toLoad);
    };

    return function() {
        if ( loadingPromise instanceof Promise ) { return loadingPromise; }
        t0 = Date.now();
        loadedListKeys.length = 0;
        loadingPromise = Promise.all([
            this.getAvailableLists().then(lists => loadAll(this, lists)),
            this.loadRedirectResources(),
        ]).then(( ) => {
            finalize(this);
        });
        return loadingPromise;
    };
})();
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Fetch the compiled version of a filter list, compiling the raw version of
// the list if needed.
//
// assetKey: the key of the filter list.
//
// Returns the details object obtained from this.assets.get(), to which the
// property `assetKey` is added. The `content` property is an empty string
// when the raw filter list itself is empty.
µBlock.getCompiledFilterList = async function(assetKey) {
    const compiledPath = 'compiled/' + assetKey;

    // All returned details are tagged with the key of the filter list.
    const tagged = details => {
        details.assetKey = assetKey;
        return details;
    };

    // The compiled content in storage is of no use if the compiled format
    // changed.
    if ( this.compiledFormatChanged === false ) {
        const cachedDetails = await this.assets.get(compiledPath);
        if ( cachedDetails.content !== '' ) {
            return tagged(cachedDetails);
        }
    }

    const rawDetails = await this.assets.get(assetKey);
    // Compiling an empty string results in an empty string.
    if ( rawDetails.content === '' ) {
        return tagged(rawDetails);
    }

    this.extractFilterListMetadata(assetKey, rawDetails.content);

    // Fetching the raw content may cause the compiled content to be
    // generated somewhere else in uBO, hence we try one last time to
    // fetch the compiled content in case it has become available.
    const compiledDetails = await this.assets.get(compiledPath);
    if ( compiledDetails.content === '' ) {
        compiledDetails.content = this.compileFilters(
            rawDetails.content,
            { assetKey: assetKey }
        );
        this.assets.put(compiledPath, compiledDetails.content);
    }
    return tagged(compiledDetails);
};
2015-02-24 00:31:29 +01:00
/******************************************************************************/
2018-02-23 12:42:17 +01:00
// https://github.com/gorhill/uBlock/issues/3406
// Lower minimum update period to 1 day.
// Extract metadata (title, update period) from the content of a filter list.
//
// assetKey: the key of the filter list.
// raw: the raw content of the filter list.
//
// Nothing is done if the filter list is not in this.availableFilterLists.
// The update period is expressed in days, with a minimum of one day.
µBlock.extractFilterListMetadata = function(assetKey, raw) {
    const listEntry = this.availableFilterLists[assetKey];
    if ( listEntry === undefined ) { return; }

    const reTitle = /(?:^|\n)(?:!|# )[\t ]*Title[\t ]*:([^\n]+)/i;
    const reExpires = /(?:^|\n)(?:!|# )[\t ]*Expires[\t ]*:[\t ]*(\d+)[\t ]*(h)?/i;

    // Metadata expected to be found at the top of content.
    const head = raw.slice(0, 1024);

    // https://github.com/gorhill/uBlock/issues/313
    //   Always try to fetch the name if this is an external filter list.
    if ( listEntry.title === '' || listEntry.group === 'custom' ) {
        const titleMatch = head.match(reTitle);
        if ( titleMatch !== null ) {
            // https://bugs.chromium.org/p/v8/issues/detail?id=2869
            //   orphanizeString is to work around String.slice()
            //   potentially causing the whole raw filter list to be held in
            //   memory just because we cut out the title as a substring.
            listEntry.title = this.orphanizeString(titleMatch[1].trim());
        }
    }

    // Extract update frequency information
    const expiresMatch = head.match(reExpires);
    if ( expiresMatch === null ) { return; }
    const amount = Math.max(parseInt(expiresMatch[1], 10), 1);
    // A trailing `h` means the amount is in hours: convert to days.
    const days = expiresMatch[2] !== undefined
        ? Math.ceil(amount / 24)
        : amount;
    if ( days === listEntry.updateAfter ) { return; }
    this.assets.registerAssetSource(assetKey, { updateAfter: days });
};
/******************************************************************************/
// Remove the compiled version of a filter list, stored under the
// `compiled/` prefix of its asset key.
µBlock.removeCompiledFilterList = function(assetKey) {
    const compiledPath = `compiled/${assetKey}`;
    this.assets.remove(compiledPath);
};
// Remove a filter list entirely: first its compiled version, then the asset
// stored under the list's own key.
µBlock.removeFilterList = function(assetKey) {
    // Compiled version first.
    this.removeCompiledFilterList(assetKey);

    // Then the asset stored under the list's own key.
    this.assets.remove(assetKey);
};
/******************************************************************************/
2014-09-25 21:44:18 +02:00
// Compile the raw text of a filter list into its serialized compiled form.
//
// rawText: raw content of a filter list; preprocessor directives are resolved
//   through `processDirectives()` before the lines are parsed.
// details: optional object; when it has an `assetKey`, the key is stored in
//   the writer's properties.
// Returns the string produced by the writer (`writer.toString()`).
µBlock.compileFilters = function(rawText, details) {
    const writer = new this.CompiledLineIO.Writer();

    // Populate the writer with information potentially useful to the
    // client compilers.
    if ( details && details.assetKey ) {
        writer.properties.set('assetKey', details.assetKey);
    }

    // Useful references:
    //    https://adblockplus.org/en/filter-cheatsheet
    //    https://adblockplus.org/en/filters
    const staticNetFilteringEngine = this.staticNetFilteringEngine;
    const staticExtFilteringEngine = this.staticExtFilteringEngine;
    const lineIter = new this.LineIterator(this.processDirectives(rawText));
    const parser = new vAPI.StaticFilteringParser();

    parser.setMaxTokenLength(this.urlTokenizer.MAX_TOKEN_LENGTH);

    while ( lineIter.eot() === false ) {
        let line = lineIter.next();

        // Line continuation: a line ending with ' \' is joined with the next
        // line, provided the next line starts with four spaces.
        // NOTE(review): assumes `peek(4)` returns the next four characters,
        // hence the comparison against a four-space string; a one-character
        // string could never match -- confirm against LineIterator.
        while ( line.endsWith(' \\') ) {
            if ( lineIter.peek(4) !== '    ' ) { break; }
            line = line.slice(0, -2).trim() + lineIter.next().trim();
        }

        parser.analyze(line);
        if ( parser.shouldIgnore() ) { continue; }

        if ( parser.category === parser.CATStaticExtFilter ) {
            staticExtFilteringEngine.compile(parser, writer);
            continue;
        }

        if ( parser.category !== parser.CATStaticNetFilter ) { continue; }

        // https://github.com/gorhill/uBlock/issues/2599
        //   convert hostname to punycode if needed
        if ( parser.patternHasUnicode() ) {
            parser.toPunycode();
        }
        staticNetFilteringEngine.compile(parser, writer);
    }

    return writer.toString();
};
2014-06-24 00:42:43 +02:00
2015-02-24 00:31:29 +01:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
2016-02-17 15:28:20 +01:00
// https://github.com/gorhill/uBlock/issues/1395
//   Added `firstparty` argument: to avoid discarding cosmetic filters when
//   applying 1st-party filters.
// Feed compiled filter list content to the static network and static
// extended filtering engines.
//
// rawText: compiled content; nothing is done for an empty string.
// firstparty: when truthy, cosmetic filters are never skipped, regardless of
//   the `parseAllABPHideFilters` user setting.
µBlock.applyCompiledFilters = function(rawText, firstparty) {
    if ( rawText === '' ) { return; }
    // The same reader instance is handed to both engines, in this order.
    const reader = new this.CompiledLineIO.Reader(rawText);
    this.staticNetFilteringEngine.fromCompiledContent(reader);
    this.staticExtFilteringEngine.fromCompiledContent(reader, {
        skipGenericCosmetic: this.userSettings.ignoreGenericCosmeticFilters,
        skipCosmetic: !firstparty && !this.userSettings.parseAllABPHideFilters
    });
};
/******************************************************************************/
// https://github.com/AdguardTeam/AdguardBrowserExtension/issues/917
// Resolve `!#if` / `!#endif` directives found in filter list content.
//
// The content enclosed by an `!#if` whose condition evaluates to "discard" is
// cut out. Conditions are looked up in `processDirectives.tokens`; a leading
// `!` negates the condition. Unknown conditions never cause content to be
// discarded. If directives are unbalanced, or nothing was discarded, the
// content is returned as is (trimmed).
µBlock.processDirectives = function(content) {
    const reDirective = /^!#(if|endif)\b([^\n]*)/gm;
    const nesting = [];
    const kept = [];
    let resumeAt = 0;
    let discarding = false;

    while ( resumeAt < content.length ) {
        const found = reDirective.exec(content);
        if ( found === null ) { break; }
        const directive = found[0];
        const keyword = found[1];

        if ( keyword === 'if' ) {
            let expr = found[2].trim();
            const negated = expr.charCodeAt(0) === 0x21 /* '!' */;
            if ( negated ) { expr = expr.slice(1); }
            const token = this.processDirectives.tokens.get(expr);
            let mustDiscard;
            if ( token === 'false' && negated === false ) {
                mustDiscard = true;
            } else if ( token === undefined ) {
                mustDiscard = false;
            } else {
                mustDiscard = vAPI.webextFlavor.soup.has(token) === negated;
            }
            // Only the outermost discarding `!#if` marks where a cut begins.
            if ( discarding === false && mustDiscard ) {
                kept.push(content.slice(resumeAt, found.index));
                discarding = true;
            }
            nesting.push(mustDiscard);
            continue;
        }

        if ( keyword === 'endif' ) {
            nesting.pop();
            // Resume keeping content once no enclosing `!#if` discards.
            if ( discarding && nesting.includes(true) === false ) {
                resumeAt = found.index + directive.length + 1;
                discarding = false;
            }
        }
    }

    if ( nesting.length !== 0 || kept.length === 0 ) {
        return content.trim();
    }
    kept.push(content.slice(resumeAt));
    return kept.join('\n').trim();
};

// Maps the condition names usable in `!#if` to the tokens looked up in
// `vAPI.webextFlavor.soup`.
µBlock.processDirectives.tokens = new Map([
    [ 'ext_ublock', 'ublock' ],
    [ 'env_chromium', 'chromium' ],
    [ 'env_edge', 'edge' ],
    [ 'env_firefox', 'firefox' ],
    [ 'env_legacy', 'legacy' ],
    [ 'env_mobile', 'mobile' ],
    [ 'env_safari', 'safari' ],
    [ 'cap_html_filtering', 'html_filtering' ],
    [ 'cap_user_stylesheet', 'user_stylesheet' ],
    [ 'false', 'false' ],
]);
/******************************************************************************/
// Load the redirect engine's resources, either from its selfie or, failing
// that, from the built-in resources plus any user-provided resource files.
//
// Resolves with `true` on success and `false` if anything threw along the
// way (the exception is logged through `log.info`).
µBlock.loadRedirectResources = async function() {
    try {
        const success = await this.redirectEngine.resourcesFromSelfie();
        if ( success === true ) { return true; }

        // First entry is the built-in resources; its result is not used
        // below, only the results of the user resource fetches are.
        const fetchPromises = [
            this.redirectEngine.loadBuiltinResources()
        ];

        const userResourcesLocation = this.hiddenSettings.userResourcesLocation;
        if ( userResourcesLocation !== 'unset' ) {
            for ( const url of userResourcesLocation.split(/\s+/) ) {
                // Leading/trailing whitespace yields empty entries: there is
                // nothing to fetch for those.
                if ( url === '' ) { continue; }
                fetchPromises.push(this.assets.fetchText(url));
            }
        }

        const results = await Promise.all(fetchPromises);

        let content = '';
        for ( const result of results.slice(1) ) {
            if (
                result instanceof Object === false ||
                typeof result.content !== 'string' ||
                result.content === ''
            ) {
                continue;
            }
            content += '\n\n' + result.content;
        }

        this.redirectEngine.resourcesFromString(content);
        this.redirectEngine.selfieFromResources();
    } catch(ex) {
        log.info(ex);
        return false;
    }
    return true;
};
/******************************************************************************/
// Load the Public Suffix List: restore it from its compiled selfie when
// possible, otherwise compile it from the raw asset content.
µBlock.loadPublicSuffixList = async function() {
    const wasmAllowed = this.hiddenSettings.disableWebAssembly !== true;
    if ( wasmAllowed ) {
        publicSuffixList.enableWASM();
    }

    // First choice: the selfie stored under the `compiled/` prefix. Any
    // exception here is logged, then the raw list is used instead.
    let restored = false;
    try {
        const selfie = await this.assets.get(`compiled/${this.pslAssetKey}`);
        restored = publicSuffixList.fromSelfie(selfie.content, this.base64);
    } catch (ex) {
        log.info(ex);
    }
    if ( restored ) { return; }

    // Fallback: compile from the raw list, if there is any content.
    const raw = await this.assets.get(this.pslAssetKey);
    if ( raw.content === '' ) { return; }
    this.compilePublicSuffixList(raw.content);
};
// Parse raw Public Suffix List content and store the resulting selfie under
// the `compiled/` prefix of the PSL asset key.
//
// content: raw text of the Public Suffix List.
µBlock.compilePublicSuffixList = function(content) {
    publicSuffixList.parse(content, punycode.toASCII);
    // `this.base64` is the same codec object `loadPublicSuffixList()` hands
    // to `fromSelfie()`.
    this.assets.put(
        'compiled/' + this.pslAssetKey,
        publicSuffixList.toSelfie(this.base64)
    );
};
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// This is to be sure the selfie is generated in a sane manner: the selfie will
// be generated if the user doesn't change their filter lists selection for
// some set time.
2014-09-25 21:44:18 +02:00
// Manages the "selfie": a persisted snapshot of the filtering engines' state
// stored under the `selfie/` asset prefix.
//
// Exposes:
//   load(): resolves with the result of `loadRedirectResources()` when all
//     parts of the selfie were restored, with `false` otherwise (in which
//     case whatever selfie exists is destroyed).
//   destroy(): invalidates the selfie right away and removes it from storage
//     after a short delay; a new selfie is created `selfieAfter` minutes
//     after the removal.
µBlock.selfieManager = (( ) => {
    const µb = µBlock;
    let createTimer;
    let destroyTimer;

    // As of 2018-05-31:
    //   JSON.stringify-ing ourselves results in a better baseline
    //   memory usage at selfie-load time. For some reasons.
    const create = async function() {
        await Promise.all([
            µb.assets.put(
                'selfie/main',
                JSON.stringify({
                    magic: µb.systemSettings.selfieMagic,
                    availableFilterLists: µb.availableFilterLists,
                })
            ),
            µb.redirectEngine.toSelfie('selfie/redirectEngine'),
            µb.staticExtFilteringEngine.toSelfie(
                'selfie/staticExtFilteringEngine'
            ),
            µb.staticNetFilteringEngine.toSelfie(
                'selfie/staticNetFilteringEngine'
            ),
        ]);
        µb.lz4Codec.relinquish();
        µb.selfieIsInvalid = false;
    };

    // Restore `availableFilterLists` from the main selfie entry. Resolves
    // with `true` only if the entry exists, parses as JSON, and carries the
    // current `selfieMagic` value.
    const loadMain = async function() {
        const details = await µb.assets.get('selfie/main');
        if (
            details instanceof Object === false ||
            typeof details.content !== 'string' ||
            details.content === ''
        ) {
            return false;
        }
        let selfie;
        try {
            selfie = JSON.parse(details.content);
        } catch(ex) {
            // Deliberately ignored: unparsable content leaves `selfie`
            // undefined, which is rejected just below.
        }
        if (
            selfie instanceof Object === false ||
            selfie.magic !== µb.systemSettings.selfieMagic
        ) {
            return false;
        }
        µb.availableFilterLists = selfie.availableFilterLists;
        return true;
    };

    // Remove the selfie from storage, flag it as invalid, and schedule the
    // creation of a new one `selfieAfter` minutes from now.
    const destroy = function() {
        µb.cacheStorage.remove('selfie'); // TODO: obsolete, remove eventually.
        µb.assets.remove(/^selfie\//);
        µb.selfieIsInvalid = true;
        createTimer = vAPI.setTimeout(( ) => {
            createTimer = undefined;
            create();
        }, µb.hiddenSettings.selfieAfter * 60000);
    };

    const load = async function() {
        if ( µb.selfieIsInvalid ) {
            return false;
        }
        try {
            const results = await Promise.all([
                loadMain(),
                µb.redirectEngine.fromSelfie('selfie/redirectEngine'),
                µb.staticExtFilteringEngine.fromSelfie(
                    'selfie/staticExtFilteringEngine'
                ),
                µb.staticNetFilteringEngine.fromSelfie(
                    'selfie/staticNetFilteringEngine'
                ),
            ]);
            if ( results.every(v => v) ) {
                return µb.loadRedirectResources();
            }
        }
        catch (reason) {
            log.info(reason);
        }
        // At least one part could not be restored: discard the whole selfie.
        destroy();
        return false;
    };

    // Public `destroy`: repeated calls while a removal is already pending are
    // no-ops, and any pending selfie creation is cancelled.
    const destroyAsync = function() {
        if ( destroyTimer !== undefined ) { return; }
        if ( createTimer !== undefined ) {
            clearTimeout(createTimer);
            createTimer = undefined;
        }
        destroyTimer = vAPI.setTimeout(
            ( ) => {
                destroyTimer = undefined;
                destroy();
            },
            1019
        );
        µb.selfieIsInvalid = true;
    };

    return { load, destroy: destroyAsync };
})();
2014-09-08 23:46:58 +02:00
/******************************************************************************/
2015-07-27 16:10:34 +02:00
// https://github.com/gorhill/uBlock/issues/531
// Overwrite user settings with admin settings if present.
//
// Admin settings match the layout of a uBlock backup. Not all data is
// necessarily present, i.e. administrators may remove entries whose
// values are left to the user's choice.
// Read the `adminSettings` entry from admin storage and, when present,
// overwrite the matching user data with it.
//
// The entry may be either a JSON string or an object. Only recognized
// entries are used; everything collected is written to `vAPI.storage` in a
// single call, except `userFilters` which goes through `saveUserFilters()`.
µBlock.restoreAdminSettings = async function() {
    let data;
    try {
        const json = await vAPI.adminStorage.getItem('adminSettings');
        if ( typeof json === 'string' && json !== '' ) {
            data = JSON.parse(json);
        } else if ( json instanceof Object ) {
            data = json;
        }
    } catch (ex) {
        console.error(ex);
    }

    if ( data instanceof Object === false ) { return; }

    const bin = {};
    let binNotEmpty = false;

    // https://github.com/uBlockOrigin/uBlock-issues/issues/666
    //   Allows an admin to set their own 'assets.json' file, with their
    //   own set of stock assets.
    if (
        typeof data.assetsBootstrapLocation === 'string' &&
        data.assetsBootstrapLocation !== ''
    ) {
        µBlock.assetsBootstrapLocation = data.assetsBootstrapLocation;
    }

    // `typeof null` is 'object': null must be ruled out explicitly, or the
    // property lookups below would throw.
    const adminUserSettings = data.userSettings;
    if ( typeof adminUserSettings === 'object' && adminUserSettings !== null ) {
        // Only settings which are already known are taken over.
        for ( const name of Object.keys(this.userSettings) ) {
            if (
                Object.prototype.hasOwnProperty.call(
                    adminUserSettings,
                    name
                ) === false
            ) {
                continue;
            }
            bin[name] = adminUserSettings[name];
            binNotEmpty = true;
        }
    }

    // 'selectedFilterLists' is an array of filter list tokens. Each token
    // is a reference to an asset in 'assets.json'.
    if ( Array.isArray(data.selectedFilterLists) ) {
        bin.selectedFilterLists = data.selectedFilterLists;
        binNotEmpty = true;
    }

    // The whitelist is accepted either as an array (`whitelist`) or as a
    // newline-separated string (`netWhitelist`).
    if ( Array.isArray(data.whitelist) ) {
        bin.netWhitelist = data.whitelist;
        binNotEmpty = true;
    } else if ( typeof data.netWhitelist === 'string' ) {
        bin.netWhitelist = data.netWhitelist.split('\n');
        binNotEmpty = true;
    }

    if ( typeof data.dynamicFilteringString === 'string' ) {
        bin.dynamicFilteringString = data.dynamicFilteringString;
        binNotEmpty = true;
    }

    if ( typeof data.urlFilteringString === 'string' ) {
        bin.urlFilteringString = data.urlFilteringString;
        binNotEmpty = true;
    }

    if ( typeof data.hostnameSwitchesString === 'string' ) {
        bin.hostnameSwitchesString = data.hostnameSwitchesString;
        binNotEmpty = true;
    }

    if ( binNotEmpty ) {
        vAPI.storage.set(bin);
    }

    if ( typeof data.userFilters === 'string' ) {
        this.saveUserFilters(data.userFilters);
    }
};
/******************************************************************************/
2017-11-09 18:53:05 +01:00
// https://github.com/gorhill/uBlock/issues/2344
// Support multiple locales per filter list.
// https://github.com/gorhill/uBlock/issues/3210
// Support ability to auto-enable a filter list based on user agent.
// Tell whether a filter list is relevant to the current environment.
//
// details.lang: optional string of language codes; matches when it contains,
//   as a whole word, the leading lowercase letters of `navigator.language`.
// details.ua: optional string; matches when it is found, as a whole word and
//   case-insensitively, in `navigator.userAgent`.
// Returns true if either test matches, false otherwise.
µBlock.listMatchesEnvironment = function(details) {
    // Matches language?
    if ( typeof details.lang === 'string' ) {
        // The language regex is built once, then cached on the function.
        let reLang = this.listMatchesEnvironment.reLang;
        if ( reLang === undefined ) {
            const match = /^[a-z]+/.exec(self.navigator.language);
            if ( match !== null ) {
                reLang = new RegExp('\\b' + match[0] + '\\b');
                this.listMatchesEnvironment.reLang = reLang;
            }
        }
        if ( reLang !== undefined && reLang.test(details.lang) ) {
            return true;
        }
    }

    // Matches user agent?
    if ( typeof details.ua === 'string' ) {
        const reUA = new RegExp('\\b' + this.escapeRegex(details.ua) + '\\b', 'i');
        if ( reUA.test(self.navigator.userAgent) ) { return true; }
    }

    return false;
};
/******************************************************************************/
// Schedule the next automatic asset update.
//
// updateDelay: delay in milliseconds before starting the update; a value of
//   0 cancels whatever was scheduled. When an update is already scheduled,
//   the earlier of the pending and the requested time is retained.
µBlock.scheduleAssetUpdater = (( ) => {
    let timer;
    // Time (ms since epoch) of the currently scheduled update, 0 if none.
    let next = 0;

    return function(updateDelay) {
        if ( timer ) {
            clearTimeout(timer);
            timer = undefined;
        }
        if ( updateDelay === 0 ) {
            next = 0;
            return;
        }

        const now = Date.now();

        // Use the new schedule if and only if it is earlier than the previous
        // one.
        if ( next !== 0 ) {
            updateDelay = Math.min(updateDelay, Math.max(next - now, 0));
        }
        next = now + updateDelay;

        timer = vAPI.setTimeout(( ) => {
            timer = undefined;
            next = 0;
            this.assets.updateStart({
                // Falls back to 120000 ms when the setting evaluates to a
                // falsy value.
                delay: this.hiddenSettings.autoUpdateAssetFetchPeriod * 1000 ||
                       120000,
                auto: true,
            });
        }, updateDelay);
    };
})();
/******************************************************************************/
// Observer for asset-related events, dispatched on `topic`.
//
// topic:   one of 'before-asset-updated', 'after-asset-updated',
//          'asset-update-failed', 'after-assets-updated',
//          'builtin-asset-source-added'. Any other topic is ignored.
// details: topic-specific payload; the properties read are `type`,
//          `assetKey`, `content`, `assetKeys` and `entry`, depending on
//          the topic.
//
// Returns true only for 'before-asset-updated' when the asset is either not
// a filter list or is a filter list which is both available and selected;
// returns undefined in every other case.
µBlock.assetObserver = function(topic, details) {
    // Do not update filter list if not in use.
    if ( topic === 'before-asset-updated' ) {
        if ( details.type === 'filters' ) {
            if (
                this.availableFilterLists.hasOwnProperty(details.assetKey) === false ||
                this.selectedFilterLists.includes(details.assetKey) === false
            ) {
                return;
            }
        }
        return true;
    }

    // Compile the list while we have the raw version in memory
    if ( topic === 'after-asset-updated' ) {
        // Skip selfie-related content.
        if ( details.assetKey.startsWith('selfie/') ) { return; }
        // The asset is considered cached only if non-empty text came along.
        const cached = typeof details.content === 'string' &&
                       details.content !== '';
        if ( this.availableFilterLists.hasOwnProperty(details.assetKey) ) {
            if ( cached ) {
                // Only filter lists currently in use are (re)compiled.
                if ( this.selectedFilterLists.includes(details.assetKey) ) {
                    this.extractFilterListMetadata(
                        details.assetKey,
                        details.content
                    );
                    this.assets.put(
                        'compiled/' + details.assetKey,
                        this.compileFilters(
                            details.content,
                            { assetKey: details.assetKey }
                        )
                    );
                }
            } else {
                this.removeCompiledFilterList(details.assetKey);
            }
        } else if ( details.assetKey === this.pslAssetKey ) {
            if ( cached ) {
                this.compilePublicSuffixList(details.content);
            }
        }
        vAPI.messaging.broadcast({
            what: 'assetUpdated',
            key: details.assetKey,
            cached: cached
        });
        // https://github.com/gorhill/uBlock/issues/2585
        //   Whenever an asset is overwritten, the current selfie is quite
        //   likely no longer valid.
        this.selfieManager.destroy();
        return;
    }

    // Update failed.
    if ( topic === 'asset-update-failed' ) {
        vAPI.messaging.broadcast({
            what: 'assetUpdated',
            key: details.assetKey,
            failed: true
        });
        return;
    }

    // Reload all filter lists if needed.
    if ( topic === 'after-assets-updated' ) {
        if ( details.assetKeys.length !== 0 ) {
            // https://github.com/gorhill/uBlock/pull/2314#issuecomment-278716960
            if (
                this.hiddenSettings.userResourcesLocation !== 'unset' ||
                vAPI.webextFlavor.soup.has('devbuild')
            ) {
                this.redirectEngine.invalidateResourcesSelfie();
            }
            this.loadFilterLists();
        }
        if ( this.userSettings.autoUpdate ) {
            // `autoUpdatePeriod` is multiplied by 3600000 (ms per hour);
            // fall back to 25200000 ms when the product is zero/NaN.
            this.scheduleAssetUpdater(
                this.hiddenSettings.autoUpdatePeriod * 3600000 || 25200000
            );
        } else {
            this.scheduleAssetUpdater(0);
        }
        vAPI.messaging.broadcast({
            what: 'assetsUpdated',
            assetKeys: details.assetKeys
        });
        return;
    }

    // New asset source became available, if it's a filter list, should we
    // auto-select it?
    if ( topic === 'builtin-asset-source-added' ) {
        if ( details.entry.content === 'filters' ) {
            // Select when the entry is not flagged `off`, or when it is but
            // it matches the current environment.
            if (
                details.entry.off !== true ||
                this.listMatchesEnvironment(details.entry)
            ) {
                this.saveSelectedFilterLists([ details.assetKey ], true);
            }
        }
        return;
    }
};