From 086766a924a42affccfd83373a869e888e2ea8a4 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Mon, 26 Feb 2024 16:50:11 -0500 Subject: [PATCH] Redesign cache storage In uBO, the "cache storage" is used to save resources which can be safely discarded, though at the cost of having to fetch or recompute them again. Extension storage (browser.storage.local) is now always used as the cache storage backend. This has always been the default for Chromium-based browsers. For Firefox-based browsers, IndexedDB was used as the backend for cache storage, with fallback to extension storage when Firefox is configured to run in private mode by default. Extension storage is reliable since it works in all contexts, though it may not be the most performant one. To speed up loading of resources from extension storage, uBO will now make use of Cache API storage, which will mirror the content of key assets saved to extension storage. Typically, loading resources from Cache API is faster than loading the same resources from the extension storage. Only resources which must be loaded in memory as fast as possible will make use of the Cache API storage layered on top of the extension storage. Compiled filter lists and memory snapshots of filtering engines (aka "selfies") will be mirrored to the Cache API storage, since these must be loaded into memory as fast as possible, and reloading filter lists from their compiled counterpart is a common operation. This new design now makes it seamless to work in permanent private mode for Firefox-based browsers, since extension storage now always contains cache-related assets. Support for IndexedDB is removed for the time being, except to support migration of cached assets the first time uBO runs with the new cache storage design. In order to easily support all choices of storage, a new serializer has been introduced, which is capable of serializing/deserializing structured-cloneable data to/from a JS string. 
Because of this new serializer, JS data structures can be stored directly from their native representation, and deserialized directly to their native representation from uBO's point of view, since the serialization occurs (if needed) only at the storage interface level. This new serializer simplifies many code paths where data structures such as Set, Map, TypedArray, RegExp, etc. had to be converted in a disparate manner to be able to persist them to extension storage. The new serializer supports workers and LZ4 compression. These can be configured through advanced settings. With this new layered design, it's possible to introduce more storage layers if they are measured to be beneficial (e.g. browser.storage.session). References: - https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/storage/local - https://developer.mozilla.org/en-US/docs/Web/API/Cache - https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm --- platform/common/vapi-background.js | 23 +- src/js/assets.js | 157 +-- src/js/background.js | 3 +- src/js/base64-custom.js | 103 +- src/js/biditrie.js | 31 +- src/js/cachestorage.js | 693 +++++----- src/js/cosmetic-filtering.js | 20 +- src/js/hntrie.js | 35 +- src/js/messaging.js | 34 +- src/js/redirect-engine.js | 43 +- src/js/reverselookup.js | 2 +- src/js/scriptlet-filtering-core.js | 2 +- src/js/scuo-serializer.js | 1307 ++++++++++++++++++ src/js/start.js | 54 +- src/js/static-ext-filtering-db.js | 12 +- src/js/static-ext-filtering.js | 45 +- src/js/static-net-filtering.js | 180 +-- src/js/storage.js | 94 +- src/lib/publicsuffixlist/publicsuffixlist.js | 10 +- 19 files changed, 1920 insertions(+), 928 deletions(-) create mode 100644 src/js/scuo-serializer.js diff --git a/platform/common/vapi-background.js b/platform/common/vapi-background.js index 54148039e..08cfd5872 100644 --- a/platform/common/vapi-background.js +++ b/platform/common/vapi-background.js @@ -1671,10 +1671,7 @@ vAPI.cloud = (( ) => { 
const push = async function(details) { const { datakey, data, encode } = details; - if ( - data === undefined || - typeof data === 'string' && data === '' - ) { + if ( data === undefined || typeof data === 'string' && data === '' ) { return deleteChunks(datakey, 0); } const item = { @@ -1682,10 +1679,9 @@ vAPI.cloud = (( ) => { tstamp: Date.now(), data, }; - const json = JSON.stringify(item); const encoded = encode instanceof Function - ? await encode(json) - : json; + ? await encode(item) + : JSON.stringify(item); // Chunkify taking into account QUOTA_BYTES_PER_ITEM: // https://developer.chrome.com/extensions/storage#property-sync @@ -1750,13 +1746,16 @@ vAPI.cloud = (( ) => { i += 1; } encoded = encoded.join(''); - const json = decode instanceof Function - ? await decode(encoded) - : encoded; + let entry = null; try { - entry = JSON.parse(json); - } catch(ex) { + if ( decode instanceof Function ) { + entry = await decode(encoded) || null; + } + if ( typeof entry === 'string' ) { + entry = JSON.parse(entry); + } + } catch(_) { } return entry; }; diff --git a/src/js/assets.js b/src/js/assets.js index 5a550dfb4..6484d289d 100644 --- a/src/js/assets.js +++ b/src/js/assets.js @@ -528,12 +528,12 @@ function getAssetSourceRegistry() { assetSourceRegistryPromise = cacheStorage.get( 'assetSourceRegistry' ).then(bin => { - if ( - bin instanceof Object && - bin.assetSourceRegistry instanceof Object - ) { - assetSourceRegistry = bin.assetSourceRegistry; - return assetSourceRegistry; + if ( bin instanceof Object ) { + if ( bin.assetSourceRegistry instanceof Object ) { + assetSourceRegistry = bin.assetSourceRegistry; + ubolog('Loaded assetSourceRegistry'); + return assetSourceRegistry; + } } return assets.fetchText( µb.assetsBootstrapLocation || µb.assetsJsonPath @@ -543,6 +543,7 @@ function getAssetSourceRegistry() { : assets.fetchText(µb.assetsJsonPath); }).then(details => { updateAssetSourceRegistry(details.content, true); + ubolog('Loaded assetSourceRegistry'); return 
assetSourceRegistry; }); }); @@ -673,39 +674,27 @@ let assetCacheRegistryPromise; let assetCacheRegistry = {}; function getAssetCacheRegistry() { - if ( assetCacheRegistryPromise === undefined ) { - assetCacheRegistryPromise = cacheStorage.get( - 'assetCacheRegistry' - ).then(bin => { - if ( - bin instanceof Object && - bin.assetCacheRegistry instanceof Object - ) { - if ( Object.keys(assetCacheRegistry).length === 0 ) { - assetCacheRegistry = bin.assetCacheRegistry; - } else { - console.error( - 'getAssetCacheRegistry(): assetCacheRegistry reassigned!' - ); - if ( - Object.keys(bin.assetCacheRegistry).sort().join() !== - Object.keys(assetCacheRegistry).sort().join() - ) { - console.error( - 'getAssetCacheRegistry(): assetCacheRegistry changes overwritten!' - ); - } - } - } - return assetCacheRegistry; - }); + if ( assetCacheRegistryPromise !== undefined ) { + return assetCacheRegistryPromise; } - + assetCacheRegistryPromise = cacheStorage.get( + 'assetCacheRegistry' + ).then(bin => { + if ( bin instanceof Object === false ) { return; } + if ( bin.assetCacheRegistry instanceof Object === false ) { return; } + if ( Object.keys(assetCacheRegistry).length !== 0 ) { + return console.error('getAssetCacheRegistry(): assetCacheRegistry reassigned!'); + } + ubolog('Loaded assetCacheRegistry'); + assetCacheRegistry = bin.assetCacheRegistry; + }).then(( ) => + assetCacheRegistry + ); return assetCacheRegistryPromise; } const saveAssetCacheRegistry = (( ) => { - const save = function() { + const save = ( ) => { timer.off(); cacheStorage.set({ assetCacheRegistry }); }; @@ -726,7 +715,9 @@ async function assetCacheRead(assetKey, updateReadTime = false) { const reportBack = function(content) { if ( content instanceof Blob ) { content = ''; } const details = { assetKey, content }; - if ( content === '' ) { details.error = 'ENOTFOUND'; } + if ( content === '' || content === undefined ) { + details.error = 'ENOTFOUND'; + } return details; }; @@ -742,17 +733,11 @@ async function 
assetCacheRead(assetKey, updateReadTime = false) { ) + ' ms'; } - if ( - bin instanceof Object === false || - bin.hasOwnProperty(internalKey) === false - ) { - return reportBack(''); - } + if ( bin instanceof Object === false ) { return reportBack(''); } + if ( bin.hasOwnProperty(internalKey) === false ) { return reportBack(''); } const entry = assetCacheRegistry[assetKey]; - if ( entry === undefined ) { - return reportBack(''); - } + if ( entry === undefined ) { return reportBack(''); } entry.readTime = Date.now(); if ( updateReadTime ) { @@ -762,34 +747,22 @@ async function assetCacheRead(assetKey, updateReadTime = false) { return reportBack(bin[internalKey]); } -async function assetCacheWrite(assetKey, details) { - let content = ''; - let options = {}; - if ( typeof details === 'string' ) { - content = details; - } else if ( details instanceof Object ) { - content = details.content || ''; - options = details; - } - - if ( content === '' ) { +async function assetCacheWrite(assetKey, content, options = {}) { + if ( content === '' || content === undefined ) { return assetCacheRemove(assetKey); } - const cacheDict = await getAssetCacheRegistry(); + const { resourceTime, url } = options; - let entry = cacheDict[assetKey]; - if ( entry === undefined ) { - entry = cacheDict[assetKey] = {}; - } - entry.writeTime = entry.readTime = Date.now(); - entry.resourceTime = options.resourceTime || 0; - if ( typeof options.url === 'string' ) { - entry.remoteURL = options.url; - } - cacheStorage.set({ - assetCacheRegistry, - [`cache/${assetKey}`]: content + getAssetCacheRegistry().then(cacheDict => { + const entry = cacheDict[assetKey] || {}; + cacheDict[assetKey] = entry; + entry.writeTime = entry.readTime = Date.now(); + entry.resourceTime = resourceTime || 0; + if ( typeof url === 'string' ) { + entry.remoteURL = url; + } + cacheStorage.set({ assetCacheRegistry, [`cache/${assetKey}`]: content }); }); const result = { assetKey, content }; @@ -800,21 +773,31 @@ async function 
assetCacheWrite(assetKey, details) { return result; } -async function assetCacheRemove(pattern) { +async function assetCacheRemove(pattern, options = {}) { const cacheDict = await getAssetCacheRegistry(); const removedEntries = []; const removedContent = []; for ( const assetKey in cacheDict ) { - if ( pattern instanceof RegExp && !pattern.test(assetKey) ) { - continue; - } - if ( typeof pattern === 'string' && assetKey !== pattern ) { - continue; + if ( pattern instanceof RegExp ) { + if ( pattern.test(assetKey) === false ) { continue; } + } else if ( typeof pattern === 'string' ) { + if ( assetKey !== pattern ) { continue; } } removedEntries.push(assetKey); - removedContent.push('cache/' + assetKey); + removedContent.push(`cache/${assetKey}`); delete cacheDict[assetKey]; } + if ( options.janitor && pattern instanceof RegExp ) { + const re = new RegExp( + pattern.source.replace(/^\^/, 'cache\/'), + pattern.flags + ); + const keys = await cacheStorage.keys(re); + for ( const key of keys ) { + removedContent.push(key); + ubolog(`Removing stray ${key}`); + } + } if ( removedContent.length !== 0 ) { await Promise.all([ cacheStorage.remove(removedContent), @@ -980,8 +963,7 @@ assets.get = async function(assetKey, options = {}) { } if ( details.content === '' ) { continue; } if ( reIsExternalPath.test(contentURL) && options.dontCache !== true ) { - assetCacheWrite(assetKey, { - content: details.content, + assetCacheWrite(assetKey, details.content, { url: contentURL, silent: options.silent === true, }); @@ -1057,8 +1039,7 @@ async function getRemote(assetKey, options = {}) { } // Success - assetCacheWrite(assetKey, { - content: result.content, + assetCacheWrite(assetKey, result.content, { url: contentURL, resourceTime: result.resourceTime || 0, }); @@ -1101,6 +1082,17 @@ assets.put = async function(assetKey, content) { /******************************************************************************/ +assets.toCache = async function(assetKey, content) { + return 
assetCacheWrite(assetKey, content); +}; + +assets.fromCache = async function(assetKey) { + const details = await assetCacheRead(assetKey); + return details && details.content; +}; + +/******************************************************************************/ + assets.metadata = async function() { await Promise.all([ getAssetSourceRegistry(), @@ -1147,8 +1139,8 @@ assets.metadata = async function() { assets.purge = assetCacheMarkAsDirty; -assets.remove = function(pattern) { - return assetCacheRemove(pattern); +assets.remove = function(...args) { + return assetCacheRemove(...args); }; assets.rmrf = function() { @@ -1300,8 +1292,7 @@ async function diffUpdater() { 'Diff-Path', 'Diff-Expires', ]); - assetCacheWrite(data.assetKey, { - content: data.text, + assetCacheWrite(data.assetKey, data.text, { resourceTime: metadata.lastModified || 0, }); metadata.diffUpdated = true; diff --git a/src/js/background.js b/src/js/background.js index 470a13177..80bab5a95 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -56,6 +56,7 @@ const hiddenSettingsDefault = { blockingProfiles: '11111/#F00 11010/#C0F 11001/#00F 00001', cacheStorageAPI: 'unset', cacheStorageCompression: true, + cacheStorageMultithread: 2, cacheControlForFirefox1376932: 'no-cache, no-store, must-revalidate', cloudStorageCompression: true, cnameIgnoreList: 'unset', @@ -181,7 +182,7 @@ const µBlock = { // jshint ignore:line // Read-only systemSettings: { compiledMagic: 57, // Increase when compiled format changes - selfieMagic: 57, // Increase when selfie format changes + selfieMagic: 58, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/base64-custom.js b/src/js/base64-custom.js index 34141b8a0..0d9a43fa2 100644 --- a/src/js/base64-custom.js +++ b/src/js/base64-custom.js @@ -46,105 +46,6 @@ const digitToVal = new Uint8Array(128); } } -// The sparse base64 codec is best for buffers which contains a lot of 
-// small u32 integer values. Those small u32 integer values are better -// represented with stringified integers, because small values can be -// represented with fewer bits than the usual base64 codec. For example, -// 0 become '0 ', i.e. 16 bits instead of 48 bits with official base64 -// codec. - -const sparseBase64 = { - magic: 'Base64_1', - - encode: function(arrbuf, arrlen) { - const inputLength = (arrlen + 3) >>> 2; - const inbuf = new Uint32Array(arrbuf, 0, inputLength); - const outputLength = this.magic.length + 7 + inputLength * 7; - const outbuf = new Uint8Array(outputLength); - // magic bytes - let j = 0; - for ( let i = 0; i < this.magic.length; i++ ) { - outbuf[j++] = this.magic.charCodeAt(i); - } - // array size - let v = inputLength; - do { - outbuf[j++] = valToDigit[v & 0b111111]; - v >>>= 6; - } while ( v !== 0 ); - outbuf[j++] = 0x20 /* ' ' */; - // array content - for ( let i = 0; i < inputLength; i++ ) { - v = inbuf[i]; - do { - outbuf[j++] = valToDigit[v & 0b111111]; - v >>>= 6; - } while ( v !== 0 ); - outbuf[j++] = 0x20 /* ' ' */; - } - if ( typeof TextDecoder === 'undefined' ) { - return JSON.stringify( - Array.from(new Uint32Array(outbuf.buffer, 0, j >>> 2)) - ); - } - const textDecoder = new TextDecoder(); - return textDecoder.decode(new Uint8Array(outbuf.buffer, 0, j)); - }, - - decode: function(instr, arrbuf) { - if ( instr.charCodeAt(0) === 0x5B /* '[' */ ) { - const inbuf = JSON.parse(instr); - if ( arrbuf instanceof ArrayBuffer === false ) { - return new Uint32Array(inbuf); - } - const outbuf = new Uint32Array(arrbuf); - outbuf.set(inbuf); - return outbuf; - } - if ( instr.startsWith(this.magic) === false ) { - throw new Error('Invalid µBlock.base64 encoding'); - } - const inputLength = instr.length; - const outputLength = this.decodeSize(instr) >> 2; - const outbuf = arrbuf instanceof ArrayBuffer === false - ? 
new Uint32Array(outputLength) - : new Uint32Array(arrbuf); - let i = instr.indexOf(' ', this.magic.length) + 1; - if ( i === -1 ) { - throw new Error('Invalid µBlock.base64 encoding'); - } - // array content - let j = 0; - for (;;) { - if ( j === outputLength || i >= inputLength ) { break; } - let v = 0, l = 0; - for (;;) { - const c = instr.charCodeAt(i++); - if ( c === 0x20 /* ' ' */ ) { break; } - v += digitToVal[c] << l; - l += 6; - } - outbuf[j++] = v; - } - if ( i < inputLength || j < outputLength ) { - throw new Error('Invalid µBlock.base64 encoding'); - } - return outbuf; - }, - - decodeSize: function(instr) { - if ( instr.startsWith(this.magic) === false ) { return 0; } - let v = 0, l = 0, i = this.magic.length; - for (;;) { - const c = instr.charCodeAt(i++); - if ( c === 0x20 /* ' ' */ ) { break; } - v += digitToVal[c] << l; - l += 6; - } - return v << 2; - }, -}; - // The dense base64 codec is best for typed buffers which values are // more random. For example, buffer contents as a result of compression // contain less repetitive values and thus the content is more @@ -154,7 +55,7 @@ const sparseBase64 = { // ArrayBuffer fails, the content of the resulting Uint8Array is // non-sensical. WASM-related? 
-const denseBase64 = { +export const denseBase64 = { magic: 'DenseBase64_1', encode: function(input) { @@ -242,5 +143,3 @@ const denseBase64 = { }; /******************************************************************************/ - -export { denseBase64, sparseBase64 }; diff --git a/src/js/biditrie.js b/src/js/biditrie.js index d0f64ee5b..132931638 100644 --- a/src/js/biditrie.js +++ b/src/js/biditrie.js @@ -576,34 +576,19 @@ class BidiTrieContainer { }; } - serialize(encoder) { - if ( encoder instanceof Object ) { - return encoder.encode( - this.buf32.buffer, - this.buf32[CHAR1_SLOT] - ); - } - return Array.from( - new Uint32Array( - this.buf32.buffer, - 0, - this.buf32[CHAR1_SLOT] + 3 >>> 2 - ) + toSelfie() { + return this.buf32.subarray( + 0, + this.buf32[CHAR1_SLOT] + 3 >>> 2 ); } - unserialize(selfie, decoder) { - const shouldDecode = typeof selfie === 'string'; - let byteLength = shouldDecode - ? decoder.decodeSize(selfie) - : selfie.length << 2; + fromSelfie(selfie) { + if ( selfie instanceof Uint32Array === false ) { return false; } + let byteLength = selfie.length << 2; if ( byteLength === 0 ) { return false; } this.reallocateBuf(byteLength); - if ( shouldDecode ) { - decoder.decode(selfie, this.buf8.buffer); - } else { - this.buf32.set(selfie); - } + this.buf32.set(selfie); return true; } diff --git a/src/js/cachestorage.js b/src/js/cachestorage.js index ef056af95..e70fc3229 100644 --- a/src/js/cachestorage.js +++ b/src/js/cachestorage.js @@ -19,179 +19,362 @@ Home: https://github.com/gorhill/uBlock */ -/* global browser, IDBDatabase, indexedDB */ +/* global browser, indexedDB */ 'use strict'; /******************************************************************************/ import lz4Codec from './lz4.js'; -import µb from './background.js'; import webext from './webext.js'; +import µb from './background.js'; +import { ubolog } from './console.js'; +import * as scuo from './scuo-serializer.js'; 
/******************************************************************************/ -// The code below has been originally manually imported from: -// Commit: https://github.com/nikrolls/uBlock-Edge/commit/d1538ea9bea89d507219d3219592382eee306134 -// Commit date: 29 October 2016 -// Commit author: https://github.com/nikrolls -// Commit message: "Implement cacheStorage using IndexedDB" - -// The original imported code has been subsequently modified as it was not -// compatible with Firefox. -// (a Promise thing, see https://github.com/dfahlander/Dexie.js/issues/317) -// Furthermore, code to migrate from browser.storage.local to vAPI.storage -// has been added, for seamless migration of cache-related entries into -// indexedDB. - -// https://bugzilla.mozilla.org/show_bug.cgi?id=1371255 -// Firefox-specific: we use indexedDB because browser.storage.local() has -// poor performance in Firefox. -// https://github.com/uBlockOrigin/uBlock-issues/issues/328 -// Use IndexedDB for Chromium as well, to take advantage of LZ4 -// compression. -// https://github.com/uBlockOrigin/uBlock-issues/issues/399 -// Revert Chromium support of IndexedDB, use advanced setting to force -// IndexedDB. -// https://github.com/uBlockOrigin/uBlock-issues/issues/409 -// Allow forcing the use of webext storage on Firefox. - const STORAGE_NAME = 'uBlock0CacheStorage'; +const extensionStorage = webext.storage.local; -// Default to webext storage. 
-const storageLocal = webext.storage.local; - -let storageReadyResolve; -const storageReadyPromise = new Promise(resolve => { - storageReadyResolve = resolve; -}); - -const cacheStorage = { - name: 'browser.storage.local', - get(...args) { - return storageReadyPromise.then(( ) => - storageLocal.get(...args).catch(reason => { - console.log(reason); - }) - ); - }, - set(...args) { - return storageReadyPromise.then(( ) => - storageLocal.set(...args).catch(reason => { - console.log(reason); - }) - ); - }, - remove(...args) { - return storageReadyPromise.then(( ) => - storageLocal.remove(...args).catch(reason => { - console.log(reason); - }) - ); - }, - clear(...args) { - return storageReadyPromise.then(( ) => - storageLocal.clear(...args).catch(reason => { - console.log(reason); - }) - ); - }, - select: function(selectedBackend) { - let actualBackend = selectedBackend; - if ( actualBackend === undefined || actualBackend === 'unset' ) { - actualBackend = vAPI.webextFlavor.soup.has('firefox') - ? 
'indexedDB' - : 'browser.storage.local'; - } - if ( actualBackend === 'indexedDB' ) { - return selectIDB().then(success => { - if ( success || selectedBackend === 'indexedDB' ) { - clearWebext(); - storageReadyResolve(); - return 'indexedDB'; - } - clearIDB(); - storageReadyResolve(); - return 'browser.storage.local'; - }); - } - if ( actualBackend === 'browser.storage.local' ) { - clearIDB(); - } - storageReadyResolve(); - return Promise.resolve('browser.storage.local'); - - }, - error: undefined +const keysFromGetArg = arg => { + if ( arg === null || arg === undefined ) { return []; } + const type = typeof arg; + if ( type === 'string' ) { return [ arg ]; } + if ( Array.isArray(arg) ) { return arg; } + if ( type !== 'object' ) { return; } + return Object.keys(arg); }; +// Cache API is subject to quota so we will use it only for what is key +// performance-wise +const shouldCache = bin => { + const out = {}; + for ( const key of Object.keys(bin) ) { + if ( key.startsWith('cache/') ) { + if ( /^cache\/(compiled|selfie)\//.test(key) === false ) { continue; } + } + out[key] = bin[key]; + } + return out; +}; + +/******************************************************************************* + * + * Extension storage + * + * Always available. 
+ * + * */ + +const cacheStorage = (( ) => { + + const LARGE = 65536; + + const compress = async (key, data) => { + const isLarge = typeof data === 'string' && data.length >= LARGE; + const µbhs = µb.hiddenSettings; + const after = await scuo.serializeAsync(data, { + compress: isLarge && µbhs.cacheStorageCompression, + multithreaded: isLarge && µbhs.cacheStorageMultithread || 0, + }); + return { key, data: after }; + }; + + const decompress = async (key, data) => { + if ( scuo.canDeserialize(data) === false ) { + return { key, data }; + } + const isLarge = data.length >= LARGE; + const after = await scuo.deserializeAsync(data, { + multithreaded: isLarge && µb.hiddenSettings.cacheStorageMultithread || 0, + }); + return { key, data: after }; + }; + + return { + name: 'browser.storage.local', + + get(arg) { + const keys = arg; + return cacheAPI.get(keysFromGetArg(arg)).then(bin => { + if ( bin !== undefined ) { return bin; } + return extensionStorage.get(keys).catch(reason => { + ubolog(reason); + }); + }).then(bin => { + if ( bin instanceof Object === false ) { return bin; } + const promises = []; + for ( const key of Object.keys(bin) ) { + promises.push(decompress(key, bin[key])); + } + return Promise.all(promises); + }).then(results => { + const bin = {}; + for ( const { key, data } of results ) { + bin[key] = data; + } + return bin; + }).catch(reason => { + ubolog(reason); + }); + }, + + async keys(regex) { + const results = await Promise.all([ + cacheAPI.keys(regex), + extensionStorage.get(null).catch(( ) => {}), + ]); + const keys = new Set(results[0]); + const bin = results[1] || {}; + for ( const key of Object.keys(bin) ) { + if ( regex && regex.test(key) === false ) { continue; } + keys.add(key); + } + return keys; + }, + + async set(keyvalStore) { + const keys = Object.keys(keyvalStore); + if ( keys.length === 0 ) { return; } + const promises = []; + for ( const key of keys ) { + promises.push(compress(key, keyvalStore[key])); + } + const results = await 
Promise.all(promises); + const serializedStore = {}; + for ( const { key, data } of results ) { + serializedStore[key] = data; + } + cacheAPI.set(shouldCache(serializedStore)); + return extensionStorage.set(serializedStore).catch(reason => { + ubolog(reason); + }); + }, + + remove(...args) { + cacheAPI.remove(...args); + return extensionStorage.remove(...args).catch(reason => { + ubolog(reason); + }); + }, + + clear(...args) { + cacheAPI.clear(...args); + return extensionStorage.clear(...args).catch(reason => { + ubolog(reason); + }); + }, + + async migrate(cacheAPI) { + if ( cacheAPI === 'browser.storage.local' ) { return; } + if ( cacheAPI !== 'indexedDB' ) { + if ( vAPI.webextFlavor.soup.has('firefox') === false ) { return; } + } + if ( browser.extension.inIncognitoContext ) { return; } + // Copy all items to new cache storage + const bin = await idbStorage.get(null); + if ( typeof bin !== 'object' || bin === null ) { return; } + const toMigrate = []; + for ( const key of Object.keys(bin) ) { + if ( key.startsWith('cache/selfie/') ) { continue; } + ubolog(`Migrating ${key}=${JSON.stringify(bin[key]).slice(0,32)}`); + toMigrate.push(cacheStorage.set({ [key]: bin[key] })); + } + idbStorage.clear(); + return Promise.all(toMigrate); + }, + + error: undefined + }; +})(); + // Not all platforms support getBytesInUse -if ( storageLocal.getBytesInUse instanceof Function ) { +if ( extensionStorage.getBytesInUse instanceof Function ) { cacheStorage.getBytesInUse = function(...args) { - return storageLocal.getBytesInUse(...args).catch(reason => { - console.log(reason); + return extensionStorage.getBytesInUse(...args).catch(reason => { + ubolog(reason); }); }; } -// Reassign API entries to that of indexedDB-based ones -const selectIDB = async function() { - let db; - let dbPromise; +/******************************************************************************* + * + * Cache API + * + * Purpose is to mirror cache-related items from extension storage, as its + * read/write 
operations are faster. May not be available/populated in + * private/incognito mode. + * + * */ - const noopfn = function () { - }; - - const disconnect = function() { - dbTimer.off(); - if ( db instanceof IDBDatabase ) { - db.close(); - db = undefined; +const cacheAPI = (( ) => { + const caches = globalThis.caches; + const cacheStoragePromise = new Promise(resolve => { + if ( typeof caches !== 'object' || caches === null ) { + ubolog('CacheStorage API not available'); + resolve(null); + return; } - }; - - const dbTimer = vAPI.defer.create(( ) => { - disconnect(); + resolve(caches.open(STORAGE_NAME).catch(reason => { + ubolog(reason); + })); }); - const keepAlive = function() { - dbTimer.offon(Math.max( - µb.hiddenSettings.autoUpdateAssetFetchPeriod * 2 * 1000, - 180000 - )); + const urlPrefix = 'https://ublock0.invalid/'; + + const keyToURL = key => + `${urlPrefix}${encodeURIComponent(key)}`; + + const urlToKey = url => + decodeURIComponent(url.slice(urlPrefix.length)); + + const getOne = async key => { + const cache = await cacheStoragePromise; + if ( cache === null ) { return; } + return cache.match(keyToURL(key)).then(response => { + if ( response instanceof Response === false ) { return; } + return response.text(); + }).then(text => { + if ( text === undefined ) { return; } + return { key, text }; + }).catch(reason => { + ubolog(reason); + }); }; - // https://github.com/gorhill/uBlock/issues/3156 - // I have observed that no event was fired in Tor Browser 7.0.7 + - // medium security level after the request to open the database was - // created. When this occurs, I have also observed that the `error` - // property was already set, so this means uBO can detect here whether - // the database can be opened successfully. A try-catch block is - // necessary when reading the `error` property because we are not - // allowed to read this property outside of event handlers in newer - // implementation of IDBRequest (my understanding). 
+ const getAll = async ( ) => { + const cache = await cacheStoragePromise; + if ( cache === null ) { return; } + return cache.keys().then(requests => { + const promises = []; + for ( const request of requests ) { + promises.push(getOne(urlToKey(request.url))); + } + return Promise.all(promises); + }).then(responses => { + const bin = {}; + for ( const response of responses ) { + if ( response === undefined ) { continue; } + bin[response.key] = response.text; + } + return bin; + }).catch(reason => { + ubolog(reason); + }); + }; + + const setOne = async (key, text) => { + if ( text === undefined ) { return removeOne(key); } + const blob = new Blob([ text ], { type: 'text/plain;charset=utf-8'}); + const cache = await cacheStoragePromise; + if ( cache === null ) { return; } + return cache + .put(keyToURL(key), new Response(blob)) + .catch(reason => { + ubolog(reason); + }); + }; + + const removeOne = async key => { + const cache = await cacheStoragePromise; + if ( cache === null ) { return; } + return cache.delete(keyToURL(key)).catch(reason => { + ubolog(reason); + }); + }; + + return { + async get(arg) { + const keys = keysFromGetArg(arg); + if ( keys === undefined ) { return; } + if ( keys.length === 0 ) { + return getAll(); + } + const bin = {}; + const toFetch = keys.slice(); + const hasDefault = typeof arg === 'object' && Array.isArray(arg) === false; + for ( let i = 0; i < toFetch.length; i++ ) { + const key = toFetch[i]; + if ( hasDefault && arg[key] !== undefined ) { + bin[key] = arg[key]; + } + toFetch[i] = getOne(key); + } + const responses = await Promise.all(toFetch); + for ( const response of responses ) { + if ( response instanceof Object === false ) { continue; } + const { key, text } = response; + if ( typeof key !== 'string' ) { continue; } + if ( typeof text !== 'string' ) { continue; } + bin[key] = text; + } + if ( Object.keys(bin).length === 0 ) { return; } + return bin; + }, + + async keys(regex) { + const cache = await cacheStoragePromise; + if ( 
cache === null ) { return []; } + return cache.keys().then(requests => + requests.map(r => urlToKey(r.url)) + .filter(k => regex === undefined || regex.test(k)) + ).catch(( ) => []); + }, + + async set(keyvalStore) { + const keys = Object.keys(keyvalStore); + if ( keys.length === 0 ) { return; } + const promises = []; + for ( const key of keys ) { + promises.push(setOne(key, keyvalStore[key])); + } + return Promise.all(promises); + }, + + async remove(keys) { + const toRemove = []; + if ( typeof keys === 'string' ) { + toRemove.push(removeOne(keys)); + } else if ( Array.isArray(keys) ) { + for ( const key of keys ) { + toRemove.push(removeOne(key)); + } + } + return Promise.all(toRemove); + }, + + async clear() { + return globalThis.caches.delete(STORAGE_NAME).catch(reason => { + ubolog(reason); + }); + }, + }; +})(); + +/******************************************************************************* + * + * IndexedDB + * + * Deprecated, exists only for the purpose of migrating from older versions. 
+ * + * */ + +const idbStorage = (( ) => { + let dbPromise; const getDb = function() { - keepAlive(); - if ( db !== undefined ) { - return Promise.resolve(db); - } - if ( dbPromise !== undefined ) { - return dbPromise; - } + if ( dbPromise !== undefined ) { return dbPromise; } dbPromise = new Promise(resolve => { let req; try { req = indexedDB.open(STORAGE_NAME, 1); if ( req.error ) { - console.log(req.error); + ubolog(req.error); req = undefined; } } catch(ex) { } if ( req === undefined ) { - db = null; - dbPromise = undefined; return resolve(null); } req.onupgradeneeded = function(ev) { @@ -215,24 +398,16 @@ const selectIDB = async function() { req.onsuccess = function(ev) { if ( resolve === undefined ) { return; } req = undefined; - db = ev.target.result; - dbPromise = undefined; - resolve(db); + resolve(ev.target.result); resolve = undefined; }; req.onerror = req.onblocked = function() { if ( resolve === undefined ) { return; } - req = undefined; - console.log(this.error); - db = null; - dbPromise = undefined; resolve(null); resolve = undefined; }; vAPI.defer.once(5000).then(( ) => { if ( resolve === undefined ) { return; } - db = null; - dbPromise = undefined; resolve(null); resolve = undefined; }); @@ -253,60 +428,12 @@ const selectIDB = async function() { }); }; - const toBlob = function(data) { - const value = data instanceof Uint8Array - ? 
new Blob([ data ]) - : data; - return Promise.resolve(value); - }; - - const compress = function(store, key, data) { - return lz4Codec.encode(data, toBlob).then(value => { - store.push({ key, value }); - }); - }; - const decompress = function(store, key, data) { return lz4Codec.decode(data, fromBlob).then(data => { store[key] = data; }); }; - const getFromDb = async function(keys, keyvalStore, callback) { - if ( typeof callback !== 'function' ) { return; } - if ( keys.length === 0 ) { return callback(keyvalStore); } - const promises = []; - const gotOne = function() { - if ( typeof this.result !== 'object' ) { return; } - const { key, value } = this.result; - keyvalStore[key] = value; - if ( value instanceof Blob === false ) { return; } - promises.push(decompress(keyvalStore, key, value)); - }; - try { - const db = await getDb(); - if ( !db ) { return callback(); } - const transaction = db.transaction(STORAGE_NAME, 'readonly'); - transaction.oncomplete = - transaction.onerror = - transaction.onabort = ( ) => { - Promise.all(promises).then(( ) => { - callback(keyvalStore); - }); - }; - const table = transaction.objectStore(STORAGE_NAME); - for ( const key of keys ) { - const req = table.get(key); - req.onsuccess = gotOne; - req.onerror = noopfn; - } - } - catch(reason) { - console.info(`cacheStorage.getFromDb() failed: ${reason}`); - callback(); - } - }; - const visitAllFromDb = async function(visitFn) { const db = await getDb(); if ( !db ) { return visitFn(); } @@ -341,190 +468,40 @@ const selectIDB = async function() { if ( entry.value instanceof Blob === false ) { return; } promises.push(decompress(keyvalStore, key, value)); }).catch(reason => { - console.info(`cacheStorage.getAllFromDb() failed: ${reason}`); + ubolog(`cacheStorage.getAllFromDb() failed: ${reason}`); callback(); }); }; - // https://github.com/uBlockOrigin/uBlock-issues/issues/141 - // Mind that IDBDatabase.transaction() and IDBObjectStore.put() - // can throw: - // 
https://developer.mozilla.org/en-US/docs/Web/API/IDBDatabase/transaction - // https://developer.mozilla.org/en-US/docs/Web/API/IDBObjectStore/put - - const putToDb = async function(keyvalStore, callback) { - if ( typeof callback !== 'function' ) { - callback = noopfn; - } - const keys = Object.keys(keyvalStore); - if ( keys.length === 0 ) { return callback(); } - const promises = [ getDb() ]; - const entries = []; - const dontCompress = - µb.hiddenSettings.cacheStorageCompression !== true; - for ( const key of keys ) { - const value = keyvalStore[key]; - const isString = typeof value === 'string'; - if ( isString === false || dontCompress ) { - entries.push({ key, value }); - continue; - } - promises.push(compress(entries, key, value)); - } - const finish = ( ) => { - if ( callback === undefined ) { return; } - let cb = callback; - callback = undefined; - cb(); - }; - try { - const results = await Promise.all(promises); - const db = results[0]; - if ( !db ) { return callback(); } - const transaction = db.transaction( - STORAGE_NAME, - 'readwrite' - ); - transaction.oncomplete = - transaction.onerror = - transaction.onabort = finish; - const table = transaction.objectStore(STORAGE_NAME); - for ( const entry of entries ) { - table.put(entry); - } - } catch (ex) { - finish(); - } - }; - - const deleteFromDb = async function(input, callback) { - if ( typeof callback !== 'function' ) { - callback = noopfn; - } - const keys = Array.isArray(input) ? 
input.slice() : [ input ]; - if ( keys.length === 0 ) { return callback(); } - const finish = ( ) => { - if ( callback === undefined ) { return; } - let cb = callback; - callback = undefined; - cb(); - }; - try { - const db = await getDb(); - if ( !db ) { return callback(); } - const transaction = db.transaction(STORAGE_NAME, 'readwrite'); - transaction.oncomplete = - transaction.onerror = - transaction.onabort = finish; - const table = transaction.objectStore(STORAGE_NAME); - for ( const key of keys ) { - table.delete(key); - } - } catch (ex) { - finish(); - } - }; - const clearDb = async function(callback) { if ( typeof callback !== 'function' ) { - callback = noopfn; + callback = ()=>{}; } try { const db = await getDb(); if ( !db ) { return callback(); } - const transaction = db.transaction(STORAGE_NAME, 'readwrite'); - transaction.oncomplete = - transaction.onerror = - transaction.onabort = ( ) => { - callback(); - }; - transaction.objectStore(STORAGE_NAME).clear(); + db.close(); + indexedDB.deleteDatabase(STORAGE_NAME); + callback(); } catch(reason) { - console.info(`cacheStorage.clearDb() failed: ${reason}`); callback(); } }; - await getDb(); - if ( !db ) { return false; } - - cacheStorage.name = 'indexedDB'; - cacheStorage.get = function get(keys) { - return storageReadyPromise.then(( ) => - new Promise(resolve => { - if ( keys === null ) { - return getAllFromDb(bin => resolve(bin)); - } - let toRead, output = {}; - if ( typeof keys === 'string' ) { - toRead = [ keys ]; - } else if ( Array.isArray(keys) ) { - toRead = keys; - } else /* if ( typeof keys === 'object' ) */ { - toRead = Object.keys(keys); - output = keys; - } - getFromDb(toRead, output, bin => resolve(bin)); - }) - ); - }; - cacheStorage.set = function set(keys) { - return storageReadyPromise.then(( ) => - new Promise(resolve => { - putToDb(keys, details => resolve(details)); - }) - ); - }; - cacheStorage.remove = function remove(keys) { - return storageReadyPromise.then(( ) => - new 
Promise(resolve => { - deleteFromDb(keys, ( ) => resolve()); - }) - ); - }; - cacheStorage.clear = function clear() { - return storageReadyPromise.then(( ) => - new Promise(resolve => { + return { + get: function get() { + return new Promise(resolve => { + return getAllFromDb(bin => resolve(bin)); + }); + }, + clear: function clear() { + return new Promise(resolve => { clearDb(( ) => resolve()); - }) - ); + }); + }, }; - cacheStorage.getBytesInUse = function getBytesInUse() { - return Promise.resolve(0); - }; - return true; -}; - -// https://github.com/uBlockOrigin/uBlock-issues/issues/328 -// Delete cache-related entries from webext storage. -const clearWebext = async function() { - let bin; - try { - bin = await webext.storage.local.get('assetCacheRegistry'); - } catch(ex) { - console.error(ex); - } - if ( bin instanceof Object === false ) { return; } - if ( bin.assetCacheRegistry instanceof Object === false ) { return; } - const toRemove = [ - 'assetCacheRegistry', - 'assetSourceRegistry', - ]; - for ( const key in bin.assetCacheRegistry ) { - if ( bin.assetCacheRegistry.hasOwnProperty(key) ) { - toRemove.push('cache/' + key); - } - } - webext.storage.local.remove(toRemove); -}; - -const clearIDB = function() { - try { - indexedDB.deleteDatabase(STORAGE_NAME); - } catch(ex) { - } -}; +})(); /******************************************************************************/ diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index f4782bc37..04fc93a8f 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -292,7 +292,7 @@ FilterContainer.prototype.reset = function() { this.highlyGeneric.complex.str = ''; this.highlyGeneric.complex.mru.reset(); - this.selfieVersion = 1; + this.selfieVersion = 2; }; /******************************************************************************/ @@ -576,9 +576,11 @@ FilterContainer.prototype.toSelfie = function() { acceptedCount: this.acceptedCount, discardedCount: this.discardedCount, 
specificFilters: this.specificFilters.toSelfie(), - lowlyGeneric: Array.from(this.lowlyGeneric), - highSimpleGenericHideArray: Array.from(this.highlyGeneric.simple.dict), - highComplexGenericHideArray: Array.from(this.highlyGeneric.complex.dict), + lowlyGeneric: this.lowlyGeneric, + highSimpleGenericHideDict: this.highlyGeneric.simple.dict, + highSimpleGenericHideStr: this.highlyGeneric.simple.str, + highComplexGenericHideDict: this.highlyGeneric.complex.dict, + highComplexGenericHideStr: this.highlyGeneric.complex.str, }; }; @@ -593,11 +595,11 @@ FilterContainer.prototype.fromSelfie = function(selfie) { this.acceptedCount = selfie.acceptedCount; this.discardedCount = selfie.discardedCount; this.specificFilters.fromSelfie(selfie.specificFilters); - this.lowlyGeneric = new Map(selfie.lowlyGeneric); - this.highlyGeneric.simple.dict = new Set(selfie.highSimpleGenericHideArray); - this.highlyGeneric.simple.str = selfie.highSimpleGenericHideArray.join(',\n'); - this.highlyGeneric.complex.dict = new Set(selfie.highComplexGenericHideArray); - this.highlyGeneric.complex.str = selfie.highComplexGenericHideArray.join(',\n'); + this.lowlyGeneric = selfie.lowlyGeneric; + this.highlyGeneric.simple.dict = selfie.highSimpleGenericHideDict; + this.highlyGeneric.simple.str = selfie.highSimpleGenericHideStr; + this.highlyGeneric.complex.dict = selfie.highComplexGenericHideDict; + this.highlyGeneric.complex.str = selfie.highComplexGenericHideStr; this.frozen = true; }; diff --git a/src/js/hntrie.js b/src/js/hntrie.js index e8031a651..cc726db5d 100644 --- a/src/js/hntrie.js +++ b/src/js/hntrie.js @@ -445,28 +445,17 @@ class HNTrieContainer { }; } - serialize(encoder) { - if ( encoder instanceof Object ) { - return encoder.encode( - this.buf32.buffer, - this.buf32[CHAR1_SLOT] - ); - } - return Array.from( - new Uint32Array( - this.buf32.buffer, - 0, - this.buf32[CHAR1_SLOT] + 3 >>> 2 - ) + toSelfie() { + return this.buf32.subarray( + 0, + this.buf32[CHAR1_SLOT] + 3 >>> 2 ); } - 
unserialize(selfie, decoder) { + fromSelfie(selfie) { + if ( selfie instanceof Uint32Array === false ) { return false; } this.needle = ''; - const shouldDecode = typeof selfie === 'string'; - let byteLength = shouldDecode - ? decoder.decodeSize(selfie) - : selfie.length << 2; + let byteLength = selfie.length << 2; if ( byteLength === 0 ) { return false; } byteLength = roundToPageSize(byteLength); if ( this.wasmMemory !== null ) { @@ -477,14 +466,10 @@ class HNTrieContainer { this.buf = new Uint8Array(this.wasmMemory.buffer); this.buf32 = new Uint32Array(this.buf.buffer); } - } else if ( byteLength > this.buf.length ) { - this.buf = new Uint8Array(byteLength); - this.buf32 = new Uint32Array(this.buf.buffer); - } - if ( shouldDecode ) { - decoder.decode(selfie, this.buf.buffer); - } else { this.buf32.set(selfie); + } else { + this.buf32 = selfie; + this.buf = new Uint8Array(this.buf32.buffer); } // https://github.com/uBlockOrigin/uBlock-issues/issues/2925 this.buf[255] = 0; diff --git a/src/js/messaging.js b/src/js/messaging.js index ec3f0f4e5..38b03a409 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -45,6 +45,7 @@ import { dnrRulesetFromRawLists } from './static-dnr-filtering.js'; import { i18n$ } from './i18n.js'; import { redirectEngine } from './redirect-engine.js'; import * as sfp from './static-filtering-parser.js'; +import * as scuo from './scuo-serializer.js'; import { permanentFirewall, @@ -925,21 +926,6 @@ const fromBase64 = function(encoded) { return Promise.resolve(u8array !== undefined ? u8array : encoded); }; -const toBase64 = function(data) { - const value = data instanceof Uint8Array - ? denseBase64.encode(data) - : data; - return Promise.resolve(value); -}; - -const compress = function(json) { - return lz4Codec.encode(json, toBase64); -}; - -const decompress = function(encoded) { - return lz4Codec.decode(encoded, fromBase64); -}; - const onMessage = function(request, sender, callback) { // Cloud storage support is optional. 
if ( µb.cloudStorageSupported !== true ) { @@ -961,15 +947,25 @@ const onMessage = function(request, sender, callback) { return; case 'cloudPull': - request.decode = decompress; + request.decode = encoded => { + if ( scuo.canDeserialize(encoded) ) { + return scuo.deserializeAsync(encoded, { thread: true }); + } + // Legacy decoding: needs to be kept around for the foreseeable future. + return lz4Codec.decode(encoded, fromBase64); + }; return vAPI.cloud.pull(request).then(result => { callback(result); }); case 'cloudPush': - if ( µb.hiddenSettings.cloudStorageCompression ) { - request.encode = compress; - } + request.encode = data => { + const options = { + compress: µb.hiddenSettings.cloudStorageCompression, + thread: true, + }; + return scuo.serializeAsync(data, options); + }; return vAPI.cloud.push(request).then(result => { callback(result); }); diff --git a/src/js/redirect-engine.js b/src/js/redirect-engine.js index 2f5806603..7d70e35ee 100644 --- a/src/js/redirect-engine.js +++ b/src/js/redirect-engine.js @@ -24,11 +24,7 @@ /******************************************************************************/ import redirectableResources from './redirect-resources.js'; - -import { - LineIterator, - orphanizeString, -} from './text-utils.js'; +import { LineIterator, orphanizeString } from './text-utils.js'; /******************************************************************************/ @@ -448,33 +444,22 @@ class RedirectEngine { } selfieFromResources(storage) { - storage.put( - RESOURCES_SELFIE_NAME, - JSON.stringify({ - version: RESOURCES_SELFIE_VERSION, - aliases: Array.from(this.aliases), - resources: Array.from(this.resources), - }) - ); + return storage.toCache(RESOURCES_SELFIE_NAME, { + version: RESOURCES_SELFIE_VERSION, + aliases: this.aliases, + resources: this.resources, + }); } async resourcesFromSelfie(storage) { - const result = await storage.get(RESOURCES_SELFIE_NAME); - let selfie; - try { - selfie = JSON.parse(result.content); - } catch(ex) { - } - if 
( - selfie instanceof Object === false || - selfie.version !== RESOURCES_SELFIE_VERSION || - Array.isArray(selfie.resources) === false - ) { - return false; - } - this.aliases = new Map(selfie.aliases); - this.resources = new Map(); - for ( const [ token, entry ] of selfie.resources ) { + const selfie = await storage.fromCache(RESOURCES_SELFIE_NAME); + if ( selfie instanceof Object === false ) { return false; } + if ( selfie.version !== RESOURCES_SELFIE_VERSION ) { return false; } + if ( selfie.aliases instanceof Map === false ) { return false; } + if ( selfie.resources instanceof Map === false ) { return false; } + this.aliases = selfie.aliases; + this.resources = selfie.resources; + for ( const [ token, entry ] of this.resources ) { this.resources.set(token, RedirectEntry.fromDetails(entry)); } return true; diff --git a/src/js/reverselookup.js b/src/js/reverselookup.js index c21ca4bb1..e7bf24e94 100644 --- a/src/js/reverselookup.js +++ b/src/js/reverselookup.js @@ -62,7 +62,7 @@ const stopWorker = function() { }; const workerTTLTimer = vAPI.defer.create(stopWorker); -const workerTTL = { min: 5 }; +const workerTTL = { min: 1.5 }; const initWorker = function() { if ( worker === null ) { diff --git a/src/js/scriptlet-filtering-core.js b/src/js/scriptlet-filtering-core.js index 75818eb97..907844fbc 100644 --- a/src/js/scriptlet-filtering-core.js +++ b/src/js/scriptlet-filtering-core.js @@ -200,7 +200,7 @@ export class ScriptletFilteringEngine { } fromSelfie(selfie) { - if ( selfie instanceof Object === false ) { return false; } + if ( typeof selfie !== 'object' || selfie === null ) { return false; } if ( selfie.version !== VERSION ) { return false; } this.scriptletDB.fromSelfie(selfie); return true; diff --git a/src/js/scuo-serializer.js b/src/js/scuo-serializer.js new file mode 100644 index 000000000..1ffffa6be --- /dev/null +++ b/src/js/scuo-serializer.js @@ -0,0 +1,1307 @@ +/******************************************************************************* + + uBlock 
Origin - a browser extension to block requests. + Copyright (C) 2024-present Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. + + Home: https://github.com/gorhill/uBlock +*/ + +'use strict'; + +/******************************************************************************* + * + * Structured-Cloneable to Unicode-Only SERIALIZER + * + * Purpose: + * + * Serialize/deserialize arbitrary JS data to/from well-formed Unicode strings. + * + * The browser does not expose an API to serialize structured-cloneable types + * into a single string. JSON.stringify() does not support complex JavaScript + * objects, and does not support references to composite types. Unless the + * data to serialize is only JS strings, it is difficult to easily switch + * from one type of storage to another. + * + * Serializing to a well-formed Unicode string allows storing structured- + * cloneable data in any storage. Not all storages support storing binary data, + * but all storages support storing Unicode strings. 
+ * + * Structured-cloneable types: + * https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm#supported_types + * + * ----------------+------------------+------------------+---------------------- + * Data types | String | JSONable | structured-cloneable + * ================+============================================================ + * document.cookie | Yes | No | No + * ----------------+------------------+------------------+---------------------- + * localStorage | Yes | No | No + * ----------------+------------------+------------------+---------------------- + * IndexedDB | Yes | Yes | Yes + * ----------------+------------------+------------------+---------------------- + * browser.storage | Yes | Yes | No + * ----------------+------------------+------------------+---------------------- + * Cache API | Yes | No | No + * ----------------+------------------+------------------+---------------------- + * + * The above table shows that only JS strings can be persisted natively to all + * types of storage. The purpose of this library is to convert + * structured-cloneable data (which is a superset of JSONable data) into a + * single JS string. The resulting string is meant to be as small as possible. + * As a result, it is not human-readable, though it contains only printable + * ASCII characters -- and possibly Unicode characters beyond ASCII. + * + * The resulting JS string will not contain characters which require escaping + * should it be converted to a JSON value. However it may contain characters + * which require escaping should it be converted to a URI component. + * + * Characteristics: + * + * - Serializes/deserializes data to/from a single well-formed Unicode string + * - Strings do not require escaping, i.e. 
they are stored as-is + * - Supports multiple references to same object + * - Supports reference cycles + * - Supports synchronous and asynchronous API + * - Supports usage of Worker + * - Optionally supports LZ4 compression + * + * TODO: + * + * - Harden against unexpected conditions, such as corrupted string during + * deserialization. + * - Evaluate supporting checksum. + * + * */ + +const VERSION = 1; +const SEPARATORCHAR = ' '; +const SEPARATORCHARCODE = SEPARATORCHAR.charCodeAt(0); +const SENTINELCHAR = '!'; +const SENTINELCHARCODE = SENTINELCHAR.charCodeAt(0); +const MAGICPREFIX = `UOSC_${VERSION}${SEPARATORCHAR}`; +const MAGICLZ4PREFIX = `UOSC/lz4_${VERSION}${SEPARATORCHAR}`; +const FAILMARK = Number.MAX_SAFE_INTEGER; +// Avoid characters which require escaping when serialized to JSON: +const SAFECHARS = "&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~"; +const NUMSAFECHARS = SAFECHARS.length; +const BITS_PER_SAFECHARS = Math.log2(NUMSAFECHARS); + +const { intToChar, intToCharCode, charCodeToInt } = (( ) => { + const intToChar = []; + const intToCharCode = []; + const charCodeToInt = []; + for ( let i = 0; i < NUMSAFECHARS; i++ ) { + intToChar[i] = SAFECHARS.charAt(i); + intToCharCode[i] = SAFECHARS.charCodeAt(i); + charCodeToInt[i] = 0; + } + for ( let i = NUMSAFECHARS; i < 128; i++ ) { + intToChar[i] = ''; + intToCharCode[i] = 0; + charCodeToInt[i] = 0; + } + for ( let i = 0; i < SAFECHARS.length; i++ ) { + charCodeToInt[SAFECHARS.charCodeAt(i)] = i; + } + return { intToChar, intToCharCode, charCodeToInt }; +})(); + +let iota = 1; +const I_STRING_SMALL = iota++; +const I_STRING_LARGE = iota++; +const I_ZERO = iota++; +const I_INTEGER_SMALL_POS = iota++; +const I_INTEGER_SMALL_NEG = iota++; +const I_INTEGER_LARGE_POS = iota++; +const I_INTEGER_LARGE_NEG = iota++; +const I_BOOL_FALSE = iota++; +const I_BOOL_TRUE = iota++; +const I_NULL = iota++; +const I_UNDEFINED = iota++; +const I_FLOAT = iota++; +const I_REGEXP = 
iota++; +const I_DATE = iota++; +const I_REFERENCE = iota++; +const I_SMALL_OBJECT = iota++; +const I_LARGE_OBJECT = iota++; +const I_ARRAY_SMALL = iota++; +const I_ARRAY_LARGE = iota++; +const I_SET_SMALL = iota++; +const I_SET_LARGE = iota++; +const I_MAP_SMALL = iota++; +const I_MAP_LARGE = iota++; +const I_ARRAYBUFFER = iota++; +const I_INT8ARRAY = iota++; +const I_UINT8ARRAY = iota++; +const I_UINT8CLAMPEDARRAY = iota++; +const I_INT16ARRAY = iota++; +const I_UINT16ARRAY = iota++; +const I_INT32ARRAY = iota++; +const I_UINT32ARRAY = iota++; +const I_FLOAT32ARRAY = iota++; +const I_FLOAT64ARRAY = iota++; +const I_DATAVIEW = iota++; + +const C_STRING_SMALL = intToChar[I_STRING_SMALL]; +const C_STRING_LARGE = intToChar[I_STRING_LARGE]; +const C_ZERO = intToChar[I_ZERO]; +const C_INTEGER_SMALL_POS = intToChar[I_INTEGER_SMALL_POS]; +const C_INTEGER_SMALL_NEG = intToChar[I_INTEGER_SMALL_NEG]; +const C_INTEGER_LARGE_POS = intToChar[I_INTEGER_LARGE_POS]; +const C_INTEGER_LARGE_NEG = intToChar[I_INTEGER_LARGE_NEG]; +const C_BOOL_FALSE = intToChar[I_BOOL_FALSE]; +const C_BOOL_TRUE = intToChar[I_BOOL_TRUE]; +const C_NULL = intToChar[I_NULL]; +const C_UNDEFINED = intToChar[I_UNDEFINED]; +const C_FLOAT = intToChar[I_FLOAT]; +const C_REGEXP = intToChar[I_REGEXP]; +const C_DATE = intToChar[I_DATE]; +const C_REFERENCE = intToChar[I_REFERENCE]; +const C_SMALL_OBJECT = intToChar[I_SMALL_OBJECT]; +const C_LARGE_OBJECT = intToChar[I_LARGE_OBJECT]; +const C_ARRAY_SMALL = intToChar[I_ARRAY_SMALL]; +const C_ARRAY_LARGE = intToChar[I_ARRAY_LARGE]; +const C_SET_SMALL = intToChar[I_SET_SMALL]; +const C_SET_LARGE = intToChar[I_SET_LARGE]; +const C_MAP_SMALL = intToChar[I_MAP_SMALL]; +const C_MAP_LARGE = intToChar[I_MAP_LARGE]; +const C_ARRAYBUFFER = intToChar[I_ARRAYBUFFER]; +const C_INT8ARRAY = intToChar[I_INT8ARRAY]; +const C_UINT8ARRAY = intToChar[I_UINT8ARRAY]; +const C_UINT8CLAMPEDARRAY = intToChar[I_UINT8CLAMPEDARRAY]; +const C_INT16ARRAY = intToChar[I_INT16ARRAY]; +const 
C_UINT16ARRAY = intToChar[I_UINT16ARRAY]; +const C_INT32ARRAY = intToChar[I_INT32ARRAY]; +const C_UINT32ARRAY = intToChar[I_UINT32ARRAY]; +const C_FLOAT32ARRAY = intToChar[I_FLOAT32ARRAY]; +const C_FLOAT64ARRAY = intToChar[I_FLOAT64ARRAY]; +const C_DATAVIEW = intToChar[I_DATAVIEW]; + +// Just reuse already defined constants, we just need distinct values +const I_STRING = I_STRING_SMALL; +const I_NUMBER = I_FLOAT; +const I_BOOL = I_BOOL_FALSE; +const I_OBJECT = I_SMALL_OBJECT; +const I_ARRAY = I_ARRAY_SMALL; +const I_SET = I_SET_SMALL; +const I_MAP = I_MAP_SMALL; + +const typeToSerializedInt = { + 'string': I_STRING, + 'number': I_NUMBER, + 'boolean': I_BOOL, + 'object': I_OBJECT, +}; + +const xtypeToSerializedInt = { + '[object RegExp]': I_REGEXP, + '[object Date]': I_DATE, + '[object Array]': I_ARRAY, + '[object Set]': I_SET, + '[object Map]': I_MAP, + '[object ArrayBuffer]': I_ARRAYBUFFER, + '[object Int8Array]': I_INT8ARRAY, + '[object Uint8Array]': I_UINT8ARRAY, + '[object Uint8ClampedArray]': I_UINT8CLAMPEDARRAY, + '[object Int16Array]': I_INT16ARRAY, + '[object Uint16Array]': I_UINT16ARRAY, + '[object Int32Array]': I_INT32ARRAY, + '[object Uint32Array]': I_UINT32ARRAY, + '[object Float32Array]': I_FLOAT32ARRAY, + '[object Float64Array]': I_FLOAT64ARRAY, + '[object DataView]': I_DATAVIEW, +}; + +const typeToSerializedChar = { + '[object Int8Array]': C_INT8ARRAY, + '[object Uint8Array]': C_UINT8ARRAY, + '[object Uint8ClampedArray]': C_UINT8CLAMPEDARRAY, + '[object Int16Array]': C_INT16ARRAY, + '[object Uint16Array]': C_UINT16ARRAY, + '[object Int32Array]': C_INT32ARRAY, + '[object Uint32Array]': C_UINT32ARRAY, + '[object Float32Array]': C_FLOAT32ARRAY, + '[object Float64Array]': C_FLOAT64ARRAY, +}; + +const toArrayBufferViewConstructor = { + [`${I_INT8ARRAY}`]: Int8Array, + [`${I_UINT8ARRAY}`]: Uint8Array, + [`${I_UINT8CLAMPEDARRAY}`]: Uint8ClampedArray, + [`${I_INT16ARRAY}`]: Int16Array, + [`${I_UINT16ARRAY}`]: Uint16Array, + [`${I_INT32ARRAY}`]: Int32Array, + 
[`${I_UINT32ARRAY}`]: Uint32Array, + [`${I_FLOAT32ARRAY}`]: Float32Array, + [`${I_FLOAT64ARRAY}`]: Float64Array, + [`${I_DATAVIEW}`]: DataView, +}; + +/******************************************************************************/ + +const textDecoder = new TextDecoder(); +const textEncoder = new TextEncoder(); +const isInteger = Number.isInteger; + +const writeRefs = new Map(); +const writeBuffer = []; + +const readRefs = new Map(); +let readStr = ''; +let readPtr = 0; +let readEnd = 0; + +let refCounter = 1; + +let uint8Input = null; + +const uint8InputFromAsciiStr = s => { + if ( uint8Input === null || uint8Input.length < s.length ) { + uint8Input = new Uint8Array(s.length + 0x03FF & ~0x03FF); + } + textEncoder.encodeInto(s, uint8Input); + return uint8Input; +}; + +const isInstanceOf = (o, s) => { + return typeof o === 'object' && o !== null && ( + s === 'Object' || Object.prototype.toString.call(o) === `[object ${s}]` + ); +}; + +/******************************************************************************* + * + * A large Uint is always a positive integer (can be zero), assumed to be + * large, i.e. > NUMSAFECHARS -- but not necessarily. The serialized value has + * always at least one digit, and is always followed by a separator. + * + * */ + +const strFromLargeUint = i => { + let r = 0, s = ''; + for (;;) { + r = i % NUMSAFECHARS; + s += intToChar[r]; + i -= r; + if ( i === 0 ) { break; } + i /= NUMSAFECHARS; + } + return s + SEPARATORCHAR; +}; + +const deserializeLargeUint = ( ) => { + let c = readStr.charCodeAt(readPtr++); + let n = charCodeToInt[c]; + let m = 1; + while ( (c = readStr.charCodeAt(readPtr++)) !== SEPARATORCHARCODE ) { + m *= NUMSAFECHARS; + n += m * charCodeToInt[c]; + } + return n; +}; + +/******************************************************************************* + * + * Methods specific to ArrayBuffer objects to serialize optimally according to + * the content of the buffer. 
+ * + * In sparse mode, number of output bytes per input int32 (4-byte) value: + * [v === zero]: 1 byte (separator) + * [v !== zero]: n digits + 1 byte (separator) + * + * */ + +const sparseValueLen = v => v !== 0 + ? (Math.log2(v) / BITS_PER_SAFECHARS | 0) + 2 + : 1; + +const analyzeArrayBuffer = arrbuf => { + const byteLength = arrbuf.byteLength; + const uint32len = byteLength >>> 2; + const uint32arr = new Uint32Array(arrbuf, 0, uint32len); + let notzeroCount = 0; + for ( let i = uint32len-1; i >= 0; i-- ) { + if ( uint32arr[i] === 0 ) { continue; } + notzeroCount = i + 1; + break; + } + const end = notzeroCount + 1 <= uint32len ? notzeroCount << 2 : byteLength; + const endUint32 = end >>> 2; + const remUint8 = end & 0b11; + const denseSize = endUint32 * 5 + (remUint8 ? remUint8 + 1 : 0); + let sparseSize = 0; + for ( let i = 0; i < endUint32; i++ ) { + sparseSize += sparseValueLen(uint32arr[i]); + if ( sparseSize > denseSize ) { + return { end, dense: true, denseSize }; + } + } + if ( remUint8 !== 0 ) { + sparseSize += 1; // sentinel + const uint8arr = new Uint8Array(arrbuf, endUint32 << 2); + for ( let i = 0; i < remUint8; i++ ) { + sparseSize += sparseValueLen(uint8arr[i]); + } + } + return { end, dense: false, sparseSize }; +}; + +const denseArrayBufferToStr = (arrbuf, details) => { + const end = details.end; + const m = end % 4; + const n = end - m; + const uin32len = n >>> 2; + const uint32arr = new Uint32Array(arrbuf, 0, uin32len); + const output = new Uint8Array(details.denseSize); + let j = 0, v = 0; + for ( let i = 0; i < uin32len; i++ ) { + v = uint32arr[i]; + output[j+0] = intToCharCode[v % NUMSAFECHARS]; + v = v / NUMSAFECHARS | 0; + output[j+1] = intToCharCode[v % NUMSAFECHARS]; + v = v / NUMSAFECHARS | 0; + output[j+2] = intToCharCode[v % NUMSAFECHARS]; + v = v / NUMSAFECHARS | 0; + output[j+3] = intToCharCode[v % NUMSAFECHARS]; + v = v / NUMSAFECHARS | 0; + output[j+4] = intToCharCode[v]; + j += 5; + } + if ( m !== 0 ) { + const uint8arr = new 
Uint8Array(arrbuf, n); + v = uint8arr[0]; + if ( m > 1 ) { + v += uint8arr[1] << 8; + if ( m > 2 ) { + v += uint8arr[2] << 16; + } + } + output[j+0] = intToCharCode[v % NUMSAFECHARS]; + v = v / NUMSAFECHARS | 0; + output[j+1] = intToCharCode[v % NUMSAFECHARS]; + if ( m > 1 ) { + v = v / NUMSAFECHARS | 0; + output[j+2] = intToCharCode[v % NUMSAFECHARS]; + if ( m > 2 ) { + v = v / NUMSAFECHARS | 0; + output[j+3] = intToCharCode[v % NUMSAFECHARS]; + } + } + } + return textDecoder.decode(output); +}; + +const BASE88_POW1 = NUMSAFECHARS; +const BASE88_POW2 = NUMSAFECHARS * BASE88_POW1; +const BASE88_POW3 = NUMSAFECHARS * BASE88_POW2; +const BASE88_POW4 = NUMSAFECHARS * BASE88_POW3; + +const denseArrayBufferFromStr = (denseStr, arrbuf) => { + const input = uint8InputFromAsciiStr(denseStr); + const end = denseStr.length; + const m = end % 5; + const n = end - m; + const uin32len = n / 5 * 4 >>> 2; + const uint32arr = new Uint32Array(arrbuf, 0, uin32len); + let j = 0, v = 0; + for ( let i = 0; i < n; i += 5 ) { + v = charCodeToInt[input[i+0]]; + v += charCodeToInt[input[i+1]] * BASE88_POW1; + v += charCodeToInt[input[i+2]] * BASE88_POW2; + v += charCodeToInt[input[i+3]] * BASE88_POW3; + v += charCodeToInt[input[i+4]] * BASE88_POW4; + uint32arr[j++] = v; + } + if ( m === 0 ) { return; } + v = charCodeToInt[input[n+0]] + + charCodeToInt[input[n+1]] * BASE88_POW1; + if ( m > 2 ) { + v += charCodeToInt[input[n+2]] * BASE88_POW2; + if ( m > 3 ) { + v += charCodeToInt[input[n+3]] * BASE88_POW3; + } + } + const uint8arr = new Uint8Array(arrbuf, j << 2); + uint8arr[0] = v & 255; + if ( v !== 0 ) { + v >>>= 8; + uint8arr[1] = v & 255; + if ( v !== 0 ) { + v >>>= 8; + uint8arr[2] = v & 255; + } + } +}; + +const sparseArrayBufferToStr = (arrbuf, details) => { + const end = details.end; + const uint8out = new Uint8Array(details.sparseSize); + const uint32len = end >>> 2; + const uint32arr = new Uint32Array(arrbuf, 0, uint32len); + let j = 0, n = 0, r = 0; + for ( let i = 0; i < 
uint32len; i++ ) { + n = uint32arr[i]; + if ( n !== 0 ) { + for (;;) { + r = n % NUMSAFECHARS; + uint8out[j++] = intToCharCode[r]; + n -= r; + if ( n === 0 ) { break; } + n /= NUMSAFECHARS; + } + } + uint8out[j++] = SEPARATORCHARCODE; + } + const uint8rem = end & 0b11; + if ( uint8rem !== 0 ) { + uint8out[j++] = SENTINELCHARCODE; + const uint8arr = new Uint8Array(arrbuf, end - uint8rem, uint8rem); + for ( let i = 0; i < uint8rem; i++ ) { + n = uint8arr[i]; + if ( n !== 0 ) { + for (;;) { + r = n % NUMSAFECHARS; + uint8out[j++] = intToCharCode[r]; + n -= r; + if ( n === 0 ) { break; } + n /= NUMSAFECHARS; + } + } + uint8out[j++] = SEPARATORCHARCODE; + } + } + return textDecoder.decode(uint8out); +}; + +const sparseArrayBufferFromStr = (sparseStr, arrbuf) => { + const sparseLen = sparseStr.length; + const input = uint8InputFromAsciiStr(sparseStr); + const end = arrbuf.byteLength; + const uint32len = end >>> 2; + const uint32arr = new Uint32Array(arrbuf, 0, uint32len); + let i = 0, j = 0, c = 0, n = 0, m = 0; + for ( ; j < sparseLen; i++ ) { + c = input[j++]; + if ( c === SEPARATORCHARCODE ) { continue; } + if ( c === SENTINELCHARCODE ) { break; } + n = charCodeToInt[c]; + m = 1; + for (;;) { + c = input[j++]; + if ( c === SEPARATORCHARCODE ) { break; } + m *= NUMSAFECHARS; + n += m * charCodeToInt[c]; + } + uint32arr[i] = n; + } + if ( c === SENTINELCHARCODE ) { + i <<= 2; + const uint8arr = new Uint8Array(arrbuf, i); + for ( ; j < sparseLen; i++ ) { + c = input[j++]; + if ( c === SEPARATORCHARCODE ) { continue; } + n = charCodeToInt[c]; + m = 1; + for (;;) { + c = input[j++]; + if ( c === SEPARATORCHARCODE ) { break; } + m *= NUMSAFECHARS; + n += m * charCodeToInt[c]; + } + uint8arr[i] = n; + } + } +}; + +/******************************************************************************/ + +const _serialize = data => { + // Primitive types + if ( data === 0 ) { + writeBuffer.push(C_ZERO); + return; + } + if ( data === null ) { + writeBuffer.push(C_NULL); + return; + } 
+ if ( data === undefined ) { + writeBuffer.push(C_UNDEFINED); + return; + } + // Type name + switch ( typeToSerializedInt[typeof data] ) { + case I_STRING: { + const length = data.length; + if ( length < NUMSAFECHARS ) { + writeBuffer.push(C_STRING_SMALL + intToChar[length], data); + } else { + writeBuffer.push(C_STRING_LARGE + strFromLargeUint(length), data); + } + return; + } + case I_NUMBER: + if ( isInteger(data) ) { + if ( data >= NUMSAFECHARS ) { + writeBuffer.push(C_INTEGER_LARGE_POS + strFromLargeUint(data)); + } else if ( data > 0 ) { + writeBuffer.push(C_INTEGER_SMALL_POS + intToChar[data]); + } else if ( data > -NUMSAFECHARS ) { + writeBuffer.push(C_INTEGER_SMALL_NEG + intToChar[-data]); + } else { + writeBuffer.push(C_INTEGER_LARGE_NEG + strFromLargeUint(-data)); + } + } else { + const s = `${data}`; + writeBuffer.push(C_FLOAT + strFromLargeUint(s.length) + s); + } + return; + case I_BOOL: + writeBuffer.push(data ? C_BOOL_TRUE : C_BOOL_FALSE); + return; + case I_OBJECT: + break; + default: + return; + } + const xtypeName = Object.prototype.toString.call(data); + const xtypeInt = xtypeToSerializedInt[xtypeName]; + if ( xtypeInt === I_REGEXP ) { + writeBuffer.push(C_REGEXP); + _serialize(data.source); + _serialize(data.flags); + return; + } + if ( xtypeInt === I_DATE ) { + writeBuffer.push(C_DATE + _serialize(data.getTime())); + return; + } + // Reference to composite types + const ref = writeRefs.get(data); + if ( ref !== undefined ) { + writeBuffer.push(C_REFERENCE + strFromLargeUint(ref)); + return; + } + // Remember reference + writeRefs.set(data, refCounter++); + // Extended type name + switch ( xtypeInt ) { + case I_ARRAY: { + const size = data.length; + if ( size < NUMSAFECHARS ) { + writeBuffer.push(C_ARRAY_SMALL + intToChar[size]); + } else { + writeBuffer.push(C_ARRAY_LARGE + strFromLargeUint(size)); + } + for ( const v of data ) { + _serialize(v); + } + return; + } + case I_SET: { + const size = data.size; + if ( size < NUMSAFECHARS ) { + 
writeBuffer.push(C_SET_SMALL + intToChar[size]); + } else { + writeBuffer.push(C_SET_LARGE + strFromLargeUint(size)); + } + for ( const v of data ) { + _serialize(v); + } + return; + } + case I_MAP: { + const size = data.size; + if ( size < NUMSAFECHARS ) { + writeBuffer.push(C_MAP_SMALL + intToChar[size]); + } else { + writeBuffer.push(C_MAP_LARGE + strFromLargeUint(size)); + } + for ( const [ k, v ] of data ) { + _serialize(k); + _serialize(v); + } + return; + } + case I_ARRAYBUFFER: { + const byteLength = data.byteLength; + writeBuffer.push(C_ARRAYBUFFER + strFromLargeUint(byteLength)); + _serialize(data.maxByteLength); + const arrbuffDetails = analyzeArrayBuffer(data); + _serialize(arrbuffDetails.dense); + const str = arrbuffDetails.dense + ? denseArrayBufferToStr(data, arrbuffDetails) + : sparseArrayBufferToStr(data, arrbuffDetails); + _serialize(str); + //console.log(`arrbuf size=${byteLength} content size=${arrbuffDetails.end} dense=${arrbuffDetails.dense} array size=${arrbuffDetails.dense ? 
arrbuffDetails.denseSize : arrbuffDetails.sparseSize} serialized size=${str.length}`); + return; + } + case I_INT8ARRAY: + case I_UINT8ARRAY: + case I_UINT8CLAMPEDARRAY: + case I_INT16ARRAY: + case I_UINT16ARRAY: + case I_INT32ARRAY: + case I_UINT32ARRAY: + case I_FLOAT32ARRAY: + case I_FLOAT64ARRAY: + writeBuffer.push( + typeToSerializedChar[xtypeName], + strFromLargeUint(data.byteOffset), + strFromLargeUint(data.length) + ); + _serialize(data.buffer); + return; + case I_DATAVIEW: + writeBuffer.push(C_DATAVIEW, strFromLargeUint(data.byteOffset), strFromLargeUint(data.byteLength)); + _serialize(data.buffer); + return; + default: { + const keys = Object.keys(data); + const size = keys.length; + if ( size < NUMSAFECHARS ) { + writeBuffer.push(C_SMALL_OBJECT + intToChar[size]); + } else { + writeBuffer.push(C_LARGE_OBJECT + strFromLargeUint(size)); + } + for ( const key of keys ) { + _serialize(key); + _serialize(data[key]); + } + break; + } + } +}; + +/******************************************************************************/ + +const _deserialize = ( ) => { + if ( readPtr >= readEnd ) { return; } + const type = charCodeToInt[readStr.charCodeAt(readPtr++)]; + switch ( type ) { + // Primitive types + case I_STRING_SMALL: + case I_STRING_LARGE: { + const size = type === I_STRING_SMALL + ? 
charCodeToInt[readStr.charCodeAt(readPtr++)] + : deserializeLargeUint(); + const beg = readPtr; + readPtr += size; + return readStr.slice(beg, readPtr); + } + case I_ZERO: + return 0; + case I_INTEGER_SMALL_POS: + return charCodeToInt[readStr.charCodeAt(readPtr++)]; + case I_INTEGER_SMALL_NEG: + return -charCodeToInt[readStr.charCodeAt(readPtr++)]; + case I_INTEGER_LARGE_POS: + return deserializeLargeUint(); + case I_INTEGER_LARGE_NEG: + return -deserializeLargeUint(); + case I_BOOL_FALSE: + return false; + case I_BOOL_TRUE: + return true; + case I_NULL: + return null; + case I_UNDEFINED: + return; + case I_FLOAT: { + const size = deserializeLargeUint(); + const beg = readPtr; + readPtr += size; + return parseFloat(readStr.slice(beg, readPtr)); + } + case I_REGEXP: { + const source = _deserialize(); + const flags = _deserialize(); + return new RegExp(source, flags); + } + case I_DATE: { + const time = _deserialize(); + return new Date(time); + } + case I_REFERENCE: { + const ref = deserializeLargeUint(); + return readRefs.get(ref); + } + case I_SMALL_OBJECT: + case I_LARGE_OBJECT: { + const entries = []; + const size = type === I_SMALL_OBJECT + ? charCodeToInt[readStr.charCodeAt(readPtr++)] + : deserializeLargeUint(); + for ( let i = 0; i < size; i++ ) { + const k = _deserialize(); + const v = _deserialize(); + entries.push([ k, v ]); + } + const out = Object.fromEntries(entries); + readRefs.set(refCounter++, out); + return out; + } + case I_ARRAY_SMALL: + case I_ARRAY_LARGE: { + const out = []; + const size = type === I_ARRAY_SMALL + ? charCodeToInt[readStr.charCodeAt(readPtr++)] + : deserializeLargeUint(); + for ( let i = 0; i < size; i++ ) { + out.push(_deserialize()); + } + readRefs.set(refCounter++, out); + return out; + } + case I_SET_SMALL: + case I_SET_LARGE: { + const entries = []; + const size = type === I_SET_SMALL + ? 
charCodeToInt[readStr.charCodeAt(readPtr++)] + : deserializeLargeUint(); + for ( let i = 0; i < size; i++ ) { + entries.push(_deserialize()); + } + const out = new Set(entries); + readRefs.set(refCounter++, out); + return out; + } + case I_MAP_SMALL: + case I_MAP_LARGE: { + const entries = []; + const size = type === I_MAP_SMALL + ? charCodeToInt[readStr.charCodeAt(readPtr++)] + : deserializeLargeUint(); + for ( let i = 0; i < size; i++ ) { + const k = _deserialize(); + const v = _deserialize(); + entries.push([ k, v ]); + } + const out = new Map(entries); + readRefs.set(refCounter++, out); + return out; + } + case I_ARRAYBUFFER: { + const byteLength = deserializeLargeUint(); + const maxByteLength = _deserialize(); + let options; + if ( maxByteLength !== 0 && maxByteLength !== byteLength ) { + options = { maxByteLength }; + } + const arrbuf = new ArrayBuffer(byteLength, options); + const dense = _deserialize(); + const str = _deserialize(); + if ( dense ) { + denseArrayBufferFromStr(str, arrbuf); + } else { + sparseArrayBufferFromStr(str, arrbuf); + } + readRefs.set(refCounter++, arrbuf); + return arrbuf; + } + case I_INT8ARRAY: + case I_UINT8ARRAY: + case I_UINT8CLAMPEDARRAY: + case I_INT16ARRAY: + case I_UINT16ARRAY: + case I_INT32ARRAY: + case I_UINT32ARRAY: + case I_FLOAT32ARRAY: + case I_FLOAT64ARRAY: + case I_DATAVIEW: { + const byteOffset = deserializeLargeUint(); + const length = deserializeLargeUint(); + const arrayBuffer = _deserialize(); + const ctor = toArrayBufferViewConstructor[`${type}`]; + const out = new ctor(arrayBuffer, byteOffset, length); + readRefs.set(refCounter++, out); + return out; + } + default: + break; + } + readPtr = FAILMARK; +}; + +/******************************************************************************* + * + * LZ4 block compression/decompression + * + * Imported from: + * https://github.com/gorhill/lz4-wasm/blob/8995cdef7b/dist/lz4-block-codec-js.js + * + * Customized to avoid external dependencies as I entertain the idea of 
// spinning off the serializer as a standalone utility for all to use. */
// (The line above is the end of the "LZ4 block compression/decompression"
// banner comment, which opens on the preceding line of the file.)

/**
 * LZ4 block encoder/decoder operating on Uint8Array/ArrayBuffer data.
 *
 * The same internal output buffer is reused across calls when it is large
 * enough, and the arrays returned by encode()/decode() are views on that
 * buffer: callers which need to keep a result across calls must copy it.
 */
class LZ4BlockJS {
    constructor() {
        this.hashTable = undefined;
        this.outputBuffer = undefined;
    }
    /** Release the hash table and the output buffer. */
    reset() {
        this.hashTable = undefined;
        this.outputBuffer = undefined;
    }
    /**
     * Ensure the output buffer can hold at least `size` bytes. A new buffer is
     * allocated only when the current one is missing or too small; its size
     * is `size` rounded up to a multiple of 64 KiB.
     */
    growOutputBuffer(size) {
        if ( this.outputBuffer !== undefined ) {
            if ( this.outputBuffer.byteLength >= size ) { return; }
        }
        this.outputBuffer = new ArrayBuffer(size + 0xFFFF & 0x7FFF0000);
    }
    /**
     * @param {number} size - Size in bytes of the input.
     * @returns {number} The space reserved for compressing `size` bytes, or 0
     *   when `size` is above 0x7E000000.
     */
    encodeBound(size) {
        return size > 0x7E000000 ? 0 : size + (size / 255 | 0) + 16;
    }
    /**
     * Compress `iBuf` as a single LZ4 block.
     *
     * @param {Uint8Array|ArrayBuffer} iBuf - Data to compress.
     * @param {number} oOffset - Position in the output buffer at which the
     *   compressed data starts.
     * @returns {Uint8Array} View on the internal output buffer, from position
     *   0 to the end of the compressed data.
     * @throws {RangeError} When the input is 0x7E000000 bytes or more.
     */
    encodeBlock(iBuf, oOffset) {
        const iLen = iBuf.byteLength;
        if ( iLen >= 0x7E000000 ) { throw new RangeError(); }
        // "The last match must start at least 12 bytes before end of block"
        const lastMatchPos = iLen - 12;
        // "The last 5 bytes are always literals"
        const lastLiteralPos = iLen - 5;
        if ( this.hashTable === undefined ) {
            this.hashTable = new Int32Array(65536);
        }
        // With -65536 as initial value, the distance to any never-seen
        // sequence is always 65536 or more, hence never deemed a match
        this.hashTable.fill(-65536);
        if ( isInstanceOf(iBuf, 'ArrayBuffer') ) {
            iBuf = new Uint8Array(iBuf);
        }
        const oLen = oOffset + this.encodeBound(iLen);
        this.growOutputBuffer(oLen);
        const oBuf = new Uint8Array(this.outputBuffer, 0, oLen);
        let iPos = 0;
        let oPos = oOffset;
        let anchorPos = 0;
        // sequence-finding loop
        for (;;) {
            let refPos;
            let mOffset;
            let sequence = iBuf[iPos] << 8 | iBuf[iPos+1] << 16 | iBuf[iPos+2] << 24;
            // match-finding loop
            while ( iPos <= lastMatchPos ) {
                sequence = sequence >>> 8 | iBuf[iPos+3] << 24;
                const hash = (sequence * 0x9E37 & 0xFFFF) + (sequence * 0x79B1 >>> 16) & 0xFFFF;
                refPos = this.hashTable[hash];
                this.hashTable[hash] = iPos;
                mOffset = iPos - refPos;
                if (
                    mOffset < 65536 &&
                    iBuf[refPos+0] === ((sequence       ) & 0xFF) &&
                    iBuf[refPos+1] === ((sequence >>>  8) & 0xFF) &&
                    iBuf[refPos+2] === ((sequence >>> 16) & 0xFF) &&
                    iBuf[refPos+3] === ((sequence >>> 24) & 0xFF)
                ) {
                    break;
                }
                iPos += 1;
            }
            // no match found
            if ( iPos > lastMatchPos ) { break; }
            // match found
            let lLen = iPos - anchorPos;
            let mLen = iPos;
            iPos += 4; refPos += 4;
            while ( iPos < lastLiteralPos && iBuf[iPos] === iBuf[refPos] ) {
                iPos += 1; refPos += 1;
            }
            mLen = iPos - mLen;
            const token = mLen < 19 ? mLen - 4 : 15;
            // write token, length of literals if needed
            if ( lLen >= 15 ) {
                oBuf[oPos++] = 0xF0 | token;
                let l = lLen - 15;
                while ( l >= 255 ) {
                    oBuf[oPos++] = 255;
                    l -= 255;
                }
                oBuf[oPos++] = l;
            } else {
                oBuf[oPos++] = (lLen << 4) | token;
            }
            // write literals
            while ( lLen-- ) {
                oBuf[oPos++] = iBuf[anchorPos++];
            }
            if ( mLen === 0 ) { break; }
            // write offset of match
            oBuf[oPos+0] = mOffset;
            oBuf[oPos+1] = mOffset >>> 8;
            oPos += 2;
            // write length of match if needed
            if ( mLen >= 19 ) {
                let l = mLen - 19;
                while ( l >= 255 ) {
                    oBuf[oPos++] = 255;
                    l -= 255;
                }
                oBuf[oPos++] = l;
            }
            anchorPos = iPos;
        }
        // last sequence is literals only
        let lLen = iLen - anchorPos;
        if ( lLen >= 15 ) {
            oBuf[oPos++] = 0xF0;
            let l = lLen - 15;
            while ( l >= 255 ) {
                oBuf[oPos++] = 255;
                l -= 255;
            }
            oBuf[oPos++] = l;
        } else {
            oBuf[oPos++] = lLen << 4;
        }
        while ( lLen-- ) {
            oBuf[oPos++] = iBuf[anchorPos++];
        }
        return new Uint8Array(oBuf.buffer, 0, oPos);
    }
    /**
     * Decompress a single LZ4 block.
     *
     * @param {Uint8Array} iBuf - Compressed data.
     * @param {number} iOffset - Position in `iBuf` at which the block starts.
     * @param {number} oLen - Size in bytes of the decompressed data.
     * @returns {Uint8Array|undefined} View of `oLen` bytes on the internal
     *   output buffer, or `undefined` when a match offset is zero or reaches
     *   before the start of the output.
     */
    decodeBlock(iBuf, iOffset, oLen) {
        const iLen = iBuf.byteLength;
        this.growOutputBuffer(oLen);
        const oBuf = new Uint8Array(this.outputBuffer, 0, oLen);
        let iPos = iOffset, oPos = 0;
        while ( iPos < iLen ) {
            const token = iBuf[iPos++];
            // literals
            let clen = token >>> 4;
            // length of literals
            if ( clen !== 0 ) {
                if ( clen === 15 ) {
                    let l;
                    for (;;) {
                        l = iBuf[iPos++];
                        if ( l !== 255 ) { break; }
                        clen += 255;
                    }
                    clen += l;
                }
                // copy literals
                const end = iPos + clen;
                while ( iPos < end ) {
                    oBuf[oPos++] = iBuf[iPos++];
                }
                if ( iPos === iLen ) { break; }
            }
            // match
            const mOffset = iBuf[iPos+0] | (iBuf[iPos+1] << 8);
            if ( mOffset === 0 || mOffset > oPos ) { return; }
            iPos += 2;
            // length of match
            clen = (token & 0x0F) + 4;
            if ( clen === 19 ) {
                let l;
                for (;;) {
                    l = iBuf[iPos++];
                    if ( l !== 255 ) { break; }
                    clen += 255;
                }
                clen += l;
            }
            // copy match
            const end = oPos + clen;
            let mPos = oPos - mOffset;
            while ( oPos < end ) {
                oBuf[oPos++] = oBuf[mPos++];
            }
        }
        return oBuf;
    }
    /**
     * Type-checking front end to encodeBlock().
     * @throws {TypeError} When `input` is neither ArrayBuffer nor Uint8Array.
     */
    encode(input, outputOffset) {
        if ( isInstanceOf(input, 'ArrayBuffer') ) {
            input = new Uint8Array(input);
        } else if ( isInstanceOf(input, 'Uint8Array') === false ) {
            throw new TypeError();
        }
        return this.encodeBlock(input, outputOffset);
    }
    /**
     * Type-checking front end to decodeBlock().
     * @throws {TypeError} When `input` is neither ArrayBuffer nor Uint8Array.
     */
    decode(input, inputOffset, outputSize) {
        if ( isInstanceOf(input, 'ArrayBuffer') ) {
            input = new Uint8Array(input);
        } else if ( isInstanceOf(input, 'Uint8Array') === false ) {
            throw new TypeError();
        }
        return this.decodeBlock(input, inputOffset, outputSize);
    }
}

/*******************************************************************************
 *
 * Synchronous APIs
 *
 * */

/**
 * Serialize `data` to a string.
 *
 * @param {*} data - The value to serialize.
 * @param {object} [options]
 * @param {boolean} [options.compress] - When strictly `true`, an
 *   LZ4-compressed variant is also produced, and returned only if its length
 *   is at most 85% of the length of the uncompressed variant.
 * @returns {string} The serialized data.
 */
export const serialize = (data, options = {}) => {
    refCounter = 1;
    _serialize(data);
    writeBuffer.unshift(MAGICPREFIX);
    const s = writeBuffer.join('');
    writeRefs.clear();
    writeBuffer.length = 0;
    if ( options.compress !== true ) { return s; }
    const lz4Util = new LZ4BlockJS();
    const encoder = new TextEncoder();
    const uint8ArrayBefore = encoder.encode(s);
    const uint8ArrayAfter = lz4Util.encode(uint8ArrayBefore, 0);
    // The compressed data is itself serialized, along with the size needed to
    // decompress it. The copy trims the data to its actual length, since the
    // encoder returns a view on a larger buffer.
    const lz4 = {
        size: uint8ArrayBefore.length,
        data: new Uint8Array(uint8ArrayAfter),
    };
    refCounter = 1;
    _serialize(lz4);
    writeBuffer.unshift(MAGICLZ4PREFIX);
    const t = writeBuffer.join('');
    writeRefs.clear();
    writeBuffer.length = 0;
    const ratio = t.length / s.length;
    return ratio <= 0.85 ? t : s;
};

/**
 * Deserialize a string created by serialize().
 *
 * @param {string} s - The serialized data.
 * @returns {*} The deserialized data, or `undefined` when `s` does not start
 *   with a known prefix or when its content could not be fully deserialized.
 */
export const deserialize = s => {
    if ( s.startsWith(MAGICLZ4PREFIX) ) {
        refCounter = 1;
        readStr = s;
        readEnd = s.length;
        readPtr = MAGICLZ4PREFIX.length;
        const lz4 = _deserialize();
        readRefs.clear();
        readStr = '';
        // Bail out on a corrupted envelope rather than throw when
        // dereferencing it
        if ( readPtr === FAILMARK ) { return; }
        if ( typeof lz4 !== 'object' || lz4 === null ) { return; }
        const lz4Util = new LZ4BlockJS();
        const uint8ArrayAfter = lz4Util.decode(lz4.data, 0, lz4.size);
        s = textDecoder.decode(new Uint8Array(uint8ArrayAfter));
    }
    if ( s.startsWith(MAGICPREFIX) === false ) { return; }
    refCounter = 1;
    readStr = s;
    readEnd = s.length;
    readPtr = MAGICPREFIX.length;
    const data = _deserialize();
    readRefs.clear();
    readStr = '';
    uint8Input = null;
    if ( readPtr === FAILMARK ) { return; }
    return data;
};

/**
 * @param {*} s - Any value.
 * @returns {boolean} Whether `s` is a string starting with one of the
 *   prefixes recognized by deserialize().
 */
export const canDeserialize = s =>
    typeof s === 'string' &&
        (s.startsWith(MAGICLZ4PREFIX) || s.startsWith(MAGICPREFIX));

/*******************************************************************************
 *
 * Configuration
 *
 * */

const defaultConfig = {
    threadTTL: 5000,
};

// One validator per setting: a value is accepted only if the validator does
// not return `false`
const validateConfig = {
    threadTTL: val => val > 0,
};

const currentConfig = Object.assign({}, defaultConfig);

/** @returns {object} A copy of the current configuration. */
export const getConfig = ( ) => Object.assign({}, currentConfig);

/**
 * Update the current configuration. Entries which are unknown, not of the
 * same type as their default value, or rejected by their validator are
 * ignored.
 *
 * @param {object} config - Settings to change, keyed by setting name.
 */
export const setConfig = config => {
    if ( typeof config !== 'object' || config === null ) { return; }
    // `for...of` is required to iterate the key names: `for...in` on the
    // array returned by Object.keys() would iterate the array indices.
    for ( const key of Object.keys(config) ) {
        if ( Object.prototype.hasOwnProperty.call(defaultConfig, key) === false ) { continue; }
        const val = config[key];
        if ( typeof val !== typeof defaultConfig[key] ) { continue; }
        if ( (validateConfig[key])(val) === false ) { continue; }
        currentConfig[key] = val;
    }
};

/*******************************************************************************
 *
 * Asynchronous APIs
 *
 * Being asynchronous allows to support workers and future features such as
 * checksums.
 *
 * */
+ * + * */ + +class Thread { + constructor(gcer) { + this.jobs = new Map(); + this.jobIdGenerator = 1; + this.workerAccessTime = 0; + this.workerTimer = undefined; + this.gcer = gcer; + this.workerPromise = new Promise(resolve => { + let worker = null; + try { + worker = new Worker('js/scuo-serializer.js', { type: 'module' }); + worker.onmessage = ev => { + const msg = ev.data; + if ( isInstanceOf(msg, 'Object') === false ) { return; } + if ( msg.what === 'ready!' ) { + worker.onmessage = ev => { this.onmessage(ev); }; + worker.onerror = null; + resolve(worker); + } + }; + worker.onerror = ( ) => { + worker.onmessage = worker.onerror = null; + resolve(null); + }; + worker.postMessage({ what: 'ready?', config: currentConfig }); + } catch(ex) { + console.info(ex); + worker.onmessage = worker.onerror = null; + resolve(null); + } + }); + } + + countdownWorker() { + if ( this.workerTimer !== undefined ) { return; } + this.workerTimer = setTimeout(async ( ) => { + this.workerTimer = undefined; + if ( this.jobs.size !== 0 ) { return; } + const idleTime = Date.now() - this.workerAccessTime; + if ( idleTime < currentConfig.threadTTL ) { + return this.countdownWorker(); + } + const worker = await this.workerPromise; + if ( this.jobs.size !== 0 ) { return; } + this.gcer(this); + if ( worker === null ) { return; } + worker.onmessage = worker.onerror = null; + worker.terminate(); + }, currentConfig.threadTTL); + } + + onmessage(ev) { + const job = ev.data; + const resolve = this.jobs.get(job.id); + if ( resolve === undefined ) { return; } + this.jobs.delete(job.id); + resolve(job.result); + if ( this.jobs.size !== 0 ) { return; } + this.countdownWorker(); + } + + async serialize(data, options) { + this.workerAccessTime = Date.now(); + const worker = await this.workerPromise; + if ( worker === null ) { + const result = serialize(data, options); + this.countdownWorker(); + return result; + } + const id = this.jobIdGenerator++; + return new Promise(resolve => { + const job = { 
what: 'serialize', id, data, options }; + this.jobs.set(job.id, resolve); + worker.postMessage(job); + }); + } + + async deserialize(data, options) { + this.workerAccessTime = Date.now(); + const worker = await this.workerPromise; + if ( worker === null ) { + const result = deserialize(data, options); + this.countdownWorker(); + return result; + } + const id = this.jobIdGenerator++; + return new Promise(resolve => { + const job = { what: 'deserialize', id, data, options }; + this.jobs.set(job.id, resolve); + worker.postMessage(job); + }); + } +} + +const threads = { + pool: [], + thread(maxPoolSize) { + for ( const thread of this.pool ) { + if ( thread.jobs.size === 0 ) { return thread; } + } + const len = this.pool.length; + if ( len !== 0 && len >= maxPoolSize ) { + if ( len === 1 ) { return this.pool[0]; } + return this.pool.reduce((best, candidate) => + candidate.jobs.size < best.jobs.size ? candidate : best + ); + } + const thread = new Thread(thread => { + const pos = this.pool.indexOf(thread); + if ( pos === -1 ) { return; } + this.pool.splice(pos, 1); + }); + this.pool.push(thread); + return thread; + }, +}; + +export async function serializeAsync(data, options = {}) { + const maxThreadCount = options.multithreaded || 0; + if ( maxThreadCount === 0 ) { + return serialize(data, options); + } + const result = await threads + .thread(maxThreadCount) + .serialize(data, options); + if ( result !== undefined ) { return result; } + return serialize(data, options); +} + +export async function deserializeAsync(data, options = {}) { + const maxThreadCount = options.multithreaded || 0; + if ( maxThreadCount === 0 ) { + return deserialize(data, options); + } + const result = await threads + .thread(maxThreadCount) + .deserialize(data, options); + if ( result !== undefined ) { return result; } + return deserialize(data, options); +} + +/******************************************************************************* + * + * Worker-only code + * + * */ + +if ( 
isInstanceOf(globalThis, 'DedicatedWorkerGlobalScope') ) { + globalThis.onmessage = ev => { + const msg = ev.data; + switch ( msg.what ) { + case 'ready?': + setConfig(msg.config); + globalThis.postMessage({ what: 'ready!' }); + break; + case 'serialize': + case 'deserialize': { + const result = msg.what === 'serialize' + ? serialize(msg.data, msg.options) + : deserialize(msg.data); + globalThis.postMessage({ id: msg.id, result }); + break; + } + } + }; +} + +/******************************************************************************/ diff --git a/src/js/start.js b/src/js/start.js index 5762619b6..877d909c4 100644 --- a/src/js/start.js +++ b/src/js/start.js @@ -139,7 +139,7 @@ const initializeTabs = async ( ) => { // https://www.reddit.com/r/uBlockOrigin/comments/s7c9go/ // Abort suspending network requests when uBO is merely being installed. -const onVersionReady = lastVersion => { +const onVersionReady = async lastVersion => { if ( lastVersion === vAPI.app.version ) { return; } vAPI.storage.set({ @@ -155,6 +155,11 @@ const onVersionReady = lastVersion => { return; } + // Migrate cache storage + if ( lastVersionInt < vAPI.app.intFromVersion('1.56.1b1') ) { + await cacheStorage.migrate(µb.hiddenSettings.cacheStorageAPI); + } + // Since built-in resources may have changed since last version, we // force a reload of all resources. redirectEngine.invalidateResourcesSelfie(io); @@ -252,19 +257,19 @@ const onUserSettingsReady = fetched => { // Wait for removal of invalid cached data to be completed. 
const onCacheSettingsReady = async (fetched = {}) => { + let selfieIsInvalid = false; if ( fetched.compiledMagic !== µb.systemSettings.compiledMagic ) { µb.compiledFormatChanged = true; - µb.selfieIsInvalid = true; + selfieIsInvalid = true; ubolog(`Serialized format of static filter lists changed`); } if ( fetched.selfieMagic !== µb.systemSettings.selfieMagic ) { - µb.selfieIsInvalid = true; + selfieIsInvalid = true; ubolog(`Serialized format of selfie changed`); } - if ( µb.selfieIsInvalid ) { - µb.selfieManager.destroy(); - cacheStorage.set(µb.systemSettings); - } + if ( selfieIsInvalid === false ) { return; } + µb.selfieManager.destroy({ janitor: true }); + cacheStorage.set(µb.systemSettings); }; /******************************************************************************/ @@ -305,10 +310,7 @@ const onHiddenSettingsReady = async ( ) => { } // Maybe override default cache storage - µb.supportStats.cacheBackend = await cacheStorage.select( - µb.hiddenSettings.cacheStorageAPI - ); - ubolog(`Backend storage for cache will be ${µb.supportStats.cacheBackend}`); + µb.supportStats.cacheBackend = 'browser.storage.local'; }; /******************************************************************************/ @@ -333,7 +335,6 @@ const onFirstFetchReady = (fetched, adminExtra) => { sessionSwitches.assign(permanentSwitches); onNetWhitelistReady(fetched.netWhitelist, adminExtra); - onVersionReady(fetched.version); }; /******************************************************************************/ @@ -389,23 +390,20 @@ try { const adminExtra = await vAPI.adminStorage.get('toAdd'); ubolog(`Extra admin settings ready ${Date.now()-vAPI.T0} ms after launch`); - // https://github.com/uBlockOrigin/uBlock-issues/issues/1365 - // Wait for onCacheSettingsReady() to be fully ready. 
- const [ , , lastVersion ] = await Promise.all([ + const lastVersion = await vAPI.storage.get(createDefaultProps()).then(async fetched => { + ubolog(`Version ready ${Date.now()-vAPI.T0} ms after launch`); + await onVersionReady(fetched.version); + return fetched; + }).then(fetched => { + ubolog(`First fetch ready ${Date.now()-vAPI.T0} ms after launch`); + onFirstFetchReady(fetched, adminExtra); + return fetched.version; + }); + + await Promise.all([ µb.loadSelectedFilterLists().then(( ) => { ubolog(`List selection ready ${Date.now()-vAPI.T0} ms after launch`); }), - cacheStorage.get( - { compiledMagic: 0, selfieMagic: 0 } - ).then(fetched => { - ubolog(`Cache magic numbers ready ${Date.now()-vAPI.T0} ms after launch`); - onCacheSettingsReady(fetched); - }), - vAPI.storage.get(createDefaultProps()).then(fetched => { - ubolog(`First fetch ready ${Date.now()-vAPI.T0} ms after launch`); - onFirstFetchReady(fetched, adminExtra); - return fetched.version; - }), µb.loadUserSettings().then(fetched => { ubolog(`User settings ready ${Date.now()-vAPI.T0} ms after launch`); onUserSettingsReady(fetched); @@ -413,6 +411,10 @@ try { µb.loadPublicSuffixList().then(( ) => { ubolog(`PSL ready ${Date.now()-vAPI.T0} ms after launch`); }), + cacheStorage.get({ compiledMagic: 0, selfieMagic: 0 }).then(bin => { + ubolog(`Cache magic numbers ready ${Date.now()-vAPI.T0} ms after launch`); + onCacheSettingsReady(bin); + }), ]); // https://github.com/uBlockOrigin/uBlock-issues/issues/1547 diff --git a/src/js/static-ext-filtering-db.js b/src/js/static-ext-filtering-db.js index 64a9c8df0..e669c1e11 100644 --- a/src/js/static-ext-filtering-db.js +++ b/src/js/static-ext-filtering-db.js @@ -141,8 +141,8 @@ const StaticExtFilteringHostnameDB = class { toSelfie() { return { version: this.version, - hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap), - regexToSlotIdMap: Array.from(this.regexToSlotIdMap), + hostnameToSlotIdMap: this.hostnameToSlotIdMap, + regexToSlotIdMap: 
this.regexToSlotIdMap, hostnameSlots: this.hostnameSlots, strSlots: this.strSlots, size: this.size @@ -150,11 +150,11 @@ const StaticExtFilteringHostnameDB = class { } fromSelfie(selfie) { - if ( selfie === undefined ) { return; } - this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap); + if ( typeof selfie !== 'object' || selfie === null ) { return; } + this.hostnameToSlotIdMap = selfie.hostnameToSlotIdMap; // Regex-based lookup available in uBO 1.47.0 and above - if ( Array.isArray(selfie.regexToSlotIdMap) ) { - this.regexToSlotIdMap = new Map(selfie.regexToSlotIdMap); + if ( selfie.regexToSlotIdMap ) { + this.regexToSlotIdMap = selfie.regexToSlotIdMap; } this.hostnameSlots = selfie.hostnameSlots; this.strSlots = selfie.strSlots; diff --git a/src/js/static-ext-filtering.js b/src/js/static-ext-filtering.js index 8a2905eb6..e616e6350 100644 --- a/src/js/static-ext-filtering.js +++ b/src/js/static-ext-filtering.js @@ -26,9 +26,8 @@ import cosmeticFilteringEngine from './cosmetic-filtering.js'; import htmlFilteringEngine from './html-filtering.js'; import httpheaderFilteringEngine from './httpheader-filtering.js'; -import io from './assets.js'; -import logger from './logger.js'; import scriptletFilteringEngine from './scriptlet-filtering.js'; +import logger from './logger.js'; /******************************************************************************* @@ -147,34 +146,24 @@ staticExtFilteringEngine.fromCompiledContent = function(reader, options) { htmlFilteringEngine.fromCompiledContent(reader, options); }; -staticExtFilteringEngine.toSelfie = function(path) { - return io.put( - `${path}/main`, - JSON.stringify({ - cosmetic: cosmeticFilteringEngine.toSelfie(), - scriptlets: scriptletFilteringEngine.toSelfie(), - httpHeaders: httpheaderFilteringEngine.toSelfie(), - html: htmlFilteringEngine.toSelfie(), - }) - ); +staticExtFilteringEngine.toSelfie = function() { + return { + cosmetic: cosmeticFilteringEngine.toSelfie(), + scriptlets: 
scriptletFilteringEngine.toSelfie(), + httpHeaders: httpheaderFilteringEngine.toSelfie(), + html: htmlFilteringEngine.toSelfie(), + }; }; -staticExtFilteringEngine.fromSelfie = function(path) { - return io.get(`${path}/main`).then(details => { - let selfie; - try { - selfie = JSON.parse(details.content); - } catch (ex) { - } - if ( selfie instanceof Object === false ) { return false; } - cosmeticFilteringEngine.fromSelfie(selfie.cosmetic); - httpheaderFilteringEngine.fromSelfie(selfie.httpHeaders); - htmlFilteringEngine.fromSelfie(selfie.html); - if ( scriptletFilteringEngine.fromSelfie(selfie.scriptlets) === false ) { - return false; - } - return true; - }); +staticExtFilteringEngine.fromSelfie = async function(selfie) { + if ( typeof selfie !== 'object' || selfie === null ) { return false; } + cosmeticFilteringEngine.fromSelfie(selfie.cosmetic); + httpheaderFilteringEngine.fromSelfie(selfie.httpHeaders); + htmlFilteringEngine.fromSelfie(selfie.html); + if ( scriptletFilteringEngine.fromSelfie(selfie.scriptlets) === false ) { + return false; + } + return true; }; /******************************************************************************/ diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 9189c01bc..86d042248 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -28,7 +28,6 @@ import { queueTask, dropTask } from './tasks.js'; import BidiTrieContainer from './biditrie.js'; import HNTrieContainer from './hntrie.js'; -import { sparseBase64 } from './base64-custom.js'; import { CompiledListReader } from './static-filtering-io.js'; import * as sfp from './static-filtering-parser.js'; @@ -493,17 +492,13 @@ const filterDataReset = ( ) => { filterData.fill(0); filterDataWritePtr = 2; }; -const filterDataToSelfie = ( ) => { - return JSON.stringify(Array.from(filterData.subarray(0, filterDataWritePtr))); -}; +const filterDataToSelfie = ( ) => + filterData.subarray(0, filterDataWritePtr); + const 
filterDataFromSelfie = selfie => { - if ( typeof selfie !== 'string' || selfie === '' ) { return false; } - const data = JSON.parse(selfie); - if ( Array.isArray(data) === false ) { return false; } - filterDataGrow(data.length); - filterDataWritePtr = data.length; - filterData.set(data); - filterDataShrink(); + if ( selfie instanceof Int32Array === false ) { return false; } + filterData = selfie; + filterDataWritePtr = selfie.length; return true; }; @@ -519,53 +514,15 @@ const filterRefsReset = ( ) => { filterRefs.fill(null); filterRefsWritePtr = 1; }; -const filterRefsToSelfie = ( ) => { - const refs = []; - for ( let i = 0; i < filterRefsWritePtr; i++ ) { - const v = filterRefs[i]; - if ( v instanceof RegExp ) { - refs.push({ t: 1, s: v.source, f: v.flags }); - continue; - } - if ( Array.isArray(v) ) { - refs.push({ t: 2, v }); - continue; - } - if ( typeof v !== 'object' || v === null ) { - refs.push({ t: 0, v }); - continue; - } - const out = Object.create(null); - for ( const prop of Object.keys(v) ) { - const value = v[prop]; - out[prop] = prop.startsWith('$') - ? (typeof value === 'string' ? 
'' : null) - : value; - } - refs.push({ t: 3, v: out }); - } - return JSON.stringify(refs); -}; +const filterRefsToSelfie = ( ) => + filterRefs.slice(0, filterRefsWritePtr); + const filterRefsFromSelfie = selfie => { - if ( typeof selfie !== 'string' || selfie === '' ) { return false; } - const refs = JSON.parse(selfie); - if ( Array.isArray(refs) === false ) { return false; } - for ( let i = 0; i < refs.length; i++ ) { - const v = refs[i]; - switch ( v.t ) { - case 0: - case 2: - case 3: - filterRefs[i] = v.v; - break; - case 1: - filterRefs[i] = new RegExp(v.s, v.f); - break; - default: - throw new Error('Unknown filter reference!'); - } + if ( Array.isArray(selfie) === false ) { return false; } + for ( let i = 0, n = selfie.length; i < n; i++ ) { + filterRefs[i] = selfie[i]; } - filterRefsWritePtr = refs.length; + filterRefsWritePtr = selfie.length; return true; }; @@ -3121,14 +3078,11 @@ const urlTokenizer = new (class { } toSelfie() { - return sparseBase64.encode( - this.knownTokens.buffer, - this.knownTokens.byteLength - ); + return this.knownTokens; } fromSelfie(selfie) { - return sparseBase64.decode(selfie, this.knownTokens.buffer); + this.knownTokens = selfie; } // https://github.com/chrisaljoudi/uBlock/issues/1118 @@ -4674,52 +4628,24 @@ FilterContainer.prototype.optimize = function(throttle = 0) { /******************************************************************************/ -FilterContainer.prototype.toSelfie = async function(storage, path) { - if ( typeof storage !== 'object' || storage === null ) { return; } - if ( typeof storage.put !== 'function' ) { return; } - +FilterContainer.prototype.toSelfie = function() { bidiTrieOptimize(true); - keyvalStore.setItem( - 'SNFE.origHNTrieContainer.trieDetails', + keyvalStore.setItem('SNFE.origHNTrieContainer.trieDetails', origHNTrieContainer.optimize() ); - - return Promise.all([ - storage.put( - `${path}/destHNTrieContainer`, - destHNTrieContainer.serialize(sparseBase64) - ), - storage.put( - 
`${path}/origHNTrieContainer`, - origHNTrieContainer.serialize(sparseBase64) - ), - storage.put( - `${path}/bidiTrie`, - bidiTrie.serialize(sparseBase64) - ), - storage.put( - `${path}/filterData`, - filterDataToSelfie() - ), - storage.put( - `${path}/filterRefs`, - filterRefsToSelfie() - ), - storage.put( - `${path}/main`, - JSON.stringify({ - version: this.selfieVersion, - processedFilterCount: this.processedFilterCount, - acceptedCount: this.acceptedCount, - discardedCount: this.discardedCount, - bitsToBucket: Array.from(this.bitsToBucket).map(kv => { - kv[1] = Array.from(kv[1]); - return kv; - }), - urlTokenizer: urlTokenizer.toSelfie(), - }) - ) - ]); + return { + version: this.selfieVersion, + processedFilterCount: this.processedFilterCount, + acceptedCount: this.acceptedCount, + discardedCount: this.discardedCount, + bitsToBucket: this.bitsToBucket, + urlTokenizer: urlTokenizer.toSelfie(), + destHNTrieContainer: destHNTrieContainer.toSelfie(), + origHNTrieContainer: origHNTrieContainer.toSelfie(), + bidiTrie: bidiTrie.toSelfie(), + filterData: filterDataToSelfie(), + filterRefs: filterRefsToSelfie(), + }; }; FilterContainer.prototype.serialize = async function() { @@ -4735,53 +4661,27 @@ FilterContainer.prototype.serialize = async function() { /******************************************************************************/ -FilterContainer.prototype.fromSelfie = async function(storage, path) { - if ( typeof storage !== 'object' || storage === null ) { return; } - if ( typeof storage.get !== 'function' ) { return; } +FilterContainer.prototype.fromSelfie = async function(selfie) { + if ( typeof selfie !== 'object' || selfie === null ) { return; } this.reset(); this.notReady = true; - const results = await Promise.all([ - storage.get(`${path}/main`), - storage.get(`${path}/destHNTrieContainer`).then(details => - destHNTrieContainer.unserialize(details.content, sparseBase64) - ), - storage.get(`${path}/origHNTrieContainer`).then(details => - 
origHNTrieContainer.unserialize(details.content, sparseBase64) - ), - storage.get(`${path}/bidiTrie`).then(details => - bidiTrie.unserialize(details.content, sparseBase64) - ), - storage.get(`${path}/filterData`).then(details => - filterDataFromSelfie(details.content) - ), - storage.get(`${path}/filterRefs`).then(details => - filterRefsFromSelfie(details.content) - ), - ]); - + const results = [ + destHNTrieContainer.fromSelfie(selfie.destHNTrieContainer), + origHNTrieContainer.fromSelfie(selfie.origHNTrieContainer), + bidiTrie.fromSelfie(selfie.bidiTrie), + filterDataFromSelfie(selfie.filterData), + filterRefsFromSelfie(selfie.filterRefs), + ]; if ( results.slice(1).every(v => v === true) === false ) { return false; } - const details = results[0]; - if ( typeof details !== 'object' || details === null ) { return false; } - if ( typeof details.content !== 'string' ) { return false; } - if ( details.content === '' ) { return false; } - let selfie; - try { - selfie = JSON.parse(details.content); - } catch (ex) { - } - if ( typeof selfie !== 'object' || selfie === null ) { return false; } if ( selfie.version !== this.selfieVersion ) { return false; } this.processedFilterCount = selfie.processedFilterCount; this.acceptedCount = selfie.acceptedCount; this.discardedCount = selfie.discardedCount; - this.bitsToBucket = new Map(selfie.bitsToBucket.map(kv => { - kv[1] = new Map(kv[1]); - return kv; - })); + this.bitsToBucket = selfie.bitsToBucket; urlTokenizer.fromSelfie(selfie.urlTokenizer); // If this point is never reached, it means the internal state is diff --git a/src/js/storage.js b/src/js/storage.js index 5325a200f..68b52209d 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -38,7 +38,6 @@ import µb from './background.js'; import { hostnameFromURI } from './uri-utils.js'; import { i18n, i18n$ } from './i18n.js'; import { redirectEngine } from './redirect-engine.js'; -import { sparseBase64 } from './base64-custom.js'; import { ubolog, ubologSet } from 
'./console.js'; import * as sfp from './static-filtering-parser.js'; @@ -974,7 +973,7 @@ onBroadcast(msg => { /******************************************************************************/ µb.getCompiledFilterList = async function(assetKey) { - const compiledPath = 'compiled/' + assetKey; + const compiledPath = `compiled/${assetKey}`; // https://github.com/uBlockOrigin/uBlock-issues/issues/1365 // Verify that the list version matches that of the current compiled @@ -983,11 +982,10 @@ onBroadcast(msg => { this.compiledFormatChanged === false && this.badLists.has(assetKey) === false ) { - const compiledDetails = await io.get(compiledPath); + const content = await io.fromCache(compiledPath); const compilerVersion = `${this.systemSettings.compiledMagic}\n`; - if ( compiledDetails.content.startsWith(compilerVersion) ) { - compiledDetails.assetKey = assetKey; - return compiledDetails; + if ( content.startsWith(compilerVersion) ) { + return { assetKey, content }; } } @@ -1017,7 +1015,7 @@ onBroadcast(msg => { assetKey, trustedSource: this.isTrustedList(assetKey), }); - io.put(compiledPath, compiledContent); + io.toCache(compiledPath, compiledContent); return { assetKey, content: compiledContent }; }; @@ -1046,7 +1044,7 @@ onBroadcast(msg => { /******************************************************************************/ µb.removeCompiledFilterList = function(assetKey) { - io.remove('compiled/' + assetKey); + io.remove(`compiled/${assetKey}`); }; µb.removeFilterList = function(assetKey) { @@ -1173,20 +1171,17 @@ onBroadcast(msg => { const results = await Promise.all(fetchPromises); if ( Array.isArray(results) === false ) { return results; } - let content = ''; + const content = []; for ( let i = 1; i < results.length; i++ ) { const result = results[i]; - if ( - result instanceof Object === false || - typeof result.content !== 'string' || - result.content === '' - ) { - continue; - } - content += '\n\n' + result.content; + if ( result instanceof Object === false ) { 
continue; } + if ( typeof result.content !== 'string' ) { continue; } + if ( result.content === '' ) { continue; } + content.push(result.content); + } + if ( content.length !== 0 ) { + redirectEngine.resourcesFromString(content.join('\n\n')); } - - redirectEngine.resourcesFromString(content); redirectEngine.selfieFromResources(io); } catch(ex) { ubolog(ex); @@ -1225,8 +1220,8 @@ onBroadcast(msg => { } try { - const result = await io.get(`compiled/${this.pslAssetKey}`); - if ( psl.fromSelfie(result.content, sparseBase64) ) { return; } + const selfie = await io.fromCache(`compiled/${this.pslAssetKey}`); + if ( psl.fromSelfie(selfie) ) { return; } } catch (reason) { ubolog(reason); } @@ -1240,7 +1235,7 @@ onBroadcast(msg => { µb.compilePublicSuffixList = function(content) { const psl = publicSuffixList; psl.parse(content, punycode.toASCII); - io.put(`compiled/${this.pslAssetKey}`, psl.toSelfie(sparseBase64)); + return io.toCache(`compiled/${this.pslAssetKey}`, psl.toSelfie()); }; /******************************************************************************/ @@ -1260,39 +1255,24 @@ onBroadcast(msg => { if ( µb.inMemoryFilters.length !== 0 ) { return; } if ( Object.keys(µb.availableFilterLists).length === 0 ) { return; } await Promise.all([ - io.put( - 'selfie/main', - JSON.stringify({ - magic: µb.systemSettings.selfieMagic, - availableFilterLists: µb.availableFilterLists, - }) + io.toCache('selfie/main', { + magic: µb.systemSettings.selfieMagic, + availableFilterLists: µb.availableFilterLists, + }), + io.toCache('selfie/staticExtFilteringEngine', + staticExtFilteringEngine.toSelfie() ), - redirectEngine.toSelfie('selfie/redirectEngine'), - staticExtFilteringEngine.toSelfie( - 'selfie/staticExtFilteringEngine' - ), - staticNetFilteringEngine.toSelfie(io, - 'selfie/staticNetFilteringEngine' + io.toCache('selfie/staticNetFilteringEngine', + staticNetFilteringEngine.toSelfie() ), ]); lz4Codec.relinquish(); µb.selfieIsInvalid = false; + ubolog(`Selfie was created`); }; 
const loadMain = async function() { - const details = await io.get('selfie/main'); - if ( - details instanceof Object === false || - typeof details.content !== 'string' || - details.content === '' - ) { - return false; - } - let selfie; - try { - selfie = JSON.parse(details.content); - } catch(ex) { - } + const selfie = await io.fromCache('selfie/main'); if ( selfie instanceof Object === false ) { return false; } if ( selfie.magic !== µb.systemSettings.selfieMagic ) { return false; } if ( selfie.availableFilterLists instanceof Object === false ) { return false; } @@ -1306,12 +1286,11 @@ onBroadcast(msg => { try { const results = await Promise.all([ loadMain(), - redirectEngine.fromSelfie('selfie/redirectEngine'), - staticExtFilteringEngine.fromSelfie( - 'selfie/staticExtFilteringEngine' + io.fromCache('selfie/staticExtFilteringEngine').then(selfie => + staticExtFilteringEngine.fromSelfie(selfie) ), - staticNetFilteringEngine.fromSelfie(io, - 'selfie/staticNetFilteringEngine' + io.fromCache('selfie/staticNetFilteringEngine').then(selfie => + staticNetFilteringEngine.fromSelfie(selfie) ), ]); if ( results.every(v => v) ) { @@ -1325,10 +1304,11 @@ onBroadcast(msg => { return false; }; - const destroy = function() { + const destroy = function(options = {}) { if ( µb.selfieIsInvalid === false ) { - io.remove(/^selfie\//); + io.remove(/^selfie\//, options); µb.selfieIsInvalid = true; + ubolog(`Selfie was removed`); } if ( µb.wakeupReason === 'createSelfie' ) { µb.wakeupReason = ''; @@ -1594,8 +1574,7 @@ onBroadcast(msg => { if ( topic === 'after-asset-updated' ) { // Skip selfie-related content. 
if ( details.assetKey.startsWith('selfie/') ) { return; } - const cached = typeof details.content === 'string' && - details.content !== ''; + const cached = typeof details.content === 'string' && details.content !== ''; if ( this.availableFilterLists.hasOwnProperty(details.assetKey) ) { if ( cached ) { if ( this.selectedFilterLists.indexOf(details.assetKey) !== -1 ) { @@ -1604,8 +1583,7 @@ onBroadcast(msg => { details.content ); if ( this.badLists.has(details.assetKey) === false ) { - io.put( - 'compiled/' + details.assetKey, + io.toCache(`compiled/${details.assetKey}`, this.compileFilters(details.content, { assetKey: details.assetKey, trustedSource: this.isTrustedList(details.assetKey), diff --git a/src/lib/publicsuffixlist/publicsuffixlist.js b/src/lib/publicsuffixlist/publicsuffixlist.js index 6483c89e2..87910d4b0 100644 --- a/src/lib/publicsuffixlist/publicsuffixlist.js +++ b/src/lib/publicsuffixlist/publicsuffixlist.js @@ -13,8 +13,6 @@ /*! Home: https://github.com/gorhill/publicsuffixlist.js -- GPLv3 APLv2 */ -/* globals WebAssembly, exports:true, module */ - 'use strict'; /******************************************************************************* @@ -70,7 +68,7 @@ const RULES_PTR_SLOT = 100; // 100 / 400 (400-256=144 => 144>128) const SUFFIX_NOT_FOUND_SLOT = 399; // -- / 399 (safe, see above) const CHARDATA_PTR_SLOT = 101; // 101 / 404 const EMPTY_STRING = ''; -const SELFIE_MAGIC = 2; +const SELFIE_MAGIC = 3; let wasmMemory; let pslBuffer32; @@ -499,9 +497,7 @@ const toSelfie = function(encoder) { } return { magic: SELFIE_MAGIC, - buf32: Array.from( - new Uint32Array(pslBuffer8.buffer, 0, pslByteLength >>> 2) - ), + buf32: pslBuffer32.subarray(0, pslByteLength >> 2), }; }; @@ -524,7 +520,7 @@ const fromSelfie = function(selfie, decoder) { } else if ( selfie instanceof Object && selfie.magic === SELFIE_MAGIC && - Array.isArray(selfie.buf32) + selfie.buf32 instanceof Uint32Array ) { byteLength = selfie.buf32.length << 2; allocateBuffers(byteLength);