From 7cd583a301a84d269587e1afcb883de78d359374 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Sun, 8 Aug 2021 09:17:14 -0400 Subject: [PATCH] Revisit the nodejs API --- platform/nodejs/README.md | 57 ++++---- platform/nodejs/index.js | 268 ++++++++++++++++-------------------- platform/nodejs/test.js | 6 +- src/js/filtering-context.js | 7 + 4 files changed, 159 insertions(+), 179 deletions(-) diff --git a/platform/nodejs/README.md b/platform/nodejs/README.md index 6f4165d7a..f1ca5b41b 100644 --- a/platform/nodejs/README.md +++ b/platform/nodejs/README.md @@ -28,70 +28,69 @@ and also lists of domain names or hosts file format (i.e. block lists from [The ## Usage At the moment, there can be only one instance of the static network filtering -engine, which API must be imported as follow: +engine ("SNFE"), whose proxy API must be imported as follows: ```js -import { FilteringContext, pslInit, useRawLists } from '@gorhill/ubo-core'; +import { StaticNetFilteringEngine } from '@gorhill/ubo-core'; ``` If you must import as a NodeJS module: ```js -const { FilteringContext, pslInit, useRawLists } await import from '@gorhill/ubo-core'; +const { StaticNetFilteringEngine } = await import('@gorhill/ubo-core'); ``` -uBO's SNFE works best with a properly initialized Public Suffix List database, -since it needs to evaluate whether a network request to match is either 1st- -or 3rd-party to the context in which it is fired: + +Create an instance of SNFE: ```js -await pslInit(); +const snfe = await StaticNetFilteringEngine.create(); ``` -Now feed the SNFE with filter lists -- `useRawLists()` accepts an array of +Feed the SNFE with filter lists -- `useLists()` accepts an array of objects (or promises to object) which expose the raw text of a list through the `raw` property, and optionally the name of the list through the `name` property (how you fetch the lists is up to you): ```js -const snfe = await useRawLists([ +await snfe.useLists([ fetch('easylist').then(raw => ({ name: 'easylist', 
raw })), fetch('easyprivacy').then(raw => ({ name: 'easyprivacy', raw })), ]); ``` -`useRawLists()` returns a reference to the SNFE, which you can use later to -match network requests. First we need a filtering context instance, which is -required as an argument to match network requests: - -```js -const fctxt = new FilteringContext(); -``` - Now we are ready to match network requests: ```js // Not blocked -fctxt.setDocOriginFromURL('https://www.bloomberg.com/'); -fctxt.setURL('https://www.bloomberg.com/tophat/assets/v2.6.1/that.css'); -fctxt.setType('stylesheet'); -if ( snfe.matchRequest(fctxt) !== 0 ) { +if ( snfe.matchRequest({ + originURL: 'https://www.bloomberg.com/', + url: 'https://www.bloomberg.com/tophat/assets/v2.6.1/that.css', + type: 'stylesheet' +}) !== 0 ) { console.log(snfe.toLogData()); } // Blocked -fctxt.setDocOriginFromURL('https://www.bloomberg.com/'); -fctxt.setURL('https://securepubads.g.doubleclick.net/tag/js/gpt.js'); -fctxt.setType('script'); -if ( snfe.matchRequest(fctxt) !== 0 ) { +if ( snfe.matchRequest({ + originURL: 'https://www.bloomberg.com/', + url: 'https://securepubads.g.doubleclick.net/tag/js/gpt.js', + type: 'script' +}) !== 0 ) { console.log(snfe.toLogData()); } // Unblocked -fctxt.setDocOriginFromURL('https://www.bloomberg.com/'); -fctxt.setURL('https://sourcepointcmp.bloomberg.com/ccpa.js'); -fctxt.setType('script'); -if ( snfe.matchRequest(fctxt) !== 0 ) { +if ( snfe.matchRequest({ + originURL: 'https://www.bloomberg.com/', + url: 'https://sourcepointcmp.bloomberg.com/ccpa.js', + type: 'script' +}) !== 0 ) { console.log(snfe.toLogData()); } ``` + +It is possible to pre-parse filter lists and save the intermediate results for +later use -- useful to speed up the loading of filter lists. This will be +documented eventually, but if you feel adventurous, you can look at the code +and use this capability now if you figure out the details. 
diff --git a/platform/nodejs/index.js b/platform/nodejs/index.js index ac87b1653..832843e7c 100644 --- a/platform/nodejs/index.js +++ b/platform/nodejs/index.js @@ -51,12 +51,67 @@ function loadJSON(path) { return JSON.parse(readFileSync(resolve(__dirname, path), 'utf8')); } -function compileList(list, compiler, writer, options = {}) { - const lineIter = new LineIterator(list.raw); +/******************************************************************************/ + +async function enableWASM() { + const wasmModuleFetcher = function(path) { + const require = createRequire(import.meta.url); // jshint ignore:line + const wasm = new Uint8Array(require(`${path}.wasm.json`)); + return globals.WebAssembly.compile(wasm); + }; + try { + const results = await Promise.all([ + globals.publicSuffixList.enableWASM(wasmModuleFetcher, './lib/publicsuffixlist/wasm/'), + snfe.enableWASM(wasmModuleFetcher, './js/wasm/'), + ]); + return results.every(a => a === true); + } catch(reason) { + console.log(reason); + } + return false; +} + +/******************************************************************************/ + +function pslInit(raw) { + if ( typeof raw === 'string' && raw.trim() !== '' ) { + globals.publicSuffixList.parse(raw, globals.punycode.toASCII); + return globals.publicSuffixList; + } + + // Use serialized version if available + let serialized = null; + try { + // Use loadJSON() because require() would keep the string in memory. + serialized = loadJSON('build/publicsuffixlist.json'); + } catch (error) { + if ( process.env.npm_lifecycle_event !== 'install' ) { + // This should never happen except during package installation. 
+ console.error(error); + } + } + if ( serialized !== null ) { + globals.publicSuffixList.fromSelfie(serialized); + return globals.publicSuffixList; + } + + const require = createRequire(import.meta.url); // jshint ignore:line + raw = require('./data/effective_tld_names.json'); + if ( typeof raw !== 'string' || raw.trim() === '' ) { + console.error('Unable to populate public suffix list'); + return; + } + return globals.publicSuffixList; +} + +/******************************************************************************/ + +function compileList({ name, raw }, compiler, writer, options = {}) { + const lineIter = new LineIterator(raw); const events = Array.isArray(options.events) ? options.events : undefined; - if ( list.name ) { - writer.properties.set('name', list.name); + if ( name ) { + writer.properties.set('name', name); } const { parser } = compiler; @@ -81,168 +136,93 @@ function compileList(list, compiler, writer, options = {}) { }); } } -} -async function enableWASM() { - const wasmModuleFetcher = function(path) { - const require = createRequire(import.meta.url); // jshint ignore:line - const wasm = new Uint8Array(require(`${path}.wasm.json`)); - return globals.WebAssembly.compile(wasm); - }; - try { - const results = await Promise.all([ - globals.publicSuffixList.enableWASM(wasmModuleFetcher, './lib/publicsuffixlist/wasm/'), - snfe.enableWASM(wasmModuleFetcher, './js/wasm/'), - ]); - return results.every(a => a === true); - } catch(reason) { - console.log(reason); - } - return false; -} - -function pslInit(raw) { - if ( typeof raw !== 'string' || raw.trim() === '' ) { - const require = createRequire(import.meta.url); // jshint ignore:line - - let serialized = null; - - // Use serialized version if available - try { - // Use loadJSON() because require() would keep the string in memory. 
- serialized = loadJSON('build/publicsuffixlist.json'); - } catch (error) { - if ( process.env.npm_lifecycle_event !== 'install' ) { - // This should never happen except during package installation. - console.error(error); - } - } - - if ( serialized !== null ) { - globals.publicSuffixList.fromSelfie(serialized); - return globals.publicSuffixList; - } - - raw = require('./data/effective_tld_names.json'); - if ( typeof raw !== 'string' || raw.trim() === '' ) { - console.error('Unable to populate public suffix list'); - return; - } - } - globals.publicSuffixList.parse(raw, globals.punycode.toASCII); - return globals.publicSuffixList; -} - -function createCompiler(parser) { - return snfe.createCompiler(parser); -} - -async function useCompiledLists(lists) { - // Remove all filters - reset(); - - if ( Array.isArray(lists) === false || lists.length === 0 ) { - return snfe; - } - - const consumeList = list => { - snfe.fromCompiled(new CompiledListReader(list.compiled)); - }; - - // Populate filtering engine with filter lists - const promises = []; - for ( const list of lists ) { - const promise = list instanceof Promise ? list : Promise.resolve(list); - promises.push(promise.then(list => consumeList(list))); - } - - await Promise.all(promises); - - // Commit changes - snfe.freeze(); - snfe.optimize(); - - return snfe; -} - -async function useRawLists(lists, options = {}) { - // Remove all filters - reset(); - - if ( Array.isArray(lists) === false || lists.length === 0 ) { - return snfe; - } - - const compiler = createCompiler(new StaticFilteringParser()); - - const consumeList = list => { - const writer = new CompiledListWriter(); - compileList(list, compiler, writer, options); - snfe.fromCompiled(new CompiledListReader(writer.toString())); - }; - - // Populate filtering engine with filter lists - const promises = []; - for ( const list of lists ) { - const promise = list instanceof Promise ? 
list : Promise.resolve(list); - promises.push(promise.then(list => consumeList(list))); - } - - await Promise.all(promises); - - // Commit changes - snfe.freeze(); - snfe.optimize(); - - return snfe; -} - -function reset() { - snfe.reset(); + return writer.toString(); } /******************************************************************************/ -let pslInitialized = false; -let staticNetFilteringEngineInstance = null; +async function useLists(lists, options = {}) { + // Remove all filters + snfe.reset(); + + if ( Array.isArray(lists) === false || lists.length === 0 ) { + return snfe; + } + + let compiler = null; + + const consumeList = list => { + let { compiled } = list; + if ( typeof compiled !== 'string' || compiled === '' ) { + const writer = new CompiledListWriter(); + if ( compiler === null ) { + compiler = snfe.createCompiler(new StaticFilteringParser()); + } + compiled = compileList(list, compiler, writer, options); + } + snfe.fromCompiled(new CompiledListReader(compiled)); + }; + + // Populate filtering engine with resolved filter lists + const promises = []; + for ( const list of lists ) { + const promise = list instanceof Promise ? 
list : Promise.resolve(list); + promises.push(promise.then(list => consumeList(list))); + } + + await Promise.all(promises); + + // Commit changes + snfe.freeze(); + snfe.optimize(); + + return snfe; +} + +/******************************************************************************/ + +const fctx = new FilteringContext(); +let snfeInstance = null; class StaticNetFilteringEngine { constructor() { - if ( staticNetFilteringEngineInstance !== null ) { + if ( snfeInstance !== null ) { throw new Error('Only a single instance is supported.'); } - - staticNetFilteringEngineInstance = this; - - this._context = new FilteringContext(); + snfeInstance = this; } async useLists(lists) { - await useRawLists(lists); + return useLists(lists); } - matchRequest({ url, originURL, type }) { - this._context.setDocOriginFromURL(originURL); - this._context.setURL(url); - this._context.setType(type); - - return snfe.matchRequest(this._context); + matchRequest(details) { + return snfe.matchRequest(fctx.fromDetails(details)); } toLogData() { return snfe.toLogData(); } -} -StaticNetFilteringEngine.initialize = async function initialize() { - if ( !pslInitialized ) { - if ( !pslInit() ) { + createCompiler(parser) { + return snfe.createCompiler(parser); + } + + compileList(...args) { + return compileList(...args); + } + + static async create({ noPSL } = {}) { + const instance = new StaticNetFilteringEngine(); + + if ( noPSL !== true && !pslInit() ) { throw new Error('Failed to initialize public suffix list.'); } - pslInitialized = true; + return instance; } -}; +} /******************************************************************************/ @@ -255,11 +235,7 @@ if ( typeof module !== 'undefined' && typeof exports !== 'undefined' ) { } export { - FilteringContext, - StaticNetFilteringEngine, enableWASM, pslInit, - createCompiler, - useCompiledLists, - useRawLists, + StaticNetFilteringEngine, }; diff --git a/platform/nodejs/test.js b/platform/nodejs/test.js index f4691edac..58d732407 100644 
--- a/platform/nodejs/test.js +++ b/platform/nodejs/test.js @@ -52,16 +52,14 @@ async function main() { console.log(ex); } - await StaticNetFilteringEngine.initialize(); - - const engine = new StaticNetFilteringEngine(); + const engine = await StaticNetFilteringEngine.create(); await engine.useLists([ fetch('easylist').then(raw => ({ name: 'easylist', raw })), fetch('easyprivacy').then(raw => ({ name: 'easyprivacy', raw })), ]); - let result = null; + let result = 0; // Tests // Not blocked diff --git a/src/js/filtering-context.js b/src/js/filtering-context.js index f0a9ea8e7..96f469a4b 100644 --- a/src/js/filtering-context.js +++ b/src/js/filtering-context.js @@ -150,6 +150,13 @@ const FilteringContext = class { return this; } + fromDetails({ originURL, url, type }) { + this.setDocOriginFromURL(originURL); + this.setURL(url); + this.setType(type); + return this; + } + duplicate() { return (new FilteringContext(this)); }