From 4818405cf6aa2fa5bb1d289ca6268e418cc5a331 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Thu, 5 Aug 2021 13:30:20 -0400 Subject: [PATCH] Remove need to pass parser at every compile() call The compiler instance is already initialized with a reference to the parser, no need to keep passing the reference at each call to compile(). --- platform/nodejs/index.js | 88 ++++++++++++++++++++++++---------- src/js/reverselookup.js | 2 +- src/js/static-net-filtering.js | 52 ++++++++++---------- src/js/storage.js | 2 +- 4 files changed, 90 insertions(+), 54 deletions(-) diff --git a/platform/nodejs/index.js b/platform/nodejs/index.js index c70363630..c246eb744 100644 --- a/platform/nodejs/index.js +++ b/platform/nodejs/index.js @@ -51,13 +51,15 @@ function loadJSON(path) { return JSON.parse(readFileSync(resolve(__dirname, path), 'utf8')); } -function compileList(rawText, writer, options = {}) { - const lineIter = new LineIterator(rawText); - const parser = new StaticFilteringParser(true); - const compiler = snfe.createCompiler(parser); +function compileList(list, compiler, writer, options = {}) { + const lineIter = new LineIterator(list.raw); const events = Array.isArray(options.events) ? options.events : undefined; - parser.setMaxTokenLength(snfe.MAX_TOKEN_LENGTH); + if ( list.name ) { + writer.properties.set('name', list.name); + } + + const { parser } = compiler; while ( lineIter.eot() === false ) { let line = lineIter.next(); @@ -71,7 +73,7 @@ function compileList(rawText, writer, options = {}) { if ( parser.patternHasUnicode() && parser.toASCII() === false ) { continue; } - if ( compiler.compile(parser, writer) ) { continue; } + if ( compiler.compile(writer) ) { continue; } if ( compiler.error !== undefined && events !== undefined ) { options.events.push({ type: 'error', @@ -79,16 +81,6 @@ function compileList(rawText, writer, options = {}) { }); } } - - return writer.toString(); -} - -function applyList(name, raw) { - const writer = new CompiledListWriter(); - writer.properties.set('name', name); - const compiled = compileList(raw, writer); - const reader = new CompiledListReader(compiled); - snfe.fromCompiled(reader); } async function enableWASM() { @@ -141,20 +133,63 @@ function pslInit(raw) { return globals.publicSuffixList; } -function restart(lists, options = {}) { +async function useCompiledLists(lists) { // Remove all filters reset(); - if ( Array.isArray(lists) && lists.length !== 0 ) { - // Populate filtering engine with filter lists - for ( const { name, raw } of lists ) { - applyList(name, raw, options); - } - // Commit changes - snfe.freeze(); - snfe.optimize(); + if ( Array.isArray(lists) === false || lists.length === 0 ) { + return snfe; } + const consumeList = list => { + snfe.fromCompiled(new CompiledListReader(list.compiled)); + }; + + // Populate filtering engine with filter lists + const promises = []; + for ( const list of lists ) { + const promise = list instanceof Promise ? list : Promise.resolve(list); + promises.push(promise.then(list => consumeList(list))); + } + + await Promise.all(promises); + + // Commit changes + snfe.freeze(); + snfe.optimize(); + + return snfe; +} + +async function useRawLists(lists, options = {}) { + // Remove all filters + reset(); + + if ( Array.isArray(lists) === false || lists.length === 0 ) { + return snfe; + } + + const compiler = snfe.createCompiler(new StaticFilteringParser()); + + const consumeList = list => { + const writer = new CompiledListWriter(); + compileList(list, compiler, writer, options); + snfe.fromCompiled(new CompiledListReader(writer.toString())); + }; + + // Populate filtering engine with filter lists + const promises = []; + for ( const list of lists ) { + const promise = list instanceof Promise ? list : Promise.resolve(list); + promises.push(promise.then(list => consumeList(list))); + } + + await Promise.all(promises); + + // Commit changes + snfe.freeze(); + snfe.optimize(); + return snfe; } @@ -174,5 +209,6 @@ export { FilteringContext, enableWASM, pslInit, - restart, + useCompiledLists, + useRawLists, }; diff --git a/src/js/reverselookup.js b/src/js/reverselookup.js index 28e40c9ec..ed6b8288c 100644 --- a/src/js/reverselookup.js +++ b/src/js/reverselookup.js @@ -138,7 +138,7 @@ const fromNetFilter = async function(rawFilter) { parser.analyze(rawFilter); const compiler = staticNetFilteringEngine.createCompiler(parser); - if ( compiler.compile(parser, writer) === false ) { return; } + if ( compiler.compile(writer) === false ) { return; } await initWorker(); diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 4f6c147cd..69fce94e6 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -2675,6 +2675,7 @@ const urlTokenizer = new (class { class FilterCompiler { constructor(parser, other = undefined) { + this.parser = parser; if ( other !== undefined ) { return Object.assign(this, other); } @@ -2810,11 +2811,10 @@ class FilterCompiler { [ 'crop',1431 ], [ 'new',1412], ]); - this.reset(parser); + this.reset(); } - reset(parser) { - this.parser = parser; + reset() { this.action = BlockAction; // anchor: bit vector // 0000 (0x0): no anchoring @@ -3078,67 +3078,67 @@ class FilterCompiler { return true; } - process(parser) { + process() { // important! - this.reset(parser); + this.reset(); - if ( parser.hasError() ) { + if ( this.parser.hasError() ) { return this.FILTER_INVALID; } // Filters which pattern is a single character other than `*` and have // no narrowing options are discarded as invalid. - if ( parser.patternIsDubious() ) { + if ( this.parser.patternIsDubious() ) { return this.FILTER_INVALID; } // block or allow filter? // Important: this must be executed before parsing options - if ( parser.isException() ) { + if ( this.parser.isException() ) { this.action = AllowAction; } - this.isPureHostname = parser.patternIsPlainHostname(); + this.isPureHostname = this.parser.patternIsPlainHostname(); // Plain hostname? (from HOSTS file) - if ( this.isPureHostname && parser.hasOptions() === false ) { - this.pattern = parser.patternToLowercase(); + if ( this.isPureHostname && this.parser.hasOptions() === false ) { + this.pattern = this.parser.patternToLowercase(); this.anchor |= 0b100; return this.FILTER_OK; } // options - if ( parser.hasOptions() && this.processOptions() === false ) { + if ( this.parser.hasOptions() && this.processOptions() === false ) { return this.FILTER_UNSUPPORTED; } // regex? - if ( parser.patternIsRegex() ) { + if ( this.parser.patternIsRegex() ) { this.isRegex = true; // https://github.com/gorhill/uBlock/issues/1246 // If the filter is valid, use the corrected version of the // source string -- this ensure reverse-lookup will work fine. - this.pattern = this.normalizeRegexSource(parser.getNetPattern()); + this.pattern = this.normalizeRegexSource(this.parser.getNetPattern()); if ( this.pattern === '' ) { return this.FILTER_UNSUPPORTED; } return this.FILTER_OK; } - const pattern = parser.patternIsMatchAll() + const pattern = this.parser.patternIsMatchAll() ? '*' - : parser.patternToLowercase(); + : this.parser.patternToLowercase(); - if ( parser.patternIsLeftHostnameAnchored() ) { + if ( this.parser.patternIsLeftHostnameAnchored() ) { this.anchor |= 0b100; - } else if ( parser.patternIsLeftAnchored() ) { + } else if ( this.parser.patternIsLeftAnchored() ) { this.anchor |= 0b010; } - if ( parser.patternIsRightAnchored() ) { + if ( this.parser.patternIsRightAnchored() ) { this.anchor |= 0b001; } - if ( parser.patternHasWildcard() ) { + if ( this.parser.patternHasWildcard() ) { this.firstWildcardPos = pattern.indexOf('*'); if ( this.firstWildcardPos !== -1 ) { this.secondWildcardPos = @@ -3146,7 +3146,7 @@ class FilterCompiler { } } - if ( parser.patternHasCaret() ) { + if ( this.parser.patternHasCaret() ) { this.firstCaretPos = pattern.indexOf('^'); if ( this.firstCaretPos !== -1 ) { this.secondCaretPos = @@ -3294,8 +3294,8 @@ class FilterCompiler { s.charCodeAt(l-2) === 0x2E /* '.' */; } - compile(parser, writer) { - const r = this.process(parser); + compile(writer) { + const r = this.process(); // Ignore non-static network filters if ( r === this.FILTER_INVALID ) { return false; } @@ -3303,7 +3303,7 @@ class FilterCompiler { // Ignore filters with unsupported options if ( r === this.FILTER_UNSUPPORTED ) { const who = writer.properties.get('name') || '?'; - this.error = `Invalid network filter in ${who}: ${parser.raw}`; + this.error = `Invalid network filter in ${who}: ${this.parser.raw}`; return false; } @@ -3316,8 +3316,8 @@ class FilterCompiler { // Reminder: // `redirect=` is a combination of a `redirect-rule` filter and a // block filter. - if ( this.modifyType === parser.OPTTokenRedirect ) { - this.modifyType = parser.OPTTokenRedirectRule; + if ( this.modifyType === this.parser.OPTTokenRedirect ) { + this.modifyType = this.parser.OPTTokenRedirectRule; const parsedBlock = this.clone(); parsedBlock.modifyType = undefined; parsedBlock.optionUnitBits &= ~this.REDIRECT_BIT; diff --git a/src/js/storage.js b/src/js/storage.js index 0d56207b2..4b15eaac0 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -1001,7 +1001,7 @@ self.addEventListener('hiddenSettingsChanged', ( ) => { if ( parser.patternHasUnicode() && parser.toASCII() === false ) { continue; } - if ( compiler.compile(parser, writer) ) { continue; } + if ( compiler.compile(writer) ) { continue; } if ( compiler.error !== undefined ) { logger.writeOne({ realm: 'message',