From 72bb89495ba0928042b8c0fe22ee57f6e955ca90 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Tue, 7 Dec 2021 11:15:14 -0500 Subject: [PATCH] Change compiled list format to a saner block id management Just use self-described readable section identifiers instead of difficult-to-manage arbitrary integers. --- src/js/background.js | 11 ++--------- src/js/cosmetic-filtering.js | 21 +++++++++------------ src/js/html-filtering.js | 4 ++-- src/js/httpheader-filtering.js | 4 ++-- src/js/reverselookup-worker.js | 29 ++++++++++++++++++----------- src/js/scriptlet-filtering.js | 4 ++-- src/js/static-filtering-io.js | 18 +++++------------- src/js/static-net-filtering.js | 30 +++++++++++++++--------------- src/js/storage.js | 4 ++++ 9 files changed, 59 insertions(+), 66 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index c1cd88374..16e3a8f1a 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -175,8 +175,8 @@ const µBlock = { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 41, // Increase when compiled format changes - selfieMagic: 41, // Increase when selfie format changes + compiledMagic: 42, // Increase when compiled format changes + selfieMagic: 42, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 @@ -189,13 +189,6 @@ const µBlock = { // jshint ignore:line compiledFormatChanged: false, selfieIsInvalid: false, - compiledCosmeticSection: 200, - compiledScriptletSection: 300, - compiledHTMLSection: 400, - compiledHTTPHeaderSection: 500, - compiledSentinelSection: 1000, - compiledBadSubsection: 1, - restoreBackupSettings: { lastRestoreFile: '', lastRestoreTime: 0, diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index 3d46fd3c7..c5b89da95 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -38,9 +38,6 @@ const cosmeticSurveyingMissCountMax = parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) || 15; -const COMPILED_SPECIFIC_SECTION = 0; -const COMPILED_GENERIC_SECTION = 1; - /******************************************************************************/ /******************************************************************************/ @@ -398,7 +395,7 @@ FilterContainer.prototype.compileGenericHideSelector = function( return; } - writer.select(µb.compiledCosmeticSection + COMPILED_GENERIC_SECTION); + writer.select('COSMETIC_FILTERS:GENERIC'); const type = compiled.charCodeAt(0); let key; @@ -501,7 +498,7 @@ FilterContainer.prototype.compileGenericUnhideSelector = function( return; } - writer.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION); + writer.select('COSMETIC_FILTERS:SPECIFIC'); // https://github.com/chrisaljoudi/uBlock/issues/497 // All generic exception filters are stored as hostname-based filter @@ -531,7 +528,7 @@ FilterContainer.prototype.compileSpecificSelector = function( return; } - writer.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION); + writer.select('COSMETIC_FILTERS:SPECIFIC'); // https://github.com/chrisaljoudi/uBlock/issues/145 let unhide = exception ? 1 : 0; @@ -564,13 +561,13 @@ FilterContainer.prototype.compileTemporary = function(parser) { FilterContainer.prototype.fromCompiledContent = function(reader, options) { if ( options.skipCosmetic ) { - this.skipCompiledContent(reader, COMPILED_SPECIFIC_SECTION); - this.skipCompiledContent(reader, COMPILED_GENERIC_SECTION); + this.skipCompiledContent(reader, 'SPECIFIC'); + this.skipCompiledContent(reader, 'GENERIC'); return; } // Specific cosmetic filter section - reader.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION); + reader.select('COSMETIC_FILTERS:SPECIFIC'); while ( reader.next() ) { this.acceptedCount += 1; const fingerprint = reader.fingerprint(); @@ -606,12 +603,12 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) { } if ( options.skipGenericCosmetic ) { - this.skipCompiledContent(reader, COMPILED_GENERIC_SECTION); + this.skipCompiledContent(reader, 'GENERIC'); return; } // Generic cosmetic filter section - reader.select(µb.compiledCosmeticSection + COMPILED_GENERIC_SECTION); + reader.select('COSMETIC_FILTERS:GENERIC'); while ( reader.next() ) { this.acceptedCount += 1; const fingerprint = reader.fingerprint(); @@ -675,7 +672,7 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) { /******************************************************************************/ FilterContainer.prototype.skipCompiledContent = function(reader, sectionId) { - reader.select(µb.compiledCosmeticSection + sectionId); + reader.select(`COSMETIC_FILTERS:${sectionId}`); while ( reader.next() ) { this.acceptedCount += 1; this.discardedCount += 1; diff --git a/src/js/html-filtering.js b/src/js/html-filtering.js index e3ff2101a..66d8460a2 100644 --- a/src/js/html-filtering.js +++ b/src/js/html-filtering.js @@ -316,7 +316,7 @@ htmlFilteringEngine.compile = function(parser, writer) { return; } - writer.select(µb.compiledHTMLSection); + writer.select('HTML_FILTERS'); // TODO: Mind negated hostnames, they are currently discarded. @@ -345,7 +345,7 @@ htmlFilteringEngine.fromCompiledContent = function(reader) { // Don't bother loading filters if stream filtering is not supported. if ( µb.canFilterResponseData === false ) { return; } - reader.select(µb.compiledHTMLSection); + reader.select('HTML_FILTERS'); while ( reader.next() ) { acceptedCount += 1; diff --git a/src/js/httpheader-filtering.js b/src/js/httpheader-filtering.js index b9126d99b..c959e565e 100644 --- a/src/js/httpheader-filtering.js +++ b/src/js/httpheader-filtering.js @@ -90,7 +90,7 @@ httpheaderFilteringEngine.freeze = function() { }; httpheaderFilteringEngine.compile = function(parser, writer) { - writer.select(µb.compiledHTTPHeaderSection); + writer.select('HTTPHEADER_FILTERS'); const { compiled, exception } = parser.result; const headerName = compiled.slice(15, -1); @@ -136,7 +136,7 @@ httpheaderFilteringEngine.compileTemporary = function(parser) { // 15 -1 httpheaderFilteringEngine.fromCompiledContent = function(reader) { - reader.select(µb.compiledHTTPHeaderSection); + reader.select('HTTPHEADER_FILTERS'); while ( reader.next() ) { acceptedCount += 1; diff --git a/src/js/reverselookup-worker.js b/src/js/reverselookup-worker.js index 0e4cf624f..eb20e3005 100644 --- a/src/js/reverselookup-worker.js +++ b/src/js/reverselookup-worker.js @@ -28,18 +28,18 @@ /******************************************************************************/ -const reBlockStart = /^#block-start-(\d+)\n/gm; +const reBlockStart = /^#block-start-([\w:]+)\n/gm; let listEntries = Object.create(null); -const extractBlocks = function(content, begId, endId) { +const extractBlocks = function(content, ...ids) { reBlockStart.lastIndex = 0; const out = []; let match = reBlockStart.exec(content); while ( match !== null ) { const beg = match.index + match[0].length; - const blockId = parseInt(match[1], 10); - if ( blockId >= begId && blockId < endId ) { - const end = content.indexOf('#block-end-' + match[1], beg); + const id = match[1]; + if ( ids.includes(id) ) { + const end = content.indexOf(`#block-end-${id}`, beg); out.push(content.slice(beg, end)); reBlockStart.lastIndex = end; } @@ -58,7 +58,7 @@ const fromNetFilter = function(details) { for ( const assetKey in listEntries ) { const entry = listEntries[assetKey]; if ( entry === undefined ) { continue; } - const content = extractBlocks(entry.content, 100, 101); + const content = extractBlocks(entry.content, 'NETWORK_FILTERS:GOOD'); let pos = 0; for (;;) { pos = content.indexOf(compiledFilter, pos); @@ -159,9 +159,15 @@ const fromCosmeticFilter = function(details) { for ( const assetKey in listEntries ) { const entry = listEntries[assetKey]; if ( entry === undefined ) { continue; } - let content = extractBlocks(entry.content, 200, 1000), - isProcedural, - found; + const content = extractBlocks( + entry.content, + 'COSMETIC_FILTERS:GENERIC', + 'COSMETIC_FILTERS:SPECIFIC', + 'SCRIPTLET_FILTERS', + 'HTML_FILTERS', + 'HTTPHEADER_FILTERS' + ); + let found; let pos = 0; while ( (pos = content.indexOf(needle, pos)) !== -1 ) { let beg = content.lastIndexOf('\n', pos); @@ -216,9 +222,9 @@ const fromCosmeticFilter = function(details) { case 8: // HTML filtering // Response header filtering - case 64: + case 64: { if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; } - isProcedural = (fargs[2] & 0b010) !== 0; + const isProcedural = (fargs[2] & 0b010) !== 0; if ( isProcedural === false && fargs[3] !== selector || isProcedural && JSON.parse(fargs[3]).raw !== selector @@ -237,6 +243,7 @@ const fromCosmeticFilter = function(details) { } found = fargs[1] + prefix + selector; break; + } // Scriptlet injection case 32: if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; } diff --git a/src/js/scriptlet-filtering.js b/src/js/scriptlet-filtering.js index 3a090dbba..f1e0bc59e 100644 --- a/src/js/scriptlet-filtering.js +++ b/src/js/scriptlet-filtering.js @@ -249,7 +249,7 @@ scriptletFilteringEngine.freeze = function() { }; scriptletFilteringEngine.compile = function(parser, writer) { - writer.select(µb.compiledScriptletSection); + writer.select('SCRIPTLET_FILTERS'); // Only exception filters are allowed to be global. const { raw, exception } = parser.result; @@ -295,7 +295,7 @@ scriptletFilteringEngine.compileTemporary = function(parser) { // 4 -1 scriptletFilteringEngine.fromCompiledContent = function(reader) { - reader.select(µb.compiledScriptletSection); + reader.select('SCRIPTLET_FILTERS'); while ( reader.next() ) { acceptedCount += 1; diff --git a/src/js/static-filtering-io.js b/src/js/static-filtering-io.js index 5240c3bc2..941c8413c 100644 --- a/src/js/static-filtering-io.js +++ b/src/js/static-filtering-io.js @@ -85,12 +85,13 @@ class CompiledListReader { this.line = ''; this.blocks = new Map(); this.properties = new Map(); - const reBlockStart = new RegExp(`^${blockStartPrefix}(\\d+)\\n`, 'gm'); + const reBlockStart = new RegExp(`^${blockStartPrefix}([\\w:]+)\\n`, 'gm'); let match = reBlockStart.exec(raw); while ( match !== null ) { - let beg = match.index + match[0].length; - let end = raw.indexOf(blockEndPrefix + match[1], beg); - this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end)); + const sectionId = match[1]; + const beg = match.index + match[0].length; + const end = raw.indexOf(blockEndPrefix + sectionId, beg); + this.blocks.set(sectionId, raw.slice(beg, end)); reBlockStart.lastIndex = end; match = reBlockStart.exec(raw); } @@ -130,15 +131,6 @@ class CompiledListReader { } } -CompiledListWriter.prototype.NETWORK_SECTION = -CompiledListReader.prototype.NETWORK_SECTION = 100; - -CompiledListWriter.blockStartPrefix = -CompiledListReader.blockStartPrefix = blockStartPrefix; - -CompiledListWriter.blockEndPrefix = -CompiledListReader.blockEndPrefix = blockEndPrefix; - /******************************************************************************/ export { diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 88da4aadd..f8d6e4f70 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -111,10 +111,8 @@ const typeNameToTypeValue = { 'inline-font': 17 << TypeBitsOffset, 'inline-script': 18 << TypeBitsOffset, 'cname': 19 << TypeBitsOffset, -// 'unused': 20 << TypeBitsOffset, -// 'unused': 21 << TypeBitsOffset, - 'webrtc': 22 << TypeBitsOffset, - 'unsupported': 23 << TypeBitsOffset, + 'webrtc': 20 << TypeBitsOffset, + 'unsupported': 21 << TypeBitsOffset, }; const otherTypeBitValue = typeNameToTypeValue.other; @@ -169,8 +167,6 @@ const typeValueToTypeName = [ const MAX_TOKEN_LENGTH = 7; -const COMPILED_BAD_SECTION = 1; - // Four upper bits of token hash are reserved for built-in predefined // token hashes, which should never end up being used when tokenizing // any arbitrary string. @@ -1779,12 +1775,10 @@ registerFilterClass(FilterCompositeAll); const FilterHostnameDict = class { static getCount(idata) { const itrie = filterData[idata+1]; - if ( itrie === 0 ) { - return filterRefs[filterData[idata+3]].length; + if ( itrie !== 0 ) { + return Array.from(destHNTrieContainer.trieIterator(itrie)).length; } - return Array.from( - destHNTrieContainer.trieIterator(filterData[idata+1]) - ).length; + return filterRefs[filterData[idata+3]].length; } static match(idata) { @@ -2640,6 +2634,12 @@ class FilterCompiler { return this; } + start(/* writer */) { + } + + finish(/* writer */) { + } + clone() { return new FilterCompiler(this.parser, this); } @@ -3105,8 +3105,8 @@ class FilterCompiler { writer.select( this.badFilter - ? writer.NETWORK_SECTION + COMPILED_BAD_SECTION - : writer.NETWORK_SECTION + ? 'NETWORK_FILTERS:BAD' + : 'NETWORK_FILTERS:GOOD' ); // Reminder: @@ -3715,7 +3715,7 @@ FilterContainer.prototype.createCompiler = function(parser) { /******************************************************************************/ FilterContainer.prototype.fromCompiled = function(reader) { - reader.select(reader.NETWORK_SECTION); + reader.select('NETWORK_FILTERS:GOOD'); while ( reader.next() ) { this.acceptedCount += 1; if ( this.goodFilters.has(reader.line) ) { @@ -3725,7 +3725,7 @@ FilterContainer.prototype.fromCompiled = function(reader) { } } - reader.select(reader.NETWORK_SECTION + COMPILED_BAD_SECTION); + reader.select('NETWORK_FILTERS:BAD'); while ( reader.next() ) { this.badFilters.add(reader.line); } diff --git a/src/js/storage.js b/src/js/storage.js index e0ba9e1f2..65d397419 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -979,6 +979,8 @@ self.addEventListener('hiddenSettingsChanged', ( ) => { parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH); + compiler.start(writer); + while ( lineIter.eot() === false ) { let line = lineIter.next(); @@ -1013,6 +1015,8 @@ self.addEventListener('hiddenSettingsChanged', ( ) => { } } + compiler.finish(writer); + // https://github.com/uBlockOrigin/uBlock-issues/issues/1365 // Embed version into compiled list itself: it is encoded in as the // first digits followed by a whitespace.