1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-15 07:22:28 +02:00

Change compiled list format to use saner block id management

Just use self-describing, readable section identifiers instead
of difficult-to-manage arbitrary integers.
This commit is contained in:
Raymond Hill 2021-12-07 11:15:14 -05:00
parent 8309cc548e
commit 72bb89495b
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
9 changed files with 59 additions and 66 deletions

View File

@ -175,8 +175,8 @@ const µBlock = { // jshint ignore:line
// Read-only // Read-only
systemSettings: { systemSettings: {
compiledMagic: 41, // Increase when compiled format changes compiledMagic: 42, // Increase when compiled format changes
selfieMagic: 41, // Increase when selfie format changes selfieMagic: 42, // Increase when selfie format changes
}, },
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501
@ -189,13 +189,6 @@ const µBlock = { // jshint ignore:line
compiledFormatChanged: false, compiledFormatChanged: false,
selfieIsInvalid: false, selfieIsInvalid: false,
compiledCosmeticSection: 200,
compiledScriptletSection: 300,
compiledHTMLSection: 400,
compiledHTTPHeaderSection: 500,
compiledSentinelSection: 1000,
compiledBadSubsection: 1,
restoreBackupSettings: { restoreBackupSettings: {
lastRestoreFile: '', lastRestoreFile: '',
lastRestoreTime: 0, lastRestoreTime: 0,

View File

@ -38,9 +38,6 @@ const cosmeticSurveyingMissCountMax =
parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) || parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) ||
15; 15;
const COMPILED_SPECIFIC_SECTION = 0;
const COMPILED_GENERIC_SECTION = 1;
/******************************************************************************/ /******************************************************************************/
/******************************************************************************/ /******************************************************************************/
@ -398,7 +395,7 @@ FilterContainer.prototype.compileGenericHideSelector = function(
return; return;
} }
writer.select(µb.compiledCosmeticSection + COMPILED_GENERIC_SECTION); writer.select('COSMETIC_FILTERS:GENERIC');
const type = compiled.charCodeAt(0); const type = compiled.charCodeAt(0);
let key; let key;
@ -501,7 +498,7 @@ FilterContainer.prototype.compileGenericUnhideSelector = function(
return; return;
} }
writer.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION); writer.select('COSMETIC_FILTERS:SPECIFIC');
// https://github.com/chrisaljoudi/uBlock/issues/497 // https://github.com/chrisaljoudi/uBlock/issues/497
// All generic exception filters are stored as hostname-based filter // All generic exception filters are stored as hostname-based filter
@ -531,7 +528,7 @@ FilterContainer.prototype.compileSpecificSelector = function(
return; return;
} }
writer.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION); writer.select('COSMETIC_FILTERS:SPECIFIC');
// https://github.com/chrisaljoudi/uBlock/issues/145 // https://github.com/chrisaljoudi/uBlock/issues/145
let unhide = exception ? 1 : 0; let unhide = exception ? 1 : 0;
@ -564,13 +561,13 @@ FilterContainer.prototype.compileTemporary = function(parser) {
FilterContainer.prototype.fromCompiledContent = function(reader, options) { FilterContainer.prototype.fromCompiledContent = function(reader, options) {
if ( options.skipCosmetic ) { if ( options.skipCosmetic ) {
this.skipCompiledContent(reader, COMPILED_SPECIFIC_SECTION); this.skipCompiledContent(reader, 'SPECIFIC');
this.skipCompiledContent(reader, COMPILED_GENERIC_SECTION); this.skipCompiledContent(reader, 'GENERIC');
return; return;
} }
// Specific cosmetic filter section // Specific cosmetic filter section
reader.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION); reader.select('COSMETIC_FILTERS:SPECIFIC');
while ( reader.next() ) { while ( reader.next() ) {
this.acceptedCount += 1; this.acceptedCount += 1;
const fingerprint = reader.fingerprint(); const fingerprint = reader.fingerprint();
@ -606,12 +603,12 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
} }
if ( options.skipGenericCosmetic ) { if ( options.skipGenericCosmetic ) {
this.skipCompiledContent(reader, COMPILED_GENERIC_SECTION); this.skipCompiledContent(reader, 'GENERIC');
return; return;
} }
// Generic cosmetic filter section // Generic cosmetic filter section
reader.select(µb.compiledCosmeticSection + COMPILED_GENERIC_SECTION); reader.select('COSMETIC_FILTERS:GENERIC');
while ( reader.next() ) { while ( reader.next() ) {
this.acceptedCount += 1; this.acceptedCount += 1;
const fingerprint = reader.fingerprint(); const fingerprint = reader.fingerprint();
@ -675,7 +672,7 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
/******************************************************************************/ /******************************************************************************/
FilterContainer.prototype.skipCompiledContent = function(reader, sectionId) { FilterContainer.prototype.skipCompiledContent = function(reader, sectionId) {
reader.select(µb.compiledCosmeticSection + sectionId); reader.select(`COSMETIC_FILTERS:${sectionId}`);
while ( reader.next() ) { while ( reader.next() ) {
this.acceptedCount += 1; this.acceptedCount += 1;
this.discardedCount += 1; this.discardedCount += 1;

View File

@ -316,7 +316,7 @@ htmlFilteringEngine.compile = function(parser, writer) {
return; return;
} }
writer.select(µb.compiledHTMLSection); writer.select('HTML_FILTERS');
// TODO: Mind negated hostnames, they are currently discarded. // TODO: Mind negated hostnames, they are currently discarded.
@ -345,7 +345,7 @@ htmlFilteringEngine.fromCompiledContent = function(reader) {
// Don't bother loading filters if stream filtering is not supported. // Don't bother loading filters if stream filtering is not supported.
if ( µb.canFilterResponseData === false ) { return; } if ( µb.canFilterResponseData === false ) { return; }
reader.select(µb.compiledHTMLSection); reader.select('HTML_FILTERS');
while ( reader.next() ) { while ( reader.next() ) {
acceptedCount += 1; acceptedCount += 1;

View File

@ -90,7 +90,7 @@ httpheaderFilteringEngine.freeze = function() {
}; };
httpheaderFilteringEngine.compile = function(parser, writer) { httpheaderFilteringEngine.compile = function(parser, writer) {
writer.select(µb.compiledHTTPHeaderSection); writer.select('HTTPHEADER_FILTERS');
const { compiled, exception } = parser.result; const { compiled, exception } = parser.result;
const headerName = compiled.slice(15, -1); const headerName = compiled.slice(15, -1);
@ -136,7 +136,7 @@ httpheaderFilteringEngine.compileTemporary = function(parser) {
// 15 -1 // 15 -1
httpheaderFilteringEngine.fromCompiledContent = function(reader) { httpheaderFilteringEngine.fromCompiledContent = function(reader) {
reader.select(µb.compiledHTTPHeaderSection); reader.select('HTTPHEADER_FILTERS');
while ( reader.next() ) { while ( reader.next() ) {
acceptedCount += 1; acceptedCount += 1;

View File

@ -28,18 +28,18 @@
/******************************************************************************/ /******************************************************************************/
const reBlockStart = /^#block-start-(\d+)\n/gm; const reBlockStart = /^#block-start-([\w:]+)\n/gm;
let listEntries = Object.create(null); let listEntries = Object.create(null);
const extractBlocks = function(content, begId, endId) { const extractBlocks = function(content, ...ids) {
reBlockStart.lastIndex = 0; reBlockStart.lastIndex = 0;
const out = []; const out = [];
let match = reBlockStart.exec(content); let match = reBlockStart.exec(content);
while ( match !== null ) { while ( match !== null ) {
const beg = match.index + match[0].length; const beg = match.index + match[0].length;
const blockId = parseInt(match[1], 10); const id = match[1];
if ( blockId >= begId && blockId < endId ) { if ( ids.includes(id) ) {
const end = content.indexOf('#block-end-' + match[1], beg); const end = content.indexOf(`#block-end-${id}`, beg);
out.push(content.slice(beg, end)); out.push(content.slice(beg, end));
reBlockStart.lastIndex = end; reBlockStart.lastIndex = end;
} }
@ -58,7 +58,7 @@ const fromNetFilter = function(details) {
for ( const assetKey in listEntries ) { for ( const assetKey in listEntries ) {
const entry = listEntries[assetKey]; const entry = listEntries[assetKey];
if ( entry === undefined ) { continue; } if ( entry === undefined ) { continue; }
const content = extractBlocks(entry.content, 100, 101); const content = extractBlocks(entry.content, 'NETWORK_FILTERS:GOOD');
let pos = 0; let pos = 0;
for (;;) { for (;;) {
pos = content.indexOf(compiledFilter, pos); pos = content.indexOf(compiledFilter, pos);
@ -159,9 +159,15 @@ const fromCosmeticFilter = function(details) {
for ( const assetKey in listEntries ) { for ( const assetKey in listEntries ) {
const entry = listEntries[assetKey]; const entry = listEntries[assetKey];
if ( entry === undefined ) { continue; } if ( entry === undefined ) { continue; }
let content = extractBlocks(entry.content, 200, 1000), const content = extractBlocks(
isProcedural, entry.content,
found; 'COSMETIC_FILTERS:GENERIC',
'COSMETIC_FILTERS:SPECIFIC',
'SCRIPTLET_FILTERS',
'HTML_FILTERS',
'HTTPHEADER_FILTERS'
);
let found;
let pos = 0; let pos = 0;
while ( (pos = content.indexOf(needle, pos)) !== -1 ) { while ( (pos = content.indexOf(needle, pos)) !== -1 ) {
let beg = content.lastIndexOf('\n', pos); let beg = content.lastIndexOf('\n', pos);
@ -216,9 +222,9 @@ const fromCosmeticFilter = function(details) {
case 8: case 8:
// HTML filtering // HTML filtering
// Response header filtering // Response header filtering
case 64: case 64: {
if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; } if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; }
isProcedural = (fargs[2] & 0b010) !== 0; const isProcedural = (fargs[2] & 0b010) !== 0;
if ( if (
isProcedural === false && fargs[3] !== selector || isProcedural === false && fargs[3] !== selector ||
isProcedural && JSON.parse(fargs[3]).raw !== selector isProcedural && JSON.parse(fargs[3]).raw !== selector
@ -237,6 +243,7 @@ const fromCosmeticFilter = function(details) {
} }
found = fargs[1] + prefix + selector; found = fargs[1] + prefix + selector;
break; break;
}
// Scriptlet injection // Scriptlet injection
case 32: case 32:
if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; } if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; }

View File

@ -249,7 +249,7 @@ scriptletFilteringEngine.freeze = function() {
}; };
scriptletFilteringEngine.compile = function(parser, writer) { scriptletFilteringEngine.compile = function(parser, writer) {
writer.select(µb.compiledScriptletSection); writer.select('SCRIPTLET_FILTERS');
// Only exception filters are allowed to be global. // Only exception filters are allowed to be global.
const { raw, exception } = parser.result; const { raw, exception } = parser.result;
@ -295,7 +295,7 @@ scriptletFilteringEngine.compileTemporary = function(parser) {
// 4 -1 // 4 -1
scriptletFilteringEngine.fromCompiledContent = function(reader) { scriptletFilteringEngine.fromCompiledContent = function(reader) {
reader.select(µb.compiledScriptletSection); reader.select('SCRIPTLET_FILTERS');
while ( reader.next() ) { while ( reader.next() ) {
acceptedCount += 1; acceptedCount += 1;

View File

@ -85,12 +85,13 @@ class CompiledListReader {
this.line = ''; this.line = '';
this.blocks = new Map(); this.blocks = new Map();
this.properties = new Map(); this.properties = new Map();
const reBlockStart = new RegExp(`^${blockStartPrefix}(\\d+)\\n`, 'gm'); const reBlockStart = new RegExp(`^${blockStartPrefix}([\\w:]+)\\n`, 'gm');
let match = reBlockStart.exec(raw); let match = reBlockStart.exec(raw);
while ( match !== null ) { while ( match !== null ) {
let beg = match.index + match[0].length; const sectionId = match[1];
let end = raw.indexOf(blockEndPrefix + match[1], beg); const beg = match.index + match[0].length;
this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end)); const end = raw.indexOf(blockEndPrefix + sectionId, beg);
this.blocks.set(sectionId, raw.slice(beg, end));
reBlockStart.lastIndex = end; reBlockStart.lastIndex = end;
match = reBlockStart.exec(raw); match = reBlockStart.exec(raw);
} }
@ -130,15 +131,6 @@ class CompiledListReader {
} }
} }
CompiledListWriter.prototype.NETWORK_SECTION =
CompiledListReader.prototype.NETWORK_SECTION = 100;
CompiledListWriter.blockStartPrefix =
CompiledListReader.blockStartPrefix = blockStartPrefix;
CompiledListWriter.blockEndPrefix =
CompiledListReader.blockEndPrefix = blockEndPrefix;
/******************************************************************************/ /******************************************************************************/
export { export {

View File

@ -111,10 +111,8 @@ const typeNameToTypeValue = {
'inline-font': 17 << TypeBitsOffset, 'inline-font': 17 << TypeBitsOffset,
'inline-script': 18 << TypeBitsOffset, 'inline-script': 18 << TypeBitsOffset,
'cname': 19 << TypeBitsOffset, 'cname': 19 << TypeBitsOffset,
// 'unused': 20 << TypeBitsOffset, 'webrtc': 20 << TypeBitsOffset,
// 'unused': 21 << TypeBitsOffset, 'unsupported': 21 << TypeBitsOffset,
'webrtc': 22 << TypeBitsOffset,
'unsupported': 23 << TypeBitsOffset,
}; };
const otherTypeBitValue = typeNameToTypeValue.other; const otherTypeBitValue = typeNameToTypeValue.other;
@ -169,8 +167,6 @@ const typeValueToTypeName = [
const MAX_TOKEN_LENGTH = 7; const MAX_TOKEN_LENGTH = 7;
const COMPILED_BAD_SECTION = 1;
// Four upper bits of token hash are reserved for built-in predefined // Four upper bits of token hash are reserved for built-in predefined
// token hashes, which should never end up being used when tokenizing // token hashes, which should never end up being used when tokenizing
// any arbitrary string. // any arbitrary string.
@ -1779,12 +1775,10 @@ registerFilterClass(FilterCompositeAll);
const FilterHostnameDict = class { const FilterHostnameDict = class {
static getCount(idata) { static getCount(idata) {
const itrie = filterData[idata+1]; const itrie = filterData[idata+1];
if ( itrie === 0 ) { if ( itrie !== 0 ) {
return filterRefs[filterData[idata+3]].length; return Array.from(destHNTrieContainer.trieIterator(itrie)).length;
} }
return Array.from( return filterRefs[filterData[idata+3]].length;
destHNTrieContainer.trieIterator(filterData[idata+1])
).length;
} }
static match(idata) { static match(idata) {
@ -2640,6 +2634,12 @@ class FilterCompiler {
return this; return this;
} }
start(/* writer */) {
}
finish(/* writer */) {
}
clone() { clone() {
return new FilterCompiler(this.parser, this); return new FilterCompiler(this.parser, this);
} }
@ -3105,8 +3105,8 @@ class FilterCompiler {
writer.select( writer.select(
this.badFilter this.badFilter
? writer.NETWORK_SECTION + COMPILED_BAD_SECTION ? 'NETWORK_FILTERS:BAD'
: writer.NETWORK_SECTION : 'NETWORK_FILTERS:GOOD'
); );
// Reminder: // Reminder:
@ -3715,7 +3715,7 @@ FilterContainer.prototype.createCompiler = function(parser) {
/******************************************************************************/ /******************************************************************************/
FilterContainer.prototype.fromCompiled = function(reader) { FilterContainer.prototype.fromCompiled = function(reader) {
reader.select(reader.NETWORK_SECTION); reader.select('NETWORK_FILTERS:GOOD');
while ( reader.next() ) { while ( reader.next() ) {
this.acceptedCount += 1; this.acceptedCount += 1;
if ( this.goodFilters.has(reader.line) ) { if ( this.goodFilters.has(reader.line) ) {
@ -3725,7 +3725,7 @@ FilterContainer.prototype.fromCompiled = function(reader) {
} }
} }
reader.select(reader.NETWORK_SECTION + COMPILED_BAD_SECTION); reader.select('NETWORK_FILTERS:BAD');
while ( reader.next() ) { while ( reader.next() ) {
this.badFilters.add(reader.line); this.badFilters.add(reader.line);
} }

View File

@ -979,6 +979,8 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH); parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH);
compiler.start(writer);
while ( lineIter.eot() === false ) { while ( lineIter.eot() === false ) {
let line = lineIter.next(); let line = lineIter.next();
@ -1013,6 +1015,8 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
} }
} }
compiler.finish(writer);
// https://github.com/uBlockOrigin/uBlock-issues/issues/1365 // https://github.com/uBlockOrigin/uBlock-issues/issues/1365
// Embed version into compiled list itself: it is encoded in as the // Embed version into compiled list itself: it is encoded in as the
// first digits followed by a whitespace. // first digits followed by a whitespace.