From cabb0d36b6792da392f6381b5dabe3842733310e Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Tue, 23 Oct 2018 14:01:08 -0300 Subject: [PATCH] fix https://github.com/gorhill/uBlock/issues/3371 --- src/js/background.js | 4 +- src/js/static-net-filtering.js | 336 ++++++++++++--------------------- src/js/storage.js | 32 ++-- src/js/utils.js | 97 +++++----- 4 files changed, 194 insertions(+), 275 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index d49910cfa..dac1228cf 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -140,8 +140,8 @@ var µBlock = (function() { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 5, // Increase when compiled format changes - selfieMagic: 4 // Increase when selfie format changes + compiledMagic: 6, // Increase when compiled format changes + selfieMagic: 6 // Increase when selfie format changes }, restoreBackupSettings: { diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 5849efa04..71ddb8cc6 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -33,16 +33,15 @@ var µb = µBlock; // fedcba9876543210 -// | | | ||| -// | | | ||| -// | | | ||| -// | | | ||| -// | | | ||+---- bit 0: [BlockAction | AllowAction] -// | | | |+----- bit 1: `important` -// | | | +------ bit 2- 3: party [0 - 3] -// | | +-------- bit 4- 8: type [0 - 31] -// | +------------- bit 9-14: unused -// +------------------- bit 15: bad filter +// | | ||| +// | | ||| +// | | ||| +// | | ||| +// | | ||+---- bit 0: [BlockAction | AllowAction] +// | | |+----- bit 1: `important` +// | | +------ bit 2- 3: party [0 - 3] +// | +-------- bit 4- 8: type [0 - 31] +// +------------- bit 9-15: unused var BlockAction = 0 << 0; var AllowAction = 1 << 0; @@ -50,7 +49,6 @@ var Important = 1 << 1; var AnyParty = 0 << 2; var FirstParty = 1 << 2; var ThirdParty = 2 << 2; -var BadFilter = 1 << 15; var AnyType = 0 << 4; var typeNameToTypeValue = { @@ -126,50 +124,6 @@ var pageHostnameRegister = '', //var filterRegister = null; //var categoryRegister = ''; -/******************************************************************************/ - -var histogram = function() {}; -/* -histogram = function(label, categories) { - var h = [], - categoryBucket; - for ( var k in categories ) { - // No need for hasOwnProperty() here: there is no prototype chain. - categoryBucket = categories[k]; - for ( var kk in categoryBucket ) { - // No need for hasOwnProperty() here: there is no prototype chain. - filterBucket = categoryBucket[kk]; - h.push({ - k: k.charCodeAt(0).toString(2) + ' ' + kk, - n: filterBucket instanceof FilterBucket ? filterBucket.filters.length : 1 - }); - } - } - - console.log('Histogram %s', label); - - var total = h.length; - h.sort(function(a, b) { return b.n - a.n; }); - - // Find indices of entries of interest - var target = 2; - for ( var i = 0; i < total; i++ ) { - if ( h[i].n === target ) { - console.log('\tEntries with only %d filter(s) start at index %s (key = "%s")', target, i, h[i].k); - target -= 1; - } - } - - h = h.slice(0, 50); - - h.forEach(function(v) { - console.log('\tkey=%s count=%d', v.k, v.n); - }); - console.log('\tTotal buckets count: %d', total); -}; -*/ -/******************************************************************************/ - // Local helpers // Be sure to not confuse 'example.com' with 'anotherexample.com' @@ -219,16 +173,20 @@ rawToRegexStr.escape4 = /\*/g; rawToRegexStr.reTextHostnameAnchor1 = '^[a-z-]+://(?:[^/?#]+\\.)?'; rawToRegexStr.reTextHostnameAnchor2 = '^[a-z-]+://(?:[^/?#]+)?'; -var filterFingerprinter = µb.CompiledLineWriter.fingerprint; +const filterDataSerialize = µb.CompiledLineIO.serialize; var toLogDataInternal = function(categoryBits, tokenHash, filter) { if ( filter === null ) { return undefined; } - var logData = filter.logData(); - logData.compiled = filterFingerprinter([ categoryBits, tokenHash, logData.compiled ]); + let logData = filter.logData(); + logData.compiled = filterDataSerialize([ + categoryBits, + tokenHash, + logData.compiled + ]); if ( categoryBits & 0x001 ) { logData.raw = '@@' + logData.raw; } - var opts = []; + let opts = []; if ( categoryBits & 0x002 ) { opts.push('important'); } @@ -237,7 +195,7 @@ var toLogDataInternal = function(categoryBits, tokenHash, filter) { } else if ( categoryBits & 0x004 ) { opts.push('first-party'); } - var type = categoryBits & 0x1F0; + let type = categoryBits & 0x1F0; if ( type !== 0 && type !== typeNameToTypeValue.data ) { opts.push(typeValueToTypeName[type >>> 4]); } @@ -300,16 +258,12 @@ var registerFilterClass = function(ctor) { var fid = filterClassIdGenerator++; ctor.fid = ctor.prototype.fid = fid; filterClasses[fid] = ctor; - //console.log(ctor.name, fid); }; var filterFromCompiledData = function(args) { - //filterClassHistogram.set(fid, (filterClassHistogram.get(fid) || 0) + 1); return filterClasses[args[0]].load(args); }; -//var filterClassHistogram = new Map(); - /******************************************************************************/ var FilterTrue = function() { @@ -1424,7 +1378,7 @@ FilterParser.prototype.toNormalizedType = { FilterParser.prototype.reset = function() { this.action = BlockAction; this.anchor = 0; - this.badFilter = 0; + this.badFilter = false; this.dataType = undefined; this.dataStr = undefined; this.elemHiding = false; @@ -1594,7 +1548,7 @@ FilterParser.prototype.parseOptions = function(s) { } // https://github.com/uBlockOrigin/uAssets/issues/192 if ( opt === 'badfilter' ) { - this.badFilter = BadFilter; + this.badFilter = true; continue; } // Unrecognized filter option: ignore whole filter. @@ -2000,17 +1954,12 @@ FilterContainer.prototype.reset = function() { this.allowFilterCount = 0; this.blockFilterCount = 0; this.discardedCount = 0; + this.goodFilters = new Set(); this.badFilters = new Set(); - this.duplicateBuster = new Set(); this.categories = new Map(); this.dataFilters = new Map(); this.filterParser.reset(); - // Reuse filter instances whenever possible at load time. - this.fclassLast = null; - this.fdataLast = null; - this.filterLast = null; - // Runtime registers this.cbRegister = undefined; this.thRegister = undefined; @@ -2020,18 +1969,82 @@ FilterContainer.prototype.reset = function() { /******************************************************************************/ FilterContainer.prototype.freeze = function() { - histogram('allFilters', this.categories); - this.removeBadFilters(); - this.duplicateBuster = new Set(); + let filterPairId = FilterPair.fid, + filterBucketId = FilterBucket.fid, + filterDataHolderId = FilterDataHolder.fid, + redirectTypeValue = typeNameToTypeValue.redirect, + unserialize = µb.CompiledLineIO.unserialize; + + for ( let line of this.goodFilters ) { + if ( this.badFilters.has(line) ) { continue; } + + let args = unserialize(line); + let bits = args[0]; + + // Special cases: delegate to more specialized engines. + // Redirect engine. + if ( (bits & 0x1F0) === redirectTypeValue ) { + µb.redirectEngine.fromCompiledRule(args[1]); + continue; + } + + // Plain static filters. + let tokenHash = args[1]; + let fdata = args[2]; + + // Special treatment: data-holding filters are stored separately + // because they require special matching algorithm (unlike other + // filters, ALL hits must be reported). + if ( fdata[0] === filterDataHolderId ) { + let entry = new FilterDataHolderEntry(bits, tokenHash, fdata); + let bucket = this.dataFilters.get(tokenHash); + if ( bucket !== undefined ) { + entry.next = bucket; + } + this.dataFilters.set(tokenHash, entry); + continue; + } + + let bucket = this.categories.get(bits); + if ( bucket === undefined ) { + bucket = new Map(); + this.categories.set(bits, bucket); + } + let entry = bucket.get(tokenHash); + + if ( tokenHash === this.dotTokenHash ) { + if ( entry === undefined ) { + entry = new FilterHostnameDict(); + bucket.set(this.dotTokenHash, entry); + } + entry.add(fdata); + continue; + } + + if ( entry === undefined ) { + bucket.set(tokenHash, filterFromCompiledData(fdata)); + continue; + } + if ( entry.fid === filterBucketId ) { + entry.add(fdata); + continue; + } + if ( entry.fid === filterPairId ) { + bucket.set( + tokenHash, + entry.upgrade(filterFromCompiledData(fdata)) + ); + continue; + } + bucket.set( + tokenHash, + new FilterPair(entry, filterFromCompiledData(fdata)) + ); + } + this.filterParser.reset(); - this.fclassLast = null; - this.fdataLast = null; - this.filterLast = null; + this.goodFilters = new Set(); this.frozen = true; - //console.log(JSON.stringify(Array.from(filterClassHistogram))); - //this.tokenHistogram = new Map(Array.from(this.tokenHistogram).sort(function(a, b) { - // return a[0].localeCompare(b[0]) || (b[1] - a[1]); - //})); }; /******************************************************************************/ @@ -2125,9 +2138,6 @@ FilterContainer.prototype.compile = function(raw, writer) { return false; } - // 0 = network filters - writer.select(0); - // Pure hostnames, use more efficient dictionary lookup // https://github.com/chrisaljoudi/uBlock/issues/665 // Create a dict keyed on request type etc. @@ -2207,10 +2217,16 @@ FilterContainer.prototype.compileToAtomicFilter = function( fdata, writer ) { - let descBits = parsed.action | - parsed.important | - parsed.party | - parsed.badFilter; + + // 0 = network filters + // 1 = network filters: bad filters + if ( parsed.badFilter ) { + writer.select(1); + } else { + writer.select(0); + } + + let descBits = parsed.action | parsed.important | parsed.party; let type = parsed.types; // Typeless @@ -2231,7 +2247,7 @@ FilterContainer.prototype.compileToAtomicFilter = function( // Only static filter with an explicit type can be redirected. If we reach // this point, it's because there is one or more explicit type. - if ( parsed.badFilter === 0 && parsed.redirect ) { + if ( parsed.badFilter === false && parsed.redirect ) { let redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw); if ( Array.isArray(redirects) ) { for ( let redirect of redirects ) { @@ -2244,138 +2260,30 @@ FilterContainer.prototype.compileToAtomicFilter = function( /******************************************************************************/ FilterContainer.prototype.fromCompiledContent = function(reader) { - var badFilterBit = BadFilter, - filterPairId = FilterPair.fid, - filterBucketId = FilterBucket.fid, - filterDataHolderId = FilterDataHolder.fid, - redirectTypeValue = typeNameToTypeValue.redirect, - args, bits, bucket, entry, - tokenHash, fdata, fingerprint; - // 0 = network filters reader.select(0); - - while ( reader.next() === true ) { - args = reader.args(); - bits = args[0]; - - if ( (bits & badFilterBit) !== 0 ) { - this.badFilters.add(args); - continue; - } - - // Special cases: delegate to more specialized engines. - // Redirect engine. - if ( (bits & 0x1F0) === redirectTypeValue ) { - µb.redirectEngine.fromCompiledRule(args[1]); - continue; - } - - this.acceptedCount += 1; - - // Plain static filters. - fingerprint = reader.fingerprint(); - tokenHash = args[1]; - fdata = args[2]; - - // Special treatment: data-holding filters are stored separately - // because they require special matching algorithm (unlike other - // filters, ALL hits must be reported). - if ( fdata[0] === filterDataHolderId ) { - if ( this.duplicateBuster.has(fingerprint) ) { - this.discardedCount += 1; - continue; - } - this.duplicateBuster.add(fingerprint); - entry = new FilterDataHolderEntry(bits, tokenHash, fdata); - bucket = this.dataFilters.get(tokenHash); - if ( bucket !== undefined ) { - entry.next = bucket; - } - this.dataFilters.set(tokenHash, entry); - continue; - } - - bucket = this.categories.get(bits); - if ( bucket === undefined ) { - bucket = new Map(); - this.categories.set(bits, bucket); - } - entry = bucket.get(tokenHash); - - if ( tokenHash === this.dotTokenHash ) { - if ( entry === undefined ) { - entry = new FilterHostnameDict(); - bucket.set(this.dotTokenHash, entry); - } - if ( entry.add(fdata) === false ) { - this.discardedCount += 1; - } - continue; - } - - if ( this.duplicateBuster.has(fingerprint) ) { + while ( reader.next() ) { + if ( this.goodFilters.has(reader.line) ) { this.discardedCount += 1; continue; } - this.duplicateBuster.add(fingerprint); - - if ( entry === undefined ) { - bucket.set(tokenHash, filterFromCompiledData(fdata)); - continue; - } - if ( entry.fid === filterBucketId ) { - entry.add(fdata); - continue; - } - if ( entry.fid === filterPairId ) { - bucket.set( - tokenHash, - entry.upgrade(filterFromCompiledData(fdata)) - ); - continue; - } - bucket.set( - tokenHash, - new FilterPair(entry, filterFromCompiledData(fdata)) - ); + this.goodFilters.add(reader.line); + this.acceptedCount += 1; } -}; -/******************************************************************************/ - -FilterContainer.prototype.removeBadFilters = function() { - var filterPairId = FilterPair.fid, - filterBucketId = FilterBucket.fid, - filterHostnameDictId = FilterHostnameDict.fid, - bits, tokenHash, fdata, bucket, entry; - for ( var args of this.badFilters ) { - bits = args[0] & ~BadFilter; - bucket = this.categories.get(bits); - if ( bucket === undefined ) { continue; } - tokenHash = args[1]; - entry = bucket.get(tokenHash); - if ( entry === undefined ) { continue; } - fdata = args[2]; - if ( entry.fid === filterPairId || entry.fid === filterBucketId ) { - entry.remove(fdata); - entry = entry.downgrade(); - if ( entry !== undefined ) { - bucket.set(tokenHash, entry); - } else { - bucket.delete(tokenHash); - } - } else if ( entry.fid === filterHostnameDictId ) { - entry.remove(fdata); - if ( entry.size === 0 ) { - bucket.delete(tokenHash); - } - } else if ( arrayStrictEquals(entry.compile(), fdata) ) { - bucket.delete(tokenHash); - } - if ( bucket.size === 0 ) { - this.categories.delete(bits); + // 1 = network filters: bad filters + // Since we are going to keep bad filter fingerprints around, we ensure + // they are "detached" from the parent string from which they are sliced. + // We keep bad filter fingerprints around to use them when user + // incrementally add filters (through "Block element" for example). + reader.select(1); + while ( reader.next() ) { + if ( this.badFilters.has(reader.line) ) { + this.discardedCount += 1; + continue; } + this.badFilters.add(µb.orphanizeString(reader.line)); + this.acceptedCount += 1; } }; diff --git a/src/js/storage.js b/src/js/storage.js index 94ba2a248..e977d4f6d 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -743,27 +743,26 @@ // Lower minimum update period to 1 day. µBlock.extractFilterListMetadata = function(assetKey, raw) { - var listEntry = this.availableFilterLists[assetKey]; + let listEntry = this.availableFilterLists[assetKey]; if ( listEntry === undefined ) { return; } // Metadata expected to be found at the top of content. - var head = raw.slice(0, 1024), - matches, v; + let head = raw.slice(0, 1024); // https://github.com/gorhill/uBlock/issues/313 // Always try to fetch the name if this is an external filter list. if ( listEntry.title === '' || listEntry.group === 'custom' ) { - matches = head.match(/(?:^|\n)(?:!|# )[\t ]*Title[\t ]*:([^\n]+)/i); + let matches = head.match(/(?:^|\n)(?:!|# )[\t ]*Title[\t ]*:([^\n]+)/i); if ( matches !== null ) { // https://bugs.chromium.org/p/v8/issues/detail?id=2869 - // JSON.stringify/JSON.parse is to work around String.slice() - // potentially causing the whole raw filter list to be held in - // memory just because we cut out the title as a substring. - listEntry.title = JSON.parse(JSON.stringify(matches[1].trim())); + // orphanizeString is to work around String.slice() + // potentially causing the whole raw filter list to be held in + // memory just because we cut out the title as a substring. + listEntry.title = this.orphanizeString(matches[1].trim()); } } // Extract update frequency information - matches = head.match(/(?:^|\n)(?:!|# )[\t ]*Expires[\t ]*:[\t ]*(\d+)[\t ]*(h)?/i); + let matches = head.match(/(?:^|\n)(?:!|# )[\t ]*Expires[\t ]*:[\t ]*(\d+)[\t ]*(h)?/i); if ( matches !== null ) { - v = Math.max(parseInt(matches[1], 10), 1); + let v = Math.max(parseInt(matches[1], 10), 1); if ( matches[2] !== undefined ) { v = Math.ceil(v / 24); } @@ -787,29 +786,28 @@ /******************************************************************************/ µBlock.compileFilters = function(rawText) { - var writer = new this.CompiledLineWriter(); + let writer = new this.CompiledLineIO.Writer(); // Useful references: // https://adblockplus.org/en/filter-cheatsheet // https://adblockplus.org/en/filters - var staticNetFilteringEngine = this.staticNetFilteringEngine, + let staticNetFilteringEngine = this.staticNetFilteringEngine, staticExtFilteringEngine = this.staticExtFilteringEngine, reIsWhitespaceChar = /\s/, reMaybeLocalIp = /^[\d:f]/, reIsLocalhostRedirect = /\s+(?:0\.0\.0\.0|broadcasthost|localhost|local|ip6-\w+)\b/, reLocalIp = /^(?:0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)/, - line, c, pos, lineIter = new this.LineIterator(this.processDirectives(rawText)); while ( lineIter.eot() === false ) { // rhill 2014-04-18: The trim is important here, as without it there // could be a lingering `\r` which would cause problems in the // following parsing code. - line = lineIter.next().trim(); + let line = lineIter.next().trim(); if ( line.length === 0 ) { continue; } // Strip comments - c = line.charAt(0); + let c = line.charAt(0); if ( c === '!' || c === '[' ) { continue; } // Parse or skip cosmetic filters @@ -828,7 +826,7 @@ // Don't remove: // ...#blah blah blah // because some ABP filters uses the `#` character (URL fragment) - pos = line.indexOf('#'); + let pos = line.indexOf('#'); if ( pos !== -1 && reIsWhitespaceChar.test(line.charAt(pos - 1)) ) { line = line.slice(0, pos).trim(); } @@ -860,7 +858,7 @@ µBlock.applyCompiledFilters = function(rawText, firstparty) { if ( rawText === '' ) { return; } - var reader = new this.CompiledLineReader(rawText); + let reader = new this.CompiledLineIO.Reader(rawText); this.staticNetFilteringEngine.fromCompiledContent(reader); this.staticExtFilteringEngine.fromCompiledContent(reader, { skipGenericCosmetic: this.userSettings.ignoreGenericCosmeticFilters, diff --git a/src/js/utils.js b/src/js/utils.js index dbfdfab23..7b889238f 100644 --- a/src/js/utils.js +++ b/src/js/utils.js @@ -224,18 +224,47 @@ /******************************************************************************/ -µBlock.CompiledLineWriter = function() { - this.blockId = undefined; - this.block = undefined; - this.blocks = new Map(); - this.stringifier = JSON.stringify; +µBlock.CompiledLineIO = { + serialize: JSON.stringify, + unserialize: JSON.parse, + blockStartPrefix: '#block-start-', // ensure no special regex characters + blockEndPrefix: '#block-end-', // ensure no special regex characters + + Writer: function() { + this.io = µBlock.CompiledLineIO; + this.blockId = undefined; + this.block = undefined; + this.blocks = new Map(); + this.stringifier = this.io.serialize; + }, + + Reader: function(raw, blockId) { + this.io = µBlock.CompiledLineIO; + this.block = ''; + this.len = 0; + this.offset = 0; + this.line = ''; + this.parser = this.io.unserialize; + this.blocks = new Map(); + let reBlockStart = new RegExp( + '^' + this.io.blockStartPrefix + '(\\d+)\\n', + 'gm' + ); + let match = reBlockStart.exec(raw); + while ( match !== null ) { + let beg = match.index + match[0].length; + let end = raw.indexOf(this.io.blockEndPrefix + match[1], beg); + this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end)); + reBlockStart.lastIndex = end; + match = reBlockStart.exec(raw); + } + if ( blockId !== undefined ) { + this.select(blockId); + } + } }; -µBlock.CompiledLineWriter.fingerprint = function(args) { - return JSON.stringify(args); -}; - -µBlock.CompiledLineWriter.prototype = { +µBlock.CompiledLineIO.Writer.prototype = { push: function(args) { this.block[this.block.length] = this.stringifier(args); }, @@ -248,50 +277,26 @@ } }, toString: function() { - var result = []; - for ( var entry of this.blocks ) { - if ( entry[1].length === 0 ) { continue; } + let result = []; + for ( let [ id, lines ] of this.blocks ) { + if ( lines.length === 0 ) { continue; } result.push( - '#block-start-' + entry[0], - entry[1].join('\n'), - '#block-end-' + entry[0] + this.io.blockStartPrefix + id, + lines.join('\n'), + this.io.blockEndPrefix + id ); } return result.join('\n'); } }; -/******************************************************************************/ - -µBlock.CompiledLineReader = function(raw, blockId) { - this.block = ''; - this.len = 0; - this.offset = 0; - this.line = ''; - this.parser = JSON.parse; - this.blocks = new Map(); - var reBlockStart = /^#block-start-(\d+)\n/gm, - match = reBlockStart.exec(raw), - beg, end; - while ( match !== null ) { - beg = match.index + match[0].length; - end = raw.indexOf('#block-end-' + match[1], beg); - this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end)); - reBlockStart.lastIndex = end; - match = reBlockStart.exec(raw); - } - if ( blockId !== undefined ) { - this.select(blockId); - } -}; - -µBlock.CompiledLineReader.prototype = { +µBlock.CompiledLineIO.Reader.prototype = { next: function() { if ( this.offset === this.len ) { this.line = ''; return false; } - var pos = this.block.indexOf('\n', this.offset); + let pos = this.block.indexOf('\n', this.offset); if ( pos !== -1 ) { this.line = this.block.slice(this.offset, pos); this.offset = pos + 1; @@ -466,3 +471,11 @@ return decomposed; }; })(); + +/******************************************************************************/ + +// TODO: evaluate using TextEncoder/TextDecoder + +µBlock.orphanizeString = function(s) { + return JSON.parse(JSON.stringify(s)); +}; \ No newline at end of file