From a4e20ae3ad869a18f66eeb940273a625d342578e Mon Sep 17 00:00:00 2001 From: gorhill Date: Sat, 11 Mar 2017 13:55:47 -0500 Subject: [PATCH] new filter option: "badfilter" (see https://github.com/uBlockOrigin/uAssets/issues/192) --- src/js/cosmetic-filtering.js | 15 +++--- src/js/static-net-filtering.js | 98 +++++++++++++++++++++++++++++----- src/js/utils.js | 62 +++++++++++++-------- 3 files changed, 134 insertions(+), 41 deletions(-) diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index 8782fb9ff..32b13ef95 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -1148,10 +1148,11 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter, skipGenericCo fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { - if ( lineIter.text.charCodeAt(lineIter.offset) !== 0x63 /* 'c' */ ) { + line = lineIter.next(); + if ( line.charCodeAt(0) !== 0x63 /* 'c' */ ) { + lineIter.rewind(); return; } - line = lineIter.next(); this.acceptedCount += 1; if ( this.duplicateBuster.has(line) ) { @@ -1274,10 +1275,11 @@ FilterContainer.prototype.skipGenericCompiledContent = function(lineIter) { fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { - if ( lineIter.text.charCodeAt(lineIter.offset) !== 0x63 /* 'c' */ ) { + line = lineIter.next(); + if ( line.charCodeAt(0) !== 0x63 /* 'c' */ ) { + lineIter.rewind(); return; } - line = lineIter.next(); this.acceptedCount += 1; if ( this.duplicateBuster.has(line) ) { @@ -1336,10 +1338,11 @@ FilterContainer.prototype.skipCompiledContent = function(lineIter) { fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { - if ( lineIter.text.charCodeAt(lineIter.offset) !== 0x63 /* 'c' */ ) { + line = lineIter.next(); + if ( line.charCodeAt(0) !== 0x63 /* 'c' */ ) { + lineIter.rewind(); return; } - line = lineIter.next(); this.acceptedCount += 1; if ( this.duplicateBuster.has(line) ) { diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index dfa6f3a7a..f1e0e1a82 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -1078,6 +1078,17 @@ FilterBucket.prototype.add = function(a) { this.filters.push(a); }; +FilterBucket.prototype.remove = function(fclass, fdata) { + var i = this.filters.length, + filter; + while ( i-- ) { + filter = this.filters[i]; + if ( filter.fid === fclass && filter.toSelfie() === fdata ) { + this.filters.splice(i, 1); + } + } +}; + // Promote hit filters so they can be found faster next time. FilterBucket.prototype.promote = function(i) { var filters = this.filters; @@ -1181,6 +1192,7 @@ FilterParser.prototype.toNormalizedType = { FilterParser.prototype.reset = function() { this.action = BlockAction; this.anchor = 0; + this.badFilter = false; this.elemHiding = false; this.f = ''; this.firstParty = false; @@ -1334,6 +1346,11 @@ FilterParser.prototype.parseOptions = function(s) { if ( opt === 'empty' ) { continue; } + // https://github.com/uBlockOrigin/uAssets/issues/192 + if ( opt === 'badfilter' ) { + this.badFilter = true; + continue; + } // Unrecognized filter option: ignore whole filter. this.unsupported = true; break; @@ -1598,10 +1615,10 @@ FilterContainer.prototype.reset = function() { this.allowFilterCount = 0; this.blockFilterCount = 0; this.discardedCount = 0; + this.badFilters = new Set(); this.duplicateBuster = new Set(); this.categories = new Map(); this.filterParser.reset(); - this.filterCounts = {}; // Reuse filter instances whenever possible at load time. this.fclassLast = null; @@ -1618,6 +1635,7 @@ FilterContainer.prototype.reset = function() { FilterContainer.prototype.freeze = function() { histogram('allFilters', this.categories); + this.removeBadFilters(); this.duplicateBuster = new Set(); this.filterParser.reset(); this.fclassLast = null; @@ -1853,7 +1871,7 @@ FilterContainer.prototype.compile = function(raw, out) { return false; } - // Pure hostnames, use more efficient liquid dict + // Pure hostnames, use more efficient dictionary lookup // https://github.com/chrisaljoudi/uBlock/issues/665 // Create a dict keyed on request type etc. if ( parsed.hostnamePure && this.compileHostnameOnlyFilter(parsed, out) ) { @@ -1880,8 +1898,11 @@ FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) { // return; //} - var party = AnyParty; - if ( parsed.firstParty !== parsed.thirdParty ) { + var route = parsed.badFilter ? 'n-\v' : 'n\v', + party; + if ( parsed.firstParty === parsed.thirdParty ) { + party = AnyParty; + } else { party = parsed.firstParty ? FirstParty : ThirdParty; } var keyShard = parsed.action | parsed.important | party; @@ -1889,7 +1910,7 @@ FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) { var type = parsed.types; if ( type === 0 ) { out.push( - 'n\v' + + route + toHex(keyShard) + '\v' + '.\v' + parsed.f @@ -1901,7 +1922,7 @@ FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) { do { if ( type & 1 ) { out.push( - 'n\v' + + route + toHex(keyShard | (bitOffset << 4)) + '\v' + '.\v' + parsed.f @@ -1934,11 +1955,12 @@ FilterContainer.prototype.compileFilter = function(parsed, out) { /******************************************************************************/ FilterContainer.prototype.compileToAtomicFilter = function(filterClass, parsed, party, out) { - var bits = parsed.action | parsed.important | party; - var type = parsed.types; + var route = parsed.badFilter ? 'n-\v' : 'n\v', + bits = parsed.action | parsed.important | party, + type = parsed.types; if ( type === 0 ) { out.push( - 'n\v' + + route + toHex(bits) + '\v' + parsed.token + '\v' + filterClass.fid + '\v' + @@ -1950,7 +1972,7 @@ FilterContainer.prototype.compileToAtomicFilter = function(filterClass, parsed, do { if ( type & 1 ) { out.push( - 'n\v' + + route + toHex(bits | (bitOffset << 4)) + '\v' + parsed.token + '\v' + filterClass.fid + '\v' + @@ -1967,6 +1989,10 @@ FilterContainer.prototype.compileToAtomicFilter = function(filterClass, parsed, return; } + if ( parsed.badFilter ) { + return; + } + var redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw); if ( Array.isArray(redirects) === false ) { return; @@ -1985,12 +2011,17 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter) { fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { - if ( lineIter.text.charCodeAt(lineIter.offset) !== 0x6E /* 'n' */ ) { + line = lineIter.next(); + if ( line.charCodeAt(0) !== 0x6E /* 'n' */ ) { + lineIter.rewind(); return; } - line = lineIter.next(); - fieldIter.first(line); + if ( fieldIter.first(line) === 'n-' ) { + this.badFilters.add(line); + continue; + } + hash = fieldIter.next(); token = fieldIter.next(); fclass = fieldIter.next(); @@ -2046,6 +2077,47 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter) { /******************************************************************************/ +FilterContainer.prototype.removeBadFilters = function() { + var lines = µb.setToArray(this.badFilters), + fieldIter = new µb.FieldIterator('\v'), + hash, token, fclass, fdata, bucket, entry, + i = lines.length; + while ( i-- ) { + fieldIter.first(lines[i]); + hash = fieldIter.next(); + token = fieldIter.next(); + fclass = fieldIter.next(); + fdata = fieldIter.next(); + bucket = this.categories.get(hash); + if ( bucket === undefined ) { + continue; + } + entry = bucket.get(token); + if ( entry === undefined ) { + continue; + } + if ( entry instanceof FilterHostnameDict ) { + entry.delete(fclass); // 'fclass' is hostname + if ( entry.dict.size === 0 ) { + this.categories.delete(hash); + } + continue; + } + if ( entry instanceof FilterBucket ) { + entry.remove(fclass, fdata); + if ( entry.filters.length === 1 ) { + bucket.set(token, entry.filters[0]); + } + continue; + } + if ( entry.fid === fclass && entry.toSelfie() === fdata ) { + this.categories.delete(hash); + } + } +}; + +/******************************************************************************/ + FilterContainer.prototype.filterStringFromCompiled = function(compiled) { var opts = []; var vfields = compiled.split('\v'); diff --git a/src/js/utils.js b/src/js/utils.js index e7834c3de..8d3814565 100644 --- a/src/js/utils.js +++ b/src/js/utils.js @@ -163,6 +163,20 @@ return line; }; +µBlock.LineIterator.prototype.rewind = function() { + if ( this.offset <= 1 ) { + this.offset = 0; + return; + } + var lineEnd = this.text.lastIndexOf('\n', this.offset - 2); + if ( lineEnd !== -1 ) { + this.offset = lineEnd + 1; + } else { + lineEnd = this.text.lastIndexOf('\r', this.offset - 2); + this.offset = lineEnd !== -1 ? lineEnd + 1 : 0; + } +}; + µBlock.LineIterator.prototype.eot = function() { return this.offset >= this.textLen; }; @@ -197,33 +211,37 @@ /******************************************************************************/ -µBlock.mapToArray = function(map) { - var out = [], - entries = map.entries(), - entry; - for (;;) { - entry = entries.next(); - if ( entry.done ) { break; } - out.push([ entry.value[0], entry.value[1] ]); - } - return out; -}; +µBlock.mapToArray = typeof Array.from === 'function' + ? Array.from + : function(map) { + var out = [], + entries = map.entries(), + entry; + for (;;) { + entry = entries.next(); + if ( entry.done ) { break; } + out.push([ entry.value[0], entry.value[1] ]); + } + return out; + }; µBlock.mapFromArray = function(arr) { return new Map(arr); }; -µBlock.setToArray = function(dict) { - var out = [], - entries = dict.values(), - entry; - for (;;) { - entry = entries.next(); - if ( entry.done ) { break; } - out.push(entry.value); - } - return out; -}; +µBlock.setToArray = typeof Array.from === 'function' + ? Array.from + : function(dict) { + var out = [], + entries = dict.values(), + entry; + for (;;) { + entry = entries.next(); + if ( entry.done ) { break; } + out.push(entry.value); + } + return out; + }; µBlock.setFromArray = function(arr) { return new Set(arr);