From 273c303d3b63f244d403257d9efbb4bf165ed710 Mon Sep 17 00:00:00 2001 From: gorhill Date: Mon, 23 Nov 2015 19:18:25 -0500 Subject: [PATCH] refactoring redirect engine: static filters are not the way to create redirection rules --- assets/checksums.txt | 3 +- assets/ublock/redirect-rules.txt | 29 +++--- assets/ublock/redirect.txt | 4 + src/js/background.js | 4 + src/js/redirect-engine.js | 147 ++++++++++++++++++++++++------- src/js/static-net-filtering.js | 30 ++++++- src/js/storage.js | 4 +- 7 files changed, 164 insertions(+), 57 deletions(-) create mode 100644 assets/ublock/redirect.txt diff --git a/assets/checksums.txt b/assets/checksums.txt index 4889a717e..67b1fdf75 100644 --- a/assets/checksums.txt +++ b/assets/checksums.txt @@ -2,9 +2,10 @@ d644ffec21b802b4efb76365a576b86f assets/ublock/unbreak.txt 7a04294b44b88baa34cf3e4bfe0e59db assets/ublock/privacy.txt b2dbf435507aa0262b289c67cbef2142 assets/ublock/filters.txt 146704ad1c0393e342afdb416762c183 assets/ublock/badware.txt +4b3df06f0b27c57cea0006ae559e078b assets/ublock/redirect.txt c9c5cc56bec563bc1885847f925b9be2 assets/ublock/mirror-candidates.txt f9455a47b5024cc08ff3675ce79b58a9 assets/ublock/filter-lists.json -a37de4c1166ed23c273d44611142b5e4 assets/ublock/redirect-rules.txt +b7781a2ab4094b0ffa363caa6e29801d assets/ublock/redirect-rules.txt 94c0a3eab74c42783855f07b22a429cf assets/thirdparties/home.fredfiber.no/langsholt/adblock.txt a82cb5ba5caf035ce00e97de81db5de7 assets/thirdparties/www.zoso.ro/pages/rolist.txt 72373316d0e7ad22604d307c2d93e7cc assets/thirdparties/adblock.gardar.net/is.abp.txt diff --git a/assets/ublock/redirect-rules.txt b/assets/ublock/redirect-rules.txt index 960f5eab9..bde2080f0 100644 --- a/assets/ublock/redirect-rules.txt +++ b/assets/ublock/redirect-rules.txt @@ -1,22 +1,13 @@ redirects: - hd-main.js application/javascript - var L = (function(){ - var l = {}; - var fn = function(){}; - var props = 
["pf","ed","Qe","fd","xh","Uc","ef","zd","Ad","Qc","Ri","Wc","Vc","Xc","Wg","rd","qd","sd","Pe","Id","Hd","Jd","fg","Fd","Ed","Gd","ek","Cd","Bd","Dd","Nj","Sc","Rc","Tc","wg","xd","wd","yd","fh","ld","md","nd","Re","cd","Pc","ke","Yc","Xg","jd","kd","oh","ad","bd","mi","gd","hd","ae","dd","fk","ij","ud","td","vd","ig","od","pd","Yd","$j","Oc","bf"]; - for ( var i = 0; i < props.length; i++ ) { - l[props[i]] = fn; - } - return l; - })(); +hd-main.js application/javascript +var L = (function(){ + var l = {}; + var fn = function(){}; + var props = ["pf","ed","Qe","fd","xh","Uc","ef","zd","Ad","Qc","Ri","Wc","Vc","Xc","Wg","rd","qd","sd","Pe","Id","Hd","Jd","fg","Fd","Ed","Gd","ek","Cd","Bd","Dd","Nj","Sc","Rc","Tc","wg","xd","wd","yd","fh","ld","md","nd","Re","cd","Pc","ke","Yc","Xg","jd","kd","oh","ad","bd","mi","gd","hd","ae","dd","fk","ij","ud","td","vd","ig","od","pd","Yd","$j","Oc","bf"]; + for ( var i = 0; i < props.length; i++ ) { + l[props[i]] = fn; + } + return l; +})(); -rules: - - # Prevent difficult to block video ads. Examples: - # http://www.chip.de/news/Halbvoll-in-2-Minuten-Huawei-zeigt-neue-Superakkus_85752247.html - # Block filters to use: - # ||s3.amazonaws.com/homad-global-configs.schneevonmorgen.com/hd-main.js$script,domain=cdnapi.kaltura.com,important - # ||hgc.svonm.com/hd-main.js$script,domain=cdnapi.kaltura.com,important - cdnapi.kaltura.com s3.amazonaws.com script /\/hd-main.js$/ hd-main.js - cdnapi.kaltura.com hgc.svonm.com script /\/hd-main.js$/ hd-main.js diff --git a/assets/ublock/redirect.txt b/assets/ublock/redirect.txt new file mode 100644 index 000000000..8a6712919 --- /dev/null +++ b/assets/ublock/redirect.txt @@ -0,0 +1,4 @@ +# uBlock Origin -- To redirect blocked resources to friendlier contents. 
+ +||s3.amazonaws.com/homad-global-configs.schneevonmorgen.com/hd-main.js$script,domain=cdnapi.kaltura.com,redirect=hd-main.js +||hgc.svonm.com/hd-main.js$script,domain=cdnapi.kaltura.com,redirect=hd-main.js diff --git a/src/js/background.js b/src/js/background.js index 808cd03e5..5abe2fc73 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -126,6 +126,10 @@ return { title: 'uBlock filters – Unbreak', group: 'default' }, + 'assets/ublock/redirect.txt': { + title: 'uBlock filters – Block-then-redirect', + group: 'default' + }, 'assets/ublock/badware.txt': { title: 'uBlock filters – Badware risks', group: 'default', diff --git a/src/js/redirect-engine.js b/src/js/redirect-engine.js index 9917e7659..d5c2adff1 100644 --- a/src/js/redirect-engine.js +++ b/src/js/redirect-engine.js @@ -40,18 +40,23 @@ var toBroaderHostname = function(hostname) { /******************************************************************************/ var RedirectEngine = function() { + this.redirects = Object.create(null); this.reset(); }; /******************************************************************************/ RedirectEngine.prototype.reset = function() { - this.redirects = Object.create(null); this.rules = Object.create(null); }; /******************************************************************************/ +RedirectEngine.prototype.freeze = function() { +}; + +/******************************************************************************/ + RedirectEngine.prototype.lookup = function(context) { var typeEntry = this.rules[context.requestType]; if ( typeEntry === undefined ) { @@ -93,13 +98,117 @@ RedirectEngine.prototype.lookup = function(context) { // TODO: combine same key-redirect pairs into a single regex. 
-RedirectEngine.prototype.fromString = function(text) { +RedirectEngine.prototype.addRule = function(src, des, type, pattern, redirect) { + var typeEntry = this.rules[type]; + if ( typeEntry === undefined ) { + typeEntry = this.rules[type] = Object.create(null); + } + var desEntry = typeEntry[des]; + if ( desEntry === undefined ) { + desEntry = typeEntry[des] = Object.create(null); + } + var ruleEntries = desEntry[src]; + if ( ruleEntries === undefined ) { + ruleEntries = desEntry[src] = []; + } + ruleEntries.push({ + c: new RegExp(pattern), + r: redirect + }); +}; + +/******************************************************************************/ + +RedirectEngine.prototype.fromCompiledRule = function(line) { + var fields = line.split('\t'); + if ( fields.length !== 5 ) { + return; + } + this.addRule(fields[0], fields[1], fields[2], fields[3], fields[4]); +}; + +/******************************************************************************/ + +RedirectEngine.prototype.compileRuleFromStaticFilter = function(line) { // Returns an array of tab-separated "src, des, type, pattern, redirect" rule strings, or '' when the filter cannot be compiled. + var matches = this.reFilterParser.exec(line); + if ( matches === null || matches.length !== 4 ) { + return ''; + } + + var pattern = (matches[1] + matches[2]).replace(/[.+?{}()|[\]\\]/g, '\\$&') + .replace(/\^/g, '(?:[^\\w.%-]|$)') // ABP separator: anything but a letter, a digit, or one of _ - . %; the end of the URL also counts. + .replace(/\*/g, '.*?'); + + var des = matches[1]; + var types = []; + var redirect = ''; + var srcs = []; + var options = matches[3].split(','), option; + while ( (option = options.pop()) ) { + if ( option.lastIndexOf('redirect=', 0) === 0 ) { + redirect = option.slice(9); + continue; + } + if ( option.lastIndexOf('domain=', 0) === 0 ) { + srcs = option.slice(7).split('|'); + continue; + } + if ( option in this.supportedTypes ) { + types.push(this.supportedTypes[option]); // Store the mapped type name (e.g. subdocument -> sub_frame), not the filter option name. + continue; + } + } + + if ( redirect === '' || types.length === 0 ) { + return ''; + } + + if ( des === '' ) { + des = '*'; + } + + if ( srcs.length === 0 ) { + srcs.push('*'); + } + + var out = []; + var i = srcs.length, j; + while ( i-- ) { + j = types.length; + while ( j-- ) 
{ + out.push(srcs[i] + '\t' + des + '\t' + types[j] + '\t' + pattern + '\t' + redirect); + } + } + + return out; +}; + +/******************************************************************************/ + +RedirectEngine.prototype.reFilterParser = /^\|\|([^\/\?#]+)([^$]+)\$([^$]+)$/; + +RedirectEngine.prototype.supportedTypes = (function() { + var types = Object.create(null); + types.stylesheet = 'stylesheet'; + types.image = 'image'; + types.object = 'object'; + types.script = 'script'; + types.xmlhttprequest = 'xmlhttprequest'; + types.subdocument = 'sub_frame'; + types.font = 'font'; + return types; +})(); + +/******************************************************************************/ + +// TODO: combine same key-redirect pairs into a single regex. + +RedirectEngine.prototype.redirectDataFromString = function(text) { var textEnd = text.length; var lineBeg = 0, lineEnd; var mode, modeData, line, fields, encoded, data; - var reSource, typeEntry, desEntry, ruleEntries; - this.reset(); + this.redirects = Object.create(null); while ( lineBeg < textEnd ) { lineEnd = text.indexOf('\n', lineBeg); @@ -147,36 +256,6 @@ RedirectEngine.prototype.fromString = function(text) { mode = 'redirects'; continue; } - - if ( mode === 'rules' ) { - fields = line.split(/\s+/); - if ( fields.length !== 5 ) { - continue; - } - reSource = fields[3]; - if ( reSource.charAt(0) !== '/' || reSource.slice(-1) !== '/' ) { - reSource = reSource.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - } else { - reSource = reSource.slice(1, -1); - } - typeEntry = this.rules[fields[2]]; - if ( typeEntry === undefined ) { - typeEntry = this.rules[fields[2]] = Object.create(null); - } - desEntry = typeEntry[fields[1]]; - if ( desEntry === undefined ) { - desEntry = typeEntry[fields[1]] = Object.create(null); - } - ruleEntries = desEntry[fields[0]]; - if ( ruleEntries === undefined ) { - ruleEntries = desEntry[fields[0]] = []; - } - ruleEntries.push({ - c: new RegExp(reSource), - r: fields[4] - }); - continue; - 
} } }; diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index ee36666bb..679fd4756 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -1404,6 +1404,8 @@ FilterParser.prototype.reset = function() { this.hostnamePure = false; this.domainOpt = ''; this.isRegex = false; + this.raw = ''; + this.redirect = false; this.thirdParty = false; this.token = ''; this.tokenBeg = 0; @@ -1488,7 +1490,7 @@ FilterParser.prototype.parseOptions = function(s) { this.parseOptType(opt, not); continue; } - if ( opt.slice(0,7) === 'domain=' ) { + if ( opt.lastIndexOf('domain=', 0) === 0 ) { this.domainOpt = opt.slice(7); continue; } @@ -1500,6 +1502,10 @@ FilterParser.prototype.parseOptions = function(s) { this.parseOptParty(true, not); continue; } + if ( opt.lastIndexOf('redirect=', 0) === 0 ) { + this.redirect = true; + continue; + } this.unsupported = true; break; } @@ -1511,7 +1517,7 @@ FilterParser.prototype.parse = function(raw) { // important! this.reset(); - var s = raw; + var s = this.raw = raw; // plain hostname? if ( reHostnameRule.test(s) ) { @@ -2033,6 +2039,18 @@ FilterContainer.prototype.compileToAtomicFilter = function(filterClass, parsed, bitOffset += 1; type >>>= 1; } while ( type !== 0 ); + + // Only static filters with an explicit type can be redirected. If we reach + // this point, it's because there are one or more explicit types. + if ( !parsed.redirect ) { + return; + } + + var redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw); + var i = redirects.length; + while ( i-- ) { + out.push('n\v\v\v=>\v' + redirects[i]); + } }; /******************************************************************************/ @@ -2054,6 +2072,14 @@ FilterContainer.prototype.fromCompiledContent = function(text, lineBeg) { fields = line.split('\v'); lineBeg = lineEnd + 1; + // Special cases: delegate to more specialized engines. + // Redirect engine. 
+ if ( fields[2] === '=>' ) { + µb.redirectEngine.fromCompiledRule(fields[3]); + continue; + } + + // Plain static filters. this.acceptedCount += 1; bucket = this.categories[fields[0]]; diff --git a/src/js/storage.js b/src/js/storage.js index 902e06a05..c29ed3949 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -362,6 +362,7 @@ µb.staticNetFilteringEngine.freeze(); µb.cosmeticFilteringEngine.freeze(); + µb.redirectEngine.freeze(); vAPI.storage.set({ 'remoteBlacklists': µb.remoteBlacklists }); //quickProfiler.stop(0); @@ -396,6 +397,7 @@ var onFilterListsReady = function(lists) { µb.remoteBlacklists = lists; + µb.redirectEngine.reset(); µb.cosmeticFilteringEngine.reset(); µb.staticNetFilteringEngine.reset(); µb.destroySelfie(); @@ -656,7 +658,7 @@ } var onRulesLoaded = function(details) { if ( details.content !== '' ) { - µb.redirectEngine.fromString(details.content); + µb.redirectEngine.redirectDataFromString(details.content); } callback(); };