1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-18 08:52:26 +02:00

code review of dynamic URL filtering engine

This commit is contained in:
gorhill 2017-02-11 11:44:18 -05:00
parent cbca48307c
commit 1e1508cdd2

View File

@ -1,7 +1,7 @@
/******************************************************************************* /*******************************************************************************
uBlock - a browser extension to black/white list requests. uBlock Origin - a browser extension to black/white list requests.
Copyright (C) 2015 Raymond Hill Copyright (C) 2015-2017 Raymond Hill
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -19,7 +19,7 @@
Home: https://github.com/gorhill/uBlock Home: https://github.com/gorhill/uBlock
*/ */
/* global µBlock */ 'use strict';
/******************************************************************************/ /******************************************************************************/
@ -28,16 +28,12 @@
µBlock.URLNetFiltering = (function() { µBlock.URLNetFiltering = (function() {
'use strict';
/******************************************************************************* /*******************************************************************************
buckets: map of [origin + urlkey + type] buckets: map of [hostname + type]
bucket: array of rule entry, sorted from shorter to longer url bucket: array of rule entries, sorted from shorter to longer url
rule entry: { url, action } rule entry: { url, action }
*******************************************************************************/ *******************************************************************************/
/******************************************************************************/ /******************************************************************************/
@ -63,13 +59,13 @@ var RuleEntry = function(url, action) {
/******************************************************************************/ /******************************************************************************/
var indexOfURL = function(urls, url) { var indexOfURL = function(entries, url) {
// TODO: binary search -- maybe, depends on common use cases // TODO: binary search -- maybe, depends on common use cases
var urlLen = url.length; var urlLen = url.length,
var entry; entry;
// urls must be ordered by increasing length. // URLs must be ordered by increasing length.
for ( var i = 0; i< urls.length; i++ ) { for ( var i = 0; i < entries.length; i++ ) {
entry = urls[i]; entry = entries[i];
if ( entry.url.length > urlLen ) { if ( entry.url.length > urlLen ) {
break; break;
} }
@ -82,30 +78,31 @@ var indexOfURL = function(urls, url) {
/******************************************************************************/ /******************************************************************************/
// Find the entry whose URL is the longest prefix of `url`.
//
// entries: array of rule entries ({ url, action }), expected to be ordered by
//          increasing URL length.
// url:     the URL to match.
//
// Returns the index of the matching entry, or -1 if no entry URL is a prefix
// of `url`.
var indexOfMatch = function(entries, url) {
    var urlLen = url.length,
        i = entries.length;
    // Phase 1: starting from the end (longest URLs), skip every entry whose
    // URL is longer than `url` -- such an entry can never be a prefix of it.
    while ( i-- ) {
        if ( entries[i].url.length <= urlLen ) {
            break;
        }
    }
    // Phase 2: `i` is -1 when every entry was skipped. Otherwise test the
    // remaining entries from longest to shortest, so that the first hit is
    // the most specific one.
    if ( i !== -1 ) {
        do {
            if ( url.startsWith(entries[i].url) ) {
                return i;
            }
        } while ( i-- );
    }
    return -1;
};
/******************************************************************************/ /******************************************************************************/
var indexFromLength = function(urls, len) { var indexFromLength = function(entries, len) {
// TODO: binary search -- maybe, depends on common use cases // TODO: binary search -- maybe, depends on common use cases
// urls must be ordered by increasing length. // URLs must be ordered by increasing length.
for ( var i = 0; i< urls.length; i++ ) { for ( var i = 0; i < entries.length; i++ ) {
if ( urls[i].url.length > len ) { if ( entries[i].url.length > len ) {
return i; return i;
} }
} }
@ -114,43 +111,26 @@ var indexFromLength = function(urls, len) {
/******************************************************************************/ /******************************************************************************/
// Insert a new rule entry for `url`/`action` into `entries`, keeping the
// array ordered by increasing URL length.
//
// entries: array of rule entries, mutated in place.
// url:     URL of the new rule entry.
// action:  action of the new rule entry.
var addRuleEntry = function(entries, url, action) {
    var entry = new RuleEntry(url, action),
        i = indexFromLength(entries, url.length);
    // `indexFromLength` yields the index of the first entry with a longer
    // URL; -1 is handled here as "no such entry", in which case the new
    // entry goes at the end.
    if ( i === -1 ) {
        entries.push(entry);
    } else {
        entries.splice(i, 0, entry);
    }
};
/******************************************************************************/ /******************************************************************************/
var urlKeyFromURL = function(url) {
// Experimental: running benchmarks first
//if ( url === '*' ) {
// return url;
//}
var match = reURLKey.exec(url);
return match !== null ? match[0] : '';
};
var reURLKey = /^[a-z]+:\/\/[^\/?#]+/;
/******************************************************************************/
// Constructor: all instance state is set up by `reset()`.
var URLNetFiltering = function() {
    this.reset();
};
/******************************************************************************/ /******************************************************************************/
// rules:
// origin + urlkey + type => urls
// urls = collection of urls to match
URLNetFiltering.prototype.reset = function() { URLNetFiltering.prototype.reset = function() {
this.rules = Object.create(null); this.rules = new Map();
// registers, filled with result of last evaluation // registers, filled with result of last evaluation
this.context = ''; this.context = '';
this.url = ''; this.url = '';
@ -161,20 +141,24 @@ URLNetFiltering.prototype.reset = function() {
/******************************************************************************/ /******************************************************************************/
// Make the rules of this instance identical to those of `other`.
//
// other: another URLNetFiltering instance; its `rules` map is read, never
//        modified.
//
// Buckets missing from `other` are removed from this instance, then every
// bucket of `other` is copied over.
URLNetFiltering.prototype.assign = function(other) {
    var thisRules = this.rules,
        otherRules = other.rules,
        iter, item;
    // Remove rules not in other.
    // Iterate over keys, not entries: an entries iterator yields
    // [key, value] pairs, which can never be found by `has()` or removed by
    // `delete()`, and stale buckets would then never go away.
    iter = thisRules.keys();
    for (;;) {
        item = iter.next();
        if ( item.done ) { break; }
        if ( otherRules.has(item.value) === false ) {
            thisRules.delete(item.value);
        }
    }
    // Add/change rules in other
    // NOTE(review): `slice()` copies the bucket array only; the rule entry
    // objects themselves remain shared with `other` -- confirm this is
    // intended given that `setRule` updates `entry.action` in place.
    iter = otherRules.entries();
    for (;;) {
        item = iter.next();
        if ( item.done ) { break; }
        thisRules.set(item.value[0], item.value[1].slice());
    }
};
@ -184,117 +168,77 @@ URLNetFiltering.prototype.setRule = function(srcHostname, url, type, action) {
if ( action === 0 ) { if ( action === 0 ) {
return this.removeRule(srcHostname, url, type); return this.removeRule(srcHostname, url, type);
} }
var bucketKey = srcHostname + ' ' + type,
var urlKey = urlKeyFromURL(url); entries = this.rules.get(bucketKey);
if ( urlKey === '' ) { if ( entries === undefined ) {
return false; entries = [];
this.rules.set(bucketKey, entries);
} }
var i = indexOfURL(entries, url),
var bucketKey = srcHostname + ' ' + urlKey + ' ' + type; entry;
var urls = this.rules[bucketKey];
if ( urls === undefined ) {
urls = this.rules[bucketKey] = [];
}
var entry;
var i = indexOfURL(urls, url);
if ( i !== -1 ) { if ( i !== -1 ) {
entry = urls[i]; entry = entries[i];
if ( entry.action === action ) { if ( entry.action === action ) { return false; }
return false;
}
entry.action = action; entry.action = action;
return true; } else {
addRuleEntry(entries, url, action);
} }
addRuleEntry(urls, url, action);
return true; return true;
}; };
/******************************************************************************/ /******************************************************************************/
// Remove the rule for `url` from the bucket keyed by `srcHostname` + ' ' +
// `type`.
//
// Returns true if a rule was removed, false if the bucket does not exist or
// `indexOfURL` reports no entry for `url` in it.
URLNetFiltering.prototype.removeRule = function(srcHostname, url, type) {
    var bucketKey = srcHostname + ' ' + type,
        entries = this.rules.get(bucketKey);
    if ( entries === undefined ) {
        return false;
    }
    var i = indexOfURL(entries, url);
    if ( i === -1 ) {
        return false;
    }
    entries.splice(i, 1);
    // Do not keep empty buckets around: drop the key once its last entry
    // is gone.
    if ( entries.length === 0 ) {
        this.rules.delete(bucketKey);
    }
    return true;
};
/******************************************************************************/ /******************************************************************************/
// Evaluate `target` against the rules, starting with the bucket of `context`
// and broadening the context one step at a time.
//
// context: hostname to start from; after each miss the leftmost label is
//          stripped (up to and including the first '.'), and once there is
//          no '.' left the context becomes '*', which is tried last.
// target:  URL to evaluate.
// type:    request type; for each context the type-specific bucket is
//          tried first, then the '*' bucket.
//
// The outcome is left in the registers `this.context`, `this.url`,
// `this.type` and `this.r`. `this.r` is 0 when nothing matched; in that case
// `this.url` and `this.type` are not updated. Returns `this`.
URLNetFiltering.prototype.evaluateZ = function(context, target, type) {
    this.r = 0;
    if ( this.rules.size === 0 ) {
        return this;
    }
    var entries, pos, i, entry;
    for (;;) {
        this.context = context;
        // Exact type first...
        if ( (entries = this.rules.get(context + ' ' + type)) ) {
            i = indexOfMatch(entries, target);
            if ( i !== -1 ) {
                entry = entries[i];
                this.url = entry.url;
                this.type = type;
                this.r = entry.action;
                return this;
            }
        }
        // ... then any type.
        if ( (entries = this.rules.get(context + ' *')) ) {
            i = indexOfMatch(entries, target);
            if ( i !== -1 ) {
                entry = entries[i];
                this.url = entry.url;
                this.type = '*';
                this.r = entry.action;
                return this;
            }
        }
        if ( context === '*' ) { break; }
        pos = context.indexOf('.');
        context = pos !== -1 ? context.slice(pos + 1) : '*';
    }
    return this;
};
@ -350,16 +294,20 @@ URLNetFiltering.prototype.copyRules = function(other, context, urls, type) {
// "url-filtering:" hostname url type action // "url-filtering:" hostname url type action
URLNetFiltering.prototype.toString = function() { URLNetFiltering.prototype.toString = function() {
var out = []; var out = [],
var pos, hn, type, urls, i, entry; iter = this.rules.entries(),
for ( var bucketKey in this.rules ) { item, key, pos, hn, type, entries, i, entry;
pos = bucketKey.indexOf(' '); for (;;) {
hn = bucketKey.slice(0, pos); item = iter.next();
pos = bucketKey.lastIndexOf(' '); if ( item.done ) { break; }
type = bucketKey.slice(pos + 1); key = item.value[0];
urls = this.rules[bucketKey]; pos = key.indexOf(' ');
for ( i = 0; i < urls.length; i++ ) { hn = key.slice(0, pos);
entry = urls[i]; pos = key.lastIndexOf(' ');
type = key.slice(pos + 1);
entries = item.value[1];
for ( i = 0; i < entries.length; i++ ) {
entry = entries[i];
out.push( out.push(
hn + ' ' + hn + ' ' +
entry.url + ' ' + entry.url + ' ' +
@ -374,46 +322,28 @@ URLNetFiltering.prototype.toString = function() {
/******************************************************************************/ /******************************************************************************/
// Replace all current rules with those parsed from `text`.
//
// text: rules, one per line, each made of four whitespace-separated fields:
//       hostname url type action
//
// Lines which are empty, do not have exactly four fields, whose url field
// does not contain '://', or whose action name is not a key of
// `nameToActionMap` are silently skipped.
URLNetFiltering.prototype.fromString = function(text) {
    this.reset();
    var lineIter = new µBlock.LineIterator(text),
        line, fields;
    while ( lineIter.eot() === false ) {
        line = lineIter.next().trim();
        if ( line === '' ) { continue; }
        // Coarse test: cheap rejection before splitting the line
        if ( line.indexOf('://') === -1 ) {
            continue;
        }
        fields = line.split(/\s+/);
        if ( fields.length !== 4 ) {
            continue;
        }
        // Finer test: the '://' must be in the url field specifically
        if ( fields[1].indexOf('://') === -1 ) {
            continue;
        }
        // Go through Object.prototype so that the check does not depend on
        // what `nameToActionMap` inherits from.
        if ( Object.prototype.hasOwnProperty.call(nameToActionMap, fields[3]) === false ) {
            continue;
        }
        this.setRule(fields[0], fields[1], fields[2], nameToActionMap[fields[3]]);
    }
};