1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-11-06 02:42:33 +01:00
uBlock/js/abp-filters.js

1523 lines
48 KiB
JavaScript
Raw Normal View History

2014-06-24 00:42:43 +02:00
/*******************************************************************************
µBlock - a Chromium browser extension to block requests.
Copyright (C) 2014 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
/* jshint esnext: true, bitwise: false */
/* global µBlock */
/******************************************************************************/
µBlock.abpFilters = (function(){
/******************************************************************************/
2014-07-14 17:24:59 +02:00
// Layout of a 16-bit category value:
//
//   fedcba9876543210
//   |   |  |
//   |   |  +---- party [0 - 7]                  (bits 8-10)
//   |   +------- type [0 - 15]                  (bits 11-14)
//   +----------- [BlockAction | AllowAction]    (bit 15)
2014-06-24 00:42:43 +02:00
// Action: bit 15.
const BlockAction = 0 << 15;
const AllowAction = 1 << 15;

// "Any type" marker: value 1 in the type field, which starts at bit 11.
const AnyType = 1 << 11;

// Party: field starting at bit 8.
const AnyParty = 0 << 8;
const FirstParty = 1 << 8;
const ThirdParty = 2 << 8;
const SpecificParty = 3 << 8;
const SpecificNotParty = 4 << 8;

// Block categories. The "any type" composites are derived from BlockAnyType.
const BlockAnyType = BlockAction | AnyType;
const BlockAnyParty = BlockAction | AnyParty;
const BlockOneParty = BlockAction | SpecificParty;
const BlockOtherParties = BlockAction | SpecificNotParty;
const BlockAnyTypeAnyParty = BlockAnyType | AnyParty;
const BlockAnyType1stParty = BlockAnyType | FirstParty;
const BlockAnyType3rdParty = BlockAnyType | ThirdParty;
const BlockAnyTypeOneParty = BlockAnyType | SpecificParty;
const BlockAnyTypeOtherParties = BlockAnyType | SpecificNotParty;

// Allow categories, mirroring the block categories with bit 15 set.
const AllowAnyType = AllowAction | AnyType;
const AllowAnyParty = AllowAction | AnyParty;
const AllowOneParty = AllowAction | SpecificParty;
const AllowOtherParties = AllowAction | SpecificNotParty;
const AllowAnyTypeAnyParty = AllowAnyType | AnyParty;
const AllowAnyType1stParty = AllowAnyType | FirstParty;
const AllowAnyType3rdParty = AllowAnyType | ThirdParty;
const AllowAnyTypeOneParty = AllowAnyType | SpecificParty;
const AllowAnyTypeOtherParties = AllowAnyType | SpecificNotParty;
// Hostname read by the hostname-constrained filters' match() methods.
var pageHostname = '';

// Blank line: empty or whitespace only. `\s*` rather than `\s+` so that the
// empty string itself is recognised as blank.
var reIgnoreEmpty = /^\s*$/;

// Comment line: starts with `[` or `!`.
var reIgnoreComment = /^\[|^!/;

// Whole string made of lowercase alphanumerics, dots and dashes, starting and
// ending with an alphanumeric (at least three characters).
var reHostnameRule = /^[0-9a-z][0-9a-z.-]+[0-9a-z]$/;

// Both token regexes carry the `g` flag: users reset `lastIndex` before
// exec() and read it afterwards to get the end offset of the token.
var reHostnameToken = /^[0-9a-z]+/g;
var reGoodToken = /[%0-9a-z]{2,}/g;
// Maps a normalized request type name to its value in the type field of a
// category (the field starts at bit 11). Value 1 is taken by `AnyType`.
// Key order matters to code enumerating this object with `for ... in`.
var typeNameToTypeValue = {
    'stylesheet': 2 << 11,
    'image': 3 << 11,
    'object': 4 << 11,
    'script': 5 << 11,
    'xmlhttprequest': 6 << 11,
    'sub_frame': 7 << 11,
    'other': 8 << 11,
    'popup': 9 << 11
};
// ABP filters: https://adblockplus.org/en/filters
// regex tester: http://regex101.com/
/******************************************************************************/
/*
var histogram = function(label, categories) {
var h = [],
categoryBucket;
for ( var k in categories ) {
if ( categories.hasOwnProperty(k) === false ) {
continue;
}
categoryBucket = categories[k];
for ( var kk in categoryBucket ) {
if ( categoryBucket.hasOwnProperty(kk) === false ) {
continue;
}
filterBucket = categoryBucket[kk];
h.push({
k: k + ' ' + kk,
n: filterBucket instanceof FilterBucket ? filterBucket.filters.length : 1
});
}
}
console.log('Histogram %s', label);
var total = h.length;
h.sort(function(a, b) { return b.n - a.n; });
// Find indices of entries of interest
var target = 2;
for ( var i = 0; i < total; i++ ) {
if ( h[i].n === target ) {
console.log('\tEntries with only %d filter(s) start at index %s (key = "%s")', target, i, h[i].k);
target -= 1;
}
}
h = h.slice(0, 50);
h.forEach(function(v) {
console.log('\tkey=%s count=%d', v.k, v.n);
});
console.log('\tTotal buckets count: %d', total);
};
*/
/*
var adbProfiler = {
testCount: 0,
urlCount: 0,
dumpEach: 200,
countUrl: function() {
this.urlCount += 1;
if ( (this.urlCount % this.dumpEach) === 0 ) {
this.dump();
}
},
countTest: function() {
this.testCount += 1;
},
dump: function() {
console.log('µBlock.adbProfiler> number or filters tested per URL: %d (sample: %d URLs)', this.testCount / this.urlCount, this.urlCount);
},
reset: function() {
this.testCount = 0;
this.urlCount = 0;
},
dummy: 0
};
*/
/*******************************************************************************
Filters family tree:

- plain (no wildcard)
    - anywhere
        - no hostname
        - specific hostname
        - specific not hostname
    - anchored at start
        - no hostname
        - specific hostname
        - specific not hostname
    - anchored at end
        - no hostname
        - specific hostname
        - specific not hostname
- one wildcard
    - anywhere
        - no hostname
        - specific hostname
        - specific not hostname
    - anchored at start
        - no hostname
        - specific hostname
        - specific not hostname
    - anchored at end
        - no hostname
        - specific hostname
        - specific not hostname
- more than one wildcard
    - anywhere
        - no hostname
        - specific hostname
        - specific not hostname
    - anchored at start
        - no hostname
        - specific hostname
        - specific not hostname
    - anchored at end
        - no hostname
        - specific hostname
        - specific not hostname
*/
/******************************************************************************/
// Plain (wildcard-free) filter. `tokenBeg` is the offset of the indexing
// token inside the filter string `s`.
var FilterPlain = function(s, tokenBeg) {
    this.s = s;
    this.tokenBeg = tokenBeg;
};

// `tokenBeg` is the offset of the token inside `url`. Rewinding it by the
// token's offset inside the filter gives the spot where `s` must be found.
FilterPlain.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var needle = this.s;
    var start = tokenBeg - this.tokenBeg;
    return url.substr(start, needle.length) === needle;
};
// Plain filter restricted to pages whose hostname ends with `hostname`.
var FilterPlainHostname = function(s, tokenBeg, hostname) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    this.hostname = hostname;
};

// NOTE: the page test is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterPlainHostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) !== host ) {
        return false;
    }
    var needle = this.s;
    return url.substr(tokenBeg - this.tokenBeg, needle.length) === needle;
};
// Plain filter applying everywhere except on pages whose hostname ends with
// `hostname`.
var FilterPlainNotHostname = function(s, tokenBeg, hostname) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    this.hostname = hostname;
};

// NOTE: the exclusion is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterPlainNotHostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) === host ) {
        return false;
    }
    var needle = this.s;
    return url.substr(tokenBeg - this.tokenBeg, needle.length) === needle;
};
/******************************************************************************/
// Plain filter whose token starts at offset 0 of the filter string, so no
// token offset needs to be stored.
var FilterPlainPrefix0 = function(s) {
    this.s = s;
};

// `s` must appear in `url` exactly where the token was found.
FilterPlainPrefix0.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var needle = this.s;
    return url.substr(tokenBeg, needle.length) === needle;
};
// Prefix0 plain filter restricted to pages whose hostname ends with
// `hostname`.
var FilterPlainPrefix0Hostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// NOTE: the page test is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterPlainPrefix0Hostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) !== host ) {
        return false;
    }
    var needle = this.s;
    return url.substr(tokenBeg, needle.length) === needle;
};
// Prefix0 plain filter applying everywhere except on pages whose hostname
// ends with `hostname`.
var FilterPlainPrefix0NotHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// NOTE: the exclusion is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterPlainPrefix0NotHostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) === host ) {
        return false;
    }
    var needle = this.s;
    return url.substr(tokenBeg, needle.length) === needle;
};
/******************************************************************************/
// Plain filter whose token starts at offset 1 of the filter string: exactly
// one filter character precedes the token.
var FilterPlainPrefix1 = function(s) {
    this.s = s;
};

// `s` must start one character before the token found in `url`.
FilterPlainPrefix1.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var needle = this.s;
    return url.substr(tokenBeg - 1, needle.length) === needle;
};
// Prefix1 plain filter restricted to pages whose hostname ends with
// `hostname`.
var FilterPlainPrefix1Hostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// NOTE: the page test is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterPlainPrefix1Hostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) !== host ) {
        return false;
    }
    var needle = this.s;
    return url.substr(tokenBeg - 1, needle.length) === needle;
};
// Prefix1 plain filter applying everywhere except on pages whose hostname
// ends with `hostname`.
var FilterPlainPrefix1NotHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// NOTE: the exclusion is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterPlainPrefix1NotHostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) === host ) {
        return false;
    }
    var needle = this.s;
    return url.substr(tokenBeg - 1, needle.length) === needle;
};
/******************************************************************************/
// Plain filter anchored at the start of the URL.
var FilterPlainLeftAnchored = function(s) {
    this.s = s;
};

// True when `url` begins with `s`; the token offset is irrelevant here.
FilterPlainLeftAnchored.prototype.match = function(url) {
    // adbProfiler.countTest();
    var prefix = this.s;
    return url.slice(0, prefix.length) === prefix;
};
// Left-anchored plain filter restricted to pages whose hostname ends with
// `hostname`.
var FilterPlainLeftAnchoredHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// NOTE: the page test is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterPlainLeftAnchoredHostname.prototype.match = function(url) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) !== host ) {
        return false;
    }
    var prefix = this.s;
    return url.slice(0, prefix.length) === prefix;
};
// Left-anchored plain filter applying everywhere except on pages whose
// hostname ends with `hostname`.
var FilterPlainLeftAnchoredNotHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// NOTE: the exclusion is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterPlainLeftAnchoredNotHostname.prototype.match = function(url) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) === host ) {
        return false;
    }
    var prefix = this.s;
    return url.slice(0, prefix.length) === prefix;
};
/******************************************************************************/
// Plain filter anchored at the end of the URL.
var FilterPlainRightAnchored = function(s) {
    this.s = s;
};

// True when the last `s.length` characters of `url` are `s`.
FilterPlainRightAnchored.prototype.match = function(url) {
    // adbProfiler.countTest();
    var suffix = this.s;
    return url.slice(-suffix.length) === suffix;
};
// Right-anchored plain filter restricted to pages whose hostname ends with
// `hostname`.
var FilterPlainRightAnchoredHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// NOTE: the page test is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterPlainRightAnchoredHostname.prototype.match = function(url) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) !== host ) {
        return false;
    }
    var suffix = this.s;
    return url.slice(-suffix.length) === suffix;
};
// Right-anchored plain filter applying everywhere except on pages whose
// hostname ends with `hostname`.
var FilterPlainRightAnchoredNotHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// NOTE: the exclusion is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterPlainRightAnchoredNotHostname.prototype.match = function(url) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) === host ) {
        return false;
    }
    var suffix = this.s;
    return url.slice(-suffix.length) === suffix;
};
/******************************************************************************/
// With a single wildcard, regex is not optimal.
// See:
// http://jsperf.com/regexp-vs-indexof-abp-miss/3
// http://jsperf.com/regexp-vs-indexof-abp-hit/3
// Filter with one `*`: `lSegment` is the text before the wildcard, `rSegment`
// the text after it. `tokenBeg` is the token's offset inside `s`.
var FilterSingleWildcard = function(s, tokenBeg) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
};

// The left segment must sit at the position implied by the token; the right
// segment is then searched for from the end of the left segment onward.
FilterSingleWildcard.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var left = this.lSegment;
    var start = tokenBeg - this.tokenBeg;
    if ( url.substr(start, left.length) !== left ) {
        return false;
    }
    return url.indexOf(this.rSegment, start + left.length) > 0;
};
// Single-wildcard filter restricted to pages whose hostname ends with
// `hostname`.
var FilterSingleWildcardHostname = function(s, tokenBeg, hostname) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
    this.hostname = hostname;
};

// NOTE: the page test is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterSingleWildcardHostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) !== host ) {
        return false;
    }
    var left = this.lSegment;
    var start = tokenBeg - this.tokenBeg;
    if ( url.substr(start, left.length) !== left ) {
        return false;
    }
    return url.indexOf(this.rSegment, start + left.length) > 0;
};
// Single-wildcard filter applying everywhere except on pages whose hostname
// ends with `hostname`.
var FilterSingleWildcardNotHostname = function(s, tokenBeg, hostname) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
    this.hostname = hostname;
};

// NOTE: the exclusion is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterSingleWildcardNotHostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) === host ) {
        return false;
    }
    var left = this.lSegment;
    var start = tokenBeg - this.tokenBeg;
    if ( url.substr(start, left.length) !== left ) {
        return false;
    }
    return url.indexOf(this.rSegment, start + left.length) > 0;
};
/******************************************************************************/
// Single-wildcard filter whose token starts at offset 0 of the filter string.
var FilterSingleWildcardPrefix0 = function(s) {
    this.s = s;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
};

// The left segment must start at the token position in `url`; the right
// segment is searched for from the end of the left segment onward.
FilterSingleWildcardPrefix0.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var left = this.lSegment;
    if ( url.substr(tokenBeg, left.length) !== left ) {
        return false;
    }
    return url.indexOf(this.rSegment, tokenBeg + left.length) > 0;
};
// Prefix0 single-wildcard filter restricted to pages whose hostname ends with
// `hostname`.
var FilterSingleWildcardPrefix0Hostname = function(s, hostname) {
    this.s = s;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
    this.hostname = hostname;
};

// NOTE: the page test is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterSingleWildcardPrefix0Hostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) !== host ) {
        return false;
    }
    var left = this.lSegment;
    if ( url.substr(tokenBeg, left.length) !== left ) {
        return false;
    }
    return url.indexOf(this.rSegment, tokenBeg + left.length) > 0;
};
// Prefix0 single-wildcard filter applying everywhere except on pages whose
// hostname ends with `hostname`.
var FilterSingleWildcardPrefix0NotHostname = function(s, hostname) {
    this.s = s;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
    this.hostname = hostname;
};

// NOTE: the exclusion is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterSingleWildcardPrefix0NotHostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) === host ) {
        return false;
    }
    var left = this.lSegment;
    if ( url.substr(tokenBeg, left.length) !== left ) {
        return false;
    }
    return url.indexOf(this.rSegment, tokenBeg + left.length) > 0;
};
/******************************************************************************/
// With a single wildcard, regex is not optimal.
// See:
// http://jsperf.com/regexp-vs-indexof-abp-miss/3
// http://jsperf.com/regexp-vs-indexof-abp-hit/3
// Single-wildcard filter anchored at the start of the URL.
var FilterSingleWildcardLeftAnchored = function(s) {
    this.s = s;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
};

// `url` must begin with the left segment; the right segment is searched for
// from the end of the left segment onward.
FilterSingleWildcardLeftAnchored.prototype.match = function(url) {
    // adbProfiler.countTest();
    var left = this.lSegment;
    if ( url.slice(0, left.length) !== left ) {
        return false;
    }
    return url.indexOf(this.rSegment, left.length) > 0;
};
// Left-anchored single-wildcard filter restricted to pages whose hostname
// ends with `hostname`.
var FilterSingleWildcardLeftAnchoredHostname = function(s, hostname) {
    this.s = s;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
    this.hostname = hostname;
};

// NOTE: the page test is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterSingleWildcardLeftAnchoredHostname.prototype.match = function(url) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) !== host ) {
        return false;
    }
    var left = this.lSegment;
    if ( url.slice(0, left.length) !== left ) {
        return false;
    }
    return url.indexOf(this.rSegment, left.length) > 0;
};
// Left-anchored single-wildcard filter applying everywhere except on pages
// whose hostname ends with `hostname`.
var FilterSingleWildcardLeftAnchoredNotHostname = function(s, hostname) {
    this.s = s;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
    this.hostname = hostname;
};

// NOTE: the exclusion is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterSingleWildcardLeftAnchoredNotHostname.prototype.match = function(url) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) === host ) {
        return false;
    }
    var left = this.lSegment;
    if ( url.slice(0, left.length) !== left ) {
        return false;
    }
    return url.indexOf(this.rSegment, left.length) > 0;
};
/******************************************************************************/
// With a single wildcard, regex is not optimal.
// See:
// http://jsperf.com/regexp-vs-indexof-abp-miss/3
// http://jsperf.com/regexp-vs-indexof-abp-hit/3
// Single-wildcard filter anchored at the end of the URL.
var FilterSingleWildcardRightAnchored = function(s) {
    this.s = s;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
};

// `url` must end with the right segment; the left segment is then searched
// for backward, starting far enough left that it cannot overlap the right
// segment.
FilterSingleWildcardRightAnchored.prototype.match = function(url) {
    // adbProfiler.countTest();
    var right = this.rSegment;
    if ( url.slice(-right.length) !== right ) {
        return false;
    }
    var left = this.lSegment;
    var lastStart = url.length - right.length - left.length;
    return url.lastIndexOf(left, lastStart) >= 0;
};
// Right-anchored single-wildcard filter restricted to pages whose hostname
// ends with `hostname`.
var FilterSingleWildcardRightAnchoredHostname = function(s, hostname) {
    this.s = s;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
    this.hostname = hostname;
};

// NOTE: the page test is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterSingleWildcardRightAnchoredHostname.prototype.match = function(url) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) !== host ) {
        return false;
    }
    var right = this.rSegment;
    if ( url.slice(-right.length) !== right ) {
        return false;
    }
    var left = this.lSegment;
    var lastStart = url.length - right.length - left.length;
    return url.lastIndexOf(left, lastStart) >= 0;
};
// Right-anchored single-wildcard filter applying everywhere except on pages
// whose hostname ends with `hostname`.
var FilterSingleWildcardRightAnchoredNotHostname = function(s, hostname) {
    this.s = s;
    var star = s.indexOf('*');
    this.lSegment = s.slice(0, star);
    this.rSegment = s.slice(star + 1);
    this.hostname = hostname;
};

// NOTE: the exclusion is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterSingleWildcardRightAnchoredNotHostname.prototype.match = function(url) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) === host ) {
        return false;
    }
    var right = this.rSegment;
    if ( url.slice(-right.length) !== right ) {
        return false;
    }
    var left = this.lSegment;
    var lastStart = url.length - right.length - left.length;
    return url.lastIndexOf(left, lastStart) >= 0;
};
/******************************************************************************/
// With many wildcards, a regex is best.
// Ref: regex escaper taken from:
// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
// modified for the purpose here.
// Filter with several `*`: compiled once into a regex anchored at the start
// of the tested slice. Regex metacharacters in `s` are escaped first, then
// each `*` becomes `.*`.
var FilterManyWildcards = function(s, tokenBeg) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    var escaped = s.replace(/([.+?^=!:${}()|\[\]\/\\])/g, '\\$1');
    var source = '^' + escaped.replace(/\*/g, '.*');
    this.re = new RegExp(source);
};

// The regex is run against `url` cut at the position where the filter would
// have to start, given where the token was found.
FilterManyWildcards.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var candidate = url.slice(tokenBeg - this.tokenBeg);
    return this.re.test(candidate);
};
// Many-wildcards filter restricted to pages whose hostname ends with
// `hostname`. Regex metacharacters in `s` are escaped, then each `*` becomes
// `.*`.
var FilterManyWildcardsHostname = function(s, tokenBeg, hostname) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    var escaped = s.replace(/([.+?^=!:${}()|\[\]\/\\])/g, '\\$1');
    var source = '^' + escaped.replace(/\*/g, '.*');
    this.re = new RegExp(source);
    this.hostname = hostname;
};

// NOTE: the page test is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterManyWildcardsHostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) !== host ) {
        return false;
    }
    var candidate = url.slice(tokenBeg - this.tokenBeg);
    return this.re.test(candidate);
};
// Many-wildcards filter applying everywhere except on pages whose hostname
// ends with `hostname`. Regex metacharacters in `s` are escaped, then each
// `*` becomes `.*`.
var FilterManyWildcardsNotHostname = function(s, tokenBeg, hostname) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    var escaped = s.replace(/([.+?^=!:${}()|\[\]\/\\])/g, '\\$1');
    var source = '^' + escaped.replace(/\*/g, '.*');
    this.re = new RegExp(source);
    this.hostname = hostname;
};

// NOTE: the exclusion is a bare suffix comparison against the module-level
// `pageHostname`; no label boundary is checked.
FilterManyWildcardsNotHostname.prototype.match = function(url, tokenBeg) {
    // adbProfiler.countTest();
    var host = this.hostname;
    if ( pageHostname.slice(-host.length) === host ) {
        return false;
    }
    var candidate = url.slice(tokenBeg - this.tokenBeg);
    return this.re.test(candidate);
};
/******************************************************************************/
// Picks the cheapest filter class able to evaluate a hostname-agnostic
// filter.
//
// details:  parsed filter; `details.f` is the pattern, `details.anchor` is
//           negative for left-anchored, positive for right-anchored, and 0
//           otherwise.
// tokenBeg: offset of the indexing token inside `details.f`.
//
// Returns a filter instance, or null when the pattern has several wildcards
// and is also anchored (no class handles that combination).
var makeFilter = function(details, tokenBeg) {
    var s = details.f;
    var wcOffset = s.indexOf('*');
    if ( wcOffset > 0 ) {
        // Any second `*` means "many wildcards", however far it is from the
        // first one. Testing with /\*[^*]\*/ only caught wildcards separated
        // by exactly one character, so `a*bc*d` was built as a single-wildcard
        // filter with a literal `*` left in its right segment.
        if ( s.indexOf('*', wcOffset + 1) !== -1 ) {
            return details.anchor === 0 ? new FilterManyWildcards(s, tokenBeg) : null;
        }
        if ( details.anchor < 0 ) {
            return new FilterSingleWildcardLeftAnchored(s);
        }
        if ( details.anchor > 0 ) {
            return new FilterSingleWildcardRightAnchored(s);
        }
        if ( tokenBeg === 0 ) {
            return new FilterSingleWildcardPrefix0(s);
        }
        return new FilterSingleWildcard(s, tokenBeg);
    }
    if ( details.anchor < 0 ) {
        return new FilterPlainLeftAnchored(s);
    }
    if ( details.anchor > 0 ) {
        return new FilterPlainRightAnchored(s);
    }
    // Tokens at offset 0 or 1 get dedicated classes which do not need to
    // store the offset.
    if ( tokenBeg === 0 ) {
        return new FilterPlainPrefix0(s);
    }
    if ( tokenBeg === 1 ) {
        return new FilterPlainPrefix1(s);
    }
    return new FilterPlain(s, tokenBeg);
};
/******************************************************************************/
// Picks the cheapest filter class able to evaluate a filter restricted to
// pages matching `hostname`.
//
// details:  parsed filter; `details.f` is the pattern, `details.anchor` is
//           negative for left-anchored, positive for right-anchored, and 0
//           otherwise.
// tokenBeg: offset of the indexing token inside `details.f`.
// hostname: hostname the filter is restricted to.
//
// Returns a filter instance, or null when the pattern has several wildcards
// and is also anchored (no class handles that combination).
var makeHostnameFilter = function(details, tokenBeg, hostname) {
    var s = details.f;
    var wcOffset = s.indexOf('*');
    if ( wcOffset > 0 ) {
        // Any second `*` means "many wildcards", however far it is from the
        // first one. Testing with /\*[^*]\*/ only caught wildcards separated
        // by exactly one character.
        if ( s.indexOf('*', wcOffset + 1) !== -1 ) {
            return details.anchor === 0 ? new FilterManyWildcardsHostname(s, tokenBeg, hostname) : null;
        }
        if ( details.anchor < 0 ) {
            return new FilterSingleWildcardLeftAnchoredHostname(s, hostname);
        }
        if ( details.anchor > 0 ) {
            return new FilterSingleWildcardRightAnchoredHostname(s, hostname);
        }
        if ( tokenBeg === 0 ) {
            return new FilterSingleWildcardPrefix0Hostname(s, hostname);
        }
        return new FilterSingleWildcardHostname(s, tokenBeg, hostname);
    }
    if ( details.anchor < 0 ) {
        return new FilterPlainLeftAnchoredHostname(s, hostname);
    }
    if ( details.anchor > 0 ) {
        return new FilterPlainRightAnchoredHostname(s, hostname);
    }
    // Tokens at offset 0 or 1 get dedicated classes which do not need to
    // store the offset.
    if ( tokenBeg === 0 ) {
        return new FilterPlainPrefix0Hostname(s, hostname);
    }
    if ( tokenBeg === 1 ) {
        return new FilterPlainPrefix1Hostname(s, hostname);
    }
    return new FilterPlainHostname(s, tokenBeg, hostname);
};
/******************************************************************************/
// Picks the cheapest filter class able to evaluate a filter which must not
// apply on pages matching `hostname`.
//
// details:  parsed filter; `details.f` is the pattern, `details.anchor` is
//           negative for left-anchored, positive for right-anchored, and 0
//           otherwise.
// tokenBeg: offset of the indexing token inside `details.f`.
// hostname: hostname the filter is excluded from.
//
// Returns a filter instance, or null when the pattern has several wildcards
// and is also anchored (no class handles that combination).
var makeNotHostnameFilter = function(details, tokenBeg, hostname) {
    var s = details.f;
    var wcOffset = s.indexOf('*');
    if ( wcOffset > 0 ) {
        // Any second `*` means "many wildcards", however far it is from the
        // first one. Testing with /\*[^*]\*/ only caught wildcards separated
        // by exactly one character.
        if ( s.indexOf('*', wcOffset + 1) !== -1 ) {
            return details.anchor === 0 ? new FilterManyWildcardsNotHostname(s, tokenBeg, hostname) : null;
        }
        if ( details.anchor < 0 ) {
            return new FilterSingleWildcardLeftAnchoredNotHostname(s, hostname);
        }
        if ( details.anchor > 0 ) {
            return new FilterSingleWildcardRightAnchoredNotHostname(s, hostname);
        }
        if ( tokenBeg === 0 ) {
            return new FilterSingleWildcardPrefix0NotHostname(s, hostname);
        }
        return new FilterSingleWildcardNotHostname(s, tokenBeg, hostname);
    }
    if ( details.anchor < 0 ) {
        return new FilterPlainLeftAnchoredNotHostname(s, hostname);
    }
    if ( details.anchor > 0 ) {
        return new FilterPlainRightAnchoredNotHostname(s, hostname);
    }
    // Tokens at offset 0 or 1 get dedicated classes which do not need to
    // store the offset.
    if ( tokenBeg === 0 ) {
        return new FilterPlainPrefix0NotHostname(s, hostname);
    }
    if ( tokenBeg === 1 ) {
        return new FilterPlainPrefix1NotHostname(s, hostname);
    }
    return new FilterPlainNotHostname(s, tokenBeg, hostname);
};
/******************************************************************************/
// Given a string, find a good token. Tokens which are too generic, i.e. very
// common with a high probability of ending up as a miss, are not
// good. Avoid if possible. This has a *significant* positive impact on
// performance.
// These "bad tokens" are collated manually.
// Tokens considered too generic to index a filter on.
var badTokens = {
    'com': true,
    'http': true,
    'https': true,
    'images': true,
    'img': true,
    'js': true,
    'net': true,
    'news': true,
    'www': true
};
// Returns the exec() result for the first token of `s` which is not listed
// in `badTokens`, or for the very first token when all of them are bad, or
// null when `s` contains no token at all.
//
// On return, `reGoodToken.lastIndex` is the end offset of the returned token;
// callers rely on this.
var findFirstGoodToken = function(s) {
    var matches;
    reGoodToken.lastIndex = 0;
    while ( (matches = reGoodToken.exec(s)) !== null ) {
        // Own-property test: a plain `badTokens[token]` lookup also sees
        // members inherited from Object.prototype (e.g. `constructor`) and
        // would wrongly reject such tokens.
        if ( Object.prototype.hasOwnProperty.call(badTokens, matches[0]) === false ) {
            return matches;
        }
    }
    // No good token found, just return the first token from left
    reGoodToken.lastIndex = 0;
    return reGoodToken.exec(s);
};
/******************************************************************************/
// Returns the exec() result for the leading run of lowercase alphanumerics in
// `s`, or null when `s` does not start with one. `reHostnameToken.lastIndex`
// is left at the end of the token.
var findHostnameToken = function(s) {
    var re = reHostnameToken;
    re.lastIndex = 0;
    var found = re.exec(s);
    return found;
};
/******************************************************************************/
// Trim leading/trailing char "c"
// Returns `s` stripped of every leading and trailing occurrence of the
// character `c`. Occurrences inside the string are left alone.
var trimChar = function(s, c) {
    // Skip the leading run.
    var head = 0;
    while ( s.charAt(head) === c ) {
        head += 1;
    }
    var rest = s.slice(head);
    // Walk back over the trailing run.
    var tail = rest.length;
    while ( tail !== 0 && rest.charAt(tail - 1) === c ) {
        tail -= 1;
    }
    return rest.slice(0, tail);
};
/******************************************************************************/
// Holds the outcome of parsing one filter line. All fields start at the same
// defaults which reset() restores; parse() fills them in.
var FilterParser = function() {
// BlockAction, or AllowAction for filters prefixed with `@@`
this.action = BlockAction;
// -1: left-anchored, 1: right-anchored, 0: not anchored
this.anchor = 0;
// domains derived from the `domain=` hostnames (parallel to `hostnames`)
this.domains = [];
// true when the line is an element hiding filter
this.elemHiding = false;
// normalized filter pattern
this.f = '';
// `~third-party` option seen
this.firstParty = false;
// raw options string (what follows `$`)
this.fopts = '';
// true when the filter starts with `||`
this.hostname = false;
// hostnames listed in `domain=`
this.hostnames = [];
// domains derived from the negated hostnames (parallel to `notHostnames`)
this.notDomains = [];
// hostnames listed with `~` in `domain=`
this.notHostnames = [];
// `third-party` option seen
this.thirdParty = false;
// type values (see typeNameToTypeValue) collected from type options
this.types = [];
// true when an option was not recognized
this.unsupported = false;
};
/******************************************************************************/
// Maps a type option as written in a filter to the type name used as key in
// typeNameToTypeValue. Also used by parse() to recognize type options.
FilterParser.prototype.toNormalizedType = {
    'stylesheet': 'stylesheet',
    'image': 'image',
    'object': 'object',
    'object-subrequest': 'object',
    'script': 'script',
    'xmlhttprequest': 'xmlhttprequest',
    'subdocument': 'sub_frame',
    'other': 'other',
    'popup': 'popup'
};
/******************************************************************************/
// Puts every field back to its default value so the parser can be reused for
// the next filter line. Arrays are replaced by fresh ones, not emptied.
// Returns the parser itself.
FilterParser.prototype.reset = function() {
    var parser = this;
    parser.action = BlockAction;
    parser.anchor = 0;
    parser.domains = [];
    parser.elemHiding = false;
    parser.f = '';
    parser.firstParty = false;
    parser.fopts = '';
    parser.hostname = false;
    parser.hostnames = [];
    parser.notDomains = [];
    parser.notHostnames = [];
    parser.thirdParty = false;
    parser.types = [];
    parser.unsupported = false;
    return parser;
};
/******************************************************************************/
// Records the request type(s) selected by one type option.
//
// raw: option name as written in the filter (a key of toNormalizedType).
// not: true when the option was prefixed with `~`.
//
// Without `~`, the value of the type is appended to `this.types`. With `~`,
// the values of all the other types are appended instead.
FilterParser.prototype.parseOptType = function(raw, not) {
    var type = this.toNormalizedType[raw];
    if ( !not ) {
        this.types.push(typeNameToTypeValue[type]);
        return;
    }
    for ( var k in typeNameToTypeValue ) {
        if ( k === type ) { continue; }
        // https://github.com/gorhill/uBlock/issues/121
        // `popup` is a special type, it cannot be set for filters intended
        // for real net request types. The test is safe since there is no
        // such thing as a filter using `~popup`.
        if ( k === 'popup' ) { continue; }
        this.types.push(typeNameToTypeValue[k]);
    }
};
/******************************************************************************/
// Records a `third-party` option: plain form flags `thirdParty`, the negated
// form (`~third-party`) flags `firstParty`.
FilterParser.prototype.parseOptParty = function(not) {
    if ( !not ) {
        this.thirdParty = true;
        return;
    }
    this.firstParty = true;
};
/******************************************************************************/
// Splits the value of a `domain=` option on `|` and files each entry, with
// the domain computed by µBlock.URI.domainFromHostname(), under either
// hostnames/domains or, when prefixed with `~`, notHostnames/notDomains.
FilterParser.prototype.parseOptHostnames = function(raw) {
    var uri = µBlock.URI;
    var entries = raw.split('|');
    for ( var idx = 0; idx < entries.length; idx++ ) {
        var entry = entries[idx];
        var negated = entry.charAt(0) === '~';
        var name = negated ? entry.slice(1) : entry;
        var domain = uri.domainFromHostname(name);
        var nameList = negated ? this.notHostnames : this.hostnames;
        var domainList = negated ? this.notDomains : this.domains;
        nameList.push(name);
        domainList.push(domain);
    }
};
/******************************************************************************/
// Parses one ABP filter line into this parser's fields and returns the
// parser itself. Previous results are discarded first.
//
// Processing order: element hiding detection (stops there), `@@` prefix,
// `||` prefix, leading `|`, options after `$`, trailing `|`, placeholder
// normalization, then option parsing. An unrecognized option sets
// `unsupported` without stopping the parse.
FilterParser.prototype.parse = function(s) {
    // important!
    this.reset();

    // element hiding filter?
    if ( s.indexOf('##') >= 0 || s.indexOf('#@') >= 0 ) {
        this.elemHiding = true;
        return this;
    }

    // block or allow filter?
    if ( s.slice(0, 2) === '@@' ) {
        this.action = AllowAction;
        s = s.slice(2);
    }

    // hostname anchoring
    if ( s.slice(0, 2) === '||' ) {
        this.hostname = true;
        s = s.slice(2);
    }

    // left-anchored
    if ( s.charAt(0) === '|' ) {
        this.anchor = -1;
        s = s.slice(1);
    }

    // options: a `$` at offset 0 is not taken as an options separator
    var pos = s.indexOf('$');
    if ( pos > 0 ) {
        this.fopts = s.slice(pos + 1);
        s = s.slice(0, pos);
    }

    // right-anchored: this overrides a left anchor found above
    if ( s.slice(-1) === '|' ) {
        this.anchor = 1;
        s = s.slice(0, -1);
    }

    // normalize placeholders
    // TODO: transforming `^` into `*` is not a strict interpretation of
    // ABP syntax.
    s = s.replace(/\^/g, '*');
    s = s.replace(/\*\*+/g, '*');

    // remove leading and trailing wildcards
    this.f = trimChar(s, '*');

    if ( !this.fopts ) {
        return this;
    }

    // parse options
    var opts = this.fopts.split(',');
    var opt, not;
    for ( var i = 0; i < opts.length; i++ ) {
        opt = opts[i];
        not = opt.charAt(0) === '~';
        if ( not ) {
            opt = opt.slice(1);
        }
        if ( opt === 'third-party' ) {
            this.parseOptParty(not);
            continue;
        }
        // Type options, `popup` included since it is a key of
        // toNormalizedType.
        if ( this.toNormalizedType.hasOwnProperty(opt) ) {
            this.parseOptType(opt, not);
            continue;
        }
        if ( opt.slice(0,7) === 'domain=' ) {
            this.parseOptHostnames(opt.slice(7));
            continue;
        }
        this.unsupported = true;
    }
    return this;
};
/******************************************************************************/
/******************************************************************************/
// Container for several filters stored under the same key. It is created
// with two filters; more can be appended with add(). `s` holds the pattern
// of the filter which matched last.
var FilterBucket = function(a, b) {
    this.filters = [a, b];
    this.s = '';
};

/******************************************************************************/

// Appends one more filter to the bucket.
FilterBucket.prototype.add = function(a) {
    this.filters.push(a);
};

/******************************************************************************/

// Tries the filters from the most recently added to the oldest. Anything
// other than a strict `false` from a filter counts as a hit: the filter's
// pattern is copied into `this.s` and true is returned.
FilterBucket.prototype.match = function(url, tokenBeg) {
    var candidates = this.filters;
    for ( var idx = candidates.length - 1; idx >= 0; idx-- ) {
        var candidate = candidates[idx];
        if ( candidate.match(url, tokenBeg) === false ) {
            continue;
        }
        this.s = candidate.s;
        return true;
    }
    return false;
};
/******************************************************************************/
/******************************************************************************/
// Holds all the net filters. The long-lived helpers are created once here;
// everything else is (re)initialized by reset().
var FilterContainer = function() {
    this.reAnyToken = /[%0-9a-z]+/g;
    this.buckets = new Array(8);
    this.blockedAnyPartyHostnames = new µBlock.LiquidDict();
    this.blocked3rdPartyHostnames = new µBlock.LiquidDict();
    this.filterParser = new FilterParser();
    // Must come last: reset() uses the two dicts and the parser.
    this.reset();
};
/******************************************************************************/
// Reset all, thus reducing to a minimum memory footprint of the context.
// Drops all filters and zeroes all counters. The hostname dicts and the
// parser are reset in place; the category and duplicate maps are replaced.
FilterContainer.prototype.reset = function() {
    this.frozen = false;
    this.processedFilterCount = 0;
    this.acceptedCount = 0;
    this.allowFilterCount = 0;
    this.blockFilterCount = 0;
    this.duplicateCount = 0;
    this.categories = {};
    this.duplicates = {};
    this.blockedAnyPartyHostnames.reset();
    this.blocked3rdPartyHostnames.reset();
    this.filterParser.reset();
};
2014-06-24 00:42:43 +02:00
2014-07-20 21:00:26 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
2014-07-20 21:00:26 +02:00
// Called once all filters have been added: freezes both hostname dicts,
// releases the duplicate-detection map and the parser state, and flags the
// container as frozen.
FilterContainer.prototype.freeze = function() {
    //histogram('allFilters', this.categories);
    this.blockedAnyPartyHostnames.freeze();
    this.blocked3rdPartyHostnames.freeze();
    this.duplicates = {};
    this.filterParser.reset();
    this.frozen = true;
};
/******************************************************************************/
// Condenses a domain name into a 4-bit value: the lowest bit of the char
// codes sampled at offsets 0, L/4, 2*L/4 and 3*L/4 (L/4 rounded down), the
// first sample ending up as the highest bit. Returns 0 for `undefined`.
FilterContainer.prototype.toDomainBits = function(domain) {
    if ( domain === undefined ) {
        return 0;
    }
    var step = domain.length >> 2;
    var bits = 0;
    for ( var k = 0; k < 4; k++ ) {
        bits = (bits << 1) | (domain.charCodeAt(k * step) & 0x01);
    }
    return bits;
};
/******************************************************************************/
// Turns a numeric category into a one-character string key: the character
// whose UTF-16 code unit is `category`.
FilterContainer.prototype.makeCategoryKey = function(category) {
    var key = String.fromCharCode(category);
    return key;
};
/******************************************************************************/
// Adds `hostname` to the dict of hostnames blocked for any party and updates
// the counters: accepted + block on success, duplicate when the dict's add()
// returns a falsy value. Returns true when the hostname was added.
FilterContainer.prototype.addAnyPartyHostname = function(hostname) {
    if ( this.blockedAnyPartyHostnames.add(hostname) ) {
        this.acceptedCount++;
        this.blockFilterCount++;
        return true;
    }
    this.duplicateCount++;
    return false;
};
/******************************************************************************/
// Adds `hostname` to the dict of hostnames blocked as third party and updates
// the counters: accepted + block on success, duplicate when the dict's add()
// returns a falsy value. Returns true when the hostname was added.
FilterContainer.prototype.add3rdPartyHostname = function(hostname) {
    if ( this.blocked3rdPartyHostnames.add(hostname) ) {
        this.acceptedCount++;
        this.blockFilterCount++;
        return true;
    }
    this.duplicateCount++;
    return false;
};
/******************************************************************************/
// Adds one raw filter line to the container.
//
// s: the filter line.
//
// Returns true when a filter was stored, false when the line was ignored
// (blank, comment, element hiding, duplicate, unsupported, optionless
// hostname rule, or not convertible into a filter).
FilterContainer.prototype.add = function(s) {
    // ORDER OF TESTS IS IMPORTANT!

    // Ignore empty lines
    if ( reIgnoreEmpty.test(s) ) {
        return false;
    }

    // Ignore comments
    if ( reIgnoreComment.test(s) ) {
        return false;
    }

    var parsed = this.filterParser.parse(s);

    // Ignore element-hiding filters
    if ( parsed.elemHiding ) {
        return false;
    }

    if ( this.duplicates[s] ) {
        this.duplicateCount++;
        return false;
    }
    this.duplicates[s] = true;

    this.processedFilterCount += 1;

    // Ignore rules with other conditions for now
    if ( parsed.unsupported ) {
        // console.log('µBlock> abp-filter.js/FilterContainer.add(): unsupported filter "%s"', s);
        return false;
    }

    // Pure hostname block rules: `||hostname` with BlockAction.
    if ( parsed.hostname && parsed.action === BlockAction && reHostnameRule.test(parsed.f) ) {
        // Ignore optionless hostname rules, these will be taken care of by µBlock.
        if ( parsed.fopts === '' ) {
            return false;
        }
        // Pure third-party hostnames, use more efficient liquid dict.
        // add3rdPartyHostname() does the bookkeeping itself (accepted, block
        // and duplicate counters), hence it is called before acceptedCount
        // is touched here.
        if ( parsed.fopts === 'third-party' ) {
            return this.add3rdPartyHostname(parsed.f);
        }
    }

    this.acceptedCount += 1;

    var r = this.addFilter(parsed);
    if ( r === false ) {
        return false;
    }

    // AllowAction is the only non-zero action value.
    if ( parsed.action ) {
        this.allowFilterCount += 1;
    } else {
        this.blockFilterCount += 1;
    }
    return true;
};
/******************************************************************************/
// Build the filter object(s) for a parsed filter and file them under the
// proper party.
//
// A token is first located in `parsed.f`; without one, nothing is stored.
// - If the filter lists hostnames and/or negated hostnames, one filter is
//   created per listed hostname (SpecificParty, plus domain bits) and one per
//   negated hostname (SpecificNotParty, no domain bits).
// - Otherwise a single filter is created and filed under FirstParty,
//   ThirdParty or AnyParty.
//
// parsed: object produced by the filter parser
// returns: false if no token was found or a filter object could not be
//          created, true otherwise. On a false return, entries created for
//          earlier hostnames of the same filter are left in place.
FilterContainer.prototype.addFilter = function(parsed) {
    // TODO: avoid duplicates

    var matches = parsed.hostname ?
        findHostnameToken(parsed.f) :
        findFirstGoodToken(parsed.f);
    if ( !matches || !matches[0].length ) {
        return false;
    }
    var tokenBeg = matches.index;
    // NOTE(review): presumably the finder used above ran the matching regex,
    // so that its `lastIndex` is the end of the token -- confirm.
    var tokenEnd = parsed.hostname ?
        reHostnameToken.lastIndex :
        reGoodToken.lastIndex;
    var i, n, filter;

    if ( parsed.hostnames.length || parsed.notHostnames.length ) {
        n = parsed.hostnames.length;
        for ( i = 0; i < n; i++ ) {
            filter = makeHostnameFilter(parsed, tokenBeg, parsed.hostnames[i]);
            if ( !filter ) {
                return false;
            }
            // NOTE(review): `parsed.domains` is read with the same index as
            // `parsed.hostnames`; assumes both arrays are parallel.
            this.addFilterEntry(
                filter,
                parsed,
                SpecificParty | this.toDomainBits(parsed.domains[i]),
                tokenBeg,
                tokenEnd
            );
        }
        n = parsed.notHostnames.length;
        for ( i = 0; i < n; i++ ) {
            filter = makeNotHostnameFilter(parsed, tokenBeg, parsed.notHostnames[i]);
            if ( !filter ) {
                return false;
            }
            this.addFilterEntry(
                filter,
                parsed,
                SpecificNotParty,
                tokenBeg,
                tokenEnd
            );
        }
        return true;
    }

    filter = makeFilter(parsed, tokenBeg);
    if ( !filter ) {
        return false;
    }
    var party = AnyParty;
    if ( parsed.firstParty ) {
        party = FirstParty;
    } else if ( parsed.thirdParty ) {
        party = ThirdParty;
    }
    this.addFilterEntry(filter, parsed, party, tokenBeg, tokenEnd);
    return true;
};
/******************************************************************************/
// File one filter object under every category it applies to.
//
// The category is `parsed.action | type | party`. When the parsed filter
// lists no types, the filter is filed once under AnyType; otherwise it is
// filed once per listed type.
//
// filter: filter object to store
// parsed: object produced by the filter parser (reads `f`, `action`, `types`)
// party: party bits, possibly combined with domain bits
// tokenBeg, tokenEnd: bounds of the token within `parsed.f`; the substring
//                     is used as the key within each category
FilterContainer.prototype.addFilterEntry = function(filter, parsed, party, tokenBeg, tokenEnd) {
    var tokenKey = parsed.f.slice(tokenBeg, tokenEnd);
    var types = parsed.types;
    var n = types.length;
    if ( n === 0 ) {
        this.addToCategory(parsed.action | AnyType | party, tokenKey, filter);
        return;
    }
    for ( var i = 0; i < n; i++ ) {
        this.addToCategory(parsed.action | types[i] | party, tokenKey, filter);
    }
};
/******************************************************************************/
// Store a filter under `this.categories[categoryKey][tokenKey]`.
//
// The slot holds either nothing, a single filter, or a FilterBucket
// collecting all filters sharing the same category and token:
// - empty slot: the filter is stored as is;
// - slot holds a FilterBucket: the filter is added to it;
// - slot holds a single filter: both are moved into a new FilterBucket.
//
// category: integer category value
// tokenKey: string, token extracted from the filter
// filter: filter object to store
FilterContainer.prototype.addToCategory = function(category, tokenKey, filter) {
    var categoryKey = this.makeCategoryKey(category);
    var categoryBucket = this.categories[categoryKey];
    if ( !categoryBucket ) {
        categoryBucket = this.categories[categoryKey] = {};
    }
    // The bucket is a plain object: a token such as `constructor` would
    // otherwise resolve to a member inherited from Object.prototype and be
    // mistaken for an already stored filter.
    var filterEntry = Object.prototype.hasOwnProperty.call(categoryBucket, tokenKey) ?
        categoryBucket[tokenKey] :
        undefined;
    if ( filterEntry === undefined ) {
        categoryBucket[tokenKey] = filter;
        return;
    }
    if ( filterEntry instanceof FilterBucket ) {
        filterEntry.add(filter);
        return;
    }
    categoryBucket[tokenKey] = new FilterBucket(filterEntry, filter);
};
/******************************************************************************/
// Test a URL against the filters found in `this.buckets[0]` to
// `this.buckets[7]`.
//
// The URL is scanned with `this.reAnyToken`. For each token found, each
// defined bucket is probed in order 0..7 for a filter stored under that
// token; the first filter whose `match(url, tokenBeg)` does not return
// `false` wins.
//
// NOTE(review): relies on `this.reAnyToken` being a global regex which never
// matches the empty string -- confirm where it is defined.
//
// url: string, expected to be already lower-cased by the caller
// returns: the `s` property of the matching filter, or false if none matched
FilterContainer.prototype.matchTokens = function(url) {
    var re = this.reAnyToken;
    var buckets = this.buckets;
    var hasOwn = Object.prototype.hasOwnProperty;
    var matches, beg, token, bucket, f, i;

    re.lastIndex = 0;
    while ( (matches = re.exec(url)) !== null ) {
        beg = matches.index;
        token = url.slice(beg, re.lastIndex);
        for ( i = 0; i < 8; i++ ) {
            bucket = buckets[i];
            if ( bucket === undefined ) {
                continue;
            }
            f = bucket[token];
            if ( f === undefined ) {
                continue;
            }
            // Buckets are plain objects: skip members inherited from
            // Object.prototype (a URL token such as `constructor`), these
            // are not filters and have no usable `match` method.
            if ( !hasOwn.call(bucket, token) ) {
                continue;
            }
            if ( f.match(url, beg) !== false ) {
                return f.s;
            }
        }
    }
    return false;
};
/******************************************************************************/
// Handles filters of the form:
//
//   `||www.example.com^`
//
// These are plain hostnames, and LiquidDict is well optimized for plain
// hostnames, so it is reused here rather than going through the filter
// classes meant for more complex filters.
//
// requestHostname: string, hostname of the request
// returns: the filter (as a string) which matched the hostname or one of its
//          parent hostnames, or false if none did
FilterContainer.prototype.matchAnyPartyHostname = function(requestHostname) {
    var dict = this.blockedAnyPartyHostnames;

    // Exact hostname first: cheapest test
    if ( dict.test(requestHostname) ) {
        return '||' + requestHostname + '^';
    }

    // Then each parent hostname, in the order they are provided
    var ancestors = µBlock.URI.parentHostnamesFromHostname(requestHostname);
    var count = ancestors.length;
    var candidate;
    for ( var k = 0; k < count; k += 1 ) {
        candidate = ancestors[k];
        if ( dict.test(candidate) ) {
            return '||' + candidate + '^';
        }
    }
    return false;
};
/******************************************************************************/
// Handles filters of the form:
//
//   `||www.example.com^$third-party`
//
// These are plain hostnames, and LiquidDict is well optimized for plain
// hostnames, so it is reused here rather than going through the filter
// classes meant for more complex filters.
//
// This method does not check that the request is third-party.
//
// requestHostname: string, hostname of the request
// returns: the filter (as a string) which matched the hostname or one of its
//          parent hostnames, or false if none did
FilterContainer.prototype.match3rdPartyHostname = function(requestHostname) {
    var dict = this.blocked3rdPartyHostnames;

    // Exact hostname first: cheapest test
    if ( dict.test(requestHostname) ) {
        return '||' + requestHostname + '^$third-party';
    }

    // Then each parent hostname, in the order they are provided
    var ancestors = µBlock.URI.parentHostnamesFromHostname(requestHostname);
    var count = ancestors.length;
    var candidate;
    for ( var k = 0; k < count; k += 1 ) {
        candidate = ancestors[k];
        if ( dict.test(candidate) ) {
            return '||' + candidate + '^$third-party';
        }
    }
    return false;
};
/******************************************************************************/
// Specialized handlers

// https://github.com/gorhill/uBlock/issues/116
// Some type of requests are exceptional, they need custom handling,
// not the generic handling.
//
// Same as matchString(), except that only filters which explicitly list the
// request type are tested: hostname dictionaries and "any type" categories
// are left out.
//
// pageDetails: object; `pageDomain` and `pageHostname` are read
// requestURL: string, URL of the request
// requestType: string, a key of `typeNameToTypeValue`
// requestHostname: string, hostname of the request
// returns: false if no block filter matches; the block filter (as a string)
//          if one matches and no allow filter does; otherwise the allow
//          filter prefixed with '@@'
FilterContainer.prototype.matchStringExactType = function(pageDetails, requestURL, requestType, requestHostname) {
    var url = requestURL.toLowerCase();
    var pageDomain = pageDetails.pageDomain || '';

    // First-party means the request hostname is the page domain or one of
    // its subdomains. The match must end on a label boundary, so as to not
    // confuse `example.com` with `notexample.com`.
    var isFirstParty = requestHostname === pageDomain || (
        pageDomain !== '' &&
        requestHostname.slice(-pageDomain.length - 1) === '.' + pageDomain
    );
    var party = isFirstParty ? FirstParty : ThirdParty;

    var domainParty = this.toDomainBits(pageDomain);
    var type = typeNameToTypeValue[requestType];
    var categories = this.categories;
    var buckets = this.buckets;

    // This will be used by hostname-based filters
    // (not declared in this function: written to a variable of an outer scope)
    pageHostname = pageDetails.pageHostname || '';

    // "Any type" categories do not apply here
    buckets[0] = buckets[1] = buckets[2] = buckets[3] = undefined;

    // Test against block filters
    buckets[4] = categories[this.makeCategoryKey(BlockAnyParty | type)];
    buckets[5] = categories[this.makeCategoryKey(BlockAction | type | party)];
    buckets[6] = categories[this.makeCategoryKey(BlockOneParty | type | domainParty)];
    buckets[7] = categories[this.makeCategoryKey(BlockOtherParties | type)];
    var br = this.matchTokens(url);

    // If there is no block filter, no need to test against allow filters
    if ( br === false ) {
        return false;
    }

    // Test against allow filters.
    // "Other parties" filters are stored by addFilter() without domain bits,
    // hence the key must not include `domainParty`, same as for the block
    // filters above.
    buckets[4] = categories[this.makeCategoryKey(AllowAnyParty | type)];
    buckets[5] = categories[this.makeCategoryKey(AllowAction | type | party)];
    buckets[6] = categories[this.makeCategoryKey(AllowOneParty | type | domainParty)];
    buckets[7] = categories[this.makeCategoryKey(AllowOtherParties | type)];
    var ar = this.matchTokens(url);
    if ( ar !== false ) {
        return '@@' + ar;
    }
    return br;
};
/******************************************************************************/
// Test a request against all block filters, then, only if one matches,
// against all allow filters.
//
// pageDetails: object; `pageDomain` and `pageHostname` are read
// requestURL: string, URL of the request
// requestType: string, a key of `typeNameToTypeValue`
// requestHostname: string, hostname of the request
// returns: false if no block filter matches; the block filter (as a string)
//          if one matches and no allow filter does; otherwise the allow
//          filter prefixed with '@@'
FilterContainer.prototype.matchString = function(pageDetails, requestURL, requestType, requestHostname) {
    // https://github.com/gorhill/httpswitchboard/issues/239
    // Convert url to lower case:
    //     `match-case` option not supported, but then, I saw only one
    //     occurrence of it in all the supported lists (bulgaria list).
    var url = requestURL.toLowerCase();

    // The logic here is simple:
    //
    // block = !whitelisted &&  blacklisted
    //   or equivalent
    // allow =  whitelisted || !blacklisted

    // Statistically, hits on a URL in order of likelihood:
    // 1. No hit
    // 2. Hit on a block filter
    // 3. Hit on an allow filter
    //
    // High likelihood of "no hit" means to optimize we need to reduce as much
    // as possible the number of filters to test.
    //
    // Then, because of the order of probabilities, we should test only
    // block filters first, and test allow filters if and only if there is a
    // hit on a block filter. Since there is a high likelihood of no hit,
    // testing allow filter by default is likely wasted work, hence allow
    // filters are tested *only* if there is a (unlikely) hit on a block
    // filter.

    var pageDomain = pageDetails.pageDomain || '';

    // First-party means the request hostname is the page domain or one of
    // its subdomains. The match must end on a label boundary, so as to not
    // confuse `example.com` with `notexample.com`.
    var isFirstParty = requestHostname === pageDomain || (
        pageDomain !== '' &&
        requestHostname.slice(-pageDomain.length - 1) === '.' + pageDomain
    );
    var party = isFirstParty ? FirstParty : ThirdParty;

    // Test hostname-based block filters
    var br = this.matchAnyPartyHostname(requestHostname);
    if ( br === false && party === ThirdParty ) {
        br = this.match3rdPartyHostname(requestHostname);
    }

    // This will be used by hostname-based filters
    // (not declared in this function: written to a variable of an outer scope)
    pageHostname = pageDetails.pageHostname || '';

    var domainParty = this.toDomainBits(pageDomain);
    var type = typeNameToTypeValue[requestType];
    var categories = this.categories;
    var buckets = this.buckets;

    // Test against block filters, unless a hostname-based filter already hit
    if ( br === false ) {
        buckets[0] = categories[this.makeCategoryKey(BlockAnyTypeAnyParty)];
        buckets[1] = categories[this.makeCategoryKey(BlockAnyType | party)];
        buckets[2] = categories[this.makeCategoryKey(BlockAnyTypeOneParty | domainParty)];
        buckets[3] = categories[this.makeCategoryKey(BlockAnyTypeOtherParties)];
        buckets[4] = categories[this.makeCategoryKey(BlockAnyParty | type)];
        buckets[5] = categories[this.makeCategoryKey(BlockAction | type | party)];
        buckets[6] = categories[this.makeCategoryKey(BlockOneParty | type | domainParty)];
        buckets[7] = categories[this.makeCategoryKey(BlockOtherParties | type)];
        br = this.matchTokens(url);
    }

    // If there is no block filter, no need to test against allow filters
    if ( br === false ) {
        return false;
    }

    // Test against allow filters.
    // "Other parties" filters are stored by addFilter() without domain bits,
    // hence their keys must not include `domainParty`, same as for the block
    // filters above.
    buckets[0] = categories[this.makeCategoryKey(AllowAnyTypeAnyParty)];
    buckets[1] = categories[this.makeCategoryKey(AllowAnyType | party)];
    buckets[2] = categories[this.makeCategoryKey(AllowAnyTypeOneParty | domainParty)];
    buckets[3] = categories[this.makeCategoryKey(AllowAnyTypeOtherParties)];
    buckets[4] = categories[this.makeCategoryKey(AllowAnyParty | type)];
    buckets[5] = categories[this.makeCategoryKey(AllowAction | type | party)];
    buckets[6] = categories[this.makeCategoryKey(AllowOneParty | type | domainParty)];
    buckets[7] = categories[this.makeCategoryKey(AllowOtherParties | type)];
    var ar = this.matchTokens(url);
    if ( ar !== false ) {
        return '@@' + ar;
    }
    return br;
};
/******************************************************************************/
// Number of filters held by this container.
//
// returns: sum of the block filter and allow filter counters
FilterContainer.prototype.getFilterCount = function() {
    var total = this.blockFilterCount + this.allowFilterCount;
    return total;
};
/******************************************************************************/
return new FilterContainer();
/******************************************************************************/
})();