1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-14 23:12:28 +02:00

Squashed commit of the following:

commit 7c6cacc59b27660fabacb55d668ef099b222a9e6
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Sat Nov 3 08:52:51 2018 -0300

    code review: finalize support for wasm-based hntrie

commit 8596ed80e3bdac2c36e3c860b51e7189f6bc8487
Merge: cbe1f2e 000eb82
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Sat Nov 3 08:41:40 2018 -0300

    Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm

commit cbe1f2e2f38484d42af3204ec7f1b5decd30f99e
Merge: 270fc7f dbb7e80
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Fri Nov 2 17:43:20 2018 -0300

    Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm

commit 270fc7f9b3b73d79e6355522c1a42ce782fe7e5c
Merge: d2a89cf d693d4f
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Fri Nov 2 16:21:08 2018 -0300

    Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm

commit d2a89cf28f0816ffd4617c2c7b4ccfcdcc30e1b4
Merge: d7afc78 649f82f
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Fri Nov 2 14:54:58 2018 -0300

    Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm

commit d7afc78b5f5675d7d34c5a1d0ec3099a77caef49
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Fri Nov 2 13:56:11 2018 -0300

    finalize wasm-based hntrie implementation

commit e7b9e043cf36ad055791713e34eb0322dec84627
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Fri Nov 2 08:14:02 2018 -0300

    add first-pass implementation of wasm version of hntrie

commit 1015cb34624f3ef73ace58b58fe4e03dfc59897f
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Wed Oct 31 17:16:47 2018 -0300

    back up draft work toward experimenting with wasm hntries
This commit is contained in:
Raymond Hill 2018-11-03 08:58:46 -03:00
parent 000eb82f08
commit d7d544cda0
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
10 changed files with 47177 additions and 668 deletions

View File

@ -33,12 +33,12 @@ if ( vAPI.webextFlavor === undefined ) {
/******************************************************************************/
var µBlock = (function() { // jshint ignore:line
const µBlock = (function() { // jshint ignore:line
var oneSecond = 1000,
const oneSecond = 1000,
oneMinute = 60 * oneSecond;
var hiddenSettingsDefault = {
const hiddenSettingsDefault = {
assetFetchTimeout: 30,
autoUpdateAssetFetchPeriod: 120,
autoUpdatePeriod: 7,
@ -56,7 +56,7 @@ var µBlock = (function() { // jshint ignore:line
userResourcesLocation: 'unset'
};
var whitelistDefault = [
const whitelistDefault = [
'about-scheme',
'chrome-extension-scheme',
'chrome-scheme',

View File

@ -1,7 +1,7 @@
/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2017 Raymond Hill
Copyright (C) 2017-present Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -19,6 +19,9 @@
Home: https://github.com/gorhill/uBlock
*/
/* globals WebAssembly */
/* exported hnTrieManager */
'use strict';
/*******************************************************************************
@ -37,89 +40,115 @@
For example, `www.abc.com` is deemed matching `abc.com`, because the former
is a subdomain of the latter. The opposite is of course not true.
The resulting read-only trie created as a result of using HNTrieBuilder are
The resulting read-only tries created as a result of using hnTrieManager are
simply just typed arrays filled with integers. The matching algorithm is
just a matter of reading/comparing these integers, and further using them as
indices in the array as a way to move around in the trie.
There is still place for optimizations. Specifically, I could force the
strings to be properly sorted so that `HNTrie.matches` could bail earlier
when trying to find a matching descendant -- but suspect the gain would be
marginal, if measurable.
[1] To solve <https://github.com/gorhill/uBlock/issues/3193>
*/
var HNTrieBuilder = function() {
this.reset();
};
const hnTrieManager = {
tree: null,
treesz: 0,
trie: new Uint8Array(65536),
trie32: null,
triesz: 256, // bytes 0-254: decoded needle, byte 255: needle length
id: 0,
needle: '',
wasmLoading: null,
wasmMemory: null,
cleanupToken: 0,
cleanupTimer: undefined,
/*******************************************************************************
reset: function() {
if ( this.wasmMemory === null && this.trie.byteLength > 65536 ) {
this.trie = new Uint8Array(65536);
this.trie32 = new Uint32Array(this.trie.buffer);
} else {
this.trie.fill(0);
}
this.triesz = 256;
this.needle = '';
this.id += 1;
},
A plain javascript array is used to build the trie. It will be casted into
the appropriate read-only TypedArray[1] at vacuum time.
readyToUse: function() {
return this.wasmLoading instanceof Promise
? this.wasmLoading
: Promise.resolve();
},
[1] Depending on the size: Uint8Array, Uint16Array, or Uint32Array.
isValidRef: function(ref) {
return ref !== null && ref.id === this.id;
},
*/
HNTrieBuilder.prototype.reset = function() {
this.buf = [];
this.bufsz = 0;
this.buf[0] = 0;
this.buf[1] = 0;
this.buf[2] = 0;
setNeedle: function(needle) {
if ( needle !== this.needle ) {
const buf = this.trie;
let i = needle.length;
buf[255] = i;
while ( i-- ) {
buf[i] = needle.charCodeAt(i);
}
this.needle = needle;
}
return this;
};
},
/*******************************************************************************
Helpers for convenience.
*/
HNTrieBuilder.fromDomainOpt = function(domainOpt) {
var builder = new HNTrieBuilder();
builder.fromDomainOpt(domainOpt);
return builder.vacuum();
};
HNTrieBuilder.fromIterable = function(hostnames) {
var builder = new HNTrieBuilder();
builder.fromIterable(hostnames);
return builder.vacuum();
};
HNTrieBuilder.print = function(trie) {
var buf = trie.buf,
i = 0, cc = [], ic, indent = 0,
forks = [];
matchesJS: function(itrie) {
const buf = this.trie;
const buf32 = this.trie32;
let ineedle = buf[255];
for (;;) {
if ( buf[i] !== 0 ) {
forks.push(i, indent);
ineedle -= 1;
const nchar = ineedle === -1 ? 0 : buf[ineedle];
for (;;) {
const tchar = buf[itrie+8]; // quick test: first character
if ( tchar === nchar ) { break; }
if ( tchar === 0 && nchar === 0x2E ) { return 1; }
itrie = buf32[itrie >>> 2];
if ( itrie === 0 ) { return 0; } // no more descendants
}
cc.unshift(buf[i+2]);
for ( ic = 0; ic < buf[i+3]; ic++ ) {
cc.unshift(buf[i+4+ic]);
if ( nchar === 0 ) { return 1; }
let lxtra = buf[itrie+9]; // length of extra charaters
if ( lxtra !== 0 ) { // cell is only one character
if ( lxtra > ineedle ) { return 0; }
let ixtra = itrie + 10;
lxtra += ixtra;
do {
ineedle -= 1;
if ( buf[ineedle] !== buf[ixtra] ) { return 0; }
ixtra += 1;
} while ( ixtra !== lxtra );
}
console.log('\xB7'.repeat(indent) + String.fromCharCode.apply(null, cc));
indent += cc.length;
cc = [];
i = buf[i+1];
if ( i === 0 ) {
if ( forks.length === 0 ) { break; }
indent = forks.pop();
i = forks.pop();
i = buf[i];
itrie = buf32[itrie + 4 >>> 2];
if ( itrie === 0 ) {
return ineedle === 0 || buf[ineedle-1] === 0x2E ? 1 : 0;
}
}
};
},
matchesWASM: null,
matches: null,
/*******************************************************************************
start: function() {
if ( this.trie32 === null ) {
this.trie32 = new Uint32Array(this.trie.buffer);
}
this.treesz = 0;
if ( this.tree === null ) {
this.tree = new Uint32Array(16384);
}
this.tree[0] = 0;
this.tree[1] = 0;
this.tree[2] = 0;
},
Since this trie is specialized for matching hostnames, the stored strings are
reversed internally, because of hostname comparison logic:
/***************************************************************************
Since this trie is specialized for matching hostnames, the stored
strings are reversed internally, because of hostname comparison logic:
Correct matching:
index 0123456
@ -137,87 +166,57 @@ HNTrieBuilder.print = function(trie) {
*/
HNTrieBuilder.prototype.add = function(hn) {
var ichar = hn.length - 1;
add: function(hn) {
// 256 * 3 + 3 = 771
if ( this.treesz + 771 >= this.tree.length ) {
this.growTree();
}
let ichar = hn.length - 1;
if ( ichar === -1 ) { return; }
var c = hn.charCodeAt(ichar),
let c = hn.charCodeAt(ichar),
i = 0, inext;
for (;;) {
if ( this.buf[i+2] !== c ) { // match not found
inext = this.buf[i]; // move to descendant
if ( this.tree[i+2] !== c ) { // match not found
inext = this.tree[i]; // move to descendant
if ( inext === 0 ) { break; } // no descendant
} else { // match found
if ( c === 0 ) { return; }
inext = this.buf[i+1]; // move to sibling
inext = this.tree[i+1]; // move to sibling
ichar -= 1;
c = ichar === -1 ? 0 : hn.charCodeAt(ichar);
}
i = inext;
}
// Any new string added will always cause a new descendant to be created.
// The only time this is not the case is when trying to store a string
// which is already in the trie.
inext = this.bufsz; // new descendant cell
this.buf[i] = inext;
this.buf[inext+0] = 0; // jump index to descendant
this.buf[inext+1] = 0; // jump index to sibling
this.buf[inext+2] = c; // character code
this.bufsz += 3;
// Any new string added will always cause a new descendant to be
// created. The only time this is not the case is when trying to
// store a string which is already in the trie.
inext = this.treesz; // new descendant cell
this.tree[i] = inext;
this.tree[inext+0] = 0; // jump index to descendant
this.tree[inext+1] = 0; // jump index to sibling
this.tree[inext+2] = c; // character code
this.treesz += 3;
if ( c === 0 ) { return; } // character zero is always last cell
do {
i = inext; // new branch sprouting made from
ichar -= 1; // all characters left to store
c = ichar === -1 ? 0 : hn.charCodeAt(ichar);
inext = this.bufsz;
this.buf[i+1] = inext;
this.buf[inext+0] = 0;
this.buf[inext+1] = 0;
this.buf[inext+2] = c;
this.bufsz += 3;
inext = this.treesz;
this.tree[i+1] = inext;
this.tree[inext+0] = 0;
this.tree[inext+1] = 0;
this.tree[inext+2] = c;
this.treesz += 3;
} while ( c!== 0 );
};
},
/*******************************************************************************
growTree: function() {
let tree = new Uint32Array(this.tree.length + 16384);
tree.set(this.tree);
this.tree = tree;
},
Not using String.split('|') to avoid memory churning.
*/
HNTrieBuilder.prototype.fromDomainOpt = function(hostnames) {
return this.fromIterable(hostnames.split('|'));
};
HNTrieBuilder.prototype.fromIterable = function(hostnames) {
var hns = Array.from(hostnames).sort(function(a, b) {
return a.length - b.length;
});
// https://github.com/gorhill/uBlock/issues/3328
// Must sort from shortest to longest.
for ( var hn of hns ) {
this.add(hn);
}
return this;
};
/******************************************************************************/
HNTrieBuilder.prototype.matches = function(needle) {
var ichar = needle.length - 1,
buf = this.buf, i = 0, c;
for (;;) {
c = ichar === -1 ? 0 : needle.charCodeAt(ichar);
while ( buf[i+2] !== c ) {
i = buf[i];
if ( i === 0 ) { return false; }
}
if ( c === 0 ) { return true; }
i = buf[i+1];
if ( i === 0 ) { return c === 0x2E; }
ichar -= 1;
}
};
/*******************************************************************************
/***************************************************************************
Before vacuuming, each cell is 3 entry-long:
- Jump index to descendant (if any)
@ -225,9 +224,9 @@ HNTrieBuilder.prototype.matches = function(needle) {
- character code
All strings stored in the un-vacuumed trie are zero-terminated, and the
character zero does occupy a cell like any other character. Let's use _ to
represent character zero for sake of comments. The asterisk will be used to
highlight a node with a descendant.
character zero does occupy a cell like any other character. Let's
use _ to represent character zero for sake of comments. The asterisk
will be used to highlight a node with a descendant.
Cases, before vacuuming:
@ -251,14 +250,14 @@ HNTrieBuilder.prototype.matches = function(needle) {
_ -- b -- . -- c -- o -- m
_ -- a
Vacuuming is the process of merging sibling cells with no descendants. Cells
with descendants can't be merged.
Vacuuming is the process of merging sibling cells with no descendants.
Cells with descendants can't be merged.
Each time we arrive at the end of a horizontal branch (sibling jump index is
0), we walk back to the nearest previous node with descendants, and repeat
the process. Since there is no index information on where to come back, a
stack is used to remember cells with descendants (descendant jump index is
non zero) encountered on the way
Each time we arrive at the end of a horizontal branch (sibling jump
index is 0), we walk back to the nearest previous node with descendants,
and repeat the process. Since there is no index information on where to
come back, a stack is used to remember cells with descendants (descendant
jump index is non zero) encountered on the way
After vacuuming, each cell is 4+n entry-long:
- Jump index to descendant (if any)
@ -292,40 +291,55 @@ HNTrieBuilder.prototype.matches = function(needle) {
It's not possible for a character zero cell to have next siblings.
This will have to be taken into account during both vacuuming and matching.
This will have to be taken into account during both vacuuming and
matching.
Character zero cells with no descendant are discarded during vacuuming.
Character zero cells with a descendant, or character zero cells which are a
descendant are kept in the vacuumed trie.
Character zero cells with a descendant, or character zero cells which
are a descendant are kept in the vacuumed trie.
A vacuumed trie is very efficient memory- and lookup-wise, but is also
read-only: no string can be added or removed. The read-only trie is really
just a self-sufficient array of integers, and can easily be exported/imported
as a JSON array. It is theoretically possible to "decompile" a trie (vacuumed
or not) into the set of strings originally added to it (in the order they
were added with the current implementation), but so far I do not need this
feature.
read-only: no string can be added or removed. The read-only trie is
really just a self-sufficient array of integers, and can easily be
exported/imported as a JSON array. It is theoretically possible to
"decompile" a trie (vacuumed or not) into the set of strings originally
added to it (in the order they were added with the current
implementation), but so far I do not need this feature.
TODO: It's possible to build the vacuumed trie on the fly as items are
added to it. I need to carefully list all possible cases which can arise
at insertion time. The benefits will be: faster creation time (expected), no
longer read-only trie (items can be added at any time).
New vacuum output array format:
byte 0..3: offset to descendant
byte 4..7: offset to sibling
byte 8: first character
byte 9: number of extra characters
Offset values are stored as native uint32, i.e. little-endian.
4 + 4 + 1 + 1 = 10 bytes for one character, otherwise
4 + 4 + 1 + 1 + n = 10 + n bytes for one + n character(s)
Each cell is then padded so that the next cell starts on a 4-byte boundary.
*/
HNTrieBuilder.prototype.vacuum = function() {
if ( this.bufsz === 0 ) { return null; }
var input = this.buf,
output = [], outsz = 0,
forks = [],
iin = 0, iout;
finish: function() {
if ( this.treesz === 0 ) { return null; }
const input = this.tree,
iout0 = this.triesz,
forks = [];
let output = this.trie,
output32 = this.trie32,
iout1 = iout0,
iout2 = output.byteLength,
iin = 0;
for (;;) {
iout = outsz;
output[iout+0] = 0;
output[iout+1] = 0;
output[iout+2] = input[iin+2]; // first character
output[iout+3] = 0;
outsz += 4;
if ( (iout1 + 266) >= iout2 ) {
this.growTrie();
output = this.trie;
output32 = this.trie32;
iout2 = output.byteLength;
}
let iout = iout1;
output32[iout >>> 2] = 0;
output32[iout + 4 >>> 2] = 0;
output[iout+8] = input[iin+2]; // first character
output[iout+9] = 0; // extra character count
iout1 += 10;
if ( input[iin] !== 0 ) { // cell with descendant
forks.push(iout, iin); // defer processing
}
@ -334,264 +348,184 @@ HNTrieBuilder.prototype.vacuum = function() {
if ( iin === 0 ) { break; } // no more sibling cell
if ( input[iin] !== 0 ) { break; } // cell with a descendant
if ( input[iin+2] === 0 ) { break; } // don't merge \x00
output[outsz] = input[iin+2]; // add character data
outsz += 1;
output[iout1] = input[iin+2]; // add character data
iout1 += 1;
}
if ( outsz !== iout + 4 ) { // cells were merged
output[iout+3] = outsz - iout - 4; // so adjust count
if ( iout1 !== iout + 10 ) { // cells were merged
output[iout+9] = iout1 - iout - 10; // so adjust count
}
iout1 = (iout1 + 3) & ~3; // align to i32
if ( iin !== 0 && input[iin] !== 0 ) { // can't merge this cell
output[iout+1] = outsz;
output32[iout + 4 >>> 2] = iout1;
continue;
}
if ( forks.length === 0 ) { break; } // no more descendants: bye
iin = forks.pop(); // process next descendant
iout = forks.pop();
iin = input[iin];
output[iout] = outsz;
output32[iout >>> 2] = iout1;
}
var trie; // pick optimal read-only
if ( outsz < 256 ) { // container array.
trie = new this.HNTrie8(output, outsz);
} else if ( outsz < 65536 ) {
trie = new this.HNTrie16(output, outsz);
this.triesz = iout1;
this.cleanupAsync();
return new HNTrieRef(iout0);
},
fromIterable: function(hostnames) {
this.start();
const hns = Array.from(hostnames).sort(function(a, b) {
return a.length - b.length;
});
// https://github.com/gorhill/uBlock/issues/3328
// Must sort from shortest to longest.
for ( let hn of hns ) {
this.add(hn);
}
return this.finish();
},
fromDomainOpt: function(hostnames) {
return this.fromIterable(hostnames.split('|'));
},
growTrie: function() {
let trie;
if ( this.wasmMemory === null ) {
trie = new Uint8Array(this.trie.byteLength + 65536);
trie.set(this.trie);
} else {
trie = new this.HNTrie32(output, outsz);
this.wasmMemory.grow(1);
trie = new Uint8Array(this.wasmMemory.buffer);
}
this.reset(); // free working array
return trie;
};
this.trie = trie;
this.trie32 = new Uint32Array(this.trie.buffer);
},
/*******************************************************************************
The following internal classes are the actual output of the vacuum() method.
They use the minimal amount of data to be able to efficiently lookup strings
in a read-only trie.
Given that javascript optimizers mind that the type of an argument passed to
a function always stays the same each time the function is called, there need
to be three separate implementation of matches() to allow the javascript
optimizer to do its job.
The matching code deals only with looking up values in a TypedArray (beside
calls to String.charCodeAt), so I expect this to be fast and good candidate
for optimization by javascript engines.
cleanupAsync: function() {
if ( this.cleanupTimer === undefined ) {
this.cleanupToken = this.triesz;
this.cleanupTimer = setTimeout(( ) => {
this.cleanupTimer = undefined;
if ( this.cleanupToken !== this.triesz ) {
this.cleanupAsync();
} else {
this.tree = null;
}
}, 30000);
}
},
// For debugging purpose
// TODO: currently broken, needs to be fixed as per new buffer format.
/*
print: function(offset) {
let i = offset, cc = [], indent = 0,
forks = [];
for (;;) {
if ( buf[i] !== 0 ) {
forks.push(i, indent);
}
cc.unshift(buf[i+2]);
for ( let ic = 0; ic < buf[i+3]; ic++ ) {
cc.unshift(buf[i+4+ic]);
}
console.log('\xB7'.repeat(indent) + String.fromCharCode.apply(null, cc));
indent += cc.length;
cc = [];
i = buf[i+1];
if ( i === 0 ) {
if ( forks.length === 0 ) { break; }
indent = forks.pop();
i = forks.pop();
i = buf[i];
}
}
},
*/
HNTrieBuilder.prototype.HNTrie8 = function(buf, bufsz) {
this.buf = new Uint8Array(buf.slice(0, bufsz));
};
HNTrieBuilder.prototype.HNTrie8.prototype.matches = function(needle) {
var ichar = needle.length,
i = 0, c1, c2, ccnt, ic, i1, i2;
for (;;) {
ichar -= 1;
c1 = ichar === -1 ? 0 : needle.charCodeAt(ichar);
while ( (c2 = this.buf[i+2]) !== c1 ) { // quick test: first character
if ( c2 === 0 && c1 === 0x2E ) { return true; }
i = this.buf[i]; // next descendant
if ( i === 0 ) { return false; } // no more descendants
}
if ( c1 === 0 ) { return true; }
ccnt = this.buf[i+3];
if ( ccnt !== 0 ) { // cell is only one character
if ( ccnt > ichar ) { return false; }
ic = ccnt; i1 = ichar-1; i2 = i+4;
while ( ic-- && needle.charCodeAt(i1-ic) === this.buf[i2+ic] );
if ( ic !== -1 ) { return false; }
ichar -= ccnt;
}
i = this.buf[i+1]; // next sibling
if ( i === 0 ) {
return ichar === 0 || needle.charCodeAt(ichar-1) === 0x2E;
}
}
};
/******************************************************************************/
HNTrieBuilder.prototype.HNTrie16 = function(buf, bufsz) {
this.buf = new Uint16Array(buf.slice(0, bufsz));
};
(function() {
// Default to javascript version.
hnTrieManager.matches = hnTrieManager.matchesJS;
HNTrieBuilder.prototype.HNTrie16.prototype.matches = function(needle) {
var ichar = needle.length,
i = 0, c1, c2, ccnt, ic, i1, i2;
for (;;) {
ichar -= 1;
c1 = ichar === -1 ? 0 : needle.charCodeAt(ichar);
while ( (c2 = this.buf[i+2]) !== c1 ) { // quick test: first character
if ( c2 === 0 && c1 === 0x2E ) { return true; }
i = this.buf[i]; // next descendant
if ( i === 0 ) { return false; } // no more descendants
if (
typeof WebAssembly !== 'object' ||
typeof WebAssembly.instantiateStreaming !== 'function'
) {
return;
}
if ( c1 === 0 ) { return true; }
ccnt = this.buf[i+3];
if ( ccnt !== 0 ) { // cell is only one character
if ( ccnt > ichar ) { return false; }
ic = ccnt; i1 = ichar-1; i2 = i+4;
while ( ic-- && needle.charCodeAt(i1-ic) === this.buf[i2+ic] );
if ( ic !== -1 ) { return false; }
ichar -= ccnt;
}
i = this.buf[i+1]; // next sibling
if ( i === 0 ) {
return ichar === 0 || needle.charCodeAt(ichar-1) === 0x2E;
}
}
};
HNTrieBuilder.prototype.HNTrie32 = function(buf, bufsz) {
this.buf = new Uint32Array(buf.slice(0, bufsz));
};
HNTrieBuilder.prototype.HNTrie32.prototype.matches = function(needle) {
var ichar = needle.length,
i = 0, c1, c2, ccnt, ic, i1, i2;
for (;;) {
ichar -= 1;
c1 = ichar === -1 ? 0 : needle.charCodeAt(ichar);
while ( (c2 = this.buf[i+2]) !== c1 ) { // quick test: first character
if ( c2 === 0 && c1 === 0x2E ) { return true; }
i = this.buf[i]; // next descendant
if ( i === 0 ) { return false; } // no more descendants
// Soft-dependency on vAPI so that the code here can be used outside of
// uBO (i.e. tests, benchmarks)
if (
typeof vAPI === 'object' &&
vAPI.webextFlavor.soup.has('firefox') === false
) {
return;
}
if ( c1 === 0 ) { return true; }
ccnt = this.buf[i+3];
if ( ccnt !== 0 ) { // cell is only one character
if ( ccnt > ichar ) { return false; }
ic = ccnt; i1 = ichar-1; i2 = i+4;
while ( ic-- && needle.charCodeAt(i1-ic) === this.buf[i2+ic] );
if ( ic !== -1 ) { return false; }
ichar -= ccnt;
}
i = this.buf[i+1]; // next sibling
if ( i === 0 ) {
return ichar === 0 || needle.charCodeAt(ichar-1) === 0x2E;
}
}
};
/*******************************************************************************
// The wasm module will work only if CPU is natively little-endian,
// as we use native uint32 array in our trie-creation js code.
const uint32s = new Uint32Array(1);
const uint8s = new Uint8Array(uint32s.buffer);
uint32s[0] = 1;
if ( uint8s[0] !== 1 ) { return; }
Experimenting: WebAssembly version.
Developed using this simple online tool: https://wasdk.github.io/WasmFiddle/
>>> start of C code
unsigned short buffer[0];
int matches(int id, int cclen)
let workingDir;
{
unsigned short* cc0 = &buffer[0];
unsigned short* cc = cc0 + cclen;
unsigned short* cell0 = &buffer[512+id];
unsigned short* cell = cell0;
unsigned short* ww;
int c1, c2, ccnt;
for (;;) {
c1 = cc <= cc0 ? 0 : *--cc;
for (;;) {
c2 = cell[2];
if ( c2 == c1 ) { break; }
if ( c2 == 0 && c1 == 0x2E ) { return 1; }
if ( cell[0] == 0 ) { return 0; }
cell = cell0 + cell[0];
const url = document.currentScript.src;
const match = /[^\/]+$/.exec(url);
workingDir = match !== null
? url.slice(0, match.index)
: '';
}
if ( c1 == 0 ) { return 1; }
ccnt = cell[3];
if ( ccnt != 0 ) {
if ( cc - ccnt < cc0 ) { return 0; }
ww = cell + 4;
while ( ccnt-- ) {
if ( *--cc != *ww++ ) { return 0; }
}
}
if ( cell[1] == 0 ) {
if ( cc == cc0 ) { return 1; }
if ( *--cc == 0x2E ) { return 1; }
return 0;
}
cell = cell0 + cell[1];
}
}
int getLinearMemoryOffset() {
return (int)&buffer[0];
}
<<< end of C code
Observations:
- When growing memory, we must re-create the typed array js-side. The content
of the array is preserved by grow().
- It's slower than the javascript version... Possible explanations:
- Call overhead: https://github.com/WebAssembly/design/issues/1120
- Having to copy whole input string in buffer before call.
const memory = new WebAssembly.Memory({ initial: 1 });
var HNTrie16wasm = (function() {
var module;
var instance;
var memory;
var memoryOrigin = 0;
var memoryUsed = 1024;
var cbuffer;
var tbuffer;
var tbufferSize = 0;
var matchesFn;
var init = function() {
module = new WebAssembly.Module(new Uint8Array([0,97,115,109,1,0,0,0,1,139,128,128,128,0,2,96,2,127,127,1,127,96,0,1,127,3,131,128,128,128,0,2,0,1,4,132,128,128,128,0,1,112,0,0,5,131,128,128,128,0,1,0,1,6,129,128,128,128,0,0,7,172,128,128,128,0,3,6,109,101,109,111,114,121,2,0,7,109,97,116,99,104,101,115,0,0,21,103,101,116,76,105,110,101,97,114,77,101,109,111,114,121,79,102,102,115,101,116,0,1,10,217,130,128,128,0,2,202,130,128,128,0,1,5,127,32,1,65,1,116,65,12,106,33,3,32,0,65,1,116,65,140,8,106,34,2,33,0,2,64,2,64,2,64,2,64,2,64,2,64,3,64,65,0,33,5,2,64,32,3,65,12,77,13,0,32,3,65,126,106,34,3,47,1,0,33,5,11,2,64,32,5,32,0,47,1,4,34,1,70,13,0,2,64,32,5,65,46,71,13,0,3,64,32,1,65,255,255,3,113,69,13,5,32,0,47,1,0,34,1,69,13,6,32,2,32,1,65,1,116,106,34,0,47,1,4,34,1,65,46,71,13,0,12,2,11,11,3,64,32,0,47,1,0,34,1,69,13,3,32,5,32,2,32,1,65,1,116,106,34,0,47,1,4,71,13,0,11,11,65,1,33,6,32,5,69,13,5,2,64,2,64,32,0,47,1,6,34,1,69,13,0,32,3,32,1,65,1,116,107,65,12,73,13,8,32,1,65,127,115,33,5,32,0,65,8,106,33,1,3,64,32,5,65,1,106,34,5,69,13,1,32,1,47,1,0,33,4,32,1,65,2,106,33,1,32,4,32,3,65,126,106,34,3,47,1,0,70,13,0,12,2,11,11,32,0,47,1,2,34,1,69,13,5,32,2,32,1,65,1,116,106,33,0,12,1,11,11,65,0,15,11,65,0,15,11,65,1,15,11,65,0,15,11,32,3,65,12,70,13,0,32,3,65,126,106,47,1,0,65,46,70,33,6,11,32,6,15,11,65,0,11,132,128,128,128,0,0,65,12,11]));
instance = new WebAssembly.Instance(module);
memory = instance.exports.memory;
memoryOrigin = instance.exports.getLinearMemoryOffset();
cbuffer = new Uint16Array(memory.buffer, memoryOrigin, 512);
tbuffer = new Uint16Array(memory.buffer, memoryOrigin + 1024);
memoryUsed = memoryOrigin + 1024;
matchesFn = instance.exports.matches;
};
return {
create: function(data) {
if ( module === undefined ) { init(); }
var bytesNeeded = memoryUsed + ((data.length * 2 + 3) & ~3);
if ( bytesNeeded > memory.buffer.byteLength ) {
memory.grow((bytesNeeded - memory.buffer.byteLength + 65535) >>> 16);
cbuffer = new Uint16Array(memory.buffer, memoryOrigin, 512);
tbuffer = new Uint16Array(memory.buffer, memoryOrigin + 1024);
hnTrieManager.wasmLoading = WebAssembly.instantiateStreaming(
fetch(workingDir + 'wasm/hntrie.wasm', { mode: 'same-origin' }),
{ imports: { memory } }
).then(result => {
hnTrieManager.wasmLoading = null;
if ( !result || !result.instance ) { return; }
const pageCount = hnTrieManager.trie.byteLength >>> 16;
if ( pageCount > 1 ) {
memory.grow(pageCount - 1);
}
for ( var i = 0, j = tbufferSize; i < data.length; i++, j++ ) {
tbuffer[j] = data[i];
const trie = new Uint8Array(memory.buffer);
trie.set(hnTrieManager.trie);
hnTrieManager.trie = trie;
if ( hnTrieManager.trie32 !== null ) {
hnTrieManager.trie32 = new Uint32Array(memory.buffer);
}
var id = tbufferSize;
tbufferSize += data.length;
if ( tbufferSize & 1 ) { tbufferSize += 1; }
memoryUsed += tbufferSize * 2;
return id;
},
reset: function() {
module = undefined;
instance = undefined;
memory = undefined;
memory.grow(1);
memoryUsed = 1024;
cbuffer = undefined;
tbuffer = undefined;
tbufferSize = 0;
},
matches: function(id, hn) {
var len = hn.length;
if ( len > 512 ) {
hn = hn.slice(-512);
var pos = hn.indexOf('.');
if ( pos !== 0 ) {
hn = hn.slice(pos + 1);
}
len = hn.length;
}
var needle = cbuffer, i = len;
while ( i-- ) {
needle[i] = hn.charCodeAt(i);
}
return matchesFn(id, len) === 1;
}
};
hnTrieManager.wasmMemory = memory;
hnTrieManager.matchesWASM = result.instance.exports.matches;
hnTrieManager.matches = hnTrieManager.matchesWASM;
}).catch(reason => {
hnTrieManager.wasmLoading = null;
console.error(reason);
});
})();
*/
/******************************************************************************/
// Lightweight handle to one trie managed by hnTrieManager.
//   offset: value forwarded to the manager's matching functions to designate
//           the trie to look into.
//   id:     snapshot of hnTrieManager.id taken at creation time; the handle
//           is considered valid only while the manager's id is unchanged.
const HNTrieRef = function(offset) {
    this.id = hnTrieManager.id;
    this.offset = offset;
};

HNTrieRef.prototype = {
    // True as long as the manager's id is the one this handle was created
    // under.
    isValid: function() {
        const currentId = hnTrieManager.id;
        return this.id === currentId;
    },
    // Match `needle` using whichever implementation the manager currently
    // exposes as `matches`.
    matches: function(needle) {
        const manager = hnTrieManager.setNeedle(needle);
        return manager.matches(this.offset);
    },
    // Match `needle` using the manager's `matchesJS` implementation.
    matchesJS: function(needle) {
        const manager = hnTrieManager.setNeedle(needle);
        return manager.matchesJS(this.offset);
    },
    // Match `needle` using the manager's `matchesWASM` implementation.
    matchesWASM: function(needle) {
        const manager = hnTrieManager.setNeedle(needle);
        return manager.matchesWASM(this.offset);
    },
};

View File

@ -29,7 +29,7 @@
/******************************************************************************/
var µb = µBlock;
const µb = µBlock;
/******************************************************************************/
@ -287,7 +287,12 @@ var onFirstFetchReady = function(fetched) {
onVersionReady(fetched.version);
onCommandShortcutsReady(fetched.commandShortcuts);
µb.loadPublicSuffixList(onPSLReady);
Promise.all([
µb.loadPublicSuffixList(),
µb.staticNetFilteringEngine.readyToUse()
]).then(( ) => {
onPSLReady();
});
µb.loadRedirectResources();
};

View File

@ -20,7 +20,7 @@
*/
/* jshint bitwise: false */
/* global punycode, HNTrieBuilder */
/* global punycode, hnTrieManager */
'use strict';
@ -30,7 +30,7 @@
/******************************************************************************/
var µb = µBlock;
const µb = µBlock;
// fedcba9876543210
// | | |||
@ -43,15 +43,15 @@ var µb = µBlock;
// | +-------- bit 4- 8: type [0 - 31]
// +------------- bit 9-15: unused
var BlockAction = 0 << 0;
var AllowAction = 1 << 0;
var Important = 1 << 1;
var AnyParty = 0 << 2;
var FirstParty = 1 << 2;
var ThirdParty = 2 << 2;
const BlockAction = 0 << 0;
const AllowAction = 1 << 0;
const Important = 1 << 1;
const AnyParty = 0 << 2;
const FirstParty = 1 << 2;
const ThirdParty = 2 << 2;
var AnyType = 0 << 4;
var typeNameToTypeValue = {
const AnyType = 0 << 4;
const typeNameToTypeValue = {
'no_type': 0 << 4,
'stylesheet': 1 << 4,
'image': 2 << 4,
@ -75,9 +75,9 @@ var typeNameToTypeValue = {
'webrtc': 19 << 4,
'unsupported': 20 << 4
};
var otherTypeBitValue = typeNameToTypeValue.other;
const otherTypeBitValue = typeNameToTypeValue.other;
var typeValueToTypeName = {
const typeValueToTypeName = {
1: 'stylesheet',
2: 'image',
3: 'object',
@ -100,15 +100,15 @@ var typeValueToTypeName = {
20: 'unsupported'
};
var BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty;
var BlockAnyType = BlockAction | AnyType;
var BlockAnyParty = BlockAction | AnyParty;
const BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty;
const BlockAnyType = BlockAction | AnyType;
const BlockAnyParty = BlockAction | AnyParty;
var AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty;
var AllowAnyType = AllowAction | AnyType;
var AllowAnyParty = AllowAction | AnyParty;
const AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty;
const AllowAnyType = AllowAction | AnyType;
const AllowAnyParty = AllowAction | AnyParty;
var genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generichide,
const genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generichide,
genericHideImportant = BlockAction | AnyParty | typeNameToTypeValue.generichide | Important;
// ABP filters: https://adblockplus.org/en/filters
@ -119,7 +119,7 @@ var genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generich
// See the following as short-lived registers, used during evaluation. They are
// valid until the next evaluation.
var pageHostnameRegister = '',
let pageHostnameRegister = '',
requestHostnameRegister = '';
//var filterRegister = null;
//var categoryRegister = '';
@ -127,13 +127,13 @@ var pageHostnameRegister = '',
// Local helpers
// Be sure to not confuse 'example.com' with 'anotherexample.com'
var isFirstParty = function(domain, hostname) {
const isFirstParty = function(domain, hostname) {
return hostname.endsWith(domain) &&
(hostname.length === domain.length ||
hostname.charCodeAt(hostname.length - domain.length - 1) === 0x2E /* '.' */);
};
var normalizeRegexSource = function(s) {
const normalizeRegexSource = function(s) {
try {
var re = new RegExp(s);
return re.source;
@ -143,12 +143,12 @@ var normalizeRegexSource = function(s) {
return '';
};
var rawToRegexStr = function(s, anchor) {
var me = rawToRegexStr;
const rawToRegexStr = function(s, anchor) {
let me = rawToRegexStr;
// https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/
// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
// Also: remove leading/trailing wildcards -- there is no point.
var reStr = s.replace(me.escape1, '\\$&')
let reStr = s.replace(me.escape1, '\\$&')
.replace(me.escape2, '(?:[^%.0-9a-z_-]|$)')
.replace(me.escape3, '')
.replace(me.escape4, '[^ ]*?');
@ -175,7 +175,7 @@ rawToRegexStr.reTextHostnameAnchor2 = '^[a-z-]+://(?:[^/?#]+)?';
const filterDataSerialize = µb.CompiledLineIO.serialize;
var toLogDataInternal = function(categoryBits, tokenHash, filter) {
const toLogDataInternal = function(categoryBits, tokenHash, filter) {
if ( filter === null ) { return undefined; }
let logData = filter.logData();
logData.compiled = filterDataSerialize([
@ -209,7 +209,7 @@ var toLogDataInternal = function(categoryBits, tokenHash, filter) {
};
// First character of match must be within the hostname part of the url.
var isHnAnchored = function(url, matchStart) {
const isHnAnchored = function(url, matchStart) {
var hnStart = url.indexOf('://');
if ( hnStart === -1 ) { return false; }
hnStart += 3;
@ -222,9 +222,9 @@ var isHnAnchored = function(url, matchStart) {
return url.charCodeAt(matchStart - 1) === 0x2E;
};
var reURLPostHostnameAnchors = /[\/?#]/;
const reURLPostHostnameAnchors = /[\/?#]/;
var arrayStrictEquals = function(a, b) {
const arrayStrictEquals = function(a, b) {
var n = a.length;
if ( n !== b.length ) { return false; }
var isArray, x, y;
@ -251,22 +251,22 @@ var arrayStrictEquals = function(a, b) {
**/
var filterClasses = [],
filterClassIdGenerator = 0;
const filterClasses = [];
let filterClassIdGenerator = 0;
var registerFilterClass = function(ctor) {
var fid = filterClassIdGenerator++;
const registerFilterClass = function(ctor) {
let fid = filterClassIdGenerator++;
ctor.fid = ctor.prototype.fid = fid;
filterClasses[fid] = ctor;
};
var filterFromCompiledData = function(args) {
const filterFromCompiledData = function(args) {
return filterClasses[args[0]].load(args);
};
/******************************************************************************/
var FilterTrue = function() {
const FilterTrue = function() {
};
FilterTrue.prototype.match = function() {
@ -297,7 +297,7 @@ registerFilterClass(FilterTrue);
/******************************************************************************/
var FilterPlain = function(s, tokenBeg) {
const FilterPlain = function(s, tokenBeg) {
this.s = s;
this.tokenBeg = tokenBeg;
};
@ -330,7 +330,7 @@ registerFilterClass(FilterPlain);
/******************************************************************************/
var FilterPlainPrefix0 = function(s) {
const FilterPlainPrefix0 = function(s) {
this.s = s;
};
@ -362,7 +362,7 @@ registerFilterClass(FilterPlainPrefix0);
/******************************************************************************/
var FilterPlainPrefix1 = function(s) {
const FilterPlainPrefix1 = function(s) {
this.s = s;
};
@ -394,7 +394,7 @@ registerFilterClass(FilterPlainPrefix1);
/******************************************************************************/
var FilterPlainHostname = function(s) {
const FilterPlainHostname = function(s) {
this.s = s;
};
@ -429,7 +429,7 @@ registerFilterClass(FilterPlainHostname);
/******************************************************************************/
var FilterPlainLeftAnchored = function(s) {
const FilterPlainLeftAnchored = function(s) {
this.s = s;
};
@ -461,7 +461,7 @@ registerFilterClass(FilterPlainLeftAnchored);
/******************************************************************************/
var FilterPlainRightAnchored = function(s) {
const FilterPlainRightAnchored = function(s) {
this.s = s;
};
@ -493,7 +493,7 @@ registerFilterClass(FilterPlainRightAnchored);
/******************************************************************************/
var FilterExactMatch = function(s) {
const FilterExactMatch = function(s) {
this.s = s;
};
@ -525,7 +525,7 @@ registerFilterClass(FilterExactMatch);
/******************************************************************************/
var FilterPlainHnAnchored = function(s) {
const FilterPlainHnAnchored = function(s) {
this.s = s;
};
@ -558,7 +558,7 @@ registerFilterClass(FilterPlainHnAnchored);
/******************************************************************************/
var FilterGeneric = function(s, anchor) {
const FilterGeneric = function(s, anchor) {
this.s = s;
this.anchor = anchor;
};
@ -603,7 +603,7 @@ registerFilterClass(FilterGeneric);
/******************************************************************************/
var FilterGenericHnAnchored = function(s) {
const FilterGenericHnAnchored = function(s) {
this.s = s;
};
@ -642,7 +642,7 @@ registerFilterClass(FilterGenericHnAnchored);
/******************************************************************************/
var FilterGenericHnAndRightAnchored = function(s) {
const FilterGenericHnAndRightAnchored = function(s) {
FilterGenericHnAnchored.call(this, s);
};
@ -682,7 +682,7 @@ registerFilterClass(FilterGenericHnAndRightAnchored);
/******************************************************************************/
var FilterRegex = function(s) {
const FilterRegex = function(s) {
this.re = s;
};
@ -723,7 +723,7 @@ registerFilterClass(FilterRegex);
// Filtering according to the origin.
var FilterOrigin = function() {
const FilterOrigin = function() {
};
FilterOrigin.prototype.wrapped = {
@ -766,7 +766,7 @@ FilterOrigin.prototype.compile = function() {
// *** start of specialized origin matchers
var FilterOriginHit = function(domainOpt) {
const FilterOriginHit = function(domainOpt) {
FilterOrigin.call(this);
this.hostname = domainOpt;
};
@ -792,7 +792,7 @@ FilterOriginHit.prototype = Object.create(FilterOrigin.prototype, {
//
var FilterOriginMiss = function(domainOpt) {
const FilterOriginMiss = function(domainOpt) {
FilterOrigin.call(this);
this.hostname = domainOpt.slice(1);
};
@ -811,14 +811,15 @@ FilterOriginMiss.prototype = Object.create(FilterOrigin.prototype, {
var needle = this.hostname, haystack = pageHostnameRegister;
if ( haystack.endsWith(needle) === false ) { return true; }
var offset = haystack.length - needle.length;
return offset !== 0 && haystack.charCodeAt(offset - 1) !== 0x2E /* '.' */;
return offset !== 0 &&
haystack.charCodeAt(offset - 1) !== 0x2E /* '.' */;
}
},
});
//
var FilterOriginHitSet = function(domainOpt) {
const FilterOriginHitSet = function(domainOpt) {
FilterOrigin.call(this);
this.domainOpt = domainOpt.length < 128
? domainOpt
@ -840,17 +841,17 @@ FilterOriginHitSet.prototype = Object.create(FilterOrigin.prototype, {
},
matchOrigin: {
value: function() {
if ( this.oneOf === null ) {
this.oneOf = HNTrieBuilder.fromDomainOpt(this.domainOpt);
if ( hnTrieManager.isValidRef(this.oneOf) === false ) {
this.oneOf = hnTrieManager.fromDomainOpt(this.domainOpt);
}
return this.oneOf.matches(pageHostnameRegister);
return this.oneOf.matches(pageHostnameRegister) === 1;
}
},
});
//
var FilterOriginMissSet = function(domainOpt) {
const FilterOriginMissSet = function(domainOpt) {
FilterOrigin.call(this);
this.domainOpt = domainOpt.length < 128
? domainOpt
@ -872,17 +873,19 @@ FilterOriginMissSet.prototype = Object.create(FilterOrigin.prototype, {
},
matchOrigin: {
value: function() {
if ( this.noneOf === null ) {
this.noneOf = HNTrieBuilder.fromDomainOpt(this.domainOpt.replace(/~/g, ''));
if ( hnTrieManager.isValidRef(this.noneOf) === false ) {
this.noneOf = hnTrieManager.fromDomainOpt(
this.domainOpt.replace(/~/g, '')
);
}
return this.noneOf.matches(pageHostnameRegister) === false;
return this.noneOf.matches(pageHostnameRegister) === 0;
}
},
});
//
var FilterOriginMixedSet = function(domainOpt) {
const FilterOriginMixedSet = function(domainOpt) {
FilterOrigin.call(this);
this.domainOpt = domainOpt.length < 128
? domainOpt
@ -903,20 +906,16 @@ FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
},
init: {
value: function() {
var oneOf = [], noneOf = [],
hostnames = this.domainOpt.split('|'),
i = hostnames.length,
hostname;
while ( i-- ) {
hostname = hostnames[i];
let oneOf = [], noneOf = [];
for ( let hostname of this.domainOpt.split('|') ) {
if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) {
noneOf.push(hostname.slice(1));
} else {
oneOf.push(hostname);
}
}
this.oneOf = HNTrieBuilder.fromIterable(oneOf);
this.noneOf = HNTrieBuilder.fromIterable(noneOf);
this.oneOf = hnTrieManager.fromIterable(oneOf);
this.noneOf = hnTrieManager.fromIterable(noneOf);
}
},
toDomainOpt: {
@ -926,10 +925,12 @@ FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
},
matchOrigin: {
value: function() {
if ( this.oneOf === null ) { this.init(); }
var needle = pageHostnameRegister;
return this.oneOf.matches(needle) &&
this.noneOf.matches(needle) === false;
if ( hnTrieManager.isValidRef(this.oneOf) === false ) {
this.init();
}
let needle = pageHostnameRegister;
return this.oneOf.matches(needle) === 1 &&
this.noneOf.matches(needle) === 0;
}
},
});
@ -981,7 +982,7 @@ registerFilterClass(FilterOrigin);
/******************************************************************************/
var FilterDataHolder = function(dataType, dataStr) {
const FilterDataHolder = function(dataType, dataStr) {
this.dataType = dataType;
this.dataStr = dataStr;
this.wrapped = undefined;
@ -1024,7 +1025,7 @@ registerFilterClass(FilterDataHolder);
// Helper class for storing instances of FilterDataHolder.
var FilterDataHolderEntry = function(categoryBits, tokenHash, fdata) {
const FilterDataHolderEntry = function(categoryBits, tokenHash, fdata) {
this.categoryBits = categoryBits;
this.tokenHash = tokenHash;
this.filter = filterFromCompiledData(fdata);
@ -1047,7 +1048,7 @@ FilterDataHolderEntry.load = function(data) {
// Dictionary of hostnames
//
var FilterHostnameDict = function() {
const FilterHostnameDict = function() {
this.h = ''; // short-lived register
this.dict = new Set();
};
@ -1138,7 +1139,7 @@ registerFilterClass(FilterHostnameDict);
/******************************************************************************/
var FilterPair = function(a, b) {
const FilterPair = function(a, b) {
this.f1 = a;
this.f2 = b;
this.f = null;
@ -1217,7 +1218,7 @@ registerFilterClass(FilterPair);
/******************************************************************************/
var FilterBucket = function(a, b, c) {
const FilterBucket = function(a, b, c) {
this.filters = [];
this.f = null;
if ( a !== undefined ) {
@ -1315,7 +1316,7 @@ registerFilterClass(FilterBucket);
/******************************************************************************/
/******************************************************************************/
var FilterParser = function() {
const FilterParser = function() {
this.cantWebsocket = vAPI.cantWebsocket;
this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i;
@ -1933,7 +1934,7 @@ FilterParser.prototype.makeToken = function() {
/******************************************************************************/
/******************************************************************************/
var FilterContainer = function() {
const FilterContainer = function() {
this.reIsGeneric = /[\^\*]/;
this.filterParser = new FilterParser();
this.urlTokenizer = µb.urlTokenizer;
@ -1960,6 +1961,9 @@ FilterContainer.prototype.reset = function() {
this.dataFilters = new Map();
this.filterParser.reset();
// This will invalidate all hn tries throughout uBO:
hnTrieManager.reset();
// Runtime registers
this.cbRegister = undefined;
this.thRegister = undefined;
@ -2052,6 +2056,15 @@ FilterContainer.prototype.freeze = function() {
/******************************************************************************/
// This is necessary for when the filtering engine readiness will depend
// on asynchronous operations (ex.: when loading a wasm module).
// Reports the readiness of the static network filtering engine by delegating
// to hnTrieManager.readyToUse() and returning its result unchanged.
// NOTE(review): the shape of the returned value (boolean vs. promise) is
// decided by hnTrieManager -- confirm against src/js/hntrie.js.
FilterContainer.prototype.readyToUse = function() {
return hnTrieManager.readyToUse();
};
/******************************************************************************/
FilterContainer.prototype.toSelfie = function() {
let categoriesToSelfie = function(categoryMap) {
let selfie = [];
@ -2250,7 +2263,7 @@ FilterContainer.prototype.compileToAtomicFilter = function(
// Only static filter with an explicit type can be redirected. If we reach
// this point, it's because there is one or more explicit type.
if ( parsed.badFilter === false && parsed.redirect ) {
if ( parsed.redirect ) {
let redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
if ( Array.isArray(redirects) ) {
for ( let redirect of redirects ) {
@ -2292,26 +2305,24 @@ FilterContainer.prototype.fromCompiledContent = function(reader) {
FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out, outlog) {
if ( this.dataFilters.length === 0 ) { return; }
var url = this.urlTokenizer.setURL(requestURL);
let url = this.urlTokenizer.setURL(requestURL);
requestHostnameRegister = µb.URI.hostnameFromURI(url);
pageHostnameRegister = requestHostnameRegister = µb.URI.hostnameFromURI(url);
// We need to visit ALL the matching filters.
var toAddImportant = new Map(),
let toAddImportant = new Map(),
toAdd = new Map(),
toRemove = new Map();
var entry, f,
tokenHashes = this.urlTokenizer.getTokens(),
tokenHash, tokenOffset,
let tokenHashes = this.urlTokenizer.getTokens(),
i = 0;
while ( i < 32 ) {
tokenHash = tokenHashes[i++];
let tokenHash = tokenHashes[i++];
if ( tokenHash === 0 ) { break; }
tokenOffset = tokenHashes[i++];
entry = this.dataFilters.get(tokenHash);
let tokenOffset = tokenHashes[i++];
let entry = this.dataFilters.get(tokenHash);
while ( entry !== undefined ) {
f = entry.filter;
let f = entry.filter;
if ( f.match(url, tokenOffset) === true ) {
if ( entry.categoryBits & 0x001 ) {
toRemove.set(f.dataStr, entry);
@ -2324,9 +2335,9 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
entry = entry.next;
}
}
entry = this.dataFilters.get(this.noTokenHash);
let entry = this.dataFilters.get(this.noTokenHash);
while ( entry !== undefined ) {
f = entry.filter;
let f = entry.filter;
if ( f.match(url) === true ) {
if ( entry.categoryBits & 0x001 ) {
toRemove.set(f.dataStr, entry);
@ -2342,12 +2353,11 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; }
// Remove entries overridden by other filters.
var key;
for ( key of toAddImportant.keys() ) {
for ( let key of toAddImportant.keys() ) {
toAdd.delete(key);
toRemove.delete(key);
}
for ( key of toRemove.keys() ) {
for ( let key of toRemove.keys() ) {
if ( key === '' ) {
toAdd.clear();
break;
@ -2355,26 +2365,25 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
toAdd.delete(key);
}
var logData;
for ( entry of toAddImportant ) {
for ( let entry of toAddImportant ) {
out.push(entry[0]);
if ( outlog === undefined ) { continue; }
logData = entry[1].logData();
let logData = entry[1].logData();
logData.source = 'static';
logData.result = 1;
outlog.push(logData);
}
for ( entry of toAdd ) {
for ( let entry of toAdd ) {
out.push(entry[0]);
if ( outlog === undefined ) { continue; }
logData = entry[1].logData();
let logData = entry[1].logData();
logData.source = 'static';
logData.result = 1;
outlog.push(logData);
}
if ( outlog !== undefined ) {
for ( entry of toRemove.values()) {
logData = entry.logData();
for ( let entry of toRemove.values()) {
let logData = entry.logData();
logData.source = 'static';
logData.result = 2;
outlog.push(logData);
@ -2389,20 +2398,19 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
FilterContainer.prototype.matchTokens = function(bucket, url) {
// Hostname-only filters
var f = bucket.get(this.dotTokenHash);
let f = bucket.get(this.dotTokenHash);
if ( f !== undefined && f.match() === true ) {
this.thRegister = this.dotTokenHash;
this.fRegister = f;
return true;
}
var tokenHashes = this.urlTokenizer.getTokens(),
tokenHash, tokenOffset,
let tokenHashes = this.urlTokenizer.getTokens(),
i = 0;
for (;;) {
tokenHash = tokenHashes[i++];
let tokenHash = tokenHashes[i++];
if ( tokenHash === 0 ) { break; }
tokenOffset = tokenHashes[i++];
let tokenOffset = tokenHashes[i++];
f = bucket.get(tokenHash);
if ( f !== undefined && f.match(url, tokenOffset) === true ) {
this.thRegister = tokenHash;
@ -2437,8 +2445,10 @@ FilterContainer.prototype.matchStringGenericHide = function(requestURL) {
let url = this.urlTokenizer.setURL(requestURL);
// https://github.com/gorhill/uBlock/issues/2225
// Important: this is used by FilterHostnameDict.match().
requestHostnameRegister = µb.URI.hostnameFromURI(url);
// Important:
// - `pageHostnameRegister` is used by FilterOrigin.matchOrigin().
// - `requestHostnameRegister` is used by FilterHostnameDict.match().
pageHostnameRegister = requestHostnameRegister = µb.URI.hostnameFromURI(url);
let bucket = this.categories.get(genericHideException);
if ( !bucket || this.matchTokens(bucket, url) === false ) {
@ -2548,7 +2558,7 @@ FilterContainer.prototype.matchString = function(context) {
// https://github.com/chrisaljoudi/uBlock/issues/519
// Use exact type match for anything beyond `other`
// Also, be prepared to support unknown types
var type = typeNameToTypeValue[context.requestType];
let type = typeNameToTypeValue[context.requestType];
if ( type === undefined ) {
type = otherTypeBitValue;
} else if ( type === 0 || type > otherTypeBitValue ) {
@ -2577,7 +2587,7 @@ FilterContainer.prototype.matchString = function(context) {
// filter.
// Prime tokenizer: we get a normalized URL in return.
var url = this.urlTokenizer.setURL(context.requestURL);
let url = this.urlTokenizer.setURL(context.requestURL);
// These registers will be used by various filters
pageHostnameRegister = context.pageHostname || '';
@ -2585,10 +2595,10 @@ FilterContainer.prototype.matchString = function(context) {
this.fRegister = null;
var party = isFirstParty(context.pageDomain, context.requestHostname)
let party = isFirstParty(context.pageDomain, context.requestHostname)
? FirstParty
: ThirdParty;
var categories = this.categories,
let categories = this.categories,
catBits, bucket;
// https://github.com/chrisaljoudi/uBlock/issues/139

View File

@ -604,9 +604,7 @@
µBlock.loadFilterLists = function(callback) {
// Callers are expected to check this first.
if ( this.loadingFilterLists ) {
return;
}
if ( this.loadingFilterLists ) { return; }
this.loadingFilterLists = true;
var µb = this,
@ -961,38 +959,31 @@
/******************************************************************************/
µBlock.loadPublicSuffixList = function(callback) {
var µb = this,
assetKey = µb.pslAssetKey,
compiledAssetKey = 'compiled/' + assetKey;
if ( typeof callback !== 'function' ) {
callback = this.noopFunc;
}
var onRawListLoaded = function(details) {
if ( details.content !== '' ) {
µb.compilePublicSuffixList(details.content);
}
callback();
};
var onCompiledListLoaded = function(details) {
var selfie;
µBlock.loadPublicSuffixList = function() {
return new Promise(resolve => {
// start of executor
this.assets.get('compiled/' + this.pslAssetKey, details => {
let selfie;
try {
selfie = JSON.parse(details.content);
} catch (ex) {
}
if (
selfie === undefined ||
publicSuffixList.fromSelfie(selfie) === false
selfie instanceof Object &&
publicSuffixList.fromSelfie(selfie)
) {
µb.assets.get(assetKey, onRawListLoaded);
resolve();
return;
}
callback();
};
this.assets.get(compiledAssetKey, onCompiledListLoaded);
this.assets.get(this.pslAssetKey, details => {
if ( details.content !== '' ) {
this.compilePublicSuffixList(details.content);
}
resolve();
});
});
// end of executor
});
};
/******************************************************************************/

24
src/js/wasm/README.md Normal file
View File

@ -0,0 +1,24 @@
### For code reviewers
All `wasm` files in this directory were created by compiling the
corresponding `wat` file using the following command (using
`hntrie.wat`/`hntrie.wasm` as an example):
wat2wasm hntrie.wat -o hntrie.wasm
Assuming:
- The command is executed from within the present directory.
### `wat2wasm` tool
The `wat2wasm` tool can be downloaded from an official WebAssembly project:
<https://github.com/WebAssembly/wabt/releases>.
### `wat2wasm` tool online
You can also use the following online `wat2wasm` tool:
<https://webassembly.github.io/wabt/demo/wat2wasm/>.
Just paste the whole content of the `wat` file to compile into the WAT pane.
Click the "Download" button to retrieve the resulting `wasm` file.

BIN
src/js/wasm/hntrie.wasm Normal file

Binary file not shown.

200
src/js/wasm/hntrie.wat Normal file
View File

@ -0,0 +1,200 @@
;;
;; uBlock Origin - a browser extension to block requests.
;; Copyright (C) 2018-present Raymond Hill
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see {http://www.gnu.org/licenses/}.
;;
;; Home: https://github.com/gorhill/uBlock
;; File: hntrie.wat
;; Description: WebAssembly code used by src/js/hntrie.js
;; How to compile: See README.md in this directory.
(module
;;
;; module start
;;
;; (func $log (import "imports" "log") (param i32 i32 i32))
(memory (import "imports" "memory") 1)
;;
;; Public functions
;;
;;
;; unsigned int matches(offset)
;;
;; Test whether the currently set needle matches the trie at specified offset.
;;
;; Memory layout, byte offset:
;; 0-254: encoded needle (ASCII)
;; 255 : needle length
;; 256- : tries
;;
(func (export "matches")
;; Walks the needle backward (from its last byte toward its first) while
;; following trie nodes, starting with the node at byte offset $itrie.
;;
;; Trie node layout as read by this function (byte offsets from the node):
;;   +0  : i32, offset of the node to try next when this node's char differs
;;         from the current needle char (0 = no more candidates)
;;   +4  : i32, offset of the node to continue with once this node has
;;         matched (0 = none)
;;   +8  : u8, the char held by this node; 0 is what an exhausted needle is
;;         compared against
;;   +9  : u8, number of extra chars stored in this node
;;   +10-: the extra chars, if any
;; NOTE(review): the node layout is inferred from the loads below; the
;; authoritative description presumably lives in src/js/hntrie.js.
(param $itrie i32)
(result i32) ;; result: 0 = miss, 1 = hit
(local $ineedle i32) ;; current needle offset
(local $nchar i32) ;; needle char being processed
(local $tchar i32) ;; trie char being processed
(local $lxtra i32) ;; count of extra chars in node, then end offset of those chars
(local $ixtra i32) ;; offset of the extra char being compared
;; let ineedle = buf[255]; (needle length, see memory layout)
i32.const 255
i32.load8_u
set_local $ineedle
;; One iteration per needle char (plus the node's extra chars, if any).
loop $nextNeedleChar
;; ineedle -= 1;
get_local $ineedle
i32.const -1
i32.add
tee_local $ineedle
;; let nchar = ineedle === -1 ? 0 : buf[ineedle];
;; (the test below is a signed `ineedle < 0`; 0 stands for "needle exhausted")
i32.const 0
i32.lt_s
if
i32.const 0
set_local $nchar
else
get_local $ineedle
i32.load8_u
set_local $nchar
end
;; Scan candidate nodes (chained through the i32 at +0) until one holds
;; nchar; leaving the block means a node matching nchar was found.
block $trieCharEqNeedleChar loop $nextTrieChar
;; let tchar = buf[itrie+8];
get_local $itrie
i32.load8_u offset=8
tee_local $tchar
;; if ( tchar === nchar ) { break; }
get_local $nchar
i32.eq
br_if $trieCharEqNeedleChar
;; if ( tchar === 0 && nchar === 0x2E ) { return 1; }
;; (node char is 0 while the needle is at a '.': reported as a hit)
get_local $tchar
i32.eqz
if
get_local $nchar
i32.const 0x2E
i32.eq
if
i32.const 1
return
end
end
;; itrie = buf32[itrie >>> 2];
get_local $itrie
i32.load
tee_local $itrie
;; if ( itrie === 0 ) { return 0; }
i32.eqz
if
i32.const 0
return
end
br $nextTrieChar
end end
;; if ( nchar === 0 ) { return 1; }
;; (needle exhausted and the node char is 0 as well: hit)
get_local $nchar
i32.eqz
if
i32.const 1
return
end
;; let lxtra = buf[itrie+9];
get_local $itrie
i32.load8_u offset=9
tee_local $lxtra
i32.eqz
;; Empty `then` arm: nothing to compare when the node has no extra chars.
if else
;; if ( lxtra > ineedle ) { return 0; }
;; (not enough needle chars left to cover the extra chars)
get_local $lxtra
get_local $ineedle
i32.gt_u
if
i32.const 0
return
end
;; let ixtra = itrie + 10;
get_local $itrie
i32.const 10
i32.add
tee_local $ixtra
;; lxtra += ixtra;
;; (lxtra now holds the offset just past the last extra char)
get_local $lxtra
i32.add
set_local $lxtra
;; do {
block $noMoreExtraChars loop
;; ineedle -= 1;
get_local $ineedle
i32.const -1
i32.add
tee_local $ineedle
;; if ( buf[ineedle] !== buf[ixtra] ) { return 0; }
i32.load8_u
get_local $ixtra
i32.load8_u
i32.ne
if
i32.const 0
return
end
;; ixtra += 1;
get_local $ixtra
i32.const 1
i32.add
tee_local $ixtra
;; } while ( ixtra !== lxtra );
get_local $lxtra
i32.eq
br_if $noMoreExtraChars
br 0
end end
end
;; itrie = buf32[itrie + 4 >>> 2];
get_local $itrie
i32.load offset=4
tee_local $itrie
;; if ( itrie === 0 ) {
i32.eqz
if
;; return ineedle === 0 || buf[ineedle-1] === 0x2E ? 1 : 0;
;; (no node left to follow: hit only if the needle is fully consumed or
;; the next unread needle char is a '.')
get_local $ineedle
i32.eqz
if
i32.const 1
return
end
get_local $ineedle
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
i32.const 1
return
end
i32.const 0
return
end
;; Continue with the next needle char (branches back to $nextNeedleChar).
br 0
end
;; Not reached at run time: every path through the loop above either returns
;; or branches back to its start. Supplies the i32 result the function
;; signature declares.
i32.const 0
)
;;
;; module end
;;
)

479
test/hnset-benchmark.html Normal file

File diff suppressed because one or more lines are too long

45866
test/hntrie-test.html Normal file

File diff suppressed because it is too large Load Diff