mirror of
https://github.com/gorhill/uBlock.git
synced 2024-11-06 19:02:30 +01:00
Squashed commit of the following:
commit 7c6cacc59b27660fabacb55d668ef099b222a9e6 Author: Raymond Hill <rhill@raymondhill.net> Date: Sat Nov 3 08:52:51 2018 -0300 code review: finalize support for wasm-based hntrie commit 8596ed80e3bdac2c36e3c860b51e7189f6bc8487 Merge: cbe1f2e000eb82
Author: Raymond Hill <rhill@raymondhill.net> Date: Sat Nov 3 08:41:40 2018 -0300 Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm commit cbe1f2e2f38484d42af3204ec7f1b5decd30f99e Merge: 270fc7fdbb7e80
Author: Raymond Hill <rhill@raymondhill.net> Date: Fri Nov 2 17:43:20 2018 -0300 Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm commit 270fc7f9b3b73d79e6355522c1a42ce782fe7e5c Merge: d2a89cfd693d4f
Author: Raymond Hill <rhill@raymondhill.net> Date: Fri Nov 2 16:21:08 2018 -0300 Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm commit d2a89cf28f0816ffd4617c2c7b4ccfcdcc30e1b4 Merge: d7afc78649f82f
Author: Raymond Hill <rhill@raymondhill.net> Date: Fri Nov 2 14:54:58 2018 -0300 Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm commit d7afc78b5f5675d7d34c5a1d0ec3099a77caef49 Author: Raymond Hill <rhill@raymondhill.net> Date: Fri Nov 2 13:56:11 2018 -0300 finalize wasm-based hntrie implementation commit e7b9e043cf36ad055791713e34eb0322dec84627 Author: Raymond Hill <rhill@raymondhill.net> Date: Fri Nov 2 08:14:02 2018 -0300 add first-pass implementation of wasm version of hntrie commit 1015cb34624f3ef73ace58b58fe4e03dfc59897f Author: Raymond Hill <rhill@raymondhill.net> Date: Wed Oct 31 17:16:47 2018 -0300 back up draft work toward experimenting with wasm hntries
This commit is contained in:
parent
000eb82f08
commit
d7d544cda0
@ -33,12 +33,12 @@ if ( vAPI.webextFlavor === undefined ) {
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var µBlock = (function() { // jshint ignore:line
|
const µBlock = (function() { // jshint ignore:line
|
||||||
|
|
||||||
var oneSecond = 1000,
|
const oneSecond = 1000,
|
||||||
oneMinute = 60 * oneSecond;
|
oneMinute = 60 * oneSecond;
|
||||||
|
|
||||||
var hiddenSettingsDefault = {
|
const hiddenSettingsDefault = {
|
||||||
assetFetchTimeout: 30,
|
assetFetchTimeout: 30,
|
||||||
autoUpdateAssetFetchPeriod: 120,
|
autoUpdateAssetFetchPeriod: 120,
|
||||||
autoUpdatePeriod: 7,
|
autoUpdatePeriod: 7,
|
||||||
@ -56,7 +56,7 @@ var µBlock = (function() { // jshint ignore:line
|
|||||||
userResourcesLocation: 'unset'
|
userResourcesLocation: 'unset'
|
||||||
};
|
};
|
||||||
|
|
||||||
var whitelistDefault = [
|
const whitelistDefault = [
|
||||||
'about-scheme',
|
'about-scheme',
|
||||||
'chrome-extension-scheme',
|
'chrome-extension-scheme',
|
||||||
'chrome-scheme',
|
'chrome-scheme',
|
||||||
|
714
src/js/hntrie.js
714
src/js/hntrie.js
@ -1,7 +1,7 @@
|
|||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
|
|
||||||
uBlock Origin - a browser extension to block requests.
|
uBlock Origin - a browser extension to block requests.
|
||||||
Copyright (C) 2017 Raymond Hill
|
Copyright (C) 2017-present Raymond Hill
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -19,6 +19,9 @@
|
|||||||
Home: https://github.com/gorhill/uBlock
|
Home: https://github.com/gorhill/uBlock
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* globals WebAssembly */
|
||||||
|
/* exported hnTrieManager */
|
||||||
|
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
@ -37,89 +40,115 @@
|
|||||||
For example, `www.abc.com` is deemed matching `abc.com`, because the former
|
For example, `www.abc.com` is deemed matching `abc.com`, because the former
|
||||||
is a subdomain of the latter. The opposite is of course not true.
|
is a subdomain of the latter. The opposite is of course not true.
|
||||||
|
|
||||||
The resulting read-only trie created as a result of using HNTrieBuilder are
|
The resulting read-only tries created as a result of using hnTrieManager are
|
||||||
simply just typed arrays filled with integers. The matching algorithm is
|
simply just typed arrays filled with integers. The matching algorithm is
|
||||||
just a matter of reading/comparing these integers, and further using them as
|
just a matter of reading/comparing these integers, and further using them as
|
||||||
indices in the array as a way to move around in the trie.
|
indices in the array as a way to move around in the trie.
|
||||||
|
|
||||||
There is still room for optimizations. Specifically, I could force the
|
|
||||||
strings to be properly sorted so that `HNTrie.matches` could bail earlier
|
|
||||||
when trying to find a matching descendant -- but suspect the gain would be
|
|
||||||
marginal, if measurable.
|
|
||||||
|
|
||||||
[1] To solve <https://github.com/gorhill/uBlock/issues/3193>
|
[1] To solve <https://github.com/gorhill/uBlock/issues/3193>
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
var HNTrieBuilder = function() {
|
const hnTrieManager = {
|
||||||
this.reset();
|
tree: null,
|
||||||
};
|
treesz: 0,
|
||||||
|
trie: new Uint8Array(65536),
|
||||||
|
trie32: null,
|
||||||
|
triesz: 256, // bytes 0-254: decoded needle, byte 255: needle length
|
||||||
|
id: 0,
|
||||||
|
needle: '',
|
||||||
|
wasmLoading: null,
|
||||||
|
wasmMemory: null,
|
||||||
|
cleanupToken: 0,
|
||||||
|
cleanupTimer: undefined,
|
||||||
|
|
||||||
/*******************************************************************************
|
reset: function() {
|
||||||
|
if ( this.wasmMemory === null && this.trie.byteLength > 65536 ) {
|
||||||
|
this.trie = new Uint8Array(65536);
|
||||||
|
this.trie32 = new Uint32Array(this.trie.buffer);
|
||||||
|
} else {
|
||||||
|
this.trie.fill(0);
|
||||||
|
}
|
||||||
|
this.triesz = 256;
|
||||||
|
this.needle = '';
|
||||||
|
this.id += 1;
|
||||||
|
},
|
||||||
|
|
||||||
A plain javascript array is used to build the trie. It will be cast into
|
readyToUse: function() {
|
||||||
the appropriate read-only TypedArray[1] at vacuum time.
|
return this.wasmLoading instanceof Promise
|
||||||
|
? this.wasmLoading
|
||||||
|
: Promise.resolve();
|
||||||
|
},
|
||||||
|
|
||||||
[1] Depending on the size: Uint8Array, Uint16Array, or Uint32Array.
|
isValidRef: function(ref) {
|
||||||
|
return ref !== null && ref.id === this.id;
|
||||||
|
},
|
||||||
|
|
||||||
*/
|
setNeedle: function(needle) {
|
||||||
|
if ( needle !== this.needle ) {
|
||||||
HNTrieBuilder.prototype.reset = function() {
|
const buf = this.trie;
|
||||||
this.buf = [];
|
let i = needle.length;
|
||||||
this.bufsz = 0;
|
buf[255] = i;
|
||||||
this.buf[0] = 0;
|
while ( i-- ) {
|
||||||
this.buf[1] = 0;
|
buf[i] = needle.charCodeAt(i);
|
||||||
this.buf[2] = 0;
|
}
|
||||||
|
this.needle = needle;
|
||||||
|
}
|
||||||
return this;
|
return this;
|
||||||
};
|
},
|
||||||
|
|
||||||
/*******************************************************************************
|
matchesJS: function(itrie) {
|
||||||
|
const buf = this.trie;
|
||||||
Helpers for convenience.
|
const buf32 = this.trie32;
|
||||||
|
let ineedle = buf[255];
|
||||||
*/
|
|
||||||
|
|
||||||
HNTrieBuilder.fromDomainOpt = function(domainOpt) {
|
|
||||||
var builder = new HNTrieBuilder();
|
|
||||||
builder.fromDomainOpt(domainOpt);
|
|
||||||
return builder.vacuum();
|
|
||||||
};
|
|
||||||
|
|
||||||
HNTrieBuilder.fromIterable = function(hostnames) {
|
|
||||||
var builder = new HNTrieBuilder();
|
|
||||||
builder.fromIterable(hostnames);
|
|
||||||
return builder.vacuum();
|
|
||||||
};
|
|
||||||
|
|
||||||
HNTrieBuilder.print = function(trie) {
|
|
||||||
var buf = trie.buf,
|
|
||||||
i = 0, cc = [], ic, indent = 0,
|
|
||||||
forks = [];
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if ( buf[i] !== 0 ) {
|
ineedle -= 1;
|
||||||
forks.push(i, indent);
|
const nchar = ineedle === -1 ? 0 : buf[ineedle];
|
||||||
|
for (;;) {
|
||||||
|
const tchar = buf[itrie+8]; // quick test: first character
|
||||||
|
if ( tchar === nchar ) { break; }
|
||||||
|
if ( tchar === 0 && nchar === 0x2E ) { return 1; }
|
||||||
|
itrie = buf32[itrie >>> 2];
|
||||||
|
if ( itrie === 0 ) { return 0; } // no more descendants
|
||||||
}
|
}
|
||||||
cc.unshift(buf[i+2]);
|
if ( nchar === 0 ) { return 1; }
|
||||||
for ( ic = 0; ic < buf[i+3]; ic++ ) {
|
let lxtra = buf[itrie+9]; // length of extra characters
|
||||||
cc.unshift(buf[i+4+ic]);
|
if ( lxtra !== 0 ) { // cell is only one character
|
||||||
|
if ( lxtra > ineedle ) { return 0; }
|
||||||
|
let ixtra = itrie + 10;
|
||||||
|
lxtra += ixtra;
|
||||||
|
do {
|
||||||
|
ineedle -= 1;
|
||||||
|
if ( buf[ineedle] !== buf[ixtra] ) { return 0; }
|
||||||
|
ixtra += 1;
|
||||||
|
} while ( ixtra !== lxtra );
|
||||||
}
|
}
|
||||||
console.log('\xB7'.repeat(indent) + String.fromCharCode.apply(null, cc));
|
itrie = buf32[itrie + 4 >>> 2];
|
||||||
indent += cc.length;
|
if ( itrie === 0 ) {
|
||||||
cc = [];
|
return ineedle === 0 || buf[ineedle-1] === 0x2E ? 1 : 0;
|
||||||
i = buf[i+1];
|
|
||||||
if ( i === 0 ) {
|
|
||||||
if ( forks.length === 0 ) { break; }
|
|
||||||
indent = forks.pop();
|
|
||||||
i = forks.pop();
|
|
||||||
i = buf[i];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
},
|
||||||
|
matchesWASM: null,
|
||||||
|
matches: null,
|
||||||
|
|
||||||
/*******************************************************************************
|
start: function() {
|
||||||
|
if ( this.trie32 === null ) {
|
||||||
|
this.trie32 = new Uint32Array(this.trie.buffer);
|
||||||
|
}
|
||||||
|
this.treesz = 0;
|
||||||
|
if ( this.tree === null ) {
|
||||||
|
this.tree = new Uint32Array(16384);
|
||||||
|
}
|
||||||
|
this.tree[0] = 0;
|
||||||
|
this.tree[1] = 0;
|
||||||
|
this.tree[2] = 0;
|
||||||
|
},
|
||||||
|
|
||||||
Since this trie is specialized for matching hostnames, the stored strings are
|
/***************************************************************************
|
||||||
reversed internally, because of hostname comparison logic:
|
|
||||||
|
Since this trie is specialized for matching hostnames, the stored
|
||||||
|
strings are reversed internally, because of hostname comparison logic:
|
||||||
|
|
||||||
Correct matching:
|
Correct matching:
|
||||||
index 0123456
|
index 0123456
|
||||||
@ -137,87 +166,57 @@ HNTrieBuilder.print = function(trie) {
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
HNTrieBuilder.prototype.add = function(hn) {
|
add: function(hn) {
|
||||||
var ichar = hn.length - 1;
|
// 256 * 3 + 3 = 771
|
||||||
|
if ( this.treesz + 771 >= this.tree.length ) {
|
||||||
|
this.growTree();
|
||||||
|
}
|
||||||
|
let ichar = hn.length - 1;
|
||||||
if ( ichar === -1 ) { return; }
|
if ( ichar === -1 ) { return; }
|
||||||
var c = hn.charCodeAt(ichar),
|
let c = hn.charCodeAt(ichar),
|
||||||
i = 0, inext;
|
i = 0, inext;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if ( this.buf[i+2] !== c ) { // match not found
|
if ( this.tree[i+2] !== c ) { // match not found
|
||||||
inext = this.buf[i]; // move to descendant
|
inext = this.tree[i]; // move to descendant
|
||||||
if ( inext === 0 ) { break; } // no descendant
|
if ( inext === 0 ) { break; } // no descendant
|
||||||
} else { // match found
|
} else { // match found
|
||||||
if ( c === 0 ) { return; }
|
if ( c === 0 ) { return; }
|
||||||
inext = this.buf[i+1]; // move to sibling
|
inext = this.tree[i+1]; // move to sibling
|
||||||
ichar -= 1;
|
ichar -= 1;
|
||||||
c = ichar === -1 ? 0 : hn.charCodeAt(ichar);
|
c = ichar === -1 ? 0 : hn.charCodeAt(ichar);
|
||||||
}
|
}
|
||||||
i = inext;
|
i = inext;
|
||||||
}
|
}
|
||||||
// Any new string added will always cause a new descendant to be created.
|
// Any new string added will always cause a new descendant to be
|
||||||
// The only time this is not the case is when trying to store a string
|
// created. The only time this is not the case is when trying to
|
||||||
// which is already in the trie.
|
// store a string which is already in the trie.
|
||||||
inext = this.bufsz; // new descendant cell
|
inext = this.treesz; // new descendant cell
|
||||||
this.buf[i] = inext;
|
this.tree[i] = inext;
|
||||||
this.buf[inext+0] = 0; // jump index to descendant
|
this.tree[inext+0] = 0; // jump index to descendant
|
||||||
this.buf[inext+1] = 0; // jump index to sibling
|
this.tree[inext+1] = 0; // jump index to sibling
|
||||||
this.buf[inext+2] = c; // character code
|
this.tree[inext+2] = c; // character code
|
||||||
this.bufsz += 3;
|
this.treesz += 3;
|
||||||
if ( c === 0 ) { return; } // character zero is always last cell
|
if ( c === 0 ) { return; } // character zero is always last cell
|
||||||
do {
|
do {
|
||||||
i = inext; // new branch sprouting made from
|
i = inext; // new branch sprouting made from
|
||||||
ichar -= 1; // all characters left to store
|
ichar -= 1; // all characters left to store
|
||||||
c = ichar === -1 ? 0 : hn.charCodeAt(ichar);
|
c = ichar === -1 ? 0 : hn.charCodeAt(ichar);
|
||||||
inext = this.bufsz;
|
inext = this.treesz;
|
||||||
this.buf[i+1] = inext;
|
this.tree[i+1] = inext;
|
||||||
this.buf[inext+0] = 0;
|
this.tree[inext+0] = 0;
|
||||||
this.buf[inext+1] = 0;
|
this.tree[inext+1] = 0;
|
||||||
this.buf[inext+2] = c;
|
this.tree[inext+2] = c;
|
||||||
this.bufsz += 3;
|
this.treesz += 3;
|
||||||
} while ( c!== 0 );
|
} while ( c!== 0 );
|
||||||
};
|
},
|
||||||
|
|
||||||
/*******************************************************************************
|
growTree: function() {
|
||||||
|
let tree = new Uint32Array(this.tree.length + 16384);
|
||||||
|
tree.set(this.tree);
|
||||||
|
this.tree = tree;
|
||||||
|
},
|
||||||
|
|
||||||
Not using String.split('|') to avoid memory churning.
|
/***************************************************************************
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
HNTrieBuilder.prototype.fromDomainOpt = function(hostnames) {
|
|
||||||
return this.fromIterable(hostnames.split('|'));
|
|
||||||
};
|
|
||||||
|
|
||||||
HNTrieBuilder.prototype.fromIterable = function(hostnames) {
|
|
||||||
var hns = Array.from(hostnames).sort(function(a, b) {
|
|
||||||
return a.length - b.length;
|
|
||||||
});
|
|
||||||
// https://github.com/gorhill/uBlock/issues/3328
|
|
||||||
// Must sort from shortest to longest.
|
|
||||||
for ( var hn of hns ) {
|
|
||||||
this.add(hn);
|
|
||||||
}
|
|
||||||
return this;
|
|
||||||
};
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
HNTrieBuilder.prototype.matches = function(needle) {
|
|
||||||
var ichar = needle.length - 1,
|
|
||||||
buf = this.buf, i = 0, c;
|
|
||||||
for (;;) {
|
|
||||||
c = ichar === -1 ? 0 : needle.charCodeAt(ichar);
|
|
||||||
while ( buf[i+2] !== c ) {
|
|
||||||
i = buf[i];
|
|
||||||
if ( i === 0 ) { return false; }
|
|
||||||
}
|
|
||||||
if ( c === 0 ) { return true; }
|
|
||||||
i = buf[i+1];
|
|
||||||
if ( i === 0 ) { return c === 0x2E; }
|
|
||||||
ichar -= 1;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/*******************************************************************************
|
|
||||||
|
|
||||||
Before vacuuming, each cell is 3 entry-long:
|
Before vacuuming, each cell is 3 entry-long:
|
||||||
- Jump index to descendant (if any)
|
- Jump index to descendant (if any)
|
||||||
@ -225,9 +224,9 @@ HNTrieBuilder.prototype.matches = function(needle) {
|
|||||||
- character code
|
- character code
|
||||||
|
|
||||||
All strings stored in the un-vacuumed trie are zero-terminated, and the
|
All strings stored in the un-vacuumed trie are zero-terminated, and the
|
||||||
character zero does occupy a cell like any other character. Let's use _ to
|
character zero does occupy a cell like any other character. Let's
|
||||||
represent character zero for sake of comments. The asterisk will be used to
|
use _ to represent character zero for sake of comments. The asterisk
|
||||||
highlight a node with a descendant.
|
will be used to highlight a node with a descendant.
|
||||||
|
|
||||||
Cases, before vacuuming:
|
Cases, before vacuuming:
|
||||||
|
|
||||||
@ -251,14 +250,14 @@ HNTrieBuilder.prototype.matches = function(needle) {
|
|||||||
_ -- b -- . -- c -- o -- m
|
_ -- b -- . -- c -- o -- m
|
||||||
_ -- a
|
_ -- a
|
||||||
|
|
||||||
Vacuuming is the process of merging sibling cells with no descendants. Cells
|
Vacuuming is the process of merging sibling cells with no descendants.
|
||||||
with descendants can't be merged.
|
Cells with descendants can't be merged.
|
||||||
|
|
||||||
Each time we arrive at the end of a horizontal branch (sibling jump index is
|
Each time we arrive at the end of a horizontal branch (sibling jump
|
||||||
0), we walk back to the nearest previous node with descendants, and repeat
|
index is 0), we walk back to the nearest previous node with descendants,
|
||||||
the process. Since there is no index information on where to come back, a
|
and repeat the process. Since there is no index information on where to
|
||||||
stack is used to remember cells with descendants (descendant jump index is
|
come back, a stack is used to remember cells with descendants (descendant
|
||||||
non zero) encountered on the way
|
jump index is non zero) encountered on the way
|
||||||
|
|
||||||
After vacuuming, each cell is 4+n entry-long:
|
After vacuuming, each cell is 4+n entry-long:
|
||||||
- Jump index to descendant (if any)
|
- Jump index to descendant (if any)
|
||||||
@ -292,40 +291,55 @@ HNTrieBuilder.prototype.matches = function(needle) {
|
|||||||
|
|
||||||
It's not possible for a character zero cell to have next siblings.
|
It's not possible for a character zero cell to have next siblings.
|
||||||
|
|
||||||
This will have to be taken into account during both vacuuming and matching.
|
This will have to be taken into account during both vacuuming and
|
||||||
|
matching.
|
||||||
|
|
||||||
Character zero cells with no descendant are discarded during vacuuming.
|
Character zero cells with no descendant are discarded during vacuuming.
|
||||||
Character zero cells with a descendant, or character zero cells which are a
|
Character zero cells with a descendant, or character zero cells which
|
||||||
descendant are kept in the vacuumed trie.
|
are a descendant are kept in the vacuumed trie.
|
||||||
|
|
||||||
A vacuumed trie is very efficient memory- and lookup-wise, but is also
|
A vacuumed trie is very efficient memory- and lookup-wise, but is also
|
||||||
read-only: no string can be added or removed. The read-only trie is really
|
read-only: no string can be added or removed. The read-only trie is
|
||||||
just a self-sufficient array of integers, and can easily be exported/imported
|
really just a self-sufficient array of integers, and can easily be
|
||||||
as a JSON array. It is theoretically possible to "decompile" a trie (vacuumed
|
exported/imported as a JSON array. It is theoretically possible to
|
||||||
or not) into the set of strings originally added to it (in the order they
|
"decompile" a trie (vacuumed or not) into the set of strings originally
|
||||||
were added with the current implementation), but so far I do not need this
|
added to it (in the order they were added with the current
|
||||||
feature.
|
implementation), but so far I do not need this feature.
|
||||||
|
|
||||||
TODO: It's possible to build the vacuumed trie on the fly as items are
|
New vacuum output array format:
|
||||||
added to it. I need to carefully list all possible cases which can arise
|
byte 0..3: offset to descendant
|
||||||
at insertion time. The benefits will be: faster creation time (expected), no
|
byte 4..7: offset to sibling
|
||||||
longer read-only trie (items can be added at any time).
|
byte 8: first character
|
||||||
|
byte 9: number of extra characters
|
||||||
|
Offset & count values are little-endian.
|
||||||
|
|
||||||
|
4 + 4 + 1 + 1 = 10 bytes for one character, otherwise
|
||||||
|
4 + 4 + 1 + 1 + n = 10 + n bytes for one + n character(s)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
HNTrieBuilder.prototype.vacuum = function() {
|
finish: function() {
|
||||||
if ( this.bufsz === 0 ) { return null; }
|
if ( this.treesz === 0 ) { return null; }
|
||||||
var input = this.buf,
|
const input = this.tree,
|
||||||
output = [], outsz = 0,
|
iout0 = this.triesz,
|
||||||
forks = [],
|
forks = [];
|
||||||
iin = 0, iout;
|
let output = this.trie,
|
||||||
|
output32 = this.trie32,
|
||||||
|
iout1 = iout0,
|
||||||
|
iout2 = output.byteLength,
|
||||||
|
iin = 0;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
iout = outsz;
|
if ( (iout1 + 266) >= iout2 ) {
|
||||||
output[iout+0] = 0;
|
this.growTrie();
|
||||||
output[iout+1] = 0;
|
output = this.trie;
|
||||||
output[iout+2] = input[iin+2]; // first character
|
output32 = this.trie32;
|
||||||
output[iout+3] = 0;
|
iout2 = output.byteLength;
|
||||||
outsz += 4;
|
}
|
||||||
|
let iout = iout1;
|
||||||
|
output32[iout >>> 2] = 0;
|
||||||
|
output32[iout + 4 >>> 2] = 0;
|
||||||
|
output[iout+8] = input[iin+2]; // first character
|
||||||
|
output[iout+9] = 0; // extra character count
|
||||||
|
iout1 += 10;
|
||||||
if ( input[iin] !== 0 ) { // cell with descendant
|
if ( input[iin] !== 0 ) { // cell with descendant
|
||||||
forks.push(iout, iin); // defer processing
|
forks.push(iout, iin); // defer processing
|
||||||
}
|
}
|
||||||
@ -334,264 +348,184 @@ HNTrieBuilder.prototype.vacuum = function() {
|
|||||||
if ( iin === 0 ) { break; } // no more sibling cell
|
if ( iin === 0 ) { break; } // no more sibling cell
|
||||||
if ( input[iin] !== 0 ) { break; } // cell with a descendant
|
if ( input[iin] !== 0 ) { break; } // cell with a descendant
|
||||||
if ( input[iin+2] === 0 ) { break; } // don't merge \x00
|
if ( input[iin+2] === 0 ) { break; } // don't merge \x00
|
||||||
output[outsz] = input[iin+2]; // add character data
|
output[iout1] = input[iin+2]; // add character data
|
||||||
outsz += 1;
|
iout1 += 1;
|
||||||
}
|
}
|
||||||
if ( outsz !== iout + 4 ) { // cells were merged
|
if ( iout1 !== iout + 10 ) { // cells were merged
|
||||||
output[iout+3] = outsz - iout - 4; // so adjust count
|
output[iout+9] = iout1 - iout - 10; // so adjust count
|
||||||
}
|
}
|
||||||
|
iout1 = (iout1 + 3) & ~3; // align to i32
|
||||||
if ( iin !== 0 && input[iin] !== 0 ) { // can't merge this cell
|
if ( iin !== 0 && input[iin] !== 0 ) { // can't merge this cell
|
||||||
output[iout+1] = outsz;
|
output32[iout + 4 >>> 2] = iout1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ( forks.length === 0 ) { break; } // no more descendants: bye
|
if ( forks.length === 0 ) { break; } // no more descendants: bye
|
||||||
iin = forks.pop(); // process next descendant
|
iin = forks.pop(); // process next descendant
|
||||||
iout = forks.pop();
|
iout = forks.pop();
|
||||||
iin = input[iin];
|
iin = input[iin];
|
||||||
output[iout] = outsz;
|
output32[iout >>> 2] = iout1;
|
||||||
}
|
}
|
||||||
var trie; // pick optimal read-only
|
this.triesz = iout1;
|
||||||
if ( outsz < 256 ) { // container array.
|
this.cleanupAsync();
|
||||||
trie = new this.HNTrie8(output, outsz);
|
return new HNTrieRef(iout0);
|
||||||
} else if ( outsz < 65536 ) {
|
},
|
||||||
trie = new this.HNTrie16(output, outsz);
|
|
||||||
|
fromIterable: function(hostnames) {
|
||||||
|
this.start();
|
||||||
|
const hns = Array.from(hostnames).sort(function(a, b) {
|
||||||
|
return a.length - b.length;
|
||||||
|
});
|
||||||
|
// https://github.com/gorhill/uBlock/issues/3328
|
||||||
|
// Must sort from shortest to longest.
|
||||||
|
for ( let hn of hns ) {
|
||||||
|
this.add(hn);
|
||||||
|
}
|
||||||
|
return this.finish();
|
||||||
|
},
|
||||||
|
|
||||||
|
fromDomainOpt: function(hostnames) {
|
||||||
|
return this.fromIterable(hostnames.split('|'));
|
||||||
|
},
|
||||||
|
|
||||||
|
growTrie: function() {
|
||||||
|
let trie;
|
||||||
|
if ( this.wasmMemory === null ) {
|
||||||
|
trie = new Uint8Array(this.trie.byteLength + 65536);
|
||||||
|
trie.set(this.trie);
|
||||||
} else {
|
} else {
|
||||||
trie = new this.HNTrie32(output, outsz);
|
this.wasmMemory.grow(1);
|
||||||
|
trie = new Uint8Array(this.wasmMemory.buffer);
|
||||||
}
|
}
|
||||||
this.reset(); // free working array
|
this.trie = trie;
|
||||||
return trie;
|
this.trie32 = new Uint32Array(this.trie.buffer);
|
||||||
};
|
},
|
||||||
|
|
||||||
/*******************************************************************************
|
cleanupAsync: function() {
|
||||||
|
if ( this.cleanupTimer === undefined ) {
|
||||||
The following internal classes are the actual output of the vacuum() method.
|
this.cleanupToken = this.triesz;
|
||||||
|
this.cleanupTimer = setTimeout(( ) => {
|
||||||
They use the minimal amount of data to be able to efficiently lookup strings
|
this.cleanupTimer = undefined;
|
||||||
in a read-only trie.
|
if ( this.cleanupToken !== this.triesz ) {
|
||||||
|
this.cleanupAsync();
|
||||||
Given that javascript optimizers mind that the type of an argument passed to
|
} else {
|
||||||
a function always stays the same each time the function is called, there need
|
this.tree = null;
|
||||||
to be three separate implementation of matches() to allow the javascript
|
}
|
||||||
optimizer to do its job.
|
}, 30000);
|
||||||
|
}
|
||||||
The matching code deals only with looking up values in a TypedArray (beside
|
},
|
||||||
calls to String.charCodeAt), so I expect this to be fast and good candidate
|
|
||||||
for optimization by javascript engines.
|
|
||||||
|
|
||||||
|
// For debugging purpose
|
||||||
|
// TODO: currently broken, needs to be fixed as per new buffer format.
|
||||||
|
/*
|
||||||
|
print: function(offset) {
|
||||||
|
let i = offset, cc = [], indent = 0,
|
||||||
|
forks = [];
|
||||||
|
for (;;) {
|
||||||
|
if ( buf[i] !== 0 ) {
|
||||||
|
forks.push(i, indent);
|
||||||
|
}
|
||||||
|
cc.unshift(buf[i+2]);
|
||||||
|
for ( let ic = 0; ic < buf[i+3]; ic++ ) {
|
||||||
|
cc.unshift(buf[i+4+ic]);
|
||||||
|
}
|
||||||
|
console.log('\xB7'.repeat(indent) + String.fromCharCode.apply(null, cc));
|
||||||
|
indent += cc.length;
|
||||||
|
cc = [];
|
||||||
|
i = buf[i+1];
|
||||||
|
if ( i === 0 ) {
|
||||||
|
if ( forks.length === 0 ) { break; }
|
||||||
|
indent = forks.pop();
|
||||||
|
i = forks.pop();
|
||||||
|
i = buf[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
*/
|
*/
|
||||||
|
|
||||||
HNTrieBuilder.prototype.HNTrie8 = function(buf, bufsz) {
|
|
||||||
this.buf = new Uint8Array(buf.slice(0, bufsz));
|
|
||||||
};
|
};
|
||||||
|
|
||||||
HNTrieBuilder.prototype.HNTrie8.prototype.matches = function(needle) {
|
/******************************************************************************/
|
||||||
var ichar = needle.length,
|
|
||||||
i = 0, c1, c2, ccnt, ic, i1, i2;
|
|
||||||
for (;;) {
|
|
||||||
ichar -= 1;
|
|
||||||
c1 = ichar === -1 ? 0 : needle.charCodeAt(ichar);
|
|
||||||
while ( (c2 = this.buf[i+2]) !== c1 ) { // quick test: first character
|
|
||||||
if ( c2 === 0 && c1 === 0x2E ) { return true; }
|
|
||||||
i = this.buf[i]; // next descendant
|
|
||||||
if ( i === 0 ) { return false; } // no more descendants
|
|
||||||
}
|
|
||||||
if ( c1 === 0 ) { return true; }
|
|
||||||
ccnt = this.buf[i+3];
|
|
||||||
if ( ccnt !== 0 ) { // cell is only one character
|
|
||||||
if ( ccnt > ichar ) { return false; }
|
|
||||||
ic = ccnt; i1 = ichar-1; i2 = i+4;
|
|
||||||
while ( ic-- && needle.charCodeAt(i1-ic) === this.buf[i2+ic] );
|
|
||||||
if ( ic !== -1 ) { return false; }
|
|
||||||
ichar -= ccnt;
|
|
||||||
}
|
|
||||||
i = this.buf[i+1]; // next sibling
|
|
||||||
if ( i === 0 ) {
|
|
||||||
return ichar === 0 || needle.charCodeAt(ichar-1) === 0x2E;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
HNTrieBuilder.prototype.HNTrie16 = function(buf, bufsz) {
|
(function() {
|
||||||
this.buf = new Uint16Array(buf.slice(0, bufsz));
|
// Default to javascript version.
|
||||||
};
|
hnTrieManager.matches = hnTrieManager.matchesJS;
|
||||||
|
|
||||||
HNTrieBuilder.prototype.HNTrie16.prototype.matches = function(needle) {
|
if (
|
||||||
var ichar = needle.length,
|
typeof WebAssembly !== 'object' ||
|
||||||
i = 0, c1, c2, ccnt, ic, i1, i2;
|
typeof WebAssembly.instantiateStreaming !== 'function'
|
||||||
for (;;) {
|
) {
|
||||||
ichar -= 1;
|
return;
|
||||||
c1 = ichar === -1 ? 0 : needle.charCodeAt(ichar);
|
|
||||||
while ( (c2 = this.buf[i+2]) !== c1 ) { // quick test: first character
|
|
||||||
if ( c2 === 0 && c1 === 0x2E ) { return true; }
|
|
||||||
i = this.buf[i]; // next descendant
|
|
||||||
if ( i === 0 ) { return false; } // no more descendants
|
|
||||||
}
|
}
|
||||||
if ( c1 === 0 ) { return true; }
|
|
||||||
ccnt = this.buf[i+3];
|
|
||||||
if ( ccnt !== 0 ) { // cell is only one character
|
|
||||||
if ( ccnt > ichar ) { return false; }
|
|
||||||
ic = ccnt; i1 = ichar-1; i2 = i+4;
|
|
||||||
while ( ic-- && needle.charCodeAt(i1-ic) === this.buf[i2+ic] );
|
|
||||||
if ( ic !== -1 ) { return false; }
|
|
||||||
ichar -= ccnt;
|
|
||||||
}
|
|
||||||
i = this.buf[i+1]; // next sibling
|
|
||||||
if ( i === 0 ) {
|
|
||||||
return ichar === 0 || needle.charCodeAt(ichar-1) === 0x2E;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
HNTrieBuilder.prototype.HNTrie32 = function(buf, bufsz) {
|
// Soft-dependency on vAPI so that the code here can be used outside of
|
||||||
this.buf = new Uint32Array(buf.slice(0, bufsz));
|
// uBO (i.e. tests, benchmarks)
|
||||||
};
|
if (
|
||||||
|
typeof vAPI === 'object' &&
|
||||||
HNTrieBuilder.prototype.HNTrie32.prototype.matches = function(needle) {
|
vAPI.webextFlavor.soup.has('firefox') === false
|
||||||
var ichar = needle.length,
|
) {
|
||||||
i = 0, c1, c2, ccnt, ic, i1, i2;
|
return;
|
||||||
for (;;) {
|
|
||||||
ichar -= 1;
|
|
||||||
c1 = ichar === -1 ? 0 : needle.charCodeAt(ichar);
|
|
||||||
while ( (c2 = this.buf[i+2]) !== c1 ) { // quick test: first character
|
|
||||||
if ( c2 === 0 && c1 === 0x2E ) { return true; }
|
|
||||||
i = this.buf[i]; // next descendant
|
|
||||||
if ( i === 0 ) { return false; } // no more descendants
|
|
||||||
}
|
}
|
||||||
if ( c1 === 0 ) { return true; }
|
|
||||||
ccnt = this.buf[i+3];
|
|
||||||
if ( ccnt !== 0 ) { // cell is only one character
|
|
||||||
if ( ccnt > ichar ) { return false; }
|
|
||||||
ic = ccnt; i1 = ichar-1; i2 = i+4;
|
|
||||||
while ( ic-- && needle.charCodeAt(i1-ic) === this.buf[i2+ic] );
|
|
||||||
if ( ic !== -1 ) { return false; }
|
|
||||||
ichar -= ccnt;
|
|
||||||
}
|
|
||||||
i = this.buf[i+1]; // next sibling
|
|
||||||
if ( i === 0 ) {
|
|
||||||
return ichar === 0 || needle.charCodeAt(ichar-1) === 0x2E;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/*******************************************************************************
|
// The wasm module will work only if CPU is natively little-endian,
|
||||||
|
// as we use native uint32 array in our trie-creation js code.
|
||||||
|
const uint32s = new Uint32Array(1);
|
||||||
|
const uint8s = new Uint8Array(uint32s.buffer);
|
||||||
|
uint32s[0] = 1;
|
||||||
|
if ( uint8s[0] !== 1 ) { return; }
|
||||||
|
|
||||||
Experimenting: WebAssembly version.
|
let workingDir;
|
||||||
Developed using this simple online tool: https://wasdk.github.io/WasmFiddle/
|
|
||||||
|
|
||||||
>>> start of C code
|
|
||||||
unsigned short buffer[0];
|
|
||||||
int matches(int id, int cclen)
|
|
||||||
{
|
{
|
||||||
unsigned short* cc0 = &buffer[0];
|
const url = document.currentScript.src;
|
||||||
unsigned short* cc = cc0 + cclen;
|
const match = /[^\/]+$/.exec(url);
|
||||||
unsigned short* cell0 = &buffer[512+id];
|
workingDir = match !== null
|
||||||
unsigned short* cell = cell0;
|
? url.slice(0, match.index)
|
||||||
unsigned short* ww;
|
: '';
|
||||||
int c1, c2, ccnt;
|
|
||||||
for (;;) {
|
|
||||||
c1 = cc <= cc0 ? 0 : *--cc;
|
|
||||||
for (;;) {
|
|
||||||
c2 = cell[2];
|
|
||||||
if ( c2 == c1 ) { break; }
|
|
||||||
if ( c2 == 0 && c1 == 0x2E ) { return 1; }
|
|
||||||
if ( cell[0] == 0 ) { return 0; }
|
|
||||||
cell = cell0 + cell[0];
|
|
||||||
}
|
}
|
||||||
if ( c1 == 0 ) { return 1; }
|
|
||||||
ccnt = cell[3];
|
|
||||||
if ( ccnt != 0 ) {
|
|
||||||
if ( cc - ccnt < cc0 ) { return 0; }
|
|
||||||
ww = cell + 4;
|
|
||||||
while ( ccnt-- ) {
|
|
||||||
if ( *--cc != *ww++ ) { return 0; }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ( cell[1] == 0 ) {
|
|
||||||
if ( cc == cc0 ) { return 1; }
|
|
||||||
if ( *--cc == 0x2E ) { return 1; }
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
cell = cell0 + cell[1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int getLinearMemoryOffset() {
|
|
||||||
return (int)&buffer[0];
|
|
||||||
}
|
|
||||||
<<< end of C code
|
|
||||||
|
|
||||||
Observations:
|
const memory = new WebAssembly.Memory({ initial: 1 });
|
||||||
- When growing memory, we must re-create the typed array js-side. The content
|
|
||||||
of the array is preserved by grow().
|
|
||||||
- It's slower than the javascript version... Possible explanations:
|
|
||||||
- Call overhead: https://github.com/WebAssembly/design/issues/1120
|
|
||||||
- Having to copy whole input string in buffer before call.
|
|
||||||
|
|
||||||
var HNTrie16wasm = (function() {
|
hnTrieManager.wasmLoading = WebAssembly.instantiateStreaming(
|
||||||
var module;
|
fetch(workingDir + 'wasm/hntrie.wasm', { mode: 'same-origin' }),
|
||||||
var instance;
|
{ imports: { memory } }
|
||||||
var memory;
|
).then(result => {
|
||||||
var memoryOrigin = 0;
|
hnTrieManager.wasmLoading = null;
|
||||||
var memoryUsed = 1024;
|
if ( !result || !result.instance ) { return; }
|
||||||
var cbuffer;
|
const pageCount = hnTrieManager.trie.byteLength >>> 16;
|
||||||
var tbuffer;
|
if ( pageCount > 1 ) {
|
||||||
var tbufferSize = 0;
|
memory.grow(pageCount - 1);
|
||||||
var matchesFn;
|
|
||||||
|
|
||||||
var init = function() {
|
|
||||||
module = new WebAssembly.Module(new Uint8Array([0,97,115,109,1,0,0,0,1,139,128,128,128,0,2,96,2,127,127,1,127,96,0,1,127,3,131,128,128,128,0,2,0,1,4,132,128,128,128,0,1,112,0,0,5,131,128,128,128,0,1,0,1,6,129,128,128,128,0,0,7,172,128,128,128,0,3,6,109,101,109,111,114,121,2,0,7,109,97,116,99,104,101,115,0,0,21,103,101,116,76,105,110,101,97,114,77,101,109,111,114,121,79,102,102,115,101,116,0,1,10,217,130,128,128,0,2,202,130,128,128,0,1,5,127,32,1,65,1,116,65,12,106,33,3,32,0,65,1,116,65,140,8,106,34,2,33,0,2,64,2,64,2,64,2,64,2,64,2,64,3,64,65,0,33,5,2,64,32,3,65,12,77,13,0,32,3,65,126,106,34,3,47,1,0,33,5,11,2,64,32,5,32,0,47,1,4,34,1,70,13,0,2,64,32,5,65,46,71,13,0,3,64,32,1,65,255,255,3,113,69,13,5,32,0,47,1,0,34,1,69,13,6,32,2,32,1,65,1,116,106,34,0,47,1,4,34,1,65,46,71,13,0,12,2,11,11,3,64,32,0,47,1,0,34,1,69,13,3,32,5,32,2,32,1,65,1,116,106,34,0,47,1,4,71,13,0,11,11,65,1,33,6,32,5,69,13,5,2,64,2,64,32,0,47,1,6,34,1,69,13,0,32,3,32,1,65,1,116,107,65,12,73,13,8,32,1,65,127,115,33,5,32,0,65,8,106,33,1,3,64,32,5,65,1,106,34,5,69,13,1,32,1,47,1,0,33,4,32,1,65,2,106,33,1,32,4,32,3,65,126,106,34,3,47,1,0,70,13,0,12,2,11,11,32,0,47,1,2,34,1,69,13,5,32,2,32,1,65,1,116,106,33,0,12,1,11,11,65,0,15,11,65,0,15,11,65,1,15,11,65,0,15,11,32,3,65,12,70,13,0,32,3,65,126,106,47,1,0,65,46,70,33,6,11,32,6,15,11,65,0,11,132,128,128,128,0,0,65,12,11]));
|
|
||||||
instance = new WebAssembly.Instance(module);
|
|
||||||
memory = instance.exports.memory;
|
|
||||||
memoryOrigin = instance.exports.getLinearMemoryOffset();
|
|
||||||
cbuffer = new Uint16Array(memory.buffer, memoryOrigin, 512);
|
|
||||||
tbuffer = new Uint16Array(memory.buffer, memoryOrigin + 1024);
|
|
||||||
memoryUsed = memoryOrigin + 1024;
|
|
||||||
matchesFn = instance.exports.matches;
|
|
||||||
};
|
|
||||||
|
|
||||||
return {
|
|
||||||
create: function(data) {
|
|
||||||
if ( module === undefined ) { init(); }
|
|
||||||
var bytesNeeded = memoryUsed + ((data.length * 2 + 3) & ~3);
|
|
||||||
if ( bytesNeeded > memory.buffer.byteLength ) {
|
|
||||||
memory.grow((bytesNeeded - memory.buffer.byteLength + 65535) >>> 16);
|
|
||||||
cbuffer = new Uint16Array(memory.buffer, memoryOrigin, 512);
|
|
||||||
tbuffer = new Uint16Array(memory.buffer, memoryOrigin + 1024);
|
|
||||||
}
|
}
|
||||||
for ( var i = 0, j = tbufferSize; i < data.length; i++, j++ ) {
|
const trie = new Uint8Array(memory.buffer);
|
||||||
tbuffer[j] = data[i];
|
trie.set(hnTrieManager.trie);
|
||||||
|
hnTrieManager.trie = trie;
|
||||||
|
if ( hnTrieManager.trie32 !== null ) {
|
||||||
|
hnTrieManager.trie32 = new Uint32Array(memory.buffer);
|
||||||
}
|
}
|
||||||
var id = tbufferSize;
|
hnTrieManager.wasmMemory = memory;
|
||||||
tbufferSize += data.length;
|
hnTrieManager.matchesWASM = result.instance.exports.matches;
|
||||||
if ( tbufferSize & 1 ) { tbufferSize += 1; }
|
hnTrieManager.matches = hnTrieManager.matchesWASM;
|
||||||
memoryUsed += tbufferSize * 2;
|
}).catch(reason => {
|
||||||
return id;
|
hnTrieManager.wasmLoading = null;
|
||||||
},
|
console.error(reason);
|
||||||
reset: function() {
|
});
|
||||||
module = undefined;
|
|
||||||
instance = undefined;
|
|
||||||
memory = undefined;
|
|
||||||
memory.grow(1);
|
|
||||||
memoryUsed = 1024;
|
|
||||||
cbuffer = undefined;
|
|
||||||
tbuffer = undefined;
|
|
||||||
tbufferSize = 0;
|
|
||||||
},
|
|
||||||
matches: function(id, hn) {
|
|
||||||
var len = hn.length;
|
|
||||||
if ( len > 512 ) {
|
|
||||||
hn = hn.slice(-512);
|
|
||||||
var pos = hn.indexOf('.');
|
|
||||||
if ( pos !== 0 ) {
|
|
||||||
hn = hn.slice(pos + 1);
|
|
||||||
}
|
|
||||||
len = hn.length;
|
|
||||||
}
|
|
||||||
var needle = cbuffer, i = len;
|
|
||||||
while ( i-- ) {
|
|
||||||
needle[i] = hn.charCodeAt(i);
|
|
||||||
}
|
|
||||||
return matchesFn(id, len) === 1;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
})();
|
})();
|
||||||
*/
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
const HNTrieRef = function(offset) {
|
||||||
|
this.id = hnTrieManager.id;
|
||||||
|
this.offset = offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
HNTrieRef.prototype = {
|
||||||
|
isValid: function() {
|
||||||
|
return this.id === hnTrieManager.id;
|
||||||
|
},
|
||||||
|
matches: function(needle) {
|
||||||
|
return hnTrieManager.setNeedle(needle).matches(this.offset);
|
||||||
|
},
|
||||||
|
matchesJS: function(needle) {
|
||||||
|
return hnTrieManager.setNeedle(needle).matchesJS(this.offset);
|
||||||
|
},
|
||||||
|
matchesWASM: function(needle) {
|
||||||
|
return hnTrieManager.setNeedle(needle).matchesWASM(this.offset);
|
||||||
|
},
|
||||||
|
};
|
||||||
|
@ -29,7 +29,7 @@
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var µb = µBlock;
|
const µb = µBlock;
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
@ -287,7 +287,12 @@ var onFirstFetchReady = function(fetched) {
|
|||||||
onVersionReady(fetched.version);
|
onVersionReady(fetched.version);
|
||||||
onCommandShortcutsReady(fetched.commandShortcuts);
|
onCommandShortcutsReady(fetched.commandShortcuts);
|
||||||
|
|
||||||
µb.loadPublicSuffixList(onPSLReady);
|
Promise.all([
|
||||||
|
µb.loadPublicSuffixList(),
|
||||||
|
µb.staticNetFilteringEngine.readyToUse()
|
||||||
|
]).then(( ) => {
|
||||||
|
onPSLReady();
|
||||||
|
});
|
||||||
µb.loadRedirectResources();
|
µb.loadRedirectResources();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* jshint bitwise: false */
|
/* jshint bitwise: false */
|
||||||
/* global punycode, HNTrieBuilder */
|
/* global punycode, hnTrieManager */
|
||||||
|
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
@ -30,7 +30,7 @@
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var µb = µBlock;
|
const µb = µBlock;
|
||||||
|
|
||||||
// fedcba9876543210
|
// fedcba9876543210
|
||||||
// | | |||
|
// | | |||
|
||||||
@ -43,15 +43,15 @@ var µb = µBlock;
|
|||||||
// | +-------- bit 4- 8: type [0 - 31]
|
// | +-------- bit 4- 8: type [0 - 31]
|
||||||
// +------------- bit 9-15: unused
|
// +------------- bit 9-15: unused
|
||||||
|
|
||||||
var BlockAction = 0 << 0;
|
const BlockAction = 0 << 0;
|
||||||
var AllowAction = 1 << 0;
|
const AllowAction = 1 << 0;
|
||||||
var Important = 1 << 1;
|
const Important = 1 << 1;
|
||||||
var AnyParty = 0 << 2;
|
const AnyParty = 0 << 2;
|
||||||
var FirstParty = 1 << 2;
|
const FirstParty = 1 << 2;
|
||||||
var ThirdParty = 2 << 2;
|
const ThirdParty = 2 << 2;
|
||||||
|
|
||||||
var AnyType = 0 << 4;
|
const AnyType = 0 << 4;
|
||||||
var typeNameToTypeValue = {
|
const typeNameToTypeValue = {
|
||||||
'no_type': 0 << 4,
|
'no_type': 0 << 4,
|
||||||
'stylesheet': 1 << 4,
|
'stylesheet': 1 << 4,
|
||||||
'image': 2 << 4,
|
'image': 2 << 4,
|
||||||
@ -75,9 +75,9 @@ var typeNameToTypeValue = {
|
|||||||
'webrtc': 19 << 4,
|
'webrtc': 19 << 4,
|
||||||
'unsupported': 20 << 4
|
'unsupported': 20 << 4
|
||||||
};
|
};
|
||||||
var otherTypeBitValue = typeNameToTypeValue.other;
|
const otherTypeBitValue = typeNameToTypeValue.other;
|
||||||
|
|
||||||
var typeValueToTypeName = {
|
const typeValueToTypeName = {
|
||||||
1: 'stylesheet',
|
1: 'stylesheet',
|
||||||
2: 'image',
|
2: 'image',
|
||||||
3: 'object',
|
3: 'object',
|
||||||
@ -100,15 +100,15 @@ var typeValueToTypeName = {
|
|||||||
20: 'unsupported'
|
20: 'unsupported'
|
||||||
};
|
};
|
||||||
|
|
||||||
var BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty;
|
const BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty;
|
||||||
var BlockAnyType = BlockAction | AnyType;
|
const BlockAnyType = BlockAction | AnyType;
|
||||||
var BlockAnyParty = BlockAction | AnyParty;
|
const BlockAnyParty = BlockAction | AnyParty;
|
||||||
|
|
||||||
var AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty;
|
const AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty;
|
||||||
var AllowAnyType = AllowAction | AnyType;
|
const AllowAnyType = AllowAction | AnyType;
|
||||||
var AllowAnyParty = AllowAction | AnyParty;
|
const AllowAnyParty = AllowAction | AnyParty;
|
||||||
|
|
||||||
var genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generichide,
|
const genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generichide,
|
||||||
genericHideImportant = BlockAction | AnyParty | typeNameToTypeValue.generichide | Important;
|
genericHideImportant = BlockAction | AnyParty | typeNameToTypeValue.generichide | Important;
|
||||||
|
|
||||||
// ABP filters: https://adblockplus.org/en/filters
|
// ABP filters: https://adblockplus.org/en/filters
|
||||||
@ -119,7 +119,7 @@ var genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generich
|
|||||||
// See the following as short-lived registers, used during evaluation. They are
|
// See the following as short-lived registers, used during evaluation. They are
|
||||||
// valid until the next evaluation.
|
// valid until the next evaluation.
|
||||||
|
|
||||||
var pageHostnameRegister = '',
|
let pageHostnameRegister = '',
|
||||||
requestHostnameRegister = '';
|
requestHostnameRegister = '';
|
||||||
//var filterRegister = null;
|
//var filterRegister = null;
|
||||||
//var categoryRegister = '';
|
//var categoryRegister = '';
|
||||||
@ -127,13 +127,13 @@ var pageHostnameRegister = '',
|
|||||||
// Local helpers
|
// Local helpers
|
||||||
|
|
||||||
// Be sure to not confuse 'example.com' with 'anotherexample.com'
|
// Be sure to not confuse 'example.com' with 'anotherexample.com'
|
||||||
var isFirstParty = function(domain, hostname) {
|
const isFirstParty = function(domain, hostname) {
|
||||||
return hostname.endsWith(domain) &&
|
return hostname.endsWith(domain) &&
|
||||||
(hostname.length === domain.length ||
|
(hostname.length === domain.length ||
|
||||||
hostname.charCodeAt(hostname.length - domain.length - 1) === 0x2E /* '.' */);
|
hostname.charCodeAt(hostname.length - domain.length - 1) === 0x2E /* '.' */);
|
||||||
};
|
};
|
||||||
|
|
||||||
var normalizeRegexSource = function(s) {
|
const normalizeRegexSource = function(s) {
|
||||||
try {
|
try {
|
||||||
var re = new RegExp(s);
|
var re = new RegExp(s);
|
||||||
return re.source;
|
return re.source;
|
||||||
@ -143,12 +143,12 @@ var normalizeRegexSource = function(s) {
|
|||||||
return '';
|
return '';
|
||||||
};
|
};
|
||||||
|
|
||||||
var rawToRegexStr = function(s, anchor) {
|
const rawToRegexStr = function(s, anchor) {
|
||||||
var me = rawToRegexStr;
|
let me = rawToRegexStr;
|
||||||
// https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/
|
// https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/
|
||||||
// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
|
// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
|
||||||
// Also: remove leading/trailing wildcards -- there is no point.
|
// Also: remove leading/trailing wildcards -- there is no point.
|
||||||
var reStr = s.replace(me.escape1, '\\$&')
|
let reStr = s.replace(me.escape1, '\\$&')
|
||||||
.replace(me.escape2, '(?:[^%.0-9a-z_-]|$)')
|
.replace(me.escape2, '(?:[^%.0-9a-z_-]|$)')
|
||||||
.replace(me.escape3, '')
|
.replace(me.escape3, '')
|
||||||
.replace(me.escape4, '[^ ]*?');
|
.replace(me.escape4, '[^ ]*?');
|
||||||
@ -175,7 +175,7 @@ rawToRegexStr.reTextHostnameAnchor2 = '^[a-z-]+://(?:[^/?#]+)?';
|
|||||||
|
|
||||||
const filterDataSerialize = µb.CompiledLineIO.serialize;
|
const filterDataSerialize = µb.CompiledLineIO.serialize;
|
||||||
|
|
||||||
var toLogDataInternal = function(categoryBits, tokenHash, filter) {
|
const toLogDataInternal = function(categoryBits, tokenHash, filter) {
|
||||||
if ( filter === null ) { return undefined; }
|
if ( filter === null ) { return undefined; }
|
||||||
let logData = filter.logData();
|
let logData = filter.logData();
|
||||||
logData.compiled = filterDataSerialize([
|
logData.compiled = filterDataSerialize([
|
||||||
@ -209,7 +209,7 @@ var toLogDataInternal = function(categoryBits, tokenHash, filter) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// First character of match must be within the hostname part of the url.
|
// First character of match must be within the hostname part of the url.
|
||||||
var isHnAnchored = function(url, matchStart) {
|
const isHnAnchored = function(url, matchStart) {
|
||||||
var hnStart = url.indexOf('://');
|
var hnStart = url.indexOf('://');
|
||||||
if ( hnStart === -1 ) { return false; }
|
if ( hnStart === -1 ) { return false; }
|
||||||
hnStart += 3;
|
hnStart += 3;
|
||||||
@ -222,9 +222,9 @@ var isHnAnchored = function(url, matchStart) {
|
|||||||
return url.charCodeAt(matchStart - 1) === 0x2E;
|
return url.charCodeAt(matchStart - 1) === 0x2E;
|
||||||
};
|
};
|
||||||
|
|
||||||
var reURLPostHostnameAnchors = /[\/?#]/;
|
const reURLPostHostnameAnchors = /[\/?#]/;
|
||||||
|
|
||||||
var arrayStrictEquals = function(a, b) {
|
const arrayStrictEquals = function(a, b) {
|
||||||
var n = a.length;
|
var n = a.length;
|
||||||
if ( n !== b.length ) { return false; }
|
if ( n !== b.length ) { return false; }
|
||||||
var isArray, x, y;
|
var isArray, x, y;
|
||||||
@ -251,22 +251,22 @@ var arrayStrictEquals = function(a, b) {
|
|||||||
|
|
||||||
**/
|
**/
|
||||||
|
|
||||||
var filterClasses = [],
|
const filterClasses = [];
|
||||||
filterClassIdGenerator = 0;
|
let filterClassIdGenerator = 0;
|
||||||
|
|
||||||
var registerFilterClass = function(ctor) {
|
const registerFilterClass = function(ctor) {
|
||||||
var fid = filterClassIdGenerator++;
|
let fid = filterClassIdGenerator++;
|
||||||
ctor.fid = ctor.prototype.fid = fid;
|
ctor.fid = ctor.prototype.fid = fid;
|
||||||
filterClasses[fid] = ctor;
|
filterClasses[fid] = ctor;
|
||||||
};
|
};
|
||||||
|
|
||||||
var filterFromCompiledData = function(args) {
|
const filterFromCompiledData = function(args) {
|
||||||
return filterClasses[args[0]].load(args);
|
return filterClasses[args[0]].load(args);
|
||||||
};
|
};
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterTrue = function() {
|
const FilterTrue = function() {
|
||||||
};
|
};
|
||||||
|
|
||||||
FilterTrue.prototype.match = function() {
|
FilterTrue.prototype.match = function() {
|
||||||
@ -297,7 +297,7 @@ registerFilterClass(FilterTrue);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterPlain = function(s, tokenBeg) {
|
const FilterPlain = function(s, tokenBeg) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
this.tokenBeg = tokenBeg;
|
this.tokenBeg = tokenBeg;
|
||||||
};
|
};
|
||||||
@ -330,7 +330,7 @@ registerFilterClass(FilterPlain);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterPlainPrefix0 = function(s) {
|
const FilterPlainPrefix0 = function(s) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -362,7 +362,7 @@ registerFilterClass(FilterPlainPrefix0);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterPlainPrefix1 = function(s) {
|
const FilterPlainPrefix1 = function(s) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -394,7 +394,7 @@ registerFilterClass(FilterPlainPrefix1);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterPlainHostname = function(s) {
|
const FilterPlainHostname = function(s) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -429,7 +429,7 @@ registerFilterClass(FilterPlainHostname);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterPlainLeftAnchored = function(s) {
|
const FilterPlainLeftAnchored = function(s) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -461,7 +461,7 @@ registerFilterClass(FilterPlainLeftAnchored);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterPlainRightAnchored = function(s) {
|
const FilterPlainRightAnchored = function(s) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -493,7 +493,7 @@ registerFilterClass(FilterPlainRightAnchored);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterExactMatch = function(s) {
|
const FilterExactMatch = function(s) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -525,7 +525,7 @@ registerFilterClass(FilterExactMatch);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterPlainHnAnchored = function(s) {
|
const FilterPlainHnAnchored = function(s) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -558,7 +558,7 @@ registerFilterClass(FilterPlainHnAnchored);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterGeneric = function(s, anchor) {
|
const FilterGeneric = function(s, anchor) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
this.anchor = anchor;
|
this.anchor = anchor;
|
||||||
};
|
};
|
||||||
@ -603,7 +603,7 @@ registerFilterClass(FilterGeneric);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterGenericHnAnchored = function(s) {
|
const FilterGenericHnAnchored = function(s) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -642,7 +642,7 @@ registerFilterClass(FilterGenericHnAnchored);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterGenericHnAndRightAnchored = function(s) {
|
const FilterGenericHnAndRightAnchored = function(s) {
|
||||||
FilterGenericHnAnchored.call(this, s);
|
FilterGenericHnAnchored.call(this, s);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -682,7 +682,7 @@ registerFilterClass(FilterGenericHnAndRightAnchored);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterRegex = function(s) {
|
const FilterRegex = function(s) {
|
||||||
this.re = s;
|
this.re = s;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -723,7 +723,7 @@ registerFilterClass(FilterRegex);
|
|||||||
|
|
||||||
// Filtering according to the origin.
|
// Filtering according to the origin.
|
||||||
|
|
||||||
var FilterOrigin = function() {
|
const FilterOrigin = function() {
|
||||||
};
|
};
|
||||||
|
|
||||||
FilterOrigin.prototype.wrapped = {
|
FilterOrigin.prototype.wrapped = {
|
||||||
@ -766,7 +766,7 @@ FilterOrigin.prototype.compile = function() {
|
|||||||
|
|
||||||
// *** start of specialized origin matchers
|
// *** start of specialized origin matchers
|
||||||
|
|
||||||
var FilterOriginHit = function(domainOpt) {
|
const FilterOriginHit = function(domainOpt) {
|
||||||
FilterOrigin.call(this);
|
FilterOrigin.call(this);
|
||||||
this.hostname = domainOpt;
|
this.hostname = domainOpt;
|
||||||
};
|
};
|
||||||
@ -792,7 +792,7 @@ FilterOriginHit.prototype = Object.create(FilterOrigin.prototype, {
|
|||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
var FilterOriginMiss = function(domainOpt) {
|
const FilterOriginMiss = function(domainOpt) {
|
||||||
FilterOrigin.call(this);
|
FilterOrigin.call(this);
|
||||||
this.hostname = domainOpt.slice(1);
|
this.hostname = domainOpt.slice(1);
|
||||||
};
|
};
|
||||||
@ -811,14 +811,15 @@ FilterOriginMiss.prototype = Object.create(FilterOrigin.prototype, {
|
|||||||
var needle = this.hostname, haystack = pageHostnameRegister;
|
var needle = this.hostname, haystack = pageHostnameRegister;
|
||||||
if ( haystack.endsWith(needle) === false ) { return true; }
|
if ( haystack.endsWith(needle) === false ) { return true; }
|
||||||
var offset = haystack.length - needle.length;
|
var offset = haystack.length - needle.length;
|
||||||
return offset !== 0 && haystack.charCodeAt(offset - 1) !== 0x2E /* '.' */;
|
return offset !== 0 &&
|
||||||
|
haystack.charCodeAt(offset - 1) !== 0x2E /* '.' */;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
var FilterOriginHitSet = function(domainOpt) {
|
const FilterOriginHitSet = function(domainOpt) {
|
||||||
FilterOrigin.call(this);
|
FilterOrigin.call(this);
|
||||||
this.domainOpt = domainOpt.length < 128
|
this.domainOpt = domainOpt.length < 128
|
||||||
? domainOpt
|
? domainOpt
|
||||||
@ -840,17 +841,17 @@ FilterOriginHitSet.prototype = Object.create(FilterOrigin.prototype, {
|
|||||||
},
|
},
|
||||||
matchOrigin: {
|
matchOrigin: {
|
||||||
value: function() {
|
value: function() {
|
||||||
if ( this.oneOf === null ) {
|
if ( hnTrieManager.isValidRef(this.oneOf) === false ) {
|
||||||
this.oneOf = HNTrieBuilder.fromDomainOpt(this.domainOpt);
|
this.oneOf = hnTrieManager.fromDomainOpt(this.domainOpt);
|
||||||
}
|
}
|
||||||
return this.oneOf.matches(pageHostnameRegister);
|
return this.oneOf.matches(pageHostnameRegister) === 1;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
var FilterOriginMissSet = function(domainOpt) {
|
const FilterOriginMissSet = function(domainOpt) {
|
||||||
FilterOrigin.call(this);
|
FilterOrigin.call(this);
|
||||||
this.domainOpt = domainOpt.length < 128
|
this.domainOpt = domainOpt.length < 128
|
||||||
? domainOpt
|
? domainOpt
|
||||||
@ -872,17 +873,19 @@ FilterOriginMissSet.prototype = Object.create(FilterOrigin.prototype, {
|
|||||||
},
|
},
|
||||||
matchOrigin: {
|
matchOrigin: {
|
||||||
value: function() {
|
value: function() {
|
||||||
if ( this.noneOf === null ) {
|
if ( hnTrieManager.isValidRef(this.noneOf) === false ) {
|
||||||
this.noneOf = HNTrieBuilder.fromDomainOpt(this.domainOpt.replace(/~/g, ''));
|
this.noneOf = hnTrieManager.fromDomainOpt(
|
||||||
|
this.domainOpt.replace(/~/g, '')
|
||||||
|
);
|
||||||
}
|
}
|
||||||
return this.noneOf.matches(pageHostnameRegister) === false;
|
return this.noneOf.matches(pageHostnameRegister) === 0;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
var FilterOriginMixedSet = function(domainOpt) {
|
const FilterOriginMixedSet = function(domainOpt) {
|
||||||
FilterOrigin.call(this);
|
FilterOrigin.call(this);
|
||||||
this.domainOpt = domainOpt.length < 128
|
this.domainOpt = domainOpt.length < 128
|
||||||
? domainOpt
|
? domainOpt
|
||||||
@ -903,20 +906,16 @@ FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
|
|||||||
},
|
},
|
||||||
init: {
|
init: {
|
||||||
value: function() {
|
value: function() {
|
||||||
var oneOf = [], noneOf = [],
|
let oneOf = [], noneOf = [];
|
||||||
hostnames = this.domainOpt.split('|'),
|
for ( let hostname of this.domainOpt.split('|') ) {
|
||||||
i = hostnames.length,
|
|
||||||
hostname;
|
|
||||||
while ( i-- ) {
|
|
||||||
hostname = hostnames[i];
|
|
||||||
if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) {
|
if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) {
|
||||||
noneOf.push(hostname.slice(1));
|
noneOf.push(hostname.slice(1));
|
||||||
} else {
|
} else {
|
||||||
oneOf.push(hostname);
|
oneOf.push(hostname);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.oneOf = HNTrieBuilder.fromIterable(oneOf);
|
this.oneOf = hnTrieManager.fromIterable(oneOf);
|
||||||
this.noneOf = HNTrieBuilder.fromIterable(noneOf);
|
this.noneOf = hnTrieManager.fromIterable(noneOf);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
toDomainOpt: {
|
toDomainOpt: {
|
||||||
@ -926,10 +925,12 @@ FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
|
|||||||
},
|
},
|
||||||
matchOrigin: {
|
matchOrigin: {
|
||||||
value: function() {
|
value: function() {
|
||||||
if ( this.oneOf === null ) { this.init(); }
|
if ( hnTrieManager.isValidRef(this.oneOf) === false ) {
|
||||||
var needle = pageHostnameRegister;
|
this.init();
|
||||||
return this.oneOf.matches(needle) &&
|
}
|
||||||
this.noneOf.matches(needle) === false;
|
let needle = pageHostnameRegister;
|
||||||
|
return this.oneOf.matches(needle) === 1 &&
|
||||||
|
this.noneOf.matches(needle) === 0;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@ -981,7 +982,7 @@ registerFilterClass(FilterOrigin);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterDataHolder = function(dataType, dataStr) {
|
const FilterDataHolder = function(dataType, dataStr) {
|
||||||
this.dataType = dataType;
|
this.dataType = dataType;
|
||||||
this.dataStr = dataStr;
|
this.dataStr = dataStr;
|
||||||
this.wrapped = undefined;
|
this.wrapped = undefined;
|
||||||
@ -1024,7 +1025,7 @@ registerFilterClass(FilterDataHolder);
|
|||||||
|
|
||||||
// Helper class for storing instances of FilterDataHolder.
|
// Helper class for storing instances of FilterDataHolder.
|
||||||
|
|
||||||
var FilterDataHolderEntry = function(categoryBits, tokenHash, fdata) {
|
const FilterDataHolderEntry = function(categoryBits, tokenHash, fdata) {
|
||||||
this.categoryBits = categoryBits;
|
this.categoryBits = categoryBits;
|
||||||
this.tokenHash = tokenHash;
|
this.tokenHash = tokenHash;
|
||||||
this.filter = filterFromCompiledData(fdata);
|
this.filter = filterFromCompiledData(fdata);
|
||||||
@ -1047,7 +1048,7 @@ FilterDataHolderEntry.load = function(data) {
|
|||||||
|
|
||||||
// Dictionary of hostnames
|
// Dictionary of hostnames
|
||||||
//
|
//
|
||||||
var FilterHostnameDict = function() {
|
const FilterHostnameDict = function() {
|
||||||
this.h = ''; // short-lived register
|
this.h = ''; // short-lived register
|
||||||
this.dict = new Set();
|
this.dict = new Set();
|
||||||
};
|
};
|
||||||
@ -1138,7 +1139,7 @@ registerFilterClass(FilterHostnameDict);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterPair = function(a, b) {
|
const FilterPair = function(a, b) {
|
||||||
this.f1 = a;
|
this.f1 = a;
|
||||||
this.f2 = b;
|
this.f2 = b;
|
||||||
this.f = null;
|
this.f = null;
|
||||||
@ -1217,7 +1218,7 @@ registerFilterClass(FilterPair);
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterBucket = function(a, b, c) {
|
const FilterBucket = function(a, b, c) {
|
||||||
this.filters = [];
|
this.filters = [];
|
||||||
this.f = null;
|
this.f = null;
|
||||||
if ( a !== undefined ) {
|
if ( a !== undefined ) {
|
||||||
@ -1315,7 +1316,7 @@ registerFilterClass(FilterBucket);
|
|||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterParser = function() {
|
const FilterParser = function() {
|
||||||
this.cantWebsocket = vAPI.cantWebsocket;
|
this.cantWebsocket = vAPI.cantWebsocket;
|
||||||
this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
|
this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
|
||||||
this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i;
|
this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i;
|
||||||
@ -1933,7 +1934,7 @@ FilterParser.prototype.makeToken = function() {
|
|||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterContainer = function() {
|
const FilterContainer = function() {
|
||||||
this.reIsGeneric = /[\^\*]/;
|
this.reIsGeneric = /[\^\*]/;
|
||||||
this.filterParser = new FilterParser();
|
this.filterParser = new FilterParser();
|
||||||
this.urlTokenizer = µb.urlTokenizer;
|
this.urlTokenizer = µb.urlTokenizer;
|
||||||
@ -1960,6 +1961,9 @@ FilterContainer.prototype.reset = function() {
|
|||||||
this.dataFilters = new Map();
|
this.dataFilters = new Map();
|
||||||
this.filterParser.reset();
|
this.filterParser.reset();
|
||||||
|
|
||||||
|
// This will invalidate all hn tries throughout uBO:
|
||||||
|
hnTrieManager.reset();
|
||||||
|
|
||||||
// Runtime registers
|
// Runtime registers
|
||||||
this.cbRegister = undefined;
|
this.cbRegister = undefined;
|
||||||
this.thRegister = undefined;
|
this.thRegister = undefined;
|
||||||
@ -2052,6 +2056,15 @@ FilterContainer.prototype.freeze = function() {
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
|
// This is necessary for when the filtering engine readiness will depend
|
||||||
|
// on asynchronous operations (ex.: when loading a wasm module).
|
||||||
|
|
||||||
|
FilterContainer.prototype.readyToUse = function() {
|
||||||
|
return hnTrieManager.readyToUse();
|
||||||
|
};
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
FilterContainer.prototype.toSelfie = function() {
|
FilterContainer.prototype.toSelfie = function() {
|
||||||
let categoriesToSelfie = function(categoryMap) {
|
let categoriesToSelfie = function(categoryMap) {
|
||||||
let selfie = [];
|
let selfie = [];
|
||||||
@ -2250,7 +2263,7 @@ FilterContainer.prototype.compileToAtomicFilter = function(
|
|||||||
|
|
||||||
// Only static filter with an explicit type can be redirected. If we reach
|
// Only static filter with an explicit type can be redirected. If we reach
|
||||||
// this point, it's because there is one or more explicit type.
|
// this point, it's because there is one or more explicit type.
|
||||||
if ( parsed.badFilter === false && parsed.redirect ) {
|
if ( parsed.redirect ) {
|
||||||
let redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
|
let redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
|
||||||
if ( Array.isArray(redirects) ) {
|
if ( Array.isArray(redirects) ) {
|
||||||
for ( let redirect of redirects ) {
|
for ( let redirect of redirects ) {
|
||||||
@ -2292,26 +2305,24 @@ FilterContainer.prototype.fromCompiledContent = function(reader) {
|
|||||||
FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out, outlog) {
|
FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out, outlog) {
|
||||||
if ( this.dataFilters.length === 0 ) { return; }
|
if ( this.dataFilters.length === 0 ) { return; }
|
||||||
|
|
||||||
var url = this.urlTokenizer.setURL(requestURL);
|
let url = this.urlTokenizer.setURL(requestURL);
|
||||||
|
|
||||||
requestHostnameRegister = µb.URI.hostnameFromURI(url);
|
pageHostnameRegister = requestHostnameRegister = µb.URI.hostnameFromURI(url);
|
||||||
|
|
||||||
// We need to visit ALL the matching filters.
|
// We need to visit ALL the matching filters.
|
||||||
var toAddImportant = new Map(),
|
let toAddImportant = new Map(),
|
||||||
toAdd = new Map(),
|
toAdd = new Map(),
|
||||||
toRemove = new Map();
|
toRemove = new Map();
|
||||||
|
|
||||||
var entry, f,
|
let tokenHashes = this.urlTokenizer.getTokens(),
|
||||||
tokenHashes = this.urlTokenizer.getTokens(),
|
|
||||||
tokenHash, tokenOffset,
|
|
||||||
i = 0;
|
i = 0;
|
||||||
while ( i < 32 ) {
|
while ( i < 32 ) {
|
||||||
tokenHash = tokenHashes[i++];
|
let tokenHash = tokenHashes[i++];
|
||||||
if ( tokenHash === 0 ) { break; }
|
if ( tokenHash === 0 ) { break; }
|
||||||
tokenOffset = tokenHashes[i++];
|
let tokenOffset = tokenHashes[i++];
|
||||||
entry = this.dataFilters.get(tokenHash);
|
let entry = this.dataFilters.get(tokenHash);
|
||||||
while ( entry !== undefined ) {
|
while ( entry !== undefined ) {
|
||||||
f = entry.filter;
|
let f = entry.filter;
|
||||||
if ( f.match(url, tokenOffset) === true ) {
|
if ( f.match(url, tokenOffset) === true ) {
|
||||||
if ( entry.categoryBits & 0x001 ) {
|
if ( entry.categoryBits & 0x001 ) {
|
||||||
toRemove.set(f.dataStr, entry);
|
toRemove.set(f.dataStr, entry);
|
||||||
@ -2324,9 +2335,9 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
|
|||||||
entry = entry.next;
|
entry = entry.next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
entry = this.dataFilters.get(this.noTokenHash);
|
let entry = this.dataFilters.get(this.noTokenHash);
|
||||||
while ( entry !== undefined ) {
|
while ( entry !== undefined ) {
|
||||||
f = entry.filter;
|
let f = entry.filter;
|
||||||
if ( f.match(url) === true ) {
|
if ( f.match(url) === true ) {
|
||||||
if ( entry.categoryBits & 0x001 ) {
|
if ( entry.categoryBits & 0x001 ) {
|
||||||
toRemove.set(f.dataStr, entry);
|
toRemove.set(f.dataStr, entry);
|
||||||
@ -2342,12 +2353,11 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
|
|||||||
if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; }
|
if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; }
|
||||||
|
|
||||||
// Remove entries overridden by other filters.
|
// Remove entries overridden by other filters.
|
||||||
var key;
|
for ( let key of toAddImportant.keys() ) {
|
||||||
for ( key of toAddImportant.keys() ) {
|
|
||||||
toAdd.delete(key);
|
toAdd.delete(key);
|
||||||
toRemove.delete(key);
|
toRemove.delete(key);
|
||||||
}
|
}
|
||||||
for ( key of toRemove.keys() ) {
|
for ( let key of toRemove.keys() ) {
|
||||||
if ( key === '' ) {
|
if ( key === '' ) {
|
||||||
toAdd.clear();
|
toAdd.clear();
|
||||||
break;
|
break;
|
||||||
@ -2355,26 +2365,25 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
|
|||||||
toAdd.delete(key);
|
toAdd.delete(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
var logData;
|
for ( let entry of toAddImportant ) {
|
||||||
for ( entry of toAddImportant ) {
|
|
||||||
out.push(entry[0]);
|
out.push(entry[0]);
|
||||||
if ( outlog === undefined ) { continue; }
|
if ( outlog === undefined ) { continue; }
|
||||||
logData = entry[1].logData();
|
let logData = entry[1].logData();
|
||||||
logData.source = 'static';
|
logData.source = 'static';
|
||||||
logData.result = 1;
|
logData.result = 1;
|
||||||
outlog.push(logData);
|
outlog.push(logData);
|
||||||
}
|
}
|
||||||
for ( entry of toAdd ) {
|
for ( let entry of toAdd ) {
|
||||||
out.push(entry[0]);
|
out.push(entry[0]);
|
||||||
if ( outlog === undefined ) { continue; }
|
if ( outlog === undefined ) { continue; }
|
||||||
logData = entry[1].logData();
|
let logData = entry[1].logData();
|
||||||
logData.source = 'static';
|
logData.source = 'static';
|
||||||
logData.result = 1;
|
logData.result = 1;
|
||||||
outlog.push(logData);
|
outlog.push(logData);
|
||||||
}
|
}
|
||||||
if ( outlog !== undefined ) {
|
if ( outlog !== undefined ) {
|
||||||
for ( entry of toRemove.values()) {
|
for ( let entry of toRemove.values()) {
|
||||||
logData = entry.logData();
|
let logData = entry.logData();
|
||||||
logData.source = 'static';
|
logData.source = 'static';
|
||||||
logData.result = 2;
|
logData.result = 2;
|
||||||
outlog.push(logData);
|
outlog.push(logData);
|
||||||
@ -2389,20 +2398,19 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
|
|||||||
|
|
||||||
FilterContainer.prototype.matchTokens = function(bucket, url) {
|
FilterContainer.prototype.matchTokens = function(bucket, url) {
|
||||||
// Hostname-only filters
|
// Hostname-only filters
|
||||||
var f = bucket.get(this.dotTokenHash);
|
let f = bucket.get(this.dotTokenHash);
|
||||||
if ( f !== undefined && f.match() === true ) {
|
if ( f !== undefined && f.match() === true ) {
|
||||||
this.thRegister = this.dotTokenHash;
|
this.thRegister = this.dotTokenHash;
|
||||||
this.fRegister = f;
|
this.fRegister = f;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
var tokenHashes = this.urlTokenizer.getTokens(),
|
let tokenHashes = this.urlTokenizer.getTokens(),
|
||||||
tokenHash, tokenOffset,
|
|
||||||
i = 0;
|
i = 0;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
tokenHash = tokenHashes[i++];
|
let tokenHash = tokenHashes[i++];
|
||||||
if ( tokenHash === 0 ) { break; }
|
if ( tokenHash === 0 ) { break; }
|
||||||
tokenOffset = tokenHashes[i++];
|
let tokenOffset = tokenHashes[i++];
|
||||||
f = bucket.get(tokenHash);
|
f = bucket.get(tokenHash);
|
||||||
if ( f !== undefined && f.match(url, tokenOffset) === true ) {
|
if ( f !== undefined && f.match(url, tokenOffset) === true ) {
|
||||||
this.thRegister = tokenHash;
|
this.thRegister = tokenHash;
|
||||||
@ -2437,8 +2445,10 @@ FilterContainer.prototype.matchStringGenericHide = function(requestURL) {
|
|||||||
let url = this.urlTokenizer.setURL(requestURL);
|
let url = this.urlTokenizer.setURL(requestURL);
|
||||||
|
|
||||||
// https://github.com/gorhill/uBlock/issues/2225
|
// https://github.com/gorhill/uBlock/issues/2225
|
||||||
// Important: this is used by FilterHostnameDict.match().
|
// Important:
|
||||||
requestHostnameRegister = µb.URI.hostnameFromURI(url);
|
// - `pageHostnameRegister` is used by FilterOrigin.matchOrigin().
|
||||||
|
// - `requestHostnameRegister` is used by FilterHostnameDict.match().
|
||||||
|
pageHostnameRegister = requestHostnameRegister = µb.URI.hostnameFromURI(url);
|
||||||
|
|
||||||
let bucket = this.categories.get(genericHideException);
|
let bucket = this.categories.get(genericHideException);
|
||||||
if ( !bucket || this.matchTokens(bucket, url) === false ) {
|
if ( !bucket || this.matchTokens(bucket, url) === false ) {
|
||||||
@ -2548,7 +2558,7 @@ FilterContainer.prototype.matchString = function(context) {
|
|||||||
// https://github.com/chrisaljoudi/uBlock/issues/519
|
// https://github.com/chrisaljoudi/uBlock/issues/519
|
||||||
// Use exact type match for anything beyond `other`
|
// Use exact type match for anything beyond `other`
|
||||||
// Also, be prepared to support unknown types
|
// Also, be prepared to support unknown types
|
||||||
var type = typeNameToTypeValue[context.requestType];
|
let type = typeNameToTypeValue[context.requestType];
|
||||||
if ( type === undefined ) {
|
if ( type === undefined ) {
|
||||||
type = otherTypeBitValue;
|
type = otherTypeBitValue;
|
||||||
} else if ( type === 0 || type > otherTypeBitValue ) {
|
} else if ( type === 0 || type > otherTypeBitValue ) {
|
||||||
@ -2577,7 +2587,7 @@ FilterContainer.prototype.matchString = function(context) {
|
|||||||
// filter.
|
// filter.
|
||||||
|
|
||||||
// Prime tokenizer: we get a normalized URL in return.
|
// Prime tokenizer: we get a normalized URL in return.
|
||||||
var url = this.urlTokenizer.setURL(context.requestURL);
|
let url = this.urlTokenizer.setURL(context.requestURL);
|
||||||
|
|
||||||
// These registers will be used by various filters
|
// These registers will be used by various filters
|
||||||
pageHostnameRegister = context.pageHostname || '';
|
pageHostnameRegister = context.pageHostname || '';
|
||||||
@ -2585,10 +2595,10 @@ FilterContainer.prototype.matchString = function(context) {
|
|||||||
|
|
||||||
this.fRegister = null;
|
this.fRegister = null;
|
||||||
|
|
||||||
var party = isFirstParty(context.pageDomain, context.requestHostname)
|
let party = isFirstParty(context.pageDomain, context.requestHostname)
|
||||||
? FirstParty
|
? FirstParty
|
||||||
: ThirdParty;
|
: ThirdParty;
|
||||||
var categories = this.categories,
|
let categories = this.categories,
|
||||||
catBits, bucket;
|
catBits, bucket;
|
||||||
|
|
||||||
// https://github.com/chrisaljoudi/uBlock/issues/139
|
// https://github.com/chrisaljoudi/uBlock/issues/139
|
||||||
|
@ -604,9 +604,7 @@
|
|||||||
|
|
||||||
µBlock.loadFilterLists = function(callback) {
|
µBlock.loadFilterLists = function(callback) {
|
||||||
// Callers are expected to check this first.
|
// Callers are expected to check this first.
|
||||||
if ( this.loadingFilterLists ) {
|
if ( this.loadingFilterLists ) { return; }
|
||||||
return;
|
|
||||||
}
|
|
||||||
this.loadingFilterLists = true;
|
this.loadingFilterLists = true;
|
||||||
|
|
||||||
var µb = this,
|
var µb = this,
|
||||||
@ -961,38 +959,31 @@
|
|||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
µBlock.loadPublicSuffixList = function(callback) {
|
µBlock.loadPublicSuffixList = function() {
|
||||||
var µb = this,
|
return new Promise(resolve => {
|
||||||
assetKey = µb.pslAssetKey,
|
// start of executor
|
||||||
compiledAssetKey = 'compiled/' + assetKey;
|
this.assets.get('compiled/' + this.pslAssetKey, details => {
|
||||||
|
let selfie;
|
||||||
if ( typeof callback !== 'function' ) {
|
|
||||||
callback = this.noopFunc;
|
|
||||||
}
|
|
||||||
var onRawListLoaded = function(details) {
|
|
||||||
if ( details.content !== '' ) {
|
|
||||||
µb.compilePublicSuffixList(details.content);
|
|
||||||
}
|
|
||||||
callback();
|
|
||||||
};
|
|
||||||
|
|
||||||
var onCompiledListLoaded = function(details) {
|
|
||||||
var selfie;
|
|
||||||
try {
|
try {
|
||||||
selfie = JSON.parse(details.content);
|
selfie = JSON.parse(details.content);
|
||||||
} catch (ex) {
|
} catch (ex) {
|
||||||
}
|
}
|
||||||
if (
|
if (
|
||||||
selfie === undefined ||
|
selfie instanceof Object &&
|
||||||
publicSuffixList.fromSelfie(selfie) === false
|
publicSuffixList.fromSelfie(selfie)
|
||||||
) {
|
) {
|
||||||
µb.assets.get(assetKey, onRawListLoaded);
|
resolve();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
callback();
|
this.assets.get(this.pslAssetKey, details => {
|
||||||
};
|
if ( details.content !== '' ) {
|
||||||
|
this.compilePublicSuffixList(details.content);
|
||||||
this.assets.get(compiledAssetKey, onCompiledListLoaded);
|
}
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
// end of executor
|
||||||
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
24
src/js/wasm/README.md
Normal file
24
src/js/wasm/README.md
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
### For code reviewers
|
||||||
|
|
||||||
|
All `wasm` files in this directory were created by compiling the
|
||||||
|
corresponding `wat` file using the command (using `hntrie.wat`/`hntrie.wasm`
|
||||||
|
as example):
|
||||||
|
|
||||||
|
wat2wasm hntrie.wat -o hntrie.wasm
|
||||||
|
|
||||||
|
Assuming:
|
||||||
|
|
||||||
|
- The command is executed from within the present directory.
|
||||||
|
|
||||||
|
### `wat2wasm` tool
|
||||||
|
|
||||||
|
The `wat2wasm` tool can be downloaded from an official WebAssembly project:
|
||||||
|
<https://github.com/WebAssembly/wabt/releases>.
|
||||||
|
|
||||||
|
### `wat2wasm` tool online
|
||||||
|
|
||||||
|
You can also use the following online `wat2wasm` tool:
|
||||||
|
<https://webassembly.github.io/wabt/demo/wat2wasm/>.
|
||||||
|
|
||||||
|
Just paste the whole content of the `wat` file to compile into the WAT pane.
|
||||||
|
Click the "Download" button to retrieve the resulting `wasm` file.
|
BIN
src/js/wasm/hntrie.wasm
Normal file
BIN
src/js/wasm/hntrie.wasm
Normal file
Binary file not shown.
200
src/js/wasm/hntrie.wat
Normal file
200
src/js/wasm/hntrie.wat
Normal file
@ -0,0 +1,200 @@
|
|||||||
|
;;
|
||||||
|
;; uBlock Origin - a browser extension to block requests.
|
||||||
|
;; Copyright (C) 2018-present Raymond Hill
|
||||||
|
;;
|
||||||
|
;; This program is free software: you can redistribute it and/or modify
|
||||||
|
;; it under the terms of the GNU General Public License as published by
|
||||||
|
;; the Free Software Foundation, either version 3 of the License, or
|
||||||
|
;; (at your option) any later version.
|
||||||
|
;;
|
||||||
|
;; This program is distributed in the hope that it will be useful,
|
||||||
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
;; GNU General Public License for more details.
|
||||||
|
;;
|
||||||
|
;; You should have received a copy of the GNU General Public License
|
||||||
|
;; along with this program. If not, see {http://www.gnu.org/licenses/}.
|
||||||
|
;;
|
||||||
|
;; Home: https://github.com/gorhill/uBlock
|
||||||
|
;; File: hntrie.wat
|
||||||
|
;; Description: WebAssembly code used by src/js/hntrie.js
|
||||||
|
;; How to compile: See README.md in this directory.
|
||||||
|
|
||||||
|
(module
|
||||||
|
;;
|
||||||
|
;; module start
|
||||||
|
;;
|
||||||
|
|
||||||
|
;; (func $log (import "imports" "log") (param i32 i32 i32))
|
||||||
|
|
||||||
|
(memory (import "imports" "memory") 1)
|
||||||
|
|
||||||
|
;;
|
||||||
|
;; Public functions
|
||||||
|
;;
|
||||||
|
|
||||||
|
;;
|
||||||
|
;; unsigned int matches(offset)
|
||||||
|
;;
|
||||||
|
;; Test whether the currently set needle matches the trie at specified offset.
|
||||||
|
;;
|
||||||
|
;; Memory layout, byte offset:
|
||||||
|
;; 0-254: encoded needle (ASCII)
|
||||||
|
;; 255 : needle length
|
||||||
|
;; 256- : tries
|
||||||
|
;;
|
||||||
|
(func (export "matches")
|
||||||
|
(param $itrie i32)
|
||||||
|
(result i32) ;; result: 0 = miss, 1 = hit
|
||||||
|
(local $ineedle i32) ;; current needle offset
|
||||||
|
(local $nchar i32) ;; needle char being processed
|
||||||
|
(local $tchar i32) ;; trie char being processed
|
||||||
|
(local $lxtra i32)
|
||||||
|
(local $ixtra i32)
|
||||||
|
i32.const 255
|
||||||
|
i32.load8_u
|
||||||
|
set_local $ineedle
|
||||||
|
loop $nextNeedleChar
|
||||||
|
;; ineedle -= 1;
|
||||||
|
get_local $ineedle
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
tee_local $ineedle
|
||||||
|
;; let nchar = ineedle === -1 ? 0 : buf[ineedle];
|
||||||
|
i32.const 0
|
||||||
|
i32.lt_s
|
||||||
|
if
|
||||||
|
i32.const 0
|
||||||
|
set_local $nchar
|
||||||
|
else
|
||||||
|
get_local $ineedle
|
||||||
|
i32.load8_u
|
||||||
|
set_local $nchar
|
||||||
|
end
|
||||||
|
block $trieCharEqNeedleChar loop $nextTrieChar
|
||||||
|
;; let tchar = buf[itrie+8];
|
||||||
|
get_local $itrie
|
||||||
|
i32.load8_u offset=8
|
||||||
|
tee_local $tchar
|
||||||
|
;; if ( tchar === nchar ) { break; }
|
||||||
|
get_local $nchar
|
||||||
|
i32.eq
|
||||||
|
br_if $trieCharEqNeedleChar
|
||||||
|
;; if ( tchar === 0 && nchar === 0x2E ) { return 1; }
|
||||||
|
get_local $tchar
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
get_local $nchar
|
||||||
|
i32.const 0x2E
|
||||||
|
i32.eq
|
||||||
|
if
|
||||||
|
i32.const 1
|
||||||
|
return
|
||||||
|
end
|
||||||
|
end
|
||||||
|
;; itrie = buf32[itrie >>> 2];
|
||||||
|
get_local $itrie
|
||||||
|
i32.load
|
||||||
|
tee_local $itrie
|
||||||
|
;; if ( itrie === 0 ) { return 0; }
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
i32.const 0
|
||||||
|
return
|
||||||
|
end
|
||||||
|
br $nextTrieChar
|
||||||
|
end end
|
||||||
|
;; if ( nchar === 0 ) { return 1; }
|
||||||
|
get_local $nchar
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
i32.const 1
|
||||||
|
return
|
||||||
|
end
|
||||||
|
;; let lxtra = buf[itrie+9];
|
||||||
|
get_local $itrie
|
||||||
|
i32.load8_u offset=9
|
||||||
|
tee_local $lxtra
|
||||||
|
i32.eqz
|
||||||
|
if else
|
||||||
|
;; if ( lxtra > ineedle ) { return 0; }
|
||||||
|
get_local $lxtra
|
||||||
|
get_local $ineedle
|
||||||
|
i32.gt_u
|
||||||
|
if
|
||||||
|
i32.const 0
|
||||||
|
return
|
||||||
|
end
|
||||||
|
;; let ixtra = itrie + 10;
|
||||||
|
get_local $itrie
|
||||||
|
i32.const 10
|
||||||
|
i32.add
|
||||||
|
tee_local $ixtra
|
||||||
|
;; lxtra += ixtra;
|
||||||
|
get_local $lxtra
|
||||||
|
i32.add
|
||||||
|
set_local $lxtra
|
||||||
|
;; do {
|
||||||
|
block $noMoreExtraChars loop
|
||||||
|
;; ineedle -= 1;
|
||||||
|
get_local $ineedle
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
tee_local $ineedle
|
||||||
|
;; if ( buf[ineedle] !== buf[ixtra] ) { return 0; }
|
||||||
|
i32.load8_u
|
||||||
|
get_local $ixtra
|
||||||
|
i32.load8_u
|
||||||
|
i32.ne
|
||||||
|
if
|
||||||
|
i32.const 0
|
||||||
|
return
|
||||||
|
end
|
||||||
|
;; ixtra += 1;
|
||||||
|
get_local $ixtra
|
||||||
|
i32.const 1
|
||||||
|
i32.add
|
||||||
|
tee_local $ixtra
|
||||||
|
;; } while ( ixtra !== lxtra );
|
||||||
|
get_local $lxtra
|
||||||
|
i32.eq
|
||||||
|
br_if $noMoreExtraChars
|
||||||
|
br 0
|
||||||
|
end end
|
||||||
|
end
|
||||||
|
;; itrie = buf32[itrie + 4 >>> 2];
|
||||||
|
get_local $itrie
|
||||||
|
i32.load offset=4
|
||||||
|
tee_local $itrie
|
||||||
|
;; if ( itrie === 0 ) {
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
;; return ineedle === 0 || buf[ineedle-1] === 0x2E ? 1 : 0;
|
||||||
|
get_local $ineedle
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
i32.const 1
|
||||||
|
return
|
||||||
|
end
|
||||||
|
get_local $ineedle
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
i32.load8_u
|
||||||
|
i32.const 0x2E
|
||||||
|
i32.eq
|
||||||
|
if
|
||||||
|
i32.const 1
|
||||||
|
return
|
||||||
|
end
|
||||||
|
i32.const 0
|
||||||
|
return
|
||||||
|
end
|
||||||
|
br 0
|
||||||
|
end
|
||||||
|
i32.const 0
|
||||||
|
)
|
||||||
|
|
||||||
|
;;
|
||||||
|
;; module end
|
||||||
|
;;
|
||||||
|
)
|
479
test/hnset-benchmark.html
Normal file
479
test/hnset-benchmark.html
Normal file
File diff suppressed because one or more lines are too long
45866
test/hntrie-test.html
Normal file
45866
test/hntrie-test.html
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user