mirror of
https://github.com/gorhill/uBlock.git
synced 2024-11-05 18:32:30 +01:00
Performance + code maintenance work on static network filtering engine
Implement a plain string trie container class: STrieContainer. Make use of STrieContainer where beneficial. Some filter buckets can grow quite large, and in such cases coalescing "trieable" filter classes into a single trie improves lookup performance and reduces memory usage. For instance, at time of commit, the filter bucket for the `ad` keyword contains 919 entries[1]. Coalescing trieable filters of the same class into a single plain string trie reduced the size of the bucket to 50 entries + two tries which are scanned only once each whenever the bucket is visited. [1] Enter the following code at uBO's dev console: µBlock.staticNetFilteringEngine.categories.get(0).get(µBlock.urlTokenizer.tokenHashFromString('ad')) Refactor static network filtering engine code to make use of ES6's syntactic sugar `class`. Change first auto-update run from 7 to 5 minutes.
This commit is contained in:
parent
92c5f17b78
commit
c229003d31
@ -16,6 +16,7 @@
|
||||
<script src="js/background.js"></script>
|
||||
<script src="js/traffic.js"></script>
|
||||
<script src="js/hntrie.js"></script>
|
||||
<script src="js/strie.js"></script>
|
||||
<script src="js/utils.js"></script>
|
||||
<script src="js/uritools.js"></script>
|
||||
<script src="js/lz4.js"></script>
|
||||
|
@ -137,8 +137,8 @@ const µBlock = (function() { // jshint ignore:line
|
||||
|
||||
// Read-only
|
||||
systemSettings: {
|
||||
compiledMagic: 7, // Increase when compiled format changes
|
||||
selfieMagic: 8 // Increase when selfie format changes
|
||||
compiledMagic: 8, // Increase when compiled format changes
|
||||
selfieMagic: 9 // Increase when selfie format changes
|
||||
},
|
||||
|
||||
restoreBackupSettings: {
|
||||
|
@ -67,7 +67,7 @@ var onAllReady = function() {
|
||||
// https://github.com/chrisaljoudi/uBlock/issues/184
|
||||
// Check for updates not too far in the future.
|
||||
µb.assets.addObserver(µb.assetObserver.bind(µb));
|
||||
µb.scheduleAssetUpdater(µb.userSettings.autoUpdate ? 7 * 60 * 1000 : 0);
|
||||
µb.scheduleAssetUpdater(µb.userSettings.autoUpdate ? 5 * 60 * 1000 : 0);
|
||||
|
||||
// vAPI.cloud is optional.
|
||||
if ( µb.cloudStorageSupported ) {
|
||||
|
File diff suppressed because it is too large
Load Diff
445
src/js/strie.js
Normal file
445
src/js/strie.js
Normal file
@ -0,0 +1,445 @@
|
||||
/*******************************************************************************
|
||||
|
||||
uBlock Origin - a browser extension to block requests.
|
||||
Copyright (C) 2019-present Raymond Hill
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see {http://www.gnu.org/licenses/}.
|
||||
|
||||
Home: https://github.com/gorhill/uBlock
|
||||
*/
|
||||
|
||||
/* exported STrieContainer */
|
||||
|
||||
'use strict';
|
||||
|
||||
/*******************************************************************************
|
||||
|
||||
A STrieContainer is mostly a large buffer in which distinct but related
|
||||
tries are stored. The memory layout of the buffer is as follows:
|
||||
|
||||
0-255: reserved
|
||||
256-259: offset to start of trie data section (=> trie0)
|
||||
260-263: offset to end of trie data section (=> trie1)
|
||||
264-267: offset to start of character data section (=> char0)
|
||||
268-271: offset to end of character data section (=> char1)
|
||||
272: start of trie data section
|
||||
|
||||
*/
|
||||
|
||||
// The buffer is always sized to a multiple of this many bytes.
const STRIE_PAGE_SIZE   = 65536;
                                                        // i32 /  i8
const STRIE_TRIE0_SLOT  = 256 >>> 2;                    //  64 / 256
const STRIE_TRIE1_SLOT  = STRIE_TRIE0_SLOT + 1;         //  65 / 260
const STRIE_CHAR0_SLOT  = STRIE_TRIE0_SLOT + 2;         //  66 / 264
const STRIE_CHAR1_SLOT  = STRIE_TRIE0_SLOT + 3;         //  67 / 268
const STRIE_TRIE0_START = STRIE_TRIE0_SLOT + 4 << 2;    // 272


/**
 * Container for plain string tries sharing a single buffer.
 *
 * The buffer is viewed both as bytes (`buf`) and as 32-bit words (`buf32`).
 * The trie data section is made of cells, each cell being three consecutive
 * 32-bit words:
 *
 *   +0: index of the next cell to try when the first character of this
 *       cell's segment does not match ("down"), 0 if none
 *   +1: index of the cell holding what follows this cell's segment
 *       ("right"), 0 if none
 *   +2: segment descriptor: length in the upper 8 bits, offset relative to
 *       the start of the character data section in the lower 24 bits. A
 *       descriptor of 0 denotes a boundary cell, i.e. a pattern ends there.
 *
 * Cell indices are indices into `buf32`. Characters are stored one byte per
 * UTF-16 code unit, and a segment length must fit in 8 bits: patterns are
 * thus expected to be at most 255 characters long, with char codes < 256.
 *
 * @param {Object} [details] - optional sizing hints, as returned by
 *     `optimize()`.
 * @param {number} [details.byteLength] - wanted size of the buffer; rounded
 *     up to a page boundary, never less than 131072.
 * @param {number} [details.char0] - offset of the character data section;
 *     65536 when not provided.
 */
const STrieContainer = function(details) {
    if ( details instanceof Object === false ) { details = {}; }
    const len = (details.byteLength || 0) + STRIE_PAGE_SIZE-1 & ~(STRIE_PAGE_SIZE-1);
    this.buf = new Uint8Array(Math.max(len, 131072));
    this.buf32 = new Uint32Array(this.buf.buffer);
    this.buf32[STRIE_TRIE0_SLOT] = STRIE_TRIE0_START;
    this.buf32[STRIE_TRIE1_SLOT] = this.buf32[STRIE_TRIE0_SLOT];
    this.buf32[STRIE_CHAR0_SLOT] = details.char0 || 65536;
    this.buf32[STRIE_CHAR1_SLOT] = this.buf32[STRIE_CHAR0_SLOT];
};

STrieContainer.prototype = {

    //--------------------------------------------------------------------------
    // Public methods
    //--------------------------------------------------------------------------

    /**
     * Mark both the trie data section and the character data section as
     * empty. The buffer itself is neither released nor cleared; cells and
     * segments created afterwards reuse the same space.
     */
    reset: function() {
        this.buf32[STRIE_TRIE1_SLOT] = this.buf32[STRIE_TRIE0_SLOT];
        this.buf32[STRIE_CHAR1_SLOT] = this.buf32[STRIE_CHAR0_SLOT];
    },

    /**
     * Test whether one of the patterns stored in a trie is found in `a`,
     * starting exactly at position `al`.
     *
     * @param {number} iroot - index of the root cell of the trie.
     * @param {string} a - the string to look into.
     * @param {number} al - position in `a` where the match must start.
     * @returns {number} the position in `a` immediately after the matched
     *     pattern, or -1 when there is no match. The search stops at the
     *     first pattern end encountered.
     */
    matches: function(iroot, a, al) {
        const ar = a.length;
        const char0 = this.buf32[STRIE_CHAR0_SLOT];
        let icell = iroot;
        for (;;) {
            let c = a.charCodeAt(al);
            al += 1;
            let v, bl;
            // find first segment with a first-character match
            for (;;) {
                v = this.buf32[icell+2];
                bl = char0 + (v & 0x00FFFFFF);
                if ( this.buf[bl] === c ) { break; }
                icell = this.buf32[icell+0];
                if ( icell === 0 ) { return -1; }
            }
            // all characters in segment must match
            let n = v >>> 24;
            if ( n > 1 ) {
                n -= 1;
                if ( (al + n) > ar ) { return -1; }
                bl += 1;
                const br = bl + n;
                do {
                    if ( a.charCodeAt(al) !== this.buf[bl] ) { return -1; }
                    al += 1;
                    bl += 1;
                } while ( bl < br );
            }
            // next segment
            icell = this.buf32[icell+1];
            // no next cell, or next cell is a boundary cell: a pattern ends
            if ( icell === 0 || this.buf32[icell+2] === 0 ) { return al; }
            if ( al === ar ) { return -1; }
        }
    },

    /**
     * Create a reference to a trie.
     *
     * @param {Array} [args] - when an array, `[ iroot, size ]` as returned by
     *     `compileOne()`: a reference to that existing trie is returned and
     *     nothing is allocated. Otherwise a new, empty trie is allocated.
     * @returns {STrieRef} reference to the trie.
     */
    createOne: function(args) {
        if ( Array.isArray(args) ) {
            return new this.STrieRef(this, args[0], args[1]);
        }
        // grow buffer if needed
        if ( (this.buf32[STRIE_CHAR0_SLOT] - this.buf32[STRIE_TRIE1_SLOT]) < 12 ) {
            this.growBuf(12, 0);
        }
        const iroot = this.buf32[STRIE_TRIE1_SLOT] >>> 2;
        this.buf32[STRIE_TRIE1_SLOT] += 12;
        this.buf32[iroot+0] = 0;
        this.buf32[iroot+1] = 0;
        this.buf32[iroot+2] = 0;
        return new this.STrieRef(this, iroot, 0);
    },

    /**
     * @param {STrieRef} trieRef - reference to a trie.
     * @returns {Array} `[ iroot, size ]`, suitable as argument to
     *     `createOne()`.
     */
    compileOne: function(trieRef) {
        return [ trieRef.iroot, trieRef.size ];
    },

    /**
     * Add a pattern to a trie.
     *
     * @param {number} iroot - index of the root cell of the trie.
     * @param {string} s - the pattern to add.
     * @returns {number} 1 if the pattern was added, 0 if it is empty or was
     *     already present.
     */
    add: function(iroot, s) {
        const lschar = s.length;
        if ( lschar === 0 ) { return 0; }
        // grow buffer if needed: this must be done before anything is
        // written, including the first segment of an empty trie, otherwise
        // characters could be written past the end of the buffer and be
        // silently lost.
        if (
            (this.buf32[STRIE_CHAR0_SLOT] - this.buf32[STRIE_TRIE1_SLOT]) < 24 ||
            (this.buf.length - this.buf32[STRIE_CHAR1_SLOT]) < 256
        ) {
            this.growBuf(24, 256);
        }
        let ischar = 0;
        let icell = iroot;
        // special case: first node in trie
        if ( this.buf32[icell+2] === 0 ) {
            this.buf32[icell+2] = this.addSegment(s.slice(ischar));
            return 1;
        }
        //
        const char0 = this.buf32[STRIE_CHAR0_SLOT];
        let inext;
        // find a matching cell: move down
        for (;;) {
            const vseg = this.buf32[icell+2];
            // skip boundary cells
            if ( vseg === 0 ) {
                icell = this.buf32[icell+1];
                continue;
            }
            let isegchar0 = char0 + (vseg & 0x00FFFFFF);
            // if first character is no match, move to next descendant
            if ( this.buf[isegchar0] !== s.charCodeAt(ischar) ) {
                inext = this.buf32[icell+0];
                if ( inext === 0 ) {
                    this.buf32[icell+0] = this.addCell(0, 0, this.addSegment(s.slice(ischar)));
                    return 1;
                }
                icell = inext;
                continue;
            }
            // 1st character was tested
            let isegchar = 1;
            ischar += 1;
            // find 1st mismatch in rest of segment
            const lsegchar = vseg >>> 24;
            if ( lsegchar !== 1 ) {
                for (;;) {
                    if ( isegchar === lsegchar ) { break; }
                    if ( ischar === lschar ) { break; }
                    if ( this.buf[isegchar0+isegchar] !== s.charCodeAt(ischar) ) { break; }
                    isegchar += 1;
                    ischar += 1;
                }
            }
            // all segment characters matched
            if ( isegchar === lsegchar ) {
                inext = this.buf32[icell+1];
                // needle remainder: no
                if ( ischar === lschar ) {
                    // boundary cell already present
                    if ( inext === 0 || this.buf32[inext+2] === 0 ) { return 0; }
                    // need boundary cell
                    this.buf32[icell+1] = this.addCell(0, inext, 0);
                }
                // needle remainder: yes
                else {
                    if ( inext !== 0 ) {
                        icell = inext;
                        continue;
                    }
                    // boundary cell + needle remainder
                    inext = this.addCell(0, 0, 0);
                    this.buf32[icell+1] = inext;
                    this.buf32[inext+1] = this.addCell(0, 0, this.addSegment(s.slice(ischar)));
                }
            }
            // some segment characters matched
            else {
                // split current cell: the current cell keeps the matched
                // head of the segment, a new cell receives the tail
                isegchar0 -= char0;
                this.buf32[icell+2] = isegchar << 24 | isegchar0;
                inext = this.addCell(
                    0,
                    this.buf32[icell+1],
                    lsegchar - isegchar << 24 | isegchar0 + isegchar
                );
                this.buf32[icell+1] = inext;
                // needle remainder: no = need boundary cell
                if ( ischar === lschar ) {
                    this.buf32[icell+1] = this.addCell(0, inext, 0);
                }
                // needle remainder: yes = need new cell for remaining characters
                else {
                    this.buf32[inext+0] = this.addCell(0, 0, this.addSegment(s.slice(ischar)));
                }
            }
            return 1;
        }
    },

    /**
     * Shrink the buffer to fit its current content (plus some slack).
     *
     * @returns {Object} `{ byteLength, char0 }`, suitable as argument to the
     *     `STrieContainer` constructor.
     */
    optimize: function() {
        this.shrinkBuf();
        return {
            byteLength: this.buf.byteLength,
            char0: this.buf32[STRIE_CHAR0_SLOT],
        };
    },

    /**
     * Create a new trie and fill it from an iterable.
     *
     * @param {Iterable<string>} hostnames - the strings to add to the trie.
     * @param {string} [add] - name of the trie reference method to call for
     *     each string; 'add' by default.
     * @returns {STrieRef} reference to the new trie.
     */
    fromIterable: function(hostnames, add) {
        if ( add === undefined ) { add = 'add'; }
        const trieRef = this.createOne();
        for ( const hn of hostnames ) {
            trieRef[add](hn);
        }
        return trieRef;
    },

    /**
     * Serialize the used portion of the buffer, header included.
     *
     * @param {Object} [encoder] - when an object, its `encode()` method is
     *     called with the underlying ArrayBuffer and the number of bytes in
     *     use, and the result is returned as is.
     * @returns {*} the result of `encoder.encode()`, or a plain array of
     *     32-bit unsigned integers when no encoder is provided.
     */
    serialize: function(encoder) {
        if ( encoder instanceof Object ) {
            return encoder.encode(
                this.buf32.buffer,
                this.buf32[STRIE_CHAR1_SLOT]
            );
        }
        return Array.from(
            new Uint32Array(
                this.buf32.buffer,
                0,
                this.buf32[STRIE_CHAR1_SLOT] + 3 >>> 2
            )
        );
    },

    /**
     * Restore the buffer from the output of `serialize()`.
     *
     * @param {string|Array<number>} selfie - a string is handed to
     *     `decoder`, anything else is copied as 32-bit unsigned integers.
     * @param {Object} [decoder] - must provide `decodeSize()` and `decode()`
     *     when `selfie` is a string.
     */
    unserialize: function(selfie, decoder) {
        const shouldDecode = typeof selfie === 'string';
        let byteLength = shouldDecode
            ? decoder.decodeSize(selfie)
            : selfie.length << 2;
        byteLength = byteLength + STRIE_PAGE_SIZE-1 & ~(STRIE_PAGE_SIZE-1);
        if ( byteLength === 0 ) { return; }
        if ( byteLength > this.buf.length ) {
            this.buf = new Uint8Array(byteLength);
            this.buf32 = new Uint32Array(this.buf.buffer);
        }
        if ( shouldDecode ) {
            decoder.decode(selfie, this.buf.buffer);
        } else {
            this.buf32.set(selfie);
        }
    },

    //--------------------------------------------------------------------------
    // Class to hold reference to a specific trie
    //--------------------------------------------------------------------------

    /**
     * @param {STrieContainer} container - the container holding the trie.
     * @param {number} iroot - index of the root cell of the trie.
     * @param {number} size - number of patterns in the trie.
     */
    STrieRef: function(container, iroot, size) {
        this.container = container;
        this.iroot = iroot;
        this.size = size;
    },

    //--------------------------------------------------------------------------
    // Private methods
    //--------------------------------------------------------------------------

    /**
     * Append a cell to the trie data section. The caller is responsible for
     * ensuring there is enough room.
     *
     * @returns {number} index of the new cell.
     */
    addCell: function(idown, iright, v) {
        let icell = this.buf32[STRIE_TRIE1_SLOT];
        this.buf32[STRIE_TRIE1_SLOT] = icell + 12;
        icell >>>= 2;
        this.buf32[icell+0] = idown;
        this.buf32[icell+1] = iright;
        this.buf32[icell+2] = v;
        return icell;
    },

    /**
     * Append a string to the character data section. The caller is
     * responsible for ensuring there is enough room.
     *
     * @returns {number} the segment descriptor (length in upper 8 bits,
     *     offset relative to char0 in lower 24 bits), 0 for an empty string.
     */
    addSegment: function(segment) {
        const lsegchar = segment.length;
        if ( lsegchar === 0 ) { return 0; }
        let char1 = this.buf32[STRIE_CHAR1_SLOT];
        const isegchar = char1 - this.buf32[STRIE_CHAR0_SLOT];
        let i = 0;
        do {
            this.buf[char1++] = segment.charCodeAt(i++);
        } while ( i !== lsegchar );
        this.buf32[STRIE_CHAR1_SLOT] = char1;
        return (lsegchar << 24) | isegchar;
    },

    /**
     * Ensure at least `trieGrow` more bytes fit in the trie data section and
     * at least `charGrow` more bytes fit in the character data section. The
     * buffer never shrinks as a result.
     */
    growBuf: function(trieGrow, charGrow) {
        const char0 = Math.max(
            (this.buf32[STRIE_TRIE1_SLOT] + trieGrow + STRIE_PAGE_SIZE-1) & ~(STRIE_PAGE_SIZE-1),
            this.buf32[STRIE_CHAR0_SLOT]
        );
        const char1 = char0 + this.buf32[STRIE_CHAR1_SLOT] - this.buf32[STRIE_CHAR0_SLOT];
        const bufLen = Math.max(
            (char1 + charGrow + STRIE_PAGE_SIZE-1) & ~(STRIE_PAGE_SIZE-1),
            this.buf.length
        );
        this.resizeBuf(bufLen, char0);
    },

    /**
     * Pack the character data section right after the trie data section,
     * leaving room for two cells and 256 characters.
     */
    shrinkBuf: function() {
        const char0 = this.buf32[STRIE_TRIE1_SLOT] + 24;
        const char1 = char0 + this.buf32[STRIE_CHAR1_SLOT] - this.buf32[STRIE_CHAR0_SLOT];
        const bufLen = char1 + 256;
        this.resizeBuf(bufLen, char0);
    },

    /**
     * Resize the buffer to `bufLen` bytes (rounded up to a page boundary)
     * and/or move the character data section to offset `char0`.
     */
    resizeBuf: function(bufLen, char0) {
        bufLen = bufLen + STRIE_PAGE_SIZE-1 & ~(STRIE_PAGE_SIZE-1);
        if (
            bufLen === this.buf.length &&
            char0 === this.buf32[STRIE_CHAR0_SLOT]
        ) {
            return;
        }
        const charDataLen = this.buf32[STRIE_CHAR1_SLOT] - this.buf32[STRIE_CHAR0_SLOT];
        if ( bufLen !== this.buf.length ) {
            // new buffer: copy header + trie data, then character data at
            // its new location
            const newBuf = new Uint8Array(bufLen);
            newBuf.set(
                new Uint8Array(
                    this.buf.buffer,
                    0,
                    this.buf32[STRIE_TRIE1_SLOT]
                ),
                0
            );
            newBuf.set(
                new Uint8Array(
                    this.buf.buffer,
                    this.buf32[STRIE_CHAR0_SLOT],
                    charDataLen
                ),
                char0
            );
            this.buf = newBuf;
            this.buf32 = new Uint32Array(this.buf.buffer);
            this.buf32[STRIE_CHAR0_SLOT] = char0;
            this.buf32[STRIE_CHAR1_SLOT] = char0 + charDataLen;
        }
        if ( char0 !== this.buf32[STRIE_CHAR0_SLOT] ) {
            // same buffer: move character data in place
            this.buf.set(
                new Uint8Array(
                    this.buf.buffer,
                    this.buf32[STRIE_CHAR0_SLOT],
                    charDataLen
                ),
                char0
            );
            this.buf32[STRIE_CHAR0_SLOT] = char0;
            this.buf32[STRIE_CHAR1_SLOT] = char0 + charDataLen;
        }
    },
};

/******************************************************************************/

STrieContainer.prototype.STrieRef.prototype = {
    /**
     * Add a pattern to the trie.
     *
     * @param {string} pattern
     * @returns {boolean} true if the pattern was added, false if it is empty
     *     or was already present.
     */
    add: function(pattern) {
        if ( this.container.add(this.iroot, pattern) === 1 ) {
            this.size += 1;
            return true;
        }
        return false;
    },

    /**
     * @param {string} a - the string to look into.
     * @param {number} al - position in `a` where the match must start.
     * @returns {number} position in `a` immediately after the matched
     *     pattern, or -1 when there is no match.
     */
    matches: function(a, al) {
        return this.container.matches(this.iroot, a, al);
    },

    /**
     * Iterate over all the patterns stored in the trie.
     *
     * The returned object is its own iteration result: `next()` updates the
     * `value` and `done` properties and returns the object itself.
     */
    [Symbol.iterator]: function() {
        // The root cell of an empty trie has no segment: there is nothing
        // to iterate.
        const isEmpty = this.container.buf32[this.iroot+2] === 0;
        return {
            value: undefined,
            done: false,
            next: function() {
                if ( this.icell === 0 ) {
                    if ( this.forks.length === 0 ) {
                        this.value = undefined;
                        this.done = true;
                        return this;
                    }
                    // resume from the most recent alternative, with the
                    // prefix which was current at that point
                    this.charPtr = this.forks.pop();
                    this.icell = this.forks.pop();
                }
                for (;;) {
                    const idown = this.container.buf32[this.icell+0];
                    if ( idown !== 0 ) {
                        this.forks.push(idown, this.charPtr);
                    }
                    const v = this.container.buf32[this.icell+2];
                    let i0 = this.container.buf32[STRIE_CHAR0_SLOT] + (v & 0x00FFFFFF);
                    const i1 = i0 + (v >>> 24);
                    // segments are stored front to back, and so must they
                    // be appended to the pattern being rebuilt
                    while ( i0 < i1 ) {
                        this.charBuf[this.charPtr] = this.container.buf[i0];
                        this.charPtr += 1;
                        i0 += 1;
                    }
                    this.icell = this.container.buf32[this.icell+1];
                    if ( this.icell === 0 ) {
                        return this.toPattern();
                    }
                    // boundary cell: a pattern ends here, longer patterns
                    // follow
                    if ( this.container.buf32[this.icell+2] === 0 ) {
                        this.icell = this.container.buf32[this.icell+1];
                        return this.toPattern();
                    }
                }
            },
            toPattern: function() {
                this.value = this.textDecoder.decode(
                    new Uint8Array(this.charBuf.buffer, 0, this.charPtr)
                );
                return this;
            },
            container: this.container,
            icell: isEmpty ? 0 : this.iroot,
            charBuf: new Uint8Array(256),
            charPtr: 0,
            forks: [],
            textDecoder: new TextDecoder()
        };
    },
};
|
Loading…
Reference in New Issue
Block a user