mirror of
https://github.com/gorhill/uBlock.git
synced 2024-11-02 00:42:45 +01:00
Implement bidirectional plain-string trie
The bidirectional trie allows storing the right and left parts of a string into a trie given a pivot position. Related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/528 Additionally, the mandatory token-at-index-0 rule for FilterPlainHnAnchored has been lifted, thus allowing the engine to pick a potentially better token at any position in the filter string. *** TODO: Eventually rename `strie.js` to `biditrie.js`. TODO: Fix dump() method, it currently only shows the right-hand side of a filter string.
This commit is contained in:
parent
4a062728ff
commit
cfc2ce333d
@ -137,8 +137,8 @@ const µBlock = (function() { // jshint ignore:line
|
||||
|
||||
// Read-only
|
||||
systemSettings: {
|
||||
compiledMagic: 16, // Increase when compiled format changes
|
||||
selfieMagic: 16 // Increase when selfie format changes
|
||||
compiledMagic: 17, // Increase when compiled format changes
|
||||
selfieMagic: 17 // Increase when selfie format changes
|
||||
},
|
||||
|
||||
restoreBackupSettings: {
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
/* jshint bitwise: false */
|
||||
/* global punycode, HNTrieContainer, STrieContainer */
|
||||
/* global punycode, HNTrieContainer */
|
||||
|
||||
'use strict';
|
||||
|
||||
@ -130,6 +130,7 @@ const reIsWildcarded = /[\^\*]/;
|
||||
// See the following as short-lived registers, used during evaluation. They are
|
||||
// valid until the next evaluation.
|
||||
|
||||
let urlRegister = '';
|
||||
let pageHostnameRegister = '';
|
||||
let requestHostnameRegister = '';
|
||||
|
||||
@ -311,13 +312,12 @@ registerFilterClass(FilterTrue);
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterPlain = class {
|
||||
constructor(s, tokenBeg) {
|
||||
constructor(s) {
|
||||
this.s = s;
|
||||
this.tokenBeg = tokenBeg;
|
||||
}
|
||||
|
||||
match(url, tokenBeg) {
|
||||
return url.startsWith(this.s, tokenBeg - this.tokenBeg);
|
||||
return url.startsWith(this.s, tokenBeg);
|
||||
}
|
||||
|
||||
logData() {
|
||||
@ -332,56 +332,55 @@ const FilterPlain = class {
|
||||
return [ this.fid, this.s, this.tokenBeg ];
|
||||
}
|
||||
|
||||
addToTrie(trie) {
|
||||
trie.add(this.s, this.tokenBeg);
|
||||
}
|
||||
|
||||
static compile(details) {
|
||||
return [ FilterPlain.fid, details.f, details.tokenBeg ];
|
||||
}
|
||||
|
||||
static load(args) {
|
||||
return new FilterPlain(args[1], args[2]);
|
||||
if ( args[2] === 0 ) {
|
||||
return new FilterPlain(args[1]);
|
||||
}
|
||||
if ( args[2] === 1 ) {
|
||||
return new FilterPlain1(args[1]);
|
||||
}
|
||||
return new FilterPlainX(args[1], args[2]);
|
||||
}
|
||||
|
||||
static addToTrie(args, trie) {
|
||||
trie.add(args[1], args[2]);
|
||||
}
|
||||
};
|
||||
|
||||
FilterPlain.trieableId = 0;
|
||||
FilterPlain.prototype.trieableId = FilterPlain.trieableId;
|
||||
FilterPlain.prototype.tokenBeg = 0;
|
||||
|
||||
registerFilterClass(FilterPlain);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterPlainPrefix1 = class {
|
||||
constructor(s) {
|
||||
this.s = s;
|
||||
}
|
||||
|
||||
const FilterPlain1 = class extends FilterPlain {
|
||||
match(url, tokenBeg) {
|
||||
return url.startsWith(this.s, tokenBeg - 1);
|
||||
}
|
||||
|
||||
logData() {
|
||||
return {
|
||||
raw: rawToPlainStr(this.s, 0),
|
||||
regex: rawToRegexStr(this.s, 0),
|
||||
compiled: this.compile()
|
||||
};
|
||||
}
|
||||
|
||||
compile() {
|
||||
return [ this.fid, this.s ];
|
||||
}
|
||||
|
||||
static compile(details) {
|
||||
return [ FilterPlainPrefix1.fid, details.f ];
|
||||
}
|
||||
|
||||
static load(args) {
|
||||
return new FilterPlainPrefix1(args[1]);
|
||||
}
|
||||
|
||||
static trieableStringFromArgs(args) {
|
||||
return args[1];
|
||||
}
|
||||
};
|
||||
|
||||
FilterPlainPrefix1.prototype.trieableId = 0;
|
||||
FilterPlain1.prototype.tokenBeg = 1;
|
||||
|
||||
registerFilterClass(FilterPlainPrefix1);
|
||||
|
||||
// Variant of FilterPlain which carries its own `tokenBeg` value on the
// instance instead of relying on a prototype-level value.
const FilterPlainX = class extends FilterPlain {
    // `s` is forwarded to the parent constructor; `tokenBeg` is stored on the
    // instance.
    constructor(s, tokenBeg) {
        super(s);
        this.tokenBeg = tokenBeg;
    }

    // Returns true when `url` contains `this.s` starting at the position
    // obtained by subtracting this filter's `tokenBeg` from the `tokenBeg`
    // argument.
    match(url, tokenBeg) {
        const start = tokenBeg - this.tokenBeg;
        return url.startsWith(this.s, start);
    }
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
@ -538,32 +537,54 @@ const FilterPlainHnAnchored = class {
|
||||
logData() {
|
||||
return {
|
||||
raw: `||${this.s}`,
|
||||
regex: rawToRegexStr(this.s, 0),
|
||||
regex: rawToRegexStr(this.s, this.tokenBeg),
|
||||
compiled: this.compile()
|
||||
};
|
||||
}
|
||||
|
||||
compile() {
|
||||
return [ this.fid, this.s ];
|
||||
return [ this.fid, this.s, this.tokenBeg ];
|
||||
}
|
||||
|
||||
addToTrie(trie) {
|
||||
trie.add(this.s, this.tokenBeg);
|
||||
}
|
||||
|
||||
static compile(details) {
|
||||
return [ FilterPlainHnAnchored.fid, details.f ];
|
||||
return [ FilterPlainHnAnchored.fid, details.f, details.tokenBeg ];
|
||||
}
|
||||
|
||||
static load(args) {
|
||||
return new FilterPlainHnAnchored(args[1]);
|
||||
if ( args[2] === 0 ) {
|
||||
return new FilterPlainHnAnchored(args[1]);
|
||||
}
|
||||
return new FilterPlainHnAnchoredX(args[1], args[2]);
|
||||
}
|
||||
|
||||
static trieableStringFromArgs(args) {
|
||||
return args[1];
|
||||
static addToTrie(args, trie) {
|
||||
trie.add(args[1], args[2]);
|
||||
}
|
||||
};
|
||||
|
||||
FilterPlainHnAnchored.prototype.trieableId = 1;
|
||||
FilterPlainHnAnchored.trieableId = 1;
|
||||
FilterPlainHnAnchored.prototype.trieableId = FilterPlainHnAnchored.trieableId;
|
||||
FilterPlainHnAnchored.prototype.tokenBeg = 0;
|
||||
|
||||
registerFilterClass(FilterPlainHnAnchored);
|
||||
|
||||
|
||||
// Variant of FilterPlainHnAnchored which carries its own `tokenBeg` value on
// the instance.
const FilterPlainHnAnchoredX = class extends FilterPlainHnAnchored {
    // `s` is forwarded to the parent constructor; `tokenBeg` is stored on the
    // instance.
    constructor(s, tokenBeg) {
        super(s);
        this.tokenBeg = tokenBeg;
    }

    // The candidate start position is the `tokenBeg` argument minus this
    // filter's own `tokenBeg`. `this.s` must be found in `url` at that
    // position; when it is, the result is whatever isHnAnchored() reports for
    // that same position.
    match(url, tokenBeg) {
        const start = tokenBeg - this.tokenBeg;
        if ( url.startsWith(this.s, start) === false ) { return false; }
        return isHnAnchored(url, start);
    }
};
|
||||
|
||||
/*******************************************************************************
|
||||
|
||||
Filters with only one single occurrence of wildcard `*`
|
||||
@ -1605,12 +1626,13 @@ const FilterBucket = class {
|
||||
this.filters.push(a, b, c);
|
||||
this._countTrieable();
|
||||
}
|
||||
this.trieResult = 0;
|
||||
}
|
||||
|
||||
get size() {
|
||||
let size = this.filters.length;
|
||||
if ( this.plainPrefix1Trie !== null ) {
|
||||
size += this.plainPrefix1Trie.size;
|
||||
if ( this.plainTrie !== null ) {
|
||||
size += this.plainTrie.size;
|
||||
}
|
||||
if ( this.plainHnAnchoredTrie !== null ) {
|
||||
size += this.plainHnAnchoredTrie.size;
|
||||
@ -1619,39 +1641,26 @@ const FilterBucket = class {
|
||||
}
|
||||
|
||||
add(fdata) {
|
||||
if ( fdata[0] === this.plainPrefix1Id ) {
|
||||
if ( this.plainPrefix1Trie !== null ) {
|
||||
return this.plainPrefix1Trie.add(
|
||||
FilterPlainPrefix1.trieableStringFromArgs(fdata)
|
||||
);
|
||||
const fclass = filterClasses[fdata[0]];
|
||||
if ( fclass.trieableId === 0 ) {
|
||||
if ( this.plainTrie !== null ) {
|
||||
return fclass.addToTrie(fdata, this.plainTrie);
|
||||
}
|
||||
if ( this.plainPrefix1Count === 3 ) {
|
||||
this.plainPrefix1Trie = FilterBucket.trieContainer.createOne();
|
||||
this._transferTrieable(
|
||||
this.plainPrefix1Id,
|
||||
this.plainPrefix1Trie
|
||||
);
|
||||
return this.plainPrefix1Trie.add(
|
||||
FilterPlainPrefix1.trieableStringFromArgs(fdata)
|
||||
);
|
||||
if ( this.plainCount === 3 ) {
|
||||
this.plainTrie = FilterBucket.trieContainer.createOne();
|
||||
this._transferTrieable(0, this.plainTrie);
|
||||
return fclass.addToTrie(fdata, this.plainTrie);
|
||||
}
|
||||
this.plainPrefix1Count += 1;
|
||||
this.plainCount += 1;
|
||||
}
|
||||
if ( fdata[0] === this.plainHnAnchoredId ) {
|
||||
if ( fclass.trieableId === 1 ) {
|
||||
if ( this.plainHnAnchoredTrie !== null ) {
|
||||
return this.plainHnAnchoredTrie.add(
|
||||
FilterPlainHnAnchored.trieableStringFromArgs(fdata)
|
||||
);
|
||||
return fclass.addToTrie(fdata, this.plainHnAnchoredTrie);
|
||||
}
|
||||
if ( this.plainHnAnchoredCount === 3 ) {
|
||||
this.plainHnAnchoredTrie = FilterBucket.trieContainer.createOne();
|
||||
this._transferTrieable(
|
||||
this.plainHnAnchoredId,
|
||||
this.plainHnAnchoredTrie
|
||||
);
|
||||
return this.plainHnAnchoredTrie.add(
|
||||
FilterPlainHnAnchored.trieableStringFromArgs(fdata)
|
||||
);
|
||||
this._transferTrieable(1, this.plainHnAnchoredTrie);
|
||||
return fclass.addToTrie(fdata, this.plainHnAnchoredTrie);
|
||||
}
|
||||
this.plainHnAnchoredCount += 1;
|
||||
}
|
||||
@ -1659,22 +1668,21 @@ const FilterBucket = class {
|
||||
}
|
||||
|
||||
match(url, tokenBeg) {
|
||||
if ( this.plainPrefix1Trie !== null ) {
|
||||
const pos = this.plainPrefix1Trie.matches(url, tokenBeg - 1);
|
||||
if ( this.plainTrie !== null ) {
|
||||
const pos = this.plainTrie.matches(url, tokenBeg);
|
||||
if ( pos !== -1 ) {
|
||||
this.plainPrefix1Filter.s = url.slice(tokenBeg - 1, pos);
|
||||
this.f = this.plainPrefix1Filter;
|
||||
this.trieResult = pos;
|
||||
this.f = this.plainFilter;
|
||||
this.f.tokenBeg = tokenBeg - (pos >>> 16);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (
|
||||
this.plainHnAnchoredTrie !== null &&
|
||||
isHnAnchored(url, tokenBeg)
|
||||
) {
|
||||
if ( this.plainHnAnchoredTrie !== null ) {
|
||||
const pos = this.plainHnAnchoredTrie.matches(url, tokenBeg);
|
||||
if ( pos !== -1 ) {
|
||||
this.plainHnAnchoredFilter.s = url.slice(tokenBeg, pos);
|
||||
if ( pos !== -1 && isHnAnchored(url, pos >>> 16) ) {
|
||||
this.trieResult = pos;
|
||||
this.f = this.plainHnAnchoredFilter;
|
||||
this.f.tokenBeg = tokenBeg - (pos >>> 16);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -1690,6 +1698,15 @@ const FilterBucket = class {
|
||||
}
|
||||
|
||||
logData() {
|
||||
if (
|
||||
this.f === this.plainFilter ||
|
||||
this.f === this.plainHnAnchoredFilter
|
||||
) {
|
||||
this.f.s = urlRegister.slice(
|
||||
this.trieResult >>> 16,
|
||||
this.trieResult & 0xFFFF
|
||||
);
|
||||
}
|
||||
return this.f.logData();
|
||||
}
|
||||
|
||||
@ -1697,8 +1714,8 @@ const FilterBucket = class {
|
||||
return [
|
||||
this.fid,
|
||||
this.filters.map(filter => filter.compile(toSelfie)),
|
||||
this.plainPrefix1Trie !== null &&
|
||||
FilterBucket.trieContainer.compileOne(this.plainPrefix1Trie),
|
||||
this.plainTrie !== null &&
|
||||
FilterBucket.trieContainer.compileOne(this.plainTrie),
|
||||
this.plainHnAnchoredTrie !== null &&
|
||||
FilterBucket.trieContainer.compileOne(this.plainHnAnchoredTrie),
|
||||
];
|
||||
@ -1706,21 +1723,22 @@ const FilterBucket = class {
|
||||
|
||||
_countTrieable() {
|
||||
for ( const f of this.filters ) {
|
||||
if ( f.fid === this.plainPrefix1Id ) {
|
||||
this.plainPrefix1Count += 1;
|
||||
} else if ( f.fid === this.plainHnAnchoredId ) {
|
||||
if ( f.trieableId === 0 ) {
|
||||
this.plainCount += 1;
|
||||
} else if ( f.trieableId === 1 ) {
|
||||
this.plainHnAnchoredCount += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_transferTrieable(fid, trie) {
|
||||
let i = this.filters.length;
|
||||
_transferTrieable(trieableId, trie) {
|
||||
const filters = this.filters;
|
||||
let i = filters.length;
|
||||
while ( i-- ) {
|
||||
const f = this.filters[i];
|
||||
if ( f.fid !== fid || f.s.length > 255 ) { continue; }
|
||||
trie.add(f.s);
|
||||
this.filters.splice(i, 1);
|
||||
const f = filters[i];
|
||||
if ( f.trieableId !== trieableId || f.s.length > 255 ) { continue; }
|
||||
f.addToTrie(trie);
|
||||
filters.splice(i, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1757,7 +1775,7 @@ const FilterBucket = class {
|
||||
const bucket = new FilterBucket();
|
||||
bucket.filters = args[1].map(data => filterFromCompiledData(data));
|
||||
if ( Array.isArray(args[2]) ) {
|
||||
bucket.plainPrefix1Trie =
|
||||
bucket.plainTrie =
|
||||
FilterBucket.trieContainer.createOne(args[2]);
|
||||
}
|
||||
if ( Array.isArray(args[3]) ) {
|
||||
@ -1771,17 +1789,15 @@ const FilterBucket = class {
|
||||
FilterBucket.prototype.f = null;
|
||||
FilterBucket.prototype.promoted = 0;
|
||||
|
||||
FilterBucket.prototype.plainPrefix1Id = FilterPlainPrefix1.fid;
|
||||
FilterBucket.prototype.plainPrefix1Count = 0;
|
||||
FilterBucket.prototype.plainPrefix1Trie = null;
|
||||
FilterBucket.prototype.plainPrefix1Filter = new FilterPlainPrefix1('');
|
||||
FilterBucket.prototype.plainCount = 0;
|
||||
FilterBucket.prototype.plainTrie = null;
|
||||
FilterBucket.prototype.plainFilter = new FilterPlainX('', 0);
|
||||
|
||||
FilterBucket.prototype.plainHnAnchoredId = FilterPlainHnAnchored.fid;
|
||||
FilterBucket.prototype.plainHnAnchoredCount = 0;
|
||||
FilterBucket.prototype.plainHnAnchoredTrie = null;
|
||||
FilterBucket.prototype.plainHnAnchoredFilter = new FilterPlainHnAnchored('');
|
||||
FilterBucket.prototype.plainHnAnchoredFilter = new FilterPlainHnAnchoredX('', 0);
|
||||
|
||||
FilterBucket.trieContainer = (function() {
|
||||
FilterBucket.trieContainer = (( ) => {
|
||||
let trieDetails;
|
||||
try {
|
||||
trieDetails = JSON.parse(
|
||||
@ -1789,7 +1805,7 @@ FilterBucket.trieContainer = (function() {
|
||||
);
|
||||
} catch(ex) {
|
||||
}
|
||||
return new STrieContainer(trieDetails);
|
||||
return new µBlock.BidiTrieContainer(trieDetails);
|
||||
})();
|
||||
|
||||
registerFilterClass(FilterBucket);
|
||||
@ -2215,7 +2231,6 @@ FilterParser.prototype.parse = function(raw) {
|
||||
// These "bad tokens" are collated manually.
|
||||
|
||||
// Hostname-anchored with no wildcard always have a token index of 0.
|
||||
const reHostnameToken = /^[0-9a-z]+/;
|
||||
const reGoodToken = /[%0-9a-z]{2,}/g;
|
||||
const reRegexToken = /[%0-9A-Za-z]{2,}/g;
|
||||
const reRegexTokenAbort = /[([]/;
|
||||
@ -2298,13 +2313,7 @@ FilterParser.prototype.makeToken = function() {
|
||||
|
||||
if ( this.f === '*' ) { return; }
|
||||
|
||||
let matches = null;
|
||||
if ( (this.anchor & 0x4) !== 0 && this.wildcarded === false ) {
|
||||
matches = reHostnameToken.exec(this.f);
|
||||
}
|
||||
if ( matches === null ) {
|
||||
matches = this.findFirstGoodToken();
|
||||
}
|
||||
let matches = this.findFirstGoodToken();
|
||||
if ( matches !== null ) {
|
||||
this.token = matches[0];
|
||||
this.tokenHash = µb.urlTokenizer.tokenHashFromString(this.token);
|
||||
@ -2361,7 +2370,6 @@ FilterContainer.prototype.reset = function() {
|
||||
FilterBucket.reset();
|
||||
|
||||
// Runtime registers
|
||||
this.urlRegister = '';
|
||||
this.catbitsRegister = 0;
|
||||
this.tokenRegister = 0;
|
||||
this.filterRegister = null;
|
||||
@ -2673,8 +2681,7 @@ FilterContainer.prototype.compile = function(raw, writer) {
|
||||
} else if ( parsed.anchor === 0x4 ) {
|
||||
if (
|
||||
parsed.wildcarded === false &&
|
||||
parsed.tokenHash !== parsed.noTokenHash &&
|
||||
parsed.tokenBeg === 0
|
||||
parsed.tokenHash !== parsed.noTokenHash
|
||||
) {
|
||||
fdata = FilterPlainHnAnchored.compile(parsed);
|
||||
} else {
|
||||
@ -2704,8 +2711,6 @@ FilterContainer.prototype.compile = function(raw, writer) {
|
||||
fdata = FilterPlainRightAnchored.compile(parsed);
|
||||
} else if ( parsed.anchor === 0x3 ) {
|
||||
fdata = FilterExactMatch.compile(parsed);
|
||||
} else if ( parsed.tokenBeg === 1 ) {
|
||||
fdata = FilterPlainPrefix1.compile(parsed);
|
||||
} else {
|
||||
fdata = FilterPlain.compile(parsed);
|
||||
}
|
||||
@ -2962,7 +2967,7 @@ FilterContainer.prototype.realmMatchString = function(
|
||||
}
|
||||
// Pattern-based filters
|
||||
else {
|
||||
const url = this.urlRegister;
|
||||
const url = urlRegister;
|
||||
const tokenHashes = this.urlTokenizer.getTokens();
|
||||
let i = 0, tokenBeg = 0;
|
||||
for (;;) {
|
||||
@ -3026,7 +3031,7 @@ FilterContainer.prototype.matchStringGenericHide = function(requestURL) {
|
||||
const typeBits = typeNameToTypeValue['generichide'] | 0x80000000;
|
||||
|
||||
// Prime tokenizer: we get a normalized URL in return.
|
||||
this.urlRegister = this.urlTokenizer.setURL(requestURL);
|
||||
urlRegister = this.urlTokenizer.setURL(requestURL);
|
||||
this.filterRegister = null;
|
||||
|
||||
// These registers will be used by various filters
|
||||
@ -3071,7 +3076,7 @@ FilterContainer.prototype.matchString = function(fctxt, modifiers = 0) {
|
||||
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;
|
||||
|
||||
// Prime tokenizer: we get a normalized URL in return.
|
||||
this.urlRegister = this.urlTokenizer.setURL(fctxt.url);
|
||||
urlRegister = this.urlTokenizer.setURL(fctxt.url);
|
||||
this.filterRegister = null;
|
||||
|
||||
// These registers will be used by various filters
|
||||
@ -3176,6 +3181,20 @@ FilterContainer.prototype.benchmark = async function(action) {
|
||||
}
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// Console helper: builds a filtering context from `docURL`, `type` and `url`,
// runs matchString() on it and prints the result. When the result is not 0,
// the output of toLogData() is printed as well.
FilterContainer.prototype.test = function(docURL, type, url) {
    const context = µb.filteringContext.duplicate();
    context.setDocOriginFromURL(docURL);
    context.setType(type);
    context.setURL(url);
    const result = this.matchString(context);
    console.log(`${result}`);
    if ( result === 0 ) { return; }
    console.log(this.toLogData());
};
|
||||
|
||||
/******************************************************************************-
|
||||
|
||||
With default filter lists:
|
||||
@ -3316,8 +3335,8 @@ FilterContainer.prototype.filterClassHistogram = function() {
|
||||
filterClassDetails.set(i, { name: filterClasses[i].name, count: 0, });
|
||||
}
|
||||
// Artificial classes to report content of tries
|
||||
filterClassDetails.set(1000, { name: 'FilterPrefix1Trie', count: 0, });
|
||||
filterClassDetails.set(1001, { name: 'FilterHnAnchoredTrie', count: 0, });
|
||||
filterClassDetails.set(1000, { name: 'FilterPlainTrie', count: 0, });
|
||||
filterClassDetails.set(1001, { name: 'FilterPlainHnAnchoredTrie', count: 0, });
|
||||
|
||||
const countFilter = function(f) {
|
||||
if ( f instanceof Object === false ) { return; }
|
||||
@ -3332,8 +3351,8 @@ FilterContainer.prototype.filterClassHistogram = function() {
|
||||
countFilter(f);
|
||||
if ( f instanceof FilterBucket ) {
|
||||
for ( const g of f.filters ) { countFilter(g); }
|
||||
if ( f.plainPrefix1Trie !== null ) {
|
||||
filterClassDetails.get(1000).count += f.plainPrefix1Trie.size;
|
||||
if ( f.plainTrie !== null ) {
|
||||
filterClassDetails.get(1000).count += f.plainTrie.size;
|
||||
}
|
||||
if ( f.plainHnAnchoredTrie !== null ) {
|
||||
filterClassDetails.get(1001).count += f.plainHnAnchoredTrie.size;
|
||||
|
509
src/js/strie.js
509
src/js/strie.js
@ -19,13 +19,16 @@
|
||||
Home: https://github.com/gorhill/uBlock
|
||||
*/
|
||||
|
||||
/* exported STrieContainer */
|
||||
|
||||
'use strict';
|
||||
|
||||
// *****************************************************************************
|
||||
// start of local namespace
|
||||
|
||||
{
|
||||
|
||||
/*******************************************************************************
|
||||
|
||||
A STrieContainer is mostly a large buffer in which distinct but related
|
||||
A BidiTrieContainer is mostly a large buffer in which distinct but related
|
||||
tries are stored. The memory layout of the buffer is as follows:
|
||||
|
||||
0-255: reserved
|
||||
@ -35,28 +38,96 @@
|
||||
268-271: offset to end of character data section (=> char1)
|
||||
272: start of trie data section
|
||||
|
||||
+--------------+
|
||||
Normal cell: | And | If "Segment info" matches:
|
||||
(aka CELL) +--------------+ Goto "And"
|
||||
| Or | Else
|
||||
+--------------+ Goto "Or"
|
||||
| Segment info |
|
||||
+--------------+
|
||||
|
||||
+--------------+
|
||||
Boundary cell: | Right And | "Right And" and/or "Left And"
|
||||
(aka BCELL) +--------------+ can be 0 in last-segment condition.
|
||||
| Left And |
|
||||
+--------------+
|
||||
| 0 |
|
||||
+--------------+
|
||||
|
||||
Given the following filters, and assuming the token is "ad" for all of them:
|
||||
|
||||
-images/ad-
|
||||
/google_ad.
|
||||
/images_ad.
|
||||
_images/ad.
|
||||
|
||||
We get the following internal representation:
|
||||
|
||||
+-----------+ +-----------+ +---+
|
||||
| |---->| |---->| 0 |
|
||||
+-----------+ +-----------+ +---+ +-----------+
|
||||
| 0 | +--| | | |---->| 0 |
|
||||
+-----------+ | +-----------+ +---+ +-----------+
|
||||
| ad | | | - | | 0 | | 0 |
|
||||
+-----------+ | +-----------+ +---+ +-----------+
|
||||
| | -images/ |
|
||||
| +-----------+ +---+ +-----------+
|
||||
+->| |---->| 0 |
|
||||
+-----------+ +---+ +-----------+ +-----------+
|
||||
| 0 | | |---->| |---->| 0 |
|
||||
+-----------+ +---+ +-----------+ +-----------+
|
||||
| . | | 0 | +--| | +--| |
|
||||
+-----------+ +---+ | +-----------+ | +-----------+
|
||||
| | _ | | | /google |
|
||||
| +-----------+ | +-----------+
|
||||
| |
|
||||
| | +-----------+
|
||||
| +->| 0 |
|
||||
| +-----------+
|
||||
| | 0 |
|
||||
| +-----------+
|
||||
| | /images |
|
||||
| +-----------+
|
||||
|
|
||||
| +-----------+
|
||||
+->| 0 |
|
||||
+-----------+
|
||||
| 0 |
|
||||
+-----------+
|
||||
| _images/ |
|
||||
+-----------+
|
||||
|
||||
*/
|
||||
|
||||
const STRIE_PAGE_SIZE = 65536;
|
||||
// i32 / i8
|
||||
const STRIE_TRIE0_SLOT = 256 >>> 2; // 64 / 256
|
||||
const STRIE_TRIE1_SLOT = STRIE_TRIE0_SLOT + 1; // 65 / 260
|
||||
const STRIE_CHAR0_SLOT = STRIE_TRIE0_SLOT + 2; // 66 / 264
|
||||
const STRIE_CHAR1_SLOT = STRIE_TRIE0_SLOT + 3; // 67 / 268
|
||||
const STRIE_TRIE0_START = STRIE_TRIE0_SLOT + 4 << 2; // 272
|
||||
const PAGE_SIZE = 65536;
|
||||
// i32 / i8
|
||||
const TRIE0_SLOT = 256 >>> 2; // 64 / 256
|
||||
const TRIE1_SLOT = TRIE0_SLOT + 1; // 65 / 260
|
||||
const CHAR0_SLOT = TRIE0_SLOT + 2; // 66 / 264
|
||||
const CHAR1_SLOT = TRIE0_SLOT + 3; // 67 / 268
|
||||
const TRIE0_START = TRIE0_SLOT + 4 << 2; // 272
|
||||
|
||||
const CELL_BYTE_LENGTH = 12;
|
||||
const MIN_FREE_CELL_BYTE_LENGTH = CELL_BYTE_LENGTH * 4;
|
||||
|
||||
const CELL_AND = 0;
|
||||
const CELL_OR = 1;
|
||||
const BCELL_RIGHT_AND = 0;
|
||||
const BCELL_LEFT_AND = 1;
|
||||
const SEGMENT_INFO = 2;
|
||||
|
||||
|
||||
const STrieContainer = class {
|
||||
µBlock.BidiTrieContainer = class {
|
||||
|
||||
constructor(details) {
|
||||
if ( details instanceof Object === false ) { details = {}; }
|
||||
const len = (details.byteLength || 0) + STRIE_PAGE_SIZE-1 & ~(STRIE_PAGE_SIZE-1);
|
||||
const len = (details.byteLength || 0) + PAGE_SIZE-1 & ~(PAGE_SIZE-1);
|
||||
this.buf = new Uint8Array(Math.max(len, 131072));
|
||||
this.buf32 = new Uint32Array(this.buf.buffer);
|
||||
this.buf32[STRIE_TRIE0_SLOT] = STRIE_TRIE0_START;
|
||||
this.buf32[STRIE_TRIE1_SLOT] = this.buf32[STRIE_TRIE0_SLOT];
|
||||
this.buf32[STRIE_CHAR0_SLOT] = details.char0 || 65536;
|
||||
this.buf32[STRIE_CHAR1_SLOT] = this.buf32[STRIE_CHAR0_SLOT];
|
||||
this.buf32[TRIE0_SLOT] = TRIE0_START;
|
||||
this.buf32[TRIE1_SLOT] = this.buf32[TRIE0_SLOT];
|
||||
this.buf32[CHAR0_SLOT] = details.char0 || 65536;
|
||||
this.buf32[CHAR1_SLOT] = this.buf32[CHAR0_SLOT];
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
@ -64,43 +135,86 @@ const STrieContainer = class {
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
reset() {
|
||||
this.buf32[STRIE_TRIE1_SLOT] = this.buf32[STRIE_TRIE0_SLOT];
|
||||
this.buf32[STRIE_CHAR1_SLOT] = this.buf32[STRIE_CHAR0_SLOT];
|
||||
this.buf32[TRIE1_SLOT] = this.buf32[TRIE0_SLOT];
|
||||
this.buf32[CHAR1_SLOT] = this.buf32[CHAR0_SLOT];
|
||||
}
|
||||
|
||||
matches(iroot, a, al) {
|
||||
const ar = a.length;
|
||||
const char0 = this.buf32[STRIE_CHAR0_SLOT];
|
||||
matches(iroot, a, i) {
|
||||
const buf32 = this.buf32;
|
||||
const buf8 = this.buf;
|
||||
const char0 = buf32[CHAR0_SLOT];
|
||||
const aR = a.length;
|
||||
let icell = iroot;
|
||||
let al = i;
|
||||
let c, v, bl, n;
|
||||
for (;;) {
|
||||
let c = a.charCodeAt(al);
|
||||
c = a.charCodeAt(al);
|
||||
al += 1;
|
||||
let v, bl;
|
||||
// find first segment with a first-character match
|
||||
for (;;) {
|
||||
v = this.buf32[icell+2];
|
||||
v = buf32[icell+SEGMENT_INFO];
|
||||
bl = char0 + (v & 0x00FFFFFF);
|
||||
if ( this.buf[bl] === c ) { break; }
|
||||
icell = this.buf32[icell+0];
|
||||
if ( buf8[bl] === c ) { break; }
|
||||
icell = buf32[icell+CELL_OR];
|
||||
if ( icell === 0 ) { return -1; }
|
||||
}
|
||||
// all characters in segment must match
|
||||
let n = v >>> 24;
|
||||
n = v >>> 24;
|
||||
if ( n > 1 ) {
|
||||
n -= 1;
|
||||
if ( (al + n) > ar ) { return -1; }
|
||||
if ( (al + n) > aR ) { return -1; }
|
||||
bl += 1;
|
||||
const br = bl + n;
|
||||
do {
|
||||
if ( a.charCodeAt(al) !== this.buf[bl] ) { return -1; }
|
||||
al += 1;
|
||||
bl += 1;
|
||||
} while ( bl < br );
|
||||
for ( let i = 0; i < n; i++ ) {
|
||||
if ( a.charCodeAt(al+i) !== buf8[bl+i] ) { return -1; }
|
||||
}
|
||||
al += n;
|
||||
}
|
||||
// next segment
|
||||
icell = this.buf32[icell+1];
|
||||
if ( icell === 0 || this.buf32[icell+2] === 0 ) { return al; }
|
||||
if ( al === ar ) { return -1; }
|
||||
icell = buf32[icell+CELL_AND];
|
||||
if ( /* icell === 0 || */ buf32[icell+SEGMENT_INFO] === 0 ) {
|
||||
const inext = buf32[icell+BCELL_LEFT_AND];
|
||||
if ( inext === 0 ) { return (i << 16) | al; }
|
||||
const r = this.matchesLeft(inext, a, i);
|
||||
if ( r !== -1 ) { return (r << 16) | al; }
|
||||
icell = buf32[icell+CELL_AND];
|
||||
if ( icell === 0 ) { return -1; }
|
||||
}
|
||||
if ( al === aR ) { return -1; }
|
||||
}
|
||||
}
|
||||
|
||||
matchesLeft(iroot, a, i) {
|
||||
const buf32 = this.buf32;
|
||||
const buf8 = this.buf;
|
||||
const char0 = buf32[CHAR0_SLOT];
|
||||
let icell = iroot;
|
||||
let ar = i;
|
||||
let c, v, br, n;
|
||||
for (;;) {
|
||||
ar -= 1;
|
||||
c = a.charCodeAt(ar);
|
||||
// find first segment with a first-character match
|
||||
for (;;) {
|
||||
v = buf32[icell+SEGMENT_INFO];
|
||||
n = v >>> 24;
|
||||
br = char0 + (v & 0x00FFFFFF) + n - 1;
|
||||
if ( buf8[br] === c ) { break; }
|
||||
icell = buf32[icell+CELL_OR];
|
||||
if ( icell === 0 ) { return -1; }
|
||||
}
|
||||
// all characters in segment must match
|
||||
if ( n > 1 ) {
|
||||
n -= 1;
|
||||
if ( n > ar ) { return -1; }
|
||||
for ( let i = 1; i <= n; i++ ) {
|
||||
if ( a.charCodeAt(ar-i) !== buf8[br-i] ) { return -1; }
|
||||
}
|
||||
ar -= n;
|
||||
}
|
||||
// next segment
|
||||
icell = buf32[icell+CELL_AND];
|
||||
if ( icell === 0 || buf32[icell+SEGMENT_INFO] === 0 ) { return ar; }
|
||||
if ( ar === 0 ) { return -1; }
|
||||
}
|
||||
}
|
||||
|
||||
@ -109,14 +223,14 @@ const STrieContainer = class {
|
||||
return new this.STrieRef(this, args[0], args[1]);
|
||||
}
|
||||
// grow buffer if needed
|
||||
if ( (this.buf32[STRIE_CHAR0_SLOT] - this.buf32[STRIE_TRIE1_SLOT]) < 12 ) {
|
||||
this.growBuf(12, 0);
|
||||
if ( (this.buf32[CHAR0_SLOT] - this.buf32[TRIE1_SLOT]) < CELL_BYTE_LENGTH ) {
|
||||
this.growBuf(CELL_BYTE_LENGTH, 0);
|
||||
}
|
||||
const iroot = this.buf32[STRIE_TRIE1_SLOT] >>> 2;
|
||||
this.buf32[STRIE_TRIE1_SLOT] += 12;
|
||||
this.buf32[iroot+0] = 0;
|
||||
this.buf32[iroot+1] = 0;
|
||||
this.buf32[iroot+2] = 0;
|
||||
const iroot = this.buf32[TRIE1_SLOT] >>> 2;
|
||||
this.buf32[TRIE1_SLOT] += CELL_BYTE_LENGTH;
|
||||
this.buf32[iroot+CELL_OR] = 0;
|
||||
this.buf32[iroot+CELL_AND] = 0;
|
||||
this.buf32[iroot+SEGMENT_INFO] = 0;
|
||||
return new this.STrieRef(this, iroot, 0);
|
||||
}
|
||||
|
||||
@ -124,68 +238,177 @@ const STrieContainer = class {
|
||||
return [ trieRef.iroot, trieRef.size ];
|
||||
}
|
||||
|
||||
add(iroot, s) {
|
||||
const lschar = s.length;
|
||||
if ( lschar === 0 ) { return 0; }
|
||||
let ischar = 0;
|
||||
add(iroot, a, i = 0) {
|
||||
const aR = a.length;
|
||||
if ( aR === 0 ) { return 0; }
|
||||
let icell = iroot;
|
||||
// special case: first node in trie
|
||||
if ( this.buf32[icell+2] === 0 ) {
|
||||
this.buf32[icell+2] = this.addSegment(s.slice(ischar));
|
||||
return 1;
|
||||
if ( this.buf32[icell+SEGMENT_INFO] === 0 ) {
|
||||
this.buf32[icell+SEGMENT_INFO] = this.addSegment(a, i, aR);
|
||||
return this.addLeft(icell, a, i);
|
||||
}
|
||||
// grow buffer if needed
|
||||
if (
|
||||
(this.buf32[STRIE_CHAR0_SLOT] - this.buf32[STRIE_TRIE1_SLOT]) < 24 ||
|
||||
(this.buf.length - this.buf32[STRIE_CHAR1_SLOT]) < 256
|
||||
(this.buf32[CHAR0_SLOT] - this.buf32[TRIE1_SLOT]) < MIN_FREE_CELL_BYTE_LENGTH ||
|
||||
(this.buf.length - this.buf32[CHAR1_SLOT]) < 256
|
||||
) {
|
||||
this.growBuf(24, 256);
|
||||
this.growBuf(MIN_FREE_CELL_BYTE_LENGTH, 256);
|
||||
}
|
||||
//
|
||||
const char0 = this.buf32[STRIE_CHAR0_SLOT];
|
||||
const buf32 = this.buf32;
|
||||
const buf8 = this.buf;
|
||||
const char0 = buf32[CHAR0_SLOT];
|
||||
let al = i;
|
||||
let inext;
|
||||
// find a matching cell: move down
|
||||
for (;;) {
|
||||
const vseg = this.buf32[icell+2];
|
||||
const binfo = buf32[icell+SEGMENT_INFO];
|
||||
// skip boundary cells
|
||||
if ( vseg === 0 ) {
|
||||
icell = this.buf32[icell+1];
|
||||
if ( binfo === 0 ) {
|
||||
icell = buf32[icell+BCELL_RIGHT_AND];
|
||||
continue;
|
||||
}
|
||||
let isegchar0 = char0 + (vseg & 0x00FFFFFF);
|
||||
let bl = char0 + (binfo & 0x00FFFFFF);
|
||||
// if first character is no match, move to next descendant
|
||||
if ( this.buf[isegchar0] !== s.charCodeAt(ischar) ) {
|
||||
inext = this.buf32[icell+0];
|
||||
if ( buf8[bl] !== a.charCodeAt(al) ) {
|
||||
inext = buf32[icell+CELL_OR];
|
||||
if ( inext === 0 ) {
|
||||
this.buf32[icell+0] = this.addCell(0, 0, this.addSegment(s.slice(ischar)));
|
||||
inext = this.addCell(0, 0, this.addSegment(a, al, aR));
|
||||
buf32[icell+CELL_OR] = inext;
|
||||
return this.addLeft(inext, a, i);
|
||||
}
|
||||
icell = inext;
|
||||
continue;
|
||||
}
|
||||
// 1st character was tested
|
||||
let bi = 1;
|
||||
al += 1;
|
||||
// find 1st mismatch in rest of segment
|
||||
const bR = binfo >>> 24;
|
||||
if ( bR !== 1 ) {
|
||||
for (;;) {
|
||||
if ( bi === bR ) { break; }
|
||||
if ( al === aR ) { break; }
|
||||
if ( buf8[bl+bi] !== a.charCodeAt(al) ) { break; }
|
||||
bi += 1;
|
||||
al += 1;
|
||||
}
|
||||
}
|
||||
// all segment characters matched
|
||||
if ( bi === bR ) {
|
||||
// needle remainder: no
|
||||
if ( al === aR ) {
|
||||
return this.addLeft(icell, a, i);
|
||||
}
|
||||
// needle remainder: yes
|
||||
inext = buf32[icell+CELL_AND];
|
||||
if ( buf32[inext+CELL_AND] !== 0 ) {
|
||||
icell = inext;
|
||||
continue;
|
||||
}
|
||||
// add needle remainder
|
||||
icell = this.addCell(0, 0, this.addSegment(a, al, aR));
|
||||
buf32[inext+CELL_AND] = icell;
|
||||
return this.addLeft(icell, a, i);
|
||||
}
|
||||
// some characters matched
|
||||
// split current segment
|
||||
bl -= char0;
|
||||
buf32[icell+SEGMENT_INFO] = bi << 24 | bl;
|
||||
inext = this.addCell(
|
||||
buf32[icell+CELL_AND],
|
||||
0,
|
||||
bR - bi << 24 | bl + bi
|
||||
);
|
||||
buf32[icell+CELL_AND] = inext;
|
||||
// needle remainder: no = need boundary cell
|
||||
if ( al === aR ) {
|
||||
return this.addLeft(icell, a, i);
|
||||
}
|
||||
// needle remainder: yes = need new cell for remaining characters
|
||||
icell = this.addCell(0, 0, this.addSegment(a, al, aR));
|
||||
buf32[inext+CELL_OR] = icell;
|
||||
return this.addLeft(icell, a, i);
|
||||
}
|
||||
}
|
||||
|
||||
addLeft(icell, a, i) {
|
||||
const buf32 = this.buf32;
|
||||
// fetch boundary cell
|
||||
let inext = buf32[icell+CELL_AND];
|
||||
// add boundary cell if none exist
|
||||
if ( inext === 0 || buf32[inext+SEGMENT_INFO] !== 0 ) {
|
||||
const iboundary = this.allocateCell();
|
||||
buf32[icell+CELL_AND] = iboundary;
|
||||
buf32[iboundary+BCELL_RIGHT_AND] = inext;
|
||||
if ( i === 0 ) { return 1; }
|
||||
buf32[iboundary+BCELL_LEFT_AND] = this.allocateCell();
|
||||
inext = iboundary;
|
||||
}
|
||||
// shortest match is always first so no point storing whatever is left
|
||||
if ( buf32[inext+BCELL_LEFT_AND] === 0 ) {
|
||||
return i === 0 ? 0 : 1;
|
||||
}
|
||||
// bail out if no left segment
|
||||
if ( i === 0 ) {
|
||||
buf32[inext+BCELL_LEFT_AND] = 0;
|
||||
return 1;
|
||||
}
|
||||
// fetch root cell of left segment
|
||||
icell = buf32[inext+BCELL_LEFT_AND];
|
||||
// special case: first node in trie
|
||||
if ( buf32[icell+SEGMENT_INFO] === 0 ) {
|
||||
buf32[icell+SEGMENT_INFO] = this.addSegment(a, 0, i);
|
||||
return 1;
|
||||
}
|
||||
const buf8 = this.buf;
|
||||
const char0 = buf32[CHAR0_SLOT];
|
||||
let ar = i;
|
||||
// find a matching cell: move down
|
||||
for (;;) {
|
||||
const binfo = buf32[icell+SEGMENT_INFO];
|
||||
// skip boundary cells
|
||||
if ( binfo === 0 ) {
|
||||
icell = buf32[icell+CELL_AND];
|
||||
continue;
|
||||
}
|
||||
const bL = char0 + (binfo & 0x00FFFFFF);
|
||||
const bR = bL + (binfo >>> 24);
|
||||
let br = bR;
|
||||
// if first character is no match, move to next descendant
|
||||
if ( buf8[br-1] !== a.charCodeAt(ar-1) ) {
|
||||
inext = buf32[icell+CELL_OR];
|
||||
if ( inext === 0 ) {
|
||||
inext = this.addCell(0, 0, this.addSegment(a, 0, ar));
|
||||
buf32[icell+CELL_OR] = inext;
|
||||
return 1;
|
||||
}
|
||||
icell = inext;
|
||||
continue;
|
||||
}
|
||||
// 1st character was tested
|
||||
let isegchar = 1;
|
||||
ischar += 1;
|
||||
br -= 1;
|
||||
ar -= 1;
|
||||
// find 1st mismatch in rest of segment
|
||||
const lsegchar = vseg >>> 24;
|
||||
if ( lsegchar !== 1 ) {
|
||||
if ( br !== bL ) {
|
||||
for (;;) {
|
||||
if ( isegchar === lsegchar ) { break; }
|
||||
if ( ischar === lschar ) { break; }
|
||||
if ( this.buf[isegchar0+isegchar] !== s.charCodeAt(ischar) ) { break; }
|
||||
isegchar += 1;
|
||||
ischar += 1;
|
||||
if ( br === bL ) { break; }
|
||||
if ( ar === 0 ) { break; }
|
||||
if ( buf8[br-1] !== a.charCodeAt(ar-1) ) { break; }
|
||||
br -= 1;
|
||||
ar -= 1;
|
||||
}
|
||||
}
|
||||
// all segment characters matched
|
||||
if ( isegchar === lsegchar ) {
|
||||
inext = this.buf32[icell+1];
|
||||
if ( br === bL ) {
|
||||
inext = buf32[icell+CELL_AND];
|
||||
// needle remainder: no
|
||||
if ( ischar === lschar ) {
|
||||
if ( ar === 0 ) {
|
||||
// boundary cell already present
|
||||
if ( inext === 0 || this.buf32[inext+2] === 0 ) { return 0; }
|
||||
if ( inext === 0 || buf32[inext+SEGMENT_INFO] === 0 ) {
|
||||
return 0;
|
||||
}
|
||||
// need boundary cell
|
||||
this.buf32[icell+1] = this.addCell(0, inext, 0);
|
||||
buf32[icell+CELL_AND] = this.addCell(inext, 0, 0);
|
||||
}
|
||||
// needle remainder: yes
|
||||
else {
|
||||
@ -195,28 +418,29 @@ const STrieContainer = class {
|
||||
}
|
||||
// boundary cell + needle remainder
|
||||
inext = this.addCell(0, 0, 0);
|
||||
this.buf32[icell+1] = inext;
|
||||
this.buf32[inext+1] = this.addCell(0, 0, this.addSegment(s.slice(ischar)));
|
||||
buf32[icell+CELL_AND] = inext;
|
||||
buf32[inext+CELL_AND] =
|
||||
this.addCell(0, 0, this.addSegment(a, 0, ar));
|
||||
}
|
||||
}
|
||||
// some segment characters matched
|
||||
else {
|
||||
// split current cell
|
||||
isegchar0 -= char0;
|
||||
this.buf32[icell+2] = isegchar << 24 | isegchar0;
|
||||
buf32[icell+SEGMENT_INFO] = (bR - br) << 24 | (br - char0);
|
||||
inext = this.addCell(
|
||||
buf32[icell+CELL_AND],
|
||||
0,
|
||||
this.buf32[icell+1],
|
||||
lsegchar - isegchar << 24 | isegchar0 + isegchar
|
||||
(br - bL) << 24 | (bL - char0)
|
||||
);
|
||||
this.buf32[icell+1] = inext;
|
||||
buf32[icell+CELL_AND] = inext;
|
||||
// needle remainder: no = need boundary cell
|
||||
if ( ischar === lschar ) {
|
||||
this.buf32[icell+1] = this.addCell(0, inext, 0);
|
||||
if ( ar === 0 ) {
|
||||
buf32[icell+CELL_AND] = this.addCell(inext, 0, 0);
|
||||
}
|
||||
// needle remainder: yes = need new cell for remaining characters
|
||||
else {
|
||||
this.buf32[inext+0] = this.addCell(0, 0, this.addSegment(s.slice(ischar)));
|
||||
buf32[inext+CELL_OR] =
|
||||
this.addCell(0, 0, this.addSegment(a, 0, ar));
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
@ -227,7 +451,7 @@ const STrieContainer = class {
|
||||
this.shrinkBuf();
|
||||
return {
|
||||
byteLength: this.buf.byteLength,
|
||||
char0: this.buf32[STRIE_CHAR0_SLOT],
|
||||
char0: this.buf32[CHAR0_SLOT],
|
||||
};
|
||||
}
|
||||
|
||||
@ -235,14 +459,14 @@ const STrieContainer = class {
|
||||
if ( encoder instanceof Object ) {
|
||||
return encoder.encode(
|
||||
this.buf32.buffer,
|
||||
this.buf32[STRIE_CHAR1_SLOT]
|
||||
this.buf32[CHAR1_SLOT]
|
||||
);
|
||||
}
|
||||
return Array.from(
|
||||
new Uint32Array(
|
||||
this.buf32.buffer,
|
||||
0,
|
||||
this.buf32[STRIE_CHAR1_SLOT] + 3 >>> 2
|
||||
this.buf32[CHAR1_SLOT] + 3 >>> 2
|
||||
)
|
||||
);
|
||||
}
|
||||
@ -253,7 +477,7 @@ const STrieContainer = class {
|
||||
? decoder.decodeSize(selfie)
|
||||
: selfie.length << 2;
|
||||
if ( byteLength === 0 ) { return false; }
|
||||
byteLength = byteLength + STRIE_PAGE_SIZE-1 & ~(STRIE_PAGE_SIZE-1);
|
||||
byteLength = byteLength + PAGE_SIZE-1 & ~(PAGE_SIZE-1);
|
||||
if ( byteLength > this.buf.length ) {
|
||||
this.buf = new Uint8Array(byteLength);
|
||||
this.buf32 = new Uint32Array(this.buf.buffer);
|
||||
@ -270,92 +494,100 @@ const STrieContainer = class {
|
||||
// Private methods
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
addCell(idown, iright, v) {
|
||||
let icell = this.buf32[STRIE_TRIE1_SLOT];
|
||||
this.buf32[STRIE_TRIE1_SLOT] = icell + 12;
|
||||
allocateCell() {
|
||||
let icell = this.buf32[TRIE1_SLOT];
|
||||
this.buf32[TRIE1_SLOT] = icell + CELL_BYTE_LENGTH;
|
||||
icell >>>= 2;
|
||||
this.buf32[icell+0] = idown;
|
||||
this.buf32[icell+1] = iright;
|
||||
this.buf32[icell+2] = v;
|
||||
this.buf32[icell+0] = 0;
|
||||
this.buf32[icell+1] = 0;
|
||||
this.buf32[icell+2] = 0;
|
||||
return icell;
|
||||
}
|
||||
|
||||
addSegment(segment) {
|
||||
const lsegchar = segment.length;
|
||||
if ( lsegchar === 0 ) { return 0; }
|
||||
let char1 = this.buf32[STRIE_CHAR1_SLOT];
|
||||
const isegchar = char1 - this.buf32[STRIE_CHAR0_SLOT];
|
||||
let i = 0;
|
||||
do {
|
||||
this.buf[char1++] = segment.charCodeAt(i++);
|
||||
} while ( i !== lsegchar );
|
||||
this.buf32[STRIE_CHAR1_SLOT] = char1;
|
||||
return (lsegchar << 24) | isegchar;
|
||||
addCell(iand, ior, v) {
|
||||
const icell = this.allocateCell();
|
||||
this.buf32[icell+CELL_AND] = iand;
|
||||
this.buf32[icell+CELL_OR] = ior;
|
||||
this.buf32[icell+SEGMENT_INFO] = v;
|
||||
return icell;
|
||||
}
|
||||
|
||||
addSegment(s, l, r) {
|
||||
const n = r - l;
|
||||
if ( n === 0 ) { return 0; }
|
||||
const buf32 = this.buf32;
|
||||
const des = buf32[CHAR1_SLOT];
|
||||
buf32[CHAR1_SLOT] = des + n;
|
||||
const buf8 = this.buf;
|
||||
for ( let i = 0; i < n; i++ ) {
|
||||
buf8[des+i] = s.charCodeAt(l+i);
|
||||
}
|
||||
return (n << 24) | (des - buf32[CHAR0_SLOT]);
|
||||
}
|
||||
|
||||
growBuf(trieGrow, charGrow) {
|
||||
const char0 = Math.max(
|
||||
(this.buf32[STRIE_TRIE1_SLOT] + trieGrow + STRIE_PAGE_SIZE-1) & ~(STRIE_PAGE_SIZE-1),
|
||||
this.buf32[STRIE_CHAR0_SLOT]
|
||||
(this.buf32[TRIE1_SLOT] + trieGrow + PAGE_SIZE-1) & ~(PAGE_SIZE-1),
|
||||
this.buf32[CHAR0_SLOT]
|
||||
);
|
||||
const char1 = char0 + this.buf32[STRIE_CHAR1_SLOT] - this.buf32[STRIE_CHAR0_SLOT];
|
||||
const char1 = char0 + this.buf32[CHAR1_SLOT] - this.buf32[CHAR0_SLOT];
|
||||
const bufLen = Math.max(
|
||||
(char1 + charGrow + STRIE_PAGE_SIZE-1) & ~(STRIE_PAGE_SIZE-1),
|
||||
(char1 + charGrow + PAGE_SIZE-1) & ~(PAGE_SIZE-1),
|
||||
this.buf.length
|
||||
);
|
||||
this.resizeBuf(bufLen, char0);
|
||||
}
|
||||
|
||||
shrinkBuf() {
|
||||
const char0 = this.buf32[STRIE_TRIE1_SLOT] + 24;
|
||||
const char1 = char0 + this.buf32[STRIE_CHAR1_SLOT] - this.buf32[STRIE_CHAR0_SLOT];
|
||||
const char0 = this.buf32[TRIE1_SLOT] + MIN_FREE_CELL_BYTE_LENGTH;
|
||||
const char1 = char0 + this.buf32[CHAR1_SLOT] - this.buf32[CHAR0_SLOT];
|
||||
const bufLen = char1 + 256;
|
||||
this.resizeBuf(bufLen, char0);
|
||||
}
|
||||
|
||||
resizeBuf(bufLen, char0) {
|
||||
bufLen = bufLen + STRIE_PAGE_SIZE-1 & ~(STRIE_PAGE_SIZE-1);
|
||||
bufLen = bufLen + PAGE_SIZE-1 & ~(PAGE_SIZE-1);
|
||||
if (
|
||||
bufLen === this.buf.length &&
|
||||
char0 === this.buf32[STRIE_CHAR0_SLOT]
|
||||
char0 === this.buf32[CHAR0_SLOT]
|
||||
) {
|
||||
return;
|
||||
}
|
||||
const charDataLen = this.buf32[STRIE_CHAR1_SLOT] - this.buf32[STRIE_CHAR0_SLOT];
|
||||
const charDataLen = this.buf32[CHAR1_SLOT] - this.buf32[CHAR0_SLOT];
|
||||
if ( bufLen !== this.buf.length ) {
|
||||
const newBuf = new Uint8Array(bufLen);
|
||||
newBuf.set(
|
||||
new Uint8Array(
|
||||
this.buf.buffer,
|
||||
0,
|
||||
this.buf32[STRIE_TRIE1_SLOT]
|
||||
this.buf32[TRIE1_SLOT]
|
||||
),
|
||||
0
|
||||
);
|
||||
newBuf.set(
|
||||
new Uint8Array(
|
||||
this.buf.buffer,
|
||||
this.buf32[STRIE_CHAR0_SLOT],
|
||||
this.buf32[CHAR0_SLOT],
|
||||
charDataLen
|
||||
),
|
||||
char0
|
||||
);
|
||||
this.buf = newBuf;
|
||||
this.buf32 = new Uint32Array(this.buf.buffer);
|
||||
this.buf32[STRIE_CHAR0_SLOT] = char0;
|
||||
this.buf32[STRIE_CHAR1_SLOT] = char0 + charDataLen;
|
||||
this.buf32[CHAR0_SLOT] = char0;
|
||||
this.buf32[CHAR1_SLOT] = char0 + charDataLen;
|
||||
}
|
||||
if ( char0 !== this.buf32[STRIE_CHAR0_SLOT] ) {
|
||||
if ( char0 !== this.buf32[CHAR0_SLOT] ) {
|
||||
this.buf.set(
|
||||
new Uint8Array(
|
||||
this.buf.buffer,
|
||||
this.buf32[STRIE_CHAR0_SLOT],
|
||||
this.buf32[CHAR0_SLOT],
|
||||
charDataLen
|
||||
),
|
||||
char0
|
||||
);
|
||||
this.buf32[STRIE_CHAR0_SLOT] = char0;
|
||||
this.buf32[STRIE_CHAR1_SLOT] = char0 + charDataLen;
|
||||
this.buf32[CHAR0_SLOT] = char0;
|
||||
this.buf32[CHAR1_SLOT] = char0 + charDataLen;
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -366,23 +598,23 @@ const STrieContainer = class {
|
||||
|
||||
*/
|
||||
|
||||
STrieContainer.prototype.STrieRef = class {
|
||||
µBlock.BidiTrieContainer.prototype.STrieRef = class {
|
||||
constructor(container, iroot, size) {
|
||||
this.container = container;
|
||||
this.iroot = iroot;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
add(pattern) {
|
||||
if ( this.container.add(this.iroot, pattern) === 1 ) {
|
||||
add(s, i = 0) {
|
||||
if ( this.container.add(this.iroot, s, i) === 1 ) {
|
||||
this.size += 1;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
matches(a, al) {
|
||||
return this.container.matches(this.iroot, a, al);
|
||||
matches(a, i) {
|
||||
return this.container.matches(this.iroot, a, i);
|
||||
}
|
||||
|
||||
dump() {
|
||||
@ -406,24 +638,24 @@ STrieContainer.prototype.STrieRef = class {
|
||||
this.icell = this.forks.pop();
|
||||
}
|
||||
for (;;) {
|
||||
const idown = this.container.buf32[this.icell+0];
|
||||
const idown = this.container.buf32[this.icell+CELL_OR];
|
||||
if ( idown !== 0 ) {
|
||||
this.forks.push(idown, this.charPtr);
|
||||
}
|
||||
const v = this.container.buf32[this.icell+2];
|
||||
let i0 = this.container.buf32[STRIE_CHAR0_SLOT] + (v & 0x00FFFFFF);
|
||||
const v = this.container.buf32[this.icell+SEGMENT_INFO];
|
||||
let i0 = this.container.buf32[CHAR0_SLOT] + (v & 0x00FFFFFF);
|
||||
const i1 = i0 + (v >>> 24);
|
||||
while ( i0 < i1 ) {
|
||||
this.charBuf[this.charPtr] = this.container.buf[i0];
|
||||
this.charPtr += 1;
|
||||
i0 += 1;
|
||||
}
|
||||
this.icell = this.container.buf32[this.icell+1];
|
||||
this.icell = this.container.buf32[this.icell+CELL_AND];
|
||||
if ( this.icell === 0 ) {
|
||||
return this.toPattern();
|
||||
}
|
||||
if ( this.container.buf32[this.icell+2] === 0 ) {
|
||||
this.icell = this.container.buf32[this.icell+1];
|
||||
if ( this.container.buf32[this.icell+SEGMENT_INFO] === 0 ) {
|
||||
this.icell = this.container.buf32[this.icell+CELL_AND];
|
||||
return this.toPattern();
|
||||
}
|
||||
}
|
||||
@ -443,3 +675,8 @@ STrieContainer.prototype.STrieRef = class {
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// end of local namespace
|
||||
// *****************************************************************************
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user