1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-07-05 11:37:01 +02:00

Refactor hntrie to avoid the need for boundary cells

Whereas before the string segment was encoded as:

LL OOOOOOOOOOOO

where L are the upper 8 bits and used to encode the length
of the segment, and O are the lower 24 bits and used to
encode the offset of the string data in the character
buffer, the new code encodes it as follows:

OOOOOOOOOOOO LL

And furthermore the most significant bit of the length
LL is now used to mark whether the current string segment
is a label boundary.

This means a cell can't reference a segment longer than
127 characters. To work around this limitation for when a
segment is longer than 127 characters (a rare occurrence),
the algorithm will simply split the segment into multiple
adjacent cells.

As a result, there is no longer a need to encode
"boundariness" into special cells, which simplifies
both the storing and matching algorithms.

Additionally, added minimal documentation for the NPM
package on how to import and use HNTrieContainer as a
standalone API.
This commit is contained in:
Raymond Hill 2021-08-10 09:27:59 -04:00
parent a3f430ef03
commit c6fb70b1f0
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
8 changed files with 375 additions and 254 deletions

View File

@ -94,3 +94,65 @@ It is possible to pre-parse filter lists and save the intermediate results for
later use -- useful to speed up the loading of filter lists. This will be
documented eventually, but if you feel adventurous, you can look at the code
and use this capability now if you figure out the details.
---
## Extras
You can directly use specific APIs exposed by this package, here are some of
them, which are used internally by uBO's SNFE.
### `HNTrieContainer`
A well-optimised [compressed trie](https://en.wikipedia.org/wiki/Trie#Compressing_tries)
container specialized to store and look up hostnames.
The matching algorithm is designed for hostnames, i.e. the hostname labels
making up a hostname are matched from right to left, such that `www.example.org`
will be a match if `example.org` is stored into the trie, while
`anotherexample.org` won't be a match.
`HNTrieContainer` is designed to store a large number of hostnames with CPU and
memory efficiency as a main concern -- and is a key component of uBO.
To create and use a standalone `HNTrieContainer` object:
```js
import HNTrieContainer from '@gorhill/ubo-core/js/hntrie.js';
const trieContainer = new HNTrieContainer();
const aTrie = trieContainer.createOne();
aTrie.add('example.org');
aTrie.add('example.com');
const anotherTrie = trieContainer.createOne();
anotherTrie.add('foo.invalid');
anotherTrie.add('bar.invalid');
// matches() returns the position at which the match starts, or -1 when
// there is no match.
// Matches: return 4
console.log("aTrie.matches('www.example.org')", aTrie.matches('www.example.org'));
// Does not match: return -1
console.log("aTrie.matches('www.foo.invalid')", aTrie.matches('www.foo.invalid'));
// Does not match: return -1
console.log("anotherTrie.matches('www.example.org')", anotherTrie.matches('www.example.org'));
// Matches: return 0
console.log("anotherTrie.matches('foo.invalid')", anotherTrie.matches('foo.invalid'));
```
The `reset()` method must be used to remove all the tries from a trie container;
you can't remove a single trie from the container.
```js
trieContainer.reset();
```
When you reset a trie container, references to tries created before the reset
become invalid, i.e. `aTrie` and `anotherTrie` are no longer valid and shouldn't
be used following a reset.

View File

@ -1,6 +1,6 @@
{
"name": "@gorhill/ubo-core",
"version": "0.1.7",
"version": "0.1.8",
"description": "To create a working instance of uBlock Origin's static network filtering engine",
"type": "module",
"main": "index.js",
@ -15,7 +15,8 @@
"keywords": [
"uBlock",
"uBO",
"adblock"
"adblock",
"trie"
],
"author": "Raymond Hill",
"license": "GPL-3.0-or-later",

View File

@ -33,6 +33,8 @@ import {
StaticNetFilteringEngine,
} from './index.js';
import HNTrieContainer from './js/hntrie.js';
/******************************************************************************/
function fetch(listName) {
@ -42,7 +44,7 @@ function fetch(listName) {
});
}
function runTests(engine) {
function testSNFE(engine) {
let result = 0;
// Tests
@ -77,6 +79,53 @@ function runTests(engine) {
}
}
// Run the SNFE test suite three times against the same engine instance:
// once after loading the filter lists, once after calling useLists([])
// (presumably leaves the engine with no lists -- confirm against useLists()),
// and once after restoring the snapshot serialized while lists were loaded.
async function doSNFE() {
    const snfe = await StaticNetFilteringEngine.create();

    // Start fetching every list up front; each promise resolves to the
    // { name, raw } record handed to useLists().
    const pendingLists = [ 'easylist', 'easyprivacy' ].map(name =>
        fetch(name).then(raw => ({ name, raw }))
    );
    await snfe.useLists(pendingLists);
    testSNFE(snfe);

    const snapshot = await snfe.serialize();

    // Deliberately not awaited, to keep the exact sequencing of the test.
    snfe.useLists([]);
    testSNFE(snfe);

    await snfe.deserialize(snapshot);
    testSNFE(snfe);
}
// Smoke test for the standalone HNTrieContainer API: build two tries in the
// same container, then log the result of a few lookups.
async function doHNTrie() {
    const container = new HNTrieContainer();

    const aTrie = container.createOne();
    for ( const hostname of [ 'example.org', 'example.com' ] ) {
        aTrie.add(hostname);
    }

    const anotherTrie = container.createOne();
    for ( const hostname of [ 'foo.invalid', 'bar.invalid' ] ) {
        anotherTrie.add(hostname);
    }

    // matches() returns the position at which the match starts, or -1 when
    // there is no match.
    const lookups = [
        // Matches: returns 4
        [ "aTrie.matches('www.example.org')", aTrie, 'www.example.org' ],
        // Does not match: returns -1
        [ "aTrie.matches('www.foo.invalid')", aTrie, 'www.foo.invalid' ],
        // Does not match: returns -1
        [ "anotherTrie.matches('www.example.org')", anotherTrie, 'www.example.org' ],
        // Matches: returns 0
        [ "anotherTrie.matches('foo.invalid')", anotherTrie, 'foo.invalid' ],
    ];
    for ( const [ label, trie, needle ] of lookups ) {
        console.log(label, trie.matches(needle));
    }
}
async function main() {
try {
const result = await enableWASM();
@ -87,23 +136,8 @@ async function main() {
console.log(ex);
}
const engine = await StaticNetFilteringEngine.create();
await engine.useLists([
fetch('easylist').then(raw => ({ name: 'easylist', raw })),
fetch('easyprivacy').then(raw => ({ name: 'easyprivacy', raw })),
]);
runTests(engine);
const serialized = await engine.serialize();
engine.useLists([]);
runTests(engine);
await engine.deserialize(serialized);
runTests(engine);
await doSNFE();
await doHNTrie();
process.exit();
}

View File

@ -155,7 +155,7 @@ const µBlock = { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 37, // Increase when compiled format changes
selfieMagic: 37, // Increase when selfie format changes
selfieMagic: 38, // Increase when selfie format changes
},
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -184,21 +184,21 @@ const HNTrieContainer = class {
let ineedle = buf8[255];
let icell = buf32[iroot+0];
if ( icell === 0 ) { return -1; }
let c = 0, v = 0, i0 = 0, n = 0;
for (;;) {
if ( ineedle === 0 ) { return -1; }
ineedle -= 1;
let c = buf8[ineedle];
let v, i0;
c = buf8[ineedle];
// find first segment with a first-character match
for (;;) {
v = buf32[icell+2];
i0 = char0 + (v & 0x00FFFFFF);
i0 = char0 + (v >>> 8);
if ( buf8[i0] === c ) { break; }
icell = buf32[icell+0];
if ( icell === 0 ) { return -1; }
}
// all characters in segment must match
let n = v >>> 24;
n = v & 0x7F;
if ( n > 1 ) {
n -= 1;
if ( n > ineedle ) { return -1; }
@ -210,17 +210,17 @@ const HNTrieContainer = class {
i0 += 1;
} while ( i0 < i1 );
}
// boundary at end of segment?
if ( (v & 0x80) !== 0 ) {
if ( ineedle === 0 || buf8[ineedle-1] === 0x2E /* '.' */ ) {
return ineedle;
}
}
// next segment
icell = buf32[icell+1];
if ( icell === 0 ) { break; }
if ( buf32[icell+2] === 0 ) {
if ( ineedle === 0 || buf8[ineedle-1] === 0x2E ) {
return ineedle;
}
icell = buf32[icell+1];
}
}
return ineedle === 0 || buf8[ineedle-1] === 0x2E ? ineedle : -1;
return -1;
}
createOne(args) {
@ -256,39 +256,31 @@ const HNTrieContainer = class {
let icell = this.buf32[iroot+0];
// special case: first node in trie
if ( icell === 0 ) {
this.buf32[iroot+0] = this.addCell(0, 0, this.addSegment(lhnchar));
this.buf32[iroot+0] = this.addLeafCell(lhnchar);
return 1;
}
//
const char0 = this.buf32[CHAR0_SLOT];
let inext;
let isegchar, lsegchar, boundaryBit, inext;
// find a matching cell: move down
for (;;) {
const vseg = this.buf32[icell+2];
// skip boundary cells
if ( vseg === 0 ) {
// remainder is at label boundary? if yes, no need to add
// the rest since the shortest match is always reported
if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
icell = this.buf32[icell+1];
continue;
}
let isegchar0 = char0 + (vseg & 0x00FFFFFF);
const v = this.buf32[icell+2];
let isegchar0 = char0 + (v >>> 8);
// if first character is no match, move to next descendant
if ( this.buf[isegchar0] !== this.buf[lhnchar-1] ) {
inext = this.buf32[icell+0];
if ( inext === 0 ) {
this.buf32[icell+0] = this.addCell(0, 0, this.addSegment(lhnchar));
this.buf32[icell+0] = this.addLeafCell(lhnchar);
return 1;
}
icell = inext;
continue;
}
// 1st character was tested
let isegchar = 1;
isegchar = 1;
lhnchar -= 1;
// find 1st mismatch in rest of segment
const lsegchar = vseg >>> 24;
lsegchar = v & 0x7F;
if ( lsegchar !== 1 ) {
for (;;) {
if ( isegchar === lsegchar ) { break; }
@ -298,49 +290,50 @@ const HNTrieContainer = class {
lhnchar -= 1;
}
}
boundaryBit = v & 0x80;
// all segment characters matched
if ( isegchar === lsegchar ) {
inext = this.buf32[icell+1];
// needle remainder: no
if ( lhnchar === 0 ) {
// boundary cell already present
if ( inext === 0 || this.buf32[inext+2] === 0 ) { return 0; }
// need boundary cell
this.buf32[icell+1] = this.addCell(0, inext, 0);
// boundary: yes, already present
if ( boundaryBit !== 0 ) { return 0; }
// boundary: no, mark as boundary
this.buf32[icell+2] = v | 0x80;
}
// needle remainder: yes
else {
// remainder is at label boundary? if yes, no need to add
// the rest since the shortest match is always reported
if ( boundaryBit !== 0 ) {
if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
}
inext = this.buf32[icell+1];
if ( inext !== 0 ) {
icell = inext;
continue;
}
// remainder is at label boundary? if yes, no need to add
// the rest since the shortest match is always reported
if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
// boundary cell + needle remainder
inext = this.addCell(0, 0, 0);
this.buf32[icell+1] = inext;
this.buf32[inext+1] = this.addCell(0, 0, this.addSegment(lhnchar));
// add needle remainder
this.buf32[icell+1] = this.addLeafCell(lhnchar);
}
}
// some segment characters matched
else {
// split current cell
isegchar0 -= char0;
this.buf32[icell+2] = isegchar << 24 | isegchar0;
this.buf32[icell+2] = isegchar0 << 8 | isegchar;
inext = this.addCell(
0,
this.buf32[icell+1],
lsegchar - isegchar << 24 | isegchar0 + isegchar
isegchar0 + isegchar << 8 | boundaryBit | lsegchar - isegchar
);
this.buf32[icell+1] = inext;
// needle remainder: no = need boundary cell
if ( lhnchar === 0 ) {
this.buf32[icell+1] = this.addCell(0, inext, 0);
// needle remainder: yes, need new cell for remaining characters
if ( lhnchar !== 0 ) {
this.buf32[inext+0] = this.addLeafCell(lhnchar);
}
// needle remainder: yes = need new cell for remaining characters
// needle remainder: no, need boundary cell
else {
this.buf32[inext+0] = this.addCell(0, 0, this.addSegment(lhnchar));
this.buf32[icell+2] |= 0x80;
}
}
return 1;
@ -459,9 +452,9 @@ const HNTrieContainer = class {
async enableWASM(wasmModuleFetcher, path) {
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/globalThis
const globals = (( ) => {
if ( typeof self !== 'undefined' ) { return self; }
// jshint ignore:start
if ( typeof globalThis !== 'undefined' ) { return globalThis; }
if ( typeof self !== 'undefined' ) { return self; }
if ( typeof global !== 'undefined' ) { return global; }
// jshint ignore:end
return {};
@ -508,16 +501,33 @@ const HNTrieContainer = class {
return icell;
}
addSegment(lsegchar) {
addLeafCell(lsegchar) {
const r = this.buf32[TRIE1_SLOT] >>> 2;
let i = r;
while ( lsegchar > 127 ) {
this.buf32[i+0] = 0;
this.buf32[i+1] = i + 3;
this.buf32[i+2] = this.addSegment(lsegchar, lsegchar - 127);
lsegchar -= 127;
i += 3;
}
this.buf32[i+0] = 0;
this.buf32[i+1] = 0;
this.buf32[i+2] = this.addSegment(lsegchar, 0) | 0x80;
this.buf32[TRIE1_SLOT] = i + 3 << 2;
return r;
}
addSegment(lsegchar, lsegend) {
if ( lsegchar === 0 ) { return 0; }
let char1 = this.buf32[CHAR1_SLOT];
const isegchar = char1 - this.buf32[CHAR0_SLOT];
let i = lsegchar;
do {
this.buf[char1++] = this.buf[--i];
} while ( i !== 0 );
} while ( i !== lsegend );
this.buf32[CHAR1_SLOT] = char1;
return (lsegchar << 24) | isegchar;
return isegchar << 8 | lsegchar - lsegend;
}
growBuf(trieGrow, charGrow) {
@ -724,8 +734,8 @@ HNTrieContainer.prototype.HNTrieRef = class {
this.forks.push(idown, this.charPtr);
}
const v = this.container.buf32[this.icell+2];
let i0 = this.container.buf32[CHAR0_SLOT] + (v & 0x00FFFFFF);
const i1 = i0 + (v >>> 24);
let i0 = this.container.buf32[CHAR0_SLOT] + (v >>> 8);
const i1 = i0 + (v & 0x7F);
while ( i0 < i1 ) {
this.charPtr -= 1;
this.charBuf[this.charPtr] = this.container.buf[i0];
@ -795,4 +805,4 @@ const getWasmModule = (( ) => {
/******************************************************************************/
export { HNTrieContainer };
export default HNTrieContainer;

View File

@ -26,9 +26,9 @@
/******************************************************************************/
import globals from './globals.js';
import HNTrieContainer from './hntrie.js';
import { sparseBase64 } from './base64-custom.js';
import { BidiTrieContainer } from './biditrie.js';
import { HNTrieContainer } from './hntrie.js';
import { StaticFilteringParser } from './static-filtering-parser.js';
import { CompiledListReader } from './static-filtering-io.js';

Binary file not shown.

View File

@ -106,9 +106,9 @@
get_local $icell
i32.load offset=8
tee_local $v
;; i0 = this.char0 + (v & 0x00FFFFFF);
i32.const 0x00FFFFFF
i32.and
;; i0 = char0 + (v >>> 8);
i32.const 8
i32.shr_u
get_local $char0
i32.add
tee_local $i0
@ -130,10 +130,10 @@
end
br 0
end end
;; let n = v >>> 24;
;; let n = v & 0x7F;
get_local $v
i32.const 24
i32.shr_u
i32.const 0x7F
i32.and
tee_local $n
;; if ( n > 1 ) {
i32.const 1
@ -186,21 +186,12 @@
br_if 0
end
end
;; icell = this.buf32[icell+1];
get_local $icell
i32.load offset=4
i32.const 2
i32.shl
tee_local $icell
;; if ( icell === 0 ) { break; }
i32.eqz
br_if $noSegment
;; if ( this.buf32[icell+2] === 0 ) {
get_local $icell
i32.load offset=8
i32.eqz
;; if ( (v & 0x80) !== 0 ) {
get_local $v
i32.const 0x80
i32.and
if
;; if ( ineedle === 0 || this.buf[ineedle-1] === 0x2E ) {
;; if ( ineedle === 0 || buf8[ineedle-1] === 0x2E /* '.' */ ) {
;; return ineedle;
;; }
get_local $ineedle
@ -219,32 +210,17 @@
get_local $ineedle
return
end
;; icell = this.buf32[icell+1];
get_local $icell
i32.load offset=4
i32.const 2
i32.shl
set_local $icell
end
br 0
;; icell = this.buf32[icell+1];
get_local $icell
i32.load offset=4
i32.const 2
i32.shl
tee_local $icell
;; if ( icell === 0 ) { break; }
br_if 0
end end
;; return ineedle === 0 || this.buf[ineedle-1] === 0x2E ? ineedle : -1;
get_local $ineedle
i32.eqz
if
i32.const 0
return
end
get_local $ineedle
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
get_local $ineedle
return
end
;; return -1;
i32.const -1
)
@ -259,11 +235,12 @@
(local $icell i32) ;; index of current cell in the trie
(local $lhnchar i32) ;; number of characters left to process in hostname
(local $char0 i32) ;; offset to start of character data section
(local $vseg i32) ;; integer value describing a segment
(local $v i32) ;; integer value describing a segment
(local $isegchar0 i32) ;; offset to start of current segment's character data
(local $isegchar i32)
(local $lsegchar i32) ;; number of character in current segment
(local $inext i32) ;; index of next cell to process
(local $boundaryBit i32) ;; the boundary bit state of the current cell
;;
;; let lhnchar = this.buf[255];
i32.const 255
@ -315,14 +292,11 @@
;; if ( this.buf32[icell+2] === 0 ) {
i32.eqz
if
;; this.buf32[iroot+0] = this.addCell(0, 0, this.addSegment(lhnchar));
;; this.buf32[iroot+0] = this.addLeafCell(lhnchar);
;; return 1;
get_local $iroot
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
call $addLeafCell
i32.store
i32.const 1
return
@ -336,35 +310,11 @@
;; const v = this.buf32[icell+2];
get_local $icell
i32.load offset=8
tee_local $vseg
;; if ( vseg === 0 ) {
i32.eqz
if
;; if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
i32.const -1
return
end
;; icell = this.buf32[icell+1];
;; continue;
get_local $icell
i32.load offset=4
i32.const 2
i32.shl
set_local $icell
br $nextSegment
end
;; let isegchar0 = char0 + (vseg & 0x00FFFFFF);
tee_local $v
;; let isegchar0 = char0 + (v >>> 8);
i32.const 8
i32.shr_u
get_local $char0
get_local $vseg
i32.const 0x00FFFFFF
i32.and
i32.add
tee_local $isegchar0
;; if ( this.buf[isegchar0] !== this.buf[lhnchar-1] ) {
@ -378,19 +328,14 @@
;; inext = this.buf32[icell+0];
get_local $icell
i32.load
i32.const 2
i32.shl
tee_local $inext
;; if ( inext === 0 ) {
i32.eqz
if
;; this.buf32[icell+0] = this.addCell(0, 0, this.addSegment(lhnchar));
;; this.buf32[icell+0] = this.addLeafCell(lhnchar);
get_local $icell
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
call $addLeafCell
i32.store
;; return 1;
i32.const 1
@ -398,6 +343,8 @@
end
;; icell = inext;
get_local $inext
i32.const 2
i32.shl
set_local $icell
br $nextSegment
end
@ -409,10 +356,10 @@
i32.const -1
i32.add
set_local $lhnchar
;; const lsegchar = vseg >>> 24;
get_local $vseg
i32.const 24
i32.shr_u
;; const lsegchar = v & 0x7F;
get_local $v
i32.const 0x7F
i32.and
tee_local $lsegchar
;; if ( lsegchar !== 1 ) {
i32.const 1
@ -452,82 +399,66 @@
br 0
end end
end
;; const boundaryBit = v & 0x80;
get_local $v
i32.const 0x80
i32.and
set_local $boundaryBit
;; if ( isegchar === lsegchar ) {
get_local $isegchar
get_local $lsegchar
i32.eq
if
;; inext = this.buf32[icell+1];
get_local $icell
i32.load offset=4
i32.const 2
i32.shl
set_local $inext
;; if ( lhnchar === 0 ) {
get_local $lhnchar
i32.eqz
if
;; if ( inext === 0 || this.buf32[inext+2] === 0 ) { return 0; }
get_local $inext
i32.eqz
;; if ( boundaryBit !== 0 ) { return 0; }
get_local $boundaryBit
if
i32.const 0
return
end
get_local $inext
i32.load offset=8
i32.eqz
if
i32.const 0
return
end
;; this.buf32[icell+1] = this.addCell(0, inext, 0);
;; this.buf32[icell+2] = v | 0x80;
get_local $icell
i32.const 0
get_local $inext
i32.const 2
i32.shr_u
i32.const 0
call $addCell
i32.store offset=4
get_local $v
i32.const 0x80
i32.or
i32.store offset=8
else
;; if ( boundaryBit !== 0 ) {
get_local $boundaryBit
if
;; if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
i32.const -1
return
end
end
;; inext = this.buf32[icell+1];
get_local $icell
i32.load offset=4
tee_local $inext
;; if ( inext !== 0 ) {
get_local $inext
if
;; icell = inext;
get_local $inext
i32.const 2
i32.shl
set_local $icell
;; continue;
br $nextSegment
end
;; if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
i32.const -1
return
end
;; inext = this.addCell(0, 0, 0);
;; this.buf32[icell+1] = inext;
;; this.buf32[icell+1] = this.addLeafCell(lhnchar);
get_local $icell
i32.const 0
i32.const 0
i32.const 0
call $addCell
tee_local $inext
i32.store offset=4
;; this.buf32[inext+1] = this.addCell(0, 0, this.addSegment(lhnchar));
get_local $inext
i32.const 2
i32.shl
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
call $addLeafCell
i32.store offset=4
end
else
@ -537,56 +468,54 @@
get_local $char0
i32.sub
tee_local $isegchar0
;; this.buf32[icell+2] = isegchar << 24 | isegchar0;
get_local $isegchar
i32.const 24
;; this.buf32[icell+2] = isegchar0 << 8 | isegchar;
i32.const 8
i32.shl
get_local $isegchar
i32.or
i32.store offset=8
;; inext = this.addCell(
;; 0,
;; this.buf32[icell+1],
;; lsegchar - isegchar << 24 | isegchar0 + isegchar
;; isegchar0 + isegchar << 8 | boundaryBit | lsegchar - isegchar
;; );
;; this.buf32[icell+1] = inext;
get_local $icell
i32.const 0
get_local $icell
i32.load offset=4
get_local $lsegchar
get_local $isegchar
i32.sub
i32.const 24
i32.shl
get_local $isegchar0
get_local $isegchar
i32.add
i32.const 8
i32.shl
get_local $boundaryBit
i32.or
get_local $lsegchar
get_local $isegchar
i32.sub
i32.or
call $addCell
tee_local $inext
;; this.buf32[icell+1] = inext;
i32.store offset=4
;; if ( lhnchar === 0 ) {
;; if ( lhnchar !== 0 ) {
get_local $lhnchar
i32.eqz
if
;; this.buf32[icell+1] = this.addCell(0, inext, 0);
get_local $icell
i32.const 0
get_local $inext
i32.const 0
call $addCell
i32.store offset=4
else
;; this.buf32[inext+0] = this.addCell(0, 0, this.addSegment(lhnchar));
;; this.buf32[inext+0] = this.addLeafCell(lhnchar);
get_local $inext
i32.const 2
i32.shl
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
call $addLeafCell
i32.store
else
;; this.buf32[icell+2] |= 0x80;
get_local $icell
get_local $icell
i32.load offset=8
i32.const 0x80
i32.or
i32.store offset=8
end
end
;; return 1;
@ -602,14 +531,14 @@
;;
;;
;; unsigned int addCell(idown, iright, vseg)
;; unsigned int addCell(idown, iright, v)
;;
;; Add a new cell, return cell index.
;;
(func $addCell
(param $idown i32)
(param $iright i32)
(param $vseg i32)
(param $v i32)
(result i32) ;; result: index of added cell
(local $icell i32)
;;
@ -632,7 +561,7 @@
i32.store offset=4
;; this.buf32[icell+2] = v;
get_local $icell
get_local $vseg
get_local $v
i32.store offset=8
;; return icell;
get_local $icell
@ -641,13 +570,96 @@
)
;;
;; unsigned int addSegment(lsegchar)
;; unsigned int addLeafCell(lsegchar)
;;
;; Add a new cell, return cell index.
;;
(func $addLeafCell
;; Appends a chain of one or more cells describing the $lsegchar characters
;; handed to $addSegment, and returns the cell index of the first cell.
;;
;; Unlike the JS pseudo-code quoted in the comments below, $r and $i hold
;; byte offsets rather than buf32 indices: a cell is 12 bytes wide (hence
;; `+ 12` where the JS has `+ 3`), and offsets are converted to cell indices
;; with `>>> 2` only when stored as a link or returned.
(param $lsegchar i32)
(result i32) ;; result: index of added cell
(local $r i32)
(local $i i32)
;; const r = this.buf32[TRIE1_SLOT] >>> 2;
;; (the value at address 260 is kept as a byte offset here; the `>>> 2` is
;; applied at the end, on return)
i32.const 260
i32.load
tee_local $r
;; let i = r;
set_local $i
;; while ( lsegchar > 127 ) {
;; (the loop is left as soon as the remainder fits in a single cell)
block $lastSegment loop
get_local $lsegchar
i32.const 127
i32.le_u
br_if $lastSegment
;; this.buf32[i+0] = 0;
get_local $i
i32.const 0
i32.store
;; this.buf32[i+1] = i + 3;
;; (byte offset of the adjacent cell, converted to a cell index)
get_local $i
get_local $i
i32.const 12
i32.add
i32.const 2
i32.shr_u
i32.store offset=4
;; this.buf32[i+2] = this.addSegment(lsegchar, lsegchar - 127);
get_local $i
get_local $lsegchar
get_local $lsegchar
i32.const 127
i32.sub
call $addSegment
i32.store offset=8
;; lsegchar -= 127;
get_local $lsegchar
i32.const 127
i32.sub
set_local $lsegchar
;; i += 3;
;; (12 bytes = 3 cells slots of 4 bytes each)
get_local $i
i32.const 12
i32.add
set_local $i
br 0
end end
;; this.buf32[i+0] = 0;
get_local $i
i32.const 0
i32.store
;; this.buf32[i+1] = 0;
get_local $i
i32.const 0
i32.store offset=4
;; this.buf32[i+2] = this.addSegment(lsegchar, 0) | 0x80;
;; (0x80 flags the last cell of the chain as a boundary)
get_local $i
get_local $lsegchar
i32.const 0
call $addSegment
i32.const 0x80
i32.or
i32.store offset=8
;; this.buf32[TRIE1_SLOT] = i + 3 << 2;
;; ($i is already a byte offset, so no shift is needed here)
i32.const 260
get_local $i
i32.const 12
i32.add
i32.store
;; return r;
;; (convert the byte offset of the first cell to a cell index)
get_local $r
i32.const 2
i32.shr_u
)
;;
;; unsigned int addSegment(lsegchar, lsegend)
;;
;; Store a segment of characters and return a segment descriptor. The segment
;; is created from the character data in the needle buffer.
;;
(func $addSegment
(param $lsegchar i32)
(param $lsegend i32)
(result i32) ;; result: segment descriptor
(local $char1 i32) ;; offset to end of character data section
(local $isegchar i32) ;; relative offset to first character of segment
@ -673,7 +685,7 @@
get_local $lsegchar
set_local $i
;; do {
block $endOfSegment loop
loop
;; this.buf[char1++] = this.buf[--i];
get_local $char1
get_local $i
@ -686,21 +698,23 @@
i32.const 1
i32.add
set_local $char1
;; } while ( i !== 0 );
;; } while ( i !== lsegend );
get_local $i
i32.eqz
br_if $endOfSegment
br 0
end end
get_local $lsegend
i32.ne
br_if 0
end
;; this.buf32[HNBIGTRIE_CHAR1_SLOT] = char1;
i32.const 268
get_local $char1
i32.store
;; return (lsegchar << 24) | isegchar;
get_local $lsegchar
i32.const 24
i32.shl
;; return isegchar << 8 | lsegchar - lsegend;
get_local $isegchar
i32.const 8
i32.shl
get_local $lsegchar
get_local $lsegend
i32.sub
i32.or
)