1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-11-07 03:12:33 +01:00

Add WASM implementation for BidiTrieContainer.matches()

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/761
This commit is contained in:
Raymond Hill 2019-10-28 13:57:35 -04:00
parent d7b2d31180
commit 5cc797fb47
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
5 changed files with 504 additions and 63 deletions

View File

@ -142,7 +142,7 @@ const µBlock = (( ) => { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 23, // Increase when compiled format changes
selfieMagic: 23, // Increase when selfie format changes
selfieMagic: 24, // Increase when selfie format changes
},
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -1824,7 +1824,7 @@ const FilterPlainTrie = class {
}
match() {
if ( this.plainTrie.matches($tokenBeg) ) {
if ( this.plainTrie.matches($tokenBeg) !== 0 ) {
this.$matchedUnit = this.plainTrie.$iu;
return true;
}
@ -1862,7 +1862,7 @@ registerFilterClass(FilterPlainTrie);
const FilterBucket = class extends FilterCollection {
match() {
if ( this.plainTrie !== null ) {
if ( this.plainTrie.matches($tokenBeg) ) {
if ( this.plainTrie.matches($tokenBeg, this) !== 0 ) {
this.$matchedTrie = true;
this.$matchedUnit = this.plainTrie.$iu;
return true;

View File

@ -106,11 +106,14 @@ const PAGE_SIZE = 65536*2;
const HAYSTACK_START = 0;
const HAYSTACK_SIZE = 2048; // i32 / i8
const HAYSTACK_SIZE_SLOT = HAYSTACK_SIZE >>> 2; // 512 / 2048
const TRIE0_SLOT = HAYSTACK_SIZE_SLOT + 1; // 512 / 2052
const TRIE1_SLOT = HAYSTACK_SIZE_SLOT + 2; // 513 / 2056
const CHAR0_SLOT = HAYSTACK_SIZE_SLOT + 3; // 514 / 2060
const CHAR1_SLOT = HAYSTACK_SIZE_SLOT + 4; // 515 / 2064
const TRIE0_START = HAYSTACK_SIZE_SLOT + 5 << 2; // 2068
const TRIE0_SLOT = HAYSTACK_SIZE_SLOT + 1; // 513 / 2052
const TRIE1_SLOT = HAYSTACK_SIZE_SLOT + 2; // 514 / 2056
const CHAR0_SLOT = HAYSTACK_SIZE_SLOT + 3; // 515 / 2060
const CHAR1_SLOT = HAYSTACK_SIZE_SLOT + 4; // 516 / 2064
const RESULT_L_SLOT = HAYSTACK_SIZE_SLOT + 5; // 517 / 2068
const RESULT_R_SLOT = HAYSTACK_SIZE_SLOT + 6; // 518 / 2072
const RESULT_IU_SLOT = HAYSTACK_SIZE_SLOT + 7; // 519 / 2076
const TRIE0_START = HAYSTACK_SIZE_SLOT + 8 << 2; // 2080
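// RESULT_L_SLOT, RESULT_R_SLOT and RESULT_IU_SLOT hold the result values of
// the last successful match; they are written by both the JS and the WASM code.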
const CELL_BYTE_LENGTH = 12;
@ -144,32 +147,34 @@ const roundToPageSize = v => (v + PAGE_SIZE-1) & ~(PAGE_SIZE-1);
HAYSTACK_START,
HAYSTACK_START + HAYSTACK_SIZE
);
this.haystackLen = 0;
this.extraHandler = extraHandler;
this.textDecoder = null;
this.wasmMemory = null;
this.$l = 0;
this.$r = 0;
this.$iu = 0;
}
//--------------------------------------------------------------------------
// Public methods
//--------------------------------------------------------------------------
// Length of the haystack. The value lives in the shared buffer at
// HAYSTACK_SIZE_SLOT rather than in an instance property, so that code which
// reads the buffer directly (the WASM `matches` loads it from byte address
// 2048) sees the same value.
get haystackLen() {
return this.buf32[HAYSTACK_SIZE_SLOT];
}
// Stores the haystack length into the shared buffer at HAYSTACK_SIZE_SLOT.
set haystackLen(v) {
this.buf32[HAYSTACK_SIZE_SLOT] = v;
}
// Copies the value in TRIE0_SLOT into TRIE1_SLOT and the value in CHAR0_SLOT
// into CHAR1_SLOT.
// NOTE(review): presumably this rewinds the trie/character "end" pointers to
// their start positions, i.e. empties the container -- confirm against the
// code which allocates cells and characters.
reset() {
this.buf32[TRIE1_SLOT] = this.buf32[TRIE0_SLOT];
this.buf32[CHAR1_SLOT] = this.buf32[CHAR0_SLOT];
}
matches(iroot, i) {
matches(icell, ai) {
const buf32 = this.buf32;
const buf8 = this.buf8;
const char0 = buf32[CHAR0_SLOT];
const aR = this.haystackLen;
let icell = iroot;
let al = i;
const aR = buf32[HAYSTACK_SIZE_SLOT];
let al = ai;
let c, v, bl, n;
for (;;) {
c = buf8[al];
@ -180,50 +185,47 @@ const roundToPageSize = v => (v + PAGE_SIZE-1) & ~(PAGE_SIZE-1);
bl = char0 + (v & 0x00FFFFFF);
if ( buf8[bl] === c ) { break; }
icell = buf32[icell+CELL_OR];
if ( icell === 0 ) { return false; }
if ( icell === 0 ) { return 0; }
}
// all characters in segment must match
n = (v >>> 24) - 1;
if ( n !== 0 ) {
const ar = al + n;
if ( ar > aR ) { return false; }
if ( ar > aR ) { return 0; }
let i = al, j = bl + 1;
do {
if ( buf8[i] !== buf8[j] ) { return false; }
i += 1; j += 1;
if ( buf8[i] !== buf8[j] ) { return 0; }
j += 1; i += 1;
} while ( i !== ar );
al = i;
}
// next segment
icell = buf32[icell+CELL_AND];
const ix = buf32[icell+BCELL_EXTRA];
if ( ix <= BCELL_EXTRA_MAX ) {
if ( ix !== 0 ) {
const iu = ix === 1 ? -1 : this.extraHandler(i, al, ix);
if ( iu !== 0 ) {
this.$l = i; this.$r = al; this.$iu = iu; return true;
}
v = buf32[icell+BCELL_EXTRA];
if ( v <= BCELL_EXTRA_MAX ) {
if ( v !== 0 && this.matchesExtra(ai, al, v) !== 0 ) {
return 1;
}
let inext = buf32[icell+BCELL_ALT_AND];
if ( inext !== 0 ) {
if ( this.matchesLeft(inext, i, al) ) { return true; }
if ( inext !== 0 && this.matchesLeft(inext, ai, al) !== 0 ) {
return 1;
}
inext = buf32[icell+BCELL_NEXT_AND];
if ( inext === 0 ) { return false; }
icell = inext;
icell = buf32[icell+BCELL_NEXT_AND];
if ( icell === 0 ) { return 0; }
}
if ( al === aR ) { return false; }
if ( al === aR ) { return 0; }
}
return 0;
}
matchesLeft(iroot, i, r) {
matchesLeft(iroot, ar, r) {
const buf32 = this.buf32;
const buf8 = this.buf8;
const char0 = buf32[CHAR0_SLOT];
let icell = iroot;
let ar = i;
let c, v, br, n;
for (;;) {
if ( ar === 0 ) { return 0; }
ar -= 1;
c = buf8[ar];
// find first segment with a first-character match
@ -233,34 +235,45 @@ const roundToPageSize = v => (v + PAGE_SIZE-1) & ~(PAGE_SIZE-1);
br = char0 + (v & 0x00FFFFFF) + n;
if ( buf8[br] === c ) { break; }
icell = buf32[icell+CELL_OR];
if ( icell === 0 ) { return false; }
if ( icell === 0 ) { return 0; }
}
// all characters in segment must match
if ( n !== 0 ) {
const al = ar - n;
if ( al < 0 ) { return false; }
if ( al < 0 ) { return 0; }
let i = ar, j = br;
do {
i -= 1; j -= 1;
if ( buf8[i] !== buf8[j] ) { return false; }
if ( buf8[i] !== buf8[j] ) { return 0; }
} while ( i !== al );
ar = i;
}
// next segment
icell = buf32[icell+CELL_AND];
const ix = buf32[icell+BCELL_EXTRA];
if ( ix <= BCELL_EXTRA_MAX ) {
if ( ix !== 0 ) {
const iu = ix === 1 ? -1 : this.extraHandler(ar, r, ix);
if ( iu !== 0 ) {
this.$l = ar; this.$r = r; this.$iu = iu; return true;
}
v = buf32[icell+BCELL_EXTRA];
if ( v <= BCELL_EXTRA_MAX ) {
if ( v !== 0 && this.matchesExtra(ar, r, v) !== 0 ) {
return 1;
}
icell = buf32[icell+BCELL_NEXT_AND];
if ( icell === 0 ) { return false; }
if ( icell === 0 ) { return 0; }
}
if ( ar === 0 ) { return false; }
}
return 0;
}
// Resolve the extra token found in a boundary cell and, on success, record
// the result of the match in the shared buffer.
//
// l:  left bound of the match so far
// r:  right bound of the match so far
// ix: extra token read from the cell's BCELL_EXTRA field
//
// Returns 0 when extraHandler() rejects the match (returns 0), otherwise 1
// after having written l, r and the filter unit into RESULT_L_SLOT,
// RESULT_R_SLOT and RESULT_IU_SLOT.
matchesExtra(l, r, ix) {
let iu;
// An extra token of 1 is special-cased: extraHandler() is not consulted
// and the filter unit is reported as -1.
if ( ix !== 1 ) {
iu = this.extraHandler(l, r, ix);
if ( iu === 0 ) { return 0; }
} else {
iu = -1;
}
// Results go into the buffer (not instance properties) so that they can be
// read back the same way regardless of which implementation wrote them.
this.buf32[RESULT_L_SLOT] = l;
this.buf32[RESULT_R_SLOT] = r;
this.buf32[RESULT_IU_SLOT] = iu;
return 1;
}
createOne(args) {
@ -675,7 +688,7 @@ const roundToPageSize = v => (v + PAGE_SIZE-1) & ~(PAGE_SIZE-1);
});
const instance = await WebAssembly.instantiate(
module,
{ imports: { memory } }
{ imports: { memory, extraHandler: this.extraHandler } }
);
if ( instance instanceof WebAssembly.Instance === false ) {
return false;
@ -694,6 +707,7 @@ const roundToPageSize = v => (v + PAGE_SIZE-1) & ~(PAGE_SIZE-1);
HAYSTACK_START,
HAYSTACK_START + HAYSTACK_SIZE
);
this.matches = instance.exports.matches;
this.startsWith = instance.exports.startsWith;
this.indexOf = instance.exports.indexOf;
this.lastIndexOf = instance.exports.lastIndexOf;
@ -829,9 +843,9 @@ const roundToPageSize = v => (v + PAGE_SIZE-1) & ~(PAGE_SIZE-1);
}
}
get $l() { return this.container.$l; }
get $r() { return this.container.$r; }
get $iu() { return this.container.$iu; }
// Result values of the last successful match, read back from the container's
// buffer at RESULT_L_SLOT / RESULT_R_SLOT / RESULT_IU_SLOT.
// `| 0` coerces the value read to a signed 32-bit integer, so that a filter
// unit stored as -1 reads back as -1.
// NOTE(review): presumably buf32 is an unsigned 32-bit view, in which case
// -1 would otherwise read back as 4294967295 -- confirm.
get $l() { return this.container.buf32[RESULT_L_SLOT] | 0; }
get $r() { return this.container.buf32[RESULT_R_SLOT] | 0; }
get $iu() { return this.container.buf32[RESULT_IU_SLOT] | 0; }
[Symbol.iterator]() {
return {

Binary file not shown.

View File

@ -26,24 +26,451 @@
;;
(memory (import "imports" "memory") 1)
(func $extraHandler (import "imports" "extraHandler") (param i32 i32 i32) (result i32))
;; Trie container
;;
;; Memory layout, byte offset:
;; HAYSTACK_START = 0;
;; HAYSTACK_SIZE = 2048; // i32 / i8
;; HAYSTACK_SIZE_SLOT = HAYSTACK_SIZE >>> 2; // 512 / 2048
;; TRIE0_SLOT = HAYSTACK_SIZE_SLOT + 1; // 512 / 2052
;; TRIE1_SLOT = HAYSTACK_SIZE_SLOT + 2; // 513 / 2056
;; CHAR0_SLOT = HAYSTACK_SIZE_SLOT + 3; // 514 / 2060
;; CHAR1_SLOT = HAYSTACK_SIZE_SLOT + 4; // 515 / 2064
;; TRIE0_START = HAYSTACK_SIZE_SLOT + 5 << 2; // 2068
;; const HAYSTACK_START = 0;
;; const HAYSTACK_SIZE = 2048; // i32 / i8
;; const HAYSTACK_SIZE_SLOT = HAYSTACK_SIZE >>> 2; // 512 / 2048
;; const TRIE0_SLOT = HAYSTACK_SIZE_SLOT + 1; // 513 / 2052
;; const TRIE1_SLOT = HAYSTACK_SIZE_SLOT + 2; // 514 / 2056
;; const CHAR0_SLOT = HAYSTACK_SIZE_SLOT + 3; // 515 / 2060
;; const CHAR1_SLOT = HAYSTACK_SIZE_SLOT + 4; // 516 / 2064
;; const RESULT_L_SLOT = HAYSTACK_SIZE_SLOT + 5; // 517 / 2068
;; const RESULT_R_SLOT = HAYSTACK_SIZE_SLOT + 6; // 518 / 2072
;; const RESULT_IU_SLOT = HAYSTACK_SIZE_SLOT + 7; // 519 / 2076
;; const TRIE0_START = HAYSTACK_SIZE_SLOT + 8 << 2; // 2080
;;
;;
;; Public functions
;;
;;
;; unsigned int matches(icell, ai)
;;
;; Test whether the trie at icell matches the haystack content at position ai.
;;
;; Hand-written mirror of the JS BidiTrieContainer.matches(): each group of
;; instructions is preceded by the JS statement it implements.
;; Control flow: branching to $matchNotFound lands on `return 0`, branching
;; to $matchFound lands on `return 1` (see the end of the function).
(func (export "matches")
(param $icell i32) ;; index (uint32 offset) of the trie cell to start from
(param $ai i32) ;; offset in haystack
(result i32) ;; result: 0 = no match, 1 = match
(local $char0 i32)
(local $aR i32)
(local $al i32)
(local $c i32)
(local $v i32)
(local $bl i32)
(local $n i32)
(local $ar i32)
(local $i i32)
(local $j i32)
(local $inext i32)
;; trie index is a uint32 offset, need to convert to uint8 offset
get_local $icell
i32.const 2
i32.shl
set_local $icell
;; const buf32 = this.buf32;
;; const buf8 = this.buf8;
;; const char0 = buf32[CHAR0_SLOT];
;; (2060 is the byte address of CHAR0_SLOT, see memory layout above)
i32.const 2060
i32.load align=4
set_local $char0
;; const aR = buf32[HAYSTACK_SIZE_SLOT];
;; (2048 is the byte address of HAYSTACK_SIZE_SLOT)
i32.const 2048
i32.load align=4
set_local $aR
;; let al = ai;
get_local $ai
set_local $al
block $matchFound
block $matchNotFound
;; for (;;) {
loop $mainLoop
;; c = buf8[al];
get_local $al
i32.load8_u
set_local $c
;; al += 1;
get_local $al
i32.const 1
i32.add
set_local $al
;; // find first segment with a first-character match
;; for (;;) {
block $breakMatchFirstChar loop $matchFirstChar
;; v = buf32[icell+SEGMENT_INFO];
get_local $icell
i32.load offset=8 align=4
tee_local $v
;; bl = char0 + (v & 0x00FFFFFF);
i32.const 0x00FFFFFF
i32.and
get_local $char0
i32.add
tee_local $bl
;; if ( buf8[bl] === c ) { break; }
i32.load8_u
get_local $c
i32.eq
br_if $breakMatchFirstChar
;; icell = buf32[icell+CELL_OR];
;; (the loaded uint32 index is shifted to a byte offset right away)
get_local $icell
i32.load offset=4 align=4
i32.const 2
i32.shl
tee_local $icell
;; if ( icell === 0 ) { return 0; }
i32.eqz
br_if $matchNotFound
br $matchFirstChar
;; }
end end
;; // all characters in segment must match
;; n = (v >>> 24) - 1;
get_local $v
i32.const 24
i32.shr_u
i32.const 1
i32.sub
tee_local $n
;; if ( n !== 0 ) {
if
;; const ar = al + n;
get_local $n
get_local $al
i32.add
tee_local $ar
;; if ( ar > aR ) { return 0; }
get_local $aR
i32.gt_u
br_if $matchNotFound
;; let i = al, j = bl + 1;
get_local $al
set_local $i
get_local $bl
i32.const 1
i32.add
set_local $j
;; do {
loop
;; if ( buf8[i] !== buf8[j] ) { return 0; }
get_local $i
i32.load8_u
get_local $j
i32.load8_u
i32.ne
br_if $matchNotFound
;; j += 1; i += 1;
get_local $j
i32.const 1
i32.add
set_local $j
get_local $i
i32.const 1
i32.add
tee_local $i
;; } while ( i !== ar );
get_local $ar
i32.ne
br_if 0
end
;; al = i;
get_local $i
set_local $al
;; }
end
;; // next segment
;; icell = buf32[icell+CELL_AND];
get_local $icell
i32.load align=4
i32.const 2
i32.shl
tee_local $icell
;; v = buf32[icell+BCELL_EXTRA];
i32.load offset=8 align=4
tee_local $v
;; if ( v <= BCELL_EXTRA_MAX ) {
i32.const 0x00FFFFFF
i32.le_u
if
;; if ( v !== 0 && this.matchesExtra(ai, al, v) !== 0 ) {
;; return 1;
;; }
get_local $v
if
get_local $ai
get_local $al
get_local $v
call $matchesExtra
br_if $matchFound
end
;; let inext = buf32[icell+BCELL_ALT_AND];
get_local $icell
i32.load offset=4 align=4
i32.const 2
i32.shl
tee_local $inext
;; if ( inext !== 0 && this.matchesLeft(inext, ai, al) !== 0 ) {
;; ($inext is already a byte offset here, which is what $matchesLeft
;; expects: it does not shift its $icell parameter)
if
get_local $inext
get_local $ai
get_local $al
call $matchesLeft
br_if $matchFound
;; }
end
;; icell = buf32[icell+BCELL_NEXT_AND];
get_local $icell
i32.load align=4
i32.const 2
i32.shl
tee_local $icell
;; if ( icell === 0 ) { return 0; }
i32.eqz
br_if $matchNotFound
;; }
end
;; if ( al === aR ) { return 0; }
;; (expressed as: keep looping while al !== aR, otherwise fall through
;; to the end of $matchNotFound)
get_local $al
get_local $aR
i32.ne
br_if $mainLoop
;; }
end ;; $mainLoop
end ;; $matchNotFound
i32.const 0
return
end ;; $matchFound
i32.const 1
return
)
;;
;; unsigned int matchesLeft(icell, ar, r)
;;
;; Test whether the trie at icell matches the haystack content to the left of
;; position ar, i.e. scanning backward starting at ar - 1.
;;
;; Hand-written mirror of the JS BidiTrieContainer.matchesLeft(): each group
;; of instructions is preceded by the JS statement it implements.
;; Control flow: branching to $matchNotFound lands on `return 0`, branching
;; to $matchFound lands on `return 1` (see the end of the function).
(func $matchesLeft
(param $icell i32) ;; byte offset of the trie cell to start from (the caller has already converted the uint32 index)
(param $ar i32) ;; offset of where to start in haystack
(param $r i32) ;; right bound of match so far
(result i32) ;; result: 0 = no match, 1 = match
(local $char0 i32)
(local $c i32)
(local $v i32)
(local $n i32)
(local $al i32)
(local $br i32)
(local $i i32)
(local $j i32)
;; const buf32 = this.buf32;
;; const buf8 = this.buf8;
;; const char0 = buf32[CHAR0_SLOT];
;; (2060 is the byte address of CHAR0_SLOT, see memory layout above)
i32.const 2060
i32.load align=4
set_local $char0
block $matchFound
block $matchNotFound
;; for (;;) {
loop $mainLoop
;; if ( ar === 0 ) { return 0; }
get_local $ar
i32.eqz
br_if $matchNotFound
;; ar -= 1;
get_local $ar
i32.const 1
i32.sub
tee_local $ar
;; c = buf8[ar];
i32.load8_u
set_local $c
;; // find first segment with a first-character match
;; for (;;) {
block $breakMatchFirstChar loop $matchFirstChar
;; v = buf32[icell+SEGMENT_INFO];
get_local $icell
i32.load offset=8 align=4
tee_local $v
;; n = (v >>> 24) - 1;
i32.const 24
i32.shr_u
i32.const 1
i32.sub
tee_local $n
;; br = char0 + (v & 0x00FFFFFF) + n;
get_local $char0
i32.add
get_local $v
i32.const 0x00FFFFFF
i32.and
i32.add
tee_local $br
;; if ( buf8[br] === c ) { break; }
i32.load8_u
get_local $c
i32.eq
br_if $breakMatchFirstChar
;; icell = buf32[icell+CELL_OR];
;; (the loaded uint32 index is shifted to a byte offset right away)
get_local $icell
i32.load offset=4 align=4
i32.const 2
i32.shl
tee_local $icell
;; if ( icell === 0 ) { return 0; }
i32.eqz
br_if $matchNotFound
br $matchFirstChar
;; }
end end
;; // all characters in segment must match
;; if ( n !== 0 ) {
get_local $n
if
;; const al = ar - n;
get_local $ar
get_local $n
i32.sub
tee_local $al
;; if ( al < 0 ) { return 0; }
;; (signed comparison: the subtraction above may wrap below zero)
i32.const 0
i32.lt_s
br_if $matchNotFound
;; let i = ar, j = br;
get_local $ar
set_local $i
get_local $br
set_local $j
;; do {
loop
;; i -= 1; j -= 1;
;; if ( buf8[i] !== buf8[j] ) { return 0; }
get_local $i
i32.const 1
i32.sub
tee_local $i
i32.load8_u
get_local $j
i32.const 1
i32.sub
tee_local $j
i32.load8_u
i32.ne
br_if $matchNotFound
;; } while ( i !== al );
get_local $i
get_local $al
i32.ne
br_if 0
end
;; ar = i;
get_local $i
set_local $ar
;; }
end
;; // next segment
;; icell = buf32[icell+CELL_AND];
get_local $icell
i32.load align=4
i32.const 2
i32.shl
tee_local $icell
;; v = buf32[icell+BCELL_EXTRA];
i32.load offset=8 align=4
tee_local $v
;; if ( v <= BCELL_EXTRA_MAX ) {
i32.const 0x00FFFFFF
i32.le_u
if
;; if ( v !== 0 && this.matchesExtra(ar, r, v) !== 0 ) {
;; return 1;
;; }
get_local $v
if
get_local $ar
get_local $r
get_local $v
call $matchesExtra
br_if $matchFound
end
;; icell = buf32[icell+BCELL_NEXT_AND];
get_local $icell
i32.load align=4
i32.const 2
i32.shl
tee_local $icell
;; if ( icell === 0 ) { return 0; }
i32.eqz
br_if $matchNotFound
;; }
end
;; unconditional: the `ar === 0` exit test sits at the top of the loop
br $mainLoop
;; }
end ;; $mainLoop
end ;; $matchNotFound
i32.const 0
return
end ;; $matchFound
i32.const 1
return
)
;;
;; int matchesExtra(l, r, ix)
;;
;; Test whether extra handler returns a match.
;;
;; Hand-written mirror of the JS BidiTrieContainer.matchesExtra(): resolves
;; the extra token and, on success, stores the match result (l, r, iu) into
;; the result slots of the shared memory.
(func $matchesExtra
(param $l i32) ;; left bound of match so far
(param $r i32) ;; right bound of match so far
(param $ix i32) ;; extra token
(result i32) ;; result: 0 = no match, 1 = match
(local $iu i32) ;; filter unit
;; let iu;
;; if ( ix !== 1 ) {
;; iu = this.extraHandler(l, r, ix);
;; if ( iu === 0 ) { return 0; }
get_local $ix
i32.const 1
i32.ne
if
;; $extraHandler is the function imported from the host as
;; "imports" "extraHandler"
get_local $l
get_local $r
get_local $ix
call $extraHandler
tee_local $iu
i32.eqz
if
i32.const 0
return
end
;; } else {
;; iu = -1;
else
i32.const -1
set_local $iu
;; }
end
;; this.buf32[RESULT_L_SLOT] = l;
;; (2068 is the byte address of RESULT_L_SLOT, see memory layout above)
i32.const 2068
get_local $l
i32.store align=4
;; this.buf32[RESULT_R_SLOT] = r;
;; (2072 is the byte address of RESULT_R_SLOT)
i32.const 2072
get_local $r
i32.store align=4
;; this.buf32[RESULT_IU_SLOT] = iu;
;; (2076 is the byte address of RESULT_IU_SLOT)
i32.const 2076
get_local $iu
i32.store align=4
;; return 1; (the value left on the stack is the function's result)
i32.const 1
)
;;
;; unsigned int startsWith(haystackLeft, haystackRight, needleLeft, needleLen)
;;
@ -81,7 +508,7 @@
;; needleLeft += this.buf32[CHAR0_SLOT];
get_local $needleLeft
i32.const 2060 ;; CHAR0_SLOT memory address
i32.load ;; CHAR0 memory address
i32.load align=4 ;; CHAR0 memory address
i32.add ;; needle memory address
;; const needleRight = needleLeft + needleLen;
tee_local $needleLeft
@ -152,7 +579,7 @@
;; needleLeft += this.buf32[CHAR0_SLOT];
get_local $needleLeft
i32.const 2060 ;; CHAR0_SLOT memory address
i32.load ;; CHAR0 memory address
i32.load align=4 ;; CHAR0 memory address
i32.add ;; needle memory address
tee_local $needleLeft
;; const needleRight = needleLeft + needleLen;
@ -244,7 +671,7 @@
;; needleLeft += this.buf32[CHAR0_SLOT];
get_local $needleLeft
i32.const 2060 ;; CHAR0_SLOT memory address
i32.load ;; CHAR0 memory address
i32.load align=4 ;; CHAR0 memory address
i32.add ;; needle memory address
tee_local $needleLeft
;; const needleRight = needleLeft + needleLen;