1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-11-17 16:02:33 +01:00
uBlock/src/js/wasm/hntrie.wat
Raymond Hill adabb56dc9
Do not store impossible to match filters in HNTrie
Consider the two following filters:

    example.com
    www.example.com

This commit makes it so that if the first filter is
already present in a given HNTrie, the second filter
will not be stored, since HNTrie will _always_
return the first filter as a match whenever the
hostname to match is example.com or any subdomain
of example.com.

The detection of such pointless filters is
virtually free when adding a hostname to an HNTrie
instance (given how data is stored in the trie), so
in practice no overhead is incurred to detect such
pointless filters.

The ability to ignore impossible to match filters
in HNTrie instances will _especially_ benefit those
using large hosts files.

Examples of how this helps using real configurations:

- Default lists:
  444 filters out of 100,382 were ignored as a result
  of this commit.

- Default lists + "Energized Ultimate Protection":
  283,669 filters out of 903,235 were ignored as a
  result of this commit.

Side note: There was no measurable difference between
the two configurations above in the performance of
the matching algorithm as reported by the built-in
benchmark tool.
2019-04-29 13:15:16 -04:00

711 lines
19 KiB
Plaintext

;;
;; uBlock Origin - a browser extension to block requests.
;; Copyright (C) 2018-present Raymond Hill
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see {http://www.gnu.org/licenses/}.
;;
;; Home: https://github.com/gorhill/uBlock
;; File: hntrie.wat
;; Description: WebAssembly code used by src/js/hntrie.js
;; How to compile: See README.md in this directory.
(module
;;
;; module start
;;
(func $growBuf (import "imports" "growBuf"))
(memory (import "imports" "memory") 1)
;; Trie container
;;
;; Memory layout, byte offset:
;; 0-254: needle being processed
;; 255: length of needle
;; 256-259: offset to start of trie data section (=> trie0)
;; 260-263: offset to end of trie data section (=> trie1)
;; 264-267: offset to start of character data section (=> char0)
;; 268-271: offset to end of character data section (=> char1)
;; 272: start of trie data section
;;
;;
;; Public functions
;;
;;
;; int matches(iroot)
;;
;; Test whether the currently set needle matches the trie whose root slot is
;; at the specified index. Returns a needle offset on match, -1 on miss.
;;
(func (export "matches")
    ;;
    ;; Walk the trie from its root, consuming the needle from its last
    ;; character toward its first. The JS-style comments below use 32-bit word
    ;; indices (this.buf32[...]); the code itself works with byte offsets,
    ;; hence the `<< 2` applied to every cell link read from memory.
    ;;
    ;; Cell layout as accessed here (three 32-bit words):
    ;;   word 0 (offset 0): link to the next candidate cell, followed when the
    ;;                      first character of this cell's segment mismatches
    ;;   word 1 (offset 4): link to the cell following this cell's segment
    ;;   word 2 (offset 8): segment descriptor, (length << 24) | offset
    ;;                      relative to char0; 0 denotes a boundary cell
    ;;
    (param $iroot i32)          ;; word index of the slot holding the root cell link
    (result i32)                ;; needle offset reached when the match completed, -1 = miss
    (local $icell i32)          ;; byte offset of the current cell
    (local $char0 i32)          ;; byte offset of the character data section
    (local $ineedle i32)        ;; current needle offset
    (local $c i32)              ;; needle character being looked up
    (local $v i32)              ;; segment descriptor of the current cell
    (local $n i32)              ;; number of segment characters left to compare
    (local $i0 i32)             ;; byte offset of the current segment character
    (local $i1 i32)             ;; byte offset just past the segment's last character
    ;;
    ;; const char0 = this.buf32[CHAR0_SLOT];
    i32.const 264               ;; start of char section is stored at addr 264
    i32.load
    set_local $char0
    ;; let ineedle = this.buf[255];
    i32.const 255               ;; length of needle is stored at addr 255
    i32.load8_u
    set_local $ineedle
    ;; let icell = this.buf32[iroot+0];
    get_local $iroot
    i32.const 2
    i32.shl
    i32.load
    i32.const 2
    i32.shl
    tee_local $icell
    ;; if ( icell === 0 ) { return -1; }
    i32.eqz
    if
        i32.const -1
        return
    end
    ;; for (;;) {
    block $noSegment loop $nextSegment
        ;; if ( ineedle === 0 ) { return -1; }
        get_local $ineedle
        i32.eqz
        if
            i32.const -1
            return
        end
        ;; ineedle -= 1;
        get_local $ineedle
        i32.const -1
        i32.add
        tee_local $ineedle
        ;; let c = this.buf[ineedle];
        i32.load8_u
        set_local $c
        ;; find the first cell whose segment starts with c
        ;; for (;;) {
        block $foundSegment loop $findSegment
            ;; v = this.buf32[icell+2];
            get_local $icell
            i32.load offset=8
            tee_local $v
            ;; i0 = char0 + (v & 0x00FFFFFF);
            i32.const 0x00FFFFFF
            i32.and
            get_local $char0
            i32.add
            tee_local $i0
            ;; if ( this.buf[i0] === c ) { break; }
            i32.load8_u
            get_local $c
            i32.eq
            br_if $foundSegment
            ;; icell = this.buf32[icell+0];
            get_local $icell
            i32.load
            i32.const 2
            i32.shl
            tee_local $icell
            ;; if ( icell === 0 ) { return -1; }
            i32.eqz
            if
                i32.const -1
                return
            end
            br $findSegment
        end end
        ;; the remaining characters of the segment must all match
        ;; let n = v >>> 24;
        get_local $v
        i32.const 24
        i32.shr_u
        tee_local $n
        ;; if ( n > 1 ) {
        i32.const 1
        i32.gt_u
        if
            ;; n -= 1;
            get_local $n
            i32.const -1
            i32.add
            tee_local $n
            ;; if ( n > ineedle ) { return -1; }
            get_local $ineedle
            i32.gt_u
            if
                i32.const -1
                return
            end
            ;; i0 += 1;
            get_local $i0
            i32.const 1
            i32.add
            tee_local $i0
            ;; const i1 = i0 + n;
            get_local $n
            i32.add
            set_local $i1
            ;; do {
            loop $matchSegment
                ;; ineedle -= 1;
                get_local $ineedle
                i32.const -1
                i32.add
                tee_local $ineedle
                ;; if ( this.buf[i0] !== this.buf[ineedle] ) { return -1; }
                i32.load8_u
                get_local $i0
                i32.load8_u
                i32.ne
                if
                    i32.const -1
                    return
                end
                ;; i0 += 1;
                get_local $i0
                i32.const 1
                i32.add
                tee_local $i0
                ;; } while ( i0 < i1 );
                get_local $i1
                i32.lt_u
                br_if $matchSegment
            end
        end
        ;; icell = this.buf32[icell+1];
        get_local $icell
        i32.load offset=4
        i32.const 2
        i32.shl
        tee_local $icell
        ;; if ( icell === 0 ) { break; }
        i32.eqz
        br_if $noSegment
        ;; A boundary cell is identified by a zero segment descriptor, which
        ;; lives in word 2 of the cell, hence offset=8 (word 0 is a cell link).
        ;; if ( this.buf32[icell+2] === 0 ) {
        get_local $icell
        i32.load offset=8
        i32.eqz
        if
            ;; if ( ineedle === 0 || this.buf[ineedle-1] === 0x2E ) {
            ;;     return ineedle;
            ;; }
            get_local $ineedle
            i32.eqz
            if
                i32.const 0
                return
            end
            get_local $ineedle
            i32.const -1
            i32.add
            i32.load8_u
            i32.const 0x2E          ;; '.'
            i32.eq
            if
                get_local $ineedle
                return
            end
            ;; icell = this.buf32[icell+1];
            get_local $icell
            i32.load offset=4
            i32.const 2
            i32.shl
            set_local $icell
        end
        br $nextSegment
    end end
    ;; return ineedle === 0 || this.buf[ineedle-1] === 0x2E ? ineedle : -1;
    get_local $ineedle
    i32.eqz
    if
        i32.const 0
        return
    end
    get_local $ineedle
    i32.const -1
    i32.add
    i32.load8_u
    i32.const 0x2E                  ;; '.'
    i32.eq
    if
        get_local $ineedle
        return
    end
    i32.const -1
)
;;
;; int add(iroot)
;;
;; Add the currently set needle to the trie whose root slot index is passed as argument.
;;
(func (export "add")
(param $iroot i32) ;; word index of the trie's root slot (turned into a byte offset below)
(result i32) ;; result: 1 = added, 0 = not added (empty or already present), -1 = not added (a shorter stored hostname already covers it)
(local $icell i32) ;; byte offset of current cell in the trie
(local $lhnchar i32) ;; number of characters left to process in hostname
(local $char0 i32) ;; offset to start of character data section
(local $vseg i32) ;; integer value describing a segment
(local $isegchar0 i32) ;; offset to start of current segment's character data
(local $isegchar i32) ;; number of characters of the current segment matched so far
(local $lsegchar i32) ;; number of character in current segment
(local $inext i32) ;; next cell: byte offset when read from a cell link, word index when returned by $addCell
;;
;; The hostname is read from the needle buffer (bytes 0-254, length at 255)
;; and consumed from its last character toward its first.
;;
;; The JS-style comments below use 32-bit word indices (this.buf32[...]);
;; the code works with byte offsets, hence the `<< 2` applied to cell links
;; read from memory. $addCell takes and returns word indices.
;;
;; let lhnchar = this.buf[255];
i32.const 255
i32.load8_u
tee_local $lhnchar
;; if ( lhnchar === 0 ) { return 0; }
i32.eqz
if
i32.const 0
return
end
;; Make sure there is room before adding anything: 24 bytes of trie data
;; (two 12-byte cells) and 256 bytes of character data.
;; if (
;; (this.buf32[HNBIGTRIE_CHAR0_SLOT] - this.buf32[HNBIGTRIE_TRIE1_SLOT]) < 24 ||
;; (this.buf.length - this.buf32[HNBIGTRIE_CHAR1_SLOT]) < 256
;; ) {
;; this.growBuf();
;; }
i32.const 264
i32.load
i32.const 260
i32.load
i32.sub
i32.const 24
i32.lt_u
if
call $growBuf
else
;; memory.size is in 64 KiB pages: << 16 converts it to bytes
memory.size
i32.const 16
i32.shl
i32.const 268
i32.load
i32.sub
i32.const 256
i32.lt_u
if
call $growBuf
end
end
;; let icell = this.buf32[iroot+0];
get_local $iroot
i32.const 2
i32.shl
tee_local $iroot
i32.load
i32.const 2
i32.shl
tee_local $icell
;; special case: the trie is empty
;; if ( icell === 0 ) {
i32.eqz
if
;; this.buf32[iroot+0] = this.addCell(0, 0, this.addSegment(lhnchar));
;; return 1;
get_local $iroot
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
i32.store
i32.const 1
return
end
;; const char0 = this.buf32[HNBIGTRIE_CHAR0_SLOT];
i32.const 264
i32.load
set_local $char0
;; for (;;) {
loop $nextSegment
;; const vseg = this.buf32[icell+2];
get_local $icell
i32.load offset=8
tee_local $vseg
;; a zero segment descriptor denotes a boundary cell
;; if ( vseg === 0 ) {
i32.eqz
if
;; if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
i32.const -1
return
end
;; icell = this.buf32[icell+1];
;; continue;
get_local $icell
i32.load offset=4
i32.const 2
i32.shl
set_local $icell
br $nextSegment
end
;; let isegchar0 = char0 + (vseg & 0x00FFFFFF);
get_local $char0
get_local $vseg
i32.const 0x00FFFFFF
i32.and
i32.add
tee_local $isegchar0
;; first character of the segment differs: try the cell linked from word 0
;; if ( this.buf[isegchar0] !== this.buf[lhnchar-1] ) {
i32.load8_u
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.ne
if
;; inext = this.buf32[icell+0];
get_local $icell
i32.load
i32.const 2
i32.shl
tee_local $inext
;; no more candidate cells: link a new cell holding the rest of the hostname
;; if ( inext === 0 ) {
i32.eqz
if
;; this.buf32[icell+0] = this.addCell(0, 0, this.addSegment(lhnchar));
get_local $icell
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
i32.store
;; return 1;
i32.const 1
return
end
;; icell = inext;
get_local $inext
set_local $icell
br $nextSegment
end
;; first character matched: count how many more segment characters match
;; let isegchar = 1;
i32.const 1
set_local $isegchar
;; lhnchar -= 1;
get_local $lhnchar
i32.const -1
i32.add
set_local $lhnchar
;; const lsegchar = vseg >>> 24;
get_local $vseg
i32.const 24
i32.shr_u
tee_local $lsegchar
;; if ( lsegchar !== 1 ) {
i32.const 1
i32.ne
if
;; for (;;) {
block $mismatch loop
;; if ( isegchar === lsegchar ) { break; }
get_local $isegchar
get_local $lsegchar
i32.eq
br_if $mismatch
;; if ( lhnchar === 0 ) { break; }
get_local $lhnchar
i32.eqz
br_if $mismatch
;; if ( this.buf[isegchar0+isegchar] !== this.buf[lhnchar-1] ) { break; }
get_local $isegchar0
get_local $isegchar
i32.add
i32.load8_u
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.ne
br_if $mismatch
;; isegchar += 1;
get_local $isegchar
i32.const 1
i32.add
set_local $isegchar
;; lhnchar -= 1;
get_local $lhnchar
i32.const -1
i32.add
set_local $lhnchar
br 0
end end
end
;; the whole segment matched
;; if ( isegchar === lsegchar ) {
get_local $isegchar
get_local $lsegchar
i32.eq
if
;; inext = this.buf32[icell+1];
get_local $icell
i32.load offset=4
i32.const 2
i32.shl
set_local $inext
;; the hostname is fully consumed
;; if ( lhnchar === 0 ) {
get_local $lhnchar
i32.eqz
if
;; nothing follows, or a boundary cell follows: hostname already stored
;; if ( inext === 0 || this.buf32[inext+2] === 0 ) { return 0; }
get_local $inext
i32.eqz
if
i32.const 0
return
end
get_local $inext
i32.load offset=8
i32.eqz
if
i32.const 0
return
end
;; insert a boundary cell in front of the cell that followed;
;; $inext is a byte offset here, >> 2 turns it into the word index $addCell stores
;; this.buf32[icell+1] = this.addCell(0, inext, 0);
get_local $icell
i32.const 0
get_local $inext
i32.const 2
i32.shr_u
i32.const 0
call $addCell
i32.store offset=4
else
;; if ( inext !== 0 ) {
get_local $inext
if
;; icell = inext;
get_local $inext
set_local $icell
br $nextSegment
end
;; if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
i32.const -1
return
end
;; append a boundary cell, then a cell holding the rest of the hostname;
;; from here on $inext is a word index (as returned by $addCell)
;; inext = this.addCell(0, 0, 0);
;; this.buf32[icell+1] = inext;
get_local $icell
i32.const 0
i32.const 0
i32.const 0
call $addCell
tee_local $inext
i32.store offset=4
;; this.buf32[inext+1] = this.addCell(0, 0, this.addSegment(lhnchar));
get_local $inext
i32.const 2
i32.shl
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
i32.store offset=4
end
else
;; only part of the segment matched: split it in two cells.
;; The current cell keeps the first isegchar characters.
;; isegchar0 -= char0;
get_local $icell
get_local $isegchar0
get_local $char0
i32.sub
tee_local $isegchar0
;; this.buf32[icell+2] = isegchar << 24 | isegchar0;
get_local $isegchar
i32.const 24
i32.shl
i32.or
i32.store offset=8
;; A new cell takes the remaining characters and inherits word 1 of the
;; current cell; $inext is a word index from here on.
;; inext = this.addCell(
;; 0,
;; this.buf32[icell+1],
;; lsegchar - isegchar << 24 | isegchar0 + isegchar
;; );
;; this.buf32[icell+1] = inext;
get_local $icell
i32.const 0
get_local $icell
i32.load offset=4
get_local $lsegchar
get_local $isegchar
i32.sub
i32.const 24
i32.shl
get_local $isegchar0
get_local $isegchar
i32.add
i32.or
call $addCell
tee_local $inext
i32.store offset=4
;; if ( lhnchar === 0 ) {
get_local $lhnchar
i32.eqz
if
;; hostname ends at the split point: insert a boundary cell there
;; this.buf32[icell+1] = this.addCell(0, inext, 0);
get_local $icell
i32.const 0
get_local $inext
i32.const 0
call $addCell
i32.store offset=4
else
;; link a cell holding the rest of the hostname from word 0 of the split-off cell
;; this.buf32[inext+0] = this.addCell(0, 0, this.addSegment(lhnchar));
get_local $inext
i32.const 2
i32.shl
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
i32.store
end
end
;; return 1;
i32.const 1
return
end
;; the loop above only exits through `return`; this constant supplies the declared i32 result
i32.const 1
)
;;
;; Private functions
;;
;;
;; unsigned int addCell(idown, iright, vseg)
;;
;; Add a new cell, return cell index.
;;
(func $addCell
    ;;
    ;; Append one 12-byte cell at the current end of the trie data section
    ;; and advance that end marker (stored at address 260) past it.
    ;;
    (param $idown i32)          ;; value stored in word 0 of the new cell
    (param $iright i32)         ;; value stored in word 1 of the new cell
    (param $vseg i32)           ;; value stored in word 2 of the new cell
    (result i32)                ;; word index (byte offset >> 2) of the new cell
    (local $pcell i32)          ;; byte offset of the new cell
    ;;
    ;; The new cell starts where the trie data section currently ends.
    (set_local $pcell (i32.load (i32.const 260)))
    (i32.store
        (i32.const 260)
        (i32.add (get_local $pcell) (i32.const 12))
    )
    ;; Fill in the three words of the cell.
    (i32.store          (get_local $pcell) (get_local $idown))
    (i32.store offset=4 (get_local $pcell) (get_local $iright))
    (i32.store offset=8 (get_local $pcell) (get_local $vseg))
    ;; Callers store cell links as word indices, not byte offsets.
    (i32.shr_u (get_local $pcell) (i32.const 2))
)
;;
;; unsigned int addSegment(lsegchar)
;;
;; Store a segment of characters and return a segment descriptor. The segment
;; is created from the character data in the needle buffer.
;;
(func $addSegment
    ;;
    ;; Copy the first $lsegchar bytes of the needle buffer, in reverse order,
    ;; to the end of the character data section, then advance the end marker
    ;; stored at address 268.
    ;;
    (param $lsegchar i32)       ;; number of needle characters to store
    (result i32)                ;; segment descriptor: (length << 24) | offset relative to char0
    (local $dst i32)            ;; byte offset where the next character is written
    (local $relstart i32)       ;; offset of the segment relative to start of character data
    (local $src i32)            ;; needle offset, walks from $lsegchar down to 0
    ;;
    ;; Nothing to store: the descriptor of an empty segment is 0.
    (if (i32.eqz (get_local $lsegchar))
        (then (return (i32.const 0)))
    )
    ;; The segment starts at the current end of the character data section.
    (set_local $dst (i32.load (i32.const 268)))
    (set_local $relstart
        (i32.sub (get_local $dst) (i32.load (i32.const 264)))
    )
    (set_local $src (get_local $lsegchar))
    ;; do { buf[dst++] = buf[--src]; } while ( src !== 0 );
    (loop $copy
        (i32.store8
            (get_local $dst)
            (i32.load8_u
                (tee_local $src (i32.sub (get_local $src) (i32.const 1)))
            )
        )
        (set_local $dst (i32.add (get_local $dst) (i32.const 1)))
        (br_if $copy (get_local $src))
    )
    ;; Publish the new end of the character data section.
    (i32.store (i32.const 268) (get_local $dst))
    (i32.or
        (i32.shl (get_local $lsegchar) (i32.const 24))
        (get_local $relstart)
    )
)
;;
;; module end
;;
)