1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-14 23:12:28 +02:00

Update publicSuffixLibrary to latest (WASM-able) version

See https://github.com/gorhill/publicsuffixlist.js
This commit is contained in:
Raymond Hill 2019-02-10 12:19:05 -05:00
parent 00236cf54f
commit 4da340384a
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
8 changed files with 980 additions and 331 deletions

View File

@ -7,7 +7,7 @@
<body>
<script src="lib/lz4/lz4-block-codec-any.js"></script>
<script src="lib/punycode.js"></script>
<script src="lib/publicsuffixlist.js"></script>
<script src="lib/publicsuffixlist/publicsuffixlist.js"></script>
<script src="js/vapi.js"></script>
<script src="js/vapi-common.js"></script>
<script src="js/vapi-background.js"></script>

View File

@ -1009,6 +1009,10 @@
/******************************************************************************/
µBlock.loadPublicSuffixList = function() {
if ( this.hiddenSettings.disableWebAssembly === false ) {
publicSuffixList.enableWASM();
}
return new Promise(resolve => {
// start of executor
this.assets.get('compiled/' + this.pslAssetKey, details => {

View File

@ -307,7 +307,7 @@ URI.domain = function() {
// It is expected that there is higher-scoped `publicSuffixList` lingering
// somewhere. Cache it. See <https://github.com/gorhill/publicsuffixlist.js>.
var psl = publicSuffixList;
const psl = publicSuffixList;
/******************************************************************************/
@ -391,7 +391,7 @@ const domainCachePrune = function() {
}
};
window.addEventListener('publicSuffixList', function() {
window.addEventListener('publicSuffixListChanged', function() {
domainCache.clear();
});

View File

@ -1,328 +0,0 @@
/*******************************************************************************
publicsuffixlist.js - an efficient javascript implementation to deal with
Mozilla Foundation's Public Suffix List <http://publicsuffix.org/list/>
Copyright (C) 2013-2018 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
*/
/*! Home: https://github.com/gorhill/publicsuffixlist.js */
/*
This code is mostly dumb: I consider this to be lower-level code, thus
in order to ensure efficiency, the caller is responsible for sanitizing
the inputs.
*/
/******************************************************************************/
// A single instance of PublicSuffixList is enough.
;(function(root) {
'use strict';
/******************************************************************************/
let exceptions = new Map();
let rules = new Map();
// This value dictate how the search will be performed:
// < this.cutoffLength = indexOf()
// >= this.cutoffLength = binary search
const cutoffLength = 256;
const mustPunycode = /[^a-z0-9.-]/;
/******************************************************************************/
// A domain, as far as this code is concerned, is "{label}.{public suffix}".
//
// Per http://publicsuffix.org/list/ ("If no rules match, the prevailing rule
// is '*'"), a lone label is itself a public suffix. Consequently 'localhost'
// is not reported as a domain here; callers decide what to do with such
// hostnames.
//
// `hostname` must be a valid ascii-based hostname. Returns the registrable
// domain, or an empty string when there is none.
function getDomain(hostname) {
    // Empty input and leading-dot hostnames are rejected outright.
    if ( !hostname || hostname.startsWith('.') ) { return ''; }
    const lowered = hostname.toLowerCase();
    const publicSuffix = getPublicSuffix(lowered);
    // The whole hostname is a public suffix: no registrable domain.
    if ( publicSuffix === lowered ) { return ''; }
    // Dot separating the public suffix from the label right before it...
    const suffixDot = lowered.lastIndexOf('.', lowered.length - publicSuffix.length);
    // ...and the dot (if any) preceding that label.
    const labelDot = lowered.lastIndexOf('.', suffixDot - 1);
    return labelDot <= 0 ? lowered : lowered.slice(labelDot + 1);
}
/******************************************************************************/
// Return the longest public suffix of `hostname`.
//
// `hostname` must be a valid ascii-based string which respects hostname
// naming.
function getPublicSuffix(hostname) {
    if ( !hostname ) { return ''; }
    // The candidate shrinks by one leading label per iteration, hence the
    // first hit is necessarily the longest matching suffix.
    let candidate = hostname;
    for (;;) {
        const dot = candidate.indexOf('.');
        // Single label left: it is the public suffix by default.
        if ( dot < 0 ) { return candidate; }
        const parent = candidate.slice(dot + 1);
        // Exception rule: the suffix is the rule minus its leftmost label.
        if ( search(exceptions, candidate) ) { return parent; }
        // Exact rule, or wildcard rule covering the leftmost label.
        if (
            search(rules, candidate) ||
            search(rules, '*' + candidate.slice(dot))
        ) {
            return candidate;
        }
        candidate = parent;
    }
}
/******************************************************************************/
// Look up a specific hostname in `store`.
//
// `store` maps a TLD to either a space-delimited string of suffixes, or to
// an array indexed by suffix length whose entries are sorted, concatenated
// fixed-width suffixes. Returns true when `hostname` is found.
function search(store, hostname) {
    // Split into TLD and whatever precedes it; a dot-less hostname acts as
    // both.
    const lastDot = hostname.lastIndexOf('.');
    const tld = lastDot < 0 ? hostname : hostname.slice(lastDot + 1);
    const remainder = lastDot < 0 ? hostname : hostname.slice(0, lastDot);
    const substore = store.get(tld);
    if ( substore === undefined ) { return false; }
    // String form: plain substring lookup.
    if ( typeof substore === 'string' ) {
        return substore.includes(' ' + remainder + ' ');
    }
    // Array form: binary search among entries of the same length.
    const width = remainder.length;
    if ( width >= substore.length ) { return false; }
    const haystack = substore[width];
    if ( haystack.length === 0 ) { return false; }
    let lo = 0;
    let hi = Math.floor(haystack.length / width + 0.5);
    while ( lo < hi ) {
        const mid = lo + hi >> 1;
        const offset = width * mid;
        const candidate = haystack.slice(offset, offset + width);
        if ( remainder < candidate ) {
            hi = mid;
        } else if ( remainder > candidate ) {
            lo = mid + 1;
        } else {
            return true;
        }
    }
    return false;
}
/******************************************************************************/
// Parse and install a UTF-8 text-based suffix list, in the format found at:
// http://publicsuffix.org/list/
//
// `toAscii` converts unicode to punycode; it is needed because the Public
// Suffix List contains unicode characters.
// Suggestion: <https://github.com/bestiejs/punycode.js> does the job well.
function parse(text, toAscii) {
    exceptions = new Map();
    rules = new Map();

    // http://publicsuffix.org/list/:
    // "... all rules must be canonicalized in the normal way
    // for hostnames - lower-case, Punycode ..."
    const source = text.toLowerCase();
    const sourceEnd = source.length;
    let cursor = 0;

    while ( cursor < sourceEnd ) {
        // A line ends at the next '\n', else at the next '\r', else at the
        // end of the text.
        let eol = source.indexOf('\n', cursor);
        if ( eol < 0 ) { eol = source.indexOf('\r', cursor); }
        if ( eol < 0 ) { eol = sourceEnd; }
        let rule = source.slice(cursor, eol).trim();
        cursor = eol + 1;
        if ( rule.length === 0 ) { continue; }

        // Drop trailing comment, then surrounding whitespace
        const commentAt = rule.indexOf('//');
        if ( commentAt >= 0 ) { rule = rule.slice(0, commentAt); }
        rule = rule.trim();
        if ( !rule ) { continue; }

        if ( mustPunycode.test(rule) ) { rule = toAscii(rule); }

        // Exception rules are flagged with a leading '!'
        const isException = rule.charAt(0) === '!';
        const store = isException ? exceptions : rules;
        if ( isException ) { rule = rule.slice(1); }

        // The TLD is the storage key; what precedes it is the stored suffix
        const lastDot = rule.lastIndexOf('.');
        const tld = lastDot < 0 ? rule : rule.slice(lastDot + 1);
        if ( lastDot >= 0 ) { rule = rule.slice(0, lastDot); }

        let substore = store.get(tld);
        if ( substore === undefined ) {
            substore = [];
            store.set(tld, substore);
        }
        if ( rule ) { substore.push(rule); }
    }

    crystallize(exceptions);
    crystallize(rules);
    window.dispatchEvent(new CustomEvent('publicSuffixList'));
}
/******************************************************************************/
// Crystallize the storage of suffixes into the internal representation used
// for look-ups. Each entry of `store` (TLD => array of suffixes) is replaced
// in place; `store` itself is returned.
function crystallize(store) {
    for ( const [ tld, suffixes ] of store ) {
        // No suffix at all
        if ( suffixes.length === 0 ) {
            store.set(tld, '');
            continue;
        }
        // Short lists are kept as one space-delimited string, looked up
        // through a substring search.
        const joined = suffixes.join(' ');
        if ( joined.length < cutoffLength ) {
            store.set(tld, ` ${joined} `);
            continue;
        }
        // Longer lists are bucketed by suffix length, each bucket being the
        // sorted suffixes glued together, looked up through binary search.
        // Idea of keying on string length borrowed from:
        // http://ejohn.org/blog/dictionary-lookups-in-javascript/#comment-392072
        const buckets = [];
        for ( const suffix of suffixes ) {
            while ( buckets.length <= suffix.length ) {
                buckets.push([]);
            }
            buckets[suffix.length].push(suffix);
        }
        store.set(tld, buckets.map(bucket => bucket.sort().join('')));
    }
    return store;
}
/******************************************************************************/
// Version tag of the serialized format; checked again when restoring.
const selfieMagic = 1;

// Snapshot the current rules and exceptions as plain arrays of
// [key, value] pairs.
function toSelfie() {
    const snapshot = { magic: selfieMagic };
    snapshot.rules = [ ...rules ];
    snapshot.exceptions = [ ...exceptions ];
    return snapshot;
}
// Restore rules and exceptions from a snapshot created by toSelfie().
//
// Returns false, leaving the current state untouched, when `selfie` is not
// an object or does not carry the expected magic number. Returns true after
// the state has been replaced and the 'publicSuffixList' event dispatched.
function fromSelfie(selfie) {
    // `typeof null` is 'object': null must be ruled out explicitly, otherwise
    // reading `selfie.magic` throws instead of reporting failure.
    if (
        selfie === null ||
        typeof selfie !== 'object' ||
        selfie.magic !== selfieMagic
    ) {
        return false;
    }
    rules = new Map(selfie.rules);
    exceptions = new Map(selfie.exceptions);
    window.dispatchEvent(new CustomEvent('publicSuffixList'));
    return true;
}
/******************************************************************************/
// Public API: exposed as `publicSuffixList` on the object handed to the
// enclosing function, or on `window` when none was provided.
root = root || window;
root.publicSuffixList = {
    version: '1.0',
    parse,
    getDomain,
    getPublicSuffix,
    toSelfie,
    fromSelfie,
    // True as long as no rule has been loaded.
    get empty() {
        return rules.size === 0;
    },
};
/******************************************************************************/
})(this);

View File

@ -0,0 +1,627 @@
/*******************************************************************************
publicsuffixlist.js - an efficient javascript implementation to deal with
Mozilla Foundation's Public Suffix List <http://publicsuffix.org/list/>
Copyright (C) 2013-present Raymond Hill
License: pick the one which suits you:
GPL v3 see <https://www.gnu.org/licenses/gpl.html>
APL v2 see <http://www.apache.org/licenses/LICENSE-2.0>
*/
/*! Home: https://github.com/gorhill/publicsuffixlist.js -- GPLv3 APLv2 */
/* jshint browser:true, esversion:6, laxbreak:true, undef:true, unused:true */
/* globals WebAssembly, console, exports:true, module */
/*******************************************************************************
Reference:
https://publicsuffix.org/list/
Excerpt:
> Algorithm
>
> 1. Match domain against all rules and take note of the matching ones.
> 2. If no rules match, the prevailing rule is "*".
> 3. If more than one rule matches, the prevailing rule is the one which
is an exception rule.
> 4. If there is no matching exception rule, the prevailing rule is the
one with the most labels.
> 5. If the prevailing rule is a exception rule, modify it by removing
the leftmost label.
> 6. The public suffix is the set of labels from the domain which match
the labels of the prevailing rule, using the matching algorithm above.
> 7. The registered or registrable domain is the public suffix plus one
additional label.
*/
/******************************************************************************/
(function(context) {
// >>>>>>>> start of anonymous namespace
'use strict';
/*******************************************************************************
Tree encoding in array buffer:
Node:
+ u16: length of array of children
+ u8: flags => bit 0: is_publicsuffix, bit 1: is_exception
+ u8: length of char data
+ u32: char data or offset to char data
+ u32: offset to array of children
= 12 bytes
More bits in flags could be used; for example:
- to distinguish private suffixes
*/
// i32 / i8
const HOSTNAME_SLOT = 0; // jshint ignore:line
const LABEL_INDICES_SLOT = 256; // -- / 256
const RULES_PTR_SLOT = 100; // 100 / 400
const CHARDATA_PTR_SLOT = 101; // 101 / 404
const EMPTY_STRING = '';
const SELFIE_MAGIC = 2;
let wasmMemory;
let pslBuffer32;
let pslBuffer8;
let pslByteLength = 0;
let hostnameArg = EMPTY_STRING;
/******************************************************************************/
// Notify listeners that the public suffix list content has changed, by
// dispatching a 'publicSuffixListChanged' event on `window`.
//
// This is a silent no-op wherever `window`, `window.dispatchEvent` or
// `window.CustomEvent` is unavailable (i.e. outside of a browser page).
const fireChangedEvent = function() {
    // `typeof` is the only safe probe here: merely evaluating an undeclared
    // `window` identifier would throw a ReferenceError.
    if ( typeof window !== 'object' || window === null ) { return; }
    if (
        typeof window.dispatchEvent !== 'function' ||
        typeof window.CustomEvent !== 'function'
    ) {
        return;
    }
    // Use the very constructor which was just validated.
    window.dispatchEvent(new window.CustomEvent('publicSuffixListChanged'));
};
/******************************************************************************/
// Ensure the working buffers can hold `byteLength` bytes (rounded up to a
// multiple of 4) and record that size in `pslByteLength`.
//
// Existing buffers are reused when already large enough. Otherwise the WASM
// memory is grown when there is one, else new plain typed arrays are
// created. In all cases the cached hostname argument is invalidated, since
// callers are about to overwrite the buffer content -- including the area
// where the hostname and its label indices live.
const allocateBuffers = function(byteLength) {
    pslByteLength = byteLength + 3 & ~3;
    const mustAllocate =
        pslBuffer32 === undefined ||
        pslBuffer32.byteLength < pslByteLength;
    if ( mustAllocate ) {
        if ( wasmMemory !== undefined ) {
            // WASM memory grows in units of 64 KiB pages.
            const newPageCount = pslByteLength + 0xFFFF >>> 16;
            const curPageCount = wasmMemory.buffer.byteLength >>> 16;
            const delta = newPageCount - curPageCount;
            if ( delta > 0 ) {
                wasmMemory.grow(delta);
            }
            // Growing detaches the previous ArrayBuffer: the views must be
            // re-created over the current one.
            pslBuffer32 = new Uint32Array(wasmMemory.buffer);
            pslBuffer8 = new Uint8Array(wasmMemory.buffer);
        } else {
            pslBuffer8 = new Uint8Array(pslByteLength);
            pslBuffer32 = new Uint32Array(pslBuffer8.buffer);
        }
    }
    // Important: must be done even when the buffers are reused, otherwise a
    // lookup for the previously cached hostname would be answered using
    // label indices which no longer exist.
    hostnameArg = '';
    pslBuffer8[LABEL_INDICES_SLOT] = 0;
};
/******************************************************************************/
// Parse and set a UTF-8 text-based suffix list. Format is same as found at:
// http://publicsuffix.org/list/
//
// `toAscii` is a converter from unicode to punycode. Required since the
// Public Suffix List contains unicode characters.
// Suggestion: use <https://github.com/bestiejs/punycode.js>
//
// Works in two phases:
// 1. build an in-memory tree of rules, one level per hostname label, walking
//    each rule from its rightmost label to its leftmost one;
// 2. serialize that tree into the working buffers (see "Tree encoding in
//    array buffer" above), then fire the change event.
const parse = function(text, toAscii) {
// Use short property names for better minifying results
const rootRule = {
l: EMPTY_STRING, // l => label
f: 0, // f => flags
c: undefined // c => children
};
// Tree building
{
// Order labels by length first, then by char code. The lookup code performs
// its binary search using the same two-step comparison.
const compareLabels = function(a, b) {
let n = a.length;
let d = n - b.length;
if ( d !== 0 ) { return d; }
for ( let i = 0; i < n; i++ ) {
d = a.charCodeAt(i) - b.charCodeAt(i);
if ( d !== 0 ) { return d; }
}
return 0;
};
// Insert `rule` into the tree, creating nodes as needed. The node reached
// after the last (leftmost) label is flagged as a public suffix, and also as
// an exception when `exception` is true.
const addToTree = function(rule, exception) {
let node = rootRule;
let end = rule.length;
// Consume labels from right to left; `end` becomes -1 once the leftmost
// label has been consumed.
while ( end > 0 ) {
const beg = rule.lastIndexOf('.', end - 1);
const label = rule.slice(beg + 1, end);
end = beg;
// First child of this node: no search needed.
if ( Array.isArray(node.c) === false ) {
const child = { l: label, f: 0, c: undefined };
node.c = [ child ];
node = child;
continue;
}
// Binary search among the sorted children; when the search interval
// collapses without a match, a new child is inserted at that position so
// that the array stays sorted.
let left = 0;
let right = node.c.length;
while ( left < right ) {
const i = left + right >>> 1;
const d = compareLabels(label, node.c[i].l);
if ( d < 0 ) {
right = i;
if ( right === left ) {
const child = {
l: label,
f: 0,
c: undefined
};
node.c.splice(left, 0, child);
node = child;
break;
}
continue;
}
if ( d > 0 ) {
left = i + 1;
if ( left === right ) {
const child = {
l: label,
f: 0,
c: undefined
};
node.c.splice(right, 0, child);
node = child;
break;
}
continue;
}
/* d === 0 */
node = node.c[i];
break;
}
}
// bit 0: is_publicsuffix, bit 1: is_exception
node.f |= 0b01;
if ( exception ) {
node.f |= 0b10;
}
};
// 2. If no rules match, the prevailing rule is "*".
addToTree('*', false);
const mustPunycode = /[^a-z0-9.-]/;
const textEnd = text.length;
let lineBeg = 0;
while ( lineBeg < textEnd ) {
// A line ends at the next '\n', else at the next '\r', else at end of text.
let lineEnd = text.indexOf('\n', lineBeg);
if ( lineEnd === -1 ) {
lineEnd = text.indexOf('\r', lineBeg);
if ( lineEnd === -1 ) {
lineEnd = textEnd;
}
}
let line = text.slice(lineBeg, lineEnd).trim();
lineBeg = lineEnd + 1;
// Ignore comments
const pos = line.indexOf('//');
if ( pos !== -1 ) {
line = line.slice(0, pos);
}
// Ignore surrounding whitespaces
line = line.trim();
if ( line.length === 0 ) { continue; }
const exception = line.charCodeAt(0) === 0x21 /* '!' */;
if ( exception ) {
line = line.slice(1);
}
// Only lines containing something else than [a-z0-9.-] are lower-cased and
// handed to `toAscii`.
if ( mustPunycode.test(line) ) {
line = toAscii(line.toLowerCase());
}
addToTree(line, exception);
}
}
// Tree serialization
{
// Labels longer than 4 characters are stored once in `charData`; this map
// remembers where, so that identical labels share the same character data.
const labelToOffsetMap = new Map();
// `treeData` holds u32 values, `charData` holds one char code per entry.
const treeData = [];
const charData = [];
// Append `n` zeroed u32 slots to `treeData`, return the index of the first.
const allocate = function(n) {
const ibuf = treeData.length;
for ( let i = 0; i < n; i++ ) {
treeData.push(0);
}
return ibuf;
};
// Write `node` into the 3 u32 slots starting at `ibuf`, then recursively
// write its children into a newly allocated, contiguous array of nodes.
const storeNode = function(ibuf, node) {
const nChars = node.l.length;
const nChildren = node.c !== undefined
? node.c.length
: 0;
// Word 0: children count (upper 16 bits), flags, label length (low 8 bits)
treeData[ibuf+0] = nChildren << 16 | node.f << 8 | nChars;
// char data
// Word 1: up to 4 characters are packed inline, first character in the
// lowest byte; longer labels store an offset into `charData` instead.
if ( nChars <= 4 ) {
let v = 0;
if ( nChars > 0 ) {
v |= node.l.charCodeAt(0);
if ( nChars > 1 ) {
v |= node.l.charCodeAt(1) << 8;
if ( nChars > 2 ) {
v |= node.l.charCodeAt(2) << 16;
if ( nChars > 3 ) {
v |= node.l.charCodeAt(3) << 24;
}
}
}
}
treeData[ibuf+1] = v;
} else {
let offset = labelToOffsetMap.get(node.l);
if ( offset === undefined ) {
offset = charData.length;
for ( let i = 0; i < nChars; i++ ) {
charData.push(node.l.charCodeAt(i));
}
labelToOffsetMap.set(node.l, offset);
}
treeData[ibuf+1] = offset;
}
// child nodes
// Word 2: u32 index of the first child node, 0 when there is no child.
if ( Array.isArray(node.c) === false ) {
treeData[ibuf+2] = 0;
return;
}
const iarray = allocate(nChildren * 3);
treeData[ibuf+2] = iarray;
for ( let i = 0; i < nChildren; i++ ) {
storeNode(iarray + i * 3, node.c[i]);
}
};
// First 512 bytes are reserved for internal use
allocate(512 >> 2);
const iRootRule = allocate(3);
storeNode(iRootRule, rootRule);
// RULES_PTR_SLOT receives a u32 index, CHARDATA_PTR_SLOT a byte offset: the
// character data is placed right after the tree data.
treeData[RULES_PTR_SLOT] = iRootRule;
const iCharData = treeData.length << 2;
treeData[CHARDATA_PTR_SLOT] = iCharData;
const byteLength = (treeData.length << 2) + (charData.length + 3 & ~3);
allocateBuffers(byteLength);
pslBuffer32.set(treeData);
pslBuffer8.set(charData, treeData.length << 2);
}
fireChangedEvent();
};
/******************************************************************************/
// Write `hostname` (lower-cased) into the working buffer, along with the
// position of each of its labels, and return the number of characters
// written. Returns 0 when `hostname` is empty or not a string.
//
// Buffer layout once done:
// - bytes [0, n): char codes of the hostname;
// - byte LABEL_INDICES_SLOT: n, which is also the end of the rightmost label;
// - following bytes: for each label, from rightmost to leftmost, the
//   position where the label begins, then the position where the next label
//   to the left ends; the begin position of the leftmost label is 0.
//
// The hostname most recently written is remembered in `hostnameArg`, so that
// repeating a call with the same hostname costs nothing.
//
// NOTE(review): only the first 255 characters of a longer hostname are
// written, while `hostnameArg` keeps the whole string -- results for such
// (invalid) hostnames are presumably not meaningful.
const setHostnameArg = function(hostname) {
    const buf = pslBuffer8;
    if ( hostname === hostnameArg ) { return buf[LABEL_INDICES_SLOT]; }
    if ( typeof hostname !== 'string' || hostname.length === 0 ) {
        // The length slot is about to be zeroed: the cached hostname must be
        // forgotten too, otherwise the next call with that hostname would hit
        // the cache above and wrongly report a length of 0.
        hostnameArg = '';
        return (buf[LABEL_INDICES_SLOT] = 0);
    }
    hostname = hostname.toLowerCase();
    hostnameArg = hostname;
    let n = hostname.length;
    if ( n > 255 ) { n = 255; }
    buf[LABEL_INDICES_SLOT] = n;
    let i = n;
    let j = LABEL_INDICES_SLOT + 1;
    // Single right-to-left pass: copy characters, record label boundaries.
    while ( i-- ) {
        const c = hostname.charCodeAt(i);
        if ( c === 0x2E /* '.' */ ) {
            buf[j+0] = i + 1;
            buf[j+1] = i;
            j += 2;
        }
        buf[i] = c;
    }
    buf[j] = 0;
    return n;
};
/******************************************************************************/
// Returns an offset to the start of the public suffix.
//
// WASM-able, because no information outside the buffer content is required.
//
// The hostname and its label indices are expected to be already present in
// the buffer. The returned value is the position, in the 8-bit view, of the
// label-indices entry at which the public suffix starts, or -1 when no
// public suffix was found. Callers read the start of the public suffix from
// the byte following that position.
//
// The WebAssembly version is meant to mirror this function statement by
// statement: keep both in sync.
const getPublicSuffixPosJS = function() {
const buf8 = pslBuffer8;
const buf32 = pslBuffer32;
// Byte offset to the character data of labels longer than 4 characters
const iCharData = buf32[CHARDATA_PTR_SLOT];
// u32 index of the current node, starting with the root node
let iNode = pslBuffer32[RULES_PTR_SLOT];
let cursorPos = -1;
let iLabel = LABEL_INDICES_SLOT;
// Label-lookup loop
// One iteration per hostname label, from rightmost to leftmost, descending
// one level in the tree each time.
for (;;) {
// Extract label indices
const labelBeg = buf8[iLabel+1];
const labelLen = buf8[iLabel+0] - labelBeg;
// Match-lookup loop: binary search
// `r` starts as the number of children of the current node
let r = buf32[iNode+0] >>> 16;
if ( r === 0 ) { break; }
const iCandidates = buf32[iNode+2];
let l = 0;
let iFound = 0;
while ( l < r ) {
const iCandidate = l + r >>> 1;
// A node spans 3 u32 slots: iCandidates + 3 * iCandidate
const iCandidateNode = iCandidates + iCandidate + (iCandidate << 1);
const candidateLen = buf32[iCandidateNode+0] & 0x000000FF;
// Compare by length first, then character by character
let d = labelLen - candidateLen;
if ( d === 0 ) {
// Labels of up to 4 characters are stored inline in the second u32 slot
// of the node (hence the conversion to a byte offset); longer ones are
// found in the character data area.
const iCandidateChar = candidateLen <= 4
? iCandidateNode + 1 << 2
: iCharData + buf32[iCandidateNode+1];
for ( let i = 0; i < labelLen; i++ ) {
d = buf8[labelBeg+i] - buf8[iCandidateChar+i];
if ( d !== 0 ) { break; }
}
}
if ( d < 0 ) {
r = iCandidate;
} else if ( d > 0 ) {
l = iCandidate + 1;
} else /* if ( d === 0 ) */ {
iFound = iCandidateNode;
break;
}
}
// 2. If no rules match, the prevailing rule is "*".
// Only the first character of the first child is inspected -- presumably
// a `*` child, when there is one, always sorts first among its siblings.
if ( iFound === 0 ) {
if ( buf8[iCandidates + 1 << 2] !== 0x2A /* '*' */ ) { break; }
iFound = iCandidates;
}
iNode = iFound;
// 5. If the prevailing rule is a exception rule, modify it by
// removing the leftmost label.
// Flags bit 1 (0x0200 in the first u32 slot): is_exception. Stepping back by
// one label-indices entry is what removes the leftmost label.
if ( (buf32[iNode+0] & 0x00000200) !== 0 ) {
if ( iLabel > LABEL_INDICES_SLOT ) {
return iLabel - 2;
}
break;
}
// Flags bit 0 (0x0100 in the first u32 slot): is_publicsuffix. Remember the
// longest match so far and keep going.
if ( (buf32[iNode+0] & 0x00000100) !== 0 ) {
cursorPos = iLabel;
}
// The leftmost label has just been processed
if ( labelBeg === 0 ) { break; }
iLabel += 2;
}
return cursorPos;
};
let getPublicSuffixPosWASM;
let getPublicSuffixPos = getPublicSuffixPosJS;
/******************************************************************************/
// Return the public suffix of `hostname`, or an empty string when no list is
// loaded, when the hostname is empty or starts with a dot, or when no
// public suffix is found.
const getPublicSuffix = function(hostname) {
    if ( pslBuffer32 === undefined ) { return EMPTY_STRING; }
    if ( setHostnameArg(hostname) === 0 ) { return EMPTY_STRING; }
    const bytes = pslBuffer8;
    // A leading dot makes for an invalid hostname.
    if ( bytes[0] === 0x2E /* '.' */ ) { return EMPTY_STRING; }
    const labelSlot = getPublicSuffixPos();
    if ( labelSlot === -1 ) { return EMPTY_STRING; }
    // Position in the hostname where the public suffix starts.
    const start = bytes[labelSlot + 1];
    if ( start === 0 ) { return hostnameArg; }
    return hostnameArg.slice(start);
};
/******************************************************************************/
// Return the registrable domain of `hostname`, or an empty string when no
// list is loaded, when the hostname is empty or starts with a dot, when no
// public suffix is found, or when the hostname is itself a public suffix.
const getDomain = function(hostname) {
    if ( pslBuffer32 === undefined ) { return EMPTY_STRING; }
    if ( setHostnameArg(hostname) === 0 ) { return EMPTY_STRING; }
    const bytes = pslBuffer8;
    // A leading dot makes for an invalid hostname.
    if ( bytes[0] === 0x2E /* '.' */ ) { return EMPTY_STRING; }
    const labelSlot = getPublicSuffixPos();
    if ( labelSlot === -1 ) { return EMPTY_STRING; }
    // Public suffix starting at position 0: nothing is left for a domain.
    if ( bytes[labelSlot + 1] === 0 ) { return EMPTY_STRING; }
    // 7. The registered or registrable domain is the public suffix plus one
    //    additional label.
    const start = bytes[labelSlot + 3];
    if ( start === 0 ) { return hostnameArg; }
    return hostnameArg.slice(start);
};
/******************************************************************************/
// Serialize the current state into a plain object: a magic number, the
// number of bytes in use, and those bytes as an array of 32-bit unsigned
// integers (null when no buffer exists yet).
const toSelfie = function() {
    let buffer = null;
    if ( pslBuffer32 !== undefined ) {
        // Only the part of the underlying buffer actually in use is kept.
        const words = new Uint32Array(pslBuffer32.buffer, 0, pslByteLength >>> 2);
        buffer = Array.from(words);
    }
    return {
        magic: SELFIE_MAGIC,
        byteLength: pslByteLength,
        buffer,
    };
};
// Restore the state from an object created by toSelfie().
//
// Returns false, leaving the current state untouched, when `selfie` is not
// an object, has the wrong magic number, lacks a numeric `byteLength` or an
// array `buffer`, or when `buffer` holds more 32-bit values than
// `byteLength` allows. Returns true once the buffers have been filled and
// the change event fired.
const fromSelfie = function(selfie) {
    if (
        selfie instanceof Object === false ||
        selfie.magic !== SELFIE_MAGIC ||
        typeof selfie.byteLength !== 'number' ||
        Array.isArray(selfie.buffer) === false
    ) {
        return false;
    }
    // Reject inconsistent content up front: copying more values than the
    // allocated buffer can hold would throw a RangeError, after the
    // allocation state has already been modified.
    if ( selfie.buffer.length > (selfie.byteLength + 3 >>> 2) ) {
        return false;
    }
    allocateBuffers(selfie.byteLength);
    pslBuffer32.set(selfie.buffer);
    // Important! The area holding the hostname and its label indices has just
    // been overwritten: whatever hostname was cached is no longer valid.
    hostnameArg = '';
    pslBuffer8[LABEL_INDICES_SLOT] = 0;
    fireChangedEvent();
    return true;
};
/******************************************************************************/
// The WASM module is entirely optional, the JS implementation will be
// used should the WASM module be unavailable for whatever reason.
//
// Returns a promise which resolves to true when the WASM implementation is
// in use, and to false otherwise. The promise never rejects.
const enableWASM = (function() {
    // The directory from which the current script was fetched should also
    // contain the related WASM file. The script is fetched from a trusted
    // location, and consequently so will be the related WASM file.
    //
    // Stays undefined when the location of the current script can't be
    // determined (no `document`, no current script, no `src`), in which case
    // enabling WASM is reported as unsuccessful rather than having the whole
    // script fail to load.
    let workingDir;
    {
        const script = typeof document === 'object' && document !== null
            ? document.currentScript
            : null;
        if (
            script instanceof Object &&
            typeof script.src === 'string' &&
            script.src !== ''
        ) {
            const url = new URL(script.src);
            const match = /[^\/]+$/.exec(url.pathname);
            if ( match !== null ) {
                url.pathname = url.pathname.slice(0, match.index);
            }
            workingDir = url.href;
        }
    }

    return function() {
        if ( getPublicSuffixPosWASM instanceof Function ) {
            return Promise.resolve(true);
        }

        if (
            workingDir === undefined ||
            typeof WebAssembly !== 'object' ||
            typeof WebAssembly.instantiateStreaming !== 'function'
        ) {
            return Promise.resolve(false);
        }

        // The wasm code will work only if CPU is natively little-endian,
        // as we use native uint32 array in our js code.
        const uint32s = new Uint32Array(1);
        const uint8s = new Uint8Array(uint32s.buffer);
        uint32s[0] = 1;
        if ( uint8s[0] !== 1 ) {
            return Promise.resolve(false);
        }

        // Local to each call, so that overlapping calls can't clobber each
        // other's memory instance.
        let memory;

        return fetch(
            workingDir + 'wasm/publicsuffixlist.wasm',
            { mode: 'same-origin' }
        ).then(response => {
            // The module requires an imported memory of at least one page.
            const pageCount = pslBuffer8 !== undefined
                ? Math.max(pslBuffer8.byteLength + 0xFFFF >>> 16, 1)
                : 1;
            memory = new WebAssembly.Memory({ initial: pageCount });
            return WebAssembly.instantiateStreaming(
                response,
                { imports: { memory: memory } }
            );
        }).then(({ instance }) => {
            // The list may have been replaced with a larger one while the
            // module was being instantiated. Both counts are expressed in
            // 64 KiB pages.
            const curPageCount = memory.buffer.byteLength >>> 16;
            const newPageCount = pslBuffer8 !== undefined
                ? pslBuffer8.byteLength + 0xFFFF >>> 16
                : 0;
            if ( newPageCount > curPageCount ) {
                memory.grow(newPageCount - curPageCount);
            }
            // Move the current content over to the WASM memory.
            const buf8 = new Uint8Array(memory.buffer);
            const buf32 = new Uint32Array(memory.buffer);
            if ( pslBuffer32 !== undefined ) {
                buf32.set(pslBuffer32);
            }
            pslBuffer8 = buf8;
            pslBuffer32 = buf32;
            wasmMemory = memory;
            getPublicSuffixPosWASM = instance.exports.getPublicSuffixPos;
            getPublicSuffixPos = getPublicSuffixPosWASM;
            return true;
        }).catch(reason => {
            console.info(reason);
            return false;
        });
    };
})();
// Revert to the JS implementation of the lookup code, and move the content
// of the WASM memory, if any, back into plain typed arrays.
const disableWASM = function() {
    if ( getPublicSuffixPosWASM instanceof Function ) {
        getPublicSuffixPos = getPublicSuffixPosJS;
        getPublicSuffixPosWASM = undefined;
    }
    if ( wasmMemory === undefined ) { return; }
    if ( pslByteLength === 0 ) {
        // No list was ever loaded: go back to the "no buffer" state, which
        // the lookup functions know how to deal with.
        pslBuffer8 = undefined;
        pslBuffer32 = undefined;
    } else {
        const buf8 = new Uint8Array(pslByteLength);
        const buf32 = new Uint32Array(buf8.buffer);
        // The WASM-backed view spans whole 64 KiB pages, which is typically
        // more than `pslByteLength`: copy only the part in use, copying the
        // whole view would throw a RangeError.
        buf32.set(pslBuffer32.subarray(0, pslByteLength >>> 2));
        pslBuffer8 = buf8;
        pslBuffer32 = buf32;
    }
    wasmMemory = undefined;
};
/******************************************************************************/
// Public API: exposed as `publicSuffixList` on the object handed to the
// enclosing function, or on `window` when none was provided.
context = context || window;
const api = {
    version: '2.0',
    parse,
    getDomain,
    getPublicSuffix,
    toSelfie,
    fromSelfie,
    disableWASM,
    enableWASM,
};
context.publicSuffixList = api;
// Also make the API available where a module system is detected.
if ( typeof module !== 'undefined' ) {
    module.exports = context.publicSuffixList;
} else if ( typeof exports !== 'undefined' ) {
    exports = context.publicSuffixList;
}
/******************************************************************************/
// <<<<<<<< end of anonymous namespace
})(this);

View File

@ -0,0 +1,29 @@
### For code reviewers
All `wasm` files in this directory were created by compiling the
corresponding `wat` file using the command (using
`publicsuffixlist.wat`/`publicsuffixlist.wasm` as example):
wat2wasm publicsuffixlist.wat -o publicsuffixlist.wasm
Assuming:
- The command is executed from within the present directory.
### `wat2wasm` tool
The `wat2wasm` tool can be downloaded from an official WebAssembly project:
<https://github.com/WebAssembly/wabt/releases>.
### `wat2wasm` tool online
You can also use the following online `wat2wasm` tool:
<https://webassembly.github.io/wabt/demo/wat2wasm/>.
Just paste the whole content of the `wat` file to compile into the WAT pane.
Click "Download" button to retrieve the resulting `wasm` file.
### See also
For the curious, the following online tool allows you to find out the machine
code as a result from the WASM code: https://mbebenita.github.io/WasmExplorer/

Binary file not shown.

View File

@ -0,0 +1,317 @@
;;
;; uBlock Origin - a browser extension to block requests.
;; Copyright (C) 2019-present Raymond Hill
;;
;; License: pick the one which suits you:
;; GPL v3 see <https://www.gnu.org/licenses/gpl.html>
;; APL v2 see <http://www.apache.org/licenses/LICENSE-2.0>
;;
;; Home: https://github.com/gorhill/publicsuffixlist.js
;; File: publicsuffixlist.wat
;;
;; Description: WebAssembly implementation for core lookup method in
;; publicsuffixlist.js
;;
;; How to compile:
;;
;; wat2wasm publicsuffixlist.wat -o publicsuffixlist.wasm
;;
;; The `wat2wasm` tool can be downloaded from an official WebAssembly
;; project:
;; https://github.com/WebAssembly/wabt/releases
(module
;;
;; module start
;;
(memory (import "imports" "memory") 1)
;;
;; Tree encoding in array buffer:
;;
;; Node:
;; + u8: length of char data
;; + u8: flags => bit 0: is_publicsuffix, bit 1: is_exception
;; + u16: length of array of children
;; + u32: char data or offset to char data
;; + u32: offset to array of children
;; = 12 bytes
;;
;; // i32 / i8
;; const HOSTNAME_SLOT = 0; // jshint ignore:line
;; const LABEL_INDICES_SLOT = 256; // -- / 256
;; const RULES_PTR_SLOT = 100; // 100 / 400
;; const CHARDATA_PTR_SLOT = 101; // 101 / 404
;; const EMPTY_STRING = '';
;; const SELFIE_MAGIC = 2;
;;
;;
;; Public functions
;;
;;
;; int getPublicSuffixPos()
;;
;; Returns an offset to the start of the public suffix.
;;
;; Mirrors the JS function getPublicSuffixPosJS: the commented-out JS
;; statements below are the ones being implemented by the instructions which
;; follow them. Unlike the JS code, which indexes a u32 array, node positions
;; are handled here as byte offsets -- hence the `<< 2` conversions.
(func (export "getPublicSuffixPos")
(result i32) ;; result = offset of the label-indices entry where the public suffix starts, -1 = miss
(local $iCharData i32) ;; offset to start of character data
(local $iNode i32) ;; offset to current node
(local $iLabel i32) ;; offset to label indices
(local $cursorPos i32) ;; position of cursor within hostname argument
(local $labelBeg i32)
(local $labelLen i32)
(local $nCandidates i32)
(local $iCandidates i32)
(local $iFound i32)
(local $l i32)
(local $r i32)
(local $d i32)
(local $iCandidate i32)
(local $iCandidateNode i32)
(local $candidateLen i32)
(local $iCandidateChar i32)
(local $_1 i32)
(local $_2 i32)
(local $_3 i32)
;;
;; const iCharData = buf32[CHARDATA_PTR_SLOT];
;; (byte address 404 = CHARDATA_PTR_SLOT * 4)
i32.const 404
i32.load
set_local $iCharData
;; let iNode = pslBuffer32[RULES_PTR_SLOT];
;; (byte address 400 = RULES_PTR_SLOT * 4; the u32 index read is converted
;; to a byte offset)
i32.const 400
i32.load
i32.const 2
i32.shl
set_local $iNode
;; let iLabel = LABEL_INDICES_SLOT;
i32.const 256
set_local $iLabel
;; let cursorPos = -1;
i32.const -1
set_local $cursorPos
;; label-lookup loop
;; for (;;) {
block $labelLookupDone loop $labelLookup
;; // Extract label indices
;; const labelBeg = buf8[iLabel+1];
;; const labelLen = buf8[iLabel+0] - labelBeg;
get_local $iLabel
i32.load8_u
get_local $iLabel
i32.load8_u offset=1
tee_local $labelBeg
i32.sub
set_local $labelLen
;; // Match-lookup loop: binary search
;; let r = buf32[iNode+0] >>> 16;
;; if ( r === 0 ) { break; }
;; (the upper 16 bits are read directly as a u16 at byte offset 2)
get_local $iNode
i32.load16_u offset=2
tee_local $r
i32.eqz
br_if $labelLookupDone
;; const iCandidates = buf32[iNode+2];
;; (converted to a byte offset)
get_local $iNode
i32.load offset=8
i32.const 2
i32.shl
set_local $iCandidates
;; let l = 0;
;; let iFound = 0;
i32.const 0
tee_local $l
set_local $iFound
;; while ( l < r ) {
block $binarySearchDone loop $binarySearch
get_local $l
get_local $r
i32.ge_u
br_if $binarySearchDone
;; const iCandidate = l + r >>> 1;
get_local $l
get_local $r
i32.add
i32.const 1
i32.shr_u
tee_local $iCandidate
;; const iCandidateNode = iCandidates + iCandidate + (iCandidate << 1);
;; (as a byte offset: iCandidates + iCandidate * 4 + iCandidate * 8)
i32.const 2
i32.shl
tee_local $_1
get_local $_1
i32.const 1
i32.shl
i32.add
get_local $iCandidates
i32.add
tee_local $iCandidateNode
;; const candidateLen = buf32[iCandidateNode+0] & 0x000000FF;
i32.load8_u
set_local $candidateLen
;; let d = labelLen - candidateLen;
get_local $labelLen
get_local $candidateLen
i32.sub
tee_local $d
;; if ( d === 0 ) {
i32.eqz
if
;; const iCandidateChar = candidateLen <= 4
get_local $candidateLen
i32.const 4
i32.le_u
if
;; ? iCandidateNode + 1 << 2
get_local $iCandidateNode
i32.const 4
i32.add
set_local $iCandidateChar
else
;; : iCharData + buf32[iCandidateNode+1];
get_local $iCharData
get_local $iCandidateNode
i32.load offset=4
i32.add
set_local $iCandidateChar
end
;; for ( let i = 0; i < labelLen; i++ ) {
;; ($_1 walks the label, $_2 walks the candidate, $_3 is the end of the label)
get_local $labelBeg
tee_local $_1
get_local $labelLen
i32.add
set_local $_3
get_local $iCandidateChar
set_local $_2
block $findDiffDone loop $findDiff
;; d = buf8[labelBeg+i] - buf8[iCandidateChar+i];
;; if ( d !== 0 ) { break; }
get_local $_1
i32.load8_u
get_local $_2
i32.load8_u
i32.sub
tee_local $d
br_if $findDiffDone
get_local $_1
i32.const 1
i32.add
tee_local $_1
get_local $_3
i32.eq
br_if $findDiffDone
get_local $_2
i32.const 1
i32.add
set_local $_2
br $findDiff
;; }
end end
;; }
end
;; if ( d < 0 ) {
;; r = iCandidate;
get_local $d
i32.const 0
i32.lt_s
if
get_local $iCandidate
set_local $r
br $binarySearch
end
;; } else if ( d > 0 ) {
;; l = iCandidate + 1;
get_local $d
i32.const 0
i32.gt_s
if
get_local $iCandidate
i32.const 1
i32.add
set_local $l
br $binarySearch
end
;; } else /* if ( d === 0 ) */ {
;; iFound = iCandidateNode;
;; break;
;; }
get_local $iCandidateNode
set_local $iFound
end end
;; }
;; // 2. If no rules match, the prevailing rule is "*".
;; if ( iFound === 0 ) {
;; if ( buf8[iCandidates + 1 << 2] !== 0x2A /* '*' */ ) { break; }
;; iFound = iCandidates;
;; }
get_local $iFound
i32.eqz
if
get_local $iCandidates
i32.load8_u offset=4
i32.const 0x2A
i32.ne
br_if $labelLookupDone
get_local $iCandidates
set_local $iFound
end
;; iNode = iFound;
get_local $iFound
tee_local $iNode
;; // 5. If the prevailing rule is a exception rule, modify it by
;; // removing the leftmost label.
;; if ( (buf32[iNode+0] & 0x00000200) !== 0 ) {
;; if ( iLabel > LABEL_INDICES_SLOT ) {
;; return iLabel - 2;
;; }
;; break;
;; }
;; (the flags byte is read on its own at byte offset 1, and kept in $_1 for
;; the is_publicsuffix test further below)
i32.load8_u offset=1
tee_local $_1
i32.const 0x02
i32.and
if
get_local $iLabel
i32.const 256
i32.gt_u
if
get_local $iLabel
i32.const -2
i32.add
return
end
br $labelLookupDone
end
;; if ( (buf32[iNode+0] & 0x00000100) !== 0 ) {
;; cursorPos = iLabel;
;; }
get_local $_1
i32.const 0x01
i32.and
if
get_local $iLabel
set_local $cursorPos
end
;; if ( labelBeg === 0 ) { break; }
get_local $labelBeg
i32.eqz
br_if $labelLookupDone
;; iLabel += 2;
get_local $iLabel
i32.const 2
i32.add
set_local $iLabel
br $labelLookup
end end
;; return cursorPos;
get_local $cursorPos
)
;;
;; module end
;;
)