diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3edc51ff2..9cd1db1cb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -45,6 +45,7 @@ jobs: tools/make-firefox.sh ${{ steps.release_info.outputs.VERSION }} tools/make-thunderbird.sh ${{ steps.release_info.outputs.VERSION }} tools/make-npm.sh ${{ steps.release_info.outputs.VERSION }} + tools/make-mv3.sh all - name: Upload Chromium package uses: actions/upload-release-asset@v1 env: @@ -81,3 +82,12 @@ jobs: asset_path: dist/build/uBlock0_${{ steps.release_info.outputs.VERSION }}.npm.tgz asset_name: uBlock0_${{ steps.release_info.outputs.VERSION }}.npm.tgz asset_content_type: application/octet-stream + - name: Upload Chromium MV3 package + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: dist/build/uBlock0.mv3.zip + asset_name: uBlock0.mv3.zip + asset_content_type: application/octet-stream diff --git a/Makefile b/Makefile index d69c81688..77af4aa02 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # https://stackoverflow.com/a/6273809 run_options := $(filter-out $@,$(MAKECMDGOALS)) -.PHONY: all clean test lint chromium firefox npm dig \ +.PHONY: all clean test lint chromium firefox npm dig mv3 \ compare maxcost medcost mincost modifiers record wasm sources := $(wildcard assets/resources/* src/* src/*/* src/*/*/* src/*/*/*/*) @@ -52,6 +52,11 @@ dig: dist/build/uBlock0.dig dig-snfe: dig cd dist/build/uBlock0.dig && npm run snfe $(run_options) +dist/build/uBlock0.mv3: tools/make-mv3.sh $(sources) $(platform) + tools/make-mv3.sh all + +mv3: dist/build/uBlock0.mv3 + # Update submodules. 
update-submodules: tools/update-submodules.sh diff --git a/platform/common/vapi-common.js b/platform/common/vapi-common.js index e9f00dc62..0bea820f1 100644 --- a/platform/common/vapi-common.js +++ b/platform/common/vapi-common.js @@ -37,7 +37,10 @@ vAPI.setTimeout = vAPI.setTimeout || self.setTimeout.bind(self); vAPI.webextFlavor = { major: 0, - soup: new Set() + soup: new Set(), + get env() { + return Array.from(this.soup); + } }; (( ) => { diff --git a/platform/mv3/extension/background.js b/platform/mv3/extension/background.js new file mode 100644 index 000000000..4c7ef6cd9 --- /dev/null +++ b/platform/mv3/extension/background.js @@ -0,0 +1,65 @@ +'use strict'; + +import regexRulesets from '/rulesets/regexes.js'; + +const dnr = chrome.declarativeNetRequest; + +dnr.setExtensionActionOptions({ displayActionCountAsBadgeText: true }); + +(async ( ) => { + const allRules = []; + const toCheck = []; + for ( const regexRuleset of regexRulesets ) { + if ( regexRuleset.enabled !== true ) { continue; } + for ( const rule of regexRuleset.rules ) { + const regex = rule.condition.regexFilter; + const isCaseSensitive = rule.condition.isUrlFilterCaseSensitive === true; + allRules.push(rule); + toCheck.push(dnr.isRegexSupported({ regex, isCaseSensitive })); + } + } + const results = await Promise.all(toCheck); + const newRules = []; + for ( let i = 0; i < allRules.length; i++ ) { + const rule = allRules[i]; + const result = results[i]; + if ( result instanceof Object && result.isSupported ) { + newRules.push(rule); + } else { + console.info(`${result.reason}: ${rule.condition.regexFilter}`); + } + } + const oldRules = await dnr.getDynamicRules(); + const oldRuleMap = new Map(oldRules.map(rule => [ rule.id, rule ])); + const newRuleMap = new Map(newRules.map(rule => [ rule.id, rule ])); + const addRules = []; + const removeRuleIds = []; + for ( const oldRule of oldRules ) { + const newRule = newRuleMap.get(oldRule.id); + if ( newRule === undefined ) { + 
removeRuleIds.push(oldRule.id); + } else if ( JSON.stringify(oldRule) !== JSON.stringify(newRule) ) { + removeRuleIds.push(oldRule.id); + addRules.push(newRule); + } + } + for ( const newRule of newRuleMap.values() ) { + if ( oldRuleMap.has(newRule.id) ) { continue; } + addRules.push(newRule); + } + if ( addRules.length !== 0 || removeRuleIds.length !== 0 ) { + await dnr.updateDynamicRules({ addRules, removeRuleIds }); + } + + const dynamicRules = await dnr.getDynamicRules(); + console.log(`Dynamic rule count: ${dynamicRules.length}`); + + const enabledRulesets = await dnr.getEnabledRulesets(); + console.log(`Enabled rulesets: ${enabledRulesets}`); + + console.log(`Available dynamic rule count: ${dnr.MAX_NUMBER_OF_DYNAMIC_AND_SESSION_RULES - dynamicRules.length}`); + + dnr.getAvailableStaticRuleCount().then(count => { + console.log(`Available static rule count: ${count}`); + }); +})(); diff --git a/platform/mv3/extension/img/icon_128.png b/platform/mv3/extension/img/icon_128.png new file mode 100644 index 000000000..9824fa8d9 Binary files /dev/null and b/platform/mv3/extension/img/icon_128.png differ diff --git a/platform/mv3/extension/img/icon_16.png b/platform/mv3/extension/img/icon_16.png new file mode 100644 index 000000000..2bf29efd0 Binary files /dev/null and b/platform/mv3/extension/img/icon_16.png differ diff --git a/platform/mv3/extension/img/icon_32.png b/platform/mv3/extension/img/icon_32.png new file mode 100644 index 000000000..7c8a45545 Binary files /dev/null and b/platform/mv3/extension/img/icon_32.png differ diff --git a/platform/mv3/extension/img/icon_64.png b/platform/mv3/extension/img/icon_64.png new file mode 100644 index 000000000..2cf0a62d7 Binary files /dev/null and b/platform/mv3/extension/img/icon_64.png differ diff --git a/platform/mv3/extension/manifest.json b/platform/mv3/extension/manifest.json new file mode 100644 index 000000000..8a618a8bf --- /dev/null +++ b/platform/mv3/extension/manifest.json @@ -0,0 +1,25 @@ +{ + "author": 
"Raymond Hill", + "background": { + "service_worker": "background.js", + "type": "module" + }, + "declarative_net_request": { + "rule_resources": [ + ] + }, + "description": "uBO Minus is permission-less experimental MV3-based network request blocker", + "icons": { + "16": "img/icon_16.png", + "32": "img/icon_32.png", + "64": "img/icon_64.png", + "128": "img/icon_128.png" + }, + "manifest_version": 3, + "minimum_chrome_version": "101.0", + "name": "uBO Minus (MV3)", + "permissions": [ + "declarativeNetRequest" + ], + "version": "0.1.0" +} diff --git a/platform/mv3/make-rulesets.js b/platform/mv3/make-rulesets.js new file mode 100644 index 000000000..cb4f66cbb --- /dev/null +++ b/platform/mv3/make-rulesets.js @@ -0,0 +1,235 @@ +/******************************************************************************* + + uBlock Origin - a browser extension to block requests. + Copyright (C) 2022-present Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. 
+ + Home: https://github.com/gorhill/uBlock +*/ + +'use strict'; + +/******************************************************************************/ + +import fs from 'fs/promises'; +import process from 'process'; + +import rulesetConfigs from './ruleset-config.js'; +import { dnrRulesetFromRawLists } from './js/static-dnr-filtering.js'; + +/******************************************************************************/ + +const commandLineArgs = (( ) => { + const args = new Map(); + let name, value; + for ( const arg of process.argv.slice(2) ) { + const pos = arg.indexOf('='); + if ( pos === -1 ) { + name = arg; + value = ''; + } else { + name = arg.slice(0, pos); + value = arg.slice(pos+1); + } + args.set(name, value); + } + return args; +})(); + +/******************************************************************************/ + +async function main() { + + const writeOps = []; + const ruleResources = []; + const regexRuleResources = []; + const outputDir = commandLineArgs.get('output') || '.'; + + let goodTotalCount = 0; + let maybeGoodTotalCount = 0; + + const output = []; + const log = (text, silent = false) => { + output.push(text); + if ( silent === false ) { + console.log(text); + } + }; + + const replacer = (k, v) => { + if ( k.startsWith('__') ) { return; } + if ( Array.isArray(v) ) { + return v.sort(); + } + if ( v instanceof Object ) { + const sorted = {}; + for ( const kk of Object.keys(v).sort() ) { + sorted[kk] = v[kk]; + } + return sorted; + } + return v; + }; + + const isUnsupported = rule => + rule._error !== undefined; + const isRegex = rule => + rule.condition !== undefined && + rule.condition.regexFilter !== undefined; + const isRedirect = rule => + rule.action !== undefined && + rule.action.type === 'redirect' && + rule.action.redirect.extensionPath !== undefined; + const isCsp = rule => + rule.action !== undefined && + rule.action.type === 'modifyHeaders'; + const isRemoveparam = rule => + rule.action !== undefined && + rule.action.type === 
'redirect' && + rule.action.redirect.transform !== undefined; + const isGood = rule => + isUnsupported(rule) === false && + isRedirect(rule) === false && + isCsp(rule) === false && + isRemoveparam(rule) === false + ; + + const rulesetDir = `${outputDir}/rulesets`; + const rulesetDirPromise = fs.mkdir(`${rulesetDir}`, { recursive: true }); + + const fetchList = url => { + return fetch(url) + .then(response => { if ( response.ok === false ) { throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`); } return response.text(); }) + .then(text => ({ name: url, text })); + }; + + const readList = path => + fs.readFile(path, { encoding: 'utf8' }) + .then(text => ({ name: path, text })); + + const writeFile = (path, data) => + rulesetDirPromise.then(( ) => + fs.writeFile(path, data)); + + for ( const ruleset of rulesetConfigs ) { + const lists = []; + + log(`Listset for '${ruleset.id}':`); + + if ( Array.isArray(ruleset.paths) ) { + for ( const path of ruleset.paths ) { + log(`\t${path}`); + lists.push(readList(`assets/${path}`)); + } + } + if ( Array.isArray(ruleset.urls) ) { + for ( const url of ruleset.urls ) { + log(`\t${url}`); + lists.push(fetchList(url)); + } + } + + const rules = await dnrRulesetFromRawLists(lists, { + env: [ 'chromium' ], + }); + + log(`Ruleset size for '${ruleset.id}': ${rules.length}`); + + const good = rules.filter(rule => isGood(rule) && isRegex(rule) === false); + log(`\tGood: ${good.length}`); + + const regexes = rules.filter(rule => isGood(rule) && isRegex(rule)); + log(`\tMaybe good (regexes): ${regexes.length}`); + + const redirects = rules.filter(rule => + isUnsupported(rule) === false && + isRedirect(rule) + ); + log(`\tredirect-rule= (discarded): ${redirects.length}`); + + const headers = rules.filter(rule => + isUnsupported(rule) === false && + isCsp(rule) + ); + log(`\tcsp= (discarded): ${headers.length}`); + + const removeparams = rules.filter(rule => + isUnsupported(rule) === false && + isRemoveparam(rule) + ); + log(`\tremoveparams= (discarded): ${removeparams.length}`); + + const bad = rules.filter(rule => + isUnsupported(rule) 
); + log(`\tUnsupported: ${bad.length}`); + log( + bad.flatMap(rule => rule._error.map(v => `\t\t${v}`)).join('\n'), + true + ); + + writeOps.push( + writeFile( + `${rulesetDir}/${ruleset.id}.json`, + `${JSON.stringify(good, replacer, 2)}\n` + ) + ); + + regexRuleResources.push({ + id: ruleset.id, + enabled: ruleset.enabled, + rules: regexes + }); + + ruleResources.push({ + id: ruleset.id, + enabled: ruleset.enabled, + path: `/rulesets/${ruleset.id}.json` + }); + + goodTotalCount += good.length; + maybeGoodTotalCount += regexes.length; + } + + writeOps.push( + writeFile( + `${rulesetDir}/regexes.js`, + `export default ${JSON.stringify(regexRuleResources, replacer, 2)};\n` + ) + ); + + await Promise.all(writeOps); + + log(`Total good rules count: ${goodTotalCount}`); + log(`Total regex rules count: ${maybeGoodTotalCount}`); + + // Patch manifest; getUTCMonth() is 0-based, hence the +1 so the version encodes the calendar month + const manifest = await fs.readFile(`${outputDir}/manifest.json`, { encoding: 'utf8' }) + .then(text => JSON.parse(text)); + manifest.declarative_net_request = { rule_resources: ruleResources }; + const now = new Date(); + manifest.version = `0.1.${now.getUTCFullYear() - 2000}.${(now.getUTCMonth() + 1) * 100 + now.getUTCDate()}`; + await fs.writeFile( + `${outputDir}/manifest.json`, + JSON.stringify(manifest, null, 2) + '\n' + ); + + // Log results + await fs.writeFile(`${outputDir}/log.txt`, output.join('\n') + '\n'); +} + +main(); + +/******************************************************************************/ diff --git a/platform/mv3/package.json b/platform/mv3/package.json new file mode 100644 index 000000000..c10527aa9 --- /dev/null +++ b/platform/mv3/package.json @@ -0,0 +1,6 @@ +{ + "engines": { + "node": ">=17.5.0" + }, + "type": "module" +} diff --git a/platform/mv3/ruleset-config.js b/platform/mv3/ruleset-config.js new file mode 100644 index 000000000..ac4c9f3b3 --- /dev/null +++ b/platform/mv3/ruleset-config.js @@ -0,0 +1,75 @@ +/******************************************************************************* + + uBlock
Origin - a browser extension to block requests. + Copyright (C) 2022-present Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. + + Home: https://github.com/gorhill/uBlock +*/ + +'use strict'; + +export default [ + { + id: 'default', + name: 'Default ruleset', + enabled: true, + paths: [ + ], + urls: [ + 'https://ublockorigin.github.io/uAssets/filters/badware.txt', + 'https://ublockorigin.github.io/uAssets/filters/filters.txt', + 'https://ublockorigin.github.io/uAssets/filters/filters-2020.txt', + 'https://ublockorigin.github.io/uAssets/filters/filters-2021.txt', + 'https://ublockorigin.github.io/uAssets/filters/filters-2022.txt', + 'https://ublockorigin.github.io/uAssets/filters/privacy.txt', + 'https://ublockorigin.github.io/uAssets/filters/quick-fixes.txt', + 'https://ublockorigin.github.io/uAssets/filters/resource-abuse.txt', + 'https://ublockorigin.github.io/uAssets/filters/unbreak.txt', + 'https://easylist.to/easylist/easylist.txt', + 'https://easylist.to/easylist/easyprivacy.txt', + 'https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-online.txt', + 'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=1&mimetype=plaintext', + ] + }, + { + id: 'DEU-0', + name: 'DEU: EasyList Germany', + enabled: false, + paths: [ + ], + urls: [ + 'https://easylist.to/easylistgermany/easylistgermany.txt', + ] + }, + { + id: 'RUS-0', + name: 'RUS: RU AdList', + 
enabled: false, + paths: [ + ], + urls: [ + 'https://raw.githubusercontent.com/easylist/ruadlist/master/advblock/adservers.txt', + 'https://raw.githubusercontent.com/easylist/ruadlist/master/advblock/first_level.txt', + 'https://raw.githubusercontent.com/easylist/ruadlist/master/advblock/general_block.txt', + 'https://raw.githubusercontent.com/easylist/ruadlist/master/advblock/specific_antisocial.txt', + 'https://raw.githubusercontent.com/easylist/ruadlist/master/advblock/specific_block.txt', + 'https://raw.githubusercontent.com/easylist/ruadlist/master/advblock/specific_special.txt', + 'https://raw.githubusercontent.com/easylist/ruadlist/master/advblock/thirdparty.txt', + 'https://raw.githubusercontent.com/easylist/ruadlist/master/advblock/whitelist.txt', + 'https://raw.githubusercontent.com/easylist/ruadlist/master/advblock/AWRL-non-sync.txt', + ] + }, +]; diff --git a/platform/mv3/ublock.svg b/platform/mv3/ublock.svg new file mode 100644 index 000000000..28e8f06ab --- /dev/null +++ b/platform/mv3/ublock.svg @@ -0,0 +1,69 @@ + + + + + + + + + + + + + + diff --git a/platform/npm/package-lock.json b/platform/npm/package-lock.json index 583c88d12..e94b20c1e 100644 --- a/platform/npm/package-lock.json +++ b/platform/npm/package-lock.json @@ -1,7 +1,7 @@ { "name": "@gorhill/ubo-core", - "version": "0.1.9", - "lockfileVersion": 1, + "version": "0.1.25", + "lockfileVersion": 2, "requires": true, "dependencies": { "@babel/code-frame": { @@ -117,7 +117,8 @@ "version": "5.3.2", "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", - "dev": true + "dev": true, + "requires": {} }, "ajv": { "version": "6.12.6", @@ -138,9 +139,9 @@ "dev": true }, "ansi-regex": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", - "integrity": 
"sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", "dev": true }, "ansi-styles": { @@ -306,9 +307,9 @@ } }, "chokidar": { - "version": "3.5.2", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.2.tgz", - "integrity": "sha512-ekGhOnNVPgT77r4K/U3GDhu+FQ2S8TnK/s2KbIGXi0SZWuwkZ2QNyfWdZW+TVfn84DpEP7rLeCt2UI6bJ8GwbQ==", + "version": "3.5.3", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz", + "integrity": "sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==", "dev": true, "requires": { "anymatch": "~3.1.2", @@ -382,9 +383,9 @@ } }, "debug": { - "version": "4.3.2", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz", - "integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==", + "version": "4.3.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.3.tgz", + "integrity": "sha512-/zxw5+vh1Tfv+4Qn7a5nsbcJKPaSvCDhojn6FEl9vupwK2VCSDtEiEtqr8DFtzYFOdz63LBkxec7DYuc2jon6Q==", "dev": true, "requires": { "ms": "2.1.2" @@ -706,9 +707,9 @@ "dev": true }, "glob": { - "version": "7.1.7", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.7.tgz", - "integrity": "sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.0.tgz", + "integrity": "sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q==", "dev": true, "requires": { "fs.realpath": "^1.0.0", @@ -1008,33 +1009,32 @@ } }, "mocha": { - "version": "9.0.3", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-9.0.3.tgz", - "integrity": 
"sha512-hnYFrSefHxYS2XFGtN01x8un0EwNu2bzKvhpRFhgoybIvMaOkkL60IVPmkb5h6XDmUl4IMSB+rT5cIO4/4bJgg==", + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-9.2.2.tgz", + "integrity": "sha512-L6XC3EdwT6YrIk0yXpavvLkn8h+EU+Y5UcCHKECyMbdUIxyMuZj4bX4U9e1nvnvUUvQVsV2VHQr5zLdcUkhW/g==", "dev": true, "requires": { "@ungap/promise-all-settled": "1.1.2", "ansi-colors": "4.1.1", "browser-stdout": "1.3.1", - "chokidar": "3.5.2", - "debug": "4.3.1", + "chokidar": "3.5.3", + "debug": "4.3.3", "diff": "5.0.0", "escape-string-regexp": "4.0.0", "find-up": "5.0.0", - "glob": "7.1.7", + "glob": "7.2.0", "growl": "1.10.5", "he": "1.2.0", "js-yaml": "4.1.0", "log-symbols": "4.1.0", - "minimatch": "3.0.4", + "minimatch": "4.2.1", "ms": "2.1.3", - "nanoid": "3.1.23", + "nanoid": "3.3.1", "serialize-javascript": "6.0.0", "strip-json-comments": "3.1.1", "supports-color": "8.1.1", "which": "2.0.2", - "wide-align": "1.1.3", - "workerpool": "6.1.5", + "workerpool": "6.2.0", "yargs": "16.2.0", "yargs-parser": "20.2.4", "yargs-unparser": "2.0.0" @@ -1046,23 +1046,6 @@ "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", "dev": true }, - "debug": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", - "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", - "dev": true, - "requires": { - "ms": "2.1.2" - }, - "dependencies": { - "ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", - "dev": true - } - } - }, "has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -1078,6 +1061,15 @@ "argparse": "^2.0.1" } }, + "minimatch": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-4.2.1.tgz", + 
"integrity": "sha512-9Uq1ChtSZO+Mxa/CL1eGizn2vRn3MlLgzhT0Iz8zaY8NdvxvB0d5QdPFmCKf7JKA9Lerx5vRrnwO03jsSfGG9g==", + "dev": true, + "requires": { + "brace-expansion": "^1.1.7" + } + }, "ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -1102,9 +1094,9 @@ "dev": true }, "nanoid": { - "version": "3.1.23", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.23.tgz", - "integrity": "sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==", + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.1.tgz", + "integrity": "sha512-n6Vs/3KGyxPQd6uO0eH4Bv0ojGSUvuLlIHtC3Y0kEO23YRge8H9x1GCzLn28YX0H66pMkxuaeESFq4tKISKwdw==", "dev": true }, "natural-compare": { @@ -1188,9 +1180,9 @@ "dev": true }, "picomatch": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.0.tgz", - "integrity": "sha512-lY1Q/PiJGC2zOv/z391WOTD+Z02bCgsFfvxoXXf6h7kv9o+WmsmzYqrAwY63sNgOxE4xEdq0WyUnXfKeBrSvYw==", + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", "dev": true }, "prelude-ls": { @@ -1269,9 +1261,9 @@ "dev": true }, "scaling-palm-tree": { - "version": "github:mjethani/scaling-palm-tree#15cf1ab37e038771e1ff8005edc46d95f176739f", - "from": "github:mjethani/scaling-palm-tree#15cf1ab37e038771e1ff8005edc46d95f176739f", - "dev": true + "version": "git+ssh://git@github.com/mjethani/scaling-palm-tree.git#15cf1ab37e038771e1ff8005edc46d95f176739f", + "dev": true, + "from": "scaling-palm-tree@github:mjethani/scaling-palm-tree#15cf1ab37e038771e1ff8005edc46d95f176739f" }, "semver": { "version": "7.3.5", @@ -1506,48 +1498,6 @@ "isexe": "^2.0.0" } }, - "wide-align": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/wide-align/-/wide-align-1.1.3.tgz", - "integrity": 
"sha512-QGkOQc8XL6Bt5PwnsExKBPuMKBxnGxWWW3fU55Xt4feHozMUhdUMaBCk290qpm/wG5u/RSKzwdAC4i51YigihA==", - "dev": true, - "requires": { - "string-width": "^1.0.2 || 2" - }, - "dependencies": { - "ansi-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz", - "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", - "dev": true - }, - "is-fullwidth-code-point": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", - "integrity": "sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=", - "dev": true - }, - "string-width": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", - "integrity": "sha512-nOqH59deCq9SRHlxq1Aw85Jnt4w6KvLKqWVik6oA9ZklXLNIOlqg4F2yrT1MVaTjAqvVwdfeZ7w7aCvJD7ugkw==", - "dev": true, - "requires": { - "is-fullwidth-code-point": "^2.0.0", - "strip-ansi": "^4.0.0" - } - }, - "strip-ansi": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-4.0.0.tgz", - "integrity": "sha1-qEeQIusaw2iocTibY1JixQXuNo8=", - "dev": true, - "requires": { - "ansi-regex": "^3.0.0" - } - } - } - }, "word-wrap": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", @@ -1555,9 +1505,9 @@ "dev": true }, "workerpool": { - "version": "6.1.5", - "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.1.5.tgz", - "integrity": "sha512-XdKkCK0Zqc6w3iTxLckiuJ81tiD/o5rBE/m+nXpRCB+/Sq4DqkfXZ/x0jW02DG1tGsfUGXbTJyZDP+eu67haSw==", + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.2.0.tgz", + "integrity": "sha512-Rsk5qQHJ9eowMH28Jwhe8HEbmdYDX4lwoMWshiCXugjtHqMD9ZbiqSDLxcsfdqsETPzVUtX5s1Z5kStiIM6l4A==", "dev": true }, "wrap-ansi": { diff --git a/platform/npm/package.json b/platform/npm/package.json index 441f6dcad..50f711991 100644 --- a/platform/npm/package.json +++ b/platform/npm/package.json @@ -1,6 +1,6 @@ { "name": 
"@gorhill/ubo-core", - "version": "0.1.25", + "version": "0.1.26", "description": "To create a working instance of uBlock Origin's static network filtering engine", "type": "module", "main": "index.js", diff --git a/src/devtools.html b/src/devtools.html index ba14c5d17..8d8927647 100644 --- a/src/devtools.html +++ b/src/devtools.html @@ -26,6 +26,7 @@ + diff --git a/src/js/assets.js b/src/js/assets.js index 576dc1ded..f4f0ffe65 100644 --- a/src/js/assets.js +++ b/src/js/assets.js @@ -26,6 +26,7 @@ import cacheStorage from './cachestorage.js'; import logger from './logger.js'; import µb from './background.js'; +import { StaticFilteringParser } from './static-filtering-parser.js'; /******************************************************************************/ @@ -267,7 +268,10 @@ assets.fetchFilterList = async function(mainlistURL) { } if ( result instanceof Object === false ) { continue; } const content = result.content; - const slices = µb.preparseDirectives.split(content); + const slices = StaticFilteringParser.utils.preparser.splitter( + content, + vAPI.webextFlavor.env + ); for ( let i = 0, n = slices.length - 1; i < n; i++ ) { const slice = content.slice(slices[i+0], slices[i+1]); if ( (i & 1) !== 0 ) { diff --git a/src/js/biditrie.js b/src/js/biditrie.js index 38b780faf..97234963a 100644 --- a/src/js/biditrie.js +++ b/src/js/biditrie.js @@ -715,42 +715,57 @@ class BidiTrieContainer { this.done = true; return this; } - this.charPtr = this.forks.pop(); + this.pattern = this.forks.pop(); + this.dir = this.forks.pop(); this.icell = this.forks.pop(); } + const buf32 = this.container.buf32; + const buf8 = this.container.buf8; for (;;) { - const idown = this.container.buf32[this.icell+CELL_OR]; - if ( idown !== 0 ) { - this.forks.push(idown, this.charPtr); + const ialt = buf32[this.icell+CELL_OR]; + const v = buf32[this.icell+SEGMENT_INFO]; + const offset = v & 0x00FFFFFF; + let i0 = buf32[CHAR0_SLOT] + offset; + const len = v >>> 24; + for ( let i = 0; i < len; i++ 
) { + this.charBuf[i] = buf8[i0+i]; } - const v = this.container.buf32[this.icell+SEGMENT_INFO]; - let i0 = this.container.buf32[CHAR0_SLOT] + (v & 0x00FFFFFF); - const i1 = i0 + (v >>> 24); - while ( i0 < i1 ) { - this.charBuf[this.charPtr] = this.container.buf8[i0]; - this.charPtr += 1; - i0 += 1; + if ( len !== 0 && ialt !== 0 ) { + this.forks.push(ialt, this.dir, this.pattern); } - this.icell = this.container.buf32[this.icell+CELL_AND]; - if ( this.icell === 0 ) { - return this.toPattern(); + const inext = buf32[this.icell+CELL_AND]; + if ( len !== 0 ) { + const s = this.textDecoder.decode( + new Uint8Array(this.charBuf.buffer, 0, len) + ); + if ( this.dir > 0 ) { + this.pattern += s; + } else if ( this.dir < 0 ) { + this.pattern = s + this.pattern; + } } - if ( this.container.buf32[this.icell+SEGMENT_INFO] === 0 ) { - this.icell = this.container.buf32[this.icell+CELL_AND]; - return this.toPattern(); + this.icell = inext; + if ( len !== 0 ) { continue; } + // boundary cell + if ( ialt !== 0 ) { + if ( inext === 0 ) { + this.icell = ialt; + this.dir = -1; + } else { + this.forks.push(ialt, -1, this.pattern); + } + } + if ( offset !== 0 ) { + this.value = { pattern: this.pattern, iextra: offset }; + return this; } } }, - toPattern() { - this.value = this.textDecoder.decode( - new Uint8Array(this.charBuf.buffer, 0, this.charPtr) - ); - return this; - }, container: this, icell: iroot, charBuf: new Uint8Array(256), - charPtr: 0, + pattern: '', + dir: 1, forks: [], textDecoder: new TextDecoder(), [Symbol.iterator]() { return this; }, diff --git a/src/js/devtools.js b/src/js/devtools.js index e4346a3ba..fbc526e58 100644 --- a/src/js/devtools.js +++ b/src/js/devtools.js @@ -45,7 +45,8 @@ CodeMirror.registerGlobalHelper( let nextLineNo = startLineNo + 1; while ( nextLineNo < lastLineNo ) { const nextLine = cm.getLine(nextLineNo); - if ( nextLine.startsWith(foldCandidate) === false ) { + // TODO: use regex to find folding end + if ( nextLine.startsWith(foldCandidate) === 
false && nextLine !== ']' ) { if ( startLineNo >= endLineNo ) { return; } return { from: CodeMirror.Pos(startLineNo, startLine.length), @@ -142,6 +143,17 @@ uDom.nodeFromId('snfe-dump').addEventListener('click', ev => { }); }); +uDom.nodeFromId('snfe-todnr').addEventListener('click', ev => { + const button = ev.target; + button.setAttribute('disabled', ''); + vAPI.messaging.send('dashboard', { + what: 'snfeToDNR', + }).then(result => { + log(result); + button.removeAttribute('disabled'); + }); +}); + vAPI.messaging.send('dashboard', { what: 'getAppData', }).then(appData => { diff --git a/src/js/messaging.js b/src/js/messaging.js index 7c5335f5f..ad8752dfd 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -39,6 +39,7 @@ import staticNetFilteringEngine from './static-net-filtering.js'; import µb from './background.js'; import webRequest from './traffic.js'; import { denseBase64 } from './base64-custom.js'; +import { dnrRulesetFromRawLists } from './static-dnr-filtering.js'; import { redirectEngine } from './redirect-engine.js'; import { StaticFilteringParser } from './static-filtering-parser.js'; @@ -143,6 +144,98 @@ const onMessage = function(request, sender, callback) { }); return; + case 'snfeToDNR': { + const listPromises = []; + const listNames = []; + for ( const assetKey of µb.selectedFilterLists ) { + listPromises.push( + io.get(assetKey, { dontCache: true }).then(details => { + listNames.push(assetKey); + return { name: assetKey, text: details.content }; + }) + ); + } + const options = { + extensionPaths: redirectEngine.getResourceDetails(), + env: vAPI.webextFlavor.env, + }; + const t0 = Date.now(); + dnrRulesetFromRawLists(listPromises, options).then(ruleset => { + const replacer = (k, v) => { + if ( k.startsWith('__') ) { return; } + if ( Array.isArray(v) ) { + return v.sort(); + } + if ( v instanceof Object ) { + const sorted = {}; + for ( const kk of Object.keys(v).sort() ) { + sorted[kk] = v[kk]; + } + return sorted; + } + return v; + }; + 
const isUnsupported = rule => + rule._error !== undefined; + const isRegex = rule => + rule.condition !== undefined && + rule.condition.regexFilter !== undefined; + const isRedirect = rule => + rule.action !== undefined && + rule.action.type === 'redirect' && + rule.action.redirect.extensionPath !== undefined; + const isCsp = rule => + rule.action !== undefined && + rule.action.type === 'modifyHeaders'; + const isRemoveparam = rule => + rule.action !== undefined && + rule.action.type === 'redirect' && + rule.action.redirect.transform !== undefined; + const runtime = Date.now() - t0; + const out = [ + `dnrRulesetFromRawLists(${JSON.stringify(listNames, null, 2)})`, + `Run time: ${runtime} ms`, + ]; + const good = ruleset.filter(rule => + isUnsupported(rule) === false && + isRegex(rule) === false && + isRedirect(rule) === false && + isCsp(rule) === false && + isRemoveparam(rule) === false + ); + out.push(`+ Good filters (${good.length}): ${JSON.stringify(good, replacer, 2)}`); + const regexes = ruleset.filter(rule => + isUnsupported(rule) === false && + isRegex(rule) && + isRedirect(rule) === false && + isCsp(rule) === false && + isRemoveparam(rule) === false + ); + out.push(`+ Regex-based filters (${regexes.length}): ${JSON.stringify(regexes, replacer, 2)}`); + const redirects = ruleset.filter(rule => + isUnsupported(rule) === false && + isRedirect(rule) + ); + out.push(`+ 'redirect=' filters (${redirects.length}): ${JSON.stringify(redirects, replacer, 2)}`); + const headers = ruleset.filter(rule => + isUnsupported(rule) === false && + isCsp(rule) + ); + out.push(`+ 'csp=' filters (${headers.length}): ${JSON.stringify(headers, replacer, 2)}`); + const removeparams = ruleset.filter(rule => + isUnsupported(rule) === false && + isRemoveparam(rule) + ); + out.push(`+ 'removeparam=' filters (${removeparams.length}): ${JSON.stringify(removeparams, replacer, 2)}`); + const bad = ruleset.filter(rule => + isUnsupported(rule) + ); + out.push(`+ Unsupported filters 
(${bad.length}): ${JSON.stringify(bad, replacer, 2)}`); + callback(out.join('\n')); + }); + return; + } + default: break; } @@ -1346,7 +1439,7 @@ const getSupportData = async function() { scriptlet: scriptletFilteringEngine.getFilterCount(), html: htmlFilteringEngine.getFilterCount(), }, - 'listset (total-discarded, last updated)': { + 'listset (total-discarded, last-updated)': { removed: removedListset, added: addedListset, default: defaultListset, @@ -1429,8 +1522,10 @@ const onMessage = function(request, sender, callback) { response = {}; if ( (request.hintUpdateToken || 0) === 0 ) { response.redirectResources = redirectEngine.getResourceDetails(); - response.preparseDirectiveTokens = µb.preparseDirectives.getTokens(); - response.preparseDirectiveHints = µb.preparseDirectives.getHints(); + response.preparseDirectiveTokens = + StaticFilteringParser.utils.preparser.getTokens(vAPI.webextFlavor.env); + response.preparseDirectiveHints = + StaticFilteringParser.utils.preparser.getHints(); response.expertMode = µb.hiddenSettings.filterAuthorMode; } if ( request.hintUpdateToken !== µb.pageStoresToken ) { diff --git a/src/js/redirect-engine.js b/src/js/redirect-engine.js index 062317f59..dfc69cf25 100644 --- a/src/js/redirect-engine.js +++ b/src/js/redirect-engine.js @@ -348,6 +348,15 @@ RedirectEngine.prototype.tokenToURL = function( /******************************************************************************/ +RedirectEngine.prototype.tokenToDNR = function(token) { + const entry = this.resources.get(this.aliases.get(token) || token); + if ( entry === undefined ) { return; } + if ( entry.warURL === undefined ) { return; } + return entry.warURL; +}; + +/******************************************************************************/ + RedirectEngine.prototype.hasToken = function(token) { if ( token === 'none' ) { return true; } const asDataURI = token.charCodeAt(0) === 0x25 /* '%' */; @@ -554,6 +563,7 @@ RedirectEngine.prototype.getResourceDetails = function() { 
canInject: typeof entry.data === 'string', canRedirect: entry.warURL !== undefined, aliasOf: '', + extensionPath: entry.warURL, }); } for ( const [ alias, name ] of this.aliases ) { diff --git a/src/js/static-dnr-filtering.js b/src/js/static-dnr-filtering.js new file mode 100644 index 000000000..2a1a21259 --- /dev/null +++ b/src/js/static-dnr-filtering.js @@ -0,0 +1,104 @@ +/******************************************************************************* + + uBlock Origin - a browser extension to block requests. + Copyright (C) 2014-present Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. 
+ + Home: https://github.com/gorhill/uBlock +*/ + +'use strict'; + +/******************************************************************************/ + +import staticNetFilteringEngine from './static-net-filtering.js'; +import { LineIterator } from './text-utils.js'; +import { StaticFilteringParser } from './static-filtering-parser.js'; + +import { + CompiledListReader, + CompiledListWriter, +} from './static-filtering-io.js'; + +/******************************************************************************/ + +function addToDNR(context, list) { + const writer = new CompiledListWriter(); + const lineIter = new LineIterator( + StaticFilteringParser.utils.preparser.prune( + list.text, + context.env || [] + ) + ); + const parser = new StaticFilteringParser(); + const compiler = staticNetFilteringEngine.createCompiler(parser); + + writer.properties.set('name', list.name); + parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH); + compiler.start(writer); + + while ( lineIter.eot() === false ) { + let line = lineIter.next(); + while ( line.endsWith(' \\') ) { + if ( lineIter.peek(4) !== ' ' ) { break; } + line = line.slice(0, -2).trim() + lineIter.next().trim(); + } + + parser.analyze(line); + + if ( parser.shouldIgnore() ) { continue; } + if ( parser.category !== parser.CATStaticNetFilter ) { continue; } + + // https://github.com/gorhill/uBlock/issues/2599 + // convert hostname to punycode if needed + if ( parser.patternHasUnicode() && parser.toASCII() === false ) { + continue; + } + + if ( compiler.compile(writer) ) { continue; } + + if ( compiler.error !== undefined ) { + context.invalid.add(compiler.error); + } + } + + compiler.finish(writer); + + staticNetFilteringEngine.dnrFromCompiled( + 'add', + context, + new CompiledListReader(writer.toString()) + ); +} + +/******************************************************************************/ + +async function dnrRulesetFromRawLists(lists, options = {}) { + const context = 
staticNetFilteringEngine.dnrFromCompiled('begin'); + context.extensionPaths = new Map(options.extensionPaths || []); + context.env = options.env; + const toLoad = []; + const toDNR = (context, list) => addToDNR(context, list); + for ( const list of lists ) { + toLoad.push(list.then(list => toDNR(context, list))); + } + await Promise.all(toLoad); + const ruleset = staticNetFilteringEngine.dnrFromCompiled('end', context); + return ruleset; +} + +/******************************************************************************/ + +export { dnrRulesetFromRawLists }; diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index 37c05287d..b29113c8a 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -684,7 +684,7 @@ const Parser = class { analyzeNetExtra() { if ( this.patternIsRegex() ) { - if ( this.regexUtils.isValid(this.getNetPattern()) === false ) { + if ( this.utils.regex.isValid(this.getNetPattern()) === false ) { this.markSpan(this.patternSpan, BITError); } } else if ( @@ -1048,7 +1048,7 @@ const Parser = class { // TODO: not necessarily true, this needs more work. if ( this.patternIsRegex === false ) { return true; } return this.reGoodRegexToken.test( - this.regexUtils.toTokenizableStr(this.getNetPattern()) + this.utils.regex.toTokenizableStr(this.getNetPattern()) ); } @@ -2962,134 +2962,269 @@ const ExtOptionsIterator = class { /******************************************************************************/ -// Depends on: -// https://github.com/foo123/RegexAnalyzer +Parser.utils = Parser.prototype.utils = (( ) => { -Parser.regexUtils = Parser.prototype.regexUtils = (( ) => { + // Depends on: + // https://github.com/foo123/RegexAnalyzer + const regexAnalyzer = Regex && Regex.Analyzer || null; - const firstCharCodeClass = s => { - return /^[\x01%0-9A-Za-z]/.test(s) ? 1 : 0; - }; - - const lastCharCodeClass = s => { - return /[\x01%0-9A-Za-z]$/.test(s) ? 
1 : 0; - }; - - const toTokenizableStr = node => { - switch ( node.type ) { - case 1: /* T_SEQUENCE, 'Sequence' */ { - let s = ''; - for ( let i = 0; i < node.val.length; i++ ) { - s += toTokenizableStr(node.val[i]); - } - return s; + class regex { + static firstCharCodeClass(s) { + return /^[\x01%0-9A-Za-z]/.test(s) ? 1 : 0; } - case 2: /* T_ALTERNATION, 'Alternation' */ - case 8: /* T_CHARGROUP, 'CharacterGroup' */ { - let firstChar = 0; - let lastChar = 0; - for ( let i = 0; i < node.val.length; i++ ) { - const s = toTokenizableStr(node.val[i]); - if ( firstChar === 0 && firstCharCodeClass(s) === 1 ) { - firstChar = 1; + + static lastCharCodeClass(s) { + return /[\x01%0-9A-Za-z]$/.test(s) ? 1 : 0; + } + + static tokenizableStrFromNode(node) { + switch ( node.type ) { + case 1: /* T_SEQUENCE, 'Sequence' */ { + let s = ''; + for ( let i = 0; i < node.val.length; i++ ) { + s += this.tokenizableStrFromNode(node.val[i]); } - if ( lastChar === 0 && lastCharCodeClass(s) === 1 ) { - lastChar = 1; - } - if ( firstChar === 1 && lastChar === 1 ) { break; } + return s; } - return String.fromCharCode(firstChar, lastChar); - } - case 4: /* T_GROUP, 'Group' */ { - if ( node.flags.NegativeLookAhead === 1 ) { return '\x01'; } - if ( node.flags.NegativeLookBehind === 1 ) { return '\x01'; } - return toTokenizableStr(node.val); - } - case 16: /* T_QUANTIFIER, 'Quantifier' */ { - const s = toTokenizableStr(node.val); - const first = firstCharCodeClass(s); - const last = lastCharCodeClass(s); - if ( node.flags.min === 0 && first === 0 && last === 0 ) { + case 2: /* T_ALTERNATION, 'Alternation' */ + case 8: /* T_CHARGROUP, 'CharacterGroup' */ { + let firstChar = 0; + let lastChar = 0; + for ( let i = 0; i < node.val.length; i++ ) { + const s = this.tokenizableStrFromNode(node.val[i]); + if ( firstChar === 0 && this.firstCharCodeClass(s) === 1 ) { + firstChar = 1; + } + if ( lastChar === 0 && this.lastCharCodeClass(s) === 1 ) { + lastChar = 1; + } + if ( firstChar === 1 && lastChar === 
1 ) { break; } + } + return String.fromCharCode(firstChar, lastChar); + } + case 4: /* T_GROUP, 'Group' */ { + if ( node.flags.NegativeLookAhead === 1 ) { return '\x01'; } + if ( node.flags.NegativeLookBehind === 1 ) { return '\x01'; } + return this.tokenizableStrFromNode(node.val); + } + case 16: /* T_QUANTIFIER, 'Quantifier' */ { + const s = this.tokenizableStrFromNode(node.val); + const first = this.firstCharCodeClass(s); + const last = this.lastCharCodeClass(s); + if ( node.flags.min === 0 && first === 0 && last === 0 ) { + return ''; + } + return String.fromCharCode(first, last); + } + case 64: /* T_HEXCHAR, 'HexChar' */ { + return String.fromCharCode(parseInt(node.val.slice(1), 16)); + } + case 128: /* T_SPECIAL, 'Special' */ { + const flags = node.flags; + if ( + flags.EndCharGroup === 1 || // dangling `]` + flags.EndGroup === 1 || // dangling `)` + flags.EndRepeats === 1 // dangling `}` + ) { + throw new Error('Unmatched bracket'); + } + return flags.MatchEnd === 1 || + flags.MatchStart === 1 || + flags.MatchWordBoundary === 1 + ? '\x00' + : '\x01'; + } + case 256: /* T_CHARS, 'Characters' */ { + for ( let i = 0; i < node.val.length; i++ ) { + if ( this.firstCharCodeClass(node.val[i]) === 1 ) { + return '\x01'; + } + } + return '\x00'; + } + // Ranges are assumed to always involve token-related characters. 
+ case 512: /* T_CHARRANGE, 'CharacterRange' */ { + return '\x01'; + } + case 1024: /* T_STRING, 'String' */ { + return node.val; + } + case 2048: /* T_COMMENT, 'Comment' */ { return ''; } - return String.fromCharCode(first, last); - } - case 64: /* T_HEXCHAR, 'HexChar' */ { - return String.fromCharCode(parseInt(node.val.slice(1), 16)); - } - case 128: /* T_SPECIAL, 'Special' */ { - const flags = node.flags; - if ( - flags.EndCharGroup === 1 || // dangling `]` - flags.EndGroup === 1 || // dangling `)` - flags.EndRepeats === 1 // dangling `}` - ) { - throw new Error('Unmatched bracket'); + default: + break; } - return flags.MatchEnd === 1 || - flags.MatchStart === 1 || - flags.MatchWordBoundary === 1 - ? '\x00' - : '\x01'; - } - case 256: /* T_CHARS, 'Characters' */ { - for ( let i = 0; i < node.val.length; i++ ) { - if ( firstCharCodeClass(node.val[i]) === 1 ) { - return '\x01'; - } - } - return '\x00'; - } - // Ranges are assumed to always involve token-related characters. - case 512: /* T_CHARRANGE, 'CharacterRange' */ { return '\x01'; } - case 1024: /* T_STRING, 'String' */ { - return node.val; - } - case 2048: /* T_COMMENT, 'Comment' */ { - return ''; - } - default: - break; - } - return '\x01'; - }; - if ( - Regex instanceof Object === false || - Regex.Analyzer instanceof Object === false - ) { - return { - isValid: function(reStr) { - try { - void new RegExp(reStr); - } catch(ex) { - return false; - } - return true; - }, - toTokenizableStr: ( ) => '', - }; - } - - return { - isValid: function(reStr) { + static isValid(reStr) { try { void new RegExp(reStr); - void toTokenizableStr(Regex.Analyzer(reStr, false).tree()); + if ( regexAnalyzer !== null ) { + void this.tokenizableStrFromNode( + regexAnalyzer(reStr, false).tree() + ); + } } catch(ex) { return false; } return true; - }, - toTokenizableStr: function(reStr) { + } + + static isRE2(reStr) { + if ( regexAnalyzer === null ) { return true; } + let tree; try { - return toTokenizableStr(Regex.Analyzer(reStr, 
false).tree()); + tree = regexAnalyzer(reStr, false).tree(); + } catch(ex) { + return; + } + const isRE2 = node => { + if ( node instanceof Object === false ) { return true; } + if ( node.flags instanceof Object ) { + if ( node.flags.LookAhead === 1 ) { return false; } + if ( node.flags.NegativeLookAhead === 1 ) { return false; } + if ( node.flags.LookBehind === 1 ) { return false; } + if ( node.flags.NegativeLookBehind === 1 ) { return false; } + } + if ( Array.isArray(node.val) ) { + for ( const entry of node.val ) { + if ( isRE2(entry) === false ) { return false; } + } + } + if ( node.val instanceof Object ) { + return isRE2(node.val); + } + return true; + }; + return isRE2(tree); + } + + static toTokenizableStr(reStr) { + if ( regexAnalyzer === null ) { return ''; } + try { + return this.tokenizableStrFromNode( + regexAnalyzer(reStr, false).tree() + ); } catch(ex) { } return ''; - }, + } + } + + const preparserTokens = new Map([ + [ 'ext_ublock', 'ublock' ], + [ 'env_chromium', 'chromium' ], + [ 'env_edge', 'edge' ], + [ 'env_firefox', 'firefox' ], + [ 'env_legacy', 'legacy' ], + [ 'env_mobile', 'mobile' ], + [ 'env_safari', 'safari' ], + [ 'cap_html_filtering', 'html_filtering' ], + [ 'cap_user_stylesheet', 'user_stylesheet' ], + [ 'false', 'false' ], + // Hoping ABP-only list maintainers can at least make use of it to + // help non-ABP content blockers better deal with filters benefiting + // only ABP. 
+ [ 'ext_abp', 'false' ], + // Compatibility with other blockers + // https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#adguard-specific + [ 'adguard', 'adguard' ], + [ 'adguard_app_android', 'false' ], + [ 'adguard_app_ios', 'false' ], + [ 'adguard_app_mac', 'false' ], + [ 'adguard_app_windows', 'false' ], + [ 'adguard_ext_android_cb', 'false' ], + [ 'adguard_ext_chromium', 'chromium' ], + [ 'adguard_ext_edge', 'edge' ], + [ 'adguard_ext_firefox', 'firefox' ], + [ 'adguard_ext_opera', 'chromium' ], + [ 'adguard_ext_safari', 'false' ], + ]); + + class preparser { + // This method returns an array of indices, corresponding to position in + // the content string which should alternatively be parsed and discarded. + static splitter(content, env) { + const reIf = /^!#(if|endif)\b([^\n]*)(?:[\n\r]+|$)/gm; + const stack = []; + const shouldDiscard = ( ) => stack.some(v => v); + const parts = [ 0 ]; + let discard = false; + + for (;;) { + const match = reIf.exec(content); + if ( match === null ) { break; } + + switch ( match[1] ) { + case 'if': + let expr = match[2].trim(); + const target = expr.charCodeAt(0) === 0x21 /* '!' 
*/; + if ( target ) { expr = expr.slice(1); } + const token = preparserTokens.get(expr); + const startDiscard = + token === 'false' && target === false || + token !== undefined && env.includes(token) === target; + if ( discard === false && startDiscard ) { + parts.push(match.index); + discard = true; + } + stack.push(startDiscard); + break; + + case 'endif': + stack.pop(); + const stopDiscard = shouldDiscard() === false; + if ( discard && stopDiscard ) { + parts.push(match.index + match[0].length); + discard = false; + } + break; + + default: + break; + } + } + + parts.push(content.length); + return parts; + } + + static prune(content, env) { + const parts = this.splitter(content, env); + const out = []; + for ( let i = 0, n = parts.length - 1; i < n; i += 2 ) { + const beg = parts[i+0]; + const end = parts[i+1]; + out.push(content.slice(beg, end)); + } + return out.join('\n'); + } + + static getHints() { + const out = []; + const vals = new Set(); + for ( const [ key, val ] of preparserTokens ) { + if ( vals.has(val) ) { continue; } + vals.add(val); + out.push(key); + } + return out; + } + + static getTokens(env) { + const out = new Map(); + for ( const [ key, val ] of preparserTokens ) { + out.set(key, val !== 'false' && env.includes(val)); + } + return Array.from(out); + } + } + + return { + preparser, + regex, }; })(); diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index ce5291021..331b35975 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -143,7 +143,7 @@ const typeValueToTypeName = [ 'object', 'script', 'xmlhttprequest', - 'subdocument', + 'sub_frame', 'font', 'media', 'websocket', @@ -605,6 +605,22 @@ const filterDumpInfo = (idata) => { return fc.dumpInfo(idata); }; +const dnrRuleFromCompiled = (args, rule) => { + const fc = filterClasses[args[0]]; + if ( fc.dnrFromCompiled === undefined ) { return false; } + fc.dnrFromCompiled(args, rule); + return true; +}; + +const dnrAddRuleError = (rule, 
msg) => { + rule._error = rule._error || []; + rule._error.push(msg); +}; + +const dnrAddRuleWarning = (rule, msg) => { + rule._warning = rule._warning || []; + rule._warning.push(msg); +}; /******************************************************************************* @@ -701,6 +717,10 @@ const FilterImportant = class { return filterDataAlloc(args[0]); } + static dnrFromCompiled(args, rule) { + rule.priority = (rule.priority || 0) + 10; + } + static keyFromArgs() { } @@ -764,6 +784,16 @@ const FilterPatternPlain = class { return idata; } + static dnrFromCompiled(args, rule) { + if ( rule.condition === undefined ) { + rule.condition = {}; + } else if ( rule.condition.urlFilter !== undefined ) { + rule._error = rule._error || []; + rule._error.push(`urlFilter already defined: ${rule.condition.urlFilter}`); + } + rule.condition.urlFilter = args[1]; + } + static logData(idata, details) { const s = bidiTrie.extractString( filterData[idata+1], @@ -883,6 +913,27 @@ const FilterPatternGeneric = class { return idata; } + static dnrFromCompiled(args, rule) { + if ( rule.condition === undefined ) { + rule.condition = {}; + } else if ( rule.condition.urlFilter !== undefined ) { + dnrAddRuleError(rule, `urlFilter already defined: ${rule.condition.urlFilter}`); + } + let pattern = args[1]; + if ( args[2] & 0b100 ) { + if ( pattern.startsWith('.') ) { + pattern = `*${pattern}`; + } + pattern = `||${pattern}`; + } else if ( args[2] & 0b010 ) { + pattern = `|${pattern}`; + } + if ( args[2] & 0b001 ) { + pattern += '|'; + } + rule.condition.urlFilter = pattern; + } + static keyFromArgs(args) { return `${args[1]}\t${args[2]}`; } @@ -974,6 +1025,10 @@ const FilterAnchorHnLeft = class { return idata; } + static dnrFromCompiled(args, rule) { + rule.condition.urlFilter = `||${rule.condition.urlFilter}`; + } + static keyFromArgs() { } @@ -995,6 +1050,11 @@ const FilterAnchorHn = class extends FilterAnchorHnLeft { return [ FilterAnchorHn.fid ]; } + static dnrFromCompiled(args, rule) { + 
rule.condition.requestDomains = [ rule.condition.urlFilter ]; + rule.condition.urlFilter = undefined; + } + static keyFromArgs() { } @@ -1022,6 +1082,10 @@ const FilterAnchorLeft = class { return filterDataAlloc(args[0]); } + static dnrFromCompiled(args, rule) { + rule.condition.urlFilter = `|${rule.condition.urlFilter}`; + } + static keyFromArgs() { } @@ -1048,6 +1112,10 @@ const FilterAnchorRight = class { return filterDataAlloc(args[0]); } + static dnrFromCompiled(args, rule) { + rule.condition.urlFilter = `${rule.condition.urlFilter}|`; + } + static keyFromArgs() { } @@ -1079,6 +1147,10 @@ const FilterTrailingSeparator = class { return filterDataAlloc(args[0]); } + static dnrFromCompiled(args, rule) { + rule.condition.urlFilter = `${rule.condition.urlFilter}^`; + } + static keyFromArgs() { } @@ -1135,6 +1207,17 @@ const FilterRegex = class { return idata; } + static dnrFromCompiled(args, rule) { + if ( rule.condition === undefined ) { + rule.condition = {}; + } + if ( StaticFilteringParser.utils.regex.isRE2(args[1]) === false ) { + dnrAddRuleError(rule, `regexFilter is not RE2-compatible: ${args[1]}`); + } + rule.condition.regexFilter = args[1]; + rule.condition.isUrlFilterCaseSensitive = args[2] === 1; + } + static keyFromArgs(args) { return `${args[1]}\t${args[2]}`; } @@ -1194,6 +1277,20 @@ const FilterNotType = class { return idata; } + static dnrFromCompiled(args, rule) { + rule.condition = rule.condition || {}; + if ( rule.condition.excludedResourceTypes === undefined ) { + rule.condition.excludedResourceTypes = []; + } + let bits = args[1]; + for ( let i = 1; bits !== 0 && i < typeValueToTypeName.length; i++ ) { + const bit = 1 << (i - 1); + if ( (bits & bit) === 0 ) { continue; } + bits &= ~bit; + rule.condition.excludedResourceTypes.push(`${typeValueToTypeName[i]}`); + } + } + static keyFromArgs(args) { return `${args[1]}`; } @@ -1386,6 +1483,14 @@ const FilterOriginHit = class { return idata; } + static dnrFromCompiled(args, rule) { + rule.condition = 
rule.condition || {}; + if ( rule.condition.initiatorDomains === undefined ) { + rule.condition.initiatorDomains = []; + } + rule.condition.initiatorDomains.push(args[1]); + } + static logData(idata, details) { details.domains.push(this.getDomainOpt(idata)); } @@ -1412,6 +1517,14 @@ const FilterOriginMiss = class extends FilterOriginHit { return [ FilterOriginMiss.fid, hostname ]; } + static dnrFromCompiled(args, rule) { + rule.condition = rule.condition || {}; + if ( rule.condition.excludedInitiatorDomains === undefined ) { + rule.condition.excludedInitiatorDomains = []; + } + rule.condition.excludedInitiatorDomains.push(args[1]); + } + static logData(idata, details) { details.domains.push(`~${this.getDomainOpt(idata)}`); } @@ -1529,6 +1642,14 @@ const FilterOriginHitSet = class { return idata; } + static dnrFromCompiled(args, rule) { + rule.condition = rule.condition || {}; + if ( rule.condition.initiatorDomains === undefined ) { + rule.condition.initiatorDomains = []; + } + rule.condition.initiatorDomains.push(...args[1].split('|')); + } + static toTrie(idata) { if ( filterData[idata+2] === 0 ) { return 0; } const itrie = filterData[idata+4] = @@ -1573,6 +1694,14 @@ const FilterOriginMissSet = class extends FilterOriginHitSet { ]; } + static dnrFromCompiled(args, rule) { + rule.condition = rule.condition || {}; + if ( rule.condition.excludedInitiatorDomains === undefined ) { + rule.condition.excludedInitiatorDomains = []; + } + rule.condition.excludedInitiatorDomains.push(...args[1].split('|')); + } + static keyFromArgs(args) { return args[1]; } @@ -1596,6 +1725,11 @@ const FilterOriginEntityHit = class extends FilterOriginHit { static compile(entity) { return [ FilterOriginEntityHit.fid, entity ]; } + + static dnrFromCompiled(args, rule) { + dnrAddRuleError(rule, `Entity not supported: ${args[1]}`); + super.dnrFromCompiled(args, rule); + } }; registerFilterClass(FilterOriginEntityHit); @@ -1610,6 +1744,11 @@ const FilterOriginEntityMiss = class extends 
FilterOriginMiss { static compile(entity) { return [ FilterOriginEntityMiss.fid, entity ]; } + + static dnrFromCompiled(args, rule) { + dnrAddRuleError(rule, `Entity not supported: ${args[1]}`); + super.dnrFromCompiled(args, rule); + } }; registerFilterClass(FilterOriginEntityMiss); @@ -1651,6 +1790,12 @@ const FilterModifier = class { return idata; } + static dnrFromCompiled(args, rule) { + rule.__modifierAction = args[1]; + rule.__modifierType = StaticFilteringParser.netOptionTokenNames.get(args[2]); + rule.__modifierValue = args[3]; + } + static keyFromArgs(args) { return `${args[1]}\t${args[2]}\t${args[3]}`; } @@ -1764,6 +1909,12 @@ const FilterCollection = class { return idata; } + static dnrFromCompiled(args, rule) { + for ( const unit of args[1] ) { + dnrRuleFromCompiled(unit, rule); + } + } + static logData(idata, details) { this.forEach(idata, iunit => { filterLogData(iunit, details); @@ -1991,6 +2142,12 @@ const FilterDenyAllow = class { return idata; } + static dnrFromCompiled(args, rule) { + rule.condition = rule.condition || {}; + rule.condition.excludedRequestDomains = rule.condition.excludedRequestDomains || []; + rule.condition.excludedRequestDomains.push(...args[1].split('|')); + } + static keyFromArgs(args) { return args[1]; } @@ -2445,10 +2602,15 @@ const FilterStrictParty = class { static fromCompiled(args) { return filterDataAlloc( args[0], // fid - args[1] // not + args[1] ); } + static dnrFromCompiled(args, rule) { + const partyness = args[1] === 0 ? 1 : 3; + dnrAddRuleError(rule, `Strict partyness not supported: strict${partyness}p`); + } + static keyFromArgs(args) { return `${args[1]}`; } @@ -3230,7 +3392,7 @@ class FilterCompiler { // Mind `\b` directives: `/\bads\b/` should result in token being `ads`, // not `bads`. 
extractTokenFromRegex(pattern) { - pattern = StaticFilteringParser.regexUtils.toTokenizableStr(pattern); + pattern = StaticFilteringParser.utils.regex.toTokenizableStr(pattern); this.reToken.lastIndex = 0; let bestToken; let bestBadness = 0x7FFFFFFF; @@ -3684,6 +3846,366 @@ FilterContainer.prototype.freeze = function() { /******************************************************************************/ +FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) { + if ( op === 'begin' ) { + return { + good: new Set(), + bad: new Set(), + invalid: new Set(), + }; + } + + if ( op === 'add' ) { + const reader = args[0]; + reader.select('NETWORK_FILTERS:GOOD'); + while ( reader.next() ) { + if ( context.good.has(reader.line) === false ) { + context.good.add(reader.line); + } + } + reader.select('NETWORK_FILTERS:BAD'); + while ( reader.next() ) { + context.bad.add(reader.line); + } + return; + } + + if ( op !== 'end' ) { return; } + + const { good, bad } = context; + const unserialize = CompiledListReader.unserialize; + const buckets = new Map(); + + for ( const line of good ) { + if ( bad.has(line) ) { + continue; + } + + const args = unserialize(line); + const bits = args[0]; + const tokenHash = args[1]; + const fdata = args[2]; + + if ( buckets.has(bits) === false ) { + buckets.set(bits, new Map()); + } + const bucket = buckets.get(bits); + + switch ( tokenHash ) { + case DOT_TOKEN_HASH: { + if ( bucket.has(DOT_TOKEN_HASH) === false ) { + bucket.set(DOT_TOKEN_HASH, [{ + condition: { + requestDomains: [] + } + }]); + } + const rule = bucket.get(DOT_TOKEN_HASH)[0]; + rule.condition.requestDomains.push(fdata); + break; + } + case ANY_TOKEN_HASH: { + if ( bucket.has(ANY_TOKEN_HASH) === false ) { + bucket.set(ANY_TOKEN_HASH, [{ + condition: { + initiatorDomains: [] + } + }]); + } + const rule = bucket.get(ANY_TOKEN_HASH)[0]; + rule.condition.initiatorDomains.push(fdata); + break; + } + case ANY_HTTPS_TOKEN_HASH: { + if ( bucket.has(ANY_HTTPS_TOKEN_HASH) === 
false ) { + bucket.set(ANY_HTTPS_TOKEN_HASH, [{ + condition: { + urlFilter: '|https://', + initiatorDomains: [] + } + }]); + } + const rule = bucket.get(ANY_HTTPS_TOKEN_HASH)[0]; + rule.condition.initiatorDomains.push(fdata); + break; + } + case ANY_HTTP_TOKEN_HASH: { + if ( bucket.has(ANY_HTTP_TOKEN_HASH) === false ) { + bucket.set(ANY_HTTP_TOKEN_HASH, [{ + condition: { + urlFilter: '|http://', + initiatorDomains: [] + } + }]); + } + const rule = bucket.get(ANY_HTTP_TOKEN_HASH)[0]; + rule.condition.initiatorDomains.push(fdata); + break; + } + default: { + if ( bucket.has(EMPTY_TOKEN_HASH) === false ) { + bucket.set(EMPTY_TOKEN_HASH, []); + } + const rule = {}; + dnrRuleFromCompiled(fdata, rule); + bucket.get(EMPTY_TOKEN_HASH).push(rule); + break; + } + } + } + + const realms = new Map([ + [ BlockAction, 'block' ], + [ AllowAction, 'allow' ], + [ ModifyAction, 'modify' ], + ]); + const partyness = new Map([ + [ AnyParty, '' ], + [ FirstParty, 'firstParty' ], + [ ThirdParty, 'thirdParty' ], + ]); + const types = new Set([ + 'no_type', + 'stylesheet', + 'image', + 'object', + 'script', + 'xmlhttprequest', + 'sub_frame', + 'main_frame', + 'font', + 'media', + 'websocket', + 'ping', + 'other', + ]); + let ruleset = []; + for ( const [ realmBits, realmName ] of realms ) { + for ( const [ partyBits, partyName ] of partyness ) { + for ( const typeName in typeNameToTypeValue ) { + if ( types.has(typeName) === false ) { continue; } + const typeBits = typeNameToTypeValue[typeName]; + const bits = realmBits | partyBits | typeBits; + const bucket = buckets.get(bits); + if ( bucket === undefined ) { continue; } + for ( const rules of bucket.values() ) { + for ( const rule of rules ) { + rule.action = rule.action || {}; + rule.action.type = realmName; + if ( partyName !== '' ) { + rule.condition = rule.condition || {}; + rule.condition.domainType = partyName; + } + if ( typeName !== 'no_type' ) { + rule.condition = rule.condition || {}; + rule.condition.resourceTypes = [ 
typeName ]; + } + ruleset.push(rule); + } + } + } + } + } + + // Patch modifier filters + for ( const rule of ruleset ) { + if ( rule.__modifierType === undefined ) { continue; } + switch ( rule.__modifierType ) { + case 'csp': + rule.action.type = 'modifyHeaders'; + rule.action.responseHeaders = [{ + header: 'content-security-policy', + operation: 'append', + value: rule.__modifierValue, + }]; + if ( rule.__modifierAction === AllowAction ) { + dnrAddRuleError(rule, 'Unhandled modifier exception'); + } + break; + case 'redirect-rule': { + let token = rule.__modifierValue; + if ( token !== '' ) { + const match = /:\d+$/.exec(token); + if ( match !== null ) { + token = token.slice(0, match.index); + } + } + const resource = context.extensionPaths.get(token); + if ( rule.__modifierValue !== '' && resource === undefined ) { + dnrAddRuleWarning(rule, `Unpatchable redirect filter: ${rule.__modifierValue}`); + } + const extensionPath = resource && resource.extensionPath || token; + if ( rule.__modifierAction !== AllowAction ) { + rule.action.type = 'redirect'; + rule.action.redirect = { extensionPath }; + rule.priority = (rule.priority || 1) + 1; + } else { + rule.action.type = 'block'; + rule.priority = (rule.priority || 1) + 2; + } + break; + } + case 'removeparam': + rule.action.type = 'redirect'; + if ( rule.__modifierValue !== '' ) { + rule.action.redirect = { + transform: { + queryTransform: { + removeParams: [ rule.__modifierValue ] + } + } + }; + if ( /^\/.+\/$/.test(rule.__modifierValue) ) { + dnrAddRuleError(rule, `Unsupported regex-based removeParam: ${rule.__modifierValue}`); + } + } else { + rule.action.redirect = { + transform: { + query: '' + } + }; + } + if ( rule.__modifierAction === AllowAction ) { + dnrAddRuleError(rule, 'Unhandled modifier exception'); + } + break; + default: + break; + } + } + + // Assign rule ids + const rulesetMap = new Map(); + { + let ruleId = 1; + for ( const rule of ruleset ) { + rulesetMap.set(ruleId++, rule); + } + } + + // 
Merge rules where possible by merging arrays of a specific property. + const mergeRules = (rulesetMap, mergeTarget) => { + const mergeMap = new Map(); + const sorter = (_, v) => { + if ( Array.isArray(v) ) { + return typeof v[0] === 'string' ? v.sort() : v; + } + if ( v instanceof Object ) { + const sorted = {}; + for ( const kk of Object.keys(v).sort() ) { + sorted[kk] = v[kk]; + } + return sorted; + } + return v; + }; + const ruleHasher = (rule, target) => { + return JSON.stringify(rule, (k, v) => { + if ( k.startsWith('_') ) { return; } + if ( k === target ) { return; } + return sorter(k, v); + }); + }; + const extractTargetValue = (obj, target) => { + for ( const [ k, v ] of Object.entries(obj) ) { + if ( Array.isArray(v) && k === target ) { return v; } + if ( v instanceof Object ) { + const r = extractTargetValue(v, target); + if ( r !== undefined ) { return r; } + } + } + }; + const extractTargetOwner = (obj, target) => { + for ( const [ k, v ] of Object.entries(obj) ) { + if ( Array.isArray(v) && k === target ) { return obj; } + if ( v instanceof Object ) { + const r = extractTargetOwner(v, target); + if ( r !== undefined ) { return r; } + } + } + }; + for ( const [ id, rule ] of rulesetMap ) { + const hash = ruleHasher(rule, mergeTarget); + if ( mergeMap.has(hash) === false ) { + mergeMap.set(hash, []); + } + mergeMap.get(hash).push(id); + } + for ( const ids of mergeMap.values() ) { + if ( ids.length === 1 ) { continue; } + const leftHand = rulesetMap.get(ids[0]); + const leftHandSet = new Set( + extractTargetValue(leftHand, mergeTarget) || [] + ); + for ( let i = 1; i < ids.length; i++ ) { + const rightHandId = ids[i]; + const rightHand = rulesetMap.get(rightHandId); + const rightHandArray = extractTargetValue(rightHand, mergeTarget); + if ( rightHandArray !== undefined ) { + if ( leftHandSet.size !== 0 ) { + for ( const item of rightHandArray ) { + leftHandSet.add(item); + } + } + } else { + leftHandSet.clear(); + } + rulesetMap.delete(rightHandId); + } 
+ const leftHandOwner = extractTargetOwner(leftHand, mergeTarget); + if ( leftHandSet.size > 1 ) { + //if ( leftHandOwner === undefined ) { debugger; } + leftHandOwner[mergeTarget] = Array.from(leftHandSet).sort(); + } else if ( leftHandSet.size === 0 ) { + if ( leftHandOwner !== undefined ) { + leftHandOwner[mergeTarget] = undefined; + } + } + } + }; + mergeRules(rulesetMap, 'resourceTypes'); + mergeRules(rulesetMap, 'initiatorDomains'); + mergeRules(rulesetMap, 'removeParams'); + + // Patch case-sensitiveness + for ( const rule of rulesetMap.values() ) { + const { condition } = rule; + if ( + condition === undefined || + condition.urlFilter === undefined && + condition.regexFilter === undefined + ) { + continue; + } + if ( condition.isUrlFilterCaseSensitive === undefined ) { + condition.isUrlFilterCaseSensitive = false; + } else if ( condition.isUrlFilterCaseSensitive === true ) { + condition.isUrlFilterCaseSensitive = undefined; + } + } + + // Patch id: collect the result in a fresh array, because rulesetMap is still keyed by pre-merge ids and storing invalid entries under `ruleId` could overwrite a surviving rule + const rulesetFinal = []; + { + let ruleId = 1; + for ( const rule of rulesetMap.values() ) { + if ( rule._error === undefined ) { + rule.id = ruleId++; + } else { + rule.id = 0; + } + rulesetFinal.push(rule); + } + for ( const invalid of context.invalid ) { + rulesetFinal.push({ _error: [ invalid ] }); + } + } + + return rulesetFinal; +}; + +/******************************************************************************/ + FilterContainer.prototype.addFilterUnit = function( bits, tokenHash, @@ -4587,32 +5109,44 @@ FilterContainer.prototype.dump = function() { const out = []; - const toOutput = (depth, line, out) => { + const toOutput = (depth, line) => { out.push(`${' '.repeat(depth*2)}${line}`); }; - // TODO: Also report filters "hidden" behind FilterPlainTrie - const dumpUnit = (idata, out, depth = 0) => { + const dumpUnit = (idata, depth = 0) => { const fc = filterGetClass(idata); fcCounts.set(fc.name, (fcCounts.get(fc.name) || 0) + 1); const info = filterDumpInfo(idata) || ''; - toOutput(depth, info !== '' ?
`${fc.name}: ${info}` : fc.name, out); + toOutput(depth, info !== '' ? `${fc.name}: ${info}` : fc.name); switch ( fc ) { case FilterBucket: case FilterCompositeAll: case FilterOriginHitAny: { fc.forEach(idata, i => { - dumpUnit(i, out, depth+1); + dumpUnit(i, depth+1); }); break; } case FilterBucketIfOriginHits: { - dumpUnit(filterData[idata+2], out, depth+1); - dumpUnit(filterData[idata+1], out, depth+1); + dumpUnit(filterData[idata+2], depth+1); + dumpUnit(filterData[idata+1], depth+1); break; } case FilterBucketIfRegexHits: { - dumpUnit(filterData[idata+1], out, depth+1); + dumpUnit(filterData[idata+1], depth+1); + break; + } + case FilterPlainTrie: { + for ( const details of bidiTrie.trieIterator(filterData[idata+1]) ) { + toOutput(depth+1, details.pattern); + let ix = details.iextra; + if ( ix === 1 ) { continue; } + for (;;) { + if ( ix === 0 ) { break; } + dumpUnit(filterData[ix+0], depth+2); + ix = filterData[ix+1]; + } + } break; } default: @@ -4635,9 +5169,9 @@ FilterContainer.prototype.dump = function() { [ ThirdParty, '3rd-party' ], ]); for ( const [ realmBits, realmName ] of realms ) { - toOutput(1, `+ realm: ${realmName}`, out); + toOutput(1, `+ realm: ${realmName}`); for ( const [ partyBits, partyName ] of partyness ) { - toOutput(2, `+ party: ${partyName}`, out); + toOutput(2, `+ party: ${partyName}`); const processedTypeBits = new Set(); for ( const typeName in typeNameToTypeValue ) { const typeBits = typeNameToTypeValue[typeName]; @@ -4647,14 +5181,14 @@ FilterContainer.prototype.dump = function() { const ibucket = this.bitsToBucketIndices[bits]; if ( ibucket === 0 ) { continue; } const thCount = this.buckets[ibucket].size; - toOutput(3, `+ type: ${typeName} (${thCount})`, out); + toOutput(3, `+ type: ${typeName} (${thCount})`); for ( const [ th, iunit ] of this.buckets[ibucket] ) { thCounts.add(th); const ths = thConstants.has(th) ? 
thConstants.get(th) : `0x${th.toString(16)}`; - toOutput(4, `+ th: ${ths}`, out); - dumpUnit(iunit, out, 5); + toOutput(4, `+ th: ${ths}`); + dumpUnit(iunit, 5); } } } diff --git a/src/js/storage.js b/src/js/storage.js index 45e5b208f..10d8119a5 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -972,9 +972,11 @@ self.addEventListener('hiddenSettingsChanged', ( ) => { // Useful references: // https://adblockplus.org/en/filter-cheatsheet // https://adblockplus.org/en/filters - const lineIter = new LineIterator(this.preparseDirectives.prune(rawText)); const parser = new StaticFilteringParser({ expertMode }); const compiler = staticNetFilteringEngine.createCompiler(parser); + const lineIter = new LineIterator( + parser.utils.preparser.prune(rawText, vAPI.webextFlavor.env) + ); parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH); @@ -1043,121 +1045,6 @@ self.addEventListener('hiddenSettingsChanged', ( ) => { /******************************************************************************/ -// https://github.com/AdguardTeam/AdguardBrowserExtension/issues/917 - -µb.preparseDirectives = { - // This method returns an array of indices, corresponding to position in - // the content string which should alternatively be parsed and discarded. - split: function(content) { - const reIf = /^!#(if|endif)\b([^\n]*)(?:[\n\r]+|$)/gm; - const soup = vAPI.webextFlavor.soup; - const stack = []; - const shouldDiscard = ( ) => stack.some(v => v); - const parts = [ 0 ]; - let discard = false; - - for (;;) { - const match = reIf.exec(content); - if ( match === null ) { break; } - - switch ( match[1] ) { - case 'if': - let expr = match[2].trim(); - const target = expr.charCodeAt(0) === 0x21 /* '!' 
*/; - if ( target ) { expr = expr.slice(1); } - const token = this.tokens.get(expr); - const startDiscard = - token === 'false' && target === false || - token !== undefined && soup.has(token) === target; - if ( discard === false && startDiscard ) { - parts.push(match.index); - discard = true; - } - stack.push(startDiscard); - break; - - case 'endif': - stack.pop(); - const stopDiscard = shouldDiscard() === false; - if ( discard && stopDiscard ) { - parts.push(match.index + match[0].length); - discard = false; - } - break; - - default: - break; - } - } - - parts.push(content.length); - return parts; - }, - - prune: function(content) { - const parts = this.split(content); - const out = []; - for ( let i = 0, n = parts.length - 1; i < n; i += 2 ) { - const beg = parts[i+0]; - const end = parts[i+1]; - out.push(content.slice(beg, end)); - } - return out.join('\n'); - }, - - getHints: function() { - const out = []; - const vals = new Set(); - for ( const [ key, val ] of this.tokens ) { - if ( vals.has(val) ) { continue; } - vals.add(val); - out.push(key); - } - return out; - }, - - getTokens: function() { - const out = new Map(); - const soup = vAPI.webextFlavor.soup; - for ( const [ key, val ] of this.tokens ) { - out.set(key, val !== 'false' && soup.has(val)); - } - return Array.from(out); - }, - - tokens: new Map([ - [ 'ext_ublock', 'ublock' ], - [ 'env_chromium', 'chromium' ], - [ 'env_edge', 'edge' ], - [ 'env_firefox', 'firefox' ], - [ 'env_legacy', 'legacy' ], - [ 'env_mobile', 'mobile' ], - [ 'env_safari', 'safari' ], - [ 'cap_html_filtering', 'html_filtering' ], - [ 'cap_user_stylesheet', 'user_stylesheet' ], - [ 'false', 'false' ], - // Hoping ABP-only list maintainers can at least make use of it to - // help non-ABP content blockers better deal with filters benefiting - // only ABP. 
- [ 'ext_abp', 'false' ], - // Compatibility with other blockers - // https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#adguard-specific - [ 'adguard', 'adguard' ], - [ 'adguard_app_android', 'false' ], - [ 'adguard_app_ios', 'false' ], - [ 'adguard_app_mac', 'false' ], - [ 'adguard_app_windows', 'false' ], - [ 'adguard_ext_android_cb', 'false' ], - [ 'adguard_ext_chromium', 'chromium' ], - [ 'adguard_ext_edge', 'edge' ], - [ 'adguard_ext_firefox', 'firefox' ], - [ 'adguard_ext_opera', 'chromium' ], - [ 'adguard_ext_safari', 'false' ], - ]), -}; - -/******************************************************************************/ - µb.loadRedirectResources = async function() { try { const success = await redirectEngine.resourcesFromSelfie(io); diff --git a/submodules/uAssets b/submodules/uAssets index 21dca6d15..3cd137904 160000 --- a/submodules/uAssets +++ b/submodules/uAssets @@ -1 +1 @@ -Subproject commit 21dca6d15a83015103eb3ee6e06f7f8cdf96e246 +Subproject commit 3cd137904ffe979f337f8e0099a46ca2d0c41e5f diff --git a/tools/make-mv3.sh b/tools/make-mv3.sh new file mode 100755 index 000000000..c29afebbb --- /dev/null +++ b/tools/make-mv3.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# +# This script assumes a linux environment; pass "all" as $1 to also create the webstore zip + +set -e + +echo "*** uBlock0.mv3: Creating extension" + +DES="dist/build/uBlock0.mv3" +rm -rf "$DES" +mkdir -p "$DES" +cd "$DES" +DES=$(pwd) +cd - > /dev/null +BUILD_TMP=$(mktemp -d) # private name: do not clobber the TMPDIR environment variable +trap 'rm -rf "$BUILD_TMP"' EXIT # clean up even when set -e aborts the build + +echo "*** uBlock0.mv3: Copying mv3-specific files" +cp -R platform/mv3/extension/* "$DES"/ + +echo "*** uBlock0.mv3: Copying common files" +cp LICENSE.txt "$DES"/ + +echo "*** uBlock0.mv3: Generating rulesets" +./tools/make-nodejs.sh "$BUILD_TMP" +cp platform/mv3/package.json "$BUILD_TMP"/ +cp platform/mv3/*.js "$BUILD_TMP"/ +cd "$BUILD_TMP" +node --no-warnings make-rulesets.js output="$DES" +cd - > /dev/null + +echo "*** uBlock0.mv3: extension ready" +echo "Extension location: $DES/" + +if [ "$1" = all ]; then + echo "*** uBlock0.mv3: Creating
webstore package..." + pushd "$(dirname "$DES")" > /dev/null + rm -f uBlock0.mv3.zip # zip updates an existing archive in place, which would keep stale entries + zip uBlock0.mv3.zip -qr "$(basename "$DES")"/* + echo "Package location: $(pwd)/uBlock0.mv3.zip" + popd > /dev/null +fi diff --git a/tools/make-nodejs.sh b/tools/make-nodejs.sh index d0e96cee8..3d534acaf 100755 --- a/tools/make-nodejs.sh +++ b/tools/make-nodejs.sh @@ -13,6 +13,7 @@ cp src/js/dynamic-net-filtering.js $DES/js cp src/js/filtering-context.js $DES/js cp src/js/hnswitches.js $DES/js cp src/js/hntrie.js $DES/js +cp src/js/static-dnr-filtering.js $DES/js cp src/js/static-filtering-parser.js $DES/js cp src/js/static-net-filtering.js $DES/js cp src/js/static-filtering-io.js $DES/js @@ -28,7 +29,7 @@ cp -R src/lib/publicsuffixlist $DES/lib/ # Convert wasm modules into json arrays mkdir -p $DES/js/wasm -cp src/js/wasm/* $DES/js/wasm/ +cp src/js/wasm/* $DES/js/wasm/ node -pe "JSON.stringify(Array.from(fs.readFileSync('src/js/wasm/hntrie.wasm')))" \ > $DES/js/wasm/hntrie.wasm.json node -pe "JSON.stringify(Array.from(fs.readFileSync('src/js/wasm/biditrie.wasm')))" \