mirror of https://github.com/gorhill/uBlock.git synced 2024-10-06 09:37:12 +02:00

671 lines
21 KiB

uBlock Origin - a browser extension to block requests.
Copyright (C) 2022-present Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
'use strict';
import fs from 'fs/promises';
import https from 'https';
import path from 'path';
import process from 'process';
import { createHash } from 'crypto';
import { dnrRulesetFromRawLists } from './js/static-dnr-filtering.js';
import { StaticFilteringParser } from './js/static-filtering-parser.js';
// Parses the command line arguments (everything after the script path in
// `process.argv`) into a Map of name => value. An argument of the form
// `name=value` is split on its first `=`; an argument without `=` is given
// an empty string as value.
// NOTE(review): the closing braces of the `else` branch, of the loop and of
// the immediately-invoked function are not visible in this copy of the
// file -- presumably lost during extraction; confirm against upstream.
const commandLineArgs = (( ) => {
const args = new Map();
let name, value;
for ( const arg of process.argv.slice(2) ) {
const pos = arg.indexOf('=');
if ( pos === -1 ) {
// Bare flag: no `=` present
name = arg;
value = '';
} else {
// Only the first `=` separates name from value
name = arg.slice(0, pos);
value = arg.slice(pos+1);
args.set(name, value);
return args;
// Root of the build output: taken from the `output=...` command line
// argument, falling back to the current directory when absent or empty.
const outputDir = commandLineArgs.get('output') || '.';

// Locations derived from the output root, in order: cache of downloaded
// lists (sibling of the output directory), DNR rulesets, generated CSS
// files, generated scriptlet files.
const [ cacheDir, rulesetDir, cssDir, scriptletDir ] = [
    '../mv3-data',
    'rulesets',
    'content-css',
    'content-js',
].map(relative => `${outputDir}/${relative}`);

// Environment tokens handed to the list preparser and to the DNR compiler.
const env = [ 'chromium', 'ubol' ];
// True when the rule carries an `_error` property, i.e. when that property
// is anything other than `undefined`.
const isUnsupported = rule => {
    const { _error: failure } = rule;
    return failure !== undefined;
};
// True when the rule has a `condition` which defines a `regexFilter`.
const isRegex = rule => {
    const { condition } = rule;
    if ( condition === undefined ) { return false; }
    return condition.regexFilter !== undefined;
};
// True when the rule is a `redirect` action whose redirect target is an
// extension path.
const isRedirect = rule => {
    const { action } = rule;
    if ( action === undefined ) { return false; }
    if ( action.type !== 'redirect' ) { return false; }
    return action.redirect.extensionPath !== undefined;
};
// True when the rule has an action of type `modifyHeaders`.
const isCsp = rule => {
    const { action } = rule;
    if ( action === undefined ) { return false; }
    return action.type === 'modifyHeaders';
};
// True when the rule is a `redirect` action which defines a URL
// `transform`.
const isRemoveparam = rule => {
    const { action } = rule;
    if ( action === undefined ) { return false; }
    if ( action.type !== 'redirect' ) { return false; }
    return action.redirect.transform !== undefined;
};
// True when none of the disqualifying predicates applies to the rule. The
// predicates are evaluated in the listed order and evaluation stops at the
// first one which matches.
const isGood = rule => {
    const disqualifiers = [ isUnsupported, isRedirect, isCsp, isRemoveparam ];
    return disqualifiers.every(applies => applies(rule) === false);
};
// Lines collected here are joined with newlines and written to `log.txt`
// in the output directory at the end of main().
const stdOutput = [];
// Logging helper taking the text to log and an optional `silent` flag
// (default `false`).
// NOTE(review): the body is not visible in this copy of the file;
// presumably the text is appended to `stdOutput` and echoed to the console
// unless `silent` is set -- confirm against upstream.
const log = (text, silent = false) => {
if ( silent === false ) {
// Derives a flat file name from a URL: a leading `http://` or `https://`
// scheme is removed, then every `/` is replaced with `_`.
// NOTE(review): the statement terminator and the closing brace are not
// visible in this copy of the file.
const urlToFileName = url => {
return url
.replace(/^https?:\/\//, '')
.replace(/\//g, '_')
// Returns a promise for `{ url, content }`, where `content` is the text of
// the list at `url`.
//
// The copy cached under `cacheDir` (file name from urlToFileName()) is
// tried first. Only when reading that file fails is the list fetched over
// HTTPS, after which a write to the cache is started.
//
// NOTE(review): several lines are not visible in this copy of the file
// (accumulation of chunks into `data`, the `catch` body, the `error`
// handler body, closing braces); the comments below only describe what is
// visible.
const fetchList = (url, cacheDir) => {
return new Promise((resolve, reject) => {
const fname = urlToFileName(url);
fs.readFile(`${cacheDir}/${fname}`, { encoding: 'utf8' }).then(content => {
log(`\tFetched local ${url}`);
resolve({ url, content });
}).catch(( ) => {
// Any failure to read the cached copy falls back to the network
log(`\tFetching remote ${url}`);
https.get(url, response => {
const data = [];
response.on('data', chunk => {
response.on('end', ( ) => {
const content = data.join('');
try {
// NOTE(review): writeFile() is async and is not awaited here, so
// this try/catch can only catch errors thrown synchronously; a
// rejected write would go unhandled -- confirm intent.
writeFile(`${cacheDir}/${fname}`, content);
} catch (ex) {
resolve({ url, content });
}).on('error', error => {
// Writes `data` to the file `fname`, first creating the parent directory
// (and any missing ancestors) as needed. Resolves when the write is done.
// NOTE(review): the closing brace is not visible in this copy of the file.
const writeFile = async (fname, data) => {
const dir = path.dirname(fname);
await fs.mkdir(dir, { recursive: true });
const promise = fs.writeFile(fname, data);
return promise;
// Module-level accumulators filled while rulesets are being generated.

// Pending write promises; main() awaits all of them before patching the
// manifest.
const writeOps = [];
// Becomes `declarative_net_request.rule_resources` in the manifest.
const ruleResources = [];
// Serialized as JSON by main().
const rulesetDetails = [];
// Ruleset id => array of [ file name, { y, n } ] pairs for generated CSS.
const cssDetails = new Map();
// Ruleset id => array of [ file name, { y, n } ] pairs for generated
// scriptlets.
const scriptletDetails = new Map();
// Fetches all lists named in `assetDetails.urls` and returns their content
// as a single string, with parts joined by newlines. Returns an empty
// string (after logging a message) when nothing was found.
//
// `parts` starts out as `{ url }` objects. Each loop iteration resolves
// the pending fetches and hands the result to the preparser's
// expandIncludes(); the loop ends once every part is a plain string.
//
// NOTE(review): several lines are not visible in this copy of the file
// (bodies of the first two `if` statements, the push into `newParts`,
// closing braces); the comments below only describe what is visible.
async function fetchAsset(assetDetails) {
// Remember fetched URLs
const fetchedURLs = new Set();
// Fetch list and expand `!#include` directives
let parts = assetDetails.urls.map(url => ({ url }));
while ( parts.every(v => typeof v === 'string') === false ) {
const newParts = [];
for ( const part of parts ) {
if ( typeof part === 'string' ) {
if ( fetchedURLs.has(part.url) ) {
fetchList(part.url, cacheDir).then(details => {
const { url } = details;
const content = details.content.trim();
if ( typeof content === 'string' && content !== '' ) {
// Content which both starts with `<` and ends with `>` is rejected
// (presumably an HTML/XML page served in place of the list)
if (
content.startsWith('<') === false ||
content.endsWith('>') === false
) {
return { url, content };
// NOTE(review): fetchList() resolves with `{ url, content }` only, so
// `details.name` is undefined here; `details.url` was probably meant.
log(`No valid content for ${details.name}`);
return { url, content: '' };
parts = await Promise.all(newParts);
parts = StaticFilteringParser.utils.preparser.expandIncludes(parts, env);
const text = parts.join('\n');
if ( text === '' ) {
log('No filterset found');
return text;
// Splits the DNR rules in `network.ruleset` into categories, logs the size
// of each category, and returns the counts as
// `{ total, accepted, discarded, rejected, regexes }`.
//
// Categories, as computed below:
// - good: passes isGood() and is not a regex rule
// - regexes: passes isGood() and is a regex rule
// - redirects / headers / removeparams: supported rules of a discarded kind
// - bad: logged as "Unsupported"
//
// NOTE(review): many lines are not visible in this copy of the file (the
// tails of several filter predicates, the calls which consume the
// JSON.stringify() results, closing braces); the comments below only
// describe what is visible.
async function processNetworkFilters(assetDetails, network) {
// JSON.stringify() replacer: drops keys starting with `__`, sorts arrays,
// and emits object keys in sorted order.
const replacer = (k, v) => {
if ( k.startsWith('__') ) { return; }
if ( Array.isArray(v) ) {
// NOTE(review): sort() sorts in place, so arrays inside the rules are
// reordered as a side effect of serialization.
return v.sort();
if ( v instanceof Object ) {
const sorted = {};
for ( const kk of Object.keys(v).sort() ) {
sorted[kk] = v[kk];
return sorted;
return v;
const { ruleset: rules } = network;
log(`Input filter count: ${network.filterCount}`);
log(`\tAccepted filter count: ${network.acceptedFilterCount}`);
log(`\tRejected filter count: ${network.rejectedFilterCount}`);
log(`Output rule count: ${rules.length}`);
const good = rules.filter(rule => isGood(rule) && isRegex(rule) === false);
log(`\tGood: ${good.length}`);
const regexes = rules.filter(rule => isGood(rule) && isRegex(rule));
log(`\tMaybe good (regexes): ${regexes.length}`);
const redirects = rules.filter(rule =>
isUnsupported(rule) === false &&
log(`\tredirect-rule= (discarded): ${redirects.length}`);
const headers = rules.filter(rule =>
isUnsupported(rule) === false &&
log(`\tcsp= (discarded): ${headers.length}`);
const removeparams = rules.filter(rule =>
isUnsupported(rule) === false &&
log(`\tremoveparams= (discarded): ${removeparams.length}`);
const bad = rules.filter(rule =>
log(`\tUnsupported: ${bad.length}`);
bad.map(rule => rule._error.map(v => `\t\t${v}`)).join('\n'),
`${JSON.stringify(good, replacer)}\n`
if ( regexes.length !== 0 ) {
`${JSON.stringify(regexes, replacer)}\n`
return {
total: rules.length,
accepted: good.length,
discarded: redirects.length + headers.length + removeparams.length,
rejected: bad.length,
regexes: regexes.length,
// Groups the entries of `filters` (an iterable of [ selector, details ]
// pairs) by the JSON serialization of their `details`, so that entries
// with identical details share a single output record. Each record is the
// parsed details plus a `payload` array built from the grouped set.
// Returns an empty array when `filters` is undefined.
//
// NOTE(review): the lines which add `selector` to `entries` and which push
// `details` onto `out` are not visible in this copy of the file, nor are
// the closing braces -- confirm against upstream.
function optimizeExtendedFilters(filters) {
if ( filters === undefined ) { return []; }
// Serialized details => Set of grouped entries
const merge = new Map();
for ( const [ selector, details ] of filters ) {
const json = JSON.stringify(details);
let entries = merge.get(json);
if ( entries === undefined ) {
entries = new Set();
merge.set(json, entries);
const out = [];
for ( const [ json, entries ] of merge ) {
const details = JSON.parse(json);
details.payload = Array.from(entries);
return out;
// CSS declarations interpolated into the body of every generated cosmetic
// rule by processCosmeticFilters().
// NOTE(review): the end of this array literal is not visible in this copy
// of the file; presumably the lines are joined into a single string.
const style = [
' display:none!important;',
' position:absolute!important;',
' z-index:0!important;',
' visibility:collapse!important;',
// Turns the cosmetic filters in `mapin` into CSS entries for the ruleset
// `assetDetails.id` and returns the number of distinct entries (0 when
// `mapin` is undefined).
//
// Filters are first grouped by optimizeExtendedFilters(). Each group's
// selectors are joined into one selector list, and the group is named
// after the first 8 hex digits of the SHA-256 of that list. Non-empty
// results are recorded in `cssDetails` under the ruleset id.
//
// NOTE(review): the call which consumes `fpath` and the CSS text is not
// visible in this copy of the file, nor are the closing braces.
function processCosmeticFilters(assetDetails, mapin) {
if ( mapin === undefined ) { return 0; }
const optimized = optimizeExtendedFilters(mapin);
const cssEntries = new Map();
for ( const entry of optimized ) {
const selectors = entry.payload.join(',\n');
const fname = createHash('sha256').update(selectors).digest('hex').slice(0,8);
// Relative path: <ruleset id>/<first hex digit>/<remaining 7 hex digits>
const fpath = `${assetDetails.id}/${fname.slice(0,1)}/${fname.slice(1,8)}`;
`${selectors} {\n${style}\n}\n`
cssEntries.set(fname, {
y: entry.matches,
n: entry.excludeMatches,
log(`CSS entries: ${cssEntries.size}`);
if ( cssEntries.size !== 0 ) {
cssDetails.set(assetDetails.id, Array.from(cssEntries));
return cssEntries.size;
// Turns the scriptlet filters in `mapin` into patched scriptlet files for
// the ruleset `assetDetails.id` and returns the number of distinct
// scriptlets generated (0 when `mapin` is undefined).
//
// Steps, as visible below:
// 1. Load the scriptlet templates from `./scriptlets` and index them by
//    the names/aliases declared in their `/// name` and `/// alias` lines.
// 2. Normalize each raw filter (de-alias the token, parse the arguments)
//    and merge the entries which normalize to the same token/arguments.
// 3. Write one patched scriptlet per normalized filter, named after the
//    first 8 hex digits of the SHA-256 of the normalized JSON, and record
//    the results in `scriptletDetails` under the ruleset id.
//
// NOTE(review): many lines are not visible in this copy of the file
// (object literal members, loop bodies, closing braces); the comments
// below only describe what is visible.
async function processScriptletFilters(assetDetails, mapin) {
if ( mapin === undefined ) { return 0; }
// Canonical token => whole text of the scriptlet template
const originalScriptletMap = new Map();
// Name or alias => canonical token
const dealiasingMap = new Map();
// Splits a raw argument string on commas and trims each argument. A comma
// preceded by a backslash is treated as escaped: the backslash is removed.
const parseArguments = (raw) => {
const out = [];
let s = raw;
let len = s.length;
let beg = 0, pos = 0;
// NOTE(review): `i` is not used in the visible lines
let i = 1;
while ( beg < len ) {
pos = s.indexOf(',', pos);
// Escaped comma? If so, skip.
if ( pos > 0 && s.charCodeAt(pos - 1) === 0x5C /* '\\' */ ) {
s = s.slice(0, pos - 1) + s.slice(pos);
len -= 1;
// No further comma: the last argument runs to the end of the string
if ( pos === -1 ) { pos = len; }
out.push(s.slice(beg, pos).trim());
beg = pos = pos + 1;
return out;
// Normalizes a raw filter: drops its first 4 and its last character, then
// treats the first comma-separated part as the scriptlet name and the
// remainder as its arguments. The name is resolved through
// `dealiasingMap`.
const parseFilter = (raw) => {
const filter = raw.slice(4, -1);
// NOTE(review): `end` and `pos` are not used in the visible lines
const end = filter.length;
let pos = filter.indexOf(',');
if ( pos === -1 ) { pos = end; }
const parts = filter.trim().split(',').map(s => s.trim());
const token = dealiasingMap.get(parts[0]) || '';
if ( token !== '' && originalScriptletMap.has(token) ) {
return {
args: parseArguments(parts.slice(1).join(',').trim()),
// Returns the template text for `filter.token` with a placeholder replaced
// by the JSON-serialized arguments (the placeholder pattern itself is not
// visible in this copy of the file).
const patchScriptlet = (filter) => {
return originalScriptletMap.get(filter.token).replace(
`...${JSON.stringify(filter.args, null, 4)}`
// Load all available scriptlets into a key-val map, where the key is the
// scriptlet token, and val is the whole content of the file.
const files = await fs.readdir('./scriptlets');
// NOTE(review): this regex has the `g` flag and is shared across files;
// the exec() loop below leaves `lastIndex` at 0 only when it runs until a
// null match.
const reScriptletNameOrAlias = /^\/\/\/\s+(?:name|alias)\s+(\S+)/gm;
for ( const file of files ) {
const text = await fs.readFile(
{ encoding: 'utf8' }
const aliasSet = new Set();
for (;;) {
const match = reScriptletNameOrAlias.exec(text);
if ( match === null ) { break; }
// Files declaring no name/alias are ignored
if ( aliasSet.size === 0 ) { continue; }
// The first declared name is used as the canonical token
const aliases = Array.from(aliasSet);
originalScriptletMap.set(aliases[0], text);
for ( let i = 0; i < aliases.length; i++ ) {
dealiasingMap.set(aliases[i], aliases[0]);
// Merge entries after dealiasing and expanding arguments
const normalizedMap = new Map();
for ( const [ rawFilter, toAdd ] of mapin ) {
const normalized = parseFilter(rawFilter);
if ( normalized === undefined ) { continue; }
const key = JSON.stringify(normalized);
const toMerge = normalizedMap.get(key);
if ( toMerge === undefined ) {
normalizedMap.set(key, toAdd);
const matches = new Set(toMerge.matches || []);
const excludeMatches = new Set(toMerge.excludeMatches || []);
if ( toAdd.matches && toAdd.matches.size !== 0 ) {
toAdd.matches.forEach(hn => {
if ( toAdd.excludeMatches && toAdd.excludeMatches.size !== 0 ) {
toAdd.excludeMatches.forEach(hn => {
// A `*` entry subsumes all other entries of the same list
if ( matches.size !== 0 ) {
toMerge.matches = matches.has('*')
? [ '*' ]
: Array.from(matches);
if ( excludeMatches.size !== 0 ) {
toMerge.excludeMatches = excludeMatches.has('*')
? [ '*' ]
: Array.from(excludeMatches);
// Combine injected resources for same matches/excludeMatches instances
//const optimized = optimizeExtendedFilters(normalizedMap);
// Generate distinct scriptlets according to patched scriptlets
const scriptletEntries = new Map();
for ( const [ json, entry ] of normalizedMap ) {
const fname = createHash('sha256').update(json).digest('hex').slice(0,8);
const scriptlet = patchScriptlet(JSON.parse(json));
// Relative path: <ruleset id>/<first hex digit>/<remaining 7 hex digits>
const fpath = `${assetDetails.id}/${fname.slice(0,1)}/${fname.slice(1,8)}`;
// NOTE(review): this promise is neither awaited nor added to `writeOps`,
// so a failed write goes unhandled and nothing visible here waits for the
// write to finish -- confirm intent.
writeFile(`${scriptletDir}/${fpath}.js`, scriptlet);
scriptletEntries.set(fname, {
y: entry.matches,
n: entry.excludeMatches,
log(`Scriptlet entries: ${scriptletEntries.size}`);
if ( scriptletEntries.size !== 0 ) {
scriptletDetails.set(assetDetails.id, Array.from(scriptletEntries));
return scriptletEntries.size;
// Entry point of the build script. As visible below, it:
// 1. reads the manifest and derives a build-time version string;
// 2. generates one ruleset per list set (default lists, regional lists,
//    handpicked lists) through the local helper rulesetFromURLS();
// 3. waits for the pending writes collected in `writeOps`;
// 4. patches the manifest with the rule resources and the version, and
//    writes it back;
// 5. writes the accumulated log lines to `log.txt` in the output
//    directory.
//
// NOTE(review): many lines are not visible in this copy of the file (file
// path arguments, JSON parsing callbacks, object literal delimiters, the
// content of `contentURLs`, closing braces); the comments below only
// describe what is visible.
async function main() {
// Get manifest content
const manifest = await fs.readFile(
{ encoding: 'utf8' }
).then(text =>
// Create unique version number according to build time
// Format: <manifest version>.<YY>.<month*1000 + day*10 + slot>, where
// slot is the 3-hour UTC slot of the day, numbered from 1 to 8.
// NOTE(review): in the visible lines this local `version` is only logged;
// the same computation is repeated further down to patch the manifest.
let version = manifest.version;
const now = new Date();
const yearPart = now.getUTCFullYear() - 2000;
const monthPart = (now.getUTCMonth() + 1) * 1000;
const dayPart = now.getUTCDate() * 10;
const hourPart = Math.floor(now.getUTCHours() / 3) + 1;
version += `.${yearPart}.${monthPart + dayPart + hourPart}`;
log(`Version: ${version}`);
// Builds one ruleset from `assetDetails` (id, name, enabled, lang, urls,
// homeURL): fetches the lists, compiles them through
// dnrRulesetFromRawLists(), then processes the network, cosmetic and
// scriptlet results in turn. The resulting statistics and the ruleset
// resource descriptor are listed below (presumably pushed onto
// `rulesetDetails` and `ruleResources` -- those calls are not visible).
const rulesetFromURLS = async function(assetDetails) {
log(`Listset for '${assetDetails.id}':`);
const text = await fetchAsset(assetDetails);
const results = await dnrRulesetFromRawLists(
[ { name: assetDetails.id, text } ],
{ env }
const netStats = await processNetworkFilters(
const cosmeticStats = await processCosmeticFilters(
const scriptletStats = await processScriptletFilters(
id: assetDetails.id,
name: assetDetails.name,
enabled: assetDetails.enabled,
lang: assetDetails.lang,
homeURL: assetDetails.homeURL,
filters: {
total: results.network.filterCount,
accepted: results.network.acceptedFilterCount,
rejected: results.network.rejectedFilterCount,
rules: {
total: netStats.total,
accepted: netStats.accepted,
discarded: netStats.discarded,
rejected: netStats.rejected,
regexes: netStats.regexes,
css: {
specific: cosmeticStats,
scriptlets: {
total: scriptletStats,
id: assetDetails.id,
enabled: assetDetails.enabled,
path: `/rulesets/${assetDetails.id}.json`
// Get assets.json content
const assets = await fs.readFile(
{ encoding: 'utf8' }
).then(text =>
// Assemble all default lists as the default ruleset
const contentURLs = [
await rulesetFromURLS({
id: 'default',
name: 'Ads, trackers, miners, and more' ,
enabled: true,
urls: contentURLs,
homeURL: 'https://github.com/uBlockOrigin/uAssets',
// Regional rulesets
// Only filter lists which are off by default and declare a `lang` string
// are considered. When `contentURL` is an array, only its first URL is
// used.
for ( const [ id, asset ] of Object.entries(assets) ) {
if ( asset.content !== 'filters' ) { continue; }
if ( asset.off !== true ) { continue; }
if ( typeof asset.lang !== 'string' ) { continue; }
const contentURL = Array.isArray(asset.contentURL)
? asset.contentURL[0]
: asset.contentURL;
await rulesetFromURLS({
id: id.toLowerCase(),
lang: asset.lang,
name: asset.title,
enabled: false,
urls: [ contentURL ],
homeURL: asset.supportURL,
// Handpicked rulesets from assets.json
// NOTE(review): `assets[id]` is dereferenced without checking that the id
// exists in assets.json; a missing id would throw a TypeError here.
const handpicked = [ 'block-lan', 'dpollock-0' ];
for ( const id of handpicked ) {
const asset = assets[id];
if ( asset.content !== 'filters' ) { continue; }
const contentURL = Array.isArray(asset.contentURL)
? asset.contentURL[0]
: asset.contentURL;
await rulesetFromURLS({
id: id.toLowerCase(),
name: asset.title,
enabled: false,
urls: [ contentURL ],
homeURL: asset.supportURL,
// Handpicked rulesets from abroad
await rulesetFromURLS({
id: 'stevenblack-hosts',
name: 'Steven Black\'s hosts file',
enabled: false,
urls: [ 'https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts' ],
homeURL: 'https://github.com/StevenBlack/hosts#readme',
`${JSON.stringify(rulesetDetails, null, 1)}\n`
// Wait for all pending writes collected in `writeOps`
await Promise.all(writeOps);
// Patch manifest
manifest.declarative_net_request = { rule_resources: ruleResources };
// Same version scheme as computed at the top of this function.
// NOTE(review): `now` and the `*Part` constants are declared a second
// time here; unless the first set lived in a nested block whose braces
// are not visible in this copy, this is a redeclaration error -- confirm
// against upstream.
const now = new Date();
const yearPart = now.getUTCFullYear() - 2000;
const monthPart = (now.getUTCMonth() + 1) * 1000;
const dayPart = now.getUTCDate() * 10;
const hourPart = Math.floor(now.getUTCHours() / 3) + 1;
manifest.version = manifest.version + `.${yearPart}.${monthPart + dayPart + hourPart}`;
await fs.writeFile(
JSON.stringify(manifest, null, 2) + '\n'
// Log results
await fs.writeFile(`${outputDir}/log.txt`, stdOutput.join('\n') + '\n');