1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-03 17:49:39 +02:00
uBlock/platform/mv3/make-rulesets.js
Raymond Hill 232c44eeb2
[mv3] Add scriptlet support; improve reliability of cosmetic filtering
First iteration of adding scriptlet support. As with cosmetic
filtering, scriptlet niijection occurs only on sites for which
uBO Lite was granted extended permissions.

At the moment, only three scriptlets are supported:
- abort-current-script
- json-prune
- set-constant

More will be added in the future.
2022-09-16 15:56:35 -04:00

669 lines
21 KiB
JavaScript

/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2022-present Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
'use strict';
/******************************************************************************/
import fs from 'fs/promises';
import https from 'https';
import path from 'path';
import process from 'process';
import { createHash } from 'crypto';
import { dnrRulesetFromRawLists } from './js/static-dnr-filtering.js';
import { StaticFilteringParser } from './js/static-filtering-parser.js';
/******************************************************************************/
const commandLineArgs = (( ) => {
const args = new Map();
let name, value;
for ( const arg of process.argv.slice(2) ) {
const pos = arg.indexOf('=');
if ( pos === -1 ) {
name = arg;
value = '';
} else {
name = arg.slice(0, pos);
value = arg.slice(pos+1);
}
args.set(name, value);
}
return args;
})();
const outputDir = commandLineArgs.get('output') || '.';
const cacheDir = `${outputDir}/../mv3-data`;
const rulesetDir = `${outputDir}/rulesets`;
const cssDir = `${outputDir}/content-css`;
const scriptletDir = `${outputDir}/content-js`;
const env = [ 'chromium', 'ubol' ];
/******************************************************************************/
const isUnsupported = rule =>
rule._error !== undefined;
const isRegex = rule =>
rule.condition !== undefined &&
rule.condition.regexFilter !== undefined;
const isRedirect = rule =>
rule.action !== undefined &&
rule.action.type === 'redirect' &&
rule.action.redirect.extensionPath !== undefined;
const isCsp = rule =>
rule.action !== undefined &&
rule.action.type === 'modifyHeaders';
const isRemoveparam = rule =>
rule.action !== undefined &&
rule.action.type === 'redirect' &&
rule.action.redirect.transform !== undefined;
const isGood = rule =>
isUnsupported(rule) === false &&
isRedirect(rule) === false &&
isCsp(rule) === false &&
isRemoveparam(rule) === false;
/******************************************************************************/
const stdOutput = [];
const log = (text, silent = false) => {
stdOutput.push(text);
if ( silent === false ) {
console.log(text);
}
};
/******************************************************************************/
const urlToFileName = url => {
return url
.replace(/^https?:\/\//, '')
.replace(/\//g, '_')
;
};
const fetchList = (url, cacheDir) => {
return new Promise((resolve, reject) => {
const fname = urlToFileName(url);
fs.readFile(`${cacheDir}/${fname}`, { encoding: 'utf8' }).then(content => {
log(`\tFetched local ${url}`);
resolve({ url, content });
}).catch(( ) => {
log(`\tFetching remote ${url}`);
https.get(url, response => {
const data = [];
response.on('data', chunk => {
data.push(chunk.toString());
});
response.on('end', ( ) => {
const content = data.join('');
try {
writeFile(`${cacheDir}/${fname}`, content);
} catch (ex) {
}
resolve({ url, content });
});
}).on('error', error => {
reject(error);
});
});
});
};
/******************************************************************************/
const writeFile = async (fname, data) => {
const dir = path.dirname(fname);
await fs.mkdir(dir, { recursive: true });
const promise = fs.writeFile(fname, data);
writeOps.push(promise);
return promise;
};
const writeOps = [];
/******************************************************************************/
const ruleResources = [];
const rulesetDetails = [];
const cssDetails = new Map();
const scriptletDetails = new Map();
/******************************************************************************/
async function fetchAsset(assetDetails) {
// Remember fetched URLs
const fetchedURLs = new Set();
// Fetch list and expand `!#include` directives
let parts = assetDetails.urls.map(url => ({ url }));
while ( parts.every(v => typeof v === 'string') === false ) {
const newParts = [];
for ( const part of parts ) {
if ( typeof part === 'string' ) {
newParts.push(part);
continue;
}
if ( fetchedURLs.has(part.url) ) {
newParts.push('');
continue;
}
fetchedURLs.add(part.url);
newParts.push(
fetchList(part.url, cacheDir).then(details => {
const { url } = details;
const content = details.content.trim();
if ( typeof content === 'string' && content !== '' ) {
if (
content.startsWith('<') === false ||
content.endsWith('>') === false
) {
return { url, content };
}
}
log(`No valid content for ${details.name}`);
return { url, content: '' };
})
);
}
parts = await Promise.all(newParts);
parts = StaticFilteringParser.utils.preparser.expandIncludes(parts, env);
}
const text = parts.join('\n');
if ( text === '' ) {
log('No filterset found');
}
return text;
}
/******************************************************************************/
async function processNetworkFilters(assetDetails, network) {
const replacer = (k, v) => {
if ( k.startsWith('__') ) { return; }
if ( Array.isArray(v) ) {
return v.sort();
}
if ( v instanceof Object ) {
const sorted = {};
for ( const kk of Object.keys(v).sort() ) {
sorted[kk] = v[kk];
}
return sorted;
}
return v;
};
const { ruleset: rules } = network;
log(`Input filter count: ${network.filterCount}`);
log(`\tAccepted filter count: ${network.acceptedFilterCount}`);
log(`\tRejected filter count: ${network.rejectedFilterCount}`);
log(`Output rule count: ${rules.length}`);
const good = rules.filter(rule => isGood(rule) && isRegex(rule) === false);
log(`\tGood: ${good.length}`);
const regexes = rules.filter(rule => isGood(rule) && isRegex(rule));
log(`\tMaybe good (regexes): ${regexes.length}`);
const redirects = rules.filter(rule =>
isUnsupported(rule) === false &&
isRedirect(rule)
);
log(`\tredirect-rule= (discarded): ${redirects.length}`);
const headers = rules.filter(rule =>
isUnsupported(rule) === false &&
isCsp(rule)
);
log(`\tcsp= (discarded): ${headers.length}`);
const removeparams = rules.filter(rule =>
isUnsupported(rule) === false &&
isRemoveparam(rule)
);
log(`\tremoveparams= (discarded): ${removeparams.length}`);
const bad = rules.filter(rule =>
isUnsupported(rule)
);
log(`\tUnsupported: ${bad.length}`);
log(
bad.map(rule => rule._error.map(v => `\t\t${v}`)).join('\n'),
true
);
writeFile(
`${rulesetDir}/${assetDetails.id}.json`,
`${JSON.stringify(good, replacer)}\n`
);
if ( regexes.length !== 0 ) {
writeFile(
`${rulesetDir}/${assetDetails.id}.regexes.json`,
`${JSON.stringify(regexes, replacer)}\n`
);
}
return {
total: rules.length,
accepted: good.length,
discarded: redirects.length + headers.length + removeparams.length,
rejected: bad.length,
regexes: regexes.length,
};
}
/******************************************************************************/
function optimizeExtendedFilters(filters) {
if ( filters === undefined ) { return []; }
const merge = new Map();
for ( const [ selector, details ] of filters ) {
const json = JSON.stringify(details);
let entries = merge.get(json);
if ( entries === undefined ) {
entries = new Set();
merge.set(json, entries);
}
entries.add(selector);
}
const out = [];
for ( const [ json, entries ] of merge ) {
const details = JSON.parse(json);
details.payload = Array.from(entries);
out.push(details);
}
return out;
}
/******************************************************************************/
const style = [
' display:none!important;',
' position:absolute!important;',
' z-index:0!important;',
' visibility:collapse!important;',
].join('\n');
function processCosmeticFilters(assetDetails, mapin) {
if ( mapin === undefined ) { return 0; }
const optimized = optimizeExtendedFilters(mapin);
const cssEntries = new Map();
for ( const entry of optimized ) {
const selectors = entry.payload.join(',\n');
const fname = createHash('sha256').update(selectors).digest('hex').slice(0,8);
const fpath = `${assetDetails.id}/${fname.slice(0,1)}/${fname.slice(1,8)}`;
writeFile(
`${cssDir}/${fpath}.css`,
`${selectors} {\n${style}\n}\n`
);
cssEntries.set(fname, {
y: entry.matches,
n: entry.excludeMatches,
});
}
log(`CSS entries: ${cssEntries.size}`);
if ( cssEntries.size !== 0 ) {
cssDetails.set(assetDetails.id, Array.from(cssEntries));
}
return cssEntries.size;
}
/******************************************************************************/
async function processScriptletFilters(assetDetails, mapin) {
if ( mapin === undefined ) { return 0; }
const originalScriptletMap = new Map();
const dealiasingMap = new Map();
const parseArguments = (raw) => {
const out = [];
let s = raw;
let len = s.length;
let beg = 0, pos = 0;
let i = 1;
while ( beg < len ) {
pos = s.indexOf(',', pos);
// Escaped comma? If so, skip.
if ( pos > 0 && s.charCodeAt(pos - 1) === 0x5C /* '\\' */ ) {
s = s.slice(0, pos - 1) + s.slice(pos);
len -= 1;
continue;
}
if ( pos === -1 ) { pos = len; }
out.push(s.slice(beg, pos).trim());
beg = pos = pos + 1;
i++;
}
return out;
};
const parseFilter = (raw) => {
const filter = raw.slice(4, -1);
const end = filter.length;
let pos = filter.indexOf(',');
if ( pos === -1 ) { pos = end; }
const parts = filter.trim().split(',').map(s => s.trim());
const token = dealiasingMap.get(parts[0]) || '';
if ( token !== '' && originalScriptletMap.has(token) ) {
return {
token,
args: parseArguments(parts.slice(1).join(',').trim()),
};
}
};
const patchScriptlet = (filter) => {
return originalScriptletMap.get(filter.token).replace(
/^self\.\$args\$$/m,
`...${JSON.stringify(filter.args, null, 4)}`
);
};
// Load all available scriptlets into a key-val map, where the key is the
// scriptlet token, and val is the whole content of the file.
const files = await fs.readdir('./scriptlets');
const reScriptletNameOrAlias = /^\/\/\/\s+(?:name|alias)\s+(\S+)/gm;
for ( const file of files ) {
const text = await fs.readFile(
`./scriptlets/${file}`,
{ encoding: 'utf8' }
);
const aliasSet = new Set();
for (;;) {
const match = reScriptletNameOrAlias.exec(text);
if ( match === null ) { break; }
aliasSet.add(match[1]);
}
if ( aliasSet.size === 0 ) { continue; }
const aliases = Array.from(aliasSet);
originalScriptletMap.set(aliases[0], text);
for ( let i = 0; i < aliases.length; i++ ) {
dealiasingMap.set(aliases[i], aliases[0]);
}
}
// Merge entries after dealiasing and expanding arguments
const normalizedMap = new Map();
for ( const [ rawFilter, toAdd ] of mapin ) {
const normalized = parseFilter(rawFilter);
if ( normalized === undefined ) { continue; }
const key = JSON.stringify(normalized);
const toMerge = normalizedMap.get(key);
if ( toMerge === undefined ) {
normalizedMap.set(key, toAdd);
continue;
}
const matches = new Set(toMerge.matches || []);
const excludeMatches = new Set(toMerge.excludeMatches || []);
if ( toAdd.matches && toAdd.matches.size !== 0 ) {
toAdd.matches.forEach(hn => {
matches.add(hn);
});
}
if ( toAdd.excludeMatches && toAdd.excludeMatches.size !== 0 ) {
toAdd.excludeMatches.forEach(hn => {
excludeMatches.add(hn);
});
}
if ( matches.size !== 0 ) {
toMerge.matches = matches.has('*')
? [ '*' ]
: Array.from(matches);
}
if ( excludeMatches.size !== 0 ) {
toMerge.excludeMatches = excludeMatches.has('*')
? [ '*' ]
: Array.from(excludeMatches);
}
}
// Combine injected resources for same matches/excludeMatches instances
//const optimized = optimizeExtendedFilters(normalizedMap);
// Generate distinct scriptlets according to patched scriptlets
const scriptletEntries = new Map();
for ( const [ json, entry ] of normalizedMap ) {
const fname = createHash('sha256').update(json).digest('hex').slice(0,8);
const scriptlet = patchScriptlet(JSON.parse(json));
const fpath = `${assetDetails.id}/${fname.slice(0,1)}/${fname.slice(1,8)}`;
writeFile(`${scriptletDir}/${fpath}.js`, scriptlet);
scriptletEntries.set(fname, {
y: entry.matches,
n: entry.excludeMatches,
});
}
log(`Scriptlet entries: ${scriptletEntries.size}`);
if ( scriptletEntries.size !== 0 ) {
scriptletDetails.set(assetDetails.id, Array.from(scriptletEntries));
}
return scriptletEntries.size;
}
/******************************************************************************/
async function main() {
// Get manifest content
const manifest = await fs.readFile(
`${outputDir}/manifest.json`,
{ encoding: 'utf8' }
).then(text =>
JSON.parse(text)
);
// Create unique version number according to build time
let version = manifest.version;
{
const now = new Date();
const yearPart = now.getUTCFullYear() - 2000;
const monthPart = (now.getUTCMonth() + 1) * 1000;
const dayPart = now.getUTCDate() * 10;
const hourPart = Math.floor(now.getUTCHours() / 3) + 1;
version += `.${yearPart}.${monthPart + dayPart + hourPart}`;
}
log(`Version: ${version}`);
const rulesetFromURLS = async function(assetDetails) {
log('============================');
log(`Listset for '${assetDetails.id}':`);
const text = await fetchAsset(assetDetails);
const results = await dnrRulesetFromRawLists(
[ { name: assetDetails.id, text } ],
{ env }
);
const netStats = await processNetworkFilters(
assetDetails,
results.network
);
const cosmeticStats = await processCosmeticFilters(
assetDetails,
results.cosmetic
);
const scriptletStats = await processScriptletFilters(
assetDetails,
results.scriptlet
);
rulesetDetails.push({
id: assetDetails.id,
name: assetDetails.name,
enabled: assetDetails.enabled,
lang: assetDetails.lang,
homeURL: assetDetails.homeURL,
filters: {
total: results.network.filterCount,
accepted: results.network.acceptedFilterCount,
rejected: results.network.rejectedFilterCount,
},
rules: {
total: netStats.total,
accepted: netStats.accepted,
discarded: netStats.discarded,
rejected: netStats.rejected,
regexes: netStats.regexes,
},
css: {
specific: cosmeticStats,
},
scriptlets: {
total: scriptletStats,
},
});
ruleResources.push({
id: assetDetails.id,
enabled: assetDetails.enabled,
path: `/rulesets/${assetDetails.id}.json`
});
};
// Get assets.json content
const assets = await fs.readFile(
`./assets.json`,
{ encoding: 'utf8' }
).then(text =>
JSON.parse(text)
);
// Assemble all default lists as the default ruleset
const contentURLs = [];
for ( const asset of Object.values(assets) ) {
if ( asset.content !== 'filters' ) { continue; }
if ( asset.off === true ) { continue; }
const contentURL = Array.isArray(asset.contentURL)
? asset.contentURL[0]
: asset.contentURL;
contentURLs.push(contentURL);
}
await rulesetFromURLS({
id: 'default',
name: 'Ads, trackers, miners, and more' ,
enabled: true,
urls: contentURLs,
homeURL: 'https://github.com/uBlockOrigin/uAssets',
});
// Regional rulesets
for ( const [ id, asset ] of Object.entries(assets) ) {
if ( asset.content !== 'filters' ) { continue; }
if ( asset.off !== true ) { continue; }
if ( typeof asset.lang !== 'string' ) { continue; }
const contentURL = Array.isArray(asset.contentURL)
? asset.contentURL[0]
: asset.contentURL;
await rulesetFromURLS({
id: id.toLowerCase(),
lang: asset.lang,
name: asset.title,
enabled: false,
urls: [ contentURL ],
homeURL: asset.supportURL,
});
}
// Handpicked rulesets from assets.json
const handpicked = [ 'block-lan', 'dpollock-0' ];
for ( const id of handpicked ) {
const asset = assets[id];
if ( asset.content !== 'filters' ) { continue; }
const contentURL = Array.isArray(asset.contentURL)
? asset.contentURL[0]
: asset.contentURL;
await rulesetFromURLS({
id: id.toLowerCase(),
name: asset.title,
enabled: false,
urls: [ contentURL ],
homeURL: asset.supportURL,
});
}
// Handpicked rulesets from abroad
await rulesetFromURLS({
id: 'stevenblack-hosts',
name: 'Steven Black\'s hosts file',
enabled: false,
urls: [ 'https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts' ],
homeURL: 'https://github.com/StevenBlack/hosts#readme',
});
writeFile(
`${rulesetDir}/ruleset-details.json`,
`${JSON.stringify(rulesetDetails, null, 1)}\n`
);
writeFile(
`${cssDir}/css-specific.json`,
`${JSON.stringify(Array.from(cssDetails))}\n`
);
writeFile(
`${scriptletDir}/scriptlet-details.json`,
`${JSON.stringify(Array.from(scriptletDetails))}\n`
);
await Promise.all(writeOps);
// Patch manifest
manifest.declarative_net_request = { rule_resources: ruleResources };
const now = new Date();
const yearPart = now.getUTCFullYear() - 2000;
const monthPart = (now.getUTCMonth() + 1) * 1000;
const dayPart = now.getUTCDate() * 10;
const hourPart = Math.floor(now.getUTCHours() / 3) + 1;
manifest.version = manifest.version + `.${yearPart}.${monthPart + dayPart + hourPart}`;
await fs.writeFile(
`${outputDir}/manifest.json`,
JSON.stringify(manifest, null, 2) + '\n'
);
// Log results
await fs.writeFile(`${outputDir}/log.txt`, stdOutput.join('\n') + '\n');
}
main();
/******************************************************************************/