1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-11-10 12:52:46 +01:00

Add support for regex-based values for domain=/from=/to= options

Related discussion:
- https://github.com/uBlockOrigin/uBlock-issues/discussions/2234

Example of usage:

    @@*$ghide,domain=/img[a-z]{3,5}\.buzz/

Regex-based domain values can be negated just like plain or
entity-based values:

    *$domain=~/regex.../

This new syntax does not apply to static extended filters.
This commit is contained in:
Raymond Hill 2023-01-30 17:00:26 -05:00
parent d88ec51b63
commit b1de8d3fe4
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
4 changed files with 158 additions and 28 deletions

View File

@ -176,8 +176,8 @@ const µBlock = { // jshint ignore:line
// Read-only // Read-only
systemSettings: { systemSettings: {
compiledMagic: 54, // Increase when compiled format changes compiledMagic: 55, // Increase when compiled format changes
selfieMagic: 54, // Increase when selfie format changes selfieMagic: 55, // Increase when selfie format changes
}, },
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -1620,11 +1620,11 @@ export class AstFilterParser {
); );
switch ( nodeOptionType ) { switch ( nodeOptionType ) {
case NODE_TYPE_NET_OPTION_NAME_DENYALLOW: case NODE_TYPE_NET_OPTION_NAME_DENYALLOW:
this.linkDown(next, this.parseDomainList(next, '|'), 0b0000); this.linkDown(next, this.parseDomainList(next, '|'), 0b00000);
break; break;
case NODE_TYPE_NET_OPTION_NAME_FROM: case NODE_TYPE_NET_OPTION_NAME_FROM:
case NODE_TYPE_NET_OPTION_NAME_TO: case NODE_TYPE_NET_OPTION_NAME_TO:
this.linkDown(next, this.parseDomainList(next, '|', 0b1010)); this.linkDown(next, this.parseDomainList(next, '|', 0b11010));
break; break;
default: default:
break; break;
@ -1642,7 +1642,7 @@ export class AstFilterParser {
return this.getNodeTransform(valueNode); return this.getNodeTransform(valueNode);
} }
parseDomainList(parent, separator, mode = 0b0000) { parseDomainList(parent, separator, mode = 0b00000) {
const parentBeg = this.nodes[parent+NODE_BEG_INDEX]; const parentBeg = this.nodes[parent+NODE_BEG_INDEX];
const parentEnd = this.nodes[parent+NODE_END_INDEX]; const parentEnd = this.nodes[parent+NODE_END_INDEX];
const containerNode = this.allocTypedNode( const containerNode = this.allocTypedNode(
@ -1668,9 +1668,7 @@ export class AstFilterParser {
end = s.indexOf(separator, beg); end = s.indexOf(separator, beg);
} else { } else {
end = s.indexOf('/', beg+1); end = s.indexOf('/', beg+1);
end = end !== -1 end = s.indexOf(separator, end !== -1 ? end+1 : beg);
? s.indexOf(separator, end+1)
: s.indexOf(separator, beg);
} }
if ( end === -1 ) { end = listEnd; } if ( end === -1 ) { end = listEnd; }
if ( end !== beg ) { if ( end !== beg ) {
@ -1683,8 +1681,9 @@ export class AstFilterParser {
prev = this.linkRight(prev, domainNode); prev = this.linkRight(prev, domainNode);
} else { } else {
domainNode = 0; domainNode = 0;
if ( this.interactive && separatorNode !== 0 ) { if ( separatorNode !== 0 ) {
this.addNodeFlags(separatorNode, NODE_FLAG_ERROR); this.addNodeFlags(separatorNode, NODE_FLAG_ERROR);
this.addFlags(AST_FLAG_HAS_ERROR);
} }
} }
if ( s.charCodeAt(end) === separatorCode ) { if ( s.charCodeAt(end) === separatorCode ) {
@ -1696,14 +1695,20 @@ export class AstFilterParser {
parentBeg + end parentBeg + end
); );
prev = this.linkRight(prev, separatorNode); prev = this.linkRight(prev, separatorNode);
if ( this.interactive && domainNode === 0 ) { if ( domainNode === 0 ) {
this.addNodeFlags(separatorNode, NODE_FLAG_ERROR); this.addNodeFlags(separatorNode, NODE_FLAG_ERROR);
this.addFlags(AST_FLAG_HAS_ERROR);
} }
} else { } else {
separatorNode = 0; separatorNode = 0;
} }
beg = end; beg = end;
} }
// Dangling separator node
if ( separatorNode !== 0 ) {
this.addNodeFlags(separatorNode, NODE_FLAG_ERROR);
this.addFlags(AST_FLAG_HAS_ERROR);
}
this.linkDown(containerNode, this.throwHeadNode(listNode)); this.linkDown(containerNode, this.throwHeadNode(listNode));
return containerNode; return containerNode;
} }
@ -1724,12 +1729,13 @@ export class AstFilterParser {
} }
if ( beg !== parentEnd ) { if ( beg !== parentEnd ) {
next = this.allocTypedNode(NODE_TYPE_OPTION_VALUE_DOMAIN, beg, parentEnd); next = this.allocTypedNode(NODE_TYPE_OPTION_VALUE_DOMAIN, beg, parentEnd);
const hn = this.normalizeHostnameValue(this.getNodeString(next), mode); const hn = this.normalizeDomainValue(this.getNodeString(next), mode);
if ( hn !== undefined ) { if ( hn !== undefined ) {
if ( hn !== '' ) { if ( hn !== '' ) {
this.setNodeTransform(next, hn); this.setNodeTransform(next, hn);
} else { } else {
this.addNodeFlags(parent, NODE_FLAG_ERROR); this.addNodeFlags(parent, NODE_FLAG_ERROR);
this.addFlags(AST_FLAG_HAS_ERROR);
} }
} }
if ( head === 0 ) { if ( head === 0 ) {
@ -1737,10 +1743,32 @@ export class AstFilterParser {
} else { } else {
this.linkRight(head, next); this.linkRight(head, next);
} }
} else {
this.addNodeFlags(parent, NODE_FLAG_ERROR);
this.addFlags(AST_FLAG_HAS_ERROR);
} }
return head; return head;
} }
// mode bits:
// 0b00001: can use wildcard at any position
// 0b00010: can use entity-based hostnames
// 0b00100: can use single wildcard
// 0b01000: can be negated
// 0b10000: can be a regex
normalizeDomainValue(s, modeBits) {
if ( (modeBits & 0b10000) === 0 ||
s.length <= 2 ||
s.charCodeAt(0) !== 0x2F /* / */ ||
exCharCodeAt(s, -1) !== 0x2F /* / */
) {
return this.normalizeHostnameValue(s, modeBits);
}
const source = this.normalizeRegexPattern(s);
if ( source === '' ) { return ''; }
return `/${source}/`;
}
parseExt(parent, anchorBeg, anchorLen) { parseExt(parent, anchorBeg, anchorLen) {
const parentBeg = this.nodes[parent+NODE_BEG_INDEX]; const parentBeg = this.nodes[parent+NODE_BEG_INDEX];
const parentEnd = this.nodes[parent+NODE_END_INDEX]; const parentEnd = this.nodes[parent+NODE_END_INDEX];
@ -1756,7 +1784,7 @@ export class AstFilterParser {
); );
this.addFlags(AST_FLAG_HAS_OPTIONS); this.addFlags(AST_FLAG_HAS_OPTIONS);
this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next); this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next);
this.linkDown(next, this.parseDomainList(next, ',', 0b1110)); this.linkDown(next, this.parseDomainList(next, ',', 0b01110));
prev = this.linkRight(prev, next); prev = this.linkRight(prev, next);
} }
next = this.allocTypedNode( next = this.allocTypedNode(
@ -2276,7 +2304,6 @@ export class AstFilterParser {
// 0b00010: can use entity-based hostnames // 0b00010: can use entity-based hostnames
// 0b00100: can use single wildcard // 0b00100: can use single wildcard
// 0b01000: can be negated // 0b01000: can be negated
// 0b10000: can be a regex
// //
// returns: // returns:
// undefined: no normalization needed, use original hostname // undefined: no normalization needed, use original hostname

View File

@ -1492,21 +1492,22 @@ const compileDomainOpt = (ctors, iterable, prepend, units) => {
const hostnameMisses = []; const hostnameMisses = [];
const entityHits = []; const entityHits = [];
const entityMisses = []; const entityMisses = [];
const regexHits = [];
const regexMisses = [];
for ( const s of iterable ) { for ( const s of iterable ) {
const len = s.length; const len = s.length;
const beg = len > 1 && s.charCodeAt(0) === 0x7E /* '~' */ ? 1 : 0; const beg = len > 1 && s.charCodeAt(0) === 0x7E /* '~' */ ? 1 : 0;
if ( len <= beg ) { continue; } if ( len <= beg ) { continue; }
if ( s.endsWith('.*') === false ) { if ( s.charCodeAt(beg) === 0x2F /* / */ ) {
if ( beg === 0 ) { if ( beg === 0 ) { regexHits.push(s); continue; }
hostnameHits.push(s); regexMisses.push(s); continue;
} else {
hostnameMisses.push(s.slice(1));
}
} else if ( beg === 0 ) {
entityHits.push(s);
} else {
entityMisses.push(s.slice(1));
} }
if ( s.endsWith('.*') === false ) {
if ( beg === 0 ) { hostnameHits.push(s); continue; }
hostnameMisses.push(s.slice(1)); continue;
}
if ( beg === 0 ) { entityHits.push(s); continue; }
entityMisses.push(s.slice(1)); continue;
} }
const toTrie = []; const toTrie = [];
let trieWhich = 0b00; let trieWhich = 0b00;
@ -1532,6 +1533,9 @@ const compileDomainOpt = (ctors, iterable, prepend, units) => {
for ( const hn of entityHits ) { for ( const hn of entityHits ) {
compiledHit.push(ctors[1].compile(hn)); compiledHit.push(ctors[1].compile(hn));
} }
for ( const hn of regexHits ) {
compiledHit.push(ctors[3].compile(hn));
}
if ( compiledHit.length > 1 ) { if ( compiledHit.length > 1 ) {
compiledHit[0] = FilterDomainHitAny.compile(compiledHit.slice()); compiledHit[0] = FilterDomainHitAny.compile(compiledHit.slice());
compiledHit.length = 1; compiledHit.length = 1;
@ -1550,14 +1554,17 @@ const compileDomainOpt = (ctors, iterable, prepend, units) => {
const compiledMiss = []; const compiledMiss = [];
if ( toTrie.length !== 0 ) { if ( toTrie.length !== 0 ) {
compiledMiss.push( compiledMiss.push(
ctors[5].compile(toTrie.sort(), trieWhich) ctors[6].compile(toTrie.sort(), trieWhich)
); );
} }
for ( const hn of hostnameMisses ) { for ( const hn of hostnameMisses ) {
compiledMiss.push(ctors[3].compile(hn)); compiledMiss.push(ctors[4].compile(hn));
} }
for ( const hn of entityMisses ) { for ( const hn of entityMisses ) {
compiledMiss.push(ctors[4].compile(hn)); compiledMiss.push(ctors[5].compile(hn));
}
for ( const hn of regexMisses ) {
compiledHit.push(ctors[7].compile(hn));
} }
if ( prepend ) { if ( prepend ) {
if ( compiledHit.length !== 0 ) { if ( compiledHit.length !== 0 ) {
@ -1749,6 +1756,47 @@ class FilterDomainHitSet {
/******************************************************************************/ /******************************************************************************/
// Base class for filter units matching a hostname against a regex-based
// domain option value (e.g. `/img[a-z]{3,5}\.buzz/`).
//
// Concrete classes must provide a static getMatchTarget() returning the
// string to test. `fid` and `dnrConditionName` are read from the concrete
// class but are not defined here -- presumably assigned at registration time
// or by the subclass; confirm against the rest of the file.
//
// Layout in filterData:
//   idata+0: fid
//   idata+1: index into filterRefs of { restr, $re }
class FilterDomainRegexHit {
    // Returns the stored regex literal, slashes included, without any `~`.
    static getDomainOpt(idata) {
        const ref = filterRefs[filterData[idata+1]];
        return ref.restr;
    }

    // Returns true when the regex matches the target supplied by the
    // concrete class. The RegExp instance is created on first use and cached
    // in the ref.
    static match(idata) {
        const ref = filterRefs[filterData[idata+1]];
        if ( ref.$re === null ) {
            // Strip the enclosing slashes to get the regex source.
            ref.$re = new RegExp(ref.restr.slice(1,-1));
        }
        return ref.$re.test(this.getMatchTarget());
    }

    // Returns the compiled form `[ fid, restr ]`.
    //
    // Negated values reach this method with their leading `~` still
    // attached. The marker is dropped here so that the stored value is always
    // a bare `/.../` literal: match() relies on slice(1,-1) removing exactly
    // the two slashes, and the "miss" classes add the `~` back themselves
    // when logging.
    static compile(restr) {
        const isNegated = restr.charCodeAt(0) === 0x7E /* '~' */;
        return [ this.fid, isNegated ? restr.slice(1) : restr ];
    }

    // Allocates two filterData slots from the compiled form and returns the
    // index of the first one.
    static fromCompiled(args) {
        const idata = filterDataAllocLen(2);
        filterData[idata+0] = args[0];  // fid
        filterData[idata+1] = filterRefAdd({ restr: args[1], $re: null });
        return idata;
    }

    // Appends the regex literal to the rule condition array named by the
    // concrete class' `dnrConditionName`, creating the condition object and
    // the array as needed.
    static dnrFromCompiled(args, rule) {
        rule.condition = rule.condition || {};
        const prop = this.dnrConditionName;
        if ( rule.condition[prop] === undefined ) {
            rule.condition[prop] = [];
        }
        rule.condition[prop].push(args[1]);
    }

    static dumpInfo(idata) {
        return this.getDomainOpt(idata);
    }
}
/******************************************************************************/
// Implement the following filter option: // Implement the following filter option:
// - domain= // - domain=
// - from= // - from=
@ -1845,20 +1893,44 @@ class FilterFromDomainMissSet extends FilterFromDomainHitSet {
} }
} }
// Regex-based "hit" unit on the "from" side: the regex is tested against
// $docHostname (presumably the hostname of the document issuing the request
// -- confirm where $docHostname is assigned).
class FilterFromRegexHit extends FilterDomainRegexHit {
    static getMatchTarget() {
        const target = $docHostname;
        return target;
    }

    // Records the regex literal in the `fromDomains` list of `details`.
    static logData(idata, details) {
        const domainOpt = this.getDomainOpt(idata);
        details.fromDomains.push(`${domainOpt}`);
    }
}
// Negated counterpart of FilterFromRegexHit: matches when the parent class
// reports no match.
class FilterFromRegexMiss extends FilterFromRegexHit {
    static match(idata) {
        const isHit = super.match(idata);
        return isHit === false;
    }

    // Records the regex literal, prefixed with `~`, in the `fromDomains` list
    // of `details`.
    static logData(idata, details) {
        const domainOpt = this.getDomainOpt(idata);
        details.fromDomains.push(`~${domainOpt}`);
    }
}
registerFilterClass(FilterFromDomainHit); registerFilterClass(FilterFromDomainHit);
registerFilterClass(FilterFromDomainMiss); registerFilterClass(FilterFromDomainMiss);
registerFilterClass(FilterFromEntityHit); registerFilterClass(FilterFromEntityHit);
registerFilterClass(FilterFromEntityMiss); registerFilterClass(FilterFromEntityMiss);
registerFilterClass(FilterFromDomainHitSet); registerFilterClass(FilterFromDomainHitSet);
registerFilterClass(FilterFromDomainMissSet); registerFilterClass(FilterFromDomainMissSet);
registerFilterClass(FilterFromRegexHit);
registerFilterClass(FilterFromRegexMiss);
const fromOptClasses = [ const fromOptClasses = [
FilterFromDomainHit, FilterFromDomainHit,
FilterFromEntityHit, FilterFromEntityHit,
FilterFromDomainHitSet, FilterFromDomainHitSet,
FilterFromRegexHit,
FilterFromDomainMiss, FilterFromDomainMiss,
FilterFromEntityMiss, FilterFromEntityMiss,
FilterFromDomainMissSet, FilterFromDomainMissSet,
FilterFromRegexMiss,
]; ];
const compileFromDomainOpt = (...args) => { const compileFromDomainOpt = (...args) => {
@ -1946,20 +2018,44 @@ class FilterToDomainMissSet extends FilterToDomainHitSet {
} }
} }
// Regex-based "hit" unit on the "to" side: the regex is tested against
// $requestHostname (presumably the hostname of the request being evaluated
// -- confirm where $requestHostname is assigned).
class FilterToRegexHit extends FilterDomainRegexHit {
    static getMatchTarget() {
        const target = $requestHostname;
        return target;
    }

    // Records the regex literal in the `toDomains` list of `details`.
    static logData(idata, details) {
        const domainOpt = this.getDomainOpt(idata);
        details.toDomains.push(`${domainOpt}`);
    }
}
// Negated counterpart of FilterToRegexHit: matches when the parent class
// reports no match.
class FilterToRegexMiss extends FilterToRegexHit {
    static match(idata) {
        const isHit = super.match(idata);
        return isHit === false;
    }

    // Records the regex literal, prefixed with `~`, in the `toDomains` list
    // of `details`.
    static logData(idata, details) {
        const domainOpt = this.getDomainOpt(idata);
        details.toDomains.push(`~${domainOpt}`);
    }
}
registerFilterClass(FilterToDomainHit); registerFilterClass(FilterToDomainHit);
registerFilterClass(FilterToDomainMiss); registerFilterClass(FilterToDomainMiss);
registerFilterClass(FilterToEntityHit); registerFilterClass(FilterToEntityHit);
registerFilterClass(FilterToEntityMiss); registerFilterClass(FilterToEntityMiss);
registerFilterClass(FilterToDomainHitSet); registerFilterClass(FilterToDomainHitSet);
registerFilterClass(FilterToDomainMissSet); registerFilterClass(FilterToDomainMissSet);
registerFilterClass(FilterToRegexHit);
registerFilterClass(FilterToRegexMiss);
const toOptClasses = [ const toOptClasses = [
FilterToDomainHit, FilterToDomainHit,
FilterToEntityHit, FilterToEntityHit,
FilterToDomainHitSet, FilterToDomainHitSet,
FilterToRegexHit,
FilterToDomainMiss, FilterToDomainMiss,
FilterToEntityMiss, FilterToEntityMiss,
FilterToDomainMissSet, FilterToDomainMissSet,
FilterToRegexMiss,
]; ];
const compileToDomainOpt = (...args) => { const compileToDomainOpt = (...args) => {
@ -3678,7 +3774,7 @@ class FilterCompiler {
isJustOrigin() { isJustOrigin() {
if ( this.optionUnitBits !== this.FROM_BIT ) { return false; } if ( this.optionUnitBits !== this.FROM_BIT ) { return false; }
if ( this.isRegex ) { return false; } if ( this.isRegex ) { return false; }
if ( this.fromDomainOpt.includes('~') ) { return false; } if ( /[\/~]/.test(this.fromDomainOpt) ) { return false; }
if ( this.pattern === '*' ) { return true; } if ( this.pattern === '*' ) { return true; }
if ( this.anchor !== 0b010 ) { return false; } if ( this.anchor !== 0b010 ) { return false; }
if ( /^(?:http[s*]?:(?:\/\/)?)$/.test(this.pattern) ) { return true; } if ( /^(?:http[s*]?:(?:\/\/)?)$/.test(this.pattern) ) { return true; }

View File

@ -1030,7 +1030,14 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
parser.parse(line); parser.parse(line);
if ( parser.isFilter() === false ) { continue; } if ( parser.isFilter() === false ) { continue; }
if ( parser.hasError() ) { continue; } if ( parser.hasError() ) {
logger.writeOne({
realm: 'message',
type: 'error',
text: `Invalid filter: ${parser.raw}`
});
continue;
}
if ( parser.isExtendedFilter() ) { if ( parser.isExtendedFilter() ) {
staticExtFilteringEngine.compile(parser, writer); staticExtFilteringEngine.compile(parser, writer);