1
0
mirror of https://github.com/gorhill/uBlock.git synced 2024-09-15 07:22:28 +02:00

Ignore pointless trailing *^ in network filters

There are currently over 160 patterns with such a pointless
trailing `*^` in uBO's filter lists. They ended up being
compiled as generic pattern filters (i.e. regex-based
internally), even though the trailing `*^` accomplishes
nothing: it will always match the end of a URL, since `^`
can also match the end of a URL.

This commit discards pointless trailing `*^` in patterns,
thus allowing most of those filters to be compiled as
plain pattern filters.

The syntax highlighter will reflect that a trailing
`*^` is pointless.
This commit is contained in:
Raymond Hill 2021-12-11 09:45:25 -05:00
parent ca1ec1461b
commit 3b7a265ee2
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
2 changed files with 138 additions and 128 deletions

View File

@ -471,76 +471,78 @@ const Parser = class {
} }
} }
// If the pattern is a regex, remember this. // Assume no anchors.
this.patternLeftAnchorSpan.i = this.patternSpan.i;
this.patternRightAnchorSpan.i = this.optionsAnchorSpan.i;
// Skip all else if pattern is a regex
if ( patternIsRegex ) { if ( patternIsRegex ) {
this.patternBits = this.bitsFromSpan(this.patternSpan);
this.flavorBits |= BITFlavorNetRegex; this.flavorBits |= BITFlavorNetRegex;
this.category = CATStaticNetFilter;
return;
} }
// Refine by processing pattern anchors. // Refine by processing pattern anchors.
// //
// Assume no anchors.
this.patternLeftAnchorSpan.i = this.patternSpan.i;
this.patternRightAnchorSpan.i = this.optionsAnchorSpan.i;
// Not a regex, there might be anchors. // Not a regex, there might be anchors.
if ( patternIsRegex === false ) { // Left anchor?
// Left anchor? // `|`: anchor to start of URL
// `|`: anchor to start of URL // `||`: anchor to left of a hostname label
// `||`: anchor to left of a hostname label if (
if ( this.patternSpan.len !== 0 &&
this.patternSpan.len !== 0 && hasBits(this.slices[this.patternSpan.i], BITPipe)
hasBits(this.slices[this.patternSpan.i], BITPipe) ) {
) { this.patternLeftAnchorSpan.len = 3;
this.patternLeftAnchorSpan.len = 3; const len = this.slices[this.patternSpan.i+2];
const len = this.slices[this.patternSpan.i+2]; // |||*, ... => ||, |*, ...
// |||*, ... => ||, |*, ... if ( len > 2 ) {
if ( len > 2 ) { this.splitSlot(this.patternSpan.i, 2);
this.splitSlot(this.patternSpan.i, 2); } else {
this.patternSpan.len -= 3;
}
this.patternSpan.i += 3;
this.flavorBits |= len === 1
? BITFlavorNetLeftURLAnchor
: BITFlavorNetLeftHnAnchor;
}
// Right anchor?
// `|`: anchor to end of URL
// `^`: anchor to end of hostname, when other conditions are
// fulfilled:
// the pattern is hostname-anchored on the left
// the pattern is made only of hostname characters
if ( this.patternSpan.len !== 0 ) {
const lastPatternSlice = this.patternSpan.len > 3
? this.patternRightAnchorSpan.i - 3
: this.patternSpan.i;
const bits = this.slices[lastPatternSlice];
if ( (bits & BITPipe) !== 0 ) {
this.patternRightAnchorSpan.i = lastPatternSlice;
this.patternRightAnchorSpan.len = 3;
const len = this.slices[this.patternRightAnchorSpan.i+2];
// ..., ||* => ..., |*, |
if ( len > 1 ) {
this.splitSlot(this.patternRightAnchorSpan.i, len - 1);
this.patternRightAnchorSpan.i += 3;
} else { } else {
this.patternSpan.len -= 3; this.patternSpan.len -= 3;
} }
this.patternSpan.i += 3; this.flavorBits |= BITFlavorNetRightURLAnchor;
this.flavorBits |= len === 1 } else if (
? BITFlavorNetLeftURLAnchor hasBits(bits, BITCaret) &&
: BITFlavorNetLeftHnAnchor; this.slices[lastPatternSlice+2] === 1 &&
} hasBits(this.flavorBits, BITFlavorNetLeftHnAnchor) &&
// Right anchor? this.skipUntilNot(
// `|`: anchor to end of URL this.patternSpan.i,
// `^`: anchor to end of hostname, when other conditions are lastPatternSlice,
// fulfilled: BITHostname
// the pattern is hostname-anchored on the left ) === lastPatternSlice
// the pattern is made only of hostname characters ) {
if ( this.patternSpan.len !== 0 ) { this.patternRightAnchorSpan.i = lastPatternSlice;
const lastPatternSlice = this.patternSpan.len > 3 this.patternRightAnchorSpan.len = 3;
? this.patternRightAnchorSpan.i - 3 this.patternSpan.len -= 3;
: this.patternSpan.i; this.flavorBits |= BITFlavorNetRightHnAnchor;
const bits = this.slices[lastPatternSlice];
if ( (bits & BITPipe) !== 0 ) {
this.patternRightAnchorSpan.i = lastPatternSlice;
this.patternRightAnchorSpan.len = 3;
const len = this.slices[this.patternRightAnchorSpan.i+2];
// ..., ||* => ..., |*, |
if ( len > 1 ) {
this.splitSlot(this.patternRightAnchorSpan.i, len - 1);
this.patternRightAnchorSpan.i += 3;
} else {
this.patternSpan.len -= 3;
}
this.flavorBits |= BITFlavorNetRightURLAnchor;
} else if (
hasBits(bits, BITCaret) &&
this.slices[lastPatternSlice+2] === 1 &&
hasBits(this.flavorBits, BITFlavorNetLeftHnAnchor) &&
this.skipUntilNot(
this.patternSpan.i,
lastPatternSlice,
BITHostname
) === lastPatternSlice
) {
this.patternRightAnchorSpan.i = lastPatternSlice;
this.patternRightAnchorSpan.len = 3;
this.patternSpan.len -= 3;
this.flavorBits |= BITFlavorNetRightHnAnchor;
}
} }
} }
@ -553,16 +555,16 @@ const Parser = class {
// the part following the space character. // the part following the space character.
// https://github.com/uBlockOrigin/uBlock-issues/issues/1118 // https://github.com/uBlockOrigin/uBlock-issues/issues/1118
// Patterns with more than one space are dubious. // Patterns with more than one space are dubious.
{ if ( hasBits(this.allBits, BITSpace) ) {
const { i, len } = this.patternSpan; const { i, len } = this.patternSpan;
const noOptionsAnchor = this.optionsAnchorSpan.len === 0; const noOptionsAnchor = this.optionsAnchorSpan.len === 0;
let j = len; let j = len;
for (;;) { for (;;) {
if ( j === 0 ) { break; } if ( j === 0 ) { break; }
j -= 3; j -= 3;
const bits = this.slices[i+j]; if ( noOptionsAnchor && hasBits(this.slices[i+j], BITSpace) ) {
if ( noOptionsAnchor && hasBits(bits, BITSpace) ) { break; } break;
this.patternBits |= bits; }
} }
if ( j !== 0 ) { if ( j !== 0 ) {
const sink = this.strFromSlices(this.patternSpan.i, j - 3); const sink = this.strFromSlices(this.patternSpan.i, j - 3);
@ -587,87 +589,88 @@ const Parser = class {
} }
} }
// Pointless wildcards and anchoring: // Pointless wildcards:
// - Eliminate leading wildcard not followed by a pattern token slice // - Eliminate leading wildcard not followed by a pattern token slice
// - Eliminate trailing wildcard not preceded by a pattern token slice // - Eliminate trailing wildcard not preceded by a pattern token slice
// - Eliminate pattern anchoring when irrelevant // - Eliminate pointless trailing asterisk-caret (`*^`)
// //
// Leading wildcard history: // Leading wildcard history:
// https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448 // https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
// Remove pointless leading *. // Remove pointless leading *.
// https://github.com/gorhill/uBlock/issues/3034 if ( hasBits(this.allBits, BITAsterisk) ) {
// We can remove anchoring if we need to match all at the start.
//
// Trailing wildcard history:
// https://github.com/gorhill/uBlock/issues/3034
// We can remove anchoring if we need to match all at the end.
{
let { i, len } = this.patternSpan; let { i, len } = this.patternSpan;
let pattern = this.strFromSpan(this.patternSpan);
// Pointless leading wildcard // Pointless leading wildcard
if ( if ( /^\*+[^0-9a-z%]/.test(pattern) ) {
len > 3 &&
hasBits(this.slices[i], BITAsterisk) &&
hasNoBits(this.slices[i+3], BITPatternToken)
) {
this.slices[i] |= BITIgnore; this.slices[i] |= BITIgnore;
i += 3; len -= 3; this.patternSpan.i = (i += 3);
this.patternSpan.i = i; this.patternSpan.len = (len -= 3);
this.patternSpan.len = len; pattern = this.strFromSpan(this.patternSpan);
// We can ignore left-hand pattern anchor
if ( this.patternLeftAnchorSpan.len !== 0 ) {
this.slices[this.patternLeftAnchorSpan.i] |= BITIgnore;
this.flavorBits &= ~BITFlavorNetLeftAnchor;
}
} }
// Pointless trailing wildcard // Pointless trailing wildcard
if ( if ( /([^0-9a-z%]|[0-9a-z%]{7,})\*+$/.test(pattern) ) {
len > 3 && this.patternSpan.len = (len -= 3);
hasBits(this.slices[i+len-3], BITAsterisk) && pattern = this.strFromSpan(this.patternSpan);
hasNoBits(this.slices[i+len-6], BITPatternToken)
) {
// Ignore only if the pattern would not end up looking like // Ignore only if the pattern would not end up looking like
// a regex. // a regex.
if ( if ( /^\/.+\/$/.test(pattern) === false ) {
hasNoBits(this.slices[i], BITSlash) || this.slices[i+len] |= BITIgnore;
hasNoBits(this.slices[i+len-6], BITSlash)
) {
this.slices[i+len-3] |= BITIgnore;
} }
len -= 3;
this.patternSpan.len = len;
// We can ignore right-hand pattern anchor // We can ignore right-hand pattern anchor
if ( this.patternRightAnchorSpan.len !== 0 ) { if ( this.patternRightAnchorSpan.len !== 0 ) {
this.slices[this.patternRightAnchorSpan.i] |= BITIgnore; this.slices[this.patternRightAnchorSpan.i] |= BITIgnore;
this.flavorBits &= ~BITFlavorNetRightAnchor; this.flavorBits &= ~BITFlavorNetRightAnchor;
} }
} }
// Pointless trailing caret (when preceded by a wildcard) // Pointless trailing asterisk-caret: `..*^`, `..*^|`
// TODO if ( hasBits(this.allBits, BITCaret) && /\*+\^$/.test(pattern) ) {
// this.slices[i+len-3] |= BITIgnore;
// Pointless left-hand pattern anchoring this.slices[i+len-6] |= BITIgnore;
this.patternSpan.len = (len -= 6);
pattern = this.strFromSpan(this.patternSpan);
// We can ignore right-hand pattern anchor
if ( this.patternRightAnchorSpan.len !== 0 ) {
this.slices[this.patternRightAnchorSpan.i] |= BITIgnore;
this.flavorBits &= ~BITFlavorNetRightAnchor;
}
}
}
// Pointless left-hand pattern anchoring
//
// Leading wildcard history:
// https://github.com/gorhill/uBlock/issues/3034
// We can remove anchoring if we need to match all at the start.
if ( hasBits(this.flavorBits, BITFlavorNetLeftAnchor) ) {
const i = this.patternLeftAnchorSpan.i;
if ( if (
( this.patternSpan.len === 0 ||
len === 0 || hasBits(this.slices[i+3], BITIgnore|BITAsterisk)
len !== 0 && hasBits(this.slices[i], BITAsterisk)
) &&
hasBits(this.flavorBits, BITFlavorNetLeftAnchor)
) { ) {
this.slices[this.patternLeftAnchorSpan.i] |= BITIgnore; this.slices[i] |= BITIgnore;
this.flavorBits &= ~BITFlavorNetLeftAnchor; this.flavorBits &= ~BITFlavorNetLeftAnchor;
} }
// Pointless right-hand pattern anchoring }
// Pointless right-hand pattern anchoring
//
// Trailing wildcard history:
// https://github.com/gorhill/uBlock/issues/3034
// We can remove anchoring if we need to match all at the end.
if ( hasBits(this.flavorBits, BITFlavorNetRightAnchor) ) {
const i = this.patternLeftAnchorSpan;
if ( if (
( this.patternSpan.len === 0 ||
len === 0 || hasBits(this.slices[i-3], BITIgnore|BITAsterisk)
len !== 0 && hasBits(this.slices[i+len-3], BITAsterisk)
) &&
hasBits(this.flavorBits, BITFlavorNetRightAnchor)
) { ) {
this.slices[this.patternRightAnchorSpan.i] |= BITIgnore; this.slices[i] |= BITIgnore;
this.flavorBits &= ~BITFlavorNetRightAnchor; this.flavorBits &= ~BITFlavorNetRightAnchor;
} }
} }
// Collate effective pattern bits
this.patternBits = this.bitsFromSpan(this.patternSpan);
this.category = CATStaticNetFilter; this.category = CATStaticNetFilter;
} }
@ -1177,6 +1180,15 @@ const Parser = class {
return true; return true;
} }
bitsFromSpan(span) {
const { i, len } = span;
let bits = 0;
for ( let j = 0; j < len; j += 3 ) {
bits |= this.slices[i+j];
}
return bits;
}
hasFlavor(bits) { hasFlavor(bits) {
return hasBits(this.flavorBits, bits); return hasBits(this.flavorBits, bits);
} }

View File

@ -3254,19 +3254,17 @@ class FilterCompiler {
units.push(FilterPatternGeneric.compile(this)); units.push(FilterPatternGeneric.compile(this));
return; return;
} }
if ( this.wildcardPos === -1 && this.caretPos === -1 ) { if ( this.wildcardPos === -1 ) {
units.push(FilterPatternPlain.compile(this)); if ( this.caretPos === -1 ) {
return; units.push(FilterPatternPlain.compile(this));
} return;
// Optimize special case: plain pattern with trailing caret }
if ( if ( this.caretPos === (this.pattern.length - 1) ) {
this.wildcardPos === -1 && this.pattern = this.pattern.slice(0, -1);
this.caretPos === (this.pattern.length - 1) units.push(FilterPatternPlain.compile(this));
) { units.push(FilterTrailingSeparator.compile());
this.pattern = this.pattern.slice(0, -1); return;
units.push(FilterPatternPlain.compile(this)); }
units.push(FilterTrailingSeparator.compile());
return;
} }
units.push(FilterPatternGeneric.compile(this)); units.push(FilterPatternGeneric.compile(this));
} }