mirror of
https://github.com/gorhill/uBlock.git
synced 2024-11-07 03:12:33 +01:00
Ignore pointless trailing `*^`
in network filters
There are currently over 160 patterns with such a pointless trailing `*^` in uBO's filter lists. These ended up being compiled as generic pattern filters (i.e. regex-based internally), even though the trailing `*^` accomplishes nothing: it will always match the end of a URL, since `^` can also match the end of a URL. This commit discards the pointless trailing `*^` in patterns, thus allowing most of those filters to be compiled as plain pattern filters. The syntax highlighter will reflect that a trailing `*^` is pointless.
This commit is contained in:
parent
ca1ec1461b
commit
3b7a265ee2
@ -471,18 +471,21 @@ const Parser = class {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the pattern is a regex, remember this.
|
// Assume no anchors.
|
||||||
|
this.patternLeftAnchorSpan.i = this.patternSpan.i;
|
||||||
|
this.patternRightAnchorSpan.i = this.optionsAnchorSpan.i;
|
||||||
|
|
||||||
|
// Skip all else if pattern is a regex
|
||||||
if ( patternIsRegex ) {
|
if ( patternIsRegex ) {
|
||||||
|
this.patternBits = this.bitsFromSpan(this.patternSpan);
|
||||||
this.flavorBits |= BITFlavorNetRegex;
|
this.flavorBits |= BITFlavorNetRegex;
|
||||||
|
this.category = CATStaticNetFilter;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Refine by processing pattern anchors.
|
// Refine by processing pattern anchors.
|
||||||
//
|
//
|
||||||
// Assume no anchors.
|
|
||||||
this.patternLeftAnchorSpan.i = this.patternSpan.i;
|
|
||||||
this.patternRightAnchorSpan.i = this.optionsAnchorSpan.i;
|
|
||||||
// Not a regex, there might be anchors.
|
// Not a regex, there might be anchors.
|
||||||
if ( patternIsRegex === false ) {
|
|
||||||
// Left anchor?
|
// Left anchor?
|
||||||
// `|`: anchor to start of URL
|
// `|`: anchor to start of URL
|
||||||
// `||`: anchor to left of a hostname label
|
// `||`: anchor to left of a hostname label
|
||||||
@ -542,7 +545,6 @@ const Parser = class {
|
|||||||
this.flavorBits |= BITFlavorNetRightHnAnchor;
|
this.flavorBits |= BITFlavorNetRightHnAnchor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Collate useful pattern bits information for further use.
|
// Collate useful pattern bits information for further use.
|
||||||
//
|
//
|
||||||
@ -553,16 +555,16 @@ const Parser = class {
|
|||||||
// the part following the space character.
|
// the part following the space character.
|
||||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/1118
|
// https://github.com/uBlockOrigin/uBlock-issues/issues/1118
|
||||||
// Patterns with more than one space are dubious.
|
// Patterns with more than one space are dubious.
|
||||||
{
|
if ( hasBits(this.allBits, BITSpace) ) {
|
||||||
const { i, len } = this.patternSpan;
|
const { i, len } = this.patternSpan;
|
||||||
const noOptionsAnchor = this.optionsAnchorSpan.len === 0;
|
const noOptionsAnchor = this.optionsAnchorSpan.len === 0;
|
||||||
let j = len;
|
let j = len;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if ( j === 0 ) { break; }
|
if ( j === 0 ) { break; }
|
||||||
j -= 3;
|
j -= 3;
|
||||||
const bits = this.slices[i+j];
|
if ( noOptionsAnchor && hasBits(this.slices[i+j], BITSpace) ) {
|
||||||
if ( noOptionsAnchor && hasBits(bits, BITSpace) ) { break; }
|
break;
|
||||||
this.patternBits |= bits;
|
}
|
||||||
}
|
}
|
||||||
if ( j !== 0 ) {
|
if ( j !== 0 ) {
|
||||||
const sink = this.strFromSlices(this.patternSpan.i, j - 3);
|
const sink = this.strFromSlices(this.patternSpan.i, j - 3);
|
||||||
@ -587,86 +589,87 @@ const Parser = class {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pointless wildcards and anchoring:
|
// Pointless wildcards:
|
||||||
// - Eliminate leading wildcard not followed by a pattern token slice
|
// - Eliminate leading wildcard not followed by a pattern token slice
|
||||||
// - Eliminate trailing wildcard not preceded by a pattern token slice
|
// - Eliminate trailing wildcard not preceded by a pattern token slice
|
||||||
// - Eliminate pattern anchoring when irrelevant
|
// - Eliminate pointless trailing asterisk-caret (`*^`)
|
||||||
//
|
//
|
||||||
// Leading wildcard history:
|
// Leading wildcard history:
|
||||||
// https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
|
// https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
|
||||||
// Remove pointless leading *.
|
// Remove pointless leading *.
|
||||||
// https://github.com/gorhill/uBlock/issues/3034
|
if ( hasBits(this.allBits, BITAsterisk) ) {
|
||||||
// We can remove anchoring if we need to match all at the start.
|
|
||||||
//
|
|
||||||
// Trailing wildcard history:
|
|
||||||
// https://github.com/gorhill/uBlock/issues/3034
|
|
||||||
// We can remove anchoring if we need to match all at the end.
|
|
||||||
{
|
|
||||||
let { i, len } = this.patternSpan;
|
let { i, len } = this.patternSpan;
|
||||||
|
let pattern = this.strFromSpan(this.patternSpan);
|
||||||
// Pointless leading wildcard
|
// Pointless leading wildcard
|
||||||
if (
|
if ( /^\*+[^0-9a-z%]/.test(pattern) ) {
|
||||||
len > 3 &&
|
|
||||||
hasBits(this.slices[i], BITAsterisk) &&
|
|
||||||
hasNoBits(this.slices[i+3], BITPatternToken)
|
|
||||||
) {
|
|
||||||
this.slices[i] |= BITIgnore;
|
this.slices[i] |= BITIgnore;
|
||||||
i += 3; len -= 3;
|
this.patternSpan.i = (i += 3);
|
||||||
this.patternSpan.i = i;
|
this.patternSpan.len = (len -= 3);
|
||||||
this.patternSpan.len = len;
|
pattern = this.strFromSpan(this.patternSpan);
|
||||||
// We can ignore left-hand pattern anchor
|
|
||||||
if ( this.patternLeftAnchorSpan.len !== 0 ) {
|
|
||||||
this.slices[this.patternLeftAnchorSpan.i] |= BITIgnore;
|
|
||||||
this.flavorBits &= ~BITFlavorNetLeftAnchor;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Pointless trailing wildcard
|
// Pointless trailing wildcard
|
||||||
if (
|
if ( /([^0-9a-z%]|[0-9a-z%]{7,})\*+$/.test(pattern) ) {
|
||||||
len > 3 &&
|
this.patternSpan.len = (len -= 3);
|
||||||
hasBits(this.slices[i+len-3], BITAsterisk) &&
|
pattern = this.strFromSpan(this.patternSpan);
|
||||||
hasNoBits(this.slices[i+len-6], BITPatternToken)
|
|
||||||
) {
|
|
||||||
// Ignore only if the pattern would not end up looking like
|
// Ignore only if the pattern would not end up looking like
|
||||||
// a regex.
|
// a regex.
|
||||||
if (
|
if ( /^\/.+\/$/.test(pattern) === false ) {
|
||||||
hasNoBits(this.slices[i], BITSlash) ||
|
this.slices[i+len] |= BITIgnore;
|
||||||
hasNoBits(this.slices[i+len-6], BITSlash)
|
|
||||||
) {
|
|
||||||
this.slices[i+len-3] |= BITIgnore;
|
|
||||||
}
|
}
|
||||||
len -= 3;
|
|
||||||
this.patternSpan.len = len;
|
|
||||||
// We can ignore right-hand pattern anchor
|
// We can ignore right-hand pattern anchor
|
||||||
if ( this.patternRightAnchorSpan.len !== 0 ) {
|
if ( this.patternRightAnchorSpan.len !== 0 ) {
|
||||||
this.slices[this.patternRightAnchorSpan.i] |= BITIgnore;
|
this.slices[this.patternRightAnchorSpan.i] |= BITIgnore;
|
||||||
this.flavorBits &= ~BITFlavorNetRightAnchor;
|
this.flavorBits &= ~BITFlavorNetRightAnchor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Pointless trailing caret (when preceded by a wildcard)
|
// Pointless trailing asterisk-caret: `..*^`, `..*^|`
|
||||||
// TODO
|
if ( hasBits(this.allBits, BITCaret) && /\*+\^$/.test(pattern) ) {
|
||||||
//
|
this.slices[i+len-3] |= BITIgnore;
|
||||||
// Pointless left-hand pattern anchoring
|
this.slices[i+len-6] |= BITIgnore;
|
||||||
if (
|
this.patternSpan.len = (len -= 6);
|
||||||
(
|
pattern = this.strFromSpan(this.patternSpan);
|
||||||
len === 0 ||
|
// We can ignore right-hand pattern anchor
|
||||||
len !== 0 && hasBits(this.slices[i], BITAsterisk)
|
if ( this.patternRightAnchorSpan.len !== 0 ) {
|
||||||
) &&
|
|
||||||
hasBits(this.flavorBits, BITFlavorNetLeftAnchor)
|
|
||||||
) {
|
|
||||||
this.slices[this.patternLeftAnchorSpan.i] |= BITIgnore;
|
|
||||||
this.flavorBits &= ~BITFlavorNetLeftAnchor;
|
|
||||||
}
|
|
||||||
// Pointless right-hand pattern anchoring
|
|
||||||
if (
|
|
||||||
(
|
|
||||||
len === 0 ||
|
|
||||||
len !== 0 && hasBits(this.slices[i+len-3], BITAsterisk)
|
|
||||||
) &&
|
|
||||||
hasBits(this.flavorBits, BITFlavorNetRightAnchor)
|
|
||||||
) {
|
|
||||||
this.slices[this.patternRightAnchorSpan.i] |= BITIgnore;
|
this.slices[this.patternRightAnchorSpan.i] |= BITIgnore;
|
||||||
this.flavorBits &= ~BITFlavorNetRightAnchor;
|
this.flavorBits &= ~BITFlavorNetRightAnchor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pointless left-hand pattern anchoring
|
||||||
|
//
|
||||||
|
// Leading wildcard history:
|
||||||
|
// https://github.com/gorhill/uBlock/issues/3034
|
||||||
|
// We can remove anchoring if we need to match all at the start.
|
||||||
|
if ( hasBits(this.flavorBits, BITFlavorNetLeftAnchor) ) {
|
||||||
|
const i = this.patternLeftAnchorSpan.i;
|
||||||
|
if (
|
||||||
|
this.patternSpan.len === 0 ||
|
||||||
|
hasBits(this.slices[i+3], BITIgnore|BITAsterisk)
|
||||||
|
) {
|
||||||
|
this.slices[i] |= BITIgnore;
|
||||||
|
this.flavorBits &= ~BITFlavorNetLeftAnchor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pointless right-hand pattern anchoring
|
||||||
|
//
|
||||||
|
// Trailing wildcard history:
|
||||||
|
// https://github.com/gorhill/uBlock/issues/3034
|
||||||
|
// We can remove anchoring if we need to match all at the end.
|
||||||
|
if ( hasBits(this.flavorBits, BITFlavorNetRightAnchor) ) {
|
||||||
|
const i = this.patternLeftAnchorSpan;
|
||||||
|
if (
|
||||||
|
this.patternSpan.len === 0 ||
|
||||||
|
hasBits(this.slices[i-3], BITIgnore|BITAsterisk)
|
||||||
|
) {
|
||||||
|
this.slices[i] |= BITIgnore;
|
||||||
|
this.flavorBits &= ~BITFlavorNetRightAnchor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collate effective pattern bits
|
||||||
|
this.patternBits = this.bitsFromSpan(this.patternSpan);
|
||||||
|
|
||||||
this.category = CATStaticNetFilter;
|
this.category = CATStaticNetFilter;
|
||||||
}
|
}
|
||||||
@ -1177,6 +1180,15 @@ const Parser = class {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bitsFromSpan(span) {
|
||||||
|
const { i, len } = span;
|
||||||
|
let bits = 0;
|
||||||
|
for ( let j = 0; j < len; j += 3 ) {
|
||||||
|
bits |= this.slices[i+j];
|
||||||
|
}
|
||||||
|
return bits;
|
||||||
|
}
|
||||||
|
|
||||||
hasFlavor(bits) {
|
hasFlavor(bits) {
|
||||||
return hasBits(this.flavorBits, bits);
|
return hasBits(this.flavorBits, bits);
|
||||||
}
|
}
|
||||||
|
@ -3254,20 +3254,18 @@ class FilterCompiler {
|
|||||||
units.push(FilterPatternGeneric.compile(this));
|
units.push(FilterPatternGeneric.compile(this));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if ( this.wildcardPos === -1 && this.caretPos === -1 ) {
|
if ( this.wildcardPos === -1 ) {
|
||||||
|
if ( this.caretPos === -1 ) {
|
||||||
units.push(FilterPatternPlain.compile(this));
|
units.push(FilterPatternPlain.compile(this));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Optimize special case: plain pattern with trailing caret
|
if ( this.caretPos === (this.pattern.length - 1) ) {
|
||||||
if (
|
|
||||||
this.wildcardPos === -1 &&
|
|
||||||
this.caretPos === (this.pattern.length - 1)
|
|
||||||
) {
|
|
||||||
this.pattern = this.pattern.slice(0, -1);
|
this.pattern = this.pattern.slice(0, -1);
|
||||||
units.push(FilterPatternPlain.compile(this));
|
units.push(FilterPatternPlain.compile(this));
|
||||||
units.push(FilterTrailingSeparator.compile());
|
units.push(FilterTrailingSeparator.compile());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
units.push(FilterPatternGeneric.compile(this));
|
units.push(FilterPatternGeneric.compile(this));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user