Add support for "specific-generic" cosmetic filters

A specific cosmetic filter of the following form...

    *##.selector

... will be unconditionally injected into all web pages,
whereas a cosmetic filter of the form...

    ##.selector

... would be injected only when uBO's DOM surveyor finds
at least one matching element in a web page.

The new specific-generic form will also be disabled when a
web page is subject to a `generichide` exception filter,
since the filter is essentially a generic one -- the only
difference from the usual generic form is that the filter
is injected unconditionally instead of through the DOM
surveyor.

Specific-generic cosmetic filters will NOT be discarded
when checking the "Ignore generic cosmetic filters"
option in the "Filter lists" pane -- since the purpose
of this option is primarily to disable the DOM surveyor.

Specific-generic cosmetic filters should be used
parsimoniously and only when using a normal specific
filter is really impractical.

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/803
This commit is contained in:
Raymond Hill 2020-02-14 11:51:05 -05:00
parent 32b9db67af
commit 3fab7bfdb4
No known key found for this signature in database
GPG key ID: 25E1490B761470C2
3 changed files with 63 additions and 41 deletions

View file

@ -191,7 +191,7 @@ const FilterContainer = function() {
this.rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/;
this.rePlainSelectorEx = /^[^#.\[(]+([#.][\w-]+)|([#.][\w-]+)$/;
this.reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g;
this.reSimpleHighGeneric1 = /^[a-z]*\[[^[]+]$/;
this.reSimpleHighGeneric = /^(?:[a-z]*\[[^\]]+\]|\S+)$/;
this.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/;
this.selectorCache = new Map();
@ -274,8 +274,8 @@ FilterContainer.prototype.reset = function() {
this.selectorCacheTimer = null;
}
// generic filters
this.hasGenericHide = false;
// whether there is at least one surveyor-based filter
this.needDOMSurveyor = false;
// hostname, entity-based filters
this.specificFilters.clear();
@ -301,13 +301,11 @@ FilterContainer.prototype.freeze = function() {
this.duplicateBuster.clear();
this.specificFilters.collectGarbage();
this.hasGenericHide =
this.needDOMSurveyor =
this.lowlyGeneric.id.simple.size !== 0 ||
this.lowlyGeneric.id.complex.size !== 0 ||
this.lowlyGeneric.cl.simple.size !== 0 ||
this.lowlyGeneric.cl.complex.size !== 0 ||
this.highlyGeneric.simple.dict.size !== 0 ||
this.highlyGeneric.complex.dict.size !== 0;
this.lowlyGeneric.cl.complex.size !== 0;
this.highlyGeneric.simple.str = Array.from(this.highlyGeneric.simple.dict).join(',\n');
this.highlyGeneric.simple.mru.reset();
@ -333,8 +331,8 @@ FilterContainer.prototype.keyFromSelector = function(selector) {
matches = this.rePlainSelectorEscaped.exec(selector);
if ( matches === null ) { return; }
key = '';
let escaped = matches[0],
beg = 0;
const escaped = matches[0];
let beg = 0;
this.reEscapeSequence.lastIndex = 0;
for (;;) {
matches = this.reEscapeSequence.exec(escaped);
@ -402,22 +400,19 @@ FilterContainer.prototype.compileGenericHideSelector = function(
const type = selector.charCodeAt(0);
let key;
// Simple selector-based CSS rule: no need to test for whether the
// selector is valid, the regex took care of this. Most generic selector
// falls into that category:
// - ###ad-bigbox
// - ##.ads-bigbox
if ( type === 0x23 /* '#' */ ) {
key = this.keyFromSelector(selector);
// Simple selector-based CSS rule: no need to test for whether the
// selector is valid, the regex took care of this. Most generic
// selector falls into that category.
// - ###ad-bigbox
if ( key === selector ) {
writer.push([ 0, key.slice(1) ]);
return;
}
} else if ( type === 0x2E /* '.' */ ) {
key = this.keyFromSelector(selector);
// Simple selector-based CSS rule: no need to test for whether the
// selector is valid, the regex took care of this. Most generic
// selector falls into that category.
// - ##.ads-bigbox
if ( key === selector ) {
writer.push([ 2, key.slice(1) ]);
return;
@ -484,12 +479,7 @@ FilterContainer.prototype.compileGenericHideSelector = function(
// For efficiency purpose, we will distinguish between simple and complex
// selectors.
if ( this.reSimpleHighGeneric1.test(selector) ) {
writer.push([ 4 /* simple */, selector ]);
return;
}
if ( selector.indexOf(' ') === -1 ) {
if ( this.reSimpleHighGeneric.test(selector) ) {
writer.push([ 4 /* simple */, selector ]);
} else {
writer.push([ 5 /* complex */, selector ]);
@ -551,10 +541,13 @@ FilterContainer.prototype.compileSpecificSelector = function(
let kind = 0;
if ( unhide === 1 ) {
kind |= 0b01; // Exception
kind |= 0b001; // Exception
}
if ( compiled.charCodeAt(0) === 0x7B /* '{' */ ) {
kind |= 0b10; // Procedural
kind |= 0b010; // Procedural
}
if ( hostname === '*' ) {
kind |= 0b100; // Applies everywhere
}
writer.push([ 8, hostname, kind, compiled ]);
@ -637,8 +630,21 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
// hash, example.com, .promoted-tweet
// hash, example.*, .promoted-tweet
//
// https://github.com/uBlockOrigin/uBlock-issues/issues/803
// Handle specific filters meant to apply everywhere, i.e. selectors
// not to be injected conditionally through the DOM surveyor.
// hash, *, .promoted-tweet
case 8:
this.specificFilters.store(args[1], args[2], args[3]);
if ( args[2] === 0b100 ) {
if ( this.reSimpleHighGeneric.test(args[3]) )
this.highlyGeneric.simple.dict.add(args[3]);
else {
this.highlyGeneric.complex.dict.add(args[3]);
}
break;
}
this.specificFilters.store(args[1], args[2] & 0b011, args[3]);
break;
default:
@ -666,11 +672,21 @@ FilterContainer.prototype.skipGenericCompiledContent = function(reader) {
switch ( args[0] ) {
// hash, example.com, .promoted-tweet
// hash, example.*, .promoted-tweet
// https://github.com/uBlockOrigin/uBlock-issues/issues/803
// Handle specific filters meant to apply everywhere, i.e. selectors
// not to be injected conditionally through the DOM surveyor.
// hash, *, .promoted-tweet
case 8:
this.duplicateBuster.add(fingerprint);
this.specificFilters.store(args[1], args[2], args[3]);
if ( args[2] === 0b100 ) {
if ( this.reSimpleHighGeneric.test(args[3]) )
this.highlyGeneric.simple.dict.add(args[3]);
else {
this.highlyGeneric.complex.dict.add(args[3]);
}
break;
}
this.specificFilters.store(args[1], args[2] & 0b011, args[3]);
break;
default:
@ -699,7 +715,6 @@ FilterContainer.prototype.toSelfie = function() {
acceptedCount: this.acceptedCount,
discardedCount: this.discardedCount,
specificFilters: this.specificFilters.toSelfie(),
hasGenericHide: this.hasGenericHide,
lowlyGenericSID: Array.from(this.lowlyGeneric.id.simple),
lowlyGenericCID: Array.from(this.lowlyGeneric.id.complex),
lowlyGenericSCL: Array.from(this.lowlyGeneric.cl.simple),
@ -715,7 +730,6 @@ FilterContainer.prototype.fromSelfie = function(selfie) {
this.acceptedCount = selfie.acceptedCount;
this.discardedCount = selfie.discardedCount;
this.specificFilters.fromSelfie(selfie.specificFilters);
this.hasGenericHide = selfie.hasGenericHide;
this.lowlyGeneric.id.simple = new Set(selfie.lowlyGenericSID);
this.lowlyGeneric.id.complex = new Map(selfie.lowlyGenericCID);
this.lowlyGeneric.cl.simple = new Set(selfie.lowlyGenericSCL);
@ -724,6 +738,11 @@ FilterContainer.prototype.fromSelfie = function(selfie) {
this.highlyGeneric.simple.str = selfie.highSimpleGenericHideArray.join(',\n');
this.highlyGeneric.complex.dict = new Set(selfie.highComplexGenericHideArray);
this.highlyGeneric.complex.str = selfie.highComplexGenericHideArray.join(',\n');
this.needDOMSurveyor =
selfie.lowlyGenericSID.length !== 0 ||
selfie.lowlyGenericCID.length !== 0 ||
selfie.lowlyGenericSCL.length !== 0 ||
selfie.lowlyGenericCCL.length !== 0;
this.frozen = true;
};
@ -986,7 +1005,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
highGenericHideComplex: '',
injectedHideFilters: '',
networkFilters: '',
noDOMSurveying: this.hasGenericHide === false,
noDOMSurveying: this.needDOMSurveyor === false,
proceduralFilters: []
};

View file

@ -128,22 +128,25 @@ const fromCosmeticFilter = function(details) {
return a.length > b.length ? a : b;
});
const regexFromLabels = (hn, suffix) =>
const regexFromLabels = (prefix, hn, suffix) =>
new RegExp(
'^' +
prefix +
hn.split('.').reduce((acc, item) => `(${acc}\\.)?${item}`) +
suffix
);
const reHostname = regexFromLabels(hostname, '$');
// https://github.com/uBlockOrigin/uBlock-issues/issues/803
// Support looking up selectors of the form `*##...`
const reHostname = regexFromLabels('^', hostname, '$');
let reEntity;
{
const domain = details.domain;
const pos = domain.indexOf('.');
if ( pos !== -1 ) {
reEntity = regexFromLabels(
'^(',
hostname.slice(0, pos + hostname.length - domain.length),
'\\.\\*$'
'\\.)?\\*$'
);
}
}
@ -218,8 +221,8 @@ const fromCosmeticFilter = function(details) {
case 8:
// HTML filtering
case 64:
if ( exception !== ((fargs[2] & 0b01) !== 0) ) { break; }
isProcedural = (fargs[2] & 0b10) !== 0;
if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; }
isProcedural = (fargs[2] & 0b010) !== 0;
if (
isProcedural === false && fargs[3] !== selector ||
isProcedural && JSON.parse(fargs[3]).raw !== selector
@ -240,7 +243,7 @@ const fromCosmeticFilter = function(details) {
break;
// Scriptlet injection
case 32:
if ( exception !== ((fargs[2] & 1) !== 0) ) { break; }
if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; }
if ( fargs[3] !== selector ) { break; }
if ( hostnameMatches(fargs[1]) ) {
found = fargs[1] + prefix + selector;

View file

@ -56,8 +56,8 @@
const reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/;
const emptyArray = [];
const parsed = {
hostnames: [],
exception: false,
hostnames: [],
suffix: ''
};
@ -871,7 +871,7 @@
}
}
let c0 = suffix.charCodeAt(0);
const c0 = suffix.charCodeAt(0);
// New shorter syntax for scriptlet injection engine.
if ( c0 === 0x2B /* '+' */ && suffix.startsWith('+js') ) {