mirror of
https://github.com/gorhill/uBlock.git
synced 2024-11-11 09:31:01 +01:00
Expand parser's ability to process static extended filtering
This commit moves some of the parsing logic of static extended filtering into the static filtering parser; this allows better syntax highlighting and creation-time error-catching for cosmetic, HTML, and scriptlet filters.
This commit is contained in:
parent
681bd70116
commit
1a082e0581
10 changed files with 879 additions and 862 deletions
|
@ -32,9 +32,12 @@
|
|||
text-decoration-style: solid;
|
||||
text-decoration-line: underline;
|
||||
}
|
||||
.cm-s-default .cm-error {
|
||||
color: inherit;
|
||||
}
|
||||
.cm-s-default .cm-error,
|
||||
.CodeMirror-linebackground.error {
|
||||
background-color: #ff000018;
|
||||
background-color: #ff000016;
|
||||
text-decoration: underline red;
|
||||
text-underline-position: under;
|
||||
}
|
||||
|
|
|
@ -29,54 +29,35 @@ CodeMirror.defineMode("ubo-static-filtering", function() {
|
|||
let parserSlot = 0;
|
||||
let netOptionValueMode = false;
|
||||
|
||||
const colorSpan = function(stream) {
|
||||
if ( parser.category === parser.CATNone || parser.shouldIgnore() ) {
|
||||
stream.skipToEnd();
|
||||
return 'comment';
|
||||
}
|
||||
if ( parser.category === parser.CATComment ) {
|
||||
stream.skipToEnd();
|
||||
return reDirective.test(stream.string)
|
||||
? 'variable strong'
|
||||
: 'comment';
|
||||
}
|
||||
if ( (parser.slices[parserSlot] & parser.BITIgnore) !== 0 ) {
|
||||
const colorExtSpan = function(stream) {
|
||||
if ( parserSlot < parser.optionsAnchorSpan.i ) {
|
||||
const style = (parser.slices[parserSlot] & parser.BITComma) === 0
|
||||
? 'string-2'
|
||||
: 'def';
|
||||
stream.pos += parser.slices[parserSlot+2];
|
||||
parserSlot += 3;
|
||||
return 'comment';
|
||||
return style;
|
||||
}
|
||||
if ( (parser.slices[parserSlot] & parser.BITError) !== 0 ) {
|
||||
if (
|
||||
parserSlot >= parser.optionsAnchorSpan.i &&
|
||||
parserSlot < parser.patternSpan.i
|
||||
) {
|
||||
const style = (parser.flavorBits & parser.BITFlavorException) !== 0
|
||||
? 'tag'
|
||||
: 'def';
|
||||
stream.pos += parser.slices[parserSlot+2];
|
||||
parserSlot += 3;
|
||||
return 'error';
|
||||
return `${style} strong`;
|
||||
}
|
||||
if ( parser.category === parser.CATStaticExtFilter ) {
|
||||
if ( parserSlot < parser.optionsAnchorSpan.i ) {
|
||||
const style = (parser.slices[parserSlot] & parser.BITComma) === 0
|
||||
? 'string-2'
|
||||
: 'def';
|
||||
stream.pos += parser.slices[parserSlot+2];
|
||||
parserSlot += 3;
|
||||
return style;
|
||||
}
|
||||
if (
|
||||
parserSlot >= parser.optionsAnchorSpan.i &&
|
||||
parserSlot < parser.patternSpan.i
|
||||
) {
|
||||
const style = (parser.flavorBits & parser.BITFlavorException) !== 0
|
||||
? 'tag'
|
||||
: 'def';
|
||||
stream.pos += parser.slices[parserSlot+2];
|
||||
parserSlot += 3;
|
||||
return `${style} strong`;
|
||||
}
|
||||
if ( parserSlot >= parser.patternSpan.i ) {
|
||||
stream.skipToEnd();
|
||||
return 'variable';
|
||||
}
|
||||
if ( parserSlot >= parser.patternSpan.i ) {
|
||||
stream.skipToEnd();
|
||||
return '';
|
||||
return 'variable';
|
||||
}
|
||||
stream.skipToEnd();
|
||||
return '';
|
||||
};
|
||||
|
||||
const colorNetSpan = function(stream) {
|
||||
if ( parserSlot < parser.exceptionSpan.i ) {
|
||||
stream.pos += parser.slices[parserSlot+2];
|
||||
parserSlot += 3;
|
||||
|
@ -165,6 +146,37 @@ CodeMirror.defineMode("ubo-static-filtering", function() {
|
|||
return '';
|
||||
};
|
||||
|
||||
// Returns the CodeMirror style for the next span of the current line and
// advances `stream` accordingly. Whole-line cases (no category, ignorable
// line, comment) consume the rest of the line; slices flagged as ignored or
// erroneous are consumed one at a time; everything else is delegated to the
// category-specific colorizer.
const colorSpan = function(stream) {
    // Consume the rest of the line and report it with the given style.
    const restOfLine = style => {
        stream.skipToEnd();
        return style;
    };
    // Step over the current parser slice. The entry at `parserSlot+2` is
    // used as the number of characters to advance, and slots are walked in
    // steps of 3 -- presumably slices are stored as triplets, TODO confirm.
    const consumeSlice = style => {
        stream.pos += parser.slices[parserSlot+2];
        parserSlot += 3;
        return style;
    };

    if ( parser.category === parser.CATNone || parser.shouldIgnore() ) {
        return restOfLine('comment');
    }
    if ( parser.category === parser.CATComment ) {
        stream.skipToEnd();
        const isDirective = reDirective.test(stream.string);
        return isDirective ? 'variable strong' : 'comment';
    }

    const sliceBits = parser.slices[parserSlot];
    if ( (sliceBits & parser.BITIgnore) !== 0 ) {
        return consumeSlice('comment');
    }
    if ( (sliceBits & parser.BITError) !== 0 ) {
        return consumeSlice('error');
    }

    switch ( parser.category ) {
    case parser.CATStaticExtFilter:
        return colorExtSpan(stream);
    case parser.CATStaticNetFilter:
        return colorNetSpan(stream);
    }
    return restOfLine(null);
};
|
||||
|
||||
return {
|
||||
token: function(stream) {
|
||||
if ( stream.sol() ) {
|
||||
|
|
|
@ -205,13 +205,7 @@ const FilterContainer = function() {
|
|||
this.specificFilters = new µb.staticExtFilteringEngine.HostnameBasedDB(2);
|
||||
|
||||
// temporary filters
|
||||
this.sessionFilterDB = new (
|
||||
class extends µb.staticExtFilteringEngine.SessionDB {
|
||||
compile(s) {
|
||||
return µb.staticExtFilteringEngine.compileSelector(s);
|
||||
}
|
||||
}
|
||||
)();
|
||||
this.sessionFilterDB = new µb.staticExtFilteringEngine.SessionDB();
|
||||
|
||||
// low generic cosmetic filters, organized by id/class then simple/complex.
|
||||
this.lowlyGeneric = Object.create(null);
|
||||
|
@ -351,14 +345,12 @@ FilterContainer.prototype.keyFromSelector = function(selector) {
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
FilterContainer.prototype.compile = function(parsed, writer) {
|
||||
FilterContainer.prototype.compile = function(parser, writer) {
|
||||
// 1000 = cosmetic filtering
|
||||
writer.select(1000);
|
||||
|
||||
const hostnames = parsed.hostnames;
|
||||
let i = hostnames.length;
|
||||
if ( i === 0 ) {
|
||||
this.compileGenericSelector(parsed, writer);
|
||||
if ( parser.hasOptions() === false ) {
|
||||
this.compileGenericSelector(parser, writer);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -366,15 +358,15 @@ FilterContainer.prototype.compile = function(parsed, writer) {
|
|||
// Negated hostname means the filter applies to all non-negated hostnames
|
||||
// of same filter OR globally if there is no non-negated hostnames.
|
||||
let applyGlobally = true;
|
||||
while ( i-- ) {
|
||||
const hostname = hostnames[i];
|
||||
if ( hostname.startsWith('~') === false ) {
|
||||
for ( const { hn, not, bad } of parser.extOptions() ) {
|
||||
if ( bad ) { continue; }
|
||||
if ( not === false ) {
|
||||
applyGlobally = false;
|
||||
}
|
||||
this.compileSpecificSelector(hostname, parsed, writer);
|
||||
this.compileSpecificSelector(parser, hn, not, writer);
|
||||
}
|
||||
if ( applyGlobally ) {
|
||||
this.compileGenericSelector(parsed, writer);
|
||||
this.compileGenericSelector(parser, writer);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -382,22 +374,31 @@ FilterContainer.prototype.compile = function(parsed, writer) {
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
FilterContainer.prototype.compileGenericSelector = function(parsed, writer) {
|
||||
if ( parsed.exception === false ) {
|
||||
this.compileGenericHideSelector(parsed, writer);
|
||||
FilterContainer.prototype.compileGenericSelector = function(parser, writer) {
|
||||
if ( parser.isException() ) {
|
||||
this.compileGenericUnhideSelector(parser, writer);
|
||||
} else {
|
||||
this.compileGenericUnhideSelector(parsed, writer);
|
||||
this.compileGenericHideSelector(parser, writer);
|
||||
}
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
FilterContainer.prototype.compileGenericHideSelector = function(
|
||||
parsed,
|
||||
parser,
|
||||
writer
|
||||
) {
|
||||
const selector = parsed.suffix;
|
||||
const type = selector.charCodeAt(0);
|
||||
const { raw, compiled, pseudoclass } = parser.result;
|
||||
if ( compiled === undefined ) {
|
||||
const who = writer.properties.get('assetKey') || '?';
|
||||
µb.logger.writeOne({
|
||||
realm: 'message',
|
||||
type: 'error',
|
||||
text: `Invalid generic cosmetic filter in ${who}: ${raw}`
|
||||
});
|
||||
}
|
||||
|
||||
const type = compiled.charCodeAt(0);
|
||||
let key;
|
||||
|
||||
// Simple selector-based CSS rule: no need to test for whether the
|
||||
|
@ -406,21 +407,19 @@ FilterContainer.prototype.compileGenericHideSelector = function(
|
|||
// - ###ad-bigbox
|
||||
// - ##.ads-bigbox
|
||||
if ( type === 0x23 /* '#' */ ) {
|
||||
key = this.keyFromSelector(selector);
|
||||
if ( key === selector ) {
|
||||
key = this.keyFromSelector(compiled);
|
||||
if ( key === compiled ) {
|
||||
writer.push([ 0, key.slice(1) ]);
|
||||
return;
|
||||
}
|
||||
} else if ( type === 0x2E /* '.' */ ) {
|
||||
key = this.keyFromSelector(selector);
|
||||
if ( key === selector ) {
|
||||
key = this.keyFromSelector(compiled);
|
||||
if ( key === compiled ) {
|
||||
writer.push([ 2, key.slice(1) ]);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const compiled = µb.staticExtFilteringEngine.compileSelector(selector);
|
||||
|
||||
// Invalid cosmetic filter, possible reasons:
|
||||
// - Bad syntax
|
||||
// - Procedural filters (can't be generic): the compiled version of
|
||||
|
@ -431,19 +430,15 @@ FilterContainer.prototype.compileGenericHideSelector = function(
|
|||
// https://github.com/uBlockOrigin/uBlock-issues/issues/131
|
||||
// Support generic procedural filters as per advanced settings.
|
||||
// TODO: prevent double compilation.
|
||||
if (
|
||||
compiled === undefined ||
|
||||
compiled !== selector &&
|
||||
µb.staticExtFilteringEngine.compileSelector.pseudoclass === -1
|
||||
) {
|
||||
if ( compiled !== raw && pseudoclass === false ) {
|
||||
if ( µb.hiddenSettings.allowGenericProceduralFilters === true ) {
|
||||
return this.compileSpecificSelector('', parsed, writer);
|
||||
return this.compileSpecificSelector(parser, '', false, writer);
|
||||
}
|
||||
const who = writer.properties.get('assetKey') || '?';
|
||||
µb.logger.writeOne({
|
||||
realm: 'message',
|
||||
type: 'error',
|
||||
text: `Invalid generic cosmetic filter in ${who}: ##${selector}`
|
||||
text: `Invalid generic cosmetic filter in ${who}: ##${raw}`
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
@ -455,7 +450,7 @@ FilterContainer.prototype.compileGenericHideSelector = function(
|
|||
writer.push([
|
||||
type === 0x23 /* '#' */ ? 1 : 3,
|
||||
key.slice(1),
|
||||
selector
|
||||
compiled
|
||||
]);
|
||||
return;
|
||||
}
|
||||
|
@ -463,13 +458,13 @@ FilterContainer.prototype.compileGenericHideSelector = function(
|
|||
// https://github.com/gorhill/uBlock/issues/909
|
||||
// Anything which contains a plain id/class selector can be classified
|
||||
// as a low generic cosmetic filter.
|
||||
const matches = this.rePlainSelectorEx.exec(selector);
|
||||
const matches = this.rePlainSelectorEx.exec(compiled);
|
||||
if ( matches !== null ) {
|
||||
const key = matches[1] || matches[2];
|
||||
writer.push([
|
||||
key.charCodeAt(0) === 0x23 /* '#' */ ? 1 : 3,
|
||||
key.slice(1),
|
||||
selector
|
||||
compiled
|
||||
]);
|
||||
return;
|
||||
}
|
||||
|
@ -479,27 +474,27 @@ FilterContainer.prototype.compileGenericHideSelector = function(
|
|||
// For efficiency purpose, we will distinguish between simple and complex
|
||||
// selectors.
|
||||
|
||||
if ( this.reSimpleHighGeneric.test(selector) ) {
|
||||
writer.push([ 4 /* simple */, selector ]);
|
||||
if ( this.reSimpleHighGeneric.test(compiled) ) {
|
||||
writer.push([ 4 /* simple */, compiled ]);
|
||||
} else {
|
||||
writer.push([ 5 /* complex */, selector ]);
|
||||
writer.push([ 5 /* complex */, compiled ]);
|
||||
}
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
FilterContainer.prototype.compileGenericUnhideSelector = function(
|
||||
parsed,
|
||||
parser,
|
||||
writer
|
||||
) {
|
||||
// Procedural cosmetic filters are acceptable as generic exception filters.
|
||||
const compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
|
||||
const { raw, compiled } = parser.result;
|
||||
if ( compiled === undefined ) {
|
||||
const who = writer.properties.get('assetKey') || '?';
|
||||
µb.logger.writeOne({
|
||||
realm: 'message',
|
||||
type: 'error',
|
||||
text: `Invalid cosmetic filter in ${who}: #@#${parsed.suffix}`
|
||||
text: `Invalid cosmetic filter in ${who}: #@#${raw}`
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
@ -516,28 +511,25 @@ FilterContainer.prototype.compileGenericUnhideSelector = function(
|
|||
/******************************************************************************/
|
||||
|
||||
FilterContainer.prototype.compileSpecificSelector = function(
|
||||
parser,
|
||||
hostname,
|
||||
parsed,
|
||||
not,
|
||||
writer
|
||||
) {
|
||||
// https://github.com/chrisaljoudi/uBlock/issues/145
|
||||
let unhide = parsed.exception ? 1 : 0;
|
||||
if ( hostname.startsWith('~') ) {
|
||||
hostname = hostname.slice(1);
|
||||
unhide ^= 1;
|
||||
}
|
||||
|
||||
const compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
|
||||
const { raw, compiled, exception } = parser.result;
|
||||
if ( compiled === undefined ) {
|
||||
const who = writer.properties.get('assetKey') || '?';
|
||||
µb.logger.writeOne({
|
||||
realm: 'message',
|
||||
type: 'error',
|
||||
text: `Invalid cosmetic filter in ${who}: ##${parsed.suffix}`
|
||||
text: `Invalid cosmetic filter in ${who}: ##${raw}`
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// https://github.com/chrisaljoudi/uBlock/issues/145
|
||||
let unhide = exception ? 1 : 0;
|
||||
if ( not ) { unhide ^= 1; }
|
||||
|
||||
let kind = 0;
|
||||
if ( unhide === 1 ) {
|
||||
|
|
|
@ -29,13 +29,7 @@
|
|||
const duplicates = new Set();
|
||||
|
||||
const filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(2);
|
||||
const sessionFilterDB = new (
|
||||
class extends µb.staticExtFilteringEngine.SessionDB {
|
||||
compile(s) {
|
||||
return µb.staticExtFilteringEngine.compileSelector(s.slice(1));
|
||||
}
|
||||
}
|
||||
)();
|
||||
const sessionFilterDB = new µb.staticExtFilteringEngine.SessionDB();
|
||||
|
||||
let acceptedCount = 0;
|
||||
let discardedCount = 0;
|
||||
|
@ -298,15 +292,14 @@
|
|||
filterDB.collectGarbage();
|
||||
};
|
||||
|
||||
api.compile = function(parsed, writer) {
|
||||
const selector = parsed.suffix.slice(1).trim();
|
||||
const compiled = µb.staticExtFilteringEngine.compileSelector(selector);
|
||||
api.compile = function(parser, writer) {
|
||||
const { raw, compiled, exception } = parser.result;
|
||||
if ( compiled === undefined ) {
|
||||
const who = writer.properties.get('assetKey') || '?';
|
||||
µb.logger.writeOne({
|
||||
realm: 'message',
|
||||
type: 'error',
|
||||
text: `Invalid HTML filter in ${who}: ##${selector}`
|
||||
text: `Invalid HTML filter in ${who}: ##${raw}`
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
@ -316,10 +309,9 @@
|
|||
|
||||
// TODO: Mind negated hostnames, they are currently discarded.
|
||||
|
||||
for ( const hn of parsed.hostnames ) {
|
||||
if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) { continue; }
|
||||
for ( const { hn } of parser.extOptions() ) {
|
||||
let kind = 0;
|
||||
if ( parsed.exception ) {
|
||||
if ( exception ) {
|
||||
kind |= 0b01;
|
||||
}
|
||||
if ( compiled.charCodeAt(0) === 0x7B /* '{' */ ) {
|
||||
|
|
|
@ -750,11 +750,14 @@ const onMessage = function(request, sender, callback) {
|
|||
let response;
|
||||
|
||||
switch ( request.what ) {
|
||||
case 'compileCosmeticFilterSelector':
|
||||
response = µb.staticExtFilteringEngine.compileSelector(
|
||||
request.selector
|
||||
);
|
||||
case 'compileCosmeticFilterSelector': {
|
||||
const parser = new vAPI.StaticFilteringParser();
|
||||
parser.analyze(request.selector);
|
||||
if ( (parser.flavorBits & parser.BITFlavorExtCosmetic) !== 0 ) {
|
||||
response = parser.result.compiled;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/3497
|
||||
// This needs to be removed once issue is fixed.
|
||||
|
@ -1302,20 +1305,19 @@ const getURLFilteringData = function(details) {
|
|||
};
|
||||
|
||||
const compileTemporaryException = function(filter) {
|
||||
const match = /#@?#/.exec(filter);
|
||||
if ( match === null ) { return; }
|
||||
let selector = filter.slice(match.index + match[0].length).trim();
|
||||
const parser = new vAPI.StaticFilteringParser();
|
||||
parser.analyze(filter);
|
||||
if ( parser.shouldDiscard() ) { return {}; }
|
||||
let selector = parser.result.compiled;
|
||||
let session;
|
||||
if ( selector.startsWith('+js') ) {
|
||||
if ( (parser.flavorBits & parser.BITFlavorExtScriptlet) !== 0 ) {
|
||||
session = µb.scriptletFilteringEngine.getSession();
|
||||
} else if ( (parser.flavorBits & parser.BITFlavorExtHTML) !== 0 ) {
|
||||
session = µb.htmlFilteringEngine.getSession();
|
||||
} else {
|
||||
if ( selector.startsWith('^') ) {
|
||||
session = µb.htmlFilteringEngine.getSession();
|
||||
} else {
|
||||
session = µb.cosmeticFilteringEngine.getSession();
|
||||
}
|
||||
session = µb.cosmeticFilteringEngine.getSession();
|
||||
}
|
||||
return { session, selector: session.compile(selector) };
|
||||
return { session, selector };
|
||||
};
|
||||
|
||||
const toggleTemporaryException = function(details) {
|
||||
|
|
|
@ -30,13 +30,7 @@
|
|||
const reEscapeScriptArg = /[\\'"]/g;
|
||||
|
||||
const scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(1);
|
||||
const sessionScriptletDB = new (
|
||||
class extends µb.staticExtFilteringEngine.SessionDB {
|
||||
compile(s) {
|
||||
return s.slice(4, -1).trim();
|
||||
}
|
||||
}
|
||||
)();
|
||||
const sessionScriptletDB = new µb.staticExtFilteringEngine.SessionDB();
|
||||
|
||||
let acceptedCount = 0;
|
||||
let discardedCount = 0;
|
||||
|
@ -177,6 +171,7 @@
|
|||
};
|
||||
})();
|
||||
|
||||
// TODO: Probably should move this into StaticFilteringParser
|
||||
const normalizeRawFilter = function(rawFilter) {
|
||||
let rawToken = rawFilter.slice(4, -1);
|
||||
let rawEnd = rawToken.length;
|
||||
|
@ -288,20 +283,19 @@
|
|||
scriptletDB.collectGarbage();
|
||||
};
|
||||
|
||||
api.compile = function(parsed, writer) {
|
||||
api.compile = function(parser, writer) {
|
||||
// 1001 = scriptlet injection
|
||||
writer.select(1001);
|
||||
|
||||
// Only exception filters are allowed to be global.
|
||||
const normalized = normalizeRawFilter(parsed.suffix);
|
||||
const { raw, exception } = parser.result;
|
||||
const normalized = normalizeRawFilter(raw);
|
||||
|
||||
// Tokenless is meaningful only for exception filters.
|
||||
if ( normalized === '+js()' && parsed.exception === false ) {
|
||||
return;
|
||||
}
|
||||
if ( normalized === '+js()' && exception === false ) { return; }
|
||||
|
||||
if ( parsed.hostnames.length === 0 ) {
|
||||
if ( parsed.exception ) {
|
||||
if ( parser.hasOptions() === false ) {
|
||||
if ( exception ) {
|
||||
writer.push([ 32, '', 1, normalized ]);
|
||||
}
|
||||
return;
|
||||
|
@ -311,16 +305,12 @@
|
|||
// Ignore instances of exception filter with negated hostnames,
|
||||
// because there is no way to create an exception to an exception.
|
||||
|
||||
for ( let hn of parsed.hostnames ) {
|
||||
const negated = hn.charCodeAt(0) === 0x7E /* '~' */;
|
||||
if ( negated ) {
|
||||
hn = hn.slice(1);
|
||||
}
|
||||
for ( const { hn, not } of parser.extOptions() ) {
|
||||
let kind = 0;
|
||||
if ( parsed.exception ) {
|
||||
if ( negated ) { continue; }
|
||||
if ( exception ) {
|
||||
if ( not ) { continue; }
|
||||
kind |= 1;
|
||||
} else if ( negated ) {
|
||||
} else if ( not ) {
|
||||
kind |= 1;
|
||||
}
|
||||
writer.push([ 32, hn, kind, normalized ]);
|
||||
|
|
|
@ -778,8 +778,7 @@ const filterToDOMInterface = (( ) => {
|
|||
callback(lastResultset);
|
||||
return;
|
||||
}
|
||||
const selector = filter.slice(2);
|
||||
lastResultset = fromPlainCosmeticFilter(selector);
|
||||
lastResultset = fromPlainCosmeticFilter(filter.slice(2));
|
||||
if ( lastResultset ) {
|
||||
if ( previewing ) { apply(); }
|
||||
callback(lastResultset);
|
||||
|
@ -788,7 +787,7 @@ const filterToDOMInterface = (( ) => {
|
|||
// Procedural cosmetic filter
|
||||
const response = await vAPI.messaging.send('elementPicker', {
|
||||
what: 'compileCosmeticFilterSelector',
|
||||
selector,
|
||||
selector: filter,
|
||||
});
|
||||
lastResultset = fromCompiledCosmeticFilter(response);
|
||||
if ( previewing ) { apply(); }
|
||||
|
|
|
@ -19,8 +19,6 @@
|
|||
Home: https://github.com/gorhill/uBlock
|
||||
*/
|
||||
|
||||
/* global punycode */
|
||||
|
||||
'use strict';
|
||||
|
||||
/*******************************************************************************
|
||||
|
@ -52,517 +50,6 @@
|
|||
|
||||
µBlock.staticExtFilteringEngine = (( ) => {
|
||||
const µb = µBlock;
|
||||
const reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/;
|
||||
const emptyArray = [];
|
||||
const parsed = {
|
||||
exception: false,
|
||||
hostnames: [],
|
||||
suffix: ''
|
||||
};
|
||||
|
||||
// To be called to ensure no big parent string of a string slice is
|
||||
// left into memory after parsing filter lists is over.
|
||||
// Drops the references held by the shared `parsed` object by replacing its
// hostname list with a fresh empty array and its suffix with an empty string.
// The `exception` field is left untouched.
const resetParsed = function() {
    Object.assign(parsed, { hostnames: [], suffix: '' });
};
|
||||
|
||||
// Locates a trailing pseudo-element-like suffix in a selector string.
// Returns the index at which the suffix starts (`:after`, `::after`,
// `:before`, `::before`, or any `::name` made of lowercase letters and
// dashes, anchored at the end of the string), or -1 when there is none.
const cssPseudoSelector = (( ) => {
    const reTrailingPseudo = /:(?::?after|:?before|:[a-z][a-z-]*[a-z])$/;
    return s => {
        // Cheap rejection before running the regex.
        if ( s.includes(':') === false ) { return -1; }
        const found = reTrailingPseudo.exec(s);
        if ( found === null ) { return -1; }
        return found.index;
    };
})();
|
||||
|
||||
// Return value:
|
||||
// 0b00 (0) = not a valid CSS selector
|
||||
// 0b01 (1) = valid CSS selector, without pseudo-element
|
||||
// 0b11 (3) = valid CSS selector, with pseudo element
|
||||
// Classifies a string as a CSS selector. Result:
//   0 = rejected
//   1 = accepted, no trailing pseudo-element
//   3 = accepted prefix followed by a trailing pseudo-element
const cssSelectorType = (( ) => {
    const probe = document.createElement('div');
    // Quick regex-based validation -- most cosmetic filters are of the
    // simple form and in such case a regex is much faster.
    const reSimple = /^[#.][A-Za-z_][\w-]*$/;
    // Keep in mind:
    //   https://github.com/gorhill/uBlock/issues/693
    //   https://github.com/gorhill/uBlock/issues/1955
    //   https://github.com/gorhill/uBlock/issues/3111
    // Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817
    // is fixed: prefer `matches`, fall back to `querySelector` when
    // `matches(':scope')` throws.
    const validate = (( ) => {
        try {
            probe.matches(':scope');
            return probe.matches.bind(probe);
        } catch (ex) {
            return probe.querySelector.bind(probe);
        }
    })();
    const classify = function(s) {
        if ( reSimple.test(s) ) { return 1; }
        const pseudoStart = cssPseudoSelector(s);
        if ( pseudoStart !== -1 ) {
            // Only the part before the trailing pseudo-element is validated.
            const prefixType = classify(s.slice(0, pseudoStart));
            return prefixType === 1 ? 3 : 0;
        }
        try {
            // The selector is rejected when the DOM call throws on it.
            validate(`${s}, ${s}:not(#foo)`);
            return 1;
        } catch (ex) {
            return 0;
        }
    };
    return classify;
})();
|
||||
|
||||
// Returns true when `s` cannot be compiled by the RegExp constructor, false
// otherwise. On failure, the stringified exception is stored in
// `isBadRegex.message`; the property is not touched on success.
const isBadRegex = function(s) {
    let failed = false;
    try {
        void new RegExp(s);
    } catch (reason) {
        isBadRegex.message = reason.toString();
        failed = true;
    }
    return failed;
};
|
||||
|
||||
// Converts a `selector { declarations }` suffix into an equivalent filter
// suffix:
//   - `remove: true`               -> `selector:remove()`
//   - ends with `display: none !important` -> plain `selector`
//   - anything else                -> `selector:style(declarations)`
// Returns an empty string when the suffix is not of the expected
// `selector { ... }` shape.
const translateAdguardCSSInjectionFilter = function(suffix) {
    const parts = /^([^{]+)\{([^}]+)\}\s*$/.exec(suffix);
    if ( parts === null ) { return ''; }
    const [ , rawSelector, rawStyle ] = parts;
    const selector = rawSelector.trim();
    const style = rawStyle.trim();
    // Special style directive `remove: true` is converted into a
    // `:remove()` operator.
    const isRemoval = /^\s*remove:\s*true[; ]*$/.test(style);
    if ( isRemoval ) {
        return `${selector}:remove()`;
    }
    // For some reasons, many of Adguard's plain cosmetic filters are
    // "disguised" as style-based cosmetic filters: convert such filters
    // to plain cosmetic filters.
    const isPlainHide = /display\s*:\s*none\s*!important;?$/.test(style);
    if ( isPlainHide ) {
        return selector;
    }
    return `${selector}:style(${style})`;
};
|
||||
|
||||
// Builds the list of hostnames from the options exposed by `parser`.
// Each hostname is trimmed, empty ones are dropped, and negated entries are
// prefixed with `~`. Hostnames go through `punycode.toASCII()` only when
// `parser.optionHasUnicode()` reports true.
const hostnamesFromPrefix = function(parser) {
    const needsPunycode = parser.optionHasUnicode();
    const collected = [];
    for ( const { hn, not } of parser.options() ) {
        const trimmed = hn.trim();
        if ( trimmed.length === 0 ) { continue; }
        const hostname = needsPunycode ? punycode.toASCII(trimmed) : trimmed;
        collected.push(not ? `~${hostname}` : hostname);
    }
    return collected;
};
|
||||
|
||||
const compileProceduralSelector = (( ) => {
|
||||
const reProceduralOperator = new RegExp([
|
||||
'^(?:',
|
||||
[
|
||||
'-abp-contains',
|
||||
'-abp-has',
|
||||
'contains',
|
||||
'has',
|
||||
'has-text',
|
||||
'if',
|
||||
'if-not',
|
||||
'matches-css',
|
||||
'matches-css-after',
|
||||
'matches-css-before',
|
||||
'min-text-length',
|
||||
'not',
|
||||
'nth-ancestor',
|
||||
'remove',
|
||||
'style',
|
||||
'upward',
|
||||
'watch-attr',
|
||||
'watch-attrs',
|
||||
'xpath'
|
||||
].join('|'),
|
||||
')\\('
|
||||
].join(''));
|
||||
|
||||
const reEatBackslashes = /\\([()])/g;
|
||||
const reEscapeRegex = /[.*+?^${}()|[\]\\]/g;
|
||||
const reNeedScope = /^\s*>/;
|
||||
const reIsDanglingSelector = /[+>~\s]\s*$/;
|
||||
const reIsSiblingSelector = /^\s*[+~]/;
|
||||
|
||||
const regexToRawValue = new Map();
|
||||
let lastProceduralSelector = '',
|
||||
lastProceduralSelectorCompiled;
|
||||
|
||||
// When dealing with literal text, we must first eat _some_
|
||||
// backslash characters.
|
||||
// Compiles the argument of a text-matching operator.
//   - Regex literal (`/.../flags`): returns the regex source, or
//     `[ source, flags ]` when flags are present; returns undefined when the
//     source is rejected by `isBadRegex()`.
//   - Anything else: un-escapes `\(` and `\)`, escapes regex special
//     characters, records the original text in `regexToRawValue`, and
//     returns the escaped string.
const compileText = function(s) {
    const literal = reParseRegexLiteral.exec(s);
    if ( literal === null ) {
        const escaped = s.replace(reEatBackslashes, '$1')
                         .replace(reEscapeRegex, '\\$&');
        regexToRawValue.set(escaped, s);
        return escaped;
    }
    const source = literal[1];
    const flags = literal[2];
    if ( isBadRegex(source) ) { return; }
    return flags ? [ source, flags ] : source;
};
|
||||
|
||||
// Compiles a `name: value` argument into `{ name, value }`.
// Returns undefined when there is no `:` separator, or when the value is a
// regex literal rejected by `isBadRegex()`. A regex-literal value is kept as
// its source (or `[ source, flags ]`); a plain value is escaped and anchored
// with `^...$`, and its original text is recorded in `regexToRawValue`.
const compileCSSDeclaration = function(s) {
    const separatorAt = s.indexOf(':');
    if ( separatorAt === -1 ) { return; }
    const name = s.slice(0, separatorAt).trim();
    const rawValue = s.slice(separatorAt + 1).trim();
    const literal = reParseRegexLiteral.exec(rawValue);
    if ( literal === null ) {
        const anchored = '^' + rawValue.replace(reEscapeRegex, '\\$&') + '$';
        regexToRawValue.set(anchored, rawValue);
        return { name: name, value: anchored };
    }
    const source = literal[1];
    const flags = literal[2];
    if ( isBadRegex(source) ) { return; }
    return { name: name, value: flags ? [ source, flags ] : source };
};
|
||||
|
||||
// Compiles the argument of a conditional operator through `compile()`.
// https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277
//   An argument starting with `>` (optionally preceded by whitespace) is
//   first prefixed with `:scope `.
const compileConditionalSelector = function(s) {
    const scoped = reNeedScope.test(s) ? `:scope ${s}` : s;
    return compile(scoped);
};
|
||||
|
||||
// Parses `s` as a base-10 integer made only of digits.
// Returns the integer when it lies in [min, max) -- `max` is exclusive --
// and undefined otherwise (including when `s` contains any non-digit).
const compileInteger = function(s, min = 0, max = 0x7FFFFFFF) {
    const reDigitsOnly = /^\d+$/;
    if ( reDigitsOnly.test(s) ) {
        const parsedValue = parseInt(s, 10);
        const outOfRange = parsedValue < min || parsedValue >= max;
        if ( outOfRange === false ) { return parsedValue; }
    }
};
|
||||
|
||||
// Compiles the argument of a `:not()` operator.
// https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588
//   Reject instances of :not() filters for which the argument is
//   a valid CSS selector, otherwise we would be adversely
//   changing the behavior of CSS4's :not().
// Returns undefined for such rejected arguments.
const compileNotSelector = function(s) {
    if ( cssSelectorType(s) !== 0 ) { return; }
    return compileConditionalSelector(s);
};
|
||||
|
||||
// Compiles the argument of the `:upward()` operator: either an integer in
// [1, 256), returned as a number, or a string for which `cssSelectorType()`
// returns 1, returned unchanged. Anything else yields undefined.
const compileUpwardArgument = function(s) {
    const levels = compileInteger(s, 1, 256);
    if ( levels === undefined ) {
        return cssSelectorType(s) === 1 ? s : undefined;
    }
    return levels;
};
|
||||
|
||||
// Validates the argument of the `:remove()` operator: only an empty
// argument is accepted (and returned as-is); anything else yields undefined.
const compileRemoveSelector = function(s) {
    return s === '' ? s : undefined;
};
|
||||
|
||||
// Validates a selector-path fragment by checking that `*` followed by the
// fragment is classified as 1 by `cssSelectorType()`. Returns the fragment
// unchanged when it is, undefined otherwise.
const compileSpathExpression = function(s) {
    const isValid = cssSelectorType('*' + s) === 1;
    return isValid ? s : undefined;
};
|
||||
|
||||
// Validates the argument of the `:style()` operator.
// https://github.com/uBlockOrigin/uBlock-issues/issues/668
//   Declarations containing `url(` (any letter case) or a backslash are
//   rejected outright.
// The remaining text is assigned to the `style.cssText` of a scratch
// element; when it reads back as an empty string the argument is rejected.
// Returns `s` unchanged when accepted, undefined otherwise.
const compileStyleProperties = (( ) => {
    const reForbidden = /url\(|\\/i;
    // Scratch element, created on first use only.
    let scratch;
    return function(s) {
        if ( reForbidden.test(s) ) { return; }
        if ( scratch === undefined ) {
            scratch = document.createElement('div');
        }
        const { style } = scratch;
        style.cssText = s;
        const accepted = style.cssText !== '';
        if ( accepted === false ) { return; }
        // Leave the scratch element clean for the next call.
        style.cssText = '';
        return s;
    };
})();
|
||||
|
||||
// Compiles the argument of the `:watch-attr()` operator into an array of
// attribute names.
//
// The argument is a comma-separated list; whitespace around the list and
// around each comma is discarded, and empty entries are dropped. An empty
// argument yields an empty array.
//
// Note: the separator must be a regex literal. Passing the pattern as a
// string makes `split()` look for the literal text `\s*,\s*`, in which case
// a list such as `class,id` is never split.
const compileAttrList = function(s) {
    return s.trim().split(/\s*,\s*/).filter(attr => attr !== '');
};
|
||||
|
||||
// Validates the argument of the `:xpath()` operator by handing it to
// `document.createExpression()`. Returns `s` unchanged when that call
// succeeds, undefined when it throws.
const compileXpathExpression = function(s) {
    let accepted = true;
    try {
        document.createExpression(s, null);
    } catch (e) {
        // A throwing expression is reported to the caller as undefined.
        accepted = false;
    }
    return accepted ? s : undefined;
};
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/2793
|
||||
const normalizedOperators = new Map([
|
||||
[ ':-abp-contains', ':has-text' ],
|
||||
[ ':-abp-has', ':has' ],
|
||||
[ ':contains', ':has-text' ],
|
||||
[ ':nth-ancestor', ':upward' ],
|
||||
[ ':watch-attrs', ':watch-attr' ],
|
||||
]);
|
||||
|
||||
const compileArgument = new Map([
|
||||
[ ':has', compileConditionalSelector ],
|
||||
[ ':has-text', compileText ],
|
||||
[ ':if', compileConditionalSelector ],
|
||||
[ ':if-not', compileConditionalSelector ],
|
||||
[ ':matches-css', compileCSSDeclaration ],
|
||||
[ ':matches-css-after', compileCSSDeclaration ],
|
||||
[ ':matches-css-before', compileCSSDeclaration ],
|
||||
[ ':min-text-length', compileInteger ],
|
||||
[ ':not', compileNotSelector ],
|
||||
[ ':remove', compileRemoveSelector ],
|
||||
[ ':spath', compileSpathExpression ],
|
||||
[ ':style', compileStyleProperties ],
|
||||
[ ':upward', compileUpwardArgument ],
|
||||
[ ':watch-attr', compileAttrList ],
|
||||
[ ':xpath', compileXpathExpression ],
|
||||
]);
|
||||
|
||||
const actionOperators = new Set([
|
||||
':remove',
|
||||
':style',
|
||||
]);
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387
|
||||
// Normalize (somewhat) the stringified version of procedural
|
||||
// cosmetic filters -- this increase the likelihood of detecting
|
||||
// duplicates given that uBO is able to understand syntax specific
|
||||
// to other blockers.
|
||||
// The normalized string version is what is reported in the logger,
|
||||
// by design.
|
||||
// Rebuilds a normalized string from a compiled selector object.
// When `compiled.tasks` is not an array, `compiled.selector` is returned
// as-is. Otherwise each task `[ operator, argument ]` is appended to the
// selector in order:
//   - `:has` / `:if`        -> `:has(...)`, argument decompiled recursively
//   - `:not` / `:if-not`    -> `:not(...)`, argument decompiled recursively
//   - `:has-text`           -> `:has-text(...)`
//   - `:matches-css*`       -> `<operator>(<name>: ...)`
//   - `:spath`              -> the argument itself
//   - `:min-text-length`, `:remove`, `:style`, `:upward`, `:watch-attr`,
//     `:xpath`              -> `<operator>(<argument>)`
// Tasks with any other operator contribute nothing.
const decompile = function(compiled) {
    const { tasks } = compiled;
    if ( Array.isArray(tasks) === false ) {
        return compiled.selector;
    }

    // Textual form of a compiled regex argument: `[ source, flags ]` pairs
    // become `/source/flags`; plain strings are looked up in
    // `regexToRawValue` and fall back to `/source/` when not found there.
    const regexToText = details => {
        if ( Array.isArray(details) ) {
            return `/${details[0]}/${details[1]}`;
        }
        const rawValue = regexToRawValue.get(details);
        return rawValue === undefined ? `/${details}/` : rawValue;
    };

    const pieces = [ compiled.selector ];
    for ( const task of tasks ) {
        const operator = task[0];
        const arg = task[1];
        if ( operator === ':has' || operator === ':if' ) {
            pieces.push(`:has(${decompile(arg)})`);
        } else if ( operator === ':not' || operator === ':if-not' ) {
            pieces.push(`:not(${decompile(arg)})`);
        } else if ( operator === ':has-text' ) {
            pieces.push(`:has-text(${regexToText(arg)})`);
        } else if (
            operator === ':matches-css' ||
            operator === ':matches-css-after' ||
            operator === ':matches-css-before'
        ) {
            const valueText = regexToText(arg.value);
            pieces.push(`${operator}(${arg.name}: ${valueText})`);
        } else if ( operator === ':spath' ) {
            pieces.push(arg);
        } else if (
            operator === ':min-text-length' ||
            operator === ':remove' ||
            operator === ':style' ||
            operator === ':upward' ||
            operator === ':watch-attr' ||
            operator === ':xpath'
        ) {
            pieces.push(`${operator}(${arg})`);
        }
    }
    return pieces.join('');
};
|
||||
|
||||
const compile = function(raw, root = false) {
|
||||
if ( raw === '' ) { return; }
|
||||
|
||||
const tasks = [];
|
||||
const n = raw.length;
|
||||
let prefix = '';
|
||||
let i = 0;
|
||||
let opPrefixBeg = 0;
|
||||
let action;
|
||||
|
||||
for (;;) {
|
||||
let c, match;
|
||||
// Advance to next operator.
|
||||
while ( i < n ) {
|
||||
c = raw.charCodeAt(i++);
|
||||
if ( c === 0x3A /* ':' */ ) {
|
||||
match = reProceduralOperator.exec(raw.slice(i));
|
||||
if ( match !== null ) { break; }
|
||||
}
|
||||
}
|
||||
if ( i === n ) { break; }
|
||||
const opNameBeg = i - 1;
|
||||
const opNameEnd = i + match[0].length - 1;
|
||||
i += match[0].length;
|
||||
// Find end of argument: first balanced closing parenthesis.
|
||||
// Note: unbalanced parenthesis can be used in a regex literal
|
||||
// when they are escaped using `\`.
|
||||
// TODO: need to handle quoted parentheses.
|
||||
let pcnt = 1;
|
||||
while ( i < n ) {
|
||||
c = raw.charCodeAt(i++);
|
||||
if ( c === 0x5C /* '\\' */ ) {
|
||||
if ( i < n ) { i += 1; }
|
||||
} else if ( c === 0x28 /* '(' */ ) {
|
||||
pcnt +=1 ;
|
||||
} else if ( c === 0x29 /* ')' */ ) {
|
||||
pcnt -= 1;
|
||||
if ( pcnt === 0 ) { break; }
|
||||
}
|
||||
}
|
||||
// Unbalanced parenthesis? An unbalanced parenthesis is fine
|
||||
// as long as the last character is a closing parenthesis.
|
||||
if ( pcnt !== 0 && c !== 0x29 ) { return; }
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588
|
||||
// Maybe that one operator is a valid CSS selector and if so,
|
||||
// then consider it to be part of the prefix.
|
||||
if ( cssSelectorType(raw.slice(opNameBeg, i)) === 1 ) {
|
||||
continue;
|
||||
}
|
||||
// Extract and remember operator details.
|
||||
let operator = raw.slice(opNameBeg, opNameEnd);
|
||||
operator = normalizedOperators.get(operator) || operator;
|
||||
// Action operator can only be used as trailing operator in the
|
||||
// root task list.
|
||||
// Per-operator arguments validation
|
||||
const args = compileArgument.get(operator)(
|
||||
raw.slice(opNameEnd + 1, i - 1)
|
||||
);
|
||||
if ( args === undefined ) { return; }
|
||||
if ( opPrefixBeg === 0 ) {
|
||||
prefix = raw.slice(0, opNameBeg);
|
||||
} else if ( opNameBeg !== opPrefixBeg ) {
|
||||
if ( action !== undefined ) { return; }
|
||||
const spath = compileSpathExpression(
|
||||
raw.slice(opPrefixBeg, opNameBeg)
|
||||
);
|
||||
if ( spath === undefined ) { return; }
|
||||
tasks.push([ ':spath', spath ]);
|
||||
}
|
||||
if ( action !== undefined ) { return; }
|
||||
tasks.push([ operator, args ]);
|
||||
if ( actionOperators.has(operator) ) {
|
||||
if ( root === false ) { return; }
|
||||
action = operator.slice(1);
|
||||
}
|
||||
opPrefixBeg = i;
|
||||
if ( i === n ) { break; }
|
||||
}
|
||||
|
||||
// No task found: then we have a CSS selector.
|
||||
// At least one task found: nothing should be left to parse.
|
||||
if ( tasks.length === 0 ) {
|
||||
prefix = raw;
|
||||
} else if ( opPrefixBeg < n ) {
|
||||
if ( action !== undefined ) { return; }
|
||||
const spath = compileSpathExpression(raw.slice(opPrefixBeg));
|
||||
if ( spath === undefined ) { return; }
|
||||
tasks.push([ ':spath', spath ]);
|
||||
}
|
||||
|
||||
// https://github.com/NanoAdblocker/NanoCore/issues/1#issuecomment-354394894
|
||||
// https://www.reddit.com/r/uBlockOrigin/comments/c6iem5/
|
||||
// Convert sibling-selector prefix into :spath operator, but
|
||||
// only if context is not the root.
|
||||
if ( prefix !== '' ) {
|
||||
if ( reIsDanglingSelector.test(prefix) ) { prefix += '*'; }
|
||||
if ( cssSelectorType(prefix) === 0 ) {
|
||||
if (
|
||||
root ||
|
||||
reIsSiblingSelector.test(prefix) === false ||
|
||||
compileSpathExpression(prefix) === undefined
|
||||
) {
|
||||
return;
|
||||
}
|
||||
tasks.unshift([ ':spath', prefix ]);
|
||||
prefix = '';
|
||||
}
|
||||
}
|
||||
|
||||
const out = { selector: prefix };
|
||||
|
||||
if ( tasks.length !== 0 ) {
|
||||
out.tasks = tasks;
|
||||
}
|
||||
|
||||
// Expose action to take in root descriptor.
|
||||
//
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/961
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/382
|
||||
// For the time being, `style` action can't be used in a
|
||||
// procedural selector.
|
||||
if ( action !== undefined ) {
|
||||
if ( tasks.length > 1 && action === 'style' ) { return; }
|
||||
out.action = action;
|
||||
}
|
||||
|
||||
// Pseudo-selectors are valid only when used in a root task list.
|
||||
if ( prefix !== '' ) {
|
||||
const pos = cssPseudoSelector(prefix);
|
||||
if ( pos !== -1 ) {
|
||||
if ( root === false ) { return; }
|
||||
out.pseudo = pos;
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
};
|
||||
|
||||
const entryPoint = function(raw) {
|
||||
if ( raw === lastProceduralSelector ) {
|
||||
return lastProceduralSelectorCompiled;
|
||||
}
|
||||
lastProceduralSelector = raw;
|
||||
let compiled = compile(raw, true);
|
||||
if ( compiled !== undefined ) {
|
||||
compiled.raw = decompile(compiled);
|
||||
}
|
||||
lastProceduralSelectorCompiled = compiled;
|
||||
return compiled;
|
||||
};
|
||||
|
||||
entryPoint.reset = function() {
|
||||
regexToRawValue.clear();
|
||||
lastProceduralSelector = '';
|
||||
lastProceduralSelectorCompiled = undefined;
|
||||
};
|
||||
|
||||
return entryPoint;
|
||||
})();
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Public API
|
||||
|
@ -750,156 +237,40 @@
|
|||
//--------------------------------------------------------------------------
|
||||
|
||||
api.reset = function() {
|
||||
compileProceduralSelector.reset();
|
||||
µb.cosmeticFilteringEngine.reset();
|
||||
µb.scriptletFilteringEngine.reset();
|
||||
µb.htmlFilteringEngine.reset();
|
||||
resetParsed(parsed);
|
||||
};
|
||||
|
||||
api.freeze = function() {
|
||||
compileProceduralSelector.reset();
|
||||
µb.cosmeticFilteringEngine.freeze();
|
||||
µb.scriptletFilteringEngine.freeze();
|
||||
µb.htmlFilteringEngine.freeze();
|
||||
resetParsed(parsed);
|
||||
};
|
||||
|
||||
// https://github.com/chrisaljoudi/uBlock/issues/1004
|
||||
// Detect and report invalid CSS selectors.
|
||||
|
||||
// Discard new ABP's `-abp-properties` directive until it is
|
||||
// implemented (if ever). Unlikely, see:
|
||||
// https://github.com/gorhill/uBlock/issues/1752
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/2624
|
||||
// Convert Adguard's `-ext-has='...'` into uBO's `:has(...)`.
|
||||
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/89
|
||||
// Do not discard unknown pseudo-elements.
|
||||
|
||||
api.compileSelector = (( ) => {
|
||||
const reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/;
|
||||
const reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/;
|
||||
|
||||
const normalizedExtendedSyntaxOperators = new Map([
|
||||
[ 'contains', ':has-text' ],
|
||||
[ 'has', ':has' ],
|
||||
[ 'matches-css', ':matches-css' ],
|
||||
[ 'matches-css-after', ':matches-css-after' ],
|
||||
[ 'matches-css-before', ':matches-css-before' ],
|
||||
]);
|
||||
|
||||
const entryPoint = function(raw) {
|
||||
entryPoint.pseudoclass = -1;
|
||||
|
||||
const extendedSyntax = reExtendedSyntax.test(raw);
|
||||
if ( cssSelectorType(raw) === 1 && extendedSyntax === false ) {
|
||||
return raw;
|
||||
}
|
||||
|
||||
// We rarely reach this point -- majority of selectors are plain
|
||||
// CSS selectors.
|
||||
|
||||
// Supported Adguard/ABP advanced selector syntax: will translate
|
||||
// into uBO's syntax before further processing.
|
||||
// Mind unsupported advanced selector syntax, such as ABP's
|
||||
// `-abp-properties`.
|
||||
// Note: extended selector syntax has been deprecated in ABP, in
|
||||
// favor of the procedural one (i.e. `:operator(...)`).
|
||||
// See https://issues.adblockplus.org/ticket/5287
|
||||
if ( extendedSyntax ) {
|
||||
let matches;
|
||||
while ( (matches = reExtendedSyntaxParser.exec(raw)) !== null ) {
|
||||
const operator = normalizedExtendedSyntaxOperators.get(matches[1]);
|
||||
if ( operator === undefined ) { return; }
|
||||
raw = raw.slice(0, matches.index) +
|
||||
operator + '(' + matches[3] + ')' +
|
||||
raw.slice(matches.index + matches[0].length);
|
||||
}
|
||||
return entryPoint(raw);
|
||||
}
|
||||
|
||||
// Procedural selector?
|
||||
const compiled = compileProceduralSelector(raw);
|
||||
if ( compiled === undefined ) { return; }
|
||||
|
||||
if ( compiled.pseudo !== undefined ) {
|
||||
entryPoint.pseudoclass = compiled.pseudo;
|
||||
}
|
||||
|
||||
return JSON.stringify(compiled);
|
||||
};
|
||||
|
||||
entryPoint.pseudoclass = -1;
|
||||
|
||||
return entryPoint;
|
||||
})();
|
||||
|
||||
api.compile = function(parser, writer) {
|
||||
if ( parser.category !== parser.CATStaticExtFilter ) { return false; }
|
||||
|
||||
// Adguard's scriptlet injection: not supported.
|
||||
if ( (parser.flavorBits & parser.BITFlavorUnsupported) !== 0 ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Extract the selector.
|
||||
let suffix = parser.strFromSpan(parser.patternSpan);
|
||||
if ( suffix.length === 0 ) { return false; }
|
||||
parsed.suffix = suffix;
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/952
|
||||
// Find out whether we are dealing with an Adguard-specific cosmetic
|
||||
// filter, and if so, translate it if supported, or discard it if not
|
||||
// supported.
|
||||
// We have an Adguard/ABP cosmetic filter if and only if the
|
||||
// character is `$`, `%` or `?`, otherwise it's not a cosmetic
|
||||
// filter.
|
||||
// Adguard's style injection: translate to uBO's format.
|
||||
if ( (parser.flavorBits & parser.BITFlavorExtStyle) !== 0 ) {
|
||||
suffix = translateAdguardCSSInjectionFilter(suffix);
|
||||
if ( suffix === '' ) { return true; }
|
||||
parsed.suffix = suffix;
|
||||
}
|
||||
|
||||
// Exception filter?
|
||||
parsed.exception = parser.isException();
|
||||
|
||||
// Extract the hostname(s), punycode if required.
|
||||
if ( parser.hasOptions() ) {
|
||||
parsed.hostnames = hostnamesFromPrefix(parser);
|
||||
} else {
|
||||
parsed.hostnames = emptyArray;
|
||||
}
|
||||
|
||||
// Backward compatibility with deprecated syntax.
|
||||
if ( suffix.startsWith('script:') ) {
|
||||
if ( suffix.startsWith('script:inject') ) {
|
||||
suffix = parsed.suffix = '+js' + suffix.slice(13);
|
||||
} else if ( suffix.startsWith('script:contains') ) {
|
||||
suffix = parsed.suffix = '^script:has-text' + suffix.slice(15);
|
||||
}
|
||||
}
|
||||
|
||||
const c0 = suffix.charCodeAt(0);
|
||||
|
||||
// New shorter syntax for scriptlet injection engine.
|
||||
if ( c0 === 0x2B /* '+' */ && suffix.startsWith('+js') ) {
|
||||
µb.scriptletFilteringEngine.compile(parsed, writer);
|
||||
// Scriptlet injection
|
||||
if ( (parser.flavorBits & parser.BITFlavorExtScriptlet) !== 0 ) {
|
||||
µb.scriptletFilteringEngine.compile(parser, writer);
|
||||
return true;
|
||||
}
|
||||
|
||||
// HTML filtering engine.
|
||||
// HTML filtering
|
||||
// TODO: evaluate converting Adguard's `$$` syntax into uBO's HTML
|
||||
// filtering syntax.
|
||||
if ( c0 === 0x5E /* '^' */ ) {
|
||||
µb.htmlFilteringEngine.compile(parsed, writer);
|
||||
if ( (parser.flavorBits & parser.BITFlavorExtHTML) !== 0 ) {
|
||||
µb.htmlFilteringEngine.compile(parser, writer);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Cosmetic filtering engine.
|
||||
µb.cosmeticFilteringEngine.compile(parsed, writer);
|
||||
// Cosmetic filtering
|
||||
µb.cosmeticFilteringEngine.compile(parser, writer);
|
||||
return true;
|
||||
};
|
||||
|
||||
|
|
|
@ -105,6 +105,13 @@ const Parser = class {
|
|||
this.reIsLocalhostRedirect = /(?:0\.0\.0\.0|(?:broadcast|local)host|local|ip6-\w+)\b/;
|
||||
this.reHostname = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+/;
|
||||
this.punycoder = new URL(self.location);
|
||||
// TODO: reuse for network filtering analysis
|
||||
this.result = {
|
||||
exception: false,
|
||||
raw: '',
|
||||
compiled: '',
|
||||
pseudoclass: false,
|
||||
};
|
||||
this.reset();
|
||||
}
|
||||
|
||||
|
@ -206,6 +213,7 @@ const Parser = class {
|
|||
this.patternSpan.i = from + 3;
|
||||
this.patternSpan.l = this.rightSpaceSpan.i - this.patternSpan.i;
|
||||
this.category = CATStaticExtFilter;
|
||||
this.analyzeExtPattern();
|
||||
return;
|
||||
}
|
||||
let flavorBits = 0;
|
||||
|
@ -256,13 +264,55 @@ const Parser = class {
|
|||
this.patternSpan.l = this.rightSpaceSpan.i - to;
|
||||
this.flavorBits = flavorBits;
|
||||
this.category = CATStaticExtFilter;
|
||||
this.analyzeExtPattern();
|
||||
}
|
||||
|
||||
// Use in syntax highlighting contexts
|
||||
analyzeExtPattern() {
|
||||
this.result.exception = this.isException();
|
||||
this.result.compiled = undefined;
|
||||
this.result.pseudoclass = false;
|
||||
|
||||
let selector = this.strFromSpan(this.patternSpan);
|
||||
if ( selector === '' ) {
|
||||
this.flavorBits |= BITFlavorUnsupported;
|
||||
this.result.raw = '';
|
||||
return;
|
||||
}
|
||||
const { i } = this.patternSpan;
|
||||
// ##+js(...)
|
||||
if (
|
||||
hasBits(this.slices[i], BITPlus) &&
|
||||
selector.startsWith('+js(') && selector.endsWith(')')
|
||||
) {
|
||||
this.flavorBits |= BITFlavorExtScriptlet;
|
||||
this.result.raw = selector;
|
||||
this.result.compiled = selector.slice(4, -1);
|
||||
return;
|
||||
}
|
||||
// ##^...
|
||||
if ( hasBits(this.slices[i], BITCaret) ) {
|
||||
this.flavorBits |= BITFlavorExtHTML;
|
||||
selector = selector.slice(1);
|
||||
}
|
||||
// ##...
|
||||
else {
|
||||
this.flavorBits |= BITFlavorExtCosmetic;
|
||||
}
|
||||
this.result.raw = selector;
|
||||
if ( this.compileSelector(selector, this.result) === false ) {
|
||||
this.flavorBits |= BITFlavorUnsupported;
|
||||
}
|
||||
}
|
||||
|
||||
// Use in syntax highlighting contexts
|
||||
analyzeExtExtra() {
|
||||
const { i, l } = this.optionsSpan;
|
||||
if ( l === 0 ) { return; }
|
||||
this.analyzeDomainList(i, i + l, BITComma, true);
|
||||
if ( this.hasOptions() ) {
|
||||
const { i, l } = this.optionsSpan;
|
||||
this.analyzeDomainList(i, i + l, BITComma, 0b11);
|
||||
}
|
||||
if ( hasBits(this.flavorBits, BITFlavorUnsupported) ) {
|
||||
this.markSpan(this.patternSpan, BITError);
|
||||
}
|
||||
}
|
||||
|
||||
// Static network filters are all of the form:
|
||||
|
@ -569,13 +619,13 @@ const Parser = class {
|
|||
this.netOptionsIterator.init();
|
||||
}
|
||||
|
||||
analyzeDomainList(from, to, bitSeparator, canEntity) {
|
||||
analyzeDomainList(from, to, bitSeparator, optionBits) {
|
||||
if ( from >= to ) { return; }
|
||||
let beg = from;
|
||||
while ( beg < to ) {
|
||||
let end = this.skipUntil(beg, to, bitSeparator);
|
||||
if ( end === -1 ) { end = to; }
|
||||
if ( this.analyzeDomain(beg, end, canEntity) === false ) {
|
||||
if ( this.analyzeDomain(beg, end, optionBits) === false ) {
|
||||
this.markSlices(beg, end, BITError);
|
||||
}
|
||||
beg = end + 3;
|
||||
|
@ -586,15 +636,29 @@ const Parser = class {
|
|||
}
|
||||
}
|
||||
|
||||
analyzeDomain(from, to, canEntity) {
|
||||
// bits:
|
||||
// 0: can use entity-based hostnames
|
||||
// 1: can use single wildcard
|
||||
analyzeDomain(from, to, optionBits) {
|
||||
const { slices } = this;
|
||||
const len = to - from;
|
||||
let len = to - from;
|
||||
if ( len === 0 ) { return false; }
|
||||
if ( hasBits(slices[from], BITTilde) ) {
|
||||
if ( canEntity === false || slices[from+2] > 1 ) { return false; }
|
||||
const not = hasBits(slices[from], BITTilde);
|
||||
if ( not ) {
|
||||
if ( (optionBits & 0b01) === 0 || slices[from+2] > 1 ) { return false; }
|
||||
from += 3;
|
||||
len -= 3;
|
||||
}
|
||||
if ( len === 0 ) { return false; }
|
||||
// One slice only, check for single asterisk
|
||||
if (
|
||||
len === 3 &&
|
||||
not === false &&
|
||||
(optionBits & 0b10) !== 0 &&
|
||||
hasBits(slices[from], BITAsterisk)
|
||||
) {
|
||||
return slices[from+2] === 1;
|
||||
}
|
||||
// First slice must be regex-equivalent of `\w`
|
||||
if ( hasNoBits(slices[from], BITRegexWord | BITUnicode) ) { return false; }
|
||||
// Last slice
|
||||
|
@ -602,7 +666,7 @@ const Parser = class {
|
|||
const last = to - 3;
|
||||
if ( hasBits(slices[last], BITAsterisk) ) {
|
||||
if (
|
||||
canEntity === false ||
|
||||
(optionBits & 0b01) === 0 ||
|
||||
len < 9 ||
|
||||
slices[last+2] > 1 ||
|
||||
hasNoBits(slices[last-3], BITPeriod)
|
||||
|
@ -618,7 +682,9 @@ const Parser = class {
|
|||
for ( let i = from + 3; i < to - 3; i += 3 ) {
|
||||
const bits = slices[i];
|
||||
if ( hasNoBits(bits, BITHostname) ) { return false; }
|
||||
if ( hasBits(bits, BITPeriod) && slices[i+2] > 1 ) { return false; }
|
||||
if ( hasBits(bits, BITPeriod) && slices[i+2] > 1 ) {
|
||||
return false;
|
||||
}
|
||||
if (
|
||||
hasBits(bits, BITDash) && (
|
||||
hasNoBits(slices[i-3], BITRegexWord | BITUnicode) ||
|
||||
|
@ -786,6 +852,16 @@ const Parser = class {
|
|||
return this.optionsSpan.l !== 0;
|
||||
}
|
||||
|
||||
getPattern() {
|
||||
if ( this.pattern !== '' ) { return this.pattern; }
|
||||
const { i, l } = this.patternSpan;
|
||||
if ( l === 0 ) { return ''; }
|
||||
let beg = this.slices[i+1];
|
||||
let end = this.slices[i+l+1];
|
||||
this.pattern = this.raw.slice(beg, end);
|
||||
return this.pattern;
|
||||
}
|
||||
|
||||
getNetPattern() {
|
||||
if ( this.pattern !== '' ) { return this.pattern; }
|
||||
const { i, l } = this.patternSpan;
|
||||
|
@ -909,13 +985,12 @@ const Parser = class {
|
|||
return hasBits(this.optionsBits, BITUnicode);
|
||||
}
|
||||
|
||||
options() {
|
||||
if ( this.category === CATStaticNetFilter ) {
|
||||
return this.netOptionsIterator;
|
||||
} else if ( this.category === CATStaticExtFilter ) {
|
||||
return this.extOptionsIterator;
|
||||
}
|
||||
return [];
|
||||
netOptions() {
|
||||
return this.netOptionsIterator;
|
||||
}
|
||||
|
||||
extOptions() {
|
||||
return this.extOptionsIterator;
|
||||
}
|
||||
|
||||
patternTokens() {
|
||||
|
@ -972,13 +1047,582 @@ const Parser = class {
|
|||
hasError() {
|
||||
return hasBits(this.flavorBits, BITFlavorError);
|
||||
}
|
||||
|
||||
shouldDiscard() {
|
||||
return hasBits(
|
||||
this.flavorBits,
|
||||
BITFlavorError | BITFlavorUnsupported | BITFlavorIgnore
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// https://github.com/chrisaljoudi/uBlock/issues/1004
|
||||
// Detect and report invalid CSS selectors.
|
||||
|
||||
// Discard new ABP's `-abp-properties` directive until it is
|
||||
// implemented (if ever). Unlikely, see:
|
||||
// https://github.com/gorhill/uBlock/issues/1752
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/2624
|
||||
// Convert Adguard's `-ext-has='...'` into uBO's `:has(...)`.
|
||||
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/89
|
||||
// Do not discard unknown pseudo-elements.
|
||||
|
||||
Parser.prototype.compileSelector = (( ) => {
|
||||
const reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/;
|
||||
const reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/;
|
||||
const reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/;
|
||||
|
||||
// Translate the pattern of an Adguard CSS-injection filter, i.e.
// `selector { declarations }`, into uBO syntax.
//
// Returns:
// - '' when the input is not of the form `selector { declarations }`;
// - `selector:remove()` when the declarations are just `remove: true`;
// - `selector` alone when the declarations end with
//   `display: none !important` (a plain cosmetic filter in disguise);
// - `selector:style(declarations)` otherwise.
const translateAdguardCSSInjectionFilter = function(suffix) {
    const parts = /^([^{]+)\{([^}]+)\}\s*$/.exec(suffix);
    if ( parts === null ) { return ''; }
    const [ , rawSelector, rawStyle ] = parts;
    const selector = rawSelector.trim();
    const style = rawStyle.trim();
    // The special style directive `remove: true` maps to the `:remove()`
    // operator.
    if ( /^\s*remove:\s*true[; ]*$/.test(style) ) {
        return `${selector}:remove()`;
    }
    // Many of Adguard's plain cosmetic filters are written as style-based
    // filters which merely hide the element: keep only the selector.
    if ( /display\s*:\s*none\s*!important;?$/.test(style) ) {
        return selector;
    }
    return `${selector}:style(${style})`;
};
|
||||
|
||||
const normalizedExtendedSyntaxOperators = new Map([
|
||||
[ 'contains', ':has-text' ],
|
||||
[ 'has', ':has' ],
|
||||
[ 'matches-css', ':matches-css' ],
|
||||
[ 'matches-css-after', ':matches-css-after' ],
|
||||
[ 'matches-css-before', ':matches-css-before' ],
|
||||
]);
|
||||
|
||||
// Return value:
|
||||
// 0b00 (0) = not a valid CSS selector
|
||||
// 0b01 (1) = valid CSS selector, without pseudo-element
|
||||
// 0b11 (3) = valid CSS selector, with pseudo element
|
||||
const cssSelectorType = (( ) => {
|
||||
// Quick regex-based validation -- most cosmetic filters are of the
|
||||
// simple form and in such case a regex is much faster.
|
||||
const reSimple = /^[#.][A-Za-z_][\w-]*$/;
|
||||
const div = document.createElement('div');
|
||||
// Keep in mind:
|
||||
// https://github.com/gorhill/uBlock/issues/693
|
||||
// https://github.com/gorhill/uBlock/issues/1955
|
||||
// https://github.com/gorhill/uBlock/issues/3111
|
||||
// Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817
|
||||
// is fixed.
|
||||
return s => {
|
||||
if ( reSimple.test(s) ) { return 1; }
|
||||
const pos = cssPseudoSelector(s);
|
||||
if ( pos !== -1 ) {
|
||||
return cssSelectorType(s.slice(0, pos)) === 1 ? 3 : 0;
|
||||
}
|
||||
try {
|
||||
div.matches(`${s}, ${s}:not(#foo)`);
|
||||
} catch (ex) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
};
|
||||
})();
|
||||
|
||||
// Locate a trailing pseudo-element in a selector string.
//
// Recognized at the very end of the string: `:after`, `::after`,
// `:before`, `::before`, or `::name` where `name` is at least two
// characters long, made of lowercase letters and dashes, and neither
// starts nor ends with a dash.
//
// Returns the index at which the pseudo-element starts, or -1 when there
// is none.
const cssPseudoSelector = (( ) => {
    const reTrailingPseudo = /:(?::?after|:?before|:[a-z][a-z-]*[a-z])$/;
    return function(s) {
        // Cheap rejection: no colon at all means no pseudo-element.
        if ( s.includes(':') === false ) { return -1; }
        const found = reTrailingPseudo.exec(s);
        if ( found === null ) { return -1; }
        return found.index;
    };
})();
|
||||
|
||||
const compileProceduralSelector = (( ) => {
|
||||
const reProceduralOperator = new RegExp([
|
||||
'^(?:',
|
||||
[
|
||||
'-abp-contains',
|
||||
'-abp-has',
|
||||
'contains',
|
||||
'has',
|
||||
'has-text',
|
||||
'if',
|
||||
'if-not',
|
||||
'matches-css',
|
||||
'matches-css-after',
|
||||
'matches-css-before',
|
||||
'min-text-length',
|
||||
'not',
|
||||
'nth-ancestor',
|
||||
'remove',
|
||||
'style',
|
||||
'upward',
|
||||
'watch-attr',
|
||||
'watch-attrs',
|
||||
'xpath'
|
||||
].join('|'),
|
||||
')\\('
|
||||
].join(''));
|
||||
|
||||
const reEatBackslashes = /\\([()])/g;
|
||||
const reEscapeRegex = /[.*+?^${}()|[\]\\]/g;
|
||||
const reNeedScope = /^\s*>/;
|
||||
const reIsDanglingSelector = /[+>~\s]\s*$/;
|
||||
const reIsSiblingSelector = /^\s*[+~]/;
|
||||
|
||||
const regexToRawValue = new Map();
|
||||
|
||||
// Tell whether `s` fails to compile as a regular expression source.
//
// Returns true when `new RegExp(s)` throws, in which case the stringified
// exception is stored in `isBadRegex.message`; returns false otherwise
// (the `message` property is left untouched in that case).
const isBadRegex = function(s) {
    let bad = false;
    try {
        void new RegExp(s);
    } catch (ex) {
        isBadRegex.message = ex.toString();
        bad = true;
    }
    return bad;
};
|
||||
|
||||
// When dealing with literal text, we must first eat _some_
|
||||
// backslash characters.
|
||||
const compileText = function(s) {
|
||||
const match = reParseRegexLiteral.exec(s);
|
||||
let regexDetails;
|
||||
if ( match !== null ) {
|
||||
regexDetails = match[1];
|
||||
if ( isBadRegex(regexDetails) ) { return; }
|
||||
if ( match[2] ) {
|
||||
regexDetails = [ regexDetails, match[2] ];
|
||||
}
|
||||
} else {
|
||||
regexDetails = s.replace(reEatBackslashes, '$1')
|
||||
.replace(reEscapeRegex, '\\$&');
|
||||
regexToRawValue.set(regexDetails, s);
|
||||
}
|
||||
return regexDetails;
|
||||
};
|
||||
|
||||
const compileCSSDeclaration = function(s) {
|
||||
const pos = s.indexOf(':');
|
||||
if ( pos === -1 ) { return; }
|
||||
const name = s.slice(0, pos).trim();
|
||||
const value = s.slice(pos + 1).trim();
|
||||
const match = reParseRegexLiteral.exec(value);
|
||||
let regexDetails;
|
||||
if ( match !== null ) {
|
||||
regexDetails = match[1];
|
||||
if ( isBadRegex(regexDetails) ) { return; }
|
||||
if ( match[2] ) {
|
||||
regexDetails = [ regexDetails, match[2] ];
|
||||
}
|
||||
} else {
|
||||
regexDetails = '^' + value.replace(reEscapeRegex, '\\$&') + '$';
|
||||
regexToRawValue.set(regexDetails, value);
|
||||
}
|
||||
return { name: name, value: regexDetails };
|
||||
};
|
||||
|
||||
const compileConditionalSelector = function(s) {
|
||||
// https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277
|
||||
// Prepend `:scope ` if needed.
|
||||
if ( reNeedScope.test(s) ) {
|
||||
s = `:scope ${s}`;
|
||||
}
|
||||
return compile(s);
|
||||
};
|
||||
|
||||
// Parse `s` as a non-negative decimal integer.
//
// s:   string to parse; must consist solely of decimal digits.
// min: lowest accepted value (inclusive), defaults to 0.
// max: upper bound (exclusive), defaults to 0x7FFFFFFF.
//
// Returns the parsed number, or undefined when `s` is not made only of
// digits or when the value is outside of [min, max).
const compileInteger = function(s, min = 0, max = 0x7FFFFFFF) {
    const isDigitsOnly = /^\d+$/.test(s);
    if ( isDigitsOnly === false ) { return; }
    const value = parseInt(s, 10);
    const outOfRange = value < min || value >= max;
    return outOfRange ? undefined : value;
};
|
||||
|
||||
// Compile the argument of a `:not(...)` operator.
//
// https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588
//   When the argument is itself a valid CSS selector, the operator is
//   rejected (undefined is returned), otherwise we would be adversely
//   changing the behavior of CSS4's native :not().
//
// Returns the result of compiling the argument as a conditional selector,
// or undefined when the argument is rejected.
const compileNotSelector = function(s) {
    if ( cssSelectorType(s) !== 0 ) { return; }
    return compileConditionalSelector(s);
};
|
||||
|
||||
// Compile the argument of an `:upward(...)` operator.
//
// The argument is accepted when it is either an integer in the range
// 1 to 255 inclusive, or a string for which cssSelectorType() returns 1.
//
// Returns the integer, or the selector string, or undefined when the
// argument is neither.
const compileUpwardArgument = function(s) {
    const depth = compileInteger(s, 1, 256);
    if ( depth !== undefined ) { return depth; }
    return cssSelectorType(s) === 1 ? s : undefined;
};
|
||||
|
||||
// Compile the argument of a `:remove(...)` operator: only an empty
// argument is valid.
//
// Returns the empty string when `s` is the empty string, undefined
// otherwise.
const compileRemoveSelector = function(s) {
    return s === '' ? s : undefined;
};
|
||||
|
||||
// Validate a selector fragment meant to be used as a `:spath` task.
//
// The fragment is checked by prefixing it with the universal selector
// `*` and passing the result to cssSelectorType().
//
// Returns `s` unchanged when that check yields 1, undefined otherwise.
const compileSpathExpression = function(s) {
    const isValid = cssSelectorType('*' + s) === 1;
    return isValid ? s : undefined;
};
|
||||
|
||||
const compileStyleProperties = (( ) => {
|
||||
let div;
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/668
|
||||
return function(s) {
|
||||
if ( /url\(|\\/i.test(s) ) { return; }
|
||||
if ( div === undefined ) {
|
||||
div = document.createElement('div');
|
||||
}
|
||||
div.style.cssText = s;
|
||||
if ( div.style.cssText === '' ) { return; }
|
||||
div.style.cssText = '';
|
||||
return s;
|
||||
};
|
||||
})();
|
||||
|
||||
// Compile the argument of a `:watch-attr(...)` operator into a list of
// attribute names.
//
// s: comma-separated list of attribute names; whitespace around the list
//    and around each comma is ignored.
//
// Returns an array of the non-empty names, in order of appearance. The
// array is empty when `s` contains no name at all.
const compileAttrList = function(s) {
    // The separator must be a regex literal: the string '\s*,\s*' is the
    // literal text "s*,s*" and would never match a comma-separated list.
    return s.trim().split(/\s*,\s*/).filter(attr => attr !== '');
};
|
||||
|
||||
const compileXpathExpression = function(s) {
|
||||
try {
|
||||
document.createExpression(s, null);
|
||||
} catch (e) {
|
||||
return;
|
||||
}
|
||||
return s;
|
||||
};
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/2793
|
||||
const normalizedOperators = new Map([
|
||||
[ ':-abp-contains', ':has-text' ],
|
||||
[ ':-abp-has', ':has' ],
|
||||
[ ':contains', ':has-text' ],
|
||||
[ ':nth-ancestor', ':upward' ],
|
||||
[ ':watch-attrs', ':watch-attr' ],
|
||||
]);
|
||||
|
||||
const compileArgument = new Map([
|
||||
[ ':has', compileConditionalSelector ],
|
||||
[ ':has-text', compileText ],
|
||||
[ ':if', compileConditionalSelector ],
|
||||
[ ':if-not', compileConditionalSelector ],
|
||||
[ ':matches-css', compileCSSDeclaration ],
|
||||
[ ':matches-css-after', compileCSSDeclaration ],
|
||||
[ ':matches-css-before', compileCSSDeclaration ],
|
||||
[ ':min-text-length', compileInteger ],
|
||||
[ ':not', compileNotSelector ],
|
||||
[ ':remove', compileRemoveSelector ],
|
||||
[ ':spath', compileSpathExpression ],
|
||||
[ ':style', compileStyleProperties ],
|
||||
[ ':upward', compileUpwardArgument ],
|
||||
[ ':watch-attr', compileAttrList ],
|
||||
[ ':xpath', compileXpathExpression ],
|
||||
]);
|
||||
|
||||
const actionOperators = new Set([
|
||||
':remove',
|
||||
':style',
|
||||
]);
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387
|
||||
// Normalize (somewhat) the stringified version of procedural
|
||||
// cosmetic filters -- this increases the likelihood of detecting
|
||||
// duplicates given that uBO is able to understand syntax specific
|
||||
// to other blockers.
|
||||
// The normalized string version is what is reported in the logger,
|
||||
// by design.
|
||||
const decompile = function(compiled) {
|
||||
const tasks = compiled.tasks;
|
||||
if ( Array.isArray(tasks) === false ) {
|
||||
return compiled.selector;
|
||||
}
|
||||
const raw = [ compiled.selector ];
|
||||
let value;
|
||||
for ( const task of tasks ) {
|
||||
switch ( task[0] ) {
|
||||
case ':has':
|
||||
case ':if':
|
||||
raw.push(`:has(${decompile(task[1])})`);
|
||||
break;
|
||||
case ':has-text':
|
||||
if ( Array.isArray(task[1]) ) {
|
||||
value = `/${task[1][0]}/${task[1][1]}`;
|
||||
} else {
|
||||
value = regexToRawValue.get(task[1]);
|
||||
if ( value === undefined ) {
|
||||
value = `/${task[1]}/`;
|
||||
}
|
||||
}
|
||||
raw.push(`:has-text(${value})`);
|
||||
break;
|
||||
case ':matches-css':
|
||||
case ':matches-css-after':
|
||||
case ':matches-css-before':
|
||||
if ( Array.isArray(task[1].value) ) {
|
||||
value = `/${task[1].value[0]}/${task[1].value[1]}`;
|
||||
} else {
|
||||
value = regexToRawValue.get(task[1].value);
|
||||
if ( value === undefined ) {
|
||||
value = `/${task[1].value}/`;
|
||||
}
|
||||
}
|
||||
raw.push(`${task[0]}(${task[1].name}: ${value})`);
|
||||
break;
|
||||
case ':not':
|
||||
case ':if-not':
|
||||
raw.push(`:not(${decompile(task[1])})`);
|
||||
break;
|
||||
case ':spath':
|
||||
raw.push(task[1]);
|
||||
break;
|
||||
case ':min-text-length':
|
||||
case ':remove':
|
||||
case ':style':
|
||||
case ':upward':
|
||||
case ':watch-attr':
|
||||
case ':xpath':
|
||||
raw.push(`${task[0]}(${task[1]})`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return raw.join('');
|
||||
};
|
||||
|
||||
const compile = function(raw, root = false) {
|
||||
if ( raw === '' ) { return; }
|
||||
|
||||
const tasks = [];
|
||||
const n = raw.length;
|
||||
let prefix = '';
|
||||
let i = 0;
|
||||
let opPrefixBeg = 0;
|
||||
let action;
|
||||
|
||||
// TODO: use slices instead of charCodeAt()
|
||||
for (;;) {
|
||||
let c, match;
|
||||
// Advance to next operator.
|
||||
while ( i < n ) {
|
||||
c = raw.charCodeAt(i++);
|
||||
if ( c === 0x3A /* ':' */ ) {
|
||||
match = reProceduralOperator.exec(raw.slice(i));
|
||||
if ( match !== null ) { break; }
|
||||
}
|
||||
}
|
||||
if ( i === n ) { break; }
|
||||
const opNameBeg = i - 1;
|
||||
const opNameEnd = i + match[0].length - 1;
|
||||
i += match[0].length;
|
||||
// Find end of argument: first balanced closing parenthesis.
|
||||
// Note: unbalanced parenthesis can be used in a regex literal
|
||||
// when they are escaped using `\`.
|
||||
// TODO: need to handle quoted parentheses.
|
||||
let pcnt = 1;
|
||||
while ( i < n ) {
|
||||
c = raw.charCodeAt(i++);
|
||||
if ( c === 0x5C /* '\\' */ ) {
|
||||
if ( i < n ) { i += 1; }
|
||||
} else if ( c === 0x28 /* '(' */ ) {
|
||||
pcnt +=1 ;
|
||||
} else if ( c === 0x29 /* ')' */ ) {
|
||||
pcnt -= 1;
|
||||
if ( pcnt === 0 ) { break; }
|
||||
}
|
||||
}
|
||||
// Unbalanced parenthesis? An unbalanced parenthesis is fine
|
||||
// as long as the last character is a closing parenthesis.
|
||||
if ( pcnt !== 0 && c !== 0x29 ) { return; }
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588
|
||||
// Maybe that one operator is a valid CSS selector and if so,
|
||||
// then consider it to be part of the prefix.
|
||||
if ( cssSelectorType(raw.slice(opNameBeg, i)) === 1 ) {
|
||||
continue;
|
||||
}
|
||||
// Extract and remember operator details.
|
||||
let operator = raw.slice(opNameBeg, opNameEnd);
|
||||
operator = normalizedOperators.get(operator) || operator;
|
||||
// Action operator can only be used as trailing operator in the
|
||||
// root task list.
|
||||
// Per-operator arguments validation
|
||||
const args = compileArgument.get(operator)(
|
||||
raw.slice(opNameEnd + 1, i - 1)
|
||||
);
|
||||
if ( args === undefined ) { return; }
|
||||
if ( opPrefixBeg === 0 ) {
|
||||
prefix = raw.slice(0, opNameBeg);
|
||||
} else if ( opNameBeg !== opPrefixBeg ) {
|
||||
if ( action !== undefined ) { return; }
|
||||
const spath = compileSpathExpression(
|
||||
raw.slice(opPrefixBeg, opNameBeg)
|
||||
);
|
||||
if ( spath === undefined ) { return; }
|
||||
tasks.push([ ':spath', spath ]);
|
||||
}
|
||||
if ( action !== undefined ) { return; }
|
||||
tasks.push([ operator, args ]);
|
||||
if ( actionOperators.has(operator) ) {
|
||||
if ( root === false ) { return; }
|
||||
action = operator.slice(1);
|
||||
}
|
||||
opPrefixBeg = i;
|
||||
if ( i === n ) { break; }
|
||||
}
|
||||
|
||||
// No task found: then we have a CSS selector.
|
||||
// At least one task found: nothing should be left to parse.
|
||||
if ( tasks.length === 0 ) {
|
||||
prefix = raw;
|
||||
} else if ( opPrefixBeg < n ) {
|
||||
if ( action !== undefined ) { return; }
|
||||
const spath = compileSpathExpression(raw.slice(opPrefixBeg));
|
||||
if ( spath === undefined ) { return; }
|
||||
tasks.push([ ':spath', spath ]);
|
||||
}
|
||||
|
||||
// https://github.com/NanoAdblocker/NanoCore/issues/1#issuecomment-354394894
|
||||
// https://www.reddit.com/r/uBlockOrigin/comments/c6iem5/
|
||||
// Convert sibling-selector prefix into :spath operator, but
|
||||
// only if context is not the root.
|
||||
if ( prefix !== '' ) {
|
||||
if ( reIsDanglingSelector.test(prefix) && tasks.length !== 0 ) {
|
||||
prefix += ' *';
|
||||
}
|
||||
if ( cssSelectorType(prefix) === 0 ) {
|
||||
if (
|
||||
root ||
|
||||
reIsSiblingSelector.test(prefix) === false ||
|
||||
compileSpathExpression(prefix) === undefined
|
||||
) {
|
||||
return;
|
||||
}
|
||||
tasks.unshift([ ':spath', prefix ]);
|
||||
prefix = '';
|
||||
}
|
||||
}
|
||||
|
||||
const out = { selector: prefix };
|
||||
|
||||
if ( tasks.length !== 0 ) {
|
||||
out.tasks = tasks;
|
||||
}
|
||||
|
||||
// Expose action to take in root descriptor.
|
||||
//
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/961
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/382
|
||||
// For the time being, `style` action can't be used in a
|
||||
// procedural selector.
|
||||
if ( action !== undefined ) {
|
||||
if ( tasks.length > 1 && action === 'style' ) { return; }
|
||||
out.action = action;
|
||||
}
|
||||
|
||||
// Pseudo-selectors are valid only when used in a root task list.
|
||||
if ( prefix !== '' ) {
|
||||
const pos = cssPseudoSelector(prefix);
|
||||
if ( pos !== -1 ) {
|
||||
if ( root === false ) { return; }
|
||||
out.pseudo = pos;
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
};
|
||||
|
||||
// Public entry point of the procedural selector compiler.
//
// Runs `compile()` on `raw` with `true` as second argument. When compilation
// succeeds, the result is tagged with a `raw` property holding the output of
// `decompile()` for that result. When compilation fails, `undefined` is
// returned as-is.
const entryPoint = function(raw) {
    const compiled = compile(raw, true);
    if ( compiled === undefined ) { return compiled; }
    compiled.raw = decompile(compiled);
    return compiled;
};
|
||||
|
||||
// Empties the `regexToRawValue` collection.
entryPoint.reset = () => {
    regexToRawValue.clear();
};
|
||||
|
||||
return entryPoint;
|
||||
})();
|
||||
|
||||
// Compiles the selector part `raw` of a cosmetic filter into `out`.
//
// Must be called with `this` bound to an object exposing `flavorBits`.
//
// On success, returns `true` with `out.compiled` set to either the selector
// itself or the JSON-serialized procedural descriptor; `out.pseudoclass` and
// `out.raw` may also be set along the way. Returns `false` when the selector
// cannot be translated or compiled.
const entryPoint = function(raw, out) {
    // https://github.com/gorhill/uBlock/issues/952
    //   Find out whether we are dealing with an Adguard-specific cosmetic
    //   filter, and if so, translate it if supported, or discard it if not
    //   supported.
    //   We have an Adguard/ABP cosmetic filter if and only if the
    //   character is `$`, `%` or `?`, otherwise it's not a cosmetic
    //   filter.
    // Adguard's style injection: translate to uBO's format. An empty
    // translation result means the filter is discarded.
    if ( hasBits(this.flavorBits, BITFlavorExtStyle) ) {
        const translated = translateAdguardCSSInjectionFilter(raw);
        if ( translated === '' ) { return false; }
        raw = translated;
        out.raw = translated;
    }

    // The extended-syntax test is performed only when `cssSelectorType()`
    // reports a non-zero type for the selector.
    const selectorType = cssSelectorType(raw);
    const extendedSyntax = selectorType !== 0 && reExtendedSyntax.test(raw);

    // Non-zero selector type without extended syntax: the selector is used
    // as is, no further compilation required.
    if ( selectorType !== 0 && extendedSyntax === false ) {
        out.pseudoclass = selectorType === 3;
        out.compiled = raw;
        return true;
    }

    // We rarely reach this point -- majority of selectors are plain
    // CSS selectors.

    // Supported Adguard/ABP advanced selector syntax: will translate
    // into uBO's syntax before further processing.
    // Mind unsupported advanced selector syntax, such as ABP's
    // `-abp-properties`.
    // Note: extended selector syntax has been deprecated in ABP, in
    // favor of the procedural one (i.e. `:operator(...)`).
    // See https://issues.adblockplus.org/ticket/5287
    if ( extendedSyntax ) {
        for (;;) {
            const matches = reExtendedSyntaxParser.exec(raw);
            if ( matches === null ) { break; }
            const operator = normalizedExtendedSyntaxOperators.get(matches[1]);
            // Unknown extended operator: the whole filter is rejected.
            if ( operator === undefined ) { return false; }
            const before = raw.slice(0, matches.index);
            const after = raw.slice(matches.index + matches[0].length);
            raw = before + operator + '(' + matches[3] + ')' + after;
        }
        // Re-enter with the rewritten selector.
        return entryPoint.call(this, raw, out);
    }

    // Procedural selector?
    const compiled = compileProceduralSelector(raw);
    if ( compiled === undefined ) { return false; }

    if ( compiled.pseudo !== undefined ) {
        out.pseudoclass = compiled.pseudo;
    }

    out.compiled = JSON.stringify(compiled);
    return true;
};
|
||||
|
||||
return entryPoint;
|
||||
})();
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// True when none of the bits set in `bits` are set in `v`.
const hasNoBits = (v, bits) => {
    const common = v & bits;
    return common === 0;
};
|
||||
// True when at least one of the bits set in `bits` is also set in `v`.
const hasBits = (v, bits) => {
    const common = v & bits;
    return common !== 0;
};
|
||||
// True when at least one of the bits set in `bits` is missing from `v`,
// i.e. masking `v` with `bits` does not give back `bits` itself.
const hasNotAllBits = (v, bits) => {
    const common = v & bits;
    return common !== bits;
};
|
||||
//const hasAllBits = (v, bits) => (v & bits) === bits;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
|
@ -987,42 +1631,45 @@ const CATStaticExtFilter = 1;
|
|||
const CATStaticNetFilter = 2;
|
||||
const CATComment = 3;
|
||||
|
||||
const BITSpace = 1 << 0;
|
||||
const BITGlyph = 1 << 1;
|
||||
const BITExclamation = 1 << 2;
|
||||
const BITHash = 1 << 3;
|
||||
const BITDollar = 1 << 4;
|
||||
const BITPercent = 1 << 5;
|
||||
const BITParen = 1 << 6;
|
||||
const BITAsterisk = 1 << 7;
|
||||
const BITComma = 1 << 8;
|
||||
const BITDash = 1 << 9;
|
||||
const BITPeriod = 1 << 10;
|
||||
const BITSlash = 1 << 11;
|
||||
const BITNum = 1 << 12;
|
||||
const BITEqual = 1 << 13;
|
||||
const BITQuestion = 1 << 14;
|
||||
const BITAt = 1 << 15;
|
||||
const BITAlpha = 1 << 16;
|
||||
const BITUppercase = 1 << 17;
|
||||
const BITSquareBracket = 1 << 18;
|
||||
const BITBackslash = 1 << 19;
|
||||
const BITCaret = 1 << 20;
|
||||
const BITUnderscore = 1 << 21;
|
||||
const BITBrace = 1 << 22;
|
||||
const BITPipe = 1 << 23;
|
||||
const BITTilde = 1 << 24;
|
||||
const BITClosing = 1 << 28;
|
||||
const BITUnicode = 1 << 29;
|
||||
const BITIgnore = 1 << 30;
|
||||
const BITError = 1 << 31;
|
||||
const BITSpace = 1 << 0;
|
||||
const BITGlyph = 1 << 1;
|
||||
const BITExclamation = 1 << 2;
|
||||
const BITHash = 1 << 3;
|
||||
const BITDollar = 1 << 4;
|
||||
const BITPercent = 1 << 5;
|
||||
const BITParen = 1 << 6;
|
||||
const BITAsterisk = 1 << 7;
|
||||
const BITPlus = 1 << 8;
|
||||
const BITComma = 1 << 9;
|
||||
const BITDash = 1 << 10;
|
||||
const BITPeriod = 1 << 11;
|
||||
const BITSlash = 1 << 12;
|
||||
const BITNum = 1 << 13;
|
||||
const BITEqual = 1 << 14;
|
||||
const BITQuestion = 1 << 15;
|
||||
const BITAt = 1 << 16;
|
||||
const BITAlpha = 1 << 17;
|
||||
const BITUppercase = 1 << 18;
|
||||
const BITSquareBracket = 1 << 19;
|
||||
const BITBackslash = 1 << 20;
|
||||
const BITCaret = 1 << 21;
|
||||
const BITUnderscore = 1 << 22;
|
||||
const BITBrace = 1 << 23;
|
||||
const BITPipe = 1 << 24;
|
||||
const BITTilde = 1 << 25;
|
||||
const BITOpening = 1 << 27;
|
||||
const BITClosing = 1 << 28;
|
||||
const BITUnicode = 1 << 29;
|
||||
// TODO: separate from character bits into a new slice slot.
|
||||
const BITIgnore = 1 << 30;
|
||||
const BITError = 1 << 31;
|
||||
|
||||
const BITAll = 0xFFFFFFFF;
|
||||
const BITAlphaNum = BITNum | BITAlpha;
|
||||
const BITRegexWord = BITAlphaNum | BITUnderscore;
|
||||
const BITHostname = BITNum | BITAlpha | BITUppercase | BITDash | BITPeriod | BITUnderscore | BITUnicode;
|
||||
const BITPatternToken = BITNum | BITAlpha | BITPercent;
|
||||
const BITLineComment = BITExclamation | BITHash | BITSquareBracket;
|
||||
const BITAll = 0xFFFFFFFF;
|
||||
const BITAlphaNum = BITNum | BITAlpha;
|
||||
const BITRegexWord = BITAlphaNum | BITUnderscore;
|
||||
const BITHostname = BITNum | BITAlpha | BITUppercase | BITDash | BITPeriod | BITUnderscore | BITUnicode;
|
||||
const BITPatternToken = BITNum | BITAlpha | BITPercent;
|
||||
const BITLineComment = BITExclamation | BITHash | BITSquareBracket;
|
||||
|
||||
// Important: it is expected that lines passed to the parser have been
|
||||
// trimmed of new line characters. Given this, any newline characters found
|
||||
|
@ -1044,10 +1691,10 @@ const charDescBits = [
|
|||
/* 0x25 % */ BITPercent,
|
||||
/* 0x26 & */ BITGlyph,
|
||||
/* 0x27 ' */ BITGlyph,
|
||||
/* 0x28 ( */ BITParen,
|
||||
/* 0x28 ( */ BITParen | BITOpening,
|
||||
/* 0x29 ) */ BITParen | BITClosing,
|
||||
/* 0x2A * */ BITAsterisk,
|
||||
/* 0x2B + */ BITGlyph,
|
||||
/* 0x2B + */ BITPlus,
|
||||
/* 0x2C , */ BITComma,
|
||||
/* 0x2D - */ BITDash,
|
||||
/* 0x2E . */ BITPeriod,
|
||||
|
@ -1095,7 +1742,7 @@ const charDescBits = [
|
|||
/* 0x58 X */ BITAlpha | BITUppercase,
|
||||
/* 0x59 Y */ BITAlpha | BITUppercase,
|
||||
/* 0x5A Z */ BITAlpha | BITUppercase,
|
||||
/* 0x5B [ */ BITSquareBracket,
|
||||
/* 0x5B [ */ BITSquareBracket | BITOpening,
|
||||
/* 0x5C \ */ BITBackslash,
|
||||
/* 0x5D ] */ BITSquareBracket | BITClosing,
|
||||
/* 0x5E ^ */ BITCaret,
|
||||
|
@ -1127,7 +1774,7 @@ const charDescBits = [
|
|||
/* 0x78 x */ BITAlpha,
|
||||
/* 0x79 y */ BITAlpha,
|
||||
/* 0x7A z */ BITAlpha,
|
||||
/* 0x7B { */ BITBrace,
|
||||
/* 0x7B { */ BITBrace | BITOpening,
|
||||
/* 0x7C | */ BITPipe,
|
||||
/* 0x7D } */ BITBrace | BITClosing,
|
||||
/* 0x7E ~ */ BITTilde,
|
||||
|
@ -1143,6 +1790,9 @@ const BITFlavorNetRightHnAnchor = 1 << 5;
|
|||
const BITFlavorNetSpaceInPattern = 1 << 6;
|
||||
const BITFlavorExtStyle = 1 << 7;
|
||||
const BITFlavorExtStrong = 1 << 8;
|
||||
const BITFlavorExtCosmetic = 1 << 9;
|
||||
const BITFlavorExtScriptlet = 1 << 10;
|
||||
const BITFlavorExtHTML = 1 << 11;
|
||||
const BITFlavorIgnore = 1 << 29;
|
||||
const BITFlavorUnsupported = 1 << 30;
|
||||
const BITFlavorError = 1 << 31;
|
||||
|
@ -1229,6 +1879,10 @@ Parser.prototype.BITAll = BITAll;
|
|||
|
||||
Parser.prototype.BITFlavorException = BITFlavorException;
|
||||
Parser.prototype.BITFlavorExtStyle = BITFlavorExtStyle;
|
||||
Parser.prototype.BITFlavorExtStrong = BITFlavorExtStrong;
|
||||
Parser.prototype.BITFlavorExtCosmetic = BITFlavorExtCosmetic;
|
||||
Parser.prototype.BITFlavorExtScriptlet = BITFlavorExtScriptlet;
|
||||
Parser.prototype.BITFlavorExtHTML = BITFlavorExtHTML;
|
||||
Parser.prototype.BITFlavorIgnore = BITFlavorIgnore;
|
||||
Parser.prototype.BITFlavorUnsupported = BITFlavorUnsupported;
|
||||
Parser.prototype.BITFlavorError = BITFlavorError;
|
||||
|
@ -1298,6 +1952,9 @@ const NetOptionsIterator = class {
|
|||
this.value = undefined;
|
||||
this.done = true;
|
||||
}
|
||||
[Symbol.iterator]() {
|
||||
return this.init();
|
||||
}
|
||||
init() {
|
||||
this.readPtr = this.writePtr = 0;
|
||||
this.done = this.parser.optionsSpan.l === 0;
|
||||
|
@ -1415,7 +2072,7 @@ const NetOptionsIterator = class {
|
|||
if ( this.interactive && hasBits(descriptor, OPTDomainList) ) {
|
||||
this.parser.analyzeDomainList(
|
||||
lval + 3, i, BITPipe,
|
||||
(descriptor & 0xFF) === OPTTokenDomain
|
||||
(descriptor & 0xFF) === OPTTokenDomain ? 0b01 : 0b00
|
||||
);
|
||||
}
|
||||
} else {
|
||||
|
@ -1480,9 +2137,6 @@ const NetOptionsIterator = class {
|
|||
this.readPtr = i + 6;
|
||||
return this;
|
||||
}
|
||||
[Symbol.iterator]() {
|
||||
return this.init();
|
||||
}
|
||||
};
|
||||
|
||||
const netOptionTokens = new Map([
|
||||
|
@ -1547,6 +2201,9 @@ const PatternTokenIterator = class {
|
|||
}
|
||||
[Symbol.iterator]() {
|
||||
const { i, l } = this.parser.patternSpan;
|
||||
if ( l === 0 ) {
|
||||
return this.end();
|
||||
}
|
||||
this.l = i;
|
||||
this.r = i + l;
|
||||
this.i = i;
|
||||
|
@ -1605,16 +2262,18 @@ const ExtOptionsIterator = class {
|
|||
this.value = undefined;
|
||||
this.done = true;
|
||||
}
|
||||
init() {
|
||||
[Symbol.iterator]() {
|
||||
const { i, l } = this.parser.optionsSpan;
|
||||
this.l = i;
|
||||
this.r = i + l;
|
||||
this.done = false;
|
||||
this.value = {
|
||||
hn: undefined,
|
||||
not: false,
|
||||
bad: false,
|
||||
};
|
||||
if ( l === 0 ) {
|
||||
this.l = this.r = 0;
|
||||
this.done = true;
|
||||
this.value = undefined;
|
||||
} else {
|
||||
this.l = i;
|
||||
this.r = i + l;
|
||||
this.done = false;
|
||||
this.value = { hn: undefined, not: false, bad: false };
|
||||
}
|
||||
return this;
|
||||
}
|
||||
next() {
|
||||
|
@ -1655,9 +2314,6 @@ const ExtOptionsIterator = class {
|
|||
this.l = i;
|
||||
return this;
|
||||
}
|
||||
[Symbol.iterator]() {
|
||||
return this.init();
|
||||
}
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
|
|
@ -2308,7 +2308,7 @@ const FilterParser = class {
|
|||
}
|
||||
|
||||
parseOptions(parser) {
|
||||
for ( let { id, val, not } of parser.options() ) {
|
||||
for ( let { id, val, not } of parser.netOptions() ) {
|
||||
switch ( id ) {
|
||||
case parser.OPTToken3p:
|
||||
this.parsePartyOption(false, not);
|
||||
|
|
Loading…
Reference in a new issue