diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index ac705f449..f64260747 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -105,6 +105,7 @@ const Parser = class { this.reIsLocalhostRedirect = /(?:0\.0\.0\.0|(?:broadcast|local)host|local|ip6-\w+)\b/; this.reHostname = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+/; this.punycoder = new URL(self.location); + this.selectorCompiler = new this.SelectorCompiler(this); // TODO: reuse for network filtering analysis this.result = { exception: false, @@ -299,7 +300,7 @@ const Parser = class { this.flavorBits |= BITFlavorExtCosmetic; } this.result.raw = selector; - if ( this.compileSelector(selector, this.result) === false ) { + if ( this.selectorCompiler.compile(selector, this.result) === false ) { this.flavorBits |= BITFlavorUnsupported; } } @@ -1071,78 +1072,23 @@ const Parser = class { // https://github.com/uBlockOrigin/uBlock-issues/issues/89 // Do not discard unknown pseudo-elements. -Parser.prototype.compileSelector = (( ) => { - const reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/; - const reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/; - const reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/; - - const translateAdguardCSSInjectionFilter = function(suffix) { - const matches = /^([^{]+)\{([^}]+)\}\s*$/.exec(suffix); - if ( matches === null ) { return ''; } - const selector = matches[1].trim(); - const style = matches[2].trim(); - // Special style directive `remove: true` is converted into a - // `:remove()` operator. - if ( /^\s*remove:\s*true[; ]*$/.test(style) ) { - return `${selector}:remove()`; - } - // For some reasons, many of Adguard's plain cosmetic filters are - // "disguised" as style-based cosmetic filters: convert such filters - // to plain cosmetic filters. - return /display\s*:\s*none\s*!important;?$/.test(style) - ? selector - : `${selector}:style(${style})`; - }; - - const normalizedExtendedSyntaxOperators = new Map([ - [ 'contains', ':has-text' ], - [ 'has', ':has' ], - [ 'matches-css', ':matches-css' ], - [ 'matches-css-after', ':matches-css-after' ], - [ 'matches-css-before', ':matches-css-before' ], - ]); - - // Return value: - // 0b00 (0) = not a valid CSS selector - // 0b01 (1) = valid CSS selector, without pseudo-element - // 0b11 (3) = valid CSS selector, with pseudo element - const cssSelectorType = (( ) => { - // Quick regex-based validation -- most cosmetic filters are of the - // simple form and in such case a regex is much faster. - const reSimple = /^[#.][A-Za-z_][\w-]*$/; - const div = document.createElement('div'); - // Keep in mind: - // https://github.com/gorhill/uBlock/issues/693 - // https://github.com/gorhill/uBlock/issues/1955 - // https://github.com/gorhill/uBlock/issues/3111 - // Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817 - // is fixed. - return s => { - if ( reSimple.test(s) ) { return 1; } - const pos = cssPseudoSelector(s); - if ( pos !== -1 ) { - return cssSelectorType(s.slice(0, pos)) === 1 ? 3 : 0; - } - try { - div.matches(`${s}, ${s}:not(#foo)`); - } catch (ex) { - return 0; - } - return 1; - }; - })(); - - const cssPseudoSelector = (( ) => { - const rePseudo = /:(?::?after|:?before|:[a-z][a-z-]*[a-z])$/; - return function(s) { - if ( s.lastIndexOf(':') === -1 ) { return -1; } - const match = rePseudo.exec(s); - return match !== null ? match.index : -1; - }; - })(); - - const compileProceduralSelector = (( ) => { - const reProceduralOperator = new RegExp([ +Parser.prototype.SelectorCompiler = class { + constructor(parser) { + this.parser = parser; + this.reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/; + this.reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/; + this.reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/; + this.normalizedExtendedSyntaxOperators = new Map([ + [ 'contains', ':has-text' ], + [ 'has', ':has' ], + [ 'matches-css', ':matches-css' ], + [ 'matches-css-after', ':matches-css-after' ], + [ 'matches-css-before', ':matches-css-before' ], + ]); + this.reSimpleSelector = /^[#.][A-Za-z_][\w-]*$/; + this.div = document.createElement('div'); + this.rePseudoClass = /:(?::?after|:?before|:[a-z][a-z-]*[a-z])$/; + this.reProceduralOperator = new RegExp([ '^(?:', [ '-abp-contains', @@ -1167,394 +1113,27 @@ Parser.prototype.compileSelector = (( ) => { ].join('|'), ')\\(' ].join('')); - - const reEatBackslashes = /\\([()])/g; - const reEscapeRegex = /[.*+?^${}()|[\]\\]/g; - const reNeedScope = /^\s*>/; - const reIsDanglingSelector = /[+>~\s]\s*$/; - const reIsSiblingSelector = /^\s*[+~]/; - - const regexToRawValue = new Map(); - - const isBadRegex = function(s) { - try { - void new RegExp(s); - } catch (ex) { - isBadRegex.message = ex.toString(); - return true; - } - return false; - }; - - // When dealing with literal text, we must first eat _some_ - // backslash characters. - const compileText = function(s) { - const match = reParseRegexLiteral.exec(s); - let regexDetails; - if ( match !== null ) { - regexDetails = match[1]; - if ( isBadRegex(regexDetails) ) { return; } - if ( match[2] ) { - regexDetails = [ regexDetails, match[2] ]; - } - } else { - regexDetails = s.replace(reEatBackslashes, '$1') - .replace(reEscapeRegex, '\\$&'); - regexToRawValue.set(regexDetails, s); - } - return regexDetails; - }; - - const compileCSSDeclaration = function(s) { - const pos = s.indexOf(':'); - if ( pos === -1 ) { return; } - const name = s.slice(0, pos).trim(); - const value = s.slice(pos + 1).trim(); - const match = reParseRegexLiteral.exec(value); - let regexDetails; - if ( match !== null ) { - regexDetails = match[1]; - if ( isBadRegex(regexDetails) ) { return; } - if ( match[2] ) { - regexDetails = [ regexDetails, match[2] ]; - } - } else { - regexDetails = '^' + value.replace(reEscapeRegex, '\\$&') + '$'; - regexToRawValue.set(regexDetails, value); - } - return { name: name, value: regexDetails }; - }; - - const compileConditionalSelector = function(s) { - // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277 - // Prepend `:scope ` if needed. - if ( reNeedScope.test(s) ) { - s = `:scope ${s}`; - } - return compile(s); - }; - - const compileInteger = function(s, min = 0, max = 0x7FFFFFFF) { - if ( /^\d+$/.test(s) === false ) { return; } - const n = parseInt(s, 10); - if ( n < min || n >= max ) { return; } - return n; - }; - - const compileNotSelector = function(s) { - // https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588 - // Reject instances of :not() filters for which the argument is - // a valid CSS selector, otherwise we would be adversely - // changing the behavior of CSS4's :not(). - if ( cssSelectorType(s) === 0 ) { - return compileConditionalSelector(s); - } - }; - - const compileUpwardArgument = function(s) { - const i = compileInteger(s, 1, 256); - if ( i !== undefined ) { return i; } - if ( cssSelectorType(s) === 1 ) { return s; } - }; - - const compileRemoveSelector = function(s) { - if ( s === '' ) { return s; } - }; - - const compileSpathExpression = function(s) { - if ( cssSelectorType('*' + s) === 1 ) { - return s; - } - }; - - const compileStyleProperties = (( ) => { - let div; - // https://github.com/uBlockOrigin/uBlock-issues/issues/668 - return function(s) { - if ( /url\(|\\/i.test(s) ) { return; } - if ( div === undefined ) { - div = document.createElement('div'); - } - div.style.cssText = s; - if ( div.style.cssText === '' ) { return; } - div.style.cssText = ''; - return s; - }; - })(); - - const compileAttrList = function(s) { - const attrs = s.split('\s*,\s*'); - const out = []; - for ( const attr of attrs ) { - if ( attr !== '' ) { - out.push(attr); - } - } - return out; - }; - - const compileXpathExpression = function(s) { - try { - document.createExpression(s, null); - } catch (e) { - return; - } - return s; - }; - + this.reEatBackslashes = /\\([()])/g; + this.reEscapeRegex = /[.*+?^${}()|[\]\\]/g; + this.reNeedScope = /^\s*>/; + this.reIsDanglingSelector = /[+>~\s]\s*$/; + this.reIsSiblingSelector = /^\s*[+~]/; + this.regexToRawValue = new Map(); // https://github.com/gorhill/uBlock/issues/2793 - const normalizedOperators = new Map([ + this.normalizedOperators = new Map([ [ ':-abp-contains', ':has-text' ], [ ':-abp-has', ':has' ], [ ':contains', ':has-text' ], [ ':nth-ancestor', ':upward' ], [ ':watch-attrs', ':watch-attr' ], ]); - - const compileArgument = new Map([ - [ ':has', compileConditionalSelector ], - [ ':has-text', compileText ], - [ ':if', compileConditionalSelector ], - [ ':if-not', compileConditionalSelector ], - [ ':matches-css', compileCSSDeclaration ], - [ ':matches-css-after', compileCSSDeclaration ], - [ ':matches-css-before', compileCSSDeclaration ], - [ ':min-text-length', compileInteger ], - [ ':not', compileNotSelector ], - [ ':remove', compileRemoveSelector ], - [ ':spath', compileSpathExpression ], - [ ':style', compileStyleProperties ], - [ ':upward', compileUpwardArgument ], - [ ':watch-attr', compileAttrList ], - [ ':xpath', compileXpathExpression ], - ]); - - const actionOperators = new Set([ + this.actionOperators = new Set([ ':remove', ':style', ]); + } - // https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387 - // Normalize (somewhat) the stringified version of procedural - // cosmetic filters -- this increase the likelihood of detecting - // duplicates given that uBO is able to understand syntax specific - // to other blockers. - // The normalized string version is what is reported in the logger, - // by design. - const decompile = function(compiled) { - const tasks = compiled.tasks; - if ( Array.isArray(tasks) === false ) { - return compiled.selector; - } - const raw = [ compiled.selector ]; - let value; - for ( const task of tasks ) { - switch ( task[0] ) { - case ':has': - case ':if': - raw.push(`:has(${decompile(task[1])})`); - break; - case ':has-text': - if ( Array.isArray(task[1]) ) { - value = `/${task[1][0]}/${task[1][1]}`; - } else { - value = regexToRawValue.get(task[1]); - if ( value === undefined ) { - value = `/${task[1]}/`; - } - } - raw.push(`:has-text(${value})`); - break; - case ':matches-css': - case ':matches-css-after': - case ':matches-css-before': - if ( Array.isArray(task[1].value) ) { - value = `/${task[1].value[0]}/${task[1].value[1]}`; - } else { - value = regexToRawValue.get(task[1].value); - if ( value === undefined ) { - value = `/${task[1].value}/`; - } - } - raw.push(`${task[0]}(${task[1].name}: ${value})`); - break; - case ':not': - case ':if-not': - raw.push(`:not(${decompile(task[1])})`); - break; - case ':spath': - raw.push(task[1]); - break; - case ':min-text-length': - case ':remove': - case ':style': - case ':upward': - case ':watch-attr': - case ':xpath': - raw.push(`${task[0]}(${task[1]})`); - break; - } - } - return raw.join(''); - }; - - const compile = function(raw, root = false) { - if ( raw === '' ) { return; } - - const tasks = []; - const n = raw.length; - let prefix = ''; - let i = 0; - let opPrefixBeg = 0; - let action; - - // TODO: use slices instead of charCodeAt() - for (;;) { - let c, match; - // Advance to next operator. - while ( i < n ) { - c = raw.charCodeAt(i++); - if ( c === 0x3A /* ':' */ ) { - match = reProceduralOperator.exec(raw.slice(i)); - if ( match !== null ) { break; } - } - } - if ( i === n ) { break; } - const opNameBeg = i - 1; - const opNameEnd = i + match[0].length - 1; - i += match[0].length; - // Find end of argument: first balanced closing parenthesis. - // Note: unbalanced parenthesis can be used in a regex literal - // when they are escaped using `\`. - // TODO: need to handle quoted parentheses. - let pcnt = 1; - while ( i < n ) { - c = raw.charCodeAt(i++); - if ( c === 0x5C /* '\\' */ ) { - if ( i < n ) { i += 1; } - } else if ( c === 0x28 /* '(' */ ) { - pcnt +=1 ; - } else if ( c === 0x29 /* ')' */ ) { - pcnt -= 1; - if ( pcnt === 0 ) { break; } - } - } - // Unbalanced parenthesis? An unbalanced parenthesis is fine - // as long as the last character is a closing parenthesis. - if ( pcnt !== 0 && c !== 0x29 ) { return; } - // https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588 - // Maybe that one operator is a valid CSS selector and if so, - // then consider it to be part of the prefix. - if ( cssSelectorType(raw.slice(opNameBeg, i)) === 1 ) { - continue; - } - // Extract and remember operator details. - let operator = raw.slice(opNameBeg, opNameEnd); - operator = normalizedOperators.get(operator) || operator; - // Action operator can only be used as trailing operator in the - // root task list. - // Per-operator arguments validation - const args = compileArgument.get(operator)( - raw.slice(opNameEnd + 1, i - 1) - ); - if ( args === undefined ) { return; } - if ( opPrefixBeg === 0 ) { - prefix = raw.slice(0, opNameBeg); - } else if ( opNameBeg !== opPrefixBeg ) { - if ( action !== undefined ) { return; } - const spath = compileSpathExpression( - raw.slice(opPrefixBeg, opNameBeg) - ); - if ( spath === undefined ) { return; } - tasks.push([ ':spath', spath ]); - } - if ( action !== undefined ) { return; } - tasks.push([ operator, args ]); - if ( actionOperators.has(operator) ) { - if ( root === false ) { return; } - action = operator.slice(1); - } - opPrefixBeg = i; - if ( i === n ) { break; } - } - - // No task found: then we have a CSS selector. - // At least one task found: nothing should be left to parse. - if ( tasks.length === 0 ) { - prefix = raw; - } else if ( opPrefixBeg < n ) { - if ( action !== undefined ) { return; } - const spath = compileSpathExpression(raw.slice(opPrefixBeg)); - if ( spath === undefined ) { return; } - tasks.push([ ':spath', spath ]); - } - - // https://github.com/NanoAdblocker/NanoCore/issues/1#issuecomment-354394894 - // https://www.reddit.com/r/uBlockOrigin/comments/c6iem5/ - // Convert sibling-selector prefix into :spath operator, but - // only if context is not the root. - if ( prefix !== '' ) { - if ( reIsDanglingSelector.test(prefix) && tasks.length !== 0 ) { - prefix += ' *'; - } - if ( cssSelectorType(prefix) === 0 ) { - if ( - root || - reIsSiblingSelector.test(prefix) === false || - compileSpathExpression(prefix) === undefined - ) { - return; - } - tasks.unshift([ ':spath', prefix ]); - prefix = ''; - } - } - - const out = { selector: prefix }; - - if ( tasks.length !== 0 ) { - out.tasks = tasks; - } - - // Expose action to take in root descriptor. - // - // https://github.com/uBlockOrigin/uBlock-issues/issues/961 - // https://github.com/uBlockOrigin/uBlock-issues/issues/382 - // For the time being, `style` action can't be used in a - // procedural selector. - if ( action !== undefined ) { - if ( tasks.length > 1 && action === 'style' ) { return; } - out.action = action; - } - - // Pseudo-selectors are valid only when used in a root task list. - if ( prefix !== '' ) { - const pos = cssPseudoSelector(prefix); - if ( pos !== -1 ) { - if ( root === false ) { return; } - out.pseudo = pos; - } - } - - return out; - }; - - const entryPoint = function(raw) { - const compiled = compile(raw, true); - if ( compiled !== undefined ) { - compiled.raw = decompile(compiled); - } - return compiled; - }; - - entryPoint.reset = function() { - regexToRawValue.clear(); - }; - - return entryPoint; - })(); - - const entryPoint = function(raw, out) { + compile(raw, out) { // https://github.com/gorhill/uBlock/issues/952 // Find out whether we are dealing with an Adguard-specific cosmetic // filter, and if so, translate it if supported, or discard it if not @@ -1563,16 +1142,16 @@ Parser.prototype.compileSelector = (( ) => { // character is `$`, `%` or `?`, otherwise it's not a cosmetic // filter. // Adguard's style injection: translate to uBO's format. - if ( hasBits(this.flavorBits, BITFlavorExtStyle) ) { - raw = translateAdguardCSSInjectionFilter(raw); + if ( hasBits(this.parser.flavorBits, BITFlavorExtStyle) ) { + raw = this.translateAdguardCSSInjectionFilter(raw); if ( raw === '' ) { return false; } out.raw = raw; } let extendedSyntax = false; - const selectorType = cssSelectorType(raw); + const selectorType = this.cssSelectorType(raw); if ( selectorType !== 0 ) { - extendedSyntax = reExtendedSyntax.test(raw); + extendedSyntax = this.reExtendedSyntax.test(raw); if ( extendedSyntax === false ) { out.pseudoclass = selectorType === 3; out.compiled = raw; @@ -1592,18 +1171,18 @@ Parser.prototype.compileSelector = (( ) => { // See https://issues.adblockplus.org/ticket/5287 if ( extendedSyntax ) { let matches; - while ( (matches = reExtendedSyntaxParser.exec(raw)) !== null ) { - const operator = normalizedExtendedSyntaxOperators.get(matches[1]); + while ( (matches = this.reExtendedSyntaxParser.exec(raw)) !== null ) { + const operator = this.normalizedExtendedSyntaxOperators.get(matches[1]); if ( operator === undefined ) { return false; } raw = raw.slice(0, matches.index) + operator + '(' + matches[3] + ')' + raw.slice(matches.index + matches[0].length); } - return entryPoint.call(this, raw, out); + return this.compile(raw, out); } // Procedural selector? - const compiled = compileProceduralSelector(raw); + const compiled = this.compileProceduralSelector(raw); if ( compiled === undefined ) { return false; } if ( compiled.pseudo !== undefined ) { @@ -1612,10 +1191,430 @@ Parser.prototype.compileSelector = (( ) => { out.compiled = JSON.stringify(compiled); return true; - }; + } - return entryPoint; -})(); + translateAdguardCSSInjectionFilter(suffix) { + const matches = /^([^{]+)\{([^}]+)\}\s*$/.exec(suffix); + if ( matches === null ) { return ''; } + const selector = matches[1].trim(); + const style = matches[2].trim(); + // Special style directive `remove: true` is converted into a + // `:remove()` operator. + if ( /^\s*remove:\s*true[; ]*$/.test(style) ) { + return `${selector}:remove()`; + } + // For some reasons, many of Adguard's plain cosmetic filters are + // "disguised" as style-based cosmetic filters: convert such filters + // to plain cosmetic filters. + return /display\s*:\s*none\s*!important;?$/.test(style) + ? selector + : `${selector}:style(${style})`; + } + + // Return value: + // 0b00 (0) = not a valid CSS selector + // 0b01 (1) = valid CSS selector, without pseudo-element + // 0b11 (3) = valid CSS selector, with pseudo element + // + // Quick regex-based validation -- most cosmetic filters are of the + // simple form and in such case a regex is much faster. + // Keep in mind: + // https://github.com/gorhill/uBlock/issues/693 + // https://github.com/gorhill/uBlock/issues/1955 + // https://github.com/gorhill/uBlock/issues/3111 + // Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817 + // is fixed. + cssSelectorType(s) { + if ( this.reSimpleSelector.test(s) ) { return 1; } + const pos = this.cssPseudoSelector(s); + if ( pos !== -1 ) { + return this.cssSelectorType(s.slice(0, pos)) === 1 ? 3 : 0; + } + try { + this.div.matches(`${s}, ${s}:not(#foo)`); + } catch (ex) { + return 0; + } + return 1; + } + + cssPseudoSelector(s) { + if ( s.lastIndexOf(':') === -1 ) { return -1; } + const match = this.rePseudoClass.exec(s); + return match !== null ? match.index : -1; + } + + compileProceduralSelector(raw) { + const compiled = this.compileProcedural(raw, true); + if ( compiled !== undefined ) { + compiled.raw = this.decompileProcedural(compiled); + } + return compiled; + } + + isBadRegex(s) { + try { + void new RegExp(s); + } catch (ex) { + this.isBadRegex.message = ex.toString(); + return true; + } + return false; + } + + // When dealing with literal text, we must first eat _some_ + // backslash characters. + compileText(s) { + const match = this.reParseRegexLiteral.exec(s); + let regexDetails; + if ( match !== null ) { + regexDetails = match[1]; + if ( this.isBadRegex(regexDetails) ) { return; } + if ( match[2] ) { + regexDetails = [ regexDetails, match[2] ]; + } + } else { + regexDetails = s.replace(this.reEatBackslashes, '$1') + .replace(this.reEscapeRegex, '\\$&'); + this.regexToRawValue.set(regexDetails, s); + } + return regexDetails; + } + + compileCSSDeclaration(s) { + const pos = s.indexOf(':'); + if ( pos === -1 ) { return; } + const name = s.slice(0, pos).trim(); + const value = s.slice(pos + 1).trim(); + const match = this.reParseRegexLiteral.exec(value); + let regexDetails; + if ( match !== null ) { + regexDetails = match[1]; + if ( this.isBadRegex(regexDetails) ) { return; } + if ( match[2] ) { + regexDetails = [ regexDetails, match[2] ]; + } + } else { + regexDetails = '^' + value.replace(this.reEscapeRegex, '\\$&') + '$'; + this.regexToRawValue.set(regexDetails, value); + } + return { name: name, value: regexDetails }; + } + + compileConditionalSelector(s) { + // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277 + // Prepend `:scope ` if needed. + if ( this.reNeedScope.test(s) ) { + s = `:scope ${s}`; + } + return this.compileProcedural(s); + } + + compileInteger(s, min = 0, max = 0x7FFFFFFF) { + if ( /^\d+$/.test(s) === false ) { return; } + const n = parseInt(s, 10); + if ( n < min || n >= max ) { return; } + return n; + } + + compileNotSelector(s) { + // https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588 + // Reject instances of :not() filters for which the argument is + // a valid CSS selector, otherwise we would be adversely + // changing the behavior of CSS4's :not(). + if ( this.cssSelectorType(s) === 0 ) { + return this.compileConditionalSelector(s); + } + } + + compileUpwardArgument(s) { + const i = this.compileInteger(s, 1, 256); + if ( i !== undefined ) { return i; } + if ( this.cssSelectorType(s) === 1 ) { return s; } + } + + compileRemoveSelector(s) { + if ( s === '' ) { return s; } + } + + compileSpathExpression(s) { + if ( this.cssSelectorType('*' + s) === 1 ) { + return s; + } + } + + // https://github.com/uBlockOrigin/uBlock-issues/issues/668 + compileStyleProperties(s) { + if ( /url\(|\\/i.test(s) ) { return; } + this.div.style.cssText = s; + if ( this.div.style.cssText === '' ) { return; } + this.div.style.cssText = ''; + return s; + } + + compileAttrList(s) { + const attrs = s.split('\s*,\s*'); + const out = []; + for ( const attr of attrs ) { + if ( attr !== '' ) { + out.push(attr); + } + } + return out; + } + + compileXpathExpression(s) { + try { + document.createExpression(s, null); + } catch (e) { + return; + } + return s; + } + + // https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387 + // Normalize (somewhat) the stringified version of procedural + // cosmetic filters -- this increase the likelihood of detecting + // duplicates given that uBO is able to understand syntax specific + // to other blockers. + // The normalized string version is what is reported in the logger, + // by design. + decompileProcedural(compiled) { + const tasks = compiled.tasks; + if ( Array.isArray(tasks) === false ) { + return compiled.selector; + } + const raw = [ compiled.selector ]; + let value; + for ( const task of tasks ) { + switch ( task[0] ) { + case ':has': + case ':if': + raw.push(`:has(${this.decompileProcedural(task[1])})`); + break; + case ':has-text': + if ( Array.isArray(task[1]) ) { + value = `/${task[1][0]}/${task[1][1]}`; + } else { + value = this.regexToRawValue.get(task[1]); + if ( value === undefined ) { + value = `/${task[1]}/`; + } + } + raw.push(`:has-text(${value})`); + break; + case ':matches-css': + case ':matches-css-after': + case ':matches-css-before': + if ( Array.isArray(task[1].value) ) { + value = `/${task[1].value[0]}/${task[1].value[1]}`; + } else { + value = this.regexToRawValue.get(task[1].value); + if ( value === undefined ) { + value = `/${task[1].value}/`; + } + } + raw.push(`${task[0]}(${task[1].name}: ${value})`); + break; + case ':not': + case ':if-not': + raw.push(`:not(${this.decompileProcedural(task[1])})`); + break; + case ':spath': + raw.push(task[1]); + break; + case ':min-text-length': + case ':remove': + case ':style': + case ':upward': + case ':watch-attr': + case ':xpath': + raw.push(`${task[0]}(${task[1]})`); + break; + } + } + return raw.join(''); + } + + compileProcedural(raw, root = false) { + if ( raw === '' ) { return; } + + const tasks = []; + const n = raw.length; + let prefix = ''; + let i = 0; + let opPrefixBeg = 0; + let action; + + // TODO: use slices instead of charCodeAt() + for (;;) { + let c, match; + // Advance to next operator. + while ( i < n ) { + c = raw.charCodeAt(i++); + if ( c === 0x3A /* ':' */ ) { + match = this.reProceduralOperator.exec(raw.slice(i)); + if ( match !== null ) { break; } + } + } + if ( i === n ) { break; } + const opNameBeg = i - 1; + const opNameEnd = i + match[0].length - 1; + i += match[0].length; + // Find end of argument: first balanced closing parenthesis. + // Note: unbalanced parenthesis can be used in a regex literal + // when they are escaped using `\`. + // TODO: need to handle quoted parentheses. + let pcnt = 1; + while ( i < n ) { + c = raw.charCodeAt(i++); + if ( c === 0x5C /* '\\' */ ) { + if ( i < n ) { i += 1; } + } else if ( c === 0x28 /* '(' */ ) { + pcnt +=1 ; + } else if ( c === 0x29 /* ')' */ ) { + pcnt -= 1; + if ( pcnt === 0 ) { break; } + } + } + // Unbalanced parenthesis? An unbalanced parenthesis is fine + // as long as the last character is a closing parenthesis. + if ( pcnt !== 0 && c !== 0x29 ) { return; } + // https://github.com/uBlockOrigin/uBlock-issues/issues/341#issuecomment-447603588 + // Maybe that one operator is a valid CSS selector and if so, + // then consider it to be part of the prefix. + if ( this.cssSelectorType(raw.slice(opNameBeg, i)) === 1 ) { + continue; + } + // Extract and remember operator details. + let operator = raw.slice(opNameBeg, opNameEnd); + operator = this.normalizedOperators.get(operator) || operator; + // Action operator can only be used as trailing operator in the + // root task list. + // Per-operator arguments validation + const args = this.compileArgument( + operator, + raw.slice(opNameEnd + 1, i - 1) + ); + if ( args === undefined ) { return; } + if ( opPrefixBeg === 0 ) { + prefix = raw.slice(0, opNameBeg); + } else if ( opNameBeg !== opPrefixBeg ) { + if ( action !== undefined ) { return; } + const spath = this.compileSpathExpression( + raw.slice(opPrefixBeg, opNameBeg) + ); + if ( spath === undefined ) { return; } + tasks.push([ ':spath', spath ]); + } + if ( action !== undefined ) { return; } + tasks.push([ operator, args ]); + if ( this.actionOperators.has(operator) ) { + if ( root === false ) { return; } + action = operator.slice(1); + } + opPrefixBeg = i; + if ( i === n ) { break; } + } + + // No task found: then we have a CSS selector. + // At least one task found: nothing should be left to parse. + if ( tasks.length === 0 ) { + prefix = raw; + } else if ( opPrefixBeg < n ) { + if ( action !== undefined ) { return; } + const spath = this.compileSpathExpression(raw.slice(opPrefixBeg)); + if ( spath === undefined ) { return; } + tasks.push([ ':spath', spath ]); + } + + // https://github.com/NanoAdblocker/NanoCore/issues/1#issuecomment-354394894 + // https://www.reddit.com/r/uBlockOrigin/comments/c6iem5/ + // Convert sibling-selector prefix into :spath operator, but + // only if context is not the root. + if ( prefix !== '' ) { + if ( this.reIsDanglingSelector.test(prefix) && tasks.length !== 0 ) { + prefix += ' *'; + } + if ( this.cssSelectorType(prefix) === 0 ) { + if ( + root || + this.reIsSiblingSelector.test(prefix) === false || + this.compileSpathExpression(prefix) === undefined + ) { + return; + } + tasks.unshift([ ':spath', prefix ]); + prefix = ''; + } + } + + const out = { selector: prefix }; + + if ( tasks.length !== 0 ) { + out.tasks = tasks; + } + + // Expose action to take in root descriptor. + // + // https://github.com/uBlockOrigin/uBlock-issues/issues/961 + // https://github.com/uBlockOrigin/uBlock-issues/issues/382 + // For the time being, `style` action can't be used in a + // procedural selector. + if ( action !== undefined ) { + if ( tasks.length > 1 && action === 'style' ) { return; } + out.action = action; + } + + // Pseudo-selectors are valid only when used in a root task list. + if ( prefix !== '' ) { + const pos = this.cssPseudoSelector(prefix); + if ( pos !== -1 ) { + if ( root === false ) { return; } + out.pseudo = pos; + } + } + + return out; + } + + compileArgument(operator, args) { + switch ( operator ) { + case ':has': + return this.compileConditionalSelector(args); + case ':has-text': + return this.compileText(args); + case ':if': + return this.compileConditionalSelector(args); + case ':if-not': + return this.compileConditionalSelector(args); + case ':matches-css': + return this.compileCSSDeclaration(args); + case ':matches-css-after': + return this.compileCSSDeclaration(args); + case ':matches-css-before': + return this.compileCSSDeclaration(args); + case ':min-text-length': + return this.compileInteger(args); + case ':not': + return this.compileNotSelector(args); + case ':remove': + return this.compileRemoveSelector(args); + case ':spath': + return this.compileSpathExpression(args); + case ':style': + return this.compileStyleProperties(args); + case ':upward': + return this.compileUpwardArgument(args); + case ':watch-attr': + return this.compileAttrList(args); + case ':xpath': + return this.compileXpathExpression(args); + default: + break; + } + } +}; /******************************************************************************/