From 3af1120082ea659ba2a75e8468a8a36caea43d82 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Fri, 19 Feb 2021 08:38:50 -0500 Subject: [PATCH] Add support for exception of `document` to bypass strict-blocking Related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/1501 Exception filters for `document` option are complying with uBO's own semantic for `document` option, i.e. an exception filter for `document` option will only allow to bypass a block filter for `document` (either explicit or implicit) and nothing else. Exception filters using `document` option are *not* compatible with ABP's interpretation of these filters. Whereas in ABP the purpose of a `document` exception filter is to wholly disable content blocking, in uBO the same filter will just cause strict-blocking to be disabled while leaving content blocking intact. Additionally, the logger was fixed to properly report pages which are being strict-blocked. --- src/js/messaging.js | 19 +++-- src/js/static-net-filtering.js | 24 ++++-- src/js/traffic.js | 130 +++++++++++++++++++++++++-------- 3 files changed, 130 insertions(+), 43 deletions(-) diff --git a/src/js/messaging.js b/src/js/messaging.js index a3568948e..791df5859 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -1348,6 +1348,7 @@ vAPI.messaging.listen({ const µb = µBlock; const extensionOriginURL = vAPI.getURL(''); +const documentBlockedURL = vAPI.getURL('document-blocked.html'); const getLoggerData = async function(details, activeTabId, callback) { const response = { @@ -1360,18 +1361,24 @@ const getLoggerData = async function(details, activeTabId, callback) { }; if ( µb.pageStoresToken !== details.tabIdsToken ) { const tabIds = new Map(); - for ( const entry of µb.pageStores ) { - const pageStore = entry[1]; - if ( pageStore.rawURL.startsWith(extensionOriginURL) ) { continue; } - tabIds.set(entry[0], pageStore.title); + for ( const [ tabId, pageStore ] of µb.pageStores ) { + const { rawURL } = pageStore; + if ( + 
rawURL.startsWith(extensionOriginURL) === false || + rawURL.startsWith(documentBlockedURL) + ) { + tabIds.set(tabId, pageStore.title); + } } response.tabIds = Array.from(tabIds); } if ( activeTabId ) { const pageStore = µb.pageStoreFromTabId(activeTabId); + const rawURL = pageStore && pageStore.rawURL; if ( - pageStore === null || - pageStore.rawURL.startsWith(extensionOriginURL) + rawURL === null || + rawURL.startsWith(extensionOriginURL) && + rawURL.startsWith(documentBlockedURL) === false ) { response.activeTabId = undefined; } diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 783b09198..ff74207a6 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -4131,7 +4131,20 @@ FilterContainer.prototype.matchStringReverse = function(type, url) { // https://github.com/chrisaljoudi/uBlock/issues/519 // Use exact type match for anything beyond `other`. Also, be prepared to // support unknown types. +// https://github.com/uBlockOrigin/uBlock-issues/issues/1501 +// Add support to evaluate allow realm before block realm. +/** + * Matches a URL string using filtering context. + * @param {FilteringContext} fctxt - The filtering context + * @param {integer} [modifiers=0] - A bit vector modifying the behavior of the + * matching algorithm: + * Bit 0: match exact type. + * Bit 1: lookup allow realm regardless of whether there was a match in + * block realm. + * + * @returns {integer} 0=no match, 1=block, 2=allow (exception) + */ FilterContainer.prototype.matchString = function(fctxt, modifiers = 0) { let typeValue = typeNameToTypeValue[fctxt.type]; if ( modifiers === 0 ) { @@ -4159,17 +4172,18 @@ FilterContainer.prototype.matchString = function(fctxt, modifiers = 0) { $docEntity.reset(); $requestHostname = fctxt.getHostname(); - // Important block filters. + // Important block realm. 
if ( this.realmMatchString(BlockImportant, typeValue, partyBits) ) { return 1; } - // Block filters - if ( this.realmMatchString(BlockAction, typeValue, partyBits) ) { - // Exception filters + + // Evaluate block realm before allow realm. + const r = this.realmMatchString(BlockAction, typeValue, partyBits); + if ( r || (modifiers & 0b0010) !== 0 ) { if ( this.realmMatchString(AllowAction, typeValue, partyBits) ) { return 2; } - return 1; + if ( r ) { return 1; } } return 0; }; diff --git a/src/js/traffic.js b/src/js/traffic.js index 7eacf89bb..2da474c9c 100644 --- a/src/js/traffic.js +++ b/src/js/traffic.js @@ -172,37 +172,9 @@ const onBeforeRootFrameRequest = function(fctxt) { } } - // Static filtering: We always need the long-form result here. - const snfe = µb.staticNetFilteringEngine; - - // Check for specific block + // Static filtering if ( result === 0 ) { - result = snfe.matchString(fctxt, 0b0001); - if ( result !== 0 || loggerEnabled ) { - logData = snfe.toLogData(); - } - } - - // Check for generic block - if ( result === 0 ) { - fctxt.type = 'no_type'; - result = snfe.matchString(fctxt, 0b0001); - if ( result !== 0 || loggerEnabled ) { - logData = snfe.toLogData(); - } - // https://github.com/chrisaljoudi/uBlock/issues/1128 - // Do not block if the match begins after the hostname, except when - // the filter is specifically of type `other`. - // https://github.com/gorhill/uBlock/issues/490 - // Removing this for the time being, will need a new, dedicated type. 
- if ( - result === 1 && - toBlockDocResult(requestURL, requestHostname, logData) === false - ) { - result = 0; - logData = undefined; - } - fctxt.type = 'main_frame'; + ({ result, logData } = shouldStrictBlock(fctxt, loggerEnabled)); } const pageStore = µb.bindTabToPageStore(fctxt.tabId, 'beforeRequest'); @@ -221,7 +193,7 @@ const onBeforeRootFrameRequest = function(fctxt) { result !== 1 && trusted === false && pageStore !== null && - snfe.hasQuery(fctxt) + µb.staticNetFilteringEngine.hasQuery(fctxt) ) { pageStore.redirectNonBlockedRequest(fctxt); } @@ -263,16 +235,109 @@ const onBeforeRootFrameRequest = function(fctxt) { /******************************************************************************/ +// Strict blocking through static filtering +// +// https://github.com/chrisaljoudi/uBlock/issues/1128 +// Do not block if the match begins after the hostname, +// except when the filter is specifically of type `other`. +// https://github.com/gorhill/uBlock/issues/490 +// Removing this for the time being, will need a new, dedicated type. +// https://github.com/uBlockOrigin/uBlock-issues/issues/1501 +// Support explicit exception filters. 
+// +// Let result of match for specific `document` type be `rs` +// Let result of match for no specific type be `rg` *after* going through +// confirmation necessary for implicit matches +// Let `important` be `i` +// Let final result be logical combination of `rs` and `rg` as follows: +// +// | rs | +// +--------+--------+--------+--------| +// | 0 | 1 | 1i | 2 | +// --------+--------+--------+--------+--------+--------| +// | 0 | rg | rs | rs | rs | +// rg | 1 | rg | rs | rs | rs | +// | 1i | rg | rg | rs | rg | +// | 2 | rg | rg | rs | rs | +// --------+--------+--------+--------+--------+--------+ + +const shouldStrictBlock = function(fctxt, loggerEnabled) { + const µb = µBlock; + const snfe = µb.staticNetFilteringEngine; + + // Explicit filtering: `document` option + const rs = snfe.matchString(fctxt, 0b0011); + const is = rs === 1 && snfe.isBlockImportant(); + let lds; + if ( rs !== 0 || loggerEnabled ) { + lds = snfe.toLogData(); + } + + // | rs | + // +--------+--------+--------+--------| + // | 0 | 1 | 1i | 2 | + // --------+--------+--------+--------+--------+--------| + // | 0 | rg | rs | x | rs | + // rg | 1 | rg | rs | x | rs | + // | 1i | rg | rg | x | rg | + // | 2 | rg | rg | x | rs | + // --------+--------+--------+--------+--------+--------+ + if ( rs === 1 && is ) { + return { result: rs, logData: lds }; + } + + // Implicit filtering: no `document` option + fctxt.type = 'no_type'; + let rg = snfe.matchString(fctxt, 0b0011); + fctxt.type = 'main_frame'; + const ig = rg === 1 && snfe.isBlockImportant(); + let ldg; + if ( rg !== 0 || loggerEnabled ) { + ldg = snfe.toLogData(); + if ( rg === 1 && validateStrictBlock(fctxt, ldg) === false ) { + rg = 0; ldg = undefined; + } + } + + // | rs | + // +--------+--------+--------+--------| + // | 0 | 1 | 1i | 2 | + // --------+--------+--------+--------+--------+--------| + // | 0 | x | rs | - | rs | + // rg | 1 | x | rs | - | rs | + // | 1i | x | x | - | x | + // | 2 | x | x | - | rs | + // 
--------+--------+--------+--------+--------+--------+ + if ( rs === 0 || rg === 1 && ig || rg === 2 && rs !== 2 ) { + return { result: rg, logData: ldg }; + } + + // | rs | + // +--------+--------+--------+--------| + // | 0 | 1 | 1i | 2 | + // --------+--------+--------+--------+--------+--------| + // | 0 | - | x | - | x | + // rg | 1 | - | x | - | x | + // | 1i | - | - | - | - | + // | 2 | - | - | - | x | + // --------+--------+--------+--------+--------+--------+ + return { result: rs, logData: lds }; +}; + +/******************************************************************************/ + // https://github.com/gorhill/uBlock/issues/3208 // Mind case insensitivity. // https://github.com/uBlockOrigin/uBlock-issues/issues/1147 // Do not strict-block if the filter pattern does not contain at least one // token character. -const toBlockDocResult = function(url, hostname, logData) { + +const validateStrictBlock = function(fctxt, logData) { if ( typeof logData.regex !== 'string' ) { return false; } if ( typeof logData.raw === 'string' && /\w/.test(logData.raw) === false ) { return false; } + const url = fctxt.url; const re = new RegExp(logData.regex, 'i'); const match = re.exec(url.toLowerCase()); if ( match === null ) { return false; } @@ -283,6 +348,7 @@ const toBlockDocResult = function(url, hostname, logData) { // hostname. // https://github.com/uBlockOrigin/uAssets/issues/7619#issuecomment-653010310 // Also match FQDN. + const hostname = fctxt.getHostname(); const hnpos = url.indexOf(hostname); const hnlen = hostname.length; const end = match.index + match[0].length - hnpos - hnlen;