Rework generic cosmetic filtering code

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/2248
This commit is contained in:
Raymond Hill 2022-12-07 10:30:09 -05:00
parent 76d70102f0
commit 26594fb902
No known key found for this signature in database
GPG key ID: 25E1490B761470C2
4 changed files with 351 additions and 550 deletions

View file

@ -176,8 +176,8 @@ const µBlock = { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 47, // Increase when compiled format changes
selfieMagic: 47, // Increase when selfie format changes
compiledMagic: 48, // Increase when compiled format changes
selfieMagic: 48, // Increase when selfie format changes
},
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View file

@ -945,72 +945,65 @@ vAPI.DOMFilterer = class {
// vAPI.domSurveyor
{
const messaging = vAPI.messaging;
const queriedIds = new Set();
const queriedClasses = new Set();
// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/
// Must mirror cosmetic filtering compiler's version
//
// Computes a 24-bit hash for an id/class token.
//   type: char code of the selector prefix (0x23 for '#', 0x2E for '.')
//   s:    the token to hash, without its prefix
// Only a sample of the characters is hashed: the stride grows with the
// token length such that no more than 8 characters are ever visited.
const hashFromStr = (type, s) => {
    const length = s.length;
    const stride = (length + 7) >>> 3;
    // The seed mixes the selector type with the low byte of the length
    let acc = ((type << 5) - type + (length & 0xFF)) | 0;
    let pos = 0;
    while ( pos < length ) {
        acc = ((acc << 5) - acc + s.charCodeAt(pos)) | 0;
        pos += stride;
    }
    // Keep the lower 24 bits only
    return acc & 0xFFFFFF;
};
// Marks every hash in `hashes` (any iterable) as already queried, so that the
// surveyor will not report these hashes again.
const addHashes = hashes => {
    for ( const knownHash of hashes ) { queriedHashes.add(knownHash); }
};
const queriedHashes = new Set();
const maxSurveyNodes = 65536;
const maxSurveyTimeSlice = 4;
const maxSurveyBuffer = 64;
const pendingLists = [];
const pendingNodes = [];
const processedSet = new Set();
let domFilterer;
let hostname = '';
let domChanged = false;
let scannedCount = 0;
let stopped = false;
let domFilterer,
hostname = '',
surveyCost = 0;
// Queues a snapshot of `list` for a later survey pass. Empty lists are
// ignored. The list is copied into a plain array before being queued.
const addPendingList = list => {
    const isEmpty = list.length === 0;
    if ( isEmpty === false ) {
        pendingLists.push(Array.from(list));
    }
};
const pendingNodes = {
nodeLists: [],
buffer: [
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null,
],
j: 0,
accepted: 0,
iterated: 0,
stopped: false,
add(nodes) {
if ( nodes.length === 0 || this.accepted >= maxSurveyNodes ) {
return;
const nextPendingNodes = ( ) => {
if ( pendingLists.length === 0 ) { return 0; }
const bufferSize = 256;
let j = 0;
do {
const nodeList = pendingLists[0];
let n = bufferSize - j;
if ( n > nodeList.length ) {
n = nodeList.length;
}
this.nodeLists.push(nodes);
this.accepted += nodes.length;
},
next() {
if ( this.nodeLists.length === 0 || this.stopped ) { return 0; }
const nodeLists = this.nodeLists;
let ib = 0;
do {
const nodeList = nodeLists[0];
let j = this.j;
let n = j + maxSurveyBuffer - ib;
if ( n > nodeList.length ) {
n = nodeList.length;
}
for ( let i = j; i < n; i++ ) {
this.buffer[ib++] = nodeList[j++];
}
if ( j !== nodeList.length ) {
this.j = j;
break;
}
this.j = 0;
this.nodeLists.shift();
} while ( ib < maxSurveyBuffer && nodeLists.length !== 0 );
this.iterated += ib;
if ( this.iterated >= maxSurveyNodes ) {
this.nodeLists = [];
this.stopped = true;
//console.info(`domSurveyor> Surveyed a total of ${this.iterated} nodes. Enough.`);
for ( let i = 0; i < n; i++ ) {
pendingNodes[j+i] = nodeList[i];
}
return ib;
},
hasNodes() {
return this.nodeLists.length !== 0;
},
j += n;
if ( n !== nodeList.length ) {
pendingLists[0] = nodeList.slice(n);
break;
}
pendingLists.shift();
} while ( j < bufferSize && pendingLists.length !== 0 );
return j;
};
// Whether at least one list of nodes is still waiting to be surveyed.
const hasPendingNodes = ( ) => pendingLists.length !== 0;
// Extract all classes/ids: these will be passed to the cosmetic
@ -1024,10 +1017,10 @@ vAPI.DOMFilterer = class {
const idFromNode = (node, out) => {
const raw = node.id;
if ( typeof raw !== 'string' || raw.length === 0 ) { return; }
const s = raw.trim();
if ( queriedIds.has(s) || s.length === 0 ) { return; }
out.push(s);
queriedIds.add(s);
const hash = hashFromStr(0x23 /* '#' */, raw.trim());
if ( queriedHashes.has(hash) ) { return; }
queriedHashes.add(hash);
out.push(hash);
};
// https://github.com/uBlockOrigin/uBlock-issues/discussions/2076
@ -1036,73 +1029,83 @@ vAPI.DOMFilterer = class {
const s = node.getAttribute('class');
if ( typeof s !== 'string' ) { return; }
const len = s.length;
for ( let beg = 0, end = 0, token = ''; beg < len; beg += 1 ) {
for ( let beg = 0, end = 0; beg < len; beg += 1 ) {
end = s.indexOf(' ', beg);
if ( end === beg ) { continue; }
if ( end === -1 ) { end = len; }
token = s.slice(beg, end);
const hash = hashFromStr(0x2E /* '.' */, s.slice(beg, end));
beg = end;
if ( queriedClasses.has(token) ) { continue; }
out.push(token);
queriedClasses.add(token);
if ( queriedHashes.has(hash) ) { continue; }
queriedHashes.add(hash);
out.push(hash);
}
};
const surveyPhase1 = function() {
//console.time('dom surveyor/surveying');
// Asks the main process for the generic cosmetic selectors matching the
// collected `hashes`, then hands the response to processSurveyResults().
// - If the messaging API is no longer available, the surveyor is stopped.
// - If there is no hash to look up, no message is sent and
//   processSurveyResults() is called asynchronously with `null`.
const getSurveyResults = hashes => {
    const messenger = self.vAPI.messaging;
    if ( (messenger instanceof Object) === false ) {
        stop();
        return;
    }
    const onResponse = response => {
        processSurveyResults(response);
    };
    if ( hashes.length === 0 ) {
        Promise.resolve(null).then(onResponse);
        return;
    }
    messenger.send('contentscript', {
        what: 'retrieveGenericCosmeticSelectors',
        hostname,
        hashes,
        exceptions: domFilterer.exceptions,
    }).then(onResponse);
};
const doSurvey = ( ) => {
const t0 = performance.now();
const ids = [];
const classes = [];
const nodes = pendingNodes.buffer;
const deadline = t0 + maxSurveyTimeSlice;
const hashes = [];
const nodes = pendingNodes;
const deadline = t0 + 4;
let processed = 0;
let scanned = 0;
for (;;) {
const n = pendingNodes.next();
const n = nextPendingNodes();
if ( n === 0 ) { break; }
for ( let i = 0; i < n; i++ ) {
const node = nodes[i]; nodes[i] = null;
idFromNode(node, ids);
classesFromNode(node, classes);
if ( domChanged ) {
if ( processedSet.has(node) ) { continue; }
processedSet.add(node);
}
idFromNode(node, hashes);
classesFromNode(node, hashes);
scanned += 1;
}
processed += n;
if ( performance.now() >= deadline ) { break; }
}
const t1 = performance.now();
surveyCost += t1 - t0;
//console.info(`domSurveyor> Surveyed ${processed} nodes in ${(t1-t0).toFixed(2)} ms`);
// Phase 2: Ask main process to lookup relevant cosmetic filters.
if ( ids.length !== 0 || classes.length !== 0 ) {
messaging.send('contentscript', {
what: 'retrieveGenericCosmeticSelectors',
hostname,
ids, classes,
exceptions: domFilterer.exceptions,
cost: surveyCost,
}).then(response => {
surveyPhase3(response);
});
} else {
surveyPhase3(null);
//console.info(`[domSurveyor][${hostname}] Surveyed ${scanned}/${processed} nodes in ${(performance.now()-t0).toFixed(2)} ms: ${hashes.length} hashes`);
scannedCount += scanned;
if ( scannedCount >= maxSurveyNodes ) {
stop();
}
//console.timeEnd('dom surveyor/surveying');
processedSet.clear();
getSurveyResults(hashes);
};
const surveyTimer = new vAPI.SafeAnimationFrame(surveyPhase1);
const surveyTimer = new vAPI.SafeAnimationFrame(doSurvey);
// This is to shut down the surveyor if the result of surveying keeps being
// fruitless. This is useful on long-lived web pages. I arbitrarily
// picked 5 minutes before the surveyor is allowed to shut down. I also
// arbitrarily picked 256 misses before the surveyor is allowed to
// shut down.
let canShutdownAfter = Date.now() + 300000,
surveyingMissCount = 0;
let canShutdownAfter = Date.now() + 300000;
let surveyResultMissCount = 0;
// Handle main process' response.
const surveyPhase3 = function(response) {
const processSurveyResults = response => {
if ( stopped ) { return; }
const result = response && response.result;
let mustCommit = false;
if ( result ) {
const css = result.injectedCSS;
if ( typeof css === 'string' && css.length !== 0 ) {
@ -1114,99 +1117,86 @@ vAPI.DOMFilterer = class {
domFilterer.exceptCSSRules(selectors);
}
}
if ( pendingNodes.stopped === false ) {
if ( pendingNodes.hasNodes() ) {
surveyTimer.start(1);
}
if ( mustCommit ) {
surveyingMissCount = 0;
canShutdownAfter = Date.now() + 300000;
return;
}
surveyingMissCount += 1;
if ( surveyingMissCount < 256 || Date.now() < canShutdownAfter ) {
return;
}
if ( hasPendingNodes() ) {
surveyTimer.start(1);
}
//console.info('dom surveyor shutting down: too many misses');
surveyTimer.clear();
vAPI.domWatcher.removeListener(domWatcherInterface);
vAPI.domSurveyor = null;
if ( mustCommit ) {
surveyResultMissCount = 0;
canShutdownAfter = Date.now() + 300000;
return;
}
surveyResultMissCount += 1;
if ( surveyResultMissCount < 256 || Date.now() < canShutdownAfter ) {
return;
}
//console.info(`[domSurveyor][${hostname}] Shutting down, too many misses`);
stop();
self.vAPI.messaging.send('contentscript', {
what: 'disableGenericCosmeticFilteringSurveyor',
hostname,
});
};
const domWatcherInterface = {
onDOMCreated: function() {
if (
self.vAPI instanceof Object === false ||
vAPI.domSurveyor instanceof Object === false ||
vAPI.domFilterer instanceof Object === false
) {
if ( self.vAPI instanceof Object ) {
if ( vAPI.domWatcher instanceof Object ) {
vAPI.domWatcher.removeListener(domWatcherInterface);
}
vAPI.domSurveyor = null;
}
return;
}
//console.time('dom surveyor/dom layout created');
domFilterer = vAPI.domFilterer;
pendingNodes.add(document.querySelectorAll(
'[id]:not(html):not(body),[class]:not(html):not(body)'
));
surveyTimer.start();
// https://github.com/uBlockOrigin/uBlock-issues/issues/1692
// Look up safe-only selectors to reduce the probability of
// html/body elements being erroneously targeted.
const ids = [], classes = [];
const hashes = [];
if ( document.documentElement !== null ) {
idFromNode(document.documentElement, ids);
classesFromNode(document.documentElement, classes);
idFromNode(document.documentElement, hashes);
classesFromNode(document.documentElement, hashes);
}
if ( document.body !== null ) {
idFromNode(document.body, ids);
classesFromNode(document.body, classes);
idFromNode(document.body, hashes);
classesFromNode(document.body, hashes);
}
if ( ids.length !== 0 || classes.length !== 0 ) {
messaging.send('contentscript', {
what: 'retrieveGenericCosmeticSelectors',
hostname,
ids, classes,
exceptions: domFilterer.exceptions,
safeOnly: true,
}).then(response => {
surveyPhase3(response);
});
addPendingList(document.querySelectorAll(
'[id]:not(html):not(body),[class]:not(html):not(body)'
));
if ( hasPendingNodes() ) {
surveyTimer.start();
}
//console.timeEnd('dom surveyor/dom layout created');
},
onDOMChanged: function(addedNodes) {
if ( addedNodes.length === 0 ) { return; }
//console.time('dom surveyor/dom layout changed');
domChanged = true;
for ( const node of addedNodes ) {
pendingNodes.add([ node ]);
addPendingList([ node ]);
if ( node.firstElementChild === null ) { continue; }
pendingNodes.add(node.querySelectorAll(
'[id]:not(html):not(body),[class]:not(html):not(body)'
));
addPendingList(
node.querySelectorAll(
'[id]:not(html):not(body),[class]:not(html):not(body)'
)
);
}
if ( pendingNodes.hasNodes() ) {
if ( hasPendingNodes() ) {
surveyTimer.start(1);
}
//console.timeEnd('dom surveyor/dom layout changed');
}
};
const start = function(details) {
if ( vAPI.domWatcher instanceof Object === false ) { return; }
const start = details => {
if ( self.vAPI instanceof Object === false ) { return; }
if ( self.vAPI.domFilterer instanceof Object === false ) { return; }
if ( self.vAPI.domWatcher instanceof Object === false ) { return; }
hostname = details.hostname;
vAPI.domWatcher.addListener(domWatcherInterface);
self.vAPI.domWatcher.addListener(domWatcherInterface);
};
vAPI.domSurveyor = { start };
// Permanently shuts down the surveyor: flags it as stopped, drops all queued
// node lists, cancels the survey timer, and -- when the content script API is
// still around -- unregisters from the DOM watcher and clears
// vAPI.domSurveyor.
const stop = ( ) => {
    stopped = true;
    pendingLists.length = 0;
    surveyTimer.clear();
    const api = self.vAPI;
    if ( (api instanceof Object) === false ) { return; }
    const watcher = api.domWatcher;
    if ( watcher instanceof Object ) {
        watcher.removeListener(domWatcherInterface);
    }
    api.domSurveyor = null;
};
self.vAPI.domSurveyor = { start, addHashes };
}
/******************************************************************************/
@ -1218,7 +1208,7 @@ vAPI.DOMFilterer = class {
// to be launched if/when needed.
{
const bootstrapPhase2 = function() {
const onDomReady = ( ) => {
// This can happen on Firefox. For instance:
// https://github.com/gorhill/uBlock/issues/1893
if ( window.location === null ) { return; }
@ -1279,9 +1269,8 @@ vAPI.DOMFilterer = class {
// an object -- let's stay around, we may be given the opportunity
// to try bootstrapping again later.
const bootstrapPhase1 = function(response) {
const onResponseReady = response => {
if ( response instanceof Object === false ) { return; }
vAPI.bootstrap = undefined;
// cosmetic filtering engine aka 'cfe'
@ -1308,7 +1297,7 @@ vAPI.DOMFilterer = class {
vAPI.domSurveyor = null;
} else {
const domFilterer = vAPI.domFilterer = new vAPI.DOMFilterer();
if ( noGenericCosmeticFiltering || cfeDetails.noDOMSurveying ) {
if ( noGenericCosmeticFiltering || cfeDetails.disableSurveyor ) {
vAPI.domSurveyor = null;
}
domFilterer.exceptions = cfeDetails.exceptionFilters;
@ -1316,10 +1305,9 @@ vAPI.DOMFilterer = class {
domFilterer.addProceduralSelectors(cfeDetails.proceduralFilters);
domFilterer.exceptCSSRules(cfeDetails.exceptedFilters);
domFilterer.convertedProceduralFilters = cfeDetails.convertedProceduralFilters;
vAPI.userStylesheet.apply();
}
vAPI.userStylesheet.apply();
// Library of resources is located at:
// https://github.com/gorhill/uBlock/blob/master/assets/ublock/resources.txt
if ( scriptlets && typeof self.uBO_scriptletsInjected !== 'boolean' ) {
@ -1328,26 +1316,18 @@ vAPI.DOMFilterer = class {
vAPI.injectedScripts = scriptlets;
}
if ( vAPI.domSurveyor instanceof Object ) {
if ( vAPI.domSurveyor ) {
if ( Array.isArray(cfeDetails.genericCosmeticHashes) ) {
vAPI.domSurveyor.addHashes(cfeDetails.genericCosmeticHashes);
}
vAPI.domSurveyor.start(cfeDetails);
}
// https://github.com/chrisaljoudi/uBlock/issues/587
// If no filters were found, maybe the script was injected before
// uBlock's process was fully initialized. When this happens, pages
// won't be cleaned right after browser launch.
if (
typeof document.readyState === 'string' &&
document.readyState !== 'loading'
) {
bootstrapPhase2();
} else {
document.addEventListener(
'DOMContentLoaded',
bootstrapPhase2,
{ once: true }
);
const readyState = document.readyState;
if ( readyState === 'interactive' || readyState === 'complete' ) {
return onDomReady();
}
document.addEventListener('DOMContentLoaded', onDomReady, { once: true });
};
vAPI.bootstrap = function() {
@ -1356,7 +1336,7 @@ vAPI.DOMFilterer = class {
url: vAPI.effectiveSelf.location.href,
needScriptlets: typeof self.uBO_scriptletsInjected !== 'boolean',
}).then(response => {
bootstrapPhase1(response);
onResponseReady(response);
});
};
}

View file

@ -32,12 +32,6 @@ import {
StaticExtFilteringSessionDB,
} from './static-ext-filtering-db.js';
/******************************************************************************/
const cosmeticSurveyingMissCountMax =
parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) ||
15;
/******************************************************************************/
/******************************************************************************/
@ -48,71 +42,55 @@ const SelectorCacheEntry = class {
reset() {
this.cosmetic = new Set();
this.cosmeticSurveyingMissCount = 0;
this.cosmeticHashes = new Set();
this.disableSurveyor = false;
this.net = new Map();
this.lastAccessTime = Date.now();
this.accessId = SelectorCacheEntry.accessId++;
return this;
}
dispose() {
this.cosmetic = this.net = null;
this.cosmetic = this.cosmeticHashes = this.net = null;
if ( SelectorCacheEntry.junkyard.length < 25 ) {
SelectorCacheEntry.junkyard.push(this);
}
}
addCosmetic(details) {
const selectors = details.selectors;
let i = selectors.length || 0;
// https://github.com/gorhill/uBlock/issues/2011
// Avoiding seemingly pointless surveys only if they appear costly.
if ( details.first && i === 0 ) {
if ( (details.cost || 0) >= 80 ) {
this.cosmeticSurveyingMissCount += 1;
}
return;
const selectors = details.selectors.join(',\n');
if ( selectors.length !== 0 ) {
this.cosmetic.add(selectors);
}
this.cosmeticSurveyingMissCount = 0;
while ( i-- ) {
this.cosmetic.add(selectors[i]);
for ( const hash of details.hashes ) {
this.cosmeticHashes.add(hash);
}
}
addNet(selectors) {
if ( typeof selectors === 'string' ) {
this.addNetOne(selectors, Date.now());
this.net.set(selectors, this.accessId);
} else {
this.addNetMany(selectors, Date.now());
this.net.set(selectors.join(',\n'), this.accessId);
}
// Net request-derived selectors: I limit the number of cached
// selectors, as I expect cases where the blocked net-requests
// selectors, as I expect cases where the blocked network requests
// are never the exact same URL.
if ( this.net.size < SelectorCacheEntry.netHighWaterMark ) {
return;
}
const dict = this.net;
const keys = Array.from(dict.keys()).sort(function(a, b) {
return dict.get(b) - dict.get(a);
}).slice(SelectorCacheEntry.netLowWaterMark);
let i = keys.length;
while ( i-- ) {
dict.delete(keys[i]);
if ( this.net.size < SelectorCacheEntry.netHighWaterMark ) { return; }
const keys = Array.from(this.net)
.sort((a, b) => b[1] - a[1])
.slice(SelectorCacheEntry.netLowWaterMark)
.map(a => a[0]);
for ( const key of keys ) {
this.net.delete(key);
}
}
addNetOne(selector, now) {
this.net.set(selector, now);
}
addNetMany(selectors, now) {
let i = selectors.length || 0;
while ( i-- ) {
this.net.set(selectors[i], now);
}
addNetOne(selector, token) {
this.net.set(selector, token);
}
add(details) {
this.lastAccessTime = Date.now();
this.accessId = SelectorCacheEntry.accessId++;
if ( details.type === 'cosmetic' ) {
this.addCosmetic(details);
} else {
@ -122,10 +100,9 @@ const SelectorCacheEntry = class {
// https://github.com/chrisaljoudi/uBlock/issues/420
remove(type) {
this.lastAccessTime = Date.now();
this.accessId = SelectorCacheEntry.accessId++;
if ( type === undefined || type === 'cosmetic' ) {
this.cosmetic.clear();
this.cosmeticSurveyingMissCount = 0;
}
if ( type === undefined || type === 'net' ) {
this.net.clear();
@ -133,36 +110,41 @@ const SelectorCacheEntry = class {
}
retrieveToArray(iterator, out) {
for ( let selector of iterator ) {
for ( const selector of iterator ) {
out.push(selector);
}
}
retrieveToSet(iterator, out) {
for ( let selector of iterator ) {
for ( const selector of iterator ) {
out.add(selector);
}
}
retrieve(type, out) {
this.lastAccessTime = Date.now();
const iterator = type === 'cosmetic' ? this.cosmetic : this.net.keys();
if ( Array.isArray(out) ) {
this.retrieveToArray(iterator, out);
} else {
this.retrieveToSet(iterator, out);
}
retrieveNet(out) {
this.accessId = SelectorCacheEntry.accessId++;
if ( this.net.size === 0 ) { return false; }
this.retrieveToArray(this.net.keys(), out);
return true;
}
retrieveCosmetic(selectors, hashes) {
this.accessId = SelectorCacheEntry.accessId++;
if ( this.cosmetic.size === 0 ) { return false; }
this.retrieveToSet(this.cosmetic, selectors);
this.retrieveToArray(this.cosmeticHashes, hashes);
return true;
}
static factory() {
const entry = SelectorCacheEntry.junkyard.pop();
if ( entry ) {
return entry.reset();
}
return new SelectorCacheEntry();
return entry
? entry.reset()
: new SelectorCacheEntry();
}
};
SelectorCacheEntry.accessId = 1;
SelectorCacheEntry.netLowWaterMark = 20;
SelectorCacheEntry.netHighWaterMark = 30;
SelectorCacheEntry.junkyard = [];
@ -170,6 +152,61 @@ SelectorCacheEntry.junkyard = [];
/******************************************************************************/
/******************************************************************************/
// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/
// Must mirror content script surveyor's version
//
// Computes a 24-bit hash for an id/class token.
//   type: char code of the selector prefix (0x23 for '#', 0x2E for '.')
//   s:    the token to hash, without its prefix
// Only a sample of the characters is hashed: the stride grows with the
// token length such that no more than 8 characters are ever visited.
const hashFromStr = (type, s) => {
    const length = s.length;
    const stride = (length + 7) >>> 3;
    // The seed mixes the selector type with the low byte of the length
    let acc = ((type << 5) - type + (length & 0xFF)) | 0;
    let pos = 0;
    while ( pos < length ) {
        acc = ((acc << 5) - acc + s.charCodeAt(pos)) | 0;
        pos += stride;
    }
    // Keep the lower 24 bits only
    return acc & 0xFFFFFF;
};
// https://github.com/gorhill/uBlock/issues/1668
// The key must be literal: unescape escaped CSS before extracting key.
// It's an uncommon case, so it's best to unescape only when needed.
//
// Extracts the plain id/class token (including its '#' or '.' prefix) which
// can be used to index a generic cosmetic filter:
// - If the selector starts with a plain id/class, that leading token is the
//   key, with CSS escape sequences decoded to their literal characters.
// - Otherwise, the key is the first id/class token matched by
//   `rePlainSelectorEx`, returned as is.
// - Returns undefined when no key can be extracted.
//
// Hexadecimal escapes are decoded as full Unicode code points, such that
// characters beyond U+FFFF yield the same UTF-16 sequence as found in the DOM.
// Values which are not valid code points are replaced with U+FFFD.
const keyFromSelector = selector => {
    let matches = rePlainSelector.exec(selector);
    if ( matches === null ) {
        matches = rePlainSelectorEx.exec(selector);
        if ( matches !== null ) { return matches[1] || matches[2]; }
        return;
    }
    let key = matches[0];
    if ( key.includes('\\') === false ) { return key; }
    matches = rePlainSelectorEscaped.exec(selector);
    if ( matches === null ) { return; }
    key = '';
    const escaped = matches[0];
    let beg = 0;
    // The regex is global, hence stateful: always start from the beginning
    reEscapeSequence.lastIndex = 0;
    for (;;) {
        matches = reEscapeSequence.exec(escaped);
        if ( matches === null ) {
            return key + escaped.slice(beg);
        }
        key += escaped.slice(beg, matches.index);
        beg = reEscapeSequence.lastIndex;
        // Single character: the escaped character is taken literally
        if ( matches[1].length === 1 ) {
            key += matches[1];
            continue;
        }
        // Hexadecimal digits followed by a space: a Unicode code point
        const codePoint = parseInt(matches[1], 16);
        key += codePoint <= 0x10FFFF
            ? String.fromCodePoint(codePoint)
            : '\uFFFD';
    }
};
const rePlainSelector = /^[#.][\w\\-]+/;
const rePlainSelectorEx = /^[^#.\[(]+([#.][\w-]+)|([#.][\w-]+)$/;
const rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/;
const reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g;
/******************************************************************************/
/******************************************************************************/
// Cosmetic filter family tree:
//
// Generic
@ -186,18 +223,12 @@ SelectorCacheEntry.junkyard = [];
// Specific filters can be enforced before the main document is loaded.
const FilterContainer = function() {
this.rePlainSelector = /^[#.][\w\\-]+/;
this.rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/;
this.rePlainSelectorEx = /^[^#.\[(]+([#.][\w-]+)|([#.][\w-]+)$/;
this.reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g;
this.reSimpleHighGeneric = /^(?:[a-z]*\[[^\]]+\]|\S+)$/;
this.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/;
this.selectorCache = new Map();
this.selectorCachePruneDelay = 10 * 60 * 1000; // 10 minutes
this.selectorCacheAgeMax = 120 * 60 * 1000; // 120 minutes
this.selectorCacheCountMin = 25;
this.netSelectorCacheCountMax = SelectorCacheEntry.netHighWaterMark;
this.selectorCacheCountMin = 40;
this.selectorCacheCountMax = 50;
this.selectorCacheTimer = null;
// specific filters
@ -206,20 +237,8 @@ const FilterContainer = function() {
// temporary filters
this.sessionFilterDB = new StaticExtFilteringSessionDB();
// low generic cosmetic filters, organized by id/class then simple/complex.
this.lowlyGeneric = Object.create(null);
this.lowlyGeneric.id = {
canonical: 'ids',
prefix: '#',
simple: new Set(),
complex: new Map()
};
this.lowlyGeneric.cl = {
canonical: 'classes',
prefix: '.',
simple: new Set(),
complex: new Map()
};
// low generic cosmetic filters: map of hash => array of selectors
this.lowlyGeneric = new Map();
// highly generic selectors sets
this.highlyGeneric = Object.create(null);
@ -240,8 +259,6 @@ const FilterContainer = function() {
// is to prevent repeated allocation/deallocation overheads -- the
// constructors/destructors of javascript Set/Map is assumed to be costlier
// than just calling clear() on these.
this.$simpleSet = new Set();
this.$complexSet = new Set();
this.$specificSet = new Set();
this.$exceptionSet = new Set();
this.$proceduralSet = new Set();
@ -266,17 +283,11 @@ FilterContainer.prototype.reset = function() {
this.selectorCacheTimer = null;
}
// whether there is at least one surveyor-based filter
this.needDOMSurveyor = false;
// hostname, entity-based filters
this.specificFilters.clear();
// low generic cosmetic filters, organized by id/class then simple/complex.
this.lowlyGeneric.id.simple.clear();
this.lowlyGeneric.id.complex.clear();
this.lowlyGeneric.cl.simple.clear();
this.lowlyGeneric.cl.complex.clear();
// low generic cosmetic filters
this.lowlyGeneric.clear();
// highly generic selectors sets
this.highlyGeneric.simple.dict.clear();
@ -285,6 +296,8 @@ FilterContainer.prototype.reset = function() {
this.highlyGeneric.complex.dict.clear();
this.highlyGeneric.complex.str = '';
this.highlyGeneric.complex.mru.reset();
this.selfieVersion = 1;
};
/******************************************************************************/
@ -293,12 +306,6 @@ FilterContainer.prototype.freeze = function() {
this.duplicateBuster.clear();
this.specificFilters.collectGarbage();
this.needDOMSurveyor =
this.lowlyGeneric.id.simple.size !== 0 ||
this.lowlyGeneric.id.complex.size !== 0 ||
this.lowlyGeneric.cl.simple.size !== 0 ||
this.lowlyGeneric.cl.complex.size !== 0;
this.highlyGeneric.simple.str = Array.from(this.highlyGeneric.simple.dict).join(',\n');
this.highlyGeneric.simple.mru.reset();
this.highlyGeneric.complex.str = Array.from(this.highlyGeneric.complex.dict).join(',\n');
@ -309,40 +316,6 @@ FilterContainer.prototype.freeze = function() {
/******************************************************************************/
// https://github.com/gorhill/uBlock/issues/1668
// The key must be literal: unescape escaped CSS before extracting key.
// It's an uncommon case, so it's best to unescape only when needed.
FilterContainer.prototype.keyFromSelector = function(selector) {
let matches = this.rePlainSelector.exec(selector);
if ( matches === null ) { return; }
let key = matches[0];
if ( key.indexOf('\\') === -1 ) {
return key;
}
matches = this.rePlainSelectorEscaped.exec(selector);
if ( matches === null ) { return; }
key = '';
const escaped = matches[0];
let beg = 0;
this.reEscapeSequence.lastIndex = 0;
for (;;) {
matches = this.reEscapeSequence.exec(escaped);
if ( matches === null ) {
return key + escaped.slice(beg);
}
key += escaped.slice(beg, matches.index);
beg = this.reEscapeSequence.lastIndex;
if ( matches[1].length === 1 ) {
key += matches[1];
} else {
key += String.fromCharCode(parseInt(matches[1], 16));
}
}
};
/******************************************************************************/
FilterContainer.prototype.compile = function(parser, writer) {
if ( parser.hasOptions() === false ) {
this.compileGenericSelector(parser, writer);
@ -396,38 +369,8 @@ FilterContainer.prototype.compileGenericHideSelector = function(
writer.select('COSMETIC_FILTERS:GENERIC');
const type = compiled.charCodeAt(0);
let key;
// Simple selector-based CSS rule: no need to test for whether the
// selector is valid, the regex took care of this. Most generic selector
// falls into that category:
// - ###ad-bigbox
// - ##.ads-bigbox
if ( type === 0x23 /* '#' */ ) {
key = this.keyFromSelector(compiled);
if ( key === compiled ) {
writer.push([ 0, key.slice(1) ]);
return;
}
} else if ( type === 0x2E /* '.' */ ) {
key = this.keyFromSelector(compiled);
if ( key === compiled ) {
writer.push([ 2, key.slice(1) ]);
return;
}
}
// Invalid cosmetic filter, possible reasons:
// - Bad syntax
// - Procedural filters (can't be generic): the compiled version of
// a procedural selector is NEVER equal to its raw version.
// https://github.com/uBlockOrigin/uBlock-issues/issues/464
// Pseudoclass-based selectors can be compiled, but are also valid
// plain selectors.
// https://github.com/uBlockOrigin/uBlock-issues/issues/131
// Support generic procedural filters as per advanced settings.
// TODO: prevent double compilation.
if ( compiled.charCodeAt(0) === 0x7B /* '{' */ ) {
if ( µb.hiddenSettings.allowGenericProceduralFilters === true ) {
return this.compileSpecificSelector(parser, '', false, writer);
@ -441,28 +384,12 @@ FilterContainer.prototype.compileGenericHideSelector = function(
return;
}
// Complex selector-based CSS rule:
// - ###tads + div + .c
// - ##.rscontainer > .ellip
const key = keyFromSelector(compiled);
if ( key !== undefined ) {
writer.push([
type === 0x23 /* '#' */ ? 1 : 3,
key.slice(1),
compiled
]);
return;
}
// https://github.com/gorhill/uBlock/issues/909
// Anything which contains a plain id/class selector can be classified
// as a low generic cosmetic filter.
const matches = this.rePlainSelectorEx.exec(compiled);
if ( matches !== null ) {
const key = matches[1] || matches[2];
writer.push([
key.charCodeAt(0) === 0x23 /* '#' */ ? 1 : 3,
key.slice(1),
compiled
0,
hashFromStr(key.charCodeAt(0), key.slice(1)),
compiled,
]);
return;
}
@ -618,36 +545,13 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
this.duplicateBuster.add(fingerprint);
const args = reader.args();
switch ( args[0] ) {
// low generic, simple
case 0: // #AdBanner
case 2: { // .largeAd
const db = args[0] === 0 ? this.lowlyGeneric.id : this.lowlyGeneric.cl;
const bucket = db.complex.get(args[1]);
if ( bucket === undefined ) {
db.simple.add(args[1]);
} else if ( Array.isArray(bucket) ) {
bucket.push(db.prefix + args[1]);
// low generic
case 0: {
if ( this.lowlyGeneric.has(args[1]) ) {
const selector = this.lowlyGeneric.get(args[1]);
this.lowlyGeneric.set(args[1], `${selector},\n${args[2]}`);
} else {
db.complex.set(args[1], [ bucket, db.prefix + args[1] ]);
}
break;
}
// low generic, complex
case 1: // #tads + div + .c
case 3: { // .Mpopup + #Mad > #MadZone
const db = args[0] === 1 ? this.lowlyGeneric.id : this.lowlyGeneric.cl;
const bucket = db.complex.get(args[1]);
if ( bucket === undefined ) {
if ( db.simple.has(args[1]) ) {
db.complex.set(args[1], [ db.prefix + args[1], args[2] ]);
} else {
db.complex.set(args[1], args[2]);
db.simple.add(args[1]);
}
} else if ( Array.isArray(bucket) ) {
bucket.push(args[2]);
} else {
db.complex.set(args[1], [ bucket, args[2] ]);
this.lowlyGeneric.set(args[1], args[2]);
}
break;
}
@ -682,13 +586,11 @@ FilterContainer.prototype.skipCompiledContent = function(reader, sectionId) {
FilterContainer.prototype.toSelfie = function() {
return {
version: this.selfieVersion,
acceptedCount: this.acceptedCount,
discardedCount: this.discardedCount,
specificFilters: this.specificFilters.toSelfie(),
lowlyGenericSID: Array.from(this.lowlyGeneric.id.simple),
lowlyGenericCID: Array.from(this.lowlyGeneric.id.complex),
lowlyGenericSCL: Array.from(this.lowlyGeneric.cl.simple),
lowlyGenericCCL: Array.from(this.lowlyGeneric.cl.complex),
lowlyGeneric: Array.from(this.lowlyGeneric),
highSimpleGenericHideArray: Array.from(this.highlyGeneric.simple.dict),
highComplexGenericHideArray: Array.from(this.highlyGeneric.complex.dict),
};
@ -697,22 +599,19 @@ FilterContainer.prototype.toSelfie = function() {
/******************************************************************************/
FilterContainer.prototype.fromSelfie = function(selfie) {
if ( selfie.version !== this.selfieVersion ) {
throw new Error(
`cosmeticFilteringEngine: mismatched selfie version, ${selfie.version}, expected ${this.selfieVersion}`
);
}
this.acceptedCount = selfie.acceptedCount;
this.discardedCount = selfie.discardedCount;
this.specificFilters.fromSelfie(selfie.specificFilters);
this.lowlyGeneric.id.simple = new Set(selfie.lowlyGenericSID);
this.lowlyGeneric.id.complex = new Map(selfie.lowlyGenericCID);
this.lowlyGeneric.cl.simple = new Set(selfie.lowlyGenericSCL);
this.lowlyGeneric.cl.complex = new Map(selfie.lowlyGenericCCL);
this.lowlyGeneric = new Map(selfie.lowlyGeneric);
this.highlyGeneric.simple.dict = new Set(selfie.highSimpleGenericHideArray);
this.highlyGeneric.simple.str = selfie.highSimpleGenericHideArray.join(',\n');
this.highlyGeneric.complex.dict = new Set(selfie.highComplexGenericHideArray);
this.highlyGeneric.complex.str = selfie.highComplexGenericHideArray.join(',\n');
this.needDOMSurveyor =
selfie.lowlyGenericSID.length !== 0 ||
selfie.lowlyGenericCID.length !== 0 ||
selfie.lowlyGenericSCL.length !== 0 ||
selfie.lowlyGenericCCL.length !== 0;
this.frozen = true;
};
@ -721,12 +620,11 @@ FilterContainer.prototype.fromSelfie = function(selfie) {
// Schedule a single deferred pruning pass of the selector cache.
//
// Does nothing when a pruning pass is already pending, i.e. when
// `this.selectorCacheTimer` is not null. Otherwise the timer handle returned
// by `vAPI.setTimeout` is stored in `this.selectorCacheTimer`, and
// `pruneSelectorCacheAsync()` is called once `this.selectorCachePruneDelay`
// has elapsed.
FilterContainer.prototype.triggerSelectorCachePruner = function() {
    // Of interest: http://fitzgeraldnick.com/weblog/40/
    // http://googlecode.blogspot.ca/2009/07/gmail-for-mobile-html5-series-using.html
    if ( this.selectorCacheTimer !== null ) { return; }
    this.selectorCacheTimer = vAPI.setTimeout(
        ( ) => { this.pruneSelectorCacheAsync(); },
        this.selectorCachePruneDelay
    );
};
/******************************************************************************/
@ -740,7 +638,7 @@ FilterContainer.prototype.addToSelectorCache = function(details) {
if ( entry === undefined ) {
entry = SelectorCacheEntry.factory();
this.selectorCache.set(hostname, entry);
if ( this.selectorCache.size > this.selectorCacheCountMin ) {
if ( this.selectorCache.size > this.selectorCacheCountMax ) {
this.triggerSelectorCachePruner();
}
}
@ -753,7 +651,7 @@ FilterContainer.prototype.removeFromSelectorCache = function(
targetHostname = '*',
type = undefined
) {
let targetHostnameLength = targetHostname.length;
const targetHostnameLength = targetHostname.length;
for ( let entry of this.selectorCache ) {
let hostname = entry[0];
let item = entry[1];
@ -772,46 +670,27 @@ FilterContainer.prototype.removeFromSelectorCache = function(
/******************************************************************************/
// Evict the least-recently-used entries from the selector cache.
//
// Clears `this.selectorCacheTimer` so that a new pruning pass can be
// scheduled. Nothing is evicted unless the cache holds more than
// `this.selectorCacheCountMax` entries; when it does, the cache is cut down
// to the `this.selectorCacheCountMin` entries with the highest `accessId`.
// Each evicted entry is disposed before being removed from the cache.
FilterContainer.prototype.pruneSelectorCacheAsync = function() {
    this.selectorCacheTimer = null;
    if ( this.selectorCache.size <= this.selectorCacheCountMax ) { return; }
    const cache = this.selectorCache;
    // Sort from highest to lowest `accessId`, then skip the entries to keep:
    // what remains is the list of hostnames to evict.
    const hostnames = Array.from(cache.keys())
        .sort((a, b) => cache.get(b).accessId - cache.get(a).accessId)
        .slice(this.selectorCacheCountMin);
    for ( const hn of hostnames ) {
        cache.get(hn).dispose();
        cache.delete(hn);
    }
};
/******************************************************************************/
// Flag the selector cache entry of a hostname so that the DOM surveyor is
// reported as disabled for that hostname.
//
// details: object with a `hostname` property.
//
// Does nothing when `details.hostname` is not a non-empty string, or when
// there is no cache entry for that hostname.
FilterContainer.prototype.disableSurveyor = function(details) {
    const hostname = details.hostname;
    if ( typeof hostname !== 'string' || hostname === '' ) { return; }
    const cacheEntry = this.selectorCache.get(hostname);
    if ( cacheEntry === undefined ) { return; }
    cacheEntry.disableSurveyor = true;
};
/******************************************************************************/
@ -850,43 +729,19 @@ FilterContainer.prototype.cssRuleFromProcedural = function(json) {
/******************************************************************************/
// Look up the low generic cosmetic selectors matching the hashes listed in
// `request.hashes`, remove those listed in `request.exceptions`, cache the
// result for `request.hostname`, and inject the remaining selectors into the
// requesting tab/frame as a `display:none!important` rule.
// Returns nothing when there is nothing to report, otherwise an object
// `{ injectedCSS, excepted }`.
// NOTE(review): this span is a rendered diff -- superseded statements are
// interleaved with their replacements, and the `@ ... @` markers stand for
// lines not shown here. Confirm against the full file before editing.
FilterContainer.prototype.retrieveGenericSelectors = function(request) {
if ( this.acceptedCount === 0 ) { return; }
if ( !request.ids && !request.classes ) { return; }
if ( this.lowlyGeneric.size === 0 ) { return; }
if ( Array.isArray(request.hashes) === false ) { return; }
if ( request.hashes.length === 0 ) { return; }
const { safeOnly = false } = request;
//console.time('cosmeticFilteringEngine.retrieveGenericSelectors');
const simpleSelectors = this.$simpleSet;
const complexSelectors = this.$complexSet;
const cacheEntry = this.selectorCache.get(request.hostname);
const previousHits = cacheEntry && cacheEntry.cosmetic || this.$dummySet;
for ( const type in this.lowlyGeneric ) {
const entry = this.lowlyGeneric[type];
const selectors = request[entry.canonical];
if ( Array.isArray(selectors) === false ) { continue; }
for ( const identifier of selectors ) {
if ( entry.simple.has(identifier) === false ) { continue; }
const bucket = entry.complex.get(identifier);
if ( typeof bucket === 'string' ) {
if ( previousHits.has(bucket) ) { continue; }
complexSelectors.add(bucket);
continue;
}
const simpleSelector = entry.prefix + identifier;
if ( Array.isArray(bucket) ) {
for ( const complexSelector of bucket ) {
if ( previousHits.has(complexSelector) ) { continue; }
if ( safeOnly && complexSelector === simpleSelector ) { continue; }
complexSelectors.add(complexSelector);
}
continue;
}
if ( previousHits.has(simpleSelector) ) { continue; }
if ( safeOnly ) { continue; }
simpleSelectors.add(simpleSelector);
const selectorsSet = new Set();
const hashes = [];
// Each bucket is a ',\n'-joined string of selectors; collect the selectors
// of every bucket found for a requested hash.
for ( const hash of request.hashes ) {
const bucket = this.lowlyGeneric.get(hash);
if ( bucket === undefined ) { continue; }
for ( const selector of bucket.split(',\n') ) {
selectorsSet.add(selector);
}
// Only hashes which matched a bucket are kept; they are passed to the
// selector cache further below.
hashes.push(hash);
}
// Apply exceptions: it is the responsibility of the caller to provide
@ -894,48 +749,29 @@ FilterContainer.prototype.retrieveGenericSelectors = function(request) {
const excepted = [];
if ( Array.isArray(request.exceptions) ) {
for ( const exception of request.exceptions ) {
if (
simpleSelectors.delete(exception) ||
complexSelectors.delete(exception)
) {
if ( selectorsSet.delete(exception) ) {
excepted.push(exception);
}
}
}
if (
simpleSelectors.size === 0 &&
complexSelectors.size === 0 &&
excepted.length === 0
) {
return;
}
if ( selectorsSet.size === 0 && excepted.length === 0 ) { return; }
const out = { injectedCSS: '', excepted, };
const injected = [];
if ( simpleSelectors.size !== 0 ) {
injected.push(...simpleSelectors);
simpleSelectors.clear();
}
if ( complexSelectors.size !== 0 ) {
injected.push(...complexSelectors);
complexSelectors.clear();
}
// Cache and inject looked-up low generic cosmetic filters.
if ( injected.length === 0 ) { return out; }
const selectors = Array.from(selectorsSet);
if ( typeof request.hostname === 'string' && request.hostname !== '' ) {
this.addToSelectorCache({
cost: request.surveyCost || 0,
hostname: request.hostname,
selectors: injected,
selectors,
hashes,
type: 'cosmetic',
});
}
out.injectedCSS = `${injected.join(',\n')}\n{display:none!important;}`;
// When every looked-up selector was excepted, there is nothing to inject:
// only the list of excepted selectors is reported.
if ( selectors.length === 0 ) { return out; }
out.injectedCSS = `${selectors.join(',\n')}\n{display:none!important;}`;
vAPI.tabs.insertCSS(request.tabId, {
code: out.injectedCSS,
frameId: request.frameId,
@ -943,8 +779,6 @@ FilterContainer.prototype.retrieveGenericSelectors = function(request) {
runAt: 'document_start',
});
//console.timeEnd('cosmeticFilteringEngine.retrieveGenericSelectors');
return out;
};
@ -972,7 +806,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
exceptedFilters: [],
proceduralFilters: [],
convertedProceduralFilters: [],
noDOMSurveying: this.needDOMSurveyor === false,
disableSurveyor: this.lowlyGeneric.size === 0,
};
const injectedCSS = [];
@ -987,10 +821,9 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// Cached cosmetic filters: these are always declarative.
if ( cacheEntry !== undefined ) {
cacheEntry.retrieve('cosmetic', specificSet);
if ( out.noDOMSurveying === false ) {
out.noDOMSurveying = cacheEntry.cosmeticSurveyingMissCount >
cosmeticSurveyingMissCountMax;
cacheEntry.retrieveCosmetic(specificSet, out.genericCosmeticHashes = []);
if ( cacheEntry.disableSurveyor ) {
out.disableSurveyor = true;
}
}
@ -1123,8 +956,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// CSS selectors for collapsible blocked elements
if ( cacheEntry ) {
const networkFilters = [];
cacheEntry.retrieve('net', networkFilters);
if ( networkFilters.length !== 0 ) {
if ( cacheEntry.retrieveNet(networkFilters) ) {
details.code = `${networkFilters.join('\n')}\n{display:none!important;}`;
if ( request.tabId !== undefined ) {
vAPI.tabs.insertCSS(request.tabId, details);
@ -1144,31 +976,16 @@ FilterContainer.prototype.getFilterCount = function() {
/******************************************************************************/
// Return a human-readable, multi-line report of the engine's content: the
// number of specific filters, the total number of low generic selectors, the
// number of buckets in `this.lowlyGeneric`, then every low generic selector
// on its own line.
FilterContainer.prototype.dump = function() {
    // Each value in `lowlyGeneric` is a ',\n'-joined string of selectors;
    // flatten all of them into a single list.
    const generics = [];
    for ( const selectors of this.lowlyGeneric.values() ) {
        generics.push(...selectors.split(',\n'));
    }
    return [
        'Cosmetic Filtering Engine internals:',
        `specific: ${this.specificFilters.size}`,
        `generic: ${generics.length}`,
        `+ selectors: ${this.lowlyGeneric.size}`,
        ...generics.map(a => ` ${a}`),
    ].join('\n');
};

View file

@ -823,6 +823,10 @@ const onMessage = function(request, sender, callback) {
cosmeticFilteringEngine.addToSelectorCache(request);
break;
case 'disableGenericCosmeticFilteringSurveyor':
cosmeticFilteringEngine.disableSurveyor(request);
break;
case 'getCollapsibleBlockedRequests':
response = {
id: request.id,