uBlock/src/js/cosmetic-filtering.js

1335 lines
41 KiB
JavaScript
Raw Normal View History

2014-06-24 00:42:43 +02:00
/*******************************************************************************
2015-03-07 19:20:18 +01:00
µBlock - a browser extension to block requests.
2014-06-24 00:42:43 +02:00
Copyright (C) 2014 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
/* jshint bitwise: false */
/* global punycode, µBlock */
2014-06-24 00:42:43 +02:00
/******************************************************************************/
2014-09-08 23:46:58 +02:00
µBlock.cosmeticFilteringEngine = (function(){
2014-06-24 00:42:43 +02:00
2014-12-26 21:26:44 +01:00
'use strict';
2014-06-24 00:42:43 +02:00
/******************************************************************************/
var µb = µBlock;
2014-09-08 23:46:58 +02:00
/******************************************************************************/
// Serializer pair used by the selfie code further down: selectors and
// dictionary keys are JSON-encoded so that they cannot collide with the tab
// and newline characters used there as field and line separators.
// Could be replaced with encodeURIComponent/decodeURIComponent,
// which seems faster on Firefox.
var encode = JSON.stringify;
var decode = JSON.parse;
2014-06-24 00:42:43 +02:00
/******************************************************************************/
/*
var histogram = function(label, buckets) {
var h = [],
bucket;
for ( var k in buckets ) {
if ( buckets.hasOwnProperty(k) === false ) {
continue;
}
bucket = buckets[k];
h.push({
k: k,
n: bucket instanceof FilterBucket ? bucket.filters.length : 1
});
}
console.log('Histogram %s', label);
var total = h.length;
h.sort(function(a, b) { return b.n - a.n; });
// Find indices of entries of interest
var target = 3;
for ( var i = 0; i < total; i++ ) {
if ( h[i].n === target ) {
console.log('\tEntries with only %d filter(s) start at index %s (key = "%s")', target, i, h[i].k);
target -= 1;
}
}
h = h.slice(0, 50);
h.forEach(function(v) {
console.log('\tkey="%s" count=%d', v.k, v.n);
});
console.log('\tTotal buckets count: %d', total);
};
*/
/******************************************************************************/
// Pure id- and class-based filters
// Examples:
// #A9AdsMiddleBoxTop
// .AD-POST
2015-09-13 20:29:43 +02:00
// Stateless filter for pure id/class selectors. The lookup key is the
// selector itself, so the filter carries no data and one shared instance
// (`filterPlain`) is used everywhere.
var FilterPlain = function() {
};

// Push the looked-up key `s` onto `out`: for a plain filter the key is the
// selector.
FilterPlain.prototype.retrieve = function(s, out) {
    out.push(s);
};

// Filter id, used to tag this filter type in a selfie.
FilterPlain.prototype.fid = '#';

// Nothing to serialize: returns undefined.
FilterPlain.prototype.toSelfie = function() {
};

// Always returns the shared instance; the argument is ignored.
FilterPlain.fromSelfie = function() {
    return filterPlain;
};

var filterPlain = new FilterPlain();
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Id- and class-based filters with extra selector stuff following.
// Examples:
// #center_col > div[style="font-size:14px;margin-right:0;min-height:5px"] ...
// #adframe:not(frameset)
// .l-container > #fishtank
// Filter holding a full selector `s` which starts with an id or class.
var FilterPlainMore = function(s) {
    this.s = s;
};

// Push the full selector onto `out` when it starts with the key `s`.
// `lastIndexOf(s, 0) === 0` is a "starts with" test which only ever inspects
// position 0.
FilterPlainMore.prototype.retrieve = function(s, out) {
    if ( this.s.lastIndexOf(s, 0) === 0 ) {
        out.push(this.s);
    }
};

// Filter id, used to tag this filter type in a selfie.
FilterPlainMore.prototype.fid = '#+';

// The selector is the whole serialized state.
FilterPlainMore.prototype.toSelfie = function() {
    return this.s;
};

FilterPlainMore.fromSelfie = function(s) {
    return new FilterPlainMore(s);
};
/******************************************************************************/
// Container for several filters sharing the same lookup key. Optionally
// seeded with one or two filters.
var FilterBucket = function(a, b) {
    this.f = null;
    this.filters = [];
    if ( a === undefined ) {
        return;
    }
    this.filters[0] = a;
    if ( b === undefined ) {
        return;
    }
    this.filters[1] = b;
};

// Append one more filter to the bucket.
FilterBucket.prototype.add = function(a) {
    this.filters.push(a);
};

// Ask every contained filter, last added first, to contribute to `out`.
FilterBucket.prototype.retrieve = function(s, out) {
    for ( var idx = this.filters.length - 1; idx >= 0; idx-- ) {
        this.filters[idx].retrieve(s, out);
    }
};

// Filter id, used to tag this filter type in a selfie.
FilterBucket.prototype.fid = '[]';

// Only the number of contained filters is serialized here.
FilterBucket.prototype.toSelfie = function() {
    return String(this.filters.length);
};

// Returns an empty bucket; the argument is ignored.
FilterBucket.fromSelfie = function() {
    return new FilterBucket();
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Any selector specific to a hostname
// Examples:
// search.snapdo.com###ABottomD
// facebook.com##.-cx-PRIVATE-fbAdUnit__root
// sltrib.com###BLContainer + div[style="height:90px;"]
// myps3.com.au##.Boxer[style="height: 250px;"]
// lindaikeji.blogspot.com##a > img[height="600"]
// japantimes.co.jp##table[align="right"][width="250"]
// mobilephonetalk.com##[align="center"] > b > a[href^="http://tinyurl.com/"]
// Selector `s` which applies to `hostname` and its subdomains.
var FilterHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// Push the selector onto `out` when `hostname` is the filter's hostname or
// one of its subdomains.
FilterHostname.prototype.retrieve = function(hostname, out) {
    var needle = this.hostname;
    if ( hostname.slice(-needle.length) !== needle ) {
        return;
    }
    // A bare suffix match is not enough: `badexample.com` ends with
    // `example.com` but is an unrelated site. The match must start at the
    // beginning of the hostname or right after a label separator.
    var boundary = hostname.length - needle.length;
    if (
        boundary > 0 &&
        needle.charAt(0) !== '.' &&
        hostname.charAt(boundary - 1) !== '.'
    ) {
        return;
    }
    out.push(this.s);
};

// Filter id, used to tag this filter type in a selfie.
FilterHostname.prototype.fid = 'h';

// Serialized as `<encoded selector>\t<hostname>`.
FilterHostname.prototype.toSelfie = function() {
    return encode(this.s) + '\t' + this.hostname;
};

FilterHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterHostname(decode(s.slice(0, pos)), s.slice(pos + 1));
};
/******************************************************************************/
// Any selector specific to an entity
// Examples:
// google.*###cnt #center_col > #res > #topstuff > .ts
// Selector `s` which applies to `entity`.
var FilterEntity = function(s, entity) {
    this.s = s;
    this.entity = entity;
};

// Push the selector onto `out` when `entity` is the filter's entity, or ends
// with it at a label boundary.
FilterEntity.prototype.retrieve = function(entity, out) {
    var needle = this.entity;
    if ( entity.slice(-needle.length) !== needle ) {
        return;
    }
    // Reject partial-label matches: `notgoogle` ends with `google` but is a
    // different entity.
    var boundary = entity.length - needle.length;
    if (
        boundary > 0 &&
        needle.charAt(0) !== '.' &&
        entity.charAt(boundary - 1) !== '.'
    ) {
        return;
    }
    out.push(this.s);
};

// Filter id, used to tag this filter type in a selfie.
FilterEntity.prototype.fid = 'e';

// Serialized as `<encoded selector>\t<entity>`.
FilterEntity.prototype.toSelfie = function() {
    return encode(this.s) + '\t' + this.entity;
};

FilterEntity.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterEntity(decode(s.slice(0, pos)), s.slice(pos + 1));
};
/******************************************************************************/
/******************************************************************************/
// Reusable parser for a single filter line. `parse()` fills in the fields
// below and returns the parser itself:
//   prefix    - text before the `##`/`#@#` anchor (comma-separated hostnames)
//   suffix    - text after the anchor (the selector)
//   unhide    - 1 for an exception filter (`#@#`), 0 otherwise
//   hostnames - the prefix split on commas; empty for a generic filter
//   invalid   - true when the line is a cosmetic filter which must be dropped
//   cosmetic  - false when the line is not a cosmetic filter at all
var FilterParser = function() {
    this.prefix = this.suffix = this.style = '';
    this.unhide = 0;
    this.hostnames = [];
    this.invalid = false;
    this.cosmetic = true;
    this.reParser = /^([^#]*?)(##|#@#)(.+)$/;
    this.reScriptContains = /^script:contains\(.+?\)$/;
};

/******************************************************************************/

// Restore all result fields to their defaults. Returns the parser.
FilterParser.prototype.reset = function() {
    this.prefix = this.suffix = this.style = '';
    this.unhide = 0;
    this.hostnames.length = 0;
    this.invalid = false;
    this.cosmetic = true;
    return this;
};

/******************************************************************************/

// Parse the filter line `s`. Always returns `this`; callers must check
// `cosmetic` first, then `invalid`, before using the other fields.
FilterParser.prototype.parse = function(s) {
    // important!
    this.reset();

    var matches = this.reParser.exec(s);
    if ( matches === null ) {
        this.cosmetic = false;
        return this;
    }

    this.prefix = matches[1].trim();
    this.unhide = matches[2].charAt(1) === '@' ? 1 : 0;
    this.suffix = matches[3].trim();

    // Cosmetic filters with explicit style properties can apply only:
    // - to specific cosmetic filters (those which apply to a specific site)
    // - to block cosmetic filters (not exception cosmetic filters)
    if ( this.suffix.slice(-1) === '}' ) {
        // Not supported for now: this code will ensure some backward
        // compatibility for when cosmetic filters with explicit style
        // properties start to be in use.
        this.invalid = true;
        return this;
    }

    // 2014-05-23:
    // https://github.com/gorhill/httpswitchboard/issues/260
    // Any sequence of `#` longer than one means the line is not a valid
    // cosmetic filter.
    if ( this.suffix.indexOf('##') !== -1 ) {
        this.cosmetic = false;
        return this;
    }

    // Normalize high-medium selectors: `href` is assumed to imply `a` tag. We
    // need to do this here in order to correctly avoid duplicates. The test
    // is designed to minimize overhead -- this is a low occurrence filter.
    // Only an `a` tag is stripped: dropping any other one-letter tag (e.g.
    // `b[href^="..."]`) would change what the selector matches.
    if (
        this.suffix.charAt(1) === '[' &&
        this.suffix.charAt(0) === 'a' &&
        this.suffix.slice(2, 9) === 'href^="'
    ) {
        this.suffix = this.suffix.slice(1);
    }

    if ( this.prefix !== '' ) {
        this.hostnames = this.prefix.split(/\s*,\s*/);
    }

    // Script tag filters: pre-process them so that can be used with minimal
    // overhead in the content script.
    // Examples:
    //    focus.de##script:contains(/uabInject/)
    //    focus.de##script:contains(uabInject)
    if ( this.suffix.charAt(0) === 's' && this.reScriptContains.test(this.suffix) ) {
        // Currently supported only as non-generic selector. Also, exception
        // script tag filter makes no sense, ignore.
        if ( this.hostnames.length === 0 || this.unhide === 1 ) {
            this.invalid = true;
            return this;
        }
        // 16 === 'script:contains('.length
        var needle = this.suffix.slice(16, -1);
        if (
            needle.length > 2 &&
            needle.charAt(0) === '/' &&
            needle.slice(-1) === '/'
        ) {
            // `/.../`: regex source, used verbatim. At least one character
            // is required between the slashes, otherwise `/` or `//` would
            // turn into an empty regex matching every script.
            needle = needle.slice(1, -1);
        } else {
            // Plain text: escape regex special characters.
            needle = needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        }
        this.suffix = 'script//:' + needle;
    }

    return this;
};
/******************************************************************************/
/******************************************************************************/
2014-08-14 02:03:55 +02:00
// Per-key cache of selectors, split in two dictionaries:
//   cosmetic - selector => true
//   net      - selector => time of last addition (ms), capped in size
var SelectorCacheEntry = function() {
    this.reset();
};

/******************************************************************************/

// Pool of disposed entries, reused by `factory()`.
SelectorCacheEntry.junkyard = [];

// Return a blank entry, recycled from the junkyard when possible.
SelectorCacheEntry.factory = function() {
    var entry = SelectorCacheEntry.junkyard.pop();
    if ( entry ) {
        return entry.reset();
    }
    return new SelectorCacheEntry();
};

/******************************************************************************/

// When the number of net selectors reaches the high water mark, the oldest
// ones are discarded until only `netLowWaterMark` remain.
SelectorCacheEntry.prototype.netLowWaterMark = 20;
SelectorCacheEntry.prototype.netHighWaterMark = 30;

/******************************************************************************/

// Empty both dictionaries and refresh the access time. Returns the entry.
SelectorCacheEntry.prototype.reset = function() {
    this.cosmetic = {};
    this.net = {};
    this.netCount = 0;
    this.lastAccessTime = Date.now();
    return this;
};

/******************************************************************************/

// Release the dictionaries and, if the junkyard holds fewer than 25 entries,
// keep this instance around for reuse.
SelectorCacheEntry.prototype.dispose = function() {
    this.cosmetic = this.net = null;
    if ( SelectorCacheEntry.junkyard.length < 25 ) {
        SelectorCacheEntry.junkyard.push(this);
    }
};

/******************************************************************************/

// Add each item of `selectors` (array-like) to the cosmetic dictionary.
SelectorCacheEntry.prototype.addCosmetic = function(selectors) {
    var dict = this.cosmetic;
    var i = selectors.length || 0;
    while ( i-- ) {
        dict[selectors[i]] = true;
    }
};

/******************************************************************************/

// Add one selector (string) or many (array-like) to the net dictionary, then
// prune the dictionary if it has grown past the high water mark.
SelectorCacheEntry.prototype.addNet = function(selectors) {
    var now = Date.now();
    if ( typeof selectors === 'string' ) {
        this.addNetOne(selectors, now);
    } else {
        this.addNetMany(selectors, now);
    }
    // Net request-derived selectors: I limit the number of cached selectors,
    // as I expect cases where the blocked net-requests are never the
    // exact same URL.
    if ( this.netCount < this.netHighWaterMark ) {
        return;
    }
    var dict = this.net;
    // Most recent first; everything past the low water mark is evicted.
    var sorted = Object.keys(dict).sort(function(a, b) {
        return dict[b] - dict[a];
    });
    var evicted = sorted.slice(this.netLowWaterMark);
    var i = evicted.length;
    while ( i-- ) {
        delete dict[evicted[i]];
    }
    // Keep the counter in sync with the dictionary, otherwise it stays above
    // the high water mark and every subsequent call sorts and prunes again.
    this.netCount = sorted.length - evicted.length;
};

/******************************************************************************/

// Record one net selector with timestamp `now`, counting it if it is new.
SelectorCacheEntry.prototype.addNetOne = function(selector, now) {
    var dict = this.net;
    if ( dict[selector] === undefined ) {
        this.netCount += 1;
    }
    dict[selector] = now;
};

/******************************************************************************/

// Record many net selectors with timestamp `now`, counting the new ones.
SelectorCacheEntry.prototype.addNetMany = function(selectors, now) {
    var dict = this.net;
    var i = selectors.length || 0;
    var selector;
    while ( i-- ) {
        selector = selectors[i];
        if ( dict[selector] === undefined ) {
            this.netCount += 1;
        }
        dict[selector] = now;
    }
};

/******************************************************************************/

// Add selectors to the dictionary designated by `type`: 'cosmetic', or the
// net dictionary for any other value.
SelectorCacheEntry.prototype.add = function(selectors, type) {
    this.lastAccessTime = Date.now();
    if ( type === 'cosmetic' ) {
        this.addCosmetic(selectors);
    } else {
        this.addNet(selectors);
    }
};

/******************************************************************************/

// https://github.com/chrisaljoudi/uBlock/issues/420
// Empty the dictionary designated by `type` ('cosmetic' or 'net'), or both
// when `type` is undefined.
SelectorCacheEntry.prototype.remove = function(type) {
    this.lastAccessTime = Date.now();
    if ( type === undefined || type === 'cosmetic' ) {
        this.cosmetic = {};
    }
    if ( type === undefined || type === 'net' ) {
        this.net = {};
        this.netCount = 0;
    }
};

/******************************************************************************/

// Push onto `out` every selector of the dictionary designated by `type`:
// 'cosmetic', or the net dictionary for any other value.
SelectorCacheEntry.prototype.retrieve = function(type, out) {
    this.lastAccessTime = Date.now();
    var dict = type === 'cosmetic' ? this.cosmetic : this.net;
    for ( var selector in dict ) {
        if ( dict.hasOwnProperty(selector) ) {
            out.push(selector);
        }
    }
};
/******************************************************************************/
/******************************************************************************/
// Hash key layout. The key is an integer rendered as a base-36 string:
//
//   bit 17   : filter type (0=hide 1=unhide)
//   low bits : FNV-derived hash of the token, truncated with the mask
//              supplied by the caller
//
2014-06-24 00:42:43 +02:00
// Compute the bucket key for `token`.
//   unhide - 0 for a hide filter; any other value sets the unhide flag
//   token  - string to hash
//   mask   - bit mask applied to the hash before the flag is added
// Returns the key as a base-36 string.
var makeHash = function(unhide, token, mask) {
    // Based on: FNV32a
    // http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
    // The rest is custom, suited for µBlock: rather than hashing every
    // character, eight positions spread over the token are sampled.
    var i1 = token.length;
    var i2 = i1 >> 1;
    var i4 = i1 >> 2;
    var i8 = i1 >> 3;
    var samples = [0, i8, i4, i4 + i8, i2, i2 + i8, i2 + i4, i1 - 1];
    var hval = 0x811c9dc5;
    for ( var i = 0; i < samples.length; i++ ) {
        // An out-of-range position yields NaN, which XORs as 0.
        hval = (hval ^ token.charCodeAt(samples[i])) >>> 0;
        // Shift-and-add form of the multiplication by the FNV prime.
        hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
        hval >>>= 0;
    }
    hval &= mask;
    if ( unhide !== 0 ) {
        hval |= 0x20000;
    }
    return hval.toString(36);
};
/******************************************************************************/
/******************************************************************************/
// Cosmetic filter family tree:
//
// Generic
//    Low generic simple: class or id only
//    Low generic complex: class or id + extra stuff after
//    High generic:
//       High-low generic: [alt="..."],[title="..."]
//       High-medium generic: [href^="..."]
//       High-high generic: everything else
// Specific
//    Specific hostname
//    Specific entity
// Generic filters can only be enforced once the main document is loaded.
// Specific filters can be enforced before the main document is loaded.
2014-06-24 00:42:43 +02:00
// Holds all cosmetic filters. The constructor sets the fixed configuration,
// then calls `reset()` to create the (empty) filter dictionaries.
var FilterContainer = function() {
    this.domainHashMask = (1 << 10) - 1; // 10 bits
    // Bucket keys used for hostnames for which no domain could be derived.
    this.type0NoDomainHash = 'type0NoDomain';
    this.type1NoDomainHash = 'type1NoDomain';
    this.parser = new FilterParser();
    this.selectorCachePruneDelay = 5 * 60 * 1000; // 5 minutes
    this.selectorCacheAgeMax = 20 * 60 * 1000; // 20 minutes
    this.selectorCacheCountMin = 10;
    this.selectorCacheTimer = null;
    this.reHasUnicode = /[^\x00-\x7F]/;
    this.punycode = punycode;
    this.reset();
};
/******************************************************************************/
// Reset all, thus reducing to a minimum memory footprint of the context.
FilterContainer.prototype.reset = function() {
    this.parser.reset();
    this.µburi = µb.URI;
    this.frozen = false;
    this.acceptedCount = 0;
    this.duplicateCount = 0;
    // Compiled lines seen so far, used to skip duplicates while loading.
    this.duplicateBuster = {};

    this.selectorCache = {};
    this.selectorCacheCount = 0;

    // permanent
    // [class], [id]
    this.lowGenericHide = {};

    // [alt="..."], [title="..."]
    this.highLowGenericHide = {};
    this.highLowGenericHideCount = 0;

    // a[href^="http..."]
    this.highMediumGenericHide = {};
    this.highMediumGenericHideCount = 0;

    // everything else: selectors accumulate in the array while loading and
    // are joined into the string by `freeze()`
    this.highHighGenericHideArray = [];
    this.highHighGenericHide = '';
    this.highHighGenericHideCount = 0;

    // generic exception filters
    this.genericDonthide = [];

    // hostname, entity-based filters
    this.hostnameFilters = {};
    this.entityFilters = {};
    this.scriptTagFilters = {};
    this.scriptTagFilterCount = 0;
};
/******************************************************************************/
2015-04-07 03:26:05 +02:00
// https://github.com/chrisaljoudi/uBlock/issues/1004
// Detect and report invalid CSS selectors.
2015-09-08 14:45:22 +02:00
// Returns true when `s` is accepted as a selector. Validity is probed by
// handing the selector to `Element.matches()` on a scratch element, which
// throws on a selector the browser cannot parse.
FilterContainer.prototype.isValidSelector = (function() {
    var div = document.createElement('div');

    // Not all browsers support `Element.matches`:
    // http://caniuse.com/#feat=matchesselector
    if ( typeof div.matches !== 'function' ) {
        return function() {
            return true;
        };
    }

    return function(s) {
        // Script tag filters are not real CSS selectors: they are accepted
        // as is, without going through the selector engine.
        if ( s.lastIndexOf('script//:', 0) === 0 ) {
            return true;
        }
        try {
            // https://github.com/gorhill/uBlock/issues/693
            div.matches(s + ',\n#foo');
            return true;
        } catch (e) {
            // Deliberately ignored: a throw means the selector is invalid,
            // which is reported below.
        }
        console.error('uBlock> invalid cosmetic filter:', s);
        return false;
    };
})();
/******************************************************************************/
2015-02-24 00:31:29 +01:00
// Compile the raw filter line `s`, appending the resulting compiled line(s)
// to `out`.
// Returns false when the line is not a cosmetic filter, and true when it is,
// including when the cosmetic filter is invalid and nothing was emitted.
FilterContainer.prototype.compile = function(s, out) {
    var parsed = this.parser.parse(s);
    if ( parsed.cosmetic === false ) {
        return false;
    }
    if ( parsed.invalid ) {
        return true;
    }

    var hostnames = parsed.hostnames;
    var i = hostnames.length;
    if ( i === 0 ) {
        this.compileGenericSelector(parsed, out);
        return true;
    }

    // For hostname- or entity-based filters, class- or id-based selectors are
    // still the most common, and can easily be tested using a plain regex.
    if (
        this.reClassOrIdSelector.test(parsed.suffix) === false &&
        this.isValidSelector(parsed.suffix) === false
    ) {
        return true;
    }

    // https://github.com/chrisaljoudi/uBlock/issues/151
    // Negated hostname means the filter applies to all non-negated hostnames
    // of same filter OR globally if there is no non-negated hostnames.
    var applyGlobally = true;
    var hostname;
    while ( i-- ) {
        hostname = hostnames[i];
        if ( hostname.charAt(0) !== '~' ) {
            applyGlobally = false;
        }
        // A trailing `.*` denotes an entity rather than a hostname.
        if ( hostname.slice(-2) === '.*' ) {
            this.compileEntitySelector(hostname, parsed, out);
        } else {
            this.compileHostnameSelector(hostname, parsed, out);
        }
    }
    if ( applyGlobally ) {
        this.compileGenericSelector(parsed, out);
    }

    return true;
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
2015-02-24 00:31:29 +01:00
// Compile a generic (not hostname-specific) filter and append at most one
// compiled line to `out`. Fields of a compiled line are separated by `\v`;
// the first field is always `c`, the second one identifies the bin:
//   g1    selector                generic exception
//   lg    selector                plain id/class
//   lg+   id-or-class  selector   id/class followed by more
//   hlg0  selector                [alt="..."] / [title="..."]
//   hmg0  key  selector           [href^="http..."]
//   hhg0  selector                everything else
FilterContainer.prototype.compileGenericSelector = function(parsed, out) {
    var selector = parsed.suffix;

    // https://github.com/chrisaljoudi/uBlock/issues/497
    // All generic exception filters are put in the same bucket: they are
    // expected to be very rare.
    if ( parsed.unhide ) {
        if ( this.isValidSelector(selector) ) {
            out.push('c\vg1\v' + selector);
        }
        return;
    }

    var type = selector.charAt(0);
    var matches;

    if ( type === '#' || type === '.' ) {
        matches = this.rePlainSelector.exec(selector);
        if ( matches === null ) {
            return;
        }
        // Single-CSS rule: no need to test for whether the selector
        // is valid, the regex took care of this. Most generic selector falls
        // into that category.
        if ( matches[1] === selector ) {
            out.push('c\vlg\v' + matches[1]);
            return;
        }
        // Many-CSS rules
        if ( this.isValidSelector(selector) ) {
            out.push('c\vlg+\v' + matches[1] + '\v' + selector);
        }
        return;
    }

    // ["title"] and ["alt"] will go in high-low generic bin.
    if ( this.reHighLow.test(selector) ) {
        if ( this.isValidSelector(selector) ) {
            out.push('c\vhlg0\v' + selector);
        }
        return;
    }

    // [href^="..."] will go in high-medium generic bin.
    matches = this.reHighMedium.exec(selector);
    if ( matches && matches.length === 2 ) {
        if ( this.isValidSelector(selector) ) {
            out.push('c\vhmg0\v' + matches[1] + '\v' + selector);
        }
        return;
    }

    // All else
    if ( this.isValidSelector(selector) ) {
        out.push('c\vhhg0\v' + selector);
    }
};

// A lone id or class, and nothing else.
FilterContainer.prototype.reClassOrIdSelector = /^([#.][\w-]+)$/;
// A leading id or class, possibly followed by more.
FilterContainer.prototype.rePlainSelector = /^([#.][\w-]+)/;
FilterContainer.prototype.reHighLow = /^[a-z]*\[(?:alt|title)="[^"]+"\]$/;
// Captures the first 8 characters following the URL scheme.
FilterContainer.prototype.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/;
/******************************************************************************/
2015-02-24 00:31:29 +01:00
// Compile a hostname-specific filter and append to `out` a line of the form
// `c \v h \v <hash> \v <hostname> \v <selector>`.
FilterContainer.prototype.compileHostnameSelector = function(hostname, parsed, out) {
    // https://github.com/chrisaljoudi/uBlock/issues/145
    // A negated hostname flips the filter type: a hide filter becomes an
    // exception for that hostname, and vice versa.
    var unhide = parsed.unhide;
    if ( hostname.charAt(0) === '~' ) {
        hostname = hostname.slice(1);
        unhide ^= 1;
    }

    // punycode if needed
    if ( this.reHasUnicode.test(hostname) ) {
        //console.debug('µBlock.cosmeticFilteringEngine/FilterContainer.compileHostnameSelector> punycoding:', hostname);
        hostname = this.punycode.toASCII(hostname);
    }

    // https://github.com/chrisaljoudi/uBlock/issues/188
    // If not a real domain as per PSL, assign a synthetic one
    var hash;
    var domain = this.µburi.domainFromHostname(hostname);
    if ( domain === '' ) {
        hash = unhide === 0 ? this.type0NoDomainHash : this.type1NoDomainHash;
    } else {
        hash = makeHash(unhide, domain, this.domainHashMask);
    }

    out.push(
        'c\v' +
        'h\v' +
        hash + '\v' +
        hostname + '\v' +
        parsed.suffix
    );
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
2015-02-24 00:31:29 +01:00
// Compile an entity-specific filter (`hostname` ends with `.*`) and append
// to `out` a line of the form `c \v e \v <entity> \v <selector>`.
// NOTE(review): unlike compileHostnameSelector(), a leading `~` is not
// stripped and `parsed.unhide` is not consulted here, so a negated entity is
// stored verbatim as an entity named `~...` -- confirm this is intended.
FilterContainer.prototype.compileEntitySelector = function(hostname, parsed, out) {
    // Drop the trailing `.*`
    var entity = hostname.slice(0, -2);
    out.push(
        'c\v' +
        'e\v' +
        entity + '\v' +
        parsed.suffix
    );
};
/******************************************************************************/
2015-02-24 00:31:29 +01:00
// Load compiled cosmetic filters from `text`, starting at offset `lineBeg`.
// Consecutive lines starting with `c` are consumed; loading stops at the
// first line which does not.
//   text    - compiled content, one filter per line, fields separated by \v
//   lineBeg - offset of the first line to read
//   skip    - when truthy, the lines are stepped over without being loaded
// Returns the offset of the first line not consumed, or `text.length`.
FilterContainer.prototype.fromCompiledContent = function(text, lineBeg, skip) {
    if ( skip ) {
        return this.skipCompiledContent(text, lineBeg);
    }

    // Keys below come from filter lists. The dictionaries are plain objects,
    // so a key such as `constructor` or `toString` must not be mistaken for
    // an existing entry through Object.prototype.
    var hasOwn = Object.prototype.hasOwnProperty;

    // Store `filter` under `key`: alone at first, then in a FilterBucket as
    // soon as a second filter shares the key.
    var addToDict = function(dict, key, filter) {
        var bucket = dict[key];
        if ( bucket === undefined ) {
            dict[key] = filter;
        } else if ( bucket instanceof FilterBucket ) {
            bucket.add(filter);
        } else {
            dict[key] = new FilterBucket(bucket, filter);
        }
    };

    var lineEnd;
    var textEnd = text.length;
    var line, fields, key, bucket;

    while ( lineBeg < textEnd ) {
        if ( text.charAt(lineBeg) !== 'c' ) {
            return lineBeg;
        }
        lineEnd = text.indexOf('\n', lineBeg);
        if ( lineEnd === -1 ) {
            lineEnd = textEnd;
        }
        // Strip the leading `c\v`
        line = text.slice(lineBeg + 2, lineEnd);
        lineBeg = lineEnd + 1;

        this.acceptedCount += 1;
        if ( this.duplicateBuster.hasOwnProperty(line) ) {
            this.duplicateCount += 1;
            continue;
        }
        this.duplicateBuster[line] = true;

        fields = line.split('\v');

        // h  <hash>  <hostname>  <selector>
        if ( fields[0] === 'h' ) {
            // Special filter: script tags. Not a real CSS selector.
            if ( fields[3].lastIndexOf('script//:', 0) === 0 ) {
                this.createScriptTagFilter(fields[2], fields[3].slice(9));
                continue;
            }
            addToDict(
                this.hostnameFilters,
                fields[1],
                new FilterHostname(fields[3], fields[2])
            );
            continue;
        }

        // lg   <selector>
        // lg+  <id-or-class>  <selector>
        if ( fields[0] === 'lg' || fields[0] === 'lg+' ) {
            addToDict(
                this.lowGenericHide,
                fields[1],
                fields[0] === 'lg' ? filterPlain : new FilterPlainMore(fields[2])
            );
            continue;
        }

        // e  <entity>  <selector>
        if ( fields[0] === 'e' ) {
            // Special filter: script tags. Not a real CSS selector.
            if ( fields[2].lastIndexOf('script//:', 0) === 0 ) {
                this.createScriptTagFilter(fields[1], fields[2].slice(9));
                continue;
            }
            key = fields[1];
            if ( hasOwn.call(this.entityFilters, key) ) {
                this.entityFilters[key].push(fields[2]);
            } else if ( key !== '__proto__' ) {
                // `__proto__` cannot be used as a plain key: assigning to it
                // would replace the dictionary's prototype.
                this.entityFilters[key] = [fields[2]];
            }
            continue;
        }

        // hlg0  <selector>
        if ( fields[0] === 'hlg0' ) {
            this.highLowGenericHide[fields[1]] = true;
            this.highLowGenericHideCount += 1;
            continue;
        }

        // hmg0  <key>  <selector>
        // A key maps to a single selector (string), or to an array as soon
        // as there is more than one.
        if ( fields[0] === 'hmg0' ) {
            key = fields[1];
            bucket = hasOwn.call(this.highMediumGenericHide, key) ?
                this.highMediumGenericHide[key] :
                undefined;
            if ( bucket === undefined ) {
                this.highMediumGenericHide[key] = fields[2];
            } else if ( Array.isArray(bucket) ) {
                bucket.push(fields[2]);
            } else {
                this.highMediumGenericHide[key] = [bucket, fields[2]];
            }
            this.highMediumGenericHideCount += 1;
            continue;
        }

        // hhg0  <selector>
        if ( fields[0] === 'hhg0' ) {
            this.highHighGenericHideArray.push(fields[1]);
            this.highHighGenericHideCount += 1;
            continue;
        }

        // https://github.com/chrisaljoudi/uBlock/issues/497
        // Generic exception filters: expected to be a rare occurrence.
        if ( fields[0] === 'g1' ) {
            this.genericDonthide.push(fields[1]);
        }
    }
    return textEnd;
};
/******************************************************************************/
2014-09-25 21:44:58 +02:00
2015-02-24 00:31:29 +01:00
// Step over consecutive lines starting with `c` in `text`, beginning at
// offset `lineBeg`, without loading anything.
// Returns the offset of the first line not starting with `c`, or
// `text.length` when there is none.
FilterContainer.prototype.skipCompiledContent = function(text, lineBeg) {
    var lineEnd;
    var textEnd = text.length;

    while ( lineBeg < textEnd ) {
        if ( text.charAt(lineBeg) !== 'c' ) {
            return lineBeg;
        }
        lineEnd = text.indexOf('\n', lineBeg);
        if ( lineEnd === -1 ) {
            lineEnd = textEnd;
        }
        lineBeg = lineEnd + 1;
    }
    return textEnd;
};
/******************************************************************************/
// Register the regex source `s` for `hostname`. Sources registered for the
// same hostname are combined into a single alternation, separated by `|`.
FilterContainer.prototype.createScriptTagFilter = function(hostname, s) {
    var registry = this.scriptTagFilters;
    registry[hostname] = registry.hasOwnProperty(hostname) ?
        registry[hostname] + '|' + s :
        s;
    this.scriptTagFilterCount += 1;
};
/******************************************************************************/
// Collect the script tag regex sources which apply to `hostname`: those
// registered for the hostname itself, for each of its parent hostnames up to
// and including `domain`, and for the part of `domain` preceding its first
// dot.
// Returns the sources joined with `|`, or undefined when there is none.
FilterContainer.prototype.retrieveScriptTagRegex = function(domain, hostname) {
    if ( this.scriptTagFilterCount === 0 ) {
        return;
    }
    var registry = this.scriptTagFilters;
    var sources = [];

    // Walk from the full hostname toward the domain, one label at a time.
    var candidate = hostname;
    var dot;
    while ( true ) {
        if ( registry.hasOwnProperty(candidate) ) {
            sources.push(registry[candidate]);
        }
        if ( candidate === domain ) {
            break;
        }
        dot = candidate.indexOf('.');
        if ( dot === -1 ) {
            break;
        }
        candidate = candidate.slice(dot + 1);
    }

    // Entity: the domain up to its first dot.
    var firstDot = domain.indexOf('.');
    if ( firstDot !== -1 ) {
        var entity = domain.slice(0, firstDot);
        if ( registry.hasOwnProperty(entity) ) {
            sources.push(registry[entity]);
        }
    }

    return sources.length !== 0 ? sources.join('|') : undefined;
};
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// Finalize the container once filters are loaded: the duplicate tracker is
// discarded, the accumulated high-high generic selectors are merged into a
// single selector list, and the container is flagged as frozen.
FilterContainer.prototype.freeze = function() {
    this.duplicateBuster = {};

    // Keep whatever was joined by an earlier call at the head of the list.
    if ( this.highHighGenericHide !== '' ) {
        this.highHighGenericHideArray.unshift(this.highHighGenericHide);
    }
    this.highHighGenericHide = this.highHighGenericHideArray.join(',\n');
    this.highHighGenericHideArray = [];

    this.parser.reset();
    this.frozen = true;
};
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// Return a snapshot ("selfie") of the container's filters. Dictionaries
// holding filter objects are flattened to text; the other members are
// referenced as is.
FilterContainer.prototype.toSelfie = function() {
    // Text form of a dictionary of filters, one record per line:
    //   k \t <encoded key>
    //   <fid> \t <filter selfie>
    // and, when the filter is a bucket, one `<fid> \t <selfie>` line for
    // each filter it contains.
    var selfieFromDict = function(dict) {
        var selfie = [];
        var bucket, ff, n, i;
        for ( var k in dict ) {
            if ( dict.hasOwnProperty(k) === false ) {
                continue;
            }
            // We need to encode the key because there could be a `\n`
            // character in it, which would trip the code at parse time.
            selfie.push('k\t' + encode(k));
            bucket = dict[k];
            selfie.push(bucket.fid + '\t' + bucket.toSelfie());
            if ( bucket.fid !== '[]' ) {
                continue;
            }
            ff = bucket.filters;
            n = ff.length;
            for ( i = 0; i < n; i++ ) {
                selfie.push(ff[i].fid + '\t' + ff[i].toSelfie());
            }
        }
        return selfie.join('\n');
    };

    return {
        acceptedCount: this.acceptedCount,
        duplicateCount: this.duplicateCount,
        hostnameSpecificFilters: selfieFromDict(this.hostnameFilters),
        entitySpecificFilters: this.entityFilters,
        lowGenericHide: selfieFromDict(this.lowGenericHide),
        highLowGenericHide: this.highLowGenericHide,
        highLowGenericHideCount: this.highLowGenericHideCount,
        highMediumGenericHide: this.highMediumGenericHide,
        highMediumGenericHideCount: this.highMediumGenericHideCount,
        highHighGenericHide: this.highHighGenericHide,
        highHighGenericHideCount: this.highHighGenericHideCount,
        genericDonthide: this.genericDonthide,
        scriptTagFilters: this.scriptTagFilters,
        scriptTagFilterCount: this.scriptTagFilterCount
    };
};
/******************************************************************************/
// Restore the container's filters from `selfie`, an object as returned by
// `toSelfie()`. The container is flagged as frozen afterward.
FilterContainer.prototype.fromSelfie = function(selfie) {
    // Filter id => constructor able to rebuild a filter of that type.
    var factories = {
        '[]': FilterBucket,
         '#': FilterPlain,
        '#+': FilterPlainMore,
         'h': FilterHostname,
         'e': FilterEntity
    };

    // Rebuild a dictionary of filters from its text form: a `k` line starts
    // a new key, the next line is the filter stored under that key, and any
    // further line before the next `k` is a filter added to it.
    var dictFromSelfie = function(selfie) {
        var dict = {};
        var dictKey;
        var bucket = null;
        var rawText = selfie;
        var rawEnd = rawText.length;
        var lineBeg = 0, lineEnd;
        var line, pos, what, factory;
        while ( lineBeg < rawEnd ) {
            lineEnd = rawText.indexOf('\n', lineBeg);
            if ( lineEnd < 0 ) {
                lineEnd = rawEnd;
            }
            line = rawText.slice(lineBeg, lineEnd);
            lineBeg = lineEnd + 1;
            pos = line.indexOf('\t');
            what = line.slice(0, pos);
            if ( what === 'k' ) {
                dictKey = decode(line.slice(pos + 1));
                bucket = null;
                continue;
            }
            factory = factories[what];
            if ( bucket === null ) {
                bucket = dict[dictKey] = factory.fromSelfie(line.slice(pos + 1));
                continue;
            }
            // When token key is reused, it can't be anything
            // else than FilterBucket
            bucket.add(factory.fromSelfie(line.slice(pos + 1)));
        }
        return dict;
    };

    this.acceptedCount = selfie.acceptedCount;
    this.duplicateCount = selfie.duplicateCount;
    this.hostnameFilters = dictFromSelfie(selfie.hostnameSpecificFilters);
    this.entityFilters = selfie.entitySpecificFilters;
    this.lowGenericHide = dictFromSelfie(selfie.lowGenericHide);
    this.highLowGenericHide = selfie.highLowGenericHide;
    this.highLowGenericHideCount = selfie.highLowGenericHideCount;
    this.highMediumGenericHide = selfie.highMediumGenericHide;
    this.highMediumGenericHideCount = selfie.highMediumGenericHideCount;
    this.highHighGenericHide = selfie.highHighGenericHide;
    this.highHighGenericHideCount = selfie.highHighGenericHideCount;
    this.genericDonthide = selfie.genericDonthide;
    this.scriptTagFilters = selfie.scriptTagFilters;
    this.scriptTagFilterCount = selfie.scriptTagFilterCount;
    this.frozen = true;
};
/******************************************************************************/
2014-12-26 21:26:44 +01:00
// Schedule a run of `pruneSelectorCacheAsync()` after
// `selectorCachePruneDelay`, unless a timer is already pending or the cache
// holds no more than `selectorCacheCountMin` entries.
FilterContainer.prototype.triggerSelectorCachePruner = function() {
    var timerPending = this.selectorCacheTimer !== null;
    var nothingToPrune = this.selectorCacheCount <= this.selectorCacheCountMin;
    if ( timerPending || nothingToPrune ) {
        return;
    }
    // Of interest: http://fitzgeraldnick.com/weblog/40/
    // http://googlecode.blogspot.ca/2009/07/gmail-for-mobile-html5-series-using.html
    var pruner = this.pruneSelectorCacheAsync.bind(this);
    this.selectorCacheTimer = vAPI.setTimeout(pruner, this.selectorCachePruneDelay);
};
/******************************************************************************/
// Record selectors in the per-hostname selector cache.
//
// details.hostname:  hostname the selectors belong to; the call is ignored
//                    unless this is a non-empty string.
// details.selectors: the selectors to store; the call is ignored when falsy.
// details.type:      passed through to the cache entry's `add()`.
//
// A cache entry is created on first use of a hostname, in which case the
// entry count is bumped and the pruner is (possibly) scheduled.
FilterContainer.prototype.addToSelectorCache = function(details) {
    var hostname = details.hostname;
    if ( typeof hostname !== 'string' || hostname === '' ) {
        return;
    }
    var selectors = details.selectors;
    if ( !selectors ) {
        return;
    }
    var cache = this.selectorCache;
    // Only own properties are cache entries: a hostname such as
    // `constructor` must not resolve to a member inherited from
    // Object.prototype, which has no `add()` method.
    var entry = Object.prototype.hasOwnProperty.call(cache, hostname) ?
        cache[hostname] :
        undefined;
    if ( entry === undefined ) {
        entry = cache[hostname] = SelectorCacheEntry.factory();
        this.selectorCacheCount += 1;
        this.triggerSelectorCachePruner();
    }
    entry.add(selectors, details.type);
};
/******************************************************************************/
// Call `remove(type)` on every selector cache entry whose hostname matches
// `targetHostname`.
//
// targetHostname: `*` matches every entry; otherwise an entry matches when
//                 its hostname is equal to `targetHostname` or is a
//                 subdomain of it (i.e. ends with `.` + targetHostname).
// type:           passed through to each matching entry's `remove()`.
FilterContainer.prototype.removeFromSelectorCache = function(targetHostname, type) {
    var cache = this.selectorCache;
    var matchAll = targetHostname === '*';
    var targetLength = targetHostname.length;
    var hostname;
    for ( hostname in cache ) {
        if ( cache.hasOwnProperty(hostname) === false ) {
            continue;
        }
        if ( matchAll === false ) {
            if ( hostname.slice(0 - targetLength) !== targetHostname ) {
                continue;
            }
            // The character preceding the matched suffix must be a label
            // separator, otherwise `notexample.com` would match
            // `example.com`. The index must be computed from the start of
            // the string: charAt() returns '' for any negative index.
            if (
                hostname.length !== targetLength &&
                hostname.charAt(hostname.length - targetLength - 1) !== '.'
            ) {
                continue;
            }
        }
        cache[hostname].remove(type);
    }
};
/******************************************************************************/
// Look up the selector cache entry for `hostname` and, if there is one,
// delegate to its `retrieve(type, out)`. Does nothing when the hostname has
// no cache entry.
FilterContainer.prototype.retrieveFromSelectorCache = function(hostname, type, out) {
    var cache = this.selectorCache;
    // Only own properties are cache entries: a hostname such as
    // `constructor` must not resolve to a member inherited from
    // Object.prototype, which has no `retrieve()` method.
    if ( Object.prototype.hasOwnProperty.call(cache, hostname) === false ) {
        return;
    }
    var entry = cache[hostname];
    if ( entry === undefined ) {
        return;
    }
    entry.retrieve(type, out);
};
/******************************************************************************/
2014-12-26 21:26:44 +01:00
// Timer callback: evict selector cache entries which have not been accessed
// within `selectorCacheAgeMax`, while always keeping the
// `selectorCacheCountMin` most-recently-used entries. Re-arms the pruner
// when done.
FilterContainer.prototype.pruneSelectorCacheAsync = function() {
    this.selectorCacheTimer = null;
    if ( this.selectorCacheCount <= this.selectorCacheCountMin ) {
        return;
    }
    var cache = this.selectorCache;
    var mostRecentFirst = function(a, b) {
        return cache[b].lastAccessTime - cache[a].lastAccessTime;
    };
    // Sorting can't be avoided: a minimum number of entries must be kept,
    // and these must be the most-recently-used ones. What is left after
    // skipping them is walked from the end, i.e. least-recently-used first.
    var candidates = Object.keys(cache)
        .sort(mostRecentFirst)
        .slice(this.selectorCacheCountMin);
    var cutoff = Date.now() - this.selectorCacheAgeMax;
    var idx, name, candidate;
    for ( idx = candidates.length - 1; idx >= 0; idx -= 1 ) {
        name = candidates[idx];
        candidate = cache[name];
        // All remaining candidates are at least as recent as this one.
        if ( candidate.lastAccessTime > cutoff ) {
            break;
        }
        candidate.dispose();
        delete cache[name];
        this.selectorCacheCount -= 1;
    }
    this.triggerSelectorCachePruner();
};
/******************************************************************************/
2014-08-02 17:40:27 +02:00
// Collect the generic cosmetic filters matching the selectors of a request.
//
// request.selectors:   array of selector keys to look up in the low generic
//                      dictionary; nothing is returned when falsy.
// request.firstSurvey: when truthy, the result also carries the high generic
//                      filters (`highGenerics`) and the generic exceptions
//                      (`donthide`).
//
// Returns `undefined` when no filter was accepted or when the request has no
// selectors, otherwise an object whose `hide` array was filled by the
// matching buckets.
FilterContainer.prototype.retrieveGenericSelectors = function(request) {
    if ( this.acceptedCount === 0 ) {
        return;
    }
    if ( !request.selectors ) {
        return;
    }

    var r = {
        hide: []
    };

    if ( request.firstSurvey ) {
        r.highGenerics = {
            hideLow: this.highLowGenericHide,
            hideLowCount: this.highLowGenericHideCount,
            hideMedium: this.highMediumGenericHide,
            hideMediumCount: this.highMediumGenericHideCount,
            hideHigh: this.highHighGenericHide,
            hideHighCount: this.highHighGenericHideCount
        };
        // https://github.com/chrisaljoudi/uBlock/issues/497
        r.donthide = this.genericDonthide;
    }

    var hideSelectors = r.hide;
    var lowGenerics = this.lowGenericHide;
    var hasOwn = Object.prototype.hasOwnProperty;
    var selectors = request.selectors;
    var i = selectors.length;
    var selector, bucket;
    while ( i-- ) {
        selector = selectors[i];
        // The selectors come from the request: only own properties of the
        // dictionary are buckets. Without this check a selector such as
        // `toString` would resolve to a member inherited from
        // Object.prototype, which has no `retrieve()` method.
        if ( !selector || hasOwn.call(lowGenerics, selector) === false ) {
            continue;
        }
        bucket = lowGenerics[selector];
        if ( bucket ) {
            bucket.retrieve(selector, hideSelectors);
        }
    }

    return r;
};
/******************************************************************************/
2014-08-02 17:40:27 +02:00
// Collect the cosmetic filters which apply specifically to the site
// designated by `request.locationURL`.
//
// Returns `undefined` when the request has no `locationURL`, otherwise an
// object with:
//   ready:                 whether the engine is frozen
//   domain, entity:        domain of the URL's hostname (the hostname itself
//                          when no domain can be derived), and that domain
//                          up to its first `.`
//   skipCosmeticFiltering: true when no filter was accepted
//   cosmeticHide:          hostname filters, entity filters and cached
//                          `cosmetic` selectors
//   cosmeticDonthide:      hostname exception filters
//   netHide:               cached `net` selectors
//   netCollapse:           the `collapseBlocked` user setting
FilterContainer.prototype.retrieveDomainSelectors = function(request) {
    if ( !request.locationURL ) {
        return;
    }

    var hostname = this.µburi.hostnameFromURI(request.locationURL);
    var domain = this.µburi.domainFromHostname(hostname) || hostname;
    var pos = domain.indexOf('.');

    // https://github.com/chrisaljoudi/uBlock/issues/587
    // r.ready will tell the content script the cosmetic filtering engine is
    // up and ready.
    var r = {
        ready: this.frozen,
        domain: domain,
        entity: pos === -1 ? domain : domain.slice(0, pos),
        skipCosmeticFiltering: this.acceptedCount === 0,
        cosmeticHide: [],
        cosmeticDonthide: [],
        netHide: [],
        netCollapse: µb.userSettings.collapseBlocked
    };

    var hash, bucket;

    hash = makeHash(0, domain, this.domainHashMask);
    if ( (bucket = this.hostnameFilters[hash]) ) {
        bucket.retrieve(hostname, r.cosmeticHide);
    }
    // https://github.com/chrisaljoudi/uBlock/issues/188
    // Special bucket for those filters without a valid domain name as per PSL
    if ( (bucket = this.hostnameFilters[this.type0NoDomainHash]) ) {
        bucket.retrieve(hostname, r.cosmeticHide);
    }

    // Entity filter buckets are always plain js arrays.
    // The entity is derived from the page URL: only own properties are
    // buckets. Without this check the entity of `constructor.com` would
    // resolve to the function inherited from Object.prototype, which would
    // then be appended to the list of selectors.
    if (
        Object.prototype.hasOwnProperty.call(this.entityFilters, r.entity) &&
        (bucket = this.entityFilters[r.entity])
    ) {
        r.cosmeticHide = r.cosmeticHide.concat(bucket);
    }
    // No entity exceptions as of now

    hash = makeHash(1, domain, this.domainHashMask);
    if ( (bucket = this.hostnameFilters[hash]) ) {
        bucket.retrieve(hostname, r.cosmeticDonthide);
    }
    // https://github.com/chrisaljoudi/uBlock/issues/188
    // Special bucket for those filters without a valid domain name as per PSL
    if ( (bucket = this.hostnameFilters[this.type1NoDomainHash]) ) {
        bucket.retrieve(hostname, r.cosmeticDonthide);
    }

    this.retrieveFromSelectorCache(hostname, 'cosmetic', r.cosmeticHide);
    this.retrieveFromSelectorCache(hostname, 'net', r.netHide);

    return r;
};
/******************************************************************************/
// Number of distinct filters held by this container: the accepted filters
// minus those counted as duplicates.
FilterContainer.prototype.getFilterCount = function() {
    var accepted = this.acceptedCount;
    var duplicates = this.duplicateCount;
    return accepted - duplicates;
};
/******************************************************************************/
return new FilterContainer();
/******************************************************************************/
})();
/******************************************************************************/