// File: uBlock/src/js/static-net-filtering.js (JavaScript, 2282 lines, 68 KiB)
/*******************************************************************************
µBlock - a Chromium browser extension to block requests.
Copyright (C) 2014 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
/* jshint bitwise: false, esnext: true */
2014-06-24 00:42:43 +02:00
/* global µBlock */
// Older Safari throws an exception for const when it's used with 'use strict'.
// 'use strict';
2014-06-24 00:42:43 +02:00
/******************************************************************************/
µBlock.staticNetFilteringEngine = (function(){
2014-06-24 00:42:43 +02:00
/******************************************************************************/
2014-09-14 22:20:40 +02:00
// Short local alias for the global µBlock namespace object.
var µb = µBlock;
2014-07-14 17:24:59 +02:00
// fedcba9876543210
2014-09-20 16:44:04 +02:00
// | | | |||
// | | | |||
// | | | |||
// | | | |||
// | | | ||+---- bit 0: [BlockAction | AllowAction]
// | | | |+---- bit 1: `important`
// | | | +---- bit 2-3: party [0 - 3]
// | | +---- bit 4-7: type [0 - 15]
// | +---- bit 8-14: unused
// +---- bit 15: never use! (to ensure valid unicode character)
// Bit 0: action.
const BlockAction = 0 << 0;
const AllowAction = 1 << 0;
// XOR-ing a key with ToggleAction flips it between block and allow.
const ToggleAction = BlockAction ^ AllowAction;

// Bit 1: `important` flag.
const Important = 1 << 1;

// Bits 2-3: party.
const AnyParty = 0 << 2;
const FirstParty = 1 << 2;
const ThirdParty = 2 << 2;

// Bits 4-7: request type. Value 1 means "any type"; named types start at 2.
const AnyType = 1 << 4;
var typeNameToTypeValue = {
    'stylesheet': 2 << 4,
    'image': 3 << 4,
    'object': 4 << 4,
    'script': 5 << 4,
    'xmlhttprequest': 6 << 4,
    'sub_frame': 7 << 4,
    'other': 8 << 4,
    'cosmetic-filtering': 13 << 4,
    'inline-script': 14 << 4,
    'popup': 15 << 4
};
var typeOtherValue = typeNameToTypeValue.other;

// All network request types to bitmap
// bring origin to 0 (from 4 -- see typeNameToTypeValue)
// add 2 = number of left shift to use
// left-shift 1 by the above-calculated value
// subtract 4 to set all type bits, *except* for 2 lsb
// https://github.com/gorhill/uBlock/issues/723
// The 2 lsb *must* be zeroed
// With 'other' === 8 << 4 this evaluates to 1020 (0b1111111100).
var allNetRequestTypesBitmap = (1 << ((typeOtherValue >>> 4) + 2)) - 4;

// Frequently used flag combinations.
const BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty;
const BlockAnyType = BlockAction | AnyType;
const BlockAnyParty = BlockAction | AnyParty;
const AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty;
const AllowAnyType = AllowAction | AnyType;
const AllowAnyParty = AllowAction | AnyParty;

// A whole filter which looks like a plain hostname.
var reHostnameRule = /^[0-9a-z][0-9a-z.-]+[0-9a-z]$/;
// NOTE: the two regexes below carry the `g` flag, hence test()/exec() on them
// are stateful through `lastIndex`.
var reHostnameToken = /^[0-9a-z]+/g;
var reGoodToken = /[%0-9a-z]{2,}/g;
// Characters which mark the end of the hostname part of a URL.
var reURLPostHostnameAnchors = /[\/?#]/;
2014-06-24 00:42:43 +02:00
// ABP filters: https://adblockplus.org/en/filters
// regex tester: http://regex101.com/
/******************************************************************************/
2015-02-05 00:06:31 +01:00
// See the following as short-lived registers, used during evaluation. They are
// valid until the next evaluation.
// Page hostname for the current evaluation: the *Hostname filter classes
// compare its tail against their own `hostname`.
var pageHostnameRegister = '';
// Companion register for the request's hostname -- presumably set by the
// matching entry points further down; confirm against callers.
var requestHostnameRegister = '';
/******************************************************************************/
// No-op by default; the commented-out implementation below can be enabled to
// dump bucket statistics when debugging.
var histogram = function() {};
2014-06-24 00:42:43 +02:00
/*
histogram = function(label, categories) {
2014-06-24 00:42:43 +02:00
var h = [],
categoryBucket;
for ( var k in categories ) {
// No need for hasOwnProperty() here: there is no prototype chain.
2014-06-24 00:42:43 +02:00
categoryBucket = categories[k];
for ( var kk in categoryBucket ) {
// No need for hasOwnProperty() here: there is no prototype chain.
2014-06-24 00:42:43 +02:00
filterBucket = categoryBucket[kk];
h.push({
k: k.charCodeAt(0).toString(2) + ' ' + kk,
2014-06-24 00:42:43 +02:00
n: filterBucket instanceof FilterBucket ? filterBucket.filters.length : 1
});
}
}
console.log('Histogram %s', label);
var total = h.length;
h.sort(function(a, b) { return b.n - a.n; });
// Find indices of entries of interest
var target = 2;
for ( var i = 0; i < total; i++ ) {
if ( h[i].n === target ) {
console.log('\tEntries with only %d filter(s) start at index %s (key = "%s")', target, i, h[i].k);
target -= 1;
}
}
h = h.slice(0, 50);
h.forEach(function(v) {
console.log('\tkey=%s count=%d', v.k, v.n);
});
console.log('\tTotal buckets count: %d', total);
};
*/
2014-09-08 23:46:58 +02:00
/******************************************************************************/
// Could be replaced with encodeURIComponent/decodeURIComponent,
// which seems faster on Firefox.
// Serialization helpers: plain aliases of the JSON built-ins.
var encode = JSON.stringify;
var decode = JSON.parse;

// Local reference to the global parseInt.
var cachedParseInt = parseInt;

// Parse a string as a base-10 integer (NaN when no leading digits).
var atoi = function(digits) {
    var value = cachedParseInt(digits, 10);
    return value;
};
// Tell whether `hostname` is `firstPartyDomain` itself or one of its
// subdomains.
var isFirstParty = function(firstPartyDomain, hostname) {
    var domainLen = firstPartyDomain.length;
    var tailMatches = hostname.slice(0 - domainLen) === firstPartyDomain;
    if ( tailMatches === false ) {
        return false;
    }
    // The character just before the matched tail must be a label separator,
    // or not exist at all (exact match): this keeps 'anotherexample.com'
    // from being mistaken for 'example.com'.
    var boundary = hostname.charAt(hostname.length - domainLen - 1);
    return boundary === '' || boundary === '.';
};
2014-06-24 00:42:43 +02:00
/*******************************************************************************
Filters family tree:
- plain (no wildcard)
- anywhere
- no hostname
- specific hostname
- anchored at start
- no hostname
- specific hostname
- anchored at end
- no hostname
- specific hostname
2014-09-19 16:59:44 +02:00
- anchored within hostname
- no hostname
- specific hostname (not implemented)
2014-06-24 00:42:43 +02:00
- one wildcard
- anywhere
- no hostname
- specific hostname
- anchored at start
- no hostname
- specific hostname
- anchored at end
- no hostname
- specific hostname
2014-09-19 16:59:44 +02:00
- anchored within hostname
- no hostname (not implemented)
- specific hostname (not implemented)
2014-06-24 00:42:43 +02:00
- more than one wildcard
- anywhere
- no hostname
- specific hostname
- anchored at start
- no hostname
- specific hostname
- anchored at end
- no hostname
- specific hostname
2014-09-19 16:59:44 +02:00
- anchored within hostname
- no hostname (not implemented)
- specific hostname (not implemented)
2014-06-24 00:42:43 +02:00
*/
/******************************************************************************/
// Plain filter: literal string `s`, no wildcard, no anchor.
// `tokenBeg` is the offset within `s` of the token the filter is keyed on.
var FilterPlain = function(s, tokenBeg) {
    this.s = s;
    this.tokenBeg = tokenBeg;
};

// Match when `s` is found in `url` at the position obtained by aligning the
// filter's token offset with the `tokenBeg` offset supplied for the URL.
FilterPlain.prototype.match = function(url, tokenBeg) {
    return url.substr(tokenBeg - this.tokenBeg, this.s.length) === this.s;
};

// Short identifier of this filter class.
FilterPlain.prototype.fid = 'a';

// Filter in its textual (ABP) form.
FilterPlain.prototype.toString = function() {
    return this.s;
};

// Serialize as tab-separated fields: s, tokenBeg.
FilterPlain.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.tokenBeg;
};

// Rebuild an instance from the output of toSelfie().
FilterPlain.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlain(s.slice(0, pos), atoi(s.slice(pos + 1)));
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// Plain filter restricted to pages whose hostname ends with `hostname`.
var FilterPlainHostname = function(s, tokenBeg, hostname) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    this.hostname = hostname;
};

// The page hostname is read from pageHostnameRegister.
// NOTE(review): this is a pure suffix comparison, so 'badexample.com' also
// satisfies hostname 'example.com'.
FilterPlainHostname.prototype.match = function(url, tokenBeg) {
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           url.substr(tokenBeg - this.tokenBeg, this.s.length) === this.s;
};

// Short identifier of this filter class.
FilterPlainHostname.prototype.fid = 'ah';

// Filter in its textual (ABP) form.
FilterPlainHostname.prototype.toString = function() {
    return this.s + '$domain=' + this.hostname;
};

// Serialize as tab-separated fields: s, tokenBeg, hostname.
FilterPlainHostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.tokenBeg + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterPlainHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterPlainHostname(args[0], atoi(args[1]), args[2]);
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Plain filter whose text starts exactly at the token (token offset 0).
var FilterPlainPrefix0 = function(s) {
    this.s = s;
};

// Match when `s` is found in `url` at offset `tokenBeg`.
FilterPlainPrefix0.prototype.match = function(url, tokenBeg) {
    return url.substr(tokenBeg, this.s.length) === this.s;
};

// Short identifier of this filter class.
FilterPlainPrefix0.prototype.fid = '0a';

// Filter in its textual (ABP) form.
FilterPlainPrefix0.prototype.toString = function() {
    return this.s;
};

// The serialized form is the filter string itself.
FilterPlainPrefix0.prototype.toSelfie = function() {
    return this.s;
};

// Rebuild an instance from the output of toSelfie().
FilterPlainPrefix0.fromSelfie = function(s) {
    return new FilterPlainPrefix0(s);
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// Token-offset-0 plain filter restricted to pages whose hostname ends with
// `hostname`.
var FilterPlainPrefix0Hostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// The page hostname is read from pageHostnameRegister.
// NOTE(review): pure suffix comparison -- 'badexample.com' also satisfies
// hostname 'example.com'.
FilterPlainPrefix0Hostname.prototype.match = function(url, tokenBeg) {
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           url.substr(tokenBeg, this.s.length) === this.s;
};

// Short identifier of this filter class.
FilterPlainPrefix0Hostname.prototype.fid = '0ah';

// Filter in its textual (ABP) form.
FilterPlainPrefix0Hostname.prototype.toString = function() {
    return this.s + '$domain=' + this.hostname;
};

// Serialize as tab-separated fields: s, hostname.
FilterPlainPrefix0Hostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterPlainPrefix0Hostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainPrefix0Hostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Plain filter whose token starts one character into the filter text
// (token offset 1).
var FilterPlainPrefix1 = function(s) {
    this.s = s;
};

// Match when `s` is found in `url` one character before `tokenBeg`.
FilterPlainPrefix1.prototype.match = function(url, tokenBeg) {
    return url.substr(tokenBeg - 1, this.s.length) === this.s;
};

// Short identifier of this filter class.
FilterPlainPrefix1.prototype.fid = '1a';

// Filter in its textual (ABP) form.
FilterPlainPrefix1.prototype.toString = function() {
    return this.s;
};

// The serialized form is the filter string itself.
FilterPlainPrefix1.prototype.toSelfie = function() {
    return this.s;
};

// Rebuild an instance from the output of toSelfie().
FilterPlainPrefix1.fromSelfie = function(s) {
    return new FilterPlainPrefix1(s);
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// Token-offset-1 plain filter restricted to pages whose hostname ends with
// `hostname`.
var FilterPlainPrefix1Hostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// The page hostname is read from pageHostnameRegister.
// NOTE(review): pure suffix comparison -- 'badexample.com' also satisfies
// hostname 'example.com'.
FilterPlainPrefix1Hostname.prototype.match = function(url, tokenBeg) {
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           url.substr(tokenBeg - 1, this.s.length) === this.s;
};

// Short identifier of this filter class.
FilterPlainPrefix1Hostname.prototype.fid = '1ah';

// Filter in its textual (ABP) form.
FilterPlainPrefix1Hostname.prototype.toString = function() {
    return this.s + '$domain=' + this.hostname;
};

// Serialize as tab-separated fields: s, hostname.
FilterPlainPrefix1Hostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterPlainPrefix1Hostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainPrefix1Hostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Plain filter anchored at the start of the URL (`|s`).
var FilterPlainLeftAnchored = function(s) {
    this.s = s;
};

// Match when `url` begins with `s`.
FilterPlainLeftAnchored.prototype.match = function(url) {
    return url.slice(0, this.s.length) === this.s;
};

// Short identifier of this filter class.
FilterPlainLeftAnchored.prototype.fid = '|a';

// Filter in its textual (ABP) form.
FilterPlainLeftAnchored.prototype.toString = function() {
    return '|' + this.s;
};

// The serialized form is the filter string, without the anchor.
FilterPlainLeftAnchored.prototype.toSelfie = function() {
    return this.s;
};

// Rebuild an instance from the output of toSelfie().
FilterPlainLeftAnchored.fromSelfie = function(s) {
    return new FilterPlainLeftAnchored(s);
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// Left-anchored plain filter restricted to pages whose hostname ends with
// `hostname`.
var FilterPlainLeftAnchoredHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// The page hostname is read from pageHostnameRegister.
// NOTE(review): pure suffix comparison -- 'badexample.com' also satisfies
// hostname 'example.com'.
FilterPlainLeftAnchoredHostname.prototype.match = function(url) {
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           url.slice(0, this.s.length) === this.s;
};

// Short identifier of this filter class.
FilterPlainLeftAnchoredHostname.prototype.fid = '|ah';

// Filter in its textual (ABP) form.
FilterPlainLeftAnchoredHostname.prototype.toString = function() {
    return '|' + this.s + '$domain=' + this.hostname;
};

// Serialize as tab-separated fields: s, hostname.
FilterPlainLeftAnchoredHostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterPlainLeftAnchoredHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainLeftAnchoredHostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Plain filter anchored at the end of the URL (`s|`).
var FilterPlainRightAnchored = function(s) {
    this.s = s;
};

// Match when `url` ends with `s`.
FilterPlainRightAnchored.prototype.match = function(url) {
    return url.slice(-this.s.length) === this.s;
};

// Short identifier of this filter class.
FilterPlainRightAnchored.prototype.fid = 'a|';

// Filter in its textual (ABP) form.
FilterPlainRightAnchored.prototype.toString = function() {
    return this.s + '|';
};

// The serialized form is the filter string, without the anchor.
FilterPlainRightAnchored.prototype.toSelfie = function() {
    return this.s;
};

// Rebuild an instance from the output of toSelfie().
FilterPlainRightAnchored.fromSelfie = function(s) {
    return new FilterPlainRightAnchored(s);
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// Right-anchored plain filter restricted to pages whose hostname ends with
// `hostname`.
var FilterPlainRightAnchoredHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// The page hostname is read from pageHostnameRegister.
// NOTE(review): pure suffix comparison -- 'badexample.com' also satisfies
// hostname 'example.com'.
FilterPlainRightAnchoredHostname.prototype.match = function(url) {
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           url.slice(-this.s.length) === this.s;
};

// Short identifier of this filter class.
FilterPlainRightAnchoredHostname.prototype.fid = 'a|h';

// Filter in its textual (ABP) form.
FilterPlainRightAnchoredHostname.prototype.toString = function() {
    return this.s + '|$domain=' + this.hostname;
};

// Serialize as tab-separated fields: s, hostname.
FilterPlainRightAnchoredHostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterPlainRightAnchoredHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainRightAnchoredHostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
2014-09-19 16:59:44 +02:00
// https://github.com/gorhill/uBlock/issues/235
// The filter is left-anchored somewhere within the hostname part of the URL.
// Plain filter of the `||s` kind: `s` must start somewhere inside the
// hostname part of the URL.
var FilterPlainHnAnchored = function(s) {
    this.s = s;
};

// Match when `s` sits at `tokenBeg` in `url` and nothing between the end of
// the scheme separator ('://') and `tokenBeg` terminates the hostname.
FilterPlainHnAnchored.prototype.match = function(url, tokenBeg) {
    var needle = this.s;
    if ( url.substr(tokenBeg, needle.length) === needle ) {
        var schemeEnd = url.indexOf('://');
        if ( schemeEnd === -1 ) {
            return false;
        }
        // Only hostname-valid characters are allowed to the left of the token.
        var leftOfToken = url.slice(schemeEnd + 3, tokenBeg);
        return !reURLPostHostnameAnchors.test(leftOfToken);
    }
    return false;
};

// Short identifier of this filter class.
FilterPlainHnAnchored.prototype.fid = 'h|a';

// Filter in its textual (ABP) form.
FilterPlainHnAnchored.prototype.toString = function() {
    return ['||', this.s].join('');
};

// The serialized form is the filter string, without the anchor.
FilterPlainHnAnchored.prototype.toSelfie = function() {
    return this.s;
};

// Rebuild an instance from the output of toSelfie().
FilterPlainHnAnchored.fromSelfie = function(selfie) {
    return new FilterPlainHnAnchored(selfie);
};
// https://www.youtube.com/watch?v=71YS6xDB-E4
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// With a single wildcard, regex is not optimal.
// See:
2015-02-06 01:30:20 +01:00
// http://jsperf.com/regexp-vs-indexof-abp-miss/5
// http://jsperf.com/regexp-vs-indexof-abp-hit/4
2014-06-24 00:42:43 +02:00
2014-09-08 23:46:58 +02:00
// Filter with exactly one wildcard: `lSegment*rSegment`.
// `tokenBeg` is the offset of the keyed token within the filter text.
var FilterSingleWildcard = function(lSegment, rSegment, tokenBeg) {
    this.tokenBeg = tokenBeg;
    this.lSegment = lSegment;
    this.rSegment = rSegment;
};

// Match when the left segment sits at the aligned position in `url` and the
// right segment occurs anywhere after it.
FilterSingleWildcard.prototype.match = function(url, tokenBeg) {
    tokenBeg -= this.tokenBeg;
    return url.substr(tokenBeg, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0;
};

// Short identifier of this filter class.
FilterSingleWildcard.prototype.fid = '*';

// Filter in its textual (ABP) form.
FilterSingleWildcard.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment;
};

// Serialize as tab-separated fields: lSegment, rSegment, tokenBeg.
FilterSingleWildcard.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment + '\t' +
           this.tokenBeg;
};

// Rebuild an instance from the output of toSelfie().
FilterSingleWildcard.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterSingleWildcard(args[0], args[1], atoi(args[2]));
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// Single-wildcard filter restricted to pages whose hostname ends with
// `hostname`.
var FilterSingleWildcardHostname = function(lSegment, rSegment, tokenBeg, hostname) {
    this.tokenBeg = tokenBeg;
    this.lSegment = lSegment;
    this.rSegment = rSegment;
    this.hostname = hostname;
};

// The page hostname is read from pageHostnameRegister.
// NOTE(review): pure suffix comparison -- 'badexample.com' also satisfies
// hostname 'example.com'.
FilterSingleWildcardHostname.prototype.match = function(url, tokenBeg) {
    tokenBeg -= this.tokenBeg;
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           url.substr(tokenBeg, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0;
};

// Short identifier of this filter class.
FilterSingleWildcardHostname.prototype.fid = '*h';

// Filter in its textual (ABP) form.
FilterSingleWildcardHostname.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment + '$domain=' + this.hostname;
};

// Serialize as tab-separated fields: lSegment, rSegment, tokenBeg, hostname.
FilterSingleWildcardHostname.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment + '\t' +
           this.tokenBeg + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterSingleWildcardHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterSingleWildcardHostname(args[0], args[1], atoi(args[2]), args[3]);
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// Single-wildcard filter whose left segment starts exactly at the token
// (token offset 0).
var FilterSingleWildcardPrefix0 = function(lSegment, rSegment) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
};

// Match when the left segment sits at `tokenBeg` in `url` and the right
// segment occurs anywhere after it.
FilterSingleWildcardPrefix0.prototype.match = function(url, tokenBeg) {
    return url.substr(tokenBeg, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0;
};

// Short identifier of this filter class.
FilterSingleWildcardPrefix0.prototype.fid = '0*';

// Filter in its textual (ABP) form.
FilterSingleWildcardPrefix0.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment;
};

// Serialize as tab-separated fields: lSegment, rSegment.
FilterSingleWildcardPrefix0.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment;
};

// Rebuild an instance from the output of toSelfie().
FilterSingleWildcardPrefix0.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterSingleWildcardPrefix0(s.slice(0, pos), s.slice(pos + 1));
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// Token-offset-0 single-wildcard filter restricted to pages whose hostname
// ends with `hostname`.
var FilterSingleWildcardPrefix0Hostname = function(lSegment, rSegment, hostname) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
    this.hostname = hostname;
};

// The page hostname is read from pageHostnameRegister.
// NOTE(review): pure suffix comparison -- 'badexample.com' also satisfies
// hostname 'example.com'.
FilterSingleWildcardPrefix0Hostname.prototype.match = function(url, tokenBeg) {
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           url.substr(tokenBeg, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0;
};

// Short identifier of this filter class.
FilterSingleWildcardPrefix0Hostname.prototype.fid = '0*h';

// Filter in its textual (ABP) form.
FilterSingleWildcardPrefix0Hostname.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment + '$domain=' + this.hostname;
};

// Serialize as tab-separated fields: lSegment, rSegment, hostname.
FilterSingleWildcardPrefix0Hostname.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterSingleWildcardPrefix0Hostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterSingleWildcardPrefix0Hostname(args[0], args[1], args[2]);
};
2014-06-24 00:42:43 +02:00
2014-09-08 23:46:58 +02:00
/******************************************************************************/
// Single-wildcard filter anchored at the start of the URL:
// `|lSegment*rSegment`.
var FilterSingleWildcardLeftAnchored = function(lSegment, rSegment) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
};

// Match when `url` begins with the left segment and the right segment occurs
// anywhere after it.
FilterSingleWildcardLeftAnchored.prototype.match = function(url) {
    return url.slice(0, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, this.lSegment.length) > 0;
};

// Short identifier of this filter class.
FilterSingleWildcardLeftAnchored.prototype.fid = '|*';

// Filter in its textual (ABP) form.
FilterSingleWildcardLeftAnchored.prototype.toString = function() {
    return '|' + this.lSegment + '*' + this.rSegment;
};

// Serialize as tab-separated fields: lSegment, rSegment.
FilterSingleWildcardLeftAnchored.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment;
};

// Rebuild an instance from the output of toSelfie().
FilterSingleWildcardLeftAnchored.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterSingleWildcardLeftAnchored(s.slice(0, pos), s.slice(pos + 1));
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// Left-anchored single-wildcard filter restricted to pages whose hostname
// ends with `hostname`.
var FilterSingleWildcardLeftAnchoredHostname = function(lSegment, rSegment, hostname) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
    this.hostname = hostname;
};

// The page hostname is read from pageHostnameRegister.
// NOTE(review): pure suffix comparison -- 'badexample.com' also satisfies
// hostname 'example.com'.
FilterSingleWildcardLeftAnchoredHostname.prototype.match = function(url) {
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           url.slice(0, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, this.lSegment.length) > 0;
};

// Short identifier of this filter class.
FilterSingleWildcardLeftAnchoredHostname.prototype.fid = '|*h';

// Filter in its textual (ABP) form.
FilterSingleWildcardLeftAnchoredHostname.prototype.toString = function() {
    return '|' + this.lSegment + '*' + this.rSegment + '$domain=' + this.hostname;
};

// Serialize as tab-separated fields: lSegment, rSegment, hostname.
FilterSingleWildcardLeftAnchoredHostname.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterSingleWildcardLeftAnchoredHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterSingleWildcardLeftAnchoredHostname(args[0], args[1], args[2]);
};
2014-06-24 00:42:43 +02:00
2014-09-08 23:46:58 +02:00
/******************************************************************************/
// Single-wildcard filter anchored at the end of the URL:
// `lSegment*rSegment|`.
var FilterSingleWildcardRightAnchored = function(lSegment, rSegment) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
};

// Match when `url` ends with the right segment and the left segment occurs
// somewhere before it, without overlapping it.
FilterSingleWildcardRightAnchored.prototype.match = function(url) {
    return url.slice(-this.rSegment.length) === this.rSegment &&
           url.lastIndexOf(this.lSegment, url.length - this.rSegment.length - this.lSegment.length) >= 0;
};

// Short identifier of this filter class.
FilterSingleWildcardRightAnchored.prototype.fid = '*|';

// Filter in its textual (ABP) form.
FilterSingleWildcardRightAnchored.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment + '|';
};

// Serialize as tab-separated fields: lSegment, rSegment.
FilterSingleWildcardRightAnchored.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment;
};

// Rebuild an instance from the output of toSelfie().
FilterSingleWildcardRightAnchored.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterSingleWildcardRightAnchored(s.slice(0, pos), s.slice(pos + 1));
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// Right-anchored single-wildcard filter restricted to pages whose hostname
// ends with `hostname`.
var FilterSingleWildcardRightAnchoredHostname = function(lSegment, rSegment, hostname) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
    this.hostname = hostname;
};

// The page hostname is read from pageHostnameRegister.
// NOTE(review): pure suffix comparison -- 'badexample.com' also satisfies
// hostname 'example.com'.
FilterSingleWildcardRightAnchoredHostname.prototype.match = function(url) {
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           url.slice(-this.rSegment.length) === this.rSegment &&
           url.lastIndexOf(this.lSegment, url.length - this.rSegment.length - this.lSegment.length) >= 0;
};

// Short identifier of this filter class.
FilterSingleWildcardRightAnchoredHostname.prototype.fid = '*|h';

// Filter in its textual (ABP) form.
FilterSingleWildcardRightAnchoredHostname.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment + '|$domain=' + this.hostname;
};

// Serialize as tab-separated fields: lSegment, rSegment, hostname.
FilterSingleWildcardRightAnchoredHostname.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterSingleWildcardRightAnchoredHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterSingleWildcardRightAnchoredHostname(args[0], args[1], args[2]);
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// With many wildcards, a regex is best.
// Ref: regex escaper taken from:
// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
// modified for the purpose here.
// Filter with more than one wildcard, evaluated through a regex.
// `tokenBeg` is the offset of the keyed token within the filter text.
var FilterManyWildcards = function(s, tokenBeg) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    // Escape regex metacharacters, then turn each `*` into `.*`. The regex is
    // anchored at the start since match() slices the URL at the position
    // where the filter text must begin.
    this.re = new RegExp('^' + s.replace(/([.+?^=!:${}()|\[\]\/\\])/g, '\\$1').replace(/\*/g, '.*'));
};

// Match the regex against `url`, starting where the filter text must begin.
FilterManyWildcards.prototype.match = function(url, tokenBeg) {
    return this.re.test(url.slice(tokenBeg - this.tokenBeg));
};

// Short identifier of this filter class.
FilterManyWildcards.prototype.fid = '*+';

// Filter in its textual (ABP) form.
FilterManyWildcards.prototype.toString = function() {
    return this.s;
};

// Serialize as tab-separated fields: s, tokenBeg.
FilterManyWildcards.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.tokenBeg;
};

// Rebuild an instance from the output of toSelfie().
FilterManyWildcards.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterManyWildcards(s.slice(0, pos), atoi(s.slice(pos + 1)));
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// Many-wildcards filter restricted to pages whose hostname ends with
// `hostname`.
var FilterManyWildcardsHostname = function(s, tokenBeg, hostname) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    // Escape regex metacharacters, then turn each `*` into `.*`.
    this.re = new RegExp('^' + s.replace(/([.+?^=!:${}()|\[\]\/\\])/g, '\\$1').replace(/\*/g, '.*'));
    this.hostname = hostname;
};

// The hostname is tested first; the page hostname is read from
// pageHostnameRegister.
// NOTE(review): pure suffix comparison -- 'badexample.com' also satisfies
// hostname 'example.com'.
FilterManyWildcardsHostname.prototype.match = function(url, tokenBeg) {
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           this.re.test(url.slice(tokenBeg - this.tokenBeg));
};

// Short identifier of this filter class.
FilterManyWildcardsHostname.prototype.fid = '*+h';

// Filter in its textual (ABP) form.
FilterManyWildcardsHostname.prototype.toString = function() {
    return this.s + '$domain=' + this.hostname;
};

// Serialize as tab-separated fields: s, tokenBeg, hostname.
FilterManyWildcardsHostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.tokenBeg + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterManyWildcardsHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterManyWildcardsHostname(args[0], atoi(args[1]), args[2]);
};
2015-01-23 17:32:49 +01:00
/******************************************************************************/
// Regex-based filters
// Filter expressed directly as a regular expression; `s` is the regex source.
var FilterRegex = function(s) {
    var compiled = new RegExp(s);
    this.re = compiled;
};

// Match when the regex finds a hit anywhere in `url`.
FilterRegex.prototype.match = function(url) {
    var re = this.re;
    return re.test(url);
};

// Short identifier of this filter class.
FilterRegex.prototype.fid = '//';

// Filter in its textual form: the regex source between slashes.
FilterRegex.prototype.toString = function() {
    var source = this.re.source;
    return '/' + source + '/';
};

// The serialized form is the regex source.
FilterRegex.prototype.toSelfie = function() {
    var source = this.re.source;
    return source;
};

// Rebuild an instance from the output of toSelfie().
FilterRegex.fromSelfie = function(source) {
    return new FilterRegex(source);
};
/******************************************************************************/
// Regex-based filter restricted to pages whose hostname ends with
// `hostname`; `s` is the regex source.
var FilterRegexHostname = function(s, hostname) {
    this.re = new RegExp(s);
    this.hostname = hostname;
};

// The page hostname is read from pageHostnameRegister.
// NOTE(review): pure suffix comparison -- 'badexample.com' also satisfies
// hostname 'example.com'.
FilterRegexHostname.prototype.match = function(url) {
    // test hostname first, it's cheaper than evaluating a regex
    return pageHostnameRegister.slice(-this.hostname.length) === this.hostname &&
           this.re.test(url);
};

// Short identifier of this filter class.
FilterRegexHostname.prototype.fid = '//h';

// Filter in its textual form.
FilterRegexHostname.prototype.toString = function() {
    return '/' + this.re.source + '/$domain=' + this.hostname;
};

// Serialize as tab-separated fields: regex source, hostname.
FilterRegexHostname.prototype.toSelfie = function() {
    return this.re.source + '\t' +
           this.hostname;
};

// Rebuild an instance from the output of toSelfie().
FilterRegexHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterRegexHostname(s.slice(0, pos), s.slice(pos + 1));
};
/******************************************************************************/
2014-09-08 23:46:58 +02:00
/******************************************************************************/
2015-02-05 00:06:31 +01:00
// Dictionary of hostnames
2015-02-05 14:45:29 +01:00
//
// FilterHostnameDict is the main reason why uBlock is not equipped to keep
// track of which filter comes from which list, and also why it's not equipped
// to be able to disable a specific filter -- other than through using a
// counter-filter.
//
// On the other hand it is also *one* of the reasons uBlock's memory and CPU
// footprint is smaller. Compacting huge lists of hostnames into single strings
// saves a lot of memory compared to having one dictionary entry per hostname.
2015-02-05 00:06:31 +01:00
var FilterHostnameDict = function() {
this.h = ''; // short-lived register
this.dict = {};
this.count = 0;
};
// Somewhat arbitrary: I need to come up with hard data to know at which
// point binary search is better than indexOf.
//
// http://jsperf.com/string-indexof-vs-binary-search
// Tuning above performance benchmark, it appears 250 is roughly a good value
// for both Chromium/Firefox.
// Example of benchmark values: '------30', '-----100', etc. -- the
// needle string must always be 8-character long.
FilterHostnameDict.prototype.cutoff = 250;
// Probably not needed under normal circumstances.
FilterHostnameDict.prototype.meltBucket = function(len, bucket) {
var map = {};
if ( bucket.charAt(0) === ' ' ) {
bucket.trim().split(' ').map(function(k) {
map[k] = true;
});
} else {
var offset = 0;
while ( offset < bucket.length ) {
map[bucket.substring(offset, len)] = true;
offset += len;
}
}
return map;
};
2015-02-05 14:45:29 +01:00
// How the key is derived dictates the number and size of buckets:
// - more bits = more buckets = higher memory footprint
// - less bits = less buckets = lower memory footprint
// - binary search mitigates very well the fact that some buckets may grow
// large when fewer bits are used (or when a large number of items are
// stored). Binary search also mitigate to the point of non-issue the
// CPU footprint requirement with large buckets, as far as reference
// benchmark shows.
2015-02-05 00:06:31 +01:00
//
// A hash key capable of better spread while being as fast would be
// just great.
// Derive the two-character bucket key of a hostname: the first character
// packs the least significant bit of five sampled characters, the second
// character is the hostname length, capped at 255.
FilterHostnameDict.prototype.makeKey = function(hn) {
    var len = hn.length > 255 ? 255 : hn.length;
    var eighth = len >>> 3;
    var quarter = len >>> 2;
    var half = len >>> 1;
    // http://jsperf.com/makekey-concat-vs-join/3
    // Be sure the msb is not set, this will guarantee a valid unicode
    // character (because 0xD800-0xDFFF).
    // Bits 13 and 9 (samples at `quarter` and `half + quarter`) are
    // deliberately left unused.
    var bits = (hn.charCodeAt(eighth) & 0x01) << 14;
    bits |= (hn.charCodeAt(quarter + eighth) & 0x01) << 12;
    bits |= (hn.charCodeAt(half) & 0x01) << 11;
    bits |= (hn.charCodeAt(half + eighth) & 0x01) << 10;
    bits |= (hn.charCodeAt(half + quarter + eighth) & 0x01) << 8;
    return String.fromCharCode(bits, len);
};
// Store a hostname in the dictionary.
// Returns true if the hostname was added, false if it was already present.
// A bucket which was already frozen into a string is first melted back into
// a map so that it can accept the new entry.
FilterHostnameDict.prototype.add = function(hn) {
    var key = this.makeKey(hn);
    var bucket = this.dict[key];
    if ( bucket === undefined ) {
        bucket = this.dict[key] = {};
    } else if ( typeof bucket === 'string' ) {
        // The stride of a packed bucket is the hostname length: strings
        // expose `length`, there is no `len` property.
        bucket = this.dict[key] = this.meltBucket(hn.length, bucket);
    }
    // Own properties only: buckets are plain objects, and inherited members
    // (`constructor`, etc.) must not be mistaken for stored hostnames.
    if ( Object.prototype.hasOwnProperty.call(bucket, hn) ) {
        return false;
    }
    bucket[hn] = true;
    this.count += 1;
    return true;
};
// Compact every map bucket into a string. Buckets which are already strings
// are left alone. A bucket whose packed size is below `cutoff` becomes a
// space-delimited list, otherwise it becomes the sorted hostnames
// concatenated without separator.
FilterHostnameDict.prototype.freeze = function() {
    var dict = this.dict;
    for ( var key in dict ) {
        var entry = dict[key];
        if ( typeof entry === 'object' ) {
            var names = Object.keys(entry);
            var packedSize = names[0].length * names.length;
            dict[key] = packedSize < this.cutoff ?
                ' ' + names.join(' ') + ' ' :
                names.sort().join('');
        }
    }
};
// Test whether a hostname is stored, verbatim, in the dictionary.
// The bucket may be in one of three shapes:
//   - object: not frozen yet, hostnames are own property names
//   - string starting with a space: space-delimited list of hostnames
//   - other string: sorted, equal-length hostnames, binary-searched
// Returns true or false.
FilterHostnameDict.prototype.matchesExactly = function(hn) {
    // TODO: Handle IP address
    var bucket = this.dict[this.makeKey(hn)];
    if ( bucket === undefined ) {
        return false;
    }
    if ( typeof bucket === 'object' ) {
        // hasOwnProperty() returns a boolean: comparing its result against
        // `undefined` would report a match for every hostname.
        return Object.prototype.hasOwnProperty.call(bucket, hn);
    }
    if ( bucket.charAt(0) === ' ' ) {
        return bucket.indexOf(' ' + hn + ' ') !== -1;
    }
    // binary search
    var len = hn.length;
    var left = 0;
    // http://jsperf.com/or-vs-floor/17
    var right = (bucket.length / len + 0.5) | 0;
    var i, needle;
    while ( left < right ) {
        i = (left + right) >> 1;
        needle = bucket.substr(len * i, len);
        if ( hn < needle ) {
            right = i;
        } else if ( hn > needle ) {
            left = i + 1;
        } else {
            return true;
        }
    }
    return false;
};
// Look up the hostname held in `requestHostnameRegister` (the `hn` argument
// is not used), then each of its parent domains in turn.
// On a hit, `this.h` is set to the matching filter text and `this` is
// returned; otherwise `this.h` is cleared and false is returned.
FilterHostnameDict.prototype.match = function(hn) {
    // TODO: mind IP addresses
    var candidate = requestHostnameRegister;
    var dot;
    for (;;) {
        if ( this.matchesExactly(candidate) !== false ) {
            break;
        }
        dot = candidate.indexOf('.');
        if ( dot === -1 ) {
            this.h = '';
            return false;
        }
        // Strip the leftmost label and try again.
        candidate = candidate.slice(dot + 1);
    }
    this.h = '||' + candidate + '^';
    return this;
};
// Filter id, used to tag this class in a selfie.
FilterHostnameDict.prototype.fid = '{h}';

// Text of the last filter matched by match().
FilterHostnameDict.prototype.toString = function() {
    return this.h;
};

// Serialize the hostname count and the buckets as a JSON string.
FilterHostnameDict.prototype.toSelfie = function() {
    var snapshot = {
        count: this.count,
        dict: this.dict
    };
    return JSON.stringify(snapshot);
};

// Rebuild an instance from a string created by toSelfie().
FilterHostnameDict.fromSelfie = function(s) {
    var instance = new FilterHostnameDict();
    var data = JSON.parse(s);
    instance.count = data.count;
    instance.dict = data.dict;
    return instance;
};
/******************************************************************************/
/******************************************************************************/
// Some buckets can grow quite large, and finding a hit in these buckets
// may end up being expensive. After considering various solutions, the one
// retained is to promote hit filters to a smaller index, so that next time
// they can be looked-up faster.
2014-09-19 16:59:44 +02:00
// key= 10000 ad count=660
// key= 10000 ads count=433
// key= 10001 google count=277
// key=1000000 2mdn count=267
// key= 10000 social count=240
// key= 10001 pagead2 count=166
// key= 10000 twitter count=122
// key= 10000 doubleclick count=118
// key= 10000 facebook count=114
// key= 10000 share count=113
// key= 10000 google count=106
// key= 10001 code count=103
// key= 11000 doubleclick count=100
// key=1010001 g count=100
// key= 10001 js count= 89
// key= 10000 adv count= 88
// key= 10000 youtube count= 61
// key= 10000 plugins count= 60
// key= 10001 partner count= 59
// key= 10000 ico count= 57
// key= 110001 ssl count= 57
// key= 10000 banner count= 53
// key= 10000 footer count= 51
// key= 10000 rss count= 51
2014-09-19 16:59:44 +02:00
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// A list of filters sharing the same token key.
//   a, b: optional first and second filters of the bucket
// Fields:
//   promoted: number of promotions performed so far, see promote()
//   vip:      hits at an index below this value are never promoted
//   f:        last filter which matched, set by match()
//   filters:  the filters, in lookup order
var FilterBucket = function(a, b) {
    this.promoted = 0;
    this.vip = 16;
    this.f = null; // short-lived register
    this.filters = [];
    if ( a !== undefined ) {
        this.filters[0] = a;
        if ( b !== undefined ) {
            this.filters[1] = b;
        }
    }
};
// Append a filter at the end of the bucket.
FilterBucket.prototype.add = function(a) {
    var list = this.filters;
    list[list.length] = a;
};
// Promote hit filters so they can be found faster next time.
// The filter at index `i` is swapped with one located in a lower part of the
// list. The size of that lower part starts at half the list and keeps being
// halved while `i` falls inside it, without being used once it drops below
// `vip`. Nothing happens if `i` is already at or below the final pivot.
FilterBucket.prototype.promote = function(i) {
    var list = this.filters;
    var pivot = list.length >>> 1;
    for (;;) {
        if ( !(i < pivot) ) {
            break;
        }
        pivot >>>= 1;
        if ( pivot < this.vip ) {
            break;
        }
    }
    if ( i <= pivot ) {
        return;
    }
    // Spread successive promotions over the slots below the pivot.
    var target = this.promoted % pivot;
    //console.debug('FilterBucket.promote(): promoted %d to %d', i, target);
    var displaced = list[target];
    list[target] = list[i];
    list[i] = displaced;
    this.promoted += 1;
};
2014-09-08 23:46:58 +02:00
// Try each filter of the bucket, in order, against the URL.
//   url:      URL to test
//   tokenBeg: position in `url` of the token which led to this bucket
// On a hit the matching filter is remembered in `this.f`, promoted when it
// sits at index `vip` or beyond, and true is returned. Returns false when
// no filter matches.
FilterBucket.prototype.match = function(url, tokenBeg) {
    var filters = this.filters;
    var n = filters.length;
    for ( var i = 0; i < n; i++ ) {
        if ( filters[i].match(url, tokenBeg) !== false ) {
            this.f = filters[i];
            if ( i >= this.vip ) {
                this.promote(i);
            }
            return true;
        }
    }
    return false;
};
// Filter id, used to tag this class in a selfie.
FilterBucket.prototype.fid = '[]';

// Text of the last filter which matched, or an empty string if none did.
FilterBucket.prototype.toString = function() {
    return this.f === null ? '' : this.f.toString();
};

// Only the number of filters is recorded: the filters themselves are
// serialized by the caller.
FilterBucket.prototype.toSelfie = function() {
    return String(this.filters.length);
};

// Always creates an empty bucket: the serialized filter count is not used.
FilterBucket.fromSelfie = function() {
    return new FilterBucket();
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
2015-01-23 17:32:49 +01:00
// Pick and instantiate the filter class best suited to a parsed filter which
// is not restricted to specific hostnames.
//   details: parsed filter; reads `f`, `isRegex`, `anchor`, `tokenBeg` and
//            `hostnameAnchored`
// Returns the new filter instance, or null when the filter combines more
// than one wildcard with an anchor, which is not supported.
var makeFilter = function(details) {
    var s = details.f;
    if ( details.isRegex ) {
        return new FilterRegex(s);
    }
    var wcOffset = s.indexOf('*');
    if ( wcOffset !== -1 ) {
        if ( s.indexOf('*', wcOffset + 1) !== -1 ) {
            return details.anchor === 0 ? new FilterManyWildcards(s, details.tokenBeg) : null;
        }
        // Exactly one wildcard: split around it.
        var lSegment = s.slice(0, wcOffset);
        var rSegment = s.slice(wcOffset + 1);
        if ( details.anchor < 0 ) {
            return new FilterSingleWildcardLeftAnchored(lSegment, rSegment);
        }
        if ( details.anchor > 0 ) {
            return new FilterSingleWildcardRightAnchored(lSegment, rSegment);
        }
        if ( details.tokenBeg === 0 ) {
            return new FilterSingleWildcardPrefix0(lSegment, rSegment);
        }
        return new FilterSingleWildcard(lSegment, rSegment, details.tokenBeg);
    }
    if ( details.anchor < 0 ) {
        return new FilterPlainLeftAnchored(s);
    }
    if ( details.anchor > 0 ) {
        return new FilterPlainRightAnchored(s);
    }
    if ( details.hostnameAnchored ) {
        return new FilterPlainHnAnchored(s);
    }
    if ( details.tokenBeg === 0 ) {
        return new FilterPlainPrefix0(s);
    }
    if ( details.tokenBeg === 1 ) {
        return new FilterPlainPrefix1(s);
    }
    return new FilterPlain(s, details.tokenBeg);
};
/******************************************************************************/
2015-01-23 17:32:49 +01:00
// Pick and instantiate the filter class best suited to a parsed filter which
// is bound to a specific hostname.
//   details:  parsed filter; reads `f`, `isRegex`, `anchor` and `tokenBeg`
//   hostname: the hostname the filter is bound to
// Returns the new filter instance, or null when the filter combines more
// than one wildcard with an anchor, which is not supported.
var makeHostnameFilter = function(details, hostname) {
    var s = details.f;
    if ( details.isRegex ) {
        return new FilterRegexHostname(s, hostname);
    }
    var wcOffset = s.indexOf('*');
    if ( wcOffset !== -1 ) {
        if ( s.indexOf('*', wcOffset + 1) !== -1 ) {
            return details.anchor === 0 ? new FilterManyWildcardsHostname(s, details.tokenBeg, hostname) : null;
        }
        // Exactly one wildcard: split around it.
        var lSegment = s.slice(0, wcOffset);
        var rSegment = s.slice(wcOffset + 1);
        if ( details.anchor < 0 ) {
            return new FilterSingleWildcardLeftAnchoredHostname(lSegment, rSegment, hostname);
        }
        if ( details.anchor > 0 ) {
            return new FilterSingleWildcardRightAnchoredHostname(lSegment, rSegment, hostname);
        }
        if ( details.tokenBeg === 0 ) {
            return new FilterSingleWildcardPrefix0Hostname(lSegment, rSegment, hostname);
        }
        return new FilterSingleWildcardHostname(lSegment, rSegment, details.tokenBeg, hostname);
    }
    if ( details.anchor < 0 ) {
        return new FilterPlainLeftAnchoredHostname(s, hostname);
    }
    if ( details.anchor > 0 ) {
        return new FilterPlainRightAnchoredHostname(s, hostname);
    }
    if ( details.tokenBeg === 0 ) {
        return new FilterPlainPrefix0Hostname(s, hostname);
    }
    if ( details.tokenBeg === 1 ) {
        return new FilterPlainPrefix1Hostname(s, hostname);
    }
    return new FilterPlainHostname(s, details.tokenBeg, hostname);
};
/******************************************************************************/
// Given a string, find a good token. Tokens which are too generic, i.e. very
// common with a high probability of ending up as a miss, are not
// good. Avoid if possible. This has a *significant* positive impact on
// performance.
// These "bad tokens" are collated manually.
var badTokens = {
    'com': true,
    'http': true,
    'https': true,
    'icon': true,
    'images': true,
    'img': true,
    'js': true,
    'net': true,
    'news': true,
    'www': true
};
// Find the first token of `s` which is not listed in `badTokens`.
// Returns the regex match (an array with an `index` property) of that token.
// If all tokens are bad, the match of the leftmost token is returned, or
// null if `s` contains no token at all.
// After the call, `reGoodToken.lastIndex` is the end of the returned token.
var findFirstGoodToken = function(s) {
    var matches;
    reGoodToken.lastIndex = 0;
    while ( (matches = reGoodToken.exec(s)) !== null ) {
        // Own properties only: `badTokens` is a plain object, a token such
        // as `constructor` must not be rejected because of an inherited
        // member.
        if ( Object.prototype.hasOwnProperty.call(badTokens, matches[0]) === false ) {
            return matches;
        }
    }
    // No good token found, just return the first token from left
    reGoodToken.lastIndex = 0;
    return reGoodToken.exec(s);
};
/******************************************************************************/
// Run `reHostnameToken` against `s`, starting from the beginning of the
// string. Returns the regex match, or null if there is none.
var findHostnameToken = function(s) {
    var re = reHostnameToken;
    re.lastIndex = 0;
    var matches = re.exec(s);
    return matches;
};
/******************************************************************************/
// Strip every leading and trailing occurrence of the character `c` from the
// string `s`, and return the result. Occurrences inside the string are kept.
var trimChar = function(s, c) {
    // Skip the leading run of `c`
    var head = 0;
    while ( s.charAt(head) === c ) {
        head += 1;
    }
    // Back off over the trailing run of `c`, never past the leading one
    var tail = s.length;
    while ( tail > head && s.charAt(tail - 1) === c ) {
        tail -= 1;
    }
    return s.slice(head, tail);
};
2014-09-08 23:46:58 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Reusable parser for a single line of a static network filter list. The
// outcome of the last parse() call is stored in the instance fields set by
// reset().
var FilterParser = function() {
    this.reHasWildcard = /[\^\*]/;
    this.reHasUppercase = /[A-Z]/;
    // These two arrays are emptied in place by reset(), never replaced.
    this.hostnames = [];
    this.notHostnames = [];
    this.reset();
};
/******************************************************************************/
// Maps the type options which can appear in a filter to the type names used
// to look up `typeNameToTypeValue` in parseOptType(). An option name which is
// not a key of this table is not a type option.
FilterParser.prototype.toNormalizedType = {
    'stylesheet': 'stylesheet',
    'image': 'image',
    'object': 'object',
    'object-subrequest': 'object',
    'script': 'script',
    'xmlhttprequest': 'xmlhttprequest',
    'subdocument': 'sub_frame',
    'other': 'other',
    'elemhide': 'cosmetic-filtering',
    'inline-script': 'inline-script',
    'popup': 'popup'
};
/******************************************************************************/
// Restore all the parsing results to their default value, so that the
// instance can be used to parse another filter. The hostname arrays are
// emptied in place. Returns `this`.
FilterParser.prototype.reset = function() {
    this.action = BlockAction;
    this.anchor = 0;
    this.elemHiding = false;
    this.f = '';
    this.firstParty = false;
    this.fopts = '';
    this.hostnameAnchored = false;
    this.hostnamePure = false;
    this.hostnames.length = 0;
    this.notHostnames.length = 0;
    this.isRegex = false;
    this.thirdParty = false;
    this.token = '';
    this.tokenBeg = 0;
    this.tokenEnd = 0;
    this.types = 0;
    this.important = 0;
    this.unsupported = false;
    return this;
};
/******************************************************************************/
// Record a request type option in the `types` bitmap.
//   raw: option name, must be a key of `toNormalizedType`
//   not: true if the option was negated with `~`
// A type value holds the type in bits 4-7, hence the shift by 4 to get the
// position of the type in the bitmap.
//
// https://github.com/gorhill/uBlock/issues/589
// Be ready to handle multiple negated types
FilterParser.prototype.parseOptType = function(raw, not) {
    var type = typeNameToTypeValue[this.toNormalizedType[raw]];
    if ( !not ) {
        this.types |= 1 << (type >>> 4);
        return;
    }
    // Negated type: set all valid network request type bits to 1
    if ( this.types === 0 ) {
        this.types = allNetRequestTypesBitmap;
    }
    this.types &= ~(1 << (type >>> 4));
};
/******************************************************************************/
// Record a `third-party` option: when negated (`not` is truthy) the filter
// applies to first-party requests, otherwise to third-party requests.
FilterParser.prototype.parseOptParty = function(not) {
    if ( !not ) {
        this.thirdParty = true;
        return;
    }
    this.firstParty = true;
};
/******************************************************************************/
// Parse the value of a `domain=` option: a `|`-separated list of hostnames.
// Hostnames prefixed with `~` are appended (without the prefix) to
// `notHostnames`, all others are appended to `hostnames`.
FilterParser.prototype.parseOptHostnames = function(raw) {
    var hostnames = raw.split('|');
    var hostname;
    for ( var i = 0; i < hostnames.length; i++ ) {
        hostname = hostnames[i];
        if ( hostname.charAt(0) === '~' ) {
            this.notHostnames.push(hostname.slice(1));
        } else {
            this.hostnames.push(hostname);
        }
    }
};
/******************************************************************************/
// Parse the options part of a filter, i.e. what follows the `$`.
//   s: comma-separated list of options, each optionally negated with `~`
// The raw options are kept in `fopts`. Parsing stops at the first option
// which is not recognized, in which case `unsupported` is set to true.
FilterParser.prototype.parseOptions = function(s) {
    this.fopts = s;
    var opts = s.split(',');
    var opt, not;
    for ( var i = 0; i < opts.length; i++ ) {
        opt = opts[i];
        not = opt.charAt(0) === '~';
        if ( not ) {
            opt = opt.slice(1);
        }
        if ( opt === 'third-party' ) {
            this.parseOptParty(not);
            continue;
        }
        // `elemhide` on an exception filter is stored as a block filter of
        // type `elemhide`. This test must come before the generic type test
        // below.
        if ( opt === 'elemhide' && this.action === AllowAction ) {
            this.parseOptType('elemhide', false);
            this.action = BlockAction;
            continue;
        }
        // All type options, `popup` included, are handled here.
        if ( this.toNormalizedType.hasOwnProperty(opt) ) {
            this.parseOptType(opt, not);
            continue;
        }
        if ( opt.slice(0,7) === 'domain=' ) {
            this.parseOptHostnames(opt.slice(7));
            continue;
        }
        if ( opt === 'important' ) {
            this.important = Important;
            continue;
        }
        this.unsupported = true;
        break;
    }
};
/******************************************************************************/
// Parse one line of a filter list.
//   s: the filter, trimmed, neither empty nor a comment
// The outcome is stored in the fields of the parser, which is returned.
// Notable outcomes:
//   - `hostnamePure`: the filter is nothing more than a hostname
//   - `elemHiding`: the filter is a cosmetic filter, nothing else was parsed
//   - `unsupported`: the filter uses an option which is not supported
//   - `isRegex`: the filter is a regular expression, `f` is its source
FilterParser.prototype.parse = function(s) {
    // important!
    this.reset();

    // plain hostname?
    if ( reHostnameRule.test(s) ) {
        this.f = s;
        this.hostnamePure = this.hostnameAnchored = true;
        return this;
    }

    // element hiding filter?
    var pos = s.indexOf('#');
    if ( pos !== -1 ) {
        var c = s.charAt(pos + 1);
        if ( c === '#' || c === '@' ) {
            console.error('static-net-filtering.js > unexpected cosmetic filters');
            this.elemHiding = true;
            return this;
        }
    }

    // block or allow filter?
    // Important: this must be executed before parsing options
    if ( s.lastIndexOf('@@', 0) === 0 ) {
        this.action = AllowAction;
        s = s.slice(2);
    }

    // options
    pos = s.indexOf('$');
    if ( pos !== -1 ) {
        this.parseOptions(s.slice(pos + 1));
        s = s.slice(0, pos);
    }

    // regex?
    // At least one character is required between the two slashes.
    if ( s.charAt(0) === '/' && s.slice(-1) === '/' && s.length > 2 ) {
        this.isRegex = true;
        this.f = s.slice(1, -1);
        return this;
    }

    // hostname anchoring
    if ( s.lastIndexOf('||', 0) === 0 ) {
        this.hostnameAnchored = true;
        s = s.slice(2);
    }

    // left-anchored
    if ( s.charAt(0) === '|' ) {
        this.anchor = -1;
        s = s.slice(1);
    }

    // right-anchored
    if ( s.slice(-1) === '|' ) {
        this.anchor = 1;
        s = s.slice(0, -1);
    }

    // normalize placeholders
    // TODO: transforming `^` into `*` is not a strict interpretation of
    // ABP syntax.
    if ( this.reHasWildcard.test(s) ) {
        s = s.replace(/\^/g, '*').replace(/\*\*+/g, '*');
        s = trimChar(s, '*');
    }

    // plain hostname?
    this.hostnamePure = this.hostnameAnchored && reHostnameRule.test(s);

    // This might look weird but we gain memory footprint by not going through
    // toLowerCase(), at least on Chromium. Because copy-on-write?
    this.f = this.reHasUppercase.test(s) ? s.toLowerCase() : s;

    return this;
};
/******************************************************************************/
// Compute the token under which the parsed filter `f` is to be stored, and
// set `token`, `tokenBeg` and `tokenEnd` accordingly.
//   - regex filters all use the special token '*'
//   - hostname-anchored filters use the token found by findHostnameToken()
//   - other filters use the token found by findFirstGoodToken()
// The fields are left untouched when no token could be found: the caller can
// detect this by testing for an empty `token`.
FilterParser.prototype.makeToken = function() {
    if ( this.isRegex ) {
        this.token = '*';
        return;
    }

    var matches;

    if ( this.hostnameAnchored ) {
        matches = findHostnameToken(this.f);
        if ( !matches || matches[0].length === 0 ) {
            return;
        }
        this.tokenBeg = matches.index;
        this.tokenEnd = reHostnameToken.lastIndex;
        this.token = this.f.slice(this.tokenBeg, this.tokenEnd);
        return;
    }

    matches = findFirstGoodToken(this.f);
    if ( !matches || matches[0].length === 0 ) {
        return;
    }
    this.tokenBeg = matches.index;
    this.tokenEnd = reGoodToken.lastIndex;
    this.token = this.f.slice(this.tokenBeg, this.tokenEnd);
};
/******************************************************************************/
/******************************************************************************/
// One token found in a URL by FilterContainer.tokenize().
//   token: text of the token, an empty string marks the end of the list
//   beg:   position of the token in the URL
var TokenEntry = function() {
    this.token = '';
    this.beg = 0;
};
/******************************************************************************/
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// Container of all the static network filters.
//   reAnyToken:   used by tokenize() to extract the tokens of a URL
//   tokens:       reusable list of TokenEntry, filled by tokenize()
//   filterParser: reusable parser, used by add()
var FilterContainer = function() {
    this.reAnyToken = /[%0-9a-z]+/g;
    this.tokens = [];
    this.filterParser = new FilterParser();
    this.reset();
};
/******************************************************************************/
// Reset all, thus reducing to a minimum memory footprint of the context.
// All counters are zeroed, all filters and the record of already seen
// filters are discarded, and the container accepts new filters again.
FilterContainer.prototype.reset = function() {
    this.frozen = false;
    this.processedFilterCount = 0;
    this.acceptedCount = 0;
    this.rejectedCount = 0;
    this.allowFilterCount = 0;
    this.blockFilterCount = 0;
    this.duplicateCount = 0;
    // No prototype chain: any string is safe to use as a key.
    this.categories = Object.create(null);
    this.duplicates = Object.create(null);
    this.filterParser.reset();
};
2014-06-24 00:42:43 +02:00
2014-07-20 21:00:26 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
2014-07-20 21:00:26 +02:00
// To be called once all filters have been added: the hostname dictionary of
// each category (stored under the '.' key) is frozen, the record of already
// seen filters is discarded, and the container is marked as frozen.
FilterContainer.prototype.freeze = function() {
    histogram('allFilters', this.categories);
    var categories = this.categories;
    var bucket;
    for ( var k in categories ) {
        bucket = categories[k]['.'];
        if ( bucket !== undefined ) {
            bucket.freeze();
        }
    }
    this.duplicates = Object.create(null);
    this.filterParser.reset();
    this.frozen = true;
};
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// Serialize the container into an object made of the counters and of a
// `categories` string. That string is a list of lines, each line being made
// of a tag, a tab character, and a payload:
//   k1 <encoded category key>
//   k2 <encoded token key>
//   <fid> <selfie of the filter or bucket stored under the token key>
// When the entry stored under a token key is a FilterBucket ('[]'), its line
// is followed by one line per filter in the bucket.
FilterContainer.prototype.toSelfie = function() {
    var categoryToSelfie = function(dict) {
        var selfie = [];
        var bucket, ff, n, i, f;
        for ( var k in dict ) {
            // No need for hasOwnProperty() here: there is no prototype chain.
            // We need to encode the key because there could be a `\n` or '\t'
            // character in it, which would trip the code at parse time.
            selfie.push('k2\t' + encode(k));
            bucket = dict[k];
            selfie.push(bucket.fid + '\t' + bucket.toSelfie());
            if ( bucket.fid !== '[]' ) {
                continue;
            }
            ff = bucket.filters;
            n = ff.length;
            for ( i = 0; i < n; i++ ) {
                f = ff[i];
                selfie.push(f.fid + '\t' + f.toSelfie());
            }
        }
        return selfie.join('\n');
    };

    var categoriesToSelfie = function(dict) {
        var selfie = [];
        for ( var k in dict ) {
            // No need for hasOwnProperty() here: there is no prototype chain.
            // We need to encode the key because there could be a `\n` or '\t'
            // character in it, which would trip the code at parse time.
            selfie.push('k1\t' + encode(k));
            selfie.push(categoryToSelfie(dict[k]));
        }
        return selfie.join('\n');
    };

    return {
        processedFilterCount: this.processedFilterCount,
        acceptedCount: this.acceptedCount,
        rejectedCount: this.rejectedCount,
        allowFilterCount: this.allowFilterCount,
        blockFilterCount: this.blockFilterCount,
        duplicateCount: this.duplicateCount,
        categories: categoriesToSelfie(this.categories)
    };
};
/******************************************************************************/
// Restore the container from an object created by toSelfie(). The container
// is marked as frozen. The `categories` string is read one line at a time:
// each line is a tag, a tab character, and a payload. The tag is either
// `k1` (new category key), `k2` (new token key), or the `fid` of the class
// to use to restore the filter described by the payload.
FilterContainer.prototype.fromSelfie = function(selfie) {
    this.frozen = true;
    this.processedFilterCount = selfie.processedFilterCount;
    this.acceptedCount = selfie.acceptedCount;
    this.rejectedCount = selfie.rejectedCount;
    this.allowFilterCount = selfie.allowFilterCount;
    this.blockFilterCount = selfie.blockFilterCount;
    this.duplicateCount = selfie.duplicateCount;

    var factories = {
         '[]': FilterBucket,
          'a': FilterPlain,
         'ah': FilterPlainHostname,
         '0a': FilterPlainPrefix0,
        '0ah': FilterPlainPrefix0Hostname,
         '1a': FilterPlainPrefix1,
        '1ah': FilterPlainPrefix1Hostname,
         '|a': FilterPlainLeftAnchored,
        '|ah': FilterPlainLeftAnchoredHostname,
         'a|': FilterPlainRightAnchored,
        'a|h': FilterPlainRightAnchoredHostname,
        'h|a': FilterPlainHnAnchored,
          '*': FilterSingleWildcard,
         '*h': FilterSingleWildcardHostname,
         '0*': FilterSingleWildcardPrefix0,
        '0*h': FilterSingleWildcardPrefix0Hostname,
         '|*': FilterSingleWildcardLeftAnchored,
        '|*h': FilterSingleWildcardLeftAnchoredHostname,
         '*|': FilterSingleWildcardRightAnchored,
        '*|h': FilterSingleWildcardRightAnchoredHostname,
         '*+': FilterManyWildcards,
        '*+h': FilterManyWildcardsHostname,
         '//': FilterRegex,
        '//h': FilterRegexHostname,
        '{h}': FilterHostnameDict
    };

    var catKey, tokenKey;
    var dict = this.categories, subdict;
    var bucket = null;
    var rawText = selfie.categories;
    var rawEnd = rawText.length;
    var lineBeg = 0, lineEnd;
    var line, pos, what, factory;
    while ( lineBeg < rawEnd ) {
        lineEnd = rawText.indexOf('\n', lineBeg);
        if ( lineEnd < 0 ) {
            lineEnd = rawEnd;
        }
        line = rawText.slice(lineBeg, lineEnd);
        lineBeg = lineEnd + 1;
        pos = line.indexOf('\t');
        what = line.slice(0, pos);
        if ( what === 'k1' ) {
            catKey = decode(line.slice(pos + 1));
            subdict = dict[catKey] = Object.create(null);
            bucket = null;
            continue;
        }
        if ( what === 'k2' ) {
            tokenKey = decode(line.slice(pos + 1));
            bucket = null;
            continue;
        }
        factory = factories[what];
        // First filter line following a token key: this is the entry
        // stored under that key.
        if ( bucket === null ) {
            bucket = subdict[tokenKey] = factory.fromSelfie(line.slice(pos + 1));
            continue;
        }
        // When token key is reused, it can't be anything
        // else than FilterBucket
        bucket.add(factory.fromSelfie(line.slice(pos + 1)));
    }
};
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// Convert a category bitmap into the one-character string used as a key of
// `this.categories`.
FilterContainer.prototype.makeCategoryKey = function(category) {
    var key = String.fromCharCode(category);
    return key;
};
/******************************************************************************/
2015-02-01 00:34:46 +01:00
// Parse a line of a filter list and, if it is a supported network filter
// which was not already seen, store it.
//   raw: the line, as found in the filter list
// Returns true if the line was a network filter which was stored or handled
// as a pure hostname filter, false otherwise (empty line, comment, cosmetic
// filter, unsupported filter, duplicate, or filter which can't be stored).
FilterContainer.prototype.add = function(raw) {
    // ORDER OF TESTS IS IMPORTANT!

    // Ignore empty lines
    var s = raw.trim();
    if ( s.length === 0 ) {
        return false;
    }

    // Ignore comments
    var c = s.charAt(0);
    if ( c === '[' || c === '!' ) {
        return false;
    }

    var parsed = this.filterParser.parse(s);

    // Ignore element-hiding filters
    if ( parsed.elemHiding ) {
        return false;
    }

    // Ignore filters with unsupported options
    if ( parsed.unsupported ) {
        this.rejectedCount += 1;
        //console.log('static-net-filtering.js > FilterContainer.add(): unsupported filter "%s"', raw);
        return false;
    }

    this.processedFilterCount += 1;
    this.acceptedCount += 1;

    // Pure hostnames, use more efficient liquid dict
    // https://github.com/gorhill/uBlock/issues/665
    // Create a dict keyed on request type etc.
    if ( parsed.hostnamePure && this.addHostnameOnlyFilter(parsed) ) {
        return true;
    }

    if ( this.duplicates[s] ) {
        //console.log('static-net-filtering.js > FilterContainer.add(): duplicate filter "%s"', raw);
        this.duplicateCount++;
        return false;
    }
    // Seen filters are recorded only until the container is frozen.
    if ( this.frozen === false ) {
        this.duplicates[s] = true;
    }

    var r = this.addFilter(parsed);
    if ( r === false ) {
        return false;
    }

    if ( parsed.action ) {
        this.allowFilterCount += 1;
    } else {
        this.blockFilterCount += 1;
    }
    return true;
};
/******************************************************************************/
2015-02-05 00:06:31 +01:00
// Using fast/compact dictionary when filter is a (or portion of) pure hostname.
//   parsed: the parsed filter
// Returns false if the filter can't be stored in a hostname dictionary,
// which is the case when it is restricted to, or excludes, specific domains.
// Returns true otherwise, whether the hostname was new or not. The hostname
// is stored once per request type the filter applies to, in the dictionary
// found under the '.' key of each matching category.
FilterContainer.prototype.addHostnameOnlyFilter = function(parsed) {
    // Can't fit the filter in a pure hostname dictionary.
    if ( parsed.hostnames.length !== 0 || parsed.notHostnames.length !== 0 ) {
        return false;
    }

    var isNewFilter = false;
    var party = AnyParty;
    if ( parsed.firstParty !== parsed.thirdParty ) {
        party = parsed.firstParty ? FirstParty : ThirdParty;
    }
    var keyShard = parsed.action | parsed.important | party;
    var key, bucket;
    var type = parsed.types >>> 1 || 1; // bit 0 is unused; also, default to AnyType
    var bitOffset = 1;
    while ( type !== 0 ) {
        if ( type & 1 ) {
            key = this.makeCategoryKey(keyShard | (bitOffset << 4));
            bucket = this.categories[key];
            if ( bucket === undefined ) {
                bucket = this.categories[key] = Object.create(null);
            }
            if ( bucket['.'] === undefined ) {
                bucket['.'] = new FilterHostnameDict();
            }
            if ( bucket['.'].add(parsed.f) ) {
                isNewFilter = true;
            }
        }
        bitOffset += 1;
        type >>>= 1;
    }

    // https://github.com/gorhill/uBlock/issues/719
    // Count whole filter, not its decomposed versions
    if ( isNewFilter ) {
        if ( parsed.action ) {
            this.allowFilterCount += 1;
        } else {
            this.blockFilterCount += 1;
        }
    } else {
        this.duplicateCount += 1;
    }

    return true;
};
/******************************************************************************/
2014-06-24 00:42:43 +02:00
// Create the filter instance(s) needed to enforce a parsed filter, and store
// them.
//   parsed: the parsed filter. Its `token`, `action` and `important` fields
//           may be modified by this method.
// Returns false if the filter can't be tokenized or if no filter instance
// can be created for it, true otherwise.
FilterContainer.prototype.addFilter = function(parsed) {
    parsed.makeToken();
    if ( parsed.token === '' ) {
        console.error('static-net-filtering.js > FilterContainer.addFilter("%s"): can\'t tokenize', parsed.f);
        return false;
    }

    var party = AnyParty;
    if ( parsed.firstParty !== parsed.thirdParty ) {
        party = parsed.firstParty ? FirstParty : ThirdParty;
    }

    var filter;
    var i = parsed.hostnames.length;
    var j = parsed.notHostnames.length;

    // Applies to all domains without exceptions
    if ( i === 0 && j === 0 ) {
        filter = makeFilter(parsed);
        if ( !filter ) {
            return false;
        }
        this.addFilterEntry(filter, parsed, party);
        return true;
    }

    // Applies to specific domains
    if ( i !== 0 ) {
        while ( i-- ) {
            filter = makeHostnameFilter(parsed, parsed.hostnames[i]);
            if ( !filter ) {
                return false;
            }
            this.addFilterEntry(filter, parsed, party);
        }
    }
    // No exceptions
    if ( j === 0 ) {
        return true;
    }

    // Case:
    // - applies everywhere except to specific domains
    // Example:
    // - ||adm.fwmrm.net/p/msnbc_live/$object-subrequest,third-party,domain=~msnbc.msn.com|~www.nbcnews.com
    // Note: the loop above left `i` at -1 when there were specific domains.
    if ( i === 0 ) {
        filter = makeFilter(parsed);
        if ( !filter ) {
            return false;
        }
        // https://github.com/gorhill/uBlock/issues/251
        // Apply third-party option if it is present
        this.addFilterEntry(filter, parsed, party);
    }

    // Cases:
    // - applies everywhere except to specific domains
    // - applies to specific domains except other specific domains
    // Example:
    // - /^https?\:\/\/(?!(...)\/)/$script,third-party,xmlhttprequest,domain=photobucket.com|~secure.photobucket.com

    // Reverse purpose of filter
    parsed.action ^= ToggleAction;
    while ( j-- ) {
        filter = makeHostnameFilter(parsed, parsed.notHostnames[j]);
        if ( !filter ) {
            return false;
        }
        // https://github.com/gorhill/uBlock/issues/191#issuecomment-53654024
        // If it is a block filter, we need to reverse the order of
        // evaluation.
        if ( parsed.action === BlockAction ) {
            parsed.important = Important;
        }
        this.addFilterEntry(filter, parsed, party);
    }
    return true;
};
/******************************************************************************/
2015-01-23 17:32:49 +01:00
// Store a filter instance in each of the categories it belongs to: one
// category per request type found in the `types` bitmap of the parsed filter.
//   filter: the filter instance to store
//   parsed: the parsed filter; reads `action`, `important`, `types`, `token`
//   party:  the party bits of the category
FilterContainer.prototype.addFilterEntry = function(filter, parsed, party) {
    var bits = parsed.action | parsed.important | party;
    var type = parsed.types >>> 1 || 1; // bit 0 is unused; also, default to AnyType
    var bitOffset = 1;
    while ( type !== 0 ) {
        if ( type & 1 ) {
            this.addToCategory(bits | (bitOffset << 4), parsed.token, filter);
        }
        bitOffset += 1;
        type >>>= 1;
    }
};
/******************************************************************************/
// Store a filter instance under a token key of a category.
//   category: category bitmap
//   tokenKey: token under which the filter is to be found
//   filter:   the filter instance
// A lone filter is stored as is under its token key. As soon as a second
// filter uses the same token key, both are moved into a FilterBucket.
FilterContainer.prototype.addToCategory = function(category, tokenKey, filter) {
    var categoryKey = this.makeCategoryKey(category);
    var categoryBucket = this.categories[categoryKey];
    if ( !categoryBucket ) {
        categoryBucket = this.categories[categoryKey] = Object.create(null);
    }
    var filterEntry = categoryBucket[tokenKey];
    if ( filterEntry === undefined ) {
        categoryBucket[tokenKey] = filter;
        return;
    }
    if ( filterEntry.fid === '[]' ) {
        filterEntry.add(filter);
        return;
    }
    categoryBucket[tokenKey] = new FilterBucket(filterEntry, filter);
};
/******************************************************************************/
// Since the addition of the `important` evaluation, this means it is now
// likely that the url will have to be scanned more than once. So this is
// to ensure we do it once only, and reuse results.
//
// The tokens of `url` are stored, in order, in `this.tokens`. The token
// entries are reused from one call to the next, and the end of the list is
// marked by an entry whose `token` is an empty string.
FilterContainer.prototype.tokenize = function(url) {
    var tokens = this.tokens;
    var re = this.reAnyToken;
    var matches, tokenEntry;
    re.lastIndex = 0;
    var i = 0;
    while ( matches = re.exec(url) ) {
        tokenEntry = tokens[i];
        if ( tokenEntry === undefined ) {
            tokenEntry = tokens[i] = new TokenEntry();
        }
        tokenEntry.beg = matches.index;
        tokenEntry.token = matches[0];
        i += 1;
    }
    // Sentinel
    tokenEntry = tokens[i];
    if ( tokenEntry === undefined ) {
        tokenEntry = tokens[i] = new TokenEntry();
    }
    tokenEntry.token = '';
};
/******************************************************************************/
// Look for a filter in `bucket` matching `url`, using the token list
// previously stored in `this.tokens`.
//
//   bucket: maps a token (or the special keys '.' and '*') to a filter.
//   url:    the URL to match.
//
// Returns the first matching filter, or `false` when none matches. A filter
// is a match whenever its `match` method returns anything but `false`.
FilterContainer.prototype.matchTokens = function(bucket, url) {
    // Hostname-only filters
    var f = bucket['.'];
    if ( f !== undefined && f.match() !== false ) {
        return f;
    }

    // Token-indexed filters: walk the token list up to the sentinel entry,
    // i.e. the first one whose token is the empty string.
    var tokens = this.tokens;
    var tokenEntry, token;
    var i = 0;
    for (;;) {
        tokenEntry = tokens[i++];
        token = tokenEntry.token;
        if ( token === '' ) {
            break;
        }
        f = bucket[token];
        if ( f !== undefined && f.match(url, tokenEntry.beg) !== false ) {
            return f;
        }
    }

    // Regex-based filters
    f = bucket['*'];
    if ( f !== undefined && f.match(url) !== false ) {
        return f;
    }

    return false;
};
/******************************************************************************/
// Specialized handlers
// https://github.com/gorhill/uBlock/issues/116
// Some type of requests are exceptional, they need custom handling,
// not the generic handling.
// Evaluate a request against the filters registered for exactly one request
// type: only the categories carrying the request's type bits are consulted.
//
//   context:     `pageHostname` and `pageDomain` are read.
//   requestURL:  URL of the request; matching is done on its lowercase form.
//   requestType: type name; a name missing from `typeNameToTypeValue` is
//                handled as `typeOtherValue`.
//
// Returns:
//   ''                  no block filter matches
//   'sb:' + filter      the request is blocked by `filter`
//   'sa:' + filter      a block filter matched, but allow filter `filter`
//                       overrides it
FilterContainer.prototype.matchStringExactType = function(context, requestURL, requestType) {
    var url = requestURL.toLowerCase();

    // These registers will be used by various filters
    pageHostnameRegister = context.pageHostname || '';
    requestHostnameRegister = µb.URI.hostnameFromURI(requestURL);

    var party = isFirstParty(context.pageDomain, requestHostnameRegister) ? FirstParty : ThirdParty;

    // Be prepared to support unknown types
    var type = typeNameToTypeValue[requestType] || typeOtherValue;
    var categories = this.categories;
    var bf = false, bucket;

    // Tokenize only once
    this.tokenize(url);

    // https://github.com/gorhill/uBlock/issues/139
    // Test against important block filters: a hit here is final, allow
    // filters are not consulted.
    // NOTE(review): unlike `matchString`, no '$important' suffix is appended
    // to the result here -- confirm whether this difference is intended.
    if ( bucket = categories[this.makeCategoryKey(BlockAnyParty | Important | type)] ) {
        bf = this.matchTokens(bucket, url);
        if ( bf !== false ) {
            return 'sb:' + bf.toString();
        }
    }
    if ( bucket = categories[this.makeCategoryKey(BlockAction | Important | type | party)] ) {
        bf = this.matchTokens(bucket, url);
        if ( bf !== false ) {
            return 'sb:' + bf.toString();
        }
    }

    // Test against block filters
    if ( bucket = categories[this.makeCategoryKey(BlockAnyParty | type)] ) {
        bf = this.matchTokens(bucket, url);
    }
    if ( bf === false ) {
        if ( bucket = categories[this.makeCategoryKey(BlockAction | type | party)] ) {
            bf = this.matchTokens(bucket, url);
        }
    }

    // If there is no block filter, no need to test against allow filters
    if ( bf === false ) {
        return '';
    }

    // Test against allow filters
    var af;
    if ( bucket = categories[this.makeCategoryKey(AllowAnyParty | type)] ) {
        af = this.matchTokens(bucket, url);
        if ( af !== false ) {
            return 'sa:' + af.toString();
        }
    }
    if ( bucket = categories[this.makeCategoryKey(AllowAction | type | party)] ) {
        af = this.matchTokens(bucket, url);
        if ( af !== false ) {
            return 'sa:' + af.toString();
        }
    }

    return 'sb:' + bf.toString();
};
/******************************************************************************/
// Evaluate a request against the static network filters.
//
//   context: `requestType`, `requestURL`, `requestHostname`, `pageHostname`
//            and `pageDomain` are read.
//
// Returns:
//   ''                               no block filter matches
//   'sb:' + filter + '$important'    blocked by an `important` block filter
//   'sb:' + filter                   blocked by `filter`
//   'sa:' + filter                   a block filter matched, but allow
//                                    filter `filter` overrides it
// Requests whose type value is above `8 << 4` are handed over to
// `matchStringExactType`, and its result is returned as is.
FilterContainer.prototype.matchString = function(context) {
    // https://github.com/gorhill/uBlock/issues/519
    // Use exact type match for anything beyond `other`
    // Also, be prepared to support unknown types
    var type = typeNameToTypeValue[context.requestType] || typeOtherValue;
    if ( type > 8 << 4 ) {
        return this.matchStringExactType(context, context.requestURL, context.requestType);
    }

    // https://github.com/gorhill/httpswitchboard/issues/239
    // Convert url to lower case:
    //     `match-case` option not supported, but then, I saw only one
    //     occurrence of it in all the supported lists (bulgaria list).
    var url = context.requestURL.toLowerCase();

    // The logic here is simple:
    //
    // block = !whitelisted &&  blacklisted
    //   or equivalent
    // allow =  whitelisted || !blacklisted

    // Statistically, hits on a URL in order of likelihood:
    // 1. No hit
    // 2. Hit on a block filter
    // 3. Hit on an allow filter
    //
    // High likelihood of "no hit" means to optimize we need to reduce as much
    // as possible the number of filters to test.
    //
    // Then, because of the order of probabilities, we should test only
    // block filters first, and test allow filters if and only if there is a
    // hit on a block filter. Since there is a high likelihood of no hit,
    // testing allow filter by default is likely wasted work, hence allow
    // filters are tested *only* if there is a (unlikely) hit on a block
    // filter.

    // These registers will be used by various filters
    pageHostnameRegister = context.pageHostname || '';
    requestHostnameRegister = context.requestHostname;

    var party = isFirstParty(context.pageDomain, context.requestHostname) ? FirstParty : ThirdParty;
    var filterClasses = this.categories;
    var bucket;

    // Tokenize only once
    this.tokenize(url);

    var bf = false;

    // https://github.com/gorhill/uBlock/issues/139
    // Test against important block filters.
    // A hit on an `important` block filter is final: the result is returned
    // right away, without testing allow filters.
    if ( bucket = filterClasses[this.makeCategoryKey(BlockAnyTypeAnyParty | Important)] ) {
        bf = this.matchTokens(bucket, url);
        if ( bf !== false ) {
            return 'sb:' + bf.toString() + '$important';
        }
    }
    if ( bucket = filterClasses[this.makeCategoryKey(BlockAnyType | Important | party)] ) {
        bf = this.matchTokens(bucket, url);
        if ( bf !== false ) {
            return 'sb:' + bf.toString() + '$important';
        }
    }
    if ( bucket = filterClasses[this.makeCategoryKey(BlockAnyParty | Important | type)] ) {
        bf = this.matchTokens(bucket, url);
        if ( bf !== false ) {
            return 'sb:' + bf.toString() + '$important';
        }
    }
    if ( bucket = filterClasses[this.makeCategoryKey(BlockAction | Important | type | party)] ) {
        bf = this.matchTokens(bucket, url);
        if ( bf !== false ) {
            return 'sb:' + bf.toString() + '$important';
        }
    }

    // Test against block filters, from the least to the most specific
    // category, stopping at the first hit.
    if ( bf === false ) {
        if ( bucket = filterClasses[this.makeCategoryKey(BlockAnyTypeAnyParty)] ) {
            bf = this.matchTokens(bucket, url);
        }
    }
    if ( bf === false ) {
        if ( bucket = filterClasses[this.makeCategoryKey(BlockAnyType | party)] ) {
            bf = this.matchTokens(bucket, url);
        }
    }
    if ( bf === false ) {
        if ( bucket = filterClasses[this.makeCategoryKey(BlockAnyParty | type)] ) {
            bf = this.matchTokens(bucket, url);
        }
    }
    if ( bf === false ) {
        if ( bucket = filterClasses[this.makeCategoryKey(BlockAction | type | party)] ) {
            bf = this.matchTokens(bucket, url);
        }
    }

    // If there is no block filter, no need to test against allow filters
    if ( bf === false ) {
        return '';
    }

    // Test against allow filters
    var af;
    if ( bucket = filterClasses[this.makeCategoryKey(AllowAnyTypeAnyParty)] ) {
        af = this.matchTokens(bucket, url);
        if ( af !== false ) {
            return 'sa:' + af.toString();
        }
    }
    if ( bucket = filterClasses[this.makeCategoryKey(AllowAnyType | party)] ) {
        af = this.matchTokens(bucket, url);
        if ( af !== false ) {
            return 'sa:' + af.toString();
        }
    }
    if ( bucket = filterClasses[this.makeCategoryKey(AllowAnyParty | type)] ) {
        af = this.matchTokens(bucket, url);
        if ( af !== false ) {
            return 'sa:' + af.toString();
        }
    }
    if ( bucket = filterClasses[this.makeCategoryKey(AllowAction | type | party)] ) {
        af = this.matchTokens(bucket, url);
        if ( af !== false ) {
            return 'sa:' + af.toString();
        }
    }

    return 'sb:' + bf.toString();
};
/******************************************************************************/
// Total number of filters: the sum of the block filter count and the allow
// filter count.
FilterContainer.prototype.getFilterCount = function() {
    var blockCount = this.blockFilterCount;
    var allowCount = this.allowFilterCount;
    return blockCount + allowCount;
};
/******************************************************************************/
return new FilterContainer();
/******************************************************************************/
})();