uBlock/src/js/static-net-filtering.js

2591 lines
74 KiB
JavaScript
Raw Normal View History

2014-06-24 00:42:43 +02:00
/*******************************************************************************
2016-06-27 03:15:18 +02:00
uBlock Origin - a browser extension to block requests.
Copyright (C) 2014-2016 Raymond Hill
2014-06-24 00:42:43 +02:00
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
/* jshint bitwise: false */
/* global punycode */
2016-06-27 03:15:18 +02:00
'use strict';
2014-06-24 00:42:43 +02:00
/******************************************************************************/
µBlock.staticNetFilteringEngine = (function(){
2014-06-24 00:42:43 +02:00
/******************************************************************************/
2014-09-14 22:20:40 +02:00
// Short local alias for the µBlock object.
var µb = µBlock;
2014-07-14 17:24:59 +02:00
// fedcba9876543210
2014-09-20 16:44:04 +02:00
// | | | |||
// | | | |||
// | | | |||
// | | | |||
// | | | ||+---- bit 0: [BlockAction | AllowAction]
// | | | |+---- bit 1: `important`
// | | | +---- bit 2-3: party [0 - 3]
// | | +---- bit 4-7: type [0 - 15]
// | +---- bit 8-14: unused
// +---- bit 15: never use! (to ensure valid unicode character)
2015-03-03 12:09:35 +01:00
// Action: bit 0 of a filter's category bits.
var BlockAction = 0 << 0;
var AllowAction = 1 << 0;

// `important` option: bit 1.
var Important = 1 << 1;

// Party: bits 2-3.
var AnyParty = 0 << 2;
var FirstParty = 1 << 2;
var ThirdParty = 2 << 2;
2014-06-24 00:42:43 +02:00
2015-03-26 00:28:22 +01:00
// Type: bits 4-7. A value of 0 means "any type".
var AnyType = 0 << 4;

// Maps a request type name to its encoded value: the type index (1-15)
// shifted into bits 4-7.
var typeNameToTypeValue = {
    'stylesheet':      1 << 4,
    'image':           2 << 4,
    'object':          3 << 4,
    'script':          4 << 4,
    'xmlhttprequest':  5 << 4,
    'sub_frame':       6 << 4,
    'font':            7 << 4,
    'media':           8 << 4,
    'websocket':       9 << 4,
    'other':          10 << 4,
    'popunder':       11 << 4,
    'main_frame':     12 << 4,
    'elemhide':       13 << 4,
    'inline-script':  14 << 4,
    'popup':          15 << 4
};
2015-01-24 14:21:14 +01:00
// Encoded value of the `other` request type, as found in typeNameToTypeValue.
var typeOtherValue = typeNameToTypeValue.other;
2014-09-21 02:06:55 +02:00
// Maps a type index (the encoded type value shifted right by 4) to a type
// name. Note that indices 6 and 12 are named `subdocument` and `document`
// here, not `sub_frame` and `main_frame` as in typeNameToTypeValue.
var typeValueToTypeName = {
     1: 'stylesheet',
     2: 'image',
     3: 'object',
     4: 'script',
     5: 'xmlhttprequest',
     6: 'subdocument',
     7: 'font',
     8: 'media',
     9: 'websocket',
    10: 'other',
    11: 'popunder',
    12: 'document',
    13: 'elemhide',
    14: 'inline-script',
    15: 'popup'
};
// All network request types to bitmap: as many low bits set as there are
// types up to and including `other`.
// - bring origin to 0 (from 4 -- see typeNameToTypeValue)
// - left-shift 1 by the above-calculated value
// - subtract 1 to set all type bits
var allNetRequestTypesBitmap = (1 << (typeOtherValue >>> 4)) - 1;
2015-03-03 12:09:35 +01:00
// Precomputed category bits for the "any type" / "any party" combinations of
// the block action...
var BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty;
var BlockAnyType = BlockAction | AnyType;
var BlockAnyParty = BlockAction | AnyParty;

// ...and of the allow action.
var AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty;
var AllowAnyType = AllowAction | AnyType;
var AllowAnyParty = AllowAction | AnyParty;
2014-06-24 00:42:43 +02:00
2014-09-19 16:59:44 +02:00
// Matches any of the characters `/`, `?` or `#`. The hostname-anchored
// filters test the part of the URL between `://` and the start of their match
// against it: finding one of these characters there means the match does not
// start within the hostname.
var reURLPostHostnameAnchors = /[\/?#]/;
2014-06-24 00:42:43 +02:00
// ABP filters: https://adblockplus.org/en/filters
// regex tester: http://regex101.com/
/******************************************************************************/
2015-02-05 00:06:31 +01:00
// See the following as short-lived registers, used during evaluation. They are
// valid until the next evaluation.
// Read by the hostname test helpers (hostnameHitTest and siblings) as the
// hostname to check against a filter's `domain` option.
var pageHostnameRegister = '';
// Not read in this part of the file -- presumably the hostname of the request
// being evaluated; confirm against the code which sets it.
var requestHostnameRegister = '';
2015-03-30 23:42:12 +02:00
//var filterRegister = null;
//var categoryRegister = '';
2015-02-05 00:06:31 +01:00
/******************************************************************************/
// No-op by default. A diagnostic implementation, which logs bucket counts to
// the console, is kept commented out just below.
var histogram = function() {};
2014-06-24 00:42:43 +02:00
/*
histogram = function(label, categories) {
2014-06-24 00:42:43 +02:00
var h = [],
categoryBucket;
for ( var k in categories ) {
// No need for hasOwnProperty() here: there is no prototype chain.
2014-06-24 00:42:43 +02:00
categoryBucket = categories[k];
for ( var kk in categoryBucket ) {
// No need for hasOwnProperty() here: there is no prototype chain.
2014-06-24 00:42:43 +02:00
filterBucket = categoryBucket[kk];
h.push({
k: k.charCodeAt(0).toString(2) + ' ' + kk,
2014-06-24 00:42:43 +02:00
n: filterBucket instanceof FilterBucket ? filterBucket.filters.length : 1
});
}
}
console.log('Histogram %s', label);
var total = h.length;
h.sort(function(a, b) { return b.n - a.n; });
// Find indices of entries of interest
var target = 2;
for ( var i = 0; i < total; i++ ) {
if ( h[i].n === target ) {
console.log('\tEntries with only %d filter(s) start at index %s (key = "%s")', target, i, h[i].k);
target -= 1;
}
}
h = h.slice(0, 50);
h.forEach(function(v) {
console.log('\tkey=%s count=%d', v.k, v.n);
});
console.log('\tTotal buckets count: %d', total);
};
*/
2014-09-08 23:46:58 +02:00
/******************************************************************************/
2015-06-09 23:01:31 +02:00
// Local helpers
2014-09-08 23:46:58 +02:00
// Keep a local reference to the global parseInt.
var cachedParseInt = parseInt;

// Parse `s` as a base-10 integer.
var atoi = function(s) {
    var radix = 10;
    return cachedParseInt(s, radix);
};
// Tell whether `hostname` is `domain` itself or one of its subdomains.
var isFirstParty = function(domain, hostname) {
    if ( hostname.endsWith(domain) === false ) {
        return false;
    }
    var extra = hostname.length - domain.length;
    if ( extra === 0 ) {
        return true;
    }
    // Be sure to not confuse 'example.com' with 'anotherexample.com': the
    // character just before the domain must be a label separator.
    return hostname.charAt(extra - 1) === '.';
};
2016-01-17 02:21:17 +01:00
// Validate a regular expression source string.
//
// Returns the normalized source (as reported by RegExp.prototype.source) when
// `s` compiles, or an empty string when it does not. In the latter case the
// error text is available through `normalizeRegexSource.message`; that
// property is reset to an empty string at each call, so that it never
// carries over the error of an earlier, unrelated call.
var normalizeRegexSource = function(s) {
    normalizeRegexSource.message = '';
    try {
        var re = new RegExp(s);
        return re.source;
    } catch (ex) {
        normalizeRegexSource.message = ex.toString();
    }
    return '';
};
2015-03-17 14:39:03 +01:00
// Stand-in for a RegExp which matches any string: it implements just the
// `test` and `exec` methods.
//
// `exec` always returns the same shared `match` object, updated in place: the
// whole input is reported as matched, at index 0.
var alwaysTruePseudoRegex = {
    match: { '0': '', index: 0 },
    exec: function(s) {
        this.match['0'] = s;
        return this.match;
    },
    test: function() {
        return true;
    }
};
2015-03-05 01:36:09 +01:00
// Convert a filter pattern into a regular expression.
//
//   s:      the filter pattern; `*` is a wildcard, `^` is a separator
//           placeholder, everything else is matched literally.
//   anchor: < 0 to anchor the pattern at the start of the input, > 0 to anchor
//           it at the end, 0 for no anchoring.
//   flags:  optional, passed as-is to the RegExp constructor.
//
// Returns a RegExp, or alwaysTruePseudoRegex when the pattern is just `*`.
var strToRegex = function(s, anchor, flags) {
    // https://github.com/chrisaljoudi/uBlock/issues/1038
    // Special case: always match.
    if ( s === '*' ) {
        return alwaysTruePseudoRegex;
    }

    // https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/
    // https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
    // Order matters: regex-special characters are escaped first, then the
    // (untouched) `^` and `*` are expanded into their regex equivalent.
    var reStr = s.replace(/[.+?${}()|[\]\\]/g, '\\$&')
                 .replace(/\^/g, '(?:[^%.0-9a-z_-]|$)')
                 .replace(/\*/g, '[^ ]*?');

    if ( anchor < 0 ) {
        reStr = '^' + reStr;
    } else if ( anchor > 0 ) {
        reStr += '$';
    }

    //console.debug('µBlock.staticNetFilteringEngine: created RegExp("%s")', reStr);
    return new RegExp(reStr, flags);
};
2015-06-09 23:16:28 +02:00
// Return the hexadecimal (lowercase, no prefix) representation of number `n`.
var toHex = function(n) {
    return n.toString(16);
};
/******************************************************************************/
// Hostname test helpers: the optimal test function is picked
// according to the content of the `domain` filter option.
// Pick the hostname test function best suited to the `domain` option of a
// filter, and prepare on the filter whatever data that function needs.
//
//   owner: the filter instance; its `domainOpt` property holds the raw
//          `domain` option: one or more hostnames separated by `|`, each
//          optionally prefixed with `~` to negate it.
//
// Depending on the case, `owner._notHostname` (single negated hostname) or
// `owner._hostnameDict` (multiple hostnames) is set as a side effect.
//
// Returns one of hostnameHitTest, hostnameMissTest, hostnameHitSetTest,
// hostnameMissSetTest or hostnameMixedSetTest.
var hostnameTestPicker = function(owner) {
    var domainOpt = owner.domainOpt;

    // Only one hostname
    if ( domainOpt.indexOf('|') === -1 ) {
        if ( domainOpt.startsWith('~') ) {
            owner._notHostname = domainOpt.slice(1);
            return hostnameMissTest;
        }
        return hostnameHitTest;
    }

    // Multiple hostnames: use a dictionary.
    var hostnames = domainOpt.split('|');
    var i, hostname, dict;

    // First find out whether we have a homogeneous dictionary: stop scanning
    // as soon as both a plain and a negated hostname have been seen.
    var hit = false, miss = false;
    i = hostnames.length;
    while ( i-- ) {
        if ( hostnames[i].startsWith('~') ) {
            miss = true;
        } else {
            hit = true;
        }
        if ( hit && miss ) {
            break;
        }
    }

    // Heterogeneous dictionary: this can happen, though VERY rarely.
    // Spotted one occurrence in EasyList Lite (cjxlist.txt):
    //   domain=photobucket.com|~secure.photobucket.com
    // A Map is used: hostname => true (plain) or false (negated).
    if ( hit && miss ) {
        dict = owner._hostnameDict = new Map();
        i = hostnames.length;
        while ( i-- ) {
            hostname = hostnames[i];
            if ( hostname.startsWith('~') ) {
                dict.set(hostname.slice(1), false);
            } else {
                dict.set(hostname, true);
            }
        }
        return hostnameMixedSetTest;
    }

    // Homogeneous dictionary: a Set of hostnames, stripped of their `~`
    // prefix if any.
    dict = owner._hostnameDict = new Set();
    i = hostnames.length;
    while ( i-- ) {
        hostname = hostnames[i];
        dict.add(hostname.startsWith('~') ? hostname.slice(1) : hostname);
    }
    return hit ? hostnameHitSetTest : hostnameMissSetTest;
};
// Single hostname: true when the page hostname register holds the hostname in
// `owner.domainOpt`, or a subdomain of it.
var hostnameHitTest = function(owner) {
    var pageHn = pageHostnameRegister;
    var wanted = owner.domainOpt;
    if ( pageHn.endsWith(wanted) === false ) {
        return false;
    }
    var boundary = pageHn.length - wanted.length;
    // Either an exact match, or the suffix starts right after a label
    // separator.
    return boundary === 0 || pageHn.charAt(boundary - 1) === '.';
};
// Single negated hostname: true when the page hostname register holds
// neither the hostname in `owner._notHostname` nor a subdomain of it. This is
// the exact negation of hostnameHitTest.
var hostnameMissTest = function(owner) {
    var current = pageHostnameRegister;
    var target = owner._notHostname;
    if ( current.endsWith(target) === false ) {
        return true;
    }
    var boundary = current.length - target.length;
    // Still a miss if the suffix match does not fall on a label boundary.
    return boundary !== 0 && current.charAt(boundary - 1) !== '.';
};
// Multiple hostnames, none negated: true when the page hostname register, or
// any of its parent domains, is found in `owner._hostnameDict`.
var hostnameHitSetTest = function(owner) {
    var entries = owner._hostnameDict;
    var candidate = pageHostnameRegister;
    var dot;
    while ( entries.has(candidate) === false ) {
        // Not found: retry with the leftmost label removed, if any.
        dot = candidate.indexOf('.');
        if ( dot === -1 ) {
            return false;
        }
        candidate = candidate.slice(dot + 1);
    }
    return true;
};
// Multiple hostnames, all negated: true when neither the page hostname
// register nor any of its parent domains is found in `owner._hostnameDict`.
var hostnameMissSetTest = function(owner) {
    var entries = owner._hostnameDict;
    var candidate = pageHostnameRegister;
    var dot;
    while ( entries.has(candidate) === false ) {
        // Not found: retry with the leftmost label removed, if any.
        dot = candidate.indexOf('.');
        if ( dot === -1 ) {
            return true;
        }
        candidate = candidate.slice(dot + 1);
    }
    return false;
};
// Multiple hostnames, some negated: `owner._hostnameDict` maps each hostname
// to true (plain) or false (negated). The page hostname register and each of
// its parent domains are looked up in turn: a negated entry vetoes the match
// right away, otherwise the result is whether at least one plain entry was
// encountered.
var hostnameMixedSetTest = function(owner) {
    var entries = owner._hostnameDict;
    var candidate = pageHostnameRegister;
    var matched = false;
    var verdict, dot;
    do {
        verdict = entries.get(candidate);
        if ( verdict === false ) {
            return false;
        }
        if ( verdict === true ) {
            matched = true;
        }
        // Move on to the parent domain; when there is no more dot, `slice(0)`
        // leaves the candidate unchanged and the loop ends.
        dot = candidate.indexOf('.');
        candidate = candidate.slice(dot + 1);
    } while ( dot !== -1 );
    return matched;
};
2014-06-24 00:42:43 +02:00
/*******************************************************************************
Filters family tree:
- plain (no wildcard)
- anywhere
- no hostname
- specific hostname
- anchored at start
- no hostname
- specific hostname
- anchored at end
- no hostname
- specific hostname
2014-09-19 16:59:44 +02:00
- anchored within hostname
- no hostname
- specific hostname (not implemented)
2014-06-24 00:42:43 +02:00
2015-03-05 01:36:09 +01:00
- with wildcard(s)
2014-09-19 16:59:44 +02:00
- anchored within hostname
2014-06-24 00:42:43 +02:00
- no hostname
- specific hostname
2015-03-05 01:36:09 +01:00
- all else
2014-06-24 00:42:43 +02:00
- no hostname
- specific hostname
*/
/******************************************************************************/
// Plain filter (no wildcard), not anchored.
//
//   s:        the pattern to find in the URL.
//   tokenBeg: offset, within `s`, used to align the pattern with the URL.
var FilterPlain = function(s, tokenBeg) {
    this.s = s;
    this.tokenBeg = tokenBeg;
};

// `tokenBeg` is an offset within `url`: the pattern must be found in the URL
// exactly `this.tokenBeg` characters before that offset.
FilterPlain.prototype.match = function(url, tokenBeg) {
    return url.startsWith(this.s, tokenBeg - this.tokenBeg);
};

// Filter class identifier.
FilterPlain.fid =
FilterPlain.prototype.fid =
FilterPlain.prototype.rtfid = 'a';

// Serialized form: pattern and token offset, tab-separated.
FilterPlain.prototype.toSelfie =
FilterPlain.prototype.rtCompile = function() {
    return this.s + '\t' + this.tokenBeg;
};

// Same serialized form, built from parsed filter details (`f`, `tokenBeg`).
FilterPlain.compile = function(details) {
    return details.f + '\t' + details.tokenBeg;
};

// Rebuild an instance from its serialized form.
FilterPlain.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlain(s.slice(0, pos), atoi(s.slice(pos + 1)));
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
// Plain filter (no wildcard), not anchored, restricted by a `domain` option.
//
//   s:         the pattern to find in the URL.
//   tokenBeg:  offset, within `s`, used to align the pattern with the URL.
//   domainOpt: raw `domain` option, from which the hostname test is derived.
var FilterPlainHostname = function(s, tokenBeg, domainOpt) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    this.domainOpt = domainOpt;
    this.hostnameTest = hostnameTestPicker(this);
};

// The pattern must be found `this.tokenBeg` characters before offset
// `tokenBeg` in the URL, and the hostname test must pass.
FilterPlainHostname.prototype.match = function(url, tokenBeg) {
    return url.startsWith(this.s, tokenBeg - this.tokenBeg) &&
           this.hostnameTest(this);
};

// Filter class identifier.
FilterPlainHostname.fid =
FilterPlainHostname.prototype.fid =
FilterPlainHostname.prototype.rtfid = 'ah';

// Serialized form: pattern, token offset and domain option, tab-separated.
FilterPlainHostname.prototype.toSelfie =
FilterPlainHostname.prototype.rtCompile = function() {
    return this.s + '\t' + this.tokenBeg + '\t' + this.domainOpt;
};

// Same serialized form, built from parsed filter details.
FilterPlainHostname.compile = function(details) {
    return details.f + '\t' + details.tokenBeg + '\t' + details.domainOpt;
};

// Rebuild an instance from its serialized form.
FilterPlainHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterPlainHostname(args[0], atoi(args[1]), args[2]);
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Plain filter (no wildcard) whose pattern starts exactly at the token
// offset.
var FilterPlainPrefix0 = function(s) {
    this.s = s;
};

// The pattern must be found at offset `tokenBeg` in the URL.
FilterPlainPrefix0.prototype.match = function(url, tokenBeg) {
    return url.startsWith(this.s, tokenBeg);
};

// Filter class identifier.
FilterPlainPrefix0.fid =
FilterPlainPrefix0.prototype.fid =
FilterPlainPrefix0.prototype.rtfid = '0a';

// Serialized form: the pattern itself.
FilterPlainPrefix0.prototype.toSelfie =
FilterPlainPrefix0.prototype.rtCompile = function() {
    return this.s;
};

// Same serialized form, built from parsed filter details.
FilterPlainPrefix0.compile = function(details) {
    return details.f;
};

// Rebuild an instance from its serialized form.
FilterPlainPrefix0.fromSelfie = function(s) {
    return new FilterPlainPrefix0(s);
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
// Plain filter (no wildcard) whose pattern starts exactly at the token
// offset, restricted by a `domain` option.
var FilterPlainPrefix0Hostname = function(s, domainOpt) {
    this.s = s;
    this.domainOpt = domainOpt;
    this.hostnameTest = hostnameTestPicker(this);
};

// The pattern must be found at offset `tokenBeg` in the URL, and the hostname
// test must pass.
FilterPlainPrefix0Hostname.prototype.match = function(url, tokenBeg) {
    return url.startsWith(this.s, tokenBeg) &&
           this.hostnameTest(this);
};

// Filter class identifier.
FilterPlainPrefix0Hostname.fid =
FilterPlainPrefix0Hostname.prototype.fid =
FilterPlainPrefix0Hostname.prototype.rtfid = '0ah';

// Serialized form: pattern and domain option, tab-separated.
FilterPlainPrefix0Hostname.prototype.toSelfie =
FilterPlainPrefix0Hostname.prototype.rtCompile = function() {
    return this.s + '\t' + this.domainOpt;
};

// Same serialized form, built from parsed filter details.
FilterPlainPrefix0Hostname.compile = function(details) {
    return details.f + '\t' + details.domainOpt;
};

// Rebuild an instance from its serialized form.
FilterPlainPrefix0Hostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainPrefix0Hostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Plain filter (no wildcard) whose pattern starts one character before the
// token offset.
var FilterPlainPrefix1 = function(s) {
    this.s = s;
};

// The pattern must be found at offset `tokenBeg - 1` in the URL.
FilterPlainPrefix1.prototype.match = function(url, tokenBeg) {
    return url.startsWith(this.s, tokenBeg - 1);
};

// Filter class identifier.
FilterPlainPrefix1.fid =
FilterPlainPrefix1.prototype.fid =
FilterPlainPrefix1.prototype.rtfid = '1a';

// Serialized form: the pattern itself.
FilterPlainPrefix1.prototype.toSelfie =
FilterPlainPrefix1.prototype.rtCompile = function() {
    return this.s;
};

// Same serialized form, built from parsed filter details.
FilterPlainPrefix1.compile = function(details) {
    return details.f;
};

// Rebuild an instance from its serialized form.
FilterPlainPrefix1.fromSelfie = function(s) {
    return new FilterPlainPrefix1(s);
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
// Plain filter (no wildcard) whose pattern starts one character before the
// token offset, restricted by a `domain` option.
var FilterPlainPrefix1Hostname = function(s, domainOpt) {
    this.s = s;
    this.domainOpt = domainOpt;
    this.hostnameTest = hostnameTestPicker(this);
};

// The pattern must be found at offset `tokenBeg - 1` in the URL, and the
// hostname test must pass.
FilterPlainPrefix1Hostname.prototype.match = function(url, tokenBeg) {
    return url.startsWith(this.s, tokenBeg - 1) &&
           this.hostnameTest(this);
};

// Filter class identifier.
FilterPlainPrefix1Hostname.fid =
FilterPlainPrefix1Hostname.prototype.fid =
FilterPlainPrefix1Hostname.prototype.rtfid = '1ah';

// Serialized form: pattern and domain option, tab-separated.
FilterPlainPrefix1Hostname.prototype.toSelfie =
FilterPlainPrefix1Hostname.prototype.rtCompile = function() {
    return this.s + '\t' + this.domainOpt;
};

// Same serialized form, built from parsed filter details.
FilterPlainPrefix1Hostname.compile = function(details) {
    return details.f + '\t' + details.domainOpt;
};

// Rebuild an instance from its serialized form.
FilterPlainPrefix1Hostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainPrefix1Hostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Plain filter (no wildcard), anchored at the start of the URL.
var FilterPlainLeftAnchored = function(s) {
    this.s = s;
};

// The URL must start with the pattern.
FilterPlainLeftAnchored.prototype.match = function(url) {
    return url.startsWith(this.s);
};

// Filter class identifier.
FilterPlainLeftAnchored.fid =
FilterPlainLeftAnchored.prototype.fid =
FilterPlainLeftAnchored.prototype.rtfid = '|a';

// Serialized form: the pattern itself.
FilterPlainLeftAnchored.prototype.toSelfie =
FilterPlainLeftAnchored.prototype.rtCompile = function() {
    return this.s;
};

// Same serialized form, built from parsed filter details.
FilterPlainLeftAnchored.compile = function(details) {
    return details.f;
};

// Rebuild an instance from its serialized form.
FilterPlainLeftAnchored.fromSelfie = function(s) {
    return new FilterPlainLeftAnchored(s);
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
// Plain filter (no wildcard), anchored at the start of the URL, restricted by
// a `domain` option.
var FilterPlainLeftAnchoredHostname = function(s, domainOpt) {
    this.s = s;
    this.domainOpt = domainOpt;
    this.hostnameTest = hostnameTestPicker(this);
};

// The URL must start with the pattern, and the hostname test must pass.
FilterPlainLeftAnchoredHostname.prototype.match = function(url) {
    return url.startsWith(this.s) &&
           this.hostnameTest(this);
};

// Filter class identifier.
FilterPlainLeftAnchoredHostname.fid =
FilterPlainLeftAnchoredHostname.prototype.fid =
FilterPlainLeftAnchoredHostname.prototype.rtfid = '|ah';

// Serialized form: pattern and domain option, tab-separated.
FilterPlainLeftAnchoredHostname.prototype.toSelfie =
FilterPlainLeftAnchoredHostname.prototype.rtCompile = function() {
    return this.s + '\t' + this.domainOpt;
};

// Same serialized form, built from parsed filter details.
FilterPlainLeftAnchoredHostname.compile = function(details) {
    return details.f + '\t' + details.domainOpt;
};

// Rebuild an instance from its serialized form.
FilterPlainLeftAnchoredHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainLeftAnchoredHostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Plain filter (no wildcard), anchored at the end of the URL.
var FilterPlainRightAnchored = function(s) {
    this.s = s;
};

// The URL must end with the pattern.
FilterPlainRightAnchored.prototype.match = function(url) {
    return url.endsWith(this.s);
};

// Filter class identifier.
FilterPlainRightAnchored.fid =
FilterPlainRightAnchored.prototype.fid =
FilterPlainRightAnchored.prototype.rtfid = 'a|';

// Serialized form: the pattern itself.
FilterPlainRightAnchored.prototype.toSelfie =
FilterPlainRightAnchored.prototype.rtCompile = function() {
    return this.s;
};

// Same serialized form, built from parsed filter details.
FilterPlainRightAnchored.compile = function(details) {
    return details.f;
};

// Rebuild an instance from its serialized form.
FilterPlainRightAnchored.fromSelfie = function(s) {
    return new FilterPlainRightAnchored(s);
};
2014-08-28 15:59:05 +02:00
/******************************************************************************/
// Plain filter (no wildcard), anchored at the end of the URL, restricted by a
// `domain` option.
var FilterPlainRightAnchoredHostname = function(s, domainOpt) {
    this.s = s;
    this.domainOpt = domainOpt;
    this.hostnameTest = hostnameTestPicker(this);
};

// The URL must end with the pattern, and the hostname test must pass.
FilterPlainRightAnchoredHostname.prototype.match = function(url) {
    return url.endsWith(this.s) &&
           this.hostnameTest(this);
};

// Filter class identifier.
FilterPlainRightAnchoredHostname.fid =
FilterPlainRightAnchoredHostname.prototype.fid =
FilterPlainRightAnchoredHostname.prototype.rtfid = 'a|h';

// Serialized form: pattern and domain option, tab-separated.
FilterPlainRightAnchoredHostname.prototype.toSelfie =
FilterPlainRightAnchoredHostname.prototype.rtCompile = function() {
    return this.s + '\t' + this.domainOpt;
};

// Same serialized form, built from parsed filter details.
FilterPlainRightAnchoredHostname.compile = function(details) {
    return details.f + '\t' + details.domainOpt;
};

// Rebuild an instance from its serialized form.
FilterPlainRightAnchoredHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainRightAnchoredHostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
2015-04-07 03:26:05 +02:00
// https://github.com/chrisaljoudi/uBlock/issues/235
2014-09-19 16:59:44 +02:00
// The filter is left-anchored somewhere within the hostname part of the URL.
// Plain filter (no wildcard) which must match starting somewhere within the
// hostname part of the URL.
var FilterPlainHnAnchored = function(s) {
    this.s = s;
};

// The pattern must be found at offset `tokenBeg` in the URL, and nothing
// between the end of `://` and that offset may be a path, query or fragment
// delimiter. A URL without `://` never matches.
FilterPlainHnAnchored.prototype.match = function(url, tokenBeg) {
    if ( url.startsWith(this.s, tokenBeg) === false ) {
        return false;
    }
    // Valid only if hostname-valid characters to the left of token
    var pos = url.indexOf('://');
    return pos !== -1 &&
           reURLPostHostnameAnchors.test(url.slice(pos + 3, tokenBeg)) === false;
};

// Filter class identifier.
FilterPlainHnAnchored.fid =
FilterPlainHnAnchored.prototype.fid =
FilterPlainHnAnchored.prototype.rtfid = '||a';

// Serialized form: the pattern itself.
FilterPlainHnAnchored.prototype.toSelfie =
FilterPlainHnAnchored.prototype.rtCompile = function() {
    return this.s;
};

// Same serialized form, built from parsed filter details.
FilterPlainHnAnchored.compile = function(details) {
    return details.f;
};

// Rebuild an instance from its serialized form.
FilterPlainHnAnchored.fromSelfie = function(s) {
    return new FilterPlainHnAnchored(s);
};
// https://www.youtube.com/watch?v=71YS6xDB-E4
// https://www.youtube.com/watch?v=qBPML7ton0E
2014-09-19 16:59:44 +02:00
/******************************************************************************/
2015-04-27 21:09:19 +02:00
// https://github.com/gorhill/uBlock/issues/142
// Plain filter (no wildcard) which must match starting somewhere within the
// hostname part of the URL, restricted by a `domain` option.
var FilterPlainHnAnchoredHostname = function(s, domainOpt) {
    this.s = s;
    this.domainOpt = domainOpt;
    this.hostnameTest = hostnameTestPicker(this);
};

// The pattern must be found at offset `tokenBeg` in the URL, the hostname
// test must pass, and nothing between the end of `://` and that offset may be
// a path, query or fragment delimiter. A URL without `://` never matches.
FilterPlainHnAnchoredHostname.prototype.match = function(url, tokenBeg) {
    if (
        url.startsWith(this.s, tokenBeg) === false ||
        this.hostnameTest(this) === false
    ) {
        return false;
    }
    // Valid only if hostname-valid characters to the left of token
    var pos = url.indexOf('://');
    return pos !== -1 &&
           reURLPostHostnameAnchors.test(url.slice(pos + 3, tokenBeg)) === false;
};

// Filter class identifier.
FilterPlainHnAnchoredHostname.fid =
FilterPlainHnAnchoredHostname.prototype.fid =
FilterPlainHnAnchoredHostname.prototype.rtfid = '||ah';

// Serialized form: pattern and domain option, tab-separated.
FilterPlainHnAnchoredHostname.prototype.toSelfie =
FilterPlainHnAnchoredHostname.prototype.rtCompile = function() {
    return this.s + '\t' + this.domainOpt;
};

// Same serialized form, built from parsed filter details.
FilterPlainHnAnchoredHostname.compile = function(details) {
    return details.f + '\t' + details.domainOpt;
};

// Rebuild an instance from its serialized form.
FilterPlainHnAnchoredHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainHnAnchoredHostname(s.slice(0, pos), s.slice(pos + 1));
};
/******************************************************************************/
2015-03-05 01:36:09 +01:00
// Generic filter
2014-09-08 23:46:58 +02:00
2015-03-05 01:36:09 +01:00
// Generic filter: the pattern is converted into a regular expression.
//
//   s:      the filter pattern.
//   anchor: anchoring mode, passed as-is to strToRegex.
var FilterGeneric = function(s, anchor) {
    this.s = s;
    this.anchor = anchor;
    this.re = null;
};

// The regular expression is created lazily, the first time a URL is tested,
// then reused.
FilterGeneric.prototype.match = function(url) {
    if ( this.re === null ) {
        this.re = strToRegex(this.s, this.anchor);
    }
    return this.re.test(url);
};

// Filter class identifier.
FilterGeneric.fid =
FilterGeneric.prototype.fid =
FilterGeneric.prototype.rtfid = '_';

// Serialized form: pattern and anchoring mode, tab-separated.
FilterGeneric.prototype.toSelfie =
FilterGeneric.prototype.rtCompile = function() {
    return this.s + '\t' + this.anchor;
};

// Same serialized form, built from parsed filter details.
FilterGeneric.compile = function(details) {
    return details.f + '\t' + details.anchor;
};

// Rebuild an instance from its serialized form.
FilterGeneric.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterGeneric(s.slice(0, pos), parseInt(s.slice(pos + 1), 10));
};
2014-06-24 00:42:43 +02:00
2014-09-08 23:46:58 +02:00
/******************************************************************************/
2015-03-05 01:36:09 +01:00
// Generic filter
2014-08-28 15:59:05 +02:00
// Generic filter restricted by a `domain` option. Inherits from
// FilterGeneric.
//
//   s:         the filter pattern.
//   anchor:    anchoring mode.
//   domainOpt: raw `domain` option, from which the hostname test is derived.
var FilterGenericHostname = function(s, anchor, domainOpt) {
    FilterGeneric.call(this, s, anchor);
    this.domainOpt = domainOpt;
    this.hostnameTest = hostnameTestPicker(this);
};
FilterGenericHostname.prototype = Object.create(FilterGeneric.prototype);
FilterGenericHostname.prototype.constructor = FilterGenericHostname;

// The hostname test is evaluated first; the inherited pattern test is run
// only if it passes.
FilterGenericHostname.prototype.match = function(url) {
    return this.hostnameTest(this) &&
           FilterGeneric.prototype.match.call(this, url);
};

// Filter class identifier.
FilterGenericHostname.fid =
FilterGenericHostname.prototype.fid =
FilterGenericHostname.prototype.rtfid = '_h';

// Serialized form: FilterGeneric's, followed by a tab and the domain option.
FilterGenericHostname.prototype.toSelfie =
FilterGenericHostname.prototype.rtCompile = function() {
    return FilterGeneric.prototype.toSelfie.call(this) + '\t' + this.domainOpt;
};

// Same serialized form, built from parsed filter details.
FilterGenericHostname.compile = function(details) {
    return FilterGeneric.compile(details) + '\t' + details.domainOpt;
};

// Rebuild an instance from its serialized form.
FilterGenericHostname.fromSelfie = function(s) {
    var fields = s.split('\t');
    return new FilterGenericHostname(fields[0], parseInt(fields[1], 10), fields[2]);
};
2014-06-24 00:42:43 +02:00
/******************************************************************************/
2015-03-02 16:41:51 +01:00
// Generic filter: hostname-anchored: it has that extra test to find out
// whether the start of the match falls within the hostname part of the
// URL.
// Generic filter, hostname-anchored: on top of the pattern test, the start of
// the match must fall within the hostname part of the URL.
var FilterGenericHnAnchored = function(s) {
    this.s = s;
    this.re = null;
};

// The regular expression is created lazily, the first time a URL is tested,
// then reused. A URL without `://` never matches.
FilterGenericHnAnchored.prototype.match = function(url) {
    if ( this.re === null ) {
        this.re = strToRegex(this.s, 0);
    }
    // A single `exec` both tells whether there is a match and where it
    // starts: no need to evaluate the regex twice.
    var match = this.re.exec(url);
    if ( match === null ) {
        return false;
    }
    // Valid only if the beginning of the match is within the hostname part of
    // the URL, i.e. no path, query or fragment delimiter is found before it.
    var pos = url.indexOf('://');
    return pos !== -1 &&
           reURLPostHostnameAnchors.test(url.slice(pos + 3, match.index)) === false;
};

// Filter class identifier.
FilterGenericHnAnchored.fid =
FilterGenericHnAnchored.prototype.fid =
FilterGenericHnAnchored.prototype.rtfid = '||_';

// Serialized form: the pattern itself.
FilterGenericHnAnchored.prototype.toSelfie =
FilterGenericHnAnchored.prototype.rtCompile = function() {
    return this.s;
};

// Same serialized form, built from parsed filter details.
FilterGenericHnAnchored.compile = function(details) {
    return details.f;
};

// Rebuild an instance from its serialized form.
FilterGenericHnAnchored.fromSelfie = function(s) {
    return new FilterGenericHnAnchored(s);
};
/******************************************************************************/
// Generic, hostname-anchored filter restricted by a `domain` option. Inherits
// from FilterGenericHnAnchored.
var FilterGenericHnAnchoredHostname = function(s, domainOpt) {
    FilterGenericHnAnchored.call(this, s);
    this.domainOpt = domainOpt;
    this.hostnameTest = hostnameTestPicker(this);
};
FilterGenericHnAnchoredHostname.prototype = Object.create(FilterGenericHnAnchored.prototype);
FilterGenericHnAnchoredHostname.prototype.constructor = FilterGenericHnAnchoredHostname;

// The hostname test is evaluated first; the inherited pattern test is run
// only if it passes.
FilterGenericHnAnchoredHostname.prototype.match = function(url) {
    return this.hostnameTest(this) &&
           FilterGenericHnAnchored.prototype.match.call(this, url);
};

// Filter class identifier.
FilterGenericHnAnchoredHostname.fid =
FilterGenericHnAnchoredHostname.prototype.fid =
FilterGenericHnAnchoredHostname.prototype.rtfid = '||_h';

// Serialized form: pattern and domain option, tab-separated.
FilterGenericHnAnchoredHostname.prototype.toSelfie =
FilterGenericHnAnchoredHostname.prototype.rtCompile = function() {
    return this.s + '\t' + this.domainOpt;
};

// Same serialized form, built from parsed filter details.
FilterGenericHnAnchoredHostname.compile = function(details) {
    return details.f + '\t' + details.domainOpt;
};

// Rebuild an instance from its serialized form.
FilterGenericHnAnchoredHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterGenericHnAnchoredHostname(s.slice(0, pos), s.slice(pos + 1));
};
/******************************************************************************/
2015-01-23 17:32:49 +01:00
// Regex-based filters
// Regex-based filter: `s` is the source of a regular expression, which is
// compiled case-insensitive at construction time.
var FilterRegex = function(s) {
    this.re = new RegExp(s, 'i');
};

FilterRegex.prototype.match = function(url) {
    return this.re.test(url);
};

// Filter class identifier.
FilterRegex.fid =
FilterRegex.prototype.fid =
FilterRegex.prototype.rtfid = '//';

// Serialized form: the source of the regular expression.
FilterRegex.prototype.toSelfie =
FilterRegex.prototype.rtCompile = function() {
    return this.re.source;
};

// Same serialized form, built from parsed filter details.
FilterRegex.compile = function(details) {
    return details.f;
};

// Rebuild an instance from its serialized form.
FilterRegex.fromSelfie = function(s) {
    return new FilterRegex(s);
};
/******************************************************************************/
// Regex-based filter restricted by a `domain` option: `s` is the source of a
// regular expression, which is compiled case-insensitive at construction
// time.
var FilterRegexHostname = function(s, domainOpt) {
    this.re = new RegExp(s, 'i');
    this.domainOpt = domainOpt;
    this.hostnameTest = hostnameTestPicker(this);
};

FilterRegexHostname.prototype.match = function(url) {
    // test hostname first, it's cheaper than evaluating a regex
    return this.hostnameTest(this) &&
           this.re.test(url);
};

// Filter class identifier.
FilterRegexHostname.fid =
FilterRegexHostname.prototype.fid =
FilterRegexHostname.prototype.rtfid = '//h';

// Serialized form: regex source and domain option, tab-separated.
FilterRegexHostname.prototype.toSelfie =
FilterRegexHostname.prototype.rtCompile = function() {
    return this.re.source + '\t' + this.domainOpt;
};

// Same serialized form, built from parsed filter details.
FilterRegexHostname.compile = function(details) {
    return details.f + '\t' + details.domainOpt;
};

// Rebuild an instance from its serialized form.
FilterRegexHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterRegexHostname(s.slice(0, pos), s.slice(pos + 1));
};
/******************************************************************************/
2014-09-08 23:46:58 +02:00
/******************************************************************************/
2015-02-05 00:06:31 +01:00
// Dictionary of hostnames
2015-02-05 14:45:29 +01:00
//
// FilterHostnameDict is the main reason why uBlock is not equipped to keep
// track of which filter comes from which list, and also why it's not equipped
// to be able to disable a specific filter -- other than through using a
// counter-filter.
//
// On the other hand it is also *one* of the reasons uBlock's memory and CPU
// footprint is smaller. Compacting huge list of hostnames into single strings
// saves a lot of memory compared to having one dictionary entry per hostname.
2015-02-05 00:06:31 +01:00
// Compact set of plain hostnames.
//
// Hostnames are spread over buckets, keyed by makeKey(). A bucket is either:
// - "melted": a plain object whose own property names are the hostnames
//   (the form used while hostnames are being added);
// - "frozen": a single string, in one of two layouts:
//   - ' hn1 hn2 ... ' (starts with a space), searched with indexOf();
//   - sorted, same-length hostnames concatenated without any separator,
//     searched with a binary search.
var FilterHostnameDict = function() {
    this.h = ''; // short-lived register
    this.dict = {};
    this.count = 0;
};

// Somewhat arbitrary: I need to come up with hard data to know at which
// point binary search is better than indexOf.
//
// http://jsperf.com/string-indexof-vs-binary-search
// Tuning above performance benchmark, it appears 250 is roughly a good value
// for both Chromium/Firefox.
// Example of benchmark values: '------30', '-----100', etc. -- the
// needle string must always be 8-character long.
FilterHostnameDict.prototype.cutoff = 250;

// makeKey() folds every hostname length above this value into the same key.
// Buckets holding hostnames this long can therefore mix hostnames of
// different lengths, and must never use the fixed-width layout.
FilterHostnameDict.prototype.maxKeyLength = 255;

// Probably not needed under normal circumstances.
// Converts a frozen bucket (string) back to a melted one (object).
//   len: length of the hostnames stored in a fixed-width bucket
//   bucket: the frozen bucket
FilterHostnameDict.prototype.meltBucket = function(len, bucket) {
    var map = {};
    if ( bucket.startsWith(' ') ) {
        bucket.trim().split(' ').forEach(function(k) {
            map[k] = true;
        });
        return map;
    }
    var offset = 0;
    // `len !== 0` guards against never advancing through the string.
    while ( len !== 0 && offset < bucket.length ) {
        map[bucket.slice(offset, offset + len)] = true;
        offset += len;
    }
    return map;
};

// Converts a melted bucket (object) to its frozen form (string).
FilterHostnameDict.prototype.freezeBucket = function(bucket) {
    var hostnames = Object.keys(bucket);
    if ( hostnames.length === 0 ) {
        return '';
    }
    var len = hostnames[0].length;
    // Space-delimited layout for small buckets, and for buckets which may
    // contain hostnames of different lengths (see maxKeyLength).
    if ( len >= this.maxKeyLength || len * hostnames.length < this.cutoff ) {
        return ' ' + hostnames.join(' ') + ' ';
    }
    return hostnames.sort().join('');
};

// How the key is derived dictates the number and size of buckets:
// - more bits = more buckets = higher memory footprint
// - less bits = less buckets = lower memory footprint
// - binary search mitigates very well the fact that some buckets may grow
//   large when fewer bits are used (or when a large number of items are
//   stored). Binary search also mitigate to the point of non-issue the
//   CPU footprint requirement with large buckets, as far as reference
//   benchmark shows.
//
// A hash key capable of better spread while being as fast would be
// just great.
//
// The key is a two-character string: the first character is built from the
// lowest bit of five sampled characters of the hostname, the second one is
// the (capped) length of the hostname.
FilterHostnameDict.prototype.makeKey = function(hn) {
    var len = hn.length;
    if ( len > this.maxKeyLength ) {
        len = this.maxKeyLength;
    }
    var i8 = len >>> 3;
    var i4 = len >>> 2;
    var i2 = len >>> 1;
    // http://jsperf.com/makekey-concat-vs-join/3
    // Be sure the msb is not set, this will guarantee a valid unicode
    // character (because 0xD800-0xDFFF).
    return String.fromCharCode(
        (hn.charCodeAt(      i8) & 0x01) << 14 |
//      (hn.charCodeAt(   i4   ) & 0x01) << 13 |
        (hn.charCodeAt(   i4+i8) & 0x01) << 12 |
        (hn.charCodeAt(i2      ) & 0x01) << 11 |
        (hn.charCodeAt(i2   +i8) & 0x01) << 10 |
//      (hn.charCodeAt(i2+i4   ) & 0x01) <<  9 |
        (hn.charCodeAt(i2+i4+i8) & 0x01) <<  8 ,
        len
    );
};

// Adds a hostname. Returns true if the hostname was added, false if it was
// already present. A frozen bucket is melted first.
FilterHostnameDict.prototype.add = function(hn) {
    var key = this.makeKey(hn);
    var bucket = this.dict[key];
    if ( bucket === undefined ) {
        bucket = this.dict[key] = {};
    } else if ( typeof bucket === 'string' ) {
        bucket = this.dict[key] = this.meltBucket(hn.length, bucket);
    }
    // Hostnames are used as property names: do not rely on the bucket's own
    // `hasOwnProperty`, a stored hostname could shadow it.
    if ( Object.prototype.hasOwnProperty.call(bucket, hn) ) {
        return false;
    }
    bucket[hn] = true;
    this.count += 1;
    return true;
};

// Freezes all the buckets which are still melted.
FilterHostnameDict.prototype.freeze = function() {
    var buckets = this.dict;
    var bucket;
    for ( var key in buckets ) {
        bucket = buckets[key];
        if ( typeof bucket === 'object' ) {
            buckets[key] = this.freezeBucket(bucket);
        }
    }
};

// Returns true if `hn` is stored in the dictionary. A melted bucket is
// frozen on the fly before being searched.
FilterHostnameDict.prototype.matchesExactly = function(hn) {
    // TODO: Handle IP address
    var key = this.makeKey(hn);
    var bucket = this.dict[key];
    if ( bucket === undefined ) {
        return false;
    }
    if ( typeof bucket === 'object' ) {
        bucket = this.dict[key] = this.freezeBucket(bucket);
    }
    if ( bucket.startsWith(' ') ) {
        return bucket.indexOf(' ' + hn + ' ') !== -1;
    }
    // binary search
    var len = hn.length;
    var left = 0;
    // http://jsperf.com/or-vs-floor/17
    var right = (bucket.length / len + 0.5) | 0;
    var i, needle;
    while ( left < right ) {
        i = left + right >> 1;
        needle = bucket.slice(len * i, len * i + len);
        if ( hn < needle ) {
            right = i;
        } else if ( hn > needle ) {
            left = i + 1;
        } else {
            return true;
        }
    }
    return false;
};

// Tests the hostname found in `requestHostnameRegister`, then each of its
// parent domains in turn. On a hit, the matching hostname is stored in
// `this.h` and `this` is returned; otherwise `this.h` is cleared and false
// is returned.
FilterHostnameDict.prototype.match = function() {
    // TODO: mind IP addresses
    var pos,
        hostname = requestHostnameRegister;
    while ( this.matchesExactly(hostname) === false ) {
        pos = hostname.indexOf('.');
        if ( pos === -1 ) {
            this.h = '';
            return false;
        }
        hostname = hostname.slice(pos + 1);
    }
    this.h = hostname;
    return this;
};

FilterHostnameDict.fid =
FilterHostnameDict.prototype.fid = '{h}';
FilterHostnameDict.rtfid = '.';

// The hostname which caused the last successful match().
FilterHostnameDict.prototype.rtCompile = function() {
    return this.h;
};

// Serialized as JSON: hostname count and buckets.
FilterHostnameDict.prototype.toSelfie = function() {
    return JSON.stringify({
        count: this.count,
        dict: this.dict
    });
};

// Inverse of toSelfie().
FilterHostnameDict.fromSelfie = function(s) {
    var f = new FilterHostnameDict();
    var o = JSON.parse(s);
    f.count = o.count;
    f.dict = o.dict;
    return f;
};
/******************************************************************************/
/******************************************************************************/
// Some buckets can grow quite large, and finding a hit in these buckets
// may end up being expensive. After considering various solutions, the one
// retained is to promote hit filters to a smaller index, so that next time
// they can be looked-up faster.
2014-09-19 16:59:44 +02:00
// key= 10000 ad count=660
// key= 10000 ads count=433
// key= 10001 google count=277
// key=1000000 2mdn count=267
// key= 10000 social count=240
// key= 10001 pagead2 count=166
// key= 10000 twitter count=122
// key= 10000 doubleclick count=118
// key= 10000 facebook count=114
// key= 10000 share count=113
// key= 10000 google count=106
// key= 10001 code count=103
// key= 11000 doubleclick count=100
// key=1010001 g count=100
// key= 10001 js count= 89
// key= 10000 adv count= 88
// key= 10000 youtube count= 61
// key= 10000 plugins count= 60
// key= 10001 partner count= 59
// key= 10000 ico count= 57
// key= 110001 ssl count= 57
// key= 10000 banner count= 53
// key= 10000 footer count= 51
// key= 10000 rss count= 51
2014-09-19 16:59:44 +02:00
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// Ordered collection of filters sharing the same token.
//   a, b: optional initial filters
// `f` holds the filter which caused the last successful match(); `rtfid`
// and rtCompile() are forwarded to that filter.
var FilterBucket = function(a, b) {
    this.promoted = 0;
    this.vip = 16;
    this.f = null;  // short-lived register
    this.filters = [];
    if ( a !== undefined ) {
        this.filters[0] = a;
        if ( b !== undefined ) {
            this.filters[1] = b;
        }
    }
    Object.defineProperty(this, 'rtfid', {
        get: function() {
            return this.f.rtfid;
        }
    });
};

// Appends a filter to the bucket.
FilterBucket.prototype.add = function(a) {
    this.filters.push(a);
};

// Promote hit filters so they can be found faster next time.
// The filter at index `i` is swapped with a filter located in the first
// `pivot` entries; the target slot cycles with the number of promotions
// performed so far. Nothing happens when `i` is already at or below the
// pivot.
FilterBucket.prototype.promote = function(i) {
    var filters = this.filters;
    var pivot = filters.length >>> 1;
    while ( i < pivot ) {
        pivot >>>= 1;
        if ( pivot < this.vip ) {
            break;
        }
    }
    if ( i <= pivot ) {
        return;
    }
    var j = this.promoted % pivot;
    //console.debug('FilterBucket.promote(): promoted %d to %d', i, j);
    var f = filters[j];
    filters[j] = filters[i];
    filters[i] = f;
    this.promoted += 1;
};

// Returns true as soon as one filter matches. The matching filter is stored
// in `this.f`, and is promoted when found at index `vip` or beyond.
FilterBucket.prototype.match = function(url, tokenBeg) {
    var filters = this.filters;
    var n = filters.length;
    for ( var i = 0; i < n; i++ ) {
        if ( filters[i].match(url, tokenBeg) ) {
            this.f = filters[i];
            if ( i >= this.vip ) {
                this.promote(i);
            }
            return true;
        }
    }
    return false;
};

FilterBucket.prototype.fid = '[]';

// Only the number of filters is serialized here.
FilterBucket.prototype.toSelfie = function() {
    return this.filters.length.toString();
};

// Not supposed to be called without a valid filter hit.
FilterBucket.prototype.rtCompile = function() {
    return this.f.rtCompile();
};

// Returns an empty bucket; the serialized argument is not used.
FilterBucket.fromSelfie = function() {
    return new FilterBucket();
};
/******************************************************************************/
2014-06-24 00:42:43 +02:00
/******************************************************************************/
// Parser for a single raw network filter. The regular expressions used
// while parsing are created once per instance; all per-filter state is
// (re)initialized by reset().
var FilterParser = function() {
    this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i;
    this.reHostnameRule2 = /^\**[0-9a-z][0-9a-z.-]*[0-9a-z]\^?$/i;
    this.reCleanupHostnameRule2 = /^\**|\^$/g;
    this.reHasWildcard = /[\^\*]/;
    this.reCanTrimCarets1 = /^[^*]*$/;
    this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/;
    this.reHasUppercase = /[A-Z]/;
    this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
    this.reHasUnicode = /[^\x00-\x7F]/;
    this.domainOpt = '';
    this.reset();
};
/******************************************************************************/
// Maps the type names accepted as filter options to the type names used
// as keys into `typeNameToTypeValue` (see parseOptType()).
FilterParser.prototype.toNormalizedType = {
           'stylesheet': 'stylesheet',
                'image': 'image',
               'object': 'object',
    'object-subrequest': 'object',
               'script': 'script',
       'xmlhttprequest': 'xmlhttprequest',
          'subdocument': 'sub_frame',
                 'font': 'font',
                'media': 'media',
            'websocket': 'websocket',
                'other': 'other',
             'popunder': 'popunder',
             'document': 'main_frame',
             'elemhide': 'elemhide',
        'inline-script': 'inline-script',
                'popup': 'popup'
};
/******************************************************************************/
// Restores every per-filter field to its default value. Returns `this`.
FilterParser.prototype.reset = function() {
    this.action = BlockAction;
    this.anchor = 0;
    this.elemHiding = false;
    this.f = '';
    this.firstParty = false;
    this.fopts = '';
    this.hostnameAnchored = false;
    this.hostnamePure = false;
    this.domainOpt = '';
    this.isRegex = false;
    this.raw = '';
    this.redirect = false;
    this.thirdParty = false;
    this.token = '*';
    this.tokenBeg = 0;
    this.types = 0;
    this.important = 0;
    this.unsupported = false;
    return this;
};
/******************************************************************************/
2015-04-07 03:26:05 +02:00
// https://github.com/chrisaljoudi/uBlock/issues/589
// Be ready to handle multiple negated types
2014-06-24 00:42:43 +02:00
// Records a request type option into the `types` bitmap.
//   raw: type name as found in the filter (a key of toNormalizedType)
//   not: true when the type is negated (`~type`)
// The bit assigned to a type is derived from its `typeNameToTypeValue`
// entry: that value shifted right by 4, minus one, is the bit position.
FilterParser.prototype.parseOptType = function(raw, not) {
    var typeBit = 1 << ((typeNameToTypeValue[this.toNormalizedType[raw]] >>> 4) - 1);

    if ( !not ) {
        this.types |= typeBit;
        return;
    }

    // Negated type: set all valid network request type bits to 1
    // This is done only once, so that multiple negated types accumulate.
    if (
        (typeBit & allNetRequestTypesBitmap) !== 0 &&
        (this.types & allNetRequestTypesBitmap) === 0
    ) {
        this.types |= allNetRequestTypesBitmap;
    }
    this.types &= ~typeBit;
};
/******************************************************************************/
// Records a party option.
//   firstParty: true for the `first-party` option, false for `third-party`
//   not: true when the option is negated (`~`)
// A negated `third-party` and a plain `first-party` both select first
// party; the two other combinations select third party.
FilterParser.prototype.parseOptParty = function(firstParty, not) {
    var selectsFirstParty = firstParty ? !not : not;
    if ( selectsFirstParty ) {
        this.firstParty = true;
    } else {
        this.thirdParty = true;
    }
};
/******************************************************************************/
2015-01-23 17:32:49 +01:00
// Parses the comma-separated options found after `$` in a filter.
//   s: the options, without the leading `$`
// The raw string is kept in `fopts`. Parsing stops, with `unsupported` set,
// at the first option which is unknown or which is not valid for the
// current action (`this.action` must be set before calling).
FilterParser.prototype.parseOptions = function(s) {
    this.fopts = s;
    var opts = s.split(',');
    var opt, not;
    for ( var i = 0; i < opts.length; i++ ) {
        opt = opts[i];
        not = opt.startsWith('~');
        if ( not ) {
            opt = opt.slice(1);
        }
        if ( opt === 'third-party' ) {
            this.parseOptParty(false, not);
            continue;
        }
        // https://issues.adblockplus.org/ticket/616
        // `generichide` concept already supported, just a matter of
        // adding support for the new keyword.
        // Only valid for exception filters.
        if ( opt === 'elemhide' || opt === 'generichide' ) {
            if ( this.action === AllowAction ) {
                this.parseOptType('elemhide', false);
                continue;
            }
            this.unsupported = true;
            break;
        }
        // Only valid for block filters.
        if ( opt === 'document' ) {
            if ( this.action === BlockAction ) {
                this.parseOptType('document', not);
                continue;
            }
            this.unsupported = true;
            break;
        }
        if ( this.toNormalizedType.hasOwnProperty(opt) ) {
            this.parseOptType(opt, not);
            continue;
        }
        if ( opt.startsWith('domain=') ) {
            this.domainOpt = opt.slice(7);
            continue;
        }
        if ( opt === 'important' ) {
            this.important = Important;
            continue;
        }
        if ( opt === 'first-party' ) {
            this.parseOptParty(true, not);
            continue;
        }
        // Only valid for block filters.
        if ( opt.startsWith('redirect=') ) {
            if ( this.action === BlockAction ) {
                this.redirect = true;
                continue;
            }
            this.unsupported = true;
            break;
        }
        // Used by Adguard, purpose is unclear -- just ignore for now.
        if ( opt === 'empty' ) {
            continue;
        }
        // Unrecognized filter option: ignore whole filter.
        this.unsupported = true;
        break;
    }
};
/******************************************************************************/
2015-02-27 00:08:42 +01:00
// Parses a raw network filter.
//   raw: the filter, as found in a filter list
// Returns `this`, with the outcome stored in its fields:
// - `elemHiding` is set when the filter turns out to be a cosmetic filter;
// - `unsupported` is set when the filter can't be handled;
// - `hostnamePure` is set when the filter is nothing but a hostname;
// - otherwise `f` holds the normalized pattern, along with `isRegex`,
//   `hostnameAnchored`, `anchor` and the fields set by parseOptions().
FilterParser.prototype.parse = function(raw) {
    // important!
    this.reset();

    var s = this.raw = raw;

    // plain hostname? (from HOSTS file)
    if ( this.reHostnameRule1.test(s) ) {
        this.f = s;
        this.hostnamePure = this.hostnameAnchored = true;
        return this;
    }

    // element hiding filter?
    var pos = s.indexOf('#');
    if ( pos !== -1 ) {
        var c = s.charAt(pos + 1);
        if ( c === '#' || c === '@' ) {
            console.error('static-net-filtering.js > unexpected cosmetic filters');
            this.elemHiding = true;
            return this;
        }
    }

    // block or allow filter?
    // Important: this must be executed before parsing options
    if ( s.startsWith('@@') ) {
        this.action = AllowAction;
        s = s.slice(2);
    }

    // options
    // https://github.com/gorhill/uBlock/issues/842
    // - ensure we are not dealing with a regex-based filter.
    // - lookup the last occurrence of `$`.
    if ( s.startsWith('/') === false || s.endsWith('/') === false ) {
        pos = s.lastIndexOf('$');
        if ( pos !== -1 ) {
            // https://github.com/gorhill/uBlock/issues/952
            // Discard Adguard-specific `$$` filters.
            if ( s.indexOf('$$') !== -1 ) {
                this.unsupported = true;
                return this;
            }
            this.parseOptions(s.slice(pos + 1));
            s = s.slice(0, pos);
        }
    }

    // regex?
    if ( s.startsWith('/') && s.endsWith('/') && s.length > 2 ) {
        this.isRegex = true;
        this.f = s.slice(1, -1);
        // https://github.com/gorhill/uBlock/issues/1246
        // If the filter is valid, use the corrected version of the source
        // string -- this ensure reverse-lookup will work fine.
        this.f = normalizeRegexSource(this.f);
        if ( this.f === '' ) {
            console.error(
                "uBlock Origin> discarding bad regular expression-based network filter '%s': '%s'",
                raw,
                normalizeRegexSource.message
            );
            this.unsupported = true;
        }
        return this;
    }

    // hostname-anchored
    if ( s.startsWith('||') ) {
        this.hostnameAnchored = true;
        s = s.slice(2);

        // convert hostname to punycode if needed
        if ( this.reHasUnicode.test(s) ) {
            var matches = this.reIsolateHostname.exec(s);
            if ( matches ) {
                // The leading `*.`/`.` group is optional: when it does not
                // take part in the match, its value is `undefined` and must
                // not be concatenated as-is.
                s = (matches[1] !== undefined ? matches[1] : '') +
                    punycode.toASCII(matches[2]) +
                    matches[3];
                //console.debug('µBlock.staticNetFilteringEngine/FilterParser.parse():', raw, '=', s);
            }
        }

        // https://github.com/chrisaljoudi/uBlock/issues/1096
        if ( s.startsWith('^') ) {
            this.unsupported = true;
            return this;
        }

        // plain hostname? (from ABP filter list)
        // https://github.com/gorhill/uBlock/issues/1757
        // A filter can't be a pure-hostname one if there is a domain option
        // present.
        if ( this.domainOpt === '' && this.reHostnameRule2.test(s) ) {
            this.f = s.replace(this.reCleanupHostnameRule2, '');
            this.hostnamePure = true;
            return this;
        }
    }

    // left-anchored
    if ( s.startsWith('|') ) {
        this.anchor = -1;
        s = s.slice(1);
    }

    // right-anchored
    if ( s.endsWith('|') ) {
        this.anchor = 1;
        s = s.slice(0, -1);
    }

    // normalize placeholders
    if ( this.reHasWildcard.test(s) ) {
        // remove pointless leading *
        // https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
        //   Keep the leading asterisk if we are dealing with a hostname-anchored
        //   filter, this will ensure the generic filter implementation is
        //   used.
        if ( s.startsWith('*') && this.hostnameAnchored === false ) {
            s = s.replace(/^\*+([^%0-9a-z])/, '$1');
        }
        // remove pointless trailing *
        if ( s.endsWith('*') ) {
            s = s.replace(/([^%0-9a-z])\*+$/, '$1');
        }
    }

    // nothing left?
    if ( s === '' ) {
        s = '*';
    }

    // https://github.com/gorhill/uBlock/issues/1047
    // Hostname-anchored makes no sense if matching all requests.
    if ( s === '*' ) {
        this.hostnameAnchored = false;
    }

    // This might look weird but we gain memory footprint by not going through
    // toLowerCase(), at least on Chromium. Because copy-on-write?
    this.f = this.reHasUppercase.test(s) ? s.toLowerCase() : s;

    return this;
};
/******************************************************************************/
2015-03-02 16:41:51 +01:00
// Given a string, find a good token. Tokens which are too generic, i.e. very
// common with a high probability of ending up as a miss, are not
// good. Avoid if possible. This has a *significant* positive impact on
// performance.
// These "bad tokens" are collated manually.
2015-12-04 03:24:37 +01:00
// Hostname-anchored with no wildcard always have a token index of 0.
2015-03-02 16:41:51 +01:00
// Both regexes are global and shared: their `lastIndex` is reset before
// each use by the functions below.
var reHostnameToken = /^[0-9a-z]+/g;
var reGoodToken = /[%0-9a-z]{2,}/g;

// Tokens which are valid, but too common to be good.
var badTokens = {
      'com': true,
     'http': true,
    'https': true,
     'icon': true,
   'images': true,
      'img': true,
       'js': true,
      'net': true,
     'news': true,
      'www': true
};

// Returns the regex match for the first good token found in `s`. If only
// bad tokens are found, the match for the first bad token is returned. If
// there is no usable token at all, null is returned.
var findFirstGoodToken = function(s) {
    reGoodToken.lastIndex = 0;
    var matches, lpos;
    var badTokenMatch = null;
    while ( (matches = reGoodToken.exec(s)) ) {
        // https://github.com/gorhill/uBlock/issues/997
        // Ignore token if preceded by wildcard.
        lpos = matches.index;
        if ( lpos !== 0 && s.charAt(lpos - 1) === '*' ) {
            continue;
        }
        // Ignore token if followed by wildcard.
        if ( s.charAt(reGoodToken.lastIndex) === '*' ) {
            continue;
        }
        if ( badTokens.hasOwnProperty(matches[0]) ) {
            if ( badTokenMatch === null ) {
                badTokenMatch = matches;
            }
            continue;
        }
        return matches;
    }
    return badTokenMatch;
};

// Returns the regex match for the alphanumeric run found at the very start
// of `s`, or null if `s` does not start with such a run.
var findHostnameToken = function(s) {
    reHostnameToken.lastIndex = 0;
    return reHostnameToken.exec(s);
};
2015-03-02 22:22:23 +01:00
/******************************************************************************/
2015-01-23 17:32:49 +01:00
// Picks the token (and its position in `f`) for the parsed filter.
// `token` and `tokenBeg` are left untouched for regex-based filters, for
// the match-all filter, and when no token can be found.
FilterParser.prototype.makeToken = function() {
    // https://github.com/chrisaljoudi/uBlock/issues/1038
    // Single asterisk will match any URL.
    if ( this.isRegex || this.f === '*' ) {
        return;
    }

    // Hostname-anchored filters without wildcard use the leading token.
    var matches = this.hostnameAnchored && this.f.indexOf('*') === -1 ?
        findHostnameToken(this.f) :
        findFirstGoodToken(this.f);

    if ( matches !== null && matches[0].length !== 0 ) {
        this.token = matches[0];
        this.tokenBeg = matches.index;
    }
};
/******************************************************************************/
/******************************************************************************/
// Container for all static network filters. Owns its own FilterParser
// instance, and starts out in the state set by reset().
var FilterContainer = function() {
    this.reIsGeneric = /[\^\*]/;
    this.filterParser = new FilterParser();
    this.urlTokenizer = µb.urlTokenizer;
    this.reset();
};
/******************************************************************************/
// Reset all, thus reducing to a minimum memory footprint of the context.
// Clears all counters, filters and runtime registers, and resets the parser.
FilterContainer.prototype.reset = function() {
    this.frozen = false;
    this.processedFilterCount = 0;
    this.acceptedCount = 0;
    this.rejectedCount = 0;
    this.allowFilterCount = 0;
    this.blockFilterCount = 0;
    this.discardedCount = 0;
    this.duplicateBuster = {};
    // No prototype chain: any string can be used safely as a key.
    this.categories = Object.create(null);
    this.filterParser.reset();
    this.filterCounts = {};

    // Runtime registers
    this.keyRegister = undefined;
    this.tokenRegister = undefined;
    this.fRegister = null;
};
2014-06-24 00:42:43 +02:00
2014-07-20 21:00:26 +02:00
/******************************************************************************/
2014-06-24 00:42:43 +02:00
2014-07-20 21:00:26 +02:00
// To be called once all filters have been added: drops the data needed
// only while adding filters, and freezes the hostname dictionary (stored
// under the '.' token) of each category.
FilterContainer.prototype.freeze = function() {
    histogram('allFilters', this.categories);
    this.duplicateBuster = {};

    var categories = this.categories;
    var bucket;
    for ( var k in categories ) {
        bucket = categories[k]['.'];
        if ( bucket !== undefined ) {
            bucket.freeze();
        }
    }

    this.filterParser.reset();
    this.frozen = true;
};
/******************************************************************************/
2015-02-24 00:31:29 +01:00
// Maps a filter class id (`fid`) to the filter class itself.
FilterContainer.prototype.factories = {
      '[]': FilterBucket,
       'a': FilterPlain,
      'ah': FilterPlainHostname,
      '0a': FilterPlainPrefix0,
     '0ah': FilterPlainPrefix0Hostname,
      '1a': FilterPlainPrefix1,
     '1ah': FilterPlainPrefix1Hostname,
      '|a': FilterPlainLeftAnchored,
     '|ah': FilterPlainLeftAnchoredHostname,
      'a|': FilterPlainRightAnchored,
     'a|h': FilterPlainRightAnchoredHostname,
     '||a': FilterPlainHnAnchored,
    '||ah': FilterPlainHnAnchoredHostname,
      '//': FilterRegex,
     '//h': FilterRegexHostname,
     '{h}': FilterHostnameDict,
       '_': FilterGeneric,
      '_h': FilterGenericHostname,
     '||_': FilterGenericHnAnchored,
    '||_h': FilterGenericHnAnchoredHostname
};
/******************************************************************************/
2014-09-08 23:46:58 +02:00
// Returns a serializable snapshot of the container: the counters, and all
// the filters as line-based text, one `<tag>\t<data>` record per line:
// - `k1\t<category key>` starts a category;
// - `k2\t<token>` starts a token entry within the current category;
// - `<fid>\t<selfie>` is a filter; for a bucket (fid `[]`), the record is
//   followed by one such line for each filter it contains.
FilterContainer.prototype.toSelfie = function() {
    var categoryToSelfie = function(dict) {
        var selfie = [];
        var bucket, ff, n, i, f;
        for ( var token in dict ) {
            // No need for hasOwnProperty() here: there is no prototype chain.
            selfie.push('k2\t' + token);
            bucket = dict[token];
            selfie.push(bucket.fid + '\t' + bucket.toSelfie());
            if ( bucket.fid !== '[]' ) {
                continue;
            }
            ff = bucket.filters;
            n = ff.length;
            for ( i = 0; i < n; i++ ) {
                f = ff[i];
                selfie.push(f.fid + '\t' + f.toSelfie());
            }
        }
        return selfie.join('\n');
    };

    var categoriesToSelfie = function(dict) {
        var selfie = [];
        for ( var key in dict ) {
            // No need for hasOwnProperty() here: there is no prototype chain.
            selfie.push('k1\t' + key);
            selfie.push(categoryToSelfie(dict[key]));
        }
        return selfie.join('\n');
    };

    return {
        processedFilterCount: this.processedFilterCount,
        acceptedCount: this.acceptedCount,
        rejectedCount: this.rejectedCount,
        allowFilterCount: this.allowFilterCount,
        blockFilterCount: this.blockFilterCount,
        discardedCount: this.discardedCount,
        categories: categoriesToSelfie(this.categories)
    };
};
/******************************************************************************/
// Restores the container from a snapshot created by toSelfie(). The
// container is marked as frozen. Filters are re-created through the
// fromSelfie() method of the class registered in `factories` for each
// record tag.
FilterContainer.prototype.fromSelfie = function(selfie) {
    this.frozen = true;
    this.processedFilterCount = selfie.processedFilterCount;
    this.acceptedCount = selfie.acceptedCount;
    this.rejectedCount = selfie.rejectedCount;
    this.allowFilterCount = selfie.allowFilterCount;
    this.blockFilterCount = selfie.blockFilterCount;
    this.discardedCount = selfie.discardedCount;

    var catKey, tokenKey;
    var dict = this.categories, subdict;
    var bucket = null;
    var rawText = selfie.categories;
    var rawEnd = rawText.length;
    var lineBeg = 0, lineEnd;
    var line, pos, what, factory;
    while ( lineBeg < rawEnd ) {
        lineEnd = rawText.indexOf('\n', lineBeg);
        if ( lineEnd < 0 ) {
            lineEnd = rawEnd;
        }
        line = rawText.slice(lineBeg, lineEnd);
        lineBeg = lineEnd + 1;
        pos = line.indexOf('\t');
        what = line.slice(0, pos);
        // New category
        if ( what === 'k1' ) {
            catKey = line.slice(pos + 1);
            subdict = dict[catKey] = Object.create(null);
            bucket = null;
            continue;
        }
        // New token within the current category
        if ( what === 'k2' ) {
            tokenKey = line.slice(pos + 1);
            bucket = null;
            continue;
        }
        factory = this.factories[what];
        // First filter for the current token
        if ( bucket === null ) {
            bucket = subdict[tokenKey] = factory.fromSelfie(line.slice(pos + 1));
            continue;
        }
        // When token key is reused, it can't be anything
        // else than FilterBucket
        bucket.add(factory.fromSelfie(line.slice(pos + 1)));
    }
};
/******************************************************************************/
// Selects the filter class to use for a parsed filter.
// The decision depends, in order, on: regex, presence of wildcard/caret,
// left anchor, right anchor, hostname anchor, then position of the token.
// Each class exists in two flavors: the `...Hostname` one is selected when
// the filter has a `domain=` option.
FilterContainer.prototype.getFilterClass = function(details) {
    var pattern = details.f;
    var withDomain = details.domainOpt.length !== 0;

    if ( details.isRegex ) {
        return withDomain ? FilterRegexHostname : FilterRegex;
    }

    if ( this.reIsGeneric.test(pattern) ) {
        if ( details.hostnameAnchored ) {
            return withDomain ?
                FilterGenericHnAnchoredHostname :
                FilterGenericHnAnchored;
        }
        return withDomain ? FilterGenericHostname : FilterGeneric;
    }

    if ( details.anchor < 0 ) {
        return withDomain ?
            FilterPlainLeftAnchoredHostname :
            FilterPlainLeftAnchored;
    }

    if ( details.anchor > 0 ) {
        return withDomain ?
            FilterPlainRightAnchoredHostname :
            FilterPlainRightAnchored;
    }

    if ( details.hostnameAnchored ) {
        return withDomain ?
            FilterPlainHnAnchoredHostname :
            FilterPlainHnAnchored;
    }

    switch ( details.tokenBeg ) {
    case 0:
        return withDomain ? FilterPlainPrefix0Hostname : FilterPlainPrefix0;
    case 1:
        return withDomain ? FilterPlainPrefix1Hostname : FilterPlainPrefix1;
    default:
        return withDomain ? FilterPlainHostname : FilterPlain;
    }
};
/******************************************************************************/
2015-02-24 00:31:29 +01:00
// Compiles a raw filter, appending the result to `out`.
//   raw: a line from a filter list
//   out: array receiving the compiled filter(s)
// Returns true if the filter was compiled, false if it was ignored.
FilterContainer.prototype.compile = function(raw, out) {
    // ORDER OF TESTS IS IMPORTANT!

    // Ignore empty lines
    var s = raw.trim();
    if ( s.length === 0 ) {
        return false;
    }

    var parsed = this.filterParser.parse(s);

    // Ignore element-hiding filters
    if ( parsed.elemHiding ) {
        return false;
    }

    // Ignore filters with unsupported options
    if ( parsed.unsupported ) {
        //console.log('static-net-filtering.js > FilterContainer.add(): unsupported filter "%s"', raw);
        return false;
    }

    // Pure hostnames, use more efficient liquid dict
    // https://github.com/chrisaljoudi/uBlock/issues/665
    // Create a dict keyed on request type etc.
    if ( parsed.hostnamePure && this.compileHostnameOnlyFilter(parsed, out) ) {
        return true;
    }

    return this.compileFilter(parsed, out) !== false;
};
/******************************************************************************/
2015-02-05 00:06:31 +01:00
// Using fast/compact dictionary when filter is a (or portion of) pure hostname.
2015-02-24 00:31:29 +01:00
// Compiles a pure hostname filter. One record is appended to `out` for
// each request type set in `parsed.types`, or a single record without type
// bits when no type is set. Each record has the form:
//   n \v <key, hexadecimal> \v . \v <hostname>
// Always returns true.
FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) {
    // Can't fit the filter in a pure hostname dictionary.
    // https://github.com/gorhill/uBlock/issues/1757
    // This should no longer happen with fix to above issue.
    //if ( parsed.domainOpt.length !== 0 ) {
    //    return;
    //}

    // The party is left unspecified unless exactly one party is selected.
    var party = AnyParty;
    if ( parsed.firstParty !== parsed.thirdParty ) {
        party = parsed.firstParty ? FirstParty : ThirdParty;
    }
    var keyShard = parsed.action | parsed.important | party;

    var type = parsed.types;
    if ( type === 0 ) {
        out.push(
            'n\v' +
            toHex(keyShard) + '\v' +
            '.\v' +
            parsed.f
        );
        return true;
    }

    // Bit n (0-based) of `types` maps to type value n+1, stored in the key
    // starting at bit 4.
    var bitOffset = 1;
    do {
        if ( type & 1 ) {
            out.push(
                'n\v' +
                toHex(keyShard | (bitOffset << 4)) + '\v' +
                '.\v' +
                parsed.f
            );
        }
        bitOffset += 1;
        type >>>= 1;
    } while ( type !== 0 );
    return true;
};
/******************************************************************************/
2015-02-24 00:31:29 +01:00
// Compiles a parsed filter which is not a pure hostname, appending the
// result to `out`. Returns true if the filter was compiled, false if it
// was rejected.
FilterContainer.prototype.compileFilter = function(parsed, out) {
    parsed.makeToken();

    // A hostname-anchored filter for which no token was found is rejected.
    if ( parsed.token === '*' && parsed.hostnameAnchored ) {
        console.error('FilterContainer.compileFilter("%s"): invalid filter', parsed.f);
        return false;
    }

    // The party is left unspecified unless exactly one party is selected.
    var party = AnyParty;
    if ( parsed.firstParty !== parsed.thirdParty ) {
        party = parsed.firstParty ? FirstParty : ThirdParty;
    }

    var filterClass = this.getFilterClass(parsed);
    if ( filterClass === null ) {
        return false;
    }
    this.compileToAtomicFilter(filterClass, parsed, party, out);
    return true;
};
/******************************************************************************/
// Appends to `out` the compiled record(s) for a parsed filter: one record
// for each request type set in `parsed.types`, or a single record without
// type bits when no type is set. Each record has the form:
//   n \v <key, hexadecimal> \v <token> \v <fid> \v <compiled filter>
// For a filter with both a type and a `redirect=` option, the rules
// returned by the redirect engine are appended as well.
FilterContainer.prototype.compileToAtomicFilter = function(filterClass, parsed, party, out) {
    var bits = parsed.action | parsed.important | party;
    // Everything past the key is the same for all the records.
    var suffix = '\v' +
                 parsed.token + '\v' +
                 filterClass.fid + '\v' +
                 filterClass.compile(parsed);

    var type = parsed.types;
    if ( type === 0 ) {
        out.push('n\v' + toHex(bits) + suffix);
        return;
    }

    // Bit n (0-based) of `types` maps to type value n+1, stored in the key
    // starting at bit 4.
    var bitOffset = 1;
    do {
        if ( type & 1 ) {
            out.push('n\v' + toHex(bits | (bitOffset << 4)) + suffix);
        }
        bitOffset += 1;
        type >>>= 1;
    } while ( type !== 0 );

    // Only static filter with an explicit type can be redirected. If we reach
    // this point, it's because there is one or more explicit type.
    if ( !parsed.redirect ) {
        return;
    }

    var redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
    if ( Array.isArray(redirects) === false ) {
        return;
    }
    // Rules are appended from last to first.
    var i = redirects.length;
    while ( i-- ) {
        out.push('n\v\v\v=>\v' + redirects[i]);
    }
};
/******************************************************************************/
2015-02-24 00:31:29 +01:00
// Load compiled filters from `text`, starting at offset `lineBeg`.
//
// Lines are consumed for as long as they start with 'n'. Each line is made
// of '\v'-separated fields which follow the two leading characters.
//
// Returns the offset of the first line which does not start with 'n', or
// the length of `text` if all remaining lines were consumed.
FilterContainer.prototype.fromCompiledContent = function(text, lineBeg) {
    var textEnd = text.length;
    var lineEnd, line, fields, categoryKey, token, bucket, entry, filter;

    while ( lineBeg < textEnd ) {
        if ( text.charCodeAt(lineBeg) !== 0x6E /* 'n' */ ) {
            return lineBeg;
        }
        lineEnd = text.indexOf('\n', lineBeg);
        if ( lineEnd === -1 ) {
            lineEnd = textEnd;
        }
        line = text.slice(lineBeg + 2, lineEnd);
        fields = line.split('\v');
        lineBeg = lineEnd + 1;

        // Redirect rules are handed over to the redirect engine.
        if ( fields[2] === '=>' ) {
            µb.redirectEngine.fromCompiledRule(fields[3]);
            continue;
        }

        // Plain static filters: fields are category key, token, then either
        // a hostname (token '.') or a filter class id plus compiled data.
        this.acceptedCount += 1;
        categoryKey = fields[0];
        token = fields[1];
        bucket = this.categories[categoryKey];
        if ( bucket === undefined ) {
            bucket = this.categories[categoryKey] = Object.create(null);
        }
        entry = bucket[token];

        // Hostname-only filters are all stored in one dictionary per
        // category; a hostname refused by the dictionary is counted as
        // discarded.
        if ( token === '.' ) {
            if ( entry === undefined ) {
                entry = bucket['.'] = new FilterHostnameDict();
            }
            if ( entry.add(fields[2]) === false ) {
                this.discardedCount += 1;
            }
            continue;
        }

        // An identical line seen before is a duplicate: discard.
        if ( this.duplicateBuster.hasOwnProperty(line) ) {
            this.discardedCount += 1;
            continue;
        }
        this.duplicateBuster[line] = true;

        filter = this.factories[fields[2]].fromSelfie(fields[3]);

        // First filter for this token is stored as-is; a second filter
        // causes both to be wrapped in a FilterBucket (fid '[]'), to which
        // later filters are added.
        if ( entry === undefined ) {
            bucket[token] = filter;
        } else if ( entry.fid === '[]' ) {
            entry.add(filter);
        } else {
            bucket[token] = new FilterBucket(entry, filter);
        }
    }
    return textEnd;
};
/******************************************************************************/
// Rebuild a human-readable filter string from a compiled representation.
//
// compiled: '\v'-separated string: hex category key, token, then either a
//     hostname (when the token is '.') or a filter class id followed by
//     '\t'-separated compiled data.
//
// Returns the filter string, including `@@` prefix and `$` options where
// applicable.
FilterContainer.prototype.filterStringFromCompiled = function(compiled) {
    var vfields = compiled.split('\v');
    var bits = parseInt(vfields[0], 16) | 0;
    var rfid = vfields[1] === '.' ? '.' : vfields[2];
    var tfields = rfid !== '.' ? vfields[3].split('\t') : [];
    var opts = [];

    // Tells whether the filter class id is one of those listed.
    var fidIsOneOf = function(fids) {
        return fids.indexOf(rfid) !== -1;
    };

    // Bit 0 set: exception filter.
    var filter = (bits & 0x01) !== 0 ? '@@' : '';

    // Pattern part
    if ( rfid === '.' ) {
        filter += '||' + vfields[2] + '^';
    } else if ( fidIsOneOf(['a', 'ah', '0a', '0ah', '1a', '1ah', '_', '_h']) ) {
        filter += tfields[0];
    } else if ( fidIsOneOf(['|a', '|ah']) ) {
        filter += '|' + tfields[0];
    } else if ( fidIsOneOf(['a|', 'a|h']) ) {
        filter += tfields[0] + '|';
    } else if ( fidIsOneOf(['||a', '||ah', '||_', '||_h']) ) {
        filter += '||' + tfields[0];
    } else if ( fidIsOneOf(['//', '//h']) ) {
        filter += '/' + tfields[0] + '/';
    }

    // Domain option: the position of the domain in the compiled data
    // depends on the filter class.
    if ( fidIsOneOf(['0ah', '1ah', '|ah', 'a|h', '||ah', '||_h', '//h']) ) {
        opts.push('domain=' + tfields[1]);
    } else if ( fidIsOneOf(['ah', '_h']) ) {
        opts.push('domain=' + tfields[2]);
    }

    // Options encoded in the category key
    if ( (bits & 0x02) !== 0 ) {
        opts.push('important');
    }
    if ( (bits & 0x08) !== 0 ) {
        opts.push('third-party');
    } else if ( (bits & 0x04) !== 0 ) {
        opts.push('first-party');
    }
    if ( (bits & 0xF0) !== 0 ) {
        opts.push(typeValueToTypeName[bits >>> 4]);
    }

    if ( opts.length === 0 ) {
        return filter;
    }
    return filter + '$' + opts.join(',');
};
/******************************************************************************/
FilterContainer.prototype.filterRegexFromCompiled = function(compiled, flags) {
var vfields = compiled.split('\v');
var rfid = vfields[1] === '.' ? '.' : vfields[2];
var tfields = rfid !== '.' ? vfields[3].split('\t') : [];
var re = null;
switch ( rfid ) {
case '.':
re = strToRegex(vfields[2], 0, flags);
break;
case 'a':
case 'ah':
case '0a':
case '0ah':
case '1a':
case '1ah':
case '_':
case '_h':
case '||a':
case '||ah':
case '||_':
case '||_h':
re = strToRegex(tfields[0], 0, flags);
break;
case '|a':
case '|ah':
re = strToRegex(tfields[0], -1, flags);
break;
case 'a|':
case 'a|h':
re = strToRegex(tfields[0], 1, flags);
break;
case '//':
case '//h':
re = new RegExp(tfields[0]);
break;
default:
break;
}
return re;
};
/******************************************************************************/
// Look for a filter in `bucket` which matches `url`.
//
// Entries are probed in this order, stopping at the first match:
//   1. the hostname-only entry, stored under '.';
//   2. the entries stored under each token of the current URL, in the order
//      provided by the URL tokenizer;
//   3. the entry stored under '*'.
//
// On a match, the matching token and filter are stored in `tokenRegister`
// and `fRegister` respectively, and true is returned. Otherwise false is
// returned and the registers are left untouched.
FilterContainer.prototype.matchTokens = function(bucket, url) {
    var tokens, tokenEntry, i;

    // Hostname-only filters: these do not need the URL to be passed in.
    var token = '.';
    var filter = bucket['.'];
    var found = filter !== undefined && filter.match();

    // Token-based filters: the token list is terminated by an entry whose
    // token is the empty string.
    if ( !found ) {
        tokens = this.urlTokenizer.getTokens();
        i = 0;
        while ( !found ) {
            tokenEntry = tokens[i];
            i += 1;
            token = tokenEntry.token;
            if ( token === '' ) {
                break;
            }
            filter = bucket[token];
            found = filter !== undefined && filter.match(url, tokenEntry.beg);
        }
    }

    // Regex-based filters
    if ( !found ) {
        token = '*';
        filter = bucket['*'];
        found = filter !== undefined && filter.match(url);
    }

    if ( !found ) {
        return false;
    }
    this.tokenRegister = token;
    this.fRegister = filter;
    return true;
};
/******************************************************************************/
// Specialized handlers
2015-04-07 03:26:05 +02:00
// https://github.com/chrisaljoudi/uBlock/issues/116
2014-07-30 03:10:00 +02:00
// Some types of requests are exceptional: they need custom handling,
// not the generic handling.
// Match a request against filters of exactly the given request type:
// filters which apply to any type are not consulted.
//
// context: request context (`pageHostname` and `pageDomain` are read).
// requestURL: URL of the request.
// requestType: name of the request type.
//
// Returns true if the request is blocked, false if an exception filter
// applies, undefined if no filter matched or the type is unknown. After a
// match, `keyRegister` holds the category key of the matching filter.
FilterContainer.prototype.matchStringExactType = function(context, requestURL, requestType) {
    // Be prepared to support unknown types
    var type = typeNameToTypeValue[requestType] || 0;
    if ( type === 0 ) {
        return undefined;
    }

    // Prime tokenizer: we get a normalized URL in return.
    var url = this.urlTokenizer.setURL(requestURL);

    // These registers will be used by various filters
    pageHostnameRegister = context.pageHostname || '';
    requestHostnameRegister = µb.URI.hostnameFromURI(url);
    var party = isFirstParty(context.pageDomain, requestHostnameRegister) ? FirstParty : ThirdParty;
    this.fRegister = null;

    var self = this;
    var categories = this.categories;

    // Test the URL against the category identified by `key`. The key is
    // recorded in `keyRegister` only when there is a match.
    var matches = function(key) {
        var bucket = categories[toHex(key)];
        if ( !bucket || !self.matchTokens(bucket, url) ) {
            return false;
        }
        self.keyRegister = key;
        return true;
    };

    // https://github.com/gorhill/uBlock/issues/1477
    //   Special case: blocking elemhide filter ALWAYS exists, it is implicit
    //   -- thus we always and only check for exception filters.
    if ( requestType === 'elemhide' ) {
        return matches(AllowAnyParty | type) ? false : undefined;
    }

    // https://github.com/chrisaljoudi/uBlock/issues/139
    //   Important block filters come first: a hit blocks the request without
    //   exception filters being consulted.
    if (
        matches(BlockAnyParty | Important | type) ||
        matches(BlockAction | Important | type | party)
    ) {
        return true;
    }

    // Ordinary block filters: if there is no hit, there is no need to test
    // against exception filters.
    if (
        !matches(BlockAnyParty | type) &&
        !matches(BlockAction | type | party)
    ) {
        return undefined;
    }

    // There was a hit on a block filter: an exception filter overrides it.
    if (
        matches(AllowAnyParty | type) ||
        matches(AllowAction | type | party)
    ) {
        return false;
    }
    return true;
};
/******************************************************************************/
// Match a request against the static network filters.
//
// context: request context (`requestType`, `requestURL`, `requestHostname`,
//     `pageHostname` and `pageDomain` are read).
//
// Returns true if the request is blocked, false if an exception filter
// applies, undefined if no filter matched. After a match, `keyRegister`
// holds the category key of the matching filter.
FilterContainer.prototype.matchString = function(context) {
    // https://github.com/chrisaljoudi/uBlock/issues/519
    //   Use exact type match for anything beyond `other`.
    //   Also, be prepared to support unknown types.
    var type = typeNameToTypeValue[context.requestType] || typeOtherValue;
    if ( type > typeOtherValue ) {
        return this.matchStringExactType(context, context.requestURL, context.requestType);
    }

    // The logic here is simple:
    //
    //   block = !whitelisted && blacklisted
    //
    // or equivalently:
    //
    //   allow = whitelisted || !blacklisted
    //
    // Statistically, outcomes for a URL in order of likelihood:
    //   1. No hit
    //   2. Hit on a block filter
    //   3. Hit on an allow filter
    //
    // "No hit" being the most likely outcome, the number of filters to test
    // must be kept as low as possible. Hence block filters are tested first,
    // and allow filters are tested *only* if there is a (unlikely) hit on a
    // block filter: testing allow filters upfront would mostly be wasted
    // work.

    // Prime tokenizer: we get a normalized URL in return.
    var url = this.urlTokenizer.setURL(context.requestURL);

    // These registers will be used by various filters
    pageHostnameRegister = context.pageHostname || '';
    requestHostnameRegister = context.requestHostname;
    this.fRegister = null;

    var party = isFirstParty(context.pageDomain, context.requestHostname) ? FirstParty : ThirdParty;
    var self = this;
    var categories = this.categories;

    // Test the URL against the category identified by `key`. The key is
    // recorded in `keyRegister` only when there is a match.
    var matches = function(key) {
        var bucket = categories[toHex(key)];
        if ( !bucket || !self.matchTokens(bucket, url) ) {
            return false;
        }
        self.keyRegister = key;
        return true;
    };

    // https://github.com/chrisaljoudi/uBlock/issues/139
    //   Important block filters come first: a hit blocks the request without
    //   allow filters being consulted.
    if (
        matches(BlockAnyTypeAnyParty | Important) ||
        matches(BlockAnyType | Important | party) ||
        matches(BlockAnyParty | Important | type) ||
        matches(BlockAction | Important | type | party)
    ) {
        return true;
    }

    // Ordinary block filters, from least to most specific category. If there
    // is no hit, there is no need to test against allow filters.
    if (
        !matches(BlockAnyTypeAnyParty) &&
        !matches(BlockAnyType | party) &&
        !matches(BlockAnyParty | type) &&
        !matches(BlockAction | type | party)
    ) {
        return undefined;
    }

    // There was a hit on a block filter: an allow filter overrides it.
    if (
        matches(AllowAnyTypeAnyParty) ||
        matches(AllowAnyType | party) ||
        matches(AllowAnyParty | type) ||
        matches(AllowAction | type | party)
    ) {
        return false;
    }
    return true;
};
/******************************************************************************/
// The `verbose` argument tells whether to return a short or long version of
// the filter string. Typically, if the logger is not enabled, there is no
// point in returning the long version: this saves overhead.
// Describe the filter currently held in the match registers.
//
// verbose: when falsy, only the short prefix is returned.
//
// Returns '' if no filter is registered. Otherwise the result starts with
// 'sa:' when bit 0 of the category key is set, 'sb:' when it is not. The
// verbose form appends, '\v'-separated: the hex category key, the token,
// then the runtime-compiled filter data -- preceded by the runtime filter
// class id unless the token is '.'.
FilterContainer.prototype.toResultString = function(verbose) {
    var filter = this.fRegister;
    if ( filter === null ) {
        return '';
    }
    var result = (this.keyRegister & 0x01) !== 0 ? 'sa:' : 'sb:';
    if ( !verbose ) {
        return result;
    }
    result += toHex(this.keyRegister) + '\v' + this.tokenRegister + '\v';
    // Hostname-only entries carry no filter class id.
    if ( this.tokenRegister !== '.' ) {
        result += filter.rtfid + '\v';
    }
    return result + filter.rtCompile();
};
/******************************************************************************/
// Returns the number of filters in use: those accepted minus those which
// were subsequently discarded.
FilterContainer.prototype.getFilterCount = function() {
    var accepted = this.acceptedCount;
    var discarded = this.discardedCount;
    return accepted - discarded;
};
/******************************************************************************/
return new FilterContainer();
/******************************************************************************/
})();