uBlock/src/js/static-net-filtering.js

2730 lines
79 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2014-2017 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
/* jshint bitwise: false */
/* global punycode */
'use strict';
/******************************************************************************/
µBlock.staticNetFilteringEngine = (function(){
/******************************************************************************/
// Local alias for the `µBlock` object, referred to as `µb` further below.
var µb = µBlock;
// Bit layout of a filter category key:
//
//   fedcba9876543210
//         |    | |||
//         |    | ||+---- bit 0: [BlockAction | AllowAction]
//         |    | |+---- bit 1: `important`
//         |    | +---- bit 2-3: party [0 - 3]
//         |    +---- bit 4-8: type [0 - 31]
//         +---- bit 9-15: unused
var BlockAction = 0 << 0,
    AllowAction = 1 << 0,
    Important = 1 << 1,
    AnyParty = 0 << 2,
    FirstParty = 1 << 2,
    ThirdParty = 2 << 2,
    AnyType = 0 << 4;
// Maps a normalized type name to its numeric id, already shifted into the
// type bit field (bit 4 and up) of a category key. The id of a type is its
// position in the list below.
var typeNameToTypeValue = (function() {
    var orderedNames = [
        'no_type',
        'stylesheet',
        'image',
        'object',
        'script',
        'xmlhttprequest',
        'sub_frame',
        'font',
        'media',
        'websocket',
        'other',
        'popup',            // start of behavioral filtering
        'popunder',
        'main_frame',       // start of 1st-party-only behavioral filtering
        'generichide',
        'inline-script',
        'data'              // special: a generic data holder
    ];
    var table = {};
    for ( var id = 0; id < orderedNames.length; id++ ) {
        table[orderedNames[id]] = id << 4;
    }
    return table;
})();
// Shifted id of the `other` type.
var otherTypeBitValue = typeNameToTypeValue.other;
// Reverse lookup: from a numeric type id (1-based, not shifted) to the
// option name used when rendering a filter for logging.
var typeValueToTypeName = (function() {
    var optionNames = [
        'stylesheet',
        'image',
        'object',
        'script',
        'xmlhttprequest',
        'subdocument',
        'font',
        'media',
        'websocket',
        'other',
        'popup',
        'popunder',
        'document',
        'generichide',
        'inline-script',
        'data'
    ];
    var table = {};
    optionNames.forEach(function(optionName, index) {
        table[index + 1] = optionName;
    });
    return table;
})();
// Bitmap with one bit set for every network request type:
//   - bring the `other` type value back to origin 0 (it is stored shifted
//     by 4 -- see typeNameToTypeValue)
//   - left-shift 1 by that value
//   - subtract 1 to set all the type bits below
var allNetRequestTypesBitmap = (1 << (otherTypeBitValue >>> 4)) - 1;

// Frequently used combinations of action, type and party bits.
var BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty,
    BlockAnyType = BlockAction | AnyType,
    BlockAnyParty = BlockAction | AnyParty;
var AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty,
    AllowAnyType = AllowAction | AnyType,
    AllowAnyParty = AllowAction | AnyParty;

// Category keys for the `generichide` behavioral type.
var genericHideException = typeNameToTypeValue.generichide | AllowAction | AnyParty;
var genericHideImportant = typeNameToTypeValue.generichide | BlockAction | AnyParty | Important;
// ABP filters: https://adblockplus.org/en/filters
// regex tester: http://regex101.com/
/******************************************************************************/
// Short-lived registers used while evaluating a request: their content is
// only valid until the next evaluation.
var pageHostnameRegister = '';
var requestHostnameRegister = '';
//var filterRegister = null;
//var categoryRegister = '';
/******************************************************************************/
// No-op stub: a diagnostic implementation is kept commented out right below.
var histogram = function() {};
/*
histogram = function(label, categories) {
var h = [],
categoryBucket;
for ( var k in categories ) {
// No need for hasOwnProperty() here: there is no prototype chain.
categoryBucket = categories[k];
for ( var kk in categoryBucket ) {
// No need for hasOwnProperty() here: there is no prototype chain.
filterBucket = categoryBucket[kk];
h.push({
k: k.charCodeAt(0).toString(2) + ' ' + kk,
n: filterBucket instanceof FilterBucket ? filterBucket.filters.length : 1
});
}
}
console.log('Histogram %s', label);
var total = h.length;
h.sort(function(a, b) { return b.n - a.n; });
// Find indices of entries of interest
var target = 2;
for ( var i = 0; i < total; i++ ) {
if ( h[i].n === target ) {
console.log('\tEntries with only %d filter(s) start at index %s (key = "%s")', target, i, h[i].k);
target -= 1;
}
}
h = h.slice(0, 50);
h.forEach(function(v) {
console.log('\tkey=%s count=%d', v.k, v.n);
});
console.log('\tTotal buckets count: %d', total);
};
*/
/******************************************************************************/
// Local helpers

// Tells whether `hostname` is `domain` itself or one of its subdomains.
// The character preceding the matched suffix must be a dot, so that
// 'example.com' is not confused with 'anotherexample.com'.
var isFirstParty = function(domain, hostname) {
    if ( hostname.endsWith(domain) === false ) {
        return false;
    }
    var boundary = hostname.length - domain.length;
    if ( boundary === 0 ) {
        return true;
    }
    return hostname.charCodeAt(boundary - 1) === 0x2E /* '.' */;
};
// Returns the `source` of the regular expression built from `s`, or an
// empty string when `s` is not a valid regular expression. In the latter
// case the error text is stored in `normalizeRegexSource.message`.
var normalizeRegexSource = function(s) {
    var source = '';
    try {
        source = new RegExp(s).source;
    } catch (ex) {
        normalizeRegexSource.message = ex.toString();
    }
    return source;
};
// Converts a raw filter pattern into the source of an equivalent regular
// expression.
//   s: the raw pattern, where `*` is a wildcard and `^` a separator
//      placeholder
//   anchor: bit field -- 0x4: hostname-anchored, 0x2: left-anchored,
//           0x1: right-anchored
//
// https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/
// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
var rawToRegexStr = function(s, anchor) {
    var self = rawToRegexStr;
    // Neutralize the characters which have a special meaning in a regex.
    var source = s.replace(self.escape1, '\\$&');
    // Expand the separator placeholders.
    source = source.replace(self.escape2, '(?:[^%.0-9a-z_-]|$)');
    // Remove leading/trailing wildcards -- there is no point.
    source = source.replace(self.escape3, '');
    // Expand the remaining wildcards.
    source = source.replace(self.escape4, '[^ ]*?');
    // Hostname anchoring takes precedence over plain left anchoring.
    if ( (anchor & 0x4) !== 0 ) {
        source = '[0-9a-z.-]*?' + source;
    } else if ( (anchor & 0x2) !== 0 ) {
        source = '^' + source;
    }
    return (anchor & 0x1) !== 0 ? source + '$' : source;
};
rawToRegexStr.escape1 = /[.+?${}()|[\]\\]/g;
rawToRegexStr.escape2 = /\^/g;
rawToRegexStr.escape3 = /^\*|\*$/g;
rawToRegexStr.escape4 = /\*/g;
// Category keys are numbers when held in a native Map, and strings when
// they need to be string-based (Object-based map): the two helpers below
// convert between the numeric form and its base-32 string form.
var exportMapKey = function(k) {
    var asString = k.toString(32);
    return asString;
};
var importMapKey = function(k) {
    var asNumber = parseInt(k, 32);
    return asNumber;
};
// Builds the log data of a filter found under a given category key and
// token.
//   key: the category bits (action, `important`, party, type)
//   token: the token under which the filter is stored
//   filter: the filter instance, or null
// Returns undefined when `filter` is null, otherwise the object returned
// by `filter.logData()`, completed as follows:
//   - `compiled` is prefixed with the exported key and the token
//   - `raw` is rendered with the `@@` prefix and the `$` options implied
//     by the category bits
var toLogDataInternal = function(key, token, filter) {
    if ( filter === null ) { return undefined; }
    var details = filter.logData();
    details.compiled = exportMapKey(key) + '\v' + token + '\v' + details.compiled;
    // Exception filter
    if ( (key & 0x001) !== 0 ) {
        details.raw = '@@' + details.raw;
    }
    var options = [];
    if ( (key & 0x002) !== 0 ) {
        options.push('important');
    }
    // Party
    if ( (key & 0x008) !== 0 ) {
        options.push('third-party');
    } else if ( (key & 0x004) !== 0 ) {
        options.push('first-party');
    }
    // Type: neither "no type" (0) nor the `data` holder type (16) are
    // rendered as an option.
    var typeId = (key >>> 4) & 0x1F;
    var isRenderedType = typeId !== 0 && typeId !== 16;
    if ( isRenderedType ) {
        options.push(typeValueToTypeName[typeId]);
    }
    // Options contributed by the filter itself
    if ( details.opts !== undefined ) {
        options.push(details.opts);
    }
    if ( options.length !== 0 ) {
        details.raw += '$' + options.join(',');
    }
    return details;
};
// Tells whether a match starting at `matchStart` in `url` qualifies as
// hostname-anchored: the first character of the match must be within the
// hostname part of the URL.
var isHnAnchored = function(url, matchStart) {
    var schemeEnd = url.indexOf('://');
    if ( schemeEnd === -1 ) { return false; }
    var hostnameStart = schemeEnd + 3;
    // The match starts at (or before) the very start of the hostname.
    if ( matchStart <= hostnameStart ) { return true; }
    // Anything marking the end of the hostname before the match means the
    // match is located past the hostname.
    var preamble = url.slice(hostnameStart, matchStart);
    if ( reURLPostHostnameAnchors.test(preamble) ) { return false; }
    // https://github.com/gorhill/uBlock/issues/1929
    // Match only hostname label boundaries.
    return url.charCodeAt(matchStart - 1) === 0x2E;
};
var reURLPostHostnameAnchors = /[\/?#]/;
/*******************************************************************************
    Registry of filter classes.

    Each filter class registers itself in the map and is assigned a numeric
    id in the process. The id, rendered in base 32 and followed by a tab,
    becomes the `fidPrefix` of the class. A filter class id MUST always
    stringify to ONE single character.

    IMPORTANT: any change which modifies the mapping will have to be
    reflected with µBlock.systemSettings.compiledMagic.
**/
var filterClasses = new Map();
var filterClassIdGenerator = 0;

var registerFilterClass = function(ctor) {
    var fid = filterClassIdGenerator;
    filterClassIdGenerator += 1;
    var prefix = fid.toString(32) + '\t';
    // Available both on instances and on the constructor itself.
    ctor.prototype.fidPrefix = prefix;
    ctor.fidPrefix = prefix;
    filterClasses.set(fid, ctor);
    //console.log(ctor.name, fid);
};
/******************************************************************************/
// Filter which matches unconditionally.
var FilterTrue = function() {};

FilterTrue.prototype.match = function() {
    return true;
};

// Log representation: rendered as the `*` filter.
FilterTrue.prototype.logData = function() {
    var details = {
        raw: '*',
        regex: '^',
        compiled: this.compile()
    };
    return details;
};

// The compiled form is the class prefix alone: there is no state to save.
FilterTrue.prototype.compile = function() {
    return this.fidPrefix;
};

FilterTrue.compile = function() {
    return FilterTrue.fidPrefix;
};

FilterTrue.load = function() {
    return new FilterTrue();
};

registerFilterClass(FilterTrue);
/******************************************************************************/
// Plain string filter.
//   s: the string to find in the URL
//   tokenBeg: offset of the token within `s`
var FilterPlain = function(s, tokenBeg) {
    this.s = s;
    this.tokenBeg = tokenBeg;
};

// `tokenBeg` is where the token was found in `url`: the whole string is
// expected to start `this.tokenBeg` characters before that position.
FilterPlain.prototype.match = function(url, tokenBeg) {
    var expectedStart = tokenBeg - this.tokenBeg;
    return url.startsWith(this.s, expectedStart);
};

FilterPlain.prototype.logData = function() {
    var pattern = this.s;
    return {
        raw: pattern,
        regex: rawToRegexStr(pattern),
        compiled: this.compile()
    };
};

// Compiled form: class prefix, string, tab, token offset.
FilterPlain.prototype.compile = function() {
    return this.fidPrefix + this.s + '\t' + this.tokenBeg;
};

FilterPlain.compile = function(details) {
    return FilterPlain.fidPrefix + details.f + '\t' + details.tokenBeg;
};

FilterPlain.load = function(s) {
    // Skip the two-character class prefix, then split on the tab.
    var sep = s.indexOf('\t', 2);
    var pattern = s.slice(2, sep);
    var tokenBeg = parseInt(s.slice(sep + 1), 10);
    return new FilterPlain(pattern, tokenBeg);
};

registerFilterClass(FilterPlain);
/******************************************************************************/
// Plain string filter whose token sits at the very start of the string:
// the string must be found right at the token position.
var FilterPlainPrefix0 = function(s) {
    this.s = s;
};

FilterPlainPrefix0.prototype.match = function(url, tokenBeg) {
    var expectedStart = tokenBeg;
    return url.startsWith(this.s, expectedStart);
};

FilterPlainPrefix0.prototype.logData = function() {
    var pattern = this.s;
    return {
        raw: pattern,
        regex: rawToRegexStr(pattern),
        compiled: this.compile()
    };
};

// Compiled form: class prefix followed by the string.
FilterPlainPrefix0.prototype.compile = function() {
    return this.fidPrefix + this.s;
};

FilterPlainPrefix0.compile = function(details) {
    return FilterPlainPrefix0.fidPrefix + details.f;
};

FilterPlainPrefix0.load = function(s) {
    var pattern = s.slice(2);
    return new FilterPlainPrefix0(pattern);
};

registerFilterClass(FilterPlainPrefix0);
/******************************************************************************/
// Plain string filter whose token sits one character into the string: the
// string must be found one character before the token position.
var FilterPlainPrefix1 = function(s) {
    this.s = s;
};

FilterPlainPrefix1.prototype.match = function(url, tokenBeg) {
    var expectedStart = tokenBeg - 1;
    return url.startsWith(this.s, expectedStart);
};

FilterPlainPrefix1.prototype.logData = function() {
    var pattern = this.s;
    return {
        raw: pattern,
        regex: rawToRegexStr(pattern),
        compiled: this.compile()
    };
};

// Compiled form: class prefix followed by the string.
FilterPlainPrefix1.prototype.compile = function() {
    return this.fidPrefix + this.s;
};

FilterPlainPrefix1.compile = function(details) {
    return FilterPlainPrefix1.fidPrefix + details.f;
};

FilterPlainPrefix1.load = function(s) {
    var pattern = s.slice(2);
    return new FilterPlainPrefix1(pattern);
};

registerFilterClass(FilterPlainPrefix1);
/******************************************************************************/
// Pure hostname filter: matches when the hostname of the request (read
// from `requestHostnameRegister`) is `s` or a subdomain of `s`.
var FilterPlainHostname = function(s) {
    this.s = s;
};

FilterPlainHostname.prototype.match = function() {
    var hostname = requestHostnameRegister;
    var suffix = this.s;
    if ( hostname.endsWith(suffix) === false ) {
        return false;
    }
    // Exact match, or the suffix starts at a label boundary.
    var boundary = hostname.length - suffix.length;
    if ( boundary === 0 ) {
        return true;
    }
    return hostname.charCodeAt(boundary - 1) === 0x2E /* '.' */;
};

FilterPlainHostname.prototype.logData = function() {
    var hostname = this.s;
    return {
        raw: '||' + hostname + '^',
        regex: rawToRegexStr(hostname, 0x4),
        compiled: this.compile()
    };
};

// Compiled form: class prefix followed by the hostname.
FilterPlainHostname.prototype.compile = function() {
    return this.fidPrefix + this.s;
};

FilterPlainHostname.compile = function(details) {
    return FilterPlainHostname.fidPrefix + details.f;
};

FilterPlainHostname.load = function(s) {
    var hostname = s.slice(2);
    return new FilterPlainHostname(hostname);
};

registerFilterClass(FilterPlainHostname);
/******************************************************************************/
// Left-anchored plain string filter: the URL must start with the string.
var FilterPlainLeftAnchored = function(s) {
    this.s = s;
};

FilterPlainLeftAnchored.prototype.match = function(url) {
    var prefix = this.s;
    return url.startsWith(prefix);
};

FilterPlainLeftAnchored.prototype.logData = function() {
    var pattern = this.s;
    return {
        raw: '|' + pattern,
        regex: rawToRegexStr(pattern, 0x2),
        compiled: this.compile()
    };
};

// Compiled form: class prefix followed by the string.
FilterPlainLeftAnchored.prototype.compile = function() {
    return this.fidPrefix + this.s;
};

FilterPlainLeftAnchored.compile = function(details) {
    return FilterPlainLeftAnchored.fidPrefix + details.f;
};

FilterPlainLeftAnchored.load = function(s) {
    var pattern = s.slice(2);
    return new FilterPlainLeftAnchored(pattern);
};

registerFilterClass(FilterPlainLeftAnchored);
/******************************************************************************/
// Right-anchored plain string filter: the URL must end with the string.
var FilterPlainRightAnchored = function(s) {
    this.s = s;
};

FilterPlainRightAnchored.prototype.match = function(url) {
    var suffix = this.s;
    return url.endsWith(suffix);
};

FilterPlainRightAnchored.prototype.logData = function() {
    var pattern = this.s;
    return {
        raw: pattern + '|',
        regex: rawToRegexStr(pattern, 0x1),
        compiled: this.compile()
    };
};

// Compiled form: class prefix followed by the string.
FilterPlainRightAnchored.prototype.compile = function() {
    return this.fidPrefix + this.s;
};

FilterPlainRightAnchored.compile = function(details) {
    return FilterPlainRightAnchored.fidPrefix + details.f;
};

FilterPlainRightAnchored.load = function(s) {
    var pattern = s.slice(2);
    return new FilterPlainRightAnchored(pattern);
};

registerFilterClass(FilterPlainRightAnchored);
/******************************************************************************/
// Hostname-anchored plain string filter: the string must be found at the
// token position, and that position must qualify as hostname-anchored.
var FilterPlainHnAnchored = function(s) {
    this.s = s;
};

FilterPlainHnAnchored.prototype.match = function(url, tokenBeg) {
    if ( url.startsWith(this.s, tokenBeg) === false ) {
        return false;
    }
    return isHnAnchored(url, tokenBeg);
};

FilterPlainHnAnchored.prototype.logData = function() {
    var pattern = this.s;
    return {
        raw: '||' + pattern,
        regex: rawToRegexStr(pattern),
        compiled: this.compile()
    };
};

// Compiled form: class prefix followed by the string.
FilterPlainHnAnchored.prototype.compile = function() {
    return this.fidPrefix + this.s;
};

FilterPlainHnAnchored.compile = function(details) {
    return FilterPlainHnAnchored.fidPrefix + details.f;
};

FilterPlainHnAnchored.load = function(s) {
    var pattern = s.slice(2);
    return new FilterPlainHnAnchored(pattern);
};

registerFilterClass(FilterPlainHnAnchored);
/******************************************************************************/
// Generic filter: a pattern with wildcards and/or separator placeholders,
// evaluated through a regular expression.
//   s: the raw pattern
//   anchor: bit field -- 0x2: left-anchored, 0x1: right-anchored
var FilterGeneric = function(s, anchor) {
    this.s = s;
    this.anchor = anchor;
};

// The regular expression is created lazily, the first time it is needed.
FilterGeneric.prototype.re = null;

FilterGeneric.prototype.match = function(url) {
    if ( this.re === null ) {
        this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
    }
    return this.re.test(url);
};

// Returns the log representation of the filter.
FilterGeneric.prototype.logData = function() {
    // The regex may not exist yet if the filter was never evaluated:
    // create it here rather than dereference `null`.
    if ( this.re === null ) {
        this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
    }
    var out = {
        raw: this.s,
        regex: this.re.source,
        compiled: this.compile()
    };
    if ( this.anchor & 0x2 ) {
        out.raw = '|' + out.raw;
    }
    if ( this.anchor & 0x1 ) {
        out.raw += '|';
    }
    return out;
};

// Compiled form: class prefix, raw pattern, tab, anchor bits.
FilterGeneric.prototype.compile = function() {
    return this.fidPrefix + this.s + '\t' + this.anchor;
};

FilterGeneric.compile = function(details) {
    return FilterGeneric.fidPrefix + details.f + '\t' + details.anchor;
};

FilterGeneric.load = function(s) {
    // Skip the two-character class prefix, then split on the tab.
    var pos = s.indexOf('\t', 2);
    return new FilterGeneric(
        s.slice(2, pos),
        parseInt(s.slice(pos + 1), 10)
    );
};

registerFilterClass(FilterGeneric);
/******************************************************************************/
// Hostname-anchored generic filter: a pattern with wildcards and/or
// separator placeholders, evaluated through a regular expression, and for
// which the match must qualify as hostname-anchored.
var FilterGenericHnAnchored = function(s) {
    this.s = s;
};

// The regular expression is created lazily, the first time it is needed.
FilterGenericHnAnchored.prototype.re = null;
FilterGenericHnAnchored.prototype.anchor = 0x4;

FilterGenericHnAnchored.prototype.match = function(url) {
    if ( this.re === null ) {
        this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
    }
    var matchStart = url.search(this.re);
    return matchStart !== -1 && isHnAnchored(url, matchStart);
};

// Returns the log representation of the filter.
FilterGenericHnAnchored.prototype.logData = function() {
    // The regex may not exist yet if the filter was never evaluated:
    // create it here rather than dereference `null`.
    if ( this.re === null ) {
        this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
    }
    var out = {
        raw: '||' + this.s,
        regex: this.re.source,
        compiled: this.compile()
    };
    return out;
};

// Compiled form: class prefix followed by the raw pattern.
FilterGenericHnAnchored.prototype.compile = function() {
    return this.fidPrefix + this.s;
};

FilterGenericHnAnchored.compile = function(details) {
    return FilterGenericHnAnchored.fidPrefix + details.f;
};

FilterGenericHnAnchored.load = function(s) {
    return new FilterGenericHnAnchored(s.slice(2));
};

registerFilterClass(FilterGenericHnAnchored);
/******************************************************************************/
// Same as FilterGenericHnAnchored, except that the pattern is also
// right-anchored.
var FilterGenericHnAndRightAnchored = function(s) {
    FilterGenericHnAnchored.call(this, s);
};

FilterGenericHnAndRightAnchored.prototype = Object.create(FilterGenericHnAnchored.prototype);

Object.defineProperties(FilterGenericHnAndRightAnchored.prototype, {
    constructor: {
        value: FilterGenericHnAndRightAnchored
    },
    // Hostname-anchored (0x4) + right-anchored (0x1)
    anchor: {
        value: 0x5
    },
    // Same as the parent log data, with the right anchor appended to the
    // raw filter.
    logData: {
        value: function() {
            var details = FilterGenericHnAnchored.prototype.logData.call(this);
            details.raw += '|';
            return details;
        }
    },
    compile: {
        value: function() {
            return this.fidPrefix + this.s;
        }
    }
});

FilterGenericHnAndRightAnchored.compile = function(details) {
    return FilterGenericHnAndRightAnchored.fidPrefix + details.f;
};

FilterGenericHnAndRightAnchored.load = function(s) {
    var pattern = s.slice(2);
    return new FilterGenericHnAndRightAnchored(pattern);
};

registerFilterClass(FilterGenericHnAndRightAnchored);
/******************************************************************************/
// Regex-based filter: the filter is a literal regular expression, which
// is evaluated case-insensitively against the URL.
//   s: the source of the regular expression
var FilterRegex = function(s) {
    this.re = new RegExp(s, 'i');
};

FilterRegex.prototype.match = function(url) {
    return this.re.test(url);
};

// Returns the log representation of the filter.
FilterRegex.prototype.logData = function() {
    // The source string is not kept on the instance: it must be read back
    // from the regular expression itself.
    var source = this.re.source;
    return {
        raw: '/' + source + '/',
        regex: source,
        compiled: this.compile()
    };
};

// Compiled form: class prefix followed by the regex source.
FilterRegex.prototype.compile = function() {
    return this.fidPrefix + this.re.source;
};

FilterRegex.compile = function(details) {
    return FilterRegex.fidPrefix + details.f;
};

FilterRegex.load = function(s) {
    return new FilterRegex(s.slice(2));
};

registerFilterClass(FilterRegex);
/******************************************************************************/
// Filtering according to the origin.
//
// Base class of the origin matchers: an origin matcher wraps another
// filter, and matches only when both the origin test and the wrapped
// filter match. Derived classes provide `toDomainOpt()` and override
// `matchOrigin()`.
var FilterOrigin = function() {};

// Default wrapped filter: matches anything, and contributes nothing to
// the compiled form or to the log data.
FilterOrigin.prototype.wrapped = {
    compile: function() {
        return '';
    },
    logData: function() {
        return { compiled: '' };
    },
    match: function() {
        return true;
    }
};

FilterOrigin.prototype.matchOrigin = function() {
    return true;
};

// The wrapped filter is evaluated only if the origin test passes.
FilterOrigin.prototype.match = function(url, tokenBeg) {
    var originResult = this.matchOrigin();
    if ( !originResult ) {
        return originResult;
    }
    return this.wrapped.match(url, tokenBeg);
};

// Log data of the wrapped filter, completed with the `domain=` option.
FilterOrigin.prototype.logData = function() {
    var details = this.wrapped.logData();
    var domainOpt = this.toDomainOpt();
    details.compiled = this.fidPrefix + domainOpt + '\v' + details.compiled;
    var domainPart = 'domain=' + domainOpt;
    details.opts = details.opts === undefined ?
        domainPart :
        details.opts + ',' + domainPart;
    return details;
};

// Compiled form: class prefix, `domain=` value, vertical tab, compiled
// form of the wrapped filter.
FilterOrigin.prototype.compile = function() {
    var wrappedCompiled = this.wrapped.compile();
    return this.fidPrefix + this.toDomainOpt() + '\v' + wrappedCompiled;
};
// *** start of specialized origin matchers
// Origin matcher for a single hostname: the page hostname (read from
// `pageHostnameRegister`) must be that hostname or one of its subdomains.
var FilterOriginHit = function(domainOpt) {
    FilterOrigin.call(this);
    this.hostname = domainOpt;
};

FilterOriginHit.prototype = Object.create(FilterOrigin.prototype);

Object.defineProperties(FilterOriginHit.prototype, {
    constructor: {
        value: FilterOriginHit
    },
    toDomainOpt: {
        value: function() {
            return this.hostname;
        }
    },
    matchOrigin: {
        value: function() {
            var pageHostname = pageHostnameRegister;
            var wanted = this.hostname;
            if ( pageHostname.endsWith(wanted) === false ) {
                return false;
            }
            // Exact match, or match at a label boundary.
            var boundary = pageHostname.length - wanted.length;
            if ( boundary === 0 ) {
                return true;
            }
            return pageHostname.charCodeAt(boundary - 1) === 0x2E /* '.' */;
        }
    }
});
//
// Origin matcher for a single negated hostname (`~hostname`): the page
// hostname (read from `pageHostnameRegister`) must be neither that
// hostname nor one of its subdomains.
var FilterOriginMiss = function(domainOpt) {
    FilterOrigin.call(this);
    // Drop the leading `~`.
    this.hostname = domainOpt.slice(1);
};

FilterOriginMiss.prototype = Object.create(FilterOrigin.prototype);

Object.defineProperties(FilterOriginMiss.prototype, {
    constructor: {
        value: FilterOriginMiss
    },
    toDomainOpt: {
        value: function() {
            return '~' + this.hostname;
        }
    },
    matchOrigin: {
        value: function() {
            var pageHostname = pageHostnameRegister;
            var unwanted = this.hostname;
            if ( pageHostname.endsWith(unwanted) === false ) {
                return true;
            }
            // Exact match: this is the excluded hostname.
            var boundary = pageHostname.length - unwanted.length;
            if ( boundary === 0 ) {
                return false;
            }
            // A match at a label boundary means an excluded subdomain.
            return pageHostname.charCodeAt(boundary - 1) !== 0x2E /* '.' */;
        }
    }
});
//
// Origin matcher for many hostnames, none negated: the page hostname
// (read from `pageHostnameRegister`) must be in the set.
var FilterOriginHitSet = function(domainOpt) {
    FilterOrigin.call(this);
    this.domainOpt = domainOpt;
};

FilterOriginHitSet.prototype = Object.create(FilterOrigin.prototype);

Object.defineProperties(FilterOriginHitSet.prototype, {
    constructor: {
        value: FilterOriginHitSet
    },
    // Regex matching any hostname of the set, created on first use.
    oneOf: {
        value: null,
        writable: true
    },
    toDomainOpt: {
        value: function() {
            return this.domainOpt;
        }
    },
    matchOrigin: {
        value: function() {
            if ( this.oneOf === null ) {
                // The `|` separators of the option double as the regex
                // alternation operator; only the dots need escaping.
                var alternatives = this.domainOpt.replace(/\./g, '\\.');
                this.oneOf = new RegExp('(?:^|\\.)(?:' + alternatives + ')$');
            }
            return this.oneOf.test(pageHostnameRegister);
        }
    }
});
//
// Origin matcher for many hostnames, all negated: the page hostname (read
// from `pageHostnameRegister`) must not be in the set.
var FilterOriginMissSet = function(domainOpt) {
    FilterOrigin.call(this);
    this.domainOpt = domainOpt;
};

FilterOriginMissSet.prototype = Object.create(FilterOrigin.prototype);

Object.defineProperties(FilterOriginMissSet.prototype, {
    constructor: {
        value: FilterOriginMissSet
    },
    // Regex matching any hostname of the set, created on first use.
    noneOf: {
        value: null,
        writable: true
    },
    toDomainOpt: {
        value: function() {
            return this.domainOpt;
        }
    },
    matchOrigin: {
        value: function() {
            if ( this.noneOf === null ) {
                // Strip the negation markers, then escape the dots: the
                // `|` separators double as the regex alternation operator.
                var alternatives = this.domainOpt
                    .replace(/~/g, '')
                    .replace(/\./g, '\\.');
                this.noneOf = new RegExp('(?:^|\\.)(?:' + alternatives + ')$');
            }
            return this.noneOf.test(pageHostnameRegister) === false;
        }
    }
});
//
// Origin matcher for many hostnames, some negated, some not: the page
// hostname (read from `pageHostnameRegister`) must be in the set of
// non-negated hostnames, and not in the set of negated ones.
var FilterOriginMixedSet = function(domainOpt) {
    FilterOrigin.call(this);
    this.domainOpt = domainOpt;
};

FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype);

Object.defineProperties(FilterOriginMixedSet.prototype, {
    constructor: {
        value: FilterOriginMixedSet
    },
    // Both regexes are created together, on first use.
    oneOf: {
        value: null,
        writable: true
    },
    noneOf: {
        value: null,
        writable: true
    },
    init: {
        value: function() {
            var wanted = [];
            var unwanted = [];
            var entries = this.domainOpt.split('|');
            // Walk the entries from last to first.
            for ( var i = entries.length - 1; i >= 0; i-- ) {
                var escaped = entries[i].replace(/\./g, '\\.');
                if ( escaped.charCodeAt(0) === 0x7E /* '~' */ ) {
                    unwanted.push(escaped.slice(1));
                } else {
                    wanted.push(escaped);
                }
            }
            this.oneOf = new RegExp('(?:^|\\.)(?:' + wanted.join('|') + ')$');
            this.noneOf = new RegExp('(?:^|\\.)(?:' + unwanted.join('|') + ')$');
        }
    },
    toDomainOpt: {
        value: function() {
            return this.domainOpt;
        }
    },
    matchOrigin: {
        value: function() {
            if ( this.oneOf === null ) {
                this.init();
            }
            var pageHostname = pageHostnameRegister;
            if ( this.oneOf.test(pageHostname) === false ) {
                return false;
            }
            return this.noneOf.test(pageHostname) === false;
        }
    }
});
// *** end of specialized origin matchers
// The matcher best suited to the content of the `domain=` filter option
// is picked by the factory below.
// Re-factored in light of:
// - https://gorhill.github.io/obj-vs-set-vs-map/set-vs-regexp.html
// The re-factoring made possible to reuse instances of a matcher. As of
// writing, I observed that just with EasyList, there were ~1,200 reused
// instances out of ~2,800.
FilterOrigin.matcherFactory = function(domainOpt) {
    var hasManyHostnames = domainOpt.indexOf('|') !== -1;
    // One hostname: negated or not.
    if ( hasManyHostnames === false ) {
        return domainOpt.charCodeAt(0) === 0x7E /* '~' */ ?
            new FilterOriginMiss(domainOpt) :
            new FilterOriginHit(domainOpt);
    }
    // Many hostnames, none negated: must be in set.
    var hasNegation = domainOpt.indexOf('~') !== -1;
    if ( hasNegation === false ) {
        return new FilterOriginHitSet(domainOpt);
    }
    // Many hostnames, all negated: must not be in set.
    if ( FilterOrigin.reAllNegated.test(domainOpt) ) {
        return new FilterOriginMissSet(domainOpt);
    }
    // Otherwise: must be in one set, but not in the other.
    return new FilterOriginMixedSet(domainOpt);
};

// Matches a list of two or more hostnames, all of them negated.
FilterOrigin.reAllNegated = /^~(?:[^|~]+\|~)+[^|~]+$/;

// Only the `domain=` part is compiled here.
FilterOrigin.compile = function(details) {
    return FilterOrigin.fidPrefix + details.domainOpt;
};

// Compiled form: class prefix, `domain=` value, vertical tab, compiled
// form of the wrapped filter.
FilterOrigin.load = function(s) {
    var sep = s.indexOf('\v', 2);
    var matcher = FilterOrigin.matcherFactory(s.slice(2, sep));
    matcher.wrapped = filterFromCompiledData(s.slice(sep + 1));
    return matcher;
};

registerFilterClass(FilterOrigin);
/******************************************************************************/
// Filter carrying a piece of data (a type and a string) on top of a
// wrapped filter. Matching is fully delegated to the wrapped filter.
var FilterDataHolder = function(dataType, dataStr) {
    this.dataType = dataType;
    this.dataStr = dataStr;
    this.wrapped = undefined;
};

FilterDataHolder.prototype.match = function(url, tokenBeg) {
    var wrappedFilter = this.wrapped;
    return wrappedFilter.match(url, tokenBeg);
};

// Log data of the wrapped filter, with the data option rendered ahead of
// whatever options the wrapped filter reports.
FilterDataHolder.prototype.logData = function() {
    var details = this.wrapped.logData();
    details.compiled = this.fidPrefix + this.dataType + '\t' + this.dataStr + '\v' + details.compiled;
    // Rendered as `type`, or `type=value` when there is a value.
    var dataOpt = this.dataStr !== '' ?
        this.dataType + '=' + this.dataStr :
        this.dataType;
    details.opts = details.opts === undefined ?
        dataOpt :
        dataOpt + ',' + details.opts;
    return details;
};

// Compiled form: class prefix, data type, tab, data string, vertical tab,
// compiled form of the wrapped filter.
FilterDataHolder.prototype.compile = function() {
    var ownPart = this.fidPrefix + this.dataType + '\t' + this.dataStr;
    return ownPart + '\v' + this.wrapped.compile();
};

// Only the data part is compiled here.
FilterDataHolder.compile = function(details) {
    return FilterDataHolder.fidPrefix + details.dataType + '\t' + details.dataStr;
};

FilterDataHolder.load = function(s) {
    var typeEnd = s.indexOf('\t', 2);
    var dataEnd = s.indexOf('\v', typeEnd);
    var holder = new FilterDataHolder(
        s.slice(2, typeEnd),
        s.slice(typeEnd + 1, dataEnd)
    );
    holder.wrapped = filterFromCompiledData(s.slice(dataEnd + 1));
    return holder;
};

registerFilterClass(FilterDataHolder);
// Helper class for storing instances of FilterDataHolder: an entry keeps
// the category bits and the token along with the filter itself, and has a
// `next` field to link to another entry.
var FilterDataHolderEntry = function(key, token, fdata) {
    this.keyBits = key;
    this.token = token;
    this.filter = filterFromCompiledData(fdata);
    this.next = undefined;
};

FilterDataHolderEntry.prototype.logData = function() {
    var entry = this;
    return toLogDataInternal(entry.keyBits, entry.token, entry.filter);
};

// Compiled form: category bits (base 10), tab, token, tab, compiled form
// of the filter.
FilterDataHolderEntry.prototype.compile = function() {
    var head = this.keyBits + '\t' + this.token;
    return head + '\t' + this.filter.compile();
};

FilterDataHolderEntry.load = function(s) {
    var keyEnd = s.indexOf('\t');
    var tokenEnd = s.indexOf('\t', keyEnd + 1);
    // parseInt() stops at the first tab.
    var keyBits = parseInt(s, 10);
    var token = s.slice(keyEnd + 1, tokenEnd);
    var fdata = s.slice(tokenEnd + 1);
    return new FilterDataHolderEntry(keyBits, token, fdata);
};
/******************************************************************************/
// Dictionary of hostnames
//
// A single filter instance standing for a whole set of pure hostname
// filters: it matches when the hostname of the request (read from
// `requestHostnameRegister`), or one of its parent domains, is in the set.
var FilterHostnameDict = function() {
    this.h = ''; // short-lived register
    this.dict = new Set();
};

// Number of hostnames in the dictionary.
Object.defineProperty(FilterHostnameDict.prototype, 'size', {
    get: function() {
        return this.dict.size;
    }
});

// Returns true if the hostname was added, false if it was already there.
FilterHostnameDict.prototype.add = function(hn) {
    var isNew = this.dict.has(hn) === false;
    if ( isNew ) {
        this.dict.add(hn);
    }
    return isNew;
};

FilterHostnameDict.prototype.remove = function(hn) {
    return this.dict.delete(hn);
};

// On success, the matching hostname is left in `this.h` and the instance
// itself is returned. On failure, `this.h` is cleared and false is
// returned.
FilterHostnameDict.prototype.match = function() {
    // TODO: mind IP addresses
    var candidate = requestHostnameRegister;
    for (;;) {
        if ( this.dict.has(candidate) ) {
            this.h = candidate;
            return this;
        }
        // Not found: try again with the leftmost label removed.
        var dot = candidate.indexOf('.');
        if ( dot === -1 ) {
            this.h = '';
            return false;
        }
        candidate = candidate.slice(dot + 1);
    }
};

// Log data for the hostname found by the last successful match.
FilterHostnameDict.prototype.logData = function() {
    var hostname = this.h;
    return {
        raw: '||' + hostname + '^',
        regex: rawToRegexStr(hostname) + '(?:[^%.0-9a-z_-]|$)',
        compiled: hostname
    };
};

// Compiled form: class prefix followed by the JSON array of hostnames.
FilterHostnameDict.prototype.compile = function() {
    var hostnames = µb.setToArray(this.dict);
    return this.fidPrefix + JSON.stringify(hostnames);
};

FilterHostnameDict.load = function(s) {
    var instance = new FilterHostnameDict();
    var hostnames = JSON.parse(s.slice(2));
    instance.dict = µb.setFromArray(hostnames);
    return instance;
};

registerFilterClass(FilterHostnameDict);
/******************************************************************************/
// Some buckets can grow quite large, and finding a hit in these buckets
// may end up being expensive. After considering various solutions, the one
// retained is to promote hit filters to a smaller index, so that next time
// they can be looked-up faster.
// key= 10000 ad count=660
// key= 10000 ads count=433
// key= 10001 google count=277
// key=1000000 2mdn count=267
// key= 10000 social count=240
// key= 10001 pagead2 count=166
// key= 10000 twitter count=122
// key= 10000 doubleclick count=118
// key= 10000 facebook count=114
// key= 10000 share count=113
// key= 10000 google count=106
// key= 10001 code count=103
// key= 11000 doubleclick count=100
// key=1010001 g count=100
// key= 10001 js count= 89
// key= 10000 adv count= 88
// key= 10000 youtube count= 61
// key= 10000 plugins count= 60
// key= 10001 partner count= 59
// key= 10000 ico count= 57
// key= 110001 ssl count= 57
// key= 10000 banner count= 53
// key= 10000 footer count= 51
// key= 10000 rss count= 51
/******************************************************************************/
// A bucket holds all the filters sharing the same category and token.
// It can optionally be created with one or two initial filters.
var FilterBucket = function(a, b) {
    this.promoted = 0;
    this.vip = 16;
    this.f = null; // short-lived register
    this.filters = [];
    if ( a === undefined ) { return; }
    this.filters[0] = a;
    if ( b === undefined ) { return; }
    this.filters[1] = b;
};

FilterBucket.prototype.add = function(a) {
    this.filters.push(a);
};

// Removes every filter whose compiled form is `fdata`.
FilterBucket.prototype.remove = function(fdata) {
    for ( var i = this.filters.length - 1; i >= 0; i-- ) {
        if ( this.filters[i].compile() === fdata ) {
            this.filters.splice(i, 1);
        }
    }
};

// Promote hit filters so they can be found faster next time: the filter
// at index `i` is swapped with a filter located in the lower part of the
// array.
FilterBucket.prototype.promote = function(i) {
    var filters = this.filters;
    var pivot = filters.length >>> 1;
    // Keep halving the target area for as long as the filter is already
    // inside of it, without going below the size of the vip area.
    while ( i < pivot ) {
        pivot >>>= 1;
        if ( pivot < this.vip ) {
            break;
        }
    }
    // Already inside the target area: nothing to do.
    if ( i <= pivot ) { return; }
    // Target slots are used in a round-robin manner.
    var slot = this.promoted % pivot;
    //console.debug('FilterBucket.promote(): promoted %d to %d', i, slot);
    var displaced = filters[slot];
    filters[slot] = filters[i];
    filters[i] = displaced;
    this.promoted += 1;
};

// The first matching filter is left in `this.f`. Filters found past the
// vip area are candidates for promotion.
FilterBucket.prototype.match = function(url, tokenBeg) {
    var filters = this.filters;
    for ( var i = 0, n = filters.length; i < n; i++ ) {
        var candidate = filters[i];
        if ( !candidate.match(url, tokenBeg) ) { continue; }
        this.f = candidate;
        if ( i >= this.vip ) {
            this.promote(i);
        }
        return true;
    }
    return false;
};

// Log data of the filter found by the last successful match.
FilterBucket.prototype.logData = function() {
    return this.f.logData();
};

// Compiled form: class prefix followed by the JSON array of the compiled
// forms of all the filters in the bucket.
FilterBucket.prototype.compile = function() {
    var compiled = this.filters.map(function(filter) {
        return filter.compile();
    });
    return this.fidPrefix + JSON.stringify(compiled);
};

FilterBucket.load = function(s) {
    var bucket = new FilterBucket();
    var compiled = JSON.parse(s.slice(2));
    for ( var i = 0, n = compiled.length; i < n; i++ ) {
        bucket.filters.push(filterFromCompiledData(compiled[i]));
    }
    return bucket;
};

registerFilterClass(FilterBucket);
/******************************************************************************/
/******************************************************************************/
// Creates a filter instance from its compiled form.
//   compiled: string starting with the class prefix of the filter, i.e.
//             the filter class id rendered in base 32, followed by a tab
// Returns the filter instance created by the `load()` method of the
// filter class. When called with the very same string as on the previous
// call, the previously created instance is returned instead of a new one.
var filterFromCompiledData = function(compiled) {
    if ( compiled === lastLoadedFilterString ) {
        return lastLoadedFilter;
    }
    // The class id must be decoded with the radix used to encode it: see
    // registerFilterClass(), which uses `toString(32)`. parseInt() stops
    // at the tab which follows the id.
    var fid = parseInt(compiled, 32),
        f = filterClasses.get(fid).load(compiled);
    //filterClassHistogram.set(fid, (filterClassHistogram.get(fid) || 0) + 1);
    lastLoadedFilterString = compiled;
    lastLoadedFilter = f;
    return f;
};
// Last compiled string seen, and the filter instance created from it.
var lastLoadedFilterString,
    lastLoadedFilter;
//var filterClassHistogram = new Map();
/******************************************************************************/
/******************************************************************************/
// Parser for static network filters. The constructor creates the regular
// expressions used by the parsing methods, then calls reset() to set all
// the per-filter fields to their default values.
var FilterParser = function() {
// NOTE(review): presumably tells whether the platform is unable to filter
// websocket requests natively -- confirm against vAPI.
this.cantWebsocket = vAPI.cantWebsocket;
// Characters causing the value of a `domain=` option to be rejected (see
// parseDomainOption()).
this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
// A string made only of alphanumerical characters, dots and dashes,
// starting and ending with an alphanumerical character.
this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i;
// Same as above, with optional leading `*` and one optional trailing `^`.
this.reHostnameRule2 = /^\**[0-9a-z][0-9a-z.-]*[0-9a-z]\^?$/i;
// Matches the leading `*` and the trailing `^` tolerated by the regex above.
this.reCleanupHostnameRule2 = /^\**|\^$/g;
// At least one `^` or `*`.
this.reHasWildcard = /[\^\*]/;
// No `*` at all.
this.reCanTrimCarets1 = /^[^*]*$/;
// No `^` except possibly as first and/or last character, with at least
// three other characters.
this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/;
// At least one ASCII uppercase letter.
this.reHasUppercase = /[A-Z]/;
// Captures: an optional leading `.` or `*.`, then a run of characters
// outside the excluded ASCII ranges, then the rest of the string.
this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
// At least one character outside the 7-bit ASCII range.
this.reHasUnicode = /[^\x00-\x7F]/;
// `ws`, `wss` or `ws*`, optionally followed by `:` and up to two `/`,
// optionally followed by `*`.
this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
// A `report-to` or `report-uri` directive, at the start or after a `;`.
this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/;
this.domainOpt = '';
this.reset();
};
/******************************************************************************/
// Maps the type options as found in filters to the internal type names
// (see typeNameToTypeValue).
// https://github.com/gorhill/uBlock/issues/1493
//   Transpose `ping` into `other` for now.
FilterParser.prototype.toNormalizedType = {
    beacon: 'other',
    data: 'data',
    document: 'main_frame',
    elemhide: 'generichide',
    font: 'font',
    generichide: 'generichide',
    image: 'image',
    'inline-script': 'inline-script',
    media: 'media',
    object: 'object',
    other: 'other',
    'object-subrequest': 'object',
    ping: 'other',
    popunder: 'popunder',
    popup: 'popup',
    script: 'script',
    stylesheet: 'stylesheet',
    subdocument: 'sub_frame',
    xmlhttprequest: 'xmlhttprequest',
    websocket: 'websocket'
};
/******************************************************************************/
// Sets all the per-filter fields of the parser to their default values.
// Returns the parser instance itself.
FilterParser.prototype.reset = function() {
// Blocking is the default action.
this.action = BlockAction;
this.anchor = 0;
this.badFilter = false;
this.dataType = undefined;
this.dataStr = undefined;
this.elemHiding = false;
this.f = '';
// Party: both flags cleared means any party (see parsePartyOption()).
this.firstParty = false;
this.thirdParty = false;
this.party = AnyParty;
this.fopts = '';
this.hostnamePure = false;
this.domainOpt = '';
this.isRegex = false;
this.raw = '';
this.redirect = false;
this.token = '*';
this.tokenBeg = 0;
// Bit field of request types (see parseTypeOption()).
this.types = 0;
this.important = 0;
this.unsupported = false;
return this;
};
/******************************************************************************/
// Returns the bit standing for a normalized type name in a bit field of
// types: the type with id 1 maps to bit 0, the type with id 2 to bit 1,
// and so on.
FilterParser.prototype.bitFromType = function(type) {
    // Type values are stored shifted by 4 -- see typeNameToTypeValue.
    var typeId = typeNameToTypeValue[type] >>> 4;
    return 1 << (typeId - 1);
};
/******************************************************************************/
// https://github.com/chrisaljoudi/uBlock/issues/589
// Be ready to handle multiple negated types
// Record a type option into the `this.types` bit vector.
//   raw: option name, must be a key of `toNormalizedType`
//   not: truthy when the option was negated (`~type`)
FilterParser.prototype.parseTypeOption = function(raw, not) {
    var bit = this.bitFromType(this.toNormalizedType[raw]);

    if ( not ) {
        // Non-discrete network types can't be negated.
        if ( (bit & allNetRequestTypesBitmap) === 0 ) {
            return;
        }
        // First negated type seen: start from "all network request types",
        // then carve out the negated ones.
        if ( (this.types & allNetRequestTypesBitmap) === 0 ) {
            this.types |= allNetRequestTypesBitmap;
        }
        this.types &= ~bit;
        return;
    }

    this.types |= bit;
};
/******************************************************************************/
// Record a `first-party`/`third-party` option.
//   firstParty: true for `first-party`, false for `third-party`
//   not: truthy when the option was negated
// `~third-party` is equivalent to `first-party`, and vice versa. When both
// parties end up selected, the filter applies to any party.
FilterParser.prototype.parsePartyOption = function(firstParty, not) {
    var selectsFirstParty = firstParty ? !not : not;
    if ( selectsFirstParty ) {
        this.firstParty = true;
        this.party = this.thirdParty ? AnyParty : FirstParty;
        return;
    }
    this.thirdParty = true;
    this.party = this.firstParty ? AnyParty : ThirdParty;
};
/******************************************************************************/
// Normalize the value of a `domain=` option.
//   s: the `|`-separated list of hostnames
// Hostnames containing non-ASCII characters are converted to punycode.
// Returns the normalized list, or an empty string if the result matches
// `reBadDomainOptChars`.
FilterParser.prototype.parseDomainOption = function(s) {
    var reUnicode = this.reHasUnicode;
    if ( reUnicode.test(s) ) {
        s = s.split('|').map(function(hostname) {
            return reUnicode.test(hostname) ?
                punycode.toASCII(hostname) :
                hostname;
        }).join('|');
    }
    return this.reBadDomainOptChars.test(s) ? '' : s;
};
/******************************************************************************/
// Parse the option part of a filter, i.e. what follows the last `$`.
//   s: comma-separated list of options, each optionally prefixed with `~`
// Results are stored in the parser's fields. When an option is not
// recognized, or is not valid in its context, `this.unsupported` is set to
// true and parsing stops at that option.
// The order of the tests matters: `this.action` must have been set by the
// caller before this method is called.
FilterParser.prototype.parseOptions = function(s) {
this.fopts = s;
var opts = s.split(',');
var opt, not;
for ( var i = 0; i < opts.length; i++ ) {
opt = opts[i];
// A leading `~` negates the option.
not = opt.startsWith('~');
if ( not ) {
opt = opt.slice(1);
}
if ( opt === 'third-party' ) {
this.parsePartyOption(false, not);
continue;
}
// https://issues.adblockplus.org/ticket/616
// `generichide` concept already supported, just a matter of
// adding support for the new keyword.
// The negated form is not supported.
if ( opt === 'elemhide' || opt === 'generichide' ) {
if ( not === false ) {
this.parseTypeOption('generichide', false);
continue;
}
this.unsupported = true;
break;
}
// `document` is accepted only for block filters.
if ( opt === 'document' ) {
if ( this.action === BlockAction ) {
this.parseTypeOption('document', not);
continue;
}
this.unsupported = true;
break;
}
if ( this.toNormalizedType.hasOwnProperty(opt) ) {
this.parseTypeOption(opt, not);
// Due to ABP categorizing `websocket` requests as `other`, we need
// to add `websocket` for when `other` is used.
if ( opt === 'other' ) {
this.parseTypeOption('websocket', not);
}
continue;
}
// https://github.com/gorhill/uBlock/issues/2294
// Detect and discard filter if domain option contains nonsensical
// characters.
if ( opt.startsWith('domain=') ) {
this.domainOpt = this.parseDomainOption(opt.slice(7));
if ( this.domainOpt === '' ) {
this.unsupported = true;
break;
}
continue;
}
if ( opt === 'important' ) {
this.important = Important;
continue;
}
if ( opt === 'first-party' ) {
this.parsePartyOption(true, not);
continue;
}
// `redirect=` is accepted only for block filters. Only the presence of
// the option is recorded here, its value is not parsed.
if ( opt.startsWith('redirect=') ) {
if ( this.action === BlockAction ) {
this.redirect = true;
continue;
}
this.unsupported = true;
break;
}
// `csp=[value]`: an empty value, or a value matching `reBadCSP`, causes
// the option to be silently skipped -- the filter is not flagged as
// unsupported.
if ( opt.startsWith('csp=') ) {
if ( opt.length > 4 && this.reBadCSP.test(opt) === false ) {
this.parseTypeOption('data', not);
this.dataType = 'csp';
this.dataStr = opt.slice(4).trim();
}
continue;
}
// Value-less `csp` is accepted only for exception filters, it is stored
// with an empty data string.
if ( opt === 'csp' && this.action === AllowAction ) {
this.parseTypeOption('data', not);
this.dataType = 'csp';
this.dataStr = '';
continue;
}
// Used by Adguard, purpose is unclear -- just ignore for now.
if ( opt === 'empty' ) {
continue;
}
// https://github.com/uBlockOrigin/uAssets/issues/192
if ( opt === 'badfilter' ) {
this.badFilter = true;
continue;
}
// Unrecognized filter option: ignore whole filter.
this.unsupported = true;
break;
}
};
/******************************************************************************/
// https://github.com/gorhill/uBlock/issues/1943#issuecomment-243188946
// Convert websocket-related filter where possible to a format which
// can be handled using CSP injection.
// Convert some special broad filters into equivalent `csp` data filters:
// - catch-all websocket filters, when `this.cantWebsocket` is set;
// - `data:` and `blob:` filters, when the filter has no type, or is
//   restricted to `script` and/or `sub_frame`.
// A converted filter has its pattern set to `*`, its types set to the `data`
// type bit, and the CSP directives stored in `this.dataStr`. Any other filter
// is left untouched.
FilterParser.prototype.translate = function() {
    var dataTypeBit = this.bitFromType('data'),
        parser = this;

    // Turn the current filter into a match-all `csp` filter.
    var becomeCSP = function(directives) {
        parser.f = '*';
        parser.types = dataTypeBit;
        parser.dataType = 'csp';
        parser.dataStr = directives;
    };

    if ( this.cantWebsocket && this.reWebsocketAny.test(this.f) ) {
        becomeCSP("connect-src https: http:");
        // https://bugs.chromium.org/p/chromium/issues/detail?id=669086
        // TODO: remove when most users are beyond Chromium v56
        if ( vAPI.chromiumVersion < 57 ) {
            this.dataStr += '; frame-src *';
        }
        return;
    }

    var isData = this.f === 'data:',
        isBlob = this.f === 'blob:';
    if ( isData === false && isBlob === false ) {
        return;
    }

    var scriptBit = this.bitFromType('script'),
        frameBit = this.bitFromType('sub_frame'),
        types = this.types;

    // Broad |data:-based filters.
    if ( isData ) {
        if ( types === 0 ) {
            becomeCSP("default-src 'self' * blob: 'unsafe-inline' 'unsafe-eval'");
        } else if ( types === scriptBit ) {
            becomeCSP("script-src 'self' * blob: 'unsafe-inline' 'unsafe-eval'");
        } else if ( types === frameBit ) {
            becomeCSP("frame-src 'self' * blob:");
        } else if ( types === (scriptBit | frameBit) ) {
            becomeCSP("frame-src 'self' * blob:; script-src 'self' * blob: 'unsafe-inline' 'unsafe-eval';");
        }
        return;
    }

    // Broad |blob:-based filters.
    if ( types === 0 ) {
        becomeCSP("default-src 'self' * data: 'unsafe-inline' 'unsafe-eval'");
    } else if ( types === scriptBit ) {
        becomeCSP("script-src 'self' * data: 'unsafe-inline' 'unsafe-eval'");
    } else if ( types === frameBit ) {
        becomeCSP("frame-src 'self' * data:");
    } else if ( types === (scriptBit | frameBit) ) {
        becomeCSP("frame-src 'self' * data:; script-src 'self' * data: 'unsafe-inline' 'unsafe-eval';");
    }
};
/*******************************************************************************
anchor: bit vector
0000 (0x0): no anchoring
0001 (0x1): anchored to the end of the URL.
0010 (0x2): anchored to the start of the URL.
0011 (0x3): anchored to the start and end of the URL.
0100 (0x4): anchored to the hostname of the URL.
0101 (0x5): anchored to the hostname and end of the URL.
**/
// Parse a raw static network filter.
//   raw: the filter, as a trimmed string
// The parser is reset first, then its fields are filled from `raw`:
// - `f`: the normalized pattern (or regex source when `isRegex` is true)
// - `anchor`: bit vector, see above
// - `action`, `types`, `party`, etc.: as set by `parseOptions()`
// - `unsupported`: true when the filter must be discarded
// - `elemHiding`: true when the filter looks like a cosmetic filter
// Returns the parser itself.
FilterParser.prototype.parse = function(raw) {
    // important!
    this.reset();

    var s = this.raw = raw;

    // plain hostname? (from HOSTS file)
    if ( this.reHostnameRule1.test(s) ) {
        this.f = s;
        this.hostnamePure = true;
        this.anchor |= 0x4;
        return this;
    }

    // element hiding filter?
    var pos = s.indexOf('#');
    if ( pos !== -1 ) {
        var c = s.charAt(pos + 1);
        if ( c === '#' || c === '@' ) {
            console.error('static-net-filtering.js > unexpected cosmetic filters');
            this.elemHiding = true;
            return this;
        }
    }

    // block or allow filter?
    // Important: this must be executed before parsing options
    if ( s.startsWith('@@') ) {
        this.action = AllowAction;
        s = s.slice(2);
    }

    // options
    // https://github.com/gorhill/uBlock/issues/842
    // - ensure we are not dealing with a regex-based filter.
    // - lookup the last occurrence of `$`.
    if ( s.startsWith('/') === false || s.endsWith('/') === false ) {
        pos = s.lastIndexOf('$');
        if ( pos !== -1 ) {
            // https://github.com/gorhill/uBlock/issues/952
            // Discard Adguard-specific `$$` filters.
            if ( s.indexOf('$$') !== -1 ) {
                this.unsupported = true;
                return this;
            }
            this.parseOptions(s.slice(pos + 1));
            s = s.slice(0, pos);
        }
    }

    // regex?
    if ( s.startsWith('/') && s.endsWith('/') && s.length > 2 ) {
        this.isRegex = true;
        this.f = s.slice(1, -1);
        // https://github.com/gorhill/uBlock/issues/1246
        // If the filter is valid, use the corrected version of the source
        // string -- this ensure reverse-lookup will work fine.
        this.f = normalizeRegexSource(this.f);
        if ( this.f === '' ) {
            console.error(
                "uBlock Origin> discarding bad regular expression-based network filter '%s': '%s'",
                raw,
                normalizeRegexSource.message
            );
            this.unsupported = true;
        }
        return this;
    }

    // hostname-anchored
    if ( s.startsWith('||') ) {
        this.anchor |= 0x4;
        s = s.slice(2);

        // convert hostname to punycode if needed
        // https://github.com/gorhill/uBlock/issues/2599
        if ( this.reHasUnicode.test(s) ) {
            var matches = this.reIsolateHostname.exec(s);
            if ( matches ) {
                s = (matches[1] !== undefined ? matches[1] : '') +
                    punycode.toASCII(matches[2]) +
                    matches[3];
                //console.debug('µBlock.staticNetFilteringEngine/FilterParser.parse():', raw, '=', s);
            }
        }

        // https://github.com/chrisaljoudi/uBlock/issues/1096
        if ( s.startsWith('^') ) {
            this.unsupported = true;
            return this;
        }

        // plain hostname? (from ABP filter list)
        // https://github.com/gorhill/uBlock/issues/1757
        // A filter can't be a pure-hostname one if there is a domain or csp
        // option present.
        if ( this.reHostnameRule2.test(s) ) {
            this.f = s.replace(this.reCleanupHostnameRule2, '');
            this.hostnamePure = true;
            return this;
        }
    }
    // left-anchored
    else if ( s.startsWith('|') ) {
        this.anchor |= 0x2;
        s = s.slice(1);
    }

    // right-anchored
    if ( s.endsWith('|') ) {
        this.anchor |= 0x1;
        s = s.slice(0, -1);
    }

    // normalize placeholders
    if ( this.reHasWildcard.test(s) ) {
        // remove pointless leading *
        // https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
        //   Keep the leading asterisk if we are dealing with a hostname-anchored
        //   filter, this will ensure the generic filter implementation is
        //   used.
        // Hence the leading asterisk is stripped only when the filter is NOT
        // hostname-anchored.
        if ( s.startsWith('*') && (this.anchor & 0x4) === 0 ) {
            s = s.replace(/^\*+([^%0-9a-z])/, '$1');
        }
        // remove pointless trailing *
        if ( s.endsWith('*') ) {
            s = s.replace(/([^%0-9a-z])\*+$/, '$1');
        }
    }

    // nothing left?
    if ( s === '' ) {
        s = '*';
    }

    // https://github.com/gorhill/uBlock/issues/1047
    // Hostname-anchored makes no sense if matching all requests.
    if ( s === '*' ) {
        this.anchor = 0;
    }

    // This might look weird but we gain memory footprint by not going through
    // toLowerCase(), at least on Chromium. Because copy-on-write?
    this.f = this.reHasUppercase.test(s) ? s.toLowerCase() : s;

    // Convenience:
    //   Convert special broad filters for non-webRequest aware types into
    //   `csp` filters wherever possible.
    // Only left-anchored filters with no party restriction are candidates.
    if ( this.anchor & 0x2 && this.party === 0 ) {
        this.translate();
    }

    return this;
};
/******************************************************************************/
// Given a string, find a good token. Tokens which are too generic, i.e. very
// common with a high probability of ending up as a miss, are not
// good. Avoid if possible. This has a *significant* positive impact on
// performance.
// These "bad tokens" are collated manually.
// Hostname-anchored with no wildcard always have a token index of 0.
// Token at the very start of a hostname-anchored pattern.
var reHostnameToken = /^[0-9a-z]+/;

// Any run of two or more token characters. Global: `lastIndex` is stateful
// and must be reset before each scan.
var reGoodToken = /[%0-9a-z]{2,}/g;

// Tokens too common to be useful for indexing a filter. Used only as a last
// resort, when a pattern contains no better token.
var badTokens = new Set([
    'com', 'http', 'https', 'icon', 'images',
    'img', 'js', 'net', 'news', 'www'
]);

// Scan `s` for the first token which is neither adjacent to a wildcard nor
// listed in `badTokens`. If there is none, fall back to the first bad token
// not adjacent to a wildcard.
// Returns the regex match array (token at [0], position in `.index`), or
// null when `s` contains no usable token.
var findFirstGoodToken = function(s) {
    var fallback = null,
        candidate, begin, end;
    reGoodToken.lastIndex = 0;
    for (;;) {
        candidate = reGoodToken.exec(s);
        if ( candidate === null ) {
            break;
        }
        begin = candidate.index;
        end = reGoodToken.lastIndex;
        // https://github.com/gorhill/uBlock/issues/997
        // Ignore token if preceded or followed by a wildcard.
        if (
            (begin !== 0 && s.charCodeAt(begin - 1) === 0x2A /* '*' */) ||
            s.charCodeAt(end) === 0x2A /* '*' */
        ) {
            continue;
        }
        if ( badTokens.has(candidate[0]) === false ) {
            return candidate;
        }
        if ( fallback === null ) {
            fallback = candidate;
        }
    }
    return fallback;
};

// Returns the match for the leading alphanumeric run of `s`, or null when
// `s` does not start with one.
var findHostnameToken = function(s) {
    return reHostnameToken.exec(s);
};
/******************************************************************************/
// Pick the token under which the parsed filter will be indexed, and store it
// in `this.token`/`this.tokenBeg`. These fields are left untouched (`*`/0
// after a reset) for regex-based filters, for the match-all filter, and when
// no usable token is found.
FilterParser.prototype.makeToken = function() {
    // https://github.com/chrisaljoudi/uBlock/issues/1038
    // Single asterisk will match any URL.
    if ( this.isRegex || this.f === '*' ) {
        return;
    }
    // Hostname-anchored with no wildcard: the token is the leading part of
    // the hostname.
    var hostnameOnly = (this.anchor & 0x4) !== 0 &&
                       this.f.indexOf('*') === -1;
    var found = hostnameOnly ?
        findHostnameToken(this.f) :
        findFirstGoodToken(this.f);
    if ( found === null ) {
        return;
    }
    this.token = found[0];
    this.tokenBeg = found.index;
};
/******************************************************************************/
/******************************************************************************/
// Container for all compiled static network filters.
var FilterContainer = function() {
    // Shared URL tokenizer, and the parser used when compiling raw filters.
    this.urlTokenizer = µb.urlTokenizer;
    this.filterParser = new FilterParser();
    // A pattern containing `^` or `*` requires a generic filter class.
    this.reIsGeneric = /[\^\*]/;
    // Must be last: reset() makes use of `this.filterParser`.
    this.reset();
};
/******************************************************************************/
// Reset all, thus reducing to a minimum memory footprint of the context.
// Drop all filters and counters, bringing the container back to its empty,
// unfrozen state.
FilterContainer.prototype.reset = function() {
    this.frozen = false;

    // Statistics.
    this.processedFilterCount = 0;
    this.acceptedCount = 0;
    this.rejectedCount = 0;
    this.discardedCount = 0;
    this.allowFilterCount = 0;
    this.blockFilterCount = 0;

    // Filter storage.
    this.categories = new Map();
    this.dataFilters = new Map();
    this.badFilters = new Set();
    this.duplicateBuster = new Set();

    this.filterParser.reset();

    // Reuse filter instances whenever possible at load time.
    this.fclassLast = null;
    this.fdataLast = null;
    this.filterLast = null;

    // Runtime registers
    this.keyRegister = undefined;
    this.tokenRegister = undefined;
    this.fRegister = null;
};
/******************************************************************************/
// Finalize the container once all filters have been loaded: filters flagged
// by `badfilter` are removed, and load-time helper structures are released.
FilterContainer.prototype.freeze = function() {
    histogram('allFilters', this.categories);
    this.removeBadFilters();

    // Load-time only structures are no longer needed.
    this.fclassLast = null;
    this.fdataLast = null;
    this.filterLast = null;
    this.duplicateBuster = new Set();
    this.filterParser.reset();

    this.frozen = true;

    //console.log(JSON.stringify(Array.from(filterClassHistogram)));
    //this.tokenHistogram = new Map(Array.from(this.tokenHistogram).sort(function(a, b) {
    //    return a[0].localeCompare(b[0]) || (b[1] - a[1]);
    //}));
};
/******************************************************************************/
// Serialize the container into a plain object which `fromSelfie()` can load.
// `categories` is serialized as lines of text:
//   `k1\t[category key]` starts a category,
//   `k2\t[token]` gives the token of the compiled filter on the next line.
// `dataFilters` is serialized as an array of compiled entries.
FilterContainer.prototype.toSelfie = function() {
    // One category: token => filter.
    var serializeBucket = function(bucket) {
        var lines = [];
        bucket.forEach(function(filter, token) {
            lines.push('k2\t' + token);
            lines.push(filter.compile());
        });
        return lines.join('\n');
    };

    // All categories: key => bucket.
    var serializeCategories = function(categories) {
        var lines = [];
        categories.forEach(function(bucket, key) {
            lines.push('k1\t' + exportMapKey(key));
            lines.push(serializeBucket(bucket));
        });
        return lines.join('\n');
    };

    // Data filters: token => linked list of entries (chained with `next`).
    var serializeDataFilters = function(dataFilters) {
        var compiled = [];
        dataFilters.forEach(function(head) {
            var node = head;
            do {
                compiled.push(node.compile());
                node = node.next;
            } while ( node !== undefined );
        });
        return compiled;
    };

    var selfie = {};
    selfie.processedFilterCount = this.processedFilterCount;
    selfie.acceptedCount = this.acceptedCount;
    selfie.rejectedCount = this.rejectedCount;
    selfie.allowFilterCount = this.allowFilterCount;
    selfie.blockFilterCount = this.blockFilterCount;
    selfie.discardedCount = this.discardedCount;
    selfie.categories = serializeCategories(this.categories);
    selfie.dataFilters = serializeDataFilters(this.dataFilters);
    return selfie;
};
/******************************************************************************/
// Restore the container from an object created by `toSelfie()`. The
// container is marked as frozen.
FilterContainer.prototype.fromSelfie = function(selfie) {
    this.frozen = true;

    // Statistics.
    this.processedFilterCount = selfie.processedFilterCount;
    this.acceptedCount = selfie.acceptedCount;
    this.rejectedCount = selfie.rejectedCount;
    this.allowFilterCount = selfie.allowFilterCount;
    this.blockFilterCount = selfie.blockFilterCount;
    this.discardedCount = selfie.discardedCount;

    // Categories: `k1` lines open a new category, `k2` lines set the token
    // for the compiled filter found on the following line.
    var categories = this.categories,
        lineIter = new µb.LineIterator(selfie.categories),
        currentBucket, currentToken, line;
    while ( lineIter.eot() === false ) {
        line = lineIter.next();
        if ( line.startsWith('k1\t') ) {
            var categoryKey = importMapKey(line.slice(3));
            currentBucket = new Map();
            categories.set(categoryKey, currentBucket);
        } else if ( line.startsWith('k2\t') ) {
            currentToken = line.slice(3);
        } else {
            currentBucket.set(currentToken, filterFromCompiledData(line));
        }
    }

    // Data filters: walk the array backward, each entry is placed at the
    // head of the list of entries sharing its token.
    var compiledEntries = selfie.dataFilters,
        dataEntry, head;
    for ( var n = compiledEntries.length - 1; n >= 0; n-- ) {
        dataEntry = FilterDataHolderEntry.load(compiledEntries[n]);
        head = this.dataFilters.get(dataEntry.token);
        if ( head !== undefined ) {
            dataEntry.next = head;
        }
        this.dataFilters.set(dataEntry.token, dataEntry);
    }
};
/******************************************************************************/
// Compile one raw filter into its intermediate, text-based representation.
//   raw: a line from a filter list
//   out: receiver of the compiled output, written to through `out.push()`
// Returns true if the filter was compiled into `out`, false if it was
// ignored (empty line, cosmetic filter, or unsupported filter).
FilterContainer.prototype.compile = function(raw, out) {
// ORDER OF TESTS IS IMPORTANT!
// Ignore empty lines
var s = raw.trim();
if ( s.length === 0 ) {
return false;
}
var parsed = this.filterParser.parse(s);
// Ignore element-hiding filters
if ( parsed.elemHiding ) {
return false;
}
// Ignore filters with unsupported options
if ( parsed.unsupported ) {
µb.logger.writeOne('', 'error', 'Network filtering invalid filter: ' + raw);
return false;
}
// Pure hostnames, use more efficient dictionary lookup
// https://github.com/chrisaljoudi/uBlock/issues/665
// Create a dict keyed on request type etc.
// This applies only when there is neither a `domain=` option nor data
// attached to the filter.
if (
parsed.hostnamePure &&
parsed.domainOpt === '' &&
parsed.dataType === undefined &&
this.compileHostnameOnlyFilter(parsed, out)
) {
return true;
}
parsed.makeToken();
// The compiled filter is made of one or more units, separated by `\v`:
// an optional data holder, an optional origin restriction, then the unit
// which matches the pattern itself.
var fdata = '';
if ( parsed.dataType !== undefined ) {
if ( fdata !== '' ) { fdata += '\v'; }
fdata += FilterDataHolder.compile(parsed);
}
if ( parsed.domainOpt !== '' ) {
if ( fdata !== '' ) { fdata += '\v'; }
fdata += FilterOrigin.compile(parsed);
}
if ( fdata !== '' ) { fdata += '\v'; }
// Select the filter class according to the shape of the pattern.
if ( parsed.isRegex ) {
fdata += FilterRegex.compile(parsed);
} else if ( parsed.hostnamePure ) {
fdata += FilterPlainHostname.compile(parsed);
} else if ( parsed.f === '*' ) {
fdata += FilterTrue.compile();
} else if ( parsed.anchor === 0x5 ) {
// https://github.com/gorhill/uBlock/issues/1669
fdata += FilterGenericHnAndRightAnchored.compile(parsed);
} else if ( this.reIsGeneric.test(parsed.f) || parsed.token === '*' ) {
// Pattern with `^` or `*`, or for which no token could be found.
if ( parsed.anchor === 0x4 ) {
fdata += FilterGenericHnAnchored.compile(parsed);
} else {
fdata += FilterGeneric.compile(parsed);
}
} else if ( parsed.anchor === 0x4 ) {
fdata += FilterPlainHnAnchored.compile(parsed);
} else if ( parsed.anchor === 0x2 ) {
fdata += FilterPlainLeftAnchored.compile(parsed);
} else if ( parsed.anchor === 0x1 ) {
fdata += FilterPlainRightAnchored.compile(parsed);
} else if ( parsed.tokenBeg === 0 ) {
fdata += FilterPlainPrefix0.compile(parsed);
} else if ( parsed.tokenBeg === 1 ) {
fdata += FilterPlainPrefix1.compile(parsed);
} else {
fdata += FilterPlain.compile(parsed);
}
this.compileToAtomicFilter(fdata, parsed, out);
return true;
};
/******************************************************************************/
// Using fast/compact dictionary when filter is a (or portion of) pure hostname.
// Compile a pure-hostname filter. Such filters are stored under the special
// token `.`, the compiled data being the hostname itself.
//   parsed: the parser holding the filter
//   out: receiver of the compiled output
// One line is output per request type set in `parsed.types`, or a single
// type-less line when no type is set. Always returns true.
// https://github.com/gorhill/uBlock/issues/1757
//   Filters with a `domain=` option are no longer expected here, the caller
//   filters them out.
FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) {
    var route = parsed.badFilter ? 0x01 : 0x00,
        baseKey = parsed.action | parsed.important | parsed.party,
        typeBits = parsed.types;

    var emit = function(key) {
        out.push(
            route,
            exportMapKey(key) + '\v' +
            '.\v' +
            parsed.f
        );
    };

    if ( typeBits === 0 ) {
        emit(baseKey);
        return true;
    }

    // Bit n of `typeBits` stands for type index n + 1, which is stored in
    // the key shifted left by 4.
    for ( var typeIndex = 1; typeBits !== 0; typeIndex += 1, typeBits >>>= 1 ) {
        if ( (typeBits & 1) !== 0 ) {
            emit(baseKey | (typeIndex << 4));
        }
    }
    return true;
};
/******************************************************************************/
// Output a compiled filter, one line per request type.
//   fdata: the compiled filter data
//   parsed: the parser holding the filter
//   out: receiver of the compiled output
// Each line is made of the category key, the token and the filter data,
// separated by `\v`. When the filter has a `redirect=` option, the
// corresponding redirect rules are output as well.
FilterContainer.prototype.compileToAtomicFilter = function(fdata, parsed, out) {
    var route = parsed.badFilter ? 0x01 : 0x00,
        baseKey = parsed.action | parsed.important | parsed.party,
        typeBits = parsed.types;

    var emit = function(key) {
        out.push(
            route,
            exportMapKey(key) + '\v' +
            parsed.token + '\v' +
            fdata
        );
    };

    if ( typeBits === 0 ) {
        emit(baseKey);
        return;
    }

    // Bit n of `typeBits` stands for type index n + 1, which is stored in
    // the key shifted left by 4.
    for ( var typeIndex = 1; typeBits !== 0; typeIndex += 1, typeBits >>>= 1 ) {
        if ( (typeBits & 1) !== 0 ) {
            emit(baseKey | (typeIndex << 4));
        }
    }

    // Only static filter with an explicit type can be redirected. If we reach
    // this point, it's because there is one or more explicit type.
    if ( !parsed.redirect || parsed.badFilter ) {
        return;
    }

    var redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
    if ( Array.isArray(redirects) === false ) {
        return;
    }
    // Redirect rules are output last to first.
    for ( var n = redirects.length - 1; n >= 0; n-- ) {
        out.push(0, '\v\v=>\t' + redirects[n]);
    }
};
/******************************************************************************/
// Load compiled filters into the container.
//   lineIter: iterator over the lines of compiled content
// The first character of each line encodes flags, as an offset from `a`:
//   0x01: the line is a `badfilter` one, to be removed at freeze time
//   0x02: the line is URI-encoded
//   0x04: stop reading, the line is left unread
// What follows are three `\v`-separated fields: category key, token, and
// filter data.
FilterContainer.prototype.fromCompiledContent = function(lineIter) {
var line, lineBits, hash, token, fdata,
bucket, entry, filter,
fieldIter = new µb.FieldIterator('\v'),
dataFilterFid = FilterDataHolder.fidPrefix,
buckerFilterFid = FilterBucket.fidPrefix,
aCharCode = 'a'.charCodeAt(0);
while ( lineIter.eot() === false ) {
lineBits = lineIter.charCodeAt(0) - aCharCode;
if ( (lineBits & 0x04) !== 0 ) {
return;
}
// Skip the flag character.
line = lineIter.next(1);
if ( (lineBits & 0x02) !== 0 ) {
line = decodeURIComponent(line);
}
// Bad filters are kept aside, see removeBadFilters().
if ( (lineBits & 0x01) !== 0 ) {
this.badFilters.add(line);
continue;
}
hash = importMapKey(fieldIter.first(line));
token = fieldIter.next();
fdata = fieldIter.remainder();
// Special cases: delegate to more specialized engines.
// Redirect engine.
if ( fdata.startsWith('=>\t') ) {
µb.redirectEngine.fromCompiledRule(fdata.slice(3));
continue;
}
// Plain static filters.
this.acceptedCount += 1;
// Special treatment: data-holding filters are stored separately
// because they require special matching algorithm (unlike other
// filters, ALL hits must be reported).
// Entries sharing a token are chained through their `next` property, the
// most recent entry being the head of the list.
if ( fdata.startsWith(dataFilterFid) ) {
if ( this.duplicateBuster.has(line) ) {
this.discardedCount += 1;
continue;
}
this.duplicateBuster.add(line);
entry = new FilterDataHolderEntry(hash, token, fdata);
bucket = this.dataFilters.get(token);
if ( bucket !== undefined ) {
entry.next = bucket;
}
this.dataFilters.set(token, entry);
continue;
}
bucket = this.categories.get(hash);
if ( bucket === undefined ) {
bucket = new Map();
this.categories.set(hash, bucket);
}
entry = bucket.get(token);
// Pure hostnames all go into a single dictionary, which takes care of
// duplicates itself.
if ( token === '.' ) {
if ( entry === undefined ) {
entry = new FilterHostnameDict();
bucket.set('.', entry);
}
if ( entry.add(fdata) === false ) {
this.discardedCount += 1;
}
continue;
}
if ( this.duplicateBuster.has(line) ) {
this.discardedCount += 1;
continue;
}
this.duplicateBuster.add(line);
//this.tokenHistogram.set(token, (this.tokenHistogram.get(token) || 0) + 1);
filter = filterFromCompiledData(fdata);
// First filter for this token: store it as is.
if ( entry === undefined ) {
bucket.set(token, filter);
continue;
}
// There is already a bucket of filters for this token: add to it.
if ( entry.fidPrefix === buckerFilterFid ) {
entry.add(filter);
continue;
}
// Second filter for this token: promote to a bucket of filters.
bucket.set(token, new FilterBucket(entry, filter));
}
};
//FilterContainer.prototype.tokenHistogram = new Map();
/******************************************************************************/
// Remove from the container all the filters which were flagged with the
// `badfilter` option.
// Each line in `this.badFilters` has the same layout as a regular compiled
// filter: category key, token, and filter data, separated by `\v`. The
// fields must thus be read the same way as in fromCompiledContent(): the
// category key is the FIRST field, as returned by `fieldIter.first()`.
FilterContainer.prototype.removeBadFilters = function() {
    var lines = µb.setToArray(this.badFilters),
        fieldIter = new µb.FieldIterator('\v'),
        hash, token, fdata, bucket, entry,
        i = lines.length;
    while ( i-- ) {
        hash = importMapKey(fieldIter.first(lines[i]));
        bucket = this.categories.get(hash);
        if ( bucket === undefined ) {
            continue;
        }
        token = fieldIter.next();
        entry = bucket.get(token);
        if ( entry === undefined ) {
            continue;
        }
        fdata = fieldIter.remainder();
        // Many filters for this token: remove the matching one, and demote
        // the bucket to a lone filter if only one is left.
        if ( entry instanceof FilterBucket ) {
            entry.remove(fdata);
            if ( entry.filters.length === 1 ) {
                bucket.set(token, entry.filters[0]);
            }
            continue;
        }
        // Dictionary of pure hostnames: `fdata` is the hostname to remove.
        if ( entry instanceof FilterHostnameDict ) {
            entry.remove(fdata);
            if ( entry.size === 0 ) {
                bucket.delete(token);
                if ( bucket.size === 0 ) {
                    this.categories.delete(hash);
                }
            }
            continue;
        }
        // Lone filter: remove it only if it is the exact same filter.
        if ( entry.compile() === fdata ) {
            bucket.delete(token);
            if ( bucket.size === 0 ) {
                this.categories.delete(hash);
            }
            continue;
        }
    }
};
/******************************************************************************/
// Collect the data strings of ALL the data-holding filters matching a URL.
//   dataType: not used by this method
//   requestURL: the URL to match
//   out: array, receives the data strings to apply
//   outlog: optional array, receives one log entry per reported filter, with
//       `result` set to 1 for an applied filter and to 2 for an exception
// Nothing is output when there is no matching block filter.
FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out, outlog) {
    // `dataFilters` is a Map: its entry count is `size`, not `length`.
    if ( this.dataFilters.size === 0 ) { return; }

    var url = this.urlTokenizer.setURL(requestURL);

    requestHostnameRegister = µb.URI.hostnameFromURI(url);

    // We need to visit ALL the matching filters.
    var toAddImportant = new Map(),
        toAdd = new Map(),
        toRemove = new Map();

    // Sort a matching entry according to its key bits: bit 0 is the action
    // (set = exception), bit 1 is the `important` flag.
    var sortHit = function(hit) {
        var dataStr = hit.filter.dataStr;
        if ( hit.keyBits & 0x001 ) {
            toRemove.set(dataStr, hit);
        } else if ( hit.keyBits & 0x002 ) {
            toAddImportant.set(dataStr, hit);
        } else {
            toAdd.set(dataStr, hit);
        }
    };

    // Tokenizable filters: as in matchTokens(), all the tokens of the URL
    // are visited, the list ends with an entry holding an empty token.
    var entry,
        tokens = this.urlTokenizer.getTokens(),
        tokenEntry, token,
        i = 0;
    for (;;) {
        tokenEntry = tokens[i++];
        token = tokenEntry.token;
        if ( !token ) { break; }
        entry = this.dataFilters.get(token);
        while ( entry !== undefined ) {
            if ( entry.filter.match(url, tokenEntry.beg) === true ) {
                sortHit(entry);
            }
            entry = entry.next;
        }
    }

    // Untokenizable filters: there is no token position to pass along, as
    // in matchTokens().
    entry = this.dataFilters.get('*');
    while ( entry !== undefined ) {
        if ( entry.filter.match(url) === true ) {
            sortHit(entry);
        }
        entry = entry.next;
    }

    if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; }

    // Remove entries overridden by other filters.
    // `important` block filters can't be overridden by exception filters.
    var iter = toAddImportant.entries(),
        k;
    for (;;) {
        entry = iter.next();
        if ( entry.done === true ) { break; }
        k = entry.value[0];
        toAdd.delete(k);
        toRemove.delete(k);
    }
    // An exception filter with an empty data string overrides all the
    // non-important block filters.
    iter = toRemove.entries();
    for (;;) {
        entry = iter.next();
        if ( entry.done === true ) { break; }
        k = entry.value[0];
        if ( k === '' ) {
            toAdd.clear();
            break;
        }
        toAdd.delete(k);
    }

    var logData;
    iter = toAddImportant.entries();
    for (;;) {
        entry = iter.next();
        if ( entry.done === true ) { break; }
        out.push(entry.value[0]);
        if ( outlog === undefined ) { continue; }
        logData = entry.value[1].logData();
        logData.source = 'static';
        logData.result = 1;
        outlog.push(logData);
    }
    iter = toAdd.entries();
    for (;;) {
        entry = iter.next();
        if ( entry.done === true ) { break; }
        out.push(entry.value[0]);
        if ( outlog === undefined ) { continue; }
        logData = entry.value[1].logData();
        logData.source = 'static';
        logData.result = 1;
        outlog.push(logData);
    }
    if ( outlog !== undefined ) {
        iter = toRemove.entries();
        for (;;) {
            entry = iter.next();
            if ( entry.done === true ) { break; }
            logData = entry.value[1].logData();
            logData.source = 'static';
            logData.result = 2;
            outlog.push(logData);
        }
    }
};
/******************************************************************************/
// bucket: Map
// url: string
// Look for a filter matching the current URL in one category of filters.
//   bucket: Map, token => filter
//   url: string, the URL as normalized by the tokenizer
// Lookup order: hostname-only filters, then the filters keyed on each token
// of the URL, then the untokenizable filters.
// On a hit, the token and the filter are stored in `this.tokenRegister` and
// `this.fRegister`, and true is returned. Otherwise false is returned, the
// registers being left untouched.
FilterContainer.prototype.matchTokens = function(bucket, url) {
    var container = this;
    var hit = function(token, filter) {
        container.tokenRegister = token;
        container.fRegister = filter;
        return true;
    };

    // Hostname-only filters
    var candidate = bucket.get('.');
    if ( candidate !== undefined && candidate.match() ) {
        return hit('.', candidate);
    }

    // The list of tokens ends with an entry holding an empty token.
    var tokens = this.urlTokenizer.getTokens(),
        tokenEntry, token;
    for ( var n = 0; ; n++ ) {
        tokenEntry = tokens[n];
        token = tokenEntry.token;
        if ( !token ) {
            break;
        }
        candidate = bucket.get(token);
        if ( candidate !== undefined && candidate.match(url, tokenEntry.beg) ) {
            return hit(token, candidate);
        }
    }

    // Untokenizable filters
    candidate = bucket.get('*');
    if ( candidate !== undefined && candidate.match(url) ) {
        return hit('*', candidate);
    }

    return false;
};
/******************************************************************************/
// Specialized handlers
// https://github.com/gorhill/uBlock/issues/1477
// Special case: blocking-generichide filter ALWAYS exists, it is implicit --
// thus we always first check for exception filters, then for important block
// filter if and only if there was a hit on an exception filter.
// https://github.com/gorhill/uBlock/issues/2103
// User may want to override `generichide` exception filters.
// Match a URL against `generichide` filters.
// Returns:
//   0: no `generichide` exception filter matches
//   1: an exception filter matches, but is overridden by an `important`
//      `generichide` block filter
//   2: an exception filter matches
FilterContainer.prototype.matchStringGenericHide = function(context, requestURL) {
    var url = this.urlTokenizer.setURL(requestURL);

    // https://github.com/gorhill/uBlock/issues/2225
    //   Important: this is used by FilterHostnameDict.match().
    requestHostnameRegister = µb.URI.hostnameFromURI(url);

    // The blocking filter is implicit: exception filters are looked up first.
    var exceptions = this.categories.get(genericHideException),
        excepted = Boolean(exceptions) &&
                   this.matchTokens(exceptions, url) !== false;
    if ( excepted === false ) {
        this.fRegister = null;
        return 0;
    }

    // An exception filter was hit: is it overridden by the user?
    var overrides = this.categories.get(genericHideImportant);
    if ( overrides && this.matchTokens(overrides, url) ) {
        this.keyRegister = genericHideImportant;
        return 1;
    }

    this.keyRegister = genericHideException;
    return 2;
};
/******************************************************************************/
// https://github.com/chrisaljoudi/uBlock/issues/116
// Some type of requests are exceptional, they need custom handling,
// not the generic handling.
// Match a URL against the filters of one specific request type only:
// filters with no type option are not looked up.
//   context: object, its `pageHostname` and `pageDomain` properties are read
//   requestURL: the URL to match
//   requestType: name of the request type
// Returns:
//   0: no block filter matches, or the request type is unknown
//   1: a block filter matches, with no overriding allow filter
//   2: a block filter matches, but is overridden by an allow filter
// On a hit, the matching filter is found in `this.fRegister`, its category
// key in `this.keyRegister`.
FilterContainer.prototype.matchStringExactType = function(context, requestURL, requestType) {
// Special cases.
if ( requestType === 'generichide' ) {
return this.matchStringGenericHide(context, requestURL);
}
var type = typeNameToTypeValue[requestType];
if ( type === undefined ) {
return 0;
}
// Prime tokenizer: we get a normalized URL in return.
var url = this.urlTokenizer.setURL(requestURL);
// These registers will be used by various filters
pageHostnameRegister = context.pageHostname || '';
requestHostnameRegister = µb.URI.hostnameFromURI(url);
var party = isFirstParty(context.pageDomain, requestHostnameRegister) ? FirstParty : ThirdParty,
categories = this.categories,
key, bucket;
// matchTokens() sets this register only when there is a hit.
this.fRegister = null;
// https://github.com/chrisaljoudi/uBlock/issues/139
// Test against important block filters
// These can't be overridden by allow filters, hence the immediate return.
key = BlockAnyParty | Important | type;
if ( (bucket = categories.get(key)) ) {
if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key;
return 1;
}
}
key = BlockAction | Important | type | party;
if ( (bucket = categories.get(key)) ) {
if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key;
return 1;
}
}
// Test against block filters
key = BlockAnyParty | type;
if ( (bucket = categories.get(key)) ) {
if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key;
}
}
// Party-specific block filters are looked up only if there was no hit.
if ( this.fRegister === null ) {
key = BlockAction | type | party;
if ( (bucket = categories.get(key)) ) {
if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key;
}
}
}
// If there is no block filter, no need to test against allow filters
if ( this.fRegister === null ) {
return 0;
}
// Test against allow filters
key = AllowAnyParty | type;
if ( (bucket = categories.get(key)) ) {
if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key;
return 2;
}
}
key = AllowAction | type | party;
if ( (bucket = categories.get(key)) ) {
if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key;
return 2;
}
}
// The block filter stands.
return 1;
};
/******************************************************************************/
// Evaluate a request against the static network filters.
//
// `context` supplies `requestType`, `requestURL`, `requestHostname`,
// `pageHostname` and `pageDomain`.
//
// Returns:
//   0: no block filter matched
//   1: a block filter matched and no allow filter overrides it
//   2: a block filter matched but an allow filter overrides it
// or whatever `matchStringExactType()` returns when the request type calls
// for exact type matching.
//
// On a hit, `keyRegister` receives the category key of the bucket which
// matched. `fRegister` is reset to null here; it is expected to be filled in
// by `matchTokens()` on a hit (not visible from this method -- confirm).
FilterContainer.prototype.matchString = function(context) {
    // https://github.com/chrisaljoudi/uBlock/issues/519
    // A type value of 0 (`no_type`) or anything beyond `other` goes through
    // exact type matching. Unknown type names are handled as `other`.
    var typeBits = typeNameToTypeValue[context.requestType];
    if ( typeBits === undefined ) {
        typeBits = otherTypeBitValue;
    } else if ( typeBits === 0 || typeBits > otherTypeBitValue ) {
        return this.matchStringExactType(
            context,
            context.requestURL,
            context.requestType
        );
    }

    // Evaluation strategy:
    //
    //   block = blacklisted && !whitelisted
    //   allow = whitelisted || !blacklisted
    //
    // In order of likelihood, a URL will:
    //   1. hit nothing
    //   2. hit a block filter
    //   3. hit an allow filter
    //
    // Since "no hit" dominates, the goal is to test as few filters as
    // possible: block filters are tested first, and allow filters are
    // tested *only* once a block filter is known to match. Testing allow
    // filters up front would be wasted work most of the time.

    // Priming the tokenizer gives back the normalized URL.
    var normalizedURL = this.urlTokenizer.setURL(context.requestURL);

    // Registers consulted by various filters.
    pageHostnameRegister = context.pageHostname || '';
    requestHostnameRegister = context.requestHostname;
    this.fRegister = null;

    var partyBits = ThirdParty;
    if ( isFirstParty(context.pageDomain, context.requestHostname) ) {
        partyBits = FirstParty;
    }

    var buckets = this.categories;
    var bits, candidates;

    // https://github.com/chrisaljoudi/uBlock/issues/139
    // `important` block filters come first: the option reverses the usual
    // "evaluate block, then evaluate allow" order, hence a hit here is
    // final and allow filters are never consulted.
    bits = BlockAnyTypeAnyParty | Important;
    candidates = buckets.get(bits);
    if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
        this.keyRegister = bits;
        return 1;
    }
    bits = BlockAnyType | Important | partyBits;
    candidates = buckets.get(bits);
    if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
        this.keyRegister = bits;
        return 1;
    }
    bits = BlockAnyParty | Important | typeBits;
    candidates = buckets.get(bits);
    if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
        this.keyRegister = bits;
        return 1;
    }
    bits = BlockAction | Important | typeBits | partyBits;
    candidates = buckets.get(bits);
    if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
        this.keyRegister = bits;
        return 1;
    }

    // Regular block filters, from least to most specific category. Once
    // `fRegister` is no longer null, the remaining categories are skipped.
    bits = BlockAnyTypeAnyParty;
    candidates = buckets.get(bits);
    if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
        this.keyRegister = bits;
    }
    if ( this.fRegister === null ) {
        bits = BlockAnyType | partyBits;
        candidates = buckets.get(bits);
        if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
            this.keyRegister = bits;
        }
    }
    if ( this.fRegister === null ) {
        bits = BlockAnyParty | typeBits;
        candidates = buckets.get(bits);
        if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
            this.keyRegister = bits;
        }
    }
    if ( this.fRegister === null ) {
        bits = BlockAction | typeBits | partyBits;
        candidates = buckets.get(bits);
        if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
            this.keyRegister = bits;
        }
    }

    // Nothing to block means nothing to whitelist: skip allow filters.
    if ( this.fRegister === null ) {
        return 0;
    }

    // A block filter matched: see whether an allow filter overrides it.
    bits = AllowAnyTypeAnyParty;
    candidates = buckets.get(bits);
    if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
        this.keyRegister = bits;
        return 2;
    }
    bits = AllowAnyType | partyBits;
    candidates = buckets.get(bits);
    if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
        this.keyRegister = bits;
        return 2;
    }
    bits = AllowAnyParty | typeBits;
    candidates = buckets.get(bits);
    if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
        this.keyRegister = bits;
        return 2;
    }
    bits = AllowAction | typeBits | partyBits;
    candidates = buckets.get(bits);
    if ( candidates && this.matchTokens(candidates, normalizedURL) ) {
        this.keyRegister = bits;
        return 2;
    }

    return 1;
};
/******************************************************************************/
// Build the log entry describing the filter currently held in the
// registers.
//
// Returns `undefined` when `fRegister` is null, i.e. there is no filter on
// record. Otherwise returns the object created by `toLogDataInternal()`,
// augmented with:
//   source: 'static'
//   token:  the content of `tokenRegister`
//   result: 2 if bit 0 (the allow action bit) of `keyRegister` is set,
//           1 otherwise
FilterContainer.prototype.toLogData = function() {
    if ( this.fRegister === null ) { return; }
    var logData = toLogDataInternal(
        this.keyRegister,
        this.tokenRegister,
        this.fRegister
    );
    logData.source = 'static';
    logData.token = this.tokenRegister;
    // `fRegister` is known to be non-null past the guard above, so the
    // action bit of the key alone tells allow (2) from block (1).
    logData.result = (this.keyRegister & 1) !== 0 ? 2 : 1;
    return logData;
};
/******************************************************************************/
// Returns `acceptedCount` minus `discardedCount`.
FilterContainer.prototype.getFilterCount = function() {
    var accepted = this.acceptedCount;
    var discarded = this.discardedCount;
    return accepted - discarded;
};
/******************************************************************************/
return new FilterContainer();
/******************************************************************************/
})();