Raymond Hill 081e5b4db9
fix #3581
2018-03-07 10:37:18 -05:00

2847 lines
83 KiB
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

uBlock Origin - a browser extension to block requests.
Copyright (C) 2014-2018 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
/* jshint bitwise: false */
/* global punycode, HNTrieBuilder */
'use strict';
µBlock.staticNetFilteringEngine = (function(){
var µb = µBlock;
// fedcba9876543210
// |     |    | |||
// |     |    | |||
// |     |    | |||
// |     |    | |||
// |     |    | ||+---- bit    0: [BlockAction | AllowAction]
// |     |    | |+----- bit    1: `important`
// |     |    | +------ bit 2- 3: party [0 - 3]
// |     |    +-------- bit 4- 8: type [0 - 31]
// |     +------------- bit 9-14: unused
// +------------------- bit   15: bad filter
// Individual bit fields which are OR-ed together to form the category bits
// of a filter.
var BlockAction = 0 << 0,   // bit 0 cleared
    AllowAction = 1 << 0,   // bit 0 set
    Important   = 1 << 1,
    AnyParty    = 0 << 2,
    FirstParty  = 1 << 2,
    ThirdParty  = 2 << 2,
    BadFilter   = 1 << 15,
    AnyType     = 0 << 4;
// Maps a request type name to its numeric value, already shifted left by 4
// so that it can be OR-ed directly into the `type` field of category bits.
// Note that `object` and `object_subrequest` share the same value.
var typeNameToTypeValue = {
    'no_type':  0 << 4,
    'stylesheet':  1 << 4,
    'image':  2 << 4,
    'object':  3 << 4,
    'object_subrequest':  3 << 4,
    'script':  4 << 4,
    'xmlhttprequest':  5 << 4,
    'sub_frame':  6 << 4,
    'font':  7 << 4,
    'media':  8 << 4,
    'websocket':  9 << 4,
    'other': 10 << 4,
    'popup': 11 << 4, // start of behavioral filtering
    'popunder': 12 << 4,
    'main_frame': 13 << 4, // start of 1st-party-only behavioral filtering
    'generichide': 14 << 4,
    'inline-font': 15 << 4,
    'inline-script': 16 << 4,
    'data': 17 << 4, // special: a generic data holder
    'redirect': 18 << 4,
    'webrtc': 19 << 4,
    'unsupported': 20 << 4
};

// Shifted value of the `other` type.
var otherTypeBitValue = typeNameToTypeValue.other;
// Maps an unshifted type value (i.e. `categoryBits >>> 4` restricted to the
// type field) to the option name used when rendering a filter for logging.
var typeValueToTypeName = {
    1: 'stylesheet',
    2: 'image',
    3: 'object',
    4: 'script',
    5: 'xmlhttprequest',
    6: 'subdocument',
    7: 'font',
    8: 'media',
    9: 'websocket',
    10: 'other',
    11: 'popup',
    12: 'popunder',
    13: 'document',
    14: 'generichide',
    15: 'inline-font',
    16: 'inline-script',
    17: 'data',
    18: 'redirect',
    19: 'webrtc',
    20: 'unsupported'
};
// Pre-computed category bit combinations: one per action, with the type
// and/or party field left at its "any" value.
var BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty,
    BlockAnyType = BlockAction | AnyType,
    BlockAnyParty = BlockAction | AnyParty;

var AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty,
    AllowAnyType = AllowAction | AnyType,
    AllowAnyParty = AllowAction | AnyParty;

// Category bits of the two flavors of `generichide` filters.
var genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generichide;
var genericHideImportant = BlockAction | AnyParty | typeNameToTypeValue.generichide | Important;
// ABP filters: https://adblockplus.org/en/filters
// regex tester: http://regex101.com/
// Short-lived registers: they are read while a request is being evaluated
// and remain valid only until the next evaluation.
var pageHostnameRegister = '';
var requestHostnameRegister = '';
//var filterRegister = null;
//var categoryRegister = '';
// Debugging aid. The no-op default is replaced by the implementation
// which follows.
var histogram = function() {};

// Logs to the console how many filters each bucket holds.
//
// label:      string printed in the heading line.
// categories: object of objects, iterated with `for ... in`; each leaf is
//             either a FilterBucket (counted as `filters.length`) or
//             anything else (counted as 1).
//
// Only the 50 largest buckets are listed; the total bucket count is always
// reported.
histogram = function(label, categories) {
    var h = [],
        categoryBucket, filterBucket;
    for ( var k in categories ) {
        // No need for hasOwnProperty() here: there is no prototype chain.
        categoryBucket = categories[k];
        for ( var kk in categoryBucket ) {
            // No need for hasOwnProperty() here: there is no prototype chain.
            filterBucket = categoryBucket[kk];
            h.push({
                k: k.charCodeAt(0).toString(2) + ' ' + kk,
                n: filterBucket instanceof FilterBucket ? filterBucket.filters.length : 1
            });
        }
    }

    console.log('Histogram %s', label);

    var total = h.length;
    // Largest buckets first.
    h.sort(function(a, b) { return b.n - a.n; });

    // Find indices of entries of interest
    var target = 2;
    for ( var i = 0; i < total; i++ ) {
        if ( h[i].n === target ) {
            console.log('\tEntries with only %d filter(s) start at index %s (key = "%s")', target, i, h[i].k);
            target -= 1;
        }
    }

    h = h.slice(0, 50);

    h.forEach(function(v) {
        console.log('\tkey=%s count=%d', v.k, v.n);
    });
    console.log('\tTotal buckets count: %d', total);
};
// Local helpers
// Be sure to not confuse 'example.com' with 'anotherexample.com'
// Returns true when `hostname` is `domain` itself or one of its subdomains.
// The character preceding the matched suffix must be a dot, so that
// 'example.com' is not confused with 'anotherexample.com'.
var isFirstParty = function(domain, hostname) {
    return hostname.endsWith(domain) &&
          (hostname.length === domain.length ||
           hostname.charCodeAt(hostname.length - domain.length - 1) === 0x2E /* '.' */);
};
// Returns the normalized source of the regular expression `s`, or an empty
// string when `s` is not a valid regular expression. In the latter case the
// stringified error is stored in `normalizeRegexSource.message`.
var normalizeRegexSource = function(s) {
    try {
        var re = new RegExp(s);
        return re.source;
    } catch (ex) {
        normalizeRegexSource.message = ex.toString();
    }
    return '';
};
// Converts the pattern part of a filter into regular expression source.
//
// s:      raw pattern; `*` is a wildcard and `^` a separator placeholder.
// anchor: bit field -- 0x4: anchored to the hostname, 0x2: anchored to the
//         start of the URL (ignored when 0x4 is set), 0x1: anchored to the
//         end of the URL. May be omitted, meaning no anchor.
var rawToRegexStr = function(s, anchor) {
    var me = rawToRegexStr;
    // https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/
    // https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
    // Also: remove leading/trailing wildcards -- there is no point.
    var reStr = s.replace(me.escape1, '\\$&')
                 .replace(me.escape2, '(?:[^%.0-9a-z_-]|$)')
                 .replace(me.escape3, '')
                 .replace(me.escape4, '[^ ]*?');
    if ( anchor & 0x4 ) {
        // When the pattern itself starts with a dot, the prefix must not
        // require a dot of its own.
        reStr = (
            reStr.startsWith('\\.') ?
                rawToRegexStr.reTextHostnameAnchor2 :
                rawToRegexStr.reTextHostnameAnchor1
        ) + reStr;
    } else if ( anchor & 0x2 ) {
        reStr = '^' + reStr;
    }
    if ( anchor & 0x1 ) {
        reStr += '$';
    }
    return reStr;
};
rawToRegexStr.escape1 = /[.+?${}()|[\]\\]/g;
rawToRegexStr.escape2 = /\^/g;
rawToRegexStr.escape3 = /^\*|\*$/g;
rawToRegexStr.escape4 = /\*/g;
rawToRegexStr.reTextHostnameAnchor1 = '^[a-z-]+://(?:[^/?#]+\\.)?';
rawToRegexStr.reTextHostnameAnchor2 = '^[a-z-]+://(?:[^/?#]+)?';
var filterFingerprinter = µb.CompiledLineWriter.fingerprint;
// Builds the log data of a matched filter, re-creating the textual filter
// options which are encoded in the category bits.
//
// categoryBits: bit 0 = exception, bit 1 = important, bits 2-3 = party,
//               bits 4-8 = type.
// tokenHash:    token hash under which the filter is stored.
// filter:       filter instance, or null.
//
// Returns undefined when `filter` is null, otherwise the object returned by
// `filter.logData()` with `compiled` and `raw` updated.
var toLogDataInternal = function(categoryBits, tokenHash, filter) {
    if ( filter === null ) { return undefined; }
    var logData = filter.logData();
    logData.compiled = filterFingerprinter([ categoryBits, tokenHash, logData.compiled ]);
    if ( categoryBits & 0x001 ) {
        logData.raw = '@@' + logData.raw;
    }
    var opts = [];
    if ( categoryBits & 0x002 ) {
        opts.push('important');
    }
    if ( categoryBits & 0x008 ) {
        opts.push('third-party');
    } else if ( categoryBits & 0x004 ) {
        opts.push('first-party');
    }
    var type = categoryBits & 0x1F0;
    // The `data` type is rendered through the options of the data holder.
    if ( type !== 0 && type !== typeNameToTypeValue.data ) {
        opts.push(typeValueToTypeName[type >>> 4]);
    }
    if ( logData.opts !== undefined ) {
        opts.push(logData.opts);
    }
    if ( opts.length !== 0 ) {
        logData.raw += '$' + opts.join(',');
    }
    return logData;
};
// First character of match must be within the hostname part of the url.
// Returns true when position `matchStart` in `url` is a valid start for a
// hostname-anchored match: it must lie within the hostname part of the URL,
// and be located either at the very start of the hostname or right after a
// dot.
var isHnAnchored = function(url, matchStart) {
    var hnStart = url.indexOf('://');
    if ( hnStart === -1 ) { return false; }
    hnStart += 3;
    if ( matchStart <= hnStart ) { return true; }
    // A path, query or fragment delimiter before the match means the match
    // starts beyond the hostname.
    if ( reURLPostHostnameAnchors.test(url.slice(hnStart, matchStart)) ) {
        return false;
    }
    // https://github.com/gorhill/uBlock/issues/1929
    // Match only hostname label boundaries.
    return url.charCodeAt(matchStart - 1) === 0x2E;
};

var reURLPostHostnameAnchors = /[\/?#]/;
// Returns true when arrays `a` and `b` have the same length and their
// elements are pairwise strictly equal. Elements which are themselves
// arrays are compared recursively.
var arrayStrictEquals = function(a, b) {
    var n = a.length;
    if ( n !== b.length ) { return false; }
    var isArray, x, y;
    for ( var i = 0; i < n; i++ ) {
        x = a[i]; y = b[i];
        isArray = Array.isArray(x);
        if ( isArray !== Array.isArray(y) ) { return false; }
        if ( isArray === true ) {
            if ( arrayStrictEquals(x, y) === false ) { return false; }
        } else {
            if ( x !== y ) { return false; }
        }
    }
    return true;
};
// Each filter class will register itself in the map. A filter class
// id MUST always stringify to ONE single character.
//
// IMPORTANT: any change which modifies the mapping will have to be
// reflected with µBlock.systemSettings.compiledMagic.
// Registry of filter classes, indexed by filter class id (`fid`).
var filterClasses = [],
    filterClassIdGenerator = 0;

// Assigns the next available id to `ctor` -- exposed as both `ctor.fid` and
// `ctor.prototype.fid` -- and records the class in the registry. Ids are
// handed out in call order.
var registerFilterClass = function(ctor) {
    var fid = filterClassIdGenerator++;
    ctor.fid = ctor.prototype.fid = fid;
    filterClasses[fid] = ctor;
    //console.log(ctor.name, fid);
};
// Instantiates a filter from its compiled form. `args[0]` is the id of the
// filter class; the whole array is handed to the `load()` method of that
// class.
var filterFromCompiledData = function(args) {
    //filterClassHistogram.set(fid, (filterClassHistogram.get(fid) || 0) + 1);
    return filterClasses[args[0]].load(args);
};

//var filterClassHistogram = new Map();
// Filter which matches any URL.
var FilterTrue = function() {
};

FilterTrue.prototype.match = function() {
    return true;
};

FilterTrue.prototype.logData = function() {
    return {
        raw: '*',
        regex: '^',
        compiled: this.compile(),
    };
};

// Compiled form of an instance: [ fid ].
FilterTrue.prototype.compile = function() {
    return [ this.fid ];
};

FilterTrue.compile = function() {
    return [ FilterTrue.fid ];
};

FilterTrue.load = function() {
    return new FilterTrue();
};

// Without registration `fid` is undefined and the compiled form cannot be
// loaded back.
registerFilterClass(FilterTrue);
// Plain string filter.
//
// s:        the string to find in the URL.
// tokenBeg: offset of the token within `s`.
var FilterPlain = function(s, tokenBeg) {
    this.s = s;
    this.tokenBeg = tokenBeg;
};

// `tokenBeg` is the offset of the token within `url`: `s` must be found at
// the position which aligns both tokens.
FilterPlain.prototype.match = function(url, tokenBeg) {
    return url.startsWith(this.s, tokenBeg - this.tokenBeg);
};

FilterPlain.prototype.logData = function() {
    return {
        raw: this.s,
        regex: rawToRegexStr(this.s),
        compiled: this.compile()
    };
};

// Compiled form: [ fid, s, tokenBeg ].
FilterPlain.prototype.compile = function() {
    return [ this.fid, this.s, this.tokenBeg ];
};

FilterPlain.compile = function(details) {
    return [ FilterPlain.fid, details.f, details.tokenBeg ];
};

FilterPlain.load = function(args) {
    return new FilterPlain(args[1], args[2]);
};

registerFilterClass(FilterPlain);
// Plain string filter whose token is located at the very start of the
// string: `s` must be found in the URL exactly at the token position.
var FilterPlainPrefix0 = function(s) {
    this.s = s;
};

FilterPlainPrefix0.prototype.match = function(url, tokenBeg) {
    return url.startsWith(this.s, tokenBeg);
};

FilterPlainPrefix0.prototype.logData = function() {
    return {
        raw: this.s,
        regex: rawToRegexStr(this.s),
        compiled: this.compile()
    };
};

// Compiled form: [ fid, s ].
FilterPlainPrefix0.prototype.compile = function() {
    return [ this.fid, this.s ];
};

FilterPlainPrefix0.compile = function(details) {
    return [ FilterPlainPrefix0.fid, details.f ];
};

FilterPlainPrefix0.load = function(args) {
    return new FilterPlainPrefix0(args[1]);
};

registerFilterClass(FilterPlainPrefix0);
// Plain string filter whose token is located one character after the start
// of the string: `s` must be found in the URL one character before the
// token position.
var FilterPlainPrefix1 = function(s) {
    this.s = s;
};

FilterPlainPrefix1.prototype.match = function(url, tokenBeg) {
    return url.startsWith(this.s, tokenBeg - 1);
};

FilterPlainPrefix1.prototype.logData = function() {
    return {
        raw: this.s,
        regex: rawToRegexStr(this.s),
        compiled: this.compile()
    };
};

// Compiled form: [ fid, s ].
FilterPlainPrefix1.prototype.compile = function() {
    return [ this.fid, this.s ];
};

FilterPlainPrefix1.compile = function(details) {
    return [ FilterPlainPrefix1.fid, details.f ];
};

FilterPlainPrefix1.load = function(args) {
    return new FilterPlainPrefix1(args[1]);
};

registerFilterClass(FilterPlainPrefix1);
// Pure hostname filter (`||hostname^`). It is tested against the request
// hostname register rather than against the URL.
var FilterPlainHostname = function(s) {
    this.s = s;
};

// Matches when the request hostname is `s` or one of its subdomains.
FilterPlainHostname.prototype.match = function() {
    var haystack = requestHostnameRegister, needle = this.s;
    if ( haystack.endsWith(needle) === false ) { return false; }
    var offset = haystack.length - needle.length;
    return offset === 0 || haystack.charCodeAt(offset - 1) === 0x2E /* '.' */;
};

FilterPlainHostname.prototype.logData = function() {
    return {
        raw: '||' + this.s + '^',
        regex: rawToRegexStr(this.s + '^'),
        compiled: this.compile()
    };
};

// Compiled form: [ fid, s ].
FilterPlainHostname.prototype.compile = function() {
    return [ this.fid, this.s ];
};

FilterPlainHostname.compile = function(details) {
    return [ FilterPlainHostname.fid, details.f ];
};

FilterPlainHostname.load = function(args) {
    return new FilterPlainHostname(args[1]);
};

registerFilterClass(FilterPlainHostname);
// Plain string filter anchored to the start of the URL (`|...`).
var FilterPlainLeftAnchored = function(s) {
    this.s = s;
};

FilterPlainLeftAnchored.prototype.match = function(url) {
    return url.startsWith(this.s);
};

FilterPlainLeftAnchored.prototype.logData = function() {
    return {
        raw: '|' + this.s,
        regex: rawToRegexStr(this.s, 0x2),
        compiled: this.compile()
    };
};

// Compiled form: [ fid, s ].
FilterPlainLeftAnchored.prototype.compile = function() {
    return [ this.fid, this.s ];
};

FilterPlainLeftAnchored.compile = function(details) {
    return [ FilterPlainLeftAnchored.fid, details.f ];
};

FilterPlainLeftAnchored.load = function(args) {
    return new FilterPlainLeftAnchored(args[1]);
};

registerFilterClass(FilterPlainLeftAnchored);
// Plain string filter anchored to the end of the URL (`...|`).
var FilterPlainRightAnchored = function(s) {
    this.s = s;
};

FilterPlainRightAnchored.prototype.match = function(url) {
    return url.endsWith(this.s);
};

FilterPlainRightAnchored.prototype.logData = function() {
    return {
        raw: this.s + '|',
        regex: rawToRegexStr(this.s, 0x1),
        compiled: this.compile()
    };
};

// Compiled form: [ fid, s ].
FilterPlainRightAnchored.prototype.compile = function() {
    return [ this.fid, this.s ];
};

FilterPlainRightAnchored.compile = function(details) {
    return [ FilterPlainRightAnchored.fid, details.f ];
};

FilterPlainRightAnchored.load = function(args) {
    return new FilterPlainRightAnchored(args[1]);
};

registerFilterClass(FilterPlainRightAnchored);
// Filter anchored at both ends (`|...|`): the whole URL must be equal to
// the string.
var FilterExactMatch = function(s) {
    this.s = s;
};

FilterExactMatch.prototype.match = function(url) {
    return url === this.s;
};

FilterExactMatch.prototype.logData = function() {
    return {
        raw: '|' + this.s + '|',
        regex: rawToRegexStr(this.s, 0x3),
        compiled: this.compile()
    };
};

// Compiled form: [ fid, s ].
FilterExactMatch.prototype.compile = function() {
    return [ this.fid, this.s ];
};

FilterExactMatch.compile = function(details) {
    return [ FilterExactMatch.fid, details.f ];
};

FilterExactMatch.load = function(args) {
    return new FilterExactMatch(args[1]);
};

registerFilterClass(FilterExactMatch);
// Plain string filter anchored to the hostname (`||...`).
var FilterPlainHnAnchored = function(s) {
    this.s = s;
};

// The string must be found at the token position, and that position must be
// a valid hostname anchor.
FilterPlainHnAnchored.prototype.match = function(url, tokenBeg) {
    return url.startsWith(this.s, tokenBeg) &&
           isHnAnchored(url, tokenBeg);
};

FilterPlainHnAnchored.prototype.logData = function() {
    return {
        raw: '||' + this.s,
        regex: rawToRegexStr(this.s),
        compiled: this.compile()
    };
};

// Compiled form: [ fid, s ].
FilterPlainHnAnchored.prototype.compile = function() {
    return [ this.fid, this.s ];
};

FilterPlainHnAnchored.compile = function(details) {
    return [ FilterPlainHnAnchored.fid, details.f ];
};

FilterPlainHnAnchored.load = function(args) {
    return new FilterPlainHnAnchored(args[1]);
};

registerFilterClass(FilterPlainHnAnchored);
// Generic filter: the pattern contains wildcards and/or separators and is
// therefore evaluated through a regular expression.
//
// s:      raw pattern.
// anchor: 0x2 = anchored to the start, 0x1 = anchored to the end.
var FilterGeneric = function(s, anchor) {
    this.s = s;
    this.anchor = anchor;
};

// The regular expression is created lazily, at first match attempt.
FilterGeneric.prototype.re = null;

FilterGeneric.prototype.match = function(url) {
    if ( this.re === null ) {
        this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
    }
    return this.re.test(url);
};

FilterGeneric.prototype.logData = function() {
    var out = {
        raw: this.s,
        // `re` is still null if match() was never called: fall back to the
        // regex source string rather than dereferencing null.
        regex: this.re !== null ?
            this.re.source :
            rawToRegexStr(this.s, this.anchor),
        compiled: this.compile()
    };
    if ( this.anchor & 0x2 ) {
        out.raw = '|' + out.raw;
    }
    if ( this.anchor & 0x1 ) {
        out.raw += '|';
    }
    return out;
};

// Compiled form: [ fid, s, anchor ].
FilterGeneric.prototype.compile = function() {
    return [ this.fid, this.s, this.anchor ];
};

FilterGeneric.compile = function(details) {
    return [ FilterGeneric.fid, details.f, details.anchor ];
};

FilterGeneric.load = function(args) {
    return new FilterGeneric(args[1], args[2]);
};

registerFilterClass(FilterGeneric);
// Generic (regex-based) filter anchored to the hostname (`||...`).
var FilterGenericHnAnchored = function(s) {
    this.s = s;
};

// The regular expression is created lazily, at first match attempt.
FilterGenericHnAnchored.prototype.re = null;
FilterGenericHnAnchored.prototype.anchor = 0x4;

FilterGenericHnAnchored.prototype.match = function(url) {
    if ( this.re === null ) {
        this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
    }
    return this.re.test(url);
};

FilterGenericHnAnchored.prototype.logData = function() {
    var out = {
        raw: '||' + this.s,
        // The hostname anchor bit is deliberately left out of the reported
        // regex.
        regex: rawToRegexStr(this.s, this.anchor & ~0x4),
        compiled: this.compile()
    };
    return out;
};

// Compiled form: [ fid, s ].
FilterGenericHnAnchored.prototype.compile = function() {
    return [ this.fid, this.s ];
};

FilterGenericHnAnchored.compile = function(details) {
    return [ FilterGenericHnAnchored.fid, details.f ];
};

FilterGenericHnAnchored.load = function(args) {
    return new FilterGenericHnAnchored(args[1]);
};

registerFilterClass(FilterGenericHnAnchored);
// Generic (regex-based) filter anchored to both the hostname and the end of
// the URL (`||...|`). Inherits matching from FilterGenericHnAnchored; only
// the anchor bits and the logged raw filter differ.
var FilterGenericHnAndRightAnchored = function(s) {
    FilterGenericHnAnchored.call(this, s);
};

FilterGenericHnAndRightAnchored.prototype = Object.create(FilterGenericHnAnchored.prototype, {
    constructor: {
        value: FilterGenericHnAndRightAnchored
    },
    anchor: {
        value: 0x5
    },
    logData: {
        value: function() {
            var out = FilterGenericHnAnchored.prototype.logData.call(this);
            out.raw += '|';
            return out;
        }
    },
    // Compiled form: [ fid, s ].
    compile: {
        value: function() {
            return [ this.fid, this.s ];
        }
    }
});

FilterGenericHnAndRightAnchored.compile = function(details) {
    return [ FilterGenericHnAndRightAnchored.fid, details.f ];
};

FilterGenericHnAndRightAnchored.load = function(args) {
    return new FilterGenericHnAndRightAnchored(args[1]);
};

registerFilterClass(FilterGenericHnAndRightAnchored);
// Filter expressed as a literal regular expression (`/.../`). `re` holds
// the regex source as a string until the first match attempt, at which
// point it is replaced by a case-insensitive RegExp instance.
var FilterRegex = function(s) {
    this.re = s;
};

FilterRegex.prototype.match = function(url) {
    if ( typeof this.re === 'string' ) {
        this.re = new RegExp(this.re, 'i');
    }
    return this.re.test(url);
};

FilterRegex.prototype.logData = function() {
    var s = typeof this.re === 'string' ? this.re : this.re.source;
    return {
        raw: '/' + s + '/',
        regex: s,
        compiled: this.compile()
    };
};

// Compiled form: [ fid, regex source ] -- the same layout as produced by
// FilterRegex.compile() and consumed by FilterRegex.load().
FilterRegex.prototype.compile = function() {
    return [
        this.fid,
        typeof this.re === 'string' ? this.re : this.re.source
    ];
};

FilterRegex.compile = function(details) {
    return [ FilterRegex.fid, details.f ];
};

FilterRegex.load = function(args) {
    return new FilterRegex(args[1]);
};

registerFilterClass(FilterRegex);
// Filtering according to the origin.
// Base class of filters restricted by a `domain=` option. A concrete
// matcher provides `matchOrigin()` and `toDomainOpt()`; the actual URL
// filter is held in `wrapped`.
var FilterOrigin = function() {
};

// Default wrapped filter: matches everything and compiles to nothing.
FilterOrigin.prototype.wrapped = {
    compile: function() {
        return '';
    },
    logData: function() {
        return {
            compiled: ''
        };
    },
    match: function() {
        return true;
    }
};

FilterOrigin.prototype.matchOrigin = function() {
    return true;
};

// The origin is tested first; the wrapped filter is evaluated only when the
// origin test passes.
FilterOrigin.prototype.match = function(url, tokenBeg) {
    return this.matchOrigin() && this.wrapped.match(url, tokenBeg);
};

// Log data of the wrapped filter, with the `domain=` option appended.
FilterOrigin.prototype.logData = function() {
    var out = this.wrapped.logData(),
        domainOpt = this.toDomainOpt();
    out.compiled = [ this.fid, domainOpt, out.compiled ];
    if ( out.opts === undefined ) {
        out.opts = 'domain=' + domainOpt;
    } else {
        out.opts += ',domain=' + domainOpt;
    }
    return out;
};

// Compiled form: [ fid, domainOpt, compiled wrapped filter ].
FilterOrigin.prototype.compile = function() {
    return [ this.fid, this.toDomainOpt(), this.wrapped.compile() ];
};
// *** start of specialized origin matchers
// Origin matcher for a single, non-negated hostname: the page hostname must
// be that hostname or one of its subdomains.
var FilterOriginHit = function(domainOpt) {
    this.hostname = domainOpt;
};

FilterOriginHit.prototype = Object.create(FilterOrigin.prototype, {
    constructor: {
        value: FilterOriginHit
    },
    toDomainOpt: {
        value: function() {
            return this.hostname;
        }
    },
    matchOrigin: {
        value: function() {
            var needle = this.hostname, haystack = pageHostnameRegister;
            if ( haystack.endsWith(needle) === false ) { return false; }
            var offset = haystack.length - needle.length;
            return offset === 0 || haystack.charCodeAt(offset - 1) === 0x2E /* '.' */;
        }
    }
});
// Origin matcher for a single negated hostname (`~hostname`): the page
// hostname must be neither that hostname nor one of its subdomains.
var FilterOriginMiss = function(domainOpt) {
    // Strip the leading `~`.
    this.hostname = domainOpt.slice(1);
};

FilterOriginMiss.prototype = Object.create(FilterOrigin.prototype, {
    constructor: {
        value: FilterOriginMiss
    },
    toDomainOpt: {
        value: function() {
            return '~' + this.hostname;
        }
    },
    matchOrigin: {
        value: function() {
            var needle = this.hostname, haystack = pageHostnameRegister;
            if ( haystack.endsWith(needle) === false ) { return true; }
            var offset = haystack.length - needle.length;
            return offset !== 0 && haystack.charCodeAt(offset - 1) !== 0x2E /* '.' */;
        }
    }
});
// Origin matcher for many hostnames, none negated: the page hostname must
// match one of them.
var FilterOriginHitSet = function(domainOpt) {
    // Long option strings go through the string deduplicater.
    this.domainOpt = domainOpt.length < 128
        ? domainOpt
        : µb.stringDeduplicater.lookup(domainOpt);
};

FilterOriginHitSet.prototype = Object.create(FilterOrigin.prototype, {
    constructor: {
        value: FilterOriginHitSet
    },
    // Built lazily, at first match attempt.
    oneOf: {
        value: null,
        writable: true
    },
    toDomainOpt: {
        value: function() {
            return this.domainOpt;
        }
    },
    matchOrigin: {
        value: function() {
            if ( this.oneOf === null ) {
                this.oneOf = HNTrieBuilder.fromDomainOpt(this.domainOpt);
            }
            return this.oneOf.matches(pageHostnameRegister);
        }
    }
});
// Origin matcher for many hostnames, all negated: the page hostname must
// match none of them.
var FilterOriginMissSet = function(domainOpt) {
    // Long option strings go through the string deduplicater.
    this.domainOpt = domainOpt.length < 128
        ? domainOpt
        : µb.stringDeduplicater.lookup(domainOpt);
};

FilterOriginMissSet.prototype = Object.create(FilterOrigin.prototype, {
    constructor: {
        value: FilterOriginMissSet
    },
    // Built lazily, at first match attempt.
    noneOf: {
        value: null,
        writable: true
    },
    toDomainOpt: {
        value: function() {
            return this.domainOpt;
        }
    },
    matchOrigin: {
        value: function() {
            if ( this.noneOf === null ) {
                // The negation markers are removed before building the set.
                this.noneOf = HNTrieBuilder.fromDomainOpt(this.domainOpt.replace(/~/g, ''));
            }
            return this.noneOf.matches(pageHostnameRegister) === false;
        }
    }
});
// Origin matcher for many hostnames, some of them negated: the page
// hostname must match one of the non-negated hostnames, and none of the
// negated ones.
var FilterOriginMixedSet = function(domainOpt) {
    // Long option strings go through the string deduplicater.
    this.domainOpt = domainOpt.length < 128
        ? domainOpt
        : µb.stringDeduplicater.lookup(domainOpt);
};

FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
    constructor: {
        value: FilterOriginMixedSet
    },
    oneOf: {
        value: null,
        writable: true
    },
    noneOf: {
        value: null,
        writable: true
    },
    // Splits the option string into the two sets. Called lazily, at first
    // match attempt.
    init: {
        value: function() {
            var oneOf = [], noneOf = [],
                hostnames = this.domainOpt.split('|'),
                i = hostnames.length,
                hostname;
            while ( i-- ) {
                hostname = hostnames[i];
                if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) {
                    noneOf.push(hostname.slice(1));
                } else {
                    oneOf.push(hostname);
                }
            }
            this.oneOf = HNTrieBuilder.fromIterable(oneOf);
            this.noneOf = HNTrieBuilder.fromIterable(noneOf);
        }
    },
    toDomainOpt: {
        value: function() {
            return this.domainOpt;
        }
    },
    matchOrigin: {
        value: function() {
            if ( this.oneOf === null ) { this.init(); }
            var needle = pageHostnameRegister;
            return this.oneOf.matches(needle) &&
                   this.noneOf.matches(needle) === false;
        }
    }
});
// *** end of specialized origin matchers
// The optimal test function is picked according to the content of the
// `domain=` filter option.
// Re-factored in light of:
// - https://gorhill.github.io/obj-vs-set-vs-map/set-vs-regexp.html
// The re-factoring made possible to reuse instances of a matcher. As of
// writing, I observed that just with EasyList, there were ~1,200 reused
// instances out of ~2,800.
// Returns the origin matcher best suited to the content of `domainOpt`, the
// value of a `domain=` option (hostnames separated by `|`, each optionally
// negated with a leading `~`).
FilterOrigin.matcherFactory = function(domainOpt) {
    // One hostname
    if ( domainOpt.indexOf('|') === -1 ) {
        if ( domainOpt.charCodeAt(0) === 0x7E /* '~' */ ) {
            return new FilterOriginMiss(domainOpt);
        }
        return new FilterOriginHit(domainOpt);
    }
    // Many hostnames.
    // Must be in set (none negated).
    if ( domainOpt.indexOf('~') === -1 ) {
        return new FilterOriginHitSet(domainOpt);
    }
    // Must not be in set (all negated).
    if ( FilterOrigin.reAllNegated.test(domainOpt) ) {
        return new FilterOriginMissSet(domainOpt);
    }
    // Must be in one set, but not in the other.
    return new FilterOriginMixedSet(domainOpt);
};

FilterOrigin.reAllNegated = /^~(?:[^|~]+\|~)+[^|~]+$/;

// Compiled form of the origin part only: the compiled wrapped filter is not
// included here, although load() expects it at index 2.
FilterOrigin.compile = function(details) {
    return [ FilterOrigin.fid, details.domainOpt ];
};

// args: [ fid, domainOpt, compiled wrapped filter ].
FilterOrigin.load = function(args) {
    var f = FilterOrigin.matcherFactory(args[1]);
    f.wrapped = filterFromCompiledData(args[2]);
    return f;
};

// All origin matchers inherit `fid` from FilterOrigin.prototype.
registerFilterClass(FilterOrigin);
// Filter which carries a piece of data -- a type and a string -- on top of
// a wrapped filter. Matching is entirely delegated to the wrapped filter.
var FilterDataHolder = function(dataType, dataStr) {
    this.dataType = dataType;
    this.dataStr = dataStr;
    this.wrapped = undefined;
};

FilterDataHolder.prototype.match = function(url, tokenBeg) {
    return this.wrapped.match(url, tokenBeg);
};

// Log data of the wrapped filter, with the data option (`type` or
// `type=str`) placed in front of any other option.
FilterDataHolder.prototype.logData = function() {
    var out = this.wrapped.logData();
    out.compiled = [ this.fid, this.dataType, this.dataStr, out.compiled ];
    var opt = this.dataType;
    if ( this.dataStr !== '' ) {
        opt += '=' + this.dataStr;
    }
    if ( out.opts === undefined ) {
        out.opts = opt;
    } else {
        out.opts = opt + ',' + out.opts;
    }
    return out;
};

// Compiled form: [ fid, dataType, dataStr, compiled wrapped filter ].
FilterDataHolder.prototype.compile = function() {
    return [ this.fid, this.dataType, this.dataStr, this.wrapped.compile() ];
};

// Compiled form of the data part only: the compiled wrapped filter is not
// included here, although load() expects it at index 3.
FilterDataHolder.compile = function(details) {
    return [ FilterDataHolder.fid, details.dataType, details.dataStr ];
};

FilterDataHolder.load = function(args) {
    var f = new FilterDataHolder(args[1], args[2]);
    f.wrapped = filterFromCompiledData(args[3]);
    return f;
};

registerFilterClass(FilterDataHolder);
// Helper class for storing instances of FilterDataHolder.
// Linked-list node holding a data holder filter along with the category
// bits and token hash it belongs to.
//
// fdata is the compiled form of the filter; it is instantiated right away.
var FilterDataHolderEntry = function(categoryBits, tokenHash, fdata) {
    this.categoryBits = categoryBits;
    this.tokenHash = tokenHash;
    this.filter = filterFromCompiledData(fdata);
    this.next = undefined;
};

FilterDataHolderEntry.prototype.logData = function() {
    return toLogDataInternal(this.categoryBits, this.tokenHash, this.filter);
};

// Compiled form: [ categoryBits, tokenHash, compiled filter ].
FilterDataHolderEntry.prototype.compile = function() {
    return [ this.categoryBits, this.tokenHash, this.filter.compile() ];
};

FilterDataHolderEntry.load = function(data) {
    return new FilterDataHolderEntry(data[0], data[1], data[2]);
};
// Dictionary of hostnames
// Set of pure hostname filters, all evaluated with a single lookup per
// hostname label of the request hostname.
var FilterHostnameDict = function() {
    this.h = ''; // short-lived register
    this.dict = new Set();
};

Object.defineProperty(FilterHostnameDict.prototype, 'size', {
    get: function() {
        return this.dict.size;
    }
});

// Returns false when the hostname is already present, true when it was
// added.
FilterHostnameDict.prototype.add = function(hn) {
    if ( this.dict.has(hn) === true ) { return false; }
    this.dict.add(hn);
    return true;
};

// Returns true when the hostname was present.
FilterHostnameDict.prototype.remove = function(hn) {
    return this.dict.delete(hn);
};

// Looks up the request hostname, then each of its parent domains in turn.
// The matching hostname is kept in `h` for the benefit of logData().
FilterHostnameDict.prototype.match = function() {
    // TODO: mind IP addresses
    var pos,
        hostname = requestHostnameRegister;
    while ( this.dict.has(hostname) === false ) {
        pos = hostname.indexOf('.');
        if ( pos === -1 ) {
            this.h = '';
            return false;
        }
        hostname = hostname.slice(pos + 1);
    }
    this.h = hostname;
    return true;
};

// Describes the last hostname matched by match().
FilterHostnameDict.prototype.logData = function() {
    return {
        raw: '||' + this.h + '^',
        regex: rawToRegexStr(this.h) + '(?:[^%.0-9a-z_-]|$)',
        compiled: this.h
    };
};

// Compiled form: [ fid, array of hostnames ].
FilterHostnameDict.prototype.compile = function() {
    return [ this.fid, µb.arrayFrom(this.dict) ];
};

FilterHostnameDict.load = function(args) {
    var f = new FilterHostnameDict();
    f.dict = new Set(args[1]);
    return f;
};

registerFilterClass(FilterHostnameDict);
// Some buckets can grow quite large, and finding a hit in these buckets
// may end up being expensive. After considering various solutions, the one
// retained is to promote hit filters to a smaller index, so that next time
// they can be looked-up faster.
// key= 10000 ad count=660
// key= 10000 ads count=433
// key= 10001 google count=277
// key=1000000 2mdn count=267
// key= 10000 social count=240
// key= 10001 pagead2 count=166
// key= 10000 twitter count=122
// key= 10000 doubleclick count=118
// key= 10000 facebook count=114
// key= 10000 share count=113
// key= 10000 google count=106
// key= 10001 code count=103
// key= 11000 doubleclick count=100
// key=1010001 g count=100
// key= 10001 js count= 89
// key= 10000 adv count= 88
// key= 10000 youtube count= 61
// key= 10000 plugins count= 60
// key= 10001 partner count= 59
// key= 10000 ico count= 57
// key= 110001 ssl count= 57
// key= 10000 banner count= 53
// key= 10000 footer count= 51
// key= 10000 rss count= 51
// Container for exactly two filters sharing the same token. `f` refers to
// whichever of the two matched last.
var FilterPair = function(a, b) {
    this.f1 = a;
    this.f2 = b;
    this.f = null;
};

// Number of filters currently held: 0, 1 or 2.
Object.defineProperty(FilterPair.prototype, 'size', {
    get: function() {
        if ( this.f1 === undefined && this.f2 === undefined ) { return 0; }
        if ( this.f1 === undefined || this.f2 === undefined ) { return 1; }
        return 2;
    }
});

// Removes the filter(s) whose compiled form is equal to `fdata`. Whatever
// filter remains always ends up in `f1`.
FilterPair.prototype.remove = function(fdata) {
    if (
        this.f2 !== undefined &&
        arrayStrictEquals(this.f2.compile(), fdata) === true
    ) {
        this.f2 = undefined;
    }
    if (
        this.f1 !== undefined &&
        arrayStrictEquals(this.f1.compile(), fdata) === true
    ) {
        // Move -- not copy -- the second filter into the first slot, or
        // else the same filter would sit in both slots and `size` would
        // keep reporting 2.
        this.f1 = this.f2;
        this.f2 = undefined;
    }
};

FilterPair.prototype.match = function(url, tokenBeg) {
    if ( this.f1.match(url, tokenBeg) === true ) {
        this.f = this.f1;
        return true;
    }
    if ( this.f2.match(url, tokenBeg) === true ) {
        this.f = this.f2;
        return true;
    }
    return false;
};

// Log data of the filter which matched last.
FilterPair.prototype.logData = function() {
    return this.f.logData();
};

// Compiled form: [ fid, compiled f1, compiled f2 ].
FilterPair.prototype.compile = function() {
    return [ this.fid, this.f1.compile(), this.f2.compile() ];
};

// Returns a FilterBucket holding both filters plus `a`. The now empty pair
// is set aside so that the next FilterPair.load() can reuse it.
FilterPair.prototype.upgrade = function(a) {
    var bucket = new FilterBucket(this.f1, this.f2, a);
    this.f1 = this.f2 = this.f = null;
    FilterPair.available = this;
    return bucket;
};

FilterPair.load = function(args) {
    var f1 = filterFromCompiledData(args[1]),
        f2 = filterFromCompiledData(args[2]),
        pair = FilterPair.available;
    if ( pair === null ) {
        return new FilterPair(f1, f2);
    }
    FilterPair.available = null;
    pair.f1 = f1;
    pair.f2 = f2;
    return pair;
};

// Discarded instance available for reuse, if any.
FilterPair.available = null;

registerFilterClass(FilterPair);
// Container for any number of filters sharing the same token. `f` refers to
// the filter which matched last.
//
// When called with arguments, the bucket is seeded with the three filters
// `a`, `b` and `c`.
var FilterBucket = function(a, b, c) {
    this.filters = [];
    this.f = null;
    if ( a !== undefined ) {
        this.filters[0] = a;
        this.filters[1] = b;
        this.filters[2] = c;
    }
};

Object.defineProperty(FilterBucket.prototype, 'size', {
    get: function() {
        return this.filters.length;
    }
});

// Count of promotions performed so far; used to pick the promotion slot.
FilterBucket.prototype.promoted = 0;

// Appends the filter described by the compiled data `fdata`.
FilterBucket.prototype.add = function(fdata) {
    this.filters[this.filters.length] = filterFromCompiledData(fdata);
};

// Removes all filters whose compiled form is equal to `fdata`.
FilterBucket.prototype.remove = function(fdata) {
    var i = this.filters.length,
        filter;
    while ( i-- ) {
        filter = this.filters[i];
        if ( arrayStrictEquals(filter.compile(), fdata) === true ) {
            this.filters.splice(i, 1);
        }
    }
};

// Promote hit filters so they can be found faster next time.
// The filter at index `i` is swapped with a filter located in a lower
// region of the array; the region is narrowed by halving as long as it
// still lies above `i` and holds at least 16 entries.
FilterBucket.prototype.promote = function(i) {
    var filters = this.filters,
        pivot = filters.length >>> 1;
    while ( i < pivot ) {
        pivot >>>= 1;
        if ( pivot < 16 ) { break; }
    }
    if ( i <= pivot ) { return; }
    var j = this.promoted % pivot;
    //console.debug('FilterBucket.promote(): promoted %d to %d', i, j);
    var f = filters[j];
    filters[j] = filters[i];
    filters[i] = f;
    this.promoted += 1;
};

FilterBucket.prototype.match = function(url, tokenBeg) {
    var filters = this.filters;
    for ( var i = 0, n = filters.length; i < n; i++ ) {
        if ( filters[i].match(url, tokenBeg) === true ) {
            this.f = filters[i];
            // Promotion is attempted only for hits found at index 16 or
            // beyond.
            if ( i >= 16 ) { this.promote(i); }
            return true;
        }
    }
    return false;
};

// Log data of the filter which matched last.
FilterBucket.prototype.logData = function() {
    return this.f.logData();
};

// Compiled form: [ fid, array of compiled filters ].
FilterBucket.prototype.compile = function() {
    var compiled = [],
        filters = this.filters;
    for ( var i = 0, n = filters.length; i < n; i++ ) {
        compiled[i] = filters[i].compile();
    }
    return [ this.fid, compiled ];
};

// Returns a FilterPair made of the first two filters of the bucket.
FilterBucket.prototype.downgrade = function() {
    return new FilterPair(this.filters[0], this.filters[1]);
};

FilterBucket.load = function(args) {
    var bucket = new FilterBucket(),
        compiledFilters = args[1],
        filters = bucket.filters;
    for ( var i = 0, n = compiledFilters.length; i < n; i++ ) {
        filters[i] = filterFromCompiledData(compiledFilters[i]);
    }
    return bucket;
};

registerFilterClass(FilterBucket);
// Parser of raw network filters. The constructor sets up what stays the
// same from one filter to the next: regular expressions and pre-computed
// values.
var FilterParser = function() {
    this.cantWebsocket = vAPI.cantWebsocket;
    this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
    this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i;
    this.reHostnameRule2 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]\^?$/i;
    this.reCleanupHostnameRule2 = /\^$/g;
    this.reCanTrimCarets1 = /^[^*]*$/;
    this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/;
    this.reHasUppercase = /[A-Z]/;
    this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
    this.reHasUnicode = /[^\x00-\x7F]/;
    this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
    this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/;
    this.domainOpt = '';
    // Token hash used for filters which have no usable token.
    this.noTokenHash = µb.urlTokenizer.tokenHashFromString('*');
    this.unsupportedTypeBit = this.bitFromType('unsupported');
    // All network request types to bitmap
    //   bring origin to 0 (from 4 -- see typeNameToTypeValue)
    //   left-shift 1 by the above-calculated value
    //   subtract 1 to set all type bits
    this.allNetRequestTypeBits = (1 << (otherTypeBitValue >>> 4)) - 1;
};
// https://github.com/gorhill/uBlock/issues/1493
// Transpose `ping` into `other` for now.
// Maps a type option, as found in a raw filter, to the name of the type it
// stands for in typeNameToTypeValue.
FilterParser.prototype.toNormalizedType = {
    'beacon': 'other',
    'css': 'stylesheet',
    'data': 'data',
    'document': 'main_frame',
    'elemhide': 'generichide',
    'font': 'font',
    'frame': 'sub_frame',
    'genericblock': 'unsupported',
    'generichide': 'generichide',
    'image': 'image',
    'inline-font': 'inline-font',
    'inline-script': 'inline-script',
    'media': 'media',
    'object': 'object',
    'object-subrequest': 'object',
    'other': 'other',
    'ping': 'other',
    'popunder': 'popunder',
    'popup': 'popup',
    'script': 'script',
    'stylesheet': 'stylesheet',
    'subdocument': 'sub_frame',
    'xhr': 'xmlhttprequest',
    'xmlhttprequest': 'xmlhttprequest',
    'webrtc': 'unsupported',
    'websocket': 'websocket'
};
// Brings all per-filter properties back to their default values. Returns
// the parser itself.
FilterParser.prototype.reset = function() {
    this.action = BlockAction;
    this.anchor = 0;
    this.badFilter = 0;
    this.dataType = undefined;
    this.dataStr = undefined;
    this.elemHiding = false;
    this.f = '';
    this.firstParty = false;
    this.thirdParty = false;
    this.party = AnyParty;
    this.fopts = '';
    this.hostnamePure = false;
    this.domainOpt = '';
    this.isRegex = false;
    this.raw = '';
    this.redirect = false;
    this.token = '*';
    this.tokenHash = this.noTokenHash;
    this.tokenBeg = 0;
    this.types = 0;
    this.important = 0;
    this.unsupported = false;
    return this;
};
// Returns the bit standing for the type `type` (a key of
// typeNameToTypeValue) in a bitmap of types: type value 1 is bit 0, type
// value 2 is bit 1, and so on.
FilterParser.prototype.bitFromType = function(type) {
    return 1 << ((typeNameToTypeValue[type] >>> 4) - 1);
};
// https://github.com/chrisaljoudi/uBlock/issues/589
// Be ready to handle multiple negated types
// Merges a type option into the `types` bitmap.
//
// raw: type option as found in the filter (a key of toNormalizedType).
// not: whether the option is negated.
FilterParser.prototype.parseTypeOption = function(raw, not) {
    var typeBit = this.bitFromType(this.toNormalizedType[raw]);

    if ( !not ) {
        this.types |= typeBit;
        return;
    }

    // Non-discrete network types can't be negated.
    if ( (typeBit & this.allNetRequestTypeBits) === 0 ) {
        return;
    }

    // Negated type: set all valid network request type bits to 1
    if (
        (typeBit & this.allNetRequestTypeBits) !== 0 &&
        (this.types & this.allNetRequestTypeBits) === 0
    ) {
        this.types |= this.allNetRequestTypeBits;
    }
    this.types &= ~typeBit;
};
// Merges a party option into the parser state.
//
// firstParty: true for `first-party`, false for `third-party`.
// not:        whether the option is negated.
//
// A negated option is the same as the opposite option. Once both parties
// have been seen, the party is AnyParty.
FilterParser.prototype.parsePartyOption = function(firstParty, not) {
    if ( firstParty ) {
        not = !not;
    }
    if ( not ) {
        this.firstParty = true;
        this.party = this.thirdParty ? AnyParty : FirstParty;
    } else {
        this.thirdParty = true;
        this.party = this.firstParty ? AnyParty : ThirdParty;
    }
};
// Normalizes the value of a `domain=` option: hostnames containing
// non-ASCII characters are converted to punycode. Returns an empty string
// when the result contains characters which make no sense in a hostname.
FilterParser.prototype.parseDomainOption = function(s) {
    if ( this.reHasUnicode.test(s) ) {
        var hostnames = s.split('|'),
            i = hostnames.length;
        while ( i-- ) {
            if ( this.reHasUnicode.test(hostnames[i]) ) {
                hostnames[i] = punycode.toASCII(hostnames[i]);
            }
        }
        s = hostnames.join('|');
    }
    if ( this.reBadDomainOptChars.test(s) ) {
        return '';
    }
    return s;
};
// Parse the comma-separated option string of a filter (the part which
// follows the last `$`, see parse()) and record each option in this parser.
//   s: raw option string, also kept verbatim in `this.fopts`
// Options which can't be honored set `this.unsupported`.
FilterParser.prototype.parseOptions = function(s) {
this.fopts = s;
var opts = s.split(',');
var opt, not;
for ( var i = 0; i < opts.length; i++ ) {
opt = opts[i];
// A leading `~` negates the option; strip it before comparing names.
not = opt.startsWith('~');
if ( not ) {
opt = opt.slice(1);
if ( opt === 'third-party' || opt === '3p' ) {
this.parsePartyOption(false, not);
// https://issues.adblockplus.org/ticket/616
// `generichide` concept already supported, just a matter of
// adding support for the new keyword.
if ( opt === 'elemhide' || opt === 'generichide' ) {
if ( not === false ) {
this.parseTypeOption('generichide', false);
this.unsupported = true;
// Test before handling all other types.
if ( opt.startsWith('redirect=') ) {
if ( this.action === BlockAction ) {
this.redirect = true;
this.unsupported = true;
// Any name known to `toNormalizedType` is a request type option.
if ( this.toNormalizedType.hasOwnProperty(opt) ) {
this.parseTypeOption(opt, not);
// https://github.com/gorhill/uBlock/issues/2294
// Detect and discard filter if domain option contains nonsensical
// characters.
if ( opt.startsWith('domain=') ) {
// 7 is the length of `domain=`.
this.domainOpt = this.parseDomainOption(opt.slice(7));
if ( this.domainOpt === '' ) {
this.unsupported = true;
if ( opt === 'important' ) {
this.important = Important;
if ( opt === 'first-party' || opt === '1p' ) {
this.parsePartyOption(true, not);
if ( opt.startsWith('csp=') ) {
// Requires a non-empty value which does not match `reBadCSP`.
if ( opt.length > 4 && this.reBadCSP.test(opt) === false ) {
this.parseTypeOption('data', not);
this.dataType = 'csp';
this.dataStr = opt.slice(4).trim();
// A bare `csp` is accepted only on exception (allow) filters; the data
// string is then empty.
if ( opt === 'csp' && this.action === AllowAction ) {
this.parseTypeOption('data', not);
this.dataType = 'csp';
this.dataStr = '';
// Used by Adguard, purpose is unclear -- just ignore for now.
if ( opt === 'empty' ) {
// https://github.com/uBlockOrigin/uAssets/issues/192
if ( opt === 'badfilter' ) {
this.badFilter = BadFilter;
// Unrecognized filter option: ignore whole filter.
this.unsupported = true;
// https://github.com/gorhill/uBlock/issues/1943#issuecomment-243188946
// Convert websocket-related filter where possible to a format which
// can be handled using CSP injection.
// Rewrite the parsed filter in place as a `csp` data filter for the cases
// handled below: the pattern `this.f` becomes '*', `this.types` becomes the
// `data` type bit, and `this.dataType`/`this.dataStr` carry the policy.
FilterParser.prototype.translate = function() {
var dataTypeBit = this.bitFromType('data');
// Websocket filters, when `this.cantWebsocket` is set.
if ( this.cantWebsocket && this.reWebsocketAny.test(this.f) ) {
this.f = '*';
this.types = dataTypeBit;
this.dataType = 'csp';
this.dataStr = "connect-src https: http:";
// https://bugs.chromium.org/p/chromium/issues/detail?id=669086
// TODO: remove when most users are beyond Chromium v56
if ( vAPI.chromiumVersion < 57 ) {
this.dataStr += '; frame-src *';
// Broad |data:-based filters.
if ( this.f === 'data:' ) {
// Dispatch on the exact set of type bits; 0 means no type bit is set.
switch ( this.types ) {
case 0:
this.f = '*';
this.types = dataTypeBit;
this.dataType = 'csp';
this.dataStr = "default-src 'self' * blob: 'unsafe-inline' 'unsafe-eval'";
case this.bitFromType('script'):
this.f = '*';
this.types = dataTypeBit;
this.dataType = 'csp';
this.dataStr = "script-src 'self' * blob: 'unsafe-inline' 'unsafe-eval'";
case this.bitFromType('sub_frame'):
this.f = '*';
this.types = dataTypeBit;
this.dataType = 'csp';
this.dataStr = "frame-src 'self' * blob:";
case this.bitFromType('script') | this.bitFromType('sub_frame'):
this.f = '*';
this.types = dataTypeBit;
this.dataType = 'csp';
this.dataStr = "frame-src 'self' * blob:; script-src 'self' * blob: 'unsafe-inline' 'unsafe-eval';";
// Broad |blob:-based filters.
if ( this.f === 'blob:' ) {
// Same dispatch as for `data:` above, with `data:` listed in the policies
// in place of `blob:`.
switch ( this.types ) {
case 0:
this.f = '*';
this.types = dataTypeBit;
this.dataType = 'csp';
this.dataStr = "default-src 'self' * data: 'unsafe-inline' 'unsafe-eval'";
case this.bitFromType('script'):
this.f = '*';
this.types = dataTypeBit;
this.dataType = 'csp';
this.dataStr = "script-src 'self' * data: 'unsafe-inline' 'unsafe-eval'";
case this.bitFromType('sub_frame'):
this.f = '*';
this.types = dataTypeBit;
this.dataType = 'csp';
this.dataStr = "frame-src 'self' * data:";
case this.bitFromType('script') | this.bitFromType('sub_frame'):
this.f = '*';
this.types = dataTypeBit;
this.dataType = 'csp';
this.dataStr = "frame-src 'self' * data:; script-src 'self' * data: 'unsafe-inline' 'unsafe-eval';";
// anchor: bit vector
//   0000 (0x0): no anchoring
//   0001 (0x1): anchored to the end of the URL.
//   0010 (0x2): anchored to the start of the URL.
//   0011 (0x3): anchored to the start and end of the URL.
//   0100 (0x4): anchored to the hostname of the URL.
//   0101 (0x5): anchored to the hostname and end of the URL.
// Parse one raw filter line into the fields of this parser.
//   raw: the filter text, kept verbatim in `this.raw`
// Returns `this`. Callers inspect `elemHiding` and `unsupported` to find out
// whether the filter is usable (see FilterContainer.prototype.compile).
FilterParser.prototype.parse = function(raw) {
// important!
var s = this.raw = raw;
// plain hostname? (from HOSTS file)
if ( this.reHostnameRule1.test(s) ) {
this.f = s;
this.hostnamePure = true;
// 0x4: anchored to the hostname of the URL.
this.anchor |= 0x4;
return this;
// element hiding filter?
var pos = s.indexOf('#');
if ( pos !== -1 ) {
var c = s.charAt(pos + 1);
// `##` or `#@` marks a cosmetic filter, which is not expected here.
if ( c === '#' || c === '@' ) {
console.error('static-net-filtering.js > unexpected cosmetic filters');
this.elemHiding = true;
return this;
// block or allow filter?
// Important: this must be executed before parsing options
if ( s.startsWith('@@') ) {
this.action = AllowAction;
s = s.slice(2);
// options
// https://github.com/gorhill/uBlock/issues/842
// - ensure we are not dealing with a regex-based filter.
// - lookup the last occurrence of `$`.
if ( s.startsWith('/') === false || s.endsWith('/') === false ) {
pos = s.lastIndexOf('$');
if ( pos !== -1 ) {
// https://github.com/gorhill/uBlock/issues/952
// Discard Adguard-specific `$$` filters.
if ( s.indexOf('$$') !== -1 ) {
this.unsupported = true;
return this;
this.parseOptions(s.slice(pos + 1));
// https://github.com/gorhill/uBlock/issues/2283
// Abort if type is only for unsupported types, otherwise
// toggle off `unsupported` bit.
if ( this.types & this.unsupportedTypeBit ) {
this.types &= ~this.unsupportedTypeBit;
if ( this.types === 0 ) {
this.unsupported = true;
return this;
// Keep only the pattern part, without `$` and the options.
s = s.slice(0, pos);
// regex?
if ( s.startsWith('/') && s.endsWith('/') && s.length > 2 ) {
this.isRegex = true;
// Strip the enclosing slashes.
this.f = s.slice(1, -1);
// https://github.com/gorhill/uBlock/issues/1246
// If the filter is valid, use the corrected version of the source
// string -- this ensure reverse-lookup will work fine.
this.f = normalizeRegexSource(this.f);
if ( this.f === '' ) {
// NOTE(review): the string below reads like the first argument of a logging
// call whose surrounding lines are not present here -- confirm against the
// reference source.
"uBlock Origin> discarding bad regular expression-based network filter '%s': '%s'",
this.unsupported = true;
return this;
// hostname-anchored
if ( s.startsWith('||') ) {
this.anchor |= 0x4;
s = s.slice(2);
// convert hostname to punycode if needed
// https://github.com/gorhill/uBlock/issues/2599
if ( this.reHasUnicode.test(s) ) {
var matches = this.reIsolateHostname.exec(s);
if ( matches ) {
// NOTE(review): the last operand of this concatenation is not present
// here -- confirm against the reference source.
s = (matches[1] !== undefined ? matches[1] : '') +
punycode.toASCII(matches[2]) +
//console.debug('µBlock.staticNetFilteringEngine/FilterParser.parse():', raw, '=', s);
// https://github.com/chrisaljoudi/uBlock/issues/1096
if ( s.startsWith('^') ) {
this.unsupported = true;
return this;
// plain hostname? (from ABP filter list)
// https://github.com/gorhill/uBlock/issues/1757
// A filter can't be a pure-hostname one if there is a domain or csp
// option present.
if ( this.reHostnameRule2.test(s) ) {
this.f = s.replace(this.reCleanupHostnameRule2, '');
this.hostnamePure = true;
return this;
// left-anchored
else if ( s.startsWith('|') ) {
// 0x2: anchored to the start of the URL.
this.anchor |= 0x2;
s = s.slice(1);
// right-anchored
if ( s.endsWith('|') ) {
// 0x1: anchored to the end of the URL.
this.anchor |= 0x1;
s = s.slice(0, -1);
// https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
// remove pointless leading *.
// https://github.com/gorhill/uBlock/issues/3034
// - We can remove anchoring if we need to match all at the start.
if ( s.startsWith('*') ) {
s = s.replace(/^\*+([^%0-9a-z])/i, '$1');
// Clear the hostname (0x4) and start-of-URL (0x2) anchor bits.
this.anchor &= ~0x6;
// remove pointless trailing *
// https://github.com/gorhill/uBlock/issues/3034
// - We can remove anchoring if we need to match all at the end.
if ( s.endsWith('*') ) {
s = s.replace(/([^%0-9a-z])\*+$/i, '$1');
// Clear the end-of-URL (0x1) anchor bit.
this.anchor &= ~0x1;
// nothing left?
if ( s === '' ) {
s = '*';
// https://github.com/gorhill/uBlock/issues/1047
// Hostname-anchored makes no sense if matching all requests.
if ( s === '*' ) {
this.anchor = 0;
// This might look weird but we gain memory footprint by not going through
// toLowerCase(), at least on Chromium. Because copy-on-write?
this.f = this.reHasUppercase.test(s) ? s.toLowerCase() : s;
// Convenience:
// Convert special broad filters for non-webRequest aware types into
// `csp` filters wherever possible.
if ( this.anchor & 0x2 && this.party === 0 ) {
return this;
// Given a string, find a good token. Tokens which are too generic, i.e. very
// common with a high probability of ending up as a miss, are not
// good. Avoid if possible. This has a *significant* positive impact on
// performance.
// These "bad tokens" are collated manually.
// Hostname-anchored with no wildcard always have a token index of 0.
// Leading token of a hostname-anchored filter without wildcard, see
// makeToken().
var reHostnameToken = /^[0-9a-z]+/;
// Candidate tokens in a plain filter: runs of 2 or more characters from
// [%0-9a-z]. Global: `lastIndex` must be reset before each scan.
var reGoodToken = /[%0-9a-z]{2,}/g;
// Candidate tokens in the source of a regex-based filter; uppercase letters
// are accepted as well. Global: `lastIndex` must be reset before each scan.
var reRegexToken = /[%0-9A-Za-z]{2,}/g;
// extractTokenFromRegex() gives up as soon as the text preceding a candidate
// contains `(` or `[`.
var reRegexTokenAbort = /[([]/;
// Tested against the text preceding a candidate token in a regex source.
var reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/;
// NOTE(review): presumably tested against the text following a candidate
// token; no use of it is visible nearby -- confirm.
var reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*]|$)/;
// Tokens considered too generic to make a good lookup key.
var badTokens = new Set([
// Scan `this.f` for a token suitable for indexing the filter.
// Returns the exec() match array of the selected token; falls back to the
// first match found in `badTokens`, which is null when there was none.
FilterParser.prototype.findFirstGoodToken = function() {
// `reGoodToken` is global: rewind it before scanning.
reGoodToken.lastIndex = 0;
var s = this.f,
matches, lpos,
badTokenMatch = null;
while ( (matches = reGoodToken.exec(s)) !== null ) {
// https://github.com/gorhill/uBlock/issues/997
// Ignore token if preceded by wildcard.
lpos = matches.index;
if ( lpos !== 0 && s.charCodeAt(lpos - 1) === 0x2A /* '*' */ ) {
// Same test for a wildcard right after the token.
if ( s.charCodeAt(reGoodToken.lastIndex) === 0x2A /* '*' */ ) {
// Remember only the first bad token encountered.
if ( badTokens.has(matches[0]) ) {
if ( badTokenMatch === null ) {
badTokenMatch = matches;
return matches;
return badTokenMatch;
// Try to pick a token out of the regex source held in `this.f`. When one is
// selected, it is stored lowercased in `this.token`, along with its hash in
// `this.tokenHash` and its position in `this.tokenBeg`.
FilterParser.prototype.extractTokenFromRegex = function() {
// `reRegexToken` is global: rewind it before scanning.
reRegexToken.lastIndex = 0;
var s = this.f,
matches, prefix;
while ( (matches = reRegexToken.exec(s)) !== null ) {
prefix = s.slice(0, matches.index);
if ( reRegexTokenAbort.test(prefix) ) { return; }
// NOTE(review): the second operand of the condition below is not present
// here -- confirm against the reference source.
if (
reRegexBadPrefix.test(prefix) ||
) {
this.token = matches[0].toLowerCase();
this.tokenHash = µb.urlTokenizer.tokenHashFromString(this.token);
this.tokenBeg = matches.index;
// Stop at the first token which is not in `badTokens`.
if ( badTokens.has(this.token) === false ) { break; }
// https://github.com/chrisaljoudi/uBlock/issues/1038
// Single asterisk will match any URL.
// https://github.com/gorhill/uBlock/issues/2781
// For efficiency purpose, try to extract a token from a regex-based filter.
// Select the token under which the filter will be indexed, and store it in
// `this.token`, `this.tokenHash` and `this.tokenBeg`.
FilterParser.prototype.makeToken = function() {
if ( this.isRegex ) {
// A match-all filter has no token.
if ( this.f === '*' ) { return; }
var matches = null;
// Hostname-anchored (0x4) without wildcard: take the token at the very start.
if ( (this.anchor & 0x4) !== 0 && this.f.indexOf('*') === -1 ) {
matches = reHostnameToken.exec(this.f);
if ( matches === null ) {
matches = this.findFirstGoodToken();
if ( matches !== null ) {
this.token = matches[0];
this.tokenHash = µb.urlTokenizer.tokenHashFromString(this.token);
this.tokenBeg = matches.index;
// Container for compiled static network filters.
var FilterContainer = function() {
// Used by compile() to tell generic patterns (containing `^` or `*`) from
// plain ones.
this.reIsGeneric = /[\^\*]/;
this.filterParser = new FilterParser();
this.urlTokenizer = µb.urlTokenizer;
// Key under which untokenizable filters are stored, see matchTokens().
this.noTokenHash = this.urlTokenizer.tokenHashFromString('*');
// Key under which hostname-only filters are stored, see matchTokens().
this.dotTokenHash = this.urlTokenizer.tokenHashFromString('.');
// Reset all, thus reducing to a minimum memory footprint of the context.
// Return the container to its empty, unfrozen state.
FilterContainer.prototype.reset = function() {
this.frozen = false;
// Statistics.
this.processedFilterCount = 0;
this.acceptedCount = 0;
this.rejectedCount = 0;
this.allowFilterCount = 0;
this.blockFilterCount = 0;
this.discardedCount = 0;
// Filter storage.
this.badFilters = new Set();
this.duplicateBuster = new Set();
this.categories = new Map();
this.dataFilters = new Map();
// Reuse filter instances whenever possible at load time.
this.fclassLast = null;
this.fdataLast = null;
this.filterLast = null;
// Runtime registers
this.cbRegister = undefined;
this.thRegister = undefined;
this.fRegister = null;
// Mark the container as frozen and drop the state which is only used while
// loading filters.
FilterContainer.prototype.freeze = function() {
histogram('allFilters', this.categories);
this.duplicateBuster = new Set();
this.fclassLast = null;
this.fdataLast = null;
this.filterLast = null;
this.frozen = true;
//this.tokenHistogram = new Map(µb.arrayFrom(this.tokenHistogram).sort(function(a, b) {
// return a[0].localeCompare(b[0]) || (b[1] - a[1]);
// Serialize the container into a plain object ("selfie") holding the
// counters as-is, and `categories`/`dataFilters` as JSON strings.
// See fromSelfie() for the reverse operation.
FilterContainer.prototype.toSelfie = function() {
// Map of Maps => array of [ categoryKey, [ [ tokenKey, compiledFilter ] ] ].
var categoriesToSelfie = function(categoryMap) {
var categoryEntries = [];
for ( var categoryEntry of categoryMap ) {
var tokenEntries = [];
for ( var tokenEntry of categoryEntry[1] ) {
tokenEntries.push([ tokenEntry[0], tokenEntry[1].compile() ]);
categoryEntries.push([ categoryEntry[0], tokenEntries ]);
return JSON.stringify(categoryEntries);
// Each value of the map is the head of a chain linked through `next`.
// NOTE(review): the statement adding each entry to `selfie` is not present
// here -- confirm against the reference source.
var dataFiltersToSelfie = function(dataFilters) {
var selfie = [];
for ( var entry of dataFilters.values() ) {
do {
entry = entry.next;
} while ( entry !== undefined );
return JSON.stringify(selfie);
return {
processedFilterCount: this.processedFilterCount,
acceptedCount: this.acceptedCount,
rejectedCount: this.rejectedCount,
allowFilterCount: this.allowFilterCount,
blockFilterCount: this.blockFilterCount,
discardedCount: this.discardedCount,
categories: categoriesToSelfie(this.categories),
dataFilters: dataFiltersToSelfie(this.dataFilters)
// Restore the container from an object produced by toSelfie().
//   selfie: counters, plus `categories` and `dataFilters` as JSON strings
FilterContainer.prototype.fromSelfie = function(selfie) {
this.frozen = true;
this.processedFilterCount = selfie.processedFilterCount;
this.acceptedCount = selfie.acceptedCount;
this.rejectedCount = selfie.rejectedCount;
this.allowFilterCount = selfie.allowFilterCount;
this.blockFilterCount = selfie.blockFilterCount;
this.discardedCount = selfie.discardedCount;
var entries;
// Rebuild the category => (token => filter) maps.
var categoryMap = new Map();
entries = JSON.parse(selfie.categories);
for ( var i = 0, ni = entries.length; i < ni; i++ ) {
var categoryEntry = entries[i],
tokenMap = new Map();
var tokenEntries = categoryEntry[1];
for ( var j = 0, nj = tokenEntries.length; j < nj; j++ ) {
var tokenEntry = tokenEntries[j];
tokenMap.set(tokenEntry[0], filterFromCompiledData(tokenEntry[1]));
categoryMap.set(categoryEntry[0], tokenMap);
this.categories = categoryMap;
// Rebuild the data filters: each entry is linked in front of whatever is
// already stored under the same token hash.
entries = JSON.parse(selfie.dataFilters);
var entry, bucket;
i = entries.length;
while ( i-- ) {
entry = FilterDataHolderEntry.load(entries[i]);
bucket = this.dataFilters.get(entry.tokenHash);
if ( bucket !== undefined ) {
entry.next = bucket;
this.dataFilters.set(entry.tokenHash, entry);
// Compile one raw filter line and push the result to `writer`.
//   raw: the filter text
//   writer: receives the compiled filter(s) through its push() method
// Returns false when the line is ignored (empty, cosmetic filter, or
// unsupported), true otherwise.
FilterContainer.prototype.compile = function(raw, writer) {
// Ignore empty lines
var s = raw.trim();
if ( s.length === 0 ) {
return false;
var parsed = this.filterParser.parse(s);
// Ignore element-hiding filters
if ( parsed.elemHiding ) {
return false;
// Ignore filters with unsupported options
if ( parsed.unsupported ) {
µb.logger.writeOne('', 'error', 'Network filtering invalid filter: ' + raw);
return false;
// 0 = network filters
// Pure hostnames, use more efficient dictionary lookup
// https://github.com/chrisaljoudi/uBlock/issues/665
// Create a dict keyed on request type etc.
if (
parsed.hostnamePure &&
parsed.domainOpt === '' &&
parsed.dataType === undefined &&
this.compileHostnameOnlyFilter(parsed, writer)
) {
return true;
// Pick the filter class from the shape of the pattern: regex, pure
// hostname, match-all, then by anchor bits and token position.
var fdata;
if ( parsed.isRegex ) {
fdata = FilterRegex.compile(parsed);
} else if ( parsed.hostnamePure ) {
fdata = FilterPlainHostname.compile(parsed);
} else if ( parsed.f === '*' ) {
fdata = FilterTrue.compile();
} else if ( parsed.anchor === 0x5 ) {
// https://github.com/gorhill/uBlock/issues/1669
fdata = FilterGenericHnAndRightAnchored.compile(parsed);
} else if ( parsed.anchor === 0x4 ) {
if (
this.reIsGeneric.test(parsed.f) === false &&
parsed.tokenHash !== parsed.noTokenHash &&
parsed.tokenBeg === 0
) {
fdata = FilterPlainHnAnchored.compile(parsed);
} else {
fdata = FilterGenericHnAnchored.compile(parsed);
} else if (
this.reIsGeneric.test(parsed.f) ||
parsed.tokenHash === parsed.noTokenHash
) {
fdata = FilterGeneric.compile(parsed);
} else if ( parsed.anchor === 0x2 ) {
fdata = FilterPlainLeftAnchored.compile(parsed);
} else if ( parsed.anchor === 0x1 ) {
fdata = FilterPlainRightAnchored.compile(parsed);
} else if ( parsed.anchor === 0x3 ) {
fdata = FilterExactMatch.compile(parsed);
} else if ( parsed.tokenBeg === 0 ) {
fdata = FilterPlainPrefix0.compile(parsed);
} else if ( parsed.tokenBeg === 1 ) {
fdata = FilterPlainPrefix1.compile(parsed);
} else {
fdata = FilterPlain.compile(parsed);
// Filters with a `domain=` option and/or data are compiled as wrappers; the
// filter compiled so far is kept in `fwrapped`.
var fwrapped;
if ( parsed.domainOpt !== '' ) {
fwrapped = fdata;
fdata = FilterOrigin.compile(parsed);
if ( parsed.dataType !== undefined ) {
fwrapped = fdata;
fdata = FilterDataHolder.compile(parsed);
this.compileToAtomicFilter(fdata, parsed, writer);
return true;
// Using fast/compact dictionary when filter is a pure hostname.
// Write a pure-hostname filter to `writer`, keyed on `this.dotTokenHash`:
// a single entry when no type is set, otherwise one entry per type bit set
// in `parsed.types`. Returns true.
FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, writer) {
// Can't fit the filter in a pure hostname dictionary.
// https://github.com/gorhill/uBlock/issues/1757
// This should no longer happen with fix to above issue.
//if ( parsed.domainOpt.length !== 0 ) {
// return;
var descBits = parsed.action | parsed.important | parsed.party | parsed.badFilter;
var type = parsed.types;
if ( type === 0 ) {
writer.push([ descBits, this.dotTokenHash, parsed.f ]);
return true;
// Bit N-1 of `types` stands for type value N (see bitFromType()), hence the
// count starts at 1; the type value goes into bits 4 and up of the category.
var bitOffset = 1;
do {
if ( type & 1 ) {
writer.push([ descBits | (bitOffset << 4), this.dotTokenHash, parsed.f ]);
bitOffset += 1;
type >>>= 1;
} while ( type !== 0 );
return true;
// Write compiled filter data to `writer`, keyed on `parsed.tokenHash`:
// a single entry when no type is set, otherwise one entry per type bit set
// in `parsed.types`. Redirect rules are written out last.
//   fdata: compiled filter data, see compile()
//   parsed: the FilterParser instance which parsed the filter
FilterContainer.prototype.compileToAtomicFilter = function(fdata, parsed, writer) {
var descBits = parsed.action | parsed.important | parsed.party | parsed.badFilter,
type = parsed.types;
if ( type === 0 ) {
writer.push([ descBits, parsed.tokenHash, fdata ]);
// Bit N-1 of `types` stands for type value N (see bitFromType()), hence the
// count starts at 1; the type value goes into bits 4 and up of the category.
var bitOffset = 1;
do {
if ( type & 1 ) {
writer.push([ descBits | (bitOffset << 4), parsed.tokenHash, fdata ]);
bitOffset += 1;
type >>>= 1;
} while ( type !== 0 );
// Only static filter with an explicit type can be redirected. If we reach
// this point, it's because there is one or more explicit type.
if ( !parsed.redirect ) {
if ( parsed.badFilter ) {
var redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
if ( Array.isArray(redirects) === false ) {
// Redirect entries carry the `redirect` type value as their category bits.
descBits = typeNameToTypeValue.redirect;
var i = redirects.length;
while ( i-- ) {
writer.push([ descBits, redirects[i] ]);
// Load compiled filters from `reader` into the container.
//   reader: iterated with next(); args() yields the current entry as
//           [ categoryBits, tokenHash, filterData ] and fingerprint() a key
//           used to detect duplicates
FilterContainer.prototype.fromCompiledContent = function(reader) {
var badFilterBit = BadFilter,
filterPairId = FilterPair.fid,
filterBucketId = FilterBucket.fid,
filterDataHolderId = FilterDataHolder.fid,
redirectTypeValue = typeNameToTypeValue.redirect,
args, bits, bucket, entry,
tokenHash, fdata, fingerprint;
// 0 = network filters
while ( reader.next() === true ) {
args = reader.args();
bits = args[0];
if ( (bits & badFilterBit) !== 0 ) {
// Special cases: delegate to more specialized engines.
// Redirect engine.
// 0x1F0 isolates the type field, bits 4 to 8.
if ( (bits & 0x1F0) === redirectTypeValue ) {
this.acceptedCount += 1;
// Plain static filters.
fingerprint = reader.fingerprint();
tokenHash = args[1];
fdata = args[2];
// Special treatment: data-holding filters are stored separately
// because they require special matching algorithm (unlike other
// filters, ALL hits must be reported).
if ( fdata[0] === filterDataHolderId ) {
if ( this.duplicateBuster.has(fingerprint) ) {
this.discardedCount += 1;
// Link the new entry in front of those already stored for this token hash.
entry = new FilterDataHolderEntry(bits, tokenHash, fdata);
bucket = this.dataFilters.get(tokenHash);
if ( bucket !== undefined ) {
entry.next = bucket;
this.dataFilters.set(tokenHash, entry);
// All other filters: category bits => token hash => filter.
bucket = this.categories.get(bits);
if ( bucket === undefined ) {
bucket = new Map();
this.categories.set(bits, bucket);
entry = bucket.get(tokenHash);
// Hostname-only filters share one FilterHostnameDict per category.
if ( tokenHash === this.dotTokenHash ) {
if ( entry === undefined ) {
entry = new FilterHostnameDict();
bucket.set(this.dotTokenHash, entry);
if ( entry.add(fdata) === false ) {
this.discardedCount += 1;
if ( this.duplicateBuster.has(fingerprint) ) {
this.discardedCount += 1;
if ( entry === undefined ) {
bucket.set(tokenHash, filterFromCompiledData(fdata));
if ( entry.fid === filterBucketId ) {
if ( entry.fid === filterPairId ) {
new FilterPair(entry, filterFromCompiledData(fdata))
// Visit each entry of `this.badFilters` and locate its counterpart in
// `this.categories`, i.e. under the same category bits with the BadFilter
// bit cleared and under the same token hash.
// NOTE(review): most statements acting on the located filter are not present
// here -- confirm the details against the reference source.
FilterContainer.prototype.removeBadFilters = function() {
var filterPairId = FilterPair.fid,
filterBucketId = FilterBucket.fid,
filterHostnameDictId = FilterHostnameDict.fid,
bits, tokenHash, fdata, bucket, entry;
for ( var args of this.badFilters ) {
bits = args[0] & ~BadFilter;
bucket = this.categories.get(bits);
if ( bucket === undefined ) { continue; }
tokenHash = args[1];
entry = bucket.get(tokenHash);
if ( entry === undefined ) { continue; }
fdata = args[2];
// The entry may be a pair, a bucket, a hostname dictionary or a lone filter.
if ( entry.fid === filterPairId ) {
if ( entry.size === 1 ) {
bucket.set(tokenHash, entry.f1);
if ( entry.fid === filterBucketId ) {
if ( entry.size === 2 ) {
bucket.set(tokenHash, entry.downgrade());
if ( entry.fid === filterHostnameDictId ) {
if ( entry.size === 0 ) {
if ( bucket.size === 0 ) {
if ( arrayStrictEquals(entry.compile(), fdata) === true ) {
if ( bucket.size === 0 ) {
// Collect ALL the data-holding filters which match `requestURL`.
//   dataType: NOTE(review): not referenced in the visible code -- confirm
//   requestURL: URL to test
//   out, outlog: output arguments; when `outlog` is undefined no log data is
//                built
// Matching entries are sorted in three maps keyed on the filter's data
// string: exceptions (category bit 0x001), important (bit 0x002), others.
FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out, outlog) {
// NOTE(review): `dataFilters` is a Map (see reset()), which has no `length`
// property, so this guard can never be true; `size` looks intended.
if ( this.dataFilters.length === 0 ) { return; }
var url = this.urlTokenizer.setURL(requestURL);
requestHostnameRegister = µb.URI.hostnameFromURI(url);
// We need to visit ALL the matching filters.
var toAddImportant = new Map(),
toAdd = new Map(),
toRemove = new Map();
// `tokenHashes` holds (hash, offset) pairs; a hash of 0 ends the list.
var entry, f,
tokenHashes = this.urlTokenizer.getTokens(),
tokenHash, tokenOffset,
i = 0;
while ( i < 32 ) {
tokenHash = tokenHashes[i++];
if ( tokenHash === 0 ) { break; }
tokenOffset = tokenHashes[i++];
// Walk the chain of entries stored under this token hash.
entry = this.dataFilters.get(tokenHash);
while ( entry !== undefined ) {
f = entry.filter;
if ( f.match(url, tokenOffset) === true ) {
if ( entry.categoryBits & 0x001 ) {
toRemove.set(f.dataStr, entry);
} else if ( entry.categoryBits & 0x002 ) {
toAddImportant.set(f.dataStr, entry);
} else {
toAdd.set(f.dataStr, entry);
entry = entry.next;
// Same walk for the untokenizable filters.
entry = this.dataFilters.get(this.noTokenHash);
while ( entry !== undefined ) {
f = entry.filter;
if ( f.match(url) === true ) {
if ( entry.categoryBits & 0x001 ) {
toRemove.set(f.dataStr, entry);
} else if ( entry.categoryBits & 0x002 ) {
toAddImportant.set(f.dataStr, entry);
} else {
toAdd.set(f.dataStr, entry);
entry = entry.next;
if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; }
// Remove entries overridden by other filters.
var key;
for ( key of toAddImportant.keys() ) {
for ( key of toRemove.keys() ) {
if ( key === '' ) {
// Log data: result 1 for filters which apply, 2 for exceptions.
var logData;
for ( entry of toAddImportant ) {
if ( outlog === undefined ) { continue; }
logData = entry[1].logData();
logData.source = 'static';
logData.result = 1;
for ( entry of toAdd ) {
if ( outlog === undefined ) { continue; }
logData = entry[1].logData();
logData.source = 'static';
logData.result = 1;
if ( outlog !== undefined ) {
for ( entry of toRemove.values()) {
logData = entry.logData();
logData.source = 'static';
logData.result = 2;
// bucket: Map
// url: string
// Test `url` against the filters stored in `bucket`. On a hit, the token
// hash and the filter are left in `this.thRegister` and `this.fRegister`,
// and true is returned; otherwise false is returned.
FilterContainer.prototype.matchTokens = function(bucket, url) {
// Hostname-only filters
var f = bucket.get(this.dotTokenHash);
if ( f !== undefined && f.match() === true ) {
this.thRegister = this.dotTokenHash;
this.fRegister = f;
return true;
// `tokenHashes` holds (hash, offset) pairs; a hash of 0 ends the list.
var tokenHashes = this.urlTokenizer.getTokens(),
tokenHash, tokenOffset,
i = 0;
for (;;) {
tokenHash = tokenHashes[i++];
if ( tokenHash === 0 ) { break; }
tokenOffset = tokenHashes[i++];
f = bucket.get(tokenHash);
if ( f !== undefined && f.match(url, tokenOffset) === true ) {
this.thRegister = tokenHash;
this.fRegister = f;
return true;
// Untokenizable filters
f = bucket.get(this.noTokenHash);
if ( f !== undefined && f.match(url) === true ) {
this.thRegister = this.noTokenHash;
this.fRegister = f;
return true;
return false;
// Specialized handlers
// https://github.com/gorhill/uBlock/issues/1477
// Special case: blocking-generichide filter ALWAYS exists, it is implicit --
// thus we always first check for exception filters, then for important block
// filter if and only if there was a hit on an exception filter.
// https://github.com/gorhill/uBlock/issues/2103
// User may want to override `generichide` exception filters.
// Match `requestURL` against `generichide` filters.
// Returns 0 when no exception filter matches, 1 when an important block
// filter overrides a matching exception, 2 when the exception stands.
FilterContainer.prototype.matchStringGenericHide = function(context, requestURL) {
var url = this.urlTokenizer.setURL(requestURL);
// https://github.com/gorhill/uBlock/issues/2225
// Important: this is used by FilterHostnameDict.match().
requestHostnameRegister = µb.URI.hostnameFromURI(url);
// Exception filters are tested first.
var bucket = this.categories.get(genericHideException);
if ( !bucket || this.matchTokens(bucket, url) === false ) {
this.fRegister = null;
return 0;
// There is a hit on an exception: look for an important block filter.
bucket = this.categories.get(genericHideImportant);
if ( bucket && this.matchTokens(bucket, url) ) {
this.cbRegister = genericHideImportant;
return 1;
this.cbRegister = genericHideException;
return 2;
// https://github.com/chrisaljoudi/uBlock/issues/116
// Some type of requests are exceptional, they need custom handling,
// not the generic handling.
// Match `requestURL` against the filters of one specific request type only.
//   context: provides `pageHostname` and `pageDomain`
//   requestType: a key of `typeNameToTypeValue`, or 'generichide'
// Returns 0 when there is no hit (or the type is unknown), 1 for a block
// result, 2 when an allow filter matched. On a hit, the category bits are
// left in `this.cbRegister`.
FilterContainer.prototype.matchStringExactType = function(context, requestURL, requestType) {
// Special cases.
if ( requestType === 'generichide' ) {
return this.matchStringGenericHide(context, requestURL);
var type = typeNameToTypeValue[requestType];
if ( type === undefined ) {
return 0;
// Prime tokenizer: we get a normalized URL in return.
var url = this.urlTokenizer.setURL(requestURL);
// These registers will be used by various filters
pageHostnameRegister = context.pageHostname || '';
requestHostnameRegister = µb.URI.hostnameFromURI(url);
var party = isFirstParty(context.pageDomain, requestHostnameRegister) ? FirstParty : ThirdParty,
categories = this.categories,
catBits, bucket;
this.fRegister = null;
// https://github.com/chrisaljoudi/uBlock/issues/139
// Test against important block filters
catBits = BlockAnyParty | Important | type;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 1;
catBits = BlockAction | Important | type | party;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 1;
// Test against block filters
catBits = BlockAnyParty | type;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
// The party-specific category is tested only when there is no hit yet.
if ( this.fRegister === null ) {
catBits = BlockAction | type | party;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
// If there is no block filter, no need to test against allow filters
if ( this.fRegister === null ) {
return 0;
// Test against allow filters
catBits = AllowAnyParty | type;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 2;
catBits = AllowAction | type | party;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 2;
return 1;
// Match a request against the static network filters.
//   context: provides `requestType`, `requestURL`, `requestHostname`,
//            `pageHostname` and `pageDomain`
// Returns 0 when there is no hit, 1 for a block result, 2 when an allow
// filter matched. On a hit, the category bits are left in `this.cbRegister`.
FilterContainer.prototype.matchString = function(context) {
// https://github.com/chrisaljoudi/uBlock/issues/519
// Use exact type match for anything beyond `other`
// Also, be prepared to support unknown types
var type = typeNameToTypeValue[context.requestType];
if ( type === undefined ) {
type = otherTypeBitValue;
} else if ( type === 0 || type > otherTypeBitValue ) {
return this.matchStringExactType(context, context.requestURL, context.requestType);
// The logic here is simple:
// block = !whitelisted && blacklisted
// or equivalent
// allow = whitelisted || !blacklisted
// Statistically, hits on a URL in order of likelihood:
// 1. No hit
// 2. Hit on a block filter
// 3. Hit on an allow filter
// High likelihood of "no hit" means to optimize we need to reduce as much
// as possible the number of filters to test.
// Then, because of the order of probabilities, we should test only
// block filters first, and test allow filters if and only if there is a
// hit on a block filter. Since there is a high likelihood of no hit,
// testing allow filter by default is likely wasted work, hence allow
// filters are tested *only* if there is a (unlikely) hit on a block
// filter.
// Prime tokenizer: we get a normalized URL in return.
var url = this.urlTokenizer.setURL(context.requestURL);
// These registers will be used by various filters
pageHostnameRegister = context.pageHostname || '';
requestHostnameRegister = context.requestHostname;
this.fRegister = null;
var party = isFirstParty(context.pageDomain, context.requestHostname)
? FirstParty
: ThirdParty;
var categories = this.categories,
catBits, bucket;
// https://github.com/chrisaljoudi/uBlock/issues/139
// Test against important block filters.
// The purpose of the `important` option is to reverse the order of
// evaluation. Normally, it is "evaluate block then evaluate allow", with
// the `important` property it is "evaluate allow then evaluate block".
// Categories are tested from the least to the most specific: any type and
// any party, any type, any party, then exact type and party.
catBits = BlockAnyTypeAnyParty | Important;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 1;
catBits = BlockAnyType | Important | party;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 1;
catBits = BlockAnyParty | Important | type;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 1;
catBits = BlockAction | Important | type | party;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 1;
// Test against block filters
// Each further category is tested only while there is no hit yet.
catBits = BlockAnyTypeAnyParty;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
if ( this.fRegister === null ) {
catBits = BlockAnyType | party;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
if ( this.fRegister === null ) {
catBits = BlockAnyParty | type;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
if ( this.fRegister === null ) {
catBits = BlockAction | type | party;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
// If there is no block filter, no need to test against allow filters
if ( this.fRegister === null ) {
return 0;
// Test against allow filters
catBits = AllowAnyTypeAnyParty;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 2;
catBits = AllowAnyType | party;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 2;
catBits = AllowAnyParty | type;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 2;
catBits = AllowAction | type | party;
if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) {
this.cbRegister = catBits;
return 2;
return 1;
// Build the log data for the filter left in the registers by the last
// match. Returns undefined when no filter is registered.
FilterContainer.prototype.toLogData = function() {
if ( this.fRegister === null ) { return; }
var logData = toLogDataInternal(this.cbRegister, this.thRegister, this.fRegister);
logData.source = 'static';
logData.tokenHash = this.thRegister;
// Bit 0 of the category bits is the allow action: 2 = allowed, 1 = blocked.
// NOTE(review): `fRegister` can't be null here given the guard above, so the
// `0` arm is never taken.
logData.result = this.fRegister === null ? 0 : (this.cbRegister & 1 ? 2 : 1);
return logData;
// Number of filters accepted, less those which were discarded.
FilterContainer.prototype.getFilterCount = function() {
return this.acceptedCount - this.discardedCount;
// Value of the enclosing function expression, assigned to
// `µBlock.staticNetFilteringEngine`: a single FilterContainer instance.
return new FilterContainer();