uBlock/src/js/static-net-filtering.js

3756 lines
110 KiB
JavaScript

/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2014-present Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
/* jshint bitwise: false */
/* global punycode */
'use strict';
/******************************************************************************/
µBlock.staticNetFilteringEngine = (( ) => {
/******************************************************************************/
// Short local alias for the global `µBlock` object.
const µb = µBlock;
// The `urlTokenizer` property of `µBlock`, kept in a local for convenience.
const urlTokenizer = µb.urlTokenizer;
// Bit layout of the category bits attached to a filter:
//
//      fedcba9876543210
//            |    | |||
//            |    | ||+---- bit    0: [BlockAction | AllowAction]
//            |    | |+----- bit    1: `important`
//            |    | +------ bit 2- 3: party [0 - 3]
//            |    +-------- bit 4- 8: type [0 - 31]
//            +------------- bit 9-15: unused

// Bit 0: action.
const BlockAction = 0b0000;
const AllowAction = 0b0001;

// Bit 1: `important` option.
const Important = 0b0010;

// Bits 2-3: party.
const AnyParty = 0b0000;
const FirstParty = 0b0100;
const ThirdParty = 0b1000;

// Convenience combination: a blocking filter flagged as `important`.
const BlockImportant = Important | BlockAction;
// Maps a type name to its type value, already shifted left by 4 so that it
// sits in the type field (bits 4-8) of the category bits. Some names are
// aliases and share the same value.
const typeNameToTypeValue = (( ) => {
    const typeIndices = [
        [ 'no_type', 0 ],
        [ 'stylesheet', 1 ],
        [ 'image', 2 ],
        [ 'object', 3 ],
        [ 'object_subrequest', 3 ],
        [ 'script', 4 ],
        [ 'fetch', 5 ],
        [ 'xmlhttprequest', 5 ],
        [ 'sub_frame', 6 ],
        [ 'font', 7 ],
        [ 'media', 8 ],
        [ 'websocket', 9 ],
        [ 'beacon', 10 ],
        [ 'ping', 10 ],
        [ 'other', 11 ],
        // start of behavioral filtering
        [ 'popup', 12 ],
        [ 'popunder', 13 ],
        // start of 1st-party-only behavioral filtering
        [ 'main_frame', 14 ],
        [ 'generichide', 15 ],
        [ 'specifichide', 16 ],
        [ 'inline-font', 17 ],
        [ 'inline-script', 18 ],
        // special: a generic data holder
        [ 'data', 19 ],
        [ 'redirect', 20 ],
        [ 'webrtc', 21 ],
        [ 'unsupported', 22 ],
    ];
    const map = {};
    for ( const [ name, index ] of typeIndices ) {
        map[name] = index << 4;
    }
    return map;
})();
const otherTypeBitValue = typeNameToTypeValue.other;

// All network request types to bitmap:
//   - bring origin to 0 (from 4 -- see typeNameToTypeValue)
//   - left-shift 1 by the above-calculated value
//   - subtract 1 to set all type bits
const allNetworkTypesBits = (1 << (otherTypeBitValue >>> 4)) - 1;

// In these bitmaps, the type whose (unshifted) value is N is represented by
// bit N-1. The parentheses below only spell out the operator precedence:
// subtraction binds tighter than `<<`, which binds tighter than `|`.
const allTypesBits =
    allNetworkTypesBits |
    (1 << ((typeNameToTypeValue.popup >>> 4) - 1)) |
    (1 << ((typeNameToTypeValue.main_frame >>> 4) - 1)) |
    (1 << ((typeNameToTypeValue['inline-font'] >>> 4) - 1)) |
    (1 << ((typeNameToTypeValue['inline-script'] >>> 4) - 1));

const unsupportedTypeBit =
    1 << ((typeNameToTypeValue.unsupported >>> 4) - 1);
// Reverse lookup: (unshifted) type value => type name. Type values start
// at 1; there is no entry for 0.
const typeValueToTypeName = {};
[
    'stylesheet',
    'image',
    'object',
    'script',
    'xmlhttprequest',
    'subdocument',
    'font',
    'media',
    'websocket',
    'ping',
    'other',
    'popup',
    'popunder',
    'document',
    'generichide',
    'specifichide',
    'inline-font',
    'inline-script',
    'data',
    'redirect',
    'webrtc',
    'unsupported',
].forEach((name, index) => {
    typeValueToTypeName[index + 1] = name;
});
// Maps a type name, or one of its aliases, to a normalized type name.
// https://github.com/gorhill/uBlock/issues/1493
// NOTE(review): an earlier comment here said `ping` is transposed into
// `other`; the table below maps `ping` and `beacon` to `ping`.
const toNormalizedType = {
    all: 'all',
    beacon: 'ping',
    css: 'stylesheet',
    data: 'data',
    doc: 'main_frame',
    document: 'main_frame',
    font: 'font',
    frame: 'sub_frame',
    genericblock: 'unsupported',
    generichide: 'generichide',
    ghide: 'generichide',
    image: 'image',
    'inline-font': 'inline-font',
    'inline-script': 'inline-script',
    media: 'media',
    object: 'object',
    'object-subrequest': 'object',
    other: 'other',
    ping: 'ping',
    popunder: 'popunder',
    popup: 'popup',
    script: 'script',
    specifichide: 'specifichide',
    shide: 'specifichide',
    stylesheet: 'stylesheet',
    subdocument: 'sub_frame',
    xhr: 'xmlhttprequest',
    xmlhttprequest: 'xmlhttprequest',
    webrtc: 'unsupported',
    websocket: 'websocket',
};
// Extracts the 5-bit type value stored in bits 4-8 of the category bits.
const typeValueFromCatBits = catBits => {
    const typeMask = 0x1F;
    return (catBits >>> 4) & typeMask;
};
/******************************************************************************/
// See the following as short-lived registers, used during evaluation. They are
// valid until the next evaluation.
// URL tested by the regex-based units (FilterPatternGeneric, FilterRegex).
let $requestURL = '';
// Hostname tested by FilterPlainHostname; its length is used by FilterAnchorHn.
let $requestHostname = '';
// Hostname tested by the FilterOrigin* units.
let $docHostname = '';
// Position from which the FilterPatternPlain* units derive where their
// pattern must start in the haystack.
let $tokenBeg = 0;
// Left and right boundaries of the current match in the haystack. Written by
// the pattern units, read by the anchor and separator units.
let $patternMatchLeft = 0;
let $patternMatchRight = 0;
// EXPERIMENT: $requestTypeBit
// Bit tested against a FilterType unit's `typeBits`.
let $requestTypeBit = 0;
/******************************************************************************/
// Local helpers

// Regex source for a separator: any character other than `%`, `.`, a digit,
// a lowercase letter, `_` or `-` -- or the end of the input.
const restrSeparator = '(?:[^%.0-9a-z_-]|$)';

// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
const reEscape = /[.*+?^${}()|[\]\\]/g;

// Convert a plain string (devoid of special characters) into a regex, by
// escaping every regex metacharacter.
const restrFromPlainPattern = s => {
    return s.replace(reEscape, '\\$&');
};

// Convert a generic filter pattern (one which may contain `*` wildcards and
// `^` separator placeholders) into regex source.
//
// `anchor` is a bit field:
//   0b100: anchored to the hostname
//   0b010: anchored to the start of the URL (ignored when 0b100 is set)
//   0b001: anchored to the end of the URL
const restrFromGenericPattern = function(s, anchor = 0) {
    const my = restrFromGenericPattern;
    // Order matters: escape first, so that what gets injected afterward is
    // left untouched.
    const escaped = s.replace(my.rePlainChars, '\\$&');
    const separated = escaped.replace(my.reSeparators, restrSeparator);
    const trimmed = separated.replace(my.reDanglingAsterisks, '');
    const body = trimmed.replace(my.reAsterisks, '\\S*?');
    let prefix = '';
    if ( (anchor & 0b100) !== 0 ) {
        // A pattern starting with a literal dot uses the second form of the
        // hostname anchor.
        prefix = body.startsWith('\\.')
            ? my.restrHostnameAnchor2
            : my.restrHostnameAnchor1;
    } else if ( (anchor & 0b010) !== 0 ) {
        prefix = '^';
    }
    const suffix = (anchor & 0b001) !== 0 ? '$' : '';
    return prefix + body + suffix;
};
Object.assign(restrFromGenericPattern, {
    rePlainChars: /[.+?${}()|[\]\\]/g,
    reSeparators: /\^/g,
    reDanglingAsterisks: /^\*+|\*+$/g,
    reAsterisks: /\*+/g,
    restrHostnameAnchor1: '^[a-z-]+://(?:[^/?#]+\\.)?',
    restrHostnameAnchor2: '^[a-z-]+://(?:[^/?#]+)?',
});
// Rebuilds a textual representation of the filter made of the unit at index
// `iunit` combined with the category bits.
//
// Returns `undefined` when `iunit` is 0, otherwise an object with:
//   raw:   the filter as text (pattern, then `$`-prefixed options if any)
//   regex: the concatenated regex fragments reported by the unit
//
// `tokenHash` is not used in this function.
const toLogDataInternal = function(categoryBits, tokenHash, iunit) {
    if ( iunit === 0 ) { return; }
    const patternParts = [];
    const regexParts = [];
    const optionList = [];
    const domainList = [];
    const logData = {
        pattern: patternParts,
        regex: regexParts,
        domains: domainList,
        options: optionList,
        isRegex: false,
    };
    // Let the unit report its own details first.
    filterUnits[iunit].logData(logData);
    // Exception filter.
    if ( (categoryBits & 0b0001) !== 0 ) {
        logData.pattern.unshift('@@');
    }
    if ( (categoryBits & 0b0010) !== 0 ) {
        logData.options.unshift('important');
    }
    // Party: 3rd-party is tested first.
    if ( (categoryBits & 0b1000) !== 0 ) {
        logData.options.unshift('3p');
    } else if ( (categoryBits & 0b0100) !== 0 ) {
        logData.options.unshift('1p');
    }
    // Type: `no_type` and `data` are not reported as an option.
    const typeBits = categoryBits & 0x1F0;
    if ( typeBits !== 0 && typeBits !== typeNameToTypeValue.data ) {
        logData.options.unshift(typeValueToTypeName[typeBits >>> 4]);
    }
    let raw = logData.pattern.join('');
    // A non-regex pattern which starts and ends with `/` would read as a
    // regex: append a wildcard to tell them apart.
    if (
        logData.isRegex === false &&
        raw.startsWith('/') &&
        raw.endsWith('/')
    ) {
        raw = `${raw}*`;
    }
    if ( domainList.length !== 0 ) {
        optionList.push(`domain=${domainList.join('|')}`);
    }
    if ( optionList.length !== 0 ) {
        raw = `${raw}$${optionList.join(',')}`;
    }
    return { raw, regex: logData.regex.join('') };
};
/******************************************************************************/
// Per-character class flags, indexed by character code (0-127).
const charClassMap = new Uint32Array(128);
const CHAR_CLASS_SEPARATOR = 0b00000001;

// Flag as separator every character which is neither a word character
// (letter, digit, `_`) nor one of `%`, `.`, `-`.
(( ) => {
    const reNotSeparator = /[\w%.-]/;
    for ( let code = 0; code < 128; code++ ) {
        if ( reNotSeparator.test(String.fromCharCode(code)) ) { continue; }
        charClassMap[code] |= CHAR_CLASS_SEPARATOR;
    }
})();

// Whether the character code `c` is flagged as a separator. Codes outside
// the table are not flagged.
const isSeparatorChar = c => {
    return (charClassMap[c] & CHAR_CLASS_SEPARATOR) !== 0;
};
/******************************************************************************/
// All filter unit instances, addressed by index. Index 0 holds `null`.
let filterUnits = [ null ];

// Storage for sequences of filter units: each entry is a pair of integers,
// and an entry is referred to by the index of its first integer.
let filterSequences = new Uint32Array(0x20000);
let filterSequenceWritePtr = 3;

// Appends the pair (a, b) to the sequence buffer, growing the buffer when
// needed. Returns the index at which the pair was written.
const filterSequenceAdd = function(a, b) {
    const slot = filterSequenceWritePtr;
    const nextSlot = slot + 2;
    filterSequenceWritePtr = nextSlot;
    if ( nextSlot > filterSequences.length ) {
        filterSequenceBufferResize(nextSlot);
    }
    filterSequences[slot] = a;
    filterSequences[slot + 1] = b;
    return slot;
};

// Grows the sequence buffer so that it can hold at least `newSize` integers.
// The new size is rounded up to a multiple of 16384. Never shrinks.
const filterSequenceBufferResize = function(newSize) {
    if ( filterSequences.length >= newSize ) { return; }
    const grown = new Uint32Array((newSize + 0x3FFF) & ~0x3FFF);
    grown.set(filterSequences);
    filterSequences = grown;
};
/******************************************************************************/
// Callback handed to the bidi-trie container. Walks the sequence of filter
// units starting at `ix`, and returns the index of the first unit which
// matches, or 0 if none does. The match boundaries `l` and `r` are restored
// in the registers before each unit is tested.
const bidiTrieMatchExtra = function(l, r, ix) {
    let iseq = ix;
    do {
        $patternMatchLeft = l;
        $patternMatchRight = r;
        const iunit = filterSequences[iseq + 0];
        if ( filterUnits[iunit].match() ) { return iunit; }
        iseq = filterSequences[iseq + 1];
    } while ( iseq !== 0 );
    return 0;
};
// The bidi-trie container instance used by the pattern units. It is created
// with the details saved in local storage under `SNFE.bidiTrieDetails` (see
// bidiTrieOptimize), if any.
const bidiTrie = (( ) => {
    // Best effort: if the saved details can't be read or parsed, the
    // container is created with `undefined` details.
    const readSavedDetails = ( ) => {
        try {
            return JSON.parse(
                vAPI.localStorage.getItem('SNFE.bidiTrieDetails')
            );
        } catch(ex) {
        }
    };
    const trieDetails = readSavedDetails();
    const container = new µb.BidiTrieContainer(
        trieDetails,
        bidiTrieMatchExtra
    );
    // WebAssembly is used unless explicitly disabled.
    if ( µb.hiddenSettings.disableWebAssembly !== true ) {
        container.enableWASM();
    }
    return container;
})();
// Optimizes the bidi-trie, then saves the details returned by the
// optimization in local storage under `SNFE.bidiTrieDetails`.
const bidiTrieOptimize = function(shrink = false) {
    const serialized = JSON.stringify(bidiTrie.optimize(shrink));
    vAPI.localStorage.setItem('SNFE.bidiTrieDetails', serialized);
};
/*******************************************************************************

    Each filter class will register itself in the map.

    IMPORTANT: any change which modifies the mapping will have to be
    reflected with µBlock.systemSettings.compiledMagic.

*/

// Registered filter classes, indexed by filter class id (`fid`).
const filterClasses = [];
let filterClassIdGenerator = 0;

// Assigns the next available id to `ctor` -- exposed as `fid` on both the
// class and its prototype -- and records the class under that id.
const registerFilterClass = function(ctor) {
    const fid = filterClassIdGenerator;
    filterClassIdGenerator += 1;
    ctor.prototype.fid = fid;
    ctor.fid = fid;
    filterClasses[fid] = ctor;
};
// Returns the unit index of a filter of class `ctor`. Classes which expose
// a `filterUnit` property are served from it; for any other class a new
// instance is created with `args` and appended to `filterUnits`.
const filterFromCtor = function(ctor, ...args) {
    const sharedUnit = ctor.filterUnit;
    if ( sharedUnit !== undefined ) { return sharedUnit; }
    const unit = new ctor(...args);
    return filterUnits.push(unit) - 1;
};
// Dispatches a compiled filter to the class it belongs to: the first item
// of `args` is the filter class id. Returns whatever the class's
// `unitFromCompiled()` returns.
const filterUnitFromCompiled = function(args) {
    const fid = args[0];
    return filterClasses[fid].unitFromCompiled(args);
};
// Dispatches a serialized filter to the class it belongs to: the first item
// of `args` is the filter class id. Returns whatever the class's
// `fromSelfie()` returns.
const filterFromSelfie = function(args) {
    const ctor = filterClasses[args[0]];
    return ctor.fromSelfie(args);
};
/******************************************************************************/
// Picks the filter class(es) best suited to the pattern of a parsed filter,
// and appends the resulting compiled unit(s) to `units`.
const filterPattern = {
    compile: function(parsed, units) {
        // Regex-based
        if ( parsed.isRegex ) {
            units.push(FilterRegex.compile(parsed));
            return;
        }
        const pattern = parsed.f;
        // Match-all
        if ( pattern === '*' ) {
            units.push(FilterTrue.compile());
            return;
        }
        // No usable token
        if ( parsed.tokenHash === parsed.noTokenHash ) {
            units.push(FilterPatternGeneric.compile(parsed));
            return;
        }
        const wildcardPos = parsed.firstWildcardPos;
        const caretPos = parsed.firstCaretPos;
        // Neither wildcard nor caret: plain pattern
        if ( wildcardPos === -1 && caretPos === -1 ) {
            units.push(FilterPatternPlain.compile(parsed));
            return;
        }
        // More than one wildcard, more than one caret, or a caret which is
        // not immediately followed by the one wildcard: not decomposable
        // into two plain halves.
        const loneCaret = caretPos !== -1 && (
            wildcardPos === -1 ||
            wildcardPos !== (caretPos + 1)
        );
        if (
            parsed.secondWildcardPos !== -1 ||
            parsed.secondCaretPos !== -1 ||
            loneCaret
        ) {
            return this.compileGeneric(parsed, units);
        }
        // From here on: `left*right` or `left^*right`.
        const hasCaretCombo = caretPos !== -1;
        const sright = pattern.slice(wildcardPos + 1);
        const sleft = pattern.slice(0, hasCaretCombo ? caretPos : wildcardPos);
        // The half holding the token becomes the plain unit, the other half
        // is searched for on the relevant side.
        if ( parsed.tokenBeg < wildcardPos ) {
            parsed.f = sleft;
            units.push(FilterPatternPlain.compile(parsed));
            parsed.f = sright;
            units.push(FilterPatternRight.compile(parsed, hasCaretCombo));
            return;
        }
        // parsed.tokenBeg > parsed.firstWildcardPos
        parsed.f = sright;
        parsed.tokenBeg -= wildcardPos + 1;
        units.push(FilterPatternPlain.compile(parsed));
        parsed.f = sleft;
        units.push(FilterPatternLeft.compile(parsed, hasCaretCombo));
    },
    compileGeneric: function(parsed, units) {
        const pattern = parsed.f;
        // Optimize special case: plain pattern with trailing caret
        const hasOnlyTrailingCaret =
            parsed.firstWildcardPos === -1 &&
            parsed.firstCaretPos === (pattern.length - 1);
        if ( hasOnlyTrailingCaret ) {
            parsed.f = pattern.slice(0, -1);
            units.push(FilterPatternPlain.compile(parsed));
            units.push(FilterTrailingSeparator.compile());
            return;
        }
        // TODO: inconclusive, investigate more.
        // An experiment consisted in using a plain pattern as a first test
        // for whether the generic pattern needs to be matched: the plain
        // pattern was the portion of the pattern surrounding the token,
        // extended left and right up to the nearest `*` or `^`, and was
        // compiled as a FilterPatternPlain unit ahead of the generic unit.
        units.push(FilterPatternGeneric.compile(parsed));
    },
};
/******************************************************************************/
// Unit which matches unconditionally. A single shared instance is used, and
// it is reported in the logger as the `*` pattern.
const FilterTrue = class {
    match() {
        return true;
    }

    logData(details) {
        details.pattern.push('*');
        details.regex.push('^');
    }

    toSelfie() {
        return FilterTrue.compile();
    }

    static compile() {
        return [ FilterTrue.fid ];
    }

    // No new unit is ever created: all use the shared one.
    static unitFromCompiled() {
        return FilterTrue.filterUnit;
    }

    static fromSelfie() {
        return FilterTrue.instance;
    }
};

FilterTrue.instance = new FilterTrue();
FilterTrue.filterUnit = filterUnits.length;
filterUnits.push(FilterTrue.instance);

registerFilterClass(FilterTrue);
/******************************************************************************/
// Plain pattern unit. The characters of the pattern are stored through
// `bidiTrie.storeString()`: `i` is the value returned by that call and `n`
// is the length of the pattern.
//
// Three flavors exist, depending on where the token starts in the pattern:
//   FilterPatternPlain:  token at offset 0
//   FilterPatternPlain1: token at offset 1
//   FilterPatternPlainX: token at any other offset, stored in the instance
// All three share the same filter class id.
const FilterPatternPlain = class {
    constructor(i, n) {
        this.i = i | 0;
        this.n = n | 0;
    }

    // The pattern must be found in the haystack at the position of the
    // token. On success the match boundaries are set in the registers.
    match() {
        const left = $tokenBeg;
        const found = bidiTrie.startsWith(
            left, bidiTrie.haystackLen,
            this.i, this.n
        );
        if ( found === 0 ) { return false; }
        $patternMatchLeft = left;
        $patternMatchRight = left + this.n;
        return true;
    }

    get isBidiTrieable() {
        return this.n <= 255;
    }

    toBidiTrie() {
        const { i, n, tokenBeg: itok } = this;
        return { i, n, itok };
    }

    logData(details) {
        const s = bidiTrie.extractString(this.i, this.n);
        details.pattern.push(s);
        details.regex.push(restrFromPlainPattern(s));
    }

    toSelfie() {
        return [ this.fid, this.i, this.n, this.tokenBeg ];
    }

    static compile(details) {
        return [ FilterPatternPlain.fid, details.f, details.tokenBeg ];
    }

    // args: [ fid, pattern, token offset ]
    static unitFromCompiled(args) {
        const pattern = args[1];
        const tokenBeg = args[2];
        const i = bidiTrie.storeString(pattern);
        const n = pattern.length;
        let unit;
        switch ( tokenBeg ) {
        case 0:
            unit = new FilterPatternPlain(i, n);
            break;
        case 1:
            unit = new FilterPatternPlain1(i, n);
            break;
        default:
            unit = new FilterPatternPlainX(i, n, tokenBeg);
            break;
        }
        return filterUnits.push(unit) - 1;
    }

    // args: [ fid, i, n, token offset ]
    static fromSelfie(args) {
        const i = args[1];
        const n = args[2];
        const tokenBeg = args[3];
        switch ( tokenBeg ) {
        case 0:
            return new FilterPatternPlain(i, n);
        case 1:
            return new FilterPatternPlain1(i, n);
        default:
            return new FilterPatternPlainX(i, n, tokenBeg);
        }
    }
};

FilterPatternPlain.prototype.tokenBeg = 0;

registerFilterClass(FilterPatternPlain);

// Same as FilterPatternPlain, for a token at offset 1 in the pattern.
const FilterPatternPlain1 = class extends FilterPatternPlain {
    match() {
        const left = $tokenBeg - 1;
        const found = bidiTrie.startsWith(
            left, bidiTrie.haystackLen,
            this.i, this.n
        );
        if ( found === 0 ) { return false; }
        $patternMatchLeft = left;
        $patternMatchRight = left + this.n;
        return true;
    }
};

FilterPatternPlain1.prototype.tokenBeg = 1;

// Same as FilterPatternPlain, for a token at an arbitrary offset in the
// pattern.
const FilterPatternPlainX = class extends FilterPatternPlain {
    constructor(i, n, tokenBeg) {
        super(i, n);
        this.tokenBeg = tokenBeg;
    }

    match() {
        const left = $tokenBeg - this.tokenBeg;
        const found = bidiTrie.startsWith(
            left, bidiTrie.haystackLen,
            this.i, this.n
        );
        if ( found === 0 ) { return false; }
        $patternMatchLeft = left;
        $patternMatchRight = left + this.n;
        return true;
    }
};
/******************************************************************************/
// Unit for the left-hand part of a `left*right` pattern, used when the plain
// unit holds the right-hand part: the left-hand part is searched for in the
// haystack, ahead of the current left boundary of the match.
const FilterPatternLeft = class {
    constructor(i, n) {
        this.i = i | 0;
        this.n = n | 0;
    }

    match() {
        const found = bidiTrie.indexOf(
            0, $patternMatchLeft,
            this.i, this.n
        );
        if ( found === -1 ) { return false; }
        $patternMatchLeft = found;
        return true;
    }

    logData(details) {
        const s = bidiTrie.extractString(this.i, this.n);
        details.pattern.unshift(s, '*');
        details.regex.unshift(restrFromPlainPattern(s), '.*');
    }

    toSelfie() {
        return [ this.fid, this.i, this.n ];
    }

    // `ex` selects the flavor which handles `left^*right`.
    static compile(details, ex) {
        const fid = ex ? FilterPatternLeftEx.fid : FilterPatternLeft.fid;
        return [ fid, details.f ];
    }

    static unitFromCompiled(args) {
        const pattern = args[1];
        const unit = new FilterPatternLeft(
            bidiTrie.storeString(pattern),
            pattern.length
        );
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterPatternLeft(args[1], args[2]);
    }
};

registerFilterClass(FilterPatternLeft);

// Same as FilterPatternLeft, for `left^*right` patterns: the occurrence of
// the left-hand part must be immediately followed by a separator character.
const FilterPatternLeftEx = class extends FilterPatternLeft {
    match() {
        let from = 0;
        for (;;) {
            const found = bidiTrie.indexOf(
                from, $patternMatchLeft - 1,
                this.i, this.n
            );
            if ( found === -1 ) { return false; }
            if ( isSeparatorChar(bidiTrie.haystack[found + this.n]) ) {
                $patternMatchLeft = found;
                return true;
            }
            // Not followed by a separator: resume just past this occurrence.
            from = found + 1;
        }
    }

    logData(details) {
        const s = bidiTrie.extractString(this.i, this.n);
        details.pattern.unshift(s, '^*');
        details.regex.unshift(restrFromPlainPattern(s), restrSeparator, '.*');
    }

    static unitFromCompiled(args) {
        const pattern = args[1];
        const unit = new FilterPatternLeftEx(
            bidiTrie.storeString(pattern),
            pattern.length
        );
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterPatternLeftEx(args[1], args[2]);
    }
};

registerFilterClass(FilterPatternLeftEx);
/******************************************************************************/
// Unit for the right-hand part of a `left*right` pattern, used when the
// plain unit holds the left-hand part: the right-hand part is searched for
// in the haystack, past the current right boundary of the match.
const FilterPatternRight = class {
    constructor(i, n) {
        this.i = i | 0;
        this.n = n | 0;
    }

    match() {
        const found = bidiTrie.lastIndexOf(
            $patternMatchRight, bidiTrie.haystackLen,
            this.i, this.n
        );
        if ( found === -1 ) { return false; }
        $patternMatchRight = found + this.n;
        return true;
    }

    logData(details) {
        const s = bidiTrie.extractString(this.i, this.n);
        details.pattern.push('*', s);
        details.regex.push('.*', restrFromPlainPattern(s));
    }

    toSelfie() {
        return [ this.fid, this.i, this.n ];
    }

    // `ex` selects the flavor which handles `left^*right`.
    static compile(details, ex) {
        const fid = ex ? FilterPatternRightEx.fid : FilterPatternRight.fid;
        return [ fid, details.f ];
    }

    static unitFromCompiled(args) {
        const pattern = args[1];
        const unit = new FilterPatternRight(
            bidiTrie.storeString(pattern),
            pattern.length
        );
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterPatternRight(args[1], args[2]);
    }
};

registerFilterClass(FilterPatternRight);

// Same as FilterPatternRight, for `left^*right` patterns: the character at
// the current right boundary of the match must be a separator, and the
// right-hand part is searched for past that character.
const FilterPatternRightEx = class extends FilterPatternRight {
    match() {
        const boundary = $patternMatchRight;
        const found = bidiTrie.lastIndexOf(
            boundary + 1, bidiTrie.haystackLen,
            this.i, this.n
        );
        if ( found === -1 ) { return false; }
        const atSeparator = isSeparatorChar(bidiTrie.haystack[boundary]);
        if ( atSeparator === false ) { return false; }
        $patternMatchRight = found + this.n;
        return true;
    }

    logData(details) {
        const s = bidiTrie.extractString(this.i, this.n);
        details.pattern.push('^*', s);
        details.regex.push(restrSeparator, '.*', restrFromPlainPattern(s));
    }

    static unitFromCompiled(args) {
        const pattern = args[1];
        const unit = new FilterPatternRightEx(
            bidiTrie.storeString(pattern),
            pattern.length
        );
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterPatternRightEx(args[1], args[2]);
    }
};

registerFilterClass(FilterPatternRightEx);
/******************************************************************************/
// Unit for patterns which can't be decomposed into plain parts: the pattern
// is converted into a regex, which is tested against the whole request URL.
// The regex is created only when the unit is first evaluated.
const FilterPatternGeneric = class {
    constructor(s, anchor) {
        this.s = s;
        // When there is no anchor, the default from the prototype is used.
        if ( anchor !== 0 ) {
            this.anchor = anchor;
        }
    }

    match() {
        let re = this.re;
        if ( re === null ) {
            re = new RegExp(restrFromGenericPattern(this.s, this.anchor));
            this.re = re;
        }
        return re.test($requestURL);
    }

    // A generic pattern embeds its own anchors, hence whatever was reported
    // so far for the pattern and the regex is discarded.
    logData(details) {
        const { s, anchor } = this;
        const parts = details.pattern;
        parts.length = 0;
        if ( (anchor & 0b100) !== 0 ) {
            parts.push('||');
        } else if ( (anchor & 0b010) !== 0 ) {
            parts.push('|');
        }
        parts.push(s);
        if ( (anchor & 0b001) !== 0 ) {
            parts.push('|');
        }
        const fragments = details.regex;
        fragments.length = 0;
        fragments.push(restrFromGenericPattern(s, anchor & ~0b100));
    }

    toSelfie() {
        return [ this.fid, this.s, this.anchor ];
    }

    // The anchor is taken over by the compiled unit, and reset to 0 in
    // `details`.
    static compile(details) {
        const compiled = [ FilterPatternGeneric.fid, details.f, details.anchor ];
        details.anchor = 0;
        return compiled;
    }

    static unitFromCompiled(args) {
        const unit = new FilterPatternGeneric(args[1], args[2]);
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterPatternGeneric(args[1], args[2]);
    }
};

// Defaults shared by all instances.
FilterPatternGeneric.prototype.re = null;
FilterPatternGeneric.prototype.anchor = 0;

FilterPatternGeneric.isSlow = true;

registerFilterClass(FilterPatternGeneric);
/******************************************************************************/
// Unit for a filter which is a plain hostname (`||hostname^`): matches when
// the request hostname is the filter's hostname, or a subdomain of it.
const FilterPlainHostname = class {
    constructor(s) {
        this.s = s;
    }

    match() {
        const hostname = $requestHostname;
        const needle = this.s;
        if ( hostname.endsWith(needle) === false ) { return false; }
        const offset = hostname.length - needle.length;
        // Exact match
        if ( offset === 0 ) { return true; }
        // Otherwise must match at a label boundary
        return hostname.charCodeAt(offset - 1) === 0x2E /* '.' */;
    }

    logData(details) {
        details.pattern.push('||', this.s, '^');
        details.regex.push(restrFromPlainPattern(this.s), restrSeparator);
    }

    toSelfie() {
        return [ this.fid, this.s ];
    }

    static compile(details) {
        return [ FilterPlainHostname.fid, details.f ];
    }

    static unitFromCompiled(args) {
        const unit = new FilterPlainHostname(args[1]);
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterPlainHostname(args[1]);
    }
};

registerFilterClass(FilterPlainHostname);
/******************************************************************************/
// Unit implementing the hostname anchor (`||`): the left boundary of the
// current match must be located inside the hostname portion of the haystack,
// either at its very start or right after a dot.
//
// A single shared instance is used. It caches where the hostname starts and
// ends in the haystack, and recomputes these only when the cached values are
// found to be out of date.
const FilterAnchorHn = class {
constructor() {
// Length of the request hostname when the boundaries were last computed.
this.lastLen = 0;
// Cached start and end of the hostname in the haystack, -1 when unknown.
this.lastBeg = -1;
this.lastEnd = -1;
}
match() {
const len = $requestHostname.length;
const haystackCodes = bidiTrie.haystack;
// The cached boundaries are recomputed when the hostname length differs
// from the cached one, when no start position is cached, or when the three
// characters preceding the cached start position are no longer `://`.
if (
len !== this.lastLen ||
this.lastBeg === -1 ||
haystackCodes[this.lastBeg-3] !== 0x3A /* ':' */ ||
haystackCodes[this.lastBeg-2] !== 0x2F /* '/' */ ||
haystackCodes[this.lastBeg-1] !== 0x2F /* '/' */
) {
// Look up the first `:` in the haystack, unless the hostname is empty.
this.lastBeg = len !== 0 ? haystackCodes.indexOf(0x3A) : -1;
if ( this.lastBeg !== -1 ) {
// The `:` must be located before the end of the haystack content, and
// must be followed by `//`.
if (
this.lastBeg >= bidiTrie.haystackLen ||
haystackCodes[this.lastBeg+1] !== 0x2F ||
haystackCodes[this.lastBeg+2] !== 0x2F
) {
this.lastBeg = -1;
}
}
if ( this.lastBeg !== -1 ) {
// The hostname starts right after `://`, and spans `len` characters.
this.lastBeg += 3;
this.lastEnd = this.lastBeg + len;
} else {
this.lastEnd = -1;
}
this.lastLen = len;
}
const left = $patternMatchLeft;
// The match must start before the end of the hostname, and either at the
// start of the hostname or right after a `.`.
return left < this.lastEnd && (
left === this.lastBeg ||
left > this.lastBeg && haystackCodes[left-1] === 0x2E /* '.' */
);
}
logData(details) {
details.pattern.unshift('||');
}
toSelfie() {
return [ this.fid ];
}
static compile() {
return [ FilterAnchorHn.fid ];
}
// No new unit is ever created: all use the shared one.
static unitFromCompiled() {
return FilterAnchorHn.filterUnit;
}
static fromSelfie() {
return FilterAnchorHn.instance;
}
};
// The shared instance, and its index in `filterUnits`.
FilterAnchorHn.instance = new FilterAnchorHn();
FilterAnchorHn.filterUnit = filterUnits.length;
filterUnits.push(FilterAnchorHn.instance);
registerFilterClass(FilterAnchorHn);
/******************************************************************************/
// Unit implementing the left anchor (`|` at the start of a filter): the
// match must start at the very beginning of the haystack. A single shared
// instance is used.
const FilterAnchorLeft = class {
    match() {
        return $patternMatchLeft === 0;
    }

    logData(details) {
        details.pattern.unshift('|');
        details.regex.unshift('^');
    }

    toSelfie() {
        return [ this.fid ];
    }

    static compile() {
        return [ FilterAnchorLeft.fid ];
    }

    // No new unit is ever created: all use the shared one.
    static unitFromCompiled() {
        return FilterAnchorLeft.filterUnit;
    }

    static fromSelfie() {
        return FilterAnchorLeft.instance;
    }
};

FilterAnchorLeft.instance = new FilterAnchorLeft();
FilterAnchorLeft.filterUnit = filterUnits.push(FilterAnchorLeft.instance) - 1;

registerFilterClass(FilterAnchorLeft);
/******************************************************************************/
// Unit implementing the right anchor (`|` at the end of a filter): the match
// must end exactly where the request URL ends. A single shared instance is
// used.
const FilterAnchorRight = class {
    match() {
        return $patternMatchRight === $requestURL.length;
    }

    logData(details) {
        details.pattern.push('|');
        details.regex.push('$');
    }

    toSelfie() {
        return [ this.fid ];
    }

    static compile() {
        return [ FilterAnchorRight.fid ];
    }

    // No new unit is ever created: all use the shared one.
    static unitFromCompiled() {
        return FilterAnchorRight.filterUnit;
    }

    static fromSelfie() {
        return FilterAnchorRight.instance;
    }
};

FilterAnchorRight.instance = new FilterAnchorRight();
FilterAnchorRight.filterUnit = filterUnits.push(FilterAnchorRight.instance) - 1;

registerFilterClass(FilterAnchorRight);
/******************************************************************************/
// Unit implementing a trailing `^`: the match must be followed by a
// separator character, or must end exactly where the request URL ends.
// A single shared instance is used.
const FilterTrailingSeparator = class {
    match() {
        const right = $patternMatchRight;
        if ( right === $requestURL.length ) { return true; }
        return isSeparatorChar(bidiTrie.haystack[right]);
    }

    logData(details) {
        details.pattern.push('^');
        details.regex.push(restrSeparator);
    }

    toSelfie() {
        return [ this.fid ];
    }

    static compile() {
        return [ FilterTrailingSeparator.fid ];
    }

    // No new unit is ever created: all use the shared one.
    static unitFromCompiled() {
        return FilterTrailingSeparator.filterUnit;
    }

    static fromSelfie() {
        return FilterTrailingSeparator.instance;
    }
};

FilterTrailingSeparator.instance = new FilterTrailingSeparator();
FilterTrailingSeparator.filterUnit =
    filterUnits.push(FilterTrailingSeparator.instance) - 1;

registerFilterClass(FilterTrailingSeparator);
/******************************************************************************/
// Unit which matches when the bit of the current request type is set in the
// unit's bitmap of types. Reports nothing to the logger.
const FilterType = class {
    constructor(bits) {
        this.typeBits = bits;
    }

    match() {
        const common = this.typeBits & $requestTypeBit;
        return common !== 0;
    }

    logData() {
    }

    toSelfie() {
        return [ this.fid, this.typeBits ];
    }

    // Only the bits of network types are kept.
    static compile(details) {
        const networkBits = details.typeBits & allNetworkTypesBits;
        return [ FilterType.fid, networkBits ];
    }

    static unitFromCompiled(args) {
        const unit = new FilterType(args[1]);
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterType(args[1]);
    }
};

registerFilterClass(FilterType);
/******************************************************************************/
// Unit for regex-based filters (`/regex/`): the regex is tested, without
// regard to case, against the whole request URL.
//
// RegExp instances are created only when a unit is first evaluated, and are
// shared, through `FilterRegex.dict`, by all the units which have the same
// regex source.
const FilterRegex = class {
    constructor(s) {
        this.s = s;
    }

    // Returns whether the regex matches the request URL. On success, the
    // left boundary of the match is set to where the match starts.
    match() {
        if ( this.re === null ) {
            let re = FilterRegex.dict.get(this.s);
            if ( re === undefined ) {
                re = new RegExp(this.s, 'i');
                FilterRegex.dict.set(this.s, re);
            }
            this.re = re;
        }
        // A single `exec()` tells both whether there is a match and where
        // the match starts: no need to scan the URL a second time. This is
        // safe because the regex is created without the `g` or `y` flag,
        // hence `exec()` is stateless.
        const match = this.re.exec($requestURL);
        if ( match === null ) { return false; }
        $patternMatchLeft = match.index;
        return true;
    }

    logData(details) {
        details.pattern.push('/', this.s, '/');
        details.regex.push(this.s);
        details.isRegex = true;
    }

    toSelfie() {
        return [ this.fid, this.s ];
    }

    static compile(details) {
        return [ FilterRegex.fid, details.f ];
    }

    static unitFromCompiled(args) {
        const f = new FilterRegex(args[1]);
        return filterUnits.push(f) - 1;
    }

    static fromSelfie(args) {
        return new FilterRegex(args[1]);
    }
};

// Default shared by all instances: no regex created yet.
FilterRegex.prototype.re = null;

FilterRegex.isSlow = true;

// Regex source => shared RegExp instance.
FilterRegex.dict = new Map();

registerFilterClass(FilterRegex);
/******************************************************************************/
// The optimal "class" is picked according to the content of the
// `domain=` filter option.
const filterOrigin = new (class {
    constructor() {
        // Best effort: if the saved details can't be read or parsed, the
        // container is created with `undefined` details.
        let savedDetails;
        try {
            savedDetails = JSON.parse(
                vAPI.localStorage.getItem('FilterOrigin.trieDetails')
            );
        } catch(ex) {
        }
        this.trieContainer = new µb.HNTrieContainer(savedDetails);
        // `domain=` option value => index of the unit created for it
        this.strToUnitMap = new Map();
        this.gcTimer = undefined;
    }

    // Adds to `units` the compiled unit(s) for the `domain=` option found in
    // `details`: at the start of `units` when `prepend` is truthy, at the
    // end otherwise. When both a "miss" and a "hit" unit are needed, the
    // "miss" unit always ends up ahead of the "hit" unit.
    compile(details, prepend, units) {
        const domainOpt = details.domainOpt;
        let compiledMiss, compiledHit;
        if ( domainOpt.includes('|') === false ) {
            // One hostname: either must be a miss, or must be a hit
            if ( domainOpt.charCodeAt(0) === 0x7E /* '~' */ ) {
                compiledMiss = FilterOriginMiss.compile(domainOpt);
            } else {
                compiledHit = FilterOriginHit.compile(domainOpt);
            }
        } else if ( domainOpt.includes('~') === false ) {
            // Many hostnames, none negated: must be in set
            compiledHit = FilterOriginHitSet.compile(domainOpt);
        } else if ( /^~(?:[^|~]+\|~)+[^|~]+$/.test(domainOpt) ) {
            // Many hostnames, all negated: must not be in set
            compiledMiss = FilterOriginMissSet.compile(domainOpt);
        } else {
            // Must be in one set, but not in the other
            const missSet = [];
            const hitSet = [];
            for ( const hn of domainOpt.split('|') ) {
                if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) {
                    missSet.push(hn);
                } else if ( hn !== '' ) {
                    // Empty entries are left out
                    hitSet.push(hn);
                }
            }
            compiledMiss = missSet.length === 1
                ? FilterOriginMiss.compile(missSet[0])
                : FilterOriginMissSet.compile(missSet.join('|'));
            compiledHit = hitSet.length === 1
                ? FilterOriginHit.compile(hitSet[0])
                : FilterOriginHitSet.compile(hitSet.join('|'));
        }
        if ( prepend ) {
            if ( compiledHit ) { units.unshift(compiledHit); }
            if ( compiledMiss ) { units.unshift(compiledMiss); }
            return;
        }
        if ( compiledMiss ) { units.push(compiledMiss); }
        if ( compiledHit ) { units.push(compiledHit); }
    }

    // Returns the index of a unit of class `ctor` for the option value `s`.
    // Units are shared among identical option values for as long as the
    // value is found in the map, and the map is cleared 5 seconds after the
    // first unit was added to it.
    unitFromCompiled(ctor, s) {
        const known = this.strToUnitMap.get(s);
        if ( known !== undefined ) { return known; }
        const iunit = filterUnits.push(new ctor(s)) - 1;
        this.strToUnitMap.set(s, iunit);
        if ( this.gcTimer === undefined ) {
            this.gcTimer = self.setTimeout(
                ( ) => {
                    this.gcTimer = undefined;
                    this.strToUnitMap.clear();
                },
                5000
            );
        }
        return iunit;
    }

    reset() {
        this.trieContainer.reset();
        this.strToUnitMap.clear();
    }

    // Optimizes the trie container, then saves the details returned by the
    // optimization in local storage under `FilterOrigin.trieDetails`.
    optimize() {
        const serialized = JSON.stringify(this.trieContainer.optimize());
        vAPI.localStorage.setItem('FilterOrigin.trieDetails', serialized);
    }

    toSelfie() {
    }

    fromSelfie() {
    }
})();
/******************************************************************************/
// Unit for a `domain=` option made of a single, non-negated hostname:
// matches when the document hostname is that hostname, or a subdomain of it.
const FilterOriginHit = class {
    constructor(hostname) {
        this.hostname = hostname;
    }

    match() {
        const haystack = $docHostname;
        const needle = this.hostname;
        const offset = haystack.length - needle.length;
        // Cheap rejections first: too short, or first character differs
        if (
            offset < 0 ||
            haystack.charCodeAt(offset) !== needle.charCodeAt(0) ||
            haystack.endsWith(needle) === false
        ) {
            return false;
        }
        // Exact match
        if ( offset === 0 ) { return true; }
        // Otherwise must match at a label boundary
        return haystack.charCodeAt(offset - 1) === 0x2E /* '.' */;
    }

    toSelfie() {
        return [ this.fid, this.hostname ];
    }

    logData(details) {
        details.domains.push(this.hostname);
    }

    static compile(domainOpt) {
        return [ FilterOriginHit.fid, domainOpt ];
    }

    static unitFromCompiled(args) {
        const domainOpt = args[1];
        return filterOrigin.unitFromCompiled(FilterOriginHit, domainOpt);
    }

    static fromSelfie(args) {
        return new FilterOriginHit(args[1]);
    }
};

registerFilterClass(FilterOriginHit);
/******************************************************************************/
// Unit for a `domain=` option made of a single, negated hostname: matches
// unless the document hostname is that hostname, or a subdomain of it.
const FilterOriginMiss = class {
    // `hostname` is expected with its leading `~`, which is not stored.
    constructor(hostname) {
        this.hostname = hostname.slice(1);
    }

    match() {
        const haystack = $docHostname;
        const needle = this.hostname;
        if ( haystack.endsWith(needle) === false ) { return true; }
        const offset = haystack.length - needle.length;
        // It's a miss only if neither an exact match, nor a match at a
        // label boundary.
        return offset !== 0 &&
            haystack.charCodeAt(offset - 1) !== 0x2E /* '.' */;
    }

    logData(details) {
        details.domains.push(`~${this.hostname}`);
    }

    toSelfie() {
        return [ this.fid, `~${this.hostname}` ];
    }

    static compile(domainOpt) {
        return [ FilterOriginMiss.fid, domainOpt ];
    }

    static unitFromCompiled(args) {
        const domainOpt = args[1];
        return filterOrigin.unitFromCompiled(FilterOriginMiss, domainOpt);
    }

    static fromSelfie(args) {
        return new FilterOriginMiss(args[1]);
    }
};

registerFilterClass(FilterOriginMiss);
/******************************************************************************/
// Unit for a `domain=` option made of many hostnames, none negated: matches
// when the document hostname is matched by the set of hostnames.
//
// The set is a trie obtained from `filterOrigin.trieContainer`. Unless the
// unit is created with an already compiled trie (`oneOf`), the trie is
// created from the option value only when the unit is first evaluated.
const FilterOriginHitSet = class {
    constructor(domainOpt, oneOf = null) {
        this.domainOpt = domainOpt;
        if ( oneOf !== null ) {
            this.oneOf = filterOrigin.trieContainer.createOne(oneOf);
        } else {
            this.oneOf = null;
        }
    }

    match() {
        if ( this.oneOf === null ) {
            const hostnames = this.domainOpt.split('|');
            this.oneOf = filterOrigin.trieContainer.fromIterable(hostnames);
        }
        return this.oneOf.matches($docHostname) !== -1;
    }

    logData(details) {
        details.domains.push(this.domainOpt);
    }

    // The trie is serialized only if it has been created.
    toSelfie() {
        let compiledTrie = null;
        if ( this.oneOf !== null ) {
            compiledTrie = filterOrigin.trieContainer.compileOne(this.oneOf);
        }
        return [ this.fid, this.domainOpt, compiledTrie ];
    }

    static compile(domainOpt) {
        return [ FilterOriginHitSet.fid, domainOpt ];
    }

    static unitFromCompiled(args) {
        const domainOpt = args[1];
        return filterOrigin.unitFromCompiled(FilterOriginHitSet, domainOpt);
    }

    static fromSelfie(args) {
        return new FilterOriginHitSet(args[1], args[2]);
    }
};

registerFilterClass(FilterOriginHitSet);
/******************************************************************************/
// Unit for a `domain=` option made of many hostnames, all negated: matches
// when the document hostname is not matched by the set of hostnames.
//
// The set is a trie obtained from `filterOrigin.trieContainer`. Unless the
// unit is created with an already compiled trie (`noneOf`), the trie is
// created from the option value only when the unit is first evaluated.
const FilterOriginMissSet = class {
    constructor(domainOpt, noneOf = null) {
        this.domainOpt = domainOpt;
        if ( noneOf !== null ) {
            this.noneOf = filterOrigin.trieContainer.createOne(noneOf);
        } else {
            this.noneOf = null;
        }
    }

    match() {
        if ( this.noneOf === null ) {
            // The `~` markers are not part of the hostnames.
            const hostnames = this.domainOpt.replace(/~/g, '').split('|');
            this.noneOf = filterOrigin.trieContainer.fromIterable(hostnames);
        }
        return this.noneOf.matches($docHostname) === -1;
    }

    logData(details) {
        details.domains.push(this.domainOpt);
    }

    // The trie is serialized only if it has been created.
    toSelfie() {
        let compiledTrie = null;
        if ( this.noneOf !== null ) {
            compiledTrie = filterOrigin.trieContainer.compileOne(this.noneOf);
        }
        return [ this.fid, this.domainOpt, compiledTrie ];
    }

    static compile(domainOpt) {
        return [ FilterOriginMissSet.fid, domainOpt ];
    }

    static unitFromCompiled(args) {
        const domainOpt = args[1];
        return filterOrigin.unitFromCompiled(FilterOriginMissSet, domainOpt);
    }

    static fromSelfie(args) {
        return new FilterOriginMissSet(args[1], args[2]);
    }
};

registerFilterClass(FilterOriginMissSet);
/******************************************************************************/
// Filter unit which carries a typed piece of data (e.g. the value of a
// `csp=` option). As a matching unit it always matches; the data is
// retrieved through matchAndFetchData()/getData().
const FilterDataHolder = class {
    constructor(dataType, data) {
        this.dataType = dataType;
        this.data = data;
    }

    match() {
        return true;
    }

    // Returns whether this unit holds data of the requested type. When it
    // does and `out` is an array, this unit is appended to `out`.
    matchAndFetchData(type, out) {
        const isRequestedType = this.dataType === type;
        if ( isRequestedType && Array.isArray(out) ) {
            out.push(this);
        }
        return isRequestedType;
    }

    // Returns the data if it is of the requested type, undefined otherwise.
    getData(type) {
        return type === this.dataType ? this.data : undefined;
    }

    logData(details) {
        const option = this.data !== ''
            ? this.dataType + `=${this.data}`
            : this.dataType;
        details.options.push(option);
    }

    toSelfie() {
        const { fid, dataType, data } = this;
        return [ fid, dataType, data ];
    }

    static compile(details) {
        const { dataType, data } = details;
        return [ FilterDataHolder.fid, dataType, data ];
    }

    static unitFromCompiled(args) {
        const unit = FilterDataHolder.fromSelfie(args);
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterDataHolder(args[1], args[2]);
    }
};
registerFilterClass(FilterDataHolder);
// Helper class describing one FilterDataHolder instance which was found to
// be a match: the category bits and token hash under which it was found,
// and the index of the filter unit holding the data.
const FilterDataHolderResult = class {
    constructor(bits, th, iunit) {
        this.bits = bits;
        this.th = th;
        this.iunit = iunit;
    }

    // Delegates to the referenced filter unit.
    getData(type) {
        const unit = filterUnits[this.iunit];
        return unit.getData(type);
    }

    // 1 when the AllowAction bit is clear, 2 when it is set.
    get result() {
        if ( (this.bits & AllowAction) !== 0 ) { return 2; }
        return 1;
    }

    logData() {
        const { bits, th, iunit } = this;
        const details = toLogDataInternal(bits, th, iunit);
        details.source = 'static';
        details.result = this.result;
        return details;
    }
};
/******************************************************************************/
// Base class for filter units which are made of a sequence of other filter
// units. The sequence is a singly-linked list stored in `filterSequences`:
// for a node at index `i`, `filterSequences[i+0]` is the index of a filter
// unit and `filterSequences[i+1]` is the index of the next node, with 0
// meaning "end of list". `this.i` is the index of the first node.
const FilterCollection = class {
    constructor(i = 0) {
        this.i = i | 0;
    }

    // Number of units in the sequence (walks the whole list).
    get size() {
        let count = 0;
        this.forEach(( ) => { count++; });
        return count;
    }

    // Prepend a unit to the sequence.
    unshift(iunit) {
        this.i = filterSequenceAdd(iunit, this.i);
    }

    // Drop the first node of the sequence; the filter unit it referenced
    // is released.
    shift() {
        const head = this.i;
        const sequences = filterSequences;
        filterUnits[sequences[head+0]] = null;
        this.i = sequences[head+1];
    }

    // Call `fn` with each unit index, in sequence order. Iteration stops
    // as soon as `fn` returns anything but undefined, and that value is
    // returned to the caller.
    forEach(fn) {
        let inode = this.i;
        if ( inode === 0 ) { return; }
        const sequences = filterSequences;
        while ( inode !== 0 ) {
            const result = fn(sequences[inode+0]);
            if ( result !== undefined ) { return result; }
            inode = sequences[inode+1];
        }
    }

    toSelfie() {
        return [ this.fid, this.i ];
    }

    static compile(ctor, fdata) {
        return [ ctor.fid, fdata ];
    }

    // Instantiate every compiled unit in `args[1]`, chain them in order
    // into a new sequence, then store a new `ctor` instance referencing
    // that sequence. Returns the index of the new instance in filterUnits.
    static unitFromCompiled(ctor, args) {
        const compiledUnits = args[1];
        const n = compiledUnits.length;
        let head = 0;
        let tail = 0;
        for ( let k = 0; k < n; k++ ) {
            const iunit = filterUnitFromCompiled(compiledUnits[k]);
            const inode = filterSequenceAdd(iunit, 0);
            if ( tail === 0 ) {
                head = inode;
            } else {
                filterSequences[tail+1] = inode;
            }
            tail = inode;
        }
        const collection = new ctor(head, compiledUnits.length);
        return filterUnits.push(collection) - 1;
    }

    static fromSelfie(ctor, args) {
        return new ctor(args[1]);
    }
};
/******************************************************************************/
// A filter unit made of a sequence of filter units which must ALL match
// for the composite to be a match.
const FilterComposite = class extends FilterCollection {
    // True only when every unit in the sequence matches.
    match() {
        const sequences = filterSequences;
        const units = filterUnits;
        let i = this.i;
        while ( i !== 0 ) {
            if ( units[sequences[i+0]].match() !== true ) { return false; }
            i = sequences[i+1];
        }
        return true;
    }

    // Returns true when the composite matches AND one of its units holds
    // data of the requested type, false otherwise. On success, and when
    // `out` is an array, the composite itself is appended to `out`.
    //
    // The result of the inner iteration used to be dropped, causing this
    // method to return `undefined` on success while returning `false` on
    // failure. A boolean is now consistently returned, in line with
    // FilterDataHolder.matchAndFetchData().
    matchAndFetchData(type, out) {
        if ( this.match() !== true ) { return false; }
        const found = this.forEach(iunit => {
            const f = filterUnits[iunit];
            if ( f.matchAndFetchData instanceof Function === false ) { return; }
            if ( f.matchAndFetchData(type) === false ) { return; }
            if ( Array.isArray(out) ) {
                out.push(this);
            }
            // Any value other than undefined stops the iteration.
            return true;
        });
        return found === true;
    }

    // Returns the data of the first unit able to provide data of the
    // requested type, undefined if there is no such unit.
    getData(type) {
        return this.forEach(iunit => {
            const f = filterUnits[iunit];
            if ( f.matchAndFetchData instanceof Function ) {
                return f.getData(type);
            }
        });
    }

    // FilterPatternPlain is assumed to be first filter in sequence. This can
    // be revisited if needed.
    get isBidiTrieable() {
        return filterUnits[filterSequences[this.i]].isBidiTrieable === true;
    }

    // Hand over the first unit of the sequence to the bidi-trie: its trie
    // details are returned and the unit is removed from the sequence.
    toBidiTrie() {
        const details = filterUnits[filterSequences[this.i]].toBidiTrie();
        this.shift();
        return details;
    }

    logData(details) {
        this.forEach(iunit => {
            filterUnits[iunit].logData(details);
        });
    }

    static compile(fdata) {
        return FilterCollection.compile(FilterComposite, fdata);
    }

    static unitFromCompiled(args) {
        return FilterCollection.unitFromCompiled(FilterComposite, args);
    }

    static fromSelfie(args) {
        return FilterCollection.fromSelfie(FilterComposite, args);
    }
};
registerFilterClass(FilterComposite);
/******************************************************************************/
// Dictionary of hostnames: matches when the request hostname is found in a
// hostname trie allocated from FilterHostnameDict.trieContainer.
const FilterHostnameDict = class {
    constructor(args) {
        // Short-lived register: the part of the request hostname which
        // was matched by the last successful call to match().
        this.$h = '';
        this.dict = FilterHostnameDict.trieContainer.createOne(args);
    }

    get size() {
        return this.dict.size;
    }

    add(hn) {
        return this.dict.add(hn);
    }

    match() {
        const pos = this.dict.matches($requestHostname);
        const matched = pos !== -1;
        if ( matched ) {
            this.$h = $requestHostname.slice(pos);
        }
        return matched;
    }

    // Reports the last matched hostname as a `||hostname^` pattern.
    logData(details) {
        const hostname = this.$h;
        details.pattern.push('||', hostname, '^');
        details.regex.push(restrFromPlainPattern(hostname), restrSeparator);
    }

    toSelfie() {
        const compiledDict =
            FilterHostnameDict.trieContainer.compileOne(this.dict);
        return [ this.fid, compiledDict ];
    }

    static reset() {
        return FilterHostnameDict.trieContainer.reset();
    }

    // Optimize the trie container, and persist the details returned by the
    // container to local storage.
    static optimize() {
        const details = FilterHostnameDict.trieContainer.optimize();
        const serialized = JSON.stringify(details);
        vAPI.localStorage.setItem('FilterHostnameDict.trieDetails', serialized);
    }

    static fromSelfie(args) {
        return new FilterHostnameDict(args[1]);
    }
};
// The hostname trie container is created using the details persisted by
// FilterHostnameDict.optimize(), if any. This is best-effort: on any failure
// to read or parse the persisted details, the container is created with
// `undefined` details.
FilterHostnameDict.trieContainer = (( ) => {
    let persistedDetails;
    try {
        const serialized =
            vAPI.localStorage.getItem('FilterHostnameDict.trieDetails');
        persistedDetails = JSON.parse(serialized);
    } catch(ex) {
    }
    return new µb.HNTrieContainer(persistedDetails);
})();
registerFilterClass(FilterHostnameDict);
/******************************************************************************/
// Dictionary of hostnames for filters whose only purpose is to match
// the document origin.
const FilterJustOrigin = class {
    constructor(args) {
        // Short-lived register: the part of the document hostname which
        // was matched by the last successful call to match().
        this.$h = '';
        this.dict = filterOrigin.trieContainer.createOne(args);
    }

    get size() {
        return this.dict.size;
    }

    add(hn) {
        return this.dict.add(hn);
    }

    match() {
        const pos = this.dict.matches($docHostname);
        const matched = pos !== -1;
        if ( matched ) {
            this.$h = $docHostname.slice(pos);
        }
        return matched;
    }

    // Reports a match-all pattern restricted to the last matched origin.
    logData(details) {
        const { pattern, regex, domains } = details;
        pattern.push('*');
        regex.push('^');
        domains.push(this.$h);
    }

    toSelfie() {
        const compiledDict = filterOrigin.trieContainer.compileOne(this.dict);
        return [ this.fid, compiledDict ];
    }

    static unitFromCompiled(args) {
        const unit = new FilterJustOrigin(args[1]);
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterJustOrigin(args[1]);
    }
};
registerFilterClass(FilterJustOrigin);
/******************************************************************************/
// Same as FilterJustOrigin, with the added condition that the request URL
// must start with `https://`.
const FilterHTTPSJustOrigin = class extends FilterJustOrigin {
    match() {
        if ( $requestURL.startsWith('https://') === false ) { return false; }
        return super.match();
    }

    logData(details) {
        const { pattern, regex, domains } = details;
        pattern.push('|https://');
        regex.push('^https://');
        domains.push(this.$h);
    }

    static unitFromCompiled(args) {
        const unit = new FilterHTTPSJustOrigin(args[1]);
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterHTTPSJustOrigin(args[1]);
    }
};
registerFilterClass(FilterHTTPSJustOrigin);
/******************************************************************************/
// Same as FilterJustOrigin, with the added condition that the request URL
// must start with `http://`.
const FilterHTTPJustOrigin = class extends FilterJustOrigin {
    match() {
        if ( $requestURL.startsWith('http://') === false ) { return false; }
        return super.match();
    }

    logData(details) {
        const { pattern, regex, domains } = details;
        pattern.push('|http://');
        regex.push('^http://');
        domains.push(this.$h);
    }

    static unitFromCompiled(args) {
        const unit = new FilterHTTPJustOrigin(args[1]);
        return filterUnits.push(unit) - 1;
    }

    static fromSelfie(args) {
        return new FilterHTTPJustOrigin(args[1]);
    }
};
registerFilterClass(FilterHTTPJustOrigin);
/******************************************************************************/
// Filter unit wrapping a bidi-trie of plain patterns. This is what
// FilterBucket.optimize() returns when all the units of a bucket could be
// moved into the bucket's trie.
const FilterPlainTrie = class {
    constructor(trie) {
        this.plainTrie = trie;
    }

    // On a match, the `$iu` register of the trie is copied into
    // `$matchedUnit` for use by logData().
    match() {
        const trie = this.plainTrie;
        const matched = trie.matches($tokenBeg) !== 0;
        if ( matched ) {
            this.$matchedUnit = trie.$iu;
        }
        return matched;
    }

    matchAndFetchData(/* type, out */) {
        // TODO
    }

    // Reports the part of the request URL delimited by the `$l` and `$r`
    // registers of the trie, then whatever the matched unit reports, unless
    // the matched unit index is -1.
    logData(details) {
        const { $l, $r } = this.plainTrie;
        const matchedText = $requestURL.slice($l, $r);
        details.pattern.push(matchedText);
        details.regex.push(restrFromPlainPattern(matchedText));
        const iunit = this.$matchedUnit;
        if ( iunit === -1 ) { return; }
        filterUnits[iunit].logData(details);
    }

    toSelfie() {
        const compiledTrie = bidiTrie.compileOne(this.plainTrie);
        return [ this.fid, compiledTrie ];
    }

    static fromSelfie(args) {
        const trie = bidiTrie.createOne(args[1]);
        return new FilterPlainTrie(trie);
    }
};

// Default value of the short-lived register set by match().
FilterPlainTrie.prototype.$matchedUnit = 0;
registerFilterClass(FilterPlainTrie);
/******************************************************************************/
// A filter unit made of a sequence of filter units, which is a match as
// soon as ANY of its units is a match (compare with FilterComposite, which
// requires all of its units to match). FilterContainer.freeze() creates a
// bucket whenever more than one filter unit end up under the same token
// hash. A bucket may additionally own a bidi-trie (`plainTrie`) into which
// optimize() moves the units reporting themselves as `isBidiTrieable`.
const FilterBucket = class extends FilterCollection {
// The trie, if any, is looked up first, then each unit in the sequence is
// tried in order. `$matchedTrie` and `$matchedUnit` are short-lived
// registers describing what matched, to be consumed by logData().
match() {
if ( this.plainTrie !== null ) {
if ( this.plainTrie.matches($tokenBeg, this) !== 0 ) {
this.$matchedTrie = true;
this.$matchedUnit = this.plainTrie.$iu;
return true;
}
}
const sequences = filterSequences;
const units = filterUnits;
let i = this.i;
while ( i !== 0 ) {
if ( units[sequences[i+0]].match() ) {
this.$matchedTrie = false;
this.$matchedUnit = sequences[i+0];
return true;
}
i = sequences[i+1];
}
return false;
}
// Forward the request to every unit in the sequence. The trie is not
// consulted, and nothing is returned.
matchAndFetchData(type, out) {
const units = filterUnits;
this.forEach(iunit => {
units[iunit].matchAndFetchData(type, out);
});
}
// Report the last match: the part of the request URL matched by the trie
// when the match came from the trie, then whatever the matched unit
// reports, unless the matched unit index is -1.
logData(details) {
if ( this.$matchedTrie ) {
const s = $requestURL.slice(this.plainTrie.$l, this.plainTrie.$r);
details.pattern.push(s);
details.regex.push(restrFromPlainPattern(s));
}
if ( this.$matchedUnit !== -1 ) {
filterUnits[this.$matchedUnit].logData(details);
}
}
// Same as FilterCollection.toSelfie(), with the compiled trie appended
// when there is one.
toSelfie() {
const selfie = super.toSelfie();
if ( this.plainTrie !== null ) {
selfie.push(bidiTrie.compileOne(this.plainTrie));
}
return selfie;
}
// Move all the bidi-trieable units of the sequence into the trie. Nothing
// is done when there are fewer than 3 such units. Returns a
// FilterPlainTrie instance meant to replace this bucket when no unit is
// left in the sequence afterward, undefined otherwise.
optimize() {
const units = filterUnits;
let n = 0;
let i = this.i;
// Count bidi-trieable units, no need to count past 3. The sequence is
// assumed to not be empty.
do {
if ( units[filterSequences[i+0]].isBidiTrieable ) { n += 1; }
i = filterSequences[i+1];
} while ( i !== 0 && n < 3 );
if ( n < 3 ) { return; }
if ( this.plainTrie === null ) {
this.plainTrie = bidiTrie.createOne();
}
i = this.i;
// `iprev` is the last node which was kept in the sequence, 0 if none.
let iprev = 0;
for (;;) {
const iunit = filterSequences[i+0];
const inext = filterSequences[i+1];
if ( units[iunit].isBidiTrieable ) {
this._addToTrie(iunit);
// Unlink the node from the sequence.
if ( iprev !== 0 ) {
filterSequences[iprev+1] = inext;
} else {
this.i = inext;
}
} else {
iprev = i;
}
if ( inext === 0 ) { break; }
i = inext;
}
// No unit left in the sequence: the trie alone is enough.
if ( this.i === 0 ) {
return new FilterPlainTrie(this.plainTrie);
}
}
// Store the pattern of unit `iunit` into the trie. The "extra" value
// attached to a stored pattern is either 1, or a value returned by
// filterSequenceAdd().
_addToTrie(iunit) {
const f = filterUnits[iunit];
const trieDetails = f.toBidiTrie();
const id = this.plainTrie.add(
trieDetails.i,
trieDetails.n,
trieDetails.itok
);
// No point storing a pattern with conditions if the bidi-trie already
// contain a pattern with no conditions.
let ix = this.plainTrie.getExtra(id);
if ( ix === 1 ) {
filterUnits[iunit] = null;
return;
}
// If the newly stored pattern has no condition, shortcut existing
// ones since they will always be short-circuited by the
// condition-less pattern.
if ( f instanceof FilterPatternPlain ) {
this.plainTrie.setExtra(id, 1);
filterUnits[iunit] = null;
return;
}
// FilterComposite is assumed here, i.e. with conditions.
// NOTE(review): the FilterCollection constructor visible in this file
// only assigns `i`, no `n` property was found to be assigned on
// collections -- this branch looks unreachable as written, confirm.
if ( f.n === 1 ) {
filterUnits[iunit] = null;
iunit = filterSequences[f.i];
}
this.plainTrie.setExtra(id, filterSequenceAdd(iunit, ix));
}
// Same as FilterCollection.fromSelfie(), with the trie re-created when
// compiled trie data is present in the selfie.
static fromSelfie(args) {
const bucket = FilterCollection.fromSelfie(FilterBucket, args);
if ( args.length > 2 && Array.isArray(args[2]) ) {
bucket.plainTrie = bidiTrie.createOne(args[2]);
}
return bucket;
}
};
// Default values: no trie until optimize() creates one, and registers in
// their initial state.
FilterBucket.prototype.plainTrie = null;
FilterBucket.prototype.$matchedUnit = 0;
FilterBucket.prototype.$matchedTrie = false;
registerFilterClass(FilterBucket);
/******************************************************************************/
// Snapshot of the size of `filterUnits` and of the write pointer into
// `filterSequences` at this point in the file. FilterContainer.reset()
// restores both to these values, so that whatever was stored before this
// point is preserved across resets.
const FILTER_UNITS_MIN = filterUnits.length;
const FILTER_SEQUENCES_MIN = filterSequenceWritePtr;
/******************************************************************************/
/******************************************************************************/
// Parser for static network filters. An instance is meant to be re-used:
// parse() resets the instance, then fills it with the details of the parsed
// filter. The outcome of parsing is reported through these properties:
// - `invalid`: the line is not a static network filter.
// - `unsupported`: the line is a static network filter, but it can't be
//   enforced (bad option, bad regex, etc.)
const FilterParser = class {
    constructor() {
        this.cantWebsocket = vAPI.cantWebsocket;
        this.domainOpt = '';
        this.noTokenHash = urlTokenizer.noTokenHash;
        this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
        this.reHostnameRule1 = /^\w[\w.-]*[a-z]$/i;
        this.reHostnameRule2 = /^\w[\w.-]*[a-z]\^?$/i;
        this.reCanTrimCarets1 = /^[^*]*$/;
        this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/;
        this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
        this.reHasUnicode = /[^\x00-\x7F]/;
        this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
        this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/;
        this.reGoodToken = /[%0-9a-z]{1,}/g;
        this.reSeparator = /[\/^]/;
        this.reRegexToken = /[%0-9A-Za-z]{2,}/g;
        this.reRegexTokenAbort = /[([]/;
        this.reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/;
        this.reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*.]|$)/;
        // These top 100 "bad tokens" are collated using the "miss" histogram
        // from tokenHistograms(). The "score" is their occurrence among the
        // 200K+ URLs used in the benchmark and executed against default
        // filter lists.
        this.badTokens = new Map([
            [ 'https', 123617 ], [ 'com', 76987 ], [ 'js', 43620 ],
            [ 'www', 33129 ], [ 'jpg', 32221 ], [ 'images', 31812 ],
            [ 'css', 19715 ], [ 'png', 19140 ], [ 'static', 15724 ],
            [ 'net', 15239 ], [ 'de', 13155 ], [ 'img', 11109 ],
            [ 'assets', 10746 ], [ 'min', 7807 ], [ 'cdn', 7568 ],
            [ 'content', 6900 ], [ 'wp', 6444 ], [ 'fonts', 6095 ],
            [ 'svg', 5976 ], [ 'http', 5813 ], [ 'ssl', 5735 ],
            [ 'amazon', 5440 ], [ 'ru', 5427 ], [ 'fr', 5199 ],
            [ 'facebook', 5178 ], [ 'en', 5146 ], [ 'image', 5028 ],
            [ 'html', 4837 ], [ 'media', 4833 ], [ 'co', 4783 ],
            [ 'php', 3972 ], [ '2019', 3943 ], [ 'org', 3924 ],
            [ 'jquery', 3531 ], [ '02', 3438 ], [ 'api', 3382 ],
            [ 'gif', 3350 ], [ 'eu', 3322 ], [ 'prod', 3289 ],
            [ 'woff2', 3200 ], [ 'logo', 3194 ], [ 'themes', 3107 ],
            [ 'icon', 3048 ], [ 'google', 3026 ], [ 'v1', 3019 ],
            [ 'uploads', 2963 ], [ 'googleapis', 2860 ], [ 'v3', 2816 ],
            [ 'tv', 2762 ], [ 'icons', 2748 ], [ 'core', 2601 ],
            [ 'gstatic', 2581 ], [ 'ac', 2509 ], [ 'utag', 2466 ],
            [ 'id', 2459 ], [ 'ver', 2448 ], [ 'rsrc', 2387 ],
            [ 'files', 2361 ], [ 'uk', 2357 ], [ 'us', 2271 ],
            [ 'pl', 2262 ], [ 'common', 2205 ], [ 'public', 2076 ],
            [ '01', 2016 ], [ 'na', 1957 ], [ 'v2', 1954 ],
            [ '12', 1914 ], [ 'thumb', 1895 ], [ 'web', 1853 ],
            [ 'ui', 1841 ], [ 'default', 1825 ], [ 'main', 1737 ],
            [ 'false', 1715 ], [ '2018', 1697 ], [ 'embed', 1639 ],
            [ 'player', 1634 ], [ 'dist', 1599 ], [ 'woff', 1593 ],
            [ 'global', 1593 ], [ 'json', 1572 ], [ '11', 1566 ],
            [ '600', 1559 ], [ 'app', 1556 ], [ 'styles', 1533 ],
            [ 'plugins', 1526 ], [ '274', 1512 ], [ 'random', 1505 ],
            [ 'sites', 1505 ], [ 'imasdk', 1501 ], [ 'bridge3', 1501 ],
            [ 'news', 1496 ], [ 'width', 1494 ], [ 'thumbs', 1485 ],
            [ 'ttf', 1470 ], [ 'ajax', 1463 ], [ 'user', 1454 ],
            [ 'scripts', 1446 ], [ 'twitter', 1440 ], [ 'crop', 1431 ],
            [ 'new', 1412 ],
        ]);
        this.maxTokenLen = urlTokenizer.MAX_TOKEN_LENGTH;
        this.reset();
    }

    // Restore all the per-filter properties to their default value.
    // Returns the parser itself.
    reset() {
        this.action = BlockAction;
        // anchor: bit vector
        //   0000 (0x0): no anchoring
        //   0001 (0x1): anchored to the end of the URL.
        //   0010 (0x2): anchored to the start of the URL.
        //   0011 (0x3): anchored to the start and end of the URL.
        //   0100 (0x4): anchored to the hostname of the URL.
        //   0101 (0x5): anchored to the hostname and end of the URL.
        this.anchor = 0;
        this.badFilter = false;
        this.dataType = undefined;
        this.data = undefined;
        this.invalid = false;
        this.f = '';
        // Party-related properties
        this.firstParty = false;
        this.thirdParty = false;
        this.party = AnyParty;
        // Options-related properties
        this.fopts = '';
        this.domainOpt = '';
        this.isPureHostname = false;
        this.isRegex = false;
        this.raw = '';
        this.redirect = 0;
        // Token-related properties
        this.token = '*';
        this.tokenHash = this.noTokenHash;
        this.tokenBeg = 0;
        // Type-related properties
        this.typeBits = 0;
        this.notTypes = 0;
        this.important = 0;
        // Positions of the first two wildcards/carets in the pattern
        this.firstWildcardPos = -1;
        this.secondWildcardPos = -1;
        this.firstCaretPos = -1;
        this.secondCaretPos = -1;
        this.unsupported = false;
        return this;
    }

    // Returns the source of the regex as normalized by the RegExp
    // constructor, or an empty string if `s` is not a valid regex.
    normalizeRegexSource(s) {
        let source = '';
        try {
            source = new RegExp(s).source;
        } catch (ex) {
        }
        return source;
    }

    // Bit assigned to a normalized type name in the type bit vectors.
    bitFromType(type) {
        const typeValue = typeNameToTypeValue[type] >>> 4;
        return 1 << (typeValue - 1);
    }

    // https://github.com/chrisaljoudi/uBlock/issues/589
    // Be ready to handle multiple negated types
    parseTypeOption(raw, not) {
        const typeBit = raw === 'all'
            ? allTypesBits
            : this.bitFromType(toNormalizedType[raw]);
        if ( not ) {
            this.notTypes |= typeBit;
            return;
        }
        this.typeBits |= typeBit;
    }

    // `firstParty` tells whether the option is the first-party one (as
    // opposed to the third-party one), `not` tells whether the option is
    // negated. A negated third-party option is a first-party option, and
    // vice versa. Both parties seen means any party.
    parsePartyOption(firstParty, not) {
        const isFirstParty = firstParty ? !not : not;
        if ( isFirstParty ) {
            this.firstParty = true;
            this.party = this.thirdParty ? AnyParty : FirstParty;
            return;
        }
        this.thirdParty = true;
        this.party = this.firstParty ? AnyParty : ThirdParty;
    }

    // Returns the normalized (punycoded where needed) value of a `domain=`
    // option, or an empty string if the value contains invalid characters.
    parseDomainOption(s) {
        let domainOpt = s;
        if ( this.reHasUnicode.test(domainOpt) ) {
            domainOpt = domainOpt
                .split('|')
                .map(hn =>
                    this.reHasUnicode.test(hn) ? punycode.toASCII(hn) : hn
                )
                .join('|');
        }
        return this.reBadDomainOptChars.test(domainOpt) ? '' : domainOpt;
    }

    // Parse the comma-separated options found after the `$` in a filter.
    // The filter is flagged as unsupported when an option is not recognized
    // or when the combination of options makes no sense.
    parseOptions(s) {
        this.fopts = s;
        for ( const rawOpt of s.split(/\s*,\s*/) ) {
            const not = rawOpt.startsWith('~');
            const opt = not ? rawOpt.slice(1) : rawOpt;
            if ( opt === 'third-party' || opt === '3p' ) {
                this.parsePartyOption(false, not);
                continue;
            }
            if ( opt === 'first-party' || opt === '1p' ) {
                this.parsePartyOption(true, not);
                continue;
            }
            if ( toNormalizedType.hasOwnProperty(opt) ) {
                this.parseTypeOption(opt, not);
                continue;
            }
            // https://github.com/gorhill/uBlock/issues/2294
            //   Detect and discard filter if domain option contains
            //   nonsensical characters.
            if ( opt.startsWith('domain=') ) {
                this.domainOpt = this.parseDomainOption(opt.slice(7));
                if ( this.domainOpt !== '' ) { continue; }
                this.unsupported = true;
                break;
            }
            if ( opt === 'important' ) {
                this.important = Important;
                continue;
            }
            if ( /^redirect(?:-rule)?=/.test(opt) ) {
                // Only one redirect option per filter.
                if ( this.redirect !== 0 ) {
                    this.unsupported = true;
                    break;
                }
                // 1: `redirect=`, 2: `redirect-rule=`
                this.redirect = opt.charCodeAt(8) === 0x3D /* '=' */ ? 1 : 2;
                continue;
            }
            if (
                opt.startsWith('csp=') &&
                opt.length > 4 &&
                this.reBadCSP.test(opt) === false
            ) {
                this.parseTypeOption('data', not);
                this.dataType = 'csp';
                this.data = opt.slice(4).trim();
                continue;
            }
            // A `csp` option with no value is valid only for exception
            // filters.
            if ( opt === 'csp' && this.action === AllowAction ) {
                this.parseTypeOption('data', not);
                this.dataType = 'csp';
                this.data = '';
                continue;
            }
            // Used by Adguard:
            // https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters?aid=16593#empty-modifier
            if ( opt === 'empty' || opt === 'mp4' ) {
                if ( this.redirect !== 0 ) {
                    this.unsupported = true;
                    break;
                }
                this.redirect = 1;
                continue;
            }
            // https://github.com/uBlockOrigin/uAssets/issues/192
            if ( opt === 'badfilter' ) {
                this.badFilter = true;
                continue;
            }
            // https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/
            //   Add support for `elemhide`. Rarely used but it happens.
            if ( opt === 'elemhide' || opt === 'ehide' ) {
                this.parseTypeOption('specifichide', not);
                this.parseTypeOption('generichide', not);
                continue;
            }
            // Unrecognized filter option: ignore whole filter.
            this.unsupported = true;
            break;
        }

        // Redirect rules can't be exception filters.
        if ( this.redirect !== 0 && this.action !== BlockAction ) {
            this.unsupported = true;
        }

        // Negated network types? Toggle on all network type bits.
        // Negated non-network types can only toggle themselves.
        if ( (this.notTypes & allNetworkTypesBits) !== 0 ) {
            this.typeBits |= allNetworkTypesBits;
        }
        if ( this.notTypes !== 0 ) {
            this.typeBits &= ~this.notTypes;
            if ( this.typeBits === 0 ) {
                this.unsupported = true;
            }
        }

        // https://github.com/gorhill/uBlock/issues/2283
        //   Abort if type is only for unsupported types, otherwise
        //   toggle off `unsupported` bit.
        if ( (this.typeBits & unsupportedTypeBit) !== 0 ) {
            this.typeBits &= ~unsupportedTypeBit;
            if ( this.typeBits === 0 ) {
                this.unsupported = true;
            }
        }
    }

    // Parse a raw filter. Returns the parser itself, of which the
    // properties describe the parsed filter.
    // TODO: use charCodeAt where possible.
    parse(raw) {
        // important!
        this.reset();

        let s = this.raw = raw.trim();

        if ( s.length === 0 ) {
            this.invalid = true;
            return this;
        }

        // Filters which are a single alphanumeric character are discarded
        // as unsupported.
        if ( s.length === 1 && /[0-9a-z]/i.test(s) ) {
            this.unsupported = true;
            return this;
        }

        // plain hostname? (from HOSTS file)
        if ( this.reHostnameRule1.test(s) ) {
            this.f = s.toLowerCase();
            this.isPureHostname = true;
            this.anchor |= 0b100;
            return this;
        }

        // element hiding filter?
        const hashPos = s.indexOf('#');
        if ( hashPos !== -1 ) {
            const nextChar = s.charAt(hashPos + 1);
            if ( nextChar === '#' || nextChar === '@' ) {
                console.error('static-net-filtering.js > unexpected cosmetic filters');
                this.invalid = true;
                return this;
            }
        }

        // block or allow filter?
        // Important: this must be executed before parsing options
        if ( s.startsWith('@@') ) {
            this.action = AllowAction;
            s = s.slice(2);
        }

        // Whether a string both starts and ends with `/`.
        const isSlashDelimited = str =>
            str.charCodeAt(0) === 0x2F /* '/' */ &&
            str.charCodeAt(str.length - 1) === 0x2F /* '/' */;

        // options
        // https://github.com/gorhill/uBlock/issues/842
        // - ensure we are not dealing with a regex-based filter.
        // - lookup the last occurrence of `$`.
        if ( isSlashDelimited(s) === false ) {
            const optPos = s.lastIndexOf('$');
            if ( optPos !== -1 ) {
                // https://github.com/gorhill/uBlock/issues/952
                //   Discard Adguard-specific `$$` filters.
                if ( s.indexOf('$$') !== -1 ) {
                    this.unsupported = true;
                    return this;
                }
                this.parseOptions(s.slice(optPos + 1));
                if ( this.unsupported ) { return this; }
                s = s.slice(0, optPos);
            }
        }

        // regex?
        if ( s.length > 2 && isSlashDelimited(s) ) {
            this.isRegex = true;
            // https://github.com/gorhill/uBlock/issues/1246
            //   If the filter is valid, use the corrected version of the
            //   source string -- this ensure reverse-lookup will work fine.
            this.f = this.normalizeRegexSource(s.slice(1, -1));
            if ( this.f === '' ) {
                this.unsupported = true;
            }
            return this;
        }

        if ( s.startsWith('||') ) {
            // hostname-anchored
            this.anchor |= 0x4;
            s = s.slice(2);

            // convert hostname to punycode if needed
            // https://github.com/gorhill/uBlock/issues/2599
            if ( this.reHasUnicode.test(s) ) {
                const matches = this.reIsolateHostname.exec(s);
                if ( matches ) {
                    const [ , prefix = '', hostname, rest ] = matches;
                    s = prefix + punycode.toASCII(hostname) + rest;
                }
            }

            // https://github.com/chrisaljoudi/uBlock/issues/1096
            if ( s.startsWith('^') ) {
                this.unsupported = true;
                return this;
            }

            // plain hostname? (from ABP filter list)
            // https://github.com/gorhill/uBlock/issues/1757
            // A filter can't be a pure-hostname one if there is a domain or
            // csp option present.
            if ( this.reHostnameRule2.test(s) ) {
                if ( s.endsWith('^') ) {
                    s = s.slice(0, -1);
                }
                this.f = s.toLowerCase();
                this.isPureHostname = true;
                return this;
            }
        } else if ( s.startsWith('|') ) {
            // left-anchored
            this.anchor |= 0x2;
            s = s.slice(1);
        }

        // right-anchored
        if ( s.endsWith('|') ) {
            this.anchor |= 0x1;
            s = s.slice(0, -1);
        }

        // https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
        //   Remove pointless leading *.
        // https://github.com/gorhill/uBlock/issues/3034
        //   We can remove anchoring if we need to match all at the start.
        if ( s.startsWith('*') ) {
            s = s.replace(/^\*+([^%0-9a-z])/i, '$1');
            this.anchor &= ~0x6;
        }
        // Remove pointless trailing *
        // https://github.com/gorhill/uBlock/issues/3034
        //   We can remove anchoring if we need to match all at the end.
        if ( s.endsWith('*') ) {
            s = s.replace(/([^%0-9a-z])\*+$/i, '$1');
            this.anchor &= ~0x1;
        }

        // nothing left?
        // TODO: remove the second condition once redirect rules with `*/*`
        //       pattern are no longer used.
        if ( s === '' || (this.redirect !== 0 && s === '/') ) {
            s = '*';
        }

        // https://github.com/gorhill/uBlock/issues/1047
        //   Hostname-anchored makes no sense if matching all requests.
        if ( s === '*' ) {
            this.anchor = 0;
        }

        this.firstWildcardPos = s.indexOf('*');
        if ( this.firstWildcardPos !== -1 ) {
            this.secondWildcardPos = s.indexOf('*', this.firstWildcardPos + 1);
        }
        this.firstCaretPos = s.indexOf('^');
        if ( this.firstCaretPos !== -1 ) {
            this.secondCaretPos = s.indexOf('^', this.firstCaretPos + 1);
        }

        // Overly long patterns are not supported.
        if ( s.length > 1024 ) {
            this.unsupported = true;
            return this;
        }

        this.f = s.toLowerCase();
        return this;
    }

    // Given a string, find a good token. Tokens which are too generic,
    // i.e. very common with a high probability of ending up as a miss,
    // are not good. Avoid if possible. This has a significant positive
    // impact on performance.
    makeToken() {
        if ( this.isRegex ) {
            this.extractTokenFromRegex();
            return;
        }
        if ( this.f === '*' ) { return; }
        const best = this.findGoodToken();
        if ( best === null ) { return; }
        this.token = best[0];
        this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
        this.tokenBeg = best.index;
    }

    // Returns the regex match object of the best token found in the
    // pattern, null if no usable token was found. A token not listed as a
    // bad one is returned as soon as it is found, otherwise the least bad
    // token is returned.
    findGoodToken() {
        const pattern = this.f;
        this.reGoodToken.lastIndex = 0;
        let leastBad = null;
        let leastBadness = 0;
        for (;;) {
            const match = this.reGoodToken.exec(pattern);
            if ( match === null ) { break; }
            const token = match[0];
            const beg = match.index;
            // https://github.com/gorhill/uBlock/issues/997
            //   Ignore token if preceded by wildcard.
            const afterWildcard =
                beg !== 0 &&
                pattern.charCodeAt(beg - 1) === 0x2A /* '*' */;
            // Also ignore token if it could extend into a wildcard.
            const beforeWildcard =
                token.length < this.maxTokenLen &&
                pattern.charCodeAt(beg + token.length) === 0x2A /* '*' */;
            if ( afterWildcard || beforeWildcard ) { continue; }
            // A one-char token is better than a documented bad token.
            const badness = token.length > 1
                ? this.badTokens.get(token) || 0
                : 1;
            if ( badness === 0 ) { return match; }
            if ( leastBadness === 0 || badness < leastBadness ) {
                leastBad = match;
                leastBadness = badness;
            }
        }
        return leastBad;
    }

    // https://github.com/gorhill/uBlock/issues/2781
    //   For efficiency purpose, try to extract a token from
    //   a regex-based filter.
    extractTokenFromRegex() {
        const source = this.f;
        this.reRegexToken.lastIndex = 0;
        for (;;) {
            const match = this.reRegexToken.exec(source);
            if ( match === null ) { break; }
            const before = source.slice(0, match.index);
            if ( this.reRegexTokenAbort.test(before) ) { return; }
            const after = source.slice(this.reRegexToken.lastIndex);
            if (
                this.reRegexBadPrefix.test(before) ||
                this.reRegexBadSuffix.test(after)
            ) {
                continue;
            }
            this.token = match[0].toLowerCase();
            this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
            this.tokenBeg = match.index;
            // Keep looking only if the token is a documented bad one.
            if ( this.badTokens.has(this.token) === false ) { break; }
        }
    }

    // Whether the only purpose of the filter is to match the document
    // origin, i.e. a match-all or scheme-only pattern with a `domain=`
    // option with no negated hostname.
    isJustOrigin() {
        if (
            this.isRegex !== false ||
            this.dataType !== undefined ||
            this.domainOpt === ''
        ) {
            return false;
        }
        const matchesAnyURL =
            this.f === '*' || (
                this.anchor === 0b010 &&
                /^(?:http[s*]?:(?:\/\/)?)$/.test(this.f)
            );
        return matchesAnyURL && this.domainOpt.indexOf('~') === -1;
    }
};
/******************************************************************************/
// Parse a raw filter using a shared FilterParser instance. The instance is
// created on demand, and released once it has not been used for more than
// 10 seconds, as assessed by a timer which is re-armed as long as the
// instance is still in use.
FilterParser.parse = (( ) => {
    const MAX_IDLE_TIME = 10000;
    const TIMER_DELAY = 10007;

    let sharedParser;
    let lastUsed = 0;
    let idleTimer;

    const onIdleTimer = function() {
        idleTimer = undefined;
        const idleTime = Date.now() - lastUsed;
        if ( idleTime > MAX_IDLE_TIME ) {
            sharedParser = undefined;
            return;
        }
        idleTimer = vAPI.setTimeout(onIdleTimer, TIMER_DELAY);
    };

    return function(s) {
        if ( sharedParser === undefined ) {
            sharedParser = new FilterParser();
        }
        lastUsed = Date.now();
        if ( idleTimer === undefined ) {
            idleTimer = vAPI.setTimeout(onIdleTimer, TIMER_DELAY);
        }
        return sharedParser.parse(s);
    };
})();
/******************************************************************************/
/******************************************************************************/
// Constructor: keep a local copy of the special token hashes exposed by the
// URL tokenizer, then initialize the container through reset().
const FilterContainer = function() {
    const specialTokenHashes = [
        'noTokenHash',
        'dotTokenHash',
        'anyTokenHash',
        'anyHTTPSTokenHash',
        'anyHTTPTokenHash',
    ];
    for ( const name of specialTokenHashes ) {
        this[name] = urlTokenizer[name];
    }
    this.reset();
};
/******************************************************************************/
// Restore the container to its initial, empty state. This also resets the
// module-level storage shared by all filter units: known tokens, tries,
// filter units and filter sequences.
FilterContainer.prototype.reset = function() {
    this.frozen = false;

    // Counters
    this.processedFilterCount = 0;
    this.acceptedCount = 0;
    this.rejectedCount = 0;
    this.allowFilterCount = 0;
    this.blockFilterCount = 0;
    this.discardedCount = 0;

    // Compiled filters waiting to be processed by freeze()
    this.goodFilters = new Set();
    this.badFilters = new Set();

    // Category bits => (token hash => filter unit index)
    this.categories = new Map();

    urlTokenizer.resetKnownTokens();

    // This will invalidate all tries
    FilterHostnameDict.reset();
    filterOrigin.reset();
    bidiTrie.reset();

    // Keep only what was stored at module initialization time
    filterUnits = filterUnits.slice(0, FILTER_UNITS_MIN);
    filterSequenceWritePtr = FILTER_SEQUENCES_MIN;

    // Runtime registers
    this.$catbits = 0;
    this.$tokenHash = 0;
    this.$filterUnit = 0;
};
/******************************************************************************/
// Turn all the compiled filters collected so far into filter units, stored
// by category bits then by token hash. Compiled filters which are also
// found in the set of bad filters are discarded. Once done, buckets of
// filter units are optimized where possible.
FilterContainer.prototype.freeze = function() {
    const bucketFid = FilterBucket.fid;
    const redirectTypeValue = typeNameToTypeValue.redirect;
    const unserialize = µb.CompiledLineIO.unserialize;
    const units = filterUnits;

    // Special token hashes for which all the filters are stored into a
    // single dictionary-like filter unit, in order of precedence.
    const dictCtorsByTokenHash = [
        [ this.dotTokenHash, FilterHostnameDict ],
        [ this.anyTokenHash, FilterJustOrigin ],
        [ this.anyHTTPSTokenHash, FilterHTTPSJustOrigin ],
        [ this.anyHTTPTokenHash, FilterHTTPJustOrigin ],
    ];

    const t0 = Date.now();

    for ( const line of this.goodFilters ) {
        if ( this.badFilters.has(line) ) {
            this.discardedCount += 1;
            continue;
        }

        const args = unserialize(line);
        const bits = args[0];

        // Special cases: delegate to more specialized engines.
        // Redirect engine.
        if ( (bits & 0x1F0) === redirectTypeValue ) {
            µb.redirectEngine.fromCompiledRule(args[1]);
            continue;
        }

        // Plain static filters.
        const tokenHash = args[1];
        const fdata = args[2];

        let bucket = this.categories.get(bits);
        if ( bucket === undefined ) {
            bucket = new Map();
            this.categories.set(bits, bucket);
        }
        let iunit = bucket.get(tokenHash);

        // Dictionary-like filter unit: create it if needed, then just add
        // the filter data to it.
        const dictEntry = dictCtorsByTokenHash.find(
            entry => entry[0] === tokenHash
        );
        if ( dictEntry !== undefined ) {
            if ( iunit === undefined ) {
                iunit = filterFromCtor(dictEntry[1]);
                bucket.set(tokenHash, iunit);
            }
            units[iunit].add(fdata);
            continue;
        }

        urlTokenizer.addKnownToken(tokenHash);

        const inewunit = filterUnitFromCompiled(fdata);

        // First filter unit for this token hash: store as is.
        if ( iunit === undefined ) {
            bucket.set(tokenHash, inewunit);
            continue;
        }

        // There is already a bucket of filter units for this token hash.
        const existing = units[iunit];
        if ( existing.fid === bucketFid ) {
            existing.unshift(inewunit);
            continue;
        }

        // There is a single filter unit for this token hash: replace it
        // with a bucket containing both the existing and the new units.
        const ibucketunit = filterFromCtor(FilterBucket);
        const newBucket = units[ibucketunit];
        newBucket.unshift(iunit);
        newBucket.unshift(inewunit);
        bucket.set(tokenHash, ibucketunit);
    }

    this.badFilters.clear();
    this.goodFilters.clear();

    // Skip 'data' type since bidi-trie does not (yet) support matchAll().
    const dataTypeValue = typeValueFromCatBits(typeNameToTypeValue['data']);
    for ( const [ catBits, bucket ] of this.categories ) {
        if ( typeValueFromCatBits(catBits) === dataTypeValue ) { continue; }
        for ( const iunit of bucket.values() ) {
            const unit = units[iunit];
            if ( (unit instanceof FilterBucket) === false ) { continue; }
            // A bucket may ask to be replaced with a more efficient unit.
            const replacement = unit.optimize();
            if ( replacement === undefined ) { continue; }
            units[iunit] = replacement;
        }
    }

    FilterHostnameDict.optimize();
    bidiTrieOptimize();
    this.frozen = true;

    log.info(`staticNetFilteringEngine.freeze() took ${Date.now()-t0} ms`);
};
/******************************************************************************/
// Persist the whole state of the engine as a set of assets stored under
// `path`: the tries, the filter sequences, and a `main` asset for everything
// else. Returns a promise which resolves once all assets have been stored.
FilterContainer.prototype.toSelfie = function(path) {
    bidiTrieOptimize(true);
    filterOrigin.optimize();

    // Each entry is the name of an asset, and a function returning the
    // content of that asset.
    const assetProducers = [
        [
            'FilterHostnameDict.trieContainer',
            ( ) => FilterHostnameDict.trieContainer.serialize(µb.base64),
        ],
        [
            'FilterOrigin.trieContainer',
            ( ) => filterOrigin.trieContainer.serialize(µb.base64),
        ],
        [
            'bidiTrie',
            ( ) => bidiTrie.serialize(µb.base64),
        ],
        [
            'filterSequences',
            // Only the used part of the buffer is stored.
            ( ) => µb.base64.encode(
                filterSequences.buffer,
                filterSequenceWritePtr << 2
            ),
        ],
        [
            'main',
            ( ) => JSON.stringify({
                processedFilterCount: this.processedFilterCount,
                acceptedCount: this.acceptedCount,
                rejectedCount: this.rejectedCount,
                allowFilterCount: this.allowFilterCount,
                blockFilterCount: this.blockFilterCount,
                discardedCount: this.discardedCount,
                // Maps are converted into arrays of entries.
                categories: Array.from(
                    this.categories,
                    ([ catbits, bucket ]) => [ catbits, Array.from(bucket) ]
                ),
                urlTokenizer: urlTokenizer.toSelfie(),
                filterUnits: filterUnits.map(f =>
                    f !== null ? f.toSelfie() : null
                ),
            }),
        ],
    ];

    return Promise.all(
        assetProducers.map(([ name, produce ]) =>
            µb.assets.put(`${path}/${name}`, produce())
        )
    );
};
/******************************************************************************/
// Restore the state of the engine from the assets stored under `path` by
// toSelfie(). Returns a promise which resolves to a truthy value only when
// all the parts of the selfie reported a truthy outcome.
FilterContainer.prototype.fromSelfie = function(path) {
    const fetchAsset = name => µb.assets.get(`${path}/${name}`);

    const restoreHostnameDictTries = details =>
        FilterHostnameDict.trieContainer.unserialize(
            details.content,
            µb.base64
        );

    const restoreOriginTries = details =>
        filterOrigin.trieContainer.unserialize(details.content, µb.base64);

    const restoreBidiTrie = details =>
        bidiTrie.unserialize(details.content, µb.base64);

    const restoreFilterSequences = details => {
        // Size is in 32-bit units.
        const size = µb.base64.decodeSize(details.content) >> 2;
        if ( size === 0 ) { return false; }
        filterSequenceBufferResize(size);
        filterSequences = µb.base64.decode(
            details.content,
            filterSequences.buffer
        );
        filterSequenceWritePtr = size;
        return true;
    };

    const restoreMain = details => {
        let selfie;
        try {
            selfie = JSON.parse(details.content);
        } catch (ex) {
            // Invalid content is dealt with just below.
        }
        if ( (selfie instanceof Object) === false ) { return false; }
        this.frozen = true;
        this.processedFilterCount = selfie.processedFilterCount;
        this.acceptedCount = selfie.acceptedCount;
        this.rejectedCount = selfie.rejectedCount;
        this.allowFilterCount = selfie.allowFilterCount;
        this.blockFilterCount = selfie.blockFilterCount;
        this.discardedCount = selfie.discardedCount;
        urlTokenizer.fromSelfie(selfie.urlTokenizer);
        filterUnits = selfie.filterUnits.map(f =>
            f !== null ? filterFromSelfie(f) : null
        );
        // Arrays of entries are converted back into maps.
        for ( const [ catbits, bucket ] of selfie.categories ) {
            this.categories.set(catbits, new Map(bucket));
        }
        return true;
    };

    return Promise.all([
        fetchAsset('FilterHostnameDict.trieContainer').then(restoreHostnameDictTries),
        fetchAsset('FilterOrigin.trieContainer').then(restoreOriginTries),
        fetchAsset('bidiTrie').then(restoreBidiTrie),
        fetchAsset('filterSequences').then(restoreFilterSequences),
        fetchAsset('main').then(restoreMain),
    ]).then(results => {
        // Same as chaining all outcomes with `&&`.
        let outcome = true;
        for ( const result of results ) {
            outcome = outcome && result;
        }
        return outcome;
    });
};
/******************************************************************************/
// Compile one raw filter line into entries pushed to `writer`.
//
// Returns false when the filter is ignored: not a valid static network
// filter, unsupported option (logged), or a redirect rule which failed to
// compile (logged). Returns true otherwise.
FilterContainer.prototype.compile = function(raw, writer) {
    // ORDER OF TESTS IS IMPORTANT!

    const parsed = FilterParser.parse(raw);

    const assetName = ( ) => writer.properties.get('assetKey') || '?';
    const logError = text => {
        µb.logger.writeOne({ realm: 'message', type: 'error', text });
    };

    // Ignore non-static network filters
    if ( parsed.invalid ) { return false; }

    // Ignore filters with unsupported options
    if ( parsed.unsupported ) {
        logError(`Invalid network filter in ${assetName()}: ${raw}`);
        return false;
    }

    // Redirect rule
    if ( parsed.redirect !== 0 ) {
        if ( this.compileRedirectRule(parsed, writer) === false ) {
            logError(`Invalid redirect rule in ${assetName()}: ${raw}`);
            return false;
        }
        // A value of 2 means there is nothing else to compile for this
        // filter beyond the redirect rule itself.
        if ( parsed.redirect === 2 ) { return true; }
    }

    // Pure hostnames, use more efficient dictionary lookup
    // https://github.com/chrisaljoudi/uBlock/issues/665
    // Create a dict keyed on request type etc.
    const isBareHostname =
        parsed.isPureHostname &&
        parsed.domainOpt === '' &&
        parsed.dataType === undefined;
    if ( isBareHostname ) {
        parsed.tokenHash = this.dotTokenHash;
        this.compileToAtomicFilter(parsed, parsed.f, writer);
        return true;
    }

    parsed.makeToken();

    const units = [];

    // Pattern
    if ( parsed.isPureHostname ) {
        parsed.anchor = 0;
        units.push(FilterPlainHostname.compile(parsed));
    } else if ( parsed.isJustOrigin() ) {
        // One atomic filter per origin hostname, filed under a token hash
        // which depends only on the shape of the pattern.
        const hostnames = parsed.domainOpt.split('|');
        if ( parsed.f === '*' ) {
            parsed.tokenHash = this.anyTokenHash;
        } else if ( parsed.f.startsWith('https') ) {
            parsed.tokenHash = this.anyHTTPSTokenHash;
        } else {
            parsed.tokenHash = this.anyHTTPTokenHash;
        }
        for ( const hostname of hostnames ) {
            this.compileToAtomicFilter(parsed, hostname, writer);
        }
        return true;
    } else {
        filterPattern.compile(parsed, units);
    }

    // Type
    // EXPERIMENT: $requestTypeBit
    //if ( (parsed.typeBits & allNetworkTypesBits) !== 0 ) {
    //    units.unshift(FilterType.compile(parsed));
    //    parsed.typeBits &= ~allNetworkTypesBits;
    //}

    // Anchor: hostname anchoring takes precedence over left anchoring, right
    // anchoring is independent of both.
    const anchor = parsed.anchor;
    if ( (anchor & 0b100) !== 0 ) {
        units.push(FilterAnchorHn.compile());
    } else if ( (anchor & 0b010) !== 0 ) {
        units.push(FilterAnchorLeft.compile());
    }
    if ( (anchor & 0b001) !== 0 ) {
        units.push(FilterAnchorRight.compile());
    }

    // Origin
    if ( parsed.domainOpt !== '' ) {
        const isLeadingUnitSlow =
            units.length !== 0 &&
            filterClasses[units[0][0]].isSlow === true;
        filterOrigin.compile(parsed, isLeadingUnitSlow, units);
    }

    // Data
    if ( parsed.dataType !== undefined ) {
        units.push(FilterDataHolder.compile(parsed));
    }

    // A lone unit is stored as is, many units are wrapped in a composite.
    const fdata = units.length === 1
        ? units[0]
        : FilterComposite.compile(units);
    this.compileToAtomicFilter(parsed, fdata, writer);

    return true;
};
/******************************************************************************/
// Push to `writer` the atomic entries for a compiled filter. Each entry is
// [ category bits, token hash, filter data ].
//
// - No type bit set: a single typeless entry.
// - All network type bits set: one typeless entry stands in for all of them,
//   and only the remaining type bits get an entry of their own.
// - Otherwise: one entry per type bit set, the position of the bit (counted
//   from 1) being stored starting at bit 4 of the category bits.
FilterContainer.prototype.compileToAtomicFilter = function(
    parsed,
    fdata,
    writer
) {
    // 0 = network filters
    // 1 = network filters: bad filters
    writer.select(parsed.badFilter ? 1 : 0);

    const descBits = parsed.action | parsed.important | parsed.party;
    const emit = catBits => {
        writer.push([ catBits, parsed.tokenHash, fdata ]);
    };

    let pendingTypes = parsed.typeBits;

    // Typeless
    if ( pendingTypes === 0 ) {
        emit(descBits);
        return;
    }

    // If all network types are set, create a typeless filter
    if ( (pendingTypes & allNetworkTypesBits) === allNetworkTypesBits ) {
        emit(descBits);
        pendingTypes &= ~allNetworkTypesBits;
    }

    // One filter per specific types
    for (
        let typeIndex = 1;
        pendingTypes !== 0;
        typeIndex += 1, pendingTypes >>>= 1
    ) {
        if ( (pendingTypes & 1) === 0 ) { continue; }
        emit(descBits | (typeIndex << 4));
    }
};
/******************************************************************************/
// Have the redirect engine compile the raw filter, then push each resulting
// rule to `writer` as [ redirect type value, rule ].
//
// Returns false, leaving `writer` untouched, when the redirect engine does
// not return an array; returns true otherwise.
FilterContainer.prototype.compileRedirectRule = function(parsed, writer) {
    const rules = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
    if ( Array.isArray(rules) === false ) { return false; }
    // 0 = network filters
    // 1 = network filters: bad filters
    writer.select(parsed.badFilter ? 1 : 0);
    const redirectType = typeNameToTypeValue.redirect;
    for ( let i = 0; i < rules.length; i++ ) {
        writer.push([ redirectType, rules[i] ]);
    }
    return true;
};
/******************************************************************************/
// Ingest compiled filters from `reader`.
//
// Section 0 lines are collected in `goodFilters`: every line counts as
// accepted, and a line already present also counts as discarded.
// Section 1 lines are collected in `badFilters`.
FilterContainer.prototype.fromCompiledContent = function(reader) {
    const { goodFilters, badFilters } = this;

    // 0 = network filters
    reader.select(0);
    while ( reader.next() ) {
        const line = reader.line;
        this.acceptedCount += 1;
        if ( goodFilters.has(line) === false ) {
            goodFilters.add(line);
            continue;
        }
        // Duplicate of a filter seen earlier
        this.discardedCount += 1;
    }

    // 1 = network filters: bad filter directives
    reader.select(1);
    while ( reader.next() ) {
        badFilters.add(reader.line);
    }
};
/******************************************************************************/
// For the realm `realmBits`, look up the `data` category buckets (the
// party-less one and, when `partyBits` is not zero, the party-specific one)
// with each token of the current URL. Every filter unit found is asked to
// collect its matching data holders, each of which is then stored in `out`,
// keyed on its data, as a FilterDataHolderResult.
//
// Returns false when neither bucket exists, undefined once all tokens have
// been processed. matchAndFetchData() does not use the returned value.
FilterContainer.prototype.realmMatchAndFetchData = function(
    realmBits,
    partyBits,
    type,
    out
) {
    const dataBits = realmBits | typeNameToTypeValue.data;

    // Buckets to probe for each token, in probing order.
    const lookups = [];
    const anyPartyBucket = this.categories.get(dataBits);
    if ( anyPartyBucket !== undefined ) {
        lookups.push({ catBits: dataBits, bucket: anyPartyBucket });
    }
    if ( partyBits !== 0 ) {
        const partyBucket = this.categories.get(dataBits | partyBits);
        if ( partyBucket !== undefined ) {
            lookups.push({ catBits: dataBits | partyBits, bucket: partyBucket });
        }
    }
    if ( lookups.length === 0 ) { return false; }

    const units = filterUnits;
    const tokenHashes = urlTokenizer.getTokens(bidiTrie);
    const holders = [];

    // The token array holds [ hash, position ] pairs, a hash of 0 marks the
    // end of the list.
    for ( let i = 0; ; i += 2 ) {
        const th = tokenHashes[i];
        if ( th === 0 ) { return; }
        $tokenBeg = tokenHashes[i+1];
        for ( const { catBits, bucket } of lookups ) {
            const iunit = bucket.get(th);
            if ( iunit === undefined ) { continue; }
            holders.length = 0;
            units[iunit].matchAndFetchData(type, holders);
            for ( const holder of holders ) {
                out.set(
                    holder.getData(type),
                    new FilterDataHolderResult(catBits, th, iunit)
                );
            }
        }
    }
};
/******************************************************************************/
// Collect the data-holding filters of type `type` which apply to the request
// described by `fctxt`.
//
// Returns an array of results: first the block filters left standing
// (important ones last), then the exception filters which did cancel a block
// filter. Returns an empty array when no block filter matches at all.
FilterContainer.prototype.matchAndFetchData = function(fctxt, type) {
    $requestURL = urlTokenizer.setURL(fctxt.url);
    $docHostname = fctxt.getDocHostname();
    $requestHostname = fctxt.getHostname();

    const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;

    const collect = realmBits => {
        const found = new Map();
        this.realmMatchAndFetchData(realmBits, partyBits, type, found);
        return found;
    };

    const importantBlocks = collect(BlockImportant);
    const blocks = collect(BlockAction);
    if ( importantBlocks.size === 0 && blocks.size === 0 ) { return []; }
    const exceptions = collect(AllowAction);

    // Remove entries overridden by important block filters.
    for ( const key of importantBlocks.keys() ) {
        blocks.delete(key);
        exceptions.delete(key);
    }

    if ( exceptions.has('') ) {
        // Special case, except-all:
        // - Except-all applies only if there is at least one normal block
        //   filter.
        // - Except-all does not apply to important block filters.
        if ( blocks.size === 0 ) {
            exceptions.clear();
        } else {
            blocks.clear();
            for ( const key of exceptions.keys() ) {
                if ( key !== '' ) { exceptions.delete(key); }
            }
        }
    } else {
        // Remove excepted block filters and unused exception filters.
        for ( const key of exceptions.keys() ) {
            if ( blocks.delete(key) === false ) {
                exceptions.delete(key);
            }
        }
    }

    // Merge important and normal block filters
    importantBlocks.forEach((entry, key) => {
        blocks.set(key, entry);
    });

    return [ ...blocks.values(), ...exceptions.values() ];
};
/******************************************************************************/
// Find the first filter of the realm `realmBits` matching the current
// request.
//
// Up to four category buckets are probed, always in this order:
//   realm, realm+type, realm+party, realm+type+party
// Bit 31 of `typeBits` requests an exact type match: when set, the two
// buckets which do not carry the type bits are left out. The buckets are
// probed first with the pure hostname token hash (`dotTokenHash`), then with
// each token of the current URL in turn.
//
// On a hit, `$catbits`, `$tokenHash` and `$filterUnit` are set on the
// instance and true is returned; they are left untouched otherwise and false
// is returned.
FilterContainer.prototype.realmMatchString = function(
    realmBits,
    typeBits,
    partyBits
) {
    const exactType = (typeBits & 0x80000000) !== 0;
    const typeOnly = typeBits & 0x7FFFFFFF;
    const wantTyped = exactType || typeOnly !== 0;
    const wantParty = partyBits !== 0;

    // Existing buckets, in probing order.
    const candidates = [];
    const enlist = catBits => {
        const bucket = this.categories.get(catBits);
        if ( bucket !== undefined ) {
            candidates.push({ catBits, bucket });
        }
    };
    if ( exactType === false ) { enlist(realmBits); }
    if ( wantTyped ) { enlist(realmBits | typeOnly); }
    if ( exactType === false && wantParty ) { enlist(realmBits | partyBits); }
    if ( wantTyped && wantParty ) { enlist(realmBits | typeOnly | partyBits); }
    if ( candidates.length === 0 ) { return false; }

    const units = filterUnits;

    // Probe all buckets with one token hash, recording the hit if any. A unit
    // index of 0 is never a valid hit.
    const probe = tokenHash => {
        for ( const { catBits, bucket } of candidates ) {
            const iunit = bucket.get(tokenHash) || 0;
            if ( iunit === 0 ) { continue; }
            if ( units[iunit].match() !== true ) { continue; }
            this.$catbits = catBits;
            this.$tokenHash = tokenHash;
            this.$filterUnit = iunit;
            return true;
        }
        return false;
    };

    // Pure hostname-based filters
    if ( probe(this.dotTokenHash) ) { return true; }

    // Pattern-based filters: the token array holds [ hash, position ] pairs,
    // a hash of 0 marks the end of the list.
    const tokenHashes = urlTokenizer.getTokens(bidiTrie);
    for ( let i = 0; ; i += 2 ) {
        const tokenHash = tokenHashes[i];
        if ( tokenHash === 0 ) { return false; }
        $tokenBeg = tokenHashes[i+1];
        if ( probe(tokenHash) ) { return true; }
    }
};
/******************************************************************************/
// Specialized handler for the `${type}hide` filter options.

// https://github.com/gorhill/uBlock/issues/1477
//   Special case: blocking-generichide filter ALWAYS exists, it is implicit --
//   thus we always first check for exception filters, then for important
//   block filter if and only if there was a hit on an exception filter.
// https://github.com/gorhill/uBlock/issues/2103
//   User may want to override `generichide` exception filters.
// https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/
//   Add support for `specifichide`.

// Returns:
//   0 = no exception filter matches
//   1 = an exception filter matches, overridden by an important block filter
//   2 = an exception filter matches

FilterContainer.prototype.matchStringElementHide = function(type, url) {
    // Bit 31 requests an exact type match.
    const typeBits = typeNameToTypeValue[`${type}hide`] | 0x80000000;

    // Prime tokenizer: we get a normalized URL in return.
    $requestURL = urlTokenizer.setURL(url);
    this.$filterUnit = 0;

    // These registers will be used by various filters. The document and the
    // request are one and the same here.
    const hostname = µb.URI.hostnameFromURI(url);
    $requestHostname = hostname;
    $docHostname = hostname;

    // Exception filters
    if ( !this.realmMatchString(AllowAction, typeBits, FirstParty) ) {
        return 0;
    }

    // Important block filters
    return this.realmMatchString(BlockImportant, typeBits, FirstParty)
        ? 1
        : 2;
};
/******************************************************************************/
// Match the request described by `fctxt` against the static network filters.

// https://github.com/chrisaljoudi/uBlock/issues/116
//   Some types of requests are exceptional, they need custom handling,
//   not the generic handling.
// https://github.com/chrisaljoudi/uBlock/issues/519
//   Use exact type match for anything beyond `other`. Also, be prepared to
//   support unknown types.

// `modifiers`: bit 0 forces an exact type match. When no modifier is given,
// an unknown type is handled as `other`, while type value 0 and type values
// beyond `other` are matched exactly.

// Returns:
//   0 = no match
//   1 = blocked
//   2 = blocked, but excepted by an exception filter

FilterContainer.prototype.matchString = function(fctxt, modifiers = 0) {
    const knownTypeValue = typeNameToTypeValue[fctxt.type];
    let typeValue = knownTypeValue;
    let exactType = (modifiers & 0b0001) !== 0;
    if ( modifiers === 0 ) {
        if ( knownTypeValue === undefined ) {
            typeValue = otherTypeBitValue;
        } else {
            exactType =
                knownTypeValue === 0 ||
                knownTypeValue > otherTypeBitValue;
        }
    }

    // EXPERIMENT: $requestTypeBit
    //$requestTypeBit = 1 << ((typeValue >>> 4) - 1);

    if ( exactType ) {
        // There is nothing to match exactly for an unknown type.
        if ( typeValue === undefined ) { return 0; }
        typeValue |= 0x80000000;
    }

    const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;

    // Prime tokenizer: we get a normalized URL in return.
    $requestURL = urlTokenizer.setURL(fctxt.url);
    this.$filterUnit = 0;

    // These registers will be used by various filters
    $docHostname = fctxt.getDocHostname();
    $requestHostname = fctxt.getHostname();

    // Important block filters: exception filters cannot override these.
    if ( this.realmMatchString(BlockImportant, typeValue, partyBits) ) {
        return 1;
    }

    // Block filters
    if ( !this.realmMatchString(BlockAction, typeValue, partyBits) ) {
        return 0;
    }

    // Exception filters
    return this.realmMatchString(AllowAction, typeValue, partyBits) ? 2 : 1;
};
/******************************************************************************/
// Return the log details of the filter recorded by the last successful
// match, or undefined when no filter is on record (`$filterUnit` is 0).
//
// The details are those returned by toLogDataInternal(), augmented with:
//   source: always 'static'
//   tokenHash: the token hash under which the filter was found
//   result: 2 when the action bit (bit 0) of the category bits is set, that
//       is, the filter is an exception filter, 1 otherwise
FilterContainer.prototype.toLogData = function() {
    if ( this.$filterUnit === 0 ) { return; }
    const logData = toLogDataInternal(
        this.$catbits,
        this.$tokenHash,
        this.$filterUnit
    );
    logData.source = 'static';
    logData.tokenHash = this.$tokenHash;
    // There is always a filter on record at this point, hence the result
    // depends only on the action bit.
    logData.result = (this.$catbits & 1) !== 0 ? 2 : 1;
    return logData;
};
/******************************************************************************/
// Number of filters accepted so far, less those discarded.
FilterContainer.prototype.getFilterCount = function() {
    const { acceptedCount, discardedCount } = this;
    return acceptedCount - discardedCount;
};
/******************************************************************************/
// Run matchString() against every request of the benchmark dataset and
// report timings to the console.
//
// action: 1=test, 2=record
//   1 = compare each result against the reference results found in local
//       storage under 'FilterContainer.benchmark.results', and report
//       mismatches
//   2 = save the results to local storage as the new reference results
// target: when a number, only the request at that index in the dataset is
//   evaluated and its result reported, nothing else is done.
//
// Returns a promise which resolves to undefined.
FilterContainer.prototype.benchmark = async function(action, target) {
    const requests = await µb.loadBenchmarkDataset();
    if ( Array.isArray(requests) === false || requests.length === 0 ) {
        console.info('No requests found to benchmark');
        return;
    }

    console.info(`Benchmarking staticNetFilteringEngine.matchString()...`);
    const fctxt = µb.filteringContext.duplicate();

    const evaluate = request => {
        fctxt.setURL(request.url);
        fctxt.setDocOriginFromURL(request.frameUrl);
        fctxt.setType(request.cpt);
        return this.matchString(fctxt);
    };
    const logContext = ( ) => {
        console.log(`\ttype=${fctxt.type}`);
        console.log(`\turl=${fctxt.url}`);
        console.log(`\tdocOrigin=${fctxt.getDocOrigin()}`);
    };

    if ( typeof target === 'number' ) {
        const request = requests[target];
        // Out of range or otherwise unusable index
        if ( request instanceof Object === false ) {
            console.info(`No request found at index ${target}`);
            return;
        }
        const r = evaluate(request);
        console.log(`Result=${r}:`);
        logContext();
        return;
    }

    let expected, recorded;
    if ( action === 1 ) {
        try {
            expected = JSON.parse(
                vAPI.localStorage.getItem('FilterContainer.benchmark.results')
            );
        } catch(ex) {
            // Unparsable reference results are handled as missing ones.
        }
        // JSON.parse() returns null when there is nothing in storage, and
        // may return anything should the stored data not be ours: only an
        // array can serve as reference results.
        if ( Array.isArray(expected) === false ) {
            expected = undefined;
            console.info('No reference results found, nothing to compare against');
        }
    }
    if ( action === 2 ) {
        recorded = [];
    }

    const t0 = self.performance.now();
    for ( let i = 0; i < requests.length; i++ ) {
        const r = evaluate(requests[i]);
        if ( recorded !== undefined ) { recorded.push(r); }
        if ( expected !== undefined && r !== expected[i] ) {
            console.log(`Mismatch with reference results at ${i}:`);
            console.log(`\tExpected ${expected[i]}, got ${r}:`);
            logContext();
        }
    }
    const t1 = self.performance.now();
    const dur = t1 - t0;

    console.info(`Evaluated ${requests.length} requests in ${dur.toFixed(0)} ms`);
    console.info(`\tAverage: ${(dur / requests.length).toFixed(3)} ms per request`);
    if ( expected !== undefined ) {
        // These counts are computed from the reference results.
        console.info(`\tBlocked: ${expected.reduce((n,r)=>{return r===1?n+1:n;},0)}`);
        console.info(`\tExcepted: ${expected.reduce((n,r)=>{return r===2?n+1:n;},0)}`);
    }
    if ( recorded !== undefined ) {
        vAPI.localStorage.setItem(
            'FilterContainer.benchmark.results',
            JSON.stringify(recorded)
        );
    }
};
/******************************************************************************/
// Console helper: match a single request, described by the URL of its
// document, its type and its own URL, then print the result of matchString()
// along with the log data of the matching filter if any.
FilterContainer.prototype.test = function(docURL, type, url) {
    const context = µb.filteringContext.duplicate();
    context.setDocOriginFromURL(docURL);
    context.setType(type);
    context.setURL(url);
    const verdict = this.matchString(context);
    console.log(`${verdict}`);
    // No filter on record when nothing matched
    if ( verdict === 0 ) { return; }
    console.log(this.toLogData());
};
/******************************************************************************-
With default filter lists:
As of 2019-04-18:
{bits: "0", token: "ad", size: 926, f: FilterBucket}
{bits: "0", token: "ads", size: 636, f: FilterBucket}
{bits: "41", token: "phncdn", size: 253, f: FilterBucket}
{bits: "0", token: "analytic", size: 174, f: FilterBucket}
{bits: "0", token: "tracking", size: 155, f: FilterBucket}
{bits: "48", token: "http", size: 146, f: FilterBucket}
{bits: "48", token: "https", size: 139, f: FilterBucket}
{bits: "58", token: "http", size: 122, f: FilterBucket}
{bits: "0", token: "adv", size: 121, f: FilterBucket}
{bits: "58", token: "https", size: 118, f: FilterBucket}
{bits: "0", token: "advertis", size: 102, f: FilterBucket}
{bits: "8", token: "doublecl", size: 96, f: FilterBucket}
{bits: "41", token: "imasdk", size: 90, f: FilterBucket}
{bits: "0", token: "cdn", size: 89, f: FilterBucket}
{bits: "0", token: "track", size: 87, f: FilterBucket}
{bits: "0", token: "stats", size: 82, f: FilterBucket}
{bits: "0", token: "banner", size: 74, f: FilterBucket}
{bits: "0", token: "log", size: 72, f: FilterBucket}
{bits: "0", token: "ga", size: 71, f: FilterBucket}
{bits: "0", token: "gif", size: 67, f: FilterBucket}
{bits: "0", token: "cloudfro", size: 64, f: FilterBucket}
{bits: "0", token: "amazonaw", size: 61, f: FilterBucket}
{bits: "41", token: "ajax", size: 58, f: FilterBucket}
{bits: "0", token: "tracker", size: 56, f: FilterBucket}
{bits: "40", token: "pagead2", size: 53, f: FilterBucket}
{bits: "0", token: "affiliat", size: 53, f: FilterBucket}
*/
// Console helper: print one entry per (category, token) pair, largest first.
// Each entry holds the category bits (in hexadecimal), the token, the filter
// unit itself and its size. The size is that reported by the unit for
// FilterBucket, FilterHostnameDict and FilterJustOrigin instances, 1 for any
// other unit.
FilterContainer.prototype.bucketHistogram = function() {
    const sizeOf = f => {
        const hasOwnSize =
            f instanceof FilterBucket ||
            f instanceof FilterHostnameDict ||
            f instanceof FilterJustOrigin;
        return hasOwnSize ? f.size : 1;
    };
    const results = [];
    this.categories.forEach((category, bits) => {
        category.forEach((iunit, th) => {
            const token = urlTokenizer.stringFromTokenHash(th);
            const f = filterUnits[iunit];
            results.push({ bits: bits.toString(16), token, size: sizeOf(f), f });
        });
    });
    results.sort((a, b) => b.size - a.size);
    console.log(results);
};
/*******************************************************************************
With default filter lists:
As of 2019-04-25:
{"FilterPlainHnAnchored" => 11078}
{"FilterPlainPrefix1" => 7195}
{"FilterPrefix1Trie" => 5720}
{"FilterOriginHit" => 3561}
{"FilterWildcard2HnAnchored" => 2943}
{"FilterPair" => 2391}
{"FilterBucket" => 1922}
{"FilterWildcard1HnAnchored" => 1910}
{"FilterHnAnchoredTrie" => 1586}
{"FilterPlainHostname" => 1391}
{"FilterOriginHitSet" => 1155}
{"FilterPlain" => 634}
{"FilterWildcard1" => 423}
{"FilterGenericHnAnchored" => 389}
{"FilterOriginMiss" => 302}
{"FilterGeneric" => 163}
{"FilterOriginMissSet" => 150}
{"FilterRegex" => 124}
{"FilterPlainRightAnchored" => 110}
{"FilterGenericHnAndRightAnchored" => 95}
{"FilterHostnameDict" => 59}
{"FilterPlainLeftAnchored" => 30}
{"FilterJustOrigin" => 22}
{"FilterHTTPJustOrigin" => 19}
{"FilterHTTPSJustOrigin" => 18}
{"FilterExactMatch" => 5}
{"FilterOriginMixedSet" => 3}
As of 2019-10-21:
"FilterPatternPlain" => 27542}
"FilterComposite" => 17249}
"FilterPlainTrie" => 13235}
"FilterAnchorHn" => 11938}
"FilterPatternRightEx" => 4446}
"FilterOriginHit" => 4435}
"FilterBucket" => 3833}
"FilterPatternRight" => 3426}
"FilterPlainHostname" => 2786}
"FilterOriginHitSet" => 1433}
"FilterDataHolder" => 666}
"FilterPatternGeneric" => 548}
"FilterOriginMiss" => 441}
"FilterOriginMissSet" => 208}
"FilterTrailingSeparator" => 188}
"FilterRegex" => 181}
"FilterPatternLeft" => 172}
"FilterAnchorRight" => 100}
"FilterPatternLeftEx" => 82}
"FilterHostnameDict" => 60}
"FilterAnchorLeft" => 50}
"FilterJustOrigin" => 24}
"FilterHTTPJustOrigin" => 18}
"FilterTrue" => 17}
"FilterHTTPSJustOrigin" => 17}
*/
// Console helper: print how many instances of each filter class exist, most
// numerous first. The units referenced by the sequence of a
// FilterCollection or FilterComposite instance are counted along with the
// units found in `filterUnits`.
//
// Two artificial entries report content counts rather than instance counts:
//   'FilterPlainTrie Content': sum of the sizes of the plain tries
//   'FilterHostnameDict Content': sum of the sizes of the hostname dicts
FilterContainer.prototype.filterClassHistogram = function() {
    const PLAIN_TRIE_CONTENT = 1000;
    const HOSTNAME_DICT_CONTENT = 1001;

    const tallies = new Map();
    for ( const { fid, name } of filterClasses ) {
        tallies.set(fid, { name, count: 0, });
    }
    // Artificial classes to report content counts
    tallies.set(PLAIN_TRIE_CONTENT, { name: 'FilterPlainTrie Content', count: 0, });
    tallies.set(HOSTNAME_DICT_CONTENT, { name: 'FilterHostnameDict Content', count: 0, });

    const bump = (fid, increment) => {
        tallies.get(fid).count += increment;
    };
    const countFilter = f => {
        if ( f instanceof Object ) { bump(f.fid, 1); }
    };
    // A sequence is a chain of [ unit index, next entry ] pairs stored in
    // `filterSequences`, a next entry of 0 ends the chain.
    const countSequence = head => {
        for ( let i = head; i !== 0; i = filterSequences[i+1] ) {
            countFilter(filterUnits[filterSequences[i+0]]);
        }
    };

    for ( const f of filterUnits ) {
        if ( f === null ) { continue; }
        countFilter(f);
        if ( f instanceof FilterCollection ) {
            countSequence(f.i);
            if ( f.plainTrie ) {
                bump(PLAIN_TRIE_CONTENT, f.plainTrie.size);
            }
        } else if ( f instanceof FilterHostnameDict ) {
            bump(HOSTNAME_DICT_CONTENT, f.size);
        } else if ( f instanceof FilterComposite ) {
            countSequence(f.i);
        } else if ( f instanceof FilterPlainTrie ) {
            bump(PLAIN_TRIE_CONTENT, f.plainTrie.size);
        }
    }

    const results = Array.from(tallies.values());
    results.sort((a, b) => b.count - a.count);
    console.log(results);
};
/******************************************************************************/
// Console helper: run matchString() against every request of the benchmark
// dataset, and count how often each token (a run of two or more characters
// among 0-9, a-z and %) is seen in the lowercased URL of the requests which
// were not matched (misses, result 0) and of those which were blocked (hits,
// result 1). Requests with any other result are not counted.
//
// Prints the 100 most frequent miss tokens, then the 100 most frequent hit
// tokens which are not among these.
//
// Returns a promise which resolves to undefined.
FilterContainer.prototype.tokenHistograms = async function() {
    const requests = await µb.loadBenchmarkDataset();
    if ( Array.isArray(requests) === false || requests.length === 0 ) {
        console.info('No requests found to benchmark');
        return;
    }

    console.info(`Computing token histograms...`);
    const fctxt = µb.filteringContext.duplicate();

    const missTokenMap = new Map();
    const hitTokenMap = new Map();
    const histogramFor = r => {
        if ( r === 0 ) { return missTokenMap; }
        if ( r === 1 ) { return hitTokenMap; }
    };
    const reTokens = /[0-9a-z%]{2,}/g;

    for ( const request of requests ) {
        fctxt.setURL(request.url);
        fctxt.setDocOriginFromURL(request.frameUrl);
        fctxt.setType(request.cpt);
        const histogram = histogramFor(this.matchString(fctxt));
        for ( const [ token ] of request.url.toLowerCase().matchAll(reTokens) ) {
            if ( histogram === undefined ) { continue; }
            histogram.set(token, (histogram.get(token) || 0) + 1);
        }
    }

    const mostFrequent = histogram =>
        Array.from(histogram)
            .sort((a, b) => b[1] - a[1])
            .slice(0, 100);

    const topmisses = mostFrequent(missTokenMap);
    // Tokens commonly seen in misses are of no interest as hits.
    for ( const [ token ] of topmisses ) {
        hitTokenMap.delete(token);
    }
    const tophits = mostFrequent(hitTokenMap);

    console.log('Misses:', JSON.stringify(topmisses));
    console.log('Hits:', JSON.stringify(tophits));
};
/******************************************************************************/
return new FilterContainer();
/******************************************************************************/
})();