mirror of
https://github.com/gorhill/uBlock.git
synced 2024-09-23 14:11:28 +02:00
a69b301d81
Related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/761
3756 lines
110 KiB
JavaScript
3756 lines
110 KiB
JavaScript
/*******************************************************************************
|
|
|
|
uBlock Origin - a browser extension to block requests.
|
|
Copyright (C) 2014-present Raymond Hill
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see {http://www.gnu.org/licenses/}.
|
|
|
|
Home: https://github.com/gorhill/uBlock
|
|
*/
|
|
|
|
/* jshint bitwise: false */
|
|
/* global punycode */
|
|
|
|
'use strict';
|
|
|
|
/******************************************************************************/
|
|
|
|
µBlock.staticNetFilteringEngine = (( ) => {
|
|
|
|
/******************************************************************************/
|
|
|
|
// Short local alias for the extension's global namespace object.
const µb = µBlock;
// Local alias for the URL tokenizer exposed on the namespace object.
const urlTokenizer = µb.urlTokenizer;
|
|
|
|
// Layout of the 16-bit "category bits" attached to every filter:
//
//   fedcba9876543210
//         |    | |||
//         |    | ||+---- bit    0: [BlockAction | AllowAction]
//         |    | |+----- bit    1: `important`
//         |    | +------ bit 2- 3: party [0 - 3]
//         |    +-------- bit 4- 8: type [0 - 31]
//         +------------- bit 9-15: unused

// Bit 0: action.
const BlockAction = 0 << 0;
const AllowAction = 1 << 0;
// Bit 1: `important` option.
const Important   = 1 << 1;
// Bits 2-3: party.
const AnyParty    = 0 << 2;
const FirstParty  = 1 << 2;
const ThirdParty  = 2 << 2;
// Convenience combination: a blocking filter flagged `important`.
const BlockImportant = BlockAction | Important;
|
|
|
|
// Maps a type name to its type value, already shifted into bits 4-8 of the
// category bits. Some names are aliases sharing one value:
// `object`/`object_subrequest`, `fetch`/`xmlhttprequest`, `beacon`/`ping`.
const typeNameToTypeValue = {
           'no_type':  0 << 4,
        'stylesheet':  1 << 4,
             'image':  2 << 4,
            'object':  3 << 4,
 'object_subrequest':  3 << 4,
            'script':  4 << 4,
             'fetch':  5 << 4,
    'xmlhttprequest':  5 << 4,
         'sub_frame':  6 << 4,
              'font':  7 << 4,
             'media':  8 << 4,
         'websocket':  9 << 4,
            'beacon': 10 << 4,
              'ping': 10 << 4,
             'other': 11 << 4,
             'popup': 12 << 4,  // start of behavioral filtering
          'popunder': 13 << 4,
        'main_frame': 14 << 4,  // start of 1st-party-only behavioral filtering
       'generichide': 15 << 4,
      'specifichide': 16 << 4,
       'inline-font': 17 << 4,
     'inline-script': 18 << 4,
              'data': 19 << 4,  // special: a generic data holder
          'redirect': 20 << 4,
            'webrtc': 21 << 4,
       'unsupported': 22 << 4,
};

// `other` is the highest-valued type covered by `allNetworkTypesBits` below.
const otherTypeBitValue = typeNameToTypeValue.other;

// All network request types to bitmap
//   bring origin to 0 (from 4 -- see typeNameToTypeValue)
//   left-shift 1 by the above-calculated value
//   subtract 1 to set all type bits
// A type with value `v` is represented by bit `v - 1` of a type bitmap.
const allNetworkTypesBits =
    (1 << (otherTypeBitValue >>> 4)) - 1;

// Network types plus `popup`, `main_frame`, `inline-font`, `inline-script`.
// The subtraction is parenthesized explicitly: `-` binds tighter than `<<`,
// so this is the same value the unparenthesized form evaluates to.
const allTypesBits =
    allNetworkTypesBits |
    1 << ((typeNameToTypeValue['popup'] >>> 4) - 1) |
    1 << ((typeNameToTypeValue['main_frame'] >>> 4) - 1) |
    1 << ((typeNameToTypeValue['inline-font'] >>> 4) - 1) |
    1 << ((typeNameToTypeValue['inline-script'] >>> 4) - 1);

// Bitmap bit standing for the `unsupported` pseudo-type.
const unsupportedTypeBit =
    1 << ((typeNameToTypeValue['unsupported'] >>> 4) - 1);
|
|
|
|
// Reverse lookup: unshifted type value (1-22) to the option name used when
// rendering a filter for the logger. There is no entry for 0 (`no_type`).
const typeValueToTypeName = {
     1: 'stylesheet',
     2: 'image',
     3: 'object',
     4: 'script',
     5: 'xmlhttprequest',
     6: 'subdocument',
     7: 'font',
     8: 'media',
     9: 'websocket',
    10: 'ping',
    11: 'other',
    12: 'popup',
    13: 'popunder',
    14: 'document',
    15: 'generichide',
    16: 'specifichide',
    17: 'inline-font',
    18: 'inline-script',
    19: 'data',
    20: 'redirect',
    21: 'webrtc',
    22: 'unsupported',
};
|
|
|
|
// https://github.com/gorhill/uBlock/issues/1493
// Maps every accepted filter-option type name (including short aliases such
// as `css`, `doc`, `frame`, `ghide`, `shide`, `xhr`) to its normalized name.
// NOTE(review): an earlier comment here said `ping` is transposed into
// `other`; the table below maps both `ping` and `beacon` to `ping`.
// `genericblock` and `webrtc` normalize to `unsupported`.
const toNormalizedType = {
               'all': 'all',
            'beacon': 'ping',
               'css': 'stylesheet',
              'data': 'data',
               'doc': 'main_frame',
          'document': 'main_frame',
              'font': 'font',
             'frame': 'sub_frame',
      'genericblock': 'unsupported',
       'generichide': 'generichide',
             'ghide': 'generichide',
             'image': 'image',
       'inline-font': 'inline-font',
     'inline-script': 'inline-script',
             'media': 'media',
            'object': 'object',
 'object-subrequest': 'object',
             'other': 'other',
              'ping': 'ping',
          'popunder': 'popunder',
             'popup': 'popup',
            'script': 'script',
      'specifichide': 'specifichide',
             'shide': 'specifichide',
        'stylesheet': 'stylesheet',
       'subdocument': 'sub_frame',
               'xhr': 'xmlhttprequest',
    'xmlhttprequest': 'xmlhttprequest',
            'webrtc': 'unsupported',
         'websocket': 'websocket',
};
|
|
|
|
// Extract the 5-bit type value (bits 4-8) from a filter's category bits.
const typeValueFromCatBits = catBits => (catBits >>> 4) & 0b11111;
|
|
|
|
/******************************************************************************/
|
|
|
|
// See the following as short-lived registers, used during evaluation. They are
|
|
// valid until the next evaluation.
|
|
|
|
// URL of the request being evaluated.
let $requestURL = '';
// Hostname of the request being evaluated.
let $requestHostname = '';
// Hostname of the document the request is evaluated against.
let $docHostname = '';
// Offset in the haystack of the token currently being looked up.
let $tokenBeg = 0;
// Left/right boundaries of the portion of the haystack matched so far;
// filter units read and update these while a filter is evaluated.
let $patternMatchLeft = 0;
let $patternMatchRight = 0;

// EXPERIMENT: $requestTypeBit
// Bitmap bit of the current request type, tested by `FilterType.match()`.
let $requestTypeBit = 0;
|
|
|
|
/******************************************************************************/
|
|
|
|
// Local helpers
|
|
|
|
// Regex source for what a `^` placeholder in a filter stands for: a single
// character outside `%.0-9a-z_-`, or the end of the input.
const restrSeparator = '(?:[^%.0-9a-z_-]|$)';

// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
// Matches every character which has a special meaning in a regex.
const reEscape = /[.*+?^${}()|[\]\\]/g;

// Convert a plain string into regex source matching it literally, by
// backslash-escaping each regex metacharacter.
const restrFromPlainPattern = s => s.replace(reEscape, '\\$&');

// Convert a generic filter pattern (may contain `*` wildcards and `^`
// separator placeholders) into regex source.
//
//   s:      the filter pattern.
//   anchor: bit field -- 0b100: hostname anchor (`||`), 0b010: left anchor
//           (`|`), 0b001: right anchor (`|`). The hostname anchor takes
//           precedence over the left anchor.
//
// Returns the regex source as a string.
const restrFromGenericPattern = function(s, anchor = 0) {
    // Order matters: metacharacters are escaped first, then `^` becomes the
    // separator sub-expression, then leading/trailing `*` are dropped, and
    // finally each remaining run of `*` becomes a lazy non-whitespace match.
    let reStr = s.replace(restrFromGenericPattern.rePlainChars, '\\$&')
                 .replace(restrFromGenericPattern.reSeparators, restrSeparator)
                 .replace(restrFromGenericPattern.reDanglingAsterisks, '')
                 .replace(restrFromGenericPattern.reAsterisks, '\\S*?');
    if ( anchor & 0b100 ) {
        // A pattern starting with a literal dot uses the variant of the
        // hostname prefix which does not itself end with a dot.
        reStr = (
            reStr.startsWith('\\.') ?
                restrFromGenericPattern.restrHostnameAnchor2 :
                restrFromGenericPattern.restrHostnameAnchor1
        ) + reStr;
    } else if ( anchor & 0b010 ) {
        reStr = '^' + reStr;
    }
    if ( anchor & 0b001 ) {
        reStr += '$';
    }
    return reStr;
};
// Metacharacters to escape; `*` and `^` are excluded since they carry a
// filter-syntax meaning and are handled separately.
restrFromGenericPattern.rePlainChars = /[.+?${}()|[\]\\]/g;
restrFromGenericPattern.reSeparators = /\^/g;
restrFromGenericPattern.reDanglingAsterisks = /^\*+|\*+$/g;
restrFromGenericPattern.reAsterisks = /\*+/g;
restrFromGenericPattern.restrHostnameAnchor1 = '^[a-z-]+://(?:[^/?#]+\\.)?';
restrFromGenericPattern.restrHostnameAnchor2 = '^[a-z-]+://(?:[^/?#]+)?';
|
|
|
|
// Reconstruct a textual representation of the filter stored in unit `iunit`.
//
//   categoryBits: the filter's category bits (action, important, party, type).
//   tokenHash:    not used by this function.
//   iunit:        index into `filterUnits`; 0 means "no filter".
//
// Returns `undefined` when `iunit` is 0, otherwise `{ raw, regex }` where
// `raw` is the filter text and `regex` the concatenated regex fragments
// reported by the filter unit.
const toLogDataInternal = function(categoryBits, tokenHash, iunit) {
    if ( iunit === 0 ) { return; }
    const pattern = [];
    const regex = [];
    const options = [];
    const domains = [];
    const logData = { pattern, regex, domains, options, isRegex: false };
    // The unit fills in the pattern/regex/options/domains arrays.
    filterUnits[iunit].logData(logData);
    // Bit 0: exception (allow) filter.
    if ( categoryBits & 0x001 ) {
        logData.pattern.unshift('@@');
    }
    // Bit 1: `important`.
    if ( categoryBits & 0x002 ) {
        logData.options.unshift('important');
    }
    // Bits 2-3: party.
    if ( categoryBits & 0x008 ) {
        logData.options.unshift('3p');
    } else if ( categoryBits & 0x004 ) {
        logData.options.unshift('1p');
    }
    // Bits 4-8: type. `data` is not rendered as a type option.
    const type = categoryBits & 0x1F0;
    if ( type !== 0 && type !== typeNameToTypeValue.data ) {
        logData.options.unshift(typeValueToTypeName[type >>> 4]);
    }
    let raw = logData.pattern.join('');
    // A non-regex pattern which starts and ends with `/` gets a trailing
    // `*` appended.
    if (
        logData.isRegex === false &&
        raw.charCodeAt(0) === 0x2F /* '/' */ &&
        raw.charCodeAt(raw.length - 1) === 0x2F /* '/' */
    ) {
        raw += '*';
    }
    if ( domains.length !== 0 ) {
        options.push(`domain=${domains.join('|')}`);
    }
    if ( options.length !== 0 ) {
        raw += '$' + options.join(',');
    }
    return { raw, regex: logData.regex.join('') };
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Lookup table of character-class flags, indexed by character code (0-127).
const charClassMap = new Uint32Array(128);
const CHAR_CLASS_SEPARATOR = 0b00000001;

// Flag every character in the table's range which is neither a word
// character nor one of `%`, `.`, `-`.
{
    const reSeparators = /[^\w%.-]/;
    for ( let i = 0; i < 128; i++ ) {
        if ( reSeparators.test(String.fromCharCode(i)) ) {
            charClassMap[i] |= CHAR_CLASS_SEPARATOR;
        }
    }
}

// Whether character code `c` is flagged as a separator. Codes outside the
// table (>= 128, or `undefined`) yield `false`.
const isSeparatorChar = c => (charClassMap[c] & CHAR_CLASS_SEPARATOR) !== 0;
|
|
|
|
/******************************************************************************/
|
|
|
|
// All instantiated filter units, addressed by index. Index 0 is reserved
// (holds `null`) so that 0 can mean "no unit".
let filterUnits = [ null ];

// Flat storage for pairs of 32-bit values written by `filterSequenceAdd()`.
// `bidiTrieMatchExtra()` reads each pair as (unit index, index of next pair),
// with a next-index of 0 ending the chain.
let filterSequences = new Uint32Array(131072);
// Next free slot; starts at 3, so index 0 is never handed out.
let filterSequenceWritePtr = 3;

// Append the pair (a, b) to `filterSequences`, growing the buffer if
// needed, and return the index at which the pair was stored.
const filterSequenceAdd = function(a, b) {
    const i = filterSequenceWritePtr;
    filterSequenceWritePtr += 2;
    if ( filterSequenceWritePtr > filterSequences.length ) {
        filterSequenceBufferResize(filterSequenceWritePtr);
    }
    filterSequences[i+0] = a;
    filterSequences[i+1] = b;
    return i;
};

// Ensure `filterSequences` can hold at least `newSize` entries. The buffer
// never shrinks; the new capacity is rounded up to a multiple of 16384 and
// existing content is preserved.
const filterSequenceBufferResize = function(newSize) {
    if ( newSize <= filterSequences.length ) { return; }
    const size = (newSize + 0x3FFF) & ~0x3FFF;
    const buffer = new Uint32Array(size);
    buffer.set(filterSequences);
    filterSequences = buffer;
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Callback handed to the bidi-trie container. Walks the chain of filter
// units starting at `filterSequences[ix]` and returns the index of the first
// unit whose `match()` succeeds, or 0 if none does.
//
//   l, r: left/right match boundaries; the match registers are reset to
//         these values before each unit is tried.
//   ix:   index in `filterSequences` of the first (unit, next) pair.
const bidiTrieMatchExtra = function(l, r, ix) {
    for (;;) {
        // Reset for every candidate, since a unit's match() may have
        // modified the registers.
        $patternMatchLeft = l;
        $patternMatchRight = r;
        const iu = filterSequences[ix+0];
        if ( filterUnits[iu].match() ) { return iu; }
        ix = filterSequences[ix+1];
        if ( ix === 0 ) { break; }
    }
    return 0;
};
|
|
|
|
// The bidi-trie container used to store and match plain patterns. It is
// created with whatever details were persisted in local storage under
// 'SNFE.bidiTrieDetails' by `bidiTrieOptimize()`, and with
// `bidiTrieMatchExtra` as its extra-match callback.
const bidiTrie = (( ) => {
    let trieDetails;
    try {
        trieDetails = JSON.parse(
            vAPI.localStorage.getItem('SNFE.bidiTrieDetails')
        );
    } catch(ex) {
        // Deliberately ignored: on failure `trieDetails` stays undefined
        // and the container is created without persisted details.
    }
    const trie = new µb.BidiTrieContainer(trieDetails, bidiTrieMatchExtra);
    // WASM is enabled unless explicitly disabled through hidden settings.
    if ( µb.hiddenSettings.disableWebAssembly !== true ) {
        trie.enableWASM();
    }
    return trie;
})();
|
|
|
|
// Ask the bidi-trie container to optimize itself and persist the details it
// returns under 'SNFE.bidiTrieDetails', from where they are read back when
// the container is next created.
//
//   shrink: forwarded as-is to `bidiTrie.optimize()`; defaults to false.
const bidiTrieOptimize = function(shrink = false) {
    const trieDetails = bidiTrie.optimize(shrink);
    vAPI.localStorage.setItem(
        'SNFE.bidiTrieDetails',
        JSON.stringify(trieDetails)
    );
};
|
|
|
|
/*******************************************************************************
|
|
|
|
Each filter class will register itself in the map.
|
|
|
|
IMPORTANT: any change which modifies the mapping will have to be
|
|
reflected with µBlock.systemSettings.compiledMagic.
|
|
|
|
*/
|
|
|
|
// Registered filter classes, indexed by their filter id (`fid`).
const filterClasses = [];
// Next filter id to hand out; ids are assigned in registration order.
let filterClassIdGenerator = 0;

// Assign the next filter id to `ctor`, both as a static property and on its
// prototype (so that instances and subclasses see it as `this.fid`), and
// record the class in `filterClasses`.
const registerFilterClass = function(ctor) {
    const fid = filterClassIdGenerator++;
    ctor.fid = ctor.prototype.fid = fid;
    filterClasses[fid] = ctor;
};
|
|
|
|
// Return the unit index of a filter of class `ctor`.
// Classes exposing a shared `filterUnit` static property return that index.
// Otherwise a new instance is created with `args`, appended to
// `filterUnits`, and its index returned.
const filterFromCtor = function(ctor, ...args) {
    if ( ctor.filterUnit !== undefined ) {
        return ctor.filterUnit;
    }
    const f = new ctor(...args);
    const iunit = filterUnits.length;
    filterUnits.push(f);
    return iunit;
};
|
|
|
|
// Turn a compiled filter description into a filter unit index.
// `args[0]` is the filter id of the class; the whole array is handed to
// that class's `unitFromCompiled()`.
const filterUnitFromCompiled = function(args) {
    const ctor = filterClasses[args[0]];
    return ctor.unitFromCompiled(args);
};
|
|
|
|
// Re-create a filter from its selfie (serialized) form.
// `args[0]` is the filter id of the class; the whole array is handed to
// that class's `fromSelfie()`.
const filterFromSelfie = function(args) {
    return filterClasses[args[0]].fromSelfie(args);
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Picks the filter class(es) best suited to match the pattern part of a
// parsed filter, and appends their compiled form to `units`.
const filterPattern = {
    // parsed: parsed filter; this method reads `isRegex`, `f` (the pattern),
    //         `tokenHash`, `noTokenHash`, `tokenBeg` and the first/second
    //         wildcard/caret positions (-1 when absent). `parsed.f` and
    //         `parsed.tokenBeg` may be modified.
    // units:  array receiving the compiled units.
    compile: function(parsed, units) {
        if ( parsed.isRegex ) {
            units.push(FilterRegex.compile(parsed));
            return;
        }
        const pattern = parsed.f;
        if ( pattern === '*' ) {
            units.push(FilterTrue.compile());
            return;
        }
        // No usable token: fall back to the generic class.
        if ( parsed.tokenHash === parsed.noTokenHash ) {
            units.push(FilterPatternGeneric.compile(parsed));
            return;
        }
        // Neither `*` nor `^`: a plain pattern.
        if ( parsed.firstWildcardPos === -1 && parsed.firstCaretPos === -1 ) {
            units.push(FilterPatternPlain.compile(parsed));
            return;
        }
        // Anything beyond a single `*`, or a single `^*` combo, is handled
        // by compileGeneric().
        if (
            parsed.secondWildcardPos !== -1 ||
            parsed.secondCaretPos !== -1 ||
            parsed.firstCaretPos !== -1 && (
                parsed.firstWildcardPos === -1 ||
                parsed.firstWildcardPos !== (parsed.firstCaretPos + 1)
            )
        ) {
            return this.compileGeneric(parsed, units);
        }
        // Exactly one `*` (optionally preceded by `^`): split the pattern
        // into a left and a right plain part around it.
        const hasCaretCombo = parsed.firstCaretPos !== -1;
        const sright = pattern.slice(parsed.firstWildcardPos + 1);
        const sleft = pattern.slice(
            0,
            hasCaretCombo ? parsed.firstCaretPos : parsed.firstWildcardPos
        );
        // The part holding the token is matched first as a plain pattern,
        // the other part is matched relative to it.
        if ( parsed.tokenBeg < parsed.firstWildcardPos ) {
            parsed.f = sleft;
            units.push(FilterPatternPlain.compile(parsed));
            parsed.f = sright;
            units.push(FilterPatternRight.compile(parsed, hasCaretCombo));
            return;
        }
        // parsed.tokenBeg > parsed.firstWildcardPos
        parsed.f = sright;
        // Make the token offset relative to the right part.
        parsed.tokenBeg -= parsed.firstWildcardPos + 1;
        units.push(FilterPatternPlain.compile(parsed));
        parsed.f = sleft;
        units.push(FilterPatternLeft.compile(parsed, hasCaretCombo));
    },
    // Same parameters as compile(). Handles patterns not covered by the
    // fast paths in compile().
    compileGeneric: function(parsed, units) {
        const pattern = parsed.f;
        // Optimize special case: plain pattern with trailing caret
        if (
            parsed.firstWildcardPos === -1 &&
            parsed.firstCaretPos === (pattern.length - 1)
        ) {
            parsed.f = pattern.slice(0, -1);
            units.push(FilterPatternPlain.compile(parsed));
            units.push(FilterTrailingSeparator.compile());
            return;
        }
        // Use a plain pattern as a first test for whether the generic pattern
        // needs to be matched.
        // TODO: inconclusive, investigate more.
        //let left = parsed.tokenBeg;
        //while ( left > 0 ) {
        //    const c = pattern.charCodeAt(left-1);
        //    if ( c === 0x2A /* '*' */ || c === 0x5E /* '^' */ ) { break; }
        //    left -= 1;
        //}
        //let right = parsed.tokenBeg + parsed.token.length;
        //while ( right < pattern.length ) {
        //    const c = pattern.charCodeAt(right);
        //    if ( c === 0x2A /* '*' */ || c === 0x5E /* '^' */ ) { break; }
        //    right += 1;
        //}
        //parsed.f = pattern.slice(left, right);
        //parsed.tokenBeg -= left;
        //units.push(FilterPatternPlain.compile(parsed));
        //parsed.f = pattern;
        units.push(FilterPatternGeneric.compile(parsed));
    },
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit which matches unconditionally (pattern `*`). A single shared
// instance is used for all such filters.
const FilterTrue = class {
    match() {
        return true;
    }

    // Contribute this unit's part to the log data: pattern `*`, regex `^`.
    logData(details) {
        details.pattern.push('*');
        details.regex.push('^');
    }

    toSelfie() {
        return FilterTrue.compile();
    }

    // Compiled form: just the filter id.
    static compile() {
        return [ FilterTrue.fid ];
    }

    // Always the index of the shared instance.
    static unitFromCompiled() {
        return FilterTrue.filterUnit;
    }

    static fromSelfie() {
        return FilterTrue.instance;
    }
};

// Shared instance and its index in `filterUnits`.
FilterTrue.instance = new FilterTrue();
FilterTrue.filterUnit = filterUnits.push(FilterTrue.instance) - 1;

registerFilterClass(FilterTrue);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit matching a plain string (no wildcard, no separator
// placeholder) whose token starts at offset 0 of the pattern. The string
// itself is stored in the bidi-trie container and referenced by (i, n).
const FilterPatternPlain = class {
    // i: reference to the stored string, as returned by
    //    bidiTrie.storeString().
    // n: length of the string.
    constructor(i, n) {
        this.i = i | 0;
        this.n = n | 0;
    }

    // Compare the stored string against the haystack at the position
    // derived from the current token position. On success, the match
    // registers are set to the boundaries of the matched portion.
    match() {
        const left = $tokenBeg;
        if (
            bidiTrie.startsWith(
                left,
                bidiTrie.haystackLen,
                this.i,
                this.n
            ) === 0
        ) {
            return false;
        }
        $patternMatchLeft = left;
        $patternMatchRight = left + this.n;
        return true;
    }

    // Only patterns of at most 255 characters are flagged as storable in
    // the bidi-trie.
    get isBidiTrieable() {
        return this.n <= 255;
    }

    toBidiTrie() {
        return { i: this.i, n: this.n, itok: this.tokenBeg };
    }

    logData(details) {
        const s = bidiTrie.extractString(this.i, this.n);
        details.pattern.push(s);
        details.regex.push(restrFromPlainPattern(s));
    }

    // Selfie form: [ fid, i, n, tokenBeg ]. Subclasses share this class's
    // fid; `tokenBeg` is what lets fromSelfie() pick the right class.
    toSelfie() {
        return [ this.fid, this.i, this.n, this.tokenBeg ];
    }

    // Compiled form: [ fid, pattern, offset of token in pattern ].
    static compile(details) {
        return [ FilterPatternPlain.fid, details.f, details.tokenBeg ];
    }

    // Store the pattern string, instantiate the variant matching the token
    // offset (0, 1, or any other value), and return the new unit index.
    static unitFromCompiled(args) {
        const i = bidiTrie.storeString(args[1]);
        const n = args[1].length;
        let f;
        if ( args[2] === 0 ) {
            f = new FilterPatternPlain(i, n);
        } else if ( args[2] === 1 ) {
            f = new FilterPatternPlain1(i, n);
        } else {
            f = new FilterPatternPlainX(i, n, args[2]);
        }
        return filterUnits.push(f) - 1;
    }

    static fromSelfie(args) {
        if ( args[3] === 0 ) {
            return new FilterPatternPlain(args[1], args[2]);
        }
        if ( args[3] === 1 ) {
            return new FilterPatternPlain1(args[1], args[2]);
        }
        return new FilterPatternPlainX(args[1], args[2], args[3]);
    }
};

// Offset of the token within the pattern; shared through the prototype.
FilterPatternPlain.prototype.tokenBeg = 0;

registerFilterClass(FilterPatternPlain);
|
|
|
|
|
|
// Variant of FilterPatternPlain for patterns whose token starts at offset 1
// of the pattern: the comparison starts one character left of the token.
const FilterPatternPlain1 = class extends FilterPatternPlain {
    match() {
        const left = $tokenBeg - 1;
        if (
            bidiTrie.startsWith(
                left,
                bidiTrie.haystackLen,
                this.i,
                this.n
            ) === 0
        ) {
            return false;
        }
        $patternMatchLeft = left;
        $patternMatchRight = left + this.n;
        return true;
    }
};

// Shared through the prototype, as with the base class.
FilterPatternPlain1.prototype.tokenBeg = 1;
|
|
|
|
|
|
// Variant of FilterPatternPlain for patterns whose token starts at an
// arbitrary offset of the pattern; the offset is stored per instance.
const FilterPatternPlainX = class extends FilterPatternPlain {
    // tokenBeg: offset of the token within the pattern.
    constructor(i, n, tokenBeg) {
        super(i, n);
        this.tokenBeg = tokenBeg;
    }

    match() {
        // Position in the haystack where the pattern would have to start.
        const left = $tokenBeg - this.tokenBeg;
        if (
            bidiTrie.startsWith(
                left,
                bidiTrie.haystackLen,
                this.i,
                this.n
            ) === 0
        ) {
            return false;
        }
        $patternMatchLeft = left;
        $patternMatchRight = left + this.n;
        return true;
    }
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit for the left part of a `left*right` pattern, evaluated after
// the right part has matched. The string is stored in the bidi-trie
// container and referenced by (i, n).
const FilterPatternLeft = class {
    constructor(i, n) {
        this.i = i | 0;
        this.n = n | 0;
    }

    // Look for the stored string between offset 0 and the current left
    // match boundary (exact range semantics are those of bidiTrie.indexOf,
    // defined elsewhere). On success the left boundary moves to the
    // position found.
    match() {
        const left = bidiTrie.indexOf(
            0, $patternMatchLeft,
            this.i, this.n
        );
        if ( left === -1 ) { return false; }
        $patternMatchLeft = left;
        return true;
    }

    logData(details) {
        const s = bidiTrie.extractString(this.i, this.n);
        details.pattern.unshift(s, '*');
        details.regex.unshift(restrFromPlainPattern(s), '.*');
    }

    toSelfie() {
        return [ this.fid, this.i, this.n ];
    }

    // ex: truthy when the wildcard is preceded by a `^`, in which case the
    //     compiled form targets FilterPatternLeftEx.
    static compile(details, ex) {
        return [
            ex ? FilterPatternLeftEx.fid : FilterPatternLeft.fid,
            details.f
        ];
    }

    static unitFromCompiled(args) {
        const i = bidiTrie.storeString(args[1]);
        const f = new FilterPatternLeft(i, args[1].length);
        return filterUnits.push(f) - 1;
    }

    static fromSelfie(args) {
        return new FilterPatternLeft(args[1], args[2]);
    }
};

registerFilterClass(FilterPatternLeft);
|
|
|
|
|
|
// Variant of FilterPatternLeft for `left^*right` patterns: the left part
// must additionally be followed by a separator character.
const FilterPatternLeftEx = class extends FilterPatternLeft {
    // Repeatedly look for the stored string, resuming one position further
    // each time, until an occurrence immediately followed by a separator
    // character is found. The search bound is one less than the current
    // left boundary, presumably to leave room for that separator.
    match() {
        let left = 0;
        for (;;) {
            left = bidiTrie.indexOf(
                left, $patternMatchLeft - 1,
                this.i, this.n
            );
            if ( left === -1 ) { return false; }
            if ( isSeparatorChar(bidiTrie.haystack[left + this.n]) ) {
                break;
            }
            left += 1;
        }
        $patternMatchLeft = left;
        return true;
    }

    logData(details) {
        const s = bidiTrie.extractString(this.i, this.n);
        details.pattern.unshift(s, '^*');
        details.regex.unshift(restrFromPlainPattern(s), restrSeparator, '.*');
    }

    static unitFromCompiled(args) {
        const i = bidiTrie.storeString(args[1]);
        const f = new FilterPatternLeftEx(i, args[1].length);
        return filterUnits.push(f) - 1;
    }

    static fromSelfie(args) {
        return new FilterPatternLeftEx(args[1], args[2]);
    }
};

registerFilterClass(FilterPatternLeftEx);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit for the right part of a `left*right` pattern, evaluated after
// the left part has matched. The string is stored in the bidi-trie
// container and referenced by (i, n).
const FilterPatternRight = class {
    constructor(i, n) {
        this.i = i | 0;
        this.n = n | 0;
    }

    // Look for the stored string between the current right match boundary
    // and the end of the haystack (exact range semantics are those of
    // bidiTrie.lastIndexOf, defined elsewhere). On success the right
    // boundary moves to the end of the occurrence found.
    match() {
        const right = bidiTrie.lastIndexOf(
            $patternMatchRight, bidiTrie.haystackLen,
            this.i, this.n
        );
        if ( right === -1 ) { return false; }
        $patternMatchRight = right + this.n;
        return true;
    }

    logData(details) {
        const s = bidiTrie.extractString(this.i, this.n);
        details.pattern.push('*', s);
        details.regex.push('.*', restrFromPlainPattern(s));
    }

    toSelfie() {
        return [ this.fid, this.i, this.n ];
    }

    // ex: truthy when the wildcard is preceded by a `^`, in which case the
    //     compiled form targets FilterPatternRightEx.
    static compile(details, ex) {
        return [
            ex ? FilterPatternRightEx.fid : FilterPatternRight.fid,
            details.f
        ];
    }

    static unitFromCompiled(args) {
        const i = bidiTrie.storeString(args[1]);
        const f = new FilterPatternRight(i, args[1].length);
        return filterUnits.push(f) - 1;
    }

    static fromSelfie(args) {
        return new FilterPatternRight(args[1], args[2]);
    }
};

registerFilterClass(FilterPatternRight);
|
|
|
|
|
|
// Variant of FilterPatternRight for `left^*right` patterns: the character
// immediately following the left part must be a separator.
const FilterPatternRightEx = class extends FilterPatternRight {
    // The search for the right part starts one position past the current
    // right boundary, since that position is where the separator must be.
    match() {
        const left = $patternMatchRight;
        const right = bidiTrie.lastIndexOf(
            left + 1, bidiTrie.haystackLen,
            this.i, this.n
        );
        if ( right === -1 ) { return false; }
        if ( isSeparatorChar(bidiTrie.haystack[left]) === false ) {
            return false;
        }
        $patternMatchRight = right + this.n;
        return true;
    }

    logData(details) {
        const s = bidiTrie.extractString(this.i, this.n);
        details.pattern.push('^*', s);
        details.regex.push(restrSeparator, '.*', restrFromPlainPattern(s));
    }

    static unitFromCompiled(args) {
        const i = bidiTrie.storeString(args[1]);
        const f = new FilterPatternRightEx(i, args[1].length);
        return filterUnits.push(f) - 1;
    }

    static fromSelfie(args) {
        return new FilterPatternRightEx(args[1], args[2]);
    }
};

registerFilterClass(FilterPatternRightEx);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit matching an arbitrary pattern through a regular expression
// built from the pattern with restrFromGenericPattern(). The regex is
// created lazily, the first time match() is called.
const FilterPatternGeneric = class {
    // s:      the filter pattern.
    // anchor: anchor bit field as understood by restrFromGenericPattern().
    //         Only stored on the instance when it is not 0 -- otherwise the
    //         prototype's default of 0 applies.
    constructor(s, anchor) {
        this.s = s;
        if ( anchor !== 0 ) {
            this.anchor = anchor;
        }
    }

    match() {
        if ( this.re === null ) {
            this.re = new RegExp(restrFromGenericPattern(this.s, this.anchor));
        }
        return this.re.test($requestURL);
    }

    // Replaces (rather than extends) whatever pattern/regex fragments were
    // collected so far. The regex reported is built without the hostname
    // anchor bit.
    logData(details) {
        details.pattern.length = 0;
        if ( (this.anchor & 0b100) !== 0 ) {
            details.pattern.push('||');
        } else if ( (this.anchor & 0b010) !== 0 ) {
            details.pattern.push('|');
        }
        details.pattern.push(this.s);
        if ( (this.anchor & 0b001) !== 0 ) {
            details.pattern.push('|');
        }
        details.regex.length = 0;
        details.regex.push(
            restrFromGenericPattern(this.s, this.anchor & ~0b100)
        );
    }

    toSelfie() {
        return [ this.fid, this.s, this.anchor ];
    }

    // Compiled form: [ fid, pattern, anchor ]. The anchor is taken over by
    // this unit: `details.anchor` is reset to 0.
    static compile(details) {
        const anchor = details.anchor;
        details.anchor = 0;
        return [ FilterPatternGeneric.fid, details.f, anchor ];
    }

    static unitFromCompiled(args) {
        const f = new FilterPatternGeneric(args[1], args[2]);
        return filterUnits.push(f) - 1;
    }

    static fromSelfie(args) {
        return new FilterPatternGeneric(args[1], args[2]);
    }
};

// Defaults shared through the prototype.
FilterPatternGeneric.prototype.re = null;
FilterPatternGeneric.prototype.anchor = 0;

FilterPatternGeneric.isSlow = true;

registerFilterClass(FilterPatternGeneric);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit for filters of the form `||hostname^`: matches when the
// request hostname is the given hostname or one of its subdomains.
const FilterPlainHostname = class {
    // s: the hostname to match.
    constructor(s) {
        this.s = s;
    }

    match() {
        if ( $requestHostname.endsWith(this.s) === false ) { return false; }
        // Either an exact match, or the match must start right after a
        // label boundary.
        const offset = $requestHostname.length - this.s.length;
        return offset === 0 ||
            $requestHostname.charCodeAt(offset - 1) === 0x2E /* '.' */;
    }

    logData(details) {
        details.pattern.push('||', this.s, '^');
        details.regex.push(restrFromPlainPattern(this.s), restrSeparator);
    }

    toSelfie() {
        return [ this.fid, this.s ];
    }

    // Compiled form: [ fid, hostname ].
    static compile(details) {
        return [ FilterPlainHostname.fid, details.f ];
    }

    static unitFromCompiled(args) {
        const f = new FilterPlainHostname(args[1]);
        return filterUnits.push(f) - 1;
    }

    static fromSelfie(args) {
        return new FilterPlainHostname(args[1]);
    }
};

registerFilterClass(FilterPlainHostname);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit implementing the hostname anchor (`||`): checks that the
// current left match boundary falls within the hostname part of the
// haystack, at the start of the hostname or right after a dot. A single
// shared instance is used; it caches where the hostname is located.
const FilterAnchorHn = class {
    constructor() {
        // Hostname length for which the cached positions were computed.
        this.lastLen = 0;
        // Cached start/end of the hostname in the haystack, or -1.
        this.lastBeg = -1;
        this.lastEnd = -1;
    }

    match() {
        const len = $requestHostname.length;
        const haystackCodes = bidiTrie.haystack;
        // Recompute the cached positions when the hostname length changed,
        // when nothing is cached, or when the cached start is no longer
        // preceded by `://` in the haystack.
        if (
            len !== this.lastLen ||
            this.lastBeg === -1 ||
            haystackCodes[this.lastBeg-3] !== 0x3A /* ':' */ ||
            haystackCodes[this.lastBeg-2] !== 0x2F /* '/' */ ||
            haystackCodes[this.lastBeg-1] !== 0x2F /* '/' */
        ) {
            // Locate the first `:`; it only counts when it lies within the
            // haystack's current length and is followed by `//`.
            this.lastBeg = len !== 0 ? haystackCodes.indexOf(0x3A) : -1;
            if ( this.lastBeg !== -1 ) {
                if (
                    this.lastBeg >= bidiTrie.haystackLen ||
                    haystackCodes[this.lastBeg+1] !== 0x2F ||
                    haystackCodes[this.lastBeg+2] !== 0x2F
                ) {
                    this.lastBeg = -1;
                }
            }
            if ( this.lastBeg !== -1 ) {
                // Skip over `://`.
                this.lastBeg += 3;
                this.lastEnd = this.lastBeg + len;
            } else {
                this.lastEnd = -1;
            }
            this.lastLen = len;
        }
        const left = $patternMatchLeft;
        return left < this.lastEnd && (
            left === this.lastBeg ||
            left > this.lastBeg && haystackCodes[left-1] === 0x2E /* '.' */
        );
    }

    logData(details) {
        details.pattern.unshift('||');
    }

    toSelfie() {
        return [ this.fid ];
    }

    static compile() {
        return [ FilterAnchorHn.fid ];
    }

    // Always the index of the shared instance.
    static unitFromCompiled() {
        return FilterAnchorHn.filterUnit;
    }

    static fromSelfie() {
        return FilterAnchorHn.instance;
    }
};

// Shared instance and its index in `filterUnits`.
FilterAnchorHn.instance = new FilterAnchorHn();
FilterAnchorHn.filterUnit = filterUnits.length;
filterUnits.push(FilterAnchorHn.instance);

registerFilterClass(FilterAnchorHn);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit implementing the left anchor (`|` at the start of a filter):
// the match must start at the very beginning of the haystack. A single
// shared instance is used for all such filters.
const FilterAnchorLeft = class {
    match() {
        return $patternMatchLeft === 0;
    }

    logData(details) {
        details.pattern.unshift('|');
        details.regex.unshift('^');
    }

    toSelfie() {
        return [ this.fid ];
    }

    static compile() {
        return [ FilterAnchorLeft.fid ];
    }

    // Always the index of the shared instance.
    static unitFromCompiled() {
        return FilterAnchorLeft.filterUnit;
    }

    static fromSelfie() {
        return FilterAnchorLeft.instance;
    }
};

// Shared instance and its index in `filterUnits`.
FilterAnchorLeft.instance = new FilterAnchorLeft();
FilterAnchorLeft.filterUnit = filterUnits.length;
filterUnits.push(FilterAnchorLeft.instance);

registerFilterClass(FilterAnchorLeft);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit implementing the right anchor (`|` at the end of a filter):
// the match must end at the very end of the request URL. A single shared
// instance is used for all such filters.
const FilterAnchorRight = class {
    match() {
        return $patternMatchRight === $requestURL.length;
    }

    logData(details) {
        details.pattern.push('|');
        details.regex.push('$');
    }

    toSelfie() {
        return [ this.fid ];
    }

    static compile() {
        return [ FilterAnchorRight.fid ];
    }

    // Always the index of the shared instance.
    static unitFromCompiled() {
        return FilterAnchorRight.filterUnit;
    }

    static fromSelfie() {
        return FilterAnchorRight.instance;
    }
};

// Shared instance and its index in `filterUnits`.
FilterAnchorRight.instance = new FilterAnchorRight();
FilterAnchorRight.filterUnit = filterUnits.length;
filterUnits.push(FilterAnchorRight.instance);

registerFilterClass(FilterAnchorRight);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit implementing a trailing `^`: the match must be followed by a
// separator character, or end exactly at the end of the request URL. A
// single shared instance is used for all such filters.
const FilterTrailingSeparator = class {
    match() {
        return $patternMatchRight === $requestURL.length ||
            isSeparatorChar(bidiTrie.haystack[$patternMatchRight]);
    }

    logData(details) {
        details.pattern.push('^');
        details.regex.push(restrSeparator);
    }

    toSelfie() {
        return [ this.fid ];
    }

    static compile() {
        return [ FilterTrailingSeparator.fid ];
    }

    // Always the index of the shared instance.
    static unitFromCompiled() {
        return FilterTrailingSeparator.filterUnit;
    }

    static fromSelfie() {
        return FilterTrailingSeparator.instance;
    }
};

// Shared instance and its index in `filterUnits`.
FilterTrailingSeparator.instance = new FilterTrailingSeparator();
FilterTrailingSeparator.filterUnit = filterUnits.length;
filterUnits.push(FilterTrailingSeparator.instance);

registerFilterClass(FilterTrailingSeparator);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Filter unit testing the request type against a bitmap of accepted types.
const FilterType = class {
    // bits: bitmap of accepted types.
    constructor(bits) {
        this.typeBits = bits;
    }

    // True when the bit of the current request type is set in the bitmap.
    match() {
        return (this.typeBits & $requestTypeBit) !== 0;
    }

    // This unit contributes nothing to the log data.
    logData() {
    }

    toSelfie() {
        return [ this.fid, this.typeBits ];
    }

    // Compiled form: [ fid, type bitmap ], with the bitmap restricted to
    // network request types.
    static compile(details) {
        return [ FilterType.fid, details.typeBits & allNetworkTypesBits ];
    }

    static unitFromCompiled(args) {
        const f = new FilterType(args[1]);
        return filterUnits.push(f) - 1;
    }

    static fromSelfie(args) {
        return new FilterType(args[1]);
    }
};

registerFilterClass(FilterType);
|
|
|
|
/******************************************************************************/
|
|
|
|
const FilterRegex = class {
|
|
constructor(s) {
|
|
this.s = s;
|
|
}
|
|
|
|
match() {
|
|
if ( this.re === null ) {
|
|
this.re = FilterRegex.dict.get(this.s);
|
|
if ( this.re === undefined ) {
|
|
this.re = new RegExp(this.s, 'i');
|
|
FilterRegex.dict.set(this.s, this.re);
|
|
}
|
|
}
|
|
if ( this.re.test($requestURL) === false ) { return false; }
|
|
$patternMatchLeft = $requestURL.search(this.re);
|
|
return true;
|
|
}
|
|
|
|
logData(details) {
|
|
details.pattern.push('/', this.s, '/');
|
|
details.regex.push(this.s);
|
|
details.isRegex = true;
|
|
}
|
|
|
|
toSelfie() {
|
|
return [ this.fid, this.s ];
|
|
}
|
|
|
|
static compile(details) {
|
|
return [ FilterRegex.fid, details.f ];
|
|
}
|
|
|
|
static unitFromCompiled(args) {
|
|
const f = new FilterRegex(args[1]);
|
|
return filterUnits.push(f) - 1;
|
|
}
|
|
|
|
static fromSelfie(args) {
|
|
return new FilterRegex(args[1]);
|
|
}
|
|
};
|
|
|
|
FilterRegex.prototype.re = null;
|
|
|
|
FilterRegex.isSlow = true;
|
|
FilterRegex.dict = new Map();
|
|
|
|
registerFilterClass(FilterRegex);
|
|
|
|
/******************************************************************************/
|
|
|
|
// The optimal "class" is picked according to the content of the
|
|
// `domain=` filter option.
|
|
|
|
// Singleton in charge of compiling the `domain=` filter option into the
// most suitable origin filter unit(s), and of sharing units between filters
// which use the very same option value.
const filterOrigin = new (class {
    constructor() {
        let trieDetails;
        try {
            trieDetails = JSON.parse(
                vAPI.localStorage.getItem('FilterOrigin.trieDetails')
            );
        } catch(ex) {
            // Deliberately ignored: on failure `trieDetails` stays
            // undefined and the container is created without persisted
            // details.
        }
        this.trieContainer = new µb.HNTrieContainer(trieDetails);
        // `domain=` string => unit index; short-lived, see unitFromCompiled().
        this.strToUnitMap = new Map();
        this.gcTimer = undefined;
    }

    // Compile `details.domainOpt` and add the result to `units`.
    //
    //   details: parsed filter; only `domainOpt` is read.
    //   prepend: when truthy, the compiled units are inserted at the start
    //            of `units` instead of appended. Either way, a "miss" unit
    //            ends up placed before a "hit" unit.
    //   units:   array receiving the compiled units.
    compile(details, prepend, units) {
        const domainOpt = details.domainOpt;
        let compiledMiss, compiledHit;
        // One hostname
        if ( domainOpt.indexOf('|') === -1 ) {
            // Must be a miss
            if ( domainOpt.charCodeAt(0) === 0x7E /* '~' */ ) {
                compiledMiss = FilterOriginMiss.compile(domainOpt);
            }
            // Must be a hit
            else {
                compiledHit = FilterOriginHit.compile(domainOpt);
            }
        }
        // Many hostnames.
        // Must be in set (none negated).
        else if ( domainOpt.indexOf('~') === -1 ) {
            compiledHit = FilterOriginHitSet.compile(domainOpt);
        }
        // Must not be in set (all negated).
        else if ( /^~(?:[^|~]+\|~)+[^|~]+$/.test(domainOpt) ) {
            compiledMiss = FilterOriginMissSet.compile(domainOpt);
        }
        // Must be in one set, but not in the other.
        else {
            const hostnames = domainOpt.split('|');
            // Negated entries.
            const missSet = hostnames.filter(hn =>
                hn.charCodeAt(0) === 0x7E /* '~' */
            );
            // Non-negated entries; empty entries are left out.
            const hitSet = hostnames.filter(hn =>
                hn !== '' && hn.charCodeAt(0) !== 0x7E /* '~' */
            );
            compiledMiss = missSet.length === 1
                ? FilterOriginMiss.compile(missSet[0])
                : FilterOriginMissSet.compile(missSet.join('|'));
            compiledHit = hitSet.length === 1
                ? FilterOriginHit.compile(hitSet[0])
                : FilterOriginHitSet.compile(hitSet.join('|'));
        }
        if ( prepend ) {
            if ( compiledHit ) { units.unshift(compiledHit); }
            if ( compiledMiss ) { units.unshift(compiledMiss); }
        } else {
            if ( compiledMiss ) { units.push(compiledMiss); }
            if ( compiledHit ) { units.push(compiledHit); }
        }
    }

    // Return the index of a filter unit of class `ctor` created from the
    // string `s`, reusing a recently created unit when `s` was already
    // seen. The lookup map is keyed by `s` alone, and is emptied 5 seconds
    // after the timer was armed.
    unitFromCompiled(ctor, s) {
        let iunit = this.strToUnitMap.get(s);
        if ( iunit !== undefined ) { return iunit; }
        const f = new ctor(s);
        iunit = filterUnits.push(f) - 1;
        this.strToUnitMap.set(s, iunit);
        // Timer already armed: nothing more to do.
        if ( this.gcTimer !== undefined ) { return iunit; }
        this.gcTimer = self.setTimeout(
            ( ) => {
                this.gcTimer = undefined;
                this.strToUnitMap.clear();
            },
            5000
        );
        return iunit;
    }

    reset() {
        this.trieContainer.reset();
        this.strToUnitMap.clear();
    }

    // Optimize the trie container and persist the details it returns under
    // 'FilterOrigin.trieDetails', from where the constructor reads them.
    optimize() {
        const trieDetails = this.trieContainer.optimize();
        vAPI.localStorage.setItem(
            'FilterOrigin.trieDetails',
            JSON.stringify(trieDetails)
        );
    }

    // Intentionally empty.
    toSelfie() {
    }

    // Intentionally empty.
    fromSelfie() {
    }
})();
|
|
|
|
/******************************************************************************/
|
|
|
|
// Matches when the document hostname (`$docHostname`) is the stored
// hostname or one of its subdomains.
const FilterOriginHit = class {
    constructor(hostname) {
        this.hostname = hostname;
    }

    // True when `$docHostname` ends with the stored hostname at a label
    // boundary, i.e. the match starts at index 0 or right after a '.'.
    match() {
        const docHostname = $docHostname;
        const hostname = this.hostname;
        const start = docHostname.length - hostname.length;
        if (
            start < 0 ||
            // Cheap first-character test before the full suffix test.
            docHostname.charCodeAt(start) !== hostname.charCodeAt(0) ||
            docHostname.endsWith(hostname) === false
        ) {
            return false;
        }
        if ( start === 0 ) { return true; }
        return docHostname.charCodeAt(start - 1) === 0x2E /* '.' */;
    }

    toSelfie() {
        return [ this.fid, this.hostname ];
    }

    // Appends the stored hostname to `details.domains`.
    logData(details) {
        details.domains.push(this.hostname);
    }

    static compile(domainOpt) {
        return [ FilterOriginHit.fid, domainOpt ];
    }

    // `args` is an array as produced by `compile()`.
    static unitFromCompiled(args) {
        return filterOrigin.unitFromCompiled(FilterOriginHit, args[1]);
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(args) {
        return new FilterOriginHit(args[1]);
    }
};
|
|
|
|
registerFilterClass(FilterOriginHit);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Matches when the document hostname (`$docHostname`) is neither the
// stored hostname nor one of its subdomains.
const FilterOriginMiss = class {
    // `hostname` is expected to carry a one-character prefix (the '~' of
    // a negated `domain=` entry), which is dropped here.
    constructor(hostname) {
        this.hostname = hostname.slice(1);
    }

    // False only when `$docHostname` ends with the stored hostname at a
    // label boundary (start of string, or right after a '.').
    match() {
        const docHostname = $docHostname;
        const hostname = this.hostname;
        if ( docHostname.endsWith(hostname) === false ) { return true; }
        const start = docHostname.length - hostname.length;
        return start !== 0 &&
               docHostname.charCodeAt(start - 1) !== 0x2E /* '.' */;
    }

    // Appends the hostname, with its '~' prefix restored, to
    // `details.domains`.
    logData(details) {
        details.domains.push(`~${this.hostname}`);
    }

    // The '~' prefix is restored so that `fromSelfie()` can feed the value
    // straight back into the constructor.
    toSelfie() {
        return [ this.fid, `~${this.hostname}` ];
    }

    static compile(domainOpt) {
        return [ FilterOriginMiss.fid, domainOpt ];
    }

    // `args` is an array as produced by `compile()`.
    static unitFromCompiled(args) {
        return filterOrigin.unitFromCompiled(FilterOriginMiss, args[1]);
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(args) {
        return new FilterOriginMiss(args[1]);
    }
};
|
|
|
|
registerFilterClass(FilterOriginMiss);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Matches when the document hostname (`$docHostname`) is matched by a
// trie built from the '|'-separated hostnames in `domainOpt`.
const FilterOriginHitSet = class {
    // `oneOf`, when not null, is handed to the trie container's
    // `createOne()`; otherwise the trie is built lazily by `match()`.
    constructor(domainOpt, oneOf = null) {
        this.domainOpt = domainOpt;
        this.oneOf = null;
        if ( oneOf !== null ) {
            this.oneOf = filterOrigin.trieContainer.createOne(oneOf);
        }
    }

    match() {
        let trie = this.oneOf;
        if ( trie === null ) {
            // First use: build the trie from the raw option string.
            trie = filterOrigin.trieContainer.fromIterable(
                this.domainOpt.split('|')
            );
            this.oneOf = trie;
        }
        return trie.matches($docHostname) !== -1;
    }

    // Appends the raw option string to `details.domains`.
    logData(details) {
        details.domains.push(this.domainOpt);
    }

    // The third slot holds the compiled trie, or null when the trie has
    // not been built yet.
    toSelfie() {
        let compiledTrie = null;
        if ( this.oneOf !== null ) {
            compiledTrie = filterOrigin.trieContainer.compileOne(this.oneOf);
        }
        return [ this.fid, this.domainOpt, compiledTrie ];
    }

    static compile(domainOpt) {
        return [ FilterOriginHitSet.fid, domainOpt ];
    }

    // `args` is an array as produced by `compile()`.
    static unitFromCompiled(args) {
        return filterOrigin.unitFromCompiled(FilterOriginHitSet, args[1]);
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(args) {
        return new FilterOriginHitSet(args[1], args[2]);
    }
};
|
|
|
|
registerFilterClass(FilterOriginHitSet);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Matches when the document hostname (`$docHostname`) is NOT matched by a
// trie built from the negated, '|'-separated hostnames in `domainOpt`.
const FilterOriginMissSet = class {
    // `noneOf`, when not null, is handed to the trie container's
    // `createOne()`; otherwise the trie is built lazily by `match()`.
    constructor(domainOpt, noneOf = null) {
        this.domainOpt = domainOpt;
        this.noneOf = null;
        if ( noneOf !== null ) {
            this.noneOf = filterOrigin.trieContainer.createOne(noneOf);
        }
    }

    match() {
        let trie = this.noneOf;
        if ( trie === null ) {
            // First use: strip every '~' and build the trie from the
            // remaining hostnames.
            const hostnames = this.domainOpt.replace(/~/g, '').split('|');
            trie = filterOrigin.trieContainer.fromIterable(hostnames);
            this.noneOf = trie;
        }
        return trie.matches($docHostname) === -1;
    }

    // Appends the raw option string (with '~' markers) to
    // `details.domains`.
    logData(details) {
        details.domains.push(this.domainOpt);
    }

    // The third slot holds the compiled trie, or null when the trie has
    // not been built yet.
    toSelfie() {
        let compiledTrie = null;
        if ( this.noneOf !== null ) {
            compiledTrie = filterOrigin.trieContainer.compileOne(this.noneOf);
        }
        return [ this.fid, this.domainOpt, compiledTrie ];
    }

    static compile(domainOpt) {
        return [ FilterOriginMissSet.fid, domainOpt ];
    }

    // `args` is an array as produced by `compile()`.
    static unitFromCompiled(args) {
        return filterOrigin.unitFromCompiled(FilterOriginMissSet, args[1]);
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(args) {
        return new FilterOriginMissSet(args[1], args[2]);
    }
};
|
|
|
|
registerFilterClass(FilterOriginMissSet);
|
|
|
|
/******************************************************************************/
|
|
|
|
// A filter unit which carries a typed piece of data (`dataType`, `data`)
// and always matches.
const FilterDataHolder = class {
    constructor(dataType, data) {
        this.dataType = dataType;
        this.data = data;
    }

    match() {
        return true;
    }

    // Returns whether this holder is of data type `type`. When it is and
    // `out` is an array, this instance is appended to `out`.
    matchAndFetchData(type, out) {
        if ( type !== this.dataType ) { return false; }
        if ( Array.isArray(out) ) { out.push(this); }
        return true;
    }

    // Returns the data when `type` is this holder's data type, undefined
    // otherwise.
    getData(type) {
        if ( type !== this.dataType ) { return; }
        return this.data;
    }

    // Appends the option to `details.options`, as `dataType` alone when
    // the data is the empty string, as `dataType=data` otherwise.
    logData(details) {
        const option = this.data !== ''
            ? `${this.dataType}=${this.data}`
            : this.dataType;
        details.options.push(option);
    }

    toSelfie() {
        return [ this.fid, this.dataType, this.data ];
    }

    static compile(details) {
        return [ FilterDataHolder.fid, details.dataType, details.data ];
    }

    // `args` is an array as produced by `compile()`. Returns the index of
    // the new unit in `filterUnits`.
    static unitFromCompiled(args) {
        return filterUnits.push(new FilterDataHolder(args[1], args[2])) - 1;
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(args) {
        return new FilterDataHolder(args[1], args[2]);
    }
};
|
|
|
|
registerFilterClass(FilterDataHolder);
|
|
|
|
// Helper class for storing instances of FilterDataHolder which were found to
|
|
// be a match.
|
|
|
|
// Records which filter unit (`iunit`) matched, along with the `bits` and
// `th` values to be passed to `toLogDataInternal()` when logging.
const FilterDataHolderResult = class {
    constructor(bits, th, iunit) {
        this.bits = bits;
        this.th = th;
        this.iunit = iunit;
    }

    // Delegates to the referenced unit in `filterUnits`.
    getData(type) {
        const unit = filterUnits[this.iunit];
        return unit.getData(type);
    }

    // 2 when the AllowAction bit is set in `bits`, 1 otherwise.
    get result() {
        if ( (this.bits & AllowAction) !== 0 ) { return 2; }
        return 1;
    }

    // Returns the object built by `toLogDataInternal()`, augmented with
    // `source` and `result` properties.
    logData() {
        const logEntry = toLogDataInternal(this.bits, this.th, this.iunit);
        logEntry.source = 'static';
        logEntry.result = this.result;
        return logEntry;
    }
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Base class for filters made of a chain of filter units. The chain is
// stored in `filterSequences` as pairs: slot `i+0` holds a unit index,
// slot `i+1` holds the index of the next pair, with 0 marking the end.
// `this.i` is the index of the first pair (0 for an empty chain).
const FilterCollection = class {
    constructor(i = 0) {
        this.i = i | 0;
    }

    // Number of units in the chain, obtained by walking it.
    get size() {
        let count = 0;
        this.forEach(( ) => {
            count += 1;
        });
        return count;
    }

    // Adds `iunit` at the front of the chain.
    unshift(iunit) {
        this.i = filterSequenceAdd(iunit, this.i);
    }

    // Drops the first unit of the chain: its slot in `filterUnits` is set
    // to null and the chain then starts at the following pair.
    shift() {
        const ihead = this.i;
        filterUnits[filterSequences[ihead+0]] = null;
        this.i = filterSequences[ihead+1];
    }

    // Calls `fn(iunit)` for each unit index in the chain, in order. The
    // walk stops at, and returns, the first value other than undefined
    // returned by `fn`.
    forEach(fn) {
        const sequences = filterSequences;
        for ( let i = this.i; i !== 0; i = sequences[i+1] ) {
            const r = fn(sequences[i+0]);
            if ( r !== undefined ) { return r; }
        }
    }

    toSelfie() {
        return [ this.fid, this.i ];
    }

    static compile(ctor, fdata) {
        return [ ctor.fid, fdata ];
    }

    // Instantiates every compiled unit in `args[1]`, links them into a new
    // chain preserving their order, then appends a new `ctor` instance
    // heading that chain to `filterUnits`. Returns the index of that
    // instance.
    static unitFromCompiled(ctor, args) {
        const compiledUnits = args[1];
        let ihead = 0;
        let itail = 0;
        for ( const compiledUnit of compiledUnits ) {
            const iunit = filterUnitFromCompiled(compiledUnit);
            const inode = filterSequenceAdd(iunit, 0);
            if ( itail === 0 ) {
                ihead = inode;
            } else {
                filterSequences[itail+1] = inode;
            }
            itail = inode;
        }
        // NOTE(review): the unit count is passed as a second argument, but
        // the constructor above takes only the chain index -- confirm
        // whether subclasses are meant to use it.
        return filterUnits.push(new ctor(ihead, compiledUnits.length)) - 1;
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(ctor, args) {
        return new ctor(args[1]);
    }
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// A chain of filter units which matches only when every unit matches.
const FilterComposite = class extends FilterCollection {
    // True when `match()` returns exactly `true` for every unit in the
    // chain (an empty chain matches).
    match() {
        const units = filterUnits;
        const sequences = filterSequences;
        for ( let i = this.i; i !== 0; i = sequences[i+1] ) {
            if ( units[sequences[i+0]].match() !== true ) { return false; }
        }
        return true;
    }

    // Returns false when the composite does not match. Otherwise looks for
    // the first unit exposing `matchAndFetchData()` which does not return
    // false for `type`; when one is found and `out` is an array, this
    // composite (not the unit) is appended to `out`.
    // Note that nothing is explicitly returned once the composite matched.
    matchAndFetchData(type, out) {
        if ( this.match() !== true ) { return false; }
        const collect = Array.isArray(out);
        this.forEach(iunit => {
            const unit = filterUnits[iunit];
            if ( unit.matchAndFetchData instanceof Function === false ) {
                return;
            }
            if ( unit.matchAndFetchData(type) === false ) { return; }
            if ( collect ) { out.push(this); }
            // Any value other than undefined stops the walk.
            return true;
        });
    }

    // Returns the first defined value returned by `getData(type)` among
    // the units exposing `matchAndFetchData()`.
    getData(type) {
        return this.forEach(iunit => {
            const unit = filterUnits[iunit];
            if ( unit.matchAndFetchData instanceof Function === false ) {
                return;
            }
            return unit.getData(type);
        });
    }

    // FilterPatternPlain is assumed to be first filter in sequence. This can
    // be revisited if needed.
    get isBidiTrieable() {
        const first = filterUnits[filterSequences[this.i]];
        return first.isBidiTrieable === true;
    }

    // Returns the trie details of the first unit, and removes that unit
    // from the chain.
    toBidiTrie() {
        const first = filterUnits[filterSequences[this.i]];
        const details = first.toBidiTrie();
        this.shift();
        return details;
    }

    // Lets every unit in the chain contribute to `details`.
    logData(details) {
        this.forEach(iunit => {
            filterUnits[iunit].logData(details);
        });
    }

    static compile(fdata) {
        return FilterCollection.compile(FilterComposite, fdata);
    }

    static unitFromCompiled(args) {
        return FilterCollection.unitFromCompiled(FilterComposite, args);
    }

    static fromSelfie(args) {
        return FilterCollection.fromSelfie(FilterComposite, args);
    }
};
|
|
|
|
registerFilterClass(FilterComposite);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Dictionary of hostnames
|
|
|
|
// A set of hostnames, stored in a trie created through
// `FilterHostnameDict.trieContainer`, matched against `$requestHostname`.
const FilterHostnameDict = class {
    constructor(args) {
        this.$h = ''; // short-lived register
        this.dict = FilterHostnameDict.trieContainer.createOne(args);
    }

    get size() {
        return this.dict.size;
    }

    add(hn) {
        return this.dict.add(hn);
    }

    // On a match, the part of `$requestHostname` starting at the position
    // reported by the trie is kept in `$h` for `logData()`.
    match() {
        const pos = this.dict.matches($requestHostname);
        if ( pos !== -1 ) {
            this.$h = $requestHostname.slice(pos);
            return true;
        }
        return false;
    }

    // Reports the last matched hostname as a `||hostname^` pattern.
    logData(details) {
        const hostname = this.$h;
        details.pattern.push('||', hostname, '^');
        details.regex.push(restrFromPlainPattern(hostname), restrSeparator);
    }

    toSelfie() {
        const compiledDict =
            FilterHostnameDict.trieContainer.compileOne(this.dict);
        return [ this.fid, compiledDict ];
    }

    static reset() {
        return FilterHostnameDict.trieContainer.reset();
    }

    // Optimizes the trie container and saves the details it reports in
    // local storage.
    static optimize() {
        const details = FilterHostnameDict.trieContainer.optimize();
        vAPI.localStorage.setItem(
            'FilterHostnameDict.trieDetails',
            JSON.stringify(details)
        );
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(args) {
        return new FilterHostnameDict(args[1]);
    }
};
|
|
|
|
// The trie container backing all FilterHostnameDict instances, seeded
// with whatever trie details were last saved in local storage by
// FilterHostnameDict.optimize().
FilterHostnameDict.trieContainer = (( ) => {
    let savedDetails;
    try {
        const json = vAPI.localStorage.getItem('FilterHostnameDict.trieDetails');
        savedDetails = JSON.parse(json);
    } catch(ex) {
        // Best effort: `savedDetails` stays undefined on failure.
    }
    return new µb.HNTrieContainer(savedDetails);
})();
|
|
|
|
registerFilterClass(FilterHostnameDict);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Dictionary of hostnames for filters whose only purpose is to match
// the document origin.
|
|
|
|
// A set of hostnames, stored in a trie created through
// `filterOrigin.trieContainer`, matched against `$docHostname`.
const FilterJustOrigin = class {
    constructor(args) {
        this.$h = ''; // short-lived register
        this.dict = filterOrigin.trieContainer.createOne(args);
    }

    get size() {
        return this.dict.size;
    }

    add(hn) {
        return this.dict.add(hn);
    }

    // On a match, the part of `$docHostname` starting at the position
    // reported by the trie is kept in `$h` for `logData()`.
    match() {
        const pos = this.dict.matches($docHostname);
        if ( pos !== -1 ) {
            this.$h = $docHostname.slice(pos);
            return true;
        }
        return false;
    }

    // Reports a match-all pattern restricted to the last matched hostname.
    logData(details) {
        details.pattern.push('*');
        details.regex.push('^');
        details.domains.push(this.$h);
    }

    toSelfie() {
        const compiledDict = filterOrigin.trieContainer.compileOne(this.dict);
        return [ this.fid, compiledDict ];
    }

    // Returns the index of the new unit in `filterUnits`.
    static unitFromCompiled(args) {
        return filterUnits.push(new FilterJustOrigin(args[1])) - 1;
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(args) {
        return new FilterJustOrigin(args[1]);
    }
};
|
|
|
|
registerFilterClass(FilterJustOrigin);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Same as FilterJustOrigin, but matches only when `$requestURL` starts
// with `https://`.
const FilterHTTPSJustOrigin = class extends FilterJustOrigin {
    match() {
        if ( $requestURL.startsWith('https://') === false ) { return false; }
        return super.match();
    }

    // Reports a `|https://` pattern restricted to the last matched
    // hostname.
    logData(details) {
        details.pattern.push('|https://');
        details.regex.push('^https://');
        details.domains.push(this.$h);
    }

    // Returns the index of the new unit in `filterUnits`.
    static unitFromCompiled(args) {
        return filterUnits.push(new FilterHTTPSJustOrigin(args[1])) - 1;
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(args) {
        return new FilterHTTPSJustOrigin(args[1]);
    }
};
|
|
|
|
registerFilterClass(FilterHTTPSJustOrigin);
|
|
|
|
/******************************************************************************/
|
|
|
|
// Same as FilterJustOrigin, but matches only when `$requestURL` starts
// with `http://`.
const FilterHTTPJustOrigin = class extends FilterJustOrigin {
    match() {
        if ( $requestURL.startsWith('http://') === false ) { return false; }
        return super.match();
    }

    // Reports a `|http://` pattern restricted to the last matched
    // hostname.
    logData(details) {
        details.pattern.push('|http://');
        details.regex.push('^http://');
        details.domains.push(this.$h);
    }

    // Returns the index of the new unit in `filterUnits`.
    static unitFromCompiled(args) {
        return filterUnits.push(new FilterHTTPJustOrigin(args[1])) - 1;
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(args) {
        return new FilterHTTPJustOrigin(args[1]);
    }
};
|
|
|
|
registerFilterClass(FilterHTTPJustOrigin);
|
|
|
|
/******************************************************************************/
|
|
|
|
// A filter unit wrapping a single trie of plain patterns, matched at the
// current token position (`$tokenBeg`).
const FilterPlainTrie = class {
    constructor(trie) {
        this.plainTrie = trie;
    }

    // On a match, the trie's `$iu` value is remembered in `$matchedUnit`
    // for `logData()`.
    match() {
        if ( this.plainTrie.matches($tokenBeg) === 0 ) { return false; }
        this.$matchedUnit = this.plainTrie.$iu;
        return true;
    }

    matchAndFetchData(/* type, out */) {
        // TODO
    }

    // Reports the slice of `$requestURL` delimited by the trie's `$l` and
    // `$r` values as the matched pattern, then lets the matched unit, if
    // any (i.e. `$matchedUnit` is not -1), add its own details.
    logData(details) {
        const trie = this.plainTrie;
        const matched = $requestURL.slice(trie.$l, trie.$r);
        details.pattern.push(matched);
        details.regex.push(restrFromPlainPattern(matched));
        if ( this.$matchedUnit === -1 ) { return; }
        filterUnits[this.$matchedUnit].logData(details);
    }

    toSelfie() {
        const compiledTrie = bidiTrie.compileOne(this.plainTrie);
        return [ this.fid, compiledTrie ];
    }

    // `args` is an array as produced by `toSelfie()`.
    static fromSelfie(args) {
        const trie = bidiTrie.createOne(args[1]);
        return new FilterPlainTrie(trie);
    }
};

// Default value, shared by all instances until `match()` succeeds.
FilterPlainTrie.prototype.$matchedUnit = 0;
|
|
|
|
registerFilterClass(FilterPlainTrie);
|
|
|
|
/******************************************************************************/
|
|
|
|
// A chain of filter units which matches as soon as one unit matches.
// Units which can be stored in a trie may be moved by `optimize()` into
// `plainTrie`, which is then tested before the remaining chain.
const FilterBucket = class extends FilterCollection {
    // On a match, `$matchedTrie` tells whether the trie or the chain
    // matched, and `$matchedUnit` holds the related unit index (the
    // trie's `$iu` value in the former case).
    match() {
        const trie = this.plainTrie;
        if ( trie !== null && trie.matches($tokenBeg, this) !== 0 ) {
            this.$matchedTrie = true;
            this.$matchedUnit = trie.$iu;
            return true;
        }
        const units = filterUnits;
        const sequences = filterSequences;
        for ( let i = this.i; i !== 0; i = sequences[i+1] ) {
            const iunit = sequences[i+0];
            if ( units[iunit].match() ) {
                this.$matchedTrie = false;
                this.$matchedUnit = iunit;
                return true;
            }
        }
        return false;
    }

    // Forwards the call to every unit in the chain. Units stored in the
    // trie are not visited.
    matchAndFetchData(type, out) {
        const units = filterUnits;
        this.forEach(iunit => {
            units[iunit].matchAndFetchData(type, out);
        });
    }

    // When the trie matched, reports the slice of `$requestURL` delimited
    // by the trie's `$l` and `$r` values as the matched pattern. Then lets
    // the matched unit, if any (i.e. `$matchedUnit` is not -1), add its
    // own details.
    logData(details) {
        if ( this.$matchedTrie ) {
            const trie = this.plainTrie;
            const matched = $requestURL.slice(trie.$l, trie.$r);
            details.pattern.push(matched);
            details.regex.push(restrFromPlainPattern(matched));
        }
        if ( this.$matchedUnit === -1 ) { return; }
        filterUnits[this.$matchedUnit].logData(details);
    }

    // Same as the base class selfie, with the compiled trie appended when
    // there is one.
    toSelfie() {
        const selfie = super.toSelfie();
        if ( this.plainTrie === null ) { return selfie; }
        selfie.push(bidiTrie.compileOne(this.plainTrie));
        return selfie;
    }

    // Moves all trieable units from the chain into the trie, provided
    // there are at least three of them in the chain. When the chain ends
    // up empty, returns a FilterPlainTrie instance wrapping the trie;
    // returns undefined in all other cases.
    optimize() {
        const units = filterUnits;

        // Count trieable units, no further than needed.
        let trieableCount = 0;
        let i = this.i;
        do {
            if ( units[filterSequences[i+0]].isBidiTrieable ) {
                trieableCount += 1;
            }
            i = filterSequences[i+1];
        } while ( i !== 0 && trieableCount < 3 );
        if ( trieableCount < 3 ) { return; }

        if ( this.plainTrie === null ) {
            this.plainTrie = bidiTrie.createOne();
        }

        // Walk the whole chain, unlinking each unit moved into the trie.
        // `iprev` is the last pair left in the chain, 0 when none.
        let iprev = 0;
        i = this.i;
        do {
            const iunit = filterSequences[i+0];
            const inext = filterSequences[i+1];
            if ( units[iunit].isBidiTrieable ) {
                this._addToTrie(iunit);
                if ( iprev === 0 ) {
                    this.i = inext;
                } else {
                    filterSequences[iprev+1] = inext;
                }
            } else {
                iprev = i;
            }
            i = inext;
        } while ( i !== 0 );

        if ( this.i === 0 ) {
            return new FilterPlainTrie(this.plainTrie);
        }
    }

    // Stores the pattern of unit `iunit` in the trie. The "extra" value
    // attached to the trie entry is either 1, or the index of a chain (in
    // `filterSequences`) of the units added for that entry.
    _addToTrie(iunit) {
        const unit = filterUnits[iunit];
        const { i, n, itok } = unit.toBidiTrie();
        const id = this.plainTrie.add(i, n, itok);
        // No point storing a pattern with conditions if the bidi-trie already
        // contain a pattern with no conditions.
        const extra = this.plainTrie.getExtra(id);
        if ( extra === 1 ) {
            filterUnits[iunit] = null;
            return;
        }
        // If the newly stored pattern has no condition, shortcut existing
        // ones since they will always be short-circuited by the
        // condition-less pattern.
        if ( unit instanceof FilterPatternPlain ) {
            this.plainTrie.setExtra(id, 1);
            filterUnits[iunit] = null;
            return;
        }
        // FilterComposite is assumed here, i.e. with conditions.
        // NOTE(review): no `n` property is assigned by the collection
        // classes visible in this part of the file -- confirm whether the
        // branch below is reachable.
        let istored = iunit;
        if ( unit.n === 1 ) {
            filterUnits[iunit] = null;
            istored = filterSequences[unit.i];
        }
        this.plainTrie.setExtra(id, filterSequenceAdd(istored, extra));
    }

    // `args` is an array as produced by `toSelfie()`: the trie is
    // re-created only when a third, array element is present.
    static fromSelfie(args) {
        const bucket = FilterCollection.fromSelfie(FilterBucket, args);
        const hasTrie = args.length > 2 && Array.isArray(args[2]);
        if ( hasTrie ) {
            bucket.plainTrie = bidiTrie.createOne(args[2]);
        }
        return bucket;
    }
};

// Default values, shared by all instances until overwritten.
FilterBucket.prototype.plainTrie = null;
FilterBucket.prototype.$matchedUnit = 0;
FilterBucket.prototype.$matchedTrie = false;
|
|
|
|
registerFilterClass(FilterBucket);
|
|
|
|
/******************************************************************************/
|
|
|
|
const FILTER_UNITS_MIN = filterUnits.length;
|
|
const FILTER_SEQUENCES_MIN = filterSequenceWritePtr;
|
|
|
|
/******************************************************************************/
|
|
/******************************************************************************/
|
|
|
|
const FilterParser = class {
|
|
constructor() {
|
|
this.cantWebsocket = vAPI.cantWebsocket;
|
|
this.domainOpt = '';
|
|
this.noTokenHash = urlTokenizer.noTokenHash;
|
|
this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
|
|
this.reHostnameRule1 = /^\w[\w.-]*[a-z]$/i;
|
|
this.reHostnameRule2 = /^\w[\w.-]*[a-z]\^?$/i;
|
|
this.reCanTrimCarets1 = /^[^*]*$/;
|
|
this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/;
|
|
this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
|
|
this.reHasUnicode = /[^\x00-\x7F]/;
|
|
this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
|
|
this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/;
|
|
this.reGoodToken = /[%0-9a-z]{1,}/g;
|
|
this.reSeparator = /[\/^]/;
|
|
this.reRegexToken = /[%0-9A-Za-z]{2,}/g;
|
|
this.reRegexTokenAbort = /[([]/;
|
|
this.reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/;
|
|
this.reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*.]|$)/;
|
|
// These top 100 "bad tokens" are collated using the "miss" histogram
|
|
// from tokenHistograms(). The "score" is their occurrence among the
|
|
// 200K+ URLs used in the benchmark and executed against default
|
|
// filter lists.
|
|
this.badTokens = new Map([
|
|
[ 'https',123617 ],
|
|
[ 'com',76987 ],
|
|
[ 'js',43620 ],
|
|
[ 'www',33129 ],
|
|
[ 'jpg',32221 ],
|
|
[ 'images',31812 ],
|
|
[ 'css',19715 ],
|
|
[ 'png',19140 ],
|
|
[ 'static',15724 ],
|
|
[ 'net',15239 ],
|
|
[ 'de',13155 ],
|
|
[ 'img',11109 ],
|
|
[ 'assets',10746 ],
|
|
[ 'min',7807 ],
|
|
[ 'cdn',7568 ],
|
|
[ 'content',6900 ],
|
|
[ 'wp',6444 ],
|
|
[ 'fonts',6095 ],
|
|
[ 'svg',5976 ],
|
|
[ 'http',5813 ],
|
|
[ 'ssl',5735 ],
|
|
[ 'amazon',5440 ],
|
|
[ 'ru',5427 ],
|
|
[ 'fr',5199 ],
|
|
[ 'facebook',5178 ],
|
|
[ 'en',5146 ],
|
|
[ 'image',5028 ],
|
|
[ 'html',4837 ],
|
|
[ 'media',4833 ],
|
|
[ 'co',4783 ],
|
|
[ 'php',3972 ],
|
|
[ '2019',3943 ],
|
|
[ 'org',3924 ],
|
|
[ 'jquery',3531 ],
|
|
[ '02',3438 ],
|
|
[ 'api',3382 ],
|
|
[ 'gif',3350 ],
|
|
[ 'eu',3322 ],
|
|
[ 'prod',3289 ],
|
|
[ 'woff2',3200 ],
|
|
[ 'logo',3194 ],
|
|
[ 'themes',3107 ],
|
|
[ 'icon',3048 ],
|
|
[ 'google',3026 ],
|
|
[ 'v1',3019 ],
|
|
[ 'uploads',2963 ],
|
|
[ 'googleapis',2860 ],
|
|
[ 'v3',2816 ],
|
|
[ 'tv',2762 ],
|
|
[ 'icons',2748 ],
|
|
[ 'core',2601 ],
|
|
[ 'gstatic',2581 ],
|
|
[ 'ac',2509 ],
|
|
[ 'utag',2466 ],
|
|
[ 'id',2459 ],
|
|
[ 'ver',2448 ],
|
|
[ 'rsrc',2387 ],
|
|
[ 'files',2361 ],
|
|
[ 'uk',2357 ],
|
|
[ 'us',2271 ],
|
|
[ 'pl',2262 ],
|
|
[ 'common',2205 ],
|
|
[ 'public',2076 ],
|
|
[ '01',2016 ],
|
|
[ 'na',1957 ],
|
|
[ 'v2',1954 ],
|
|
[ '12',1914 ],
|
|
[ 'thumb',1895 ],
|
|
[ 'web',1853 ],
|
|
[ 'ui',1841 ],
|
|
[ 'default',1825 ],
|
|
[ 'main',1737 ],
|
|
[ 'false',1715 ],
|
|
[ '2018',1697 ],
|
|
[ 'embed',1639 ],
|
|
[ 'player',1634 ],
|
|
[ 'dist',1599 ],
|
|
[ 'woff',1593 ],
|
|
[ 'global',1593 ],
|
|
[ 'json',1572 ],
|
|
[ '11',1566 ],
|
|
[ '600',1559 ],
|
|
[ 'app',1556 ],
|
|
[ 'styles',1533 ],
|
|
[ 'plugins',1526 ],
|
|
[ '274',1512 ],
|
|
[ 'random',1505 ],
|
|
[ 'sites',1505 ],
|
|
[ 'imasdk',1501 ],
|
|
[ 'bridge3',1501 ],
|
|
[ 'news',1496 ],
|
|
[ 'width',1494 ],
|
|
[ 'thumbs',1485 ],
|
|
[ 'ttf',1470 ],
|
|
[ 'ajax',1463 ],
|
|
[ 'user',1454 ],
|
|
[ 'scripts',1446 ],
|
|
[ 'twitter',1440 ],
|
|
[ 'crop',1431 ],
|
|
[ 'new',1412]
|
|
]);
|
|
this.maxTokenLen = urlTokenizer.MAX_TOKEN_LENGTH;
|
|
this.reset();
|
|
}
|
|
|
|
reset() {
|
|
this.action = BlockAction;
|
|
// anchor: bit vector
|
|
// 0000 (0x0): no anchoring
|
|
// 0001 (0x1): anchored to the end of the URL.
|
|
// 0010 (0x2): anchored to the start of the URL.
|
|
// 0011 (0x3): anchored to the start and end of the URL.
|
|
// 0100 (0x4): anchored to the hostname of the URL.
|
|
// 0101 (0x5): anchored to the hostname and end of the URL.
|
|
this.anchor = 0;
|
|
this.badFilter = false;
|
|
this.dataType = undefined;
|
|
this.data = undefined;
|
|
this.invalid = false;
|
|
this.f = '';
|
|
this.firstParty = false;
|
|
this.thirdParty = false;
|
|
this.party = AnyParty;
|
|
this.fopts = '';
|
|
this.domainOpt = '';
|
|
this.isPureHostname = false;
|
|
this.isRegex = false;
|
|
this.raw = '';
|
|
this.redirect = 0;
|
|
this.token = '*';
|
|
this.tokenHash = this.noTokenHash;
|
|
this.tokenBeg = 0;
|
|
this.typeBits = 0;
|
|
this.notTypes = 0;
|
|
this.important = 0;
|
|
this.firstWildcardPos = -1;
|
|
this.secondWildcardPos = -1;
|
|
this.firstCaretPos = -1;
|
|
this.secondCaretPos = -1;
|
|
this.unsupported = false;
|
|
return this;
|
|
}
|
|
|
|
normalizeRegexSource(s) {
|
|
try {
|
|
const re = new RegExp(s);
|
|
return re.source;
|
|
} catch (ex) {
|
|
}
|
|
return '';
|
|
}
|
|
|
|
bitFromType(type) {
|
|
return 1 << ((typeNameToTypeValue[type] >>> 4) - 1);
|
|
}
|
|
|
|
// https://github.com/chrisaljoudi/uBlock/issues/589
|
|
// Be ready to handle multiple negated types
|
|
|
|
parseTypeOption(raw, not) {
|
|
const typeBit = raw !== 'all'
|
|
? this.bitFromType(toNormalizedType[raw])
|
|
: allTypesBits;
|
|
if ( not ) {
|
|
this.notTypes |= typeBit;
|
|
} else {
|
|
this.typeBits |= typeBit;
|
|
}
|
|
}
|
|
|
|
parsePartyOption(firstParty, not) {
|
|
if ( firstParty ) {
|
|
not = !not;
|
|
}
|
|
if ( not ) {
|
|
this.firstParty = true;
|
|
this.party = this.thirdParty ? AnyParty : FirstParty;
|
|
} else {
|
|
this.thirdParty = true;
|
|
this.party = this.firstParty ? AnyParty : ThirdParty;
|
|
}
|
|
}
|
|
|
|
parseDomainOption(s) {
|
|
if ( this.reHasUnicode.test(s) ) {
|
|
const hostnames = s.split('|');
|
|
let i = hostnames.length;
|
|
while ( i-- ) {
|
|
if ( this.reHasUnicode.test(hostnames[i]) ) {
|
|
hostnames[i] = punycode.toASCII(hostnames[i]);
|
|
}
|
|
}
|
|
s = hostnames.join('|');
|
|
}
|
|
if ( this.reBadDomainOptChars.test(s) ) { return ''; }
|
|
return s;
|
|
}
|
|
|
|
parseOptions(s) {
|
|
this.fopts = s;
|
|
for ( let opt of s.split(/\s*,\s*/) ) {
|
|
const not = opt.startsWith('~');
|
|
if ( not ) {
|
|
opt = opt.slice(1);
|
|
}
|
|
if ( opt === 'third-party' || opt === '3p' ) {
|
|
this.parsePartyOption(false, not);
|
|
continue;
|
|
}
|
|
if ( opt === 'first-party' || opt === '1p' ) {
|
|
this.parsePartyOption(true, not);
|
|
continue;
|
|
}
|
|
if ( toNormalizedType.hasOwnProperty(opt) ) {
|
|
this.parseTypeOption(opt, not);
|
|
continue;
|
|
}
|
|
// https://github.com/gorhill/uBlock/issues/2294
|
|
// Detect and discard filter if domain option contains nonsensical
|
|
// characters.
|
|
if ( opt.startsWith('domain=') ) {
|
|
this.domainOpt = this.parseDomainOption(opt.slice(7));
|
|
if ( this.domainOpt === '' ) {
|
|
this.unsupported = true;
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
if ( opt === 'important' ) {
|
|
this.important = Important;
|
|
continue;
|
|
}
|
|
if ( /^redirect(?:-rule)?=/.test(opt) ) {
|
|
if ( this.redirect !== 0 ) {
|
|
this.unsupported = true;
|
|
break;
|
|
}
|
|
this.redirect = opt.charCodeAt(8) === 0x3D /* '=' */ ? 1 : 2;
|
|
continue;
|
|
}
|
|
if (
|
|
opt.startsWith('csp=') &&
|
|
opt.length > 4 &&
|
|
this.reBadCSP.test(opt) === false
|
|
) {
|
|
this.parseTypeOption('data', not);
|
|
this.dataType = 'csp';
|
|
this.data = opt.slice(4).trim();
|
|
continue;
|
|
}
|
|
if ( opt === 'csp' && this.action === AllowAction ) {
|
|
this.parseTypeOption('data', not);
|
|
this.dataType = 'csp';
|
|
this.data = '';
|
|
continue;
|
|
}
|
|
// Used by Adguard:
|
|
// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters?aid=16593#empty-modifier
|
|
if ( opt === 'empty' || opt === 'mp4' ) {
|
|
if ( this.redirect !== 0 ) {
|
|
this.unsupported = true;
|
|
break;
|
|
}
|
|
this.redirect = 1;
|
|
continue;
|
|
}
|
|
// https://github.com/uBlockOrigin/uAssets/issues/192
|
|
if ( opt === 'badfilter' ) {
|
|
this.badFilter = true;
|
|
continue;
|
|
}
|
|
// https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/
|
|
// Add support for `elemhide`. Rarely used but it happens.
|
|
if ( opt === 'elemhide' || opt === 'ehide' ) {
|
|
this.parseTypeOption('specifichide', not);
|
|
this.parseTypeOption('generichide', not);
|
|
continue;
|
|
}
|
|
// Unrecognized filter option: ignore whole filter.
|
|
this.unsupported = true;
|
|
break;
|
|
}
|
|
|
|
// Redirect rules can't be exception filters.
|
|
if ( this.redirect !== 0 && this.action !== BlockAction ) {
|
|
this.unsupported = true;
|
|
}
|
|
|
|
// Negated network types? Toggle on all network type bits.
|
|
// Negated non-network types can only toggle themselves.
|
|
if ( (this.notTypes & allNetworkTypesBits) !== 0 ) {
|
|
this.typeBits |= allNetworkTypesBits;
|
|
}
|
|
if ( this.notTypes !== 0 ) {
|
|
this.typeBits &= ~this.notTypes;
|
|
if ( this.typeBits === 0 ) {
|
|
this.unsupported = true;
|
|
}
|
|
}
|
|
|
|
// https://github.com/gorhill/uBlock/issues/2283
|
|
// Abort if type is only for unsupported types, otherwise
|
|
// toggle off `unsupported` bit.
|
|
if ( this.typeBits & unsupportedTypeBit ) {
|
|
this.typeBits &= ~unsupportedTypeBit;
|
|
if ( this.typeBits === 0 ) {
|
|
this.unsupported = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// TODO: use charCodeAt where possible.
|
|
|
|
parse(raw) {
|
|
// important!
|
|
this.reset();
|
|
|
|
let s = this.raw = raw.trim();
|
|
|
|
if ( s.length === 0 ) {
|
|
this.invalid = true;
|
|
return this;
|
|
}
|
|
|
|
// Filters which are a single alphanumeric character are discarded
|
|
// as unsupported.
|
|
if ( s.length === 1 && /[0-9a-z]/i.test(s) ) {
|
|
this.unsupported = true;
|
|
return this;
|
|
}
|
|
|
|
// plain hostname? (from HOSTS file)
|
|
if ( this.reHostnameRule1.test(s) ) {
|
|
this.f = s.toLowerCase();
|
|
this.isPureHostname = true;
|
|
this.anchor |= 0b100;
|
|
return this;
|
|
}
|
|
|
|
// element hiding filter?
|
|
let pos = s.indexOf('#');
|
|
if ( pos !== -1 ) {
|
|
const c = s.charAt(pos + 1);
|
|
if ( c === '#' || c === '@' ) {
|
|
console.error('static-net-filtering.js > unexpected cosmetic filters');
|
|
this.invalid = true;
|
|
return this;
|
|
}
|
|
}
|
|
|
|
// block or allow filter?
|
|
// Important: this must be executed before parsing options
|
|
if ( s.startsWith('@@') ) {
|
|
this.action = AllowAction;
|
|
s = s.slice(2);
|
|
}
|
|
|
|
// options
|
|
// https://github.com/gorhill/uBlock/issues/842
|
|
// - ensure sure we are not dealing with a regex-based filter.
|
|
// - lookup the last occurrence of `$`.
|
|
if (
|
|
s.charCodeAt(0) !== 0x2F /* '/' */ ||
|
|
s.charCodeAt(s.length - 1) !== 0x2F /* '/' */
|
|
) {
|
|
pos = s.lastIndexOf('$');
|
|
if ( pos !== -1 ) {
|
|
// https://github.com/gorhill/uBlock/issues/952
|
|
// Discard Adguard-specific `$$` filters.
|
|
if ( s.indexOf('$$') !== -1 ) {
|
|
this.unsupported = true;
|
|
return this;
|
|
}
|
|
this.parseOptions(s.slice(pos + 1));
|
|
if ( this.unsupported ) { return this; }
|
|
s = s.slice(0, pos);
|
|
}
|
|
}
|
|
|
|
// regex?
|
|
if (
|
|
s.length > 2 &&
|
|
s.charCodeAt(0) === 0x2F /* '/' */ &&
|
|
s.charCodeAt(s.length - 1) === 0x2F /* '/' */
|
|
) {
|
|
this.isRegex = true;
|
|
this.f = s.slice(1, -1);
|
|
// https://github.com/gorhill/uBlock/issues/1246
|
|
// If the filter is valid, use the corrected version of the
|
|
// source string -- this ensure reverse-lookup will work fine.
|
|
this.f = this.normalizeRegexSource(this.f);
|
|
if ( this.f === '' ) {
|
|
this.unsupported = true;
|
|
}
|
|
return this;
|
|
}
|
|
|
|
// hostname-anchored
|
|
if ( s.startsWith('||') ) {
|
|
this.anchor |= 0x4;
|
|
s = s.slice(2);
|
|
|
|
// convert hostname to punycode if needed
|
|
// https://github.com/gorhill/uBlock/issues/2599
|
|
if ( this.reHasUnicode.test(s) ) {
|
|
const matches = this.reIsolateHostname.exec(s);
|
|
if ( matches ) {
|
|
s = (matches[1] !== undefined ? matches[1] : '') +
|
|
punycode.toASCII(matches[2]) +
|
|
matches[3];
|
|
}
|
|
}
|
|
|
|
// https://github.com/chrisaljoudi/uBlock/issues/1096
|
|
if ( s.startsWith('^') ) {
|
|
this.unsupported = true;
|
|
return this;
|
|
}
|
|
|
|
// plain hostname? (from ABP filter list)
|
|
// https://github.com/gorhill/uBlock/issues/1757
|
|
// A filter can't be a pure-hostname one if there is a domain or
|
|
// csp option present.
|
|
if ( this.reHostnameRule2.test(s) ) {
|
|
if ( s.charCodeAt(s.length - 1) === 0x5E /* '^' */ ) {
|
|
s = s.slice(0, -1);
|
|
}
|
|
this.f = s.toLowerCase();
|
|
this.isPureHostname = true;
|
|
return this;
|
|
}
|
|
}
|
|
|
|
// left-anchored
|
|
else if ( s.startsWith('|') ) {
|
|
this.anchor |= 0x2;
|
|
s = s.slice(1);
|
|
}
|
|
|
|
// right-anchored
|
|
if ( s.endsWith('|') ) {
|
|
this.anchor |= 0x1;
|
|
s = s.slice(0, -1);
|
|
}
|
|
|
|
// https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
|
|
// Remove pointless leading *.
|
|
// https://github.com/gorhill/uBlock/issues/3034
|
|
// We can remove anchoring if we need to match all at the start.
|
|
if ( s.startsWith('*') ) {
|
|
s = s.replace(/^\*+([^%0-9a-z])/i, '$1');
|
|
this.anchor &= ~0x6;
|
|
}
|
|
// Remove pointless trailing *
|
|
// https://github.com/gorhill/uBlock/issues/3034
|
|
// We can remove anchoring if we need to match all at the end.
|
|
if ( s.endsWith('*') ) {
|
|
s = s.replace(/([^%0-9a-z])\*+$/i, '$1');
|
|
this.anchor &= ~0x1;
|
|
}
|
|
|
|
// nothing left?
|
|
if ( s === '' ) {
|
|
s = '*';
|
|
}
|
|
// TODO: remove once redirect rules with `*/*` pattern are no longer
|
|
// used.
|
|
else if ( this.redirect !== 0 && s === '/' ) {
|
|
s = '*';
|
|
}
|
|
|
|
// https://github.com/gorhill/uBlock/issues/1047
|
|
// Hostname-anchored makes no sense if matching all requests.
|
|
if ( s === '*' ) {
|
|
this.anchor = 0;
|
|
}
|
|
|
|
this.firstWildcardPos = s.indexOf('*');
|
|
if ( this.firstWildcardPos !== -1 ) {
|
|
this.secondWildcardPos = s.indexOf('*', this.firstWildcardPos + 1);
|
|
}
|
|
this.firstCaretPos = s.indexOf('^');
|
|
if ( this.firstCaretPos !== -1 ) {
|
|
this.secondCaretPos = s.indexOf('^', this.firstCaretPos + 1);
|
|
}
|
|
|
|
if ( s.length > 1024 ) {
|
|
this.unsupported = true;
|
|
return this;
|
|
}
|
|
|
|
this.f = s.toLowerCase();
|
|
|
|
return this;
|
|
}
|
|
|
|
// Given a string, find a good token. Tokens which are too generic,
|
|
// i.e. very common with a high probability of ending up as a miss,
|
|
// are not good. Avoid if possible. This has a significant positive
|
|
// impact on performance.
|
|
|
|
makeToken() {
|
|
if ( this.isRegex ) {
|
|
this.extractTokenFromRegex();
|
|
return;
|
|
}
|
|
if ( this.f === '*' ) { return; }
|
|
const matches = this.findGoodToken();
|
|
if ( matches === null ) { return; }
|
|
this.token = matches[0];
|
|
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
|
|
this.tokenBeg = matches.index;
|
|
}
|
|
|
|
findGoodToken() {
|
|
this.reGoodToken.lastIndex = 0;
|
|
const s = this.f;
|
|
let bestMatch = null;
|
|
let bestBadness = 0;
|
|
let match;
|
|
while ( (match = this.reGoodToken.exec(s)) !== null ) {
|
|
const token = match[0];
|
|
// https://github.com/gorhill/uBlock/issues/997
|
|
// Ignore token if preceded by wildcard.
|
|
const pos = match.index;
|
|
if (
|
|
pos !== 0 &&
|
|
s.charCodeAt(pos - 1) === 0x2A /* '*' */ ||
|
|
token.length < this.maxTokenLen &&
|
|
s.charCodeAt(pos + token.length) === 0x2A /* '*' */
|
|
) {
|
|
continue;
|
|
}
|
|
// A one-char token is better than a documented bad token.
|
|
const badness = token.length > 1
|
|
? this.badTokens.get(token) || 0
|
|
: 1;
|
|
if ( badness === 0 ) { return match; }
|
|
if ( bestBadness === 0 || badness < bestBadness ) {
|
|
bestMatch = match;
|
|
bestBadness = badness;
|
|
}
|
|
}
|
|
return bestMatch;
|
|
}
|
|
|
|
// https://github.com/gorhill/uBlock/issues/2781
|
|
// For efficiency purpose, try to extract a token from
|
|
// a regex-based filter.
|
|
extractTokenFromRegex() {
|
|
this.reRegexToken.lastIndex = 0;
|
|
const s = this.f;
|
|
let matches;
|
|
while ( (matches = this.reRegexToken.exec(s)) !== null ) {
|
|
const prefix = s.slice(0, matches.index);
|
|
if ( this.reRegexTokenAbort.test(prefix) ) { return; }
|
|
if (
|
|
this.reRegexBadPrefix.test(prefix) ||
|
|
this.reRegexBadSuffix.test(s.slice(this.reRegexToken.lastIndex))
|
|
) {
|
|
continue;
|
|
}
|
|
this.token = matches[0].toLowerCase();
|
|
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
|
|
this.tokenBeg = matches.index;
|
|
if ( this.badTokens.has(this.token) === false ) { break; }
|
|
}
|
|
}
|
|
|
|
isJustOrigin() {
|
|
return this.isRegex === false &&
|
|
this.dataType === undefined &&
|
|
this.domainOpt !== '' && (
|
|
this.f === '*' || (
|
|
this.anchor === 0b010 &&
|
|
/^(?:http[s*]?:(?:\/\/)?)$/.test(this.f)
|
|
)
|
|
) &&
|
|
this.domainOpt.indexOf('~') === -1;
|
|
}
|
|
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Static entry point: parse a raw filter with a shared, lazily created
// FilterParser instance. The instance is released once a periodic check
// finds that it has not been used for more than 10 seconds.
FilterParser.parse = (( ) => {
    let sharedParser;
    let lastUsed = 0;
    let expiryTimer;

    // Periodic check: drop the shared parser when idle for too long,
    // otherwise schedule the next check.
    const checkExpiry = ( ) => {
        expiryTimer = undefined;
        const idle = Date.now() - lastUsed;
        if ( idle > 10000 ) {
            sharedParser = undefined;
        } else {
            expiryTimer = vAPI.setTimeout(checkExpiry, 10007);
        }
    };

    return s => {
        if ( sharedParser === undefined ) {
            sharedParser = new FilterParser();
        }
        lastUsed = Date.now();
        if ( expiryTimer === undefined ) {
            expiryTimer = vAPI.setTimeout(checkExpiry, 10007);
        }
        return sharedParser.parse(s);
    };
})();
|
|
|
|
/******************************************************************************/
|
|
/******************************************************************************/
|
|
|
|
// Constructor: copies the special token hash values exposed by the URL
// tokenizer onto the instance, then puts the container in its initial
// state through reset().
const FilterContainer = function() {
    const {
        noTokenHash,
        dotTokenHash,
        anyTokenHash,
        anyHTTPSTokenHash,
        anyHTTPTokenHash,
    } = urlTokenizer;
    Object.assign(this, {
        noTokenHash,
        dotTokenHash,
        anyTokenHash,
        anyHTTPSTokenHash,
        anyHTTPTokenHash,
    });
    this.reset();
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Bring the container back to an empty, unfrozen state: counters zeroed,
// filter collections replaced with fresh empty ones, shared stores reset,
// and runtime registers cleared.
FilterContainer.prototype.reset = function() {
    Object.assign(this, {
        frozen: false,
        processedFilterCount: 0,
        acceptedCount: 0,
        rejectedCount: 0,
        allowFilterCount: 0,
        blockFilterCount: 0,
        discardedCount: 0,
        goodFilters: new Set(),
        badFilters: new Set(),
        categories: new Map(),
    });

    urlTokenizer.resetKnownTokens();

    // This will invalidate all tries
    FilterHostnameDict.reset();
    filterOrigin.reset();
    bidiTrie.reset();

    // Keep only the first FILTER_UNITS_MIN filter units, and rewind the
    // filter sequences write pointer to its minimum value.
    filterUnits = filterUnits.slice(0, FILTER_UNITS_MIN);
    filterSequenceWritePtr = FILTER_SEQUENCES_MIN;

    // Runtime registers
    Object.assign(this, {
        $catbits: 0,
        $tokenHash: 0,
        $filterUnit: 0,
    });
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Turn the compiled filter lines collected in `this.goodFilters` into
// filter units indexed by category bits and token hash, skipping lines
// also present in `this.badFilters`. Afterward the filter buckets are
// optimized and the container is flagged as frozen.
FilterContainer.prototype.freeze = function() {
    const bucketFid = FilterBucket.fid;
    const redirectType = typeNameToTypeValue.redirect;
    const { unserialize } = µb.CompiledLineIO;
    const units = filterUnits;

    const t0 = Date.now();

    // Some token hashes are reserved: all filters sharing such a token
    // hash are added to a single instance of a dedicated filter class.
    const specializedCtorFor = tokenHash => {
        switch ( tokenHash ) {
        case this.dotTokenHash:
            return FilterHostnameDict;
        case this.anyTokenHash:
            return FilterJustOrigin;
        case this.anyHTTPSTokenHash:
            return FilterHTTPSJustOrigin;
        case this.anyHTTPTokenHash:
            return FilterHTTPJustOrigin;
        }
    };

    for ( const line of this.goodFilters ) {
        if ( this.badFilters.has(line) ) {
            this.discardedCount += 1;
            continue;
        }

        const args = unserialize(line);
        const bits = args[0];

        // Special cases: delegate to more specialized engines.
        // Redirect engine.
        if ( (bits & 0x1F0) === redirectType ) {
            µb.redirectEngine.fromCompiledRule(args[1]);
            continue;
        }

        // Plain static filters.
        const tokenHash = args[1];
        const fdata = args[2];

        let bucket = this.categories.get(bits);
        if ( bucket === undefined ) {
            bucket = new Map();
            this.categories.set(bits, bucket);
        }
        let iunit = bucket.get(tokenHash);

        const specializedCtor = specializedCtorFor(tokenHash);
        if ( specializedCtor !== undefined ) {
            if ( iunit === undefined ) {
                iunit = filterFromCtor(specializedCtor);
                bucket.set(tokenHash, iunit);
            }
            units[iunit].add(fdata);
            continue;
        }

        urlTokenizer.addKnownToken(tokenHash);

        const inewunit = filterUnitFromCompiled(fdata);

        if ( iunit === undefined ) {
            // First filter for this token hash: store the unit as is.
            bucket.set(tokenHash, inewunit);
        } else if ( units[iunit].fid === bucketFid ) {
            // Already a bucket: just add the new unit to it.
            units[iunit].unshift(inewunit);
        } else {
            // Second filter for this token hash: move the existing unit
            // and the new one into a newly created bucket.
            const ibucketunit = filterFromCtor(FilterBucket);
            const fbucket = units[ibucketunit];
            fbucket.unshift(iunit);
            fbucket.unshift(inewunit);
            bucket.set(tokenHash, ibucketunit);
        }
    }

    this.badFilters.clear();
    this.goodFilters.clear();

    // Skip 'data' type since bidi-trie does not (yet) support matchAll().
    const dataTypeValue = typeValueFromCatBits(typeNameToTypeValue['data']);
    for ( const [ catBits, bucket ] of this.categories ) {
        if ( typeValueFromCatBits(catBits) === dataTypeValue ) { continue; }
        for ( const iunit of bucket.values() ) {
            const unit = units[iunit];
            if ( unit instanceof FilterBucket === false ) { continue; }
            // optimize() may return a replacement unit.
            const optimized = unit.optimize();
            if ( optimized !== undefined ) {
                units[iunit] = optimized;
            }
        }
    }

    FilterHostnameDict.optimize();
    bidiTrieOptimize();
    this.frozen = true;

    log.info(`staticNetFilteringEngine.freeze() took ${Date.now()-t0} ms`);
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Serialize the state of the engine as five assets stored under `path`.
// Returns a promise which resolves once all the assets have been handed to
// `µb.assets.put()` and the resulting promises are settled.
FilterContainer.prototype.toSelfie = function(path) {
    bidiTrieOptimize(true);
    filterOrigin.optimize();

    const writes = [];

    writes.push(µb.assets.put(
        `${path}/FilterHostnameDict.trieContainer`,
        FilterHostnameDict.trieContainer.serialize(µb.base64)
    ));
    writes.push(µb.assets.put(
        `${path}/FilterOrigin.trieContainer`,
        filterOrigin.trieContainer.serialize(µb.base64)
    ));
    writes.push(µb.assets.put(
        `${path}/bidiTrie`,
        bidiTrie.serialize(µb.base64)
    ));
    // Only the used part of the filter sequences buffer is encoded: the
    // write pointer is shifted left by 2 to obtain the size passed along.
    writes.push(µb.assets.put(
        `${path}/filterSequences`,
        µb.base64.encode(
            filterSequences.buffer,
            filterSequenceWritePtr << 2
        )
    ));

    const main = {
        processedFilterCount: this.processedFilterCount,
        acceptedCount: this.acceptedCount,
        rejectedCount: this.rejectedCount,
        allowFilterCount: this.allowFilterCount,
        blockFilterCount: this.blockFilterCount,
        discardedCount: this.discardedCount,
        // Maps are not JSON-serializable: store them as arrays of entries.
        categories: Array.from(
            this.categories,
            ([ catbits, bucket ]) => [ catbits, Array.from(bucket) ]
        ),
        urlTokenizer: urlTokenizer.toSelfie(),
        filterUnits: filterUnits.map(unit =>
            unit !== null ? unit.toSelfie() : null
        ),
    };
    writes.push(µb.assets.put(`${path}/main`, JSON.stringify(main)));

    return Promise.all(writes);
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Restore the state of the engine from the five assets stored under
// `path`. Returns a promise resolving to the `&&`-combination of the
// outcome of each restoration step, hence a falsy value as soon as one
// step reports a failure.
FilterContainer.prototype.fromSelfie = function(path) {
    // Feed the content of an asset to the unserialize() method of the
    // object returned by `target()`. The target is resolved only once the
    // asset content is available.
    const restoreWith = (key, target) =>
        µb.assets.get(key).then(({ content }) =>
            target().unserialize(content, µb.base64)
        );

    // Decode the filter sequences into the (resized as needed) buffer.
    const restoreSequences = details => {
        const size = µb.base64.decodeSize(details.content) >> 2;
        if ( size === 0 ) { return false; }
        filterSequenceBufferResize(size);
        filterSequences = µb.base64.decode(
            details.content,
            filterSequences.buffer
        );
        filterSequenceWritePtr = size;
        return true;
    };

    // Restore counters, tokenizer state, filter units and categories.
    const restoreMain = details => {
        let selfie;
        try {
            selfie = JSON.parse(details.content);
        } catch (ex) {
            // Unparsable content is reported as a failure just below.
        }
        if ( selfie instanceof Object === false ) { return false; }
        this.frozen = true;
        this.processedFilterCount = selfie.processedFilterCount;
        this.acceptedCount = selfie.acceptedCount;
        this.rejectedCount = selfie.rejectedCount;
        this.allowFilterCount = selfie.allowFilterCount;
        this.blockFilterCount = selfie.blockFilterCount;
        this.discardedCount = selfie.discardedCount;
        urlTokenizer.fromSelfie(selfie.urlTokenizer);
        filterUnits = selfie.filterUnits.map(unit =>
            unit !== null ? filterFromSelfie(unit) : null
        );
        for ( const [ catbits, entries ] of selfie.categories ) {
            this.categories.set(catbits, new Map(entries));
        }
        return true;
    };

    return Promise.all([
        restoreWith(
            `${path}/FilterHostnameDict.trieContainer`,
            ( ) => FilterHostnameDict.trieContainer
        ),
        restoreWith(
            `${path}/FilterOrigin.trieContainer`,
            ( ) => filterOrigin.trieContainer
        ),
        restoreWith(
            `${path}/bidiTrie`,
            ( ) => bidiTrie
        ),
        µb.assets.get(`${path}/filterSequences`).then(restoreSequences),
        µb.assets.get(`${path}/main`).then(restoreMain),
    ]).then(results => {
        let outcome = true;
        for ( const result of results ) {
            outcome = outcome && result;
        }
        return outcome;
    });
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Parse a raw filter and push its compiled form to `writer`. Returns
// `false` when the filter was not compiled (not a static network filter,
// unsupported filter, invalid redirect rule), `true` otherwise.
FilterContainer.prototype.compile = function(raw, writer) {
    // ORDER OF TESTS IS IMPORTANT!

    const parsed = FilterParser.parse(raw);

    // Ignore non-static network filters
    if ( parsed.invalid ) { return false; }

    // Rejected filters are reported to the logger, along with the key of
    // the asset being compiled when available.
    const who = ( ) => writer.properties.get('assetKey') || '?';
    const reportError = text => {
        µb.logger.writeOne({ realm: 'message', type: 'error', text });
    };

    // Ignore filters with unsupported options
    if ( parsed.unsupported ) {
        reportError(`Invalid network filter in ${who()}: ${raw}`);
        return false;
    }

    // Redirect rule
    if ( parsed.redirect !== 0 ) {
        if ( this.compileRedirectRule(parsed, writer) === false ) {
            reportError(`Invalid redirect rule in ${who()}: ${raw}`);
            return false;
        }
        if ( parsed.redirect === 2 ) { return true; }
    }

    // Pure hostnames, use more efficient dictionary lookup
    // https://github.com/chrisaljoudi/uBlock/issues/665
    // Create a dict keyed on request type etc.
    if (
        parsed.isPureHostname &&
        parsed.domainOpt === '' &&
        parsed.dataType === undefined
    ) {
        parsed.tokenHash = this.dotTokenHash;
        this.compileToAtomicFilter(parsed, parsed.f, writer);
        return true;
    }

    parsed.makeToken();

    const units = [];

    // Pattern
    if ( parsed.isPureHostname ) {
        parsed.anchor = 0;
        units.push(FilterPlainHostname.compile(parsed));
    } else if ( parsed.isJustOrigin() ) {
        // Origin-only filters: one atomic filter per hostname, stored
        // under a token hash picked according to the pattern.
        const hostnames = parsed.domainOpt.split('|');
        if ( parsed.f === '*' ) {
            parsed.tokenHash = this.anyTokenHash;
        } else if ( parsed.f.startsWith('https') ) {
            parsed.tokenHash = this.anyHTTPSTokenHash;
        } else {
            parsed.tokenHash = this.anyHTTPTokenHash;
        }
        for ( const hn of hostnames ) {
            this.compileToAtomicFilter(parsed, hn, writer);
        }
        return true;
    } else {
        filterPattern.compile(parsed, units);
    }

    // Type
    // EXPERIMENT: $requestTypeBit
    //if ( (parsed.typeBits & allNetworkTypesBits) !== 0 ) {
    //    units.unshift(FilterType.compile(parsed));
    //    parsed.typeBits &= ~allNetworkTypesBits;
    //}

    // Anchor
    const { anchor } = parsed;
    if ( (anchor & 0b100) !== 0 ) {
        units.push(FilterAnchorHn.compile());
    } else if ( (anchor & 0b010) !== 0 ) {
        units.push(FilterAnchorLeft.compile());
    }
    if ( (anchor & 0b001) !== 0 ) {
        units.push(FilterAnchorRight.compile());
    }

    // Origin
    if ( parsed.domainOpt !== '' ) {
        filterOrigin.compile(
            parsed,
            units.length !== 0 && filterClasses[units[0][0]].isSlow === true,
            units
        );
    }

    // Data
    if ( parsed.dataType !== undefined ) {
        units.push(FilterDataHolder.compile(parsed));
    }

    // A single unit is used as is, many units are wrapped in a composite.
    let fdata;
    if ( units.length === 1 ) {
        fdata = units[0];
    } else {
        fdata = FilterComposite.compile(units);
    }

    this.compileToAtomicFilter(parsed, fdata, writer);

    return true;
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Push to `writer` one compiled entry per category the parsed filter
// belongs to. Each entry is `[ category bits, token hash, filter data ]`.
FilterContainer.prototype.compileToAtomicFilter = function(
    parsed,
    fdata,
    writer
) {
    // 0 = network filters
    // 1 = network filters: bad filters
    writer.select(parsed.badFilter ? 1 : 0);

    const descBits = parsed.action | parsed.important | parsed.party;
    const emit = catBits => {
        writer.push([ catBits, parsed.tokenHash, fdata ]);
    };

    // Typeless
    if ( parsed.typeBits === 0 ) {
        emit(descBits);
        return;
    }

    // If all network types are set, create a typeless filter
    let remaining = parsed.typeBits;
    if ( (remaining & allNetworkTypesBits) === allNetworkTypesBits ) {
        emit(descBits);
        remaining &= ~allNetworkTypesBits;
    }

    // One filter per specific types: the type value (1-based position of
    // the bit in the type bits) is stored starting at bit 4 of the
    // category bits.
    for ( let typeValue = 1; remaining !== 0; typeValue += 1 ) {
        if ( (remaining & 1) !== 0 ) {
            emit(descBits | (typeValue << 4));
        }
        remaining >>>= 1;
    }
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Have the redirect engine compile the raw filter, and push each resulting
// rule to `writer`, tagged with the `redirect` type value. Returns `false`
// when the redirect engine does not return an array, `true` otherwise.
FilterContainer.prototype.compileRedirectRule = function(parsed, writer) {
    const rules = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
    if ( Array.isArray(rules) === false ) { return false; }
    // 0 = network filters, 1 = network filters: bad filters
    writer.select(parsed.badFilter ? 1 : 0);
    const redirectType = typeNameToTypeValue.redirect;
    for ( let i = 0; i < rules.length; i++ ) {
        writer.push([ redirectType, rules[i] ]);
    }
    return true;
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Collect compiled filter lines from `reader`: section 0 feeds
// `this.goodFilters` (duplicates are counted as discarded), section 1
// feeds `this.badFilters`.
FilterContainer.prototype.fromCompiledContent = function(reader) {
    const { goodFilters, badFilters } = this;

    // 0 = network filters
    reader.select(0);
    while ( reader.next() ) {
        this.acceptedCount += 1;
        // A set which did not grow means the line was already present.
        const sizeBefore = goodFilters.size;
        goodFilters.add(reader.line);
        if ( goodFilters.size === sizeBefore ) {
            this.discardedCount += 1;
        }
    }

    // 1 = network filters: bad filter directives
    reader.select(1);
    while ( reader.next() ) {
        badFilters.add(reader.line);
    }
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// For a given realm (block, block-important, allow), collect into the map
// `out` the data of all matching `data`-type filters, keyed by the data
// itself. Both the party-agnostic category and, when `partyBits` is not
// zero, the party-specific category are looked up, in that order, for each
// token of the current URL.
// Returns `false` when no category exists for the lookup, `undefined`
// otherwise.
FilterContainer.prototype.realmMatchAndFetchData = function(
    realmBits,
    partyBits,
    type,
    out
) {
    const bits01 = realmBits | typeNameToTypeValue.data;
    const bits11 = realmBits | typeNameToTypeValue.data | partyBits;

    const lookups = [];
    const bucket01 = this.categories.get(bits01);
    if ( bucket01 !== undefined ) {
        lookups.push([ bucket01, bits01 ]);
    }
    if ( partyBits !== 0 ) {
        const bucket11 = this.categories.get(bits11);
        if ( bucket11 !== undefined ) {
            lookups.push([ bucket11, bits11 ]);
        }
    }
    if ( lookups.length === 0 ) { return false; }

    const units = filterUnits;
    const tokenHashes = urlTokenizer.getTokens(bidiTrie);
    const matched = [];
    // Tokens come as (hash, position) pairs, a hash of zero ends the list.
    for ( let i = 0; ; i += 2 ) {
        const th = tokenHashes[i];
        if ( th === 0 ) { return; }
        $tokenBeg = tokenHashes[i+1];
        for ( const [ bucket, catBits ] of lookups ) {
            const iunit = bucket.get(th);
            if ( iunit === undefined ) { continue; }
            matched.length = 0;
            units[iunit].matchAndFetchData(type, matched);
            for ( const filter of matched ) {
                out.set(
                    filter.getData(type),
                    new FilterDataHolderResult(catBits, th, iunit)
                );
            }
        }
    }
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Return an array with the results of all `data`-type filters applying to
// the request described by `fctxt`: first the block filters (important
// ones included) which are left after exceptions are applied, then the
// exception filters which were actually used.
FilterContainer.prototype.matchAndFetchData = function(fctxt, type) {
    $requestURL = urlTokenizer.setURL(fctxt.url);
    $docHostname = fctxt.getDocHostname();
    $requestHostname = fctxt.getHostname();

    const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;

    // Gather the matching entries of one realm into a new map.
    const collect = realmBits => {
        const found = new Map();
        this.realmMatchAndFetchData(realmBits, partyBits, type, found);
        return found;
    };

    const important = collect(BlockImportant);
    const blocked = collect(BlockAction);
    if ( important.size === 0 && blocked.size === 0 ) { return []; }

    const excepted = collect(AllowAction);

    // Remove entries overridden by important block filters.
    for ( const key of important.keys() ) {
        blocked.delete(key);
        excepted.delete(key);
    }

    if ( excepted.has('') === false ) {
        // Remove excepted block filters and unused exception filters.
        for ( const key of excepted.keys() ) {
            if ( blocked.delete(key) === false ) {
                excepted.delete(key);
            }
        }
    } else if ( blocked.size === 0 ) {
        // Special case, except-all:
        // - Except-all applies only if there is at least one normal
        //   block filter.
        // - Except-all does not apply to important block filters.
        excepted.clear();
    } else {
        // Except-all overrides all normal block filters, and makes all
        // other exception filters pointless.
        blocked.clear();
        for ( const key of excepted.keys() ) {
            if ( key !== '' ) { excepted.delete(key); }
        }
    }

    // Merge important and normal block filters
    for ( const [ key, entry ] of important ) {
        blocked.set(key, entry);
    }
    return [ ...blocked.values(), ...excepted.values() ];
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Look for a filter matching the current request in a given realm (block,
// block-important, allow). Up to four categories are looked up, in this
// order: realm only, realm+type, realm+party, realm+type+party.
//
// The highest bit of `typeBits` is a flag requesting an exact type match,
// in which case the categories without type information are left out.
//
// On a match, the `$catbits`, `$tokenHash` and `$filterUnit` registers are
// set and `true` is returned. Otherwise `false` is returned and the
// registers are left untouched.
FilterContainer.prototype.realmMatchString = function(
    realmBits,
    typeBits,
    partyBits
) {
    const isExactType = (typeBits & 0x80000000) !== 0;
    const type = typeBits & 0x7FFFFFFF;
    const hasType = isExactType || type !== 0;
    const hasParty = partyBits !== 0;

    const catBits00 = realmBits;
    const catBits01 = realmBits | type;
    const catBits10 = realmBits | partyBits;
    const catBits11 = realmBits | type | partyBits;

    const categories = this.categories;
    const bucket00 = isExactType === false
        ? categories.get(catBits00)
        : undefined;
    const bucket01 = hasType
        ? categories.get(catBits01)
        : undefined;
    const bucket10 = isExactType === false && hasParty
        ? categories.get(catBits10)
        : undefined;
    const bucket11 = hasType && hasParty
        ? categories.get(catBits11)
        : undefined;

    if (
        bucket00 === undefined && bucket01 === undefined &&
        bucket10 === undefined && bucket11 === undefined
    ) {
        return false;
    }

    const units = filterUnits;
    let catBits = 0, iunit = 0;

    // The first pass through the loop below is for pure hostname-based
    // filters, all stored under the "dot" token hash. The URL tokens are
    // fetched only if that first pass fails, then each following pass is
    // for the pattern-based filters stored under one token of the URL.
    let tokenHash = this.dotTokenHash;
    let tokenHashes;
    let i = 0;
    for (;;) {
        if (
            (bucket00 !== undefined) &&
            (iunit = bucket00.get(tokenHash) || 0) !== 0 &&
            (units[iunit].match() === true)
        ) {
            catBits = catBits00;
            break;
        }
        if (
            (bucket01 !== undefined) &&
            (iunit = bucket01.get(tokenHash) || 0) !== 0 &&
            (units[iunit].match() === true)
        ) {
            catBits = catBits01;
            break;
        }
        if (
            (bucket10 !== undefined) &&
            (iunit = bucket10.get(tokenHash) || 0) !== 0 &&
            (units[iunit].match() === true)
        ) {
            catBits = catBits10;
            break;
        }
        if (
            (bucket11 !== undefined) &&
            (iunit = bucket11.get(tokenHash) || 0) !== 0 &&
            (units[iunit].match() === true)
        ) {
            catBits = catBits11;
            break;
        }
        // Move on to the next token: tokens come as (hash, position)
        // pairs, and a hash of zero marks the end of the list.
        if ( tokenHashes === undefined ) {
            tokenHashes = urlTokenizer.getTokens(bidiTrie);
        } else {
            i += 2;
        }
        tokenHash = tokenHashes[i];
        if ( tokenHash === 0 ) { return false; }
        $tokenBeg = tokenHashes[i+1];
    }

    this.$catbits = catBits;
    this.$tokenHash = tokenHash;
    this.$filterUnit = iunit;
    return true;
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Specialized handler
|
|
|
|
// https://github.com/gorhill/uBlock/issues/1477
|
|
// Special case: blocking-generichide filter ALWAYS exists, it is implicit --
|
|
// thus we always first check for exception filters, then for important block
|
|
// filter if and only if there was a hit on an exception filter.
|
|
// https://github.com/gorhill/uBlock/issues/2103
|
|
// User may want to override `generichide` exception filters.
|
|
// https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/
|
|
// Add support for `specifichide`.
|
|
|
|
// Returns 0 when no `${type}hide` exception filter matches `url`, 2 when
// one does, or 1 when one does but is overridden by a matching important
// block filter. The lookups use an exact type match, with the URL being
// its own document, in a 1st-party context.
FilterContainer.prototype.matchStringElementHide = function(type, url) {
    const typeBits = typeNameToTypeValue[`${type}hide`] | 0x80000000;

    // Prime tokenizer: we get a normalized URL in return.
    $requestURL = urlTokenizer.setURL(url);
    this.$filterUnit = 0;

    // These registers will be used by various filters
    const hostname = µb.URI.hostnameFromURI(url);
    $requestHostname = hostname;
    $docHostname = hostname;

    // Exception filters
    if ( !this.realmMatchString(AllowAction, typeBits, FirstParty) ) {
        return 0;
    }
    // Important block filters.
    return this.realmMatchString(BlockImportant, typeBits, FirstParty)
        ? 1
        : 2;
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// https://github.com/chrisaljoudi/uBlock/issues/116
|
|
// Some type of requests are exceptional, they need custom handling,
|
|
// not the generic handling.
|
|
// https://github.com/chrisaljoudi/uBlock/issues/519
|
|
// Use exact type match for anything beyond `other`. Also, be prepared to
|
|
// support unknown types.
|
|
|
|
// Returns 0 when the request described by `fctxt` is matched by no
// filter, 1 when it is matched by a block filter, 2 when it is matched by
// a block filter which is overridden by an exception filter.
// Bit 0 of `modifiers` forces an exact type match.
FilterContainer.prototype.matchString = function(fctxt, modifiers = 0) {
    let typeValue = typeNameToTypeValue[fctxt.type];
    let exactType = (modifiers & 0b0001) !== 0;
    if ( modifiers === 0 ) {
        if ( typeValue === undefined ) {
            // Unknown types are processed as `other`.
            typeValue = otherTypeBitValue;
        } else if ( typeValue === 0 || typeValue > otherTypeBitValue ) {
            exactType = true;
        }
    }
    // EXPERIMENT: $requestTypeBit
    //$requestTypeBit = 1 << ((typeValue >>> 4) - 1);
    if ( exactType ) {
        if ( typeValue === undefined ) { return 0; }
        // The highest bit flags the request for an exact type match.
        typeValue |= 0x80000000;
    }

    const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;

    // Prime tokenizer: we get a normalized URL in return.
    $requestURL = urlTokenizer.setURL(fctxt.url);
    this.$filterUnit = 0;

    // These registers will be used by various filters
    $docHostname = fctxt.getDocHostname();
    $requestHostname = fctxt.getHostname();

    // Important block filters.
    if ( this.realmMatchString(BlockImportant, typeValue, partyBits) ) {
        return 1;
    }
    // Block filters
    if ( !this.realmMatchString(BlockAction, typeValue, partyBits) ) {
        return 0;
    }
    // Exception filters
    return this.realmMatchString(AllowAction, typeValue, partyBits)
        ? 2
        : 1;
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Returns the logger details of the filter recorded by the last successful
// match, or `undefined` when no filter unit is on record (`$filterUnit` is
// zero).
// `result` is 2 when bit 0 of the recorded category bits is set, 1
// otherwise.
FilterContainer.prototype.toLogData = function() {
    if ( this.$filterUnit === 0 ) { return; }
    const logData = toLogDataInternal(
        this.$catbits,
        this.$tokenHash,
        this.$filterUnit
    );
    logData.source = 'static';
    logData.tokenHash = this.$tokenHash;
    // No need to test for `$filterUnit === 0` here: that case has already
    // been dealt with by the early return above.
    logData.result = (this.$catbits & 1) !== 0 ? 2 : 1;
    return logData;
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Number of accepted filters, minus the number of discarded ones.
FilterContainer.prototype.getFilterCount = function() {
    const { acceptedCount, discardedCount } = this;
    return acceptedCount - discardedCount;
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// action: 1=test, 2=record
|
|
|
|
// Run matchString() against the benchmark dataset and report timings to
// the console.
//
// - `action === 1`: compare the results against the reference results
//   found in local storage, and report mismatches.
// - `action === 2`: record the results in local storage, to be used as
//   reference results.
// - `target` (optional): when a number, only the request at that index in
//   the dataset is evaluated and its result printed, no timing is done.
FilterContainer.prototype.benchmark = async function(action, target) {
    const requests = await µb.loadBenchmarkDataset();

    if ( Array.isArray(requests) === false || requests.length === 0 ) {
        console.info('No requests found to benchmark');
        return;
    }

    console.info(`Benchmarking staticNetFilteringEngine.matchString()...`);
    const fctxt = µb.filteringContext.duplicate();

    // Load a request from the dataset into the filtering context, and
    // return the result of matching it.
    const evaluate = request => {
        fctxt.setURL(request.url);
        fctxt.setDocOriginFromURL(request.frameUrl);
        fctxt.setType(request.cpt);
        return this.matchString(fctxt);
    };

    // Print the details of the request currently in the filtering context.
    const printContext = ( ) => {
        console.log(`\ttype=${fctxt.type}`);
        console.log(`\turl=${fctxt.url}`);
        console.log(`\tdocOrigin=${fctxt.getDocOrigin()}`);
    };

    if ( typeof target === 'number' ) {
        const request = requests[target];
        // Guard against an index which is out of range of the dataset.
        if ( request instanceof Object === false ) {
            console.info(`No request found at index ${target}`);
            return;
        }
        const r = evaluate(request);
        console.log(`Result=${r}:`);
        printContext();
        return;
    }

    let expected, recorded;
    if ( action === 1 ) {
        // When there is no stored reference results, getItem() yields
        // `null`, which JSON.parse() happily converts to `null` without
        // throwing: only an actual array is usable as reference results.
        try {
            const stored = JSON.parse(
                vAPI.localStorage.getItem('FilterContainer.benchmark.results')
            );
            if ( Array.isArray(stored) ) {
                expected = stored;
            }
        } catch(ex) {
            // Unparsable reference results are ignored.
        }
        if ( expected === undefined ) {
            console.info('No reference results found, results will not be compared');
        }
    }
    if ( action === 2 ) {
        recorded = [];
    }

    const t0 = self.performance.now();
    for ( let i = 0; i < requests.length; i++ ) {
        const r = evaluate(requests[i]);
        if ( recorded !== undefined ) { recorded.push(r); }
        if ( expected !== undefined && r !== expected[i] ) {
            console.log(`Mismatch with reference results at ${i}:`);
            console.log(`\tExpected ${expected[i]}, got ${r}:`);
            printContext();
        }
    }
    const t1 = self.performance.now();
    const dur = t1 - t0;

    console.info(`Evaluated ${requests.length} requests in ${dur.toFixed(0)} ms`);
    console.info(`\tAverage: ${(dur / requests.length).toFixed(3)} ms per request`);
    if ( expected !== undefined ) {
        console.info(`\tBlocked: ${expected.reduce((n,r)=>{return r===1?n+1:n;},0)}`);
        console.info(`\tExcepted: ${expected.reduce((n,r)=>{return r===2?n+1:n;},0)}`);
    }
    if ( recorded !== undefined ) {
        vAPI.localStorage.setItem(
            'FilterContainer.benchmark.results',
            JSON.stringify(recorded)
        );
    }
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Debugging helper: match a single request described by its document URL,
// type and URL, then print the result to the console, along with the log
// data when the result is not zero.
FilterContainer.prototype.test = function(docURL, type, url) {
    const context = µb.filteringContext.duplicate();
    context.setDocOriginFromURL(docURL);
    context.setType(type);
    context.setURL(url);
    const result = this.matchString(context);
    console.log(`${result}`);
    if ( result === 0 ) { return; }
    console.log(this.toLogData());
};
|
|
|
|
/*******************************************************************************
|
|
|
|
With default filter lists:
|
|
|
|
As of 2019-04-18:
|
|
|
|
{bits: "0", token: "ad", size: 926, f: FilterBucket}
|
|
{bits: "0", token: "ads", size: 636, f: FilterBucket}
|
|
{bits: "41", token: "phncdn", size: 253, f: FilterBucket}
|
|
{bits: "0", token: "analytic", size: 174, f: FilterBucket}
|
|
{bits: "0", token: "tracking", size: 155, f: FilterBucket}
|
|
{bits: "48", token: "http", size: 146, f: FilterBucket}
|
|
{bits: "48", token: "https", size: 139, f: FilterBucket}
|
|
{bits: "58", token: "http", size: 122, f: FilterBucket}
|
|
{bits: "0", token: "adv", size: 121, f: FilterBucket}
|
|
{bits: "58", token: "https", size: 118, f: FilterBucket}
|
|
{bits: "0", token: "advertis", size: 102, f: FilterBucket}
|
|
{bits: "8", token: "doublecl", size: 96, f: FilterBucket}
|
|
{bits: "41", token: "imasdk", size: 90, f: FilterBucket}
|
|
{bits: "0", token: "cdn", size: 89, f: FilterBucket}
|
|
{bits: "0", token: "track", size: 87, f: FilterBucket}
|
|
{bits: "0", token: "stats", size: 82, f: FilterBucket}
|
|
{bits: "0", token: "banner", size: 74, f: FilterBucket}
|
|
{bits: "0", token: "log", size: 72, f: FilterBucket}
|
|
{bits: "0", token: "ga", size: 71, f: FilterBucket}
|
|
{bits: "0", token: "gif", size: 67, f: FilterBucket}
|
|
{bits: "0", token: "cloudfro", size: 64, f: FilterBucket}
|
|
{bits: "0", token: "amazonaw", size: 61, f: FilterBucket}
|
|
{bits: "41", token: "ajax", size: 58, f: FilterBucket}
|
|
{bits: "0", token: "tracker", size: 56, f: FilterBucket}
|
|
{bits: "40", token: "pagead2", size: 53, f: FilterBucket}
|
|
{bits: "0", token: "affiliat", size: 53, f: FilterBucket}
|
|
|
|
*/
|
|
|
|
// Debugging helper: prints to the console one entry per (category, token)
// pair found in this.categories, sorted by decreasing size.
//
// Each entry has the shape { bits, token, size, f } where:
//   bits   the category key, rendered in hexadecimal
//   token  the string form of the token hash, per the URL tokenizer
//   size   f.size for the unit classes listed below, 1 for any other unit
//   f      the filter unit itself
FilterContainer.prototype.bucketHistogram = function() {
    // Unit classes for which the `size` property is reported as-is.
    const sizedClasses = [ FilterBucket, FilterHostnameDict, FilterJustOrigin ];
    const histogram = [];
    for ( const [ bits, bucket ] of this.categories ) {
        for ( const [ tokenHash, iunit ] of bucket ) {
            const token = urlTokenizer.stringFromTokenHash(tokenHash);
            const f = filterUnits[iunit];
            const hasOwnSize = sizedClasses.some(c => f instanceof c);
            histogram.push({
                bits: bits.toString(16),
                token,
                size: hasOwnSize ? f.size : 1,
                f,
            });
        }
    }
    histogram.sort((a, b) => b.size - a.size);
    console.log(histogram);
};
|
|
|
|
/*******************************************************************************
|
|
|
|
With default filter lists:
|
|
|
|
As of 2019-04-25:
|
|
|
|
{"FilterPlainHnAnchored" => 11078}
|
|
{"FilterPlainPrefix1" => 7195}
|
|
{"FilterPrefix1Trie" => 5720}
|
|
{"FilterOriginHit" => 3561}
|
|
{"FilterWildcard2HnAnchored" => 2943}
|
|
{"FilterPair" => 2391}
|
|
{"FilterBucket" => 1922}
|
|
{"FilterWildcard1HnAnchored" => 1910}
|
|
{"FilterHnAnchoredTrie" => 1586}
|
|
{"FilterPlainHostname" => 1391}
|
|
{"FilterOriginHitSet" => 1155}
|
|
{"FilterPlain" => 634}
|
|
{"FilterWildcard1" => 423}
|
|
{"FilterGenericHnAnchored" => 389}
|
|
{"FilterOriginMiss" => 302}
|
|
{"FilterGeneric" => 163}
|
|
{"FilterOriginMissSet" => 150}
|
|
{"FilterRegex" => 124}
|
|
{"FilterPlainRightAnchored" => 110}
|
|
{"FilterGenericHnAndRightAnchored" => 95}
|
|
{"FilterHostnameDict" => 59}
|
|
{"FilterPlainLeftAnchored" => 30}
|
|
{"FilterJustOrigin" => 22}
|
|
{"FilterHTTPJustOrigin" => 19}
|
|
{"FilterHTTPSJustOrigin" => 18}
|
|
{"FilterExactMatch" => 5}
|
|
{"FilterOriginMixedSet" => 3}
|
|
|
|
As of 2019-10-21:

{"FilterPatternPlain" => 27542}
{"FilterComposite" => 17249}
{"FilterPlainTrie" => 13235}
{"FilterAnchorHn" => 11938}
{"FilterPatternRightEx" => 4446}
{"FilterOriginHit" => 4435}
{"FilterBucket" => 3833}
{"FilterPatternRight" => 3426}
{"FilterPlainHostname" => 2786}
{"FilterOriginHitSet" => 1433}
{"FilterDataHolder" => 666}
{"FilterPatternGeneric" => 548}
{"FilterOriginMiss" => 441}
{"FilterOriginMissSet" => 208}
{"FilterTrailingSeparator" => 188}
{"FilterRegex" => 181}
{"FilterPatternLeft" => 172}
{"FilterAnchorRight" => 100}
{"FilterPatternLeftEx" => 82}
{"FilterHostnameDict" => 60}
{"FilterAnchorLeft" => 50}
{"FilterJustOrigin" => 24}
{"FilterHTTPJustOrigin" => 18}
{"FilterTrue" => 17}
{"FilterHTTPSJustOrigin" => 17}
|
|
|
|
*/
|
|
|
|
// Debugging helper: prints to the console an array of { name, count }
// entries, one per registered filter class plus two artificial entries for
// content counts, sorted by decreasing count.
FilterContainer.prototype.filterClassHistogram = function() {
    // Keys of the artificial entries used to report content counts.
    const plainTrieContentId = 1000;
    const hostnameDictContentId = 1001;

    const tally = new Map();
    for ( const fclass of filterClasses ) {
        tally.set(fclass.fid, { name: fclass.name, count: 0, });
    }
    tally.set(plainTrieContentId, { name: 'FilterPlainTrie Content', count: 0, });
    tally.set(hostnameDictContentId, { name: 'FilterHostnameDict Content', count: 0, });

    const bump = (id, amount) => {
        tally.get(id).count += amount;
    };

    // Count one instance of the class of `f`; non-object values are ignored.
    const countFilter = f => {
        if ( f instanceof Object ) { bump(f.fid, 1); }
    };

    // Walk a sequence stored in filterSequences: slot i+0 holds an index
    // into filterUnits, slot i+1 holds the next position, 0 ends the walk.
    // NOTE(review): units reached this way are themselves entries of
    // filterUnits, so they appear to be counted by the top-level pass below
    // as well -- confirm this is intended.
    const countSequence = head => {
        for ( let i = head; i !== 0; i = filterSequences[i+1] ) {
            countFilter(filterUnits[filterSequences[i+0]]);
        }
    };

    for ( const f of filterUnits ) {
        if ( f === null ) { continue; }
        countFilter(f);
        // The order of these tests is significant and must be preserved.
        if ( f instanceof FilterCollection ) {
            countSequence(f.i);
            if ( f.plainTrie ) {
                bump(plainTrieContentId, f.plainTrie.size);
            }
        } else if ( f instanceof FilterHostnameDict ) {
            bump(hostnameDictContentId, f.size);
        } else if ( f instanceof FilterComposite ) {
            countSequence(f.i);
        } else if ( f instanceof FilterPlainTrie ) {
            bump(plainTrieContentId, f.plainTrie.size);
        }
    }

    const results = [ ...tally.values() ].sort((a, b) => b.count - a.count);
    console.log(results);
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// Debugging helper: runs every request of the benchmark dataset through
// matchString() and prints two token histograms to the console:
//   - "Misses": the 100 most frequent tokens seen in URLs of requests for
//     which matchString() returned 0;
//   - "Hits": the 100 most frequent tokens seen in URLs of requests for
//     which matchString() returned 1, excluding tokens already listed in
//     the top misses.
// Requests yielding any other result are not tallied.
//
// Returns a promise which resolves to undefined once the output is printed.
FilterContainer.prototype.tokenHistograms = async function() {
    const requests = await µb.loadBenchmarkDataset();
    if ( !Array.isArray(requests) || requests.length === 0 ) {
        console.info('No requests found to benchmark');
        return;
    }

    console.info(`Computing token histograms...`);
    const fctxt = µb.filteringContext.duplicate();

    const missTokenMap = new Map();
    const hitTokenMap = new Map();
    const reTokens = /[0-9a-z%]{2,}/g;

    const increment = (map, key) => {
        map.set(key, (map.get(key) || 0) + 1);
    };

    for ( const request of requests ) {
        fctxt.setURL(request.url);
        fctxt.setDocOriginFromURL(request.frameUrl);
        fctxt.setType(request.cpt);
        const outcome = this.matchString(fctxt);
        for ( const [ token ] of request.url.toLowerCase().matchAll(reTokens) ) {
            switch ( outcome ) {
            case 0:
                increment(missTokenMap, token);
                break;
            case 1:
                increment(hitTokenMap, token);
                break;
            default:
                break;
            }
        }
    }

    const byCountDescending = (a, b) => b[1] - a[1];
    const topmisses = [ ...missTokenMap ].sort(byCountDescending).slice(0, 100);
    // Tokens which are common in misses are not reported as hits.
    topmisses.forEach(([ token ]) => { hitTokenMap.delete(token); });
    const tophits = [ ...hitTokenMap ].sort(byCountDescending).slice(0, 100);
    console.log('Misses:', JSON.stringify(topmisses));
    console.log('Hits:', JSON.stringify(tophits));
};
|
|
|
|
/******************************************************************************/
|
|
|
|
// The value of the enclosing function expression, assigned to
// µBlock.staticNetFilteringEngine, is a single FilterContainer instance.
return new FilterContainer();
|
|
|
|
/******************************************************************************/
|
|
|
|
})();
|