/******************************************************************************* uBlock Origin - a browser extension to block requests. Copyright (C) 2014-present Raymond Hill This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see {http://www.gnu.org/licenses/}. Home: https://github.com/gorhill/uBlock */ /* jshint bitwise: false */ /* global punycode, HNTrieContainer, STrieContainer */ 'use strict'; /******************************************************************************/ µBlock.staticNetFilteringEngine = (function(){ /******************************************************************************/ const µb = µBlock; // fedcba9876543210 // | | ||| // | | ||| // | | ||| // | | ||| // | | ||+---- bit 0: [BlockAction | AllowAction] // | | |+----- bit 1: `important` // | | +------ bit 2- 3: party [0 - 3] // | +-------- bit 4- 8: type [0 - 31] // +------------- bit 9-15: unused const BlockAction = 0 << 0; const AllowAction = 1 << 0; const Important = 1 << 1; const AnyParty = 0 << 2; const FirstParty = 1 << 2; const ThirdParty = 2 << 2; const typeNameToTypeValue = { 'no_type': 0 << 4, 'stylesheet': 1 << 4, 'image': 2 << 4, 'object': 3 << 4, 'object_subrequest': 3 << 4, 'script': 4 << 4, 'fetch': 5 << 4, 'xmlhttprequest': 5 << 4, 'sub_frame': 6 << 4, 'font': 7 << 4, 'media': 8 << 4, 'websocket': 9 << 4, 'other': 10 << 4, 'popup': 11 << 4, // start of behavorial filtering 'popunder': 12 << 4, 'main_frame': 13 << 4, // start of 1st-party-only behavorial 
filtering 'generichide': 14 << 4, 'inline-font': 15 << 4, 'inline-script': 16 << 4, 'data': 17 << 4, // special: a generic data holder 'redirect': 18 << 4, 'webrtc': 19 << 4, 'unsupported': 20 << 4 }; const otherTypeBitValue = typeNameToTypeValue.other; const typeValueToTypeName = { 1: 'stylesheet', 2: 'image', 3: 'object', 4: 'script', 5: 'xmlhttprequest', 6: 'subdocument', 7: 'font', 8: 'media', 9: 'websocket', 10: 'other', 11: 'popup', 12: 'popunder', 13: 'document', 14: 'generichide', 15: 'inline-font', 16: 'inline-script', 17: 'data', 18: 'redirect', 19: 'webrtc', 20: 'unsupported' }; const BlockImportant = BlockAction | Important; const reIsWildcarded = /[\^\*]/; // ABP filters: https://adblockplus.org/en/filters // regex tester: http://regex101.com/ /******************************************************************************/ // See the following as short-lived registers, used during evaluation. They are // valid until the next evaluation. let pageHostnameRegister = ''; let requestHostnameRegister = ''; /******************************************************************************/ // First character of match must be within the hostname part of the url. // // https://github.com/gorhill/uBlock/issues/1929 // Match only hostname label boundaries. const isHnAnchored = (( ) => { let lastLen = 0, lastBeg = -1, lastEnd = -1; return (url, matchStart) => { const len = requestHostnameRegister.length; if ( len !== lastLen || url.endsWith('://', lastBeg) === false ) { lastBeg = len !== 0 ? url.indexOf('://') : -1; if ( lastBeg !== -1 ) { lastBeg += 3; lastEnd = lastBeg + len; } else { lastEnd = -1; } } return matchStart < lastEnd && ( matchStart === lastBeg || matchStart > lastBeg && url.charCodeAt(matchStart - 1) === 0x2E /* '.' 
*/ ); }; })(); /******************************************************************************/ // Local helpers const normalizeRegexSource = function(s) { try { const re = new RegExp(s); return re.source; } catch (ex) { normalizeRegexSource.message = ex.toString(); } return ''; }; const rawToRegexStr = function(s, anchor) { // https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/ // https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions // Also: remove leading/trailing wildcards -- there is no point. let reStr = s.replace(rawToRegexStr.escape1, '\\$&') .replace(rawToRegexStr.escape2, '(?:[^%.0-9a-z_-]|$)') .replace(rawToRegexStr.escape3, '') .replace(rawToRegexStr.escape4, '[^ ]*?'); if ( anchor & 0b100 ) { reStr = ( reStr.startsWith('\\.') ? rawToRegexStr.reTextHostnameAnchor2 : rawToRegexStr.reTextHostnameAnchor1 ) + reStr; } else if ( anchor & 0b010 ) { reStr = '^' + reStr; } if ( anchor & 0b001 ) { reStr += '$'; } return reStr; }; rawToRegexStr.escape1 = /[.+?${}()|[\]\\]/g; rawToRegexStr.escape2 = /\^/g; rawToRegexStr.escape3 = /^\*|\*$/g; rawToRegexStr.escape4 = /\*/g; rawToRegexStr.reTextHostnameAnchor1 = '^[a-z-]+://(?:[^/?#]+\\.)?'; rawToRegexStr.reTextHostnameAnchor2 = '^[a-z-]+://(?:[^/?#]+)?'; // https://github.com/uBlockOrigin/uAssets/issues/4083#issuecomment-436914727 const rawToPlainStr = function(s, anchor) { if ( anchor === 0 && s.charCodeAt(0) === 0x2F /* '/' */ && s.length > 2 && s.charCodeAt(s.length-1) === 0x2F /* '/' */ ) { s = s + '*'; } return s; }; const filterDataSerialize = µb.CompiledLineIO.serialize; const toLogDataInternal = function(categoryBits, tokenHash, filter) { if ( filter === null ) { return undefined; } const logData = filter.logData(); logData.compiled = filterDataSerialize([ categoryBits, tokenHash, logData.compiled ]); if ( categoryBits & 0x001 ) { logData.raw = `@@${logData.raw}`; } const opts = []; if ( categoryBits & 0x002 ) { opts.push('important'); } if ( 
categoryBits & 0x008 ) { opts.push('third-party'); } else if ( categoryBits & 0x004 ) { opts.push('first-party'); } const type = categoryBits & 0x1F0; if ( type !== 0 && type !== typeNameToTypeValue.data ) { opts.push(typeValueToTypeName[type >>> 4]); } if ( logData.opts !== undefined ) { opts.push(logData.opts); } if ( opts.length !== 0 ) { logData.raw += '$' + opts.join(','); } return logData; }; /******************************************************************************* Each filter class will register itself in the map. A filter class id MUST always stringify to ONE single character. IMPORTANT: any change which modifies the mapping will have to be reflected with µBlock.systemSettings.compiledMagic. */ const filterClasses = []; let filterClassIdGenerator = 0; const registerFilterClass = function(ctor) { let fid = filterClassIdGenerator++; ctor.fid = ctor.prototype.fid = fid; filterClasses[fid] = ctor; }; const filterFromCompiledData = function(args) { return filterClasses[args[0]].load(args); }; /******************************************************************************/ const FilterTrue = class { match() { return true; } logData() { return { raw: '*', regex: '^', compiled: this.compile(), }; } compile() { return [ this.fid ]; } static compile() { return [ FilterTrue.fid ]; } static load() { return FilterTrue.instance; } }; FilterTrue.instance = new FilterTrue(); registerFilterClass(FilterTrue); /******************************************************************************/ const FilterPlain = class { constructor(s, tokenBeg) { this.s = s; this.tokenBeg = tokenBeg; } match(url, tokenBeg) { return url.startsWith(this.s, tokenBeg - this.tokenBeg); } logData() { return { raw: rawToPlainStr(this.s, 0), regex: rawToRegexStr(this.s, 0), compiled: this.compile() }; } compile() { return [ this.fid, this.s, this.tokenBeg ]; } static compile(details) { return [ FilterPlain.fid, details.f, details.tokenBeg ]; } static load(args) { return new FilterPlain(args[1], 
args[2]); } }; registerFilterClass(FilterPlain); /******************************************************************************/ const FilterPlainPrefix1 = class { constructor(s) { this.s = s; } match(url, tokenBeg) { return url.startsWith(this.s, tokenBeg - 1); } logData() { return { raw: rawToPlainStr(this.s, 0), regex: rawToRegexStr(this.s, 0), compiled: this.compile() }; } compile() { return [ this.fid, this.s ]; } static compile(details) { return [ FilterPlainPrefix1.fid, details.f ]; } static load(args) { return new FilterPlainPrefix1(args[1]); } static trieableStringFromArgs(args) { return args[1]; } }; FilterPlainPrefix1.prototype.trieableId = 0; registerFilterClass(FilterPlainPrefix1); /******************************************************************************/ const FilterPlainHostname = class { constructor(s) { this.s = s; } match() { const haystack = requestHostnameRegister; const needle = this.s; if ( haystack.endsWith(needle) === false ) { return false; } const offset = haystack.length - needle.length; return offset === 0 || haystack.charCodeAt(offset - 1) === 0x2E /* '.' 
*/; } logData() { return { raw: `||${this.s}^`, regex: rawToRegexStr(`${this.s}^`, 0), compiled: this.compile() }; } compile() { return [ this.fid, this.s ]; } static compile(details) { return [ FilterPlainHostname.fid, details.f ]; } static load(args) { return new FilterPlainHostname(args[1]); } }; registerFilterClass(FilterPlainHostname); /******************************************************************************/ const FilterPlainLeftAnchored = class { constructor(s) { this.s = s; } match(url) { return url.startsWith(this.s); } logData() { return { raw: `|${this.s}`, regex: rawToRegexStr(this.s, 0b010), compiled: this.compile() }; } compile() { return [ this.fid, this.s ]; } static compile(details) { return [ FilterPlainLeftAnchored.fid, details.f ]; } static load(args) { return new FilterPlainLeftAnchored(args[1]); } }; registerFilterClass(FilterPlainLeftAnchored); /******************************************************************************/ const FilterPlainRightAnchored = class { constructor(s) { this.s = s; } match(url) { return url.endsWith(this.s); } logData() { return { raw: `${this.s}|`, regex: rawToRegexStr(this.s, 0b001), compiled: this.compile() }; } compile() { return [ this.fid, this.s ]; } static compile(details) { return [ FilterPlainRightAnchored.fid, details.f ]; } static load(args) { return new FilterPlainRightAnchored(args[1]); } }; registerFilterClass(FilterPlainRightAnchored); /******************************************************************************/ const FilterExactMatch = class { constructor(s) { this.s = s; } match(url) { return url === this.s; } logData() { return { raw: `|${this.s}|`, regex: rawToRegexStr(this.s, 0b011), compiled: this.compile() }; } compile() { return [ this.fid, this.s ]; } static compile(details) { return [ FilterExactMatch.fid, details.f ]; } static load(args) { return new FilterExactMatch(args[1]); } }; registerFilterClass(FilterExactMatch); 
/******************************************************************************/

// Hostname-anchored plain filter: the pattern `s` must be found at the token
// position, and that position must be on a label boundary within the
// hostname part of the URL.

const FilterPlainHnAnchored = class {
    constructor(s) {
        this.s = s;
    }

    match(url, tokenBeg) {
        return url.startsWith(this.s, tokenBeg) &&
               isHnAnchored(url, tokenBeg);
    }

    logData() {
        return {
            raw: `||${this.s}`,
            regex: rawToRegexStr(this.s, 0),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s ];
    }

    static compile(details) {
        return [ FilterPlainHnAnchored.fid, details.f ];
    }

    static load(args) {
        return new FilterPlainHnAnchored(args[1]);
    }

    // Returns the pattern string stored in the compiled form `args`.
    static trieableStringFromArgs(args) {
        return args[1];
    }
};

FilterPlainHnAnchored.prototype.trieableId = 1;

registerFilterClass(FilterPlainHnAnchored);

/*******************************************************************************

    Filters with only one single occurrence of wildcard `*`

    The pattern is split around the wildcard into `s0` (before) and `s1`
    (after). `tokenBeg` encodes where the token is located:
    - zero or positive: offset of the token from the start of `s0`;
    - negative: offset of the start of `s1` relative to the token.

*/

const FilterWildcard1 = class {
    constructor(s0, s1, tokenBeg) {
        this.s0 = s0;
        this.s1 = s1;
        this.tokenBeg = tokenBeg;
    }

    match(url, tokenBeg) {
        // Token is in `s0`: locate `s0` from the token, then look for `s1`
        // anywhere after it.
        if ( this.tokenBeg >= 0 ) {
            const s0Beg = tokenBeg - this.tokenBeg;
            return s0Beg >= 0 &&
                   url.startsWith(this.s0, s0Beg) &&
                   url.indexOf(this.s1, s0Beg + this.s0.length) !== -1;
        }
        // Token is in `s1`: locate `s1` from the token, then look backward
        // for `s0`.
        const s1Beg = tokenBeg + this.tokenBeg;
        return s1Beg > 0 &&
               url.startsWith(this.s1, s1Beg) &&
               url.lastIndexOf(this.s0, s1Beg) !== -1;
    }

    logData() {
        return {
            raw: `${this.s0}*${this.s1}`,
            regex: rawToRegexStr(`${this.s0}*${this.s1}`, 0),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s0, this.s1, this.tokenBeg ];
    }

    // Returns the compiled form of the filter described by `details`, or
    // undefined when the filter can't be represented by this class.
    static compile(details) {
        // `match()` needs the position of a token to locate the pattern.
        // NOTE(review): this used to test `this.token`, which in a static
        // method is a property of the class itself and is always undefined;
        // `details.token` is assumed to be the intended value -- confirm
        // against the filter parser.
        if ( details.token === '*' ) { return; }
        if ( details.anchor !== 0 ) { return; }
        const s = details.f;
        const pos = s.indexOf('*');
        if ( pos === -1 ) { return; }
        // No other wildcard-like character allowed on either side.
        if ( reIsWildcarded.test(s.slice(pos + 1)) ) { return; }
        if ( reIsWildcarded.test(s.slice(0, pos)) ) { return; }
        return [
            FilterWildcard1.fid,
            s.slice(0, pos),
            s.slice(pos + 1),
            details.tokenBeg < pos
                ? details.tokenBeg
                : pos + 1 - details.tokenBeg,
        ];
    }

    static load(args) {
        return new FilterWildcard1(args[1], args[2], args[3]);
    }
};

registerFilterClass(FilterWildcard1);

/******************************************************************************/

// Generic filter: matching is delegated to a regular expression built from
// the raw pattern and anchor bits, and created lazily on first use.

const FilterGeneric = class {
    constructor(s, anchor) {
        this.s = s;
        this.anchor = anchor;
    }

    match(url) {
        if ( this.re === null ) {
            this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
        }
        return this.re.test(url);
    }

    logData() {
        // The regex is created lazily by match(): make sure it exists, or
        // else reading its source would throw for an instance which was
        // never used for matching.
        if ( this.re === null ) {
            this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
        }
        const out = {
            raw: rawToPlainStr(this.s, this.anchor),
            regex: this.re.source,
            compiled: this.compile()
        };
        if ( this.anchor & 0x2 ) {
            out.raw = `|${out.raw}`;
        }
        if ( this.anchor & 0x1 ) {
            out.raw += '|';
        }
        return out;
    }

    compile() {
        return [ this.fid, this.s, this.anchor ];
    }

    // The single-wildcard class is tried first, the generic regex-based
    // class is the fallback.
    static compile(details) {
        const compiled = FilterWildcard1.compile(details);
        if ( compiled !== undefined ) { return compiled; }
        return [ FilterGeneric.fid, details.f, details.anchor ];
    }

    static load(args) {
        return new FilterGeneric(args[1], args[2]);
    }
};

FilterGeneric.prototype.re = null;

registerFilterClass(FilterGeneric);

/*******************************************************************************

    Hostname-anchored filters with only one occurrence of wildcard `*`

    Same principle as FilterWildcard1, with the added condition that the
    start of `s0` must be hostname-anchored.

*/

const FilterWildcard1HnAnchored = class {
    constructor(s0, s1, tokenBeg) {
        this.s0 = s0;
        this.s1 = s1;
        this.tokenBeg = tokenBeg;
    }

    match(url, tokenBeg) {
        if ( this.tokenBeg >= 0 ) {
            const s0Beg = tokenBeg - this.tokenBeg;
            return s0Beg >= 0 &&
                   url.startsWith(this.s0, s0Beg) &&
                   isHnAnchored(url, s0Beg) &&
                   url.indexOf(this.s1, s0Beg + this.s0.length) !== -1;
        }
        const s1Beg = tokenBeg + this.tokenBeg;
        if ( s1Beg < 0 || url.startsWith(this.s1, s1Beg) === false ) {
            return false;
        }
        const s0Beg = url.lastIndexOf(this.s0, s1Beg);
        return s0Beg !== -1 && isHnAnchored(url, s0Beg);
    }

    logData() {
        return {
            raw: `||${this.s0}*${this.s1}`,
            regex: rawToRegexStr(`${this.s0}*${this.s1}`, 0),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s0, this.s1, this.tokenBeg ];
    }

    // Returns the compiled form of the filter described by `details`, or
    // undefined when the filter can't be represented by this class. When the
    // wildcard is immediately preceded by `^`, compilation is delegated to
    // FilterWildcard2HnAnchored.
    static compile(details) {
        // NOTE(review): this used to test `this.token`, which in a static
        // method is a property of the class itself and is always undefined;
        // `details.token` is assumed to be the intended value -- confirm
        // against the filter parser.
        if ( details.token === '*' ) { return; }
        // Right-anchored filters are not supported by this class. (The mask
        // used to be spelled `0x0b001`, a hexadecimal literal.)
        if ( (details.anchor & 0b001) !== 0 ) { return; }
        const s = details.f;
        let pos = s.indexOf('*');
        if ( pos === -1 ) { return; }
        if ( reIsWildcarded.test(s.slice(pos + 1)) ) { return; }
        const needSeparator =
            pos !== 0 && s.charCodeAt(pos - 1) === 0x5E /* '^' */;
        if ( needSeparator ) { pos -= 1; }
        if ( reIsWildcarded.test(s.slice(0, pos)) ) { return; }
        if ( needSeparator ) {
            return FilterWildcard2HnAnchored.compile(details, pos);
        }
        return [
            FilterWildcard1HnAnchored.fid,
            s.slice(0, pos),
            s.slice(pos + 1),
            details.tokenBeg < pos
                ? details.tokenBeg
                : pos + 1 - details.tokenBeg,
        ];
    }

    static load(args) {
        return new FilterWildcard1HnAnchored(args[1], args[2], args[3]);
    }
};

registerFilterClass(FilterWildcard1HnAnchored);

/*******************************************************************************

    Hostname-anchored filters with one occurrence of the wildcard
    sequence `^*` and no other wildcard-equivalent character

    On top of locating `s0` and `s1`, the portion of the URL found in between
    must contain at least one separator character.

*/

const FilterWildcard2HnAnchored = class {
    constructor(s0, s1, tokenBeg) {
        this.s0 = s0;
        this.s1 = s1;
        this.tokenBeg = tokenBeg;
    }

    match(url, tokenBeg) {
        let s0End, s1Beg;
        if ( this.tokenBeg >= 0 ) {
            const s0Beg = tokenBeg - this.tokenBeg;
            if ( s0Beg < 0 || url.startsWith(this.s0, s0Beg) === false ) {
                return false;
            }
            if ( isHnAnchored(url, s0Beg) === false ) { return false; }
            s0End = s0Beg + this.s0.length;
            s1Beg = url.indexOf(this.s1, s0End);
            if ( s1Beg === -1 ) { return false; }
        } else {
            s1Beg = tokenBeg + this.tokenBeg;
            if ( s1Beg < 0 || url.startsWith(this.s1, s1Beg) === false ) {
                return false;
            }
            const s0Beg = url.lastIndexOf(this.s0, s1Beg);
            if ( s0Beg === -1 || isHnAnchored(url, s0Beg) === false ) {
                return false;
            }
            s0End = s0Beg + this.s0.length;
        }
        return this.reSeparators.test(url.slice(s0End, s1Beg));
    }

    logData() {
        return {
            raw: `||${this.s0}^*${this.s1}`,
            regex: rawToRegexStr(`${this.s0}^*${this.s1}`, 0),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s0, this.s1, this.tokenBeg ];
    }

    // `pos` is the position of the `^*` sequence in the pattern.
    static compile(details, pos) {
        return [
            FilterWildcard2HnAnchored.fid,
            details.f.slice(0, pos),
            details.f.slice(pos + 2),
            details.tokenBeg < pos
                ? details.tokenBeg
                : pos + 2 - details.tokenBeg,
        ];
    }

    static load(args) {
        return new FilterWildcard2HnAnchored(args[1], args[2], args[3]);
    }
};

FilterWildcard2HnAnchored.prototype.reSeparators = /[^\w%.-]/;

registerFilterClass(FilterWildcard2HnAnchored);

/******************************************************************************/

// Generic hostname-anchored filter: matching is delegated to a regular
// expression created lazily on first use. The anchor bits are a property of
// the prototype.

const FilterGenericHnAnchored = class {
    constructor(s) {
        this.s = s;
    }

    match(url) {
        if ( this.re === null ) {
            this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
        }
        return this.re.test(url);
    }

    logData() {
        return {
            raw: `||${this.s}`,
            regex: rawToRegexStr(this.s, this.anchor & 0b001),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s ];
    }

    // The single-wildcard classes are tried first, the generic regex-based
    // class is the fallback.
    static compile(details) {
        const compiled = FilterWildcard1HnAnchored.compile(details);
        if ( compiled !== undefined ) { return compiled; }
        return [ FilterGenericHnAnchored.fid, details.f ];
    }

    static load(args) {
        return new FilterGenericHnAnchored(args[1]);
    }
};

FilterGenericHnAnchored.prototype.re = null;
FilterGenericHnAnchored.prototype.anchor = 0x4;

registerFilterClass(FilterGenericHnAnchored);

/******************************************************************************/

// Same as FilterGenericHnAnchored, with the right-anchor bit also set.

const FilterGenericHnAndRightAnchored = class extends FilterGenericHnAnchored {
    logData() {
        const out = super.logData();
        out.raw += '|';
        return out;
    }

    static compile(details) {
        return [ FilterGenericHnAndRightAnchored.fid, details.f ];
    }

    static load(args) {
        return new FilterGenericHnAndRightAnchored(args[1]);
    }
};

FilterGenericHnAndRightAnchored.prototype.anchor = 0x5;

registerFilterClass(FilterGenericHnAndRightAnchored);

/******************************************************************************/

// Regex-based filter. `re` holds the source string of the regular expression
// until first use, at which point it is replaced by a case-insensitive
// RegExp instance.

const FilterRegex = class {
    constructor(s) {
        this.re = s;
    }

    match(url) {
        if ( typeof this.re === 'string' ) {
            this.re = new RegExp(this.re, 'i');
        }
        return this.re.test(url);
    }

    logData() {
        const s = typeof this.re === 'string' ? this.re : this.re.source;
        return {
            raw: `/${s}/`,
            regex: s,
            compiled: this.compile()
        };
    }

    compile() {
        return [
            this.fid,
            typeof this.re === 'string' ? this.re : this.re.source
        ];
    }

    static compile(details) {
        return [ FilterRegex.fid, details.f ];
    }

    static load(args) {
        return new FilterRegex(args[1]);
    }
};

registerFilterClass(FilterRegex);

/******************************************************************************/

// The optimal "class" is picked according to the content of the
// `domain=` filter option.

const filterOrigin = new (class {
    constructor() {
        // Best-effort: failure to read or parse the stored details is
        // ignored, `trieDetails` is then left undefined.
        let trieDetails;
        try {
            trieDetails = JSON.parse(
                vAPI.localStorage.getItem('FilterOrigin.trieDetails')
            );
        } catch(ex) {
        }
        this.trieContainer = new HNTrieContainer(trieDetails);
    }

    // Returns the compiled form of the origin filter which best fits
    // `details.domainOpt`, wrapping the compiled filter `wrapped`.
    compile(details, wrapped) {
        const domainOpt = details.domainOpt;
        // One hostname
        if ( domainOpt.indexOf('|') === -1 ) {
            if ( domainOpt.charCodeAt(0) === 0x7E /* '~' */ ) {
                return FilterOriginMiss.compile(domainOpt, wrapped);
            }
            return FilterOriginHit.compile(domainOpt, wrapped);
        }
        // Many hostnames.
        // Must be in set (none negated).
        if ( domainOpt.indexOf('~') === -1 ) {
            return FilterOriginHitSet.compile(domainOpt, wrapped);
        }
        // Must not be in set (all negated).
        const reAllNegated = /^~(?:[^|~]+\|~)+[^|~]+$/;
        if ( reAllNegated.test(domainOpt) ) {
            return FilterOriginMissSet.compile(domainOpt, wrapped);
        }
        // Must be in one set, but not in the other.
        return FilterOriginMixedSet.compile(domainOpt, wrapped);
    }

    // Returns the log details of origin filter `f`: those of the wrapped
    // filter, completed with the `domain=` option.
    //
    // arg1: value stored in the compiled form
    // arg2: optional, value to report in the `domain=` option if different
    //       from `arg1`
    logData(f, arg1, arg2) {
        const out = f.wrapped.logData();
        out.compiled = [ f.fid, arg1, out.compiled ];
        // Append to the options of the wrapped filter, if any, rather than
        // overwrite them.
        const domainOpt = `domain=${arg2 || arg1}`;
        out.opts = out.opts !== undefined
            ? `${out.opts},${domainOpt}`
            : domainOpt;
        return out;
    }

    readyToUse() {
        return this.trieContainer.readyToUse();
    }

    reset() {
        return this.trieContainer.reset();
    }

    // Optimizes the trie container and stores the returned details in local
    // storage, where the constructor reads them back.
    optimize() {
        const trieDetails = this.trieContainer.optimize();
        vAPI.localStorage.setItem(
            'FilterOrigin.trieDetails',
            JSON.stringify(trieDetails)
        );
    }
})();

/******************************************************************************/

// Surprisingly, first peeking and comparing only the first character using
// charCodeAt() does help a bit performance -- 3-6µs gain per request on
// average for Chromium 71 and Firefox 65 with default lists.
// A likely explanation is that most visits are a miss, and in such case
// calling charCodeAt() to bail out earlier is cheaper than calling endsWith().

// Matches when the page hostname (read from `pageHostnameRegister`) is
// `hostname` or a subdomain of it, and the wrapped filter matches.

const FilterOriginHit = class {
    constructor(hostname, wrapped) {
        this.hostname = hostname;
        this.wrapped = wrapped;
    }

    match(url, tokenBeg) {
        const haystack = pageHostnameRegister;
        const offset = haystack.length - this.hostname.length;
        if ( offset < 0 ) { return false; }
        if ( haystack.charCodeAt(offset) !== this.hostname.charCodeAt(0) ) {
            return false;
        }
        if ( haystack.endsWith(this.hostname) === false ) { return false; }
        if (
            offset !== 0 &&
            haystack.charCodeAt(offset-1) !== 0x2E /* '.' */
        ) {
            return false;
        }
        return this.wrapped.match(url, tokenBeg);
    }

    logData() {
        return filterOrigin.logData(this, this.hostname);
    }

    compile() {
        return [ this.fid, this.hostname, this.wrapped.compile() ];
    }

    static compile(domainOpt, wrapped) {
        return [ FilterOriginHit.fid, domainOpt, wrapped ];
    }

    static load(args) {
        return new FilterOriginHit(
            args[1],
            filterFromCompiledData(args[2])
        );
    }
};

registerFilterClass(FilterOriginHit);

/******************************************************************************/

// Matches when the page hostname is neither `hostname` nor a subdomain of
// it, and the wrapped filter matches.

const FilterOriginMiss = class {
    constructor(hostname, wrapped) {
        this.hostname = hostname;
        this.wrapped = wrapped;
    }

    match(url, tokenBeg) {
        const haystack = pageHostnameRegister;
        if ( haystack.endsWith(this.hostname) ) {
            const offset = haystack.length - this.hostname.length;
            if (
                offset === 0 ||
                haystack.charCodeAt(offset-1) === 0x2E /* '.' */
            ) {
                return false;
            }
        }
        return this.wrapped.match(url, tokenBeg);
    }

    logData() {
        return filterOrigin.logData(this, this.hostname, `~${this.hostname}`);
    }

    compile() {
        return [ this.fid, this.hostname, this.wrapped.compile() ];
    }

    // The leading `~` is not stored in the compiled form.
    static compile(domainOpt, wrapped) {
        return [ FilterOriginMiss.fid, domainOpt.slice(1), wrapped ];
    }

    static load(args) {
        return new FilterOriginMiss(
            args[1],
            filterFromCompiledData(args[2])
        );
    }
};

registerFilterClass(FilterOriginMiss);

/******************************************************************************/

// Matches when the page hostname matches one of the hostnames in the
// `domain=` option, and the wrapped filter matches. The trie of hostnames is
// either restored from compiled data, or built from `domainOpt` on first use.

const FilterOriginHitSet = class {
    constructor(domainOpt, oneOf, wrapped) {
        this.domainOpt = domainOpt.length < 128
            ? domainOpt
            : µb.stringDeduplicater.lookup(domainOpt);
        this.oneOf = oneOf !== null
            ? filterOrigin.trieContainer.createOne(oneOf)
            : null;
        this.wrapped = wrapped;
    }

    match(url, tokenBeg) {
        if ( this.oneOf === null ) {
            this.oneOf = filterOrigin.trieContainer.fromIterable(
                this.domainOpt.split('|')
            );
        }
        return this.oneOf.matches(pageHostnameRegister) !== -1 &&
               this.wrapped.match(url, tokenBeg);
    }

    logData() {
        return filterOrigin.logData(this, this.domainOpt);
    }

    compile() {
        return [
            this.fid,
            this.domainOpt,
            this.oneOf !== null
                ? filterOrigin.trieContainer.compileOne(this.oneOf)
                : null,
            this.wrapped.compile()
        ];
    }

    static compile(domainOpt, wrapped) {
        return [ FilterOriginHitSet.fid, domainOpt, null, wrapped ];
    }

    static load(args) {
        return new FilterOriginHitSet(
            args[1],
            args[2],
            filterFromCompiledData(args[3])
        );
    }
};

registerFilterClass(FilterOriginHitSet);

/******************************************************************************/

// Matches when the page hostname matches none of the (all negated) hostnames
// in the `domain=` option, and the wrapped filter matches.

const FilterOriginMissSet = class {
    constructor(domainOpt, noneOf, wrapped) {
        this.domainOpt = domainOpt.length < 128
            ? domainOpt
            : µb.stringDeduplicater.lookup(domainOpt);
        this.noneOf = noneOf !== null
            ? filterOrigin.trieContainer.createOne(noneOf)
            : null;
        this.wrapped = wrapped;
    }

    match(url, tokenBeg) {
        if ( this.noneOf === null ) {
            this.noneOf = filterOrigin.trieContainer.fromIterable(
                this.domainOpt.replace(/~/g, '').split('|')
            );
        }
        return this.noneOf.matches(pageHostnameRegister) === -1 &&
               this.wrapped.match(url, tokenBeg);
    }

    logData() {
        return filterOrigin.logData(this, this.domainOpt);
    }

    compile() {
        return [
            this.fid,
            this.domainOpt,
            this.noneOf !== null
                ? filterOrigin.trieContainer.compileOne(this.noneOf)
                : null,
            this.wrapped.compile()
        ];
    }

    static compile(domainOpt, wrapped) {
        return [ FilterOriginMissSet.fid, domainOpt, null, wrapped ];
    }

    static load(args) {
        return new FilterOriginMissSet(
            args[1],
            args[2],
            filterFromCompiledData(args[3])
        );
    }
};

registerFilterClass(FilterOriginMissSet);

/******************************************************************************/

// Matches when the page hostname matches one of the non-negated hostnames
// and none of the negated hostnames in the `domain=` option, and the wrapped
// filter matches.

const FilterOriginMixedSet = class {
    constructor(domainOpt, oneOf, noneOf, wrapped) {
        this.domainOpt = domainOpt.length < 128
            ? domainOpt
            : µb.stringDeduplicater.lookup(domainOpt);
        this.oneOf = oneOf !== null
            ? filterOrigin.trieContainer.createOne(oneOf)
            : null;
        this.noneOf = noneOf !== null
            ? filterOrigin.trieContainer.createOne(noneOf)
            : null;
        this.wrapped = wrapped;
    }

    // Builds both tries from the `domain=` option.
    init() {
        const oneOf = [], noneOf = [];
        for ( const hostname of this.domainOpt.split('|') ) {
            if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) {
                noneOf.push(hostname.slice(1));
            } else {
                oneOf.push(hostname);
            }
        }
        this.oneOf = filterOrigin.trieContainer.fromIterable(oneOf);
        this.noneOf = filterOrigin.trieContainer.fromIterable(noneOf);
    }

    match(url, tokenBeg) {
        if ( this.oneOf === null ) { this.init(); }
        const needle = pageHostnameRegister;
        return this.oneOf.matches(needle) !== -1 &&
               this.noneOf.matches(needle) === -1 &&
               this.wrapped.match(url, tokenBeg);
    }

    logData() {
        return filterOrigin.logData(this, this.domainOpt);
    }

    compile() {
        return [
            this.fid,
            this.domainOpt,
            this.oneOf !== null
                ? filterOrigin.trieContainer.compileOne(this.oneOf)
                : null,
            this.noneOf !== null
                ? filterOrigin.trieContainer.compileOne(this.noneOf)
                : null,
            this.wrapped.compile()
        ];
    }

    static compile(domainOpt, wrapped) {
        return [ FilterOriginMixedSet.fid, domainOpt, null, null, wrapped ];
    }

    static load(args) {
        return new FilterOriginMixedSet(
            args[1],
            args[2],
            args[3],
            filterFromCompiledData(args[4])
        );
    }
};

registerFilterClass(FilterOriginMixedSet);

/******************************************************************************/

// Associates a piece of data (a type and a string) with a wrapped filter, to
// which matching is delegated.

const FilterDataHolder = class {
    constructor(dataType, dataStr) {
        this.dataType = dataType;
        this.dataStr = dataStr;
        this.wrapped = undefined;
    }

    match(url, tokenBeg) {
        return this.wrapped.match(url, tokenBeg);
    }

    // The data is reported as an option, placed ahead of the options of the
    // wrapped filter.
    logData() {
        const out = this.wrapped.logData();
        out.compiled = [ this.fid, this.dataType, this.dataStr, out.compiled ];
        let opt = this.dataType;
        if ( this.dataStr !== '' ) {
            opt += `=${this.dataStr}`;
        }
        if ( out.opts === undefined ) {
            out.opts = opt;
        } else {
            out.opts = opt + ',' + out.opts;
        }
        return out;
    }

    compile() {
        return [
            this.fid,
            this.dataType,
            this.dataStr,
            this.wrapped.compile()
        ];
    }

    // The compiled form returned here does not include the wrapped filter,
    // which load() expects as the fourth item.
    static compile(details) {
        return [ FilterDataHolder.fid, details.dataType, details.dataStr ];
    }

    static load(args) {
        const f = new FilterDataHolder(args[1], args[2]);
        f.wrapped = filterFromCompiledData(args[3]);
        return f;
    }
};

registerFilterClass(FilterDataHolder);

// Helper class for storing instances of FilterDataHolder.
const FilterDataHolderEntry = class {
    // categoryBits, tokenHash: the keys under which the filter is stored.
    // fdata: compiled filter data, instantiated immediately.
    constructor(categoryBits, tokenHash, fdata) {
        this.categoryBits = categoryBits;
        this.tokenHash = tokenHash;
        this.filter = filterFromCompiledData(fdata);
        // Next entry sharing the same token hash (singly-linked list).
        this.next = undefined;
    }

    logData() {
        return toLogDataInternal(this.categoryBits, this.tokenHash, this.filter);
    }

    compile() {
        return [ this.categoryBits, this.tokenHash, this.filter.compile() ];
    }

    static load(data) {
        return new FilterDataHolderEntry(data[0], data[1], data[2]);
    }
};

/******************************************************************************/

// Dictionary of hostnames, matched against the request hostname register.

const FilterHostnameDict = class {
    constructor(args) {
        this.h = ''; // short-lived register
        this.dict = FilterHostnameDict.trieContainer.createOne(args);
    }

    get size() {
        return this.dict.size;
    }

    add(hn) {
        return this.dict.add(hn);
    }

    // Returns true on a hit, and remembers the matched part of the request
    // hostname in `this.h` for a subsequent logData() call.
    match() {
        const pos = this.dict.matches(requestHostnameRegister);
        if ( pos === -1 ) { return false; }
        this.h = requestHostnameRegister.slice(pos);
        return true;
    }

    logData() {
        return {
            raw: `||${this.h}^`,
            regex: `${rawToRegexStr(this.h, 0)}(?:[^%.0-9a-z_-]|$)`,
            compiled: this.h
        };
    }

    compile() {
        return [ this.fid, FilterHostnameDict.trieContainer.compileOne(this.dict) ];
    }

    static readyToUse() {
        return FilterHostnameDict.trieContainer.readyToUse();
    }

    static reset() {
        return FilterHostnameDict.trieContainer.reset();
    }

    // Optimize the shared trie container and persist the returned details
    // so they can be handed to the container at next startup.
    static optimize() {
        const trieDetails = FilterHostnameDict.trieContainer.optimize();
        vAPI.localStorage.setItem(
            'FilterHostnameDict.trieDetails',
            JSON.stringify(trieDetails)
        );
    }

    static load(args) {
        return new FilterHostnameDict(args[1]);
    }
};

FilterHostnameDict.trieContainer = (function() {
    let trieDetails;
    try {
        trieDetails = JSON.parse(
            vAPI.localStorage.getItem('FilterHostnameDict.trieDetails')
        );
    } catch(ex) {
        // Best effort: on failure the container is created with
        // `trieDetails` left undefined.
    }
    return new HNTrieContainer(trieDetails);
})();

registerFilterClass(FilterHostnameDict);

/******************************************************************************/

// Dictionary of hostnames for filters whose only purpose is to match
// the document origin.

const FilterJustOrigin = class {
    constructor(args) {
        this.h = ''; // short-lived register
        this.dict = filterOrigin.trieContainer.createOne(args);
    }

    get size() {
        return this.dict.size;
    }

    add(hn) {
        return this.dict.add(hn);
    }

    // Returns true when the page hostname register is found in the
    // dictionary; the matched part is kept in `this.h` for logData().
    match() {
        const pos = this.dict.matches(pageHostnameRegister);
        if ( pos === -1 ) { return false; }
        this.h = pageHostnameRegister.slice(pos);
        return true;
    }

    logData() {
        return {
            raw: '*',
            regex: '^',
            compiled: this.h,
            opts: `domain=${this.h}`,
        };
    }

    compile() {
        return [ this.fid, filterOrigin.trieContainer.compileOne(this.dict) ];
    }

    static load(args) {
        return new FilterJustOrigin(args[1]);
    }
};

registerFilterClass(FilterJustOrigin);

/******************************************************************************/

// Same as FilterJustOrigin, restricted to URLs starting with `https://`.

const FilterHTTPSJustOrigin = class extends FilterJustOrigin {
    match(url) {
        return url.startsWith('https://') && super.match();
    }

    logData() {
        const out = super.logData();
        out.raw = '|https://';
        out.regex = '^https://';
        return out;
    }

    static load(args) {
        return new FilterHTTPSJustOrigin(args[1]);
    }
};

registerFilterClass(FilterHTTPSJustOrigin);

/******************************************************************************/

// Same as FilterJustOrigin, restricted to URLs starting with `http://`.

const FilterHTTPJustOrigin = class extends FilterJustOrigin {
    match(url) {
        return url.startsWith('http://') && super.match();
    }

    // The reported filter must reflect the `http://` scheme tested by
    // match(), not the `https://` one.
    logData() {
        const out = super.logData();
        out.raw = '|http://';
        out.regex = '^http://';
        return out;
    }

    static load(args) {
        return new FilterHTTPJustOrigin(args[1]);
    }
};

registerFilterClass(FilterHTTPJustOrigin);

/******************************************************************************/

// Container for exactly two filters sharing the same token.

const FilterPair = class {
    constructor(a, b) {
        this.f1 = a;
        this.f2 = b;
    }

    get size() {
        return 2;
    }

    // Tries `f1` then `f2`; the filter which matched is kept in `this.f`
    // for logData().
    match(url, tokenBeg) {
        if ( this.f1.match(url, tokenBeg) === true ) {
            this.f = this.f1;
            return true;
        }
        if ( this.f2.match(url, tokenBeg) === true ) {
            this.f = this.f2;
            return true;
        }
        return false;
    }

    logData() {
        return this.f.logData();
    }

    compile() {
        return [ this.fid, this.f1.compile(), this.f2.compile() ];
    }

    // Returns a FilterBucket holding both filters plus `a`. This emptied
    // instance is parked in `FilterPair.available` so that load() can
    // reuse it.
    upgrade(a) {
        const bucket = new FilterBucket(this.f1, this.f2, a);
        this.f1 = this.f2 = undefined;
        this.f = null;
        FilterPair.available = this;
        return bucket;
    }

    static load(args) {
        const f1 = filterFromCompiledData(args[1]);
        const f2 = filterFromCompiledData(args[2]);
        const pair = FilterPair.available;
        if ( pair === null ) {
            return new FilterPair(f1, f2);
        }
        FilterPair.available = null;
        pair.f1 = f1;
        pair.f2 = f2;
        return pair;
    }
};

FilterPair.prototype.f = null;

FilterPair.available = null;

registerFilterClass(FilterPair);

/******************************************************************************/

// Container for three or more filters sharing the same token. Filters of the
// `FilterPlainPrefix1` and `FilterPlainHnAnchored` kinds are moved into a
// per-kind trie once enough of them have been added; all other filters are
// kept in the `filters` array and evaluated one by one.

const FilterBucket = class {
    constructor(a, b, c) {
        this.filters = [];
        if ( a !== undefined ) {
            this.filters.push(a, b, c);
            this._countTrieable();
        }
    }

    get size() {
        let size = this.filters.length;
        if ( this.plainPrefix1Trie !== null ) {
            size += this.plainPrefix1Trie.size;
        }
        if ( this.plainHnAnchoredTrie !== null ) {
            size += this.plainHnAnchoredTrie.size;
        }
        return size;
    }

    // Add a filter given its compiled data. For each trieable kind: when a
    // trie already exists the filter goes straight into it; when three such
    // filters have already been counted, the trie is created, the existing
    // ones are transferred to it and the new one is added to it; otherwise
    // the filter is counted and stored in the array.
    add(fdata) {
        if ( fdata[0] === this.plainPrefix1Id ) {
            if ( this.plainPrefix1Trie !== null ) {
                return this.plainPrefix1Trie.add(
                    FilterPlainPrefix1.trieableStringFromArgs(fdata)
                );
            }
            if ( this.plainPrefix1Count === 3 ) {
                this.plainPrefix1Trie = FilterBucket.trieContainer.createOne();
                this._transferTrieable(
                    this.plainPrefix1Id,
                    this.plainPrefix1Trie
                );
                return this.plainPrefix1Trie.add(
                    FilterPlainPrefix1.trieableStringFromArgs(fdata)
                );
            }
            this.plainPrefix1Count += 1;
        }
        if ( fdata[0] === this.plainHnAnchoredId ) {
            if ( this.plainHnAnchoredTrie !== null ) {
                return this.plainHnAnchoredTrie.add(
                    FilterPlainHnAnchored.trieableStringFromArgs(fdata)
                );
            }
            if ( this.plainHnAnchoredCount === 3 ) {
                this.plainHnAnchoredTrie = FilterBucket.trieContainer.createOne();
                this._transferTrieable(
                    this.plainHnAnchoredId,
                    this.plainHnAnchoredTrie
                );
                return this.plainHnAnchoredTrie.add(
                    FilterPlainHnAnchored.trieableStringFromArgs(fdata)
                );
            }
            this.plainHnAnchoredCount += 1;
        }
        this.filters.push(filterFromCompiledData(fdata));
    }

    // Tries both tries first, then the array. The matching filter is kept in
    // `this.f` for logData(); for a trie hit, the shared placeholder filter
    // of that kind is filled with the matched string.
    match(url, tokenBeg) {
        if ( this.plainPrefix1Trie !== null ) {
            const pos = this.plainPrefix1Trie.matches(url, tokenBeg - 1);
            if ( pos !== -1 ) {
                this.plainPrefix1Filter.s = url.slice(tokenBeg - 1, pos);
                this.f = this.plainPrefix1Filter;
                return true;
            }
        }
        if (
            this.plainHnAnchoredTrie !== null &&
            isHnAnchored(url, tokenBeg)
        ) {
            const pos = this.plainHnAnchoredTrie.matches(url, tokenBeg);
            if ( pos !== -1 ) {
                this.plainHnAnchoredFilter.s = url.slice(tokenBeg, pos);
                this.f = this.plainHnAnchoredFilter;
                return true;
            }
        }
        const filters = this.filters;
        for ( let i = 0, n = filters.length; i < n; i++ ) {
            if ( filters[i].match(url, tokenBeg) === true ) {
                this.f = filters[i];
                if ( i >= 16 ) { this._promote(i); }
                return true;
            }
        }
        return false;
    }

    logData() {
        return this.f.logData();
    }

    // Serialize the bucket. A trie slot holds `false` when the corresponding
    // trie does not exist; load() only restores slots which are arrays.
    compile() {
        const compiled = [];
        const filters = this.filters;
        for ( let i = 0, n = filters.length; i < n; i++ ) {
            compiled[i] = filters[i].compile();
        }
        return [
            this.fid,
            compiled,
            this.plainPrefix1Trie !== null &&
                FilterBucket.trieContainer.compileOne(this.plainPrefix1Trie),
            this.plainHnAnchoredTrie !== null &&
                FilterBucket.trieContainer.compileOne(this.plainHnAnchoredTrie),
        ];
    }

    // Count how many filters of each trieable kind are in the array.
    _countTrieable() {
        for ( const f of this.filters ) {
            if ( f.fid === this.plainPrefix1Id ) {
                this.plainPrefix1Count += 1;
            } else if ( f.fid === this.plainHnAnchoredId ) {
                this.plainHnAnchoredCount += 1;
            }
        }
    }

    // Move all filters of kind `fid` from the array into `trie`, except
    // those whose string is longer than 255 characters.
    _transferTrieable(fid, trie) {
        let i = this.filters.length;
        while ( i-- ) {
            const f = this.filters[i];
            if ( f.fid !== fid || f.s.length > 255 ) { continue; }
            trie.add(f.s);
            this.filters.splice(i, 1);
        }
    }

    // Promote hit filters so they can be found faster next time.
    _promote(i) {
        const filters = this.filters;
        let pivot = filters.length >>> 1;
        while ( i < pivot ) {
            pivot >>>= 1;
            if ( pivot < 16 ) { break; }
        }
        if ( i <= pivot ) { return; }
        const j = this.promoted % pivot;
        //console.debug('FilterBucket.promote(): promoted %d to %d', i, j);
        const f = filters[j];
        filters[j] = filters[i];
        filters[i] = f;
        this.promoted += 1;
    }

    static reset() {
        FilterBucket.trieContainer.reset();
    }

    // Optimize the shared trie container and persist the returned details
    // so they can be handed to the container at next startup.
    static optimize() {
        const trieDetails = this.trieContainer.optimize();
        vAPI.localStorage.setItem(
            'FilterBucket.trieDetails',
            JSON.stringify(trieDetails)
        );
    }

    static load(args) {
        const bucket = new FilterBucket();
        const compiledFilters = args[1];
        const filters = bucket.filters;
        for ( let i = 0, n = compiledFilters.length; i < n; i++ ) {
            filters[i] = filterFromCompiledData(compiledFilters[i]);
        }
        if ( Array.isArray(args[2]) ) {
            bucket.plainPrefix1Trie =
                FilterBucket.trieContainer.createOne(args[2]);
        }
        if ( Array.isArray(args[3]) ) {
            bucket.plainHnAnchoredTrie =
                FilterBucket.trieContainer.createOne(args[3]);
        }
        return bucket;
    }
};

// Defaults shared through the prototype; instances shadow them on write.
FilterBucket.prototype.f = null;
FilterBucket.prototype.promoted = 0;

FilterBucket.prototype.plainPrefix1Id = FilterPlainPrefix1.fid;
FilterBucket.prototype.plainPrefix1Count = 0;
FilterBucket.prototype.plainPrefix1Trie = null;
FilterBucket.prototype.plainPrefix1Filter = new FilterPlainPrefix1('');

FilterBucket.prototype.plainHnAnchoredId = FilterPlainHnAnchored.fid;
FilterBucket.prototype.plainHnAnchoredCount = 0;
FilterBucket.prototype.plainHnAnchoredTrie = null;
FilterBucket.prototype.plainHnAnchoredFilter = new FilterPlainHnAnchored('');

FilterBucket.trieContainer = (function() {
    let trieDetails;
    try {
        trieDetails = JSON.parse(
            vAPI.localStorage.getItem('FilterBucket.trieDetails')
        );
    } catch(ex) {
        // Best effort: on failure the container is created with
        // `trieDetails` left undefined.
    }
    return new STrieContainer(trieDetails);
})();

registerFilterClass(FilterBucket);

/******************************************************************************/
/******************************************************************************/

// Parser for a single raw static network filter. The constructor sets up the
// regular expressions and constants used across parse calls, then calls
// reset() to initialize the per-filter state.

const FilterParser = function() {
    this.cantWebsocket = vAPI.cantWebsocket;
    this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
    this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i;
    this.reHostnameRule2 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]\^?$/i;
    this.reCleanupHostnameRule2 = /\^$/g;
    this.reCanTrimCarets1 = /^[^*]*$/;
    this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/;
    this.reHasUppercase = /[A-Z]/;
    this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
    this.reHasUnicode = /[^\x00-\x7F]/;
    this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
    // Detects CSP reporting directives. parseOptions() tests this against
    // the whole option, `csp=` prefix included, hence `=` must be accepted
    // as a boundary or else a reporting directive placed first in the
    // policy (`csp=report-uri ...`) would go undetected.
    this.reBadCSP = /(?:^|=|;)\s*report-(?:to|uri)\b/;
    this.domainOpt = '';
    this.noTokenHash = µb.urlTokenizer.noTokenHash;
    this.unsupportedTypeBit = this.bitFromType('unsupported');
    // All network request types to bitmap
    //   bring origin to 0 (from 4 -- see typeNameToTypeValue)
    //   left-shift 1 by the above-calculated value
    //   subtract 1 to set all type bits
    this.allNetRequestTypeBits = (1 << (otherTypeBitValue >>> 4)) - 1;

    this.reset();
};

/******************************************************************************/

// https://github.com/gorhill/uBlock/issues/1493
//   Transpose `ping` into `other` for now.
// Maps a filter option name to the type name used as key in
// typeNameToTypeValue (see bitFromType()).
FilterParser.prototype.toNormalizedType = {
    'beacon': 'other',
    'css': 'stylesheet',
    'data': 'data',
    'doc': 'main_frame',
    'document': 'main_frame',
    'elemhide': 'generichide',
    'font': 'font',
    'frame': 'sub_frame',
    'genericblock': 'unsupported',
    'generichide': 'generichide',
    'image': 'image',
    'inline-font': 'inline-font',
    'inline-script': 'inline-script',
    'media': 'media',
    'object': 'object',
    'object-subrequest': 'object',
    'other': 'other',
    'ping': 'other',
    'popunder': 'popunder',
    'popup': 'popup',
    'script': 'script',
    'stylesheet': 'stylesheet',
    'subdocument': 'sub_frame',
    'xhr': 'xmlhttprequest',
    'xmlhttprequest': 'xmlhttprequest',
    'webrtc': 'unsupported',
    'websocket': 'websocket'
};

/******************************************************************************/

// Restore all per-filter state to its defaults. Returns `this`.
FilterParser.prototype.reset = function() {
    this.action = BlockAction;
    this.anchor = 0;
    this.badFilter = false;
    this.dataType = undefined;
    this.dataStr = undefined;
    this.elemHiding = false;
    this.f = '';
    this.firstParty = false;
    this.thirdParty = false;
    this.party = AnyParty;
    this.fopts = '';
    this.hostnamePure = false;
    this.domainOpt = '';
    this.isRegex = false;
    this.raw = '';
    this.redirect = false;
    this.token = '*';
    this.tokenHash = this.noTokenHash;
    this.tokenBeg = 0;
    this.types = 0;
    this.important = 0;
    this.wildcarded = false;
    this.unsupported = false;
    return this;
};

/******************************************************************************/

// Returns the bit assigned to a normalized type name in the `types` bitmap:
// the type's ordinal in typeNameToTypeValue, minus one, is the bit position.
FilterParser.prototype.bitFromType = function(type) {
    return 1 << ((typeNameToTypeValue[type] >>> 4) - 1);
};

/******************************************************************************/

// https://github.com/chrisaljoudi/uBlock/issues/589
// Be ready to handle multiple negated types

// raw: an option name present in `toNormalizedType`.
// not: whether the option was negated with `~`.
FilterParser.prototype.parseTypeOption = function(raw, not) {
    var typeBit = this.bitFromType(this.toNormalizedType[raw]);

    if ( !not ) {
        this.types |= typeBit;
        return;
    }

    // Non-discrete network types can't be negated.
    if ( (typeBit & this.allNetRequestTypeBits) === 0 ) {
        return;
    }

    // Negated type: set all valid network request type bits to 1
    if (
        (typeBit & this.allNetRequestTypeBits) !== 0 &&
        (this.types & this.allNetRequestTypeBits) === 0
    ) {
        this.types |= this.allNetRequestTypeBits;
    }
    this.types &= ~typeBit;
};

/******************************************************************************/

// firstParty: true for a first-party option, false for a third-party option.
// not: whether the option was negated with `~`.
// When both parties end up selected, `party` is set to AnyParty.
FilterParser.prototype.parsePartyOption = function(firstParty, not) {
    // A first-party option is handled as a negated third-party option.
    if ( firstParty ) {
        not = !not;
    }
    if ( not ) {
        this.firstParty = true;
        this.party = this.thirdParty ? AnyParty : FirstParty;
    } else {
        this.thirdParty = true;
        this.party = this.firstParty ? AnyParty : ThirdParty;
    }
};

/******************************************************************************/

// s: the value of a `domain=` option.
// Returns the value with non-ASCII hostnames converted to punycode, or an
// empty string when the value contains a character from reBadDomainOptChars.
FilterParser.prototype.parseDomainOption = function(s) {
    if ( this.reHasUnicode.test(s) ) {
        const hostnames = s.split('|');
        let i = hostnames.length;
        while ( i-- ) {
            if ( this.reHasUnicode.test(hostnames[i]) ) {
                hostnames[i] = punycode.toASCII(hostnames[i]);
            }
        }
        s = hostnames.join('|');
    }
    if ( this.reBadDomainOptChars.test(s) ) { return ''; }
    return s;
};

/******************************************************************************/

// s: the comma-separated option list of a filter (what follows the last `$`).
// Sets `this.unsupported` and stops at the first option which can't be
// handled.
FilterParser.prototype.parseOptions = function(s) {
    this.fopts = s;
    var opts = s.split(',');
    var opt, not;
    for ( var i = 0; i < opts.length; i++ ) {
        opt = opts[i];
        not = opt.startsWith('~');
        if ( not ) {
            opt = opt.slice(1);
        }
        if ( opt === 'third-party' || opt === '3p' ) {
            this.parsePartyOption(false, not);
            continue;
        }
        // https://issues.adblockplus.org/ticket/616
        // `generichide` concept already supported, just a matter of
        // adding support for the new keyword.
        if ( opt === 'elemhide' || opt === 'generichide' ) {
            if ( not === false ) {
                this.parseTypeOption('generichide', false);
                continue;
            }
            this.unsupported = true;
            break;
        }
        // Test before handling all other types.
        // `redirect=` is accepted only for block filters.
        if ( opt.startsWith('redirect=') ) {
            if ( this.action === BlockAction ) {
                this.redirect = true;
                continue;
            }
            this.unsupported = true;
            break;
        }
        if ( this.toNormalizedType.hasOwnProperty(opt) ) {
            this.parseTypeOption(opt, not);
            continue;
        }
        // https://github.com/gorhill/uBlock/issues/2294
        // Detect and discard filter if domain option contains nonsensical
        // characters.
        if ( opt.startsWith('domain=') ) {
            this.domainOpt = this.parseDomainOption(opt.slice(7));
            if ( this.domainOpt === '' ) {
                this.unsupported = true;
                break;
            }
            continue;
        }
        if ( opt === 'important' ) {
            this.important = Important;
            continue;
        }
        if ( opt === 'first-party' || opt === '1p' ) {
            this.parsePartyOption(true, not);
            continue;
        }
        // A `csp=` option with an empty value, or one flagged by reBadCSP,
        // is silently skipped. Note that reBadCSP is tested against the
        // whole option, `csp=` prefix included.
        if ( opt.startsWith('csp=') ) {
            if ( opt.length > 4 && this.reBadCSP.test(opt) === false ) {
                this.parseTypeOption('data', not);
                this.dataType = 'csp';
                this.dataStr = opt.slice(4).trim();
            }
            continue;
        }
        // A bare `csp` option is recognized only for exception filters.
        if ( opt === 'csp' && this.action === AllowAction ) {
            this.parseTypeOption('data', not);
            this.dataType = 'csp';
            this.dataStr = '';
            continue;
        }
        // Used by Adguard, purpose is unclear -- just ignore for now.
        if ( opt === 'empty' ) {
            continue;
        }
        // https://github.com/uBlockOrigin/uAssets/issues/192
        if ( opt === 'badfilter' ) {
            this.badFilter = true;
            continue;
        }
        // Unrecognized filter option: ignore whole filter.
        this.unsupported = true;
        break;
    }
};

/*******************************************************************************

    anchor: bit vector
        0000 (0x0): no anchoring
        0001 (0x1): anchored to the end of the URL.
        0010 (0x2): anchored to the start of the URL.
        0011 (0x3): anchored to the start and end of the URL.
        0100 (0x4): anchored to the hostname of the URL.
        0101 (0x5): anchored to the hostname and end of the URL.

**/

// Parse one raw filter. The parser state is reset first, then filled from
// `raw`. Always returns `this`; callers are expected to inspect the
// `elemHiding` and `unsupported` flags before using the result.
FilterParser.prototype.parse = function(raw) {
    // important!
    this.reset();

    var s = this.raw = raw;

    // plain hostname? (from HOSTS file)
    if ( this.reHostnameRule1.test(s) ) {
        this.f = s;
        this.hostnamePure = true;
        this.anchor |= 0x4;
        return this;
    }

    // element hiding filter?
    var pos = s.indexOf('#');
    if ( pos !== -1 ) {
        var c = s.charAt(pos + 1);
        if ( c === '#' || c === '@' ) {
            console.error('static-net-filtering.js > unexpected cosmetic filters');
            this.elemHiding = true;
            return this;
        }
    }

    // block or allow filter?
    // Important: this must be executed before parsing options
    if ( s.startsWith('@@') ) {
        this.action = AllowAction;
        s = s.slice(2);
    }

    // options
    // https://github.com/gorhill/uBlock/issues/842
    // - ensure we are not dealing with a regex-based filter.
    // - lookup the last occurrence of `$`.
    if ( s.startsWith('/') === false || s.endsWith('/') === false ) {
        pos = s.lastIndexOf('$');
        if ( pos !== -1 ) {
            // https://github.com/gorhill/uBlock/issues/952
            //   Discard Adguard-specific `$$` filters.
            if ( s.indexOf('$$') !== -1 ) {
                this.unsupported = true;
                return this;
            }
            this.parseOptions(s.slice(pos + 1));
            // https://github.com/gorhill/uBlock/issues/2283
            //   Abort if type is only for unsupported types, otherwise
            //   toggle off `unsupported` bit.
            if ( this.types & this.unsupportedTypeBit ) {
                this.types &= ~this.unsupportedTypeBit;
                if ( this.types === 0 ) {
                    this.unsupported = true;
                    return this;
                }
            }
            s = s.slice(0, pos);
        }
    }

    // regex?
    if ( s.startsWith('/') && s.endsWith('/') && s.length > 2 ) {
        this.isRegex = true;
        this.f = s.slice(1, -1);

        // https://github.com/gorhill/uBlock/issues/1246
        // If the filter is valid, use the corrected version of the source
        // string -- this ensures reverse-lookup will work fine.
        this.f = normalizeRegexSource(this.f);
        if ( this.f === '' ) {
            console.error(
                "uBlock Origin> discarding bad regular expression-based network filter '%s': '%s'",
                raw,
                normalizeRegexSource.message
            );
            this.unsupported = true;
        }
        return this;
    }

    // hostname-anchored
    if ( s.startsWith('||') ) {
        this.anchor |= 0x4;
        s = s.slice(2);

        // convert hostname to punycode if needed
        // https://github.com/gorhill/uBlock/issues/2599
        if ( this.reHasUnicode.test(s) ) {
            var matches = this.reIsolateHostname.exec(s);
            if ( matches ) {
                s = (matches[1] !== undefined ? matches[1] : '') +
                    punycode.toASCII(matches[2]) +
                    matches[3];
                //console.debug('µBlock.staticNetFilteringEngine/FilterParser.parse():', raw, '=', s);
            }
        }

        // https://github.com/chrisaljoudi/uBlock/issues/1096
        if ( s.startsWith('^') ) {
            this.unsupported = true;
            return this;
        }

        // plain hostname? (from ABP filter list)
        // https://github.com/gorhill/uBlock/issues/1757
        // A filter can't be a pure-hostname one if there is a domain or csp
        // option present.
        if ( this.reHostnameRule2.test(s) ) {
            this.f = s.replace(this.reCleanupHostnameRule2, '');
            this.hostnamePure = true;
            return this;
        }
    }
    // left-anchored
    else if ( s.startsWith('|') ) {
        this.anchor |= 0x2;
        s = s.slice(1);
    }

    // right-anchored
    if ( s.endsWith('|') ) {
        this.anchor |= 0x1;
        s = s.slice(0, -1);
    }

    // https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
    // remove pointless leading *.
    // https://github.com/gorhill/uBlock/issues/3034
    // - We can remove anchoring if we need to match all at the start.
    if ( s.startsWith('*') ) {
        s = s.replace(/^\*+([^%0-9a-z])/i, '$1');
        this.anchor &= ~0x6;
    }
    // remove pointless trailing *
    // https://github.com/gorhill/uBlock/issues/3034
    // - We can remove anchoring if we need to match all at the end.
    if ( s.endsWith('*') ) {
        s = s.replace(/([^%0-9a-z])\*+$/i, '$1');
        this.anchor &= ~0x1;
    }

    // nothing left?
    if ( s === '' ) {
        s = '*';
    }

    // https://github.com/gorhill/uBlock/issues/1047
    // Hostname-anchored makes no sense if matching all requests.
    if ( s === '*' ) {
        this.anchor = 0;
    }

    this.wildcarded = reIsWildcarded.test(s);

    // This might look weird but we gain memory footprint by not going through
    // toLowerCase(), at least on Chromium. Because copy-on-write?

    this.f = this.reHasUppercase.test(s) ? s.toLowerCase() : s;

    return this;
};

/******************************************************************************/

// Given a string, find a good token. Tokens which are too generic, i.e. very
// common with a high probability of ending up as a miss, are not
// good. Avoid if possible. This has a *significant* positive impact on
// performance.
// These "bad tokens" are collated manually.

// Hostname-anchored with no wildcard always have a token index of 0.
const reHostnameToken = /^[0-9a-z]+/;
// The two regexes below carry the `g` flag: their `lastIndex` is reset to 0
// before each scan by the functions using them.
const reGoodToken = /[%0-9a-z]{2,}/g;
const reRegexToken = /[%0-9A-Za-z]{2,}/g;
const reRegexTokenAbort = /[([]/;
const reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/;
const reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*.]|$)/;

const badTokens = new Set([
    'com',
    'google',
    'http',
    'https',
    'icon',
    'images',
    'img',
    'js',
    'net',
    'news',
    'www'
]);

// Returns the regex match of the first token in `this.f` which is neither
// adjacent to a wildcard nor listed in `badTokens`. Falls back to the first
// bad token encountered, or null when there is none.
FilterParser.prototype.findFirstGoodToken = function() {
    reGoodToken.lastIndex = 0;
    const s = this.f;
    let matches;
    let badTokenMatch = null;
    while ( (matches = reGoodToken.exec(s)) !== null ) {
        // https://github.com/gorhill/uBlock/issues/997
        // Ignore token if preceded by wildcard.
        const lpos = matches.index;
        if ( lpos !== 0 && s.charCodeAt(lpos - 1) === 0x2A /* '*' */ ) {
            continue;
        }
        // Ignore token if followed by wildcard.
        if ( s.charCodeAt(reGoodToken.lastIndex) === 0x2A /* '*' */ ) {
            continue;
        }
        if ( badTokens.has(matches[0]) ) {
            if ( badTokenMatch === null ) {
                badTokenMatch = matches;
            }
            continue;
        }
        return matches;
    }
    return badTokenMatch;
};

// Try to find a usable token in the regex source held in `this.f`. On
// success, `token`, `tokenHash` and `tokenBeg` are set; when nothing
// suitable is found they are left untouched.
FilterParser.prototype.extractTokenFromRegex = function() {
    reRegexToken.lastIndex = 0;
    const s = this.f;
    let matches;
    while ( (matches = reRegexToken.exec(s)) !== null ) {
        const prefix = s.slice(0, matches.index);
        // Give up as soon as `(` or `[` appears before the candidate.
        if ( reRegexTokenAbort.test(prefix) ) { return; }
        if (
            reRegexBadPrefix.test(prefix) ||
            reRegexBadSuffix.test(s.slice(reRegexToken.lastIndex))
        ) {
            continue;
        }
        this.token = matches[0].toLowerCase();
        this.tokenHash = µb.urlTokenizer.tokenHashFromString(this.token);
        this.tokenBeg = matches.index;
        // A bad token is kept only until a better one is found.
        if ( badTokens.has(this.token) === false ) { break; }
    }
};

/******************************************************************************/

// https://github.com/chrisaljoudi/uBlock/issues/1038
// Single asterisk will match any URL.

// https://github.com/gorhill/uBlock/issues/2781
//   For efficiency purpose, try to extract a token from a regex-based filter.
// Pick the token under which the parsed filter will be stored. On success,
// `token`, `tokenHash` and `tokenBeg` are set; otherwise they keep the
// values assigned by reset().
FilterParser.prototype.makeToken = function() {
    if ( this.isRegex ) {
        this.extractTokenFromRegex();
        return;
    }

    if ( this.f === '*' ) { return; }

    let matches = null;
    // Hostname-anchored, non-wildcarded: try the leading hostname label first.
    if ( (this.anchor & 0x4) !== 0 && this.wildcarded === false ) {
        matches = reHostnameToken.exec(this.f);
    }
    if ( matches === null ) {
        matches = this.findFirstGoodToken();
    }
    if ( matches !== null ) {
        this.token = matches[0];
        this.tokenHash = µb.urlTokenizer.tokenHashFromString(this.token);
        this.tokenBeg = matches.index;
    }
};

/******************************************************************************/

// Returns true when the parsed filter only constrains the document origin:
// no data (e.g. `csp=`) attached, no redirect, a non-empty `domain=` option
// without negated entries, and a pattern which is `*` or a bare http/https
// scheme.
FilterParser.prototype.isJustOrigin = function() {
    return this.dataType === undefined &&
           this.redirect === false &&
           this.domainOpt !== '' &&
           /^(?:\*|https?:(?:\/\/)?)$/.test(this.f) &&
           this.domainOpt.indexOf('~') === -1;
};

/******************************************************************************/
/******************************************************************************/

// Container for all compiled static network filters. The constructor caches
// the tokenizer and its special token hashes, then calls reset().

const FilterContainer = function() {
    this.filterParser = new FilterParser();
    this.urlTokenizer = µb.urlTokenizer;
    this.noTokenHash = this.urlTokenizer.noTokenHash;
    this.dotTokenHash = this.urlTokenizer.dotTokenHash;
    this.anyTokenHash = this.urlTokenizer.anyTokenHash;
    this.anyHTTPSTokenHash = this.urlTokenizer.anyHTTPSTokenHash;
    this.anyHTTPTokenHash = this.urlTokenizer.anyHTTPTokenHash;
    this.reset();
};

/******************************************************************************/

// Reset all, thus reducing to a minimum memory footprint of the context.
FilterContainer.prototype.reset = function() {
    this.frozen = false;
    this.processedFilterCount = 0;
    this.acceptedCount = 0;
    this.rejectedCount = 0;
    this.allowFilterCount = 0;
    this.blockFilterCount = 0;
    this.discardedCount = 0;
    this.goodFilters = new Set();
    this.badFilters = new Set();
    this.categories = new Map();
    this.dataFilters = new Map();
    this.filterParser.reset();
    this.urlTokenizer.resetKnownTokens();

    // This will invalidate all tries
    FilterHostnameDict.reset();
    filterOrigin.reset();
    FilterBucket.reset();

    // Runtime registers
    this.urlRegister = '';
    this.catbitsRegister = 0;
    this.tokenRegister = 0;
    this.filterRegister = null;
};

/******************************************************************************/

// Turn the accumulated compiled lines in `goodFilters` into live filter
// instances, skipping every line also present in `badFilters`. Once done,
// both sets are cleared, the trie containers are optimized, and the
// container is flagged as frozen.
FilterContainer.prototype.freeze = function() {
    const filterPairId = FilterPair.fid;
    const filterBucketId = FilterBucket.fid;
    const filterDataHolderId = FilterDataHolder.fid;
    const redirectTypeValue = typeNameToTypeValue.redirect;
    const unserialize = µb.CompiledLineIO.unserialize;
    const knownTokens = this.urlTokenizer.knownTokens;

    for ( const line of this.goodFilters ) {
        if ( this.badFilters.has(line) ) {
            this.discardedCount += 1;
            continue;
        }

        const args = unserialize(line);
        const bits = args[0];

        // Special cases: delegate to more specialized engines.
        // Redirect engine.
        // 0x1F0 isolates the type field (bits 4-8) of the category bits.
        if ( (bits & 0x1F0) === redirectTypeValue ) {
            µb.redirectEngine.fromCompiledRule(args[1]);
            continue;
        }

        // Plain static filters.
        const tokenHash = args[1];
        const fdata = args[2];

        // Special treatment: data-holding filters are stored separately
        // because they require special matching algorithm (unlike other
        // filters, ALL hits must be reported).
        if ( fdata[0] === filterDataHolderId ) {
            let entry = new FilterDataHolderEntry(bits, tokenHash, fdata);
            let bucket = this.dataFilters.get(tokenHash);
            // Entries sharing a token hash are chained, newest first.
            if ( bucket !== undefined ) {
                entry.next = bucket;
            }
            this.dataFilters.set(tokenHash, entry);
            knownTokens[tokenHash & 0xFFFF] = 1;
            continue;
        }

        let bucket = this.categories.get(bits);
        if ( bucket === undefined ) {
            bucket = new Map();
            this.categories.set(bits, bucket);
        }
        let entry = bucket.get(tokenHash);

        // For the four special token hashes below, `fdata` is a hostname
        // which is added to a per-category dictionary.
        if ( tokenHash === this.dotTokenHash ) {
            if ( entry === undefined ) {
                entry = new FilterHostnameDict();
                bucket.set(this.dotTokenHash, entry);
            }
            entry.add(fdata);
            continue;
        }

        if ( tokenHash === this.anyTokenHash ) {
            if ( entry === undefined ) {
                entry = new FilterJustOrigin();
                bucket.set(this.anyTokenHash, entry);
            }
            entry.add(fdata);
            continue;
        }

        if ( tokenHash === this.anyHTTPSTokenHash ) {
            if ( entry === undefined ) {
                entry = new FilterHTTPSJustOrigin();
                bucket.set(this.anyHTTPSTokenHash, entry);
            }
            entry.add(fdata);
            continue;
        }

        if ( tokenHash === this.anyHTTPTokenHash ) {
            if ( entry === undefined ) {
                entry = new FilterHTTPJustOrigin();
                bucket.set(this.anyHTTPTokenHash, entry);
            }
            entry.add(fdata);
            continue;
        }

        knownTokens[tokenHash & 0xFFFF] = 1;

        // The slot for a token grows as filters are added to it:
        // single filter -> FilterPair -> FilterBucket.
        if ( entry === undefined ) {
            bucket.set(tokenHash, filterFromCompiledData(fdata));
            continue;
        }
        if ( entry.fid === filterBucketId ) {
            entry.add(fdata);
            continue;
        }
        if ( entry.fid === filterPairId ) {
            bucket.set(
                tokenHash,
                entry.upgrade(filterFromCompiledData(fdata))
            );
            continue;
        }
        bucket.set(
            tokenHash,
            new FilterPair(entry, filterFromCompiledData(fdata))
        );
    }

    this.filterParser.reset();
    this.badFilters.clear();
    this.goodFilters.clear();
    FilterHostnameDict.optimize();
    FilterBucket.optimize();
    this.frozen = true;
};

/******************************************************************************/

// This is necessary for when the filtering engine readiness will depend
// on asynchronous operations (ex.: when loading a wasm module).
// Returns an already-resolved promise.
FilterContainer.prototype.readyToUse = function() {
    return Promise.resolve();
};

/******************************************************************************/

// Persist the whole container under `path`: one asset for each of the three
// trie containers, plus a `main` asset holding the counters, the serialized
// filters and the tokenizer state as JSON. Returns the promise of all four
// writes.
FilterContainer.prototype.toSelfie = function(path) {
    // Serialize the category map as an array of
    // [ catbits, [ [ token, compiled filter ], ... ] ].
    const categoriesToSelfie = function(categoryMap) {
        const selfie = [];
        for ( const [ catbits, bucket ] of categoryMap ) {
            const tokenEntries = [];
            for ( const [ token, filter ] of bucket ) {
                tokenEntries.push([ token, filter.compile() ]);
            }
            selfie.push([ catbits, tokenEntries ]);
        }
        return selfie;
    };

    // Flatten every chain of data filter entries into a single array.
    const dataFiltersToSelfie = function(dataFilters) {
        const selfie = [];
        for ( let entry of dataFilters.values() ) {
            do {
                selfie.push(entry.compile());
                entry = entry.next;
            } while ( entry !== undefined );
        }
        return selfie;
    };

    filterOrigin.optimize();

    return Promise.all([
        µBlock.assets.put(
            `${path}/FilterHostnameDict.trieContainer`,
            FilterHostnameDict.trieContainer.serialize(µBlock.base64)
        ),
        µBlock.assets.put(
            `${path}/FilterOrigin.trieContainer`,
            filterOrigin.trieContainer.serialize(µBlock.base64)
        ),
        µBlock.assets.put(
            `${path}/FilterBucket.trieContainer`,
            FilterBucket.trieContainer.serialize(µBlock.base64)
        ),
        µBlock.assets.put(
            `${path}/main`,
            JSON.stringify({
                processedFilterCount: this.processedFilterCount,
                acceptedCount: this.acceptedCount,
                rejectedCount: this.rejectedCount,
                allowFilterCount: this.allowFilterCount,
                blockFilterCount: this.blockFilterCount,
                discardedCount: this.discardedCount,
                categories: categoriesToSelfie(this.categories),
                dataFilters: dataFiltersToSelfie(this.dataFilters),
                urlTokenizer: this.urlTokenizer.toSelfie(),
            })
        )
    ]);
};

/******************************************************************************/

// Restore the container from the assets written by toSelfie(). Returns a
// promise resolving to the logical AND of the four individual results; the
// `main` asset yields false when its content can't be parsed into an object.
FilterContainer.prototype.fromSelfie = function(path) {
    return Promise.all([
        µBlock.assets.get(`${path}/FilterHostnameDict.trieContainer`).then(details =>
            FilterHostnameDict.trieContainer.unserialize(
                details.content,
                µBlock.base64
            )
        ),
        µBlock.assets.get(`${path}/FilterOrigin.trieContainer`).then(details =>
            filterOrigin.trieContainer.unserialize(
                details.content,
                µBlock.base64
            )
        ),
        µBlock.assets.get(`${path}/FilterBucket.trieContainer`).then(details =>
            FilterBucket.trieContainer.unserialize(
                details.content,
                µBlock.base64
            )
        ),
        µBlock.assets.get(`${path}/main`).then(details => {
            let selfie;
            try {
                selfie = JSON.parse(details.content);
            } catch (ex) {
                // Invalid content: `selfie` stays undefined and is rejected
                // by the test below.
            }
            if ( selfie instanceof Object === false ) { return false; }
            this.frozen = true;
            this.processedFilterCount = selfie.processedFilterCount;
            this.acceptedCount = selfie.acceptedCount;
            this.rejectedCount = selfie.rejectedCount;
            this.allowFilterCount = selfie.allowFilterCount;
            this.blockFilterCount = selfie.blockFilterCount;
            this.discardedCount = selfie.discardedCount;
            this.urlTokenizer.fromSelfie(selfie.urlTokenizer);
            for ( const [ catbits, bucket ] of selfie.categories ) {
                const tokenMap = new Map();
                for ( const [ token, fdata ] of bucket ) {
                    tokenMap.set(token, filterFromCompiledData(fdata));
                }
                this.categories.set(catbits, tokenMap);
            }
            // Rebuild the per-token chains of data filter entries.
            for ( const dataEntry of selfie.dataFilters ) {
                const entry = FilterDataHolderEntry.load(dataEntry);
                const bucket = this.dataFilters.get(entry.tokenHash);
                if ( bucket !== undefined ) {
                    entry.next = bucket;
                }
                this.dataFilters.set(entry.tokenHash, entry);
            }
            return true;
        }),
    ]).then(results =>
        results.reduce((acc, v) => acc && v, true)
    );
};

/******************************************************************************/

// Compile one raw filter line and push the result to `writer`.
// Returns false when the line is empty, is a cosmetic filter, or is not
// supported (in which case an error is logged); returns true otherwise.
FilterContainer.prototype.compile = function(raw, writer) {
    // ORDER OF TESTS IS IMPORTANT!

    // Ignore empty lines
    const s = raw.trim();
    if ( s.length === 0 ) { return false; }

    const parsed = this.filterParser.parse(s);

    // Ignore element-hiding filters
    if ( parsed.elemHiding ) {
        return false;
    }

    // Ignore filters with unsupported options
    if ( parsed.unsupported ) {
        const who = writer.properties.get('assetKey') || '?';
        µb.logger.writeOne({
            realm: 'message',
            type: 'error',
            text: `Invalid network filter in ${who}: ${raw}`
        });
        return false;
    }

    // Pure hostnames, use more efficient dictionary lookup
    // https://github.com/chrisaljoudi/uBlock/issues/665
    // Create a dict keyed on request type etc.
    if (
        parsed.hostnamePure &&
        parsed.domainOpt === '' &&
        parsed.dataType === undefined
    ) {
        parsed.tokenHash = this.dotTokenHash;
        this.compileToAtomicFilter(parsed, parsed.f, writer);
        return true;
    }

    parsed.makeToken();

    // Select the filter class according to the parsed pattern and anchoring.
    let fdata;
    if ( parsed.isRegex ) {
        fdata = FilterRegex.compile(parsed);
    } else if ( parsed.hostnamePure ) {
        fdata = FilterPlainHostname.compile(parsed);
    } else if ( parsed.f === '*' ) {
        // Just-origin filter: one atomic filter per hostname in the
        // `domain=` option.
        if ( parsed.isJustOrigin() ) {
            parsed.tokenHash = this.anyTokenHash;
            for ( const hn of parsed.domainOpt.split('|') ) {
                this.compileToAtomicFilter(parsed, hn, writer);
            }
            return true;
        }
        fdata = FilterTrue.compile();
    } else if ( parsed.anchor === 0x5 ) {
        fdata = FilterGenericHnAndRightAnchored.compile(parsed);
    } else if ( parsed.anchor === 0x4 ) {
        if (
            parsed.wildcarded === false &&
            parsed.tokenHash !== parsed.noTokenHash &&
            parsed.tokenBeg === 0
        ) {
            fdata = FilterPlainHnAnchored.compile(parsed);
        } else {
            fdata = FilterGenericHnAnchored.compile(parsed);
        }
    } else if ( parsed.wildcarded || parsed.tokenHash === parsed.noTokenHash ) {
        fdata = FilterGeneric.compile(parsed);
    } else if ( parsed.anchor === 0x2 ) {
        // Just-origin filter restricted to a scheme: one atomic filter per
        // hostname in the `domain=` option.
        if ( parsed.isJustOrigin() ) {
            if ( parsed.f === 'https://' ) {
                parsed.tokenHash = this.anyHTTPSTokenHash;
                for ( const hn of parsed.domainOpt.split('|') ) {
                    this.compileToAtomicFilter(parsed, hn, writer);
                }
                return true;
            }
            if ( parsed.f === 'http://' ) {
                parsed.tokenHash = this.anyHTTPTokenHash;
                for ( const hn of parsed.domainOpt.split('|') ) {
                    this.compileToAtomicFilter(parsed, hn, writer);
                }
                return true;
            }
        }
        fdata = FilterPlainLeftAnchored.compile(parsed);
    } else if ( parsed.anchor === 0x1 ) {
        fdata = FilterPlainRightAnchored.compile(parsed);
    } else if ( parsed.anchor === 0x3 ) {
        fdata = FilterExactMatch.compile(parsed);
    } else if ( parsed.tokenBeg === 1 ) {
        fdata = FilterPlainPrefix1.compile(parsed);
    } else {
        fdata = FilterPlain.compile(parsed);
    }

    // Wrap with an origin filter when a `domain=` option is present.
    if ( parsed.domainOpt !== '' ) {
        fdata = filterOrigin.compile(parsed, fdata);
    }

    // Wrap with a data holder when data is attached; the wrapped filter data
    // is appended as the last element.
    if ( parsed.dataType !== undefined ) {
        let fwrapped = fdata;
        fdata = FilterDataHolder.compile(parsed);
        fdata.push(fwrapped);
    }

    this.compileToAtomicFilter(parsed, fdata, writer);

    return true;
};

/******************************************************************************/

FilterContainer.prototype.compileToAtomicFilter = function(
    parsed,
    fdata,
    writer
) {

    // 0 = network filters
    // 1 = network filters: bad filters
    if ( parsed.badFilter ) {
        writer.select(1);
    } else {
        writer.select(0);
    }

    let descBits = parsed.action | parsed.important | parsed.party;
    let type = parsed.types;

    // Typeless
    if ( type === 0 ) {
        writer.push([ descBits, parsed.tokenHash, fdata ]);
        return;
    }

    // Specific type(s)
    let bitOffset = 1;
    do {
        if ( type & 1 ) {
            writer.push([ descBits | (bitOffset << 4), parsed.tokenHash, fdata ]);
        }
        bitOffset += 1;
        type >>>= 1;
    } while ( type !== 0 );

    // Only static filter with an explicit type can be redirected. If we reach
    // this point, it's because there is one or more explicit type.
if ( parsed.redirect ) { let redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw); if ( Array.isArray(redirects) ) { for ( let redirect of redirects ) { writer.push([ typeNameToTypeValue.redirect, redirect ]); } } } }; /******************************************************************************/ FilterContainer.prototype.fromCompiledContent = function(reader) { // 0 = network filters reader.select(0); while ( reader.next() ) { this.acceptedCount += 1; if ( this.goodFilters.has(reader.line) ) { this.discardedCount += 1; } else { this.goodFilters.add(reader.line); } } // 1 = network filters: bad filter directives reader.select(1); while ( reader.next() ) { this.badFilters.add(reader.line); } }; /******************************************************************************/ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out, outlog) { if ( this.dataFilters.size === 0 ) { return; } let url = this.urlTokenizer.setURL(requestURL); pageHostnameRegister = requestHostnameRegister = µb.URI.hostnameFromURI(url); // We need to visit ALL the matching filters. 
let toAddImportant = new Map(), toAdd = new Map(), toRemove = new Map(); const tokenHashes = this.urlTokenizer.getTokens(); let i = 0; while ( i < 32 ) { let tokenHash = tokenHashes[i++]; if ( tokenHash === 0 ) { break; } let tokenOffset = tokenHashes[i++]; let entry = this.dataFilters.get(tokenHash); while ( entry !== undefined ) { let f = entry.filter; if ( f.match(url, tokenOffset) === true ) { if ( entry.categoryBits & 0x001 ) { toRemove.set(f.dataStr, entry); } else if ( entry.categoryBits & 0x002 ) { toAddImportant.set(f.dataStr, entry); } else { toAdd.set(f.dataStr, entry); } } entry = entry.next; } } let entry = this.dataFilters.get(this.noTokenHash); while ( entry !== undefined ) { let f = entry.filter; if ( f.match(url) === true ) { if ( entry.categoryBits & 0x001 ) { toRemove.set(f.dataStr, entry); } else if ( entry.categoryBits & 0x002 ) { toAddImportant.set(f.dataStr, entry); } else { toAdd.set(f.dataStr, entry); } } entry = entry.next; } if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; } // Remove entries overriden by other filters. 
for ( let key of toAddImportant.keys() ) { toAdd.delete(key); toRemove.delete(key); } for ( let key of toRemove.keys() ) { if ( key === '' ) { toAdd.clear(); break; } toAdd.delete(key); } for ( let entry of toAddImportant ) { out.push(entry[0]); if ( outlog === undefined ) { continue; } let logData = entry[1].logData(); logData.source = 'static'; logData.result = 1; outlog.push(logData); } for ( let entry of toAdd ) { out.push(entry[0]); if ( outlog === undefined ) { continue; } let logData = entry[1].logData(); logData.source = 'static'; logData.result = 1; outlog.push(logData); } if ( outlog !== undefined ) { for ( let entry of toRemove.values()) { let logData = entry.logData(); logData.source = 'static'; logData.result = 2; outlog.push(logData); } } }; /******************************************************************************/ FilterContainer.prototype.matchTokens = function(bucket) { // Hostname-only filters let f = bucket.get(this.dotTokenHash); if ( f !== undefined && f.match() === true ) { this.tokenRegister = this.dotTokenHash; this.filterRegister = f; return true; } const url = this.urlRegister; const tokenHashes = this.urlTokenizer.getTokens(); let i = 0; for (;;) { const tokenHash = tokenHashes[i]; if ( tokenHash === 0 ) { break; } f = bucket.get(tokenHash); if ( f !== undefined && f.match(url, tokenHashes[i+1]) === true ) { this.tokenRegister = tokenHash; this.filterRegister = f; return true; } i += 2; } return false; }; /******************************************************************************/ FilterContainer.prototype.realmMatchStringExactType = function( realmBits, typeBits, partyBits ) { let bucket; let catBits = realmBits | typeBits; if ( (bucket = this.categories.get(catBits)) ) { if ( this.matchTokens(bucket) ) { this.catbitsRegister = catBits; return true; } } if ( partyBits !== 0 ) { catBits = realmBits | typeBits | partyBits; if ( (bucket = this.categories.get(catBits)) ) { if ( this.matchTokens(bucket) ) { this.catbitsRegister = 
catBits; return true; } } } return false; }; /******************************************************************************/ FilterContainer.prototype.realmMatchString = function( realmBits, typeBits, partyBits ) { let bucket; let catBits = realmBits; if ( (bucket = this.categories.get(catBits)) ) { if ( this.matchTokens(bucket) ) { this.catbitsRegister = catBits; return true; } } if ( partyBits !== 0 ) { catBits = realmBits | partyBits; if ( (bucket = this.categories.get(catBits)) ) { if ( this.matchTokens(bucket) ) { this.catbitsRegister = catBits; return true; } } } if ( typeBits !== 0 ) { catBits = realmBits | typeBits; if ( (bucket = this.categories.get(catBits)) ) { if ( this.matchTokens(bucket) ) { this.catbitsRegister = catBits; return true; } } } if ( typeBits !== 0 && partyBits !== 0 ) { catBits = realmBits | typeBits | partyBits; if ( (bucket = this.categories.get(catBits)) ) { if ( this.matchTokens(bucket) ) { this.catbitsRegister = catBits; return true; } } } return false; }; /******************************************************************************/ // Specialized handler // https://github.com/gorhill/uBlock/issues/1477 // Special case: blocking-generichide filter ALWAYS exists, it is implicit -- // thus we always first check for exception filters, then for important block // filter if and only if there was a hit on an exception filter. // https://github.com/gorhill/uBlock/issues/2103 // User may want to override `generichide` exception filters. FilterContainer.prototype.matchStringGenericHide = function(requestURL) { const typeBits = typeNameToTypeValue['generichide']; // Prime tokenizer: we get a normalized URL in return. 
this.urlRegister = this.urlTokenizer.setURL(requestURL); this.filterRegister = null; // These registers will be used by various filters pageHostnameRegister = requestHostnameRegister = µb.URI.hostnameFromURI(requestURL); // Exception filters if ( this.realmMatchStringExactType(AllowAction, typeBits, FirstParty) ) { // Important block filters. if ( this.realmMatchStringExactType(BlockImportant, typeBits, FirstParty) ) { return 1; } return 2; } return 0; }; /******************************************************************************/ // https://github.com/chrisaljoudi/uBlock/issues/116 // Some type of requests are exceptional, they need custom handling, // not the generic handling. FilterContainer.prototype.matchStringExactType = function(fctxt, requestType) { const typeBits = typeNameToTypeValue[requestType]; if ( typeBits === undefined ) { return 0; } const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; // Prime tokenizer: we get a normalized URL in return. this.urlRegister = this.urlTokenizer.setURL(fctxt.url); this.filterRegister = null; // These registers will be used by various filters pageHostnameRegister = fctxt.getDocHostname(); requestHostnameRegister = fctxt.getHostname(); // Important block filters. 
if ( this.realmMatchStringExactType(BlockImportant, typeBits, partyBits) ) { return 1; } // Block filters if ( this.realmMatchStringExactType(BlockAction, typeBits, partyBits) ) { // Exception filters if ( this.realmMatchStringExactType(AllowAction, typeBits, partyBits) ) { return 2; } return 1; } return 0; }; /******************************************************************************/ FilterContainer.prototype.matchString = function(fctxt) { // https://github.com/chrisaljoudi/uBlock/issues/519 // Use exact type match for anything beyond `other` // Also, be prepared to support unknown types let typeBits = typeNameToTypeValue[fctxt.type]; if ( typeBits === undefined ) { typeBits = otherTypeBitValue; } else if ( typeBits === 0 || typeBits > otherTypeBitValue ) { return this.matchStringExactType(fctxt, fctxt.type); } const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; // Prime tokenizer: we get a normalized URL in return. this.urlRegister = this.urlTokenizer.setURL(fctxt.url); this.filterRegister = null; // These registers will be used by various filters pageHostnameRegister = fctxt.getDocHostname(); requestHostnameRegister = fctxt.getHostname(); // Important block filters. if ( this.realmMatchString(BlockImportant, typeBits, partyBits) ) { return 1; } // Block filters if ( this.realmMatchString(BlockAction, typeBits, partyBits) ) { // Exception filters if ( this.realmMatchString(AllowAction, typeBits, partyBits) ) { return 2; } return 1; } return 0; }; /******************************************************************************/ FilterContainer.prototype.toLogData = function() { if ( this.filterRegister === null ) { return; } const logData = toLogDataInternal( this.catbitsRegister, this.tokenRegister, this.filterRegister ); logData.source = 'static'; logData.tokenHash = this.tokenRegister; logData.result = this.filterRegister === null ? 0 : (this.catbitsRegister & 1 ? 
2 : 1); return logData; }; /******************************************************************************/ FilterContainer.prototype.getFilterCount = function() { return this.acceptedCount - this.discardedCount; }; /******************************************************************************/ // action: 1=test, 2=record FilterContainer.prototype.benchmark = function(action) { µb.loadBenchmarkDataset().then(requests => { if ( Array.isArray(requests) === false || requests.length === 0 ) { console.info('No requests found to benchmark'); return; } console.info(`Benchmarking staticNetFilteringEngine.matchString()...`); const fctxt = µb.filteringContext.duplicate(); let expected, recorded; if ( action === 1 ) { try { expected = JSON.parse( vAPI.localStorage.getItem('FilterContainer.benchmark.results') ); } catch(ex) { } } if ( action === 2 ) { recorded = []; } const t0 = self.performance.now(); for ( let i = 0; i < requests.length; i++ ) { const request = requests[i]; fctxt.setURL(request.url); fctxt.setDocOriginFromURL(request.frameUrl); fctxt.setType(request.cpt); const r = this.matchString(fctxt); if ( recorded !== undefined ) { recorded.push(r); } if ( expected !== undefined && r !== expected[i] ) { console.log('Mismatch with reference results:'); console.log(`\tExpected ${expected[i]}, got ${r}:`); console.log(`\ttype=${fctxt.type}`); console.log(`\turl=${fctxt.url}`); console.log(`\tdocOrigin=${fctxt.getDocOrigin()}`); } } const t1 = self.performance.now(); const dur = t1 - t0; console.info(`Evaluated ${requests.length} requests in ${dur.toFixed(0)} ms`); console.info(`\tAverage: ${(dur / requests.length).toFixed(3)} ms per request`); if ( expected !== undefined ) { console.info(`\tBlocked: ${expected.reduce((n,r)=>{return r===1?n+1:n;},0)}`); console.info(`\tExcepted: ${expected.reduce((n,r)=>{return r===2?n+1:n;},0)}`); } if ( recorded !== undefined ) { vAPI.localStorage.setItem( 'FilterContainer.benchmark.results', JSON.stringify(recorded) ); } }); return 
'ok'; }; /******************************************************************************- With default filter lists: As of 2019-04-18: {bits: "0", token: "ad", size: 926, f: FilterBucket} {bits: "0", token: "ads", size: 636, f: FilterBucket} {bits: "41", token: "phncdn", size: 253, f: FilterBucket} {bits: "0", token: "analytic", size: 174, f: FilterBucket} {bits: "0", token: "tracking", size: 155, f: FilterBucket} {bits: "48", token: "http", size: 146, f: FilterBucket} {bits: "48", token: "https", size: 139, f: FilterBucket} {bits: "58", token: "http", size: 122, f: FilterBucket} {bits: "0", token: "adv", size: 121, f: FilterBucket} {bits: "58", token: "https", size: 118, f: FilterBucket} {bits: "0", token: "advertis", size: 102, f: FilterBucket} {bits: "8", token: "doublecl", size: 96, f: FilterBucket} {bits: "41", token: "imasdk", size: 90, f: FilterBucket} {bits: "0", token: "cdn", size: 89, f: FilterBucket} {bits: "0", token: "track", size: 87, f: FilterBucket} {bits: "0", token: "stats", size: 82, f: FilterBucket} {bits: "0", token: "banner", size: 74, f: FilterBucket} {bits: "0", token: "log", size: 72, f: FilterBucket} {bits: "0", token: "ga", size: 71, f: FilterBucket} {bits: "0", token: "gif", size: 67, f: FilterBucket} {bits: "0", token: "cloudfro", size: 64, f: FilterBucket} {bits: "0", token: "amazonaw", size: 61, f: FilterBucket} {bits: "41", token: "ajax", size: 58, f: FilterBucket} {bits: "0", token: "tracker", size: 56, f: FilterBucket} {bits: "40", token: "pagead2", size: 53, f: FilterBucket} {bits: "0", token: "affiliat", size: 53, f: FilterBucket} */ FilterContainer.prototype.bucketHistogram = function() { const results = []; for ( const [ bits, category ] of this.categories ) { for ( const [ th, f ] of category ) { if ( f instanceof FilterPair ) { const token = µBlock.urlTokenizer.stringFromTokenHash(th); results.push({ bits: bits.toString(16), token, size: f.size, f }); continue; } if ( f instanceof FilterBucket ) { const token = 
µBlock.urlTokenizer.stringFromTokenHash(th); results.push({ bits: bits.toString(16), token, size: f.size, f }); continue; } if ( f instanceof FilterHostnameDict ) { const token = µBlock.urlTokenizer.stringFromTokenHash(th); results.push({ bits: bits.toString(16), token, size: f.size, f }); continue; } if ( f instanceof FilterJustOrigin ) { const token = µBlock.urlTokenizer.stringFromTokenHash(th); results.push({ bits: bits.toString(16), token, size: f.size, f }); continue; } } } results.sort((a, b) => { return b.size - a.size; }); console.log(results); }; /******************************************************************************* With default filter lists: As of 2019-04-13: {"FilterPlainHnAnchored" => 12619} {"FilterPlainPrefix1" => 8743} {"FilterGenericHnAnchored" => 5231} {"FilterOriginHit" => 4149} {"FilterPair" => 2381} {"FilterBucket" => 1940} {"FilterPlainHostname" => 1612} {"FilterOriginHitSet" => 1430} {"FilterPlainLeftAnchored" => 799} {"FilterGeneric" => 588} {"FilterPlain" => 510} {"FilterOriginMiss" => 299} {"FilterDataHolder" => 280} {"FilterOriginMissSet" => 150} {"FilterTrue" => 130} {"FilterRegex" => 124} {"FilterPlainRightAnchored" => 110} {"FilterGenericHnAndRightAnchored" => 95} {"FilterHostnameDict" => 59} {"FilterPlainPrefix0" => 29} {"FilterExactMatch" => 5} {"FilterOriginMixedSet" => 3} Observations: - No need for FilterPlainPrefix0. - FilterPlainHnAnchored and FilterPlainPrefix1 are good candidates for storing in a plain string trie. 
As of 2019-04-25: {"FilterPlainHnAnchored" => 11078} {"FilterPlainPrefix1" => 7195} {"FilterPrefix1Trie" => 5720} {"FilterOriginHit" => 3561} {"FilterWildcard2HnAnchored" => 2943} {"FilterPair" => 2391} {"FilterBucket" => 1922} {"FilterWildcard1HnAnchored" => 1910} {"FilterHnAnchoredTrie" => 1586} {"FilterPlainHostname" => 1391} {"FilterOriginHitSet" => 1155} {"FilterPlain" => 634} {"FilterWildcard1" => 423} {"FilterGenericHnAnchored" => 389} {"FilterOriginMiss" => 302} {"FilterGeneric" => 163} {"FilterOriginMissSet" => 150} {"FilterRegex" => 124} {"FilterPlainRightAnchored" => 110} {"FilterGenericHnAndRightAnchored" => 95} {"FilterHostnameDict" => 59} {"FilterPlainLeftAnchored" => 30} {"FilterJustOrigin" => 22} {"FilterHTTPJustOrigin" => 19} {"FilterHTTPSJustOrigin" => 18} {"FilterExactMatch" => 5} {"FilterOriginMixedSet" => 3} */ FilterContainer.prototype.filterClassHistogram = function() { const filterClassDetails = new Map(); for ( let i = 0; i < filterClasses.length; i++ ) { filterClassDetails.set(i, { name: filterClasses[i].name, count: 0, }); } // Artificial classes to report content of tries filterClassDetails.set(1000, { name: 'FilterPrefix1Trie', count: 0, }); filterClassDetails.set(1001, { name: 'FilterHnAnchoredTrie', count: 0, }); const countFilter = function(f) { if ( f instanceof Object === false ) { return; } filterClassDetails.get(f.fid).count += 1; if ( f.wrapped ) { countFilter(f.wrapped); } }; for ( const category of this.categories.values() ) { for ( const f of category.values() ) { countFilter(f); if ( f instanceof FilterBucket ) { for ( const g of f.filters ) { countFilter(g); } if ( f.plainPrefix1Trie !== null ) { filterClassDetails.get(1000).count += f.plainPrefix1Trie.size; } if ( f.plainHnAnchoredTrie !== null ) { filterClassDetails.get(1001).count += f.plainHnAnchoredTrie.size; } continue; } if ( f instanceof FilterPair ) { countFilter(f.f1); countFilter(f.f2); continue; } } } const results = 
Array.from(filterClassDetails.values()).sort((a, b) => { return b.count - a.count; }); console.log(results); }; /******************************************************************************/ return new FilterContainer(); /******************************************************************************/ })();