/*******************************************************************************

    uBlock Origin - a browser extension to block requests.
    Copyright (C) 2014-present Raymond Hill

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see {http://www.gnu.org/licenses/}.

    Home: https://github.com/gorhill/uBlock
*/

/* jshint bitwise: false */
/* global punycode */

'use strict';

/******************************************************************************/

µBlock.staticNetFilteringEngine = (( ) => {

/******************************************************************************/

const µb = µBlock;

// fedcba9876543210
//       |    | |||
//       |    | |||
//       |    | |||
//       |    | |||
//       |    | ||+---- bit    0: [BlockAction | AllowAction]
//       |    | |+----- bit    1: `important`
//       |    | +------ bit 2- 3: party [0 - 3]
//       |    +-------- bit 4- 8: type [0 - 31]
//       +------------- bit 9-15: unused

const BlockAction = 0 << 0;
const AllowAction = 1 << 0;
const Important   = 1 << 1;
const AnyParty    = 0 << 2;
const FirstParty  = 1 << 2;
const ThirdParty  = 2 << 2;

const typeNameToTypeValue = {
           'no_type':  0 << 4,
        'stylesheet':  1 << 4,
             'image':  2 << 4,
            'object':  3 << 4,
 'object_subrequest':  3 << 4,
            'script':  4 << 4,
             'fetch':  5 << 4,
    'xmlhttprequest':  5 << 4,
         'sub_frame':  6 << 4,
              'font':  7 << 4,
             'media':  8 << 4,
         'websocket':  9 << 4,
             'other': 10 << 4,
             'popup': 11 << 4,  // start of behavioral filtering
          'popunder': 12 << 4,
        'main_frame': 13 << 4,  // start of 1st-party-only behavioral filtering
       'generichide': 14 << 4,
       'inline-font': 15 << 4,
     'inline-script': 16 << 4,
              'data': 17 << 4,  // special: a generic data holder
          'redirect': 18 << 4,
            'webrtc': 19 << 4,
       'unsupported': 20 << 4
};
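
// Editor's illustration (not part of the original source): a filter's
// category bits are composed from the constants above plus a type value.
// For instance, an `important`, third-party, `script` block filter is
// bucketed under:
//
//     const catBits = BlockAction | Important | ThirdParty |
//                     typeNameToTypeValue['script'];
//     // catBits === 0b1001010 (0x4A): bit 0 clear (block), bit 1 set
//     // (important), bits 2-3 = 2 (third-party), bits 4-8 = 4 (script).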

const otherTypeBitValue = typeNameToTypeValue.other;

// All network request types to bitmap
//   bring origin to 0 (from 4 -- see typeNameToTypeValue)
//   left-shift 1 by the above-calculated value
//   subtract 1 to set all type bits
const allNetworkTypesBits =
    (1 << (otherTypeBitValue >>> 4)) - 1;

const allTypesBits =
    allNetworkTypesBits |
    1 << (typeNameToTypeValue['popup'] >>> 4) - 1 |
    1 << (typeNameToTypeValue['main_frame'] >>> 4) - 1 |
    1 << (typeNameToTypeValue['inline-font'] >>> 4) - 1 |
    1 << (typeNameToTypeValue['inline-script'] >>> 4) - 1;

const unsupportedTypeBit =
    1 << (typeNameToTypeValue['unsupported'] >>> 4) - 1;
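
// Worked example (editor's note, not in the original source):
// `otherTypeBitValue >>> 4` is 10, so allNetworkTypesBits is
// (1 << 10) - 1 = 0b1111111111 -- one bit per network type, with type N
// (1 = stylesheet ... 10 = other) mapped to bit N-1. In the expressions
// above, `- 1` binds tighter than `<<`, so e.g. the `popup` term evaluates
// to 1 << (11 - 1), i.e. bit 10, and `unsupported` to bit 19.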

const typeValueToTypeName = {
     1: 'stylesheet',
     2: 'image',
     3: 'object',
     4: 'script',
     5: 'xmlhttprequest',
     6: 'subdocument',
     7: 'font',
     8: 'media',
     9: 'websocket',
    10: 'other',
    11: 'popup',
    12: 'popunder',
    13: 'document',
    14: 'generichide',
    15: 'inline-font',
    16: 'inline-script',
    17: 'data',
    18: 'redirect',
    19: 'webrtc',
    20: 'unsupported'
};

const BlockImportant = BlockAction | Important;

const reIsWildcarded = /[\^\*]/;

// ABP filters: https://adblockplus.org/en/filters
// regex tester: http://regex101.com/

/******************************************************************************/

// See the following as short-lived registers, used during evaluation. They are
// valid until the next evaluation.

let urlRegister = '';
let pageHostnameRegister = '';
let requestHostnameRegister = '';

/******************************************************************************/

// First character of match must be within the hostname part of the url.
//
// https://github.com/gorhill/uBlock/issues/1929
//   Match only hostname label boundaries.

const isHnAnchored = (( ) => {
    let lastLen = 0, lastBeg = -1, lastEnd = -1;

    return (url, matchStart) => {
        const len = requestHostnameRegister.length;
        if ( len !== lastLen || url.endsWith('://', lastBeg) === false ) {
            lastBeg = len !== 0 ? url.indexOf('://') : -1;
            if ( lastBeg !== -1 ) {
                lastBeg += 3;
                lastEnd = lastBeg + len;
            } else {
                lastEnd = -1;
            }
        }
        return matchStart < lastEnd && (
            matchStart === lastBeg ||
            matchStart > lastBeg &&
                url.charCodeAt(matchStart - 1) === 0x2E /* '.' */
        );
    };
})();
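
// Editor's illustration (not in the original source): with
// requestHostnameRegister set to 'www.example.com' and
//     const url = 'https://www.example.com/ad.js';
// isHnAnchored(url, 8) is true (start of the hostname), isHnAnchored(url, 12)
// is true ('example.com' begins right after a '.' label boundary), and
// isHnAnchored(url, 13) is false (mid-label), as is any matchStart falling
// past the end of the hostname.
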
/******************************************************************************/

// Local helpers

const normalizeRegexSource = function(s) {
    try {
        const re = new RegExp(s);
        return re.source;
    } catch (ex) {
        normalizeRegexSource.message = ex.toString();
    }
    return '';
};

const rawToRegexStr = function(s, anchor) {
    // https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/
    // https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
    // Also: remove leading/trailing wildcards -- there is no point.
    let reStr = s.replace(rawToRegexStr.escape1, '\\$&')
                 .replace(rawToRegexStr.escape2, '(?:[^%.0-9a-z_-]|$)')
                 .replace(rawToRegexStr.escape3, '')
                 .replace(rawToRegexStr.escape4, '[^ ]*?');
    if ( anchor & 0b100 ) {
        reStr = (
            reStr.startsWith('\\.') ?
                rawToRegexStr.reTextHostnameAnchor2 :
                rawToRegexStr.reTextHostnameAnchor1
        ) + reStr;
    } else if ( anchor & 0b010 ) {
        reStr = '^' + reStr;
    }
    if ( anchor & 0b001 ) {
        reStr += '$';
    }
    return reStr;
};
rawToRegexStr.escape1 = /[.+?${}()|[\]\\]/g;
rawToRegexStr.escape2 = /\^/g;
rawToRegexStr.escape3 = /^\*|\*$/g;
rawToRegexStr.escape4 = /\*/g;
rawToRegexStr.reTextHostnameAnchor1 = '^[a-z-]+://(?:[^/?#]+\\.)?';
rawToRegexStr.reTextHostnameAnchor2 = '^[a-z-]+://(?:[^/?#]+)?';
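
// Editor's illustration (not in the original source): the anchor argument is
// a 3-bit value -- 0b100 hostname-anchored, 0b010 left-anchored, 0b001
// right-anchored. For example:
//     rawToRegexStr('/banner/*/ad^', 0)
// escapes nothing here (no regex metacharacters), turns the inner `*` into
// `[^ ]*?` and the `^` separator into `(?:[^%.0-9a-z_-]|$)`, yielding
//     '/banner/[^ ]*?/ad(?:[^%.0-9a-z_-]|$)'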

// https://github.com/uBlockOrigin/uAssets/issues/4083#issuecomment-436914727
const rawToPlainStr = function(s, anchor) {
    if (
        anchor === 0 &&
        s.charCodeAt(0) === 0x2F /* '/' */ &&
        s.length > 2 &&
        s.charCodeAt(s.length-1) === 0x2F /* '/' */
    ) {
        s = s + '*';
    }
    return s;
};

const filterDataSerialize = µb.CompiledLineIO.serialize;

const toLogDataInternal = function(categoryBits, tokenHash, filter) {
    if ( filter === null ) { return undefined; }
    const logData = filter.logData();
    logData.compiled = filterDataSerialize([
        categoryBits,
        tokenHash,
        logData.compiled
    ]);
    if ( categoryBits & 0x001 ) {
        logData.raw = `@@${logData.raw}`;
    }
    const opts = [];
    if ( categoryBits & 0x002 ) {
        opts.push('important');
    }
    if ( categoryBits & 0x008 ) {
        opts.push('third-party');
    } else if ( categoryBits & 0x004 ) {
        opts.push('first-party');
    }
    const type = categoryBits & 0x1F0;
    if ( type !== 0 && type !== typeNameToTypeValue.data ) {
        opts.push(typeValueToTypeName[type >>> 4]);
    }
    if ( logData.opts !== undefined ) {
        opts.push(logData.opts);
    }
    if ( opts.length !== 0 ) {
        logData.raw += '$' + opts.join(',');
    }
    return logData;
};
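
// Editor's illustration (not in the original source): with categoryBits set
// to AllowAction | Important | ThirdParty | typeNameToTypeValue['script']
// (0x4B) and a wrapped filter whose logData().raw is 'example.com/ads.js',
// the raw filter reconstructed for the logger is
// '@@example.com/ads.js$important,third-party,script'.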

/*******************************************************************************

    Each filter class will register itself in the map. A filter class
    id MUST always stringify to ONE single character.

    IMPORTANT: any change which modifies the mapping will have to be
    reflected with µBlock.systemSettings.compiledMagic.

*/

const filterClasses = [];
let filterClassIdGenerator = 0;

const registerFilterClass = function(ctor) {
    let fid = filterClassIdGenerator++;
    ctor.fid = ctor.prototype.fid = fid;
    filterClasses[fid] = ctor;
};

const filterFromCompiledData = function(args) {
    return filterClasses[args[0]].load(args);
};
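
// Editor's illustration (not in the original source): a compiled filter is a
// plain array whose first element is the class id, so it can be serialized
// and later revived through the registry:
//
//     const compiled = FilterPlainHostname.compile({ f: 'example.com' });
//     // compiled -> [ FilterPlainHostname.fid, 'example.com' ]
//     const f = filterFromCompiledData(compiled);
//     // f instanceof FilterPlainHostname; f.match() tests
//     // requestHostnameRegister against 'example.com'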

/******************************************************************************/

const FilterTrue = class {
    match() {
        return true;
    }

    logData() {
        return {
            raw: '*',
            regex: '^',
            compiled: this.compile(),
        };
    }

    compile() {
        return [ this.fid ];
    }

    static compile() {
        return [ FilterTrue.fid ];
    }

    static load() {
        return FilterTrue.instance;
    }
};

FilterTrue.instance = new FilterTrue();

registerFilterClass(FilterTrue);

/******************************************************************************/

const FilterPlain = class {
    constructor(s) {
        this.s = s;
    }

    match(url, tokenBeg) {
        return url.startsWith(this.s, tokenBeg);
    }

    logData() {
        return {
            raw: rawToPlainStr(this.s, 0),
            regex: rawToRegexStr(this.s, 0),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s, this.tokenBeg ];
    }

    addToTrie(trie) {
        trie.add(this.s, this.tokenBeg);
    }

    static compile(details) {
        return [ FilterPlain.fid, details.f, details.tokenBeg ];
    }

    static load(args) {
        if ( args[2] === 0 ) {
            return new FilterPlain(args[1]);
        }
        if ( args[2] === 1 ) {
            return new FilterPlain1(args[1]);
        }
        return new FilterPlainX(args[1], args[2]);
    }

    static addToTrie(args, trie) {
        trie.add(args[1], args[2]);
    }
};

FilterPlain.trieableId = 0;
FilterPlain.prototype.trieableId = FilterPlain.trieableId;
FilterPlain.prototype.tokenBeg = 0;

registerFilterClass(FilterPlain);
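
// Editor's note (not in the original source): `tokenBeg` records where the
// matching token starts inside the filter string, so match() can test the
// whole pattern with a single url.startsWith() call at offset
// `tokenBeg - this.tokenBeg`. The FilterPlain variants differ only by that
// stored offset (0, 1, or arbitrary), and trieable instances can instead be
// pooled into a shared trie through addToTrie(). For example, the pattern
// '/ads/banner' tokenized on 'ads' would be stored with tokenBeg 1 and
// revived as a FilterPlain1.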

const FilterPlain1 = class extends FilterPlain {
    match(url, tokenBeg) {
        return url.startsWith(this.s, tokenBeg - 1);
    }
};

FilterPlain1.prototype.tokenBeg = 1;

const FilterPlainX = class extends FilterPlain {
    constructor(s, tokenBeg) {
        super(s);
        this.tokenBeg = tokenBeg;
    }

    match(url, tokenBeg) {
        return url.startsWith(this.s, tokenBeg - this.tokenBeg);
    }
};

/******************************************************************************/

const FilterPlainHostname = class {
    constructor(s) {
        this.s = s;
    }

    match() {
        const haystack = requestHostnameRegister;
        const needle = this.s;
        if ( haystack.endsWith(needle) === false ) { return false; }
        const offset = haystack.length - needle.length;
        return offset === 0 || haystack.charCodeAt(offset - 1) === 0x2E /* '.' */;
    }

    logData() {
        return {
            raw: `||${this.s}^`,
            regex: rawToRegexStr(`${this.s}^`, 0),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s ];
    }

    static compile(details) {
        return [ FilterPlainHostname.fid, details.f ];
    }

    static load(args) {
        return new FilterPlainHostname(args[1]);
    }
};

registerFilterClass(FilterPlainHostname);

/******************************************************************************/

const FilterPlainLeftAnchored = class {
    constructor(s) {
        this.s = s;
    }

    match(url) {
        return url.startsWith(this.s);
    }

    logData() {
        return {
            raw: `|${this.s}`,
            regex: rawToRegexStr(this.s, 0b010),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s ];
    }

    static compile(details) {
        return [ FilterPlainLeftAnchored.fid, details.f ];
    }

    static load(args) {
        return new FilterPlainLeftAnchored(args[1]);
    }
};

registerFilterClass(FilterPlainLeftAnchored);

/******************************************************************************/

const FilterPlainRightAnchored = class {
    constructor(s) {
        this.s = s;
    }

    match(url) {
        return url.endsWith(this.s);
    }

    logData() {
        return {
            raw: `${this.s}|`,
            regex: rawToRegexStr(this.s, 0b001),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s ];
    }

    static compile(details) {
        return [ FilterPlainRightAnchored.fid, details.f ];
    }

    static load(args) {
        return new FilterPlainRightAnchored(args[1]);
    }
};

registerFilterClass(FilterPlainRightAnchored);

/******************************************************************************/

const FilterExactMatch = class {
    constructor(s) {
        this.s = s;
    }

    match(url) {
        return url === this.s;
    }

    logData() {
        return {
            raw: `|${this.s}|`,
            regex: rawToRegexStr(this.s, 0b011),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s ];
    }

    static compile(details) {
        return [ FilterExactMatch.fid, details.f ];
    }

    static load(args) {
        return new FilterExactMatch(args[1]);
    }
};

registerFilterClass(FilterExactMatch);

/******************************************************************************/

const FilterPlainHnAnchored = class {
    constructor(s) {
        this.s = s;
    }

    match(url, tokenBeg) {
        return url.startsWith(this.s, tokenBeg) &&
               isHnAnchored(url, tokenBeg);
    }

    logData() {
        return {
            raw: `||${this.s}`,
            regex: rawToRegexStr(this.s, this.tokenBeg),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s, this.tokenBeg ];
    }

    addToTrie(trie) {
        trie.add(this.s, this.tokenBeg);
    }

    static compile(details) {
        return [ FilterPlainHnAnchored.fid, details.f, details.tokenBeg ];
    }

    static load(args) {
        if ( args[2] === 0 ) {
            return new FilterPlainHnAnchored(args[1]);
        }
        return new FilterPlainHnAnchoredX(args[1], args[2]);
    }

    static addToTrie(args, trie) {
        trie.add(args[1], args[2]);
    }
};

FilterPlainHnAnchored.trieableId = 1;
FilterPlainHnAnchored.prototype.trieableId = FilterPlainHnAnchored.trieableId;
FilterPlainHnAnchored.prototype.tokenBeg = 0;

registerFilterClass(FilterPlainHnAnchored);

const FilterPlainHnAnchoredX = class extends FilterPlainHnAnchored {
    constructor(s, tokenBeg) {
        super(s);
        this.tokenBeg = tokenBeg;
    }

    match(url, tokenBeg) {
        const beg = tokenBeg - this.tokenBeg;
        return url.startsWith(this.s, beg) && isHnAnchored(url, beg);
    }
};

/*******************************************************************************

    Filters with only one single occurrence of wildcard `*`

*/

const FilterWildcard1 = class {
    constructor(s0, s1, tokenBeg) {
        this.s0 = s0;
        this.s1 = s1;
        this.tokenBeg = tokenBeg;
    }

    match(url, tokenBeg) {
        if ( this.tokenBeg >= 0 ) {
            const s0Beg = tokenBeg - this.tokenBeg;
            return s0Beg >= 0 &&
                   url.startsWith(this.s0, s0Beg) &&
                   url.indexOf(this.s1, s0Beg + this.s0.length) !== -1;
        }
        const s1Beg = tokenBeg + this.tokenBeg;
        return s1Beg > 0 &&
               url.startsWith(this.s1, s1Beg) &&
               url.lastIndexOf(this.s0, s1Beg) !== -1;
    }

    logData() {
        return {
            raw: `${this.s0}*${this.s1}`,
            regex: rawToRegexStr(`${this.s0}*${this.s1}`, 0),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s0, this.s1, this.tokenBeg ];
    }

    static compile(details) {
        if ( details.token === '*' ) { return; }
        if ( details.anchor !== 0 ) { return; }
        const s = details.f;
        let pos = s.indexOf('*');
        if ( pos === -1 ) { return; }
        if ( reIsWildcarded.test(s.slice(pos + 1)) ) { return; }
        if ( reIsWildcarded.test(s.slice(0, pos)) ) { return; }
        return [
            FilterWildcard1.fid,
            s.slice(0, pos),
            s.slice(pos + 1),
            details.tokenBeg < pos
                ? details.tokenBeg
                : pos + 1 - details.tokenBeg,
        ];
    }

    static load(args) {
        return new FilterWildcard1(args[1], args[2], args[3]);
    }
};

registerFilterClass(FilterWildcard1);
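
// Editor's illustration (not in the original source): the pattern
// '/adimg/*.gif' is split at the single '*' into s0 = '/adimg/' and
// s1 = '.gif'. If the chosen token ('adimg') begins at position 1, before
// the wildcard, the stored tokenBeg is 1 and match() checks s0 with
// startsWith() at `tokenBeg - 1`, then looks for s1 anywhere past it with
// indexOf(). A negative stored tokenBeg means the token sits inside s1, and
// the test runs the other way around (startsWith on s1, lastIndexOf on s0).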

/******************************************************************************/

const FilterGeneric = class {
    constructor(s, anchor) {
        this.s = s;
        this.anchor = anchor;
    }

    match(url) {
        if ( this.re === null ) {
            this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
        }
        return this.re.test(url);
    }

    logData() {
        const out = {
            raw: rawToPlainStr(this.s, this.anchor),
            regex: this.re.source,
            compiled: this.compile()
        };
        if ( this.anchor & 0x2 ) {
            out.raw = `|${out.raw}`;
        }
        if ( this.anchor & 0x1 ) {
            out.raw += '|';
        }
        return out;
    }

    compile() {
        return [ this.fid, this.s, this.anchor ];
    }

    static compile(details) {
        const compiled = FilterWildcard1.compile(details);
        if ( compiled !== undefined ) { return compiled; }
        return [ FilterGeneric.fid, details.f, details.anchor ];
    }

    static load(args) {
        return new FilterGeneric(args[1], args[2]);
    }
};

FilterGeneric.prototype.re = null;

registerFilterClass(FilterGeneric);

/*******************************************************************************

    Hostname-anchored filters with only one occurrence of wildcard `*`

*/

const FilterWildcard1HnAnchored = class {
    constructor(s0, s1, tokenBeg) {
        this.s0 = s0;
        this.s1 = s1;
        this.tokenBeg = tokenBeg;
    }

    match(url, tokenBeg) {
        if ( this.tokenBeg >= 0 ) {
            const s0Beg = tokenBeg - this.tokenBeg;
            return s0Beg >= 0 &&
                   url.startsWith(this.s0, s0Beg) &&
                   isHnAnchored(url, s0Beg) &&
                   url.indexOf(this.s1, s0Beg + this.s0.length) !== -1;
        }
        const s1Beg = tokenBeg + this.tokenBeg;
        if ( s1Beg < 0 || url.startsWith(this.s1, s1Beg) === false ) {
            return false;
        }
        const s0Beg = url.lastIndexOf(this.s0, s1Beg);
        return s0Beg !== -1 && isHnAnchored(url, s0Beg);
    }

    logData() {
        return {
            raw: `||${this.s0}*${this.s1}`,
            regex: rawToRegexStr(`${this.s0}*${this.s1}`, 0),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s0, this.s1, this.tokenBeg ];
    }

    static compile(details) {
        if ( details.token === '*' ) { return; }
        if ( (details.anchor & 0b001) !== 0 ) { return; }
        const s = details.f;
        let pos = s.indexOf('*');
        if ( pos === -1 ) { return; }
        if ( reIsWildcarded.test(s.slice(pos + 1)) ) { return; }
        const needSeparator =
            pos !== 0 && s.charCodeAt(pos - 1) === 0x5E /* '^' */;
        if ( needSeparator ) { pos -= 1; }
        if ( reIsWildcarded.test(s.slice(0, pos)) ) { return; }
        if ( needSeparator ) {
            return FilterWildcard2HnAnchored.compile(details, pos);
        }
        return [
            FilterWildcard1HnAnchored.fid,
            s.slice(0, pos),
            s.slice(pos + 1),
            details.tokenBeg < pos
                ? details.tokenBeg
                : pos + 1 - details.tokenBeg,
        ];
    }

    static load(args) {
        return new FilterWildcard1HnAnchored(args[1], args[2], args[3]);
    }
};

registerFilterClass(FilterWildcard1HnAnchored);

/*******************************************************************************

    Hostname-anchored filters with one occurrence of the wildcard
    sequence `^*` and no other wildcard-equivalent character

*/

const FilterWildcard2HnAnchored = class {
    constructor(s0, s1, tokenBeg) {
        this.s0 = s0;
        this.s1 = s1;
        this.tokenBeg = tokenBeg;
    }

    match(url, tokenBeg) {
        let s0End, s1Beg;
        if ( this.tokenBeg >= 0 ) {
            const s0Beg = tokenBeg - this.tokenBeg;
            if ( s0Beg < 0 || url.startsWith(this.s0, s0Beg) === false ) {
                return false;
            }
            if ( isHnAnchored(url, s0Beg) === false ) { return false; }
            s0End = s0Beg + this.s0.length;
            s1Beg = url.indexOf(this.s1, s0End);
            if ( s1Beg === -1 ) { return false; }
        } else {
            s1Beg = tokenBeg + this.tokenBeg;
            if ( s1Beg < 0 || url.startsWith(this.s1, s1Beg) === false ) {
                return false;
            }
            const s0Beg = url.lastIndexOf(this.s0, s1Beg);
            if ( s0Beg === -1 || isHnAnchored(url, s0Beg) === false ) {
                return false;
            }
            s0End = s0Beg + this.s0.length;
        }
        return this.reSeparators.test(url.slice(s0End, s1Beg));
    }

    logData() {
        return {
            raw: `||${this.s0}^*${this.s1}`,
            regex: rawToRegexStr(`${this.s0}^*${this.s1}`, 0),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s0, this.s1, this.tokenBeg ];
    }

    static compile(details, pos) {
        return [
            FilterWildcard2HnAnchored.fid,
            details.f.slice(0, pos),
            details.f.slice(pos + 2),
            details.tokenBeg < pos
                ? details.tokenBeg
                : pos + 2 - details.tokenBeg,
        ];
    }

    static load(args) {
        return new FilterWildcard2HnAnchored(args[1], args[2], args[3]);
    }
};

FilterWildcard2HnAnchored.prototype.reSeparators = /[^\w%.-]/;

registerFilterClass(FilterWildcard2HnAnchored);
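
// Editor's illustration (not in the original source): a filter such as
// '||example.com^*/ad.js' is split at the '^*' sequence into
// s0 = 'example.com' and s1 = '/ad.js'. At match time the code checks that
// s0 is hostname-anchored in the URL, that s1 appears after it, and that
// the span between the two contains at least one separator-class character
// (reSeparators, i.e. anything other than [A-Za-z0-9_%.-]), which is what
// the '^' placeholder requires.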

/******************************************************************************/

const FilterGenericHnAnchored = class {
    constructor(s) {
        this.s = s;
    }

    match(url) {
        if ( this.re === null ) {
            this.re = new RegExp(rawToRegexStr(this.s, this.anchor));
        }
        return this.re.test(url);
    }

    logData() {
        return {
            raw: `||${this.s}`,
            regex: rawToRegexStr(this.s, this.anchor & 0b001),
            compiled: this.compile()
        };
    }

    compile() {
        return [ this.fid, this.s ];
    }

    static compile(details) {
        const compiled = FilterWildcard1HnAnchored.compile(details);
        if ( compiled !== undefined ) { return compiled; }
        return [ FilterGenericHnAnchored.fid, details.f ];
    }

    static load(args) {
        return new FilterGenericHnAnchored(args[1]);
    }
};

FilterGenericHnAnchored.prototype.re = null;
FilterGenericHnAnchored.prototype.anchor = 0x4;

registerFilterClass(FilterGenericHnAnchored);

/******************************************************************************/

const FilterGenericHnAndRightAnchored = class extends FilterGenericHnAnchored {
    logData() {
        const out = super.logData();
        out.raw += '|';
        return out;
    }

    static compile(details) {
        return [ FilterGenericHnAndRightAnchored.fid, details.f ];
    }

    static load(args) {
        return new FilterGenericHnAndRightAnchored(args[1]);
    }
};

FilterGenericHnAndRightAnchored.prototype.anchor = 0x5;

registerFilterClass(FilterGenericHnAndRightAnchored);

/******************************************************************************/

const FilterRegex = class {
    constructor(s) {
        this.re = s;
    }

    match(url) {
        if ( typeof this.re === 'string' ) {
            this.re = new RegExp(this.re, 'i');
        }
        return this.re.test(url);
    }

    logData() {
        const s = typeof this.re === 'string' ? this.re : this.re.source;
        return {
            raw: `/${s}/`,
            regex: s,
            compiled: this.compile()
        };
    }

    compile() {
        return [
            this.fid,
            typeof this.re === 'string' ? this.re : this.re.source
        ];
    }

    static compile(details) {
        return [ FilterRegex.fid, details.f ];
    }

    static load(args) {
        return new FilterRegex(args[1]);
    }
};

registerFilterClass(FilterRegex);

/******************************************************************************/

// The optimal "class" is picked according to the content of the
// `domain=` filter option.
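
// Editor's summary of the dispatch below (not in the original source):
//   domain=example.com         -> FilterOriginHit
//   domain=~example.com        -> FilterOriginMiss
//   domain=a.com|b.com         -> FilterOriginHitSet
//   domain=~a.com|~b.com       -> FilterOriginMissSet
//   domain=a.com|~sub.a.com    -> FilterOriginMixedSet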

const filterOrigin = new (class {
    constructor() {
        let trieDetails;
        try {
            trieDetails = JSON.parse(
                vAPI.localStorage.getItem('FilterOrigin.trieDetails')
            );
        } catch(ex) {
        }
        this.trieContainer = new µBlock.HNTrieContainer(trieDetails);
        this.strSlots = [];
        this.strToSlotId = new Map();
        this.gcTimer = undefined;
    }

    compile(details, wrapped) {
        const domainOpt = details.domainOpt;
        // One hostname
        if ( domainOpt.indexOf('|') === -1 ) {
            if ( domainOpt.charCodeAt(0) === 0x7E /* '~' */ ) {
                return FilterOriginMiss.compile(domainOpt, wrapped);
            }
            return FilterOriginHit.compile(domainOpt, wrapped);
        }
        // Many hostnames.
        // Must be in set (none negated).
        if ( domainOpt.indexOf('~') === -1 ) {
            return FilterOriginHitSet.compile(domainOpt, wrapped);
        }
        // Must not be in set (all negated).
        const reAllNegated = /^~(?:[^|~]+\|~)+[^|~]+$/;
        if ( reAllNegated.test(domainOpt) ) {
            return FilterOriginMissSet.compile(domainOpt, wrapped);
        }
        // Must be in one set, but not in the other.
        return FilterOriginMixedSet.compile(domainOpt, wrapped);
    }

    slotIdFromStr(s) {
        let slotId = this.strToSlotId.get(s);
        if ( slotId !== undefined ) { return slotId; }
        slotId = this.strSlots.push(s) - 1;
        this.strToSlotId.set(s, slotId);
        if ( this.gcTimer !== undefined ) { return slotId; }
        this.gcTimer = self.requestIdleCallback(
            ( ) => {
                this.gcTimer = undefined;
                this.strToSlotId.clear();
            },
            { timeout: 5000 }
        );
        return slotId;
    }

    strFromSlotId(slotId) {
        return this.strSlots[slotId];
    }

    logData(out, domainOpt) {
        if ( out.opts !== undefined ) { out.opts += ','; }
        out.opts = (out.opts || '') + `domain=${domainOpt}`;
        return out;
    }

    readyToUse() {
        return this.trieContainer.readyToUse();
    }

    reset() {
        this.trieContainer.reset();
        this.strSlots.length = 0;
        this.strToSlotId.clear();
    }

    optimize() {
        const trieDetails = this.trieContainer.optimize();
        vAPI.localStorage.setItem(
            'FilterOrigin.trieDetails',
            JSON.stringify(trieDetails)
        );
        this.strToSlotId.clear();
    }
})();

/******************************************************************************/
|
2015-03-02 16:41:51 +01:00
|
|
|
|
2019-02-16 18:16:30 +01:00
|
|
|
// Surprinsingly, first peeking and comparing only the first character using
|
|
|
|
// charCodeAt() does help a bit performance -- 3-6µs gain per request on
|
|
|
|
// average for Chromium 71 and Firefox 65 with default lists.
|
|
|
|
// A likely explanation is that most visits are a miss, and in such a case
|
|
|
|
// calling charCodeAt() to bail out earlier is cheaper than calling endsWith().
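//
// For instance, with a (hypothetical) filter hostname of 'example.com' and a
// page hostname of 'www.example.com': the offset is 15 - 11 = 4,
// charCodeAt(4) is 'e' and matches the first character of the filter
// hostname, endsWith() then confirms the suffix, and the character at
// offset - 1 is '.', so the match falls on a label boundary. A page hostname
// of 'badexample.com' also ends with 'example.com' but is rejected because
// the character before the suffix is 'd' rather than '.', while most
// unrelated hostnames are rejected by the first-character peek alone.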
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
const FilterOriginHit = class {
|
|
|
|
constructor(hostname, wrapped) {
|
|
|
|
this.hostname = hostname;
|
|
|
|
this.wrapped = wrapped;
|
|
|
|
}
|
|
|
|
|
|
|
|
match(url, tokenBeg) {
|
2019-02-16 18:16:30 +01:00
|
|
|
const haystack = pageHostnameRegister;
|
|
|
|
const offset = haystack.length - this.hostname.length;
|
|
|
|
if ( offset < 0 ) { return false; }
|
|
|
|
if ( haystack.charCodeAt(offset) !== this.hostname.charCodeAt(0) ) {
|
|
|
|
return false;
|
2017-05-12 16:35:11 +02:00
|
|
|
}
|
2019-02-16 18:16:30 +01:00
|
|
|
if ( haystack.endsWith(this.hostname) === false ) { return false; }
|
2019-05-17 16:13:58 +02:00
|
|
|
if (
|
|
|
|
offset !== 0 &&
|
|
|
|
haystack.charCodeAt(offset-1) !== 0x2E /* '.' */
|
|
|
|
) {
|
2019-02-16 18:16:30 +01:00
|
|
|
return false;
|
2017-05-12 16:35:11 +02:00
|
|
|
}
|
2019-02-16 18:16:30 +01:00
|
|
|
return this.wrapped.match(url, tokenBeg);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
logData() {
|
2019-04-27 13:04:43 +02:00
|
|
|
const out = this.wrapped.logData();
|
|
|
|
out.compiled = [ this.fid, this.hostname, out.compiled ];
|
|
|
|
return filterOrigin.logData(out, this.hostname);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
2019-05-17 16:13:58 +02:00
|
|
|
compile(toSelfie = false) {
|
|
|
|
return [ this.fid, this.hostname, this.wrapped.compile(toSelfie) ];
|
2019-02-16 18:16:30 +01:00
|
|
|
}
|
2015-03-02 16:41:51 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static compile(domainOpt, wrapped) {
|
|
|
|
return [ FilterOriginHit.fid, domainOpt, wrapped ];
|
|
|
|
}
|
2015-03-02 16:41:51 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static load(args) {
|
|
|
|
return new FilterOriginHit(
|
|
|
|
args[1],
|
|
|
|
filterFromCompiledData(args[2])
|
|
|
|
);
|
|
|
|
}
|
2015-03-02 16:41:51 +01:00
|
|
|
};
|
|
|
|
|
2019-02-16 18:16:30 +01:00
|
|
|
registerFilterClass(FilterOriginHit);
|
2015-03-02 16:41:51 +01:00
|
|
|
|
2019-02-16 18:16:30 +01:00
|
|
|
/******************************************************************************/
|
2017-05-12 16:35:11 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
const FilterOriginMiss = class {
|
|
|
|
constructor(hostname, wrapped) {
|
|
|
|
this.hostname = hostname;
|
|
|
|
this.wrapped = wrapped;
|
|
|
|
}
|
2015-03-02 16:41:51 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
match(url, tokenBeg) {
|
2019-02-16 18:16:30 +01:00
|
|
|
const haystack = pageHostnameRegister;
|
|
|
|
if ( haystack.endsWith(this.hostname) ) {
|
|
|
|
const offset = haystack.length - this.hostname.length;
|
2019-05-17 16:13:58 +02:00
|
|
|
if (
|
|
|
|
offset === 0 ||
|
|
|
|
haystack.charCodeAt(offset-1) === 0x2E /* '.' */
|
|
|
|
) {
|
2019-02-16 18:16:30 +01:00
|
|
|
return false;
|
2017-05-12 16:35:11 +02:00
|
|
|
}
|
|
|
|
}
|
2019-02-16 18:16:30 +01:00
|
|
|
return this.wrapped.match(url, tokenBeg);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
logData() {
|
2019-04-27 13:04:43 +02:00
|
|
|
const out = this.wrapped.logData();
|
|
|
|
out.compiled = [ this.fid, this.hostname, out.compiled ];
|
|
|
|
return filterOrigin.logData(out, `~${this.hostname}`);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
2019-05-17 16:13:58 +02:00
|
|
|
compile(toSelfie = false) {
|
|
|
|
return [ this.fid, this.hostname, this.wrapped.compile(toSelfie) ];
|
2019-02-16 18:16:30 +01:00
|
|
|
}
|
2017-05-12 16:35:11 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static compile(domainOpt, wrapped) {
|
|
|
|
return [ FilterOriginMiss.fid, domainOpt.slice(1), wrapped ];
|
|
|
|
}
|
2015-03-02 16:41:51 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static load(args) {
|
|
|
|
return new FilterOriginMiss(
|
|
|
|
args[1],
|
|
|
|
filterFromCompiledData(args[2])
|
|
|
|
);
|
|
|
|
}
|
2019-02-16 18:16:30 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
registerFilterClass(FilterOriginMiss);
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
const FilterOriginHitSet = class {
|
2019-05-02 00:54:11 +02:00
|
|
|
constructor(domainOpt, wrapped, oneOf = null) {
|
2019-05-17 16:13:58 +02:00
|
|
|
this.domainOpt = typeof domainOpt === 'number'
|
2019-04-14 22:23:52 +02:00
|
|
|
? domainOpt
|
2019-05-17 16:13:58 +02:00
|
|
|
: filterOrigin.slotIdFromStr(domainOpt);
|
2019-05-02 00:54:11 +02:00
|
|
|
this.wrapped = filterFromCompiledData(wrapped);
|
2019-04-19 22:33:46 +02:00
|
|
|
this.oneOf = oneOf !== null
|
|
|
|
? filterOrigin.trieContainer.createOne(oneOf)
|
|
|
|
: null;
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2015-03-02 22:22:23 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
match(url, tokenBeg) {
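// The set of hostnames from the `domain=` option is compiled into an
// HNTrie lazily, the first time this filter is evaluated; until then only
// the slot id referencing the original `domain=` string is kept around.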
|
2019-02-16 18:16:30 +01:00
|
|
|
if ( this.oneOf === null ) {
|
|
|
|
this.oneOf = filterOrigin.trieContainer.fromIterable(
|
2019-05-17 16:13:58 +02:00
|
|
|
filterOrigin.strFromSlotId(this.domainOpt).split('|')
|
2019-02-16 18:16:30 +01:00
|
|
|
);
|
2017-05-12 16:35:11 +02:00
|
|
|
}
|
2019-02-16 18:16:30 +01:00
|
|
|
return this.oneOf.matches(pageHostnameRegister) !== -1 &&
|
|
|
|
this.wrapped.match(url, tokenBeg);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
logData() {
|
2019-04-27 13:04:43 +02:00
|
|
|
const out = this.wrapped.logData();
|
2019-05-17 16:13:58 +02:00
|
|
|
const domainOpt = filterOrigin.strFromSlotId(this.domainOpt);
|
|
|
|
out.compiled = [ this.fid, domainOpt, out.compiled ];
|
|
|
|
return filterOrigin.logData(out, domainOpt);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
2019-05-17 16:13:58 +02:00
|
|
|
compile(toSelfie = false) {
|
2019-05-02 00:54:11 +02:00
|
|
|
const out = [
|
2019-04-19 22:33:46 +02:00
|
|
|
this.fid,
|
2019-05-17 16:13:58 +02:00
|
|
|
toSelfie
|
|
|
|
? this.domainOpt
|
|
|
|
: filterOrigin.strFromSlotId(this.domainOpt),
|
|
|
|
this.wrapped.compile(toSelfie),
|
2019-04-19 22:33:46 +02:00
|
|
|
];
|
2019-05-02 00:54:11 +02:00
|
|
|
if ( this.oneOf !== null ) {
|
|
|
|
out.push(filterOrigin.trieContainer.compileOne(this.oneOf));
|
|
|
|
}
|
|
|
|
return out;
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2017-05-12 16:35:11 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static compile(domainOpt, wrapped) {
|
2019-05-02 00:54:11 +02:00
|
|
|
return [ FilterOriginHitSet.fid, domainOpt, wrapped ];
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2017-05-12 16:35:11 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static load(args) {
|
2019-05-02 00:54:11 +02:00
|
|
|
return new FilterOriginHitSet(...args.slice(1));
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2019-02-16 18:16:30 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
registerFilterClass(FilterOriginHitSet);
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
const FilterOriginMissSet = class {
|
2019-05-02 00:54:11 +02:00
|
|
|
constructor(domainOpt, wrapped, noneOf = null) {
|
2019-05-17 16:13:58 +02:00
|
|
|
this.domainOpt = typeof domainOpt === 'number'
|
2019-04-14 22:23:52 +02:00
|
|
|
? domainOpt
|
2019-05-17 16:13:58 +02:00
|
|
|
: filterOrigin.slotIdFromStr(domainOpt);
|
2019-05-02 00:54:11 +02:00
|
|
|
this.wrapped = filterFromCompiledData(wrapped);
|
2019-04-19 22:33:46 +02:00
|
|
|
this.noneOf = noneOf !== null
|
|
|
|
? filterOrigin.trieContainer.createOne(noneOf)
|
|
|
|
: null;
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2015-03-02 22:22:23 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
match(url, tokenBeg) {
|
2019-02-16 18:16:30 +01:00
|
|
|
if ( this.noneOf === null ) {
|
|
|
|
this.noneOf = filterOrigin.trieContainer.fromIterable(
|
2019-05-17 16:13:58 +02:00
|
|
|
filterOrigin
|
|
|
|
.strFromSlotId(this.domainOpt)
|
|
|
|
.replace(/~/g, '')
|
|
|
|
.split('|')
|
2019-02-16 18:16:30 +01:00
|
|
|
);
|
2017-05-12 16:35:11 +02:00
|
|
|
}
|
2019-02-16 18:16:30 +01:00
|
|
|
return this.noneOf.matches(pageHostnameRegister) === -1 &&
|
|
|
|
this.wrapped.match(url, tokenBeg);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
logData() {
|
2019-04-27 13:04:43 +02:00
|
|
|
const out = this.wrapped.logData();
|
2019-05-17 16:13:58 +02:00
|
|
|
const domainOpt = filterOrigin.strFromSlotId(this.domainOpt);
|
|
|
|
out.compiled = [ this.fid, domainOpt, out.compiled ];
|
|
|
|
return filterOrigin.logData(out, domainOpt);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
2019-05-17 16:13:58 +02:00
|
|
|
compile(toSelfie = false) {
|
2019-05-02 00:54:11 +02:00
|
|
|
const out = [
|
2019-04-19 22:33:46 +02:00
|
|
|
this.fid,
|
2019-05-17 16:13:58 +02:00
|
|
|
toSelfie
|
|
|
|
? this.domainOpt
|
|
|
|
: filterOrigin.strFromSlotId(this.domainOpt),
|
|
|
|
this.wrapped.compile(toSelfie),
|
2019-04-19 22:33:46 +02:00
|
|
|
];
|
2019-05-02 00:54:11 +02:00
|
|
|
if ( this.noneOf !== null ) {
|
|
|
|
out.push(filterOrigin.trieContainer.compileOne(this.noneOf));
|
|
|
|
}
|
|
|
|
return out;
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2017-05-12 16:35:11 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static compile(domainOpt, wrapped) {
|
2019-05-02 00:54:11 +02:00
|
|
|
return [ FilterOriginMissSet.fid, domainOpt, wrapped ];
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2015-03-02 22:22:23 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static load(args) {
|
2019-05-02 00:54:11 +02:00
|
|
|
return new FilterOriginMissSet(...args.slice(1));
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2015-03-02 22:22:23 +01:00
|
|
|
};
|
|
|
|
|
2019-02-16 18:16:30 +01:00
|
|
|
registerFilterClass(FilterOriginMissSet);
|
2017-05-12 16:35:11 +02:00
|
|
|
|
2019-02-16 18:16:30 +01:00
|
|
|
/******************************************************************************/
|
2015-03-02 22:22:23 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
const FilterOriginMixedSet = class {
|
2019-05-02 00:54:11 +02:00
|
|
|
constructor(domainOpt, wrapped, oneOf = null, noneOf = null) {
|
2019-05-17 16:13:58 +02:00
|
|
|
this.domainOpt = typeof domainOpt === 'number'
|
2019-04-14 22:23:52 +02:00
|
|
|
? domainOpt
|
2019-05-17 16:13:58 +02:00
|
|
|
: filterOrigin.slotIdFromStr(domainOpt);
|
2019-05-02 00:54:11 +02:00
|
|
|
this.wrapped = filterFromCompiledData(wrapped);
|
2019-04-19 22:33:46 +02:00
|
|
|
this.oneOf = oneOf !== null
|
|
|
|
? filterOrigin.trieContainer.createOne(oneOf)
|
|
|
|
: null;
|
|
|
|
this.noneOf = noneOf !== null
|
|
|
|
? filterOrigin.trieContainer.createOne(noneOf)
|
|
|
|
: null;
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2015-03-02 22:22:23 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
init() {
|
2019-02-16 18:16:30 +01:00
|
|
|
const oneOf = [], noneOf = [];
|
2019-05-17 16:13:58 +02:00
|
|
|
const domainOpt = filterOrigin.strFromSlotId(this.domainOpt);
|
|
|
|
for ( const hostname of domainOpt.split('|') ) {
|
2019-02-16 18:16:30 +01:00
|
|
|
if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) {
|
|
|
|
noneOf.push(hostname.slice(1));
|
|
|
|
} else {
|
|
|
|
oneOf.push(hostname);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
this.oneOf = filterOrigin.trieContainer.fromIterable(oneOf);
|
|
|
|
this.noneOf = filterOrigin.trieContainer.fromIterable(noneOf);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
match(url, tokenBeg) {
|
2019-02-16 18:16:30 +01:00
|
|
|
if ( this.oneOf === null ) { this.init(); }
|
|
|
|
let needle = pageHostnameRegister;
|
|
|
|
return this.oneOf.matches(needle) !== -1 &&
|
|
|
|
this.noneOf.matches(needle) === -1 &&
|
|
|
|
this.wrapped.match(url, tokenBeg);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
logData() {
|
2019-04-27 13:04:43 +02:00
|
|
|
const out = this.wrapped.logData();
|
2019-05-17 16:13:58 +02:00
|
|
|
const domainOpt = filterOrigin.strFromSlotId(this.domainOpt);
|
|
|
|
out.compiled = [ this.fid, domainOpt, out.compiled ];
|
|
|
|
return filterOrigin.logData(out, domainOpt);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
2019-05-17 16:13:58 +02:00
|
|
|
compile(toSelfie = false) {
|
2019-05-02 00:54:11 +02:00
|
|
|
const out = [
|
2019-04-19 22:33:46 +02:00
|
|
|
this.fid,
|
2019-05-17 16:13:58 +02:00
|
|
|
toSelfie
|
|
|
|
? this.domainOpt
|
|
|
|
: filterOrigin.strFromSlotId(this.domainOpt),
|
|
|
|
this.wrapped.compile(toSelfie),
|
2019-04-19 22:33:46 +02:00
|
|
|
];
|
2019-05-02 00:54:11 +02:00
|
|
|
if ( this.oneOf !== null ) {
|
|
|
|
out.push(
|
|
|
|
filterOrigin.trieContainer.compileOne(this.oneOf),
|
|
|
|
filterOrigin.trieContainer.compileOne(this.noneOf)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
return out;
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2018-12-04 19:02:09 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static compile(domainOpt, wrapped) {
|
2019-05-02 00:54:11 +02:00
|
|
|
return [ FilterOriginMixedSet.fid, domainOpt, wrapped ];
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2018-12-04 19:02:09 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static load(args) {
|
2019-05-02 00:54:11 +02:00
|
|
|
return new FilterOriginMixedSet(...args.slice(1));
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2018-12-04 19:02:09 +01:00
|
|
|
};
|
|
|
|
|
2019-02-16 18:16:30 +01:00
|
|
|
registerFilterClass(FilterOriginMixedSet);
|
2015-03-02 22:22:23 +01:00
|
|
|
|
2017-05-12 16:35:11 +02:00
|
|
|
/******************************************************************************/
|
2015-01-23 17:32:49 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
const FilterDataHolder = class {
|
|
|
|
constructor(dataType, dataStr) {
|
|
|
|
this.dataType = dataType;
|
|
|
|
this.dataStr = dataStr;
|
|
|
|
this.wrapped = undefined;
|
|
|
|
}
|
2015-01-23 17:32:49 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
match(url, tokenBeg) {
|
|
|
|
return this.wrapped.match(url, tokenBeg);
|
2017-05-12 16:35:11 +02:00
|
|
|
}
|
2019-04-14 22:23:52 +02:00
|
|
|
|
|
|
|
logData() {
|
|
|
|
const out = this.wrapped.logData();
|
|
|
|
out.compiled = [ this.fid, this.dataType, this.dataStr, out.compiled ];
|
|
|
|
let opt = this.dataType;
|
|
|
|
if ( this.dataStr !== '' ) {
|
|
|
|
opt += `=${this.dataStr}`;
|
|
|
|
}
|
|
|
|
if ( out.opts === undefined ) {
|
|
|
|
out.opts = opt;
|
|
|
|
} else {
|
|
|
|
out.opts = opt + ',' + out.opts;
|
|
|
|
}
|
|
|
|
return out;
|
2017-05-12 16:35:11 +02:00
|
|
|
}
|
2015-01-23 17:32:49 +01:00
|
|
|
|
2019-05-17 16:13:58 +02:00
|
|
|
compile(toSelfie = false) {
|
|
|
|
return [
|
|
|
|
this.fid,
|
|
|
|
this.dataType,
|
|
|
|
this.dataStr,
|
|
|
|
this.wrapped.compile(toSelfie)
|
|
|
|
];
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2015-01-23 17:32:49 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static compile(details) {
|
|
|
|
return [ FilterDataHolder.fid, details.dataType, details.dataStr ];
|
|
|
|
}
|
2015-02-24 00:31:29 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static load(args) {
|
|
|
|
const f = new FilterDataHolder(args[1], args[2]);
|
|
|
|
f.wrapped = filterFromCompiledData(args[3]);
|
|
|
|
return f;
|
|
|
|
}
|
2015-01-23 17:32:49 +01:00
|
|
|
};
|
|
|
|
|
2017-05-12 16:35:11 +02:00
|
|
|
registerFilterClass(FilterDataHolder);
|
2015-01-23 17:32:49 +01:00
|
|
|
|
2017-05-12 16:35:11 +02:00
|
|
|
// Helper class for storing instances of FilterDataHolder.
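// Each entry keeps the category bits and token hash under which the wrapped
// filter was registered, plus a `next` pointer so that entries can be
// chained into a simple singly-linked list.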
|
2015-01-23 17:32:49 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
const FilterDataHolderEntry = class {
|
|
|
|
constructor(categoryBits, tokenHash, fdata) {
|
|
|
|
this.categoryBits = categoryBits;
|
|
|
|
this.tokenHash = tokenHash;
|
|
|
|
this.filter = filterFromCompiledData(fdata);
|
|
|
|
this.next = undefined;
|
|
|
|
}
|
2015-01-23 17:32:49 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
logData() {
|
|
|
|
return toLogDataInternal(this.categoryBits, this.tokenHash, this.filter);
|
|
|
|
}
|
2015-02-24 00:31:29 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
compile() {
|
|
|
|
return [ this.categoryBits, this.tokenHash, this.filter.compile() ];
|
|
|
|
}
|
2015-01-23 17:32:49 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static load(data) {
|
|
|
|
return new FilterDataHolderEntry(data[0], data[1], data[2]);
|
|
|
|
}
|
2015-01-23 17:32:49 +01:00
|
|
|
};
|
|
|
|
|
2014-09-08 23:46:58 +02:00
|
|
|
/******************************************************************************/
|
|
|
|
|
2015-02-05 00:06:31 +01:00
|
|
|
// Dictionary of hostnames
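// A single FilterHostnameDict aggregates pure-hostname (`||hostname^`)
// filters into one hostname trie, so one trie lookup against the request
// hostname stands in for many per-filter match() calls. The matched suffix
// is kept in the short-lived `h` register so that logData() can reconstruct
// the `||hostname^` form of the winning entry.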
|
2018-12-04 19:02:09 +01:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
const FilterHostnameDict = class {
|
|
|
|
constructor(args) {
|
|
|
|
this.h = ''; // short-lived register
|
|
|
|
this.dict = FilterHostnameDict.trieContainer.createOne(args);
|
|
|
|
}
|
2015-02-05 00:06:31 +01:00
|
|
|
|
2018-12-04 19:02:09 +01:00
|
|
|
get size() {
|
2017-03-20 20:54:41 +01:00
|
|
|
return this.dict.size;
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
add(hn) {
|
2018-12-04 19:02:09 +01:00
|
|
|
return this.dict.add(hn);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
match() {
|
2018-12-04 19:02:09 +01:00
|
|
|
const pos = this.dict.matches(requestHostnameRegister);
|
|
|
|
if ( pos === -1 ) { return false; }
|
|
|
|
this.h = requestHostnameRegister.slice(pos);
|
|
|
|
return true;
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
logData() {
|
2018-12-04 19:02:09 +01:00
|
|
|
return {
|
2019-04-14 22:23:52 +02:00
|
|
|
raw: `||${this.h}^`,
|
|
|
|
regex: `${rawToRegexStr(this.h, 0)}(?:[^%.0-9a-z_-]|$)`,
|
2018-12-04 19:02:09 +01:00
|
|
|
compiled: this.h
|
|
|
|
};
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
compile() {
|
2018-12-04 19:02:09 +01:00
|
|
|
return [ this.fid, FilterHostnameDict.trieContainer.compileOne(this.dict) ];
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static readyToUse() {
|
|
|
|
return FilterHostnameDict.trieContainer.readyToUse();
|
|
|
|
}
|
|
|
|
|
|
|
|
static reset() {
|
|
|
|
return FilterHostnameDict.trieContainer.reset();
|
|
|
|
}
|
|
|
|
|
|
|
|
static optimize() {
|
|
|
|
const trieDetails = FilterHostnameDict.trieContainer.optimize();
|
|
|
|
vAPI.localStorage.setItem(
|
|
|
|
'FilterHostnameDict.trieDetails',
|
|
|
|
JSON.stringify(trieDetails)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
static load(args) {
|
|
|
|
return new FilterHostnameDict(args[1]);
|
|
|
|
}
|
2017-03-20 20:54:41 +01:00
|
|
|
};
|
|
|
|
|
2019-06-19 16:00:19 +02:00
|
|
|
FilterHostnameDict.trieContainer = (( ) => {
|
2018-12-04 19:02:09 +01:00
|
|
|
let trieDetails;
|
|
|
|
try {
|
|
|
|
trieDetails = JSON.parse(
|
|
|
|
vAPI.localStorage.getItem('FilterHostnameDict.trieDetails')
|
|
|
|
);
|
|
|
|
} catch(ex) {
|
2015-02-05 00:06:31 +01:00
|
|
|
}
|
2019-06-19 16:00:19 +02:00
|
|
|
return new µBlock.HNTrieContainer(trieDetails);
|
2018-12-04 19:02:09 +01:00
|
|
|
})();
|
|
|
|
|
2017-05-12 16:35:11 +02:00
|
|
|
registerFilterClass(FilterHostnameDict);
|
|
|
|
|
2015-02-05 00:06:31 +01:00
|
|
|
/******************************************************************************/
|
|
|
|
|
2019-04-19 22:33:46 +02:00
|
|
|
// Dictionary of hostnames for filters whose only purpose is to match
|
|
|
|
// the document origin.
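// These hold the hostnames produced by decomposing the `domain=` option of
// the apply-everywhere filters described earlier: logData() reconstructs a
// `*$domain=<matched hostname>` representation for the logger, while the
// HTTPS/HTTP subclasses below additionally constrain the URL scheme.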
|
|
|
|
|
|
|
|
const FilterJustOrigin = class {
|
|
|
|
constructor(args) {
|
|
|
|
this.h = ''; // short-lived register
|
|
|
|
this.dict = filterOrigin.trieContainer.createOne(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
get size() {
|
|
|
|
return this.dict.size;
|
|
|
|
}
|
|
|
|
|
|
|
|
add(hn) {
|
|
|
|
return this.dict.add(hn);
|
|
|
|
}
|
|
|
|
|
|
|
|
match() {
|
|
|
|
const pos = this.dict.matches(pageHostnameRegister);
|
|
|
|
if ( pos === -1 ) { return false; }
|
|
|
|
this.h = pageHostnameRegister.slice(pos);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
logData() {
|
|
|
|
return {
|
|
|
|
raw: '*',
|
|
|
|
regex: '^',
|
2019-04-20 23:25:32 +02:00
|
|
|
compiled: this.h,
|
|
|
|
opts: `domain=${this.h}`,
|
2019-04-19 22:33:46 +02:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
compile() {
|
|
|
|
return [ this.fid, filterOrigin.trieContainer.compileOne(this.dict) ];
|
|
|
|
}
|
|
|
|
|
|
|
|
static load(args) {
|
|
|
|
return new FilterJustOrigin(args[1]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
registerFilterClass(FilterJustOrigin);
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
const FilterHTTPSJustOrigin = class extends FilterJustOrigin {
|
|
|
|
match(url) {
|
|
|
|
return url.startsWith('https://') && super.match();
|
|
|
|
}
|
|
|
|
|
|
|
|
logData() {
|
2019-04-20 23:25:32 +02:00
|
|
|
const out = super.logData();
|
|
|
|
out.raw = '|https://';
|
|
|
|
out.regex = '^https://';
|
|
|
|
return out;
|
2019-04-19 22:33:46 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static load(args) {
|
|
|
|
return new FilterHTTPSJustOrigin(args[1]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
registerFilterClass(FilterHTTPSJustOrigin);
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
const FilterHTTPJustOrigin = class extends FilterJustOrigin {
|
|
|
|
match(url) {
|
|
|
|
return url.startsWith('http://') && super.match();
|
|
|
|
}
|
|
|
|
|
|
|
|
logData() {
|
2019-04-20 23:25:32 +02:00
|
|
|
const out = super.logData();
|
|
|
|
out.raw = '|http://';
|
|
|
|
out.regex = '^http://';
|
|
|
|
return out;
|
2019-04-19 22:33:46 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static load(args) {
|
|
|
|
return new FilterHTTPJustOrigin(args[1]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
registerFilterClass(FilterHTTPJustOrigin);
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
const FilterPair = class {
|
|
|
|
constructor(a, b) {
|
|
|
|
this.f1 = a;
|
|
|
|
this.f2 = b;
|
|
|
|
}
|
2017-05-27 02:00:21 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
get size() {
|
2017-05-27 02:00:21 +02:00
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
match(url, tokenBeg) {
|
|
|
|
if ( this.f1.match(url, tokenBeg) === true ) {
|
|
|
|
this.f = this.f1;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if ( this.f2.match(url, tokenBeg) === true ) {
|
|
|
|
this.f = this.f2;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
2018-06-24 01:15:56 +02:00
|
|
|
}
|
2017-05-27 02:00:21 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
logData() {
|
|
|
|
return this.f.logData();
|
2017-05-27 02:00:21 +02:00
|
|
|
}
|
|
|
|
|
2019-05-17 16:13:58 +02:00
|
|
|
compile(toSelfie = false) {
|
|
|
|
return [
|
|
|
|
this.fid,
|
|
|
|
this.f1.compile(toSelfie),
|
|
|
|
this.f2.compile(toSelfie)
|
|
|
|
];
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2017-05-27 02:00:21 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
upgrade(a) {
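// Converting a pair into a 3-filter bucket: the two existing filters plus
// the incoming one seed a new FilterBucket, and this now-empty pair is
// parked in FilterPair.available so that a subsequent load() can recycle
// it instead of allocating a fresh instance.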
|
|
|
|
const bucket = new FilterBucket(this.f1, this.f2, a);
|
|
|
|
this.f1 = this.f2 = undefined;
|
|
|
|
this.f = null;
|
|
|
|
FilterPair.available = this;
|
|
|
|
return bucket;
|
|
|
|
}
|
2017-05-27 02:00:21 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static load(args) {
|
|
|
|
const f1 = filterFromCompiledData(args[1]);
|
|
|
|
const f2 = filterFromCompiledData(args[2]);
|
|
|
|
const pair = FilterPair.available;
|
|
|
|
if ( pair === null ) {
|
|
|
|
return new FilterPair(f1, f2);
|
|
|
|
}
|
|
|
|
FilterPair.available = null;
|
|
|
|
pair.f1 = f1;
|
|
|
|
pair.f2 = f2;
|
|
|
|
return pair;
|
|
|
|
}
|
2018-06-24 01:15:56 +02:00
|
|
|
};
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
FilterPair.prototype.f = null;
|
2017-05-27 02:00:21 +02:00
|
|
|
|
|
|
|
FilterPair.available = null;
|
|
|
|
|
|
|
|
registerFilterClass(FilterPair);
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
const FilterBucket = class {
|
|
|
|
constructor(a, b, c) {
|
|
|
|
this.filters = [];
|
|
|
|
if ( a !== undefined ) {
|
|
|
|
this.filters.push(a, b, c);
|
|
|
|
this._countTrieable();
|
|
|
|
}
|
2019-06-19 01:16:39 +02:00
|
|
|
this.trieResult = 0;
|
2014-09-08 23:46:58 +02:00
|
|
|
}
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
get size() {
|
|
|
|
let size = this.filters.length;
|
2019-06-19 01:16:39 +02:00
|
|
|
if ( this.plainTrie !== null ) {
|
|
|
|
size += this.plainTrie.size;
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
if ( this.plainHnAnchoredTrie !== null ) {
|
|
|
|
size += this.plainHnAnchoredTrie.size;
|
|
|
|
}
|
|
|
|
return size;
|
2017-05-27 02:00:21 +02:00
|
|
|
}
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
add(fdata) {
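// Plain pattern filters (trieable id 0) and plain hostname-anchored
// filters (trieable id 1) are counted as they are added; once a fourth
// one of a kind shows up, the existing ones are transferred into a shared
// bidi-trie and later ones are added directly to that trie, so a single
// trie lookup can replace many individual match() calls.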
|
2019-06-19 01:16:39 +02:00
|
|
|
const fclass = filterClasses[fdata[0]];
|
|
|
|
if ( fclass.trieableId === 0 ) {
|
|
|
|
if ( this.plainTrie !== null ) {
|
|
|
|
return fclass.addToTrie(fdata, this.plainTrie);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2019-06-19 01:16:39 +02:00
|
|
|
if ( this.plainCount === 3 ) {
|
|
|
|
this.plainTrie = FilterBucket.trieContainer.createOne();
|
|
|
|
this._transferTrieable(0, this.plainTrie);
|
|
|
|
return fclass.addToTrie(fdata, this.plainTrie);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2019-06-19 01:16:39 +02:00
|
|
|
this.plainCount += 1;
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
2019-06-19 01:16:39 +02:00
|
|
|
if ( fclass.trieableId === 1 ) {
|
2019-04-14 22:23:52 +02:00
|
|
|
if ( this.plainHnAnchoredTrie !== null ) {
|
2019-06-19 01:16:39 +02:00
|
|
|
return fclass.addToTrie(fdata, this.plainHnAnchoredTrie);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
if ( this.plainHnAnchoredCount === 3 ) {
|
|
|
|
this.plainHnAnchoredTrie = FilterBucket.trieContainer.createOne();
|
2019-06-19 01:16:39 +02:00
|
|
|
this._transferTrieable(1, this.plainHnAnchoredTrie);
|
|
|
|
return fclass.addToTrie(fdata, this.plainHnAnchoredTrie);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
this.plainHnAnchoredCount += 1;
|
|
|
|
}
|
|
|
|
this.filters.push(filterFromCompiledData(fdata));
|
|
|
|
}
|
2017-05-25 23:46:59 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
match(url, tokenBeg) {
|
2019-06-19 01:16:39 +02:00
|
|
|
if ( this.plainTrie !== null ) {
|
|
|
|
const pos = this.plainTrie.matches(url, tokenBeg);
|
2019-04-14 22:23:52 +02:00
|
|
|
if ( pos !== -1 ) {
|
2019-06-19 01:16:39 +02:00
|
|
|
this.trieResult = pos;
|
|
|
|
this.f = this.plainFilter;
|
|
|
|
this.f.tokenBeg = tokenBeg - (pos >>> 16);
|
2019-04-14 22:23:52 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2019-06-19 01:16:39 +02:00
|
|
|
if ( this.plainHnAnchoredTrie !== null ) {
|
2019-04-14 22:23:52 +02:00
|
|
|
const pos = this.plainHnAnchoredTrie.matches(url, tokenBeg);
|
2019-06-19 01:16:39 +02:00
|
|
|
if ( pos !== -1 && isHnAnchored(url, pos >>> 16) ) {
|
|
|
|
this.trieResult = pos;
|
2019-04-14 22:23:52 +02:00
|
|
|
this.f = this.plainHnAnchoredFilter;
|
2019-06-19 01:16:39 +02:00
|
|
|
this.f.tokenBeg = tokenBeg - (pos >>> 16);
|
2019-04-14 22:23:52 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
const filters = this.filters;
|
|
|
|
for ( let i = 0, n = filters.length; i < n; i++ ) {
|
|
|
|
if ( filters[i].match(url, tokenBeg) === true ) {
|
|
|
|
this.f = filters[i];
|
|
|
|
if ( i >= 16 ) { this._promote(i); }
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2014-09-08 23:46:58 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
logData() {
|
2019-06-19 01:16:39 +02:00
|
|
|
if (
|
|
|
|
this.f === this.plainFilter ||
|
|
|
|
this.f === this.plainHnAnchoredFilter
|
|
|
|
) {
|
|
|
|
this.f.s = urlRegister.slice(
|
|
|
|
this.trieResult >>> 16,
|
|
|
|
this.trieResult & 0xFFFF
|
|
|
|
);
|
|
|
|
}
|
2019-04-14 22:23:52 +02:00
|
|
|
return this.f.logData();
|
|
|
|
}
|
|
|
|
|
2019-05-17 16:13:58 +02:00
|
|
|
compile(toSelfie = false) {
|
2019-04-14 22:23:52 +02:00
|
|
|
return [
|
|
|
|
this.fid,
|
2019-05-17 16:13:58 +02:00
|
|
|
this.filters.map(filter => filter.compile(toSelfie)),
|
2019-06-19 01:16:39 +02:00
|
|
|
this.plainTrie !== null &&
|
|
|
|
FilterBucket.trieContainer.compileOne(this.plainTrie),
|
2019-04-14 22:23:52 +02:00
|
|
|
this.plainHnAnchoredTrie !== null &&
|
|
|
|
FilterBucket.trieContainer.compileOne(this.plainHnAnchoredTrie),
|
|
|
|
];
|
2017-03-11 19:55:47 +01:00
|
|
|
}
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
_countTrieable() {
|
|
|
|
for ( const f of this.filters ) {
|
2019-06-19 01:16:39 +02:00
|
|
|
if ( f.trieableId === 0 ) {
|
|
|
|
this.plainCount += 1;
|
|
|
|
} else if ( f.trieableId === 1 ) {
|
2019-04-14 22:23:52 +02:00
|
|
|
this.plainHnAnchoredCount += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-09-22 02:26:16 +02:00
|
|
|
|
2019-06-19 01:16:39 +02:00
|
|
|
_transferTrieable(trieableId, trie) {
|
|
|
|
const filters = this.filters;
|
|
|
|
let i = filters.length;
|
2019-04-14 22:23:52 +02:00
|
|
|
while ( i-- ) {
|
2019-06-19 01:16:39 +02:00
|
|
|
const f = filters[i];
|
|
|
|
if ( f.trieableId !== trieableId || f.s.length > 255 ) { continue; }
|
|
|
|
f.addToTrie(trie);
|
|
|
|
filters.splice(i, 1);
|
2014-09-08 23:46:58 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
// Promote hit filters so they can be found faster next time.
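// The hit filter at index i is swapped with an entry closer to the front:
// the array length is repeatedly halved to derive a pivot, and the target
// slot cycles through the first `pivot` entries via the `promoted` counter,
// so filters which match often gradually drift toward the start of the list.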
|
|
|
|
_promote(i) {
|
|
|
|
const filters = this.filters;
|
|
|
|
let pivot = filters.length >>> 1;
|
|
|
|
while ( i < pivot ) {
|
|
|
|
pivot >>>= 1;
|
|
|
|
if ( pivot < 16 ) { break; }
|
|
|
|
}
|
|
|
|
if ( i <= pivot ) { return; }
|
|
|
|
const j = this.promoted % pivot;
|
|
|
|
//console.debug('FilterBucket.promote(): promoted %d to %d', i, j);
|
|
|
|
const f = filters[j];
|
|
|
|
filters[j] = filters[i];
|
|
|
|
filters[i] = f;
|
|
|
|
this.promoted += 1;
|
|
|
|
}
|
2014-09-08 23:46:58 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static reset() {
|
|
|
|
FilterBucket.trieContainer.reset();
|
2017-05-12 16:35:11 +02:00
|
|
|
}
|
2014-09-08 23:46:58 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static optimize() {
|
|
|
|
const trieDetails = this.trieContainer.optimize();
|
|
|
|
vAPI.localStorage.setItem(
|
|
|
|
'FilterBucket.trieDetails',
|
|
|
|
JSON.stringify(trieDetails)
|
|
|
|
);
|
2018-06-24 01:15:56 +02:00
|
|
|
}
|
2017-05-30 17:38:45 +02:00
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
static load(args) {
|
|
|
|
const bucket = new FilterBucket();
|
2019-05-17 16:13:58 +02:00
|
|
|
bucket.filters = args[1].map(data => filterFromCompiledData(data));
|
2019-04-14 22:23:52 +02:00
|
|
|
if ( Array.isArray(args[2]) ) {
|
2019-06-19 01:16:39 +02:00
|
|
|
bucket.plainTrie =
|
2019-05-17 16:13:58 +02:00
|
|
|
FilterBucket.trieContainer.createOne(args[2]);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
if ( Array.isArray(args[3]) ) {
|
2019-05-17 16:13:58 +02:00
|
|
|
bucket.plainHnAnchoredTrie =
|
|
|
|
FilterBucket.trieContainer.createOne(args[3]);
|
2019-04-14 22:23:52 +02:00
|
|
|
}
|
|
|
|
return bucket;
|
2017-05-12 16:35:11 +02:00
|
|
|
}
|
2015-06-09 16:27:08 +02:00
|
|
|
};
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
FilterBucket.prototype.f = null;
|
|
|
|
FilterBucket.prototype.promoted = 0;
|
|
|
|
|
2019-06-19 01:16:39 +02:00
|
|
|
FilterBucket.prototype.plainCount = 0;
|
|
|
|
FilterBucket.prototype.plainTrie = null;
|
|
|
|
FilterBucket.prototype.plainFilter = new FilterPlainX('', 0);
|
2019-04-14 22:23:52 +02:00
|
|
|
|
|
|
|
FilterBucket.prototype.plainHnAnchoredCount = 0;
|
|
|
|
FilterBucket.prototype.plainHnAnchoredTrie = null;
|
2019-06-19 01:16:39 +02:00
|
|
|
FilterBucket.prototype.plainHnAnchoredFilter = new FilterPlainHnAnchoredX('', 0);
|
2019-04-14 22:23:52 +02:00
|
|
|
|
2019-06-19 01:16:39 +02:00
|
|
|
FilterBucket.trieContainer = (( ) => {
|
2019-04-14 22:23:52 +02:00
|
|
|
let trieDetails;
|
|
|
|
try {
|
|
|
|
trieDetails = JSON.parse(
|
|
|
|
vAPI.localStorage.getItem('FilterBucket.trieDetails')
|
|
|
|
);
|
|
|
|
} catch(ex) {
|
|
|
|
}
|
2019-06-19 01:16:39 +02:00
|
|
|
return new µBlock.BidiTrieContainer(trieDetails);
|
2019-04-14 22:23:52 +02:00
|
|
|
})();
|
|
|
|
|
2017-05-12 16:35:11 +02:00
|
|
|
registerFilterClass(FilterBucket);
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2018-11-03 12:58:46 +01:00
|
|
|
const FilterParser = function() {
|
2016-08-31 01:57:25 +02:00
|
|
|
this.cantWebsocket = vAPI.cantWebsocket;
|
2017-01-09 14:56:42 +01:00
|
|
|
this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
|
2019-06-19 16:00:19 +02:00
|
|
|
this.reHostnameRule1 = /^\w[\w.-]*[a-z]$/i;
|
|
|
|
this.reHostnameRule2 = /^\w[\w.-]*[a-z]\^?$/i;
|
2015-12-13 17:03:13 +01:00
|
|
|
this.reCanTrimCarets1 = /^[^*]*$/;
|
|
|
|
this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/;
|
2015-12-13 18:55:55 +01:00
|
|
|
this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
|
2015-02-27 00:08:42 +01:00
|
|
|
this.reHasUnicode = /[^\x00-\x7F]/;
|
2016-09-06 00:56:35 +02:00
|
|
|
this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
|
2017-05-16 18:44:12 +02:00
|
|
|
this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/;
|
2015-08-22 18:15:16 +02:00
|
|
|
this.domainOpt = '';
|
|
|
|
this.noTokenHash = µb.urlTokenizer.noTokenHash;
|
2014-08-28 15:59:05 +02:00
|
|
|
this.reset();
|
2014-06-24 00:42:43 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2016-11-06 16:49:02 +01:00
|
|
|
// https://github.com/gorhill/uBlock/issues/1493
|
|
|
|
// Transpose `ping` into `other` for now.
|
|
|
|
|
2014-06-24 00:42:43 +02:00
|
|
|
FilterParser.prototype.toNormalizedType = {
|
2019-05-20 19:46:36 +02:00
|
|
|
'all': 'all',
|
2017-05-12 16:35:11 +02:00
|
|
|
'beacon': 'other',
|
2018-02-26 20:08:16 +01:00
|
|
|
'css': 'stylesheet',
|
2017-05-12 16:35:11 +02:00
|
|
|
'data': 'data',
|
2018-12-17 13:46:04 +01:00
|
|
|
'doc': 'main_frame',
|
2017-05-12 16:35:11 +02:00
|
|
|
'document': 'main_frame',
|
|
|
|
'elemhide': 'generichide',
|
2015-04-05 16:38:47 +02:00
|
|
|
'font': 'font',
|
2018-02-26 20:08:16 +01:00
|
|
|
'frame': 'sub_frame',
|
2017-09-14 23:54:59 +02:00
|
|
|
'genericblock': 'unsupported',
|
2017-05-12 16:35:11 +02:00
|
|
|
'generichide': 'generichide',
|
|
|
|
'image': 'image',
|
2017-09-16 13:49:43 +02:00
|
|
|
'inline-font': 'inline-font',
|
2017-05-12 16:35:11 +02:00
|
|
|
'inline-script': 'inline-script',
|
2016-03-07 01:16:46 +01:00
|
|
|
'media': 'media',
|
2017-05-12 16:35:11 +02:00
|
|
|
'object': 'object',
|
|
|
|
'object-subrequest': 'object',
|
2017-11-03 21:51:28 +01:00
|
|
|
'other': 'other',
|
2016-11-06 16:49:02 +01:00
|
|
|
'ping': 'other',
|
2015-12-04 17:15:09 +01:00
|
|
|
'popunder': 'popunder',
|
2017-05-12 16:35:11 +02:00
|
|
|
'popup': 'popup',
|
|
|
|
'script': 'script',
|
|
|
|
'stylesheet': 'stylesheet',
|
|
|
|
'subdocument': 'sub_frame',
|
2018-02-26 20:08:16 +01:00
|
|
|
'xhr': 'xmlhttprequest',
|
2017-05-12 16:35:11 +02:00
|
|
|
'xmlhttprequest': 'xmlhttprequest',
|
2017-09-14 05:41:20 +02:00
|
|
|
'webrtc': 'unsupported',
|
2019-05-20 19:46:36 +02:00
|
|
|
'websocket': 'websocket',
|
2014-06-24 00:42:43 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
FilterParser.prototype.reset = function() {
|
|
|
|
this.action = BlockAction;
|
|
|
|
this.anchor = 0;
|
2018-10-23 19:01:08 +02:00
|
|
|
this.badFilter = false;
|
2017-05-12 16:35:11 +02:00
|
|
|
this.dataType = undefined;
|
|
|
|
this.dataStr = undefined;
|
2014-06-24 00:42:43 +02:00
|
|
|
this.elemHiding = false;
|
|
|
|
this.f = '';
|
|
|
|
this.firstParty = false;
|
2017-05-12 16:35:11 +02:00
|
|
|
this.thirdParty = false;
|
|
|
|
this.party = AnyParty;
|
2014-06-24 00:42:43 +02:00
|
|
|
this.fopts = '';
|
2015-08-22 18:15:16 +02:00
|
|
|
this.domainOpt = '';
|
2019-06-19 16:00:19 +02:00
|
|
|
this.isPureHostname = false;
|
2015-01-23 17:32:49 +01:00
|
|
|
this.isRegex = false;
|
2015-11-24 01:18:25 +01:00
|
|
|
this.raw = '';
|
2019-08-03 16:18:47 +02:00
|
|
|
this.redirect = 0;
|
2015-12-04 03:24:37 +01:00
|
|
|
this.token = '*';
|
2017-05-19 14:45:19 +02:00
|
|
|
this.tokenHash = this.noTokenHash;
|
2015-01-23 17:32:49 +01:00
|
|
|
this.tokenBeg = 0;
|
2015-01-24 03:47:56 +01:00
|
|
|
this.types = 0;
|
2019-05-21 20:04:21 +02:00
|
|
|
this.notTypes = 0;
|
2014-08-29 21:02:31 +02:00
|
|
|
this.important = 0;
|
2019-04-16 12:52:13 +02:00
|
|
|
this.wildcarded = false;
|
2014-06-24 00:42:43 +02:00
|
|
|
this.unsupported = false;
|
|
|
|
return this;
|
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2016-08-31 01:57:25 +02:00
|
|
|
FilterParser.prototype.bitFromType = function(type) {
|
|
|
|
return 1 << ((typeNameToTypeValue[type] >>> 4) - 1);
|
|
|
|
};
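// Example: a type registered as n << 4 in typeNameToTypeValue maps to the
// single bit 1 << (n - 1), so each supported type occupies its own bit in
// this.types and this.notTypes.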
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2015-04-07 03:26:05 +02:00
|
|
|
// https://github.com/chrisaljoudi/uBlock/issues/589
|
2015-01-24 03:47:56 +01:00
|
|
|
// Be ready to handle multiple negated types
|
|
|
|
|
2017-01-09 15:53:57 +01:00
|
|
|
FilterParser.prototype.parseTypeOption = function(raw, not) {
|
2019-05-20 19:46:36 +02:00
|
|
|
const typeBit = raw !== 'all'
    ? this.bitFromType(this.toNormalizedType[raw])
    : allTypesBits;
|
2015-01-24 03:47:56 +01:00
|
|
|
|
2019-05-21 20:04:21 +02:00
|
|
|
if ( not ) {
|
|
|
|
this.notTypes |= typeBit;
|
|
|
|
} else {
|
2015-03-26 00:28:22 +01:00
|
|
|
this.types |= typeBit;
|
2017-05-12 16:35:11 +02:00
|
|
|
}
|
2014-06-24 00:42:43 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2017-01-09 15:53:57 +01:00
|
|
|
FilterParser.prototype.parsePartyOption = function(firstParty, not) {
|
2015-06-07 00:31:38 +02:00
|
|
|
if ( firstParty ) {
|
|
|
|
not = !not;
|
|
|
|
}
|
2014-06-24 00:42:43 +02:00
|
|
|
if ( not ) {
|
|
|
|
this.firstParty = true;
|
2017-05-12 16:35:11 +02:00
|
|
|
this.party = this.thirdParty ? AnyParty : FirstParty;
|
2014-06-24 00:42:43 +02:00
|
|
|
} else {
|
|
|
|
this.thirdParty = true;
|
2017-05-12 16:35:11 +02:00
|
|
|
this.party = this.firstParty ? AnyParty : ThirdParty;
|
2014-06-24 00:42:43 +02:00
|
|
|
}
|
|
|
|
};
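// Examples: `$third-party` yields ThirdParty; `$~third-party` and
// `$first-party` both yield FirstParty; specifying both (`$1p,3p`)
// degrades to AnyParty.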
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2017-01-09 15:53:57 +01:00
|
|
|
FilterParser.prototype.parseDomainOption = function(s) {
|
|
|
|
if ( this.reHasUnicode.test(s) ) {
|
2019-04-14 22:23:52 +02:00
|
|
|
const hostnames = s.split('|');
|
|
|
|
let i = hostnames.length;
|
2017-01-09 15:53:57 +01:00
|
|
|
while ( i-- ) {
|
2017-05-12 16:35:11 +02:00
|
|
|
if ( this.reHasUnicode.test(hostnames[i]) ) {
|
|
|
|
hostnames[i] = punycode.toASCII(hostnames[i]);
|
|
|
|
}
|
2017-01-09 15:53:57 +01:00
|
|
|
}
|
|
|
|
s = hostnames.join('|');
|
|
|
|
}
|
2019-04-14 22:23:52 +02:00
|
|
|
if ( this.reBadDomainOptChars.test(s) ) { return ''; }
|
2017-01-09 15:53:57 +01:00
|
|
|
return s;
|
|
|
|
};
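// Example: `domain=пример.рф|example.com` is normalized to
// `xn--e1afmkfd.xn--p1ai|example.com`, while a value containing regex-like
// characters (e.g. `domain=ex*mple.com`) is rejected by returning '', which
// in turn marks the whole filter as unsupported.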
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2015-01-23 17:32:49 +01:00
|
|
|
FilterParser.prototype.parseOptions = function(s) {
|
|
|
|
this.fopts = s;
|
2019-05-22 23:51:03 +02:00
|
|
|
for ( let opt of s.split(/\s*,\s*/) ) {
|
2019-05-21 20:04:21 +02:00
|
|
|
const not = opt.startsWith('~');
|
2014-06-24 00:42:43 +02:00
|
|
|
if ( not ) {
|
2015-01-23 17:32:49 +01:00
|
|
|
opt = opt.slice(1);
|
2014-06-24 00:42:43 +02:00
|
|
|
}
|
2018-02-26 20:08:16 +01:00
|
|
|
if ( opt === 'third-party' || opt === '3p' ) {
|
2017-01-09 15:53:57 +01:00
|
|
|
this.parsePartyOption(false, not);
|
2015-01-23 17:32:49 +01:00
|
|
|
continue;
|
|
|
|
}
|
2019-08-03 16:18:47 +02:00
|
|
|
if ( opt === 'first-party' || opt === '1p' ) {
|
|
|
|
this.parsePartyOption(true, not);
|
|
|
|
continue;
|
2017-05-25 23:46:59 +02:00
|
|
|
}
|
2015-01-23 17:32:49 +01:00
|
|
|
if ( this.toNormalizedType.hasOwnProperty(opt) ) {
|
2017-01-09 15:53:57 +01:00
|
|
|
this.parseTypeOption(opt, not);
|
2015-01-23 17:32:49 +01:00
|
|
|
continue;
|
|
|
|
}
|
2017-01-09 14:56:42 +01:00
|
|
|
// https://github.com/gorhill/uBlock/issues/2294
|
|
|
|
// Detect and discard filter if domain option contains nonsensical
|
|
|
|
// characters.
|
2015-12-15 16:40:40 +01:00
|
|
|
if ( opt.startsWith('domain=') ) {
|
2017-01-09 15:53:57 +01:00
|
|
|
this.domainOpt = this.parseDomainOption(opt.slice(7));
|
|
|
|
if ( this.domainOpt === '' ) {
|
2017-01-09 14:56:42 +01:00
|
|
|
this.unsupported = true;
|
|
|
|
break;
|
|
|
|
}
|
2015-01-23 17:32:49 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ( opt === 'important' ) {
|
|
|
|
this.important = Important;
|
|
|
|
continue;
|
2014-06-24 00:42:43 +02:00
|
|
|
}
|
2019-08-03 16:18:47 +02:00
|
|
|
if ( /^redirect(?:-rule)?=/.test(opt) ) {
|
|
|
|
if ( this.redirect !== 0 ) {
|
|
|
|
this.unsupported = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
this.redirect = opt.charCodeAt(8) === 0x3D /* '=' */ ? 1 : 2;
|
2015-06-07 00:31:38 +02:00
|
|
|
continue;
|
|
|
|
}
|
2019-05-24 21:41:37 +02:00
|
|
|
if (
    opt.startsWith('csp=') &&
    opt.length > 4 &&
    this.reBadCSP.test(opt) === false
) {
|
|
|
|
this.parseTypeOption('data', not);
|
|
|
|
this.dataType = 'csp';
|
|
|
|
this.dataStr = opt.slice(4).trim();
|
2017-05-12 16:35:11 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ( opt === 'csp' && this.action === AllowAction ) {
|
|
|
|
this.parseTypeOption('data', not);
|
|
|
|
this.dataType = 'csp';
|
|
|
|
this.dataStr = '';
|
|
|
|
continue;
|
|
|
|
}
|
2015-12-18 18:19:13 +01:00
|
|
|
// `empty` option, as used by Adguard: handled here as a `redirect=`-style directive.
|
|
|
|
if ( opt === 'empty' ) {
|
2019-08-13 14:16:21 +02:00
|
|
|
if ( this.redirect !== 0 ) {
|
|
|
|
this.unsupported = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
this.redirect = 1;
|
2015-12-18 18:19:13 +01:00
|
|
|
continue;
|
|
|
|
}
|
2017-03-11 19:55:47 +01:00
|
|
|
// https://github.com/uBlockOrigin/uAssets/issues/192
|
|
|
|
if ( opt === 'badfilter' ) {
|
2018-10-23 19:01:08 +02:00
|
|
|
this.badFilter = true;
|
2017-03-11 19:55:47 +01:00
|
|
|
continue;
|
|
|
|
}
|
2015-11-24 05:34:03 +01:00
|
|
|
// Unrecognized filter option: ignore whole filter.
|
2015-01-23 17:32:49 +01:00
|
|
|
this.unsupported = true;
|
|
|
|
break;
|
2014-06-24 00:42:43 +02:00
|
|
|
}
|
2019-05-21 20:04:21 +02:00
|
|
|
|
2019-08-03 16:18:47 +02:00
|
|
|
// Redirect rules can't be exception filters.
|
|
|
|
if ( this.redirect !== 0 && this.action !== BlockAction ) {
|
|
|
|
this.unsupported = true;
|
|
|
|
}
|
|
|
|
|
2019-05-21 20:04:21 +02:00
|
|
|
// Negated network types? Toggle on all network type bits.
|
2019-05-22 23:51:03 +02:00
|
|
|
// Negated non-network types can only toggle themselves.
|
2019-05-21 20:04:21 +02:00
|
|
|
if ( (this.notTypes & allNetworkTypesBits) !== 0 ) {
|
2019-05-22 23:51:03 +02:00
|
|
|
this.types |= allNetworkTypesBits;
|
2019-05-21 20:04:21 +02:00
|
|
|
}
|
|
|
|
if ( this.notTypes !== 0 ) {
|
|
|
|
this.types &= ~this.notTypes;
|
|
|
|
if ( this.types === 0 ) {
|
|
|
|
this.unsupported = true;
|
|
|
|
}
|
|
|
|
}
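// Example: `$~image` puts only the image bit in notTypes; since image is a
// network type, this.types is first widened to all network type bits and the
// image bit is then cleared, so the filter ends up applying to every network
// type except images.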
|
|
|
|
|
|
|
|
// https://github.com/gorhill/uBlock/issues/2283
|
|
|
|
// Abort if type is only for unsupported types, otherwise
|
|
|
|
// toggle off `unsupported` bit.
|
|
|
|
if ( this.types & unsupportedTypeBit ) {
|
|
|
|
this.types &= ~unsupportedTypeBit;
|
|
|
|
if ( this.types === 0 ) {
|
|
|
|
this.unsupported = true;
|
|
|
|
}
|
|
|
|
}
|
2014-06-24 00:42:43 +02:00
|
|
|
};
|
|
|
|
|
2017-05-12 16:35:11 +02:00
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
anchor: bit vector
|
|
|
|
0000 (0x0): no anchoring
|
|
|
|
0001 (0x1): anchored to the end of the URL.
|
|
|
|
0010 (0x2): anchored to the start of the URL.
|
|
|
|
0011 (0x3): anchored to the start and end of the URL.
|
|
|
|
0100 (0x4): anchored to the hostname of the URL.
|
|
|
|
0101 (0x5): anchored to the hostname and end of the URL.
|
|
|
|
|
|
|
|
**/
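// Illustration of how filter syntax maps to the anchor bits above (the bits
// are set in parse() below):
//
//   "ads.js|"           -> 0x1  (end-anchored)
//   "|https://ads."     -> 0x2  (start-anchored)
//   "|https://ads.js|"  -> 0x3  (start- and end-anchored)
//   "||example.com^"    -> 0x4  (hostname-anchored)
//   "||example.com/x|"  -> 0x5  (hostname- and end-anchored)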
|
|
|
|
|
2015-02-27 00:08:42 +01:00
|
|
|
FilterParser.prototype.parse = function(raw) {
|
2014-06-24 00:42:43 +02:00
|
|
|
// important!
|
|
|
|
this.reset();
|
|
|
|
|
2019-05-06 17:12:39 +02:00
|
|
|
let s = this.raw = raw;
|
2015-02-27 00:08:42 +01:00
|
|
|
|
2019-06-29 17:06:03 +02:00
|
|
|
// Filters which are a single alphanumeric character are discarded
|
|
|
|
// as unsupported.
|
|
|
|
if ( s.length === 1 && /[0-9a-z]/i.test(s) ) {
|
|
|
|
this.unsupported = true;
|
|
|
|
return this;
|
|
|
|
}
|
|
|
|
|
2015-12-13 18:55:55 +01:00
|
|
|
// plain hostname? (from HOSTS file)
|
|
|
|
if ( this.reHostnameRule1.test(s) ) {
|
2019-05-06 17:12:39 +02:00
|
|
|
this.f = s.toLowerCase();
|
2019-06-19 16:00:19 +02:00
|
|
|
this.isPureHostname = true;
|
2017-05-12 16:35:11 +02:00
|
|
|
this.anchor |= 0x4;
|
2014-09-19 16:59:44 +02:00
|
|
|
return this;
|
|
|
|
}
|
|
|
|
|
2014-06-24 00:42:43 +02:00
|
|
|
// element hiding filter?
|
2019-05-06 17:12:39 +02:00
|
|
|
let pos = s.indexOf('#');
|
2015-01-23 17:32:49 +01:00
|
|
|
if ( pos !== -1 ) {
|
2019-05-22 23:51:03 +02:00
|
|
|
const c = s.charAt(pos + 1);
|
2015-01-23 17:32:49 +01:00
|
|
|
if ( c === '#' || c === '@' ) {
|
|
|
|
console.error('static-net-filtering.js > unexpected cosmetic filters');
|
|
|
|
this.elemHiding = true;
|
|
|
|
return this;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-24 03:47:56 +01:00
|
|
|
// block or allow filter?
|
|
|
|
// Important: this must be executed before parsing options
|
2015-12-15 16:40:40 +01:00
|
|
|
if ( s.startsWith('@@') ) {
|
2015-01-24 03:47:56 +01:00
|
|
|
this.action = AllowAction;
|
|
|
|
s = s.slice(2);
|
|
|
|
}
|
|
|
|
|
2015-01-23 17:32:49 +01:00
|
|
|
// options
|
2015-11-06 16:49:09 +01:00
|
|
|
// https://github.com/gorhill/uBlock/issues/842
|
|
|
|
// - ensure we are not dealing with a regex-based filter.
|
|
|
|
// - lookup the last occurrence of `$`.
|
2015-12-15 16:40:40 +01:00
|
|
|
if ( s.startsWith('/') === false || s.endsWith('/') === false ) {
|
2015-11-06 16:49:09 +01:00
|
|
|
pos = s.lastIndexOf('$');
|
|
|
|
if ( pos !== -1 ) {
|
2015-11-30 20:47:56 +01:00
|
|
|
// https://github.com/gorhill/uBlock/issues/952
|
2017-09-14 05:41:20 +02:00
|
|
|
// Discard Adguard-specific `$$` filters.
|
2015-11-30 20:47:56 +01:00
|
|
|
if ( s.indexOf('$$') !== -1 ) {
|
|
|
|
this.unsupported = true;
|
|
|
|
return this;
|
|
|
|
}
|
2015-11-06 16:49:09 +01:00
|
|
|
this.parseOptions(s.slice(pos + 1));
|
2019-05-21 20:04:21 +02:00
|
|
|
if ( this.unsupported ) { return this; }
|
2015-11-06 16:49:09 +01:00
|
|
|
s = s.slice(0, pos);
|
|
|
|
}
|
2014-06-24 00:42:43 +02:00
|
|
|
}
|
|
|
|
|
2015-01-23 17:32:49 +01:00
|
|
|
// regex?
|
2015-12-15 16:40:40 +01:00
|
|
|
if ( s.startsWith('/') && s.endsWith('/') && s.length > 2 ) {
|
2015-01-23 17:32:49 +01:00
|
|
|
this.isRegex = true;
|
|
|
|
this.f = s.slice(1, -1);
|
2016-01-17 02:21:17 +01:00
|
|
|
// https://github.com/gorhill/uBlock/issues/1246
|
|
|
|
// If the filter is valid, use the corrected version of the source
|
|
|
|
// string -- this ensures reverse-lookup will work fine.
|
|
|
|
this.f = normalizeRegexSource(this.f);
|
|
|
|
if ( this.f === '' ) {
|
2015-10-26 16:23:56 +01:00
|
|
|
console.error(
    "uBlock Origin> discarding bad regular expression-based network filter '%s': '%s'",
    raw,
    normalizeRegexSource.message
);
|
|
|
|
this.unsupported = true;
|
|
|
|
}
|
2014-09-08 23:46:58 +02:00
|
|
|
return this;
|
|
|
|
}
|
|
|
|
|
2015-02-27 00:08:42 +01:00
|
|
|
// hostname-anchored
|
2015-12-15 16:40:40 +01:00
|
|
|
if ( s.startsWith('||') ) {
|
2017-05-12 16:35:11 +02:00
|
|
|
this.anchor |= 0x4;
|
2015-12-13 18:55:55 +01:00
|
|
|
s = s.slice(2);
|
|
|
|
|
2015-02-27 00:08:42 +01:00
|
|
|
// convert hostname to punycode if needed
|
2017-05-09 14:58:30 +02:00
|
|
|
// https://github.com/gorhill/uBlock/issues/2599
|
2015-02-27 00:08:42 +01:00
|
|
|
if ( this.reHasUnicode.test(s) ) {
    const matches = this.reIsolateHostname.exec(s);
    if ( matches ) {
        s = (matches[1] !== undefined ? matches[1] : '') +
            punycode.toASCII(matches[2]) +
            matches[3];
        //console.debug('µBlock.staticNetFilteringEngine/FilterParser.parse():', raw, '=', s);
    }
}
|
2015-03-26 20:16:48 +01:00
|
|
|
|
2015-04-07 03:26:05 +02:00
|
|
|
// https://github.com/chrisaljoudi/uBlock/issues/1096
|
2015-12-15 16:40:40 +01:00
|
|
|
if ( s.startsWith('^') ) {
|
2015-03-26 20:16:48 +01:00
|
|
|
this.unsupported = true;
|
|
|
|
return this;
|
|
|
|
}
|
2015-12-13 18:55:55 +01:00
|
|
|
|
|
|
|
// plain hostname? (from ABP filter list)
|
2016-06-27 03:15:18 +02:00
|
|
|
// https://github.com/gorhill/uBlock/issues/1757
|
2017-05-12 16:35:11 +02:00
|
|
|
// A filter can't be a pure-hostname one if there is a domain or csp
|
|
|
|
// option present.
|
|
|
|
if ( this.reHostnameRule2.test(s) ) {
|
2019-05-06 17:12:39 +02:00
|
|
|
if ( s.charCodeAt(s.length - 1) === 0x5E /* '^' */ ) {
|
|
|
|
s = s.slice(0, -1);
|
|
|
|
}
|
|
|
|
this.f = s.toLowerCase();
|
2019-06-19 16:00:19 +02:00
|
|
|
this.isPureHostname = true;
|
2015-12-13 18:55:55 +01:00
|
|
|
return this;
|
|
|
|
}
|
2014-06-24 00:42:43 +02:00
|
|
|
}
|
|
|
|
// left-anchored
|
2017-05-12 16:35:11 +02:00
|
|
|
else if ( s.startsWith('|') ) {
|
|
|
|
this.anchor |= 0x2;
|
2014-06-24 00:42:43 +02:00
|
|
|
s = s.slice(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// right-anchored
|
2015-12-15 16:40:40 +01:00
|
|
|
if ( s.endsWith('|') ) {
|
2017-05-12 16:35:11 +02:00
|
|
|
this.anchor |= 0x1;
|
2014-06-24 00:42:43 +02:00
|
|
|
s = s.slice(0, -1);
|
|
|
|
}
|
|
|
|
|
2017-05-30 17:38:45 +02:00
|
|
|
// https://github.com/gorhill/uBlock/issues/1669#issuecomment-224822448
|
|
|
|
// remove pointless leading *.
|
2017-09-18 19:06:36 +02:00
|
|
|
// https://github.com/gorhill/uBlock/issues/3034
|
|
|
|
// - We can remove anchoring if we need to match all at the start.
|
2017-05-30 17:38:45 +02:00
|
|
|
if ( s.startsWith('*') ) {
|
2018-03-07 16:37:18 +01:00
|
|
|
s = s.replace(/^\*+([^%0-9a-z])/i, '$1');
|
2017-09-18 19:06:36 +02:00
|
|
|
this.anchor &= ~0x6;
|
2017-05-30 17:38:45 +02:00
|
|
|
}
|
|
|
|
// remove pointless trailing *
|
2017-09-18 19:06:36 +02:00
|
|
|
// https://github.com/gorhill/uBlock/issues/3034
|
|
|
|
// - We can remove anchoring if we need to match all at the end.
|
2017-05-30 17:38:45 +02:00
|
|
|
if ( s.endsWith('*') ) {
|
2018-03-07 16:37:18 +01:00
|
|
|
s = s.replace(/([^%0-9a-z])\*+$/i, '$1');
|
2017-09-18 19:06:36 +02:00
|
|
|
this.anchor &= ~0x1;
|
2015-01-23 17:32:49 +01:00
|
|
|
}
|
2014-09-19 16:59:44 +02:00
|
|
|
|
2015-02-14 00:59:51 +01:00
|
|
|
// nothing left?
|
|
|
|
if ( s === '' ) {
|
2015-03-17 14:39:03 +01:00
|
|
|
s = '*';
|
2015-02-14 00:59:51 +01:00
|
|
|
}
|
2019-08-03 16:18:47 +02:00
|
|
|
// TODO: remove once redirect rules with `*/*` pattern are no longer used.
|
|
|
|
else if ( this.redirect !== 0 && s === '/' ) {
|
|
|
|
s = '*';
|
|
|
|
}
|
2015-02-14 00:59:51 +01:00
|
|
|
|
2015-12-11 12:36:28 +01:00
|
|
|
// https://github.com/gorhill/uBlock/issues/1047
|
|
|
|
// Hostname-anchored makes no sense if matching all requests.
|
|
|
|
if ( s === '*' ) {
|
2017-05-12 16:35:11 +02:00
|
|
|
this.anchor = 0;
|
2015-12-11 12:36:28 +01:00
|
|
|
}
|
|
|
|
|
2019-04-25 23:48:08 +02:00
|
|
|
this.wildcarded = reIsWildcarded.test(s);
|
2019-05-06 17:12:39 +02:00
|
|
|
this.f = s.toLowerCase();
|
2015-01-23 17:32:49 +01:00
|
|
|
|
|
|
|
return this;
|
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2015-03-02 16:41:51 +01:00
|
|
|
// Given a string, find a good token. Tokens which are too generic, i.e. very
|
|
|
|
// common with a high probability of ending up as a miss, are not
|
|
|
|
// good. Avoid if possible. This has a *significant* positive impact on
|
|
|
|
// performance.
|
|
|
|
// These "bad tokens" are collated manually.
|
|
|
|
|
2015-12-04 03:24:37 +01:00
|
|
|
// Hostname-anchored filters with no wildcard always have a token index of 0.
|
|
|
|
const reGoodToken = /[%0-9a-z]{2,}/g;
|
|
|
|
const reRegexToken = /[%0-9A-Za-z]{2,}/g;
|
|
|
|
const reRegexTokenAbort = /[([]/;
|
|
|
|
const reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/;
|
2019-04-26 23:14:00 +02:00
|
|
|
const reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*.]|$)/;
|
|
|
|
|
|
|
|
const badTokens = new Set([
|
2017-05-12 16:35:11 +02:00
|
|
|
'com',
|
2019-04-16 12:52:13 +02:00
|
|
|
'google',
|
2017-05-12 16:35:11 +02:00
|
|
|
'http',
|
|
|
|
'https',
|
|
|
|
'icon',
|
|
|
|
'images',
|
|
|
|
'img',
|
|
|
|
'js',
|
|
|
|
'net',
|
|
|
|
'news',
|
|
|
|
'www'
|
|
|
|
]);
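// Illustration (hypothetical filter): for "||example.com/js/ads-loader.js",
// the pattern scanned is "example.com/js/ads-loader.js"; reGoodToken yields
// "example", "com", "js", "ads", "loader", "js". "com" and "js" are in
// badTokens, so findFirstGoodToken() settles on "example". A bad token is
// still kept as a fallback in case no good one exists.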
|
2015-03-02 16:41:51 +01:00
|
|
|
|
2017-07-11 19:57:31 +02:00
|
|
|
FilterParser.prototype.findFirstGoodToken = function() {
|
2015-03-02 16:41:51 +01:00
|
|
|
reGoodToken.lastIndex = 0;
|
|
|
|
const s = this.f;
|
|
|
|
let matches;
|
|
|
|
let badTokenMatch = null;
|
2017-05-12 16:35:11 +02:00
|
|
|
while ( (matches = reGoodToken.exec(s)) !== null ) {
|
2015-12-03 16:06:06 +01:00
|
|
|
// https://github.com/gorhill/uBlock/issues/997
|
|
|
|
// Ignore token if preceded by wildcard.
|
|
|
|
const lpos = matches.index;
|
2017-05-12 16:35:11 +02:00
|
|
|
if ( lpos !== 0 && s.charCodeAt(lpos - 1) === 0x2A /* '*' */ ) {
|
2015-12-03 16:06:06 +01:00
|
|
|
continue;
|
|
|
|
}
|
2017-05-12 16:35:11 +02:00
|
|
|
if ( s.charCodeAt(reGoodToken.lastIndex) === 0x2A /* '*' */ ) {
|
2015-03-02 22:22:23 +01:00
|
|
|
continue;
|
|
|
|
}
|
2017-05-12 16:35:11 +02:00
|
|
|
if ( badTokens.has(matches[0]) ) {
|
2015-12-04 03:24:37 +01:00
|
|
|
if ( badTokenMatch === null ) {
|
|
|
|
badTokenMatch = matches;
|
|
|
|
}
|
2015-03-02 16:41:51 +01:00
|
|
|
continue;
|
|
|
|
}
|
2015-03-02 22:22:23 +01:00
|
|
|
return matches;
|
|
|
|
}
|
2015-12-04 03:24:37 +01:00
|
|
|
return badTokenMatch;
|
2015-03-02 16:41:51 +01:00
|
|
|
};
|
|
|
|
|
2017-07-11 19:57:31 +02:00
|
|
|
FilterParser.prototype.extractTokenFromRegex = function() {
|
2017-07-11 21:04:25 +02:00
|
|
|
reRegexToken.lastIndex = 0;
|
|
|
|
const s = this.f;
|
|
|
|
let matches;
|
2017-07-11 21:04:25 +02:00
|
|
|
while ( (matches = reRegexToken.exec(s)) !== null ) {
|
|
|
|
const prefix = s.slice(0, matches.index);
|
2017-07-11 21:04:25 +02:00
|
|
|
if ( reRegexTokenAbort.test(prefix) ) { return; }
|
|
|
|
if (
    reRegexBadPrefix.test(prefix) ||
    reRegexBadSuffix.test(s.slice(reRegexToken.lastIndex))
) {
    continue;
}
|
|
|
|
this.token = matches[0].toLowerCase();
|
|
|
|
this.tokenHash = µb.urlTokenizer.tokenHashFromString(this.token);
|
|
|
|
this.tokenBeg = matches.index;
|
|
|
|
if ( badTokens.has(this.token) === false ) { break; }
|
|
|
|
}
|
2017-07-11 19:57:31 +02:00
|
|
|
};
|
|
|
|
|
2015-03-02 22:22:23 +01:00
|
|
|
/******************************************************************************/
|
|
|
|
|
2017-07-11 18:21:08 +02:00
|
|
|
// https://github.com/chrisaljoudi/uBlock/issues/1038
|
|
|
|
// Single asterisk will match any URL.
|
|
|
|
|
|
|
|
// https://github.com/gorhill/uBlock/issues/2781
|
|
|
|
// For efficiency purposes, try to extract a token from a regex-based filter.
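// Illustration (hypothetical regex filter): given /^https?:\/\/ads\.example\./,
// the candidate "https" is skipped because the "?" following it is a bad
// suffix; "ads" is then accepted -- its prefix ends with "/" and the escaped
// dot after it is harmless -- and, not being in badTokens, becomes the token.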
|
|
|
|
|
2015-01-23 17:32:49 +01:00
|
|
|
FilterParser.prototype.makeToken = function() {
|
2017-07-11 18:21:08 +02:00
|
|
|
if ( this.isRegex ) {
|
2017-07-11 19:57:31 +02:00
|
|
|
this.extractTokenFromRegex();
|
2017-07-11 18:21:08 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( this.f === '*' ) { return; }
|
2015-01-23 17:32:49 +01:00
|
|
|
|
2019-06-19 01:16:39 +02:00
|
|
|
let matches = this.findFirstGoodToken();
|
2017-05-12 16:35:11 +02:00
|
|
|
if ( matches !== null ) {
|
2015-12-04 03:24:37 +01:00
|
|
|
this.token = matches[0];
|
2017-05-19 14:45:19 +02:00
|
|
|
this.tokenHash = µb.urlTokenizer.tokenHashFromString(this.token);
|
2015-01-23 17:32:49 +01:00
|
|
|
this.tokenBeg = matches.index;
|
|
|
|
}
|
2014-06-24 00:42:43 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
FilterParser.prototype.isJustOrigin = function() {
    return this.dataType === undefined &&
           this.domainOpt !== '' &&
           /^(?:\*|http[s*]?:(?:\/\/)?)$/.test(this.f) &&
           this.domainOpt.indexOf('~') === -1;
};
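// isJustOrigin() identifies filters which only need to match the document
// origin: no data (csp=) option, a non-empty `domain=` list with no negated
// entry, and a pattern that is just "*" or a bare http(s) scheme prefix.
// At freeze() time such filters are folded into a single per-bucket origin
// trie (FilterJustOrigin, FilterHTTPSJustOrigin, FilterHTTPJustOrigin), so
// one trie lookup replaces a scan over many individual filters.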
|
|
|
|
|
2014-06-24 00:42:43 +02:00
|
|
|
/******************************************************************************/
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2018-11-03 12:58:46 +01:00
|
|
|
const FilterContainer = function() {
|
2014-06-24 00:42:43 +02:00
|
|
|
this.filterParser = new FilterParser();
|
2015-12-29 17:34:41 +01:00
|
|
|
this.urlTokenizer = µb.urlTokenizer;
|
|
|
|
this.noTokenHash = this.urlTokenizer.noTokenHash;
|
|
|
|
this.dotTokenHash = this.urlTokenizer.dotTokenHash;
|
|
|
|
this.anyTokenHash = this.urlTokenizer.anyTokenHash;
|
|
|
|
this.anyHTTPSTokenHash = this.urlTokenizer.anyHTTPSTokenHash;
|
|
|
|
this.anyHTTPTokenHash = this.urlTokenizer.anyHTTPTokenHash;
|
2014-07-20 21:00:26 +02:00
|
|
|
this.reset();
|
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
// Reset all, thus reducing the context's memory footprint to a minimum.
|
|
|
|
|
|
|
|
FilterContainer.prototype.reset = function() {
|
|
|
|
this.frozen = false;
|
2014-06-24 00:42:43 +02:00
|
|
|
this.processedFilterCount = 0;
|
2014-07-16 16:43:34 +02:00
|
|
|
this.acceptedCount = 0;
|
2014-09-08 23:46:58 +02:00
|
|
|
this.rejectedCount = 0;
|
2014-06-24 00:42:43 +02:00
|
|
|
this.allowFilterCount = 0;
|
|
|
|
this.blockFilterCount = 0;
|
2016-03-17 18:56:21 +01:00
|
|
|
this.discardedCount = 0;
|
2018-10-23 19:01:08 +02:00
|
|
|
this.goodFilters = new Set();
|
2017-03-11 19:55:47 +01:00
|
|
|
this.badFilters = new Set();
|
2016-09-12 16:22:25 +02:00
|
|
|
this.categories = new Map();
|
2017-05-12 16:35:11 +02:00
|
|
|
this.dataFilters = new Map();
|
2014-07-20 21:00:26 +02:00
|
|
|
this.filterParser.reset();
|
2019-04-26 23:14:00 +02:00
|
|
|
this.urlTokenizer.resetKnownTokens();
|
2015-06-09 16:27:08 +02:00
|
|
|
|
|
|
|
// This will invalidate all tries
|
2018-12-04 19:02:09 +01:00
|
|
|
FilterHostnameDict.reset();
|
|
|
|
filterOrigin.reset();
|
2019-04-14 22:23:52 +02:00
|
|
|
FilterBucket.reset();
|
2018-11-03 12:58:46 +01:00
|
|
|
|
2015-06-09 16:27:08 +02:00
|
|
|
// Runtime registers
|
|
|
|
this.catbitsRegister = 0;
|
|
|
|
this.tokenRegister = 0;
|
|
|
|
this.filterRegister = null;
|
2014-07-20 21:00:26 +02:00
|
|
|
};
|
2014-06-24 00:42:43 +02:00
|
|
|
|
2014-07-20 21:00:26 +02:00
|
|
|
/******************************************************************************/
|
2014-06-24 00:42:43 +02:00
|
|
|
|
2014-07-20 21:00:26 +02:00
|
|
|
FilterContainer.prototype.freeze = function() {
|
2019-04-14 22:23:52 +02:00
|
|
|
const filterPairId = FilterPair.fid;
|
|
|
|
const filterBucketId = FilterBucket.fid;
|
|
|
|
const filterDataHolderId = FilterDataHolder.fid;
|
|
|
|
const redirectTypeValue = typeNameToTypeValue.redirect;
|
|
|
|
const unserialize = µb.CompiledLineIO.unserialize;
|
2018-10-23 19:01:08 +02:00
|
|
|
|
2018-12-04 19:02:09 +01:00
|
|
|
for ( const line of this.goodFilters ) {
|
2018-10-24 11:55:04 +02:00
|
|
|
if ( this.badFilters.has(line) ) {
|
|
|
|
this.discardedCount += 1;
|
|
|
|
continue;
|
|
|
|
}
|
2018-10-23 19:01:08 +02:00
|
|
|
|
2018-12-04 19:02:09 +01:00
|
|
|
const args = unserialize(line);
|
|
|
|
const bits = args[0];
|
2018-10-23 19:01:08 +02:00
|
|
|
|
|
|
|
// Special cases: delegate to more specialized engines.
|
|
|
|
// Redirect engine.
|
|
|
|
if ( (bits & 0x1F0) === redirectTypeValue ) {
|
|
|
|
µb.redirectEngine.fromCompiledRule(args[1]);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Plain static filters.
|
2018-12-04 19:02:09 +01:00
|
|
|
const tokenHash = args[1];
|
|
|
|
const fdata = args[2];
|
2018-10-23 19:01:08 +02:00
|
|
|
|
|
|
|
// Special treatment: data-holding filters are stored separately
|
|
|
|
// because they require a special matching algorithm (unlike other
|
|
|
|
// filters, ALL hits must be reported).
|
|
|
|
if ( fdata[0] === filterDataHolderId ) {
|
|
|
|
let entry = new FilterDataHolderEntry(bits, tokenHash, fdata);
|
|
|
|
let bucket = this.dataFilters.get(tokenHash);
|
|
|
|
if ( bucket !== undefined ) {
|
|
|
|
entry.next = bucket;
|
|
|
|
}
|
|
|
|
this.dataFilters.set(tokenHash, entry);
|
|
|
|
this.urlTokenizer.addKnownToken(tokenHash);
|
2018-10-23 19:01:08 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
let bucket = this.categories.get(bits);
|
|
|
|
if ( bucket === undefined ) {
|
|
|
|
bucket = new Map();
|
|
|
|
this.categories.set(bits, bucket);
|
|
|
|
}
|
|
|
|
let entry = bucket.get(tokenHash);
|
|
|
|
|
|
|
|
if ( tokenHash === this.dotTokenHash ) {
|
|
|
|
if ( entry === undefined ) {
|
|
|
|
entry = new FilterHostnameDict();
|
|
|
|
bucket.set(this.dotTokenHash, entry);
|
|
|
|
}
|
|
|
|
entry.add(fdata);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
Add HNTrie-based filter classes to store origin-only filters
Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/528#issuecomment-484408622
Following STrie-related work in above issue, I noticed that a large
number of filters in EasyList were filters which only had to match
against the document origin. For instance, among just the top 10
most populous buckets, there were four such buckets with over
hundreds of entries each:
- bits: 72, token: "http", 146 entries
- bits: 72, token: "https", 139 entries
- bits: 88, token: "http", 122 entries
- bits: 88, token: "https", 118 entries
These filters in these buckets have to be matched against all
the network requests.
In order to leverage HNTrie for these filters[1], they are now handled
in a special way so as to ensure they all end up in a single HNTrie
(per bucket), which means that instead of scanning hundreds of entries
per URL, there is now a single scan per bucket per URL for these
apply-everywhere filters.
Now, any filter which fulfill ALL the following condition will be
processed in a special manner internally:
- Is of the form `|https://` or `|http://` or `*`; and
- Does have a `domain=` option; and
- Does not have a negated domain in its `domain=` option; and
- Does not have `csp=` option; and
- Does not have a `redirect=` option
If a filter does not fulfill ALL the conditions above, no change
in behavior.
A filter which matches ALL of the above will be processed in a special
manner:
- The `domain=` option will be decomposed so as to create as many
distinct filter as there is distinct value in the `domain=` option
- This also apply to the `badfilter` version of the filter, which
means it now become possible to `badfilter` only one of the
distinct filter without having to `badfilter` all of them.
- The logger will always report these special filters with only a
single hostname in the `domain=` option.
***
[1] HNTrie is currently WASM-ed on Firefox.
2019-04-19 22:33:46 +02:00
|
|
|
if ( tokenHash === this.anyTokenHash ) {
|
|
|
|
if ( entry === undefined ) {
|
|
|
|
entry = new FilterJustOrigin();
|
|
|
|
bucket.set(this.anyTokenHash, entry);
|
|
|
|
}
|
|
|
|
entry.add(fdata);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( tokenHash === this.anyHTTPSTokenHash ) {
|
|
|
|
if ( entry === undefined ) {
|
|
|
|
entry = new FilterHTTPSJustOrigin();
|
|
|
|
bucket.set(this.anyHTTPSTokenHash, entry);
|
|
|
|
}
|
|
|
|
entry.add(fdata);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( tokenHash === this.anyHTTPTokenHash ) {
|
|
|
|
if ( entry === undefined ) {
|
|
|
|
entry = new FilterHTTPJustOrigin();
|
|
|
|
bucket.set(this.anyHTTPTokenHash, entry);
|
|
|
|
}
|
|
|
|
entry.add(fdata);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
Increase resolution of known-token lookup table
Related commit:
- https://github.com/gorhill/uBlock/commit/69a43e07c4bc017f3320a669c1e80147c17dddcf
Using 32 bits of token hash rather than just the 16 lower
bits does help discard more unknown tokens.
Using the default filter lists, the known-token lookup
table is populated by 12,276 entries, out of 65,536, thus
making the case that theoretically there is a lot of
possible tokens which can be discarded.
In practice, running the built-in
staticNetFilteringEngine.benchmark() with default filter
lists, I find that 1,518,929 tokens were skipped out of
4,441,891 extracted tokens, or 34%.
2019-04-27 14:18:01 +02:00
|
|
|
this.urlTokenizer.addKnownToken(tokenHash);
|
2019-04-26 23:14:00 +02:00
|
|
|
|
2018-10-23 19:01:08 +02:00
|
|
|
if ( entry === undefined ) {
|
|
|
|
bucket.set(tokenHash, filterFromCompiledData(fdata));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ( entry.fid === filterBucketId ) {
|
|
|
|
entry.add(fdata);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ( entry.fid === filterPairId ) {
|
|
|
|
bucket.set(
|
|
|
|
tokenHash,
|
|
|
|
entry.upgrade(filterFromCompiledData(fdata))
|
|
|
|
);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
bucket.set(
|
|
|
|
tokenHash,
|
|
|
|
new FilterPair(entry, filterFromCompiledData(fdata))
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2014-07-20 21:00:26 +02:00
|
|
|
this.filterParser.reset();
|
Add HNTrie-based filter classes to store origin-only filters
Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/528#issuecomment-484408622
Following STrie-related work in above issue, I noticed that a large
number of filters in EasyList were filters which only had to match
against the document origin. For instance, among just the top 10
most populous buckets, there were four such buckets with over
hundreds of entries each:
- bits: 72, token: "http", 146 entries
- bits: 72, token: "https", 139 entries
- bits: 88, token: "http", 122 entries
- bits: 88, token: "https", 118 entries
These filters in these buckets have to be matched against all
the network requests.
In order to leverage HNTrie for these filters[1], they are now handled
in a special way so as to ensure they all end up in a single HNTrie
(per bucket), which means that instead of scanning hundreds of entries
per URL, there is now a single scan per bucket per URL for these
apply-everywhere filters.
Now, any filter which fulfill ALL the following condition will be
processed in a special manner internally:
- Is of the form `|https://` or `|http://` or `*`; and
- Does have a `domain=` option; and
- Does not have a negated domain in its `domain=` option; and
- Does not have `csp=` option; and
- Does not have a `redirect=` option
If a filter does not fulfill ALL the conditions above, no change
in behavior.
A filter which matches ALL of the above will be processed in a special
manner:
- The `domain=` option will be decomposed so as to create as many
distinct filter as there is distinct value in the `domain=` option
- This also apply to the `badfilter` version of the filter, which
means it now become possible to `badfilter` only one of the
distinct filter without having to `badfilter` all of them.
- The logger will always report these special filters with only a
single hostname in the `domain=` option.
***
[1] HNTrie is currently WASM-ed on Firefox.
2019-04-19 22:33:46 +02:00
|
|
|
this.badFilters.clear();
|
|
|
|
this.goodFilters.clear();
|
2018-12-04 19:02:09 +01:00
|
|
|
FilterHostnameDict.optimize();
|
2019-04-14 22:23:52 +02:00
|
|
|
FilterBucket.optimize();
|
2014-07-20 21:00:26 +02:00
|
|
|
this.frozen = true;
|
2017-01-06 18:39:37 +01:00
|
|
|
};
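
// Informal overview of the structure built above: `this.categories` is a
// two-level map, keyed first by a filter's category bits (action,
// `important`, party, type), then by token hash:
//
//     categories: Map<bits, Map<tokenHash, unit>>
//
// A unit starts as a single filter instance, becomes a FilterPair when a
// second filter shares the same token, and once a third arrives the pair is
// upgraded into a unit which accepts further additions (the FilterBucket
// branch above). The special token hashes -- dotTokenHash, anyTokenHash,
// anyHTTPSTokenHash, anyHTTPTokenHash -- map to dictionary- or trie-backed
// units (FilterHostnameDict, FilterJustOrigin, FilterHTTPSJustOrigin,
// FilterHTTPJustOrigin) so that many filters can be tested in one lookup.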

/******************************************************************************/

// This is necessary for when the readiness of the filtering engine depends
// on asynchronous operations (ex.: when loading a wasm module).
FilterContainer.prototype.readyToUse = function() {
    return Promise.resolve();
};

/******************************************************************************/

FilterContainer.prototype.toSelfie = function(path) {
    const categoriesToSelfie = function(categoryMap) {
        const selfie = [];
        for ( const [ catbits, bucket ] of categoryMap ) {
            const tokenEntries = [];
            for ( const [ token, filter ] of bucket ) {
                tokenEntries.push([ token, filter.compile(true) ]);
            }
            selfie.push([ catbits, tokenEntries ]);
        }
        return selfie;
    };

    const dataFiltersToSelfie = function(dataFilters) {
        const selfie = [];
        for ( let entry of dataFilters.values() ) {
            do {
                selfie.push(entry.compile(true));
                entry = entry.next;
            } while ( entry !== undefined );
        }
        return selfie;
    };

    filterOrigin.optimize();

    return Promise.all([
        µBlock.assets.put(
            `${path}/FilterHostnameDict.trieContainer`,
            FilterHostnameDict.trieContainer.serialize(µBlock.base64)
        ),
        µBlock.assets.put(
            `${path}/FilterOrigin.trieContainer`,
            filterOrigin.trieContainer.serialize(µBlock.base64)
        ),
        µBlock.assets.put(
            `${path}/FilterBucket.trieContainer`,
            FilterBucket.trieContainer.serialize(µBlock.base64)
        ),
        µBlock.assets.put(
            `${path}/main`,
            JSON.stringify({
                processedFilterCount: this.processedFilterCount,
                acceptedCount: this.acceptedCount,
                rejectedCount: this.rejectedCount,
                allowFilterCount: this.allowFilterCount,
                blockFilterCount: this.blockFilterCount,
                discardedCount: this.discardedCount,
                categories: categoriesToSelfie(this.categories),
                dataFilters: dataFiltersToSelfie(this.dataFilters),
                urlTokenizer: this.urlTokenizer.toSelfie(),
                filterOriginStrSlots: filterOrigin.strSlots,
            })
        )
    ]);
};

/******************************************************************************/

FilterContainer.prototype.fromSelfie = function(path) {
    return Promise.all([
        µBlock.assets.get(`${path}/FilterHostnameDict.trieContainer`).then(details =>
            FilterHostnameDict.trieContainer.unserialize(
                details.content,
                µBlock.base64
            )
        ),
        µBlock.assets.get(`${path}/FilterOrigin.trieContainer`).then(details =>
            filterOrigin.trieContainer.unserialize(
                details.content,
                µBlock.base64
            )
        ),
        µBlock.assets.get(`${path}/FilterBucket.trieContainer`).then(details =>
            FilterBucket.trieContainer.unserialize(
                details.content,
                µBlock.base64
            )
        ),
        µBlock.assets.get(`${path}/main`).then(details => {
            let selfie;
            try {
                selfie = JSON.parse(details.content);
            } catch (ex) {
            }
            if ( selfie instanceof Object === false ) { return false; }
            this.frozen = true;
            this.processedFilterCount = selfie.processedFilterCount;
            this.acceptedCount = selfie.acceptedCount;
            this.rejectedCount = selfie.rejectedCount;
            this.allowFilterCount = selfie.allowFilterCount;
            this.blockFilterCount = selfie.blockFilterCount;
            this.discardedCount = selfie.discardedCount;
            this.urlTokenizer.fromSelfie(selfie.urlTokenizer);
            filterOrigin.strSlots = selfie.filterOriginStrSlots;
            for ( const [ catbits, bucket ] of selfie.categories ) {
                const tokenMap = new Map();
                for ( const [ token, fdata ] of bucket ) {
                    tokenMap.set(token, filterFromCompiledData(fdata));
                }
                this.categories.set(catbits, tokenMap);
            }
            for ( const dataEntry of selfie.dataFilters ) {
                const entry = FilterDataHolderEntry.load(dataEntry);
                const bucket = this.dataFilters.get(entry.tokenHash);
                if ( bucket !== undefined ) {
                    entry.next = bucket;
                }
                this.dataFilters.set(entry.tokenHash, entry);
            }
            return true;
        }),
    ]).then(results =>
        results.reduce((acc, v) => acc && v, true)
    );
};
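
// Usage sketch (illustrative only; the cache path below is hypothetical):
// persisting and restoring the engine through the selfie round-trip.
//
//     const snfe = µBlock.staticNetFilteringEngine;
//     snfe.toSelfie('cache/snfe')
//         .then(( ) => snfe.fromSelfie('cache/snfe'))
//         .then(ok => {
//             // `ok` is true only if every serialized piece was restored;
//             // otherwise callers would typically fall back to recompiling
//             // the filter lists from source.
//         });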

/******************************************************************************/

FilterContainer.prototype.compile = function(raw, writer) {
    // ORDER OF TESTS IS IMPORTANT!

    // Ignore empty lines
    const s = raw.trim();
    if ( s.length === 0 ) { return false; }

    const parsed = this.filterParser.parse(s);

    // Ignore element-hiding filters
    if ( parsed.elemHiding ) {
        return false;
    }

    // Ignore filters with unsupported options
    if ( parsed.unsupported ) {
        const who = writer.properties.get('assetKey') || '?';
        µb.logger.writeOne({
            realm: 'message',
            type: 'error',
            text: `Invalid network filter in ${who}: ${raw}`
        });
        return false;
    }

    // Redirect rule
    if ( parsed.redirect !== 0 ) {
        const result = this.compileRedirectRule(parsed, writer);
        if ( result === false ) {
            const who = writer.properties.get('assetKey') || '?';
            µb.logger.writeOne({
                realm: 'message',
                type: 'error',
                text: `Invalid redirect rule in ${who}: ${raw}`
            });
            return false;
        }
        if ( parsed.redirect === 2 ) {
            return true;
        }
    }

    // Pure hostnames, use more efficient dictionary lookup
    // https://github.com/chrisaljoudi/uBlock/issues/665
    // Create a dict keyed on request type etc.
    if (
        parsed.isPureHostname &&
        parsed.domainOpt === '' &&
        parsed.dataType === undefined
    ) {
        parsed.tokenHash = this.dotTokenHash;
        this.compileToAtomicFilter(parsed, parsed.f, writer);
        return true;
    }

    parsed.makeToken();

    let fdata;
    if ( parsed.isRegex ) {
        fdata = FilterRegex.compile(parsed);
    } else if ( parsed.isPureHostname ) {
        fdata = FilterPlainHostname.compile(parsed);
    } else if ( parsed.f === '*' ) {
        if ( parsed.isJustOrigin() ) {
            parsed.tokenHash = this.anyTokenHash;
            for ( const hn of parsed.domainOpt.split('|') ) {
                this.compileToAtomicFilter(parsed, hn, writer);
            }
            return true;
        }
        fdata = FilterTrue.compile();
    } else if ( parsed.anchor === 0x5 ) {
        fdata = FilterGenericHnAndRightAnchored.compile(parsed);
    } else if ( parsed.anchor === 0x4 ) {
        if (
            parsed.wildcarded === false &&
            parsed.tokenHash !== parsed.noTokenHash
        ) {
            fdata = FilterPlainHnAnchored.compile(parsed);
        } else {
            fdata = FilterGenericHnAnchored.compile(parsed);
        }
    } else if ( parsed.anchor === 0x2 && parsed.isJustOrigin() ) {
        const hostnames = parsed.domainOpt.split('|');
        const isHTTPS = parsed.f === 'https://' || parsed.f === 'http*://';
        const isHTTP = parsed.f === 'http://' || parsed.f === 'http*://';
        for ( const hn of hostnames ) {
            if ( isHTTPS ) {
                parsed.tokenHash = this.anyHTTPSTokenHash;
                this.compileToAtomicFilter(parsed, hn, writer);
            }
            if ( isHTTP ) {
                parsed.tokenHash = this.anyHTTPTokenHash;
                this.compileToAtomicFilter(parsed, hn, writer);
            }
        }
        return true;
    } else if ( parsed.wildcarded || parsed.tokenHash === parsed.noTokenHash ) {
        fdata = FilterGeneric.compile(parsed);
    } else if ( parsed.anchor === 0x2 ) {
        fdata = FilterPlainLeftAnchored.compile(parsed);
    } else if ( parsed.anchor === 0x1 ) {
        fdata = FilterPlainRightAnchored.compile(parsed);
    } else if ( parsed.anchor === 0x3 ) {
        fdata = FilterExactMatch.compile(parsed);
    } else {
        fdata = FilterPlain.compile(parsed);
    }

    if ( parsed.domainOpt !== '' ) {
        fdata = filterOrigin.compile(parsed, fdata);
    }

    if ( parsed.dataType !== undefined ) {
        let fwrapped = fdata;
        fdata = FilterDataHolder.compile(parsed);
        fdata.push(fwrapped);
    }

    this.compileToAtomicFilter(parsed, fdata, writer);

    return true;
};
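
// Note on the origin-only special cases above (`parsed.f === '*'` and the
// left-anchored `|http://` / `|https://` forms): filters such as
// `*$domain=a.com|b.com` or `|https://$domain=a.com`, which carry only a
// non-negated `domain=` option, are decomposed into one compiled entry per
// hostname, each tagged with anyTokenHash / anyHTTPTokenHash /
// anyHTTPSTokenHash. freeze() then funnels these entries into a single
// trie-backed unit per bucket, so a `badfilter` version cancels only the
// matching hostname, and the logger reports a single hostname in `domain=`.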

/******************************************************************************/

FilterContainer.prototype.compileToAtomicFilter = function(
    parsed,
    fdata,
    writer
) {
    // 0 = network filters
    // 1 = network filters: bad filters
    writer.select(parsed.badFilter ? 1 : 0);

    const descBits = parsed.action | parsed.important | parsed.party;
    let typeBits = parsed.types;

    // Typeless
    if ( typeBits === 0 ) {
        writer.push([ descBits, parsed.tokenHash, fdata ]);
        return;
    }

    // If all network types are set, create a typeless filter
    if ( (typeBits & allNetworkTypesBits) === allNetworkTypesBits ) {
        writer.push([ descBits, parsed.tokenHash, fdata ]);
        typeBits &= ~allNetworkTypesBits;
    }

    // One filter per specific type
    let bitOffset = 1;
    do {
        if ( typeBits & 1 ) {
            writer.push([ descBits | (bitOffset << 4), parsed.tokenHash, fdata ]);
        }
        bitOffset += 1;
        typeBits >>>= 1;
    } while ( typeBits !== 0 );
};
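
// Worked example (sketch; type slot values as defined in typeNameToTypeValue
// at the top of this file): a filter carrying the option `$image,script`
// has the bits for the `image` (slot 2) and `script` (slot 4) types set, so
// the loop above emits two compiled entries:
//
//     writer.push([ descBits | (2 << 4), parsed.tokenHash, fdata ]);  // image
//     writer.push([ descBits | (4 << 4), parsed.tokenHash, fdata ]);  // script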

/******************************************************************************/

FilterContainer.prototype.compileRedirectRule = function(parsed, writer) {
    const redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
    if ( Array.isArray(redirects) === false ) { return false; }
    writer.select(parsed.badFilter ? 1 : 0);
    const type = typeNameToTypeValue.redirect;
    for ( const redirect of redirects ) {
        writer.push([ type, redirect ]);
    }
    return true;
};
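
// For instance, a filter line carrying a `redirect=` option (e.g.
// `||example.com/ad.js$script,redirect=noopjs`) goes through this path: the
// redirect engine compiles it into its own rule format, and each resulting
// rule is written out under the special `redirect` type value so that
// freeze() can route it back to µb.redirectEngine.fromCompiledRule().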

/******************************************************************************/

FilterContainer.prototype.fromCompiledContent = function(reader) {
    // 0 = network filters
    reader.select(0);
    while ( reader.next() ) {
        this.acceptedCount += 1;
        if ( this.goodFilters.has(reader.line) ) {
            this.discardedCount += 1;
        } else {
            this.goodFilters.add(reader.line);
        }
    }

    // 1 = network filters: bad filter directives
    reader.select(1);
    while ( reader.next() ) {
        this.badFilters.add(reader.line);
    }
};
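
// The two sections of the compiled content mirror the writer side in
// compileToAtomicFilter()/compileRedirectRule(): section 0 holds normal
// network filter lines, section 1 holds the lines produced by `badfilter`
// variants. Both are only accumulated here; the actual cancellation happens
// in freeze(), where any good filter whose compiled line also appears in
// badFilters is discarded.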

/******************************************************************************/

FilterContainer.prototype.matchAndFetchData = function(
    dataType,
    requestURL,
    out,
    outlog
) {
    if ( this.dataFilters.size === 0 ) { return; }

    const url = this.urlTokenizer.setURL(requestURL);

    pageHostnameRegister = requestHostnameRegister =
        µb.URI.hostnameFromURI(url);

    // We need to visit ALL the matching filters.
    const toAddImportant = new Map();
    const toAdd = new Map();
    const toRemove = new Map();

    const tokenHashes = this.urlTokenizer.getTokens();
    let i = 0;
    while ( i < 32 ) {
        const tokenHash = tokenHashes[i++];
        if ( tokenHash === 0 ) { break; }
        const tokenOffset = tokenHashes[i++];
        let entry = this.dataFilters.get(tokenHash);
        while ( entry !== undefined ) {
            const f = entry.filter;
            if ( f.match(url, tokenOffset) === true ) {
                if ( entry.categoryBits & 0x001 ) {
                    toRemove.set(f.dataStr, entry);
                } else if ( entry.categoryBits & 0x002 ) {
                    toAddImportant.set(f.dataStr, entry);
                } else {
                    toAdd.set(f.dataStr, entry);
                }
            }
            entry = entry.next;
        }
    }
    let entry = this.dataFilters.get(this.noTokenHash);
    while ( entry !== undefined ) {
        const f = entry.filter;
        if ( f.match(url) === true ) {
            if ( entry.categoryBits & 0x001 ) {
                toRemove.set(f.dataStr, entry);
            } else if ( entry.categoryBits & 0x002 ) {
                toAddImportant.set(f.dataStr, entry);
            } else {
                toAdd.set(f.dataStr, entry);
            }
        }
        entry = entry.next;
    }

    if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; }

    // Remove entries overridden by other filters.
    for ( const key of toAddImportant.keys() ) {
        toAdd.delete(key);
        toRemove.delete(key);
    }
    for ( const key of toRemove.keys() ) {
        if ( key === '' ) {
            toAdd.clear();
            break;
        }
        toAdd.delete(key);
    }

    for ( const entry of toAddImportant ) {
        out.push(entry[0]);
        if ( outlog === undefined ) { continue; }
        let logData = entry[1].logData();
        logData.source = 'static';
        logData.result = 1;
        outlog.push(logData);
    }
    for ( const entry of toAdd ) {
        out.push(entry[0]);
        if ( outlog === undefined ) { continue; }
        let logData = entry[1].logData();
        logData.source = 'static';
        logData.result = 1;
        outlog.push(logData);
    }
    if ( outlog !== undefined ) {
        for ( const entry of toRemove.values() ) {
            const logData = entry.logData();
            logData.source = 'static';
            logData.result = 2;
            outlog.push(logData);
        }
    }
};
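
// Caller sketch (hypothetical values, for illustration only): collecting the
// `csp=` directives applicable to a document load.
//
//     const cspDirectives = [];
//     const cspLog = [];
//     µBlock.staticNetFilteringEngine.matchAndFetchData(
//         'csp',
//         'https://www.example.com/',
//         cspDirectives,
//         cspLog
//     );
//     // cspDirectives now holds the directive strings to enforce, and
//     // cspLog the corresponding logger entries, if any.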

/******************************************************************************/

FilterContainer.prototype.realmMatchString = function(
    realmBits,
    typeBits,
    partyBits
) {
    const exactType = typeBits & 0x80000000;
    typeBits &= 0x7FFFFFFF;

    const catBits00 = realmBits;
    const catBits01 = realmBits | typeBits;
    const catBits10 = realmBits | partyBits;
    const catBits11 = realmBits | typeBits | partyBits;

    const bucket00 = exactType === 0
        ? this.categories.get(catBits00)
        : undefined;
    const bucket01 = exactType !== 0 || typeBits !== 0
        ? this.categories.get(catBits01)
        : undefined;
    const bucket10 = exactType === 0 && partyBits !== 0
        ? this.categories.get(catBits10)
        : undefined;
    const bucket11 = (exactType !== 0 || typeBits !== 0) && partyBits !== 0
        ? this.categories.get(catBits11)
        : undefined;

    if (
        bucket00 === undefined && bucket01 === undefined &&
        bucket10 === undefined && bucket11 === undefined
    ) {
        return false;
    }

    let catBits = 0, f;

    // Pure hostname-based filters
    let tokenHash = this.dotTokenHash;
    if (
        (bucket00 !== undefined) &&
        (f = bucket00.get(tokenHash)) !== undefined &&
        (f.match() === true)
    ) {
        catBits = catBits00;
    } else if (
        (bucket01 !== undefined) &&
        (f = bucket01.get(tokenHash)) !== undefined &&
        (f.match() === true)
    ) {
        catBits = catBits01;
    } else if (
        (bucket10 !== undefined) &&
        (f = bucket10.get(tokenHash)) !== undefined &&
        (f.match() === true)
    ) {
        catBits = catBits10;
    } else if (
        (bucket11 !== undefined) &&
        (f = bucket11.get(tokenHash)) !== undefined &&
        (f.match() === true)
    ) {
        catBits = catBits11;
    }
    // Pattern-based filters
    else {
        const url = urlRegister;
        const tokenHashes = this.urlTokenizer.getTokens();
        let i = 0, tokenBeg = 0;
        for (;;) {
            tokenHash = tokenHashes[i];
            if ( tokenHash === 0 ) { return false; }
            tokenBeg = tokenHashes[i+1];
            if (
                (bucket00 !== undefined) &&
                (f = bucket00.get(tokenHash)) !== undefined &&
                (f.match(url, tokenBeg) === true)
            ) {
                catBits = catBits00;
                break;
            }
            if (
                (bucket01 !== undefined) &&
                (f = bucket01.get(tokenHash)) !== undefined &&
                (f.match(url, tokenBeg) === true)
            ) {
                catBits = catBits01;
                break;
            }
            if (
                (bucket10 !== undefined) &&
                (f = bucket10.get(tokenHash)) !== undefined &&
                (f.match(url, tokenBeg) === true)
            ) {
                catBits = catBits10;
                break;
            }
            if (
                (bucket11 !== undefined) &&
                (f = bucket11.get(tokenHash)) !== undefined &&
                (f.match(url, tokenBeg) === true)
            ) {
                catBits = catBits11;
                break;
            }
            i += 2;
        }
    }

    this.catbitsRegister = catBits;
    this.tokenRegister = tokenHash;
    this.filterRegister = f;
    return true;
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2019-04-19 22:33:46 +02:00
|
|
|
// Specialized handler
|
2014-07-30 02:07:08 +02:00
|
|
|
|
2016-11-08 13:13:26 +01:00
|
|
|
// https://github.com/gorhill/uBlock/issues/1477
|
|
|
|
// Special case: a blocking-generichide filter ALWAYS exists; it is implicit --
|
|
|
|
// thus we always first check for exception filters, then for important block
|
|
|
|
// filter if and only if there was a hit on an exception filter.
|
|
|
|
// https://github.com/gorhill/uBlock/issues/2103
|
|
|
|
// User may want to override `generichide` exception filters.
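//
// A minimal sketch of the evaluation order described above, for
// illustration only (the two predicate arguments are hypothetical
// stand-ins for the real exception/important-block lookups, not the
// engine's API):
/*
    const evaluateGenericHide = (matchException, matchImportantBlock) => {
        if ( matchException() !== true ) { return 0; } // implicit block stands
        if ( matchImportantBlock() === true ) { return 1; } // important block overrides the exception
        return 2; // exception wins
    };
*/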
|
|
|
|
|
2018-07-22 16:47:02 +02:00
|
|
|
FilterContainer.prototype.matchStringGenericHide = function(requestURL) {
|
2019-05-22 23:51:03 +02:00
|
|
|
const typeBits = typeNameToTypeValue['generichide'] | 0x80000000;
|
2016-11-08 13:13:26 +01:00
|
|
|
|
2019-04-19 22:33:46 +02:00
|
|
|
// Prime tokenizer: we get a normalized URL in return.
|
2019-06-19 01:16:39 +02:00
|
|
|
urlRegister = this.urlTokenizer.setURL(requestURL);
|
2019-04-19 22:33:46 +02:00
|
|
|
this.filterRegister = null;
|
2016-12-08 02:18:58 +01:00
|
|
|
|
2019-04-19 22:33:46 +02:00
|
|
|
// These registers will be used by various filters
|
|
|
|
pageHostnameRegister = requestHostnameRegister =
|
|
|
|
µb.URI.hostnameFromURI(requestURL);
|
2016-11-08 13:13:26 +01:00
|
|
|
|
2019-04-19 22:33:46 +02:00
|
|
|
// Exception filters
|
2019-05-22 23:51:03 +02:00
|
|
|
if ( this.realmMatchString(AllowAction, typeBits, FirstParty) ) {
|
2019-04-19 22:33:46 +02:00
|
|
|
// Important block filters.
|
2019-05-22 23:51:03 +02:00
|
|
|
if ( this.realmMatchString(BlockImportant, typeBits, FirstParty) ) {
|
2019-04-19 22:33:46 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 2;
|
2016-11-08 13:13:26 +01:00
|
|
|
}
|
2019-04-19 22:33:46 +02:00
|
|
|
return 0;
|
2016-11-08 13:13:26 +01:00
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2015-04-07 03:26:05 +02:00
|
|
|
// https://github.com/chrisaljoudi/uBlock/issues/116
|
2016-11-08 13:13:26 +01:00
|
|
|
// Some types of requests are exceptional; they need custom handling,
|
|
|
|
// not the generic handling.
|
2019-05-22 23:51:03 +02:00
|
|
|
// https://github.com/chrisaljoudi/uBlock/issues/519
|
|
|
|
// Use exact type match for anything beyond `other`. Also, be prepared to
|
|
|
|
// support unknown types.
|
2014-07-30 03:10:00 +02:00
|
|
|
|
2019-05-22 23:51:03 +02:00
|
|
|
FilterContainer.prototype.matchString = function(fctxt, modifiers = 0) {
|
|
|
|
let typeBits = typeNameToTypeValue[fctxt.type];
|
|
|
|
if ( modifiers === 0 ) {
|
|
|
|
if ( typeBits === undefined ) {
|
|
|
|
typeBits = otherTypeBitValue;
|
|
|
|
} else if ( typeBits === 0 || typeBits > otherTypeBitValue ) {
|
|
|
|
modifiers |= 0b0001;
|
2014-12-28 16:07:43 +01:00
|
|
|
}
|
2014-07-30 02:07:08 +02:00
|
|
|
}
|
2019-05-22 23:51:03 +02:00
|
|
|
if ( (modifiers & 0b0001) !== 0 ) {
|
|
|
|
if ( typeBits === undefined ) { return 0; }
|
|
|
|
typeBits |= 0x80000000;
|
2015-01-17 13:53:19 +01:00
|
|
|
}
|
2019-05-22 23:51:03 +02:00
|
|
|
|
2019-04-19 22:33:46 +02:00
|
|
|
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;
|
2014-06-24 00:42:43 +02:00
|
|
|
|
2015-12-29 17:34:41 +01:00
|
|
|
// Prime tokenizer: we get a normalized URL in return.
|
2019-06-19 01:16:39 +02:00
|
|
|
urlRegister = this.urlTokenizer.setURL(fctxt.url);
|
2019-04-19 22:33:46 +02:00
|
|
|
this.filterRegister = null;
|
2014-10-07 22:30:40 +02:00
|
|
|
|
2015-02-05 00:06:31 +01:00
|
|
|
// These registers will be used by various filters
|
2018-12-13 18:30:54 +01:00
|
|
|
pageHostnameRegister = fctxt.getDocHostname();
|
|
|
|
requestHostnameRegister = fctxt.getHostname();
|
2014-10-07 04:40:25 +02:00
|
|
|
|
2019-04-19 22:33:46 +02:00
|
|
|
// Important block filters.
|
|
|
|
if ( this.realmMatchString(BlockImportant, typeBits, partyBits) ) {
|
|
|
|
return 1;
|
2014-12-28 16:07:43 +01:00
|
|
|
}
|
2019-04-19 22:33:46 +02:00
|
|
|
// Block filters
|
|
|
|
if ( this.realmMatchString(BlockAction, typeBits, partyBits) ) {
|
|
|
|
// Exception filters
|
|
|
|
if ( this.realmMatchString(AllowAction, typeBits, partyBits) ) {
|
2017-05-12 16:35:11 +02:00
|
|
|
return 2;
|
2014-12-28 16:07:43 +01:00
|
|
|
}
|
2019-04-19 22:33:46 +02:00
|
|
|
return 1;
|
2014-06-24 00:42:43 +02:00
|
|
|
}
|
2019-04-19 22:33:46 +02:00
|
|
|
return 0;
|
2015-06-09 16:27:08 +02:00
|
|
|
};
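//
// Return-value convention for matchString(), as relied upon by benchmark()
// and test() further below: 0 = no filter matched, 1 = a block filter
// matched, 2 = an exception filter matched. A minimal usage sketch, assuming
// the engine has been instantiated and filters loaded (URLs below are
// placeholders):
/*
    const fctxt = µBlock.filteringContext.duplicate();
    fctxt.setDocOriginFromURL('https://example.com/');
    fctxt.setType('script');
    fctxt.setURL('https://example.net/ads.js');
    const r = µBlock.staticNetFilteringEngine.matchString(fctxt);
    // r === 1: blocked, r === 2: excepted, r === 0: no match
*/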
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2017-05-12 16:35:11 +02:00
|
|
|
FilterContainer.prototype.toLogData = function() {
|
2019-04-19 22:33:46 +02:00
|
|
|
if ( this.filterRegister === null ) { return; }
|
|
|
|
const logData = toLogDataInternal(
|
|
|
|
this.catbitsRegister,
|
|
|
|
this.tokenRegister,
|
|
|
|
this.filterRegister
|
|
|
|
);
|
2017-05-12 16:35:11 +02:00
|
|
|
logData.source = 'static';
|
2019-04-19 22:33:46 +02:00
|
|
|
logData.tokenHash = this.tokenRegister;
|
|
|
|
logData.result = this.filterRegister === null
|
|
|
|
? 0
|
|
|
|
: (this.catbitsRegister & 1 ? 2 : 1);
|
2017-05-12 16:35:11 +02:00
|
|
|
return logData;
|
2014-06-24 00:42:43 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
FilterContainer.prototype.getFilterCount = function() {
|
2016-03-17 18:56:21 +01:00
|
|
|
return this.acceptedCount - this.discardedCount;
|
2014-06-24 00:42:43 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2019-04-14 22:23:52 +02:00
|
|
|
// action: 1=test, 2=record
|
|
|
|
|
2019-05-23 01:23:04 +02:00
|
|
|
FilterContainer.prototype.benchmark = async function(action) {
|
|
|
|
const requests = await µb.loadBenchmarkDataset();
|
|
|
|
|
|
|
|
if ( Array.isArray(requests) === false || requests.length === 0 ) {
|
|
|
|
console.info('No requests found to benchmark');
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
console.info(`Benchmarking staticNetFilteringEngine.matchString()...`);
|
|
|
|
const fctxt = µb.filteringContext.duplicate();
|
|
|
|
let expected, recorded;
|
|
|
|
if ( action === 1 ) {
|
|
|
|
try {
|
|
|
|
expected = JSON.parse(
|
|
|
|
vAPI.localStorage.getItem('FilterContainer.benchmark.results')
|
2019-04-14 15:44:24 +02:00
|
|
|
);
|
2019-05-23 01:23:04 +02:00
|
|
|
} catch(ex) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ( action === 2 ) {
|
|
|
|
recorded = [];
|
|
|
|
}
|
|
|
|
|
|
|
|
const t0 = self.performance.now();
|
|
|
|
for ( let i = 0; i < requests.length; i++ ) {
|
|
|
|
const request = requests[i];
|
|
|
|
fctxt.setURL(request.url);
|
|
|
|
fctxt.setDocOriginFromURL(request.frameUrl);
|
|
|
|
fctxt.setType(request.cpt);
|
|
|
|
const r = this.matchString(fctxt);
|
|
|
|
if ( recorded !== undefined ) { recorded.push(r); }
|
|
|
|
if ( expected !== undefined && r !== expected[i] ) {
|
|
|
|
console.log('Mismatch with reference results:');
|
|
|
|
console.log(`\tExpected ${expected[i]}, got ${r}:`);
|
|
|
|
console.log(`\ttype=${fctxt.type}`);
|
|
|
|
console.log(`\turl=${fctxt.url}`);
|
|
|
|
console.log(`\tdocOrigin=${fctxt.getDocOrigin()}`);
|
2019-04-14 15:44:24 +02:00
|
|
|
}
|
2019-05-23 01:23:04 +02:00
|
|
|
}
|
|
|
|
const t1 = self.performance.now();
|
|
|
|
const dur = t1 - t0;
|
|
|
|
|
|
|
|
console.info(`Evaluated ${requests.length} requests in ${dur.toFixed(0)} ms`);
|
|
|
|
console.info(`\tAverage: ${(dur / requests.length).toFixed(3)} ms per request`);
|
|
|
|
if ( expected !== undefined ) {
|
|
|
|
console.info(`\tBlocked: ${expected.reduce((n,r)=>{return r===1?n+1:n;},0)}`);
|
|
|
|
console.info(`\tExcepted: ${expected.reduce((n,r)=>{return r===2?n+1:n;},0)}`);
|
|
|
|
}
|
|
|
|
if ( recorded !== undefined ) {
|
|
|
|
vAPI.localStorage.setItem(
|
|
|
|
'FilterContainer.benchmark.results',
|
|
|
|
JSON.stringify(recorded)
|
|
|
|
);
|
|
|
|
}
|
2018-11-02 20:18:50 +01:00
|
|
|
};
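//
// A possible way to exercise benchmark() from the extension's console,
// assuming µb.loadBenchmarkDataset() can resolve a request dataset
// (2 = record reference results, 1 = test against the recorded reference):
/*
    await µBlock.staticNetFilteringEngine.benchmark(2); // record
    await µBlock.staticNetFilteringEngine.benchmark(1); // test
*/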
|
|
|
|
|
2019-06-19 01:16:39 +02:00
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
FilterContainer.prototype.test = function(docURL, type, url) {
|
|
|
|
const fctxt = µb.filteringContext.duplicate();
|
|
|
|
fctxt.setDocOriginFromURL(docURL);
|
|
|
|
fctxt.setType(type);
|
|
|
|
fctxt.setURL(url);
|
|
|
|
const r = this.matchString(fctxt);
|
|
|
|
console.log(`${r}`);
|
|
|
|
if ( r !== 0 ) {
|
|
|
|
console.log(this.toLogData());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-04-19 22:33:46 +02:00
|
|
|
/******************************************************************************-
|
|
|
|
|
|
|
|
With default filter lists:
|
|
|
|
|
|
|
|
As of 2019-04-18:
|
|
|
|
|
|
|
|
{bits: "0", token: "ad", size: 926, f: FilterBucket}
|
|
|
|
{bits: "0", token: "ads", size: 636, f: FilterBucket}
|
|
|
|
{bits: "41", token: "phncdn", size: 253, f: FilterBucket}
|
|
|
|
{bits: "0", token: "analytic", size: 174, f: FilterBucket}
|
|
|
|
{bits: "0", token: "tracking", size: 155, f: FilterBucket}
|
|
|
|
{bits: "48", token: "http", size: 146, f: FilterBucket}
|
|
|
|
{bits: "48", token: "https", size: 139, f: FilterBucket}
|
|
|
|
{bits: "58", token: "http", size: 122, f: FilterBucket}
|
|
|
|
{bits: "0", token: "adv", size: 121, f: FilterBucket}
|
|
|
|
{bits: "58", token: "https", size: 118, f: FilterBucket}
|
|
|
|
{bits: "0", token: "advertis", size: 102, f: FilterBucket}
|
|
|
|
{bits: "8", token: "doublecl", size: 96, f: FilterBucket}
|
|
|
|
{bits: "41", token: "imasdk", size: 90, f: FilterBucket}
|
|
|
|
{bits: "0", token: "cdn", size: 89, f: FilterBucket}
|
|
|
|
{bits: "0", token: "track", size: 87, f: FilterBucket}
|
|
|
|
{bits: "0", token: "stats", size: 82, f: FilterBucket}
|
|
|
|
{bits: "0", token: "banner", size: 74, f: FilterBucket}
|
|
|
|
{bits: "0", token: "log", size: 72, f: FilterBucket}
|
|
|
|
{bits: "0", token: "ga", size: 71, f: FilterBucket}
|
|
|
|
{bits: "0", token: "gif", size: 67, f: FilterBucket}
|
|
|
|
{bits: "0", token: "cloudfro", size: 64, f: FilterBucket}
|
|
|
|
{bits: "0", token: "amazonaw", size: 61, f: FilterBucket}
|
|
|
|
{bits: "41", token: "ajax", size: 58, f: FilterBucket}
|
|
|
|
{bits: "0", token: "tracker", size: 56, f: FilterBucket}
|
|
|
|
{bits: "40", token: "pagead2", size: 53, f: FilterBucket}
|
|
|
|
{bits: "0", token: "affiliat", size: 53, f: FilterBucket}
|
|
|
|
|
|
|
|
*/
|
2018-11-02 20:18:50 +01:00
|
|
|
|
2019-04-15 17:45:33 +02:00
|
|
|
FilterContainer.prototype.bucketHistogram = function() {
|
|
|
|
const results = [];
|
|
|
|
for ( const [ bits, category ] of this.categories ) {
|
|
|
|
for ( const [ th, f ] of category ) {
|
2019-04-20 23:25:32 +02:00
|
|
|
if ( f instanceof FilterPair ) {
|
|
|
|
const token = µBlock.urlTokenizer.stringFromTokenHash(th);
|
|
|
|
results.push({ bits: bits.toString(16), token, size: f.size, f });
|
|
|
|
continue;
|
|
|
|
}
|
2019-04-19 22:33:46 +02:00
|
|
|
if ( f instanceof FilterBucket ) {
|
|
|
|
const token = µBlock.urlTokenizer.stringFromTokenHash(th);
|
|
|
|
results.push({ bits: bits.toString(16), token, size: f.size, f });
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ( f instanceof FilterHostnameDict ) {
|
|
|
|
const token = µBlock.urlTokenizer.stringFromTokenHash(th);
|
|
|
|
results.push({ bits: bits.toString(16), token, size: f.size, f });
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ( f instanceof FilterJustOrigin ) {
|
|
|
|
const token = µBlock.urlTokenizer.stringFromTokenHash(th);
|
|
|
|
results.push({ bits: bits.toString(16), token, size: f.size, f });
|
|
|
|
continue;
|
|
|
|
}
|
2019-04-15 17:45:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
results.sort((a, b) => {
|
|
|
|
return b.size - a.size;
|
|
|
|
});
|
|
|
|
console.log(results);
|
|
|
|
};
|
|
|
|
|
2019-04-17 01:20:56 +02:00
|
|
|
/*******************************************************************************
|
|
|
|
|
2019-04-19 22:33:46 +02:00
|
|
|
With default filter lists:
|
2019-04-17 01:20:56 +02:00
|
|
|
|
2019-04-19 22:33:46 +02:00
|
|
|
As of 2019-04-13:
|
2019-04-17 01:20:56 +02:00
|
|
|
|
|
|
|
{"FilterPlainHnAnchored" => 12619}
|
|
|
|
{"FilterPlainPrefix1" => 8743}
|
|
|
|
{"FilterGenericHnAnchored" => 5231}
|
|
|
|
{"FilterOriginHit" => 4149}
|
|
|
|
{"FilterPair" => 2381}
|
|
|
|
{"FilterBucket" => 1940}
|
|
|
|
{"FilterPlainHostname" => 1612}
|
|
|
|
{"FilterOriginHitSet" => 1430}
|
|
|
|
{"FilterPlainLeftAnchored" => 799}
|
|
|
|
{"FilterGeneric" => 588}
|
|
|
|
{"FilterPlain" => 510}
|
|
|
|
{"FilterOriginMiss" => 299}
|
|
|
|
{"FilterDataHolder" => 280}
|
|
|
|
{"FilterOriginMissSet" => 150}
|
|
|
|
{"FilterTrue" => 130}
|
|
|
|
{"FilterRegex" => 124}
|
|
|
|
{"FilterPlainRightAnchored" => 110}
|
|
|
|
{"FilterGenericHnAndRightAnchored" => 95}
|
|
|
|
{"FilterHostnameDict" => 59}
|
|
|
|
{"FilterPlainPrefix0" => 29}
|
|
|
|
{"FilterExactMatch" => 5}
|
|
|
|
{"FilterOriginMixedSet" => 3}
|
|
|
|
|
|
|
|
Observations:
|
|
|
|
- No need for FilterPlainPrefix0.
|
|
|
|
- FilterPlainHnAnchored and FilterPlainPrefix1 are good candidates
|
|
|
|
for storing in a plain string trie.
|
|
|
|
|
2019-04-25 23:48:08 +02:00
|
|
|
As of 2019-04-25:
|
|
|
|
|
|
|
|
{"FilterPlainHnAnchored" => 11078}
|
|
|
|
{"FilterPlainPrefix1" => 7195}
|
|
|
|
{"FilterPrefix1Trie" => 5720}
|
|
|
|
{"FilterOriginHit" => 3561}
|
|
|
|
{"FilterWildcard2HnAnchored" => 2943}
|
|
|
|
{"FilterPair" => 2391}
|
|
|
|
{"FilterBucket" => 1922}
|
|
|
|
{"FilterWildcard1HnAnchored" => 1910}
|
|
|
|
{"FilterHnAnchoredTrie" => 1586}
|
|
|
|
{"FilterPlainHostname" => 1391}
|
|
|
|
{"FilterOriginHitSet" => 1155}
|
|
|
|
{"FilterPlain" => 634}
|
|
|
|
{"FilterWildcard1" => 423}
|
|
|
|
{"FilterGenericHnAnchored" => 389}
|
|
|
|
{"FilterOriginMiss" => 302}
|
|
|
|
{"FilterGeneric" => 163}
|
|
|
|
{"FilterOriginMissSet" => 150}
|
|
|
|
{"FilterRegex" => 124}
|
|
|
|
{"FilterPlainRightAnchored" => 110}
|
|
|
|
{"FilterGenericHnAndRightAnchored" => 95}
|
|
|
|
{"FilterHostnameDict" => 59}
|
|
|
|
{"FilterPlainLeftAnchored" => 30}
|
|
|
|
{"FilterJustOrigin" => 22}
|
|
|
|
{"FilterHTTPJustOrigin" => 19}
|
|
|
|
{"FilterHTTPSJustOrigin" => 18}
|
|
|
|
{"FilterExactMatch" => 5}
|
|
|
|
{"FilterOriginMixedSet" => 3}
|
|
|
|
|
2019-04-17 01:20:56 +02:00
|
|
|
*/
|
2019-04-15 17:45:33 +02:00
|
|
|
|
2019-04-17 01:01:14 +02:00
|
|
|
FilterContainer.prototype.filterClassHistogram = function() {
|
|
|
|
const filterClassDetails = new Map();
|
|
|
|
|
|
|
|
for ( let i = 0; i < filterClasses.length; i++ ) {
|
|
|
|
filterClassDetails.set(i, { name: filterClasses[i].name, count: 0, });
|
|
|
|
}
|
|
|
|
// Artificial classes to report content of tries
|
2019-06-19 01:16:39 +02:00
|
|
|
filterClassDetails.set(1000, { name: 'FilterPlainTrie', count: 0, });
|
|
|
|
filterClassDetails.set(1001, { name: 'FilterPlainHnAnchoredTrie', count: 0, });
|
2019-04-17 01:01:14 +02:00
|
|
|
|
|
|
|
const countFilter = function(f) {
|
|
|
|
if ( f instanceof Object === false ) { return; }
|
|
|
|
filterClassDetails.get(f.fid).count += 1;
|
|
|
|
if ( f.wrapped ) {
|
|
|
|
countFilter(f.wrapped);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
for ( const category of this.categories.values() ) {
|
|
|
|
for ( const f of category.values() ) {
|
|
|
|
countFilter(f);
|
|
|
|
if ( f instanceof FilterBucket ) {
|
|
|
|
for ( const g of f.filters ) { countFilter(g); }
|
2019-06-19 01:16:39 +02:00
|
|
|
if ( f.plainTrie !== null ) {
|
|
|
|
filterClassDetails.get(1000).count += f.plainTrie.size;
|
2019-04-17 01:01:14 +02:00
|
|
|
}
|
|
|
|
if ( f.plainHnAnchoredTrie !== null ) {
|
|
|
|
filterClassDetails.get(1001).count += f.plainHnAnchoredTrie.size;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ( f instanceof FilterPair ) {
|
|
|
|
countFilter(f.f1);
|
|
|
|
countFilter(f.f2);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
const results = Array.from(filterClassDetails.values()).sort((a, b) => {
|
|
|
|
return b.count - a.count;
|
|
|
|
});
|
|
|
|
console.log(results);
|
|
|
|
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2014-06-24 00:42:43 +02:00
|
|
|
return new FilterContainer();
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
})();
|