Add support for entity-matching in domain= filter option

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/1008

This commit adds support for entity-matching in the filter
option `domain=`. Example:

    pattern$domain=google.*

The `*` above is meant to match any suffix from the Public
Suffix List. The semantics are exactly the same as those of
the already existing entity-matching support in static
extended filtering:

- https://github.com/gorhill/uBlock/wiki/Static-filter-syntax#entity

Additionally, in this commit:

Fix cases where "just-origin" filters of the form `|http*://`
were erroneously normalized to `|http://`. The proper
normalization of `|http*://` is `*`.

Add support for storing hostname strings in the character
buffer of a hntrie container. As of commit time, there are
5,544 instances of FilterOriginHit and 732 instances of
FilterOriginMiss, filters which each require storing/matching
a single hostname string. Those strings are now stored in the
character buffer of the already existing origin-related
hntrie container. (The same approach is used for plain
patterns which are not part of a bidi-trie.)
This commit is contained in:
Raymond Hill 2020-05-24 10:46:16 -04:00
parent 56a3aff857
commit 3c67d2b89f
No known key found for this signature in database
GPG key ID: 25E1490B761470C2
3 changed files with 377 additions and 208 deletions

View file

@ -138,8 +138,8 @@ const µBlock = (( ) => { // jshint ignore:line
// Read-only // Read-only
systemSettings: { systemSettings: {
compiledMagic: 27, // Increase when compiled format changes compiledMagic: 28, // Increase when compiled format changes
selfieMagic: 26, // Increase when selfie format changes selfieMagic: 28, // Increase when selfie format changes
}, },
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View file

@ -407,6 +407,49 @@ const HNTrieContainer = class {
return true; return true;
} }
// The following *Hostname() methods can be used to store hostname strings
// outside the trie. This is useful to store/match hostnames which are
// not part of a collection, and yet still benefit from storing the strings
// into a trie container's character buffer.
// TODO: WASM version of matchesHostname()
storeHostname(hn) {
let n = hn.length;
if ( n > 255 ) {
hn = hn.slice(-255);
n = 255;
}
if ( (this.buf.length - this.buf32[CHAR1_SLOT]) < n ) {
this.growBuf(0, n);
}
const offset = this.buf32[CHAR1_SLOT];
this.buf32[CHAR1_SLOT] = offset + n;
const buf8 = this.buf;
for ( let i = 0; i < n; i++ ) {
buf8[offset+i] = hn.charCodeAt(i);
}
return offset - this.buf32[CHAR0_SLOT];
}
extractHostname(i, n) {
const textDecoder = new TextDecoder();
const offset = this.buf32[CHAR0_SLOT] + i;
return textDecoder.decode(this.buf.subarray(offset, offset + n));
}
matchesHostname(hn, i, n) {
this.setNeedle(hn);
const buf8 = this.buf;
const hr = buf8[255];
if ( n > hr ) { return false; }
const hl = hr - n;
const nl = this.buf32[CHAR0_SLOT] + i;
for ( let j = 0; j < n; j++ ) {
if ( buf8[nl+j] !== buf8[hl+j] ) { return false; }
}
return n === hr || hn.charCodeAt(hl-1) === 0x2E /* '.' */;
}
async enableWASM() { async enableWASM() {
if ( typeof WebAssembly !== 'object' ) { return false; } if ( typeof WebAssembly !== 'object' ) { return false; }
if ( this.wasmMemory instanceof WebAssembly.Memory ) { return true; } if ( this.wasmMemory instanceof WebAssembly.Memory ) { return true; }

View file

@ -173,12 +173,26 @@ const typeValueFromCatBits = catBits => (catBits >>> 4) & 0b11111;
let $requestURL = ''; let $requestURL = '';
let $requestHostname = ''; let $requestHostname = '';
let $docHostname = ''; let $docHostname = '';
let $docDomain = '';
let $tokenBeg = 0; let $tokenBeg = 0;
let $patternMatchLeft = 0; let $patternMatchLeft = 0;
let $patternMatchRight = 0; let $patternMatchRight = 0;
// EXPERIMENT: $requestTypeBit const $docEntity = {
let $requestTypeBit = 0; entity: undefined,
compute() {
if ( this.entity === undefined ) {
const pos = $docDomain.indexOf('.');
this.entity = pos !== -1
? $docHostname.slice(0, pos - $docDomain.length)
: '';
}
return this.entity;
},
reset() {
this.entity = undefined;
},
};
/******************************************************************************/ /******************************************************************************/
@ -1072,39 +1086,6 @@ registerFilterClass(FilterTrailingSeparator);
/******************************************************************************/ /******************************************************************************/
// Filter unit matching on the request type: holds a set of type bits and
// matches when it shares at least one bit with `$requestTypeBit`.
const FilterType = class {
    constructor(bits) {
        this.typeBits = bits;
    }

    // True when the current request type bit is part of this unit's bits.
    match() {
        const shared = this.typeBits & $requestTypeBit;
        return shared !== 0;
    }

    // Nothing to report to the logger for this unit.
    logData() {
    }

    toSelfie() {
        const selfie = [ this.fid, this.typeBits ];
        return selfie;
    }

    // Only the bits which are part of `allNetworkTypesBits` are retained.
    static compile(details) {
        const networkBits = details.typeBits & allNetworkTypesBits;
        return [ FilterType.fid, networkBits ];
    }

    static fromCompiled(args) {
        const bits = args[1];
        return new FilterType(bits);
    }

    static fromSelfie(args) {
        const bits = args[1];
        return new FilterType(bits);
    }
};
registerFilterClass(FilterType);
/******************************************************************************/
const FilterRegex = class { const FilterRegex = class {
constructor(s) { constructor(s) {
this.s = s; this.s = s;
@ -1162,60 +1143,79 @@ registerFilterClass(FilterRegex);
// The optimal "class" is picked according to the content of the // The optimal "class" is picked according to the content of the
// `domain=` filter option. // `domain=` filter option.
const filterOrigin = new (class { const filterOrigin = (( ) => {
const FilterOrigin = class {
constructor() { constructor() {
this.trieContainer = new µb.HNTrieContainer(); this.trieContainer = new µb.HNTrieContainer();
} }
compile(details, prepend, units) { compile(domainOpt, prepend, units) {
const domainOpt = details.domainOpt; const hostnameHits = [];
let compiledMiss, compiledHit; const hostnameMisses = [];
// One hostname const entityHits = [];
if ( domainOpt.indexOf('|') === -1 ) { const entityMisses = [];
// Must be a miss for ( const s of FilterParser.domainOptIterator(domainOpt) ) {
if ( domainOpt.charCodeAt(0) === 0x7E /* '~' */ ) { const len = s.length;
compiledMiss = FilterOriginMiss.compile(domainOpt); const beg = len > 1 && s.charCodeAt(0) === 0x7E ? 1 : 0;
const end = len > 2 &&
s.charCodeAt(len - 1) === 0x2A /* '*' */ &&
s.charCodeAt(len - 2) === 0x2E /* '.' */
? len - 2 : len;
if ( end <= beg ) { continue; }
if ( end === len ) {
if ( beg === 0 ) {
hostnameHits.push(s);
} else {
hostnameMisses.push(s.slice(1));
} }
// Must be a hit } else {
else { if ( beg === 0 ) {
compiledHit = FilterOriginHit.compile(domainOpt); entityHits.push(s.slice(0, -2));
} else {
entityMisses.push(s.slice(1, -2));
} }
} }
// Many hostnames.
// Must be in set (none negated).
else if ( domainOpt.indexOf('~') === -1 ) {
compiledHit = FilterOriginHitSet.compile(domainOpt);
} }
// Must not be in set (all negated). const compiledHit = [];
else if ( /^~(?:[^|~]+\|~)+[^|~]+$/.test(domainOpt) ) { if ( entityHits.length !== 0 ) {
compiledMiss = FilterOriginMissSet.compile(domainOpt); for ( const entity of entityHits ) {
compiledHit.push(FilterOriginEntityHit.compile(entity));
} }
// Must be in one set, but not in the other.
else {
const hostnames = domainOpt.split('|');
const missSet = hostnames.filter(hn => {
if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) {
return hn;
} }
}); if ( hostnameHits.length === 1 ) {
const hitSet = hostnames.filter(hn => { compiledHit.push(FilterOriginHit.compile(hostnameHits[0]));
if ( hn.charCodeAt(0) !== 0x7E /* '~' */ ) { } else if ( hostnameHits.length > 1 ) {
return hn; compiledHit.push(FilterOriginHitSet.compile(hostnameHits.join('|')));
} }
}); if ( compiledHit.length > 1 ) {
compiledMiss = missSet.length === 1 compiledHit[0] = [ FilterCompositeAny.compile(compiledHit.slice()) ];
? FilterOriginMiss.compile(missSet[0]) compiledHit.length = 1;
: FilterOriginMissSet.compile(missSet.join('|')); }
compiledHit = hitSet.length === 1 const compiledMiss = [];
? FilterOriginHit.compile(hitSet[0]) if ( entityMisses.length !== 0 ) {
: FilterOriginHitSet.compile(hitSet.join('|')); for ( const entity of entityMisses ) {
compiledMiss.push(FilterOriginEntityMiss.compile(entity));
}
}
if ( hostnameMisses.length === 1 ) {
compiledMiss.push(FilterOriginMiss.compile(hostnameMisses[0]));
} else if ( hostnameMisses.length > 1 ) {
compiledMiss.push(FilterOriginMissSet.compile(hostnameMisses.join('|')));
} }
if ( prepend ) { if ( prepend ) {
if ( compiledHit ) { units.unshift(compiledHit); } if ( compiledHit.length !== 0 ) {
if ( compiledMiss ) { units.unshift(compiledMiss); } units.unshift(compiledHit[0]);
}
if ( compiledMiss.length !== 0 ) {
units.unshift(...compiledMiss);
}
} else { } else {
if ( compiledMiss ) { units.push(compiledMiss); } if ( compiledMiss.length !== 0 ) {
if ( compiledHit ) { units.push(compiledHit); } units.push(...compiledMiss);
}
if ( compiledHit.length !== 0 ) {
units.push(compiledHit[0]);
}
} }
} }
@ -1241,45 +1241,51 @@ const filterOrigin = new (class {
fromSelfie() { fromSelfie() {
} }
};
return new FilterOrigin();
})(); })();
/******************************************************************************/ /******************************************************************************/
const FilterOriginHit = class { const FilterOriginHit = class {
constructor(hostname) { constructor(i, n) {
this.hostname = hostname; this.i = i;
this.n = n;
} }
match() { match() {
const haystack = $docHostname; return filterOrigin.trieContainer.matchesHostname(
const needle = this.hostname; $docHostname,
const offset = haystack.length - needle.length; this.i,
if ( offset < 0 ) { return false; } this.n
if ( haystack.charCodeAt(offset) !== needle.charCodeAt(0) ) { );
return false;
}
if ( haystack.endsWith(needle) === false ) { return false; }
return offset === 0 || haystack.charCodeAt(offset-1) === 0x2E /* '.' */;
} }
toSelfie() { toSelfie() {
return [ this.fid, this.hostname ]; return [ this.fid, this.i, this.n ];
} }
logData(details) { logData(details) {
details.domains.push(this.hostname); details.domains.push(this.getHostname());
} }
static compile(domainOpt) { getHostname() {
return [ FilterOriginHit.fid, domainOpt ]; return filterOrigin.trieContainer.extractHostname(this.i, this.n);
}
static compile(hostname) {
return [ FilterOriginHit.fid, hostname ];
} }
static fromCompiled(args) { static fromCompiled(args) {
return new FilterOriginHit(args[1]); return new FilterOriginHit(
filterOrigin.trieContainer.storeHostname(args[1]),
args[1].length
);
} }
static fromSelfie(args) { static fromSelfie(args) {
return new FilterOriginHit(args[1]); return new FilterOriginHit(args[1], args[2]);
} }
}; };
@ -1287,43 +1293,28 @@ registerFilterClass(FilterOriginHit);
/******************************************************************************/ /******************************************************************************/
const FilterOriginMiss = class { const FilterOriginMiss = class extends FilterOriginHit {
constructor(hostname) {
this.hostname = hostname.slice(1);
}
match() { match() {
const haystack = $docHostname; return super.match() === false;
if ( haystack.endsWith(this.hostname) ) {
const offset = haystack.length - this.hostname.length;
if (
offset === 0 ||
haystack.charCodeAt(offset-1) === 0x2E /* '.' */
) {
return false;
}
}
return true;
} }
logData(details) { logData(details) {
details.domains.push(`~${this.hostname}`); details.domains.push(`~${this.getHostname()}`);
} }
toSelfie() { static compile(hostname) {
return [ this.fid, `~${this.hostname}` ]; return [ FilterOriginMiss.fid, hostname ];
}
static compile(domainOpt) {
return [ FilterOriginMiss.fid, domainOpt ];
} }
static fromCompiled(args) { static fromCompiled(args) {
return new FilterOriginMiss(args[1]); return new FilterOriginMiss(
filterOrigin.trieContainer.storeHostname(args[1]),
args[1].length
);
} }
static fromSelfie(args) { static fromSelfie(args) {
return new FilterOriginMiss(args[1]); return new FilterOriginMiss(args[1], args[2]);
} }
}; };
@ -1342,7 +1333,7 @@ const FilterOriginHitSet = class {
match() { match() {
if ( this.oneOf === null ) { if ( this.oneOf === null ) {
this.oneOf = filterOrigin.trieContainer.fromIterable( this.oneOf = filterOrigin.trieContainer.fromIterable(
this.domainOpt.split('|') FilterParser.domainOptIterator(this.domainOpt)
); );
} }
return this.oneOf.matches($docHostname) !== -1; return this.oneOf.matches($docHostname) !== -1;
@ -1383,35 +1374,15 @@ registerFilterClass(FilterOriginHitSet);
/******************************************************************************/ /******************************************************************************/
const FilterOriginMissSet = class { const FilterOriginMissSet = class extends FilterOriginHitSet {
constructor(domainOpt, noneOf = null) {
this.domainOpt = domainOpt;
this.noneOf = noneOf !== null
? filterOrigin.trieContainer.createOne(noneOf)
: null;
}
match() { match() {
if ( this.noneOf === null ) { return super.match() === false;
this.noneOf = filterOrigin.trieContainer.fromIterable(
this.domainOpt.replace(/~/g, '').split('|')
);
}
return this.noneOf.matches($docHostname) === -1;
} }
logData(details) { logData(details) {
details.domains.push(this.domainOpt); details.domains.push(
} '~' + this.domainOpt.replace('|', '|~')
);
toSelfie() {
return [
this.fid,
this.domainOpt,
this.noneOf !== null
? filterOrigin.trieContainer.compileOne(this.noneOf)
: null
];
} }
static compile(domainOpt) { static compile(domainOpt) {
@ -1435,6 +1406,74 @@ registerFilterClass(FilterOriginMissSet);
/******************************************************************************/ /******************************************************************************/
// Filter unit for a positive entity entry of a `domain=` option, e.g.
// `google.*`. `this.entity` is the entry without its trailing `.*`.
const FilterOriginEntityHit = class {
    constructor(entity) {
        this.entity = entity;
    }

    // True when the entity computed by `$docEntity` is `this.entity`, or
    // ends with `this.entity` right after a '.' label separator.
    match() {
        const docEntity = $docEntity.compute();
        if ( docEntity === '' ) { return false; }
        const wanted = this.entity;
        const start = docEntity.length - wanted.length;
        // The first-character comparison is a cheap test made before the
        // full suffix comparison.
        const mismatch =
            start < 0 ||
            docEntity.charCodeAt(start) !== wanted.charCodeAt(0) ||
            docEntity.endsWith(wanted) === false;
        if ( mismatch ) { return false; }
        if ( start === 0 ) { return true; }
        return docEntity.charCodeAt(start - 1) === 0x2E /* '.' */;
    }

    toSelfie() {
        const selfie = [ this.fid, this.entity ];
        return selfie;
    }

    // The entity is reported in its filter option form, i.e. with `.*`.
    logData(details) {
        const { domains } = details;
        domains.push(`${this.entity}.*`);
    }

    static compile(entity) {
        const compiled = [ FilterOriginEntityHit.fid, entity ];
        return compiled;
    }

    static fromCompiled(args) {
        const entity = args[1];
        return new FilterOriginEntityHit(entity);
    }

    static fromSelfie(args) {
        const entity = args[1];
        return new FilterOriginEntityHit(entity);
    }
};
registerFilterClass(FilterOriginEntityHit);
/******************************************************************************/
// Filter unit for a negated entity entry of a `domain=` option, e.g.
// `~google.*`: matches exactly when FilterOriginEntityHit does not.
const FilterOriginEntityMiss = class extends FilterOriginEntityHit {
    match() {
        const hit = super.match();
        return hit === false;
    }

    // The entity is reported in its filter option form, i.e. `~entity.*`.
    logData(details) {
        const { domains } = details;
        domains.push(`~${this.entity}.*`);
    }

    static compile(entity) {
        const compiled = [ FilterOriginEntityMiss.fid, entity ];
        return compiled;
    }

    static fromCompiled(args) {
        const entity = args[1];
        return new FilterOriginEntityMiss(entity);
    }

    static fromSelfie(args) {
        const entity = args[1];
        return new FilterOriginEntityMiss(entity);
    }
};
registerFilterClass(FilterOriginEntityMiss);
/******************************************************************************/
const FilterDataHolder = class { const FilterDataHolder = class {
constructor(dataType, data) { constructor(dataType, data) {
this.dataType = dataType; this.dataType = dataType;
@ -1549,6 +1588,12 @@ const FilterCollection = class {
} while ( i !== 0 ); } while ( i !== 0 );
} }
logData(details) {
this.forEach(iunit => {
filterUnits[iunit].logData(details);
});
}
toSelfie() { toSelfie() {
return [ this.fid, this.i ]; return [ this.fid, this.i ];
} }
@ -1580,7 +1625,36 @@ const FilterCollection = class {
/******************************************************************************/ /******************************************************************************/
const FilterComposite = class extends FilterCollection { const FilterCompositeAny = class extends FilterCollection {
match() {
const sequences = filterSequences;
const units = filterUnits;
let i = this.i;
while ( i !== 0 ) {
if ( units[sequences[i+0]].match() ) { return true; }
i = sequences[i+1];
}
return false;
}
static compile(fdata) {
return FilterCollection.compile(FilterCompositeAny, fdata);
}
static fromCompiled(args) {
return FilterCollection.fromCompiled(FilterCompositeAny, args);
}
static fromSelfie(args) {
return FilterCollection.fromSelfie(FilterCompositeAny, args);
}
};
registerFilterClass(FilterCompositeAny);
/******************************************************************************/
const FilterCompositeAll = class extends FilterCollection {
match() { match() {
const sequences = filterSequences; const sequences = filterSequences;
const units = filterUnits; const units = filterUnits;
@ -1622,26 +1696,20 @@ const FilterComposite = class extends FilterCollection {
return details; return details;
} }
logData(details) {
this.forEach(iunit => {
filterUnits[iunit].logData(details);
});
}
static compile(fdata) { static compile(fdata) {
return FilterCollection.compile(FilterComposite, fdata); return FilterCollection.compile(FilterCompositeAll, fdata);
} }
static fromCompiled(args) { static fromCompiled(args) {
return FilterCollection.fromCompiled(FilterComposite, args); return FilterCollection.fromCompiled(FilterCompositeAll, args);
} }
static fromSelfie(args) { static fromSelfie(args) {
return FilterCollection.fromSelfie(FilterComposite, args); return FilterCollection.fromSelfie(FilterCompositeAll, args);
} }
}; };
registerFilterClass(FilterComposite); registerFilterClass(FilterCompositeAll);
/******************************************************************************/ /******************************************************************************/
@ -2001,7 +2069,7 @@ const FilterBucket = class extends FilterCollection {
filterUnits[iunit] = null; filterUnits[iunit] = null;
return; return;
} }
// FilterComposite is assumed here, i.e. with conditions. // FilterCompositeAll is assumed here, i.e. with conditions.
if ( f.n === 1 ) { if ( f.n === 1 ) {
filterUnits[iunit] = null; filterUnits[iunit] = null;
iunit = filterSequences[f.i]; iunit = filterSequences[f.i];
@ -2037,7 +2105,7 @@ const FilterParser = class {
this.cantWebsocket = vAPI.cantWebsocket; this.cantWebsocket = vAPI.cantWebsocket;
this.domainOpt = ''; this.domainOpt = '';
this.noTokenHash = urlTokenizer.noTokenHash; this.noTokenHash = urlTokenizer.noTokenHash;
this.reBadDomainOptChars = /[*+?^${}()[\]\\]/; this.reBadDomainOptChars = /[+?^${}()[\]\\]/;
this.reHostnameRule1 = /^\w[\w.-]*[a-z]$/i; this.reHostnameRule1 = /^\w[\w.-]*[a-z]$/i;
this.reHostnameRule2 = /^\w[\w.-]*[a-z]\^?$/i; this.reHostnameRule2 = /^\w[\w.-]*[a-z]\^?$/i;
this.reCanTrimCarets1 = /^[^*]*$/; this.reCanTrimCarets1 = /^[^*]*$/;
@ -2651,6 +2719,47 @@ const FilterParser = class {
) && ) &&
this.domainOpt.indexOf('~') === -1; this.domainOpt.indexOf('~') === -1;
} }
domainIsEntity(s) {
const l = s.length;
return l > 2 &&
s.charCodeAt(l-1) === 0x2A /* '*' */ &&
s.charCodeAt(l-2) === 0x2E /* '.' */;
}
static domainOptIterator(domainOpt) {
return new FilterParser.DomainOptIterator(domainOpt);
}
};
/******************************************************************************/
// Iterator over the '|'-separated entries of a `domain=` option value. The
// entries are sliced out of the string one at a time, no intermediate array
// is created. The instance is both the iterable and the iteration result:
// next() updates `value`/`done` in place and returns the instance itself.
// An empty `domainOpt` yields one single empty-string entry.
FilterParser.DomainOptIterator = class {
    constructor(domainOpt) {
        this.domainOpt = domainOpt;
        // Position at which the next entry starts, -1 once the last entry
        // has been returned.
        this.i = 0;
        this.value = undefined;
        this.done = false;
    }

    next() {
        if ( this.i === -1 ) {
            this.value = undefined;
            this.done = true;
            return this;
        }
        const pos = this.domainOpt.indexOf('|', this.i);
        if ( pos !== -1 ) {
            this.value = this.domainOpt.slice(this.i, pos);
            this.i = pos + 1;
        } else {
            // Last entry: everything up to the end of the string.
            this.value = this.domainOpt.slice(this.i);
            this.i = -1;
        }
        return this;
    }

    [Symbol.iterator]() {
        return this;
    }
};
/******************************************************************************/ /******************************************************************************/
@ -3013,37 +3122,50 @@ FilterContainer.prototype.compile = function(raw, writer) {
parsed.makeToken(); parsed.makeToken();
const units = [];
// Special pattern/option cases: // Special pattern/option cases:
// - `*$domain=...` // - `*$domain=...`
// - `|http://$domain=...` // - `|http://$domain=...`
// - `|https://$domain=...` // - `|https://$domain=...`
// The semantic of "just-origin" filters is that contrary to normal
// filters, the original filter is split into as many filters as there
// are entries in the `domain=` option.
if ( parsed.isJustOrigin() ) { if ( parsed.isJustOrigin() ) {
const hostnames = parsed.domainOpt.split('|'); const tokenHash = parsed.tokenHash;
if ( parsed.f === '*' ) { if ( parsed.f === '*' || parsed.f.startsWith('http*') ) {
parsed.tokenHash = this.anyTokenHash; parsed.tokenHash = this.anyTokenHash;
} else if /* 'https:' */ ( parsed.f.startsWith('https') ) { } else if /* 'https:' */ ( parsed.f.startsWith('https') ) {
parsed.tokenHash = this.anyHTTPSTokenHash; parsed.tokenHash = this.anyHTTPSTokenHash;
} else /* 'http:' */ { } else /* 'http:' */ {
parsed.tokenHash = this.anyHTTPTokenHash; parsed.tokenHash = this.anyHTTPTokenHash;
} }
for ( const hn of hostnames ) { const entities = [];
for ( const hn of FilterParser.domainOptIterator(parsed.domainOpt) ) {
if ( parsed.domainIsEntity(hn) === false ) {
this.compileToAtomicFilter(parsed, hn, writer); this.compileToAtomicFilter(parsed, hn, writer);
} else {
entities.push(hn);
}
}
if ( entities.length === 0 ) { return true; }
parsed.tokenHash = tokenHash;
const leftAnchored = (parsed.anchor & 0b010) !== 0;
for ( const entity of entities ) {
const units = [];
filterPattern.compile(parsed, units);
if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); }
filterOrigin.compile(entity, true, units);
this.compileToAtomicFilter(
parsed, FilterCompositeAll.compile(units), writer
);
} }
return true; return true;
} }
const units = [];
// Pattern // Pattern
filterPattern.compile(parsed, units); filterPattern.compile(parsed, units);
// Type
// EXPERIMENT: $requestTypeBit
//if ( (parsed.typeBits & allNetworkTypesBits) !== 0 ) {
// units.unshift(FilterType.compile(parsed));
// parsed.typeBits &= ~allNetworkTypesBits;
//}
// Anchor // Anchor
if ( (parsed.anchor & 0b100) !== 0 ) { if ( (parsed.anchor & 0b100) !== 0 ) {
if ( parsed.isPureHostname ) { if ( parsed.isPureHostname ) {
@ -3061,7 +3183,7 @@ FilterContainer.prototype.compile = function(raw, writer) {
// Origin // Origin
if ( parsed.domainOpt !== '' ) { if ( parsed.domainOpt !== '' ) {
filterOrigin.compile( filterOrigin.compile(
parsed, parsed.domainOpt,
units.length !== 0 && filterClasses[units[0][0]].isSlow === true, units.length !== 0 && filterClasses[units[0][0]].isSlow === true,
units units
); );
@ -3079,7 +3201,7 @@ FilterContainer.prototype.compile = function(raw, writer) {
const fdata = units.length === 1 const fdata = units.length === 1
? units[0] ? units[0]
: FilterComposite.compile(units); : FilterCompositeAll.compile(units);
this.compileToAtomicFilter(parsed, fdata, writer); this.compileToAtomicFilter(parsed, fdata, writer);
@ -3211,6 +3333,8 @@ FilterContainer.prototype.realmMatchAndFetchData = function(
FilterContainer.prototype.matchAndFetchData = function(fctxt, type) { FilterContainer.prototype.matchAndFetchData = function(fctxt, type) {
$requestURL = urlTokenizer.setURL(fctxt.url); $requestURL = urlTokenizer.setURL(fctxt.url);
$docHostname = fctxt.getDocHostname(); $docHostname = fctxt.getDocHostname();
$docDomain = fctxt.getDocDomain();
$docEntity.reset();
$requestHostname = fctxt.getHostname(); $requestHostname = fctxt.getHostname();
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;
@ -3399,7 +3523,9 @@ FilterContainer.prototype.matchStringReverse = function(type, url) {
this.$filterUnit = 0; this.$filterUnit = 0;
// These registers will be used by various filters // These registers will be used by various filters
$docHostname = $requestHostname = µb.URI.hostnameFromURI(url); $docHostname = $requestHostname = vAPI.hostnameFromNetworkURL(url);
$docDomain = vAPI.domainFromHostname($docHostname);
$docEntity.reset();
// Exception filters // Exception filters
if ( this.realmMatchString(AllowAction, typeBits, FirstParty) ) { if ( this.realmMatchString(AllowAction, typeBits, FirstParty) ) {
@ -3431,8 +3557,6 @@ FilterContainer.prototype.matchString = function(fctxt, modifiers = 0) {
modifiers |= 0b0001; modifiers |= 0b0001;
} }
} }
// EXPERIMENT: $requestTypeBit
//$requestTypeBit = 1 << ((typeValue >>> 4) - 1);
if ( (modifiers & 0b0001) !== 0 ) { if ( (modifiers & 0b0001) !== 0 ) {
if ( typeValue === undefined ) { return 0; } if ( typeValue === undefined ) { return 0; }
typeValue |= 0x80000000; typeValue |= 0x80000000;
@ -3446,6 +3570,8 @@ FilterContainer.prototype.matchString = function(fctxt, modifiers = 0) {
// These registers will be used by various filters // These registers will be used by various filters
$docHostname = fctxt.getDocHostname(); $docHostname = fctxt.getDocHostname();
$docDomain = fctxt.getDocDomain();
$docEntity.reset();
$requestHostname = fctxt.getHostname(); $requestHostname = fctxt.getHostname();
// Important block filters. // Important block filters.
@ -3666,7 +3792,7 @@ FilterContainer.prototype.bucketHistogram = function() {
"FilterHostnameDict" Content => 60772} "FilterHostnameDict" Content => 60772}
"FilterPatternPlain" => 26432} "FilterPatternPlain" => 26432}
"FilterComposite" => 17125} "FilterCompositeAll" => 17125}
"FilterPlainTrie Content" => 13519} "FilterPlainTrie Content" => 13519}
"FilterAnchorHnLeft" => 11931} "FilterAnchorHnLeft" => 11931}
"FilterOriginHit" => 5524} "FilterOriginHit" => 5524}
@ -3729,7 +3855,7 @@ FilterContainer.prototype.filterClassHistogram = function() {
filterClassDetails.get(1001).count += f.size; filterClassDetails.get(1001).count += f.size;
continue; continue;
} }
if ( f instanceof FilterComposite ) { if ( f instanceof FilterCompositeAll ) {
let i = f.i; let i = f.i;
while ( i !== 0 ) { while ( i !== 0 ) {
countFilter(filterUnits[filterSequences[i+0]]); countFilter(filterUnits[filterSequences[i+0]]);