Categorize `google` as a bad token for map key purposes

In the static network filtering engine, the `google` token is too
generic and probably leads to too many false positives, besides
causing an overly large filter bucket.
This commit is contained in:
Raymond Hill 2019-04-16 06:52:13 -04:00
parent 60858b6719
commit 4940cda154
No known key found for this signature in database
GPG key ID: 25E1490B761470C2

View file

@ -1475,6 +1475,7 @@ const FilterParser = function() {
this.reHasUnicode = /[^\x00-\x7F]/; this.reHasUnicode = /[^\x00-\x7F]/;
this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/; this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/; this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/;
this.reIsWildcarded = /[\^\*]/;
this.domainOpt = ''; this.domainOpt = '';
this.noTokenHash = µb.urlTokenizer.tokenHashFromString('*'); this.noTokenHash = µb.urlTokenizer.tokenHashFromString('*');
this.unsupportedTypeBit = this.bitFromType('unsupported'); this.unsupportedTypeBit = this.bitFromType('unsupported');
@ -1545,6 +1546,7 @@ FilterParser.prototype.reset = function() {
this.tokenBeg = 0; this.tokenBeg = 0;
this.types = 0; this.types = 0;
this.important = 0; this.important = 0;
this.wildcarded = false;
this.unsupported = false; this.unsupported = false;
return this; return this;
}; };
@ -1866,6 +1868,8 @@ FilterParser.prototype.parse = function(raw) {
this.anchor = 0; this.anchor = 0;
} }
this.wildcarded = this.reIsWildcarded.test(s);
// This might look weird but we gain memory footprint by not going through // This might look weird but we gain memory footprint by not going through
// toLowerCase(), at least on Chromium. Because copy-on-write? // toLowerCase(), at least on Chromium. Because copy-on-write?
@ -1892,6 +1896,7 @@ var reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*]|$)/;
var badTokens = new Set([ var badTokens = new Set([
'com', 'com',
'google',
'http', 'http',
'https', 'https',
'icon', 'icon',
@ -1966,7 +1971,7 @@ FilterParser.prototype.makeToken = function() {
if ( this.f === '*' ) { return; } if ( this.f === '*' ) { return; }
let matches = null; let matches = null;
if ( (this.anchor & 0x4) !== 0 && this.f.indexOf('*') === -1 ) { if ( (this.anchor & 0x4) !== 0 && this.wildcarded === false ) {
matches = reHostnameToken.exec(this.f); matches = reHostnameToken.exec(this.f);
} }
if ( matches === null ) { if ( matches === null ) {
@ -1983,7 +1988,6 @@ FilterParser.prototype.makeToken = function() {
/******************************************************************************/ /******************************************************************************/
const FilterContainer = function() { const FilterContainer = function() {
this.reIsGeneric = /[\^\*]/;
this.filterParser = new FilterParser(); this.filterParser = new FilterParser();
this.urlTokenizer = µb.urlTokenizer; this.urlTokenizer = µb.urlTokenizer;
this.noTokenHash = this.urlTokenizer.tokenHashFromString('*'); this.noTokenHash = this.urlTokenizer.tokenHashFromString('*');
@ -2277,7 +2281,7 @@ FilterContainer.prototype.compile = function(raw, writer) {
fdata = FilterGenericHnAndRightAnchored.compile(parsed); fdata = FilterGenericHnAndRightAnchored.compile(parsed);
} else if ( parsed.anchor === 0x4 ) { } else if ( parsed.anchor === 0x4 ) {
if ( if (
this.reIsGeneric.test(parsed.f) === false && parsed.wildcarded === false &&
parsed.tokenHash !== parsed.noTokenHash && parsed.tokenHash !== parsed.noTokenHash &&
parsed.tokenBeg === 0 parsed.tokenBeg === 0
) { ) {
@ -2285,10 +2289,7 @@ FilterContainer.prototype.compile = function(raw, writer) {
} else { } else {
fdata = FilterGenericHnAnchored.compile(parsed); fdata = FilterGenericHnAnchored.compile(parsed);
} }
} else if ( } else if ( parsed.wildcarded || parsed.tokenHash === parsed.noTokenHash ) {
this.reIsGeneric.test(parsed.f) ||
parsed.tokenHash === parsed.noTokenHash
) {
fdata = FilterGeneric.compile(parsed); fdata = FilterGeneric.compile(parsed);
} else if ( parsed.anchor === 0x2 ) { } else if ( parsed.anchor === 0x2 ) {
fdata = FilterPlainLeftAnchored.compile(parsed); fdata = FilterPlainLeftAnchored.compile(parsed);