From 3f3a1543ea7fa51d700157a7f6bf0da08dd7a32b Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Fri, 19 Apr 2019 16:33:46 -0400 Subject: [PATCH] Add HNTrie-based filter classes to store origin-only filters Related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/528#issuecomment-484408622 Following STrie-related work in the above issue, I noticed that a large number of filters in EasyList were filters which only had to match against the document origin. For instance, among just the top 10 most populous buckets, there were four such buckets with over a hundred entries each: - bits: 72, token: "http", 146 entries - bits: 72, token: "https", 139 entries - bits: 88, token: "http", 122 entries - bits: 88, token: "https", 118 entries The filters in these buckets have to be matched against all the network requests. In order to leverage HNTrie for these filters[1], they are now handled in a special way so as to ensure they all end up in a single HNTrie (per bucket), which means that instead of scanning hundreds of entries per URL, there is now a single scan per bucket per URL for these apply-everywhere filters. Now, any filter which fulfills ALL of the following conditions will be processed in a special manner internally: - Is of the form `|https://` or `|http://` or `*`; and - Does have a `domain=` option; and - Does not have a negated domain in its `domain=` option; and - Does not have a `csp=` option; and - Does not have a `redirect=` option If a filter does not fulfill ALL the conditions above, there is no change in behavior. A filter which matches ALL of the above will be processed in a special manner: - The `domain=` option will be decomposed so as to create as many distinct filters as there are distinct values in the `domain=` option - This also applies to the `badfilter` version of the filter, which means it now becomes possible to `badfilter` only one of the distinct filters without having to `badfilter` all of them. 
- The logger will always report these special filters with only a single hostname in the `domain=` option. *** [1] HNTrie is currently WASM-ed on Firefox. --- src/js/background.js | 4 +- src/js/static-net-filtering.js | 787 +++++++++++++++++++-------------- src/js/strie.js | 121 +++-- src/js/utils.js | 118 ++--- 4 files changed, 590 insertions(+), 440 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index 1531a2bb8..7125119ac 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -137,8 +137,8 @@ const µBlock = (function() { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 8, // Increase when compiled format changes - selfieMagic: 9 // Increase when selfie format changes + compiledMagic: 10, // Increase when compiled format changes + selfieMagic: 10 // Increase when selfie format changes }, restoreBackupSettings: { diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 3b775dda7..f9d24c6ff 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -101,16 +101,8 @@ const typeValueToTypeName = { 20: 'unsupported' }; -const BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty; -const BlockAnyType = BlockAction | AnyType; -const BlockAnyParty = BlockAction | AnyParty; - -const AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty; -const AllowAnyType = AllowAction | AnyType; -const AllowAnyParty = AllowAction | AnyParty; - -const genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generichide, - genericHideImportant = BlockAction | AnyParty | typeNameToTypeValue.generichide | Important; +const AnyTypeAnyParty = AnyType | AnyParty; +const BlockImportant = BlockAction | Important; // ABP filters: https://adblockplus.org/en/filters // regex tester: http://regex101.com/ @@ -701,8 +693,19 @@ registerFilterClass(FilterRegex); // The optimal "class" is picked according to the content of the // `domain=` filter option. 
-const filterOrigin = { - compile: function(details, wrapped) { +const filterOrigin = new (class { + constructor() { + let trieDetails; + try { + trieDetails = JSON.parse( + vAPI.localStorage.getItem('FilterOrigin.trieDetails') + ); + } catch(ex) { + } + this.trieContainer = new HNTrieContainer(trieDetails); + } + + compile(details, wrapped) { const domainOpt = details.domainOpt; // One hostname if ( domainOpt.indexOf('|') === -1 ) { @@ -723,38 +726,32 @@ const filterOrigin = { } // Must be in one set, but not in the other. return FilterOriginMixedSet.compile(domainOpt, wrapped); - }, - logData: function(f, arg1, arg2) { + } + + logData(f, arg1, arg2) { const out = f.wrapped.logData(); out.compiled = [ f.fid, arg1, out.compiled ]; if ( out.opts !== undefined ) { out.opts += ','; } out.opts = `domain=${arg2 || arg1}`; return out; - }, - trieContainer: (function() { - let trieDetails; - try { - trieDetails = JSON.parse( - vAPI.localStorage.getItem('FilterOrigin.trieDetails') - ); - } catch(ex) { - } - return new HNTrieContainer(trieDetails); - })(), - readyToUse: function() { + } + + readyToUse() { return this.trieContainer.readyToUse(); - }, - reset: function() { + } + + reset() { return this.trieContainer.reset(); - }, - optimize: function() { + } + + optimize() { const trieDetails = this.trieContainer.optimize(); vAPI.localStorage.setItem( 'FilterOrigin.trieDetails', JSON.stringify(trieDetails) ); - }, -}; + } +})(); /******************************************************************************/ @@ -850,11 +847,13 @@ registerFilterClass(FilterOriginMiss); /******************************************************************************/ const FilterOriginHitSet = class { - constructor(domainOpt, wrapped) { + constructor(domainOpt, oneOf, wrapped) { this.domainOpt = domainOpt.length < 128 ? domainOpt : µb.stringDeduplicater.lookup(domainOpt); - this.oneOf = null; + this.oneOf = oneOf !== null + ? 
filterOrigin.trieContainer.createOne(oneOf) + : null; this.wrapped = wrapped; } @@ -873,17 +872,25 @@ const FilterOriginHitSet = class { } compile() { - return [ this.fid, this.domainOpt, this.wrapped.compile() ]; + return [ + this.fid, + this.domainOpt, + this.oneOf !== null + ? filterOrigin.trieContainer.compileOne(this.oneOf) + : null, + this.wrapped.compile() + ]; } static compile(domainOpt, wrapped) { - return [ FilterOriginHitSet.fid, domainOpt, wrapped ]; + return [ FilterOriginHitSet.fid, domainOpt, null, wrapped ]; } static load(args) { return new FilterOriginHitSet( args[1], - filterFromCompiledData(args[2]) + args[2], + filterFromCompiledData(args[3]) ); } }; @@ -893,11 +900,13 @@ registerFilterClass(FilterOriginHitSet); /******************************************************************************/ const FilterOriginMissSet = class { - constructor(domainOpt, wrapped) { + constructor(domainOpt, noneOf, wrapped) { this.domainOpt = domainOpt.length < 128 ? domainOpt : µb.stringDeduplicater.lookup(domainOpt); - this.noneOf = null; + this.noneOf = noneOf !== null + ? filterOrigin.trieContainer.createOne(noneOf) + : null; this.wrapped = wrapped; } @@ -916,17 +925,25 @@ const FilterOriginMissSet = class { } compile() { - return [ this.fid, this.domainOpt, this.wrapped.compile() ]; + return [ + this.fid, + this.domainOpt, + this.noneOf !== null + ? 
filterOrigin.trieContainer.compileOne(this.noneOf) + : null, + this.wrapped.compile() + ]; } static compile(domainOpt, wrapped) { - return [ FilterOriginMissSet.fid, domainOpt, wrapped ]; + return [ FilterOriginMissSet.fid, domainOpt, null, wrapped ]; } static load(args) { return new FilterOriginMissSet( args[1], - filterFromCompiledData(args[2]) + args[2], + filterFromCompiledData(args[3]) ); } }; @@ -936,12 +953,16 @@ registerFilterClass(FilterOriginMissSet); /******************************************************************************/ const FilterOriginMixedSet = class { - constructor(domainOpt, wrapped) { + constructor(domainOpt, oneOf, noneOf, wrapped) { this.domainOpt = domainOpt.length < 128 ? domainOpt : µb.stringDeduplicater.lookup(domainOpt); - this.oneOf = null; - this.noneOf = null; + this.oneOf = oneOf !== null + ? filterOrigin.trieContainer.createOne(oneOf) + : null; + this.noneOf = noneOf !== null + ? filterOrigin.trieContainer.createOne(noneOf) + : null; this.wrapped = wrapped; } @@ -971,17 +992,29 @@ const FilterOriginMixedSet = class { } compile() { - return [ this.fid, this.domainOpt, this.wrapped.compile() ]; + return [ + this.fid, + this.domainOpt, + this.oneOf !== null + ? filterOrigin.trieContainer.compileOne(this.oneOf) + : null, + this.noneOf !== null + ? filterOrigin.trieContainer.compileOne(this.noneOf) + : null, + this.wrapped.compile() + ]; } static compile(domainOpt, wrapped) { - return [ FilterOriginMixedSet.fid, domainOpt, wrapped ]; + return [ FilterOriginMixedSet.fid, domainOpt, null, null, wrapped ]; } static load(args) { return new FilterOriginMixedSet( args[1], - filterFromCompiledData(args[2]) + args[2], + args[3], + filterFromCompiledData(args[4]) ); } }; @@ -1129,6 +1162,95 @@ registerFilterClass(FilterHostnameDict); /******************************************************************************/ +// Dictionary of hostnames for filters which only purpose is to match +// the document origin. 
+ +const FilterJustOrigin = class { + constructor(args) { + this.h = ''; // short-lived register + this.dict = filterOrigin.trieContainer.createOne(args); + } + + get size() { + return this.dict.size; + } + + add(hn) { + return this.dict.add(hn); + } + + match() { + const pos = this.dict.matches(pageHostnameRegister); + if ( pos === -1 ) { return false; } + this.h = pageHostnameRegister.slice(pos); + return true; + } + + logData() { + return { + raw: '*', + regex: '^', + compiled: this.h + }; + } + + compile() { + return [ this.fid, filterOrigin.trieContainer.compileOne(this.dict) ]; + } + + static load(args) { + return new FilterJustOrigin(args[1]); + } +}; + +registerFilterClass(FilterJustOrigin); + +/******************************************************************************/ + +const FilterHTTPSJustOrigin = class extends FilterJustOrigin { + match(url) { + return url.startsWith('https://') && super.match(); + } + + logData() { + return { + raw: '|https://', + regex: '^https://', + compiled: this.h + }; + } + + static load(args) { + return new FilterHTTPSJustOrigin(args[1]); + } +}; + +registerFilterClass(FilterHTTPSJustOrigin); + +/******************************************************************************/ + +const FilterHTTPJustOrigin = class extends FilterJustOrigin { + match(url) { + return url.startsWith('http://') && super.match(); + } + + logData() { + return { + raw: '|http://', + regex: '^http://', + compiled: this.h + }; + } + + static load(args) { + return new FilterHTTPJustOrigin(args[1]); + } +}; + +registerFilterClass(FilterHTTPJustOrigin); + +/******************************************************************************/ + const FilterPair = class { constructor(a, b) { this.f1 = a; @@ -1409,7 +1531,7 @@ const FilterParser = function() { this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/; this.reIsWildcarded = /[\^\*]/; this.domainOpt = ''; - this.noTokenHash = µb.urlTokenizer.tokenHashFromString('*'); + this.noTokenHash = 
µb.urlTokenizer.noTokenHash; this.unsupportedTypeBit = this.bitFromType('unsupported'); // All network request types to bitmap // bring origin to 0 (from 4 -- see typeNameToTypeValue) @@ -1819,14 +1941,14 @@ FilterParser.prototype.parse = function(raw) { // These "bad tokens" are collated manually. // Hostname-anchored with no wildcard always have a token index of 0. -var reHostnameToken = /^[0-9a-z]+/; -var reGoodToken = /[%0-9a-z]{2,}/g; -var reRegexToken = /[%0-9A-Za-z]{2,}/g; -var reRegexTokenAbort = /[([]/; -var reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/; -var reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*]|$)/; +const reHostnameToken = /^[0-9a-z]+/; +const reGoodToken = /[%0-9a-z]{2,}/g; +const reRegexToken = /[%0-9A-Za-z]{2,}/g; +const reRegexTokenAbort = /[([]/; +const reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/; +const reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*]|$)/; -var badTokens = new Set([ +const badTokens = new Set([ 'com', 'google', 'http', @@ -1842,13 +1964,13 @@ var badTokens = new Set([ FilterParser.prototype.findFirstGoodToken = function() { reGoodToken.lastIndex = 0; - var s = this.f, - matches, lpos, - badTokenMatch = null; + const s = this.f; + let matches; + let badTokenMatch = null; while ( (matches = reGoodToken.exec(s)) !== null ) { // https://github.com/gorhill/uBlock/issues/997 // Ignore token if preceded by wildcard. 
- lpos = matches.index; + const lpos = matches.index; if ( lpos !== 0 && s.charCodeAt(lpos - 1) === 0x2A /* '*' */ ) { continue; } @@ -1868,10 +1990,10 @@ FilterParser.prototype.findFirstGoodToken = function() { FilterParser.prototype.extractTokenFromRegex = function() { reRegexToken.lastIndex = 0; - var s = this.f, - matches, prefix; + const s = this.f; + let matches; while ( (matches = reRegexToken.exec(s)) !== null ) { - prefix = s.slice(0, matches.index); + const prefix = s.slice(0, matches.index); if ( reRegexTokenAbort.test(prefix) ) { return; } if ( reRegexBadPrefix.test(prefix) || @@ -1916,14 +2038,27 @@ FilterParser.prototype.makeToken = function() { } }; +/******************************************************************************/ + +FilterParser.prototype.isJustOrigin = function() { + return this.datatype === undefined && + this.redirect === false && + this.domainOpt !== '' && + /^(?:\*|https?:(?:\/\/)?)$/.test(this.f) && + this.domainOpt.indexOf('~') === -1; +}; + /******************************************************************************/ /******************************************************************************/ const FilterContainer = function() { this.filterParser = new FilterParser(); this.urlTokenizer = µb.urlTokenizer; - this.noTokenHash = this.urlTokenizer.tokenHashFromString('*'); - this.dotTokenHash = this.urlTokenizer.tokenHashFromString('.'); + this.noTokenHash = this.urlTokenizer.noTokenHash; + this.dotTokenHash = this.urlTokenizer.dotTokenHash; + this.anyTokenHash = this.urlTokenizer.anyTokenHash; + this.anyHTTPSTokenHash = this.urlTokenizer.anyHTTPSTokenHash; + this.anyHTTPTokenHash = this.urlTokenizer.anyHTTPTokenHash; this.reset(); }; @@ -1945,15 +2080,16 @@ FilterContainer.prototype.reset = function() { this.dataFilters = new Map(); this.filterParser.reset(); - // This will invalidate all hn tries throughout uBO: - filterOrigin.reset(); + // This will invalidate all tries FilterHostnameDict.reset(); + filterOrigin.reset(); 
FilterBucket.reset(); // Runtime registers - this.cbRegister = undefined; - this.thRegister = undefined; - this.fRegister = null; + this.urlRegister = ''; + this.catbitsRegister = 0; + this.tokenRegister = 0; + this.filterRegister = null; }; /******************************************************************************/ @@ -2014,6 +2150,33 @@ FilterContainer.prototype.freeze = function() { continue; } + if ( tokenHash === this.anyTokenHash ) { + if ( entry === undefined ) { + entry = new FilterJustOrigin(); + bucket.set(this.anyTokenHash, entry); + } + entry.add(fdata); + continue; + } + + if ( tokenHash === this.anyHTTPSTokenHash ) { + if ( entry === undefined ) { + entry = new FilterHTTPSJustOrigin(); + bucket.set(this.anyHTTPSTokenHash, entry); + } + entry.add(fdata); + continue; + } + + if ( tokenHash === this.anyHTTPTokenHash ) { + if ( entry === undefined ) { + entry = new FilterHTTPJustOrigin(); + bucket.set(this.anyHTTPTokenHash, entry); + } + entry.add(fdata); + continue; + } + if ( entry === undefined ) { bucket.set(tokenHash, filterFromCompiledData(fdata)); continue; @@ -2036,8 +2199,8 @@ FilterContainer.prototype.freeze = function() { } this.filterParser.reset(); - this.goodFilters = new Set(); - filterOrigin.optimize(); + this.badFilters.clear(); + this.goodFilters.clear(); FilterHostnameDict.optimize(); FilterBucket.optimize(); this.frozen = true; @@ -2078,11 +2241,17 @@ FilterContainer.prototype.toSelfie = function(path) { return selfie; }; + filterOrigin.optimize(); + return Promise.all([ µBlock.assets.put( `${path}/FilterHostnameDict.trieContainer`, FilterHostnameDict.trieContainer.serialize(µBlock.base128) ), + µBlock.assets.put( + `${path}/FilterOrigin.trieContainer`, + filterOrigin.trieContainer.serialize(µBlock.base128) + ), µBlock.assets.put( `${path}/FilterBucket.trieContainer`, FilterBucket.trieContainer.serialize(µBlock.base128) @@ -2114,6 +2283,13 @@ FilterContainer.prototype.fromSelfie = function(path) { ); return true; }), + 
µBlock.assets.get(`${path}/FilterOrigin.trieContainer`).then(details => { + filterOrigin.trieContainer.unserialize( + details.content, + µBlock.base128 + ); + return true; + }), µBlock.assets.get(`${path}/FilterBucket.trieContainer`).then(details => { FilterBucket.trieContainer.unserialize( details.content, @@ -2205,9 +2381,15 @@ FilterContainer.prototype.compile = function(raw, writer) { } else if ( parsed.hostnamePure ) { fdata = FilterPlainHostname.compile(parsed); } else if ( parsed.f === '*' ) { + if ( parsed.isJustOrigin() ) { + parsed.tokenHash = this.anyTokenHash; + for ( const hn of parsed.domainOpt.split('|') ) { + this.compileToAtomicFilter(parsed, hn, writer); + } + return true; + } fdata = FilterTrue.compile(); } else if ( parsed.anchor === 0x5 ) { - // https://github.com/gorhill/uBlock/issues/1669 fdata = FilterGenericHnAndRightAnchored.compile(parsed); } else if ( parsed.anchor === 0x4 ) { if ( @@ -2222,6 +2404,22 @@ FilterContainer.prototype.compile = function(raw, writer) { } else if ( parsed.wildcarded || parsed.tokenHash === parsed.noTokenHash ) { fdata = FilterGeneric.compile(parsed); } else if ( parsed.anchor === 0x2 ) { + if ( parsed.isJustOrigin() ) { + if ( parsed.f === 'https://' ) { + parsed.tokenHash = this.anyHTTPSTokenHash; + for ( const hn of parsed.domainOpt.split('|') ) { + this.compileToAtomicFilter(parsed, hn, writer); + } + return true; + } + if ( parsed.f === 'http://' ) { + parsed.tokenHash = this.anyHTTPTokenHash; + for ( const hn of parsed.domainOpt.split('|') ) { + this.compileToAtomicFilter(parsed, hn, writer); + } + return true; + } + } fdata = FilterPlainLeftAnchored.compile(parsed); } else if ( parsed.anchor === 0x1 ) { fdata = FilterPlainRightAnchored.compile(parsed); @@ -2310,15 +2508,9 @@ FilterContainer.prototype.fromCompiledContent = function(reader) { } // 1 = network filters: bad filter directives - // Since we are going to keep bad filter fingerprints around, we ensure - // they are "detached" from the parent 
string from which they are sliced. - // We keep bad filter fingerprints around to use them when user - // incrementally add filters (through "Block element" for example). reader.select(1); while ( reader.next() ) { - if ( this.badFilters.has(reader.line) === false ) { - this.badFilters.add(µb.orphanizeString(reader.line)); - } + this.badFilters.add(reader.line); } }; @@ -2415,18 +2607,16 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out /******************************************************************************/ -// bucket: Map -// url: string - -FilterContainer.prototype.matchTokens = function(bucket, url) { +FilterContainer.prototype.matchTokens = function(bucket) { // Hostname-only filters let f = bucket.get(this.dotTokenHash); if ( f !== undefined && f.match() === true ) { - this.thRegister = this.dotTokenHash; - this.fRegister = f; + this.tokenRegister = this.dotTokenHash; + this.filterRegister = f; return true; } + const url = this.urlRegister; const tokenHashes = this.urlTokenizer.getTokens(); let i = 0; for (;;) { @@ -2434,27 +2624,91 @@ FilterContainer.prototype.matchTokens = function(bucket, url) { if ( tokenHash === 0 ) { break; } f = bucket.get(tokenHash); if ( f !== undefined && f.match(url, tokenHashes[i+1]) === true ) { - this.thRegister = tokenHash; - this.fRegister = f; + this.tokenRegister = tokenHash; + this.filterRegister = f; return true; } i += 2; } - // Untokenizable filters - f = bucket.get(this.noTokenHash); - if ( f !== undefined && f.match(url, 0) === true ) { - this.thRegister = this.noTokenHash; - this.fRegister = f; - return true; - } - return false; }; /******************************************************************************/ -// Specialized handlers +FilterContainer.prototype.realmMatchStringExactType = function( + realmBits, + typeBits, + partyBits +) { + let bucket; + let catBits = realmBits | typeBits; + if ( (bucket = this.categories.get(catBits)) ) { + if ( this.matchTokens(bucket) ) { 
+ this.catbitsRegister = catBits; + return true; + } + } + if ( partyBits !== 0 ) { + catBits = realmBits | typeBits | partyBits; + if ( (bucket = this.categories.get(catBits)) ) { + if ( this.matchTokens(bucket) ) { + this.catbitsRegister = catBits; + return true; + } + } + } + return false; +}; + +/******************************************************************************/ + +FilterContainer.prototype.realmMatchString = function( + realmBits, + typeBits, + partyBits +) { + let bucket; + let catBits = realmBits | AnyTypeAnyParty; + if ( (bucket = this.categories.get(catBits)) ) { + if ( this.matchTokens(bucket) ) { + this.catbitsRegister = catBits; + return true; + } + } + if ( partyBits !== 0 ) { + catBits = realmBits | AnyType | partyBits; + if ( (bucket = this.categories.get(catBits)) ) { + if ( this.matchTokens(bucket) ) { + this.catbitsRegister = catBits; + return true; + } + } + } + if ( typeBits !== 0 ) { + catBits = realmBits | AnyParty | typeBits; + if ( (bucket = this.categories.get(catBits)) ) { + if ( this.matchTokens(bucket) ) { + this.catbitsRegister = catBits; + return true; + } + } + } + if ( typeBits !== 0 && partyBits !== 0 ) { + catBits = realmBits | typeBits | partyBits; + if ( (bucket = this.categories.get(catBits)) ) { + if ( this.matchTokens(bucket) ) { + this.catbitsRegister = catBits; + return true; + } + } + } + return false; +}; + +/******************************************************************************/ + +// Specialized handler // https://github.com/gorhill/uBlock/issues/1477 // Special case: blocking-generichide filter ALWAYS exists, it is implicit -- @@ -2464,28 +2718,26 @@ FilterContainer.prototype.matchTokens = function(bucket, url) { // User may want to override `generichide` exception filters. 
FilterContainer.prototype.matchStringGenericHide = function(requestURL) { - let url = this.urlTokenizer.setURL(requestURL); + const typeBits = typeNameToTypeValue['generichide']; - // https://github.com/gorhill/uBlock/issues/2225 - // Important: - // - `pageHostnameRegister` is used by FilterOrigin?.match(). - // - `requestHostnameRegister` is used by FilterHostnameDict.match(). - pageHostnameRegister = requestHostnameRegister = µb.URI.hostnameFromURI(url); + // Prime tokenizer: we get a normalized URL in return. + this.urlRegister = this.urlTokenizer.setURL(requestURL); + this.filterRegister = null; - let bucket = this.categories.get(genericHideException); - if ( !bucket || this.matchTokens(bucket, url) === false ) { - this.fRegister = null; - return 0; + // These registers will be used by various filters + pageHostnameRegister = requestHostnameRegister = + µb.URI.hostnameFromURI(requestURL); + + // Exception filters + if ( this.realmMatchStringExactType(AllowAction, typeBits, FirstParty) ) { + // Important block filters. + if ( this.realmMatchStringExactType(BlockImportant, typeBits, FirstParty) ) { + return 1; + } + return 2; } + return 0; - bucket = this.categories.get(genericHideImportant); - if ( bucket && this.matchTokens(bucket, url) ) { - this.cbRegister = genericHideImportant; - return 1; - } - - this.cbRegister = genericHideException; - return 2; }; /******************************************************************************/ @@ -2495,244 +2747,84 @@ FilterContainer.prototype.matchStringGenericHide = function(requestURL) { // not the generic handling. FilterContainer.prototype.matchStringExactType = function(fctxt, requestType) { - // Special cases. 
- if ( requestType === 'generichide' ) { - return this.matchStringGenericHide(fctxt.url); - } - let type = typeNameToTypeValue[requestType]; - if ( type === undefined ) { return 0; } + const typeBits = typeNameToTypeValue[requestType]; + if ( typeBits === undefined ) { return 0; } + const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; // Prime tokenizer: we get a normalized URL in return. - let url = this.urlTokenizer.setURL(fctxt.url); + this.urlRegister = this.urlTokenizer.setURL(fctxt.url); + this.filterRegister = null; // These registers will be used by various filters pageHostnameRegister = fctxt.getDocHostname(); requestHostnameRegister = fctxt.getHostname(); - let party = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; - let categories = this.categories, - catBits, bucket; - - this.fRegister = null; - - // https://github.com/chrisaljoudi/uBlock/issues/139 - // Test against important block filters - catBits = BlockAnyParty | Important | type; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - return 1; - } + // Important block filters. 
+ if ( this.realmMatchStringExactType(BlockImportant, typeBits, partyBits) ) { + return 1; } - catBits = BlockAction | Important | type | party; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - return 1; - } - } - - // Test against block filters - catBits = BlockAnyParty | type; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - } - } - if ( this.fRegister === null ) { - catBits = BlockAction | type | party; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - } - } - } - - // If there is no block filter, no need to test against allow filters - if ( this.fRegister === null ) { - return 0; - } - - // Test against allow filters - catBits = AllowAnyParty | type; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; + // Block filters + if ( this.realmMatchString(BlockAction, typeBits, partyBits) ) { + // Exception filters + if ( this.realmMatchStringExactType(AllowAction, typeBits, partyBits) ) { return 2; } + return 1; } - catBits = AllowAction | type | party; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - return 2; - } - } - - return 1; + return 0; }; /******************************************************************************/ FilterContainer.prototype.matchString = function(fctxt) { // https://github.com/chrisaljoudi/uBlock/issues/519 - // Use exact type match for anything beyond `other` - // Also, be prepared to support unknown types - let type = typeNameToTypeValue[fctxt.type]; - if ( type === undefined ) { - type = otherTypeBitValue; - } else if ( type === 0 || type > otherTypeBitValue ) { + // Use exact type match for anything beyond `other` + // Also, be prepared to support unknown types + let typeBits = 
typeNameToTypeValue[fctxt.type]; + if ( typeBits === undefined ) { + typeBits = otherTypeBitValue; + } else if ( typeBits === 0 || typeBits > otherTypeBitValue ) { return this.matchStringExactType(fctxt, fctxt.type); } - - // The logic here is simple: - // - // block = !whitelisted && blacklisted - // or equivalent - // allow = whitelisted || !blacklisted - - // Statistically, hits on a URL in order of likelihood: - // 1. No hit - // 2. Hit on a block filter - // 3. Hit on an allow filter - // - // High likelihood of "no hit" means to optimize we need to reduce as much - // as possible the number of filters to test. - // - // Then, because of the order of probabilities, we should test only - // block filters first, and test allow filters if and only if there is a - // hit on a block filter. Since there is a high likelihood of no hit, - // testing allow filter by default is likely wasted work, hence allow - // filters are tested *only* if there is a (unlikely) hit on a block - // filter. + const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; // Prime tokenizer: we get a normalized URL in return. - const url = this.urlTokenizer.setURL(fctxt.url); + this.urlRegister = this.urlTokenizer.setURL(fctxt.url); + this.filterRegister = null; // These registers will be used by various filters pageHostnameRegister = fctxt.getDocHostname(); requestHostnameRegister = fctxt.getHostname(); - this.fRegister = null; - - const party = fctxt.is3rdPartyToDoc() - ? ThirdParty - : FirstParty; - const categories = this.categories; - let catBits, bucket; - - // https://github.com/chrisaljoudi/uBlock/issues/139 - // Test against important block filters. - // The purpose of the `important` option is to reverse the order of - // evaluation. Normally, it is "evaluate block then evaluate allow", with - // the `important` property it is "evaluate allow then evaluate block". 
- catBits = BlockAnyTypeAnyParty | Important; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - return 1; - } + // Important block filters. + if ( this.realmMatchString(BlockImportant, typeBits, partyBits) ) { + return 1; } - catBits = BlockAnyType | Important | party; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - return 1; - } - } - catBits = BlockAnyParty | Important | type; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - return 1; - } - } - catBits = BlockAction | Important | type | party; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - return 1; - } - } - - // Test against block filters - catBits = BlockAnyTypeAnyParty; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - } - } - if ( this.fRegister === null ) { - catBits = BlockAnyType | party; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - } - } - if ( this.fRegister === null ) { - catBits = BlockAnyParty | type; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - } - } - if ( this.fRegister === null ) { - catBits = BlockAction | type | party; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - } - } - } - } - } - - // If there is no block filter, no need to test against allow filters - if ( this.fRegister === null ) { - return 0; - } - - // Test against allow filters - catBits = AllowAnyTypeAnyParty; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; + // Block filters + if ( 
this.realmMatchString(BlockAction, typeBits, partyBits) ) { + // Exception filters + if ( this.realmMatchString(AllowAction, typeBits, partyBits) ) { return 2; } + return 1; } - catBits = AllowAnyType | party; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - return 2; - } - } - catBits = AllowAnyParty | type; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - return 2; - } - } - catBits = AllowAction | type | party; - if ( (bucket = categories.get(catBits)) ) { - if ( this.matchTokens(bucket, url) ) { - this.cbRegister = catBits; - return 2; - } - } - - return 1; + return 0; }; /******************************************************************************/ FilterContainer.prototype.toLogData = function() { - if ( this.fRegister === null ) { return; } - const logData = toLogDataInternal(this.cbRegister, this.thRegister, this.fRegister); + if ( this.filterRegister === null ) { return; } + const logData = toLogDataInternal( + this.catbitsRegister, + this.tokenRegister, + this.filterRegister + ); logData.source = 'static'; - logData.tokenHash = this.thRegister; - logData.result = this.fRegister === null ? 0 : (this.cbRegister & 1 ? 2 : 1); + logData.tokenHash = this.tokenRegister; + logData.result = this.filterRegister === null + ? 0 + : (this.catbitsRegister & 1 ? 
2 : 1); return logData; }; @@ -2775,7 +2867,9 @@ FilterContainer.prototype.benchmark = function(action) { const r = this.matchString(fctxt); if ( recorded !== undefined ) { recorded.push(r); } if ( expected !== undefined && r !== expected[i] ) { - throw 'Mismatch with reference results'; + console.log('Mismatch with reference results:'); + console.log(`\tExpected ${expected[i]}, got ${r}:`); + console.log(`\turl=${fctxt.url} docOrigin=${fctxt.getDocOrigin()}`); } } const t1 = self.performance.now(); @@ -2796,15 +2890,60 @@ FilterContainer.prototype.benchmark = function(action) { return 'ok'; }; -/******************************************************************************/ +/******************************************************************************- + + With default filter lists: + + As of 2019-04-18: + + {bits: "0", token: "ad", size: 926, f: FilterBucket} + {bits: "0", token: "ads", size: 636, f: FilterBucket} + {bits: "41", token: "phncdn", size: 253, f: FilterBucket} + {bits: "0", token: "analytic", size: 174, f: FilterBucket} + {bits: "0", token: "tracking", size: 155, f: FilterBucket} + {bits: "48", token: "http", size: 146, f: FilterBucket} + {bits: "48", token: "https", size: 139, f: FilterBucket} + {bits: "58", token: "http", size: 122, f: FilterBucket} + {bits: "0", token: "adv", size: 121, f: FilterBucket} + {bits: "58", token: "https", size: 118, f: FilterBucket} + {bits: "0", token: "advertis", size: 102, f: FilterBucket} + {bits: "8", token: "doublecl", size: 96, f: FilterBucket} + {bits: "41", token: "imasdk", size: 90, f: FilterBucket} + {bits: "0", token: "cdn", size: 89, f: FilterBucket} + {bits: "0", token: "track", size: 87, f: FilterBucket} + {bits: "0", token: "stats", size: 82, f: FilterBucket} + {bits: "0", token: "banner", size: 74, f: FilterBucket} + {bits: "0", token: "log", size: 72, f: FilterBucket} + {bits: "0", token: "ga", size: 71, f: FilterBucket} + {bits: "0", token: "gif", size: 67, f: FilterBucket} + {bits: "0", token: 
"cloudfro", size: 64, f: FilterBucket} + {bits: "0", token: "amazonaw", size: 61, f: FilterBucket} + {bits: "41", token: "ajax", size: 58, f: FilterBucket} + {bits: "0", token: "tracker", size: 56, f: FilterBucket} + {bits: "40", token: "pagead2", size: 53, f: FilterBucket} + {bits: "0", token: "affiliat", size: 53, f: FilterBucket} + +*/ FilterContainer.prototype.bucketHistogram = function() { const results = []; for ( const [ bits, category ] of this.categories ) { for ( const [ th, f ] of category ) { - if ( f instanceof FilterBucket === false ) { continue; } - const token = µBlock.urlTokenizer.stringFromTokenHash(th); - results.push({ bits, token, size: f.size, f }); + if ( f instanceof FilterBucket ) { + const token = µBlock.urlTokenizer.stringFromTokenHash(th); + results.push({ bits: bits.toString(16), token, size: f.size, f }); + continue; + } + if ( f instanceof FilterHostnameDict ) { + const token = µBlock.urlTokenizer.stringFromTokenHash(th); + results.push({ bits: bits.toString(16), token, size: f.size, f }); + continue; + } + if ( f instanceof FilterJustOrigin ) { + const token = µBlock.urlTokenizer.stringFromTokenHash(th); + results.push({ bits: bits.toString(16), token, size: f.size, f }); + continue; + } } } results.sort((a, b) => { @@ -2815,9 +2954,9 @@ FilterContainer.prototype.bucketHistogram = function() { /******************************************************************************* - As of 2019-04-13: + With default filter lists: - Filter classes histogram with default filter lists: + As of 2019-04-13: {"FilterPlainHnAnchored" => 12619} {"FilterPlainPrefix1" => 8743} diff --git a/src/js/strie.js b/src/js/strie.js index 046793ea3..3ecbda22d 100644 --- a/src/js/strie.js +++ b/src/js/strie.js @@ -46,29 +46,29 @@ const STRIE_CHAR1_SLOT = STRIE_TRIE0_SLOT + 3; // 67 / 268 const STRIE_TRIE0_START = STRIE_TRIE0_SLOT + 4 << 2; // 272 -const STrieContainer = function(details) { - if ( details instanceof Object === false ) { details = {}; } - const len 
= (details.byteLength || 0) + STRIE_PAGE_SIZE-1 & ~(STRIE_PAGE_SIZE-1); - this.buf = new Uint8Array(Math.max(len, 131072)); - this.buf32 = new Uint32Array(this.buf.buffer); - this.buf32[STRIE_TRIE0_SLOT] = STRIE_TRIE0_START; - this.buf32[STRIE_TRIE1_SLOT] = this.buf32[STRIE_TRIE0_SLOT]; - this.buf32[STRIE_CHAR0_SLOT] = details.char0 || 65536; - this.buf32[STRIE_CHAR1_SLOT] = this.buf32[STRIE_CHAR0_SLOT]; -}; +const STrieContainer = class { -STrieContainer.prototype = { + constructor(details) { + if ( details instanceof Object === false ) { details = {}; } + const len = (details.byteLength || 0) + STRIE_PAGE_SIZE-1 & ~(STRIE_PAGE_SIZE-1); + this.buf = new Uint8Array(Math.max(len, 131072)); + this.buf32 = new Uint32Array(this.buf.buffer); + this.buf32[STRIE_TRIE0_SLOT] = STRIE_TRIE0_START; + this.buf32[STRIE_TRIE1_SLOT] = this.buf32[STRIE_TRIE0_SLOT]; + this.buf32[STRIE_CHAR0_SLOT] = details.char0 || 65536; + this.buf32[STRIE_CHAR1_SLOT] = this.buf32[STRIE_CHAR0_SLOT]; + } //-------------------------------------------------------------------------- // Public methods //-------------------------------------------------------------------------- - reset: function() { + reset() { this.buf32[STRIE_TRIE1_SLOT] = this.buf32[STRIE_TRIE0_SLOT]; this.buf32[STRIE_CHAR1_SLOT] = this.buf32[STRIE_CHAR0_SLOT]; - }, + } - matches: function(iroot, a, al) { + matches(iroot, a, al) { const ar = a.length; const char0 = this.buf32[STRIE_CHAR0_SLOT]; let icell = iroot; @@ -102,9 +102,9 @@ STrieContainer.prototype = { if ( icell === 0 || this.buf32[icell+2] === 0 ) { return al; } if ( al === ar ) { return -1; } } - }, + } - createOne: function(args) { + createOne(args) { if ( Array.isArray(args) ) { return new this.STrieRef(this, args[0], args[1]); } @@ -118,13 +118,13 @@ STrieContainer.prototype = { this.buf32[iroot+1] = 0; this.buf32[iroot+2] = 0; return new this.STrieRef(this, iroot, 0); - }, + } - compileOne: function(trieRef) { + compileOne(trieRef) { return [ trieRef.iroot, 
trieRef.size ]; - }, + } - add: function(iroot, s) { + add(iroot, s) { const lschar = s.length; if ( lschar === 0 ) { return 0; } let ischar = 0; @@ -221,26 +221,17 @@ STrieContainer.prototype = { } return 1; } - }, + } - optimize: function() { + optimize() { this.shrinkBuf(); return { byteLength: this.buf.byteLength, char0: this.buf32[STRIE_CHAR0_SLOT], }; - }, + } - fromIterable: function(hostnames, add) { - if ( add === undefined ) { add = 'add'; } - const trieRef = this.createOne(); - for ( const hn of hostnames ) { - trieRef[add](hn); - } - return trieRef; - }, - - serialize: function(encoder) { + serialize(encoder) { if ( encoder instanceof Object ) { return encoder.encode( this.buf32.buffer, @@ -254,9 +245,9 @@ STrieContainer.prototype = { this.buf32[STRIE_CHAR1_SLOT] + 3 >>> 2 ) ); - }, + } - unserialize: function(selfie, decoder) { + unserialize(selfie, decoder) { const shouldDecode = typeof selfie === 'string'; let byteLength = shouldDecode ? decoder.decodeSize(selfie) @@ -272,23 +263,13 @@ STrieContainer.prototype = { } else { this.buf32.set(selfie); } - }, - - //-------------------------------------------------------------------------- - // Class to hold reference to a specific trie - //-------------------------------------------------------------------------- - - STrieRef: function(container, iroot, size) { - this.container = container; - this.iroot = iroot; - this.size = size; - }, + } //-------------------------------------------------------------------------- // Private methods //-------------------------------------------------------------------------- - addCell: function(idown, iright, v) { + addCell(idown, iright, v) { let icell = this.buf32[STRIE_TRIE1_SLOT]; this.buf32[STRIE_TRIE1_SLOT] = icell + 12; icell >>>= 2; @@ -296,9 +277,9 @@ STrieContainer.prototype = { this.buf32[icell+1] = iright; this.buf32[icell+2] = v; return icell; - }, + } - addSegment: function(segment) { + addSegment(segment) { const lsegchar = segment.length; if ( lsegchar 
=== 0 ) { return 0; } let char1 = this.buf32[STRIE_CHAR1_SLOT]; @@ -309,9 +290,9 @@ STrieContainer.prototype = { } while ( i !== lsegchar ); this.buf32[STRIE_CHAR1_SLOT] = char1; return (lsegchar << 24) | isegchar; - }, + } - growBuf: function(trieGrow, charGrow) { + growBuf(trieGrow, charGrow) { const char0 = Math.max( (this.buf32[STRIE_TRIE1_SLOT] + trieGrow + STRIE_PAGE_SIZE-1) & ~(STRIE_PAGE_SIZE-1), this.buf32[STRIE_CHAR0_SLOT] @@ -322,16 +303,16 @@ STrieContainer.prototype = { this.buf.length ); this.resizeBuf(bufLen, char0); - }, + } - shrinkBuf: function() { + shrinkBuf() { const char0 = this.buf32[STRIE_TRIE1_SLOT] + 24; const char1 = char0 + this.buf32[STRIE_CHAR1_SLOT] - this.buf32[STRIE_CHAR0_SLOT]; const bufLen = char1 + 256; this.resizeBuf(bufLen, char0); - }, + } - resizeBuf: function(bufLen, char0) { + resizeBuf(bufLen, char0) { bufLen = bufLen + STRIE_PAGE_SIZE-1 & ~(STRIE_PAGE_SIZE-1); if ( bufLen === this.buf.length && @@ -375,23 +356,35 @@ STrieContainer.prototype = { this.buf32[STRIE_CHAR0_SLOT] = char0; this.buf32[STRIE_CHAR1_SLOT] = char0 + charDataLen; } - }, + } }; -/******************************************************************************/ +/******************************************************************************* -STrieContainer.prototype.STrieRef.prototype = { - add: function(pattern) { + Class to hold reference to a specific trie + +*/ + +STrieContainer.prototype.STrieRef = class { + constructor(container, iroot, size) { + this.container = container; + this.iroot = iroot; + this.size = size; + } + + add(pattern) { if ( this.container.add(this.iroot, pattern) === 1 ) { this.size += 1; return true; } return false; - }, - matches: function(a, al) { + } + + matches(a, al) { return this.container.matches(this.iroot, a, al); - }, - [Symbol.iterator]: function() { + } + + [Symbol.iterator]() { return { value: undefined, done: false, @@ -441,5 +434,5 @@ STrieContainer.prototype.STrieRef.prototype = { forks: [], textDecoder: new 
TextDecoder() }; - }, + } }; diff --git a/src/js/utils.js b/src/js/utils.js index b50de5fd1..536dd307f 100644 --- a/src/js/utils.js +++ b/src/js/utils.js @@ -41,70 +41,101 @@ // Benchmark for string-based tokens vs. safe-integer token values: // https://gorhill.github.io/obj-vs-set-vs-map/tokenize-to-str-vs-to-int.html -µBlock.urlTokenizer = { - setURL: function(url) { +µBlock.urlTokenizer = new (class { + constructor() { + this._chars = '0123456789%abcdefghijklmnopqrstuvwxyz'; + this._validTokenChars = new Uint8Array(128); + for ( let i = 0, n = this._chars.length; i < n; i++ ) { + this._validTokenChars[this._chars.charCodeAt(i)] = i + 1; + } + + this._charsEx = '0123456789%abcdefghijklmnopqrstuvwxyz*.'; + this._validTokenCharsEx = new Uint8Array(128); + for ( let i = 0, n = this._charsEx.length; i < n; i++ ) { + this._validTokenCharsEx[this._charsEx.charCodeAt(i)] = i + 1; + } + + this.dotTokenHash = this.tokenHashFromString('.'); + this.anyTokenHash = this.tokenHashFromString('..'); + this.anyHTTPSTokenHash = this.tokenHashFromString('..https'); + this.anyHTTPTokenHash = this.tokenHashFromString('..http'); + this.noTokenHash = this.tokenHashFromString('*'); + + this._urlIn = ''; + this._urlOut = ''; + this._tokenized = false; + this._tokens = [ 0 ]; + } + + setURL(url) { if ( url !== this._urlIn ) { this._urlIn = url; this._urlOut = url.toLowerCase(); this._tokenized = false; } return this._urlOut; - }, + } // Tokenize on demand. 
- getTokens: function() { - if ( this._tokenized === false ) { - this._tokenize(); - this._tokenized = true; + getTokens() { + if ( this._tokenized ) { return this._tokens; } + let i = this._tokenize(); + i = this._appendTokenAt(i, this.anyTokenHash, 0); + if ( this._urlOut.startsWith('https://') ) { + i = this._appendTokenAt(i, this.anyHTTPSTokenHash, 0); + } else if ( this._urlOut.startsWith('http://') ) { + i = this._appendTokenAt(i, this.anyHTTPTokenHash, 0); } + i = this._appendTokenAt(i, this.noTokenHash, 0); + this._tokens[i] = 0; + this._tokenized = true; return this._tokens; - }, + } - tokenHashFromString: function(s) { - var l = s.length; + _appendTokenAt(i, th, ti) { + this._tokens[i+0] = th; + this._tokens[i+1] = ti; + return i + 2; + } + + tokenHashFromString(s) { + const l = s.length; if ( l === 0 ) { return 0; } - if ( l === 1 ) { - if ( s === '*' ) { return 63; } - if ( s === '.' ) { return 62; } - } - var vtc = this._validTokenChars, - th = vtc[s.charCodeAt(0)]; - for ( var i = 1; i !== 8 && i !== l; i++ ) { + const vtc = this._validTokenCharsEx; + let th = vtc[s.charCodeAt(0)]; + for ( let i = 1; i !== 8 && i !== l; i++ ) { th = th * 64 + vtc[s.charCodeAt(i)]; } return th; - }, + } - stringFromTokenHash: function(th) { + stringFromTokenHash(th) { if ( th === 0 ) { return ''; } - if ( th === 63 ) { return '*'; } - if ( th === 62 ) { return '.'; } - const chars = '0123456789%abcdefghijklmnopqrstuvwxyz'; let s = ''; while ( th > 0 ) { - s = `${chars.charAt((th & 0b111111)-1)}${s}`; + s = `${this._charsEx.charAt((th & 0b111111)-1)}${s}`; th /= 64; } return s; - }, + } // https://github.com/chrisaljoudi/uBlock/issues/1118 // We limit to a maximum number of tokens. 
- _tokenize: function() { - var tokens = this._tokens, - url = this._urlOut, - l = url.length; - if ( l === 0 ) { tokens[0] = 0; return; } + _tokenize() { + const tokens = this._tokens; + let url = this._urlOut; + let l = url.length; + if ( l === 0 ) { return 0; } if ( l > 2048 ) { url = url.slice(0, 2048); l = 2048; } - var i = 0, j = 0, v, n, ti, th, - vtc = this._validTokenChars; + const vtc = this._validTokenChars; + let i = 0, j = 0, v, n, ti, th; for (;;) { for (;;) { - if ( i === l ) { tokens[j] = 0; return; } + if ( i === l ) { return j; } v = vtc[url.charCodeAt(i++)]; if ( v !== 0 ) { break; } } @@ -117,25 +148,12 @@ th = th * 64 + v; n += 1; } - tokens[j++] = th; - tokens[j++] = ti; + tokens[j+0] = th; + tokens[j+1] = ti; + j += 2; } - }, - - _urlIn: '', - _urlOut: '', - _tokenized: false, - _tokens: [ 0 ], - _validTokenChars: (function() { - var vtc = new Uint8Array(128), - chars = '0123456789%abcdefghijklmnopqrstuvwxyz', - i = chars.length; - while ( i-- ) { - vtc[chars.charCodeAt(i)] = i + 1; - } - return vtc; - })() -}; + } +})(); /******************************************************************************/