diff --git a/platform/chromium/vapi-background.js b/platform/chromium/vapi-background.js index 8e3cc9e79..7496f47ba 100644 --- a/platform/chromium/vapi-background.js +++ b/platform/chromium/vapi-background.js @@ -35,7 +35,13 @@ var chrome = self.chrome; var manifest = chrome.runtime.getManifest(); vAPI.chrome = true; -vAPI.cantWebsocket = true; +vAPI.chromiumVersion = (function(){ + var matches = /\bChrom(?:e|ium)\/(\d+)\b/.exec(navigator.userAgent); + return matches !== null ? parseInt(matches[1], 10) : NaN; + })(); +vAPI.cantWebsocket = + chrome.webRequest.ResourceType instanceof Object === false || + chrome.webRequest.ResourceType.WEBSOCKET !== 'websocket'; var noopFunc = function(){}; diff --git a/src/css/logger-ui.css b/src/css/logger-ui.css index c95fb7627..5ff95b5c7 100644 --- a/src/css/logger-ui.css +++ b/src/css/logger-ui.css @@ -157,7 +157,7 @@ body.colorBlind #netInspector tr.nooped { body.colorBlind #netInspector tr.allowed { background-color: rgba(255, 194, 57, 0.1) } -#netInspector tr.cb, #netInspector tr.rr { +#netInspector tr.cosmetic, #netInspector tr.redirect { background-color: rgba(255, 255, 0, 0.1); } #netInspector tr.maindoc { diff --git a/src/css/popup.css b/src/css/popup.css index f82c2881b..721c5f5e2 100644 --- a/src/css/popup.css +++ b/src/css/popup.css @@ -439,40 +439,40 @@ body.advancedUser #firewallContainer > div > span:first-of-type ~ span { background-color: rgb(192, 160, 0); } /* Rule cells */ -body.advancedUser #firewallContainer > div > span.aRule { +body.advancedUser #firewallContainer > div > span.allowRule { background-color: rgba(0, 160, 0, 0.3); } -body.advancedUser #firewallContainer.colorBlind > div > span.aRule { +body.advancedUser #firewallContainer.colorBlind > div > span.allowRule { background-color: rgba(255, 194, 57, 0.4); } -body.advancedUser #firewallContainer > div > span.bRule { +body.advancedUser #firewallContainer > div > span.blockRule { background-color: rgba(192, 0, 0, 0.3); } -body.advancedUser 
#firewallContainer.colorBlind > div > span.bRule { +body.advancedUser #firewallContainer.colorBlind > div > span.blockRule { background-color: rgba(0, 19, 110, 0.4); } -body.advancedUser #firewallContainer > div > span.nRule { +body.advancedUser #firewallContainer > div > span.noopRule { background-color: rgba(108, 108, 108, 0.3); } -body.advancedUser #firewallContainer.colorBlind > div > span.nRule { +body.advancedUser #firewallContainer.colorBlind > div > span.noopRule { background-color: rgba(96, 96, 96, 0.4); } body.advancedUser #firewallContainer > div > span.ownRule { color: white; } -body.advancedUser #firewallContainer > div > span.aRule.ownRule { +body.advancedUser #firewallContainer > div > span.allowRule.ownRule { background-color: rgba(0, 160, 0, 1); } -body.advancedUser #firewallContainer.colorBlind > div > span.aRule.ownRule { +body.advancedUser #firewallContainer.colorBlind > div > span.allowRule.ownRule { background-color: rgba(255, 194, 57, 1); } -body.advancedUser #firewallContainer > div > span.bRule.ownRule { +body.advancedUser #firewallContainer > div > span.blockRule.ownRule { background-color: rgba(192, 0, 0, 1); } -body.advancedUser #firewallContainer.colorBlind > div > span.bRule.ownRule { +body.advancedUser #firewallContainer.colorBlind > div > span.blockRule.ownRule { background-color: rgba(0, 19, 110, 1); } -body.advancedUser #firewallContainer > div > span.nRule.ownRule { +body.advancedUser #firewallContainer > div > span.noopRule.ownRule { background-color: rgba(108, 108, 108, 1); } diff --git a/src/js/background.js b/src/js/background.js index 4e69a9498..ebe3f0e8f 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -121,8 +121,8 @@ var µBlock = (function() { // jshint ignore:line // read-only systemSettings: { - compiledMagic: 'fxtcjjhbhyiw', - selfieMagic: 'fxtcjjhbhyiw' + compiledMagic: 'lcmfjiajoqwe', + selfieMagic: 'lcmfjiajoqwe' }, restoreBackupSettings: { @@ -170,4 +170,3 @@ var µBlock = (function() { // jshint 
ignore:line })(); /******************************************************************************/ - diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index 44cb62421..4edd2b69c 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -719,6 +719,8 @@ FilterContainer.prototype.freeze = function() { this.highHighComplexGenericHideCount !== 0; this.parser.reset(); + this.compileSelector.reset(); + this.compileProceduralSelector.reset(); this.frozen = true; }; @@ -746,7 +748,7 @@ FilterContainer.prototype.compileSelector = (function() { return true; }; - return function(raw) { + var entryPoint = function(raw) { if ( isValidCSSSelector(raw) && raw.indexOf('[-abp-properties=') === -1 ) { return raw; } @@ -812,6 +814,11 @@ FilterContainer.prototype.compileSelector = (function() { µb.logger.writeOne('', 'error', 'Cosmetic filtering – invalid filter: ' + raw); }; + + entryPoint.reset = function() { + }; + + return entryPoint; })(); /******************************************************************************/ @@ -927,7 +934,7 @@ FilterContainer.prototype.compileProceduralSelector = (function() { return { selector: firstOperand, tasks: tasks }; }; - return function(raw) { + var entryPoint = function(raw) { if ( raw === lastProceduralSelector ) { return lastProceduralSelectorCompiled; } @@ -940,6 +947,13 @@ FilterContainer.prototype.compileProceduralSelector = (function() { lastProceduralSelectorCompiled = compiled; return compiled; }; + + entryPoint.reset = function() { + lastProceduralSelector = ''; + lastProceduralSelectorCompiled = undefined; + }; + + return entryPoint; })(); /******************************************************************************/ @@ -1038,12 +1052,12 @@ FilterContainer.prototype.compileGenericHideSelector = function(parsed, out) { // is valid, the regex took care of this. Most generic selector falls // into that category. 
if ( key === selector ) { - out.push('c\vlg\v' + key); + out.push(4, 'lg\v' + key); return; } // Composite CSS rule. if ( this.compileSelector(selector) ) { - out.push('c\vlg+\v' + key + '\v' + selector); + out.push(4, 'lg+\v' + key + '\v' + selector); } return; } @@ -1054,21 +1068,21 @@ FilterContainer.prototype.compileGenericHideSelector = function(parsed, out) { // ["title"] and ["alt"] will go in high-low generic bin. if ( this.reHighLow.test(selector) ) { - out.push('c\vhlg0\v' + selector); + out.push(4, 'hlg0\v' + selector); return; } // [href^="..."] will go in high-medium generic bin. matches = this.reHighMedium.exec(selector); if ( matches && matches.length === 2 ) { - out.push('c\vhmg0\v' + matches[1] + '\v' + selector); + out.push(4, 'hmg0\v' + matches[1] + '\v' + selector); return; } // script:contains(...) // script:inject(...) if ( this.reScriptSelector.test(selector) ) { - out.push('c\vjs\v0\v\v' + selector); + out.push(4, 'js\v0\v\v' + selector); return; } @@ -1077,16 +1091,16 @@ FilterContainer.prototype.compileGenericHideSelector = function(parsed, out) { // as a low generic cosmetic filter. matches = this.rePlainSelectorEx.exec(selector); if ( matches && matches.length === 2 ) { - out.push('c\vlg+\v' + matches[1] + '\v' + selector); + out.push(4, 'lg+\v' + matches[1] + '\v' + selector); return; } // All else: high-high generics. // Distinguish simple vs complex selectors. if ( selector.indexOf(' ') === -1 ) { - out.push('c\vhhsg0\v' + selector); + out.push(4, 'hhsg0\v' + selector); } else { - out.push('c\vhhcg0\v' + selector); + out.push(4, 'hhcg0\v' + selector); } }; @@ -1098,7 +1112,7 @@ FilterContainer.prototype.compileGenericUnhideSelector = function(parsed, out) { // script:contains(...) // script:inject(...) 
if ( this.reScriptSelector.test(selector) ) { - out.push('c\vjs\v1\v\v' + selector); + out.push(4, 'js\v1\v\v' + selector); return; } @@ -1109,7 +1123,7 @@ FilterContainer.prototype.compileGenericUnhideSelector = function(parsed, out) { // https://github.com/chrisaljoudi/uBlock/issues/497 // All generic exception filters are put in the same bucket: they are // expected to be very rare. - out.push('c\vg1\v' + compiled); + out.push(4, 'g1\v' + compiled); }; /******************************************************************************/ @@ -1138,7 +1152,7 @@ FilterContainer.prototype.compileHostnameSelector = function(hostname, parsed, o if ( unhide ) { hash = '!' + hash; } - out.push('c\vjs\v' + hash + '\v' + hostname + '\v' + selector); + out.push(4, 'js\v' + hash + '\v' + hostname + '\v' + selector); return; } @@ -1156,12 +1170,16 @@ FilterContainer.prototype.compileHostnameSelector = function(hostname, parsed, o hash = '!' + hash; } - out.push('c\vh\v' + hash + '\v' + hostname + '\v' + compiled); + out.push(4, 'h\v' + hash + '\v' + hostname + '\v' + compiled); }; /******************************************************************************/ -FilterContainer.prototype.fromCompiledContent = function(lineIter, skipGenericCosmetic, skipCosmetic) { +FilterContainer.prototype.fromCompiledContent = function( + lineIter, + skipGenericCosmetic, + skipCosmetic +) { if ( skipCosmetic ) { this.skipCompiledContent(lineIter); return; @@ -1171,15 +1189,19 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter, skipGenericCo return; } - var line, field0, field1, field2, field3, filter, bucket, + var lineBits, line, field0, field1, field2, field3, filter, bucket, + aCharCode = 'a'.charCodeAt(0), fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { - line = lineIter.next(); - if ( line.charCodeAt(0) !== 0x63 /* 'c' */ ) { - lineIter.rewind(); + lineBits = lineIter.charCodeAt(0) - aCharCode; + if ( (lineBits & 0x04) === 0 ) { return; } + line 
= lineIter.next(1); + if ( (lineBits & 0x02) !== 0 ) { + line = decodeURIComponent(line); + } this.acceptedCount += 1; if ( this.duplicateBuster.has(line) ) { @@ -1188,8 +1210,7 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter, skipGenericCo } this.duplicateBuster.add(line); - fieldIter.first(line); - field0 = fieldIter.next(); + field0 = fieldIter.first(line); field1 = fieldIter.next(); // h [\v] hash [\v] example.com [\v] .promoted-tweet @@ -1298,15 +1319,19 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter, skipGenericCo /******************************************************************************/ FilterContainer.prototype.skipGenericCompiledContent = function(lineIter) { - var line, field0, field1, field2, field3, filter, bucket, + var lineBits, line, field0, field1, field2, field3, filter, bucket, + aCharCode = 'a'.charCodeAt(0), fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { - line = lineIter.next(); - if ( line.charCodeAt(0) !== 0x63 /* 'c' */ ) { - lineIter.rewind(); + lineBits = lineIter.charCodeAt(0) - aCharCode; + if ( (lineBits & 0x04) === 0 ) { return; } + line = lineIter.next(1); + if ( (lineBits & 0x02) !== 0 ) { + line = decodeURIComponent(line); + } this.acceptedCount += 1; if ( this.duplicateBuster.has(line) ) { @@ -1361,15 +1386,19 @@ FilterContainer.prototype.skipGenericCompiledContent = function(lineIter) { /******************************************************************************/ FilterContainer.prototype.skipCompiledContent = function(lineIter) { - var line, field0, field1, field2, field3, + var lineBits, line, field0, field1, field2, field3, + aCharCode = 'a'.charCodeAt(0), fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { - line = lineIter.next(); - if ( line.charCodeAt(0) !== 0x63 /* 'c' */ ) { - lineIter.rewind(); + lineBits = lineIter.charCodeAt(0) - aCharCode; + if ( (lineBits & 0x04) === 0 ) { return; } + line = lineIter.next(1); + 
if ( (lineBits & 0x02) !== 0 ) { + line = decodeURIComponent(line); + } this.acceptedCount += 1; if ( this.duplicateBuster.has(line) ) { diff --git a/src/js/dynamic-net-filtering.js b/src/js/dynamic-net-filtering.js index f6514af0e..25b599f1d 100644 --- a/src/js/dynamic-net-filtering.js +++ b/src/js/dynamic-net-filtering.js @@ -337,7 +337,7 @@ Matrix.prototype.evaluateCellZY = function(srcHostname, desHostname, type) { var d = desHostname; if ( d === '' ) { this.r = 0; - return this; + return 0; } // Prepare broadening handlers -- depends on whether we are dealing with @@ -350,7 +350,9 @@ Matrix.prototype.evaluateCellZY = function(srcHostname, desHostname, type) { // Specific-destination, any party, any type while ( d !== '*' ) { this.y = d; - if ( this.evaluateCellZ(srcHostname, d, '*', broadenSource) !== 0 ) { return this; } + if ( this.evaluateCellZ(srcHostname, d, '*', broadenSource) !== 0 ) { + return this.r; + } d = broadenDestination(d); } @@ -363,27 +365,39 @@ Matrix.prototype.evaluateCellZY = function(srcHostname, desHostname, type) { if ( thirdParty ) { // 3rd-party, specific type if ( type === 'script' ) { - if ( this.evaluateCellZ(srcHostname, '*', '3p-script', broadenSource) !== 0 ) { return this; } + if ( this.evaluateCellZ(srcHostname, '*', '3p-script', broadenSource) !== 0 ) { + return this.r; + } } else if ( type === 'sub_frame' ) { - if ( this.evaluateCellZ(srcHostname, '*', '3p-frame', broadenSource) !== 0 ) { return this; } + if ( this.evaluateCellZ(srcHostname, '*', '3p-frame', broadenSource) !== 0 ) { + return this.r; + } } // 3rd-party, any type - if ( this.evaluateCellZ(srcHostname, '*', '3p', broadenSource) !== 0 ) { return this; } + if ( this.evaluateCellZ(srcHostname, '*', '3p', broadenSource) !== 0 ) { + return this.r; + } } else if ( type === 'script' ) { // 1st party, specific type - if ( this.evaluateCellZ(srcHostname, '*', '1p-script', broadenSource) !== 0 ) { return this; } + if ( this.evaluateCellZ(srcHostname, '*', '1p-script', 
broadenSource) !== 0 ) { + return this.r; + } } // Any destination, any party, specific type if ( supportedDynamicTypes.hasOwnProperty(type) ) { - if ( this.evaluateCellZ(srcHostname, '*', type, broadenSource) !== 0 ) { return this; } + if ( this.evaluateCellZ(srcHostname, '*', type, broadenSource) !== 0 ) { + return this.r; + } } // Any destination, any party, any type - if ( this.evaluateCellZ(srcHostname, '*', '*', broadenSource) !== 0 ) { return this; } + if ( this.evaluateCellZ(srcHostname, '*', '*', broadenSource) !== 0 ) { + return this.r; + } this.type = ''; - return this; + return 0; }; // http://youtu.be/gSGk1bQ9rcU?t=25m6s @@ -391,7 +405,7 @@ Matrix.prototype.evaluateCellZY = function(srcHostname, desHostname, type) { /******************************************************************************/ Matrix.prototype.mustAllowCellZY = function(srcHostname, desHostname, type) { - return this.evaluateCellZY(srcHostname, desHostname, type).r === 2; + return this.evaluateCellZY(srcHostname, desHostname, type) === 2; }; /******************************************************************************/ @@ -414,23 +428,44 @@ Matrix.prototype.mustAbort = function() { /******************************************************************************/ -Matrix.prototype.toFilterString = function() { - if ( this.r === 0 || this.type === '' ) { - return ''; +Matrix.prototype.lookupRuleData = function(src, des, type) { + var r = this.evaluateCellZY(src, des, type); + if ( r === 0 ) { + return null; } - var body = this.z + ' ' + this.y + ' ' + this.type; - if ( this.r === 1 ) { - return 'db:' + body + ' block'; - } - if ( this.r === 2 ) { - return 'da:' + body + ' allow'; - } - /* this.r === 3 */ - return 'dn:' + body + ' noop'; + return { + src: this.z, + des: this.y, + type: this.type, + action: r === 1 ? 'block' : (r === 2 ? 
'allow' : 'noop') + }; }; /******************************************************************************/ +Matrix.prototype.toLogData = function() { + if ( this.r === 0 || this.type === '' ) { + return; + } + var logData = { + source: 'dynamicHost', + result: this.r, + raw: this.z + ' ' + + this.y + ' ' + + this.type + ' ' + + this.intToActionMap.get(this.r) + }; + return logData; +}; + +Matrix.prototype.intToActionMap = new Map([ + [ 1, ' block' ], + [ 2, ' allow' ], + [ 3, ' noop' ] +]); + +/******************************************************************************/ + Matrix.prototype.srcHostnameFromRule = function(rule) { return rule.slice(0, rule.indexOf(' ')); }; diff --git a/src/js/hnswitches.js b/src/js/hnswitches.js index b55f8628c..e16c4d9ea 100644 --- a/src/js/hnswitches.js +++ b/src/js/hnswitches.js @@ -249,10 +249,12 @@ HnSwitches.prototype.evaluateZ = function(switchName, hostname) { /******************************************************************************/ -HnSwitches.prototype.toResultString = function() { - return this.r !== 1 ? - '' : - 'ub:' + this.n + ': ' + this.z + ' true'; +HnSwitches.prototype.toLogData = function() { + return { + source: 'switch', + result: this.r, + raw: this.n + ': ' + this.z + ' true' + }; }; /******************************************************************************/ diff --git a/src/js/logger-ui.js b/src/js/logger-ui.js index ad398f173..56f407a7a 100644 --- a/src/js/logger-ui.js +++ b/src/js/logger-ui.js @@ -1,7 +1,7 @@ /******************************************************************************* uBlock Origin - a browser extension to block requests. 
- Copyright (C) 2015-2016 Raymond Hill + Copyright (C) 2015-2017 Raymond Hill This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -92,9 +92,11 @@ var uglyRequestTypes = { }; var staticFilterTypes = { + 'beacon': 'other', 'doc': 'document', 'css': 'stylesheet', 'frame': 'subdocument', + 'ping': 'other', 'xhr': 'xmlhttprequest' }; @@ -146,212 +148,6 @@ var renderedURLTemplate = document.querySelector('#renderedURLTemplate > span'); /******************************************************************************/ -// Pretty much same logic as found in: -// µBlock.staticNetFilteringEngine.filterStringFromCompiled -// µBlock.staticNetFilteringEngine.filterRegexFromCompiled - -var filterDecompiler = (function() { - var typeValToTypeName = { - 1: 'stylesheet', - 2: 'image', - 3: 'object', - 4: 'script', - 5: 'xmlhttprequest', - 6: 'subdocument', - 7: 'font', - 8: 'media', - 9: 'websocket', - 10: 'other', - 11: 'popunder', - 12: 'document', - 13: 'generichide', - 14: 'inline-script', - 15: 'popup' - }; - - var toString = function(compiled) { - var opts = []; - var vfields = compiled.split('\v'); - var filter = ''; - var bits = parseInt(vfields[0], 16) | 0; - - if ( bits & 0x01 ) { - filter += '@@'; - } - - var fid = vfields[1] === '.' ? '.' : vfields[2]; - var tfields = fid !== '.' ? 
vfields[3].split('\t') : []; - var tfield0 = tfields[0]; - - // Filter options - // Importance - if ( bits & 0x02 ) { - opts.push('important'); - } - // Party - if ( bits & 0x08 ) { - opts.push('third-party'); - } else if ( bits & 0x04 ) { - opts.push('first-party'); - } - // Type - var typeVal = bits >>> 4 & 0x0F; - if ( typeVal ) { - opts.push(typeValToTypeName[typeVal]); - } - - switch ( fid ) { - case '.': - filter += '||' + vfields[2] + '^'; - break; - case 'a': - case 'ah': - case '0a': - case '0ah': - case '1a': - case '1ah': - case '_': - case '_h': - filter += tfield0; - // If the filter resemble a regex, add a trailing `*` as is - // customary to prevent ambiguity in logger. - if ( tfield0.charAt(0) === '/' && tfield0.slice(-1) === '/' ) { - filter += '*'; - } - break; - case '|a': - case '|ah': - filter += '|' + tfield0; - break; - case 'a|': - case 'a|h': - filter += tfield0 + '|'; - break; - case '||a': - case '||ah': - filter += '||' + tfield0; - break; - case '||_': - case '||_h': - filter += '||' + tfield0; - if ( tfields[1] === '1' ) { // left-anchored? - filter += '|'; - } - break; - case '//': - case '//h': - filter += '/' + tfield0 + '/'; - break; - // https://github.com/gorhill/uBlock/issues/465 - // Unexpected: return the raw compiled representation instead of a - // blank string. - default: - return compiled.replace(/\s+/g, ' '); - } - - // Domain option? - switch ( fid ) { - case '0ah': - case '1ah': - case '|ah': - case 'a|h': - case '||ah': - case '//h': - opts.push('domain=' + tfields[1]); - break; - case 'ah': - case '_h': - case '||_h': - opts.push('domain=' + tfields[2]); - break; - default: - break; - } - - if ( opts.length !== 0 ) { - filter += '$' + opts.join(','); - } - - return filter; - }; - - var reEscapeHostname = /[.[\]]/g; - var reEscape = /[.+?${}()|[\]\\]/g; - var reWildcards = /\*+/g; - var reSeparator = /\^/g; - - var toRegex = function(compiled) { - var vfields = compiled.split('\v'); - var fid = vfields[1] === '.' ? '.' 
: vfields[2]; - var tfields = fid !== '.' ? vfields[3].split('\t') : []; - var reStr; - - switch ( fid ) { - case '.': - reStr = vfields[2].replace(reEscapeHostname, '\\$&') + - '(?:[^%.0-9a-z_-]|$)'; - break; - case 'a': - case 'ah': - case '0a': - case '0ah': - case '1a': - case '1ah': - case '|a': - case '|ah': - case 'a|': - case 'a|h': - case '_': - case '_h': - reStr = tfields[0] - .replace(reEscape, '\\$&') - .replace(reWildcards, '.*?') - .replace(reSeparator, '(?:[^%.0-9a-z_-]|$)'); - break; - case '||a': - case '||ah': - case '||_': - case '||_h': - reStr = ''; - if ( tfields[0].charCodeAt(0) === 0x2A ) { - reStr = '[0-9a-z.-]*?'; - tfields[0] = tfields[0].slice(1); - } - reStr += tfields[0] - .replace(reEscape, '\\$&') - .replace(reWildcards, '.*?') - .replace(reSeparator, '(?:[^%.0-9a-z_-]|$)'); - break; - case '//': - case '//h': - reStr = tfields[0]; - break; - default: - break; - } - - // Anchored? - var s = fid.slice(0, 2); - if ( s === '|a' ) { - reStr = '^' + reStr; - } else if ( s === 'a|' ) { - reStr += '$'; - } - - if ( reStr === undefined) { - return null; - } - return new RegExp(reStr, 'gi'); - }; - - return { - toString: toString, - toRegex: toRegex - }; -})(); - -/******************************************************************************/ - var createCellAt = function(tr, index) { var td = tr.cells[index]; var mustAppend = !td; @@ -442,7 +238,7 @@ var createGap = function(tabId, url) { var renderNetLogEntry = function(tr, entry) { var trcl = tr.classList; - var filter = entry.d0; + var filter = entry.d0 || undefined; var type = entry.d1; var url = entry.d2; var td; @@ -463,52 +259,50 @@ var renderNetLogEntry = function(tr, entry) { tr.setAttribute('data-hn-frame', entry.d4); } - var filterCat = filter.slice(0, 3); - if ( filterCat.charAt(2) === ':' ) { - trcl.add(filterCat.slice(0, 2)); + var filteringType; + if ( filter !== undefined && typeof filter.source === 'string' ) { + filteringType = filter.source; + trcl.add(filteringType); } 
- var filteringType = filterCat.charAt(0); td = tr.cells[2]; - if ( filter !== '' ) { - filter = filter.slice(3); - if ( filteringType === 's' ) { - td.textContent = filterDecompiler.toString(filter); + if ( filter !== undefined ) { + if ( filteringType === 'static' ) { + td.textContent = filter.raw; trcl.add('canLookup'); - tr.setAttribute('data-filter', filter); - } else if ( filteringType === 'c' ) { - td.textContent = filter; + tr.setAttribute('data-filter', filter.compiled); + } else if ( filteringType === 'cosmetic' ) { + td.textContent = filter.raw; trcl.add('canLookup'); } else { - td.textContent = filter; + td.textContent = filter.raw; } } td = tr.cells[3]; - var filteringOp = filterCat.charAt(1); - if ( filteringOp === 'b' ) { - trcl.add('blocked'); - td.textContent = '--'; - } else if ( filteringOp === 'a' ) { - trcl.add('allowed'); - td.textContent = '++'; - } else if ( filteringOp === 'n' ) { - trcl.add('nooped'); - td.textContent = '**'; - } else if ( filteringOp === 'r' ) { - trcl.add('redirected'); - td.textContent = '<<'; - } else { - td.textContent = ''; + if ( filter !== undefined ) { + if ( filter.result === 1 ) { + trcl.add('blocked'); + td.textContent = '--'; + } else if ( filter.result === 2 ) { + trcl.add('allowed'); + td.textContent = '++'; + } else if ( filter.result === 3 ) { + trcl.add('nooped'); + td.textContent = '**'; + } else if ( filter.source === 'redirect' ) { + trcl.add('redirect'); + td.textContent = '<<'; + } } tr.cells[4].textContent = (prettyRequestTypes[type] || type); var re = null; - if ( filteringType === 's' ) { - re = filterDecompiler.toRegex(filter); - } else if ( filteringType === 'l' ) { - re = regexFromURLFilteringResult(filter); + if ( filteringType === 'static' ) { + re = new RegExp(filter.regex, 'gi'); + } else if ( filteringType === 'dynamicUrl' ) { + re = regexFromURLFilteringResult(filter.rule.join(' ')); } tr.cells[5].appendChild(nodeFromURL(url, re)); }; diff --git a/src/js/messaging.js b/src/js/messaging.js 
index f99bfa8cd..b1d30a2fa 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -258,28 +258,28 @@ var getHostnameDict = function(hostnameToCountMap) { var getFirewallRules = function(srcHostname, desHostnames) { var r = {}; var df = µb.sessionFirewall; - r['/ * *'] = df.evaluateCellZY('*', '*', '*').toFilterString(); - r['/ * image'] = df.evaluateCellZY('*', '*', 'image').toFilterString(); - r['/ * 3p'] = df.evaluateCellZY('*', '*', '3p').toFilterString(); - r['/ * inline-script'] = df.evaluateCellZY('*', '*', 'inline-script').toFilterString(); - r['/ * 1p-script'] = df.evaluateCellZY('*', '*', '1p-script').toFilterString(); - r['/ * 3p-script'] = df.evaluateCellZY('*', '*', '3p-script').toFilterString(); - r['/ * 3p-frame'] = df.evaluateCellZY('*', '*', '3p-frame').toFilterString(); + r['/ * *'] = df.lookupRuleData('*', '*', '*'); + r['/ * image'] = df.lookupRuleData('*', '*', 'image'); + r['/ * 3p'] = df.lookupRuleData('*', '*', '3p'); + r['/ * inline-script'] = df.lookupRuleData('*', '*', 'inline-script'); + r['/ * 1p-script'] = df.lookupRuleData('*', '*', '1p-script'); + r['/ * 3p-script'] = df.lookupRuleData('*', '*', '3p-script'); + r['/ * 3p-frame'] = df.lookupRuleData('*', '*', '3p-frame'); if ( typeof srcHostname !== 'string' ) { return r; } - r['. * *'] = df.evaluateCellZY(srcHostname, '*', '*').toFilterString(); - r['. * image'] = df.evaluateCellZY(srcHostname, '*', 'image').toFilterString(); - r['. * 3p'] = df.evaluateCellZY(srcHostname, '*', '3p').toFilterString(); - r['. * inline-script'] = df.evaluateCellZY(srcHostname, '*', 'inline-script').toFilterString(); - r['. * 1p-script'] = df.evaluateCellZY(srcHostname, '*', '1p-script').toFilterString(); - r['. * 3p-script'] = df.evaluateCellZY(srcHostname, '*', '3p-script').toFilterString(); - r['. * 3p-frame'] = df.evaluateCellZY(srcHostname, '*', '3p-frame').toFilterString(); + r['. * *'] = df.lookupRuleData(srcHostname, '*', '*'); + r['. 
* image'] = df.lookupRuleData(srcHostname, '*', 'image'); + r['. * 3p'] = df.lookupRuleData(srcHostname, '*', '3p'); + r['. * inline-script'] = df.lookupRuleData(srcHostname, '*', 'inline-script'); + r['. * 1p-script'] = df.lookupRuleData(srcHostname, '*', '1p-script'); + r['. * 3p-script'] = df.lookupRuleData(srcHostname, '*', '3p-script'); + r['. * 3p-frame'] = df.lookupRuleData(srcHostname, '*', '3p-frame'); for ( var desHostname in desHostnames ) { - r['/ ' + desHostname + ' *'] = df.evaluateCellZY('*', desHostname, '*').toFilterString(); - r['. ' + desHostname + ' *'] = df.evaluateCellZY(srcHostname, desHostname, '*').toFilterString(); + r['/ ' + desHostname + ' *'] = df.lookupRuleData('*', desHostname, '*'); + r['. ' + desHostname + ' *'] = df.lookupRuleData(srcHostname, desHostname, '*'); } return r; }; @@ -1239,7 +1239,7 @@ var logCosmeticFilters = function(tabId, details) { µb.logger.writeOne( tabId, 'cosmetic', - 'cb:##' + selectors[i], + { source: 'cosmetic', raw: '##' + selectors[i] }, 'dom', details.frameURL, null, diff --git a/src/js/pagestore.js b/src/js/pagestore.js index 6c0c67cd4..6b0c12a32 100644 --- a/src/js/pagestore.js +++ b/src/js/pagestore.js @@ -48,16 +48,17 @@ var netFilteringResultCacheEntryJunkyardMax = 200; /******************************************************************************/ -var NetFilteringResultCacheEntry = function(result, type) { - this.init(result, type); +var NetFilteringResultCacheEntry = function(result, type, logData) { + this.init(result, type, logData); }; /******************************************************************************/ -NetFilteringResultCacheEntry.prototype.init = function(result, type) { +NetFilteringResultCacheEntry.prototype.init = function(result, type, logData) { this.result = result; this.type = type; this.time = Date.now(); + this.logData = logData; return this; }; @@ -65,6 +66,7 @@ NetFilteringResultCacheEntry.prototype.init = function(result, type) { 
NetFilteringResultCacheEntry.prototype.dispose = function() { this.result = this.type = ''; + this.logData = undefined; if ( netFilteringResultCacheEntryJunkyard.length < netFilteringResultCacheEntryJunkyardMax ) { netFilteringResultCacheEntryJunkyard.push(this); } @@ -72,11 +74,11 @@ NetFilteringResultCacheEntry.prototype.dispose = function() { /******************************************************************************/ -NetFilteringResultCacheEntry.factory = function(result, type) { +NetFilteringResultCacheEntry.factory = function(result, type, logData) { if ( netFilteringResultCacheEntryJunkyard.length ) { - return netFilteringResultCacheEntryJunkyard.pop().init(result, type); + return netFilteringResultCacheEntryJunkyard.pop().init(result, type, logData); } - return new NetFilteringResultCacheEntry(result, type); + return new NetFilteringResultCacheEntry(result, type, logData); }; /******************************************************************************/ @@ -127,7 +129,7 @@ NetFilteringResultCache.prototype.dispose = function() { /******************************************************************************/ -NetFilteringResultCache.prototype.add = function(context, result) { +NetFilteringResultCache.prototype.add = function(context, result, logData) { var url = context.requestURL, type = context.requestType, key = type + ' ' + url, @@ -136,9 +138,10 @@ NetFilteringResultCache.prototype.add = function(context, result) { entry.result = result; entry.type = type; entry.time = Date.now(); + entry.logData = logData; return; } - this.urls[key] = NetFilteringResultCacheEntry.factory(result, type); + this.urls[key] = NetFilteringResultCacheEntry.factory(result, type, logData); if ( this.count === 0 ) { this.pruneAsync(); } @@ -305,6 +308,7 @@ PageStore.prototype.init = function(tabId) { this.hostnameToCountMap = new Map(); this.contentLastModified = 0; this.frames = Object.create(null); + this.logData = undefined; this.perLoadBlockedRequestCount = 0; 
this.perLoadAllowedRequestCount = 0; this.hiddenElementCount = ''; // Empty string means "unknown" @@ -320,7 +324,7 @@ PageStore.prototype.init = function(tabId) { µb.logger.writeOne( tabId, 'cosmetic', - µb.hnSwitches.toResultString(), + µb.hnSwitches.toLogData(), 'dom', tabContext.rawURL, this.tabHostname, @@ -336,12 +340,12 @@ PageStore.prototype.init = function(tabId) { tabContext.normalURL, 'generichide' ); - this.noGenericCosmeticFiltering = result === false; - if ( result !== undefined && µb.logger.isEnabled() ) { + this.noGenericCosmeticFiltering = result === 2; + if ( result !== 0 && µb.logger.isEnabled() ) { µb.logger.writeOne( tabId, 'net', - µb.staticNetFilteringEngine.toResultString(true), + µb.staticNetFilteringEngine.toLogData(), 'generichide', tabContext.rawURL, this.tabHostname, @@ -525,7 +529,7 @@ PageStore.prototype.journalAddRequest = function(hostname, result) { if ( hostname === '' ) { return; } this.journal.push( hostname, - result.charCodeAt(1) === 0x62 /* 'b' */ ? 0x00000001 : 0x00010000 + result === 1 ? 0x00000001 : 0x00010000 ); if ( this.journalTimer === null ) { this.journalTimer = vAPI.setTimeout(this.journalProcess.bind(this, true), 1000); @@ -604,6 +608,8 @@ PageStore.prototype.journalProcess = function(fromTimer) { /******************************************************************************/ PageStore.prototype.filterRequest = function(context) { + this.logData = undefined; + var requestType = context.requestType; // We want to short-term cache filtering results of collapsible types, @@ -614,35 +620,39 @@ PageStore.prototype.filterRequest = function(context) { } if ( this.getNetFilteringSwitch() === false ) { - this.netFilteringCache.add(context, ''); - return ''; + this.netFilteringCache.add(context, 0); + return 0; } var entry = this.netFilteringCache.lookup(context); if ( entry !== undefined ) { + this.logData = entry.logData; return entry.result; } // Dynamic URL filtering. 
- µb.sessionURLFiltering.evaluateZ(context.rootHostname, context.requestURL, requestType); - var result = µb.sessionURLFiltering.toFilterString(); + var result = µb.sessionURLFiltering.evaluateZ(context.rootHostname, context.requestURL, requestType); + if ( result !== 0 && µb.logger.isEnabled() ) { + this.logData = µb.sessionURLFiltering.toLogData(); + } // Dynamic hostname/type filtering. - if ( result === '' && µb.userSettings.advancedUserEnabled ) { - µb.sessionFirewall.evaluateCellZY( context.rootHostname, context.requestHostname, requestType); - if ( µb.sessionFirewall.mustBlockOrAllow() ) { - result = µb.sessionFirewall.toFilterString(); + if ( result === 0 && µb.userSettings.advancedUserEnabled ) { + result = µb.sessionFirewall.evaluateCellZY( context.rootHostname, context.requestHostname, requestType); + if ( result !== 0 && µb.logger.isEnabled() ) { + this.logData = µb.sessionFirewall.toLogData(); } } // Static filtering: lowest filtering precedence. - if ( result === '' || result.charCodeAt(1) === 110 /* 'n' */ ) { - if ( µb.staticNetFilteringEngine.matchString(context) !== undefined ) { - result = µb.staticNetFilteringEngine.toResultString(µb.logger.isEnabled()); + if ( result === 0 || result === 3 ) { + result = µb.staticNetFilteringEngine.matchString(context); + if ( result !== 0 && µb.logger.isEnabled() ) { + this.logData = µb.staticNetFilteringEngine.toLogData(); } } - this.netFilteringCache.add(context, result); + this.netFilteringCache.add(context, result, this.logData); return result; }; @@ -652,14 +662,16 @@ PageStore.prototype.filterRequest = function(context) { // The caller is responsible to check whether filtering is enabled or not. 
PageStore.prototype.filterLargeMediaElement = function(size) { + this.logData = undefined; + if ( Date.now() < this.allowLargeMediaElementsUntil ) { - return; + return 0; } if ( µb.hnSwitches.evaluateZ('no-large-media', this.tabHostname) !== true ) { - return; + return 0; } if ( (size >>> 10) < µb.userSettings.largeMediaSize ) { - return; + return 0; } this.largeMediaCount += 1; @@ -670,48 +682,66 @@ PageStore.prototype.filterLargeMediaElement = function(size) { ); } - return µb.hnSwitches.toResultString(); + if ( µb.logger.isEnabled() ) { + this.logData = µb.hnSwitches.toLogData(); + } + + return 1; }; /******************************************************************************/ PageStore.prototype.filterRequestNoCache = function(context) { + this.logData = undefined; + if ( this.getNetFilteringSwitch() === false ) { - return ''; + return 0; } - var requestType = context.requestType, - result = ''; + var requestType = context.requestType; if ( requestType === 'csp_report' ) { if ( this.internalRedirectionCount !== 0 ) { - result = 'gb:no-spurious-csp-report'; + if ( µb.logger.isEnabled() ) { + this.logData = { result: 1, source: 'global', raw: 'no-spurious-csp-report' }; + } + return 1; } - } else if ( requestType === 'font' ) { - if ( µb.hnSwitches.evaluateZ('no-remote-fonts', context.rootHostname) !== false ) { - result = µb.hnSwitches.toResultString(); - } - this.remoteFontCount += 1; } + if ( requestType === 'font' ) { + this.remoteFontCount += 1; + if ( µb.hnSwitches.evaluateZ('no-remote-fonts', context.rootHostname) !== false ) { + if ( µb.logger.isEnabled() ) { + this.logData = µb.hnSwitches.toLogData(); + } + return 1; + } + } + + var result = 0; + // Dynamic URL filtering. 
- if ( result === '' ) { - µb.sessionURLFiltering.evaluateZ(context.rootHostname, context.requestURL, requestType); - result = µb.sessionURLFiltering.toFilterString(); + if ( result === 0 ) { + result = µb.sessionURLFiltering.evaluateZ(context.rootHostname, context.requestURL, requestType); + if ( result !== 0 && µb.logger.isEnabled() ) { + this.logData = µb.sessionURLFiltering.toLogData(); + } } // Dynamic hostname/type filtering. - if ( result === '' && µb.userSettings.advancedUserEnabled ) { - µb.sessionFirewall.evaluateCellZY(context.rootHostname, context.requestHostname, requestType); - if ( µb.sessionFirewall.mustBlockOrAllow() ) { - result = µb.sessionFirewall.toFilterString(); + if ( result === 0 && µb.userSettings.advancedUserEnabled ) { + result = µb.sessionFirewall.evaluateCellZY(context.rootHostname, context.requestHostname, requestType); + if ( result !== 0 && µb.logger.isEnabled() ) { + this.logData = µb.sessionFirewall.toLogData(); } } // Static filtering has lowest precedence. 
- if ( result === '' || result.charCodeAt(1) === 110 /* 'n' */ ) { - if ( µb.staticNetFilteringEngine.matchString(context) !== undefined ) { - result = µb.staticNetFilteringEngine.toResultString(µb.logger.isEnabled()); + if ( result === 0 || result === 3 ) { + result = µb.staticNetFilteringEngine.matchString(context); + if ( result !== 0 && µb.logger.isEnabled() ) { + this.logData = µb.staticNetFilteringEngine.toLogData(); } } diff --git a/src/js/popup.js b/src/js/popup.js index b5923b9bb..ce17ae5d8 100644 --- a/src/js/popup.js +++ b/src/js/popup.js @@ -79,7 +79,6 @@ var messaging = vAPI.messaging; var popupData = {}; var dfPaneBuilt = false; var reIP = /^\d+(?:\.\d+){1,3}$/; -var reSrcHostnameFromRule = /^d[abn]:([^ ]+) ([^ ]+) ([^ ]+)/; var scopeToSrcHostnameMap = { '/': '*', '.': '' @@ -148,16 +147,12 @@ var hashFromPopupData = function(reset) { return; } - var hasher = []; - var rules = popupData.firewallRules; - var rule; + var hasher = [], + rules = popupData.firewallRules; for ( var key in rules ) { - if ( rules.hasOwnProperty(key) === false ) { - continue; - } - rule = rules[key]; - if ( rule !== '' ) { - hasher.push(rule); + var rule = rules[key]; + if ( rule !== null ) { + hasher.push(rule.src + ' ' + rule.des + ' ' + rule.type + ' ' + rule.action); } } hasher.sort(); @@ -243,18 +238,16 @@ var updateFirewallCell = function(scope, des, type, rule) { } cells.removeClass(); - var action = rule.charAt(1); - if ( action !== '' ) { - cells.toggleClass(action + 'Rule', true); + if ( rule !== null ) { + cells.toggleClass(rule.action + 'Rule', true); } // Use dark shade visual cue if the rule is specific to the cell. 
var ownRule = false; - var matches = reSrcHostnameFromRule.exec(rule); - if ( matches !== null ) { - ownRule = (matches[2] !== '*' || matches[3] === type) && - (matches[2] === des) && - (matches[1] === scopeToSrcHostnameMap[scope]); + if ( rule !== null ) { + ownRule = (rule.des !== '*' || rule.type === type) && + (rule.des === des) && + (rule.src === scopeToSrcHostnameMap[scope]); } cells.toggleClass('ownRule', ownRule); diff --git a/src/js/reverselookup-worker.js b/src/js/reverselookup-worker.js index 17e8f9915..242f41a72 100644 --- a/src/js/reverselookup-worker.js +++ b/src/js/reverselookup-worker.js @@ -35,14 +35,14 @@ var reEscape = function(s) { return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); }; -var reSpecialChars = /[\*\^\t\v\n]/; +var reSpecialNetworkChars = /[a-d]/; /******************************************************************************/ var fromNetFilter = function(details) { var lists = []; var compiledFilter = details.compiledFilter; - var entry, content, pos, c; + var entry, content, pos, notFound; for ( var assetKey in listEntries ) { entry = listEntries[assetKey]; if ( entry === undefined ) { @@ -52,23 +52,23 @@ var fromNetFilter = function(details) { pos = 0; for (;;) { pos = content.indexOf(compiledFilter, pos); - if ( pos === -1 ) { - break; - } + if ( pos === -1 ) { break; } // We need an exact match. 
// https://github.com/gorhill/uBlock/issues/1392 // https://github.com/gorhill/uBlock/issues/835 - if ( pos === 0 || reSpecialChars.test(content.charAt(pos - 1)) ) { - c = content.charAt(pos + compiledFilter.length); - if ( c === '' || reSpecialChars.test(c) ) { - lists.push({ - title: entry.title, - supportURL: entry.supportURL - }); - break; - } + pos -= 1; + notFound = + reSpecialNetworkChars.test(content.charAt(pos)) === false || + pos !== 0 && content.charCodeAt(pos - 1) !== 0x0A /* '\n' */; + pos += 1 + compiledFilter.length; + if ( notFound ) { continue; } + if ( pos === content.length || content.charCodeAt(pos) === 0x0A ) { + lists.push({ + title: entry.title, + supportURL: entry.supportURL + }); + break; } - pos += compiledFilter.length; } } @@ -119,18 +119,18 @@ var fromCosmeticFilter = function(details) { var matches = rePlainSelector.exec(filter); if ( matches ) { if ( matches[0] === filter ) { // simple CSS selector - reStr.push('c', 'lg', reEscape(filter)); + reStr.push('[e-h]lg', reEscape(filter)); } else { // complex CSS selector - reStr.push('c', reEscape('lg+'), reEscape(matches[0]), reEscape(filter)); + reStr.push('[e-h]lg\\+', reEscape(matches[0]), reEscape(filter)); } } else if ( reHighLow.test(filter) ) { // [alt] or [title] - reStr.push('c', 'hlg0', reEscape(filter)); + reStr.push('[e-h]hlg0', reEscape(filter)); } else if ( reHighMedium.test(filter) ) { // [href^="..."] - reStr.push('c', 'hmg0', '[^"]{8}', '[a-z]*' + reEscape(filter)); + reStr.push('[e-h]hmg0', '[^"]{8}', '[a-z]*' + reEscape(filter)); } else if ( filter.indexOf(' ') === -1 ) { // high-high-simple selector - reStr.push('c', 'hhsg0', reEscape(filter)); + reStr.push('[e-h]hhsg0', reEscape(filter)); } else { // high-high-complex selector - reStr.push('c', 'hhcg0', reEscape(filter)); + reStr.push('[e-h]hhcg0', reEscape(filter)); } candidates[details.rawFilter] = new RegExp(reStr.join('\\v') + '(?:\\n|$)'); @@ -150,7 +150,7 @@ var fromCosmeticFilter = function(details) { if ( 
hostname !== '' ) { for ( ;; ) { candidates[hostname + '##' + filter] = new RegExp( - ['c', 'h', '[^\\v]+', reEscape(hostname), filterEx].join('\\v') + + ['[e-h]h', '[^\\v]+', reEscape(hostname), filterEx].join('\\v') + '(?:\\n|$)' ); pos = hostname.indexOf('.'); @@ -168,7 +168,7 @@ var fromCosmeticFilter = function(details) { if ( pos !== -1 ) { var entity = domain.slice(0, pos) + '.*'; candidates[entity + '##' + filter] = new RegExp( - ['c', 'h', '[^\\v]+', reEscape(entity), filterEx].join('\\v') + + ['[e-h]h', '[^\\v]+', reEscape(entity), filterEx].join('\\v') + '(?:\\n|$)' ); } diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 9f7c0d277..380880b07 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -1,7 +1,7 @@ /******************************************************************************* uBlock Origin - a browser extension to block requests. - Copyright (C) 2014-2016 Raymond Hill + Copyright (C) 2014-2017 Raymond Hill This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,25 +33,22 @@ var µb = µBlock; // fedcba9876543210 -// | | | ||| -// | | | ||| -// | | | ||| -// | | | ||| -// | | | ||+---- bit 0: [BlockAction | AllowAction] -// | | | |+---- bit 1: `important` -// | | | +---- bit 2-3: party [0 - 3] -// | | +---- bit 4-7: type [0 - 15] -// | +---- bit 8-15: unused -// +---- bit 15: never use! 
(to ensure valid unicode character) +// | | ||| +// | | ||| +// | | ||| +// | | ||| +// | | ||+---- bit 0: [BlockAction | AllowAction] +// | | |+---- bit 1: `important` +// | | +---- bit 2-3: party [0 - 3] +// | +---- bit 4-8: type [0 - 31] +// +---- bit 9-15: unused var BlockAction = 0 << 0; var AllowAction = 1 << 0; - -var Important = 1 << 1; - -var AnyParty = 0 << 2; -var FirstParty = 1 << 2; -var ThirdParty = 2 << 2; +var Important = 1 << 1; +var AnyParty = 0 << 2; +var FirstParty = 1 << 2; +var ThirdParty = 2 << 2; var AnyType = 0 << 4; var typeNameToTypeValue = { @@ -66,13 +63,14 @@ var typeNameToTypeValue = { 'media': 8 << 4, 'websocket': 9 << 4, 'other': 10 << 4, - 'popunder': 11 << 4, - 'main_frame': 12 << 4, - 'generichide': 13 << 4, - 'inline-script': 14 << 4, - 'popup': 15 << 4 + 'popup': 11 << 4, // start of behavioral filtering + 'popunder': 12 << 4, + 'main_frame': 13 << 4, // start of 1st-party-only behavioral filtering + 'generichide': 14 << 4, + 'inline-script': 15 << 4, + 'data': 16 << 4 // special: a generic data holder }; -var typeOtherValue = typeNameToTypeValue.other; +var otherTypeBitValue = typeNameToTypeValue.other; var typeValueToTypeName = { 1: 'stylesheet', @@ -85,18 +83,19 @@ var typeValueToTypeName = { 8: 'media', 9: 'websocket', 10: 'other', - 11: 'popunder', - 12: 'document', - 13: 'generichide', - 14: 'inline-script', - 15: 'popup' + 11: 'popup', + 12: 'popunder', + 13: 'document', + 14: 'generichide', + 15: 'inline-script', + 16: 'data' }; // All network request types to bitmap // bring origin to 0 (from 4 -- see typeNameToTypeValue) // left-shift 1 by the above-calculated value // subtract 1 to set all type bits -var allNetRequestTypesBitmap = (1 << (typeOtherValue >>> 4)) - 1; +var allNetRequestTypesBitmap = (1 << (otherTypeBitValue >>> 4)) - 1; var BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty; var BlockAnyType = BlockAction | AnyType; @@ -117,8 +116,8 @@ var genericHideException = AllowAction | AnyParty |
typeNameToTypeValue.generich // See the following as short-lived registers, used during evaluation. They are // valid until the next evaluation. -var pageHostnameRegister = ''; -var requestHostnameRegister = ''; +var pageHostnameRegister = '', + requestHostnameRegister = ''; //var filterRegister = null; //var categoryRegister = ''; @@ -168,17 +167,11 @@ histogram = function(label, categories) { // Local helpers -var cachedParseInt = parseInt; - -var atoi = function(s) { - return cachedParseInt(s, 10); -}; - // Be sure to not confuse 'example.com' with 'anotherexample.com' var isFirstParty = function(domain, hostname) { return hostname.endsWith(domain) && (hostname.length === domain.length || - hostname.charAt(hostname.length - domain.length - 1) === '.'); + hostname.charCodeAt(hostname.length - domain.length - 1) === 0x2E /* '.' */); }; var normalizeRegexSource = function(s) { @@ -191,62 +184,85 @@ var normalizeRegexSource = function(s) { return ''; }; -var alwaysTruePseudoRegex = { - match: { '0': '', index: 0 }, - exec: function(s) { - this.match['0'] = s; - return this.match; - }, - test: function() { - return true; - } -}; - -var strToRegex = function(s, anchor, flags) { - // https://github.com/chrisaljoudi/uBlock/issues/1038 - // Special case: always match. - if ( s === '*' ) { - return alwaysTruePseudoRegex; - } - var anchorToHnStart; - if ( s.startsWith('||') ) { - s = s.slice(2); - anchorToHnStart = s.charCodeAt(0) === 0x2A; - } +var rawToRegexStr = function(s, anchor) { + var me = rawToRegexStr; // https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/ // https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions // Also: remove leading/trailing wildcards -- there is no point. 
- var reStr = s.replace(/[.+?${}()|[\]\\]/g, '\\$&') - .replace(/\^/g, '(?:[^%.0-9a-z_-]|$)') - .replace(/^\*|\*$/g, '') - .replace(/\*/g, '[^ ]*?'); - - if ( anchor < 0 ) { + var reStr = s.replace(me.escape1, '\\$&') + .replace(me.escape2, '(?:[^%.0-9a-z_-]|$)') + .replace(me.escape3, '') + .replace(me.escape4, '[^ ]*?'); + if ( anchor & 0x4 ) { + reStr = '[0-9a-z.-]*?' + reStr; + } else if ( anchor & 0x2 ) { reStr = '^' + reStr; - } else if ( anchor > 0 ) { + } + if ( anchor & 0x1 ) { reStr += '$'; } - if ( anchorToHnStart ) { - reStr = '[0-9a-z.-]*?' + reStr; - } - //console.debug('µBlock.staticNetFilteringEngine: created RegExp("%s")', reStr); - return new RegExp(reStr, flags); + return reStr; }; +rawToRegexStr.escape1 = /[.+?${}()|[\]\\]/g; +rawToRegexStr.escape2 = /\^/g; +rawToRegexStr.escape3 = /^\*|\*$/g; +rawToRegexStr.escape4 = /\*/g; -var toHex = function(n) { - return n.toString(16); +// If using native Map, we use numerical keys, otherwise for +// Object-based map we use string-based keys. +var exportMapKey = Map.polyfill !== true + ? function(k) { return k.toString(32); } + : function(k) { return k; }; + +var importMapKey = Map.polyfill !== true + ? function(k) { return parseInt(k,32); } + : function(k) { return k; }; + +var bitsToMapKey = Map.polyfill !== true + ? function(k) { return k; } + : function(k) { return k.toString(32); }; + +var mapKeyToBits = Map.polyfill !== true + ? 
function(k) { return k; } + : function(k) { return parseInt(k,32); }; + +var toLogDataInternal = function(key, token, filter) { + if ( filter === null ) { return undefined; } + var logData = filter.logData(); + logData.compiled = exportMapKey(bitsToMapKey(key)) + '\v' + + token + '\v' + + logData.compiled; + if ( key & 0x001 ) { + logData.raw = '@@' + logData.raw; + } + var opts = []; + if ( key & 0x002 ) { + opts.push('important'); + } + if ( key & 0x008 ) { + opts.push('third-party'); + } else if ( key & 0x004 ) { + opts.push('first-party'); + } + var type = (key >>> 4) & 0x1F; + if ( type !== 0 && type !== 16 /* data */ ) { + opts.push(typeValueToTypeName[type]); + } + if ( logData.opts !== undefined ) { + opts.push(logData.opts); + } + if ( opts.length !== 0 ) { + logData.raw += '$' + opts.join(','); + } + return logData; }; // First character of match must be within the hostname part of the url. var isHnAnchored = function(url, matchStart) { var hnStart = url.indexOf('://'); - if ( hnStart === -1 ) { - return false; - } + if ( hnStart === -1 ) { return false; } hnStart += 3; - if ( matchStart <= hnStart ) { - return true; - } + if ( matchStart <= hnStart ) { return true; } if ( reURLPostHostnameAnchors.test(url.slice(hnStart, matchStart)) ) { return false; } @@ -257,170 +273,56 @@ var isHnAnchored = function(url, matchStart) { var reURLPostHostnameAnchors = /[\/?#]/; -/******************************************************************************/ - -// Hostname test helpers: the optimal test function is picked according to the -// content of the `domain=` filter option. - -// Re-factored in light of: -// - https://gorhill.github.io/obj-vs-set-vs-map/set-vs-regexp.html -// The re-factoring made possible to reuse instances of a matcher. As of -// writing, I observed that just with EasyList, there were ~1,200 reused -// instances out of ~2,800. - -var hnMatcherFactory = function(domainOpt) { - var me = hnMatcherFactory; - - // Reuse last instance if possible. 
- if ( domainOpt === me.domainOpt ) { - return me.hnMatcher; - } - - me.domainOpt = domainOpt; - - // Only one hostname - if ( domainOpt.indexOf('|') === -1 ) { - if ( domainOpt.charCodeAt(0) === 0x7E /* '~' */ ) { - return (me.hnMatcher = new me.Miss(domainOpt)); - } - return (me.hnMatcher = new me.Hit(domainOpt)); - } - - // Many hostnames. - - // Must be in set (none negated). - if ( domainOpt.indexOf('~') === -1 ) { - return (me.hnMatcher = new me.HitSet(domainOpt)); - } - - // Must not be in set (all negated). - if ( me.reAllNegated.test(domainOpt) ) { - return (me.hnMatcher = new me.MissSet(domainOpt)); - } - - // Must be in one set, but not in the other. - return (me.hnMatcher = new me.MixedSet(domainOpt)); -}; - -hnMatcherFactory.reAllNegated = /^~(?:[^|~]+\|~)+[^|~]+$/; -hnMatcherFactory.domainOpt = undefined; -hnMatcherFactory.hnMatcher = undefined; - -hnMatcherFactory.Hit = function(domainOpt) { - this.hostname = domainOpt; -}; -hnMatcherFactory.Hit.prototype.toDomainOpt = function() { - return this.hostname; -}; -hnMatcherFactory.Hit.prototype.test = function() { - var needle = this.hostname, - haystack = pageHostnameRegister; - return haystack.endsWith(needle) && - (haystack.length === needle.length || - haystack.charCodeAt(haystack.length - needle.length - 1) === 0x2E /* '.' */); -}; - -hnMatcherFactory.Miss = function(domainOpt) { - this.hostname = domainOpt.slice(1); -}; -hnMatcherFactory.Miss.prototype.toDomainOpt = function() { - return '~' + this.hostname; -}; -hnMatcherFactory.Miss.prototype.test = function() { - var needle = this.hostname, - haystack = pageHostnameRegister; - return haystack.endsWith(needle) === false || - (haystack.length !== needle.length && - haystack.charCodeAt(haystack.length - needle.length - 1) !== 0x2E /* '.' 
*/); -}; - -hnMatcherFactory.HitSet = function(domainOpt) { - this.domainOpt = domainOpt; -}; -hnMatcherFactory.HitSet.prototype.oneOf = null; -hnMatcherFactory.HitSet.prototype.toDomainOpt = function() { - return this.domainOpt; -}; -hnMatcherFactory.HitSet.prototype.init = function() { - this.oneOf = new RegExp('(?:^|\\.)(?:' + this.domainOpt.replace(/\./g, '\\.') + ')$'); -}; -hnMatcherFactory.HitSet.prototype.test = function() { - if ( this.oneOf === null ) { this.init(); } - return this.oneOf.test(pageHostnameRegister); -}; - -hnMatcherFactory.MissSet = function(domainOpt) { - this.domainOpt = domainOpt; -}; -hnMatcherFactory.MissSet.prototype.noneOf = null; -hnMatcherFactory.MissSet.prototype.toDomainOpt = function() { - return this.domainOpt; -}; -hnMatcherFactory.MissSet.prototype.init = function() { - this.noneOf = new RegExp('(?:^|\\.)(?:' + this.domainOpt.replace(/~/g, '').replace(/\./g, '\\.') + ')$'); -}; -hnMatcherFactory.MissSet.prototype.test = function() { - if ( this.noneOf === null ) { this.init(); } - return this.noneOf.test(pageHostnameRegister) === false; -}; - -hnMatcherFactory.MixedSet = function(domainOpt) { - this.domainOpt = domainOpt; -}; -hnMatcherFactory.MixedSet.prototype.oneOf = null; -hnMatcherFactory.MixedSet.prototype.noneOf = null; -hnMatcherFactory.MixedSet.prototype.toDomainOpt = function() { - return this.domainOpt; -}; -hnMatcherFactory.MixedSet.prototype.init = function() { - var oneOf = [], noneOf = [], - hostnames = this.domainOpt.split('|'), - i = hostnames.length, - hostname; - while ( i-- ) { - hostname = hostnames[i].replace(/\./g, '\\.'); - if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) { - noneOf.push(hostname.slice(1)); - } else { - oneOf.push(hostname); - } - } - this.oneOf = new RegExp('(?:^|\\.)(?:' + oneOf.join('|') + ')$'); - this.noneOf = new RegExp('(?:^|\\.)(?:' + noneOf.join('|') + ')$'); -}; -hnMatcherFactory.MixedSet.prototype.test = function() { - if ( this.oneOf === null ) { this.init(); } - var 
needle = pageHostnameRegister; - return this.oneOf.test(needle) && this.noneOf.test(needle) === false; -}; - /******************************************************************************* -Filters family tree: + Each filter class will register itself in the map. A filter class + id MUST always stringify to ONE single character. -- plain (no wildcard) - - anywhere - - no hostname - - specific hostname - - anchored at start - - no hostname - - specific hostname - - anchored at end - - no hostname - - specific hostname - - anchored within hostname - - no hostname - - specific hostname (not implemented) + IMPORTANT: any change which modifies the mapping will have to be + reflected with µBlock.systemSettings.compiledMagic. -- with wildcard(s) - - anchored within hostname - - no hostname - - specific hostname - - all else - - no hostname - - specific hostname +**/ -*/ +var filterClasses = new Map(), + filterClassIdGenerator = 0; + +var registerFilterClass = function(ctor) { + var fid = filterClassIdGenerator++; + ctor.fidPrefix = ctor.prototype.fidPrefix = fid.toString(32) + '\t'; + filterClasses.set(fid, ctor); + //console.log(ctor.name, fid); +}; + +/******************************************************************************/ + +var FilterTrue = function() { +}; + +FilterTrue.prototype.match = function() { + return true; +}; + +FilterTrue.prototype.logData = function() { + return { + raw: '*', + regex: '^', + compiled: this.compile(), + }; +}; + +FilterTrue.prototype.compile = function() { + return this.fidPrefix; +}; + +FilterTrue.compile = function() { + return FilterTrue.fidPrefix; +}; + +FilterTrue.load = function() { + return new FilterTrue(); +}; + +registerFilterClass(FilterTrue); /******************************************************************************/ @@ -433,53 +335,31 @@ FilterPlain.prototype.match = function(url, tokenBeg) { return url.startsWith(this.s, tokenBeg - this.tokenBeg); }; -FilterPlain.fid = -FilterPlain.prototype.fid = 
-FilterPlain.prototype.rtfid = 'a'; +FilterPlain.prototype.logData = function() { + return { + raw: this.s, + regex: rawToRegexStr(this.s), + compiled: this.compile() + }; +}; -FilterPlain.prototype.toSelfie = -FilterPlain.prototype.rtCompile = function() { - return this.s + '\t' + this.tokenBeg; +FilterPlain.prototype.compile = function() { + return this.fidPrefix + this.s + '\t' + this.tokenBeg; }; FilterPlain.compile = function(details) { - return details.f + '\t' + details.tokenBeg; + return FilterPlain.fidPrefix + details.f + '\t' + details.tokenBeg; }; -FilterPlain.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterPlain(s.slice(0, pos), atoi(s.slice(pos + 1))); +FilterPlain.load = function(s) { + var pos = s.indexOf('\t', 2); + return new FilterPlain( + s.slice(2, pos), + parseInt(s.slice(pos + 1), 10) + ); }; -/******************************************************************************/ - -var FilterPlainHostname = function(s, tokenBeg, domainOpt) { - this.s = s; - this.tokenBeg = tokenBeg; - this.hnMatcher = hnMatcherFactory(domainOpt); -}; - -FilterPlainHostname.prototype.match = function(url, tokenBeg) { - return url.startsWith(this.s, tokenBeg - this.tokenBeg) && this.hnMatcher.test(); -}; - -FilterPlainHostname.fid = -FilterPlainHostname.prototype.fid = -FilterPlainHostname.prototype.rtfid = 'ah'; - -FilterPlainHostname.prototype.toSelfie = -FilterPlainHostname.prototype.rtCompile = function() { - return this.s + '\t' + this.tokenBeg + '\t' + this.hnMatcher.toDomainOpt(); -}; - -FilterPlainHostname.compile = function(details) { - return details.f + '\t' + details.tokenBeg + '\t' + details.domainOpt; -}; - -FilterPlainHostname.fromSelfie = function(s) { - var args = s.split('\t'); - return new FilterPlainHostname(args[0], atoi(args[1]), args[2]); -}; +registerFilterClass(FilterPlain); /******************************************************************************/ @@ -491,51 +371,27 @@ FilterPlainPrefix0.prototype.match = 
function(url, tokenBeg) { return url.startsWith(this.s, tokenBeg); }; -FilterPlainPrefix0.fid = -FilterPlainPrefix0.prototype.fid = -FilterPlainPrefix0.prototype.rtfid = '0a'; +FilterPlainPrefix0.prototype.logData = function() { + return { + raw: this.s, + regex: rawToRegexStr(this.s), + compiled: this.compile() + }; +}; -FilterPlainPrefix0.prototype.toSelfie = -FilterPlainPrefix0.prototype.rtCompile = function() { - return this.s; +FilterPlainPrefix0.prototype.compile = function() { + return this.fidPrefix + this.s; }; FilterPlainPrefix0.compile = function(details) { - return details.f; + return FilterPlainPrefix0.fidPrefix + details.f; }; -FilterPlainPrefix0.fromSelfie = function(s) { - return new FilterPlainPrefix0(s); +FilterPlainPrefix0.load = function(s) { + return new FilterPlainPrefix0(s.slice(2)); }; -/******************************************************************************/ - -var FilterPlainPrefix0Hostname = function(s, domainOpt) { - this.s = s; - this.hnMatcher = hnMatcherFactory(domainOpt); -}; - -FilterPlainPrefix0Hostname.prototype.match = function(url, tokenBeg) { - return url.startsWith(this.s, tokenBeg) && this.hnMatcher.test(); -}; - -FilterPlainPrefix0Hostname.fid = -FilterPlainPrefix0Hostname.prototype.fid = -FilterPlainPrefix0Hostname.prototype.rtfid = '0ah'; - -FilterPlainPrefix0Hostname.prototype.toSelfie = -FilterPlainPrefix0Hostname.prototype.rtCompile = function() { - return this.s + '\t' + this.hnMatcher.toDomainOpt(); -}; - -FilterPlainPrefix0Hostname.compile = function(details) { - return details.f + '\t' + details.domainOpt; -}; - -FilterPlainPrefix0Hostname.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterPlainPrefix0Hostname(s.slice(0, pos), s.slice(pos + 1)); -}; +registerFilterClass(FilterPlainPrefix0); /******************************************************************************/ @@ -547,52 +403,63 @@ FilterPlainPrefix1.prototype.match = function(url, tokenBeg) { return url.startsWith(this.s, 
tokenBeg - 1); }; -FilterPlainPrefix1.fid = -FilterPlainPrefix1.prototype.fid = -FilterPlainPrefix1.prototype.rtfid = '1a'; +FilterPlainPrefix1.prototype.logData = function() { + return { + raw: this.s, + regex: rawToRegexStr(this.s), + compiled: this.compile() + }; +}; -FilterPlainPrefix1.prototype.toSelfie = -FilterPlainPrefix1.prototype.rtCompile = function() { - return this.s; +FilterPlainPrefix1.prototype.compile = function() { + return this.fidPrefix + this.s; }; FilterPlainPrefix1.compile = function(details) { - return details.f; + return FilterPlainPrefix1.fidPrefix + details.f; }; -FilterPlainPrefix1.fromSelfie = function(s) { - return new FilterPlainPrefix1(s); +FilterPlainPrefix1.load = function(s) { + return new FilterPlainPrefix1(s.slice(2)); }; +registerFilterClass(FilterPlainPrefix1); + /******************************************************************************/ -var FilterPlainPrefix1Hostname = function(s, domainOpt) { +var FilterPlainHostname = function(s) { this.s = s; - this.hnMatcher = hnMatcherFactory(domainOpt); }; -FilterPlainPrefix1Hostname.prototype.match = function(url, tokenBeg) { - return url.startsWith(this.s, tokenBeg - 1) && this.hnMatcher.test(); +FilterPlainHostname.prototype.match = function() { + var haystack = requestHostnameRegister, needle = this.s; + if ( haystack.endsWith(needle) === false ) { return false; } + var offset = haystack.length - needle.length; + return offset === 0 || haystack.charCodeAt(offset - 1) === 0x2E /* '.' 
*/; }; -FilterPlainPrefix1Hostname.fid = -FilterPlainPrefix1Hostname.prototype.fid = -FilterPlainPrefix1Hostname.prototype.rtfid = '1ah'; - -FilterPlainPrefix1Hostname.prototype.toSelfie = -FilterPlainPrefix1Hostname.prototype.rtCompile = function() { - return this.s + '\t' + this.hnMatcher.toDomainOpt(); +FilterPlainHostname.prototype.logData = function() { + return { + raw: '||' + this.s + '^', + regex: rawToRegexStr(this.s, 0x4), + compiled: this.compile() + }; }; -FilterPlainPrefix1Hostname.compile = function(details) { - return details.f + '\t' + details.domainOpt; +FilterPlainHostname.prototype.compile = function() { + return this.fidPrefix + this.s; }; -FilterPlainPrefix1Hostname.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterPlainPrefix1Hostname(s.slice(0, pos), s.slice(pos + 1)); +FilterPlainHostname.compile = function(details) { + return FilterPlainHostname.fidPrefix + details.f; }; +FilterPlainHostname.load = function(s) { + return new FilterPlainHostname(s.slice(2)); +}; + +registerFilterClass(FilterPlainHostname); + /******************************************************************************/ var FilterPlainLeftAnchored = function(s) { @@ -603,51 +470,27 @@ FilterPlainLeftAnchored.prototype.match = function(url) { return url.startsWith(this.s); }; -FilterPlainLeftAnchored.fid = -FilterPlainLeftAnchored.prototype.fid = -FilterPlainLeftAnchored.prototype.rtfid = '|a'; +FilterPlainLeftAnchored.prototype.logData = function() { + return { + raw: '|' + this.s, + regex: rawToRegexStr(this.s, 0x2), + compiled: this.compile() + }; +}; -FilterPlainLeftAnchored.prototype.toSelfie = -FilterPlainLeftAnchored.prototype.rtCompile = function() { - return this.s; +FilterPlainLeftAnchored.prototype.compile = function() { + return this.fidPrefix + this.s; }; FilterPlainLeftAnchored.compile = function(details) { - return details.f; + return FilterPlainLeftAnchored.fidPrefix + details.f; }; -FilterPlainLeftAnchored.fromSelfie = function(s) { 
- return new FilterPlainLeftAnchored(s); +FilterPlainLeftAnchored.load = function(s) { + return new FilterPlainLeftAnchored(s.slice(2)); }; -/******************************************************************************/ - -var FilterPlainLeftAnchoredHostname = function(s, domainOpt) { - this.s = s; - this.hnMatcher = hnMatcherFactory(domainOpt); -}; - -FilterPlainLeftAnchoredHostname.prototype.match = function(url) { - return url.startsWith(this.s) && this.hnMatcher.test(); -}; - -FilterPlainLeftAnchoredHostname.fid = -FilterPlainLeftAnchoredHostname.prototype.fid = -FilterPlainLeftAnchoredHostname.prototype.rtfid = '|ah'; - -FilterPlainLeftAnchoredHostname.prototype.toSelfie = -FilterPlainLeftAnchoredHostname.prototype.rtCompile = function() { - return this.s + '\t' + this.hnMatcher.toDomainOpt(); -}; - -FilterPlainLeftAnchoredHostname.compile = function(details) { - return details.f + '\t' + details.domainOpt; -}; - -FilterPlainLeftAnchoredHostname.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterPlainLeftAnchoredHostname(s.slice(0, pos), s.slice(pos + 1)); -}; +registerFilterClass(FilterPlainLeftAnchored); /******************************************************************************/ @@ -659,57 +502,30 @@ FilterPlainRightAnchored.prototype.match = function(url) { return url.endsWith(this.s); }; -FilterPlainRightAnchored.fid = -FilterPlainRightAnchored.prototype.fid = -FilterPlainRightAnchored.prototype.rtfid = 'a|'; +FilterPlainRightAnchored.prototype.logData = function() { + return { + raw: this.s + '|', + regex: rawToRegexStr(this.s, 0x1), + compiled: this.compile() + }; +}; -FilterPlainRightAnchored.prototype.toSelfie = -FilterPlainRightAnchored.prototype.rtCompile = function() { - return this.s; +FilterPlainRightAnchored.prototype.compile = function() { + return this.fidPrefix + this.s; }; FilterPlainRightAnchored.compile = function(details) { - return details.f; + return FilterPlainRightAnchored.fidPrefix + details.f; }; 
-FilterPlainRightAnchored.fromSelfie = function(s) { - return new FilterPlainRightAnchored(s); +FilterPlainRightAnchored.load = function(s) { + return new FilterPlainRightAnchored(s.slice(2)); }; +registerFilterClass(FilterPlainRightAnchored); + /******************************************************************************/ -var FilterPlainRightAnchoredHostname = function(s, domainOpt) { - this.s = s; - this.hnMatcher = hnMatcherFactory(domainOpt); -}; - -FilterPlainRightAnchoredHostname.prototype.match = function(url) { - return url.endsWith(this.s) && this.hnMatcher.test(); -}; - -FilterPlainRightAnchoredHostname.fid = -FilterPlainRightAnchoredHostname.prototype.fid = -FilterPlainRightAnchoredHostname.prototype.rtfid = 'a|h'; - -FilterPlainRightAnchoredHostname.prototype.toSelfie = -FilterPlainRightAnchoredHostname.prototype.rtCompile = function() { - return this.s + '\t' + this.hnMatcher.toDomainOpt(); -}; - -FilterPlainRightAnchoredHostname.compile = function(details) { - return details.f + '\t' + details.domainOpt; -}; - -FilterPlainRightAnchoredHostname.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterPlainRightAnchoredHostname(s.slice(0, pos), s.slice(pos + 1)); -}; - -/******************************************************************************/ - -// https://github.com/chrisaljoudi/uBlock/issues/235 -// The filter is left-anchored somewhere within the hostname part of the URL. 
- var FilterPlainHnAnchored = function(s) { this.s = s; }; @@ -719,200 +535,156 @@ FilterPlainHnAnchored.prototype.match = function(url, tokenBeg) { isHnAnchored(url, tokenBeg); }; -FilterPlainHnAnchored.fid = -FilterPlainHnAnchored.prototype.fid = -FilterPlainHnAnchored.prototype.rtfid = '||a'; +FilterPlainHnAnchored.prototype.logData = function() { + return { + raw: '||' + this.s, + regex: rawToRegexStr(this.s), + compiled: this.compile() + }; +}; -FilterPlainHnAnchored.prototype.toSelfie = -FilterPlainHnAnchored.prototype.rtCompile = function() { - return this.s; +FilterPlainHnAnchored.prototype.compile = function() { + return this.fidPrefix + this.s; }; FilterPlainHnAnchored.compile = function(details) { - return details.f; + return FilterPlainHnAnchored.fidPrefix + details.f; }; -FilterPlainHnAnchored.fromSelfie = function(s) { - return new FilterPlainHnAnchored(s); +FilterPlainHnAnchored.load = function(s) { + return new FilterPlainHnAnchored(s.slice(2)); }; -// https://www.youtube.com/watch?v=71YS6xDB-E4 -// https://www.youtube.com/watch?v=qBPML7ton0E +registerFilterClass(FilterPlainHnAnchored); /******************************************************************************/ -// https://github.com/gorhill/uBlock/issues/142 - -var FilterPlainHnAnchoredHostname = function(s, domainOpt) { - this.s = s; - this.hnMatcher = hnMatcherFactory(domainOpt); -}; - -FilterPlainHnAnchoredHostname.prototype.match = function(url, tokenBeg) { - return url.startsWith(this.s, tokenBeg) && - this.hnMatcher.test() && - isHnAnchored(url, tokenBeg); -}; - -FilterPlainHnAnchoredHostname.fid = -FilterPlainHnAnchoredHostname.prototype.fid = -FilterPlainHnAnchoredHostname.prototype.rtfid = '||ah'; - -FilterPlainHnAnchoredHostname.prototype.toSelfie = -FilterPlainHnAnchoredHostname.prototype.rtCompile = function() { - return this.s + '\t' + this.hnMatcher.toDomainOpt(); -}; - -FilterPlainHnAnchoredHostname.compile = function(details) { - return details.f + '\t' + details.domainOpt; -}; 
- -FilterPlainHnAnchoredHostname.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterPlainHnAnchoredHostname(s.slice(0, pos), s.slice(pos + 1)); -}; - -/******************************************************************************/ - -// Generic filter - var FilterGeneric = function(s, anchor) { this.s = s; this.anchor = anchor; - this.re = null; }; +FilterGeneric.prototype.re = null; + FilterGeneric.prototype.match = function(url) { if ( this.re === null ) { - this.re = strToRegex(this.s, this.anchor); + this.re = new RegExp(rawToRegexStr(this.s, this.anchor)); } return this.re.test(url); }; -FilterGeneric.fid = -FilterGeneric.prototype.fid = -FilterGeneric.prototype.rtfid = '_'; +FilterGeneric.prototype.logData = function() { + var out = { + raw: this.s, + regex: this.re.source, + compiled: this.compile() + }; + if ( this.anchor & 0x2 ) { + out.raw = '|' + out.raw; + } + if ( this.anchor & 0x1 ) { + out.raw += '|'; + } + return out; +}; -FilterGeneric.prototype.toSelfie = -FilterGeneric.prototype.rtCompile = function() { - return this.s + '\t' + this.anchor; +FilterGeneric.prototype.compile = function() { + return this.fidPrefix + this.s + '\t' + this.anchor; }; FilterGeneric.compile = function(details) { - return details.f + '\t' + details.anchor; + return FilterGeneric.fidPrefix + details.f + '\t' + details.anchor; }; -FilterGeneric.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterGeneric(s.slice(0, pos), parseInt(s.slice(pos + 1), 10)); +FilterGeneric.load = function(s) { + var pos = s.indexOf('\t', 2); + return new FilterGeneric( + s.slice(2, pos), + parseInt(s.slice(pos + 1), 10) + ); }; +registerFilterClass(FilterGeneric); + /******************************************************************************/ -// Generic filter - -var FilterGenericHostname = function(s, anchor, domainOpt) { - FilterGeneric.call(this, s, anchor); - this.hnMatcher = hnMatcherFactory(domainOpt); -}; -FilterGenericHostname.prototype 
= Object.create(FilterGeneric.prototype); -FilterGenericHostname.prototype.constructor = FilterGenericHostname; - -FilterGenericHostname.prototype.match = function(url) { - return this.hnMatcher.test() && FilterGeneric.prototype.match.call(this, url); -}; - -FilterGenericHostname.fid = -FilterGenericHostname.prototype.fid = -FilterGenericHostname.prototype.rtfid = '_h'; - -FilterGenericHostname.prototype.toSelfie = -FilterGenericHostname.prototype.rtCompile = function() { - return FilterGeneric.prototype.toSelfie.call(this) + '\t' + this.hnMatcher.toDomainOpt(); -}; - -FilterGenericHostname.compile = function(details) { - return FilterGeneric.compile(details) + '\t' + details.domainOpt; -}; - -FilterGenericHostname.fromSelfie = function(s) { - var fields = s.split('\t'); - return new FilterGenericHostname(fields[0], parseInt(fields[1], 10), fields[2]); -}; - -/******************************************************************************/ - -// Generic filter: hostname-anchored: it has that extra test to find out -// whether the start of the match falls within the hostname part of the -// URL. 
- -var FilterGenericHnAnchored = function(s, anchor) { +var FilterGenericHnAnchored = function(s) { this.s = s; - this.anchor = anchor; - this.re = null; }; +FilterGenericHnAnchored.prototype.re = null; +FilterGenericHnAnchored.prototype.anchor = 0x4; + FilterGenericHnAnchored.prototype.match = function(url) { if ( this.re === null ) { - this.re = strToRegex('||' + this.s, this.anchor); + this.re = new RegExp(rawToRegexStr(this.s, this.anchor)); } var matchStart = url.search(this.re); return matchStart !== -1 && isHnAnchored(url, matchStart); }; -FilterGenericHnAnchored.fid = -FilterGenericHnAnchored.prototype.fid = -FilterGenericHnAnchored.prototype.rtfid = '||_'; +FilterGenericHnAnchored.prototype.logData = function() { + var out = { + raw: '||' + this.s, + regex: this.re.source, + compiled: this.compile() + }; + return out; +}; -FilterGenericHnAnchored.prototype.toSelfie = -FilterGenericHnAnchored.prototype.rtCompile = function() { - return this.s + '\t' + this.anchor; +FilterGenericHnAnchored.prototype.compile = function() { + return this.fidPrefix + this.s; }; FilterGenericHnAnchored.compile = function(details) { - return details.f + '\t' + details.anchor; + return FilterGenericHnAnchored.fidPrefix + details.f; }; -FilterGenericHnAnchored.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterGenericHnAnchored(s.slice(0, pos), parseInt(s.slice(pos + 1), 10)); +FilterGenericHnAnchored.load = function(s) { + return new FilterGenericHnAnchored(s.slice(2)); }; +registerFilterClass(FilterGenericHnAnchored); + /******************************************************************************/ -var FilterGenericHnAnchoredHostname = function(s, anchor, domainOpt) { - FilterGenericHnAnchored.call(this, s, anchor); - this.hnMatcher = hnMatcherFactory(domainOpt); -}; -FilterGenericHnAnchoredHostname.prototype = Object.create(FilterGenericHnAnchored.prototype); -FilterGenericHnAnchoredHostname.prototype.constructor = FilterGenericHnAnchoredHostname; - 
-FilterGenericHnAnchoredHostname.prototype.match = function(url) { - return this.hnMatcher.test() && FilterGenericHnAnchored.prototype.match.call(this, url); +var FilterGenericHnAndRightAnchored = function(s) { + FilterGenericHnAnchored.call(this, s); }; -FilterGenericHnAnchoredHostname.fid = -FilterGenericHnAnchoredHostname.prototype.fid = -FilterGenericHnAnchoredHostname.prototype.rtfid = '||_h'; +FilterGenericHnAndRightAnchored.prototype = Object.create(FilterGenericHnAnchored.prototype, { + constructor: { + value: FilterGenericHnAndRightAnchored + }, + anchor: { + value: 0x5 + }, + logData: { + value: function() { + var out = FilterGenericHnAnchored.prototype.logData.call(this); + out.raw += '|'; + return out; + } + }, + compile: { + value: function() { + return this.fidPrefix + this.s; + } + }, +}); -FilterGenericHnAnchoredHostname.prototype.toSelfie = -FilterGenericHnAnchoredHostname.prototype.rtCompile = function() { - return this.s + '\t' + this.anchor + '\t' + this.hnMatcher.toDomainOpt(); +FilterGenericHnAndRightAnchored.compile = function(details) { + return FilterGenericHnAndRightAnchored.fidPrefix + details.f; }; -FilterGenericHnAnchoredHostname.compile = function(details) { - return details.f + '\t' + details.anchor + '\t' + details.domainOpt; +FilterGenericHnAndRightAnchored.load = function(s) { + return new FilterGenericHnAndRightAnchored(s.slice(2)); }; -FilterGenericHnAnchoredHostname.fromSelfie = function(s) { - var fields = s.split('\t'); - return new FilterGenericHnAnchoredHostname(fields[0], parseInt(fields[1], 10), fields[2]); -}; +registerFilterClass(FilterGenericHnAndRightAnchored); /******************************************************************************/ -// Regex-based filters - var FilterRegex = function(s) { this.re = new RegExp(s, 'i'); }; @@ -921,54 +693,354 @@ FilterRegex.prototype.match = function(url) { return this.re.test(url); }; -FilterRegex.fid = -FilterRegex.prototype.fid = -FilterRegex.prototype.rtfid = '//'; 
+FilterRegex.prototype.logData = function() { + return { + raw: '/' + this.s + '/', + regex: this.s, + compiled: this.compile() + }; +}; -FilterRegex.prototype.toSelfie = -FilterRegex.prototype.rtCompile = function() { - return this.re.source; +FilterRegex.prototype.compile = function() { + return this.fidPrefix + this.re.source; }; FilterRegex.compile = function(details) { - return details.f; + return FilterRegex.fidPrefix + details.f; }; -FilterRegex.fromSelfie = function(s) { - return new FilterRegex(s); +FilterRegex.load = function(s) { + return new FilterRegex(s.slice(2)); }; +registerFilterClass(FilterRegex); + /******************************************************************************/ -var FilterRegexHostname = function(s, domainOpt) { - this.re = new RegExp(s, 'i'); - this.hnMatcher = hnMatcherFactory(domainOpt); +// Filtering according to the origin. + +var FilterOrigin = function() { }; -FilterRegexHostname.prototype.match = function(url) { - // test hostname first, it's cheaper than evaluating a regex - return this.hnMatcher.test() && this.re.test(url); +FilterOrigin.prototype.wrapped = { + compile: function() { + return ''; + }, + logData: function() { + return { + compiled: '' + }; + }, + match: function() { + return true; + } }; -FilterRegexHostname.fid = -FilterRegexHostname.prototype.fid = -FilterRegexHostname.prototype.rtfid = '//h'; - -FilterRegexHostname.prototype.toSelfie = -FilterRegexHostname.prototype.rtCompile = function() { - return this.re.source + '\t' + this.hnMatcher.toDomainOpt(); +FilterOrigin.prototype.matchOrigin = function() { + return true; }; -FilterRegexHostname.compile = function(details) { - return details.f + '\t' + details.domainOpt; +FilterOrigin.prototype.match = function(url, tokenBeg) { + return this.matchOrigin() && this.wrapped.match(url, tokenBeg); }; -FilterRegexHostname.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterRegexHostname(s.slice(0, pos), s.slice(pos + 1)); 
+FilterOrigin.prototype.logData = function() { + var out = this.wrapped.logData(), + domainOpt = this.toDomainOpt(); + out.compiled = this.fidPrefix + domainOpt + '\v' + out.compiled; + if ( out.opts === undefined ) { + out.opts = 'domain=' + domainOpt; + } else { + out.opts += ',domain=' + domainOpt; + } + return out; }; +FilterOrigin.prototype.compile = function() { + return this.fidPrefix + this.toDomainOpt() + '\v' + this.wrapped.compile(); +}; + +// *** start of specialized origin matchers + +var FilterOriginHit = function(domainOpt) { + FilterOrigin.call(this); + this.hostname = domainOpt; +}; + +FilterOriginHit.prototype = Object.create(FilterOrigin.prototype, { + constructor: { + value: FilterOriginHit + }, + toDomainOpt: { + value: function() { + return this.hostname; + } + }, + matchOrigin: { + value: function() { + var needle = this.hostname, haystack = pageHostnameRegister; + if ( haystack.endsWith(needle) === false ) { return false; } + var offset = haystack.length - needle.length; + return offset === 0 || haystack.charCodeAt(offset - 1) === 0x2E /* '.' */; + } + }, +}); + +// + +var FilterOriginMiss = function(domainOpt) { + FilterOrigin.call(this); + this.hostname = domainOpt.slice(1); +}; + +FilterOriginMiss.prototype = Object.create(FilterOrigin.prototype, { + constructor: { + value: FilterOriginMiss + }, + toDomainOpt: { + value: function() { + return '~' + this.hostname; + } + }, + matchOrigin: { + value: function() { + var needle = this.hostname, haystack = pageHostnameRegister; + if ( haystack.endsWith(needle) === false ) { return true; } + var offset = haystack.length - needle.length; + return offset !== 0 && haystack.charCodeAt(offset - 1) !== 0x2E /* '.' 
*/; + } + }, +}); + +// + +var FilterOriginHitSet = function(domainOpt) { + FilterOrigin.call(this); + this.domainOpt = domainOpt; +}; + +FilterOriginHitSet.prototype = Object.create(FilterOrigin.prototype, { + constructor: { + value: FilterOriginHitSet + }, + oneOf: { + value: null, + writable: true + }, + toDomainOpt: { + value: function() { + return this.domainOpt; + } + }, + matchOrigin: { + value: function() { + if ( this.oneOf === null ) { + this.oneOf = new RegExp('(?:^|\\.)(?:' + this.domainOpt.replace(/\./g, '\\.') + ')$'); + } + return this.oneOf.test(pageHostnameRegister); + } + }, +}); + +// + +var FilterOriginMissSet = function(domainOpt) { + FilterOrigin.call(this); + this.domainOpt = domainOpt; +}; + +FilterOriginMissSet.prototype = Object.create(FilterOrigin.prototype, { + constructor: { + value: FilterOriginMissSet + }, + noneOf: { + value: null, + writable: true + }, + toDomainOpt: { + value: function() { + return this.domainOpt; + } + }, + matchOrigin: { + value: function() { + if ( this.noneOf === null ) { + this.noneOf = new RegExp('(?:^|\\.)(?:' + this.domainOpt.replace(/~/g, '').replace(/\./g, '\\.') + ')$'); + } + return this.noneOf.test(pageHostnameRegister) === false; + } + }, +}); + +// + +var FilterOriginMixedSet = function(domainOpt) { + FilterOrigin.call(this); + this.domainOpt = domainOpt; +}; + +FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, { + constructor: { + value: FilterOriginMixedSet + }, + oneOf: { + value: null, + writable: true + }, + noneOf: { + value: null, + writable: true + }, + init: { + value: function() { + var oneOf = [], noneOf = [], + hostnames = this.domainOpt.split('|'), + i = hostnames.length, + hostname; + while ( i-- ) { + hostname = hostnames[i].replace(/\./g, '\\.'); + if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) { + noneOf.push(hostname.slice(1)); + } else { + oneOf.push(hostname); + } + } + this.oneOf = new RegExp('(?:^|\\.)(?:' + oneOf.join('|') + ')$'); + this.noneOf = new 
RegExp('(?:^|\\.)(?:' + noneOf.join('|') + ')$'); + } + }, + toDomainOpt: { + value: function() { + return this.domainOpt; + } + }, + matchOrigin: { + value: function() { + if ( this.oneOf === null ) { this.init(); } + var needle = pageHostnameRegister; + return this.oneOf.test(needle) && this.noneOf.test(needle) === false; + } + }, +}); + +// *** end of specialized origin matchers + +// The optimal test function is picked according to the content of the +// `domain=` filter option. +// Re-factored in light of: +// - https://gorhill.github.io/obj-vs-set-vs-map/set-vs-regexp.html +// The re-factoring made possible to reuse instances of a matcher. As of +// writing, I observed that just with EasyList, there were ~1,200 reused +// instances out of ~2,800. + +FilterOrigin.matcherFactory = function(domainOpt) { + // One hostname + if ( domainOpt.indexOf('|') === -1 ) { + if ( domainOpt.charCodeAt(0) === 0x7E /* '~' */ ) { + return new FilterOriginMiss(domainOpt); + } + return new FilterOriginHit(domainOpt); + } + // Many hostnames. + // Must be in set (none negated). + if ( domainOpt.indexOf('~') === -1 ) { + return new FilterOriginHitSet(domainOpt); + } + // Must not be in set (all negated). + if ( FilterOrigin.reAllNegated.test(domainOpt) ) { + return new FilterOriginMissSet(domainOpt); + } + // Must be in one set, but not in the other. 
+ return new FilterOriginMixedSet(domainOpt); +}; + +FilterOrigin.reAllNegated = /^~(?:[^|~]+\|~)+[^|~]+$/; + +FilterOrigin.compile = function(details) { + return FilterOrigin.fidPrefix + details.domainOpt; +}; + +FilterOrigin.load = function(s) { + var pos = s.indexOf('\v', 2), + f = FilterOrigin.matcherFactory(s.slice(2, pos)); + f.wrapped = filterFromCompiledData(s.slice(pos + 1)); + return f; +}; + +registerFilterClass(FilterOrigin); + /******************************************************************************/ + +var FilterDataHolder = function(dataType, dataStr) { + this.dataType = dataType; + this.dataStr = dataStr; + this.wrapped = undefined; +}; + +FilterDataHolder.prototype.match = function(url, tokenBeg) { + return this.wrapped.match(url, tokenBeg); +}; + +FilterDataHolder.prototype.logData = function() { + var out = this.wrapped.logData(); + out.compiled = this.fidPrefix + this.dataType + '\t' + this.dataStr + '\v' + out.compiled; + var opt = this.dataType; + if ( this.dataStr !== '' ) { + opt += '=' + this.dataStr; + } + if ( out.opts === undefined ) { + out.opts = opt; + } else { + out.opts = opt + ',' + out.opts; + } + return out; +}; + +FilterDataHolder.prototype.compile = function() { + return this.fidPrefix + this.dataType + '\t' + this.dataStr + '\v' + this.wrapped.compile(); +}; + +FilterDataHolder.compile = function(details) { + return FilterDataHolder.fidPrefix + details.dataType + '\t' + details.dataStr; +}; + +FilterDataHolder.load = function(s) { + var pos = s.indexOf('\t', 2), + end = s.indexOf('\v', pos), + f = new FilterDataHolder(s.slice(2, pos), s.slice(pos + 1, end)); + f.wrapped = filterFromCompiledData(s.slice(end + 1)); + return f; +}; + +registerFilterClass(FilterDataHolder); + +// Helper class for storing instances of FilterDataHolder. 
+ +var FilterDataHolderEntry = function(key, token, fdata) { + this.keyBits = mapKeyToBits(key); + this.token = token; + this.filter = filterFromCompiledData(fdata); + this.next = undefined; +}; + +FilterDataHolderEntry.prototype.logData = function() { + return toLogDataInternal(this.keyBits, this.token, this.filter); +}; + +FilterDataHolderEntry.prototype.compile = function() { + return this.keyBits + '\t' + this.token + '\t' + this.filter.compile(); +}; + +FilterDataHolderEntry.load = function(s) { + var pos1 = s.indexOf('\t'), + pos2 = s.indexOf('\t', pos1 + 1); + return new FilterDataHolderEntry( + parseInt(s, 10), + s.slice(pos1 + 1, pos2), + s.slice(pos2 + 1) + ); +}; + /******************************************************************************/ // Dictionary of hostnames @@ -1012,25 +1084,26 @@ FilterHostnameDict.prototype.match = function() { return this; }; -FilterHostnameDict.fid = -FilterHostnameDict.prototype.fid = '{h}'; -FilterHostnameDict.rtfid = '.'; - -FilterHostnameDict.prototype.rtCompile = function() { - return this.h; +FilterHostnameDict.prototype.logData = function() { + return { + raw: '||' + this.h + '^', + regex: rawToRegexStr(this.h) + '(?:[^%.0-9a-z_-]|$)', + compiled: this.h + }; }; -FilterHostnameDict.prototype.toSelfie = function() { - return JSON.stringify(µb.setToArray(this.dict)); +FilterHostnameDict.prototype.compile = function() { + return this.fidPrefix + JSON.stringify(µb.setToArray(this.dict)); }; -FilterHostnameDict.fromSelfie = function(s) { +FilterHostnameDict.load = function(s) { var f = new FilterHostnameDict(); - f.dict = µb.setFromArray(JSON.parse(s)); + f.dict = µb.setFromArray(JSON.parse(s.slice(2))); return f; }; -/******************************************************************************/ +registerFilterClass(FilterHostnameDict); + /******************************************************************************/ // Some buckets can grow quite large, and finding a hit in these buckets @@ -1078,22 +1151,16 @@ 
var FilterBucket = function(a, b) { } }; -Object.defineProperty(FilterBucket.prototype, 'rtfid', { - get: function() { - return this.f.rtfid; - } -}); - FilterBucket.prototype.add = function(a) { this.filters.push(a); }; -FilterBucket.prototype.remove = function(fclass, fdata) { +FilterBucket.prototype.remove = function(fdata) { var i = this.filters.length, filter; while ( i-- ) { filter = this.filters[i]; - if ( filter.fid === fclass && filter.toSelfie() === fdata ) { + if ( filter.compile() === fdata ) { this.filters.splice(i, 1); } } @@ -1109,9 +1176,7 @@ FilterBucket.prototype.promote = function(i) { break; } } - if ( i <= pivot ) { - return; - } + if ( i <= pivot ) { return; } var j = this.promoted % pivot; //console.debug('FilterBucket.promote(): promoted %d to %d', i, j); var f = filters[j]; @@ -1121,8 +1186,8 @@ FilterBucket.prototype.promote = function(i) { }; FilterBucket.prototype.match = function(url, tokenBeg) { - var filters = this.filters; - var n = filters.length; + var filters = this.filters, + n = filters.length; for ( var i = 0; i < n; i++ ) { if ( filters[i].match(url, tokenBeg) ) { this.f = filters[i]; @@ -1135,21 +1200,50 @@ FilterBucket.prototype.match = function(url, tokenBeg) { return false; }; -FilterBucket.prototype.fid = '[]'; - -FilterBucket.prototype.toSelfie = function() { - return this.filters.length.toString(); +FilterBucket.prototype.logData = function() { + return this.f.logData(); }; -// Not supposed to be called without a valid filter hit. 
-FilterBucket.prototype.rtCompile = function() { - return this.f.rtCompile(); +FilterBucket.prototype.compile = function() { + var compiled = [], + filters = this.filters; + for ( var i = 0, n = filters.length; i < n; i++ ) { + compiled[i] = filters[i].compile(); + } + return this.fidPrefix + JSON.stringify(compiled); }; -FilterBucket.fromSelfie = function() { - return new FilterBucket(); +FilterBucket.load = function(s) { + var f = new FilterBucket(), + compiled = JSON.parse(s.slice(2)), + filters = f.filters; + for ( var i = 0, n = compiled.length; i < n; i++ ) { + filters[i] = filterFromCompiledData(compiled[i]); + } + return f; }; +registerFilterClass(FilterBucket); + +/******************************************************************************/ +/******************************************************************************/ + +var filterFromCompiledData = function(compiled) { + if ( compiled === lastLoadedFilterString ) { + return lastLoadedFilter; + } + var fid = parseInt(compiled, 36), + f = filterClasses.get(fid).load(compiled); + //filterClassHistogram.set(fid, (filterClassHistogram.get(fid) || 0) + 1); + lastLoadedFilterString = compiled; + lastLoadedFilter = f; + return f; +}; + +var lastLoadedFilterString, + lastLoadedFilter; +//var filterClassHistogram = new Map(); + /******************************************************************************/ /******************************************************************************/ @@ -1176,25 +1270,26 @@ var FilterParser = function() { // Transpose `ping` into `other` for now. 
FilterParser.prototype.toNormalizedType = { - 'stylesheet': 'stylesheet', - 'image': 'image', - 'object': 'object', - 'object-subrequest': 'object', - 'script': 'script', - 'xmlhttprequest': 'xmlhttprequest', - 'subdocument': 'sub_frame', - 'font': 'font', - 'media': 'media', - 'websocket': 'websocket', - 'other': 'other', 'beacon': 'other', + 'data': 'data', + 'document': 'main_frame', + 'elemhide': 'generichide', + 'font': 'font', + 'generichide': 'generichide', + 'image': 'image', + 'inline-script': 'inline-script', + 'media': 'media', + 'object': 'object', + 'other': 'other', + 'object-subrequest': 'object', 'ping': 'other', 'popunder': 'popunder', - 'document': 'main_frame', - 'generichide': 'generichide', - 'elemhide': 'generichide', - 'inline-script': 'inline-script', - 'popup': 'popup' + 'popup': 'popup', + 'script': 'script', + 'stylesheet': 'stylesheet', + 'subdocument': 'sub_frame', + 'xmlhttprequest': 'xmlhttprequest', + 'websocket': 'websocket' }; /******************************************************************************/ @@ -1203,17 +1298,19 @@ FilterParser.prototype.reset = function() { this.action = BlockAction; this.anchor = 0; this.badFilter = false; + this.dataType = undefined; + this.dataStr = undefined; this.elemHiding = false; this.f = ''; this.firstParty = false; + this.thirdParty = false; + this.party = AnyParty; this.fopts = ''; - this.hostnameAnchored = false; this.hostnamePure = false; this.domainOpt = ''; this.isRegex = false; this.raw = ''; this.redirect = false; - this.thirdParty = false; this.token = '*'; this.tokenBeg = 0; this.types = 0; @@ -1241,6 +1338,11 @@ FilterParser.prototype.parseTypeOption = function(raw, not) { return; } + // Non-discrete network types can't be negated. 
+ if ( (typeBit & allNetRequestTypesBitmap) === 0 ) { + return; + } + // Negated type: set all valid network request type bits to 1 if ( (typeBit & allNetRequestTypesBitmap) !== 0 && @@ -1259,8 +1361,10 @@ FilterParser.prototype.parsePartyOption = function(firstParty, not) { } if ( not ) { this.firstParty = true; + this.party = this.thirdParty ? AnyParty : FirstParty; } else { this.thirdParty = true; + this.party = this.firstParty ? AnyParty : ThirdParty; } }; @@ -1271,7 +1375,9 @@ FilterParser.prototype.parseDomainOption = function(s) { var hostnames = s.split('|'), i = hostnames.length; while ( i-- ) { - hostnames[i] = punycode.toASCII(hostnames[i]); + if ( this.reHasUnicode.test(hostnames[i]) ) { + hostnames[i] = punycode.toASCII(hostnames[i]); + } } s = hostnames.join('|'); } @@ -1352,6 +1458,20 @@ FilterParser.prototype.parseOptions = function(s) { this.unsupported = true; break; } + if ( opt.startsWith('csp=') ) { + if ( opt.length > 4 ) { + this.parseTypeOption('data', not); + this.dataType = 'csp'; + this.dataStr = opt.slice(4).trim(); + } + continue; + } + if ( opt === 'csp' && this.action === AllowAction ) { + this.parseTypeOption('data', not); + this.dataType = 'csp'; + this.dataStr = ''; + continue; + } // Used by Adguard, purpose is unclear -- just ignore for now. if ( opt === 'empty' ) { continue; @@ -1369,6 +1489,103 @@ FilterParser.prototype.parseOptions = function(s) { /******************************************************************************/ +// https://github.com/gorhill/uBlock/issues/1943#issuecomment-243188946 +// Convert websocket-related filter where possible to a format which +// can be handled using CSP injection. 
+ +FilterParser.prototype.translate = function() { + var dataTypeBit = this.bitFromType('data'); + + if ( this.cantWebsocket && this.reWebsocketAny.test(this.f) ) { + this.f = '*'; + this.types = dataTypeBit; + this.dataType = 'csp'; + this.dataStr = "connect-src https: http:"; + // https://bugs.chromium.org/p/chromium/issues/detail?id=669086 + // TODO: remove when most users are beyond Chromium v56 + if ( vAPI.chromiumVersion < 57 ) { + this.dataStr += '; frame-src *'; + } + return; + } + + // Broad |data:-based filters. + if ( this.f === 'data:' ) { + switch ( this.types ) { + case 0: + this.f = '*'; + this.types = dataTypeBit; + this.dataType = 'csp'; + this.dataStr = "default-src 'self' * blob: 'unsafe-inline' 'unsafe-eval'"; + break; + case this.bitFromType('script'): + this.f = '*'; + this.types = dataTypeBit; + this.dataType = 'csp'; + this.dataStr = "script-src 'self' * blob: 'unsafe-inline' 'unsafe-eval'"; + break; + case this.bitFromType('sub_frame'): + this.f = '*'; + this.types = dataTypeBit; + this.dataType = 'csp'; + this.dataStr = "frame-src 'self' * blob:"; + break; + case this.bitFromType('script') | this.bitFromType('sub_frame'): + this.f = '*'; + this.types = dataTypeBit; + this.dataType = 'csp'; + this.dataStr = "frame-src 'self' * blob:; script-src 'self' * blob: 'unsafe-inline' 'unsafe-eval';"; + break; + default: + break; + } + } + + // Broad |blob:-based filters. 
+ if ( this.f === 'blob:' ) { + switch ( this.types ) { + case 0: + this.f = '*'; + this.types = dataTypeBit; + this.dataType = 'csp'; + this.dataStr = "default-src 'self' * data: 'unsafe-inline' 'unsafe-eval'"; + break; + case this.bitFromType('script'): + this.f = '*'; + this.types = dataTypeBit; + this.dataType = 'csp'; + this.dataStr = "script-src 'self' * data: 'unsafe-inline' 'unsafe-eval'"; + break; + case this.bitFromType('sub_frame'): + this.f = '*'; + this.types = dataTypeBit; + this.dataType = 'csp'; + this.dataStr = "frame-src 'self' * data:"; + break; + case this.bitFromType('script') | this.bitFromType('sub_frame'): + this.f = '*'; + this.types = dataTypeBit; + this.dataType = 'csp'; + this.dataStr = "frame-src 'self' * data:; script-src 'self' * data: 'unsafe-inline' 'unsafe-eval';"; + break; + default: + break; + } + } +}; + +/******************************************************************************* + + anchor: bit vector + 0000 (0x0): no anchoring + 0001 (0x1): anchored to the end of the URL. + 0010 (0x2): anchored to the start of the URL. + 0011 (0x3): anchored to the start and end of the URL. + 0100 (0x4): anchored to the hostname of the URL. + 0101 (0x5): anchored to the hostname and end of the URL. + +**/ + FilterParser.prototype.parse = function(raw) { // important! this.reset(); @@ -1378,7 +1595,8 @@ FilterParser.prototype.parse = function(raw) { // plain hostname? (from HOSTS file) if ( this.reHostnameRule1.test(s) ) { this.f = s; - this.hostnamePure = this.hostnameAnchored = true; + this.hostnamePure = true; + this.anchor |= 0x4; return this; } @@ -1439,7 +1657,7 @@ FilterParser.prototype.parse = function(raw) { // hostname-anchored if ( s.startsWith('||') ) { - this.hostnameAnchored = true; + this.anchor |= 0x4; s = s.slice(2); // convert hostname to punycode if needed @@ -1462,24 +1680,23 @@ FilterParser.prototype.parse = function(raw) { // plain hostname? 
(from ABP filter list) // https://github.com/gorhill/uBlock/issues/1757 - // A filter can't be a pure-hostname one if there is a domain option - // present. - if ( this.domainOpt === '' && this.reHostnameRule2.test(s) ) { + // A filter can't be a pure-hostname one if there is a domain or csp + // option present. + if ( this.reHostnameRule2.test(s) ) { this.f = s.replace(this.reCleanupHostnameRule2, ''); this.hostnamePure = true; return this; } } - // left-anchored - if ( s.startsWith('|') ) { - this.anchor = -1; + else if ( s.startsWith('|') ) { + this.anchor |= 0x2; s = s.slice(1); } // right-anchored if ( s.endsWith('|') ) { - this.anchor = 1; + this.anchor |= 0x1; s = s.slice(0, -1); } @@ -1490,7 +1707,7 @@ FilterParser.prototype.parse = function(raw) { // Keep the leading asterisk if we are dealing with a hostname-anchored // filter, this will ensure the generic filter implementation is // used. - if ( s.startsWith('*') && this.hostnameAnchored === false ) { + if ( s.startsWith('*') && (this.anchor & 0x4) ) { s = s.replace(/^\*+([^%0-9a-z])/, '$1'); } // remove pointless trailing * @@ -1507,7 +1724,7 @@ FilterParser.prototype.parse = function(raw) { // https://github.com/gorhill/uBlock/issues/1047 // Hostname-anchored makes no sense if matching all requests. if ( s === '*' ) { - this.hostnameAnchored = false; + this.anchor = 0; } // This might look weird but we gain memory footprint by not going through @@ -1515,18 +1732,11 @@ FilterParser.prototype.parse = function(raw) { this.f = this.reHasUppercase.test(s) ? s.toLowerCase() : s; - // https://github.com/gorhill/uBlock/issues/1943#issuecomment-243188946 - // Convert websocket-related filter where possible to a format which - // can be handled using CSP injection. 
- if ( - this.cantWebsocket && - this.anchor === -1 && - this.firstParty === false && - this.thirdParty === false && - this.reWebsocketAny.test(this.f) - ) { - this.f = '*'; - this.types = this.bitFromType('websocket'); + // Convenience: + // Convert special broad filters for non-webRequest aware types into + // `csp` filters wherever possible. + if ( this.anchor & 0x2 && this.party === 0 ) { + this.translate(); } return this; @@ -1541,37 +1751,37 @@ FilterParser.prototype.parse = function(raw) { // These "bad tokens" are collated manually. // Hostname-anchored with no wildcard always have a token index of 0. -var reHostnameToken = /^[0-9a-z]+/g; +var reHostnameToken = /^[0-9a-z]+/; var reGoodToken = /[%0-9a-z]{2,}/g; -var badTokens = { - 'com': true, - 'http': true, - 'https': true, - 'icon': true, - 'images': true, - 'img': true, - 'js': true, - 'net': true, - 'news': true, - 'www': true -}; +var badTokens = new Set([ + 'com', + 'http', + 'https', + 'icon', + 'images', + 'img', + 'js', + 'net', + 'news', + 'www' +]); var findFirstGoodToken = function(s) { reGoodToken.lastIndex = 0; var matches, lpos; var badTokenMatch = null; - while ( (matches = reGoodToken.exec(s)) ) { + while ( (matches = reGoodToken.exec(s)) !== null ) { // https://github.com/gorhill/uBlock/issues/997 // Ignore token if preceded by wildcard. 
lpos = matches.index; - if ( lpos !== 0 && s.charAt(lpos - 1) === '*' ) { + if ( lpos !== 0 && s.charCodeAt(lpos - 1) === 0x2A /* '*' */ ) { continue; } - if ( s.charAt(reGoodToken.lastIndex) === '*' ) { + if ( s.charCodeAt(reGoodToken.lastIndex) === 0x2A /* '*' */ ) { continue; } - if ( badTokens.hasOwnProperty(matches[0]) ) { + if ( badTokens.has(matches[0]) ) { if ( badTokenMatch === null ) { badTokenMatch = matches; } @@ -1583,7 +1793,6 @@ var findFirstGoodToken = function(s) { }; var findHostnameToken = function(s) { - reHostnameToken.lastIndex = 0; return reHostnameToken.exec(s); }; @@ -1595,12 +1804,11 @@ FilterParser.prototype.makeToken = function() { if ( this.isRegex || this.f === '*' ) { return; } - - var matches = this.hostnameAnchored && this.f.indexOf('*') === -1 ? + var matches = this.anchor & 0x4 && this.f.indexOf('*') === -1 ? findHostnameToken(this.f) : findFirstGoodToken(this.f); - if ( matches !== null && matches[0].length !== 0 ) { + if ( matches !== null ) { this.token = matches[0]; this.tokenBeg = matches.index; } @@ -1631,6 +1839,7 @@ FilterContainer.prototype.reset = function() { this.badFilters = new Set(); this.duplicateBuster = new Set(); this.categories = new Map(); + this.dataFilters = new Map(); this.filterParser.reset(); // Reuse filter instances whenever possible at load time. 
@@ -1655,42 +1864,10 @@ FilterContainer.prototype.freeze = function() { this.fdataLast = null; this.filterLast = null; this.frozen = true; -}; - -/******************************************************************************/ - -FilterContainer.prototype.factories = { - '[]': FilterBucket, - 'a': FilterPlain, - 'ah': FilterPlainHostname, - '0a': FilterPlainPrefix0, - '0ah': FilterPlainPrefix0Hostname, - '1a': FilterPlainPrefix1, - '1ah': FilterPlainPrefix1Hostname, - '|a': FilterPlainLeftAnchored, - '|ah': FilterPlainLeftAnchoredHostname, - 'a|': FilterPlainRightAnchored, - 'a|h': FilterPlainRightAnchoredHostname, - '||a': FilterPlainHnAnchored, - '||ah': FilterPlainHnAnchoredHostname, - '//': FilterRegex, - '//h': FilterRegexHostname, - '{h}': FilterHostnameDict, - '_': FilterGeneric, - '_h': FilterGenericHostname, - '||_': FilterGenericHnAnchored, - '||_h': FilterGenericHnAnchoredHostname -}; - -/******************************************************************************/ - -FilterContainer.prototype.filterFromSelfie = function(fclass, fdata) { - if ( fdata !== this.fdataLast || fclass !== this.fclassLast ) { - this.fclassLast = fclass; - this.fdataLast = fdata; - this.filterLast = this.factories[fclass].fromSelfie(fdata); - } - return this.filterLast; + //console.log(JSON.stringify(Array.from(filterClassHistogram))); + //this.tokenHistogram = new Map(Array.from(this.tokenHistogram).sort(function(a, b) { + // return a[0].localeCompare(b[0]) || (b[1] - a[1]); + //})); }; /******************************************************************************/ @@ -1699,19 +1876,12 @@ FilterContainer.prototype.toSelfie = function() { var categoryToSelfie = function(map) { var selfie = [], iterator = map.entries(), - entry, bucket, ff, f; + entry; for (;;) { entry = iterator.next(); - if ( entry.done ) { break; } - selfie.push('k2\t' + entry.value[0]); - bucket = entry.value[1]; - selfie.push(bucket.fid + '\t' + bucket.toSelfie()); - if ( bucket.fid !== '[]' ) { continue; 
} - ff = bucket.filters; - for ( var i = 0, ni = ff.length; i < ni; i++ ) { - f = ff[i]; - selfie.push(f.fid + '\t' + f.toSelfie()); - } + if ( entry.done === true ) { break; } + selfie.push('k2\t' + entry.value[0]); // token + selfie.push(entry.value[1].compile()); } return selfie.join('\n'); }; @@ -1722,13 +1892,29 @@ FilterContainer.prototype.toSelfie = function() { entry; for (;;) { entry = iterator.next(); - if ( entry.done ) { break; } - selfie.push('k1\t' + entry.value[0]); + if ( entry.done === true ) { break; } + selfie.push('k1\t' + exportMapKey(entry.value[0])); // key selfie.push(categoryToSelfie(entry.value[1])); } return selfie.join('\n'); }; + var dataFiltersToSelfie = function(dataFilters) { + var selfie = [], + iter = dataFilters.entries(), + entry; + for (;;) { + entry = iter.next(); + if ( entry.done === true ) { break; } + entry = entry.value[1]; + do { + selfie.push(entry.compile()); + entry = entry.next; + } while ( entry !== undefined ); + } + return selfie; + }; + return { processedFilterCount: this.processedFilterCount, acceptedCount: this.acceptedCount, @@ -1736,7 +1922,8 @@ FilterContainer.prototype.toSelfie = function() { allowFilterCount: this.allowFilterCount, blockFilterCount: this.blockFilterCount, discardedCount: this.discardedCount, - categories: categoriesToSelfie(this.categories) + categories: categoriesToSelfie(this.categories), + dataFilters: dataFiltersToSelfie(this.dataFilters) }; }; @@ -1751,113 +1938,35 @@ FilterContainer.prototype.fromSelfie = function(selfie) { this.blockFilterCount = selfie.blockFilterCount; this.discardedCount = selfie.discardedCount; - var catKey, tokenKey; - var map = this.categories, submap; - var bucket = null; - var rawText = selfie.categories; - var rawEnd = rawText.length; - var lineBeg = 0, lineEnd; - var line, pos, what, data, filter; - while ( lineBeg < rawEnd ) { - lineEnd = rawText.indexOf('\n', lineBeg); - if ( lineEnd < 0 ) { - lineEnd = rawEnd; - } - line = rawText.slice(lineBeg, 
lineEnd); - lineBeg = lineEnd + 1; - pos = line.indexOf('\t'); - what = line.slice(0, pos); - data = line.slice(pos + 1); - if ( what === 'k1' ) { - catKey = data; + var catKey, tokenKey, + map = this.categories, submap, + lineIter = new µb.LineIterator(selfie.categories), + line; + while ( lineIter.eot() === false ) { + line = lineIter.next(); + if ( line.startsWith('k1\t') ) { + catKey = importMapKey(line.slice(3)); submap = new Map(); map.set(catKey, submap); - bucket = null; continue; } - if ( what === 'k2' ) { - tokenKey = data; - bucket = null; + if ( line.startsWith('k2\t') ) { + tokenKey = line.slice(3); continue; } - filter = this.filterFromSelfie(what, data); - if ( bucket === null ) { - bucket = filter; - submap.set(tokenKey, bucket); - continue; - } - // When token key is reused, it can't be anything - // else than FilterBucket - bucket.add(filter); - } -}; - -/******************************************************************************/ - -FilterContainer.prototype.getFilterClass = function(details) { - var s = details.f; - - if ( details.domainOpt.length !== 0 ) { - if ( details.isRegex ) { - return FilterRegexHostname; - } - if ( this.reIsGeneric.test(s) || details.token === '*' ) { - if ( details.hostnameAnchored ) { - return FilterGenericHnAnchoredHostname; - } - return FilterGenericHostname; - } - if ( details.anchor < 0 ) { - return FilterPlainLeftAnchoredHostname; - } - if ( details.anchor > 0 ) { - // https://github.com/gorhill/uBlock/issues/1669 - if ( details.hostnameAnchored ) { - return FilterGenericHnAnchoredHostname; - } - return FilterPlainRightAnchoredHostname; - } - if ( details.hostnameAnchored ) { - return FilterPlainHnAnchoredHostname; - } - if ( details.tokenBeg === 0 ) { - return FilterPlainPrefix0Hostname; - } - if ( details.tokenBeg === 1 ) { - return FilterPlainPrefix1Hostname; - } - return FilterPlainHostname; + submap.set(tokenKey, filterFromCompiledData(line)); } - if ( details.isRegex ) { - return FilterRegex; - } - if ( 
this.reIsGeneric.test(s) || details.token === '*' ) { - if ( details.hostnameAnchored ) { - return FilterGenericHnAnchored; + var i = selfie.dataFilters.length, + entry, bucket; + while ( i-- ) { + entry = FilterDataHolderEntry.load(selfie.dataFilters[i]); + bucket = this.dataFilters.get(entry.token); + if ( bucket !== undefined ) { + entry.next = bucket; } - return FilterGeneric; + this.dataFilters.set(entry.token, entry); } - if ( details.anchor < 0 ) { - return FilterPlainLeftAnchored; - } - if ( details.anchor > 0 ) { - // https://github.com/gorhill/uBlock/issues/1669 - if ( details.hostnameAnchored ) { - return FilterGenericHnAnchored; - } - return FilterPlainRightAnchored; - } - if ( details.hostnameAnchored ) { - return FilterPlainHnAnchored; - } - if ( details.tokenBeg === 0 ) { - return FilterPlainPrefix0; - } - if ( details.tokenBeg === 1 ) { - return FilterPlainPrefix1; - } - return FilterPlain; }; /******************************************************************************/ @@ -1887,14 +1996,57 @@ FilterContainer.prototype.compile = function(raw, out) { // Pure hostnames, use more efficient dictionary lookup // https://github.com/chrisaljoudi/uBlock/issues/665 // Create a dict keyed on request type etc. 
- if ( parsed.hostnamePure && this.compileHostnameOnlyFilter(parsed, out) ) { + if ( + parsed.hostnamePure && + parsed.domainOpt === '' && + parsed.dataType === undefined && + this.compileHostnameOnlyFilter(parsed, out) + ) { return true; } - var r = this.compileFilter(parsed, out); - if ( r === false ) { - return false; + parsed.makeToken(); + + var fdata = ''; + if ( parsed.dataType !== undefined ) { + if ( fdata !== '' ) { fdata += '\v'; } + fdata += FilterDataHolder.compile(parsed); } + if ( parsed.domainOpt !== '' ) { + if ( fdata !== '' ) { fdata += '\v'; } + fdata += FilterOrigin.compile(parsed); + } + if ( fdata !== '' ) { fdata += '\v'; } + if ( parsed.isRegex ) { + fdata += FilterRegex.compile(parsed); + } else if ( parsed.hostnamePure ) { + fdata += FilterPlainHostname.compile(parsed); + } else if ( parsed.f === '*' ) { + fdata += FilterTrue.compile(); + } else if ( parsed.anchor === 0x5 ) { + // https://github.com/gorhill/uBlock/issues/1669 + fdata += FilterGenericHnAndRightAnchored.compile(parsed); + } else if ( this.reIsGeneric.test(parsed.f) || parsed.token === '*' ) { + if ( parsed.anchor === 0x4 ) { + fdata += FilterGenericHnAnchored.compile(parsed); + } else { + fdata += FilterGeneric.compile(parsed); + } + } else if ( parsed.anchor === 0x4 ) { + fdata += FilterPlainHnAnchored.compile(parsed); + } else if ( parsed.anchor === 0x2 ) { + fdata += FilterPlainLeftAnchored.compile(parsed); + } else if ( parsed.anchor === 0x1 ) { + fdata += FilterPlainRightAnchored.compile(parsed); + } else if ( parsed.tokenBeg === 0 ) { + fdata += FilterPlainPrefix0.compile(parsed); + } else if ( parsed.tokenBeg === 1 ) { + fdata += FilterPlainPrefix1.compile(parsed); + } else { + fdata += FilterPlain.compile(parsed); + } + + this.compileToAtomicFilter(fdata, parsed, out); return true; }; @@ -1911,20 +2063,14 @@ FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) { // return; //} - var route = parsed.badFilter ? 
'n-\v' : 'n\v', - party; - if ( parsed.firstParty === parsed.thirdParty ) { - party = AnyParty; - } else { - party = parsed.firstParty ? FirstParty : ThirdParty; - } - var keyShard = parsed.action | parsed.important | party; + var route = parsed.badFilter ? 0x01 : 0x00, + keyShard = parsed.action | parsed.important | parsed.party; var type = parsed.types; if ( type === 0 ) { out.push( - route + - toHex(keyShard) + '\v' + + route, + exportMapKey(bitsToMapKey(keyShard)) + '\v' + '.\v' + parsed.f ); @@ -1935,8 +2081,8 @@ FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) { do { if ( type & 1 ) { out.push( - route + - toHex(keyShard | (bitOffset << 4)) + '\v' + + route, + exportMapKey(bitsToMapKey(keyShard | (bitOffset << 4))) + '\v' + '.\v' + parsed.f ); @@ -1949,35 +2095,16 @@ FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) { /******************************************************************************/ -FilterContainer.prototype.compileFilter = function(parsed, out) { - parsed.makeToken(); - - var party = AnyParty; - if ( parsed.firstParty !== parsed.thirdParty ) { - party = parsed.firstParty ? FirstParty : ThirdParty; - } - - var filterClass = this.getFilterClass(parsed); - if ( filterClass === null ) { - return false; - } - this.compileToAtomicFilter(filterClass, parsed, party, out); - return true; -}; - -/******************************************************************************/ - -FilterContainer.prototype.compileToAtomicFilter = function(filterClass, parsed, party, out) { - var route = parsed.badFilter ? 'n-\v' : 'n\v', - bits = parsed.action | parsed.important | party, +FilterContainer.prototype.compileToAtomicFilter = function(fdata, parsed, out) { + var route = parsed.badFilter ? 
0x01 : 0x00, + bits = parsed.action | parsed.important | parsed.party, type = parsed.types; if ( type === 0 ) { out.push( - route + - toHex(bits) + '\v' + + route, + exportMapKey(bitsToMapKey(bits)) + '\v' + parsed.token + '\v' + - filterClass.fid + '\v' + - filterClass.compile(parsed) + fdata ); return; } @@ -1985,11 +2112,10 @@ FilterContainer.prototype.compileToAtomicFilter = function(filterClass, parsed, do { if ( type & 1 ) { out.push( - route + - toHex(bits | (bitOffset << 4)) + '\v' + + route, + exportMapKey(bitsToMapKey(bits | (bitOffset << 4))) + '\v' + parsed.token + '\v' + - filterClass.fid + '\v' + - filterClass.compile(parsed) + fdata ); } bitOffset += 1; @@ -2012,44 +2138,66 @@ FilterContainer.prototype.compileToAtomicFilter = function(filterClass, parsed, } var i = redirects.length; while ( i-- ) { - out.push('n\v\v\v=>\v' + redirects[i]); + out.push(0, '\v\v=>\t' + redirects[i]); } }; /******************************************************************************/ FilterContainer.prototype.fromCompiledContent = function(lineIter) { - var line, hash, token, fclass, fdata, + var line, lineBits, hash, token, fdata, bucket, entry, filter, - fieldIter = new µb.FieldIterator('\v'); + fieldIter = new µb.FieldIterator('\v'), + dataFilterFid = FilterDataHolder.fidPrefix, + buckerFilterFid = FilterBucket.fidPrefix, + aCharCode = 'a'.charCodeAt(0); while ( lineIter.eot() === false ) { - line = lineIter.next(); - if ( line.charCodeAt(0) !== 0x6E /* 'n' */ ) { - lineIter.rewind(); + lineBits = lineIter.charCodeAt(0) - aCharCode; + if ( (lineBits & 0x04) !== 0 ) { return; } - - if ( fieldIter.first(line) === 'n-' ) { + line = lineIter.next(1); + if ( (lineBits & 0x02) !== 0 ) { + line = decodeURIComponent(line); + } + if ( (lineBits & 0x01) !== 0 ) { this.badFilters.add(line); continue; } - hash = fieldIter.next(); + hash = importMapKey(fieldIter.first(line)); token = fieldIter.next(); - fclass = fieldIter.next(); - fdata = fieldIter.next(); + fdata = 
fieldIter.remainder(); // Special cases: delegate to more specialized engines. // Redirect engine. - if ( fclass === '=>' ) { - µb.redirectEngine.fromCompiledRule(fdata); + if ( fdata.startsWith('=>\t') ) { + µb.redirectEngine.fromCompiledRule(fdata.slice(3)); continue; } // Plain static filters. this.acceptedCount += 1; + // Special treatment: data-holding filters are stored separately + // because they require special matching algorithm (unlike other + // filters, ALL hits must be reported). + if ( fdata.startsWith(dataFilterFid) ) { + if ( this.duplicateBuster.has(line) ) { + this.discardedCount += 1; + continue; + } + this.duplicateBuster.add(line); + entry = new FilterDataHolderEntry(hash, token, fdata); + bucket = this.dataFilters.get(token); + if ( bucket !== undefined ) { + entry.next = bucket; + } + this.dataFilters.set(token, entry); + continue; + } + bucket = this.categories.get(hash); if ( bucket === undefined ) { bucket = new Map(); @@ -2062,8 +2210,7 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter) { entry = new FilterHostnameDict(); bucket.set('.', entry); } - // 'fclass' is hostname - if ( entry.add(fclass) === false ) { + if ( entry.add(fdata) === false ) { this.discardedCount += 1; } continue; @@ -2075,12 +2222,14 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter) { } this.duplicateBuster.add(line); - filter = this.filterFromSelfie(fclass, fdata); + //this.tokenHistogram.set(token, (this.tokenHistogram.get(token) || 0) + 1); + + filter = filterFromCompiledData(fdata); if ( entry === undefined ) { bucket.set(token, filter); continue; } - if ( entry.fid === '[]' ) { + if ( entry.fidPrefix === buckerFilterFid ) { entry.add(filter); continue; } @@ -2088,36 +2237,37 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter) { } }; +//FilterContainer.prototype.tokenHistogram = new Map(); + /******************************************************************************/ 
FilterContainer.prototype.removeBadFilters = function() { var lines = µb.setToArray(this.badFilters), fieldIter = new µb.FieldIterator('\v'), - hash, token, fclass, fdata, bucket, entry, + hash, token, fdata, bucket, entry, i = lines.length; while ( i-- ) { fieldIter.first(lines[i]); - hash = fieldIter.next(); - token = fieldIter.next(); - fclass = fieldIter.next(); - fdata = fieldIter.next(); + hash = importMapKey(fieldIter.next()); bucket = this.categories.get(hash); if ( bucket === undefined ) { continue; } + token = fieldIter.next(); entry = bucket.get(token); if ( entry === undefined ) { continue; } + fdata = fieldIter.remainder(); if ( entry instanceof FilterBucket ) { - entry.remove(fclass, fdata); + entry.remove(fdata); if ( entry.filters.length === 1 ) { bucket.set(token, entry.filters[0]); } continue; } if ( entry instanceof FilterHostnameDict ) { - entry.remove(fclass); // 'fclass' is hostname + entry.remove(fdata); if ( entry.size === 0 ) { bucket.delete(token); if ( bucket.size === 0 ) { @@ -2126,7 +2276,7 @@ FilterContainer.prototype.removeBadFilters = function() { } continue; } - if ( entry.fid === fclass && entry.toSelfie() === fdata ) { + if ( entry.compile() === fdata ) { bucket.delete(token); if ( bucket.size === 0 ) { this.categories.delete(hash); @@ -2138,138 +2288,114 @@ FilterContainer.prototype.removeBadFilters = function() { /******************************************************************************/ -FilterContainer.prototype.filterStringFromCompiled = function(compiled) { - var opts = []; - var vfields = compiled.split('\v'); - var filter = ''; - var bits = parseInt(vfields[0], 16) | 0; +FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out, outlog) { + if ( this.dataFilters.length === 0 ) { return; } - if ( bits & 0x01 ) { - filter += '@@'; + var url = this.urlTokenizer.setURL(requestURL); + + requestHostnameRegister = µb.URI.hostnameFromURI(url); + + // We need to visit ALL the matching filters. 
+ var toAddImportant = new Map(), + toAdd = new Map(), + toRemove = new Map(); + + var entry, f, + tokens = this.urlTokenizer.getTokens(), + tokenEntry, token, + i = 0; + while ( i < 16 ) { + tokenEntry = tokens[i++]; + token = tokenEntry.token; + if ( !token ) { break; } + entry = this.dataFilters.get(token); + while ( entry !== undefined ) { + f = entry.filter; + if ( f.match(url, tokenEntry.beg) === true ) { + if ( entry.keyBits & 0x001 ) { + toRemove.set(f.dataStr, entry); + } else if ( entry.keyBits & 0x002 ) { + toAddImportant.set(f.dataStr, entry); + } else { + toAdd.set(f.dataStr, entry); + } + } + entry = entry.next; + } + } + entry = this.dataFilters.get('*'); + while ( entry !== undefined ) { + f = entry.filter; + if ( f.match(url, tokenEntry.beg) === true ) { + if ( entry.keyBits & 0x001 ) { + toRemove.set(f.dataStr, entry); + } else if ( entry.keyBits & 0x002 ) { + toAddImportant.set(f.dataStr, entry); + } else { + toAdd.set(f.dataStr, entry); + } + } + entry = entry.next; } - var rfid = vfields[1] === '.' ? '.' : vfields[2]; - var tfields = rfid !== '.' ? vfields[3].split('\t') : []; + if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; } - switch ( rfid ) { - case '.': - filter += '||' + vfields[2] + '^'; - break; - case 'a': - case 'ah': - case '0a': - case '0ah': - case '1a': - case '1ah': - case '_': - case '_h': - filter += tfields[0]; - break; - case '|a': - case '|ah': - filter += '|' + tfields[0]; - break; - case 'a|': - case 'a|h': - filter += tfields[0] + '|'; - break; - case '||a': - case '||ah': - case '||_': - case '||_h': - filter += '||' + tfields[0]; - break; - case '//': - case '//h': - filter += '/' + tfields[0] + '/'; - break; - default: - break; + // Remove entries overriden by other filters. 
+ var iter = toAddImportant.entries(), + k; + for (;;) { + entry = iter.next(); + if ( entry.done === true ) { break; } + k = entry.value[0]; + toAdd.delete(k); + toRemove.delete(k); + } + iter = toRemove.entries(); + for (;;) { + entry = iter.next(); + if ( entry.done === true ) { break; } + k = entry.value[0]; + if ( k === '' ) { + toAdd.clear(); + break; + } + toAdd.delete(k); } - // Domain option? - switch ( rfid ) { - case '0ah': - case '1ah': - case '|ah': - case 'a|h': - case '||ah': - case '||_h': - case '//h': - opts.push('domain=' + tfields[1]); - break; - case 'ah': - case '_h': - opts.push('domain=' + tfields[2]); - break; - default: - break; + var logData; + iter = toAddImportant.entries(); + for (;;) { + entry = iter.next(); + if ( entry.done === true ) { break; } + out.push(entry.value[0]); + if ( outlog === undefined ) { continue; } + logData = entry.value[1].logData(); + logData.source = 'static'; + logData.result = 1; + outlog.push(logData); } - - // Filter options - if ( bits & 0x02 ) { - opts.push('important'); + iter = toAdd.entries(); + for (;;) { + entry = iter.next(); + if ( entry.done === true ) { break; } + out.push(entry.value[0]); + if ( outlog === undefined ) { continue; } + logData = entry.value[1].logData(); + logData.source = 'static'; + logData.result = 1; + outlog.push(logData); } - if ( bits & 0x08 ) { - opts.push('third-party'); - } else if ( bits & 0x04 ) { - opts.push('first-party'); + if ( outlog !== undefined ) { + iter = toRemove.entries(); + for (;;) { + entry = iter.next(); + if ( entry.done === true ) { break; } + logData = entry.value[1].logData(); + logData.source = 'static'; + logData.result = 2; + outlog.push(logData); + } } - if ( bits & 0xF0 ) { - opts.push(typeValueToTypeName[bits >>> 4]); - } - if ( opts.length !== 0 ) { - filter += '$' + opts.join(','); - } - - return filter; -}; - -/******************************************************************************/ - 
-FilterContainer.prototype.filterRegexFromCompiled = function(compiled, flags) { - var vfields = compiled.split('\v'); - var rfid = vfields[1] === '.' ? '.' : vfields[2]; - var tfields = rfid !== '.' ? vfields[3].split('\t') : []; - var re = null; - - switch ( rfid ) { - case '.': - re = strToRegex(vfields[2], 0, flags); - break; - case 'a': - case 'ah': - case '0a': - case '0ah': - case '1a': - case '1ah': - case '_': - case '_h': - re = strToRegex(tfields[0], 0, flags); - break; - case '||a': - case '||ah': - case '||_': - case '||_h': - re = strToRegex('||' + tfields[0], 0, flags); - break; - case '|a': - case '|ah': - re = strToRegex(tfields[0], -1, flags); - break; - case 'a|': - case 'a|h': - re = strToRegex(tfields[0], 1, flags); - break; - case '//': - case '//h': - re = new RegExp(tfields[0]); - break; - default: - break; - } - - return re; }; /******************************************************************************/ @@ -2286,9 +2412,9 @@ FilterContainer.prototype.matchTokens = function(bucket, url) { return true; } - var tokens = this.urlTokenizer.getTokens(); - var tokenEntry, token; - var i = 0; + var tokens = this.urlTokenizer.getTokens(), + tokenEntry, token, + i = 0; for (;;) { tokenEntry = tokens[i++]; token = tokenEntry.token; @@ -2301,7 +2427,7 @@ FilterContainer.prototype.matchTokens = function(bucket, url) { } } - // Regex-based filters + // Untokenizable filters f = bucket.get('*'); if ( f !== undefined && f.match(url) ) { this.tokenRegister = '*'; @@ -2330,20 +2456,20 @@ FilterContainer.prototype.matchStringGenericHide = function(context, requestURL) // Important: this is used by FilterHostnameDict.match(). 
requestHostnameRegister = µb.URI.hostnameFromURI(url); - var bucket = this.categories.get(toHex(genericHideException)); + var bucket = this.categories.get(bitsToMapKey(genericHideException)); if ( !bucket || this.matchTokens(bucket, url) === false ) { this.fRegister = null; - return; + return 0; } - bucket = this.categories.get(toHex(genericHideImportant)); + bucket = this.categories.get(bitsToMapKey(genericHideImportant)); if ( bucket && this.matchTokens(bucket, url) ) { this.keyRegister = genericHideImportant; - return true; + return 1; } this.keyRegister = genericHideException; - return false; + return 2; }; /******************************************************************************/ @@ -2353,14 +2479,13 @@ FilterContainer.prototype.matchStringGenericHide = function(context, requestURL) // not the generic handling. FilterContainer.prototype.matchStringExactType = function(context, requestURL, requestType) { - // Special case. + // Special cases. if ( requestType === 'generichide' ) { return this.matchStringGenericHide(context, requestURL); } - // Be prepared to support unknown types var type = typeNameToTypeValue[requestType]; if ( type === undefined ) { - return undefined; + return 0; } // Prime tokenizer: we get a normalized URL in return. 
@@ -2379,30 +2504,30 @@ FilterContainer.prototype.matchStringExactType = function(context, requestURL, r // https://github.com/chrisaljoudi/uBlock/issues/139 // Test against important block filters key = BlockAnyParty | Important | type; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return true; + return 1; } } key = BlockAction | Important | type | party; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return true; + return 1; } } // Test against block filters key = BlockAnyParty | type; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } } if ( this.fRegister === null ) { key = BlockAction | type | party; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } @@ -2411,26 +2536,26 @@ FilterContainer.prototype.matchStringExactType = function(context, requestURL, r // If there is no block filter, no need to test against allow filters if ( this.fRegister === null ) { - return; + return 0; } // Test against allow filters key = AllowAnyParty | type; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return false; + return 2; } } key = AllowAction | type | party; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return false; + return 2; } } - return true; + return 1; }; /******************************************************************************/ @@ -2441,8 
+2566,8 @@ FilterContainer.prototype.matchString = function(context) { // Also, be prepared to support unknown types var type = typeNameToTypeValue[context.requestType]; if ( type === undefined ) { - type = typeOtherValue; - } else if ( type === 0 || type > typeOtherValue ) { + type = otherTypeBitValue; + } else if ( type === 0 || type > otherTypeBitValue ) { return this.matchStringExactType(context, context.requestURL, context.requestType); } @@ -2476,9 +2601,11 @@ FilterContainer.prototype.matchString = function(context) { this.fRegister = null; - var party = isFirstParty(context.pageDomain, context.requestHostname) ? FirstParty : ThirdParty; - var categories = this.categories; - var key, bucket; + var party = isFirstParty(context.pageDomain, context.requestHostname) + ? FirstParty + : ThirdParty; + var categories = this.categories, + key, bucket; // https://github.com/chrisaljoudi/uBlock/issues/139 // Test against important block filters. @@ -2486,58 +2613,58 @@ FilterContainer.prototype.matchString = function(context) { // evaluation. Normally, it is "evaluate block then evaluate allow", with // the `important` property it is "evaluate allow then evaluate block". 
key = BlockAnyTypeAnyParty | Important; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return true; + return 1; } } key = BlockAnyType | Important | party; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return true; + return 1; } } key = BlockAnyParty | Important | type; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return true; + return 1; } } key = BlockAction | Important | type | party; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return true; + return 1; } } // Test against block filters key = BlockAnyTypeAnyParty; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } } if ( this.fRegister === null ) { key = BlockAnyType | party; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } } if ( this.fRegister === null ) { key = BlockAnyParty | type; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } } if ( this.fRegister === null ) { key = BlockAction | type | party; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } @@ -2548,63 +2675,51 @@ FilterContainer.prototype.matchString = function(context) { // If there is no 
block filter, no need to test against allow filters if ( this.fRegister === null ) { - return; + return 0; } // Test against allow filters key = AllowAnyTypeAnyParty; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return false; + return 2; } } key = AllowAnyType | party; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return false; + return 2; } } key = AllowAnyParty | type; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return false; + return 2; } } key = AllowAction | type | party; - if ( (bucket = categories.get(toHex(key))) ) { + if ( (bucket = categories.get(bitsToMapKey(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; - return false; + return 2; } } - return true; + return 1; }; /******************************************************************************/ -// The `verbose` argment tells whether to return a short or long version of -// the filter string. Typically, if the logger is not enabled, there is no -// point in returning the long version: this saves overhead. - -FilterContainer.prototype.toResultString = function(verbose) { - if ( this.fRegister === null ) { - return ''; - } - var s = this.keyRegister & 0x01 ? 'sa:' : 'sb:'; - if ( !verbose ) { - return s; - } - s += toHex(this.keyRegister) + '\v' + this.tokenRegister + '\v'; - if ( this.tokenRegister === '.' 
) { - s += this.fRegister.rtCompile(); - } else { - s += this.fRegister.rtfid + '\v' + this.fRegister.rtCompile(); - } - return s; +FilterContainer.prototype.toLogData = function() { + if ( this.fRegister === null ) { return; } + var logData = toLogDataInternal(this.keyRegister, this.tokenRegister, this.fRegister); + logData.source = 'static'; + logData.token = this.tokenRegister; + logData.result = this.fRegister === null ? 0 : (this.keyRegister & 1 ? 2 : 1); + return logData; }; /******************************************************************************/ diff --git a/src/js/storage.js b/src/js/storage.js index 4e1440077..fc1f538e3 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -680,6 +680,9 @@ µb.assets.get(assetKey, onRawListLoaded); return; } + if ( /[^\x00-\x7F]/.test(details.content) ) { + console.log(assetKey, 'has Unicode characters'); + } details.assetKey = assetKey; callback(details); }; @@ -727,7 +730,7 @@ /******************************************************************************/ µBlock.compileFilters = function(rawText) { - var compiledFilters = []; + var compiledFilters = new this.CompiledOutput(); // Useful references: // https://adblockplus.org/en/filter-cheatsheet @@ -793,7 +796,7 @@ staticNetFilteringEngine.compile(line, compiledFilters); } - return compiledFilters.join('\n'); + return compiledFilters.toString(); }; /******************************************************************************/ diff --git a/src/js/tab.js b/src/js/tab.js index 9b9976d62..7fb77d346 100644 --- a/src/js/tab.js +++ b/src/js/tab.js @@ -525,7 +525,8 @@ vAPI.tabs.onClosed = function(tabId) { vAPI.tabs.onPopupUpdated = (function() { // The same context object will be reused everytime. This also allows to // remember whether a popup or popunder was matched. 
- var context = {}; + var context = {}, + logData; // https://github.com/gorhill/uBlock/commit/1d448b85b2931412508aa01bf899e0b6f0033626#commitcomment-14944764 // See if two URLs are different, disregarding scheme -- because the scheme @@ -549,8 +550,9 @@ vAPI.tabs.onPopupUpdated = (function() { }; var popupMatch = function(openerURL, targetURL, clickedURL, popupType) { - var openerHostname = µb.URI.hostnameFromURI(openerURL); - var openerDomain = µb.URI.domainFromHostname(openerHostname); + var openerHostname = µb.URI.hostnameFromURI(openerURL), + openerDomain = µb.URI.domainFromHostname(openerHostname), + result; context.pageHostname = openerHostname; context.pageDomain = openerDomain; @@ -579,90 +581,90 @@ vAPI.tabs.onPopupUpdated = (function() { // URL. if ( openerHostname !== '' && targetURL !== 'about:blank' ) { // Check per-site switch first - if ( µb.hnSwitches.evaluateZ('no-popups', openerHostname) ) { - if ( typeof clickedURL === 'string' && areDifferentURLs(targetURL, clickedURL) ) { - return 'ub:no-popups: ' + µb.hnSwitches.z + ' true'; + if ( µb.hnSwitches.evaluateZ('no-popups', openerHostname) === true ) { + if ( + typeof clickedURL === 'string' && + areDifferentURLs(targetURL, clickedURL) + ) { + logData = { + source: 'switch', + raw: 'no-popups: ' + µb.hnSwitches.z + ' true' + }; + return 1; } } // https://github.com/gorhill/uBlock/issues/581 // Take into account popup-specific rules in dynamic URL filtering, OR // generic allow rules. 
- µb.sessionURLFiltering.evaluateZ(openerHostname, targetURL, popupType); + result = µb.sessionURLFiltering.evaluateZ(openerHostname, targetURL, popupType); if ( - µb.sessionURLFiltering.r === 1 && µb.sessionURLFiltering.type === popupType || - µb.sessionURLFiltering.r === 2 + result === 1 && µb.sessionURLFiltering.type === popupType || + result === 2 ) { - return µb.sessionURLFiltering.toFilterString(); + logData = µb.sessionURLFiltering.toLogData(); + return result; } // https://github.com/gorhill/uBlock/issues/581 // Take into account `allow` rules in dynamic filtering: `block` rules // are ignored, as block rules are not meant to block specific types // like `popup` (just like with static filters). - µb.sessionFirewall.evaluateCellZY(openerHostname, context.requestHostname, popupType); - if ( µb.sessionFirewall.r === 2 ) { - return µb.sessionFirewall.toFilterString(); + result = µb.sessionFirewall.evaluateCellZY(openerHostname, context.requestHostname, popupType); + if ( result === 2 ) { + logData = µb.sessionFirewall.toLogData(); + return 2; } } // https://github.com/chrisaljoudi/uBlock/issues/323 // https://github.com/chrisaljoudi/uBlock/issues/1142 // Don't block if uBlock is turned off in popup's context - var snfe = µb.staticNetFilteringEngine; - if ( - µb.getNetFilteringSwitch(targetURL) && - snfe.matchStringExactType(context, targetURL, popupType) !== undefined - ) { - return snfe.toResultString(µb.logger.isEnabled()); + if ( µb.getNetFilteringSwitch(targetURL) ) { + result = µb.staticNetFilteringEngine.matchStringExactType( + context, + targetURL, + popupType + ); + if ( result !== 0 ) { + logData = µb.staticNetFilteringEngine.toLogData(); + return result; + } } - return ''; + return 0; }; var mapPopunderResult = function(popunderURL, popunderHostname, result) { - if ( result.startsWith('sb:') === false ) { - return ''; - } - var snfe = µb.staticNetFilteringEngine; - var token = snfe.tokenRegister; - if ( token === '*' ) { - return ''; - } - if ( token 
=== '.' ) { - return result; - } - result = snfe.toResultString(true); - var re = snfe.filterRegexFromCompiled(result.slice(3)); - if ( re === null ) { - return ''; - } - var matches = re.exec(popunderURL); - if ( matches === null ) { - return ''; + if ( + logData === undefined || + logData.source !== 'static' || + logData.token === '*' + ) { + return 0; } + if ( logData.token === '.' ) { return result; } + var re = new RegExp(logData.regex), + matches = re.exec(popunderURL); + if ( matches === null ) { return 0; } var beg = matches.index, end = beg + matches[0].length, pos = popunderURL.indexOf(popunderHostname); - if ( pos === -1 ) { - return ''; - } + if ( pos === -1 ) { return 0; } // https://github.com/gorhill/uBlock/issues/1471 // We test whether the opener hostname as at least one character // within matched portion of URL. // https://github.com/gorhill/uBlock/issues/1903 // Ignore filters which cause a match before the start of the // hostname in the URL. - return beg >= pos && - beg < pos + popunderHostname.length && - end > pos ? - result : - ''; + return beg >= pos && beg < pos + popunderHostname.length && end > pos + ? result + : 0; }; var popunderMatch = function(openerURL, targetURL) { var result = popupMatch(targetURL, openerURL, null, 'popunder'); - if ( µb.isBlockResult(result) ) { + if ( result === 1 ) { return result; } // https://github.com/gorhill/uBlock/issues/1010#issuecomment-186824878 @@ -671,24 +673,24 @@ vAPI.tabs.onPopupUpdated = (function() { // a broad one, we will consider the opener tab to be a popunder tab. // For now, a "broad" filter is one which does not touch any part of // the hostname part of the opener URL. 
- var popunderURL = openerURL; - var popunderHostname = µb.URI.hostnameFromURI(popunderURL); + var popunderURL = openerURL, + popunderHostname = µb.URI.hostnameFromURI(popunderURL); if ( popunderHostname === '' ) { - return ''; + return 0; } result = mapPopunderResult( popunderURL, popunderHostname, popupMatch(targetURL, popunderURL, null, 'popup') ); - if ( result !== '' ) { + if ( result !== 0 ) { return result; } // https://github.com/gorhill/uBlock/issues/1598 // Try to find a match against origin part of the opener URL. popunderURL = µb.URI.originFromURI(popunderURL); if ( popunderURL === '' ) { - return ''; + return 0; } return mapPopunderResult( popunderURL, @@ -731,13 +733,13 @@ vAPI.tabs.onPopupUpdated = (function() { } // Popup test. - var popupType = 'popup'; - var result = popupMatch(openerURL, targetURL, µb.mouseURL, 'popup'); + var popupType = 'popup', + result = popupMatch(openerURL, targetURL, µb.mouseURL, 'popup'); // Popunder test. - if ( result === '' ) { + if ( result === 0 ) { result = popunderMatch(openerURL, targetURL); - if ( µb.isBlockResult(result) ) { + if ( result === 1 ) { popupType = 'popunder'; } } @@ -747,7 +749,7 @@ vAPI.tabs.onPopupUpdated = (function() { µb.logger.writeOne( popupType === 'popup' ? openerTabId : targetTabId, 'net', - result, + logData, popupType, popupType === 'popup' ? targetURL : openerURL, µb.URI.hostnameFromURI(context.rootURL), @@ -756,7 +758,7 @@ vAPI.tabs.onPopupUpdated = (function() { } // Not blocked - if ( µb.isAllowResult(result) ) { + if ( result !== 1 ) { return; } diff --git a/src/js/traffic.js b/src/js/traffic.js index fb9bdb53c..af28762ca 100644 --- a/src/js/traffic.js +++ b/src/js/traffic.js @@ -116,7 +116,9 @@ var onBeforeRequest = function(details) { var isFrame = requestType === 'sub_frame'; // https://github.com/chrisaljoudi/uBlock/issues/114 - var requestContext = pageStore.createContextFromFrameId(isFrame ? 
details.parentFrameId : details.frameId); + var requestContext = pageStore.createContextFromFrameId( + isFrame ? details.parentFrameId : details.frameId + ); // Setup context and evaluate var requestURL = details.url; @@ -132,7 +134,7 @@ var onBeforeRequest = function(details) { µb.logger.writeOne( tabId, 'net', - result, + pageStore.logData, requestType, requestURL, requestContext.rootHostname, @@ -141,7 +143,7 @@ var onBeforeRequest = function(details) { } // Not blocked - if ( µb.isAllowResult(result) ) { + if ( result !== 1 ) { // https://github.com/chrisaljoudi/uBlock/issues/114 if ( details.parentFrameId !== -1 && isFrame ) { pageStore.setFrame(details.frameId, requestURL); @@ -162,7 +164,7 @@ var onBeforeRequest = function(details) { µb.logger.writeOne( tabId, 'redirect', - 'rr:' + µb.redirectEngine.resourceNameRegister, + { source: 'redirect', raw: µb.redirectEngine.resourceNameRegister }, requestType, requestURL, requestContext.rootHostname, @@ -193,8 +195,7 @@ var onBeforeRootFrameRequest = function(details) { // behind-the-scene var µburi = µb.URI, requestHostname = µburi.hostnameFromURI(requestURL), - requestDomain = µburi.domainFromHostname(requestHostname) || requestHostname, - result = ''; + requestDomain = µburi.domainFromHostname(requestHostname) || requestHostname; var context = { rootHostname: requestHostname, rootDomain: requestDomain, @@ -204,22 +205,31 @@ var onBeforeRootFrameRequest = function(details) { requestHostname: requestHostname, requestType: 'main_frame' }; + var result = 0, + logData, + logEnabled = µb.logger.isEnabled(); // If the site is whitelisted, disregard strict blocking if ( µb.getNetFilteringSwitch(requestURL) === false ) { - result = 'ua:whitelisted'; + result = 2; + if ( logEnabled === true ) { + logData = { engine: 'u', result: 2, raw: 'whitelisted' }; + } } // Permanently unrestricted? 
- if ( result === '' && µb.hnSwitches.evaluateZ('no-strict-blocking', requestHostname) ) { - result = 'ua:no-strict-blocking: ' + µb.hnSwitches.z + ' true'; + if ( result === 0 && µb.hnSwitches.evaluateZ('no-strict-blocking', requestHostname) ) { + result = 2; + if ( logEnabled === true ) { + logData = { engine: 'u', result: 2, raw: 'no-strict-blocking: ' + µb.hnSwitches.z + ' true' }; + } } // Temporarily whitelisted? - if ( result === '' ) { + if ( result === 0 ) { result = isTemporarilyWhitelisted(result, requestHostname); - if ( result.charAt(1) === 'a' ) { - result = 'ua:no-strict-blocking true (temporary)'; + if ( result === 2 && logEnabled === true ) { + logData = { engine: 'u', result: 2, raw: 'no-strict-blocking true (temporary)' }; } } @@ -227,26 +237,31 @@ var onBeforeRootFrameRequest = function(details) { var snfe = µb.staticNetFilteringEngine; // Check for specific block - if ( - result === '' && - snfe.matchStringExactType(context, requestURL, 'main_frame') !== undefined - ) { - result = snfe.toResultString(true); + if ( result === 0 ) { + result = snfe.matchStringExactType(context, requestURL, 'main_frame'); + if ( result !== 0 && logEnabled === true ) { + logData = snfe.toLogData(); + } } // Check for generic block - if ( - result === '' && - snfe.matchStringExactType(context, requestURL, 'no_type') !== undefined - ) { - result = snfe.toResultString(true); - // https://github.com/chrisaljoudi/uBlock/issues/1128 - // Do not block if the match begins after the hostname, except when - // the filter is specifically of type `other`. - // https://github.com/gorhill/uBlock/issues/490 - // Removing this for the time being, will need a new, dedicated type. 
- if ( result.charAt(1) === 'b' ) { - result = toBlockDocResult(requestURL, requestHostname, result); + if ( result === 0 ) { + result = snfe.matchStringExactType(context, requestURL, 'no_type'); + if ( result !== 0 ) { + if ( result === 1 || logEnabled === true ) { + logData = snfe.toLogData(); + } + // https://github.com/chrisaljoudi/uBlock/issues/1128 + // Do not block if the match begins after the hostname, except when + // the filter is specifically of type `other`. + // https://github.com/gorhill/uBlock/issues/490 + // Removing this for the time being, will need a new, dedicated type. + if ( + result === 1 && + toBlockDocResult(requestURL, requestHostname, logData) === false + ) { + result = 0; + } } } @@ -257,11 +272,11 @@ var onBeforeRootFrameRequest = function(details) { pageStore.journalAddRequest(requestHostname, result); } - if ( µb.logger.isEnabled() ) { + if ( logEnabled ) { µb.logger.writeOne( tabId, 'net', - result, + logData, 'main_frame', requestURL, requestHostname, @@ -270,19 +285,19 @@ var onBeforeRootFrameRequest = function(details) { } // Not blocked - if ( µb.isAllowResult(result) ) { - return; - } + if ( result !== 1 ) { return; } - var compiled = result.slice(3); + // No log data means no strict blocking (because we need to report why + // the blocking occurs. 
+ if ( logData === undefined ) { return; } // Blocked var query = btoa(JSON.stringify({ url: requestURL, hn: requestHostname, dn: requestDomain, - fc: compiled, - fs: snfe.filterStringFromCompiled(compiled) + fc: logData.compiled, + fs: logData.raw })); vAPI.tabs.replace(tabId, vAPI.getURL('document-blocked.html?details=') + query); @@ -292,26 +307,23 @@ var onBeforeRootFrameRequest = function(details) { /******************************************************************************/ -var toBlockDocResult = function(url, hostname, result) { - // Make a regex out of the result - var re = µBlock.staticNetFilteringEngine - .filterRegexFromCompiled(result.slice(3), 'gi'); - if ( re === null ) { - return ''; - } - var matches = re.exec(url); - if ( matches === null ) { - return ''; - } +var toBlockDocResult = function(url, hostname, logData) { + if ( typeof logData.regex !== 'string' ) { return false; } + var re = new RegExp(logData.regex), + match = re.exec(url.toLowerCase()); + if ( match === null ) { return false; } // https://github.com/chrisaljoudi/uBlock/issues/1128 // https://github.com/chrisaljoudi/uBlock/issues/1212 // Relax the rule: verify that the match is completely before the path part - if ( re.lastIndex <= url.indexOf(hostname) + hostname.length + 1 ) { - return result; + if ( + (match.index + match[0].length) <= + (url.indexOf(hostname) + hostname.length + 1) + ) { + return true; } - return ''; + return false; }; /******************************************************************************/ @@ -410,18 +422,20 @@ var onHeadersReceived = function(details) { // Turns out scripts must also be considered as potential embedded // contexts (as workers) and as such we may need to inject content // security policy directives. 
- if ( requestType === 'script' || requestType === 'main_frame' || requestType === 'sub_frame' ) { - return processCSP(pageStore, details); + if ( requestType === 'main_frame' || requestType === 'sub_frame' ) { + return injectCSP(pageStore, details); } }; /******************************************************************************/ -var processCSP = function(pageStore, details) { +var injectCSP = function(pageStore, details) { var µb = µBlock, tabId = details.tabId, requestURL = details.url, - loggerEnabled = µb.logger.isEnabled(); + loggerEnabled = µb.logger.isEnabled(), + logger = µb.logger, + cspSubsets = []; var context = pageStore.createContextFromPage(); context.requestHostname = µb.URI.hostnameFromURI(requestURL); @@ -429,79 +443,125 @@ var processCSP = function(pageStore, details) { context.pageHostname = context.pageDomain = context.requestHostname; } - var inlineScriptResult, blockInlineScript, - workerResult, blockWorker; - if ( details.type !== 'script' ) { - context.requestType = 'inline-script'; - context.requestURL = requestURL; - inlineScriptResult = pageStore.filterRequestNoCache(context); - blockInlineScript = µb.isBlockResult(inlineScriptResult); - // https://github.com/gorhill/uBlock/issues/2360 - // https://github.com/gorhill/uBlock/issues/2440 - context.requestType = 'script'; - context.requestURL = 'blob:'; - µb.staticNetFilteringEngine.matchString(context); - workerResult = µb.staticNetFilteringEngine.toResultString(loggerEnabled); - blockWorker = µb.isBlockResult(workerResult); + // Start collecting policies >>>>>>>> + + // ======== built-in policies + + context.requestType = 'inline-script'; + context.requestURL = requestURL; + if ( pageStore.filterRequestNoCache(context) === 1 ) { + cspSubsets[0] = "script-src 'unsafe-eval' * blob: data:"; + // https://bugs.chromium.org/p/chromium/issues/detail?id=669086 + // TODO: remove when most users are beyond Chromium v56 + if ( vAPI.chromiumVersion < 57 ) { + cspSubsets[0] += '; frame-src *'; + 
} } - - µb.staticNetFilteringEngine.matchStringExactType(context, requestURL, 'websocket'); - var websocketResult = µb.staticNetFilteringEngine.toResultString(loggerEnabled), - blockWebsocket = µb.isBlockResult(websocketResult); - - var headersChanged; - if ( blockInlineScript || blockWebsocket || blockWorker ) { - headersChanged = foilWithCSP( - details.responseHeaders, - blockInlineScript, - blockWebsocket, - blockWorker + if ( loggerEnabled === true ) { + logger.writeOne( + tabId, + 'net', + pageStore.logData, + 'inline-script', + requestURL, + context.rootHostname, + context.pageHostname ); } - if ( loggerEnabled && details.type !== 'script' ) { - if ( blockInlineScript !== undefined ) { - µb.logger.writeOne( - tabId, - 'net', - inlineScriptResult, - 'inline-script', - requestURL, - context.rootHostname, - context.pageHostname - ); - } - if ( websocketResult ) { - µb.logger.writeOne( - tabId, - 'net', - websocketResult, - 'websocket', - requestURL, - context.rootHostname, - context.pageHostname - ); - } - if ( workerResult ) { - µb.logger.writeOne( - tabId, - 'net', - workerResult, - 'worker', - requestURL, - context.rootHostname, - context.pageHostname - ); - } - } + // ======== filter-based policies + + // Static filtering. + + var logData = []; + + µb.staticNetFilteringEngine.matchAndFetchData( + 'csp', + requestURL, + cspSubsets, + loggerEnabled === true ? logData : undefined + ); + + // <<<<<<<< All policies have been collected context.dispose(); - if ( headersChanged !== true ) { return; } + // URL filtering `allow` rules override static filtering. + if ( + cspSubsets.length !== 0 && + µb.sessionURLFiltering.evaluateZ(context.rootHostname, requestURL, 'csp') === 2 + ) { + if ( loggerEnabled === true ) { + logger.writeOne( + tabId, + 'net', + µb.sessionURLFiltering.toLogData(), + 'csp', + requestURL, + context.rootHostname, + context.pageHostname + ); + } + return; + } + + // Dynamic filtering rules override static filtering. 
+ if ( + cspSubsets.length !== 0 && + µb.userSettings.advancedUserEnabled && + µb.sessionFirewall.evaluateCellZY(context.rootHostname, context.rootHostname, '*') === 2 + ) { + if ( loggerEnabled === true ) { + logger.writeOne( + tabId, + 'net', + µb.sessionFirewall.toLogData(), + 'csp', + requestURL, + context.rootHostname, + context.pageHostname + ); + } + return; + } + + // Static CSP policies will be applied. + var i = logData.length; + while ( i-- ) { + logger.writeOne( + tabId, + 'net', + logData[i], + 'csp', + requestURL, + context.rootHostname, + context.pageHostname + ); + } + + if ( cspSubsets.length === 0 ) { + return; + } µb.updateBadgeAsync(tabId); - return { 'responseHeaders': details.responseHeaders }; + var csp, headers = details.responseHeaders; + i = headerIndexFromName('content-security-policy', headers); + if ( i !== -1 ) { + csp = headers[i].value.trim(); + headers.splice(i, 1); + } + cspSubsets = cspSubsets.join(', '); + // Use comma to add a new subset to potentially existing one(s). This new + // subset has its own reporting options and won't cause spurious CSP + // reports to outside world. + // Ref.: https://www.w3.org/TR/CSP2/#implementation-considerations + headers.push({ + name: 'Content-Security-Policy', + value: csp === undefined ? 
cspSubsets : csp + ', ' + cspSubsets + }); + + return { 'responseHeaders': headers }; }; /******************************************************************************/ @@ -518,13 +578,13 @@ var foilLargeMediaElement = function(pageStore, details) { var tabId = details.tabId, size = parseInt(details.responseHeaders[i].value, 10) || 0, result = pageStore.filterLargeMediaElement(size); - if ( result === undefined ) { return; } + if ( result === 0 ) { return; } if ( µb.logger.isEnabled() ) { µb.logger.writeOne( tabId, 'net', - result, + pageStore.logData, details.type, details.url, pageStore.tabHostname, @@ -537,57 +597,6 @@ var foilLargeMediaElement = function(pageStore, details) { /******************************************************************************/ -var foilWithCSP = function(headers, noInlineScript, noWebsocket, noBlobWorker) { - var i = headerIndexFromName('content-security-policy', headers), - cspSubset = []; - - if ( noInlineScript ) { - cspSubset.push("script-src 'unsafe-eval' *"); - } - - if ( noWebsocket ) { - cspSubset.push('connect-src http: https:'); - } - - // https://www.w3.org/TR/CSP2/#directive-child-src - // https://www.w3.org/TR/CSP3/#directive-worker-src - if ( noBlobWorker ) { - cspSubset.push('child-src http: https:'); - } - - // https://bugs.chromium.org/p/chromium/issues/detail?id=513860 - // Bad Chromium bug: web pages can work around CSP directives by - // creating data:- or blob:-based URI. So if we must restrict using CSP, - // we have no choice but to also prevent the creation of nested browsing - // contexts based on data:- or blob:-based URIs. - if ( vAPI.chrome && (noInlineScript || noWebsocket) ) { - // https://w3c.github.io/webappsec-csp/#directive-frame-src - cspSubset.push('frame-src http: https:'); - } - - if ( cspSubset.length === 0 ) { return; } - - var csp; - if ( i !== -1 ) { - csp = headers[i].value.trim(); - headers.splice(i, 1); - } - - // Use comma to add a new subset to potentially existing one(s). 
This new - // subset has its own reporting options and won't cause spurious CSP - // reports to outside world. - // Ref.: https://www.w3.org/TR/CSP2/#implementation-considerations - cspSubset = cspSubset.join('; '); - headers.push({ - name: 'Content-Security-Policy', - value: csp === undefined ? cspSubset : csp + ', ' + cspSubset - }); - - return true; -}; - -/******************************************************************************/ - // Caller must ensure headerName is normalized to lower case. var headerIndexFromName = function(headerName, headers) { @@ -620,8 +629,7 @@ vAPI.net.onHeadersReceived = { 'main_frame', 'sub_frame', 'image', - 'media', - 'script' + 'media' ], extra: [ 'blocking', 'responseHeaders' ], callback: onHeadersReceived @@ -629,8 +637,6 @@ vAPI.net.onHeadersReceived = { vAPI.net.registerListeners(); -//console.log('traffic.js > Beginning to intercept net requests at %s', (new Date()).toISOString()); - /******************************************************************************/ var isTemporarilyWhitelisted = function(result, hostname) { @@ -640,17 +646,15 @@ var isTemporarilyWhitelisted = function(result, hostname) { obsolete = documentWhitelists[hostname]; if ( obsolete !== undefined ) { if ( obsolete > Date.now() ) { - if ( result === '' ) { - return 'ua:*' + ' ' + hostname + ' doc allow'; + if ( result === 0 ) { + return 2; } } else { delete documentWhitelists[hostname]; } } pos = hostname.indexOf('.'); - if ( pos === -1 ) { - break; - } + if ( pos === -1 ) { break; } hostname = hostname.slice(pos + 1); } return result; diff --git a/src/js/url-net-filtering.js b/src/js/url-net-filtering.js index e5be493cc..cd531f9d0 100644 --- a/src/js/url-net-filtering.js +++ b/src/js/url-net-filtering.js @@ -210,7 +210,7 @@ URLNetFiltering.prototype.removeRule = function(srcHostname, url, type) { URLNetFiltering.prototype.evaluateZ = function(context, target, type) { this.r = 0; if ( this.rules.size === 0 ) { - return this; + return 0; } var 
entries, pos, i, entry; for (;;) { @@ -222,7 +222,7 @@ URLNetFiltering.prototype.evaluateZ = function(context, target, type) { this.url = entry.url; this.type = type; this.r = entry.action; - return this; + return this.r; } } if ( (entries = this.rules.get(context + ' *')) ) { @@ -232,14 +232,20 @@ URLNetFiltering.prototype.evaluateZ = function(context, target, type) { this.url = entry.url; this.type = '*'; this.r = entry.action; - return this; + return this.r; } } if ( context === '*' ) { break; } pos = context.indexOf('.'); context = pos !== -1 ? context.slice(pos + 1) : '*'; } - return this; + return 0; +}; + +/******************************************************************************/ + +URLNetFiltering.prototype.mustAllowCellZ = function(context, target, type) { + return this.evaluateZ(context, target, type) === 2; }; /******************************************************************************/ @@ -250,21 +256,30 @@ URLNetFiltering.prototype.mustBlockOrAllow = function() { /******************************************************************************/ -URLNetFiltering.prototype.toFilterString = function() { - if ( this.r === 0 ) { - return ''; - } - var body = this.context + ' ' + this.url + ' ' + this.type; - if ( this.r === 1 ) { - return 'lb:' + body + ' block'; - } - if ( this.r === 2 ) { - return 'la:' + body + ' allow'; - } - /* this.r === 3 */ - return 'ln:' + body + ' noop'; +URLNetFiltering.prototype.toLogData = function() { + if ( this.r === 0 ) { return; } + return { + source: 'dynamicUrl', + result: this.r, + rule: [ + this.context, + this.url, + this.type, + this.intToActionMap.get(this.r) + ], + raw: this.context + ' ' + + this.url + ' ' + + this.type + ' ' + + this.intToActionMap.get(this.r) + }; }; +URLNetFiltering.prototype.intToActionMap = new Map([ + [ 1, 'block' ], + [ 2, 'allow' ], + [ 3, 'noop' ] +]); + /******************************************************************************/ URLNetFiltering.prototype.copyRules = 
function(other, context, urls, type) { diff --git a/src/js/utils.js b/src/js/utils.js index 8d3814565..8e92f2cc4 100644 --- a/src/js/utils.js +++ b/src/js/utils.js @@ -1,7 +1,7 @@ /******************************************************************************* uBlock Origin - a browser extension to block requests. - Copyright (C) 2014-2016 Raymond Hill + Copyright (C) 2014-2017 Raymond Hill This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -150,7 +150,10 @@ this.offset = offset || 0; }; -µBlock.LineIterator.prototype.next = function() { +µBlock.LineIterator.prototype.next = function(offset) { + if ( offset !== undefined ) { + this.offset += offset; + } var lineEnd = this.text.indexOf('\n', this.offset); if ( lineEnd === -1 ) { lineEnd = this.text.indexOf('\r', this.offset); @@ -163,18 +166,8 @@ return line; }; -µBlock.LineIterator.prototype.rewind = function() { - if ( this.offset <= 1 ) { - this.offset = 0; - return; - } - var lineEnd = this.text.lastIndexOf('\n', this.offset - 2); - if ( lineEnd !== -1 ) { - this.offset = lineEnd + 1; - } else { - lineEnd = this.text.lastIndexOf('\r', this.offset - 2); - this.offset = lineEnd !== -1 ? 
lineEnd + 1 : 0; - } +µBlock.LineIterator.prototype.charCodeAt = function(offset) { + return this.text.charCodeAt(this.offset + offset); }; µBlock.LineIterator.prototype.eot = function() { @@ -209,6 +202,59 @@ return field; }; +µBlock.FieldIterator.prototype.remainder = function() { + return this.text.slice(this.offset); +}; + +/******************************************************************************/ + +µBlock.CompiledOutput = function() { + this.bufferLen = 8192; + this.buffer = new Uint8Array(this.bufferLen); + this.offset = 0; +}; + +µBlock.CompiledOutput.prototype.push = function(lineBits, line) { + var lineLen = line.length, + offset = this.offset, + need = offset + 2 + lineLen; // lineBits, line, \n + if ( need > this.bufferLen ) { + this.grow(need); + } + var buffer = this.buffer; + if ( offset !== 0 ) { + buffer[offset++] = 0x0A /* '\n' */; + } + buffer[offset++] = 0x61 /* 'a' */ + lineBits; + for ( var i = 0, c; i < lineLen; i++ ) { + c = line.charCodeAt(i); + if ( c > 0x7F ) { + return this.push(lineBits | 0x02, encodeURIComponent(line)); + } + buffer[offset++] = c; + } + this.offset = offset; +}; + +µBlock.CompiledOutput.prototype.grow = function(need) { + var newBufferLen = Math.min( + 2097152, + 1 << Math.ceil(Math.log(need) / Math.log(2)) + ); + while ( newBufferLen < need ) { + newBufferLen += 1048576; + } + var newBuffer = new Uint8Array(newBufferLen); + newBuffer.set(this.buffer); + this.buffer = newBuffer; + this.bufferLen = newBufferLen; +}; + +µBlock.CompiledOutput.prototype.toString = function() { + var decoder = new TextDecoder(); + return decoder.decode(new Uint8Array(this.buffer.buffer, 0, this.offset)); +}; + /******************************************************************************/ µBlock.mapToArray = typeof Array.from === 'function'