Minor code review of static parser code

This commit is contained in:
Raymond Hill 2020-06-09 11:58:27 -04:00
parent 410ef862a4
commit bc7f149252
No known key found for this signature in database
GPG key ID: 25E1490B761470C2
3 changed files with 27 additions and 33 deletions

View file

@ -24,7 +24,7 @@
'use strict'; 'use strict';
CodeMirror.defineMode("ubo-static-filtering", function() { CodeMirror.defineMode("ubo-static-filtering", function() {
const parser = new vAPI.StaticFilteringParser(true); const parser = new vAPI.StaticFilteringParser({ interactive: true });
const reDirective = /^!#(?:if|endif|include)\b/; const reDirective = /^!#(?:if|endif|include)\b/;
let parserSlot = 0; let parserSlot = 0;
let netOptionValueMode = false; let netOptionValueMode = false;

View file

@ -19,16 +19,12 @@
Home: https://github.com/gorhill/uBlock Home: https://github.com/gorhill/uBlock
*/ */
/* global punycode */
'use strict'; 'use strict';
/******************************************************************************* /*******************************************************************************
The goal is for the static filtering parser to avoid external The goal is for the static filtering parser to avoid external
dependencies[1] to other code in the project. dependencies to other code in the project.
[1] Except unavoidable ones, such as punycode.
Roughly, this is how things work: each input string (passed to analyze()) Roughly, this is how things work: each input string (passed to analyze())
is decomposed into a minimal set of distinct slices. Each slice is a is decomposed into a minimal set of distinct slices. Each slice is a
@ -76,12 +72,10 @@
/******************************************************************************/ /******************************************************************************/
const Parser = class { const Parser = class {
constructor(interactive = false) { constructor(options = {}) {
this.interactive = interactive; this.interactive = options.interactive === true;
this.raw = ''; this.raw = '';
this.rawEnd = 0;
this.slices = []; this.slices = [];
this.optSlices = [];
this.leftSpaceSpan = new Span(); this.leftSpaceSpan = new Span();
this.exceptionSpan = new Span(); this.exceptionSpan = new Span();
this.patternLeftAnchorSpan = new Span(); this.patternLeftAnchorSpan = new Span();
@ -109,13 +103,13 @@ const Parser = class {
this.extOptionsIterator = new ExtOptionsIterator(this); this.extOptionsIterator = new ExtOptionsIterator(this);
this.maxTokenLength = Number.MAX_SAFE_INTEGER; this.maxTokenLength = Number.MAX_SAFE_INTEGER;
this.reIsLocalhostRedirect = /(?:0\.0\.0\.0|(?:broadcast|local)host|local|ip6-\w+)\b/; this.reIsLocalhostRedirect = /(?:0\.0\.0\.0|(?:broadcast|local)host|local|ip6-\w+)\b/;
this.reHostname = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+/;
this.punycoder = new URL(self.location);
this.reset(); this.reset();
} }
reset() { reset() {
this.rawPos = 0;
this.sliceWritePtr = 0; this.sliceWritePtr = 0;
this.optSliceWritePtr = 0;
this.category = CATNone; this.category = CATNone;
this.allBits = 0; // bits found in any slices this.allBits = 0; // bits found in any slices
this.patternBits = 0; // bits found in any pattern slices this.patternBits = 0; // bits found in any pattern slices
@ -322,8 +316,8 @@ const Parser = class {
this.patternSpan.l = this.optionsAnchorSpan.i - islice; this.patternSpan.l = this.optionsAnchorSpan.i - islice;
let patternStartIsRegex = let patternStartIsRegex =
islice < this.optionsAnchorSpan.i && islice < this.optionsAnchorSpan.i &&
hasBits(this.slices[islice], BITSlash); hasBits(this.slices[islice], BITSlash);
let patternIsRegex = patternStartIsRegex; let patternIsRegex = patternStartIsRegex;
if ( patternStartIsRegex ) { if ( patternStartIsRegex ) {
const { i, l } = this.patternSpan; const { i, l } = this.patternSpan;
@ -642,8 +636,8 @@ const Parser = class {
slice(raw) { slice(raw) {
this.reset(); this.reset();
this.raw = raw; this.raw = raw;
this.rawEnd = raw.length; const rawEnd = raw.length;
if ( this.rawEnd === 0 ) { return; } if ( rawEnd === 0 ) { return; }
// All unicode characters are allowed in hostname // All unicode characters are allowed in hostname
const unicodeBits = BITUnicode | BITAlpha; const unicodeBits = BITUnicode | BITAlpha;
// Create raw slices // Create raw slices
@ -656,7 +650,7 @@ const Parser = class {
ptr += 2; ptr += 2;
let allBits = aBits; let allBits = aBits;
let i = 0, j = 1; let i = 0, j = 1;
while ( j < this.rawEnd ) { while ( j < rawEnd ) {
c = raw.charCodeAt(j); c = raw.charCodeAt(j);
const bBits = c < 0x80 ? charDescBits[c] : unicodeBits; const bBits = c < 0x80 ? charDescBits[c] : unicodeBits;
if ( bBits !== aBits ) { if ( bBits !== aBits ) {
@ -675,7 +669,7 @@ const Parser = class {
// End-of-line slice // End-of-line slice
this.eolSpan.i = ptr; this.eolSpan.i = ptr;
slices[ptr+0] = 0; slices[ptr+0] = 0;
slices[ptr+1] = this.rawEnd; slices[ptr+1] = rawEnd;
slices[ptr+2] = 0; slices[ptr+2] = 0;
ptr += 3; ptr += 3;
// Trim left // Trim left
@ -947,25 +941,25 @@ const Parser = class {
return this.raw; return this.raw;
} }
// TODO: if there is a need to punycode, we force a re-analysis post-
// punycode conversion. We could avoid the re-analysis by substituting
// the original pattern slices with the post-punycode ones, but it's
// not trivial work and given how rare this occurs it may not be worth
// worrying about this.
toPunycode() { toPunycode() {
if ( this.patternHasUnicode() === false ) { return; } if ( this.patternHasUnicode() === false ) { return true; }
const { i, l } = this.patternSpan; const { i, l } = this.patternSpan;
if ( l === 0 ) { return; } if ( l === 0 ) { return true; }
const re = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+/;
let pattern = this.getNetPattern(); let pattern = this.getNetPattern();
const match = re.exec(this.pattern); const match = this.reHostname.exec(this.pattern);
if ( match === null ) { return; } if ( match === null ) { return; }
pattern = punycode.toASCII(match[0]) + try {
this.pattern.slice(match.index + match[0].length); this.punycoder.hostname = match[0].replace(/\*/g, '__asterisk__');
} catch(ex) {
return false;
}
const punycoded = this.punycoder.hostname.replace(/__asterisk__/g, '*');
pattern = punycoded + this.pattern.slice(match.index + match[0].length);
const beg = this.slices[i+1]; const beg = this.slices[i+1];
const end = this.slices[i+l+1]; const end = this.slices[i+l+1];
const raw = this.raw.slice(0, beg) + pattern + this.raw.slice(end); const raw = this.raw.slice(0, beg) + pattern + this.raw.slice(end);
this.analyze(raw); this.analyze(raw);
return true;
} }
isException() { isException() {
@ -1599,7 +1593,7 @@ const ExtOptionsIterator = class {
/******************************************************************************/ /******************************************************************************/
if ( vAPI instanceof Object ) { if ( typeof vAPI === 'object' && vAPI !== null ) {
vAPI.StaticFilteringParser = Parser; vAPI.StaticFilteringParser = Parser;
} else { } else {
self.StaticFilteringParser = Parser; self.StaticFilteringParser = Parser;

View file

@ -722,7 +722,7 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
// Fetching the raw content may cause the compiled content to be // Fetching the raw content may cause the compiled content to be
// generated somewhere else in uBO, hence we try one last time to // generated somewhere else in uBO, hence we try one last time to
// fetch the compiled content in case it has become available. // fetch the compiled content in case it has become available.
let compiledDetails = await this.assets.get(compiledPath); const compiledDetails = await this.assets.get(compiledPath);
if ( compiledDetails.content === '' ) { if ( compiledDetails.content === '' ) {
compiledDetails.content = this.compileFilters( compiledDetails.content = this.compileFilters(
rawDetails.content, rawDetails.content,
@ -825,8 +825,8 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
// https://github.com/gorhill/uBlock/issues/2599 // https://github.com/gorhill/uBlock/issues/2599
// convert hostname to punycode if needed // convert hostname to punycode if needed
if ( parser.patternHasUnicode() ) { if ( parser.patternHasUnicode() && parser.toPunycode() === false ) {
parser.toPunycode(); continue;
} }
staticNetFilteringEngine.compile(parser, writer); staticNetFilteringEngine.compile(parser, writer);
} }