mirror of
https://github.com/gorhill/uBlock.git
synced 2024-11-13 10:17:34 +01:00
Minor code review of static parser code
This commit is contained in:
parent
410ef862a4
commit
bc7f149252
3 changed files with 27 additions and 33 deletions
|
@ -24,7 +24,7 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
CodeMirror.defineMode("ubo-static-filtering", function() {
|
CodeMirror.defineMode("ubo-static-filtering", function() {
|
||||||
const parser = new vAPI.StaticFilteringParser(true);
|
const parser = new vAPI.StaticFilteringParser({ interactive: true });
|
||||||
const reDirective = /^!#(?:if|endif|include)\b/;
|
const reDirective = /^!#(?:if|endif|include)\b/;
|
||||||
let parserSlot = 0;
|
let parserSlot = 0;
|
||||||
let netOptionValueMode = false;
|
let netOptionValueMode = false;
|
||||||
|
|
|
@ -19,16 +19,12 @@
|
||||||
Home: https://github.com/gorhill/uBlock
|
Home: https://github.com/gorhill/uBlock
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* global punycode */
|
|
||||||
|
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
|
|
||||||
The goal is for the static filtering parser to avoid external
|
The goal is for the static filtering parser to avoid external
|
||||||
dependencies[1] to other code in the project.
|
dependencies to other code in the project.
|
||||||
|
|
||||||
[1] Except unavoidable ones, such as punycode.
|
|
||||||
|
|
||||||
Roughly, this is how things work: each input string (passed to analyze())
|
Roughly, this is how things work: each input string (passed to analyze())
|
||||||
is decomposed into a minimal set of distinct slices. Each slice is a
|
is decomposed into a minimal set of distinct slices. Each slice is a
|
||||||
|
@ -76,12 +72,10 @@
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
const Parser = class {
|
const Parser = class {
|
||||||
constructor(interactive = false) {
|
constructor(options = {}) {
|
||||||
this.interactive = interactive;
|
this.interactive = options.interactive === true;
|
||||||
this.raw = '';
|
this.raw = '';
|
||||||
this.rawEnd = 0;
|
|
||||||
this.slices = [];
|
this.slices = [];
|
||||||
this.optSlices = [];
|
|
||||||
this.leftSpaceSpan = new Span();
|
this.leftSpaceSpan = new Span();
|
||||||
this.exceptionSpan = new Span();
|
this.exceptionSpan = new Span();
|
||||||
this.patternLeftAnchorSpan = new Span();
|
this.patternLeftAnchorSpan = new Span();
|
||||||
|
@ -109,13 +103,13 @@ const Parser = class {
|
||||||
this.extOptionsIterator = new ExtOptionsIterator(this);
|
this.extOptionsIterator = new ExtOptionsIterator(this);
|
||||||
this.maxTokenLength = Number.MAX_SAFE_INTEGER;
|
this.maxTokenLength = Number.MAX_SAFE_INTEGER;
|
||||||
this.reIsLocalhostRedirect = /(?:0\.0\.0\.0|(?:broadcast|local)host|local|ip6-\w+)\b/;
|
this.reIsLocalhostRedirect = /(?:0\.0\.0\.0|(?:broadcast|local)host|local|ip6-\w+)\b/;
|
||||||
|
this.reHostname = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+/;
|
||||||
|
this.punycoder = new URL(self.location);
|
||||||
this.reset();
|
this.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
reset() {
|
reset() {
|
||||||
this.rawPos = 0;
|
|
||||||
this.sliceWritePtr = 0;
|
this.sliceWritePtr = 0;
|
||||||
this.optSliceWritePtr = 0;
|
|
||||||
this.category = CATNone;
|
this.category = CATNone;
|
||||||
this.allBits = 0; // bits found in any slices
|
this.allBits = 0; // bits found in any slices
|
||||||
this.patternBits = 0; // bits found in any pattern slices
|
this.patternBits = 0; // bits found in any pattern slices
|
||||||
|
@ -322,8 +316,8 @@ const Parser = class {
|
||||||
this.patternSpan.l = this.optionsAnchorSpan.i - islice;
|
this.patternSpan.l = this.optionsAnchorSpan.i - islice;
|
||||||
|
|
||||||
let patternStartIsRegex =
|
let patternStartIsRegex =
|
||||||
islice < this.optionsAnchorSpan.i &&
|
islice < this.optionsAnchorSpan.i &&
|
||||||
hasBits(this.slices[islice], BITSlash);
|
hasBits(this.slices[islice], BITSlash);
|
||||||
let patternIsRegex = patternStartIsRegex;
|
let patternIsRegex = patternStartIsRegex;
|
||||||
if ( patternStartIsRegex ) {
|
if ( patternStartIsRegex ) {
|
||||||
const { i, l } = this.patternSpan;
|
const { i, l } = this.patternSpan;
|
||||||
|
@ -642,8 +636,8 @@ const Parser = class {
|
||||||
slice(raw) {
|
slice(raw) {
|
||||||
this.reset();
|
this.reset();
|
||||||
this.raw = raw;
|
this.raw = raw;
|
||||||
this.rawEnd = raw.length;
|
const rawEnd = raw.length;
|
||||||
if ( this.rawEnd === 0 ) { return; }
|
if ( rawEnd === 0 ) { return; }
|
||||||
// All unicode characters are allowed in hostname
|
// All unicode characters are allowed in hostname
|
||||||
const unicodeBits = BITUnicode | BITAlpha;
|
const unicodeBits = BITUnicode | BITAlpha;
|
||||||
// Create raw slices
|
// Create raw slices
|
||||||
|
@ -656,7 +650,7 @@ const Parser = class {
|
||||||
ptr += 2;
|
ptr += 2;
|
||||||
let allBits = aBits;
|
let allBits = aBits;
|
||||||
let i = 0, j = 1;
|
let i = 0, j = 1;
|
||||||
while ( j < this.rawEnd ) {
|
while ( j < rawEnd ) {
|
||||||
c = raw.charCodeAt(j);
|
c = raw.charCodeAt(j);
|
||||||
const bBits = c < 0x80 ? charDescBits[c] : unicodeBits;
|
const bBits = c < 0x80 ? charDescBits[c] : unicodeBits;
|
||||||
if ( bBits !== aBits ) {
|
if ( bBits !== aBits ) {
|
||||||
|
@ -675,7 +669,7 @@ const Parser = class {
|
||||||
// End-of-line slice
|
// End-of-line slice
|
||||||
this.eolSpan.i = ptr;
|
this.eolSpan.i = ptr;
|
||||||
slices[ptr+0] = 0;
|
slices[ptr+0] = 0;
|
||||||
slices[ptr+1] = this.rawEnd;
|
slices[ptr+1] = rawEnd;
|
||||||
slices[ptr+2] = 0;
|
slices[ptr+2] = 0;
|
||||||
ptr += 3;
|
ptr += 3;
|
||||||
// Trim left
|
// Trim left
|
||||||
|
@ -947,25 +941,25 @@ const Parser = class {
|
||||||
return this.raw;
|
return this.raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: if there is a need to punycode, we force a re-analysis post-
|
|
||||||
// punycode conversion. We could avoid the re-analysis by substituting
|
|
||||||
// the original pattern slices with the post-punycode ones, but it's
|
|
||||||
// not trivial work and given how rare this occurs it may not be worth
|
|
||||||
// worrying about this.
|
|
||||||
toPunycode() {
|
toPunycode() {
|
||||||
if ( this.patternHasUnicode() === false ) { return; }
|
if ( this.patternHasUnicode() === false ) { return true; }
|
||||||
const { i, l } = this.patternSpan;
|
const { i, l } = this.patternSpan;
|
||||||
if ( l === 0 ) { return; }
|
if ( l === 0 ) { return true; }
|
||||||
const re = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+/;
|
|
||||||
let pattern = this.getNetPattern();
|
let pattern = this.getNetPattern();
|
||||||
const match = re.exec(this.pattern);
|
const match = this.reHostname.exec(this.pattern);
|
||||||
if ( match === null ) { return; }
|
if ( match === null ) { return; }
|
||||||
pattern = punycode.toASCII(match[0]) +
|
try {
|
||||||
this.pattern.slice(match.index + match[0].length);
|
this.punycoder.hostname = match[0].replace(/\*/g, '__asterisk__');
|
||||||
|
} catch(ex) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const punycoded = this.punycoder.hostname.replace(/__asterisk__/g, '*');
|
||||||
|
pattern = punycoded + this.pattern.slice(match.index + match[0].length);
|
||||||
const beg = this.slices[i+1];
|
const beg = this.slices[i+1];
|
||||||
const end = this.slices[i+l+1];
|
const end = this.slices[i+l+1];
|
||||||
const raw = this.raw.slice(0, beg) + pattern + this.raw.slice(end);
|
const raw = this.raw.slice(0, beg) + pattern + this.raw.slice(end);
|
||||||
this.analyze(raw);
|
this.analyze(raw);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
isException() {
|
isException() {
|
||||||
|
@ -1599,7 +1593,7 @@ const ExtOptionsIterator = class {
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
if ( vAPI instanceof Object ) {
|
if ( typeof vAPI === 'object' && vAPI !== null ) {
|
||||||
vAPI.StaticFilteringParser = Parser;
|
vAPI.StaticFilteringParser = Parser;
|
||||||
} else {
|
} else {
|
||||||
self.StaticFilteringParser = Parser;
|
self.StaticFilteringParser = Parser;
|
||||||
|
|
|
@ -722,7 +722,7 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
|
||||||
// Fetching the raw content may cause the compiled content to be
|
// Fetching the raw content may cause the compiled content to be
|
||||||
// generated somewhere else in uBO, hence we try one last time to
|
// generated somewhere else in uBO, hence we try one last time to
|
||||||
// fetch the compiled content in case it has become available.
|
// fetch the compiled content in case it has become available.
|
||||||
let compiledDetails = await this.assets.get(compiledPath);
|
const compiledDetails = await this.assets.get(compiledPath);
|
||||||
if ( compiledDetails.content === '' ) {
|
if ( compiledDetails.content === '' ) {
|
||||||
compiledDetails.content = this.compileFilters(
|
compiledDetails.content = this.compileFilters(
|
||||||
rawDetails.content,
|
rawDetails.content,
|
||||||
|
@ -825,8 +825,8 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
|
||||||
|
|
||||||
// https://github.com/gorhill/uBlock/issues/2599
|
// https://github.com/gorhill/uBlock/issues/2599
|
||||||
// convert hostname to punycode if needed
|
// convert hostname to punycode if needed
|
||||||
if ( parser.patternHasUnicode() ) {
|
if ( parser.patternHasUnicode() && parser.toPunycode() === false ) {
|
||||||
parser.toPunycode();
|
continue;
|
||||||
}
|
}
|
||||||
staticNetFilteringEngine.compile(parser, writer);
|
staticNetFilteringEngine.compile(parser, writer);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue