From 609e9a6428dbb3ab8af142b217beae437dcc7365 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Mon, 3 Feb 2020 14:09:37 -0500 Subject: [PATCH] Remove elision of leading wildcard in some filter patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/882 Related commits: - https://github.com/gorhill/uBlock/commit/a95ef16e064a - https://github.com/gorhill/uBlock/commit/7971b223855d Leading wildcards before valid token characters need to be kept in order to respect the semantics of the filter. A leading wildcard in such a case changes the semantics of a filter, i.e. the following two filters are semantically different: example/abc *example/abc As a result, µBlock.BidiTrieContainer.indexOf() is now able to deal with a needle of length zero -- which is what happens in FilterPatternLeft(Ex) with filter patterns starting with `*` (or `^*`) and followed by valid token characters (0-9, a-z and %). --- src/js/static-net-filtering.js | 17 +++++++---------- src/js/strie.js | 1 + src/js/wasm/biditrie.wasm | Bin 976 -> 981 bytes src/js/wasm/biditrie.wat | 4 ++++ 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index f5c42bfd9..0fd0908aa 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -616,6 +616,9 @@ const FilterPatternPlainX = class extends FilterPatternPlain { /******************************************************************************/ +// https://github.com/gorhill/uBlock/commit/7971b223855d#commitcomment-37077525 +// Mind that the left part may be empty. 
+ const FilterPatternLeft = class { constructor(i, n) { this.i = i | 0; @@ -633,8 +636,10 @@ const FilterPatternLeft = class { } logData(details) { + details.pattern.unshift('*'); + if ( this.n === 0 ) { return; } const s = bidiTrie.extractString(this.i, this.n); - details.pattern.unshift(s, '*'); + details.pattern.unshift(s); details.regex.unshift(restrFromPlainPattern(s), '.*'); } @@ -2529,16 +2534,8 @@ const FilterParser = class { this.tokenBeg = matches.index; // https://www.reddit.com/r/uBlockOrigin/comments/dpcvfx/ - // Since we found a valid token, we can get rid of leading/trailing + // Since we found a valid token, we can get rid of trailing // wildcards if any. - // https://github.com/gorhill/uBlock/commit/7971b223855d#commitcomment-37077525 - // Mind that changing the pattern may change token start index. - if ( this.firstWildcardPos === 0 ) { - this.f = this.f.slice(1); - this.firstWildcardPos = this.secondWildcardPos; - this.secondWildcardPos = -1; - this.tokenBeg -= 1; - } if ( this.firstWildcardPos !== -1 ) { const lastCharPos = this.f.length - 1; if ( this.firstWildcardPos === lastCharPos ) { diff --git a/src/js/strie.js b/src/js/strie.js index 896a66144..5243ebb83 100644 --- a/src/js/strie.js +++ b/src/js/strie.js @@ -636,6 +636,7 @@ const roundToPageSize = v => (v + PAGE_SIZE-1) & ~(PAGE_SIZE-1); // Find the left-most instance of substring in main string // WASMable. 
indexOf(haystackLeft, haystackEnd, needleLeft, needleLen) { + if ( needleLen === 0 ) { return haystackLeft; } haystackEnd -= needleLen; if ( haystackEnd < haystackLeft ) { return -1; } needleLeft += this.buf32[CHAR0_SLOT]; diff --git a/src/js/wasm/biditrie.wasm b/src/js/wasm/biditrie.wasm index d9066ae2b26f7042e3539d347342959f10d0235c..a5d3aef9eed6ceb3bdb4ea2fd728ab446eb9083b 100644 GIT binary patch delta 30 mcmcb>ewBSf2jl6Do!Lyhg^VoqOb$#A3e2v&43qybT>${0UJ0K7 delta 25 hcmcc0et~^L2jj7go!LwrIgBjzOb$#AlNp(>004Mk2q6Ff diff --git a/src/js/wasm/biditrie.wat b/src/js/wasm/biditrie.wat index 9e1df88e0..5f7ec5e46 100644 --- a/src/js/wasm/biditrie.wat +++ b/src/js/wasm/biditrie.wat @@ -544,6 +544,10 @@ (local $c0 i32) block $fail block $succeed + ;; if ( needleLen === 0 ) { return haystackLeft; } + get_local $needleLen + i32.eqz + br_if $succeed ;; haystackEnd -= needleLen; get_local $haystackEnd get_local $needleLen