Remove elision of leading wildcard in some filter patterns

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/882

Related commits:
- https://github.com/gorhill/uBlock/commit/a95ef16e064a
- https://github.com/gorhill/uBlock/commit/7971b223855d

Leading wildcards before valid token characters need to
be kept in order to respect the semantics of the filter.
A leading wildcard in such a case changes the semantics
of a filter, i.e. the following two filters are
semantically different:

    example/abc
    *example/abc

As a result, µBlock.BidiTrieContainer.indexOf() is now
able to deal with a needle of length zero (it returns the
left boundary of the haystack) -- which is what happens
in FilterPatternLeft(Ex) with filter patterns that start
with `*` (or `^*`) immediately followed by valid token
characters (0-9, a-z and %).
This commit is contained in:
Raymond Hill 2020-02-03 14:09:37 -05:00
parent 3ef999d44b
commit 609e9a6428
No known key found for this signature in database
GPG key ID: 25E1490B761470C2
4 changed files with 12 additions and 10 deletions

View file

@ -616,6 +616,9 @@ const FilterPatternPlainX = class extends FilterPatternPlain {
/******************************************************************************/ /******************************************************************************/
// https://github.com/gorhill/uBlock/commit/7971b223855d#commitcomment-37077525
// Mind that the left part may be empty.
const FilterPatternLeft = class { const FilterPatternLeft = class {
constructor(i, n) { constructor(i, n) {
this.i = i | 0; this.i = i | 0;
@ -633,8 +636,10 @@ const FilterPatternLeft = class {
} }
logData(details) { logData(details) {
details.pattern.unshift('*');
if ( this.n === 0 ) { return; }
const s = bidiTrie.extractString(this.i, this.n); const s = bidiTrie.extractString(this.i, this.n);
details.pattern.unshift(s, '*'); details.pattern.unshift(s);
details.regex.unshift(restrFromPlainPattern(s), '.*'); details.regex.unshift(restrFromPlainPattern(s), '.*');
} }
@ -2529,16 +2534,8 @@ const FilterParser = class {
this.tokenBeg = matches.index; this.tokenBeg = matches.index;
// https://www.reddit.com/r/uBlockOrigin/comments/dpcvfx/ // https://www.reddit.com/r/uBlockOrigin/comments/dpcvfx/
// Since we found a valid token, we can get rid of leading/trailing // Since we found a valid token, we can get rid of trailing
// wildcards if any. // wildcards if any.
// https://github.com/gorhill/uBlock/commit/7971b223855d#commitcomment-37077525
// Mind that changing the pattern may change token start index.
if ( this.firstWildcardPos === 0 ) {
this.f = this.f.slice(1);
this.firstWildcardPos = this.secondWildcardPos;
this.secondWildcardPos = -1;
this.tokenBeg -= 1;
}
if ( this.firstWildcardPos !== -1 ) { if ( this.firstWildcardPos !== -1 ) {
const lastCharPos = this.f.length - 1; const lastCharPos = this.f.length - 1;
if ( this.firstWildcardPos === lastCharPos ) { if ( this.firstWildcardPos === lastCharPos ) {

View file

@ -636,6 +636,7 @@ const roundToPageSize = v => (v + PAGE_SIZE-1) & ~(PAGE_SIZE-1);
// Find the left-most instance of substring in main string // Find the left-most instance of substring in main string
// WASMable. // WASMable.
indexOf(haystackLeft, haystackEnd, needleLeft, needleLen) { indexOf(haystackLeft, haystackEnd, needleLeft, needleLen) {
if ( needleLen === 0 ) { return haystackLeft; }
haystackEnd -= needleLen; haystackEnd -= needleLen;
if ( haystackEnd < haystackLeft ) { return -1; } if ( haystackEnd < haystackLeft ) { return -1; }
needleLeft += this.buf32[CHAR0_SLOT]; needleLeft += this.buf32[CHAR0_SLOT];

Binary file not shown.

View file

@ -544,6 +544,10 @@
(local $c0 i32) (local $c0 i32)
block $fail block $fail
block $succeed block $succeed
;; if ( needleLen === 0 ) { return haystackLeft; }
get_local $needleLen
i32.eqz
br_if $succeed
;; haystackEnd -= needleLen; ;; haystackEnd -= needleLen;
get_local $haystackEnd get_local $haystackEnd
get_local $needleLen get_local $needleLen