/*******************************************************************************

    uBlock Origin - a browser extension to block requests.
    Copyright (C) 2019-present Raymond Hill

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see {http://www.gnu.org/licenses/}.

    Home: https://github.com/gorhill/uBlock
*/

/* exported STrieContainer */

'use strict';

/*******************************************************************************

  A STrieContainer is mostly a large buffer in which distinct but related
  tries are stored. The memory layout of the buffer is as follows:

    0-255: reserved
    256-259: offset to start of trie data section (=> trie0)
    260-263: offset to end of trie data section (=> trie1)
    264-267: offset to start of character data section (=> char0)
    268-271: offset to end of character data section (=> char1)
    272: start of trie data section

  A trie cell is made of three consecutive 32-bit slots:

    +0: index (in 32-bit units) of the next sibling cell to try when the
        first character of this cell's segment does not match, or 0
    +1: index (in 32-bit units) of the cell which follows this cell's
        segment, or 0
    +2: segment descriptor: segment length in the upper 8 bits, segment
        offset relative to char0 in the lower 24 bits. A value of 0 denotes
        a boundary cell, i.e. a marker that a stored pattern ends here.

  Characters are stored as single bytes, and the segment length field is
  8-bit wide: patterns are thus assumed to be at most 255 characters long
  and made of characters whose code is below 256. This is not enforced.

*/

const STRIE_PAGE_SIZE   = 65536;
                                                        // i32 /  i8
const STRIE_TRIE0_SLOT  = 256 >>> 2;                    //  64 / 256
const STRIE_TRIE1_SLOT  = STRIE_TRIE0_SLOT + 1;         //  65 / 260
const STRIE_CHAR0_SLOT  = STRIE_TRIE0_SLOT + 2;         //  66 / 264
const STRIE_CHAR1_SLOT  = STRIE_TRIE0_SLOT + 3;         //  67 / 268
const STRIE_TRIE0_START = (STRIE_TRIE0_SLOT + 4) << 2;  //       272

// Constructor.
//
// details (optional): object as returned by optimize(), with properties
//   byteLength: minimum size of the buffer, rounded up to a multiple of
//               STRIE_PAGE_SIZE (the buffer is never smaller than 131072)
//   char0: byte offset of the character data section (default: 65536)

const STrieContainer = function(details) {
    if ( details instanceof Object === false ) { details = {}; }
    const len = ((details.byteLength || 0) + STRIE_PAGE_SIZE-1) &
                ~(STRIE_PAGE_SIZE-1);
    this.buf = new Uint8Array(Math.max(len, 131072));
    this.buf32 = new Uint32Array(this.buf.buffer);
    this.buf32[STRIE_TRIE0_SLOT] = STRIE_TRIE0_START;
    this.buf32[STRIE_TRIE1_SLOT] = this.buf32[STRIE_TRIE0_SLOT];
    this.buf32[STRIE_CHAR0_SLOT] = details.char0 || 65536;
    this.buf32[STRIE_CHAR1_SLOT] = this.buf32[STRIE_CHAR0_SLOT];
};

STrieContainer.prototype = {

    //--------------------------------------------------------------------------
    // Public methods
    //--------------------------------------------------------------------------

    // Discard all tries and all character data. The buffer itself is kept.
    reset: function() {
        this.buf32[STRIE_TRIE1_SLOT] = this.buf32[STRIE_TRIE0_SLOT];
        this.buf32[STRIE_CHAR1_SLOT] = this.buf32[STRIE_CHAR0_SLOT];
    },

    // Test whether a pattern stored in the trie rooted at `iroot` is found
    // in string `a` starting exactly at index `al`.
    //
    // Returns the index in `a` immediately past the first (shortest) stored
    // pattern which matches, or -1 when no stored pattern matches.
    matches: function(iroot, a, al) {
        // An empty trie has a root cell with no segment. Without this guard,
        // the needle would be compared against whatever byte sits at the
        // start of the character data section, which may belong to another
        // trie stored in the same container.
        if ( this.buf32[iroot+2] === 0 ) { return -1; }
        const ar = a.length;
        const char0 = this.buf32[STRIE_CHAR0_SLOT];
        let icell = iroot;
        for (;;) {
            let c = a.charCodeAt(al);
            al += 1;
            let v, bl;
            // find first segment with a first-character match
            for (;;) {
                v = this.buf32[icell+2];
                bl = char0 + (v & 0x00FFFFFF);
                if ( this.buf[bl] === c ) { break; }
                icell = this.buf32[icell+0];
                if ( icell === 0 ) { return -1; }
            }
            // all characters in segment must match
            let n = v >>> 24;
            if ( n > 1 ) {
                n -= 1;
                if ( (al + n) > ar ) { return -1; }
                bl += 1;
                const br = bl + n;
                do {
                    if ( a.charCodeAt(al) !== this.buf[bl] ) { return -1; }
                    al += 1;
                    bl += 1;
                } while ( bl < br );
            }
            // next segment: no following cell, or a boundary cell, means
            // that a complete stored pattern has been matched
            icell = this.buf32[icell+1];
            if ( icell === 0 || this.buf32[icell+2] === 0 ) { return al; }
            if ( al === ar ) { return -1; }
        }
    },

    // Create a reference to a trie.
    //
    // When `args` is an array, as returned by compileOne(), a reference to
    // the already existing trie is returned. Otherwise a new empty trie is
    // allocated in the container.
    createOne: function(args) {
        if ( Array.isArray(args) ) {
            return new this.STrieRef(this, args[0], args[1]);
        }
        // grow buffer if needed
        if ( (this.buf32[STRIE_CHAR0_SLOT] - this.buf32[STRIE_TRIE1_SLOT]) < 12 ) {
            this.growBuf(12, 0);
        }
        const iroot = this.buf32[STRIE_TRIE1_SLOT] >>> 2;
        this.buf32[STRIE_TRIE1_SLOT] += 12;
        this.buf32[iroot+0] = 0;
        this.buf32[iroot+1] = 0;
        this.buf32[iroot+2] = 0;
        return new this.STrieRef(this, iroot, 0);
    },

    // Return the data needed by createOne() to re-create the reference.
    compileOne: function(trieRef) {
        return [ trieRef.iroot, trieRef.size ];
    },

    // Add pattern `s` to the trie rooted at `iroot`.
    //
    // Returns 1 when the pattern was added, 0 when the pattern is empty or
    // was already present in the trie.
    add: function(iroot, s) {
        const lschar = s.length;
        if ( lschar === 0 ) { return 0; }
        let ischar = 0;
        let icell = iroot;
        // Grow buffer if needed. This must occur before ANY write, including
        // the first-node special case below: writes past the end of a typed
        // array are silently dropped, which would truncate the stored
        // pattern. At most two cells (24 bytes) are added per call.
        if (
            (this.buf32[STRIE_CHAR0_SLOT] - this.buf32[STRIE_TRIE1_SLOT]) < 24 ||
            (this.buf.length - this.buf32[STRIE_CHAR1_SLOT]) < 256
        ) {
            this.growBuf(24, 256);
        }
        // special case: first node in trie
        if ( this.buf32[icell+2] === 0 ) {
            this.buf32[icell+2] = this.addSegment(s.slice(ischar));
            return 1;
        }
        // must be read after the buffer was potentially resized
        const char0 = this.buf32[STRIE_CHAR0_SLOT];
        let inext;
        // find a matching cell: move down
        for (;;) {
            const vseg = this.buf32[icell+2];
            // skip boundary cells
            if ( vseg === 0 ) {
                icell = this.buf32[icell+1];
                continue;
            }
            let isegchar0 = char0 + (vseg & 0x00FFFFFF);
            // if first character is no match, move to next descendant
            if ( this.buf[isegchar0] !== s.charCodeAt(ischar) ) {
                inext = this.buf32[icell+0];
                if ( inext === 0 ) {
                    this.buf32[icell+0] = this.addCell(
                        0, 0, this.addSegment(s.slice(ischar))
                    );
                    return 1;
                }
                icell = inext;
                continue;
            }
            // 1st character was tested
            let isegchar = 1;
            ischar += 1;
            // find 1st mismatch in rest of segment
            const lsegchar = vseg >>> 24;
            if ( lsegchar !== 1 ) {
                for (;;) {
                    if ( isegchar === lsegchar ) { break; }
                    if ( ischar === lschar ) { break; }
                    if ( this.buf[isegchar0+isegchar] !== s.charCodeAt(ischar) ) {
                        break;
                    }
                    isegchar += 1;
                    ischar += 1;
                }
            }
            // all segment characters matched
            if ( isegchar === lsegchar ) {
                inext = this.buf32[icell+1];
                // needle remainder: no
                if ( ischar === lschar ) {
                    // boundary cell already present
                    if ( inext === 0 || this.buf32[inext+2] === 0 ) {
                        return 0;
                    }
                    // need boundary cell
                    this.buf32[icell+1] = this.addCell(0, inext, 0);
                }
                // needle remainder: yes
                else {
                    if ( inext !== 0 ) {
                        icell = inext;
                        continue;
                    }
                    // boundary cell + needle remainder
                    inext = this.addCell(0, 0, 0);
                    this.buf32[icell+1] = inext;
                    this.buf32[inext+1] = this.addCell(
                        0, 0, this.addSegment(s.slice(ischar))
                    );
                }
            }
            // some segment characters matched
            else {
                // split current cell: current cell keeps the matched head of
                // the segment, a new cell receives the unmatched tail
                isegchar0 -= char0;
                this.buf32[icell+2] = (isegchar << 24) | isegchar0;
                inext = this.addCell(
                    0,
                    this.buf32[icell+1],
                    ((lsegchar - isegchar) << 24) | (isegchar0 + isegchar)
                );
                this.buf32[icell+1] = inext;
                // needle remainder: no = need boundary cell
                if ( ischar === lschar ) {
                    this.buf32[icell+1] = this.addCell(0, inext, 0);
                }
                // needle remainder: yes = need new cell for remaining
                // characters
                else {
                    this.buf32[inext+0] = this.addCell(
                        0, 0, this.addSegment(s.slice(ischar))
                    );
                }
            }
            return 1;
        }
    },

    // Shrink the buffer and return the details which can be passed to the
    // constructor to create a container of the same geometry.
    optimize: function() {
        this.shrinkBuf();
        return {
            byteLength: this.buf.byteLength,
            char0: this.buf32[STRIE_CHAR0_SLOT],
        };
    },

    // Create a new trie and fill it with the strings from an iterable.
    //
    // `add` is the name of the STrieRef method used to add each string,
    // 'add' by default. Returns the reference to the new trie.
    fromIterable: function(hostnames, add) {
        if ( add === undefined ) { add = 'add'; }
        const trieRef = this.createOne();
        for ( const hn of hostnames ) {
            trieRef[add](hn);
        }
        return trieRef;
    },

    // Return the content of the buffer up to the end of the character data
    // section: whatever `encoder.encode()` returns when an encoder object is
    // provided, an array of 32-bit unsigned integers otherwise.
    serialize: function(encoder) {
        if ( encoder instanceof Object ) {
            return encoder.encode(
                this.buf32.buffer,
                this.buf32[STRIE_CHAR1_SLOT]
            );
        }
        return Array.from(
            new Uint32Array(
                this.buf32.buffer,
                0,
                (this.buf32[STRIE_CHAR1_SLOT] + 3) >>> 2
            )
        );
    },

    // Restore the content of the buffer from the output of serialize().
    //
    // A string `selfie` is decoded using `decoder`, anything else is copied
    // as a sequence of 32-bit unsigned integers. The buffer is reallocated
    // only when it is too small.
    unserialize: function(selfie, decoder) {
        const shouldDecode = typeof selfie === 'string';
        let byteLength = shouldDecode
            ? decoder.decodeSize(selfie)
            : selfie.length << 2;
        byteLength = (byteLength + STRIE_PAGE_SIZE-1) & ~(STRIE_PAGE_SIZE-1);
        if ( byteLength === 0 ) { return; }
        if ( byteLength > this.buf.length ) {
            this.buf = new Uint8Array(byteLength);
            this.buf32 = new Uint32Array(this.buf.buffer);
        }
        if ( shouldDecode ) {
            decoder.decode(selfie, this.buf.buffer);
        } else {
            this.buf32.set(selfie);
        }
    },

    //--------------------------------------------------------------------------
    // Class to hold reference to a specific trie
    //--------------------------------------------------------------------------

    // Must remain a plain function: it is used as a constructor.
    STrieRef: function(container, iroot, size) {
        this.container = container;
        this.iroot = iroot;
        this.size = size;
    },

    //--------------------------------------------------------------------------
    // Private methods
    //--------------------------------------------------------------------------

    // Append a cell at the end of the trie data section and return its index
    // in 32-bit units. The caller must ensure there is enough room.
    addCell: function(idown, iright, v) {
        let icell = this.buf32[STRIE_TRIE1_SLOT];
        this.buf32[STRIE_TRIE1_SLOT] = icell + 12;
        icell >>>= 2;
        this.buf32[icell+0] = idown;
        this.buf32[icell+1] = iright;
        this.buf32[icell+2] = v;
        return icell;
    },

    // Append the characters of `segment` at the end of the character data
    // section and return the corresponding segment descriptor, or 0 when the
    // segment is empty. The caller must ensure there is enough room.
    addSegment: function(segment) {
        const lsegchar = segment.length;
        if ( lsegchar === 0 ) { return 0; }
        let char1 = this.buf32[STRIE_CHAR1_SLOT];
        const isegchar = char1 - this.buf32[STRIE_CHAR0_SLOT];
        let i = 0;
        do {
            this.buf[char1++] = segment.charCodeAt(i++);
        } while ( i !== lsegchar );
        this.buf32[STRIE_CHAR1_SLOT] = char1;
        return (lsegchar << 24) | isegchar;
    },

    // Ensure at least `trieGrow` more bytes can be appended to the trie data
    // section and `charGrow` more bytes to the character data section. Never
    // shrinks the buffer, never moves the character data section backward.
    growBuf: function(trieGrow, charGrow) {
        const char0 = Math.max(
            (this.buf32[STRIE_TRIE1_SLOT] + trieGrow + STRIE_PAGE_SIZE-1) &
                ~(STRIE_PAGE_SIZE-1),
            this.buf32[STRIE_CHAR0_SLOT]
        );
        const char1 = char0 +
                      this.buf32[STRIE_CHAR1_SLOT] -
                      this.buf32[STRIE_CHAR0_SLOT];
        const bufLen = Math.max(
            (char1 + charGrow + STRIE_PAGE_SIZE-1) & ~(STRIE_PAGE_SIZE-1),
            this.buf.length
        );
        this.resizeBuf(bufLen, char0);
    },

    // Pack the character data section 24 bytes after the end of the trie
    // data section, and keep 256 bytes of room after the character data
    // (before rounding up to a multiple of STRIE_PAGE_SIZE).
    shrinkBuf: function() {
        const char0 = this.buf32[STRIE_TRIE1_SLOT] + 24;
        const char1 = char0 +
                      this.buf32[STRIE_CHAR1_SLOT] -
                      this.buf32[STRIE_CHAR0_SLOT];
        const bufLen = char1 + 256;
        this.resizeBuf(bufLen, char0);
    },

    // Resize the buffer to `bufLen` bytes (rounded up to a multiple of
    // STRIE_PAGE_SIZE) and/or move the character data section to byte
    // offset `char0`.
    resizeBuf: function(bufLen, char0) {
        bufLen = (bufLen + STRIE_PAGE_SIZE-1) & ~(STRIE_PAGE_SIZE-1);
        if (
            bufLen === this.buf.length &&
            char0 === this.buf32[STRIE_CHAR0_SLOT]
        ) {
            return;
        }
        const charDataLen = this.buf32[STRIE_CHAR1_SLOT] -
                            this.buf32[STRIE_CHAR0_SLOT];
        if ( bufLen !== this.buf.length ) {
            const newBuf = new Uint8Array(bufLen);
            // header + trie data
            newBuf.set(
                new Uint8Array(
                    this.buf.buffer,
                    0,
                    this.buf32[STRIE_TRIE1_SLOT]
                ),
                0
            );
            // character data
            newBuf.set(
                new Uint8Array(
                    this.buf.buffer,
                    this.buf32[STRIE_CHAR0_SLOT],
                    charDataLen
                ),
                char0
            );
            this.buf = newBuf;
            this.buf32 = new Uint32Array(this.buf.buffer);
            this.buf32[STRIE_CHAR0_SLOT] = char0;
            this.buf32[STRIE_CHAR1_SLOT] = char0 + charDataLen;
        }
        // same buffer, character data section moved
        if ( char0 !== this.buf32[STRIE_CHAR0_SLOT] ) {
            this.buf.set(
                new Uint8Array(
                    this.buf.buffer,
                    this.buf32[STRIE_CHAR0_SLOT],
                    charDataLen
                ),
                char0
            );
            this.buf32[STRIE_CHAR0_SLOT] = char0;
            this.buf32[STRIE_CHAR1_SLOT] = char0 + charDataLen;
        }
    },
};

/******************************************************************************/

STrieContainer.prototype.STrieRef.prototype = {

    // Add a pattern to the trie. Returns true when the pattern was added,
    // false when it is empty or already present.
    add: function(pattern) {
        if ( this.container.add(this.iroot, pattern) === 1 ) {
            this.size += 1;
            return true;
        }
        return false;
    },

    // See STrieContainer.prototype.matches().
    matches: function(a, al) {
        return this.container.matches(this.iroot, a, al);
    },

    // Iterate over all the patterns stored in the trie. Patterns longer than
    // the 256-byte scratch buffer are not supported.
    [Symbol.iterator]: function() {
        const container = this.container;
        // An empty trie has a root cell with no segment: start exhausted
        // rather than report a spurious empty pattern.
        const istart = container.buf32[this.iroot+2] === 0 ? 0 : this.iroot;
        return {
            value: undefined,
            done: false,
            next: function() {
                if ( this.icell === 0 ) {
                    if ( this.forks.length === 0 ) {
                        this.value = undefined;
                        this.done = true;
                        return this;
                    }
                    // resume from a pending sibling, with the prefix which
                    // was current when that sibling was recorded
                    this.charPtr = this.forks.pop();
                    this.icell = this.forks.pop();
                }
                for (;;) {
                    const idown = this.container.buf32[this.icell+0];
                    if ( idown !== 0 ) {
                        this.forks.push(idown, this.charPtr);
                    }
                    const v = this.container.buf32[this.icell+2];
                    let i0 = this.container.buf32[STRIE_CHAR0_SLOT] +
                             (v & 0x00FFFFFF);
                    const i1 = i0 + (v >>> 24);
                    // Segments are stored front-to-back by add(), hence the
                    // pattern must be assembled front-to-back too.
                    while ( i0 < i1 ) {
                        this.charBuf[this.charPtr] = this.container.buf[i0];
                        this.charPtr += 1;
                        i0 += 1;
                    }
                    this.icell = this.container.buf32[this.icell+1];
                    if ( this.icell === 0 ) {
                        return this.toPattern();
                    }
                    // boundary cell: a pattern ends here, longer patterns
                    // sharing this prefix will be visited on next call
                    if ( this.container.buf32[this.icell+2] === 0 ) {
                        this.icell = this.container.buf32[this.icell+1];
                        return this.toPattern();
                    }
                }
            },
            toPattern: function() {
                this.value = this.textDecoder.decode(
                    new Uint8Array(this.charBuf.buffer, 0, this.charPtr)
                );
                return this;
            },
            container: container,
            icell: istart,
            charBuf: new Uint8Array(256),
            charPtr: 0,
            forks: [],
            textDecoder: new TextDecoder()
        };
    },
};