/******************************************************************************* publicsuffixlist.js - an efficient javascript implementation to deal with Mozilla Foundation's Public Suffix List Copyright (C) 2013-present Raymond Hill License: pick the one which suits you: GPL v3 see APL v2 see */ /*! Home: https://github.com/gorhill/publicsuffixlist.js -- GPLv3 APLv2 */ /* jshint browser:true, esversion:6, laxbreak:true, undef:true, unused:true */ /* globals WebAssembly, console, exports:true, module */ /******************************************************************************* Reference: https://publicsuffix.org/list/ Excerpt: > Algorithm > > 1. Match domain against all rules and take note of the matching ones. > 2. If no rules match, the prevailing rule is "*". > 3. If more than one rule matches, the prevailing rule is the one which is an exception rule. > 4. If there is no matching exception rule, the prevailing rule is the one with the most labels. > 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label. > 6. The public suffix is the set of labels from the domain which match the labels of the prevailing rule, using the matching algorithm above. > 7. The registered or registrable domain is the public suffix plus one additional label. */ /******************************************************************************/ (function(context) { // >>>>>>>> start of anonymous namespace 'use strict'; /******************************************************************************* Tree encoding in array buffer: Node: + u8: length of char data + u8: flags => bit 0: is_publicsuffix, bit 1: is_exception + u16: length of array of children + u32: char data or offset to char data + u32: offset to array of children = 12 bytes More bits in flags could be used; for example: - to distinguish private suffixes */ // i32 / i8 const HOSTNAME_SLOT = 0; // jshint ignore:line const LABEL_INDICES_SLOT = 256; // -- / 256 const RULES_PTR_SLOT = 100; // 100 / 400 const CHARDATA_PTR_SLOT = 101; // 101 / 404 const EMPTY_STRING = ''; const SELFIE_MAGIC = 2; let wasmMemory; let pslBuffer32; let pslBuffer8; let pslByteLength = 0; let hostnameArg = EMPTY_STRING; /******************************************************************************/ const fireChangedEvent = function() { if ( window instanceof Object && window.dispatchEvent instanceof Function && window.CustomEvent instanceof Function ) { window.dispatchEvent(new CustomEvent('publicSuffixListChanged')); } }; /******************************************************************************/ const allocateBuffers = function(byteLength) { pslByteLength = byteLength + 3 & ~3; if ( pslBuffer32 !== undefined && pslBuffer32.byteLength >= pslByteLength ) { return; } if ( wasmMemory !== undefined ) { const newPageCount = pslByteLength + 0xFFFF >>> 16; const curPageCount = wasmMemory.buffer.byteLength >>> 16; const delta = newPageCount - curPageCount; if ( delta > 0 ) { wasmMemory.grow(delta); pslBuffer32 = new Uint32Array(wasmMemory.buffer); pslBuffer8 = new Uint8Array(wasmMemory.buffer); } } else { pslBuffer8 = new Uint8Array(pslByteLength); pslBuffer32 = new Uint32Array(pslBuffer8.buffer); } hostnameArg = ''; pslBuffer8[LABEL_INDICES_SLOT] = 0; }; /******************************************************************************/ // Parse and set a UTF-8 text-based suffix list. Format is same as found at: // http://publicsuffix.org/list/ // // `toAscii` is a converter from unicode to punycode. Required since the // Public Suffix List contains unicode characters. // Suggestion: use const parse = function(text, toAscii) { // Use short property names for better minifying results const rootRule = { l: EMPTY_STRING, // l => label f: 0, // f => flags c: undefined // c => children }; // Tree building { const compareLabels = function(a, b) { let n = a.length; let d = n - b.length; if ( d !== 0 ) { return d; } for ( let i = 0; i < n; i++ ) { d = a.charCodeAt(i) - b.charCodeAt(i); if ( d !== 0 ) { return d; } } return 0; }; const addToTree = function(rule, exception) { let node = rootRule; let end = rule.length; while ( end > 0 ) { const beg = rule.lastIndexOf('.', end - 1); const label = rule.slice(beg + 1, end); end = beg; if ( Array.isArray(node.c) === false ) { const child = { l: label, f: 0, c: undefined }; node.c = [ child ]; node = child; continue; } let left = 0; let right = node.c.length; while ( left < right ) { const i = left + right >>> 1; const d = compareLabels(label, node.c[i].l); if ( d < 0 ) { right = i; if ( right === left ) { const child = { l: label, f: 0, c: undefined }; node.c.splice(left, 0, child); node = child; break; } continue; } if ( d > 0 ) { left = i + 1; if ( left === right ) { const child = { l: label, f: 0, c: undefined }; node.c.splice(right, 0, child); node = child; break; } continue; } /* d === 0 */ node = node.c[i]; break; } } node.f |= 0b01; if ( exception ) { node.f |= 0b10; } }; // 2. If no rules match, the prevailing rule is "*". addToTree('*', false); const mustPunycode = /[^a-z0-9.-]/; const textEnd = text.length; let lineBeg = 0; while ( lineBeg < textEnd ) { let lineEnd = text.indexOf('\n', lineBeg); if ( lineEnd === -1 ) { lineEnd = text.indexOf('\r', lineBeg); if ( lineEnd === -1 ) { lineEnd = textEnd; } } let line = text.slice(lineBeg, lineEnd).trim(); lineBeg = lineEnd + 1; // Ignore comments const pos = line.indexOf('//'); if ( pos !== -1 ) { line = line.slice(0, pos); } // Ignore surrounding whitespaces line = line.trim(); if ( line.length === 0 ) { continue; } const exception = line.charCodeAt(0) === 0x21 /* '!' */; if ( exception ) { line = line.slice(1); } if ( mustPunycode.test(line) ) { line = toAscii(line.toLowerCase()); } addToTree(line, exception); } } { const labelToOffsetMap = new Map(); const treeData = []; const charData = []; const allocate = function(n) { const ibuf = treeData.length; for ( let i = 0; i < n; i++ ) { treeData.push(0); } return ibuf; }; const storeNode = function(ibuf, node) { const nChars = node.l.length; const nChildren = node.c !== undefined ? node.c.length : 0; treeData[ibuf+0] = nChildren << 16 | node.f << 8 | nChars; // char data if ( nChars <= 4 ) { let v = 0; if ( nChars > 0 ) { v |= node.l.charCodeAt(0); if ( nChars > 1 ) { v |= node.l.charCodeAt(1) << 8; if ( nChars > 2 ) { v |= node.l.charCodeAt(2) << 16; if ( nChars > 3 ) { v |= node.l.charCodeAt(3) << 24; } } } } treeData[ibuf+1] = v; } else { let offset = labelToOffsetMap.get(node.l); if ( offset === undefined ) { offset = charData.length; for ( let i = 0; i < nChars; i++ ) { charData.push(node.l.charCodeAt(i)); } labelToOffsetMap.set(node.l, offset); } treeData[ibuf+1] = offset; } // child nodes if ( Array.isArray(node.c) === false ) { treeData[ibuf+2] = 0; return; } const iarray = allocate(nChildren * 3); treeData[ibuf+2] = iarray; for ( let i = 0; i < nChildren; i++ ) { storeNode(iarray + i * 3, node.c[i]); } }; // First 512 bytes are reserved for internal use allocate(512 >> 2); const iRootRule = allocate(3); storeNode(iRootRule, rootRule); treeData[RULES_PTR_SLOT] = iRootRule; const iCharData = treeData.length << 2; treeData[CHARDATA_PTR_SLOT] = iCharData; const byteLength = (treeData.length << 2) + (charData.length + 3 & ~3); allocateBuffers(byteLength); pslBuffer32.set(treeData); pslBuffer8.set(charData, treeData.length << 2); } fireChangedEvent(); }; /******************************************************************************/ const setHostnameArg = function(hostname) { const buf = pslBuffer8; if ( hostname === hostnameArg ) { return buf[LABEL_INDICES_SLOT]; } if ( hostname === null || hostname.length === 0 ) { return (buf[LABEL_INDICES_SLOT] = 0); } hostname = hostname.toLowerCase(); hostnameArg = hostname; let n = hostname.length; if ( n > 255 ) { n = 255; } buf[LABEL_INDICES_SLOT] = n; let i = n; let j = LABEL_INDICES_SLOT + 1; while ( i-- ) { const c = hostname.charCodeAt(i); if ( c === 0x2E /* '.' */ ) { buf[j+0] = i + 1; buf[j+1] = i; j += 2; } buf[i] = c; } buf[j] = 0; return n; }; /******************************************************************************/ // Returns an offset to the start of the public suffix. // // WASM-able, because no information outside the buffer content is required. const getPublicSuffixPosJS = function() { const buf8 = pslBuffer8; const buf32 = pslBuffer32; const iCharData = buf32[CHARDATA_PTR_SLOT]; let iNode = pslBuffer32[RULES_PTR_SLOT]; let cursorPos = -1; let iLabel = LABEL_INDICES_SLOT; // Label-lookup loop for (;;) { // Extract label indices const labelBeg = buf8[iLabel+1]; const labelLen = buf8[iLabel+0] - labelBeg; // Match-lookup loop: binary search let r = buf32[iNode+0] >>> 16; if ( r === 0 ) { break; } const iCandidates = buf32[iNode+2]; let l = 0; let iFound = 0; while ( l < r ) { const iCandidate = l + r >>> 1; const iCandidateNode = iCandidates + iCandidate + (iCandidate << 1); const candidateLen = buf32[iCandidateNode+0] & 0x000000FF; let d = labelLen - candidateLen; if ( d === 0 ) { const iCandidateChar = candidateLen <= 4 ? iCandidateNode + 1 << 2 : iCharData + buf32[iCandidateNode+1]; for ( let i = 0; i < labelLen; i++ ) { d = buf8[labelBeg+i] - buf8[iCandidateChar+i]; if ( d !== 0 ) { break; } } } if ( d < 0 ) { r = iCandidate; } else if ( d > 0 ) { l = iCandidate + 1; } else /* if ( d === 0 ) */ { iFound = iCandidateNode; break; } } // 2. If no rules match, the prevailing rule is "*". if ( iFound === 0 ) { if ( buf8[iCandidates + 1 << 2] !== 0x2A /* '*' */ ) { break; } iFound = iCandidates; } iNode = iFound; // 5. If the prevailing rule is a exception rule, modify it by // removing the leftmost label. if ( (buf32[iNode+0] & 0x00000200) !== 0 ) { if ( iLabel > LABEL_INDICES_SLOT ) { return iLabel - 2; } break; } if ( (buf32[iNode+0] & 0x00000100) !== 0 ) { cursorPos = iLabel; } if ( labelBeg === 0 ) { break; } iLabel += 2; } return cursorPos; }; let getPublicSuffixPosWASM; let getPublicSuffixPos = getPublicSuffixPosJS; /******************************************************************************/ const getPublicSuffix = function(hostname) { if ( pslBuffer32 === undefined ) { return EMPTY_STRING; } const hostnameLen = setHostnameArg(hostname); const buf8 = pslBuffer8; if ( hostnameLen === 0 || buf8[0] === 0x2E /* '.' */ ) { return EMPTY_STRING; } const cursorPos = getPublicSuffixPos(); if ( cursorPos === -1 ) { return EMPTY_STRING; } const beg = buf8[cursorPos + 1]; return beg === 0 ? hostnameArg : hostnameArg.slice(beg); }; /******************************************************************************/ const getDomain = function(hostname) { if ( pslBuffer32 === undefined ) { return EMPTY_STRING; } const hostnameLen = setHostnameArg(hostname); const buf8 = pslBuffer8; if ( hostnameLen === 0 || buf8[0] === 0x2E /* '.' */ ) { return EMPTY_STRING; } const cursorPos = getPublicSuffixPos(); if ( cursorPos === -1 || buf8[cursorPos + 1] === 0 ) { return EMPTY_STRING; } // 7. The registered or registrable domain is the public suffix plus one // additional label. const beg = buf8[cursorPos + 3]; return beg === 0 ? hostnameArg : hostnameArg.slice(beg); }; /******************************************************************************/ const toSelfie = function(encoder) { if ( pslBuffer8 === undefined ) { return ''; } if ( encoder instanceof Object ) { const bufferStr = encoder.encode(pslBuffer8.buffer, pslByteLength); return `${SELFIE_MAGIC}\t${bufferStr}`; } return { magic: SELFIE_MAGIC, buf32: Array.from( new Uint32Array(pslBuffer8.buffer, 0, pslByteLength >>> 2) ), }; }; const fromSelfie = function(selfie, decoder) { let byteLength = 0; if ( typeof selfie === 'string' && selfie.length !== 0 && decoder instanceof Object ) { const pos = selfie.indexOf('\t'); if ( pos === -1 || selfie.slice(0, pos) !== `${SELFIE_MAGIC}` ) { return false; } const bufferStr = selfie.slice(pos + 1); byteLength = decoder.decodeSize(bufferStr); if ( byteLength === 0 ) { return false; } allocateBuffers(byteLength); decoder.decode(bufferStr, pslBuffer8.buffer); } else if ( selfie instanceof Object && selfie.magic === SELFIE_MAGIC && Array.isArray(selfie.buf32) ) { byteLength = selfie.buf32.length << 2; allocateBuffers(byteLength); pslBuffer32.set(selfie.buf32); } else { return false; } // Important! hostnameArg = ''; pslBuffer8[LABEL_INDICES_SLOT] = 0; fireChangedEvent(); return true; }; /******************************************************************************/ // The WASM module is entirely optional, the JS implementation will be // used should the WASM module be unavailable for whatever reason. const enableWASM = (function() { // The directory from which the current script was fetched should also // contain the related WASM file. The script is fetched from a trusted // location, and consequently so will be the related WASM file. let workingDir; { const url = new URL(document.currentScript.src); const match = /[^\/]+$/.exec(url.pathname); if ( match !== null ) { url.pathname = url.pathname.slice(0, match.index); } workingDir = url.href; } let memory; return function() { if ( getPublicSuffixPosWASM instanceof Function ) { return Promise.resolve(true); } if ( typeof WebAssembly !== 'object' || typeof WebAssembly.instantiateStreaming !== 'function' ) { return Promise.resolve(false); } // The wasm code will work only if CPU is natively little-endian, // as we use native uint32 array in our js code. const uint32s = new Uint32Array(1); const uint8s = new Uint8Array(uint32s.buffer); uint32s[0] = 1; if ( uint8s[0] !== 1 ) { return Promise.resolve(false); } return fetch( workingDir + 'wasm/publicsuffixlist.wasm', { mode: 'same-origin' } ).then(response => { const pageCount = pslBuffer8 !== undefined ? pslBuffer8.byteLength + 0xFFFF >>> 16 : 1; memory = new WebAssembly.Memory({ initial: pageCount }); return WebAssembly.instantiateStreaming( response, { imports: { memory: memory } } ); }).then(({ instance }) => { const curPageCount = memory.buffer.byteLength; const newPageCount = pslBuffer8 !== undefined ? pslBuffer8.byteLength + 0xFFFF >>> 16 : 0; if ( newPageCount > curPageCount ) { memory.grow(newPageCount - curPageCount); } if ( pslBuffer32 !== undefined ) { const buf8 = new Uint8Array(memory.buffer); const buf32 = new Uint32Array(memory.buffer); buf32.set(pslBuffer32); pslBuffer8 = buf8; pslBuffer32 = buf32; } wasmMemory = memory; getPublicSuffixPosWASM = instance.exports.getPublicSuffixPos; getPublicSuffixPos = getPublicSuffixPosWASM; memory = undefined; return true; }).catch(reason => { console.info(reason); return false; }); }; })(); const disableWASM = function() { if ( getPublicSuffixPosWASM instanceof Function ) { getPublicSuffixPos = getPublicSuffixPosJS; getPublicSuffixPosWASM = undefined; } if ( wasmMemory === undefined ) { return; } if ( pslBuffer32 !== undefined ) { const buf8 = new Uint8Array(pslByteLength); const buf32 = new Uint32Array(buf8.buffer); buf32.set(pslBuffer32); pslBuffer8 = buf8; pslBuffer32 = buf32; } wasmMemory = undefined; }; /******************************************************************************/ context = context || window; context.publicSuffixList = { version: '2.0', parse, getDomain, getPublicSuffix, toSelfie, fromSelfie, disableWASM, enableWASM, }; if ( typeof module !== 'undefined' ) { module.exports = context.publicSuffixList; } else if ( typeof exports !== 'undefined' ) { exports = context.publicSuffixList; } /******************************************************************************/ // <<<<<<<< end of anonymous namespace })(this);