code review: improve scriptlet inject code in stream data listener

This commit is contained in:
Raymond Hill 2017-12-29 20:26:03 -05:00
parent ff67cf5ada
commit b36320c643
No known key found for this signature in database
GPG key ID: 25E1490B761470C2

View file

@ -576,10 +576,23 @@ var reMediaContentTypes = /^(?:audio|image|video)\//;
var filterDocument = (function() { var filterDocument = (function() {
var µb = µBlock, var µb = µBlock,
filterers = new Map(), filterers = new Map(),
reDoctype = /^\s*<!DOCTYPE\b[^>]+?>/,
domParser, xmlSerializer, domParser, xmlSerializer,
textDecoderCharset, textDecoder, textEncoder; textDecoderCharset, textDecoder, textEncoder;
// Purpose of following helper is to disconnect from watching the stream
// if all the following conditions are fulfilled:
// - Only need to inject scriptlets.
// - Charset of resource is utf-8.
// - A well-formed doc type declaration is found at the top.
//
// When all the conditions are fulfilled, then the scriptlets are safely
// injected after the doc type declaration, and the stream listener is
// disconnected, thus removing overhead from future streaming data.
//
// If at least one of the condition is not fulfilled and scriptlets need
// to be injected, it will be done the longer way when the whole stream
// has been collated in memory.
var streamJobDone = function(filterer, responseBytes) { var streamJobDone = function(filterer, responseBytes) {
if ( if (
filterer.scriptlets === undefined || filterer.scriptlets === undefined ||
@ -588,40 +601,51 @@ var filterDocument = (function() {
) { ) {
return false; return false;
} }
if ( textDecoder === undefined ) {
textDecoder = new TextDecoder();
}
if ( textEncoder === undefined ) {
textEncoder = new TextEncoder();
}
// We need to insert after DOCTYPE, or else the browser may falls into // We need to insert after DOCTYPE, or else the browser may falls into
// quirks mode. // quirks mode.
var firstResponseBytes = new Uint8Array(responseBytes, 0, 512), var bb = new Uint8Array(responseBytes, 0, 256),
haystack = textDecoder.decode(firstResponseBytes), i = 0, b;
match = reDoctype.exec(haystack); // Skip BOM if present.
if ( match === null ) { return false; } if ( bb[0] === 0xEF && bb[1] === 0xBB && bb[2] === 0xBF ) { i += 3; }
filterers.delete(filterer.stream); // Scan for '<'
// Output bytes may be different than response bytes: the BOM sequence for (;;) {
// if present is removed by the decoder. b = bb[i++];
var firstOutputBytes = textEncoder.encode( if ( b === 0x3C /* '<' */ ) { break; }
haystack.slice(0, match.index + match[0].length) if ( b > 0x20 || b > 0x7F || i > 240 ) { return false; }
);
var insertAt = firstOutputBytes.byteLength;
// Mind BOM if present:
// https://en.wikipedia.org/wiki/Byte_order_mark#UTF-8
if (
firstResponseBytes[0] === 0xEF &&
firstResponseBytes[0] === 0xBB &&
firstResponseBytes[0] === 0xBF
) {
insertAt += 3;
} }
filterer.stream.write(firstOutputBytes); // Test for '!DOCTYPE'
if (
bb[i++] !== 0x21 /* '!' */ ||
bb[i++] !== 0x44 /* 'D' */ ||
bb[i++] !== 0x4F /* 'O' */ ||
bb[i++] !== 0x43 /* 'C' */ ||
bb[i++] !== 0x54 /* 'T' */ ||
bb[i++] !== 0x59 /* 'Y' */ ||
bb[i++] !== 0x50 /* 'P' */ ||
bb[i++] !== 0x45 /* 'E' */
) {
return false;
}
// Scan for '>'.
var qcount = 0;
for (;;) {
b = bb[i++];
if ( b === 0x3E /* '>' */ ) { break; }
if ( b === 0x22 /* '"' */ || b === 0x27 /* "'" */ ) { qcount += 1; }
if ( b > 127 || i > 240 ) { return false; }
}
// Bail out if mismatched quotes.
if ( (qcount & 1) !== 0 ) { return false; }
// We found a valid insertion point.
if ( textEncoder === undefined ) { textEncoder = new TextEncoder(); }
filterer.stream.write(
new Uint8Array(responseBytes, 0, i)
);
filterer.stream.write( filterer.stream.write(
textEncoder.encode('<script>' + filterer.scriptlets + '</script>') textEncoder.encode('<script>' + filterer.scriptlets + '</script>')
); );
filterer.stream.write( filterer.stream.write(
new Uint8Array(responseBytes, insertAt) new Uint8Array(responseBytes, i)
); );
filterer.stream.disconnect(); filterer.stream.disconnect();
return true; return true;