如何使用 node.js 和 cloudflare worker 在现有 HTML 响应中注入 javascript
How to inject javascript in existing HTML response with node.js and cloudflare workers
我有一个指向 GitBook 的自定义域名(vanity URL)。GitBook 不支持插入任意 javascript 片段。目前 GitBook 只有 4 个“集成”(integrations)。
我可以通过我自己的 VM 服务器路由来完成此操作,但我有 CloudFlare,我想试用 workers。 (CDN 边缘Javascript 运行)。
CloudFlare worker 环境使 header 注入非常容易,但没有明显的方法可以向 HTML 响应正文中注入内容。
使用 TransformStream 进行处理很重要,这样处理是异步的并且不需要内存缓冲(为了可扩展性和最小化 GC)- 只有 5 毫秒 CPU 时间预算。
概览:
- 要自己使用,请更改字符串
forHeadStart
、forHeadEnd
和 forBodyEnd
。
- 这种 deferredInjection 方法是推荐的方法,可以最大限度地减少 worker 的 CPU 时间。它更高效,因为它只需要解析 HTML 的开头。另一种方法需要为 headInjection 解析整个 head 部分,如果您使用 bodyInjection,它实际上需要解析整个 html 响应。
- deferredInjection 方法的工作原理是将内容注入到 head 标记的开头,然后在 client-side 运行时将您的 HTML 内容部署到所需位置。
- 如果需要,您可以使用
headInjection
and/or bodyInjection
直接注入。取消注释相关代码,包括 injectScripts
中的代码,并设置将被编码的 tagBytes 的字符串。
- 此解决方案将仅解析 HTML 内容类型
- 此解决方案直接处理字节(而非字符串)以提高效率。正在搜索 end-tag 字符串的字节。
- 您可以定位更多 end-tag,但通常您不需要定位超过这两个
- 使用流处理数据(整个 HTML 字符串未缓存在内存中)。这降低了峰值内存使用并加快了第一个字节的时间。
- 处理结束标记位于文本读取边界上的罕见边缘情况。我相信每 ~1000 字节可能会出现一个边界(每个 TCP 数据包 1000-1500 字节),这可能会因 gzip 压缩而有所不同。
- 将注入解析代码分开,以便代码简单地转发其余代码。
- 如果不需要,您可以通过注释掉第二个 body-tag 注入器来禁用它 - 这将加快处理速度。
- 我已经亲自测试过这份代码,它可以正常工作。可能还有残留的错误(取决于结束标记的位置,也取决于您的服务器是否以部分 html 模板(仅 body)作为响应)。我今天(2019-06-28)可能又修好了其中一个
代码
// Register the Worker's fetch handler: every incoming request is answered by handleRequest.
addEventListener('fetch', (fetchEvent) => {
  // If the Worker throws, let the request fall through to the origin untouched.
  fetchEvent.passThroughOnException();
  const pendingResponse = handleRequest(fetchEvent.request);
  fetchEvent.respondWith(pendingResponse);
});
/**
 * Fetch the upstream response and, when it is an HTML document, stream it
 * through the script injector. Every other response is returned untouched.
 * @param {Request} request
 * @returns {Promise<Response>}
 */
async function handleRequest(request) {
  const response = await fetch(request);
  // The header may be absent (headers.get returns null) and media types are case-insensitive.
  const contentType = (response.headers.get('content-type') || '').toLowerCase();
  const isHtml = contentType.startsWith('text/html');
  // Responses without a body (e.g. 204/304) have nothing to parse.
  if (!isHtml || response.body === null) {
    return response; //Only parse html body
  }
  const { readable, writable } = new TransformStream();
  // Deliberately not awaited: the injector keeps pumping while the response
  // streams to the client. Attach a handler so a failure is logged instead of
  // surfacing as an unhandled rejection.
  injectScripts(response.body, writable).catch((err) => {
    console.log(err);
  });
  return new Response(readable, response);
}
// Encoder used only to pre-compute the byte arrays of the injection jobs below;
// it is released again once the jobs are built.
let encoder = new TextEncoder();

/**
 * Injection job that places a small bootstrap directly after the opening
 * <head> tag. The bootstrap carries the "end of head" and "end of body"
 * payloads base64-encoded and moves them to their final position in the
 * browser, so the Worker only has to scan the very beginning of the document.
 *
 * Job shape:
 *   forInjection   - bytes to inject
 *   tagBytes       - bytes of the tag to look for (case sensitive)
 *   insertAfterTag - true: inject after the tag, false: inject before it
 *
 * Notes on the client-side bootstrap:
 * - It is wrapped in a function so its helper names do not leak into (or
 *   collide with) the page's global scope.
 * - Payloads are inserted through Range.createContextualFragment because
 *   <script> elements created via innerHTML are never executed.
 * - The body payload is attached with addEventListener('load') so an onload
 *   handler set by the page itself is not overwritten.
 * - btoa only accepts Latin-1 input; keep the payload strings within that range.
 */
let deferredInjection = function() {
  let forHeadStart = `<script>var test = 1; //Start of head section</script>`;
  let forHeadEnd = `<script>var test = 2; //End of head section</script>`;
  let forBodyEnd = `<script>var test = 3; //End of body section</script><button>click</button>`;
  let helper = `
${forHeadStart}
<script>
(function() {
function appendHtmlTo(element, htmlContent) {
element.appendChild(document.createRange().createContextualFragment(htmlContent));
}
var forHeadEnd = "${ btoa(forHeadEnd) }";
var forBodyEnd = "${ btoa(forBodyEnd) }";
if (forHeadEnd.length > 0) appendHtmlTo(document.head, atob(forHeadEnd));
if (forBodyEnd.length > 0) window.addEventListener('load', function() {
appendHtmlTo(document.body, atob(forBodyEnd));
});
})();
</script>
`;
  return {
    forInjection: encoder.encode(helper),
    tagBytes: encoder.encode("<head>"),
    insertAfterTag: true
  };
}();

// Alternative direct-injection jobs. Uncomment one (together with the matching
// parseForInjection call in injectScripts) to inject right before the closing tag.
// let headInjection = {
//   forInjection: encoder.encode("<script>var test = 1;</script>"),
//   tagBytes: encoder.encode("</head>"), //case sensitive
//   insertAfterTag: false
// };
// let bodyInjection = {
//   forInjection: encoder.encode("<script>var test = 1;</script>"),
//   tagBytes: encoder.encode("</body>"), //case sensitive
//   insertAfterTag: false
// }

// All jobs are encoded; the encoder is no longer needed.
encoder = null;
/**
 * Pump the upstream body into the writable side of the transform stream,
 * running the configured injection job(s) first and forwarding whatever is
 * left afterwards.
 * @param {ReadableStream} readable upstream response body
 * @param {WritableStream} writable sink whose readable twin is sent to the client
 */
async function injectScripts(readable, writable) {
  const reader = readable.getReader();
  const writer = writable.getWriter();
  // One search-result object is reused for the whole request to limit garbage.
  const searchResult = { charactersFound: 0, foundIndex: -1, afterHeadTag: -1 };
  const processingState = {
    readStream: readable,
    writeStream: writable,
    reader,
    writer,
    leftOvers: null, // bytes that followed a matched tag and still need processing
    inputDone: false,
    result: searchResult,
  };

  await parseForInjection(processingState, deferredInjection);
  //await parseForInjection(processingState, headInjection);
  //await parseForInjection(processingState, bodyInjection);
  await forwardTheRest(processingState);
}
/// Scans `chunk` for `tagBytes` and reports the outcome through `result`
/// (passed in and reused by the caller to avoid allocating per search):
///  - no match and no partial match at the end of the chunk:
///      charactersFound = 0, foundIndex = -1, afterHeadTag = -1
///  - complete match: charactersFound = tagBytes.length, foundIndex = index of
///      the first tag byte, afterHeadTag = index just past the tag
///  - partial match running into the end of the chunk:
///      charactersFound < tagBytes.length, foundIndex = where it starts,
///      afterHeadTag = chunk.length
function searchByteArrayChunkForClosingTag(chunk, tagBytes, result)
{
  const tagLength = tagBytes.length;
  const firstTagByte = tagBytes[0];
  let resumeFrom = 0;

  while (true) {
    // Start every candidate from a clean slate.
    result.charactersFound = 0;
    result.foundIndex = -1;
    result.afterHeadTag = -1;

    const candidate = chunk.indexOf(firstTagByte, resumeFrom);
    if (candidate === -1) {
      return; // no further occurrence of the first tag byte
    }
    result.foundIndex = candidate;
    resumeFrom = candidate + 1; // the next candidate search starts right after this one

    // The first byte already matched; compare the following bytes.
    let matched = 1;
    let cursor = candidate + 1;
    let mismatch = false;
    while (matched < tagLength && cursor < chunk.length) {
      if (chunk[cursor] !== tagBytes[matched]) {
        mismatch = true;
        break;
      }
      cursor += 1;
      matched += 1;
    }
    if (mismatch) {
      continue; // not the tag; try the next occurrence of the first byte
    }

    // Either the whole tag matched, or the chunk ended in the middle of it.
    result.charactersFound = matched;
    result.afterHeadTag = cursor;
    return;
  }
}
/// Continues a tag match that ran into the end of the previous chunk.
/// `lastSplitResult.charactersFound` says how many tag bytes were already seen;
/// matching resumes at the first byte of `chunk`. The outcome is written into
/// `result`:
///  - foundIndex is the negated carried-over count (a negative value signals
///    that the match began in an earlier chunk)
///  - on a mismatch charactersFound is reset to 0 and afterHeadTag to -1
///  - otherwise afterHeadTag is the index in `chunk` just past the matched bytes
/// Returns `result` only when `chunk` ran out before the tag was completed;
/// returns nothing in every other case.
function continueSearchByteArrayChunkForClosingTag(chunk, tagBytes, lastSplitResult, result)
{
  const carriedOver = lastSplitResult.charactersFound;
  result.charactersFound = carriedOver;
  result.foundIndex = (-1 * carriedOver);
  result.afterHeadTag = 0;

  let tagPos = carriedOver;
  let chunkPos = 0;
  while (tagPos < tagBytes.length) {
    if (chunkPos === chunk.length) {
      return result; // chunk is shorter than the remaining part of the tag
    }
    if (chunk[chunkPos] !== tagBytes[tagPos]) {
      result.charactersFound = 0;
      result.afterHeadTag = -1;
      return;
    }
    chunkPos += 1;
    tagPos += 1;
    result.charactersFound += 1;
    result.afterHeadTag = chunkPos;
  }
}
/// Dispatches between the two search strategies. Without a pending partial
/// match (`lastSplitResult` is null/undefined) the chunk is searched from its
/// start. With one, the pending match is continued first; if that does not
/// complete the tag, the chunk is searched from its start instead.
/// The outcome is delivered through `result`.
function continueOrNewSearch(chunk, tagBytes, lastSplitResult, result)
{
  if (lastSplitResult == null) {
    searchByteArrayChunkForClosingTag(chunk, tagBytes, result);
    return;
  }

  continueSearchByteArrayChunkForClosingTag(chunk, tagBytes, lastSplitResult, result);
  const tagCompleted = result.charactersFound === tagBytes.length;
  return tagCompleted
    ? result
    : searchByteArrayChunkForClosingTag(chunk, tagBytes, result); //Keep searching onward
}
/**
 * Streams chunks from the reader to the writer until `injectionJob.tagBytes`
 * is found, then writes the injection next to the tag and stops.
 *
 * Bytes following the tag in the current chunk are stored in
 * `processingState.leftOvers` so the next job (or forwardTheRest) can pick
 * them up. A tag that straddles a chunk boundary is handled by holding back
 * the partially matched prefix: only its length is remembered, because the
 * bytes themselves are known from `tagBytes`.
 *
 * Known limitation: a partial match that spans more than two chunks (a chunk
 * shorter than the remaining part of the tag) is not recognised as a tag; the
 * bytes are still forwarded unchanged.
 *
 * @param {object} processingState shared state created by injectScripts
 *   (reader, writer, leftOvers, inputDone, result)
 * @param {object} injectionJob { tagBytes, forInjection, insertAfterTag }
 * @returns {Promise<true|undefined>} true when the input ended (the writer has
 *   been closed); undefined after a successful injection or when the job is skipped
 */
async function parseForInjection(processingState, injectionJob)
{
  if (processingState.inputDone) return; //A previous job already consumed the whole input
  if (!injectionJob) return;
  if (!injectionJob.tagBytes) return;
  if (!injectionJob.forInjection) return;
  const reader = processingState.reader;
  const writer = processingState.writer;
  const result = processingState.result;
  const tagBytes = injectionJob.tagBytes;
  // Length of the tag prefix held back from the previous chunk. It lives in its
  // own object because `result` is reused (and overwritten) by every search.
  const carried = { charactersFound: 0 };
  let hasCarry = false;
  processingState.inputDone = false;
  for (;;) {
    let chunk;
    if (processingState.leftOvers)
    {
      chunk = processingState.leftOvers;
      processingState.leftOvers = null;
    }
    else
    {
      const readerResult = await reader.read();
      chunk = readerResult.value;
      processingState.inputDone = readerResult.done;
    }
    if (processingState.inputDone) {
      if (hasCarry) {
        // The stream ended in the middle of what looked like the tag: those
        // bytes were held back, so emit them now instead of losing them.
        await writer.write(tagBytes.slice(0, carried.charactersFound));
      }
      await writer.close();
      return true;
    }
    continueOrNewSearch(chunk, tagBytes, hasCarry ? carried : null, result);
    if (result.charactersFound === tagBytes.length) //Complete match
    {
      // foundIndex is negative when the tag started in the previous chunk;
      // in that case nothing of this chunk precedes the tag.
      if (result.foundIndex > 0)
      {
        await writer.write(chunk.slice(0, result.foundIndex));
      }
      console.log('injected');
      if (injectionJob.insertAfterTag)
      {
        await writer.write(tagBytes);
        await writer.write(injectionJob.forInjection);
      }
      else
      {
        await writer.write(injectionJob.forInjection);
        await writer.write(tagBytes);
      }
      // Everything after the tag, up to and including the last byte of the chunk.
      processingState.leftOvers = chunk.slice(result.afterHeadTag);
      return;
    }
    if (hasCarry)
    {
      //The remainder wasn't found, so write the partial match from before (maybe `<` or `</`)
      await writer.write(tagBytes.slice(0, carried.charactersFound));
      hasCarry = false;
    }
    if (result.charactersFound === 0)
    {
      await writer.write(chunk);
      continue;
    }
    // Partial match at the end of the chunk: forward what precedes it and hold
    // back the matched prefix until the next chunk decides.
    carried.charactersFound = result.charactersFound;
    hasCarry = true;
    if (result.foundIndex > 0)
    {
      await writer.write(chunk.slice(0, result.foundIndex));
    }
  }
}
/**
 * Forwards everything that is still unread to the client once the injection
 * jobs are finished: first the bytes left over from the last parsed chunk,
 * then the remainder of the upstream body via pipeTo.
 *
 * pipeTo is called with its default options, under which the destination is
 * closed when the source ends, so no explicit close call is made here.
 * Failures are logged rather than rethrown (best effort).
 *
 * @param {object} processingState shared state created by injectScripts
 */
async function forwardTheRest(processingState)
{
  try
  {
    if (processingState.inputDone) return; //Input was already fully consumed (and the writer closed) by a job
    const pending = processingState.leftOvers;
    if (pending)
    {
      processingState.leftOvers = null;
      await processingState.writer.write(pending);
    }
    // pipeTo needs both streams unlocked.
    processingState.reader.releaseLock();
    processingState.writer.releaseLock();
    await processingState.readStream.pipeTo(processingState.writeStream);
  }
  catch (e)
  {
    console.log(e);
  }
}
直接使用 (utf-8) 字节的进一步说明:
- 仅使用字节值。这至少可以通过搜索字符的第一个独特的 utf-8 字节(< 128 和 > 192)来实现。但是在这种情况下,我们正在搜索由 lower-than-128 个字节组成的
</head>
,非常容易使用。
- 鉴于搜索 utf-8 的性质(这是最棘手的),这应该适用于 ['utf-8'、'utf8'、'iso-8859-1'、'us-ascii'] .您将需要更改代码段编码器以匹配。
- 这还没有经过全面测试。边界情况在我的测试中没有被触发。理想情况下,我们应该有一个针对核心功能的测试平台
- 感谢 Kenton Varda 挑战我
- 如果在 CloudFlare Workers 中有更好的方式在 forwardTheRest 函数里执行 pipeTo,请告诉我
- 您可能会发现
continueOrNewSearch
和两个 sub-functions 是跨块边界查找 multi-bytes 的有趣方法。直到边界,我们才计算找到了多少字节。没有必要保留这些字节(我们知道它们是什么)。然后在下一个块中,我们从中断的地方继续。我们总是在 header 周围切割数组缓冲区,并确保我们写入 header 字节(使用 tagBytes)
我有一个指向 GitBook 的自定义域名(vanity URL)。GitBook 不支持插入任意 javascript 片段。目前 GitBook 只有 4 个“集成”(integrations)。
我可以通过我自己的 VM 服务器路由来完成此操作,但我有 CloudFlare,我想试用 workers。 (CDN 边缘Javascript 运行)。
CloudFlare worker 环境使 header 注入非常容易,但没有明显的方法可以向 HTML 响应正文中注入内容。
使用 TransformStream 进行处理很重要,这样处理是异步的并且不需要内存缓冲(为了可扩展性和最小化 GC)- 只有 5 毫秒 CPU 时间预算。
概览:
- 要自己使用,请更改字符串
forHeadStart
、forHeadEnd
和forBodyEnd
。 - 这种 deferredInjection 方法是推荐的方法,可以最大限度地减少 worker 的 CPU 时间。它更高效,因为它只需要解析 HTML 的开头。另一种方法需要为 headInjection 解析整个 head 部分,如果您使用 bodyInjection,它实际上需要解析整个 html 响应。
- deferredInjection 方法的工作原理是将内容注入到 head 标记的开头,然后在 client-side 运行时将您的 HTML 内容部署到所需位置。
- 如果需要,您可以使用
headInjection
and/orbodyInjection
直接注入。取消注释相关代码,包括injectScripts
中的代码,并设置将被编码的 tagBytes 的字符串。 - 此解决方案将仅解析 HTML 内容类型
- 此解决方案直接处理字节(而非字符串)以提高效率。正在搜索 end-tag 字符串的字节。
- 您可以定位更多 end-tag,但通常您不需要定位超过这两个
- 使用流处理数据(整个 HTML 字符串未缓存在内存中)。这降低了峰值内存使用并加快了第一个字节的时间。
- 处理结束标记位于文本读取边界上的罕见边缘情况。我相信每 ~1000 字节可能会出现一个边界(每个 TCP 数据包 1000-1500 字节),这可能会因 gzip 压缩而有所不同。
- 将注入解析代码分开,以便代码简单地转发其余代码。
- 如果不需要,您可以通过注释掉第二个 body-tag 注入器来禁用它 - 这将加快处理速度。
- 我已经亲自测试过这份代码,它可以正常工作。可能还有残留的错误(取决于结束标记的位置,也取决于您的服务器是否以部分 html 模板(仅 body)作为响应)。我今天(2019-06-28)可能又修好了其中一个
代码
// Register the Worker's fetch handler: every incoming request is answered by handleRequest.
addEventListener('fetch', (fetchEvent) => {
  // If the Worker throws, let the request fall through to the origin untouched.
  fetchEvent.passThroughOnException();
  const pendingResponse = handleRequest(fetchEvent.request);
  fetchEvent.respondWith(pendingResponse);
});
/**
 * Fetch the upstream response and, when it is an HTML document, stream it
 * through the script injector. Every other response is returned untouched.
 * @param {Request} request
 * @returns {Promise<Response>}
 */
async function handleRequest(request) {
  const response = await fetch(request);
  // The header may be absent (headers.get returns null) and media types are case-insensitive.
  const contentType = (response.headers.get('content-type') || '').toLowerCase();
  const isHtml = contentType.startsWith('text/html');
  // Responses without a body (e.g. 204/304) have nothing to parse.
  if (!isHtml || response.body === null) {
    return response; //Only parse html body
  }
  const { readable, writable } = new TransformStream();
  // Deliberately not awaited: the injector keeps pumping while the response
  // streams to the client. Attach a handler so a failure is logged instead of
  // surfacing as an unhandled rejection.
  injectScripts(response.body, writable).catch((err) => {
    console.log(err);
  });
  return new Response(readable, response);
}
// Encoder used only to pre-compute the byte arrays of the injection jobs below;
// it is released again once the jobs are built.
let encoder = new TextEncoder();

/**
 * Injection job that places a small bootstrap directly after the opening
 * <head> tag. The bootstrap carries the "end of head" and "end of body"
 * payloads base64-encoded and moves them to their final position in the
 * browser, so the Worker only has to scan the very beginning of the document.
 *
 * Job shape:
 *   forInjection   - bytes to inject
 *   tagBytes       - bytes of the tag to look for (case sensitive)
 *   insertAfterTag - true: inject after the tag, false: inject before it
 *
 * Notes on the client-side bootstrap:
 * - It is wrapped in a function so its helper names do not leak into (or
 *   collide with) the page's global scope.
 * - Payloads are inserted through Range.createContextualFragment because
 *   <script> elements created via innerHTML are never executed.
 * - The body payload is attached with addEventListener('load') so an onload
 *   handler set by the page itself is not overwritten.
 * - btoa only accepts Latin-1 input; keep the payload strings within that range.
 */
let deferredInjection = function() {
  let forHeadStart = `<script>var test = 1; //Start of head section</script>`;
  let forHeadEnd = `<script>var test = 2; //End of head section</script>`;
  let forBodyEnd = `<script>var test = 3; //End of body section</script><button>click</button>`;
  let helper = `
${forHeadStart}
<script>
(function() {
function appendHtmlTo(element, htmlContent) {
element.appendChild(document.createRange().createContextualFragment(htmlContent));
}
var forHeadEnd = "${ btoa(forHeadEnd) }";
var forBodyEnd = "${ btoa(forBodyEnd) }";
if (forHeadEnd.length > 0) appendHtmlTo(document.head, atob(forHeadEnd));
if (forBodyEnd.length > 0) window.addEventListener('load', function() {
appendHtmlTo(document.body, atob(forBodyEnd));
});
})();
</script>
`;
  return {
    forInjection: encoder.encode(helper),
    tagBytes: encoder.encode("<head>"),
    insertAfterTag: true
  };
}();

// Alternative direct-injection jobs. Uncomment one (together with the matching
// parseForInjection call in injectScripts) to inject right before the closing tag.
// let headInjection = {
//   forInjection: encoder.encode("<script>var test = 1;</script>"),
//   tagBytes: encoder.encode("</head>"), //case sensitive
//   insertAfterTag: false
// };
// let bodyInjection = {
//   forInjection: encoder.encode("<script>var test = 1;</script>"),
//   tagBytes: encoder.encode("</body>"), //case sensitive
//   insertAfterTag: false
// }

// All jobs are encoded; the encoder is no longer needed.
encoder = null;
/**
 * Pump the upstream body into the writable side of the transform stream,
 * running the configured injection job(s) first and forwarding whatever is
 * left afterwards.
 * @param {ReadableStream} readable upstream response body
 * @param {WritableStream} writable sink whose readable twin is sent to the client
 */
async function injectScripts(readable, writable) {
  const reader = readable.getReader();
  const writer = writable.getWriter();
  // One search-result object is reused for the whole request to limit garbage.
  const searchResult = { charactersFound: 0, foundIndex: -1, afterHeadTag: -1 };
  const processingState = {
    readStream: readable,
    writeStream: writable,
    reader,
    writer,
    leftOvers: null, // bytes that followed a matched tag and still need processing
    inputDone: false,
    result: searchResult,
  };

  await parseForInjection(processingState, deferredInjection);
  //await parseForInjection(processingState, headInjection);
  //await parseForInjection(processingState, bodyInjection);
  await forwardTheRest(processingState);
}
/// Scans `chunk` for `tagBytes` and reports the outcome through `result`
/// (passed in and reused by the caller to avoid allocating per search):
///  - no match and no partial match at the end of the chunk:
///      charactersFound = 0, foundIndex = -1, afterHeadTag = -1
///  - complete match: charactersFound = tagBytes.length, foundIndex = index of
///      the first tag byte, afterHeadTag = index just past the tag
///  - partial match running into the end of the chunk:
///      charactersFound < tagBytes.length, foundIndex = where it starts,
///      afterHeadTag = chunk.length
function searchByteArrayChunkForClosingTag(chunk, tagBytes, result)
{
  const tagLength = tagBytes.length;
  const firstTagByte = tagBytes[0];
  let resumeFrom = 0;

  while (true) {
    // Start every candidate from a clean slate.
    result.charactersFound = 0;
    result.foundIndex = -1;
    result.afterHeadTag = -1;

    const candidate = chunk.indexOf(firstTagByte, resumeFrom);
    if (candidate === -1) {
      return; // no further occurrence of the first tag byte
    }
    result.foundIndex = candidate;
    resumeFrom = candidate + 1; // the next candidate search starts right after this one

    // The first byte already matched; compare the following bytes.
    let matched = 1;
    let cursor = candidate + 1;
    let mismatch = false;
    while (matched < tagLength && cursor < chunk.length) {
      if (chunk[cursor] !== tagBytes[matched]) {
        mismatch = true;
        break;
      }
      cursor += 1;
      matched += 1;
    }
    if (mismatch) {
      continue; // not the tag; try the next occurrence of the first byte
    }

    // Either the whole tag matched, or the chunk ended in the middle of it.
    result.charactersFound = matched;
    result.afterHeadTag = cursor;
    return;
  }
}
/// Continues a tag match that ran into the end of the previous chunk.
/// `lastSplitResult.charactersFound` says how many tag bytes were already seen;
/// matching resumes at the first byte of `chunk`. The outcome is written into
/// `result`:
///  - foundIndex is the negated carried-over count (a negative value signals
///    that the match began in an earlier chunk)
///  - on a mismatch charactersFound is reset to 0 and afterHeadTag to -1
///  - otherwise afterHeadTag is the index in `chunk` just past the matched bytes
/// Returns `result` only when `chunk` ran out before the tag was completed;
/// returns nothing in every other case.
function continueSearchByteArrayChunkForClosingTag(chunk, tagBytes, lastSplitResult, result)
{
  const carriedOver = lastSplitResult.charactersFound;
  result.charactersFound = carriedOver;
  result.foundIndex = (-1 * carriedOver);
  result.afterHeadTag = 0;

  let tagPos = carriedOver;
  let chunkPos = 0;
  while (tagPos < tagBytes.length) {
    if (chunkPos === chunk.length) {
      return result; // chunk is shorter than the remaining part of the tag
    }
    if (chunk[chunkPos] !== tagBytes[tagPos]) {
      result.charactersFound = 0;
      result.afterHeadTag = -1;
      return;
    }
    chunkPos += 1;
    tagPos += 1;
    result.charactersFound += 1;
    result.afterHeadTag = chunkPos;
  }
}
/// Dispatches between the two search strategies. Without a pending partial
/// match (`lastSplitResult` is null/undefined) the chunk is searched from its
/// start. With one, the pending match is continued first; if that does not
/// complete the tag, the chunk is searched from its start instead.
/// The outcome is delivered through `result`.
function continueOrNewSearch(chunk, tagBytes, lastSplitResult, result)
{
  if (lastSplitResult == null) {
    searchByteArrayChunkForClosingTag(chunk, tagBytes, result);
    return;
  }

  continueSearchByteArrayChunkForClosingTag(chunk, tagBytes, lastSplitResult, result);
  const tagCompleted = result.charactersFound === tagBytes.length;
  return tagCompleted
    ? result
    : searchByteArrayChunkForClosingTag(chunk, tagBytes, result); //Keep searching onward
}
/**
 * Streams chunks from the reader to the writer until `injectionJob.tagBytes`
 * is found, then writes the injection next to the tag and stops.
 *
 * Bytes following the tag in the current chunk are stored in
 * `processingState.leftOvers` so the next job (or forwardTheRest) can pick
 * them up. A tag that straddles a chunk boundary is handled by holding back
 * the partially matched prefix: only its length is remembered, because the
 * bytes themselves are known from `tagBytes`.
 *
 * Known limitation: a partial match that spans more than two chunks (a chunk
 * shorter than the remaining part of the tag) is not recognised as a tag; the
 * bytes are still forwarded unchanged.
 *
 * @param {object} processingState shared state created by injectScripts
 *   (reader, writer, leftOvers, inputDone, result)
 * @param {object} injectionJob { tagBytes, forInjection, insertAfterTag }
 * @returns {Promise<true|undefined>} true when the input ended (the writer has
 *   been closed); undefined after a successful injection or when the job is skipped
 */
async function parseForInjection(processingState, injectionJob)
{
  if (processingState.inputDone) return; //A previous job already consumed the whole input
  if (!injectionJob) return;
  if (!injectionJob.tagBytes) return;
  if (!injectionJob.forInjection) return;
  const reader = processingState.reader;
  const writer = processingState.writer;
  const result = processingState.result;
  const tagBytes = injectionJob.tagBytes;
  // Length of the tag prefix held back from the previous chunk. It lives in its
  // own object because `result` is reused (and overwritten) by every search.
  const carried = { charactersFound: 0 };
  let hasCarry = false;
  processingState.inputDone = false;
  for (;;) {
    let chunk;
    if (processingState.leftOvers)
    {
      chunk = processingState.leftOvers;
      processingState.leftOvers = null;
    }
    else
    {
      const readerResult = await reader.read();
      chunk = readerResult.value;
      processingState.inputDone = readerResult.done;
    }
    if (processingState.inputDone) {
      if (hasCarry) {
        // The stream ended in the middle of what looked like the tag: those
        // bytes were held back, so emit them now instead of losing them.
        await writer.write(tagBytes.slice(0, carried.charactersFound));
      }
      await writer.close();
      return true;
    }
    continueOrNewSearch(chunk, tagBytes, hasCarry ? carried : null, result);
    if (result.charactersFound === tagBytes.length) //Complete match
    {
      // foundIndex is negative when the tag started in the previous chunk;
      // in that case nothing of this chunk precedes the tag.
      if (result.foundIndex > 0)
      {
        await writer.write(chunk.slice(0, result.foundIndex));
      }
      console.log('injected');
      if (injectionJob.insertAfterTag)
      {
        await writer.write(tagBytes);
        await writer.write(injectionJob.forInjection);
      }
      else
      {
        await writer.write(injectionJob.forInjection);
        await writer.write(tagBytes);
      }
      // Everything after the tag, up to and including the last byte of the chunk.
      processingState.leftOvers = chunk.slice(result.afterHeadTag);
      return;
    }
    if (hasCarry)
    {
      //The remainder wasn't found, so write the partial match from before (maybe `<` or `</`)
      await writer.write(tagBytes.slice(0, carried.charactersFound));
      hasCarry = false;
    }
    if (result.charactersFound === 0)
    {
      await writer.write(chunk);
      continue;
    }
    // Partial match at the end of the chunk: forward what precedes it and hold
    // back the matched prefix until the next chunk decides.
    carried.charactersFound = result.charactersFound;
    hasCarry = true;
    if (result.foundIndex > 0)
    {
      await writer.write(chunk.slice(0, result.foundIndex));
    }
  }
}
/**
 * Forwards everything that is still unread to the client once the injection
 * jobs are finished: first the bytes left over from the last parsed chunk,
 * then the remainder of the upstream body via pipeTo.
 *
 * pipeTo is called with its default options, under which the destination is
 * closed when the source ends, so no explicit close call is made here.
 * Failures are logged rather than rethrown (best effort).
 *
 * @param {object} processingState shared state created by injectScripts
 */
async function forwardTheRest(processingState)
{
  try
  {
    if (processingState.inputDone) return; //Input was already fully consumed (and the writer closed) by a job
    const pending = processingState.leftOvers;
    if (pending)
    {
      processingState.leftOvers = null;
      await processingState.writer.write(pending);
    }
    // pipeTo needs both streams unlocked.
    processingState.reader.releaseLock();
    processingState.writer.releaseLock();
    await processingState.readStream.pipeTo(processingState.writeStream);
  }
  catch (e)
  {
    console.log(e);
  }
}
直接使用 (utf-8) 字节的进一步说明:
- 仅使用字节值。这至少可以通过搜索字符的第一个独特的 utf-8 字节(< 128 和 > 192)来实现。但是在这种情况下,我们正在搜索由 lower-than-128 个字节组成的
</head>
,非常容易使用。 - 鉴于搜索 utf-8 的性质(这是最棘手的),这应该适用于 ['utf-8'、'utf8'、'iso-8859-1'、'us-ascii'] .您将需要更改代码段编码器以匹配。
- 这还没有经过全面测试。边界情况在我的测试中没有被触发。理想情况下,我们应该有一个针对核心功能的测试平台
- 感谢 Kenton Varda 挑战我
- 如果在 CloudFlare Workers 中有更好的方式在 forwardTheRest 函数里执行 pipeTo,请告诉我
- 您可能会发现
continueOrNewSearch
和两个 sub-functions 是跨块边界查找 multi-bytes 的有趣方法。直到边界,我们才计算找到了多少字节。没有必要保留这些字节(我们知道它们是什么)。然后在下一个块中,我们从中断的地方继续。我们总是在 header 周围切割数组缓冲区,并确保我们写入 header 字节(使用 tagBytes)