如何使用最少的临时变量将 JS 代码解析为每行一个操作?
How to parse JS code into one-operation-per-line with fewest temp variables?
说我有这个 code:
// Walk GF(2^8)
var x = 0;
var xi = 0;
for (var i = 0; i < 256; i++) {
// Compute sbox
var sx = xi ^ (xi << 1) ^ (xi << 2) ^ (xi << 3) ^ (xi << 4);
sx = (sx >>> 8) ^ (sx & 0xff) ^ 0x63;
SBOX[x] = sx;
INV_SBOX[sx] = x;
// Compute multiplication
var x2 = d[x];
var x4 = d[x2];
var x8 = d[x4];
// Compute sub bytes, mix columns tables
var t = (d[sx] * 0x101) ^ (sx * 0x1010100);
SUB_MIX_0[x] = (t << 24) | (t >>> 8);
SUB_MIX_1[x] = (t << 16) | (t >>> 16);
SUB_MIX_2[x] = (t << 8) | (t >>> 24);
SUB_MIX_3[x] = t;
// Compute inv sub bytes, inv mix columns tables
var t = (x8 * 0x1010101) ^ (x4 * 0x10001) ^ (x2 * 0x101) ^ (x * 0x1010100);
INV_SUB_MIX_0[sx] = (t << 24) | (t >>> 8);
INV_SUB_MIX_1[sx] = (t << 16) | (t >>> 16);
INV_SUB_MIX_2[sx] = (t << 8) | (t >>> 24);
INV_SUB_MIX_3[sx] = t;
// Compute next counter
if (!x) {
x = xi = 1;
} else {
x = x2 ^ d[d[d[x8 ^ x2]]];
xi ^= d[d[xi]];
}
}
我正在使用 meriyah 来解析 AST。但一般来说,由于这相当复杂,如果将每个操作都放在一行中,您如何计算出需要多少个临时变量?
所以循环的第一行:
var sx = xi ^ (xi << 1) ^ (xi << 2) ^ (xi << 3) ^ (xi << 4);
它会变成这样:
var foo1 = xi << 1
var foo2 = xi << 2
var foo3 = xi << 3
var foo4 = xi << 4
var sx = xi ^ (foo1) ^ (foo2) ^ (foo3) ^ (foo4);
成为类似的人:
var foo1 = xi << 1
var foo2 = xi << 2
var foo3 = xi << 3
var foo4 = xi << 4
var foo6 = (foo3) ^ (foo4)
var foo7 = (foo2) ^ foo6
var foo8 = (foo1) ^ foo7
var sx = xi ^ foo8;
但更好的是:
var foo1 = xi << 1
var foo2 = xi << 2
var foo3 = xi << 3
var foo4 = xi << 4
foo4 = (foo3) ^ (foo4)
foo4 = (foo2) ^ foo4
foo4 = (foo1) ^ foo4
var sx = xi ^ foo4;
所以我们尽量减少我们创建的变量数量。你如何做到这一点,甚至以编程方式考虑它?
我尝试的代码行的 AST 在这里:
{
"type": "Program",
"sourceType": "script",
"body": [
{
"type": "VariableDeclaration",
"kind": "var",
"declarations": [
{
"type": "VariableDeclarator",
"id": {
"type": "Identifier",
"name": "sx"
},
"init": {
"type": "BinaryExpression",
"left": {
"type": "BinaryExpression",
"left": {
"type": "BinaryExpression",
"left": {
"type": "BinaryExpression",
"left": {
"type": "Identifier",
"name": "xi"
},
"right": {
"type": "BinaryExpression",
"left": {
"type": "Identifier",
"name": "xi"
},
"right": {
"type": "Literal",
"value": 1
},
"operator": "<<"
},
"operator": "^"
},
"right": {
"type": "BinaryExpression",
"left": {
"type": "Identifier",
"name": "xi"
},
"right": {
"type": "Literal",
"value": 2
},
"operator": "<<"
},
"operator": "^"
},
"right": {
"type": "BinaryExpression",
"left": {
"type": "Identifier",
"name": "xi"
},
"right": {
"type": "Literal",
"value": 3
},
"operator": "<<"
},
"operator": "^"
},
"right": {
"type": "BinaryExpression",
"left": {
"type": "Identifier",
"name": "xi"
},
"right": {
"type": "Literal",
"value": 4
},
"operator": "<<"
},
"operator": "^"
}
}
]
}
]
}
您将如何以通用方式处理这种特定情况,以便可以将其推广并用于其他地方(需要更多工作)?
首先,var sx = xi ^ (xi << 1) ^ (xi << 2) ^ (xi << 3) ^ (xi << 4);
不需要那么多变量
var tmp1 = (xi << 4);
var tmp2 = (xi << 3);
tmp2 = tmp2 ^ tmp1;
tmp1 = (xi << 2);
tmp1 = tmp1 ^ tmp2;
tmp2 = (xi << 1);
tmp2 = tmp2 ^ tmp1;
tmp2 = xi ^ tmp2;
如果您将“结果”变量 sx
算作一个 non-temporary(因为它在代码中有一个声明),那么我们只剩下一个声明变量和一个临时变量。
在任何情况下,“长表达式”的 AST 看起来像这样(在表达式级别,我们不关心将 xi << 4
拆分为一个节点,因为它没有 sub-expressions ):
┌────────── ^ ───────┐
┌─── ^ ──────────┐ xi << 4
┌─── ^ ─────────┐ xi << 3
┌─── ^ ────┐ xi << 2
xi xi << 1
要遍历树(tl;drPost-order树遍历):
我们转到最深的节点 (op: ^, left: xi, right: xi << 1
) 并将表达式存储到一个临时变量中。 从技术上讲,您可以将其分为两步并首先处理正确的表达式。
var tmp1 = xi << 1
此节点中不再有 sub-expressions,因此我们可以继续“解决”级别 ,同时重复使用相同的临时变量。
tmp1 = xi ^ tmp1
转到parent节点。这个级别看起来像 op: ^, left: tmp1, right: xi << 2
。做同样的事情,但我们必须创建一个新变量。
tmp2 = xi << 2
没有更多的表达式,所以:
tmp1 = tmp1 ^ tmp2
观察:我们现在可以丢弃tmp2
,因为我们只需要一个变量来return结果。
Parent 再次结点:op: ^, left: tmp1, right: xi << 3
tmp2 = xi << 3
tmp1 = tmp1 ^ tmp2
观察:我们在技术上re-used来自child节点的tmp2
。
同样的事情:op: ^, left: tmp1, right: xi << 4
tmp2 = xi << 4
tmp1 = tmp1 ^ tmp2
完成,结果在tmp
这是一般逻辑。您必须牢记的几点:
一个关卡中可以有多个 sub-expression。例如,如果最深层次是 left: xi + 1, right: xi << 1
。那么您将需要额外的临时变量。
你可以有两个以上的children。例如。 a > 0 ? a + 5 : a - 5;
或函数调用 f(a + 1, a + 2, a + 3)
。条件表达式在技术上只能采用一种方式,因此不会使用其中一个变量,但我会考虑调用表达式。结合第 1 点,这会影响一个节点需要多少个临时变量。
以上两点可以结合起来复用变量。如果您必须在最低级别 (left: xi + 1, right: xi << 1
) 使用 2 个临时工,那么您可以 re-use parent 节点中的第二个临时工。您可以保留一个计数器或一个已声明变量的列表。
注意逗号。
更新
这是一个适用于您的表达式的示例。要支持所有的JS表达式当然要困难得多:await表达式、函数表达式、spread等
const meriyah = require('meriyah');
const src = `var res = (!xi) ^ (xi << 1) ^ (xi << 2) ^ (xi << 3);`
const ast = meriyah.parse(src);
const longExpression = ast.body[0].declarations[0].init; // (!xi) ^ (xi << 1) ^ (xi << 2) ^ (xi << 3)
const state = {
ops: [],
nextTemp: 0,
maxTempCount: 0,
};
function isSimple(node) {
return node.type === 'Identifier' || node.type === 'Literal';
}
/**
* @param {meriyah.ESTree.Expression} node
*/
function getChildren(node) {
switch (node.type) {
case 'BinaryExpression':
case 'LogicalExpression':
return [node.left, node.right];
case 'CallExpression':
return node.arguments;
case 'UnaryExpression':
case 'UpdateExpression':
return [node.argument];
default:
throw new Error('Not supported.')
}
}
/**
* @param {meriyah.ESTree.Expression} node
* @return string representation
*/
function simplify(node) {
if (isSimple(node)) {
// if it's a literal or identifier,
// we don't need to do anything at all.
// just return the string representation to print the ops
return node.type === 'Literal'
? node.value
: node.name;
}
// store current temp.
// this will be reused so that we can "discard" any temps we create in this node
const originalTemp = state.nextTemp;
// simplify each of the children first.
// for each child we'll need one temp var
const children = getChildren(node);
const simplified = children.map(node => {
const stringRepr = simplify(node);
state.nextTemp++;
return stringRepr;
});
// update maximum temp variable count;
state.maxTempCount = Math.max(state.nextTemp - 1, state.maxTempCount);
// roll back the nextTemp;
// the parent node will do nextTemp++ as needed for any further ops
state.nextTemp = originalTemp;
// and finally, do the operation for this node.
// at this point, we reuse the original temp
// e.g. tmp0 = op tpm0 tmp1 tmp2
state.ops.push({op: node.operator || node.callee.name, args: simplified, temp: originalTemp});
// the string representation for parent node operation
return 'tmp' + state.nextTemp;
}
simplify(longExpression);
console.log('Vars needed: ', state.maxTempCount);
console.log(state.ops.map(({op, args, temp}) =>
`tmp${temp} = ${op} ${args.join(' ')}`));
PS 欢迎反馈,堆栈溢出向导。我确定所有这些都是 well-trodden 地面。
说我有这个 code:
// Walk GF(2^8)
var x = 0;
var xi = 0;
for (var i = 0; i < 256; i++) {
// Compute sbox
var sx = xi ^ (xi << 1) ^ (xi << 2) ^ (xi << 3) ^ (xi << 4);
sx = (sx >>> 8) ^ (sx & 0xff) ^ 0x63;
SBOX[x] = sx;
INV_SBOX[sx] = x;
// Compute multiplication
var x2 = d[x];
var x4 = d[x2];
var x8 = d[x4];
// Compute sub bytes, mix columns tables
var t = (d[sx] * 0x101) ^ (sx * 0x1010100);
SUB_MIX_0[x] = (t << 24) | (t >>> 8);
SUB_MIX_1[x] = (t << 16) | (t >>> 16);
SUB_MIX_2[x] = (t << 8) | (t >>> 24);
SUB_MIX_3[x] = t;
// Compute inv sub bytes, inv mix columns tables
var t = (x8 * 0x1010101) ^ (x4 * 0x10001) ^ (x2 * 0x101) ^ (x * 0x1010100);
INV_SUB_MIX_0[sx] = (t << 24) | (t >>> 8);
INV_SUB_MIX_1[sx] = (t << 16) | (t >>> 16);
INV_SUB_MIX_2[sx] = (t << 8) | (t >>> 24);
INV_SUB_MIX_3[sx] = t;
// Compute next counter
if (!x) {
x = xi = 1;
} else {
x = x2 ^ d[d[d[x8 ^ x2]]];
xi ^= d[d[xi]];
}
}
我正在使用 meriyah 来解析 AST。但一般来说,由于这相当复杂,如果将每个操作都放在一行中,您如何计算出需要多少个临时变量?
所以循环的第一行:
var sx = xi ^ (xi << 1) ^ (xi << 2) ^ (xi << 3) ^ (xi << 4);
它会变成这样:
var foo1 = xi << 1
var foo2 = xi << 2
var foo3 = xi << 3
var foo4 = xi << 4
var sx = xi ^ (foo1) ^ (foo2) ^ (foo3) ^ (foo4);
成为类似的人:
var foo1 = xi << 1
var foo2 = xi << 2
var foo3 = xi << 3
var foo4 = xi << 4
var foo6 = (foo3) ^ (foo4)
var foo7 = (foo2) ^ foo6
var foo8 = (foo1) ^ foo7
var sx = xi ^ foo8;
但更好的是:
var foo1 = xi << 1
var foo2 = xi << 2
var foo3 = xi << 3
var foo4 = xi << 4
foo4 = (foo3) ^ (foo4)
foo4 = (foo2) ^ foo4
foo4 = (foo1) ^ foo4
var sx = xi ^ foo4;
所以我们尽量减少我们创建的变量数量。你如何做到这一点,甚至以编程方式考虑它?
我尝试的代码行的 AST 在这里:
{
"type": "Program",
"sourceType": "script",
"body": [
{
"type": "VariableDeclaration",
"kind": "var",
"declarations": [
{
"type": "VariableDeclarator",
"id": {
"type": "Identifier",
"name": "sx"
},
"init": {
"type": "BinaryExpression",
"left": {
"type": "BinaryExpression",
"left": {
"type": "BinaryExpression",
"left": {
"type": "BinaryExpression",
"left": {
"type": "Identifier",
"name": "xi"
},
"right": {
"type": "BinaryExpression",
"left": {
"type": "Identifier",
"name": "xi"
},
"right": {
"type": "Literal",
"value": 1
},
"operator": "<<"
},
"operator": "^"
},
"right": {
"type": "BinaryExpression",
"left": {
"type": "Identifier",
"name": "xi"
},
"right": {
"type": "Literal",
"value": 2
},
"operator": "<<"
},
"operator": "^"
},
"right": {
"type": "BinaryExpression",
"left": {
"type": "Identifier",
"name": "xi"
},
"right": {
"type": "Literal",
"value": 3
},
"operator": "<<"
},
"operator": "^"
},
"right": {
"type": "BinaryExpression",
"left": {
"type": "Identifier",
"name": "xi"
},
"right": {
"type": "Literal",
"value": 4
},
"operator": "<<"
},
"operator": "^"
}
}
]
}
]
}
您将如何以通用方式处理这种特定情况,以便可以将其推广并用于其他地方(需要更多工作)?
首先,var sx = xi ^ (xi << 1) ^ (xi << 2) ^ (xi << 3) ^ (xi << 4);
var tmp1 = (xi << 4);
var tmp2 = (xi << 3);
tmp2 = tmp2 ^ tmp1;
tmp1 = (xi << 2);
tmp1 = tmp1 ^ tmp2;
tmp2 = (xi << 1);
tmp2 = tmp2 ^ tmp1;
tmp2 = xi ^ tmp2;
如果您将“结果”变量 sx
算作一个 non-temporary(因为它在代码中有一个声明),那么我们只剩下一个声明变量和一个临时变量。
在任何情况下,“长表达式”的 AST 看起来像这样(在表达式级别,我们不关心将 xi << 4
拆分为一个节点,因为它没有 sub-expressions ):
┌────────── ^ ───────┐
┌─── ^ ──────────┐ xi << 4
┌─── ^ ─────────┐ xi << 3
┌─── ^ ────┐ xi << 2
xi xi << 1
要遍历树(tl;drPost-order树遍历):
我们转到最深的节点 (
op: ^, left: xi, right: xi << 1
) 并将表达式存储到一个临时变量中。 从技术上讲,您可以将其分为两步并首先处理正确的表达式。var tmp1 = xi << 1
此节点中不再有 sub-expressions,因此我们可以继续“解决”级别 ,同时重复使用相同的临时变量。
tmp1 = xi ^ tmp1
转到parent节点。这个级别看起来像
op: ^, left: tmp1, right: xi << 2
。做同样的事情,但我们必须创建一个新变量。tmp2 = xi << 2
没有更多的表达式,所以:
tmp1 = tmp1 ^ tmp2
观察:我们现在可以丢弃
tmp2
,因为我们只需要一个变量来return结果。Parent 再次结点:
op: ^, left: tmp1, right: xi << 3
tmp2 = xi << 3
tmp1 = tmp1 ^ tmp2
观察:我们在技术上re-used来自child节点的
tmp2
。同样的事情:
op: ^, left: tmp1, right: xi << 4
tmp2 = xi << 4
tmp1 = tmp1 ^ tmp2
完成,结果在
tmp
这是一般逻辑。您必须牢记的几点:
一个关卡中可以有多个 sub-expression。例如,如果最深层次是
left: xi + 1, right: xi << 1
。那么您将需要额外的临时变量。你可以有两个以上的children。例如。
a > 0 ? a + 5 : a - 5;
或函数调用f(a + 1, a + 2, a + 3)
。条件表达式在技术上只能采用一种方式,因此不会使用其中一个变量,但我会考虑调用表达式。结合第 1 点,这会影响一个节点需要多少个临时变量。以上两点可以结合起来复用变量。如果您必须在最低级别 (
left: xi + 1, right: xi << 1
) 使用 2 个临时工,那么您可以 re-use parent 节点中的第二个临时工。您可以保留一个计数器或一个已声明变量的列表。注意逗号。
更新
这是一个适用于您的表达式的示例。要支持所有的JS表达式当然要困难得多:await表达式、函数表达式、spread等
const meriyah = require('meriyah');
const src = `var res = (!xi) ^ (xi << 1) ^ (xi << 2) ^ (xi << 3);`
const ast = meriyah.parse(src);
const longExpression = ast.body[0].declarations[0].init; // (!xi) ^ (xi << 1) ^ (xi << 2) ^ (xi << 3)
const state = {
ops: [],
nextTemp: 0,
maxTempCount: 0,
};
function isSimple(node) {
return node.type === 'Identifier' || node.type === 'Literal';
}
/**
* @param {meriyah.ESTree.Expression} node
*/
function getChildren(node) {
switch (node.type) {
case 'BinaryExpression':
case 'LogicalExpression':
return [node.left, node.right];
case 'CallExpression':
return node.arguments;
case 'UnaryExpression':
case 'UpdateExpression':
return [node.argument];
default:
throw new Error('Not supported.')
}
}
/**
* @param {meriyah.ESTree.Expression} node
* @return string representation
*/
function simplify(node) {
if (isSimple(node)) {
// if it's a literal or identifier,
// we don't need to do anything at all.
// just return the string representation to print the ops
return node.type === 'Literal'
? node.value
: node.name;
}
// store current temp.
// this will be reused so that we can "discard" any temps we create in this node
const originalTemp = state.nextTemp;
// simplify each of the children first.
// for each child we'll need one temp var
const children = getChildren(node);
const simplified = children.map(node => {
const stringRepr = simplify(node);
state.nextTemp++;
return stringRepr;
});
// update maximum temp variable count;
state.maxTempCount = Math.max(state.nextTemp - 1, state.maxTempCount);
// roll back the nextTemp;
// the parent node will do nextTemp++ as needed for any further ops
state.nextTemp = originalTemp;
// and finally, do the operation for this node.
// at this point, we reuse the original temp
// e.g. tmp0 = op tpm0 tmp1 tmp2
state.ops.push({op: node.operator || node.callee.name, args: simplified, temp: originalTemp});
// the string representation for parent node operation
return 'tmp' + state.nextTemp;
}
simplify(longExpression);
console.log('Vars needed: ', state.maxTempCount);
console.log(state.ops.map(({op, args, temp}) =>
`tmp${temp} = ${op} ${args.join(' ')}`));
PS 欢迎反馈,堆栈溢出向导。我确定所有这些都是 well-trodden 地面。