如何使用最少的临时变量将 JS 代码解析为每行一个操作?

How to parse JS code into one-operation-per-line with fewest temp variables?

说我有这个 code:

// Walk GF(2^8)
var x = 0;
var xi = 0;
for (var i = 0; i < 256; i++) {
    // Compute sbox
    var sx = xi ^ (xi << 1) ^ (xi << 2) ^ (xi << 3) ^ (xi << 4);
    sx = (sx >>> 8) ^ (sx & 0xff) ^ 0x63;
    SBOX[x] = sx;
    INV_SBOX[sx] = x;

    // Compute multiplication
    var x2 = d[x];
    var x4 = d[x2];
    var x8 = d[x4];

    // Compute sub bytes, mix columns tables
    var t = (d[sx] * 0x101) ^ (sx * 0x1010100);
    SUB_MIX_0[x] = (t << 24) | (t >>> 8);
    SUB_MIX_1[x] = (t << 16) | (t >>> 16);
    SUB_MIX_2[x] = (t << 8)  | (t >>> 24);
    SUB_MIX_3[x] = t;

    // Compute inv sub bytes, inv mix columns tables
    var t = (x8 * 0x1010101) ^ (x4 * 0x10001) ^ (x2 * 0x101) ^ (x * 0x1010100);
    INV_SUB_MIX_0[sx] = (t << 24) | (t >>> 8);
    INV_SUB_MIX_1[sx] = (t << 16) | (t >>> 16);
    INV_SUB_MIX_2[sx] = (t << 8)  | (t >>> 24);
    INV_SUB_MIX_3[sx] = t;

    // Compute next counter
    if (!x) {
        x = xi = 1;
    } else {
        x = x2 ^ d[d[d[x8 ^ x2]]];
        xi ^= d[d[xi]];
    }
}

我正在使用 meriyah 来解析 AST。但一般来说,由于这相当复杂,如果将每个操作都放在一行中,您如何计算出需要多少个临时变量?

所以循环的第一行:

var sx = xi ^ (xi << 1) ^ (xi << 2) ^ (xi << 3) ^ (xi << 4);

它会变成这样:

var foo1 = xi << 1
var foo2 = xi << 2
var foo3 = xi << 3
var foo4 = xi << 4
var sx = xi ^ (foo1) ^ (foo2) ^ (foo3) ^ (foo4);

成为类似的人:

var foo1 = xi << 1
var foo2 = xi << 2
var foo3 = xi << 3
var foo4 = xi << 4
var foo6 = (foo3) ^ (foo4)
var foo7 = (foo2) ^ foo6
var foo8 = (foo1) ^ foo7
var sx = xi ^ foo8;

但更好的是:

var foo1 = xi << 1
var foo2 = xi << 2
var foo3 = xi << 3
var foo4 = xi << 4
foo4 = (foo3) ^ (foo4)
foo4 = (foo2) ^ foo4
foo4 = (foo1) ^ foo4
var sx = xi ^ foo4;

所以我们尽量减少我们创建的变量数量。你如何做到这一点,甚至以编程方式考虑它?

我尝试的代码行的 AST 在这里:

{
  "type": "Program",
  "sourceType": "script",
  "body": [
    {
      "type": "VariableDeclaration",
      "kind": "var",
      "declarations": [
        {
          "type": "VariableDeclarator",
          "id": {
            "type": "Identifier",
            "name": "sx"
          },
          "init": {
            "type": "BinaryExpression",
            "left": {
              "type": "BinaryExpression",
              "left": {
                "type": "BinaryExpression",
                "left": {
                  "type": "BinaryExpression",
                  "left": {
                    "type": "Identifier",
                    "name": "xi"
                  },
                  "right": {
                    "type": "BinaryExpression",
                    "left": {
                      "type": "Identifier",
                      "name": "xi"
                    },
                    "right": {
                      "type": "Literal",
                      "value": 1
                    },
                    "operator": "<<"
                  },
                  "operator": "^"
                },
                "right": {
                  "type": "BinaryExpression",
                  "left": {
                    "type": "Identifier",
                    "name": "xi"
                  },
                  "right": {
                    "type": "Literal",
                    "value": 2
                  },
                  "operator": "<<"
                },
                "operator": "^"
              },
              "right": {
                "type": "BinaryExpression",
                "left": {
                  "type": "Identifier",
                  "name": "xi"
                },
                "right": {
                  "type": "Literal",
                  "value": 3
                },
                "operator": "<<"
              },
              "operator": "^"
            },
            "right": {
              "type": "BinaryExpression",
              "left": {
                "type": "Identifier",
                "name": "xi"
              },
              "right": {
                "type": "Literal",
                "value": 4
              },
              "operator": "<<"
            },
            "operator": "^"
          }
        }
      ]
    }
  ]
}

您将如何以通用方式处理这种特定情况,以便可以将其推广并用于其他地方(需要更多工作)?

首先,var sx = xi ^ (xi << 1) ^ (xi << 2) ^ (xi << 3) ^ (xi << 4);

不需要那么多变量
var tmp1 = (xi << 4);

var tmp2 = (xi << 3);
tmp2 = tmp2 ^ tmp1;

tmp1 = (xi << 2);
tmp1 = tmp1 ^ tmp2;

tmp2 = (xi << 1);
tmp2 = tmp2 ^ tmp1;

tmp2 = xi ^ tmp2;

如果您将“结果”变量 sx 算作一个 non-temporary(因为它在代码中有一个声明),那么我们只剩下一个声明变量和一个临时变量。

在任何情况下,“长表达式”的 AST 看起来像这样(在表达式级别,我们不关心将 xi << 4 拆分为一个节点,因为它没有 sub-expressions ):

                ┌────────── ^ ───────┐
            ┌───  ^ ──────────┐     xi << 4
      ┌───  ^ ─────────┐     xi << 3
┌───  ^ ────┐         xi << 2
xi         xi << 1

要遍历树(tl;drPost-order树遍历):

  1. 我们转到最深的节点 (op: ^, left: xi, right: xi << 1) 并将表达式存储到一个临时变量中。 从技术上讲,您可以将其分为两步并首先处理正确的表达式。

    var tmp1 = xi << 1

    此节点中不再有 sub-expressions,因此我们可以继续“解决”级别 ,同时重复使用相同的临时变量

    tmp1 = xi ^ tmp1

  2. 转到parent节点。这个级别看起来像 op: ^, left: tmp1, right: xi << 2。做同样的事情,但我们必须创建一个新变量。

    tmp2 = xi << 2

    没有更多的表达式,所以:

    tmp1 = tmp1 ^ tmp2

    观察:我们现在可以丢弃tmp2,因为我们只需要一个变量来return结果。

  3. Parent 再次结点:op: ^, left: tmp1, right: xi << 3

    tmp2 = xi << 3

    tmp1 = tmp1 ^ tmp2

    观察:我们在技术上re-used来自child节点的tmp2

  4. 同样的事情:op: ^, left: tmp1, right: xi << 4

    tmp2 = xi << 4

    tmp1 = tmp1 ^ tmp2

  5. 完成,结果在tmp

这是一般逻辑。您必须牢记的几点:

  1. 一个关卡中可以有多个 sub-expression。例如,如果最深层次是 left: xi + 1, right: xi << 1。那么您将需要额外的临时变量。

  2. 你可以有两个以上的children。例如。 a > 0 ? a + 5 : a - 5; 或函数调用 f(a + 1, a + 2, a + 3)。条件表达式在技术上只能采用一种方式,因此不会使用其中一个变量,但我会考虑调用表达式。结合第 1 点,这会影响一个节点需要多少个临时变量。

  3. 以上两点可以结合起来复用变量。如果您必须在最低级别 (left: xi + 1, right: xi << 1) 使用 2 个临时工,那么您可以 re-use parent 节点中的第二个临时工。您可以保留一个计数器或一个已声明变量的列表。

  4. 注意逗号。

更新

这是一个适用于您的表达式的示例。要支持所有的JS表达式当然要困难得多:await表达式、函数表达式、spread等

const meriyah = require('meriyah');
const src = `var res = (!xi) ^ (xi << 1) ^ (xi << 2) ^ (xi << 3);`
const ast = meriyah.parse(src);
const longExpression = ast.body[0].declarations[0].init; // (!xi) ^ (xi << 1) ^ (xi << 2) ^ (xi << 3)

const state = {
    ops: [],
    nextTemp: 0,
    maxTempCount: 0,
};

function isSimple(node) {
    return node.type === 'Identifier' || node.type === 'Literal';
}

/**
 * @param {meriyah.ESTree.Expression} node
 */
function getChildren(node) {
    switch (node.type) {
        case 'BinaryExpression':
        case 'LogicalExpression':
            return [node.left, node.right];
        case 'CallExpression':
            return node.arguments;
        case 'UnaryExpression':
        case 'UpdateExpression':
            return [node.argument];
        default:
            throw new Error('Not supported.')
    }
}

/**
 * @param {meriyah.ESTree.Expression} node
 * @return string representation
 */
function simplify(node) {
    if (isSimple(node)) {
        // if it's a literal or identifier,
        // we don't need to do anything at all.
        // just return the string representation to print the ops
        return node.type === 'Literal'
                ? node.value
                : node.name;
    }

    // store current temp.
    // this will be reused so that we can "discard" any temps we create in this node
    const originalTemp = state.nextTemp;

    // simplify each of the children first.
    // for each child we'll need one temp var
    const children = getChildren(node);
    const simplified = children.map(node => {
        const stringRepr = simplify(node);
        state.nextTemp++;
        return stringRepr;
    });

    // update maximum temp variable count;
    state.maxTempCount = Math.max(state.nextTemp - 1, state.maxTempCount);
    // roll back the nextTemp;
    // the parent node will do nextTemp++ as needed for any further ops
    state.nextTemp = originalTemp;

    // and finally, do the operation for this node.
    // at this point, we reuse the original temp
    // e.g. tmp0 = op tpm0 tmp1 tmp2
    state.ops.push({op: node.operator || node.callee.name, args: simplified, temp: originalTemp});

    // the string representation for parent node operation
    return 'tmp' + state.nextTemp;
}

simplify(longExpression);

console.log('Vars needed: ', state.maxTempCount);
console.log(state.ops.map(({op, args, temp}) =>
    `tmp${temp} = ${op} ${args.join(' ')}`));

PS 欢迎反馈,堆栈溢出向导。我确定所有这些都是 well-trodden 地面。