如何在 JavaScript 中正确添加这两个模拟通用寄存器?

How can I properly add these two simulated General Purpose Registers in JavaScript?

在 JavaScript 中,为了进行一些研究,我试图从头开始模拟遵循一些 x86-64 汇编指令的过程。第一步是正确启动并能够使用任何两个寄存器作为运算符执行基本数学运算。由于通用寄存器中的每个较小的寄存器都是较大寄存器的一部分,因此我将 16 个 GPR 寄存器启动为 ArrayBuffers,然后使用 Register class 在 16 个缓冲区上创建正确的视图。

但是我的数学运算必须能够处理 64 位和更大的寄存器大小,所以我的 getOperand 方法试图创建一个 BigUint64Array,其中包含 ArrayBuffer 的任何部分不应包含在清零操作中。 BigUInt64Array 正在初始化为比应有的值大得多的值。

当你 运行 这个例子时你就会看到。我什至不确定我是否会这样做。有人可以解释一下改进它的最佳方法,或者我在做什么有什么问题吗?

注意:如果您 F12 开发控制台而不是 SO 呈现的日志,则此代码段中记录的类型化数组和缓冲区更容易阅读。

const registerContainers = {
    GPR: [],
}
for (let i = 0; i < 16; i++) {
    registerContainers.GPR.push(new ArrayBuffer(8))
}

const registers = {}

/**
 * Creates a new Uint8Array based on two different ArrayBuffers
 *
 * @private
 * @param {ArrayBuffers} buffer1 The first buffer.
 * @param {ArrayBuffers} buffer2 The second buffer.
 * @return {ArrayBuffers} The new ArrayBuffer created out of the two.
 */
function joinArrayBuffers(buffer1, buffer2) {
    var tmp = new Uint8Array(buffer1.byteLength + buffer2.byteLength);
    tmp.set(new Uint8Array(buffer1), 0);
    tmp.set(new Uint8Array(buffer2), buffer1.byteLength);
    console.log('joining array buffers:', buffer1, buffer2, ":", tmp.buffer)
    return tmp.buffer;
  };

function padArrayBufferTo64(arrBuffer){
    console.log('padding arrayBuffer to mult of 64 (8, 16, ...):', arrBuffer)
    if (arrBuffer.byteLength === 8) {
        console.log('arrBuffer.byteLength', arrBuffer.byteLength)
        return arrBuffer
    } else {
        const remainder = arrBuffer.byteLength % 8
        const arrBufferPad = new ArrayBuffer(remainder)
        console.log('joinArrayBuffers(arrBufferPad, arrBuffer).byteLength', joinArrayBuffers(arrBufferPad, arrBuffer).byteLength)
        return joinArrayBuffers(arrBufferPad, arrBuffer)
    }
}

class ArithmeticLogicUnit {
    constructor(){}
    add(args) {
        const operand1 = args.operands[0].getOperand()
        const operand2 = args.operands[1].getOperand()
        const target = args.target
        console.log(operand1, '+', operand2, '=', operand1 + operand2)
        target.set(operand1 + operand2)
    }
}

class Register {
    constructor(name, container, offset, bytes){
        registers[name] = this
        this.container = container
        this.bytes = bytes
        this.offset = offset
    }
    byteArray(){
        return new Uint8Array(this.container)
    }
    getOperand(){
        if (this.bytes === this.container.byteLength) return new BigUint64Array(padArrayBufferTo64(this.container))
        else return new BigUint64Array(padArrayBufferTo64(this.container.slice(this.offset, this.container.length)))
    }
    read(){
        if (this.bytes === this.container.byteLength) return this.byteArray()
        else return this.byteArray().subarray(this.offset, this.container.length)
    }
    readHexStr(){
        return Array.from(this.read()).map(x => {
            return x.toString(16).padStart(2, '0')
        }).join(' ');
    }
    set(value){
        console.log('register', this.read())
        console.log('value to set', value)
        return this.read().set(value)
    }
}

// # General Purpose Registers

/*

Note: Usage during syscall/function call:

    - First six arguments are in rdi, rsi, rdx, rcx, r8d, r9d; remaining arguments are on 
      the stack.
    - For syscalls, the syscall number is in rax.
    - Return value is in rax.
    - The called routine is expected to preserve rsp, rbp, rbx, r12, r13, r14, and r15 but
      may trample any other registers.

*/

/*

Note: Other usage of pointers:

    - AX multiply/divide, string load & store
    - BX index register for MOVE
    - CX count for string operations & shifts
    - DX port address for IN and OUT
    - SP points to top of the stack
    - BP points to base of the stack frame
    - SI points to a source in stream operations
    - DI points to a destination in stream operations

*/

// ## Data Registers

new Register('rax', registerContainers.GPR[0], 0, 8) // Register A Extended
new Register('eax', registerContainers.GPR[0], 4, 4)
new Register('ax',  registerContainers.GPR[0], 6, 2) // multiply/divide, string load & store
new Register('ah',  registerContainers.GPR[0], 6, 1)
new Register('al',  registerContainers.GPR[0], 7, 1)

new Register('rbx', registerContainers.GPR[1], 0, 8) // Register B Extended
new Register('ebx', registerContainers.GPR[1], 4, 4) 
new Register('bx',  registerContainers.GPR[1], 6, 2) // index register for MOVE
new Register('bh',  registerContainers.GPR[1], 6, 1)
new Register('bl',  registerContainers.GPR[1], 7, 1)

new Register('rcx', registerContainers.GPR[2], 0, 8) // Register C Extended
new Register('ecx', registerContainers.GPR[2], 4, 4)
new Register('cx',  registerContainers.GPR[2], 6, 2) // count for string operations & shifts
new Register('ch',  registerContainers.GPR[2], 6, 1)
new Register('cl',  registerContainers.GPR[2], 7, 1)

new Register('rdx', registerContainers.GPR[3], 0, 8) // Register D Extended
new Register('edx', registerContainers.GPR[3], 4, 4)
new Register('dx',  registerContainers.GPR[3], 6, 2) // port address for IN and OUT
new Register('dh',  registerContainers.GPR[3], 6, 1)
new Register('dl',  registerContainers.GPR[3], 7, 1)

// ## Pointer Registers

new Register('rsp', registerContainers.GPR[4], 0, 8)
new Register('esp', registerContainers.GPR[4], 4, 4)
new Register('sp',  registerContainers.GPR[4], 6, 2) // points to top of the stack
new Register('spl', registerContainers.GPR[4], 7, 1)

new Register('rbp', registerContainers.GPR[4], 0, 8)
new Register('ebp', registerContainers.GPR[4], 4, 4)
new Register('bp',  registerContainers.GPR[4], 6, 2) // points to base of the stack frame
new Register('bpl', registerContainers.GPR[4], 7, 1)

new Register('rsi', registerContainers.GPR[4], 0, 8)
new Register('esi', registerContainers.GPR[4], 4, 4)
new Register('si',  registerContainers.GPR[4], 6, 2) // points to a source in stream operations
new Register('sil', registerContainers.GPR[4], 7, 1)

new Register('rdi', registerContainers.GPR[4], 0, 8)
new Register('edi', registerContainers.GPR[4], 4, 4)
new Register('di',  registerContainers.GPR[4], 6, 2) // points to a destination in stream operations
new Register('dil', registerContainers.GPR[4], 7, 1)

new Register('r8',  registerContainers.GPR[4], 0, 8)
new Register('r8d', registerContainers.GPR[4], 4, 4)
new Register('r8w', registerContainers.GPR[4], 6, 2)
new Register('r8b', registerContainers.GPR[4], 7, 1)

new Register('r9',  registerContainers.GPR[4], 0, 8)
new Register('r9d', registerContainers.GPR[4], 4, 4)
new Register('r9w', registerContainers.GPR[4], 6, 2)
new Register('r9b', registerContainers.GPR[4], 7, 1)

new Register('r10',     registerContainers.GPR[4], 0, 8)
new Register('r10d',    registerContainers.GPR[4], 4, 4)
new Register('r10w',    registerContainers.GPR[4], 6, 2)
new Register('r10b',    registerContainers.GPR[4], 7, 1)

new Register('r11',     registerContainers.GPR[4], 0, 8)
new Register('r11d',    registerContainers.GPR[4], 4, 4)
new Register('r11w',    registerContainers.GPR[4], 6, 2)
new Register('r11b',    registerContainers.GPR[4], 7, 1)

new Register('r12',     registerContainers.GPR[4], 0, 8)
new Register('r12d',    registerContainers.GPR[4], 4, 4)
new Register('r12w',    registerContainers.GPR[4], 6, 2)
new Register('r12b',    registerContainers.GPR[4], 7, 1)

new Register('r13',     registerContainers.GPR[4], 0, 8)
new Register('r13d',    registerContainers.GPR[4], 4, 4)
new Register('r13w',    registerContainers.GPR[4], 6, 2)
new Register('r13b',    registerContainers.GPR[4], 7, 1)

new Register('r14',     registerContainers.GPR[4], 0, 8)
new Register('r14d',    registerContainers.GPR[4], 4, 4)
new Register('r14w',    registerContainers.GPR[4], 6, 2)
new Register('r14b',    registerContainers.GPR[4], 7, 1)

new Register('r15',     registerContainers.GPR[4], 0, 8)
new Register('r15d',    registerContainers.GPR[4], 4, 4)
new Register('r15w',    registerContainers.GPR[4], 6, 2)
new Register('r15b',    registerContainers.GPR[4], 7, 1)

const ALU = new ArithmeticLogicUnit()
registers.eax.set(Uint8Array.from([0x1f, 0x1f, 0x1f, 0x1f]))
console.log('updated eax', registers.eax.read())
console.log('updated eax', registers.eax.readHexStr())
registers.ebx.set(Uint8Array.from([0x1f, 0x1f, 0x1f, 0x1f]))
console.log('updated ebx', registers.ebx.read())
console.log('updated ebx', registers.ebx.readHexStr())
registers.ecx.set(Uint8Array.from([0x0, 0x0, 0x0, 0x0]))
console.log('updated ecx', registers.ecx.read())
console.log('updated ecx', registers.ecx.readHexStr())
ALU.add({
    operands: [registers.eax, registers.ebx],
    target: registers.ecx
})

不要搞得这么复杂。 joinArrayBufferspadArrayBufferTo64 非常低效,请注意缓冲区和类型化数组在 JS 中有相当多的开销 - 它们旨在保存大型二进制数据,而不是单个值,你应该尝试一次创建它们之后只有 read/write 给他们。

与其尝试对所有操作数使用 BigUint64Array 并在缓冲区中移动,我建议为较小的寄存器使用适当大小的类型化数组,并在访问后将数字转换为 bigint数组(如果你的所有 ALU 操作都需要 bigints - 32 位 ALU 可能更有效地实现)。

const gprBuffer = new ArrayBuffer(16 * 8);

class Register {
    constructor(type, offset) {
        this.container = new type(gprBuffer, offset, 1);
        this.byteArray = new Uint8Array(gprBuffer, offset, type.BYTES_PER_ELEMENT);
    }
}
class NumberRegister extends Register {
    constructor(type, offset) {
        super(type, offset);
        this.mod = 2n ** BigInt(8 * type.BYTES_PER_ELEMENT);
    }
    read() {
        return BigInt(this.container[0]);
    }
    write(val) {
        this.container[0] = Number(val % this.mod);
    }
}
class BigIntRegister extends Register {
    constructor(type, offset) {
        console.assert(type == BigUint64Array);
        super(type, offset);
    }
    read() {
        return this.container[0];
    }
    write(val) {
        this.container[0] = val;
    }
}

function makeRegister(base, bitsize, byteoffset) {
    const arrayType = {8: Uint8Array, 16: Uint16Array, 32: Uint32Array, 64: BigUint64Array}[bitsize];
    const registerType = bitsize > 53 ? BigIntRegister : NumberRegister
    return new registerType(arrayType, base * 8 + byteoffset);
}
const registers = {
    rax: makeRegister(0, 64, 0),
    eax: makeRegister(0, 32, 4),
    ax: makeRegister(0, 16, 6),
    ah: makeRegister(0, 8, 6),
    al: makeRegister(0, 8, 7),
    rbx: makeRegister(1, 64, 0),
    ebx: makeRegister(1, 32, 4),
    bx: makeRegister(1, 16, 6),
    bh: makeRegister(1, 8, 6),
    bl: makeRegister(1, 8, 7),
    // …
};
console.log(registers)

但是,请注意类型化数组具有任意字节序,您可能不希望模拟器出现这种情况。相反,考虑使用(单个)DataView 来控制字节顺序,并且可以使用不同的方法以任意偏移量将单个值写入 gprBuffer