在汇编和 C++ 之间共享数组时出现段错误

Question

我正在编写一个在汇编和 C++ 之间共享状态的程序。我在汇编文件中声明了一个全局数组，并在 C++ 的一个函数中访问该数组。从 C++ 中调用该函数时没有任何问题，但随后从汇编中调用同一个函数时，却出现了段错误。我相信我在函数调用前后正确保存了相关寄存器。

奇怪的是，当我把 C++ 中的指针类型改为 uint64_t 指针时，它能正确输出数值，但在将其转换为 uint64_t 之后又会出现段错误。

在下面的代码中，一直报错的数组是currentCPUState。

//CPU.cpp
// Register snapshot shared with the assembly side; the storage itself is
// defined in cpu.s under the same symbol name.
extern uint64_t currentCPUState[6];

// C linkage so these names match the unmangled symbols used in cpu.s:
// initInternalState is defined there, and printCPUState is called from there.
extern "C" {
    void initInternalState(void* instructions, int indexSize);
    void printCPUState();
}

// Prints the first slot of currentCPUState, once with printf and once with
// std::cout. Called from the CPU constructor and from initGBCpu in cpu.s.
void printCPUState() {
    uint64_t b = currentCPUState[0];
    // NOTE(review): %d expects an int, but b is a uint64_t.
    printf("%d\n", b);        //this line DOESNT crash ???
    std::cout << b << "\n"; //this line crashes

    //omitted some code for the sake of brevity

    std::cout << "\n";
}

// Zeroes the six currentCPUState slots, prints the state once from C++, and
// then calls the assembly routine initInternalState with `instructions` and
// sizeof(void*).
CPU::CPU() {
    //set initial cpu state
    currentCPUState[AF] = 0;
    currentCPUState[BC] = 0;
    currentCPUState[DE] = 0;
    currentCPUState[HL] = 0;
    currentCPUState[SP] = 0;
    currentCPUState[PC] = 0;
    
    printCPUState(); //this has no issues

    // sizeof(void*) is passed as indexSize; the addop macro in cpu.s adds that
    // value to its write pointers after each stored entry.
    initInternalState(instructions, sizeof(void*));
}

//cpu.s
.section .data
    .balign 8

    // 8 KiB of zero-filled storage; initGBCpu writes opcode handler addresses
    // into it through the addop macro.
    instructionArr:
        .space 8 * 1024, 0

    //stores values of registers
    //used for transitioning between C and ASM
    //uint64_t currentCPUState[6]
    // Exported so the C++ side can reach it through its extern declaration.
    .global currentCPUState
    currentCPUState:
        .quad 0, 0, 0, 0, 0, 0

.section .text
    // initInternalState(rcx: void* instructions, rdx: int indexSize)
    // Entry point called from C++. Copies the incoming rcx/rdx arguments into
    // rdi/rsi, which is where initGBCpu expects them, then calls initGBCpu.
    // rdi, rsi and r12-r15 are saved on entry and restored before returning.
    .global initInternalState
    initInternalState:
        push %rdi
        push %rsi
        mov %rcx, %rdi
        mov %rdx, %rsi

        push %R12
        push %R13
        push %R14
        push %R15

        // NOTE(review): six 8-byte pushes precede this call and rsp is not
        // adjusted any further -- check the stack alignment at this point
        // against the calling convention in use.
        call initGBCpu

        pop %R15
        pop %R14
        pop %R13
        pop %R12

        pop %rsi
        pop %rdi
        ret

    //omitted unimportant code
    //initGBCpu(rdi: void* instructions, rsi:int size)
    //function initializes the array of opcodes
    // rdx is saved on entry and restored on exit; in between it serves as the
    // write pointer into instructionArr.
    initGBCpu:
        pushq %rdx
        //move each instruction into the array in proper order
        //also fill the instructionArr
        leaq instructionArr(%rip), %rdx

        // Each addop stores one handler address through rdi and through rdx,
        // then advances both pointers by rsi (it also overwrites rcx).
        addop inst0x00
        addop inst0x01
        addop inst0x02
        addop inst0x03
        addop inst0x04

        // loadCPUState, saveCPUState and the inst0x.. handlers are not shown
        // in this excerpt.
        call loadCPUState
        call inst0x04   //inc BC
        call saveCPUState
        call printCPUState    //CRASHES HERE
        popq %rdx
        ret

其他详细信息：操作系统：Windows 64 位；编译器：MinGW64-w；架构：x64

任何见解将不胜感激

编辑： addop 是一个宏：

//adds an opcode to the array of functions
// Loads the address of \lbl into rcx, stores it at the locations rdi and rdx
// point to, then advances rdi and rdx by rsi. Overwrites rcx.
.macro addop lbl
    leaq \lbl (%rip), %rcx
    mov %rcx, 0(%rdi)
    mov %rcx, 0(%rdx)
    add %rsi, %rdi
    add %rsi, %rdx
.endm

Answer 1

某些 x86-64 调用约定（calling conventions）要求在调用函数之前必须将堆栈对齐到 16 字节边界。

调用函数后，一个 8 字节的返回地址会被压入栈中，因此必须再向栈中添加 8 字节的数据才能满足这个对齐要求。否则，某些具有对齐要求的指令（如某些 SSE 指令）可能会崩溃。

假设应用了这样的调用约定，initGBCpu 函数看起来没问题，但是 initInternalState 函数必须在调用 initGBCpu 函数之前再向堆栈添加一个 8 字节的东西。

例如：

    // initInternalState(rcx: void* instructions, rdx: int indexSize)
    // Corrected entry point: same as the version in the question, except that
    // rsp is moved down by 8 bytes around the call so that the stack is on a
    // 16-byte boundary when initGBCpu is called.
    initInternalState:
        push %rdi
        push %rsi
        mov %rcx, %rdi
        mov %rdx, %rsi

        push %R12
        push %R13
        push %R14
        push %R15

        // The return address (8 bytes) plus the six pushes above (48 bytes)
        // leave rsp 8 bytes off a 16-byte boundary; one extra 8-byte slot
        // restores the alignment required at the call.
        sub $8, %rsp // adjust stack alignment
        call initGBCpu
        add $8, %rsp // undo the stack pointer movement

        pop %R15
        pop %R14
        pop %R13
        pop %R12

        pop %rsi
        pop %rdi
        ret

在汇编和 C++ 之间共享数组时出现段错误

Segfault sharing array between assembly and C++

c++

64-bit