程序集 x86 中链表值的总和

Question

我正在尝试计算两个 linked 列表的总和值，每个列表代表一个数字。列表中的每个 link 包含 5 个字节。第一个字节表示 link 的值（4 位表示一个十六进制数字和 4 个零），其他 4 个字节是列表中下一个 link 的地址。我还分配了称为 "stack" 的字节，其中包含指向每个 linked 列表的第一个 link 的指针。每个列表中的第一个 link 是它所代表的数字的低位数字。我正在尝试计算堆栈中前两个列表的值。我迭代它们并考虑进位，然后每次迭代将结果推入堆栈并最终从堆栈中弹出结果并创建具有总和值的新列表。

代码被截断：

我的辅助功能：

section .data

    firstLink: dd 0
    stackSize: dd 0
    stackCurSizeBytes: dd 0
    stackPointer: dd 0
    stackCurSize: dd 0
    nextLink: dd 0
    data: db 0
    carry: db 0
    numPush: dd 0 

%macro addNewLink 1
    mov [add], %1
    push 5
    call malloc
    add esp, 4

    mov bl, [add]
    mov [eax], byte bl 
    mov edx, [firstLink]
    mov [eax+1], edx
    mov [firstLink], eax
    mov [add], byte 0

%endmacro


%macro getNext 1 ; get pointer to link and return pointer to next link
    mov eax, %1
    mov eax, [eax+1]
    mov [nextLink], eax
%endmacro

%macro pushLinkedList 0
    mov edx, [firstLink]
    mov ecx, [stackPointer]
    add ecx, [stackCurSizeBytes]
    mov [ecx], edx
    inc dword [stackCurSize]
    add [stackCurSizeBytes], dword 4
%endmacro

%macro getLinkData 1 ; gets pointer to link and return its data (byte)
    mov ebx, %1
    mov ebx, [ebx]
    mov [data], dword ebx
%endmacro

求和函数：

ratorPlus:
    cmp [stackCurSize], dword 1
    jle  argumentError

    mov edx, [stackPointer] ;edx has the top operand pointer
    add edx, [stackCurSizeBytes]
    sub edx, 4
    mov edx, [edx]

    dec dword [stackCurSize] ; dec stack
    sub [stackCurSizeBytes], dword 4 ; dec stack

    mov ecx, [stackPointer] ;ecx has the second top operand pointer
    add ecx, [stackCurSizeBytes]
    sub ecx, 4
    mov ecx, [ecx]

    dec dword [stackCurSize] ; dec stack
    sub [stackCurSizeBytes], dword 4 ; dec stack

    mov [firstLink], dword 0    ;initialize function params
    mov [numPush], dword 0
    mov [carry], byte 0

    checkLen3:
        cmp edx, dword 0x00
        je endFirstNumber3
        cmp ecx, dword 0x00
        je SecondEndedFirstNot3
        ;both numbers still not null
        getLinkData edx

        mov ebx, [data] ;ebx is the first list data
        mov [tempreg], dword ebx
        getLinkData ecx

        mov ebx, [tempreg]
        mov eax, [data] ;eax is the second list data

        add ebx, eax    ;preform plus and and stores the result in ebx
        add ebx, [carry]

        cmp ebx, 0xF
        jle noCary1
        mov [carry], byte 1
        sub ebx, 0x10
        jmp endCary1
        noCary1:
        mov [carry], byte 0
        jmp endCary1
        endCary1:
        push ebx
        inc dword [numPush]
        getNext edx
        mov edx, [nextLink]
        getNext ecx
        mov ecx, [nextLink]
        jmp checkLen3

    endFirstNumber3:    
    cmp ecx, dword 0x00
    je BothEnded3
    ;second is longer
        getLinkData ecx
        mov eax, [data] ;eax is the data
        add eax, [carry]
        cmp eax, 0xF
        jle noCary2
        mov [carry], byte 1
        sub eax, 0x10
        jmp endCary2
        noCary2:
        mov [carry], byte 0
        jmp endCary2
        endCary2:
        push eax
        inc dword [numPush]
        getNext ecx
        mov ecx, [nextLink]
        jmp checkLen3

    SecondEndedFirstNot3: ;first s longer
        getLinkData edx
        mov eax, [data] ;eax is the data
        add eax, [carry]
        cmp eax, 0xF
        jle noCary3
        mov [carry], byte 1
        sub eax, 0x10
        jmp endCary3
        noCary3:
        mov [carry], byte 0
        jmp endCary3
        endCary3:
        push eax
        inc dword [numPush]
        getNext edx
        mov edx, [nextLink]

        jmp checkLen3

    BothEnded3:

        cmp [carry], byte 1 ;both numbers ended
        jne BothEnded2Loop
        push 1
        inc dword [numPush]
        BothEnded2Loop:
        cmp [numPush], dword 0
        je endRatorPlus
        pop eax
        addNewLink al
        dec dword [numPush]
        jmp BothEnded2Loop

    endRatorPlus:
        mov [numPush], dword 0
        mov [carry], dword 0
        pushLinkedList
        jmp running

我认为这里的逻辑很简单，但不知何故它不能很好地工作，我也尝试使用 gdb 对其进行调试，但随后发生了奇怪的事情。

Answer 1

db 0只有1个字节，但ebx和eax是双字寄存器。您正在使用双字存储操作覆盖内存中的后续数据，和/或使用双字加载将垃圾加载到高字节中。使用 movzx eax, byte [symbol] 加载和零扩展一个字节。使用 mov [symbol], al 存储一个字节。您的代码中有几个地方的注释显示 "byte" 并且代码明确指定 "dword"...

仅作记录，一位数的容器是 just about the worst imaginable data structure for a BigInteger。使用链表而不是数组会使这个 much 更糟，而且非常愚蠢，因为大多数操作都会一次为一个数字分配或释放所有节点，因此将它们作为单独的分配使用指针会浪费大量时间 space.

也就是说，如果您的数字确实很大，使用调用堆栈暂时保存所有 N 位数字是行不通的，而且这是不必要的工作。

此外，您的代码过于复杂。根本不需要任何静态存储，几乎不需要任何堆栈 space。（只有一个指针）。您可以在计算新数字时即时 malloc。

我很好奇更高效的版本会是什么样子，所以我写了一个。 未测试。

;; UNTESTED
extern malloc

;; Node *llplus(const Node *L1, const Node *L2);
;; returns a pointer to the head of a newly-allocated linked list
global llplus
llplus:
    push    ebp
    push    ebx                 ; save call-preserved regs for locals that survive malloc
    push    esi
    push    edi
    mov     edi, [esp+16 + 4]        ; Node *L1
    mov     esi, [esp+16 + 8]        ; Node *L2
    sub     esp, 12                  ; align the stack for malloc, space for an arg a [esp] and locals

    xor     ebx, ebx                 ; carry  from last iteration
    lea     ebp, [esp+7]             ; dummy first node will get a pointer to the first real malloced node at [esp+8]

.mainloop:
    mov     dword [esp], 5
    call    malloc              ; malloc(5)
    mov     [ebp+1], eax        ; tail->next = p
    mov     ebp, eax            ; tail = p

    add     bl, [edi]
    add     bl, [esi]           ; sum = carry + L1->val + L2->val
    movzx   eax, bl
    shr     bl, 4               ; carry for next time = sum>>4
    and     al, 0xf
    mov     [ebp+0], al         ; tail->val = digit = sum & 0xf

    mov     edi, [edi+1]        ; L1 = L1->next
    mov     esi, [esi+1]
    test    edi, edi
    jz     .L1_end
    test    esi, esi
    jnz    .mainloop
    ; else fall through
.L2_end:                        ; do{ propagate carry to the end of L1
    ; another version of the same loop that only reads from one linked list
    mov     dword [esp], 5
    call    malloc
    mov     [ebp+1], eax        ; tail->next = p
    mov     ebp, eax            ; tail = p

    add     bl, [edi]           ; carry += L1->val
    movzx   eax, bl
    shr     bl, 4               ; carry for next time = sum>>4
    and     al, 0xf
    mov     [ebp+0], al         ; tail->val = digit = sum & 0xf

    mov     edi, [edi+1]
    test    edi, edi
    jnz     .L2_end             ; }while(L1 = L1->next);

    test    bl, bl
    jnz    .final_carry_out

.both_end:
    mov     dword [ebp+1], 0    ; tail->next = NULL
    mov     eax, [esp+8]        ; return dummy_head->next = first result node.
    add     esp, 12
    pop     edi
    pop     esi
    pop     ebx
    pop     ebp
    ret

.L1_end:
    test  esi, esi
    jz    .both_end

    mov   edi, esi       ; L1 = L2
    jmp   .L2_end        ; and use the other cleanup

.final_carry_out:
    mov     dword [esp], 5
    call    malloc
    mov     [ebp+1], eax      ; tail->next = p
    mov     byte [eax], bl    ; p->val = carry = 1
    mov     ebp, eax          ; tail = p
    jmp     .both_end

L2_end 循环重复与主循环相同的大部分代码。如果我们有一个 指向自身 并且有 val = 0 的虚拟链表节点，我们也许可以避免它。然后结束条件可以检查 NULL 或那个虚拟节点并跳回主循环。这可能会也可能不会节省整体静态代码大小，但会使分支更加复杂。

为了避免本地需要所有 4 个调用保留寄存器（包括 EBP），我们可以将迭代之间的进位保留在内存中。但是，如果 malloc 非常快，那可能会在存储/重新加载延迟方面造成瓶颈。（然后 EBX 可以自由地保存一个输出指针）。

请注意，将两个 4 位值相加（即使有进位）最多只能产生 5 位值，因此进位是结果的 1<<4 位。

.mainloop:
    mov     dword [esp], 5
    call    malloc              ; malloc(5)

    btr     [ebx], 4            ; CF = bit 4 of tail->val, and clear it
    mov     [ebx+1], eax        ; tail->next = p
    mov     ebp, eax            ; tail = p

    movzx   eax, [edi]
    adc     al, [esi]           ; sum = CF + L1->val + L2->val
    mov     [ebp+0], al         ; tail->val = sum = carry:digit, carry to be cleared later


    ...  ; after loops

    btr    byte [ebx], 4
    jc   .final_carry_out

btr 是位测试和复位。如果它在前一次迭代仅执行字节存储之后执行双字加载和存储，则可能会导致此处的存储转发停顿。如果你真的关心性能，你一开始就不会做这样的事情，或者你会使用 EBP 作为另一个指针。或者编写寄存器不那么差的 64 位代码。

程序集 x86 中链表值的总和

Sum of linked list values in assembly x86

x86

assembly

linked-list

bigint