尝试将此汇编代码转换为 C 代码

Question

# x at %ebp+8, n at %ebp+12
movl 8(%ebp), %esi
movl 12(%ebp), %ebx
movl $-1, %edi
movl $1, %edx
.L2:
movl %edx, %eax
andl %esi, %eax
xorl %eax, %edi
movl %ebx, %ecx
sall %cl, %edx
testl %edx, %edx
jne .L2
movl %edi, %eax

我把上面的代码转换成了下面的代码，但是我不完全确定它是否正确。

    /*
     * Asker's attempted C translation of the assembly listing, as posted.
     * Starts result at -1, XORs n into it on every iteration and returns it.
     * NOTE(review): `mask` is used below but never declared in this snippet,
     * so it does not compile as written.
     */
    int loop(int x, int n){

    int result = -1;

    /* mask starts at 1, is compared against result, and is reset to the
       lowest bit of x after every iteration */
    for (mask = 1; mask >= result; mask = x&1) {

    result ^= n;

    }
return result;
}

x 和 n 是存储在相对于 %ebp 的内存位置上的两个整数，它们被加载到寄存器 %esi 和 %ebx 中。result 和 mask 的初始值分别为 -1 和 1，这来自代码的第一部分。我认为循环从 .L2: 之后开始，这正是我感到困惑的地方。最后通过 movl %edi, %eax 返回结果。

Answer 1

您的代码完全错误。你应该在发帖之前自己做一些测试。

首先，你代码中的mask没有在你的函数中声明。

然后，将 mask 声明为 int 之后，函数 loop 将陷入无限循环，因为 result 不会通过 `result ^= n;` 变为正数。另一方面，除非 n 是 32 的倍数（包括零），否则汇编代码不会陷入死循环。

要将汇编代码转换为 C 代码：

1. 我直接把汇编逐条翻译成了 C。

注意我使用了无符号类型 uint32_t，原因如下：

- 使用无符号类型，是因为对有符号整数做左移时，如果发生溢出或被移位的值为负，会导致未定义行为。
- 使用 uint32_t，是因为 unsigned int 的大小取决于环境，可能小于 32 位，而这里使用的寄存器（%cl 除外）都是 32 位宽。

引自N1570 6.5.7 移位运算符：

4 The result of E1 << E2 is E1 left-shifted E2 bit positions; vacated bits are filled with zeros. If E1 has an unsigned type, the value of the result is E1 × 2^E2, reduced modulo one more than the maximum value representable in the result type. If E1 has a signed type and nonnegative value, and E1 × 2^E2 is representable in the result type, then that is the resulting value; otherwise, the behavior is undefined.

另请注意，必须包含 stdint.h 或 inttypes.h 才能使用 uint32_t。

在 80286 或更高版本的 x86 CPU 中，移位宽度被屏蔽为 5 位长。

/*
 * Register-by-register C transcription of the assembly listing: every
 * statement mirrors exactly one instruction, named in its trailing comment.
 *
 * x, n  - the two arguments (read from 8(%ebp) and 12(%ebp) in the assembly).
 * Returns the final value of edi.
 *
 * Does not terminate when (n & 31) == 0, because edx is then never changed
 * by the shift and never becomes 0.
 */
uint32_t loop(uint32_t x, uint32_t n) {
    uint32_t esi = x;              /* movl 8(%ebp), %esi         */
    uint32_t ebx = n;              /* movl 12(%ebp), %ebx        */
    uint32_t edi = -1;             /* movl $-1, %edi             */
    uint32_t edx = 1;              /* movl $1, %edx              */
    uint32_t eax, ecx;
    do {                           /* .L2:                       */
        eax = edx;                 /* movl %edx, %eax            */
        eax &= esi;                /* andl %esi, %eax            */
        edi ^= eax;                /* xorl %eax, %edi            */
        ecx = ebx;                 /* movl %ebx, %ecx            */
        /* & 0xff selects %cl (low byte of %ecx); & 31 models the 5-bit
           masking of the shift count */
        edx <<= (ecx & 0xff) & 31; /* sall %cl, %edx             */
    } while (edx != 0);            /* testl %edx, %edx ; jne .L2 */
    eax = edi;                     /* movl %edi, %eax            */
    return eax;
}

2. 我引入了有意义的变量名，以明确它们的作用。

/*
 * Same computation as the register-level transcription, with the argument
 * copies removed and edi/edx renamed to result/mask. eax and ecx are kept
 * as scratch variables so the statement order still follows the assembly.
 *
 * Returns result after mask has been shifted out to 0.
 * Does not terminate when (n & 31) == 0, because mask then never changes.
 */
uint32_t loop(uint32_t x, uint32_t n) {
    uint32_t result = -1;
    uint32_t mask = 1;
    uint32_t eax, ecx;
    do {
        /* toggle in result the bit of x selected by mask */
        eax = mask;
        eax &= x;
        result ^= eax;
        /* shift count: low byte of n, reduced to 5 bits */
        ecx = n;
        mask <<= (ecx & 0xff) & 31;
    } while (mask != 0);
    return result;
}

3. 我合并了一些表达式。

/*
 * Condensed do-while form of the translation.
 *
 * Starting from all ones, toggles in the result every bit of x that is
 * selected by a mask which starts at 1 and is shifted left by (n & 31)
 * after each step, until the mask has been shifted out to 0.
 *
 * Does not terminate when (n & 31) == 0, because the mask never changes.
 */
uint32_t loop(uint32_t x, uint32_t n) {
    const uint32_t shift = n & 31; /* only the low 5 bits of the count are used */
    uint32_t mask = 1;
    uint32_t result = -1;
    do {
        result ^= x & mask;
        mask <<= shift;
    } while (mask);
    return result;
}

4. 我把 do 循环改成了 for 循环，因为您的尝试中使用的是 for 循环。

/*
 * for-loop form of the translation.
 *
 * result starts as all ones; for each mask value 1, 1 << s, 1 << 2s, ...
 * (s = n & 31) the bit of x selected by the mask is toggled in result.
 * The loop ends once the mask has been shifted out to 0.
 *
 * Does not terminate when (n & 31) == 0, because the mask never changes.
 */
uint32_t loop(uint32_t x, uint32_t n) {
    uint32_t result = -1;
    for (uint32_t mask = 1; mask; mask <<= (n & 31)) {
        result ^= x & mask;
    }
    return result;
}

用于测试的完整代码和 demo:

#include <stdio.h>
#include <inttypes.h>
#include <limits.h>

/*
 * Reference implementation: the assembly from the question, wrapped in a
 * 32-bit x86 stack frame so that it can be called from C as
 * loop_asm(x, n). The result is returned in %eax.
 *
 * %esi, %edi and %ebx are saved and restored around the body because the
 * body overwrites them.
 */
__asm__ (
/* support both environments that do and don't add an underscore before function names */
"loop_asm:\n"
"_loop_asm:\n"
"push %ebp\n"
"mov %esp, %ebp\n"
"push %esi\n"
"push %edi\n"
"push %ebx\n"

"# x at %ebp+8, n at %ebp+12\n"
"movl 8(%ebp), %esi\n"
"movl 12(%ebp), %ebx\n"
"movl $-1, %edi\n"
"movl $1, %edx\n" /* mask starts at 1; the immediate operand is required */
".L2_test:\n" /* rename .L2 to .L2_test to avoid collision */
"movl %edx, %eax\n"
"andl %esi, %eax\n"
"xorl %eax, %edi\n"
"movl %ebx, %ecx\n"
"sall %cl, %edx\n"
"testl %edx, %edx\n"
"jne .L2_test\n"
"movl %edi, %eax\n"

"pop %ebx\n"
"pop %edi\n"
"pop %esi\n"
"leave\n"
"ret\n"
);

uint32_t loop_asm(uint32_t, uint32_t);

/*
 * C conversion of the assembly routine, used by the test driver for
 * comparison against loop_asm().
 *
 * Walks a single-bit mask upward from bit 0 in steps of (n & 31) bits and
 * toggles, in an accumulator that starts as all ones, each bit of x the
 * mask lands on. Returns the accumulator once the mask has been shifted
 * out to 0.
 *
 * Does not terminate when (n & 31) == 0, because the mask never changes.
 */
uint32_t loop_convert(uint32_t x, uint32_t n) {
    const uint32_t step = n & 31;
    uint32_t acc = ~(uint32_t)0;
    uint32_t bit = 1;

    while (bit) {
        acc ^= x & bit;
        bit <<= step;
    }
    return acc;
}

/* File-scope loop variable used by loop() below. */
int mask;

/*
 * The asker's attempted translation, kept for comparison in the demo.
 *
 * Starts an accumulator at -1 and mask at 1; while mask >= accumulator it
 * XORs n into the accumulator and sets mask to the lowest bit of x.
 * Returns the accumulator.
 *
 * The loop only ends once the accumulator exceeds mask, so it does not
 * terminate for inputs such as loop(100, 5).
 */
int loop(int x, int n)
{
    int acc = -1;

    mask = 1;
    while (mask >= acc) {
        acc ^= n;
        mask = x & 1;
    }
    return acc;
}

/*
 * Test driver.
 *
 * 1. Compares loop_asm() with loop_convert() for every n in 1..31 and
 *    every x in 0..99999, printing each mismatch and the total count.
 * 2. Prints the results of loop_asm(), loop_convert() and loop() for one
 *    example input.
 *
 * Returns 0.
 */
int main(void) {
    int x, n;
    uint32_t raw, test, conv;
    int miss_count = 0;
    /* search for mismatch in some range;
       n starts at 1 because a shift count of 0 never clears the mask,
       so both functions would loop forever */
    for (n = 1; n < 32; n++) {
        uint32_t x_test;
        for (x_test = 0; x_test < UINT32_C(100000); x_test++) {
            /* compare on the loop variable x_test, not on x, which is
               not assigned until the examples below */
            if (loop_asm(x_test, n) != loop_convert(x_test, n)) {
                printf("mismatch at x=%"PRIu32", n=%d\n", x_test, n);
                /* saturate instead of overflowing the counter */
                if (miss_count < INT_MAX) miss_count++;
            }
        }
    }
    printf("%d mismatch(es) found.\n", miss_count);
    /* print some examples */
    x = 100;
    n = 5;
    raw = loop_asm(x, n);
    conv = loop_convert(x, n);
    printf("loop_asm(%d, %d) = %"PRIu32"\n", x, n, raw);
    printf("loop_convert(%d, %d) = %"PRIu32"\n", x, n, conv);
    /* flush first: loop() does not terminate for these arguments, so
       output still buffered at this point could otherwise be lost */
    fflush(stdout);
    test = loop(x, n);
    printf("loop(%d, %d) = %"PRIu32"\n", x, n, test);
    return 0;
}

尝试将此汇编代码转换为 C 代码

Trying to convert this Assembly code to C code

x86

assembly

gcc

gnu-assembler