C 语言中的二维数组是否需要事先知道其大小？

Question

比较下面这两段代码：

/* Variant 1: ar is declared as a pointer to pointer to int.
 * Prints the element at index [rows - 1][cols - 1]. */
void foo(int rows, int cols, int **ar)
{
  /* ar[rows - 1] is read as an int * and then indexed with cols - 1. */
  printf("%d\n", ar[rows - 1][cols - 1]);
}

和

/* Variant 2: ar is declared with both dimensions taken from the
 * preceding parameters. Prints the element at [rows - 1][cols - 1]. */
void foo(int rows, int cols, int ar[rows][cols])
{
  /* The row length cols is part of ar's type, so the row index is
   * scaled by cols ints. */
  printf("%d\n", ar[rows - 1][cols - 1]);
}

对应的 main 函数为：

/* Builds a 3x2 int array and passes it to foo together with its
 * dimensions. */
int main()
{
  int ar[3][2] = {{1, 2}, {3, 4}, {5, 6}};
  foo(3, 2, ar);
}

对于第一个 foo（参数只是一个二级指针），程序会异常终止；第二个 foo 指定了数组的维度，能打印出正确的结果。这是为什么？数组传给函数时，不是作为指针传递的吗？

根据汇编输出，两者的结果应该是相同的：关键都在于计算相对于数组起始位置的偏移量。从汇编代码可以看出，第一个数字 (1) 存放在 -32(%rbp)，而想要的结果 (6) 存放在 -12(%rbp)。因此（经过相应的计算之后），两段汇编最终得到的地址都应该是 -32(%rbp) + 20。

第一段汇编：

# ----------------------------------------------------------------------
# void foo(int rows, int cols, int **ar)     (GCC output, AT&T syntax)
# In:     %edi = rows, %esi = cols, %rdx = ar
# Effect: calls printf("%d\n", v), where v is the int loaded from
#         P + 4*cols - 4, and P is the 8-byte value loaded from
#         ar + 8*rows - 8.
# Stack:  frame pointer in %rbp, 16 bytes of locals.
# ----------------------------------------------------------------------
.text
    .section    .rodata
.LC0:
    .string "%d\n"
    .text
    .globl  foo
    .type   foo, @function
foo:
    endbr64
    pushq   %rbp    #
    movq    %rsp, %rbp  #,
    subq    $16, %rsp   #,
    movl    %edi, -4(%rbp)  # rows, rows
    movl    %esi, -8(%rbp)  # cols, cols
    movq    %rdx, -16(%rbp) # ar, ar
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    movl    -4(%rbp), %eax  # rows, tmp92
    cltq
    salq    $3, %rax    #, _2      (rows * 8: each ar[i] is an 8-byte pointer)
    leaq    -8(%rax), %rdx  #, _3  ((rows - 1) * 8)
    movq    -16(%rbp), %rax # ar, tmp93
    addq    %rdx, %rax  # _3, _4
    movq    (%rax), %rdx    # *_4, _5  (loads 8 bytes from memory as the row pointer)
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    movl    -8(%rbp), %eax  # cols, tmp94
    cltq
    salq    $2, %rax    #, _7      (cols * 4: each element is a 4-byte int)
    subq    $4, %rax    #, _8      ((cols - 1) * 4)
    addq    %rdx, %rax  # _5, _9

# NOTE(review): the annotation below is the question author's claim. As the
# instructions above show, %rax here is the 8-byte value loaded by
# `movq (%rax), %rdx` plus 4*(cols - 1), not ar + 20.
# FINAL ADDRESS RESOLUTION (IN REGISTER %rax) IS `-32(%rbp) + 20` (WHICH IS CORRECT ADDRESS OF NUMBER `6`)

# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    movl    (%rax), %eax    # *_9, _10
    movl    %eax, %esi  # _10,
    leaq    .LC0(%rip), %rdi    #,
    movl    $0, %eax    #,         (variadic call: no vector-register arguments)
    call    printf@PLT  #
# b.c:6: }
    nop
    leave
    ret
    .size   foo, .-foo
    .globl  main
    .type   main, @function
# ----------------------------------------------------------------------
# int main()                                 (GCC output, AT&T syntax)
# Stores ar[3][2] = {{1, 2}, {3, 4}, {5, 6}} at -32(%rbp) .. -12(%rbp),
# calls foo(3, 2, ar) with %rdx = address of ar[0][0], and returns 0.
# A stack-protector canary is kept at -8(%rbp) and checked before return.
# ----------------------------------------------------------------------
main:
    endbr64
    pushq   %rbp    #
    movq    %rsp, %rbp  #,
    subq    $32, %rsp   #,
# b.c:9: {
    movq    %fs:40, %rax    # MEM[(<address-space-1> long unsigned int *)40B], tmp86
    movq    %rax, -8(%rbp)  # tmp86, D.2350
    xorl    %eax, %eax  # tmp86
# b.c:10:   int ar[3][2] = {{1, 2}, {3, 4}, {5, 6}};
    movl    $1, -32(%rbp)   #, ar[0][0]
    movl    $2, -28(%rbp)   #, ar[0][1]
    movl    $3, -24(%rbp)   #, ar[1][0]
    movl    $4, -20(%rbp)   #, ar[1][1]
    movl    $5, -16(%rbp)   #, ar[2][0]
    movl    $6, -12(%rbp)   #, ar[2][1]
# b.c:11:   foo(3, 2, ar);
    leaq    -32(%rbp), %rax #, tmp84
    movq    %rax, %rdx  # tmp84,   (3rd argument: address of ar[0][0])
    movl    $2, %esi    #,         (2nd argument: cols)
    movl    $3, %edi    #,         (1st argument: rows)
    call    foo #
    movl    $0, %eax    #, _10
# b.c:12: }
    movq    -8(%rbp), %rcx  # D.2350, tmp87
    subq    %fs:40, %rcx    # MEM[(<address-space-1> long unsigned int *)40B], tmp87
    je  .L4 #,
    call    __stack_chk_fail@PLT    #
.L4:
    leave
    ret

第二段汇编是：

# ----------------------------------------------------------------------
# void foo(int rows, int cols, int ar[rows][cols])   (GCC output, AT&T syntax)
# In:     %edi = rows, %esi = cols, %rdx = ar
# Effect: calls printf("%d\n", v), where v is the int loaded from
#         ar + 4*cols*(rows - 1) + 4*(cols - 1).
# Stack:  frame pointer in %rbp; %rbx is pushed on entry and reloaded
#         from -8(%rbp) before leave; 40 bytes of locals.
# ----------------------------------------------------------------------
.text
    .section    .rodata
.LC0:
    .string "%d\n"
    .text
    .globl  foo
    .type   foo, @function
foo:
    endbr64
    pushq   %rbp    #
    movq    %rsp, %rbp  #,
    pushq   %rbx    #
    subq    $40, %rsp   #,
    movl    %edi, -36(%rbp) # rows, rows
    movl    %esi, -40(%rbp) # cols, cols
    movq    %rdx, -48(%rbp) # ar, ar
# b.c:3: void foo(int rows, int cols, int ar[rows][cols])
    movl    -40(%rbp), %eax # cols, cols.0_6
    movslq  %eax, %rdx  # cols.0_6, _1
    subq    $1, %rdx    #, _2
# b.c:3: void foo(int rows, int cols, int ar[rows][cols])
    movq    %rdx, -24(%rbp) # _2, D.2346
    movslq  %eax, %rdx  # cols.0_6, _4
    movq    %rdx, %rcx  # _4, _5
    movl    $0, %ebx    #, _5
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    movl    -36(%rbp), %edx # rows, tmp99
    subl    $1, %edx    #, _9
    movslq  %edx, %rdx  # _9, _10
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    cltq
    imulq   %rdx, %rax  # _10, _12     (cols * (rows - 1))
    leaq    0(,%rax,4), %rdx    #, _13 (scaled by 4 bytes per int)
    movq    -48(%rbp), %rax # ar, tmp100
    addq    %rax, %rdx  # tmp100, _14  (address of row rows - 1, no memory load)
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    movl    -40(%rbp), %eax # cols, tmp101
    subl    $1, %eax    #, _15
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    cltq
    movl    (%rdx,%rax,4), %eax # (*_14)[_15], _16

# NOTE(review): the address is formed by the operand (%rdx,%rax,4) above;
# %eax receives the loaded value.
# AGAIN, THE FINAL ADDRESS RESOLUTION (IN REGISTER %eax) IS `-32(%rbp) + 20` (WHICH IS CORRECT ADDRESS OF NUMBER `6`)

    movl    %eax, %esi  # _16,
    leaq    .LC0(%rip), %rdi    #,
    movl    $0, %eax    #,         (variadic call: no vector-register arguments)
    call    printf@PLT  #
# b.c:6: }
    nop
    movq    -8(%rbp), %rbx  #,
    leave
    ret
    .size   foo, .-foo
    .globl  main
    .type   main, @function
# ----------------------------------------------------------------------
# int main()                                 (GCC output, AT&T syntax)
# Stores ar[3][2] = {{1, 2}, {3, 4}, {5, 6}} at -32(%rbp) .. -12(%rbp),
# calls foo(3, 2, ar) with %rdx = address of ar[0][0], and returns 0.
# A stack-protector canary is kept at -8(%rbp) and checked before return.
# ----------------------------------------------------------------------
main:
    endbr64
    pushq   %rbp    #
    movq    %rsp, %rbp  #,
    subq    $32, %rsp   #,
# b.c:9: {
    movq    %fs:40, %rax    # MEM[(<address-space-1> long unsigned int *)40B], tmp86
    movq    %rax, -8(%rbp)  # tmp86, D.2355
    xorl    %eax, %eax  # tmp86
# b.c:10:   int ar[3][2] = {{1, 2}, {3, 4}, {5, 6}};
    movl    $1, -32(%rbp)   #, ar[0][0]
    movl    $2, -28(%rbp)   #, ar[0][1]
    movl    $3, -24(%rbp)   #, ar[1][0]
    movl    $4, -20(%rbp)   #, ar[1][1]
    movl    $5, -16(%rbp)   #, ar[2][0]
    movl    $6, -12(%rbp)   #, ar[2][1]
# b.c:11:   foo(3, 2, ar);
    leaq    -32(%rbp), %rax #, tmp84
    movq    %rax, %rdx  # tmp84,   (3rd argument: address of ar[0][0])
    movl    $2, %esi    #,         (2nd argument: cols)
    movl    $3, %edi    #,         (1st argument: rows)
    call    foo #
    movl    $0, %eax    #, _10
# b.c:12: }
    movq    -8(%rbp), %rcx  # D.2355, tmp87
    subq    %fs:40, %rcx    # MEM[(<address-space-1> long unsigned int *)40B], tmp87
    je  .L4 #,
    call    __stack_chk_fail@PLT    #
.L4:
    leave
    ret

那么，既然两段汇编都使用相同的地址来取得数字 6，为什么一个程序会异常终止，而另一个却能正常打印？

Answer 1

声明的数组

int ar[3][2] = {{1, 2}, {3, 4}, {5, 6}};

在表达式中用作函数实参时，会被隐式转换为 int ( * )[2] 类型（即指向其首元素的指针）。int ** 和 int ( * )[2] 是不兼容的指针类型，所以第一个函数调用是不正确的，会导致未定义行为。

注意在两个函数调用中传递的地址相同，它是数组第一个元素的地址。

但是在第一个函数中，表达式 ar[rows - 1] 解引用后期望得到一个 int * 类型的指针，而这块内存中实际存放的却是数组元素的值。

这里有一个演示程序。

#include <stdio.h>

/* Prints, as a pointer value, whatever is read from ar[rows - 1]
 * when ar is treated as an int **. cols is not used. */
void foo(int rows, int cols, int **ar)
{
    printf( "%p\n", ( void * )ar[rows - 1] );
}

/* Passes the 3x2 array to foo through an explicit cast to int **, so
 * that foo prints what it reads back as a "row pointer". */
int main(void) 
{
    int ar[3][2] = {{1, 2}, {3, 4}, {5, 6}};
    /* The cast forces the incompatible pointer type expected by foo. */
    foo(3, 2, ( int ** )ar);
  
    return 0;
}

它的输出可能看起来像

0x600000005

也就是说，在解引用指针 ar 之后，数组元素被当成了指针来解释：在 int 为 4 字节、指针为 8 字节的小端机器上，0x600000005 正是相邻的两个元素 5 和 6 拼在一起得到的值。因此，再次解引用这个“指针”就会访问任意内存。

编译器是根据对象的声明类型来决定如何解释内存及其中的值，并据此生成汇编代码的。同一个内存地址如果被当作不同的类型，生成的汇编代码也就不同。

Answer 2

int **ar:

                                       <----------cols------------------->
                                       +------+------+------+- ~ -+------+
                               ,------>| int  | int  | int  |     | int  |
+------+        ^ +------+    /        +------+------+------+- ~ -+------+
|int** +------->| | int* +---'         <----------cols------------------->
+------+        r +------+             +------+------+------+- ~ -+------+
                o | int* +------------>| int  | int  | int  |     | int  |
                w +------+             +------+------+------+- ~ -+------+
                s ~      ~             <----------cols------------------->
                | +------+             +------+------+------+- ~ -+------+
                | | int* +------------>| int  | int  | int  |     | int  |
                v +------+             +------+------+------+- ~ -+------+

int (*ar)[cols]:

                     <----------cols------------------->
+-------------+    ^ +------+------+------+- ~ -+------+
|int(*)[cols] +--->| | int  | int  | int  |     | int  |
+-------------+    r +------+------+------+- ~ -+------+
                   o | int  | int  | int  |     | int  |
                   w +------+------+------+- ~ -+------+
                   s ~      ~      ~      ~     ~      ~
                   | +------+------+------+- ~ -+------+
                   | | int  | int  | int  |     | int  |
                   v +------+------+------+- ~ -+------+

C 语言中的二维数组是否需要事先知道其大小？

Does 2D array need to know its size beforehand in C?

c

assembly

pointers

function-parameter

multidimensional-array