关于 C 语言中字符串字面量的内存分配问题

Question about memory allocation for literal strings in c

我是一名刚开始使用 C 学习计算机系统的学生。

我对字符串字面量的内存是如何分配的有疑问。

我尝试使用以下代码分析字符串字面量在内存中的分配方式:

#include <stdio.h>

int main(){
   char *p = "1";
   printf("%s\n", p);
   printf("%p\n", p);

   p = "2";
   printf("%s\n", p);
   printf("%p\n", p);

   p = "3";
   printf("%s\n", p);
   printf("%p\n", p);

   p = "4";
   printf("%s\n", p);
   printf("%p\n", p);
}

执行后,结果如下:

1
0000000000404000
2
000000000040400A
3
000000000040400C
4
000000000040400E

根据我目前浅薄的知识,以上所有字符串的大小都是 2 字节(包括结尾的 \0),所以分配给每个字符串的空间也应该是 2 字节。

从字符串'2'到'4',内存是按我的预期分配的(地址各相差 2 字节),但是在'1'之后分配'2'时,两者的地址相差了 10 字节,也就是比预期多出了 8 字节的间隔。

这两个字符串之间有什么分配吗?或者系统有什么原因会这样分配字符串?

这段间隔里存放的是 printf 的格式字符串:

0000000000404000 "1\0"
0000000000404002 "%s\n\0"
0000000000404006 "%p\n\0"
000000000040400A "2\0"
000000000040400C "3\0"
000000000040400E "4\0"

请注意,字符串的出现顺序与它们在源代码中出现的顺序相同,但 printf 格式字符串不会在每次调用时重复。由于字符串相同,它们只需要在可执行文件中出现一次。

不过,这个间隔的大小并不总是 8 字节,它取决于操作系统(以及所用的编译工具链)。

我们可以对可执行文件进行反汇编,从代码段中查看各个字符串的地址是如何分配的。

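CentOS(AArch64)上的结果如下,<main> 部分的 add 指令显示了更多细节:在这个平台上,每个字符串的起始地址都相隔 8 字节;另外 printf("%s\n", p) 被编译成了对 puts 的调用,所以 "1" 和 "2" 之间只隔着 "%p\n" 这一个格式字符串。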

[root@localhost zz]# ./a.out 
1
0x4006e8
2
0x4006f8
3
0x400700
4
0x400708
[root@localhost zz]# objdump -d a.out

a.out:     file format elf64-littleaarch64


Disassembly of section .init:

0000000000400408 <_init>:
  400408:       a9bf7bfd        stp     x29, x30, [sp,#-16]!
  40040c:       910003fd        mov     x29, sp
  400410:       94000032        bl      4004d8 <call_weak_fn>
  400414:       a8c17bfd        ldp     x29, x30, [sp],#16
  400418:       d65f03c0        ret

Disassembly of section .plt:

0000000000400420 <.plt>:
  400420:       a9bf7bf0        stp     x16, x30, [sp,#-16]!
  400424:       f00000f0        adrp    x16, 41f000 <__FRAME_END__+0x1e8f4>
  400428:       f947fe11        ldr     x17, [x16,#4088]
  40042c:       913fe210        add     x16, x16, #0xff8
  400430:       d61f0220        br      x17
  400434:       d503201f        nop
  400438:       d503201f        nop
  40043c:       d503201f        nop

0000000000400440 <__libc_start_main@plt>:
  400440:       90000110        adrp    x16, 420000 <__libc_start_main@GLIBC_2.17>
  400444:       f9400211        ldr     x17, [x16]
  400448:       91000210        add     x16, x16, #0x0
  40044c:       d61f0220        br      x17

0000000000400450 <__gmon_start__@plt>:
  400450:       90000110        adrp    x16, 420000 <__libc_start_main@GLIBC_2.17>
  400454:       f9400611        ldr     x17, [x16,#8]
  400458:       91002210        add     x16, x16, #0x8
  40045c:       d61f0220        br      x17

0000000000400460 <abort@plt>:
  400460:       90000110        adrp    x16, 420000 <__libc_start_main@GLIBC_2.17>
  400464:       f9400a11        ldr     x17, [x16,#16]
  400468:       91004210        add     x16, x16, #0x10
  40046c:       d61f0220        br      x17

0000000000400470 <puts@plt>:
  400470:       90000110        adrp    x16, 420000 <__libc_start_main@GLIBC_2.17>
  400474:       f9400e11        ldr     x17, [x16,#24]
  400478:       91006210        add     x16, x16, #0x18
  40047c:       d61f0220        br      x17

0000000000400480 <printf@plt>:
  400480:       90000110        adrp    x16, 420000 <__libc_start_main@GLIBC_2.17>
  400484:       f9401211        ldr     x17, [x16,#32]
  400488:       91008210        add     x16, x16, #0x20
  40048c:       d61f0220        br      x17

Disassembly of section .text:

0000000000400490 <_start>:
  400490:       d280001d        mov     x29, #0x0                       // #0
  400494:       d280001e        mov     x30, #0x0                       // #0
  400498:       910003fd        mov     x29, sp
  40049c:       aa0003e5        mov     x5, x0
  4004a0:       f94003e1        ldr     x1, [sp]
  4004a4:       910023e2        add     x2, sp, #0x8
  4004a8:       910003e6        mov     x6, sp
  4004ac:       580000a0        ldr     x0, 4004c0 <_start+0x30>
  4004b0:       580000c3        ldr     x3, 4004c8 <_start+0x38>
  4004b4:       580000e4        ldr     x4, 4004d0 <_start+0x40>
  4004b8:       97ffffe2        bl      400440 <__libc_start_main@plt>
  4004bc:       97ffffe9        bl      400460 <abort@plt>
  4004c0:       00400594        .word   0x00400594
  4004c4:       00000000        .word   0x00000000
  4004c8:       00400638        .word   0x00400638
  4004cc:       00000000        .word   0x00000000
  4004d0:       004006b8        .word   0x004006b8
  4004d4:       00000000        .word   0x00000000

00000000004004d8 <call_weak_fn>:
  4004d8:       f00000e0        adrp    x0, 41f000 <__FRAME_END__+0x1e8f4>
  4004dc:       f947f000        ldr     x0, [x0,#4064]
  4004e0:       b4000040        cbz     x0, 4004e8 <call_weak_fn+0x10>
  4004e4:       17ffffdb        b       400450 <__gmon_start__@plt>
  4004e8:       d65f03c0        ret
  4004ec:       00000000        .inst   0x00000000 ; undefined

00000000004004f0 <deregister_tm_clones>:
  4004f0:       90000100        adrp    x0, 420000 <__libc_start_main@GLIBC_2.17>
  4004f4:       9100e000        add     x0, x0, #0x38
  4004f8:       90000101        adrp    x1, 420000 <__libc_start_main@GLIBC_2.17>
  4004fc:       9100e021        add     x1, x1, #0x38
  400500:       eb00003f        cmp     x1, x0
  400504:       540000a0        b.eq    400518 <deregister_tm_clones+0x28>
  400508:       90000001        adrp    x1, 400000 <_init-0x408>
  40050c:       f9436c21        ldr     x1, [x1,#1752]
  400510:       b4000041        cbz     x1, 400518 <deregister_tm_clones+0x28>
  400514:       d61f0020        br      x1
  400518:       d65f03c0        ret
  40051c:       d503201f        nop

0000000000400520 <register_tm_clones>:
  400520:       90000100        adrp    x0, 420000 <__libc_start_main@GLIBC_2.17>
  400524:       9100e000        add     x0, x0, #0x38
  400528:       90000101        adrp    x1, 420000 <__libc_start_main@GLIBC_2.17>
  40052c:       9100e021        add     x1, x1, #0x38
  400530:       cb000021        sub     x1, x1, x0
  400534:       9343fc21        asr     x1, x1, #3
  400538:       8b41fc21        add     x1, x1, x1, lsr #63
  40053c:       9341fc21        asr     x1, x1, #1
  400540:       b40000c1        cbz     x1, 400558 <register_tm_clones+0x38>
  400544:       90000002        adrp    x2, 400000 <_init-0x408>
  400548:       f9437042        ldr     x2, [x2,#1760]
  40054c:       b4000062        cbz     x2, 400558 <register_tm_clones+0x38>
  400550:       d61f0040        br      x2
  400554:       d503201f        nop
  400558:       d65f03c0        ret
  40055c:       d503201f        nop

0000000000400560 <__do_global_dtors_aux>:
  400560:       a9be7bfd        stp     x29, x30, [sp,#-32]!
  400564:       910003fd        mov     x29, sp
  400568:       f9000bf3        str     x19, [sp,#16]
  40056c:       90000113        adrp    x19, 420000 <__libc_start_main@GLIBC_2.17>
  400570:       3940e260        ldrb    w0, [x19,#56]
  400574:       35000080        cbnz    w0, 400584 <__do_global_dtors_aux+0x24>
  400578:       97ffffde        bl      4004f0 <deregister_tm_clones>
  40057c:       52800020        mov     w0, #0x1                        // #1
  400580:       3900e260        strb    w0, [x19,#56]
  400584:       f9400bf3        ldr     x19, [sp,#16]
  400588:       a8c27bfd        ldp     x29, x30, [sp],#32
  40058c:       d65f03c0        ret

0000000000400590 <frame_dummy>:
  400590:       17ffffe4        b       400520 <register_tm_clones>

0000000000400594 <main>:
  400594:       a9be7bfd        stp     x29, x30, [sp,#-32]!
  400598:       910003fd        mov     x29, sp
  40059c:       90000000        adrp    x0, 400000 <_init-0x408>
  4005a0:       911ba000        add     x0, x0, #0x6e8
  4005a4:       f9000fa0        str     x0, [x29,#24]
  4005a8:       f9400fa0        ldr     x0, [x29,#24]
  4005ac:       97ffffb1        bl      400470 <puts@plt>
  4005b0:       90000000        adrp    x0, 400000 <_init-0x408>
  4005b4:       911bc000        add     x0, x0, #0x6f0
  4005b8:       f9400fa1        ldr     x1, [x29,#24]
  4005bc:       97ffffb1        bl      400480 <printf@plt>
  4005c0:       90000000        adrp    x0, 400000 <_init-0x408>
  4005c4:       911be000        add     x0, x0, #0x6f8
  4005c8:       f9000fa0        str     x0, [x29,#24]
  4005cc:       f9400fa0        ldr     x0, [x29,#24]
  4005d0:       97ffffa8        bl      400470 <puts@plt>
  4005d4:       90000000        adrp    x0, 400000 <_init-0x408>
  4005d8:       911bc000        add     x0, x0, #0x6f0
  4005dc:       f9400fa1        ldr     x1, [x29,#24]
  4005e0:       97ffffa8        bl      400480 <printf@plt>
  4005e4:       90000000        adrp    x0, 400000 <_init-0x408>
  4005e8:       911c0000        add     x0, x0, #0x700
  4005ec:       f9000fa0        str     x0, [x29,#24]
  4005f0:       f9400fa0        ldr     x0, [x29,#24]
  4005f4:       97ffff9f        bl      400470 <puts@plt>
  4005f8:       90000000        adrp    x0, 400000 <_init-0x408>
  4005fc:       911bc000        add     x0, x0, #0x6f0
  400600:       f9400fa1        ldr     x1, [x29,#24]
  400604:       97ffff9f        bl      400480 <printf@plt>
  400608:       90000000        adrp    x0, 400000 <_init-0x408>
  40060c:       911c2000        add     x0, x0, #0x708
  400610:       f9000fa0        str     x0, [x29,#24]
  400614:       f9400fa0        ldr     x0, [x29,#24]
  400618:       97ffff96        bl      400470 <puts@plt>
  40061c:       90000000        adrp    x0, 400000 <_init-0x408>
  400620:       911bc000        add     x0, x0, #0x6f0
  400624:       f9400fa1        ldr     x1, [x29,#24]
  400628:       97ffff96        bl      400480 <printf@plt>
  40062c:       52800000        mov     w0, #0x0                        // #0
  400630:       a8c27bfd        ldp     x29, x30, [sp],#32
  400634:       d65f03c0        ret

0000000000400638 <__libc_csu_init>:
  400638:       a9bc7bfd        stp     x29, x30, [sp,#-64]!
  40063c:       910003fd        mov     x29, sp
  400640:       a901d7f4        stp     x20, x21, [sp,#24]
  400644:       f00000f4        adrp    x20, 41f000 <__FRAME_END__+0x1e8f4>
  400648:       f00000f5        adrp    x21, 41f000 <__FRAME_END__+0x1e8f4>
  40064c:       91380294        add     x20, x20, #0xe00
  400650:       9137e2b5        add     x21, x21, #0xdf8
  400654:       a902dff6        stp     x22, x23, [sp,#40]
  400658:       cb150294        sub     x20, x20, x21
  40065c:       f9001ff8        str     x24, [sp,#56]
  400660:       9343fe94        asr     x20, x20, #3
  400664:       2a0003f6        mov     w22, w0
  400668:       aa0103f7        mov     x23, x1
  40066c:       aa0203f8        mov     x24, x2
  400670:       97ffff66        bl      400408 <_init>
  400674:       b4000194        cbz     x20, 4006a4 <__libc_csu_init+0x6c>
  400678:       f9000bb3        str     x19, [x29,#16]
  40067c:       d2800013        mov     x19, #0x0                       // #0
  400680:       f8737aa3        ldr     x3, [x21,x19,lsl #3]
  400684:       aa1803e2        mov     x2, x24
  400688:       aa1703e1        mov     x1, x23
  40068c:       2a1603e0        mov     w0, w22
  400690:       91000673        add     x19, x19, #0x1
  400694:       d63f0060        blr     x3
  400698:       eb13029f        cmp     x20, x19
  40069c:       54ffff21        b.ne    400680 <__libc_csu_init+0x48>
  4006a0:       f9400bb3        ldr     x19, [x29,#16]
  4006a4:       a941d7f4        ldp     x20, x21, [sp,#24]
  4006a8:       a942dff6        ldp     x22, x23, [sp,#40]
  4006ac:       f9401ff8        ldr     x24, [sp,#56]
  4006b0:       a8c47bfd        ldp     x29, x30, [sp],#64
  4006b4:       d65f03c0        ret

00000000004006b8 <__libc_csu_fini>:
  4006b8:       d65f03c0        ret

Disassembly of section .fini:

00000000004006bc <_fini>:
  4006bc:       a9bf7bfd        stp     x29, x30, [sp,#-16]!
  4006c0:       910003fd        mov     x29, sp
  4006c4:       a8c17bfd        ldp     x29, x30, [sp],#16
  4006c8:       d65f03c0        ret