"Relocation truncated to fit" 尝试编写准系统 64 位内核时

Question

我正在尝试遵循 OSDev 的“Higher Half x86 Bare Bones”教程（此前我已经多次完整做过该教程），并对其进行修改，把内核映射到 PML4 的高半部分，而不是 32 位页表的高半部分。之所以要自己改，是因为各教程的汇编语法不统一：Bare Bones 系列使用 GNU 语法，而那里唯一的 64 位教程使用与之不兼容的 MASM 语法。

到目前为止，我得到了这个 235 行的混乱：

# Assemble 32-bit code until the .code64 directive later in this file.
.code32

# Values for the Multiboot header fields.
.equ ALIGN,    0x1              # flag bit 0: align loaded modules on page boundaries
.equ MEMINFO,  0x2              # flag bit 1: provide memory map
.equ FLAGS,    ALIGN | MEMINFO  # the header's 'flags' field
.equ MAGIC,    0x1BADB002       # magic number the bootloader searches for
.equ CHECKSUM, -(MAGIC + FLAGS) # MAGIC + FLAGS + CHECKSUM must sum to zero

# The header itself: three consecutive 32-bit words on a 4-byte boundary.
.section .multiboot
.balign 4
.int MAGIC, FLAGS, CHECKSUM

# Boot stack: 16 KiB of zero-fill (@nobits) space, 16-byte aligned.
# stack_bottom labels the lowest address, stack_top the first byte past the end.
.section .boot_stack, "aw", @nobits
.balign 16
stack_bottom:
.space 16 * 1024
stack_top:

.section .bss, "aw", @nobits

# Boot page tables for the 64-bit higher-half mapping. Four 4 KiB tables are
# laid out back to back, the first on a 4 KiB boundary. The long-mode setup
# code chains them as pml4_root -> pml4_pdptr -> pml4_dir -> pml4_bpt0 and
# loads pml4_root into CR3.
  .balign 4096

.global pml4_root
pml4_root:
  .space 4096

.global pml4_pdptr
pml4_pdptr:
  .space 4096

.global pml4_dir
pml4_dir:
  .space 4096

.global pml4_bpt0
pml4_bpt0:
  .space 4096
#TODO: PML5

# 64-bit kernel GDT: three 8-byte descriptors (null, code, data).
.section .gdt
gdt_start:
null:
  .word 0xffff      # Limit (bits 0-15)
  .word 0           # Base (low)
  .byte 0           # Base (middle)
  .byte 0           # Access
  .byte 1           # Flags (high nibble) / limit bits 16-19 (low nibble)
  .byte 0           # Base (high)
code:
  .word 0           # Limit (bits 0-15)
  .word 0           # Base (low)
  .byte 0           # Base (middle)
  .byte 0b10011010  # Access: present, ring 0, code segment, readable
  .byte 0b10101111  # Flags: G=1, L=1 (64-bit code); limit bits 16-19 = 0xF
  .byte 0           # Base (high)
data:
  .word 0           # Limit (bits 0-15)
  .word 0           # Base (low)
  .byte 0           # Base (middle)
  .byte 0b10010010  # Access: present, ring 0, data segment, writable
  .byte 0b00000000  # Flags / limit bits 16-19
  .byte 0           # Base (high)
gdt_end:

# NOTE(review): gdtp labels no bytes - the two .equ lines below only define
# assembler symbols. An lgdt that reads from gdtp would need a 16-bit limit
# followed by the table base to be emitted here; confirm the intended layout.
.global gdtp
gdtp:
  .align 8
  .equ gdt_len, gdt_end - gdt_start - 1   # GDT limit: table size in bytes minus one
  .equ gdt_addr, 0xffff000000000000       # kernel base address; a plain expression, no '$'

# Fatal-error stubs, reached by conditional jumps from the CPU feature checks
# in _start. Each one parks the CPU forever.
.section .text

# Diagnostic strings. They are kept out of .text so that jumping to
# NoLongMode / NoCPUID never executes ASCII bytes as instructions.
# Nothing in this file prints them yet.
.pushsection .rodata
no_long_mode_msg:
  .ascii "Error: Long Mode not detected"
no_cpuid_msg:
  .ascii "Error: could not determine CPUID"
.popsection

.global NoLongMode
NoLongMode:
  hlt
  jmp NoLongMode    # unconditional: `loop` would fall through once ECX hit zero

.global NoCPUID
NoCPUID:
  hlt
  jmp NoCPUID       # unconditional: `loop` would fall through once ECX hit zero

.global _start
.type _start, @function
_start:

# 32-bit part of the entry point: check for CPUID and long-mode support, set
# CR4.PAE and EFER.LME, turn paging on, then continue at `longmode`.
#
# NOTE(review), visible in this file and worth confirming before relying on it:
#   - no stack pointer is loaded before the pushfl/popl sequence below;
#   - paging is re-enabled here, but CR3 is only loaded (and the page tables
#     only filled) in the code after `longmode`;
#   - `lgdt (gdt_len)` uses the numeric GDT limit as a memory address;
#   - `jmp longmode` is a near jump, so CS is not reloaded.
setup_64:

    # Block interrupts until we have the IDT
    cli

    # CPUID detection: try to flip the ID bit (bit 21) of EFLAGS.
    pushfl
    popl %eax                   # eax = current EFLAGS
    movl %eax, %ecx             # ecx = original EFLAGS, kept for compare/restore
    xorl $(1<<21), %eax         # toggle the ID bit

    pushl %eax
    popfl                       # try to write the toggled value
    pushfl
    popl %eax                   # read back what the CPU accepted
    pushl %ecx
    popfl                       # restore the original EFLAGS

    # If the bit could not be changed, CPUID does not exist
    xorl %ecx, %eax
    jz NoCPUID

    # Long mode detection, part 1: is extended leaf 0x80000001 available?
    movl $0x80000000, %eax
    cpuid
    cmpl $0x80000001, %eax
    jb NoLongMode

    # Long mode detection, part 2: test the LM bit (EDX bit 29)
    movl $0x80000001, %eax
    cpuid
    testl $(1<<29), %edx
    jz NoLongMode

    # Temporarily disable paging (clear CR0 bit 31) until it is properly set up
    movl %cr0, %eax
    andl $0x7FFFFFFF, %eax
    movl %eax, %cr0

    # PAE: set CR4 bit 5
    movl %cr4, %eax
    orl $(1<<5), %eax
    movl %eax, %cr4

    # LM-bit: set bit 8 of MSR 0xC0000080 (EFER)
    movl $0xC0000080, %ecx
    rdmsr
    orl $(1<<8), %eax
    wrmsr

    # Reenable paging (set CR0 bit 31)
    movl %cr0, %eax
    orl $(1<<31), %eax
    movl %eax, %cr0

    # Clear all 32-bit registers to shut linker up
    movl $0, %eax
    movl $0, %ecx

    # GDT + LM jump
    lgdt (gdt_len)
    jmp longmode

    # Actually enter 64-bit mode for good.
    # From here on: fill the boot page table, chain the four paging levels,
    # load CR3, jump to the link-time (higher-half) address and call kernel_main.
    # Physical addresses are written as `symbol - 0xffff000000000000`, i.e. the
    # link address minus the kernel base.
    .code64
longmode:
    # rdi = cursor into the first boot page table, rsi = physical address to map
    movabsq $(pml4_bpt0 - 0xffff000000000000), %rdi #Physical address of first boot page table
    movabsq $0, %rsi #First address to map

    #64-bit entries are double the size of 32-bit entries but table size is the same
    # rcx = iteration count consumed by `loop` below; entry 511 is filled separately
    movabsq $511, %rcx

1:
    #Kernel mapping: skip pages below _kernel_start, stop at _kernel_end
    cmpq $(_kernel_start - 0xffff000000000000), %rsi
    jl 2f
    cmpq $(_kernel_end - 0xffff000000000000), %rsi
    jge 3f

    #Map physical address space as present+writable (low bits 0x003)
    movq %rsi, %rdx
    orq $0x003, %rdx
    movq %rdx, (%rdi)

2:
    addq $4096, %rsi  #page size in bytes
    addq $8, %rdi     #size of page entries
    loop 1b           #loop if unfinished

3:
    #Video memory location: map 0xB8000 through the last entry (511) of the table
    movabsq $(0x00000000000B8000 | 0x003), %rax
    movq %rax, pml4_bpt0 - 0xffff000000000000 + 511 * 8

    #Map first kernel page to the first kernel PDT (entries 0 and 384)
    movabsq $(pml4_bpt0 - 0xffff000000000000 + 0x003), %rax
    movq %rax, pml4_dir - 0xffff000000000000 + 0
    movabsq $(pml4_bpt0 - 0xffff000000000000 + 0x003), %rax
    movq    %rax, pml4_dir - 0xffff000000000000 + 384 * 8

    #Map first kernel PDT to first kernel PDPT (entries 0 and 384)
    movabsq $(pml4_dir - 0xffff000000000000 + 0x003), %rax
    movq %rax, pml4_pdptr - 0xffff000000000000 + 0
    movabsq $(pml4_dir - 0xffff000000000000 + 0x003), %rax
    movq %rax, pml4_pdptr - 0xffff000000000000 + 384 * 8

    #Map first kernel PDPT to the PML4T (entries 0 and 384)
    movabsq $(pml4_pdptr - 0xffff000000000000 + 0x003), %rax
    movq %rax, pml4_root - 0xffff000000000000 + 0
    movabsq $(pml4_pdptr - 0xffff000000000000 + 0x003), %rax
    movq %rax, pml4_root - 0xffff000000000000 + 384 * 8

    #Set third control register to address of PML4T
    movabsq $(pml4_root - 0xffff000000000000), %rcx
    movq %rcx, %cr3

    #Jump to 64-bit higher half
    # No (%rip) here: the address of label 4 is encoded as an absolute,
    # sign-extended 32-bit displacement.
    leaq 4f, %rcx
    jmpq *%rcx

4:
    #Reload PML4T along with all of its children, incl kernel pages
    movq %cr3, %rcx
    movq %rcx, %cr3
    movabsq $stack_top, %rsp

    #Hand control to the kernel proper
    callq kernel_main

    # If kernel_main ever returns: mask interrupts and halt forever
    cli
5:  hlt
    jmp 5b

.size _start, . - _start

在我开始使用 movabs 等指令之前，链接器报了很多错误；改用之后，链接器错误从大约 20 个减少到只剩 1 个：

boot64.o: in function `longmode':
(.text+0x18b): relocation truncated to fit: R_X86_64_32S against `.text'
collect2: error: ld returned 1 exit status

如果链接器能给出出错位置的行号，这个问题会很容易解决，但它并没有。因此，如果有人能帮忙找出有问题的那一行，我将不胜感激。

链接器脚本与教程中使用的脚本相同，只有一个例外（硬编码地址是 0xFFFF000000000000 而不是 0xC0000000），如果有帮助的话。

Answer 1

原始 32 位代码使用 lea 4f, %ecx / jmp *%ecx 将 EIP 设置为一个由链接器脚本决定的绝对地址，而不是相对于当前 EIP 的地址。（lea 4f, %ecx 等价于 mov $4f, %ecx，只是效率更低：两者都是把 32 位绝对地址放入寄存器。）

lea 4f, %rcx 只能使用能放进 32 位符号扩展 disp32 寻址模式的绝对地址（x86-64 的寻址模式就是这样工作的）。这正是 relocation truncated to fit: R_X86_64_32S against `.text' 的含义：目标文件元数据中的 32S 重定位要求把正确的绝对地址编码为 32 位符号扩展值。但由于您大概把链接器脚本改成了 . = 0xFFFF000000000000 而不是 . = 0xC0100000;，标签 4 的地址有效位太多，放不进这个字段。

lea 4f(%rip), %rcx 可以通过汇编，但达不到目的；那样还不如直接 jmp 4f，或者只放一条 nop，甚至什么都不做。它计算的是相对于当前 RIP 的地址，而不是链接器脚本所指定的地址。如果您在调试器中单步执行并查看 RIP，就会发现按这个建议做之后 RIP 并不是您想要的值。

你想要的是 movabs $4f, %rcx，它可以用 64 位立即数保存完整的 64 位地址。间接跳转的目的是把 RIP 设置为一个已知的绝对高地址，因此不能用相对于当前 RIP 的方式来计算该地址。尽管 x86-64 让位置无关代码更容易编写，但这里恰恰需要避免位置无关的做法。

请记住，在执行这条 jmp *%rcx 之前，您的代码运行时的 RIP 与链接器脚本中使用的地址并不一致。例如，在 BOCHS 内置的调试器中单步执行，就可以看到这一点。

如果您将内核放在虚拟地址空间顶部的 2GiB 范围内，lea 4f, %rcx 就可以正常工作。（但 mov $4f, %rcx 仍然会更好。）7 字节的 mov $sign_extended_imm32, %rcx 比 10 字节的 movabs $imm64, %rcx 更高效；在其他条件相同的情况下，代码越小越好。

高半内核是极少数适合用 mov $sign_extended_imm32, %r64 把静态地址放入寄存器的场合之一；通常（在引导/初始化这类设置代码之外）您需要的是 RIP 相对的 LEA。或者，如果已知地址位于虚拟地址空间的低 2GiB 内（例如非 PIE Linux 可执行文件的用户空间代码），可以使用 mov $imm32, %r32。

让内核的静态代码/数据位于虚拟地址空间顶部的 2GiB 范围内，还意味着您可以使用像 array(%rdx) 这样的寻址模式，其中 array 的地址被编码为符号扩展的 disp32。这与 Linux 非 PIE 可执行文件的情况相同，只不过这里只有符号扩展可行，零扩展不行。

我建议按照@MichaelPetch 的建议进行操作，并使用 0xFFFFFFFF80000000 作为内核基地址。

顺便说一句，如果您的映像在 jmp 之前运行于一个已知的绝对虚拟地址，您可以使用带有很大负位移的直接相对跳转 jmp rel32，让 RIP 从较小的正值回绕到 2GiB 的“高半区”内。不过，我不确定是否有简单的方法让链接器替您算出这个位移，所以 mov $abs_address, %rcx / jmp *%rcx 肯定更容易；而且这段代码只在内核启动并运行之前执行，所以这里的代码大小只影响内核映像的总大小。

其他内容

    #Clear all 32-bit registers to shut the linker up
    movl $0, %eax
    movl $0, %ecx

什么？这是没有意义的。此外，如果您想将寄存器清零，xor %eax,%eax 是最佳方法。

    #64-bit GDT must be loaded BEFORE the switch to actual 64-bit address space - see https://wiki.osdev.org/Creating_a_64-bit_kernel for more details
    lgdt (gdtp)

GAS 接受这一点，但内存操作数的标准语法只是简单的符号名称。 lgdt 没有什么特别之处，它仍然使用 ModR/M 寻址方式，就像 add gdtp, %eax 一样。 lgdt 从其内存操作数加载一个指针 + 长度。

lgdt gdtp 将是使用符号的绝对地址作为寻址模式的更标准语法。但是如果你喜欢 (symbol) 来提醒它是一个内存操作数，那也没关系。

您的其他一些代码看起来效率低下；使用大量绝对地址而不是简单的指针增量或偏移量。

"Relocation truncated to fit" 尝试编写准系统 64 位内核时

"Relocation truncated to fit" when attempting to code a barebones 64-bit kernel

assembly

gnu-assembler

osdev

其他内容