在 64 位内核中读取较高半地址的页面错误
Page faults with reads of higher half addresses in 64-bit kernel
我正在用 Rust 和 NASM 汇编器编写一个 64 位高半(higher-half)内核,并使用兼容 Multiboot2 的引导加载程序(GRUB2)来加载它。内核在 QEMU 中运行时会触发页面错误(0x0e 异常),我不明白原因。问题出现在我的汇编代码中,此时还没有执行到用 Rust 编写的代码。
我正在设置分页以便内存看起来像:
0000000000000000: 0000000000000000 --PDA---W
0000000000200000: 0000000000200000 --P-----W
ffffff0000000000: 0000000000000000 --P-----W
ffffff7f80000000: 0000000000000000 X-P------
(这既是我想要的布局,也是 QEMU 的 info mem 命令显示的结果)
页表的内容如下:
p4: # pml4
0o000 <- p3_low | PRESENT | WRITABLE
0o776 <- p3_hgh | PRESENT | WRITABLE
p3_low: # pdpte
0o000 <- p2_low | PRESENT | WRITABLE
p3_hgh: # pdpte
0o000 <- p2_krn | PRESENT | WRITABLE
0o776 <- p2_mbi | PRESENT | WRITABLE
p2_low: # pde
0o000 <- 0o000000_000_000_000_000_0000 | PRESENT | WRITABLE | PAGESIZE
0o001 <- 0o000000_000_000_001_000_0000 | PRESENT | WRITABLE | PAGESIZE
p2_krn: # pde
0o000 <- 0o000000_000_000_000_000_0000 | PRESENT | WRITABLE | PAGESIZE
p2_mbi: # pde
0o000 <- 0o000000_000_000_000_000_0000 | PRESENT | PAGESIZE | NOEXEC
其他一切都归零。
我项目中的相关代码在这些文件中:
macros64.asm
:
; pte_write table, index, address, flags
; Stores (flags | address) into the 8-byte entry at [table + 8*index].
;   %1 = base label of the table to write into
;   %2 = entry index (immediate or register)
;   %3 = address to store in the entry (label, immediate or register)
;   %4 = flag bits to combine with the address
; Clobbers rax (loaded with %4 first, so %3 must not be rax).
; The address in %3 is ORed in exactly as given; it is not masked or aligned.
%macro pte_write 4
mov rax, %4
or rax, %3
mov qword [%1+8*%2], rax
%endmacro
paging64.asm
:
; Bounds of the kernel image, defined outside this file
; (presumably by the linker script -- TODO confirm).
extern kernel_start
extern kernel_end
; Flag bits ORed into page-table entries by pte_write.
; Bit 0: entry is present.
p_present equ (1<<0)
; Bit 1: writes are allowed.
p_writable equ (1<<1)
; Bit 2: user-mode access allowed (not used by the code in this file).
p_user equ (1<<2)
; Bit 7: page-size bit; the p2_* entries that set it are written with 2 MiB strides.
p_pagesize equ (1<<7)
; Bit 63: no-execute.
p_noexec equ (1<<63)
[section .text]
; enable_paging: clear and fill the page tables reserved in .bss, then load
; the address of p4 into CR3.
; In:       rdi = address of the multiboot2 info structure.
; Clobbers: rax, rcx, rsi, r9, r10, xmm1, flags.
enable_paging:
; Calculate start and end address of the multiboot2 info structure.
;   r9  = info address rounded down to a 4 KiB boundary.
;   r10 = info address plus the dword stored at its start (presumably the
;         structure's total size -- TODO confirm), moved up to the next
;         4 KiB boundary. The addition is done in 32 bits (r10d), which
;         zeroes the upper half of r10.
mov r9, rdi
mov r10, r9
add r10d, dword [r9]
and r9, 0xfffffffffffff000
shr r10, 12
inc r10
shl r10, 12
; Clear out all the page tables, 16 bytes per iteration, using the zeroed
; 16-byte block at `blank`.
movaps xmm1, [blank]
mov rcx, page_tables_start
.clear_page_tables_loop:
movaps [rcx], xmm1
add rcx, 16
cmp rcx, page_tables_end
; NOTE(review): jl is a signed comparison; for addresses the unsigned jb is
; the usual choice. Both behave the same while the addresses are below 2^63.
jl .clear_page_tables_loop
; TODO Uncomment the recursive page mappings once things actually work -- for now, they just make "info tlb" in QEMU annoying to read.
; Fill out the P4 table.
; Entry 0o000 covers the low identity mapping; entry 0o776 covers the
; higher-half region starting at 0xffffff0000000000.
pte_write p4, 0o000, p3_low, p_present | p_writable
pte_write p4, 0o776, p3_hgh, p_present | p_writable
; pte_write p4, 0o777, p4, p_present | p_writable | p_noexec
; Fill out the P3 tables.
pte_write p3_low, 0o000, p2_low, p_present | p_writable
; pte_write p3_low, 0o777, p3_low, p_present | p_writable | p_noexec
; p3_hgh entry 0o000 -> p2_krn (virtual 0xffffff0000000000),
; p3_hgh entry 0o776 -> p2_mbi (virtual 0xffffff7f80000000).
pte_write p3_hgh, 0o000, p2_krn, p_present | p_writable
pte_write p3_hgh, 0o776, p2_mbi, p_present | p_writable
; pte_write p3_hgh, 0o777, p3_hgh, p_present | p_writable | p_noexec
; Identity map the lowest 4 MiB as two 2 MiB pages (physical 0 and 0x200000).
pte_write p2_low, 0o000, 0o000000_000_000_000_000_0000, p_present | p_writable | p_pagesize
pte_write p2_low, 0o001, 0o000000_000_000_001_000_0000, p_present | p_writable | p_pagesize
; pte_write p2_low, 0o777, p2_low, p_present | p_writable | p_noexec
; Map the kernel: one p2_krn entry per 2 MiB step from kernel_start until
; rsi reaches kernel_end. rcx = entry index, rsi = address written.
; NOTE(review): rsi is ORed into the entry unmasked; if kernel_start is not
; 2 MiB-aligned, its low bits end up in the entry -- confirm alignment.
xor rcx, rcx
mov rsi, kernel_start
.kernel_loop:
pte_write p2_krn, rcx, rsi, p_present | p_writable | p_pagesize
inc rcx
add rsi, 0o000000_000_000_001_000_0000
cmp rsi, kernel_end
jb .kernel_loop
; Map the multiboot2 information structure: one p2_mbi entry per 2 MiB step
; from r9 until rsi reaches r10. These entries are read-only and no-execute.
; NOTE(review): r9 is only 4 KiB-aligned (see the mask above), yet it is
; written into an entry that has p_pagesize set, so address bits 12-20 may
; be non-zero in the entry.
xor rcx, rcx
mov rsi, r9
.mbi_loop:
pte_write p2_mbi, rcx, rsi, p_present | p_pagesize | p_noexec
inc rcx
add rsi, 0o000000_000_000_001_000_0000
cmp rsi, r10
jb .mbi_loop
; Load the new page table. We don't need to flush the TLB because we moved into CR3.
mov rax, p4
mov cr3, rax
; Return.
ret
[section .data]
; 16 zero bytes on a 16-byte boundary; enable_paging loads them with movaps
; and stores them repeatedly to clear the page tables.
align 0x10
blank: times 0x10 db 0x00
[section .bss]
; Six 4 KiB tables on a 4 KiB boundary. page_tables_start and
; page_tables_end bracket the region that enable_paging clears.
alignb 4096
page_tables_start:
; Top-level table; its address is loaded into CR3.
p4: resb 4096
; Second-level tables for the low mapping and the higher-half mapping.
p3_low: resb 4096
p3_hgh: resb 4096
; Third-level tables: low identity map, kernel, multiboot2 info.
p2_low: resb 4096
p2_krn: resb 4096
p2_mbi: resb 4096
page_tables_end:
start64.asm
:
; Assemble as 64-bit code. kmain is defined elsewhere; start64 is exported.
bits 64
extern kmain
global start64
; Pull in the pte_write macro and the enable_paging routine.
%include "macros64.asm"
%include "paging64.asm"
[section .text]
;; The entry point for 64-bit code. We expect the address of the multiboot2
;; info structure in rdi.
;; Sets the data segment registers, calls enable_paging, then passes the
;; adjusted info-structure address to kmain in rdi.
start64:
; Save the address of the multiboot2 info structure.
; rdi itself is left unchanged, so enable_paging still receives it.
push rdi
; Clear interrupts. If we get an interrupt before we have an IDT, we'll
; triple fault. We can re-enable it from Rust, later.
cli
; Nuke the segment registers: load selector 0x10 into ss, ds, es, fs and gs.
mov rax, 0x10
mov ss, ax
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
; Set up paging.
call enable_paging
; The first argument to kmain is the multiboot2 info structure. We need to
; adjust the address to the new higher-half location.
; NOTE(review): this adds the full original address to 0xffffff7f80000000,
; which only lands on the structure if that virtual base maps physical
; address 0 -- confirm against the p2_mbi entries built in enable_paging.
pop rdi
mov rax, 0xffffff7f80000000
add rdi, rax
; DEBUG: write "!!" at 0xb8004, read through the higher-half address, then
; write "!!" at 0xb8000 and stop. The second marker only appears if the
; read did not fault.
mov dword [0xb8004], 0xf021f021
mov rbx, [rdi]
mov dword [0xb8000], 0xf021f021
; With interrupts disabled above, execution is expected to stop here, so
; the kmain call below is not reached while this debug code is present.
hlt
; Call kmain. It's more than 4GiB away, so we have to do an indirect call.
mov rax, kmain
call rax
; kmain should never return; call halt if it does.
jmp halt
; halt: print a marker to the text buffer at 0xb8000 and stop the CPU.
; Loops back to itself if execution ever continues past the hlt.
halt:
; Write "kexit?!?" to the eight character cells starting at 0xb8000.
; Each dword holds two character/attribute pairs, all with attribute 0x4f.
; NOTE(review): the original comment said "upper right corner"; 0xb8000 is
; conventionally the first (top-left) cell of VGA text memory -- confirm.
mov dword [0xb8000], 0x4f654f6b
mov dword [0xb8004], 0x4f694f78
mov dword [0xb8008], 0x4f3f4f74
mov dword [0xb800c], 0x4f3f4f21
; Disable interrupts and halt.
cli
hlt
; Just in case... something? happens.
jmp halt
把新的页表地址写入 CR3 之后,程序仍能正常继续执行。但是,一旦 start64.asm 中的代码尝试从高半地址读取一个值,就会发生页面错误。出错的是这一行:
mov rbx, [rdi]
它前面的那一行 mov dword [0xb8004], 0xf021f021 能正确地把 !! 写到屏幕上。[rdi] 是 Multiboot2 信息结构所在的高半地址。
可以在 my GIT repository 中找到完整代码的副本。
我根据自己做过的另一个实验,做一个有根据的推测。我猜如果你向上滚动输出、查看最先抛出的异常(你的一张截图把开头部分截掉了),会在 QEMU 中看到类似这样的输出:
check_exception old: 0xffffffff new 0xe
0: v=0e e=0009 i=0 cpl=0 ... [snip]
具体来说,要找的是带有 new 0xe 的那条异常记录,即页面错误(Page Fault)异常。为简洁起见,我省略了其余输出。
在第二行,你很可能会看到 e=0009。这是进入页面错误处理程序之前被压入堆栈的错误代码。由于你没有页面错误处理程序,之后还会接连出现其他异常,最终发生三重错误(triple fault)。
错误代码 0x0009 是什么意思? OSDev Wiki 有一个描述:
31 4 0
+---+-- --+---+---+---+---+---+---+
| Reserved | I | R | U | W | P |
+---+-- --+---+---+---+---+---+---+
P 1 bit - Present - When set, the page fault was caused by a page-protection violation.
When not set, it was caused by a non-present page.
W 1 bit - Write - When set, the page fault was caused by a page write.
When not set, it was caused by a page read.
U 1 bit - User - When set, the page fault was caused while CPL = 3. This does not
necessarily mean that the page fault was a privilege violation.
R 1 bit - Reserved write - When set, the page fault was caused by
reading a 1 in a reserved field.
I 1 bit - Instruction Fetch - When set, the page fault was caused by
an instruction fetch.
你的值 e=0009 对应位掩码 01001(R=1,P=1)。这说明发生的是页面保护违规(而不是页面不存在),并且原因是在保留字段中读到了 1。
出问题的保留字段(位)位于页表层次结构最底层的页表项 (PTE) 中;对 2MB 页面来说,就是 p2_* 表中的条目。使用 2MB 页面且不使用页属性表 (PAT) 时,这一级页表项的第 12 到 20 位必须为零。你当前的代码正属于这种情况。保留位的特殊之处在于:只要其中有一位为 1,就会在 QEMU 输出中得到 e=0009。
要解决这个问题,必须确保这些页表项 (PTE) 把上述保留位设为 0。一个快速的权宜办法是在 macros64.asm 中加入类似下面的宏:
; pte_write table, index, address, flags
; Stores (flags | address) into the 8-byte entry at [table + 8*index].
;   %1 = base label of the table to write into
;   %2 = entry index (immediate or register)
;   %3 = address to store in the entry (label, immediate or register)
;   %4 = flag bits to combine with the address
; Clobbers rax (loaded with %4 first, so %3 must not be rax).
; The address in %3 is ORed in exactly as given; it is not masked or aligned.
%macro pte_write 4
mov rax, %4
or rax, %3
mov qword [%1+8*%2], rax
%endmacro
; pte_write_res table, index, address, flags
; Same operands as pte_write, but the address is first ANDed with
; 0x7fffffffffe00000 (bits 21-62 kept, bits 0-20 and bit 63 cleared), so
; only the flags in %4 can set the low 21 bits or bit 63 of the entry.
;   %1 = base label of the table to write into
;   %2 = entry index (immediate or register)
;   %3 = address to store in the entry
;   %4 = flag bits to combine with the masked address
; Clobbers rax and r11 (so %3 must not be rax, and %2 must not be r11).
%macro pte_write_res 4
mov rax, %4
mov r11, 0x7fffffffffe00000
and r11, %3
or rax, r11
mov qword [%1+8*%2], rax
%endmacro
主要区别在于,pte_write_res 会把地址中的保留位清零,从而强制满足保留位的要求。接下来需要修改使用这些宏的代码;就你的代码而言,应该是 paging64.asm 中的以下两处:
.kernel_loop:
pte_write p2_krn, rcx, rsi, p_present | p_writable | p_pagesize
inc rcx
现在将变为:
.kernel_loop:
pte_write_res p2_krn, rcx, rsi, p_present | p_writable | p_pagesize
inc rcx
和
.mbi_loop:
pte_write p2_mbi, rcx, rsi, p_present | p_pagesize | p_noexec
inc rcx
现在将变为:
.mbi_loop:
pte_write_res p2_mbi, rcx, rsi, p_present | p_pagesize | p_noexec
inc rcx
这两处写入的都是页表项,而 RSI 中的地址可能带有这些必须为 0 的位。
我正在用 Rust 和 NASM 汇编器编写一个 64 位高半(higher-half)内核,并使用兼容 Multiboot2 的引导加载程序(GRUB2)来加载它。内核在 QEMU 中运行时会触发页面错误(0x0e 异常),我不明白原因。问题出现在我的汇编代码中,此时还没有执行到用 Rust 编写的代码。
我正在设置分页以便内存看起来像:
0000000000000000: 0000000000000000 --PDA---W
0000000000200000: 0000000000200000 --P-----W
ffffff0000000000: 0000000000000000 --P-----W
ffffff7f80000000: 0000000000000000 X-P------
(这既是我想要的布局,也是 QEMU 的 info mem 命令显示的结果)
页表的内容如下:
p4: # pml4
0o000 <- p3_low | PRESENT | WRITABLE
0o776 <- p3_hgh | PRESENT | WRITABLE
p3_low: # pdpte
0o000 <- p2_low | PRESENT | WRITABLE
p3_hgh: # pdpte
0o000 <- p2_krn | PRESENT | WRITABLE
0o776 <- p2_mbi | PRESENT | WRITABLE
p2_low: # pde
0o000 <- 0o000000_000_000_000_000_0000 | PRESENT | WRITABLE | PAGESIZE
0o001 <- 0o000000_000_000_001_000_0000 | PRESENT | WRITABLE | PAGESIZE
p2_krn: # pde
0o000 <- 0o000000_000_000_000_000_0000 | PRESENT | WRITABLE | PAGESIZE
p2_mbi: # pde
0o000 <- 0o000000_000_000_000_000_0000 | PRESENT | PAGESIZE | NOEXEC
其他一切都归零。
我项目中的相关代码在这些文件中:
macros64.asm
:
; pte_write table, index, address, flags
; Stores (flags | address) into the 8-byte entry at [table + 8*index].
;   %1 = base label of the table to write into
;   %2 = entry index (immediate or register)
;   %3 = address to store in the entry (label, immediate or register)
;   %4 = flag bits to combine with the address
; Clobbers rax (loaded with %4 first, so %3 must not be rax).
; The address in %3 is ORed in exactly as given; it is not masked or aligned.
%macro pte_write 4
mov rax, %4
or rax, %3
mov qword [%1+8*%2], rax
%endmacro
paging64.asm
:
; Bounds of the kernel image, defined outside this file
; (presumably by the linker script -- TODO confirm).
extern kernel_start
extern kernel_end
; Flag bits ORed into page-table entries by pte_write.
; Bit 0: entry is present.
p_present equ (1<<0)
; Bit 1: writes are allowed.
p_writable equ (1<<1)
; Bit 2: user-mode access allowed (not used by the code in this file).
p_user equ (1<<2)
; Bit 7: page-size bit; the p2_* entries that set it are written with 2 MiB strides.
p_pagesize equ (1<<7)
; Bit 63: no-execute.
p_noexec equ (1<<63)
[section .text]
; enable_paging: clear and fill the page tables reserved in .bss, then load
; the address of p4 into CR3.
; In:       rdi = address of the multiboot2 info structure.
; Clobbers: rax, rcx, rsi, r9, r10, xmm1, flags.
enable_paging:
; Calculate start and end address of the multiboot2 info structure.
;   r9  = info address rounded down to a 4 KiB boundary.
;   r10 = info address plus the dword stored at its start (presumably the
;         structure's total size -- TODO confirm), moved up to the next
;         4 KiB boundary. The addition is done in 32 bits (r10d), which
;         zeroes the upper half of r10.
mov r9, rdi
mov r10, r9
add r10d, dword [r9]
and r9, 0xfffffffffffff000
shr r10, 12
inc r10
shl r10, 12
; Clear out all the page tables, 16 bytes per iteration, using the zeroed
; 16-byte block at `blank`.
movaps xmm1, [blank]
mov rcx, page_tables_start
.clear_page_tables_loop:
movaps [rcx], xmm1
add rcx, 16
cmp rcx, page_tables_end
; NOTE(review): jl is a signed comparison; for addresses the unsigned jb is
; the usual choice. Both behave the same while the addresses are below 2^63.
jl .clear_page_tables_loop
; TODO Uncomment the recursive page mappings once things actually work -- for now, they just make "info tlb" in QEMU annoying to read.
; Fill out the P4 table.
; Entry 0o000 covers the low identity mapping; entry 0o776 covers the
; higher-half region starting at 0xffffff0000000000.
pte_write p4, 0o000, p3_low, p_present | p_writable
pte_write p4, 0o776, p3_hgh, p_present | p_writable
; pte_write p4, 0o777, p4, p_present | p_writable | p_noexec
; Fill out the P3 tables.
pte_write p3_low, 0o000, p2_low, p_present | p_writable
; pte_write p3_low, 0o777, p3_low, p_present | p_writable | p_noexec
; p3_hgh entry 0o000 -> p2_krn (virtual 0xffffff0000000000),
; p3_hgh entry 0o776 -> p2_mbi (virtual 0xffffff7f80000000).
pte_write p3_hgh, 0o000, p2_krn, p_present | p_writable
pte_write p3_hgh, 0o776, p2_mbi, p_present | p_writable
; pte_write p3_hgh, 0o777, p3_hgh, p_present | p_writable | p_noexec
; Identity map the lowest 4 MiB as two 2 MiB pages (physical 0 and 0x200000).
pte_write p2_low, 0o000, 0o000000_000_000_000_000_0000, p_present | p_writable | p_pagesize
pte_write p2_low, 0o001, 0o000000_000_000_001_000_0000, p_present | p_writable | p_pagesize
; pte_write p2_low, 0o777, p2_low, p_present | p_writable | p_noexec
; Map the kernel: one p2_krn entry per 2 MiB step from kernel_start until
; rsi reaches kernel_end. rcx = entry index, rsi = address written.
; NOTE(review): rsi is ORed into the entry unmasked; if kernel_start is not
; 2 MiB-aligned, its low bits end up in the entry -- confirm alignment.
xor rcx, rcx
mov rsi, kernel_start
.kernel_loop:
pte_write p2_krn, rcx, rsi, p_present | p_writable | p_pagesize
inc rcx
add rsi, 0o000000_000_000_001_000_0000
cmp rsi, kernel_end
jb .kernel_loop
; Map the multiboot2 information structure: one p2_mbi entry per 2 MiB step
; from r9 until rsi reaches r10. These entries are read-only and no-execute.
; NOTE(review): r9 is only 4 KiB-aligned (see the mask above), yet it is
; written into an entry that has p_pagesize set, so address bits 12-20 may
; be non-zero in the entry.
xor rcx, rcx
mov rsi, r9
.mbi_loop:
pte_write p2_mbi, rcx, rsi, p_present | p_pagesize | p_noexec
inc rcx
add rsi, 0o000000_000_000_001_000_0000
cmp rsi, r10
jb .mbi_loop
; Load the new page table. We don't need to flush the TLB because we moved into CR3.
mov rax, p4
mov cr3, rax
; Return.
ret
[section .data]
; 16 zero bytes on a 16-byte boundary; enable_paging loads them with movaps
; and stores them repeatedly to clear the page tables.
align 0x10
blank: times 0x10 db 0x00
[section .bss]
; Six 4 KiB tables on a 4 KiB boundary. page_tables_start and
; page_tables_end bracket the region that enable_paging clears.
alignb 4096
page_tables_start:
; Top-level table; its address is loaded into CR3.
p4: resb 4096
; Second-level tables for the low mapping and the higher-half mapping.
p3_low: resb 4096
p3_hgh: resb 4096
; Third-level tables: low identity map, kernel, multiboot2 info.
p2_low: resb 4096
p2_krn: resb 4096
p2_mbi: resb 4096
page_tables_end:
start64.asm
:
; Assemble as 64-bit code. kmain is defined elsewhere; start64 is exported.
bits 64
extern kmain
global start64
; Pull in the pte_write macro and the enable_paging routine.
%include "macros64.asm"
%include "paging64.asm"
[section .text]
;; The entry point for 64-bit code. We expect the address of the multiboot2
;; info structure in rdi.
;; Sets the data segment registers, calls enable_paging, then passes the
;; adjusted info-structure address to kmain in rdi.
start64:
; Save the address of the multiboot2 info structure.
; rdi itself is left unchanged, so enable_paging still receives it.
push rdi
; Clear interrupts. If we get an interrupt before we have an IDT, we'll
; triple fault. We can re-enable it from Rust, later.
cli
; Nuke the segment registers: load selector 0x10 into ss, ds, es, fs and gs.
mov rax, 0x10
mov ss, ax
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
; Set up paging.
call enable_paging
; The first argument to kmain is the multiboot2 info structure. We need to
; adjust the address to the new higher-half location.
; NOTE(review): this adds the full original address to 0xffffff7f80000000,
; which only lands on the structure if that virtual base maps physical
; address 0 -- confirm against the p2_mbi entries built in enable_paging.
pop rdi
mov rax, 0xffffff7f80000000
add rdi, rax
; DEBUG: write "!!" at 0xb8004, read through the higher-half address, then
; write "!!" at 0xb8000 and stop. The second marker only appears if the
; read did not fault.
mov dword [0xb8004], 0xf021f021
mov rbx, [rdi]
mov dword [0xb8000], 0xf021f021
; With interrupts disabled above, execution is expected to stop here, so
; the kmain call below is not reached while this debug code is present.
hlt
; Call kmain. It's more than 4GiB away, so we have to do an indirect call.
mov rax, kmain
call rax
; kmain should never return; call halt if it does.
jmp halt
; halt: print a marker to the text buffer at 0xb8000 and stop the CPU.
; Loops back to itself if execution ever continues past the hlt.
halt:
; Write "kexit?!?" to the eight character cells starting at 0xb8000.
; Each dword holds two character/attribute pairs, all with attribute 0x4f.
; NOTE(review): the original comment said "upper right corner"; 0xb8000 is
; conventionally the first (top-left) cell of VGA text memory -- confirm.
mov dword [0xb8000], 0x4f654f6b
mov dword [0xb8004], 0x4f694f78
mov dword [0xb8008], 0x4f3f4f74
mov dword [0xb800c], 0x4f3f4f21
; Disable interrupts and halt.
cli
hlt
; Just in case... something? happens.
jmp halt
把新的页表地址写入 CR3 之后,程序仍能正常继续执行。但是,一旦 start64.asm 中的代码尝试从高半地址读取一个值,就会发生页面错误。出错的是这一行:
mov rbx, [rdi]
它前面的那一行 mov dword [0xb8004], 0xf021f021 能正确地把 !! 写到屏幕上。[rdi] 是 Multiboot2 信息结构所在的高半地址。
可以在 my GIT repository 中找到完整代码的副本。
我根据自己做过的另一个实验,做一个有根据的推测。我猜如果你向上滚动输出、查看最先抛出的异常(你的一张截图把开头部分截掉了),会在 QEMU 中看到类似这样的输出:
check_exception old: 0xffffffff new 0xe
0: v=0e e=0009 i=0 cpl=0 ... [snip]
具体来说,要找的是带有 new 0xe 的那条异常记录,即页面错误(Page Fault)异常。为简洁起见,我省略了其余输出。
在第二行,你很可能会看到 e=0009。这是进入页面错误处理程序之前被压入堆栈的错误代码。由于你没有页面错误处理程序,之后还会接连出现其他异常,最终发生三重错误(triple fault)。
错误代码 0x0009 是什么意思? OSDev Wiki 有一个描述:
 31              4               0
+---+--  --+---+---+---+---+---+---+
|   Reserved   | I | R | U | W | P |
+---+--  --+---+---+---+---+---+---+
P 1 bit - Present - When set, the page fault was caused by a page-protection violation.
When not set, it was caused by a non-present page.
W 1 bit - Write - When set, the page fault was caused by a page write.
When not set, it was caused by a page read.
U 1 bit - User - When set, the page fault was caused while CPL = 3. This does not
necessarily mean that the page fault was a privilege violation.
R 1 bit - Reserved write - When set, the page fault was caused by
reading a 1 in a reserved field.
I 1 bit - Instruction Fetch - When set, the page fault was caused by
an instruction fetch.
你的值 e=0009 对应位掩码 01001(R=1,P=1)。这说明发生的是页面保护违规(而不是页面不存在),并且原因是在保留字段中读到了 1。
出问题的保留字段(位)位于页表层次结构最底层的页表项 (PTE) 中;对 2MB 页面来说,就是 p2_* 表中的条目。使用 2MB 页面且不使用页属性表 (PAT) 时,这一级页表项的第 12 到 20 位必须为零。你当前的代码正属于这种情况。保留位的特殊之处在于:只要其中有一位为 1,就会在 QEMU 输出中得到 e=0009。
要解决这个问题,必须确保这些页表项 (PTE) 把上述保留位设为 0。一个快速的权宜办法是在 macros64.asm 中加入类似下面的宏:
; pte_write table, index, address, flags
; Stores (flags | address) into the 8-byte entry at [table + 8*index].
;   %1 = base label of the table to write into
;   %2 = entry index (immediate or register)
;   %3 = address to store in the entry (label, immediate or register)
;   %4 = flag bits to combine with the address
; Clobbers rax (loaded with %4 first, so %3 must not be rax).
; The address in %3 is ORed in exactly as given; it is not masked or aligned.
%macro pte_write 4
mov rax, %4
or rax, %3
mov qword [%1+8*%2], rax
%endmacro
; pte_write_res table, index, address, flags
; Same operands as pte_write, but the address is first ANDed with
; 0x7fffffffffe00000 (bits 21-62 kept, bits 0-20 and bit 63 cleared), so
; only the flags in %4 can set the low 21 bits or bit 63 of the entry.
;   %1 = base label of the table to write into
;   %2 = entry index (immediate or register)
;   %3 = address to store in the entry
;   %4 = flag bits to combine with the masked address
; Clobbers rax and r11 (so %3 must not be rax, and %2 must not be r11).
%macro pte_write_res 4
mov rax, %4
mov r11, 0x7fffffffffe00000
and r11, %3
or rax, r11
mov qword [%1+8*%2], rax
%endmacro
主要区别在于,pte_write_res 会把地址中的保留位清零,从而强制满足保留位的要求。接下来需要修改使用这些宏的代码;就你的代码而言,应该是 paging64.asm 中的以下两处:
.kernel_loop:
pte_write p2_krn, rcx, rsi, p_present | p_writable | p_pagesize
inc rcx
现在将变为:
.kernel_loop:
pte_write_res p2_krn, rcx, rsi, p_present | p_writable | p_pagesize
inc rcx
和
.mbi_loop:
pte_write p2_mbi, rcx, rsi, p_present | p_pagesize | p_noexec
inc rcx
现在将变为:
.mbi_loop:
pte_write_res p2_mbi, rcx, rsi, p_present | p_pagesize | p_noexec
inc rcx
这两处写入的都是页表项,而 RSI 中的地址可能带有这些必须为 0 的位。