C 是如何处理堆栈的,这就是导致此问题的原因吗?

How does C handle the stack and is that what is causing this problem?

我有一些代码,来自一个基于 James Molloy OS 教程的教程。我正在尝试修复中断处理,为此我修改了代码,改用指针作为 C 处理程序的参数(以修复代码中的一个错误)。现在我遇到了一个问题:在调用 C 函数之后,把 ds 寄存器设置为从堆栈弹出的值(大概是 0x8)时,会导致 QEMU 重新启动。我认为这可能与 C 使用堆栈的方式有关:堆栈上保存的值可能被改动了,于是 ds 被设置成了垃圾值(而不是 0x8)。

我用 GDB 做了一些测试,我发现 OS 在将 ds 设置为函数调用后从堆栈加载的值时退出。

这里是处理程序:

/*
 * Default handler for CPU exceptions; called from isr_common_stub.
 * Prints the interrupt number, the error code and the matching entry of
 * exception_messages.
 *
 * r: register snapshot built by the assembly stub; only int_no and err_code
 *    are read here.
 */
void isr_handler(registers_t *r) {
    /* Room for any 32-bit value in decimal (sign + 10 digits + NUL),
     * assuming int_to_ascii emits decimal text with a terminating NUL.
     * The previous 5-byte buffer could only hold values up to 9999. */
    char digits[12];

    kprint("received interrupt: ");
    int_to_ascii(r->int_no, digits);
    kprint(digits);
    kprint("\n");

    kprint("error code: ");
    int_to_ascii(r->err_code, digits);
    kprint(digits);
    kprint("\n");

    /* NOTE(review): int_no indexes exception_messages unchecked; the size of
     * that table is not visible here -- confirm every vector routed to this
     * handler has an entry. */
    kprint(exception_messages[r->int_no]);
    kprint("\n");
}
/*
 * Common C entry point for hardware interrupts; called from irq_common_stub.
 * Acknowledges the interrupt at the PIC(s), then dispatches to the handler
 * registered in interrupt_handlers for r->int_no, if there is one.
 */
void irq_handler(registers_t *r) {
    /* An EOI must be sent after every interrupt, otherwise the PICs will
     * not deliver another one. Vectors >= 40 also need the slave's EOI. */
    if (r->int_no >= 40) {
        port_byte_out(0xA0, 0x20); /* slave */
    }
    port_byte_out(0x20, 0x20); /* master */

    /* Dispatch to the registered handler, if any */
    isr_t registered = interrupt_handlers[r->int_no];
    if (registered != 0) {
        registered(r);
        return;
    }

    /* No handler registered: only an empty print once `loaded` is set */
    if (loaded == 1) {
        kprint("");
    }
}

以及汇编代码:

; Common ISR code
; Saves the general-purpose registers and the data segment selector, switches
; to the kernel data segment, calls isr_handler with a pointer to the saved
; state, then restores everything and returns from the interrupt.
isr_common_stub:
    ; 1. Save CPU state
    pushad ; Pushes eax,ecx,edx,ebx,esp,ebp,esi,edi in that order (edi ends up at the lowest address)
    mov ax, ds ; Lower 16-bits of eax = ds.
    push eax ; save the data segment selector (upper 16 bits are whatever eax still held)
    mov ax, 0x10  ; kernel data segment selector
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    push esp ; argument for isr_handler: ESP points at the saved DS, the lowest word of the saved state
    ; NOTE(review): the direction flag is not cleared before the call; the
    ; i386 System V ABI expects DF=0 on function entry -- consider a cld here.

    ; 2. Call C handler
    call isr_handler
    pop eax ; discard the pointer argument pushed above

    ; 3. Restore state
    pop eax ; saved data segment selector
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    popad
    add esp, 8 ; Cleans up the pushed error code and pushed ISR number
    sti ; NOTE(review): iret reloads EFLAGS from the stack, so this looks redundant -- confirm before removing
    iret ; pops EIP, CS and EFLAGS (plus ESP and SS only when returning to a lower privilege level)

; Common IRQ code. Same sequence as the ISR code except that it calls
; irq_handler and uses ebx instead of eax for the two pops after the call.
irq_common_stub:
    pushad                   ; Save the general-purpose registers
    mov ax, ds
    push eax                 ; Save the data segment selector
    mov ax, 0x10 ; kernel data segment selector
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    push esp                 ; At this point ESP is a pointer to where DS (and the rest
                             ; of the interrupt handler state resides)
                             ; Push ESP as 1st parameter as it's a 
                             ; pointer to a registers_t  
    ; NOTE(review): the direction flag is not cleared before the call; the
    ; i386 System V ABI expects DF=0 on function entry -- consider a cld here.
    call irq_handler
    pop ebx                  ; Remove the saved ESP on the stack. Efficient to just pop it 
                             ; into any register. You could have done: add esp, 4 as well
    pop ebx                  ; Saved data segment selector
    mov ds, bx
    mov es, bx
    mov fs, bx
    mov gs, bx
    popad                    ; Restore the general-purpose registers
    add esp, 8               ; Drop the two words that were on the stack before pushad (the ISR stub's comment names them: error code and ISR number)
    sti                      ; NOTE(review): iret reloads EFLAGS from the stack, so this looks redundant -- confirm before removing
    iret

以及导致问题的处理程序:

/*
 * Keyboard interrupt callback: reads one scancode from port 0x60, splits it
 * into a key number and a pressed/released flag, and forwards both to
 * key_handler. The register snapshot is not used.
 */
static void keyboard_callback(registers_t *regs) {
    uint8_t code = port_byte_in(0x60);

    /* Codes at or above KEYUPOFFSET are treated as key releases of the key
     * whose code is KEYUPOFFSET lower. */
    bool released = code >= KEYUPOFFSET;
    if (released) {
        code -= KEYUPOFFSET;
    }

    key_handler(code, released);
    UNUSED(regs);
}

以下是触发该问题的命令所调用的内存测试代码:

if (strcmp(input, "testMemLess") == 0) {
    /* Allocator smoke test: take two 0x1000-byte blocks, write a distinct
     * value into each, print both back, report memoryRemaining, then
     * release both blocks. */
    uint32_t *first = kmalloc(0x1000);
    uint32_t *second = kmalloc(0x1000);
    *second = 255;
    *first = 256;

    sprintd("Data test:");
    sprint_uint(*first);
    sprint("\n");
    sprintd("Test 2:");
    sprint_uint(*second);

    char remaining_text[25];
    int_to_ascii(memoryRemaining, remaining_text);
    sprint("\nMemory Remaining: ");
    sprint(remaining_text);
    sprint(" bytes\n");

    free(first, 0x1000);
    free(second, 0x1000);
    sprintd("EXECUTION COMPLETE");
}

free 和 kmalloc:

/*
 * Find the best fitting free block for an allocation of `size` bytes.
 *
 * Walks the free list starting at the node at `curAddr`. Each node holds the
 * address of the next free block at offset 0 and that block's size at
 * offset 4. The walk stops at the first node whose link or size is zero, whose
 * link is below MIN, whose link plus `size` exceeds MAX, or whose target fails
 * the write/read-back probe.
 *
 * size:        requested size in bytes
 * curFit:      slack (block size minus `size`) of the best candidate so far
 * curAddr:     address of the list node to start from
 * curFitBlock: address of the best candidate so far
 *
 * Returns get_pointer() of the block with the smallest slack below the
 * initial `curFit`, or of the initial `curFitBlock` if none was found.
 *
 * Implemented as a loop instead of the former tail recursion so that a long
 * free list cannot exhaust the stack.
 */
void * bestFit(uint32_t size, uint32_t curFit, uint32_t curAddr, uint32_t curFitBlock) {
    const uint32_t probe_value = 180; /* arbitrary test pattern */

    for (;;) {
        uint32_t *nextFreeBlock = get_pointer(curAddr);
        uint32_t *freeSize = get_pointer(curAddr + 4);

        if (*nextFreeBlock == 0 || *freeSize == 0 ||
            *nextFreeBlock < MIN || *nextFreeBlock + size > MAX) {
            break; /* end of list, or link outside the managed range */
        }

        /* Check that the candidate is backed by working memory. This
         * overwrites the word at offset 8 of the candidate block. The
         * access is volatile so the compiler cannot fold the read-back
         * into a constant and drop the test. */
        volatile uint32_t *probe = get_pointer(*nextFreeBlock + 8);
        *probe = probe_value;
        if (*probe != probe_value) {
            break;
        }

        if (size <= *freeSize) {
            /* Non-negative by the guard above, so plain unsigned
             * subtraction is exact; abs() would have converted the value
             * to int first. */
            uint32_t dif = *freeSize - size;
            if (dif < curFit) {
                curFit = dif;
                curFitBlock = *nextFreeBlock;
            }
        }

        curAddr = *nextFreeBlock;
    }

    return get_pointer(curFitBlock);
}

/*
 * Unlink d->usedBlock from the free list.
 *
 * Starting at the node whose address is d->chain_next, follows the list (link
 * at offset 0, size of the linked block at offset 4) until it finds the node
 * that links to d->usedBlock, then makes that node link to whatever
 * d->usedBlock linked to. On each step d->chain_next and d->next_block_size
 * are updated to the link and size just read.
 *
 * The walk stops without changing the list when a node's link or size is zero,
 * the link is below MIN, or link + size exceeds MAX.
 *
 * Written as a loop: the previous version recursed once per list node and
 * used `return <void expression>;`, which ISO C does not allow in a void
 * function.
 */
void block_move(blockData_t *d) {
    const uint32_t used = d->usedBlock;

    for (;;) {
        uint32_t node = d->chain_next;
        uint32_t *link = get_pointer(node);
        uint32_t *link_size = get_pointer(node + 4);

        if (*link == 0 || *link_size == 0 || *link < MIN || (*link + *link_size) > MAX) {
            return; /* end of list, or link outside the managed range */
        }

        /* Advance the cursor before handling this node */
        d->chain_next = *link;
        d->next_block_size = *link_size;

        if (*link == used) {
            /* This node points at the block being handed out: splice the
             * block out by copying its own header over this node's. Both
             * words are read before either is written. */
            uint32_t *used_link = get_pointer(used);
            uint32_t *used_size = get_pointer(used + 4);
            uint32_t after_used = *used_link;
            uint32_t after_used_size = *used_size;
            *link = after_used;
            *link_size = after_used_size;
            return;
        }
    }
}

/*
 * Core allocator: returns the address of a block of `size` bytes.
 *
 * First looks for the best fitting block on the free list (bestFit) and
 * unlinks it (block_move). If no free block was chosen, bestFit returns
 * free_mem_addr itself and the block is carved off by advancing that bump
 * pointer.
 *
 * size:  number of bytes requested
 * align: when 1, free_mem_addr is first rounded up to a 4 KiB (0x1000)
 *        boundary
 *
 * Also updates the memoryRemaining and usedMem counters.
 */
uint32_t kmalloc_int(uint32_t size, int align) {
    /* Pages are aligned to 4K, or 0x1000. Only round up when the low 12 bits
     * are non-zero; testing the high bits instead (0xFFFFF000) skipped a whole
     * page for addresses that were already aligned. */
    if (align == 1 && (free_mem_addr & 0x00000FFF)) {
        free_mem_addr &= 0xFFFFF000;
        free_mem_addr += 0x1000;
    }

    uint32_t ret = (uint32_t)(uintptr_t)bestFit(size, MAX - free_mem_addr, free_mem_addr, free_mem_addr);

    /* The descriptor lives on the stack. It used to be an uninitialised
     * pointer, so every field store below wrote to an arbitrary address. */
    blockData_t param = {0};
    param.chain_next = (uint32_t)(uintptr_t)get_pointer(free_mem_addr); /* start at the list head */
    param.next_block_size = 0; /* filled in by block_move as it walks the list */
    param.usedBlock = ret;
    param.usedBlockSize = size;
    block_move(&param);

    if (ret == free_mem_addr) {
        free_mem_addr += size; /* Remember to increment the pointer */
    }
    memoryRemaining -= size;
    usedMem += size;
    return ret;
}

/* Allocate `size` bytes without page alignment and return them as a pointer. */
void * kmalloc(uint32_t size) {
    return get_pointer(kmalloc_int(size, 0));
}

/*
 * Return the block of `size` bytes starting at `addr` to the allocator.
 *
 * Free-list layout as used here: the word at free_mem_addr holds the address
 * of the first free block and the word at free_mem_addr + 4 holds its size;
 * every free block carries the same two-word header for the block after it.
 *
 * - If the block ends exactly at free_mem_addr, free_mem_addr is moved back
 *   over the block and the list head is re-written at the new location.
 * - Otherwise the block is pushed onto the front of the free list.
 *
 * The block is zeroed in both cases, and memoryRemaining / usedMem are
 * adjusted. A null `addr` or a zero `size` is ignored: linking a zero-size
 * node at the head would make the list walkers stop immediately and lose
 * every block behind it.
 *
 * All address arithmetic is done on uint32_t values instead of mixing
 * void * arithmetic (a compiler extension) with integers.
 */
void free(void * addr, uint32_t size) {
    if (!addr || size == 0) {
        return;
    }

    void *address = get_pointer((uint32_t)(uintptr_t)addr);
    uint32_t block = (uint32_t)(uintptr_t)address;

    /* Read the current list head before anything is overwritten */
    uint32_t *head_next = get_pointer(free_mem_addr);
    uint32_t *head_size = get_pointer(free_mem_addr + 4);
    uint32_t first_free = *head_next;
    uint32_t first_free_size = *head_size;

    const int adjoins_head = (block + size == free_mem_addr);

    memory_set(address, 0, size);

    if (adjoins_head) {
        /* Block sits directly below the bump pointer: give the space back
         * and move the list head down with it. */
        free_mem_addr -= size;
        head_next = get_pointer(free_mem_addr);
        head_size = get_pointer(free_mem_addr + 4);
        *head_next = first_free;
        *head_size = first_free_size;
    } else {
        /* Push the block onto the front of the free list */
        uint32_t *block_next = get_pointer(block);
        uint32_t *block_size = get_pointer(block + 4);
        *block_next = first_free;
        *block_size = first_free_size;
        *head_next = block;
        *head_size = size;
    }

    memoryRemaining += size;
    usedMem -= size;

    sprint("\n\n\n");
}

此外,如果你想运行它,GitHub 仓库在这里:https://github.com/Menotdan/DripOS/tree/dev

编辑:只有当我调用用于测试内存的命令时才会出现问题,不,内存测试不会覆盖堆栈内存。参见上面的内存代码。

编辑 2:我做了一些调试,我发现 ds 被设置为 0,所以它弹出了错误的堆栈值。


该代码使用了我自己开发的 kmalloc/free 版本。堆空间的起始地址被传递给我的 kmain 函数,并由 kmalloc 使用。我的 boot.s 文件如下所示:

/* Enable intel syntax: operands are written destination first and registers
   carry no % prefix for the rest of this file */
.intel_syntax noprefix
/* Declare constants for the multiboot header. */
.set ALIGN,    1<<0             /* align loaded modules on page boundaries */
.set MEMINFO,  1<<1             /* provide memory map */
.set FLAGS,    ALIGN | MEMINFO  /* this is the Multiboot 'flag' field */
.set MAGIC,    0x1BADB002       /* 'magic number' lets bootloader find the header */
.set CHECKSUM, -(MAGIC + FLAGS) /* checksum of above, to prove we are multiboot */

/*
Declare a multiboot header that marks the program as a kernel. These are magic
values that are documented in the multiboot standard. The bootloader will
search for this signature in the first 8 KiB of the kernel file, aligned at a
32-bit boundary. The signature is in its own section so the header can be
forced to be within the first 8 KiB of the kernel file.
The three words below are emitted in the order magic, flags, checksum.
*/
.section .multiboot
.align 4
.long MAGIC
.long FLAGS
.long CHECKSUM

.section .data
/*
GDT from the old DripOS bootloader, which was from the original
project (The OS tutorial)

Three 8-byte entries: the null descriptor, one code descriptor and one data
descriptor. Both real descriptors have base 0 and a 20-bit limit of 0xFFFFF.
*/

gdt_start:

        /* Null descriptor: 8 zero bytes */
        .long 0x0
        .long 0x0

gdt_code:
        .word 0xffff /* limit, bits 0-15 */
        .word 0x0    /* base, bits 0-15 */
        .byte 0x0    /* base, bits 16-23 */
        .byte 0x9A /*10011010 in binary: present, ring 0, code segment, readable */
        .byte 0xCF /*11001111 in binary: 4 KiB granularity, 32-bit, limit bits 16-19 = 0xF */
        .byte 0x0    /* base, bits 24-31 */
gdt_data:
        .word 0xffff /* limit, bits 0-15 */
        .word 0x0    /* base, bits 0-15 */
        .byte 0x0    /* base, bits 16-23 */
        .byte 0x92 /*10010010 in binary: present, ring 0, data segment, writable */
        .byte 0xCF /*11001111 in binary: 4 KiB granularity, 32-bit, limit bits 16-19 = 0xF */
        .byte 0x0    /* base, bits 24-31 */

gdt_end:

/* Operand for lgdt: 16-bit table size minus one, then the 32-bit table address */
gdt_descriptor:
        .word gdt_end - gdt_start - 1
        .long gdt_start

/* Selectors are the byte offsets of the descriptors within the table */
CODE_SEG = gdt_code - gdt_start /* 0x08 */
DATA_SEG = gdt_data - gdt_start /* 0x10 */

/*
The multiboot standard does not define the value of the stack pointer register
(esp) and it is up to the kernel to provide a stack. This allocates room for a
small stack by creating a symbol at the bottom of it, then allocating 16384
bytes for it, and finally creating a symbol at the top. The stack grows
downwards on x86. The stack is in its own section so it can be marked nobits,
which means the kernel file is smaller because it does not contain an
uninitialized stack. The stack on x86 must be 16-byte aligned according to the
System V ABI standard and de-facto extensions. The compiler will assume the
stack is properly aligned and failure to align the stack will result in
undefined behavior.
In this file the stack is placed in .bss rather than a dedicated section.
*/
.section .bss
.align 16
stack_bottom: /* lowest address of the reserved area */
.skip 16384 # 16 KiB
stack_top: /* first address past the reserved area; initial value for esp */

/*
The linker script specifies _start as the entry point to the kernel and the
bootloader will jump to this position once the kernel has been loaded. It
doesn't make sense to return from this function as the bootloader is gone.
*/
.section .text
.global _start
.type _start, @function
_start:
        /*
        The bootloader has loaded us into 32-bit protected mode on a x86
        machine. Interrupts are disabled. Paging is disabled. The processor
        state is as defined in the multiboot standard. The kernel has full
        control of the CPU. The kernel can only make use of hardware features
        and any code it provides as part of itself. There's no printf
        function, unless the kernel provides its own <stdio.h> header and a
        printf implementation. There are no security restrictions, no
        safeguards, no debugging mechanisms, only what the kernel provides
        itself. It has absolute and complete power over the
        machine.
        */

        /*
        To set up a stack, we set the esp register to point to the top of the
        stack (as it grows downwards on x86 systems). This is necessarily done
        in assembly as languages such as C cannot function without a stack.

        In Intel syntax the destination comes first and a bare symbol is a
        memory operand, so `offset` is required to load the ADDRESS of
        stack_top. The previous `mov stack_top, esp` stored esp into the
        memory at stack_top and left esp pointing at whatever stack the
        bootloader happened to use.
        */
        mov esp, offset stack_top

        /*
        This is a good place to initialize crucial processor state before the
        high-level kernel is entered. It's best to minimize the early
        environment where crucial features are offline. Note that the
        processor is not fully initialized yet: Features such as floating
        point instructions and instruction set extensions are not initialized
        yet. The GDT should be loaded here. Paging should be enabled here.
        C++ features such as global constructors and exceptions will require
        runtime support to work as well.
        */
        lgdt [gdt_descriptor] /* Load the GDT */

        /* Credit goes to Michael Petch on Stack Overflow for helping correctly write this */
        /* Reload the data segment registers from the new GDT */
        mov ax, DATA_SEG
        mov ds, ax
        mov es, ax
        mov fs, ax
        mov gs, ax
        jmp CODE_SEG:.next /* JMP to next instruction but set CS! */
.next:
        /*
        Enter the high-level kernel. The ABI requires the stack is 16-byte
        aligned at the time of the call instruction (which afterwards pushes
        the return pointer of size 4 bytes). stack_top is 16-byte aligned and
        two 4-byte arguments are pushed below, so 8 bytes of padding are
        reserved first to make the total a multiple of 16.

        Arguments are pushed right to left: the second argument is the
        address of the `test` label, the first is ebx exactly as the
        bootloader left it.
        NOTE(review): `test` marks the end of the code in this file, not the
        end of the kernel image -- confirm this is what kmain expects.
        */
        sub esp, 8
        push offset test
        push ebx
        /*mov ebp, 0x90000
        mov esp, ebp*/
        call kmain

        /*
        If the system has nothing more to do, put the computer into an
        infinite loop. To do that:
        1) Disable interrupts with cli (clear interrupt enable in eflags).
           They are already disabled by the bootloader, so this is not needed.
           Mind that you might later enable interrupts and return from
           kernel_main (which is sort of nonsensical to do).
        2) Wait for the next interrupt to arrive with hlt (halt instruction).
           Since they are disabled, this will lock up the computer.
        3) Jump to the hlt instruction if it ever wakes up due to a
           non-maskable interrupt occurring or due to system management mode.
        */
        cli
1:      hlt
        jmp 1b

/*
Set the size of the _start symbol to the current location '.' minus its start.
This is useful when debugging or when you implement call tracing.
*/
.size _start, . - _start
test:

我将其用作链接描述文件:

/* The bootloader will look at this image and start execution at the symbol
   designated as the entry point. */
ENTRY(_start)

/* Tell where the various sections of the object files will be put in the final
   kernel image. Output order is .text, .rodata, .data, .bss, each starting on
   a 4 KiB boundary. */
SECTIONS
{
    /* Begin putting sections at 1 MiB, a conventional place for kernels to be
       loaded at by the bootloader. */
    . = 1M;

    /* First put the multiboot header, as it is required to be put very
       early in the image or the bootloader won't recognize the file format.
       Next we'll put the .text section. */
    .text BLOCK(4K) : ALIGN(4K)
    {
        *(.multiboot)
        *(.text)
    }

    /* Read-only data. */
    .rodata BLOCK(4K) : ALIGN(4K)
    {
        *(.rodata)
    }

    /* Read-write data (initialized) */
    .data BLOCK(4K) : ALIGN(4K)
    {
        *(.data)
    }

    /* Read-write data (uninitialized) and stack */
    .bss BLOCK(4K) : ALIGN(4K)
    {
        *(COMMON)
        *(.bss)
    }

    /* NOTE(review): no symbol is defined after .bss, so nothing in this
       script exports the end address of the image. */

    /* The compiler may produce other sections, by default it will put them in
       a segment with the same name. Simply add stuff here as needed. */
}

您的问题与 IRQ 处理无关。您的 kmalloc/free 例程基于以下假设:在 boot.s 中,您正确地将内核末尾的地址传递给了 kmain:

.att_syntax
push $test
.intel_syntax noprefix
push ebx
call kmain

问题是您将 boot.s 中的 test 定义为:

.section .text

[snipped the code for brevity]

.size _start, . - _start
test:

您的标签 test 并不在内核的末尾,它只是被放在了 boot.s 中代码的末尾。您的链接描述文件将 .text 部分安排在 .data 之前:

/* Read-write data (initialized) */
.data BLOCK(4K) : ALIGN(4K)
{
    *(.data)
}

/* Read-write data (uninitialized) and stack */
.bss BLOCK(4K) : ALIGN(4K)
{
    *(COMMON)
    *(.bss)
}

test 标签将位于 .text 部分中,也就是 .data 和 .bss 之前的某处。GDT 恰好位于 .data 部分。您的 kmalloc 被告知把 .data、.bss 及其之后的所有内存都当作堆空间使用。结果是您的 free 可能已经清除了 GDT;之后当执行到 irq_common_stub 中的 mov ds, bx 时,被加载的选择器指向已损坏的 GDT 描述符,从而引发故障。

要解决此问题,请删除标签 test 并在链接描述文件中创建一个类似 __kernel_end 的符号,如下所示:

/* The bootloader will look at this image and start execution at the symbol
   designated as the entry point. */
ENTRY(_start)

/* Tell where the various sections of the object files will be put in the final
   kernel image. Output order is .text, .rodata, .data, .bss, each starting on
   a 4 KiB boundary. */
SECTIONS
{
        /* Begin putting sections at 1 MiB, a conventional place for kernels to be
           loaded at by the bootloader. */
        . = 1M;

        /* First put the multiboot header, as it is required to be put very
           early in the image or the bootloader won't recognize the file format.
           Next we'll put the .text section. */
        .text BLOCK(4K) : ALIGN(4K)
        {
                *(.multiboot)
                *(.text)
        }

        /* Read-only data. */
        .rodata BLOCK(4K) : ALIGN(4K)
        {
                *(.rodata)
        }

        /* Read-write data (initialized) */
        .data BLOCK(4K) : ALIGN(4K)
        {
                *(.data)
        }

        /* Read-write data (uninitialized) and stack */
        .bss BLOCK(4K) : ALIGN(4K)
        {
                *(COMMON)
                *(.bss)
        }
        /* First address past .bss, i.e. the end of everything placed above.
           Exported as a symbol so code can take its address. */
        __kernel_end = .;

        /* The compiler may produce other sections, by default it will put them in
           a segment with the same name. Simply add stuff here as needed. */
}

在 boot.s 中,将 push $test 替换为 push $__kernel_end。


注意:如果您希望 __kernel_end(也就是堆空间的起始位置)位于内核之后的 4KB 对齐边界上,可以在链接描述文件中这样写:

__kernel_end = ALIGN(4K);

ALIGN(4K) 将获取当前位置计数器并将其对齐到下一个 4KiB 边界并将该地址分配给符号 __kernel_end.


观察

不清楚为什么在此代码中您切换回 AT&T 语法,然后又切换回 Intel:

.next:
        .att_syntax
        push $__kernel_end
        .intel_syntax noprefix

这可以写成:

.next:
        /* `offset` pushes the address of the symbol; in Intel syntax a bare
           symbol would be a memory operand and push the word stored there */
        push offset __kernel_end